Examples
Sample Code 1 of JDBC Secondary Development
This sample code provides the following functions:
- Provides the username and key file path in the JDBC URL so that the program can automatically perform the security login and create the Hive connection.
For MRS 1.9.2 or earlier, the default ZooKeeper port number is 24002. For details, see the ZooKeeper configurations on MRS Manager.
- Runs HiveQL statements for creating, querying, and deleting tables.
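Both JDBC samples read their connection parameters from hiveclient.properties. For orientation only, the file contains entries of the following form; the values shown here are placeholders, and the real values come from the hiveclient.properties shipped in the config directory of your instance client package:

# Illustrative values only; copy the real entries from your client package.
zk.quorum=xxx.xxx.xxx.xxx:2181,xxx.xxx.xxx.xxx:2181,xxx.xxx.xxx.xxx:2181
auth=KERBEROS
sasl.qop=auth-conf
zooKeeperNamespace=hiveserver2
serviceDiscoveryMode=zooKeeper
principal=hive/hadoop.hadoop.com@HADOOP.COM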
package com.huawei.bigdata.hive.example;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;

import com.huawei.bigdata.security.LoginUtil;

public class JDBCExample {
    private static final String HIVE_DRIVER = "org.apache.hive.jdbc.HiveDriver";
    private static final String ZOOKEEPER_DEFAULT_LOGIN_CONTEXT_NAME = "Client";
    private static final String ZOOKEEPER_SERVER_PRINCIPAL_KEY = "zookeeper.server.principal";
    private static final String ZOOKEEPER_DEFAULT_SERVER_PRINCIPAL = "zookeeper/hadoop";

    private static Configuration CONF = null;
    private static String KRB5_FILE = null;
    private static String USER_NAME = null;
    private static String USER_KEYTAB_FILE = null;

    // List of ZooKeeper node IP addresses and ports
    private static String zkQuorum = null;
    private static String auth = null;
    private static String sasl_qop = null;
    private static String zooKeeperNamespace = null;
    private static String serviceDiscoveryMode = null;
    private static String principal = null;

    private static void init() throws IOException {
        CONF = new Configuration();

        Properties clientInfo = null;
        String userdir = System.getProperty("user.dir") + File.separator + "conf" + File.separator;
        System.out.println(userdir);
        InputStream fileInputStream = null;
        try {
            clientInfo = new Properties();
            // hiveclient.properties is the client configuration file. If the multi-instance feature
            // is used, replace the file with the hiveclient.properties of the corresponding instance client.
            // hiveclient.properties is stored in the config directory of the decompressed installation
            // package of the corresponding instance client.
            String hiveclientProp = userdir + "hiveclient.properties";
            File propertiesFile = new File(hiveclientProp);
            fileInputStream = new FileInputStream(propertiesFile);
            clientInfo.load(fileInputStream);
        } catch (Exception e) {
            throw new IOException(e);
        } finally {
            if (fileInputStream != null) {
                fileInputStream.close();
                fileInputStream = null;
            }
        }
        // The format of zkQuorum is xxx.xxx.xxx.xxx:2181,xxx.xxx.xxx.xxx:2181,xxx.xxx.xxx.xxx:2181.
        // xxx.xxx.xxx.xxx is the service IP address of the node where ZooKeeper resides.
        // The default port is 2181.
        zkQuorum = clientInfo.getProperty("zk.quorum");
        auth = clientInfo.getProperty("auth");
        sasl_qop = clientInfo.getProperty("sasl.qop");
        zooKeeperNamespace = clientInfo.getProperty("zooKeeperNamespace");
        serviceDiscoveryMode = clientInfo.getProperty("serviceDiscoveryMode");
        principal = clientInfo.getProperty("principal");

        // Set USER_NAME to the user created previously. For example, if the created user is userx,
        // set USER_NAME to userx.
        USER_NAME = "userx";

        if ("KERBEROS".equalsIgnoreCase(auth)) {
            // Set the paths of the keytab and krb5 files of the client.
            USER_KEYTAB_FILE = userdir + "user.keytab";
            KRB5_FILE = userdir + "krb5.conf";
            LoginUtil.setJaasConf(ZOOKEEPER_DEFAULT_LOGIN_CONTEXT_NAME, USER_NAME, USER_KEYTAB_FILE);
            LoginUtil.setZookeeperServerPrincipal(ZOOKEEPER_SERVER_PRINCIPAL_KEY, ZOOKEEPER_DEFAULT_SERVER_PRINCIPAL);

            // Security mode: ZooKeeper login authentication
            LoginUtil.login(USER_NAME, USER_KEYTAB_FILE, KRB5_FILE, CONF);
        }
    }

    /**
     * This example shows how to use the Hive JDBC API to run HiveQL commands.
     *
     * @throws ClassNotFoundException
     * @throws IllegalAccessException
     * @throws InstantiationException
     * @throws SQLException
     * @throws IOException
     */
    public static void main(String[] args)
            throws InstantiationException, IllegalAccessException, ClassNotFoundException, SQLException, IOException {
        // Parameter initialization
        init();

        // Define HiveQL. Each HiveQL must be a single statement and cannot contain ";".
        String[] sqls = {"CREATE TABLE IF NOT EXISTS employees_info(id INT,name STRING)",
                "SELECT COUNT(*) FROM employees_info",
                "DROP TABLE employees_info"};

        // Build the JDBC URL.
        StringBuilder sBuilder = new StringBuilder("jdbc:hive2://").append(zkQuorum).append("/");

        if ("KERBEROS".equalsIgnoreCase(auth)) {
            sBuilder.append(";serviceDiscoveryMode=")
                    .append(serviceDiscoveryMode)
                    .append(";zooKeeperNamespace=")
                    .append(zooKeeperNamespace)
                    .append(";sasl.qop=")
                    .append(sasl_qop)
                    .append(";auth=")
                    .append(auth)
                    .append(";principal=")
                    .append(principal)
                    .append(";");
        } else {
            // Normal mode
            sBuilder.append(";serviceDiscoveryMode=")
                    .append(serviceDiscoveryMode)
                    .append(";zooKeeperNamespace=")
                    .append(zooKeeperNamespace)
                    .append(";auth=none");
        }
        String url = sBuilder.toString();

        // Load the Hive JDBC driver.
        Class.forName(HIVE_DRIVER);

        Connection connection = null;
        try {
            System.out.println(url);
            // Obtain the JDBC connection.
            // In normal mode, the second parameter must be set to a correct username; otherwise,
            // the anonymous user is used for login.
            connection = DriverManager.getConnection(url, "", "");

            // Create a table.
            // After the table is created, you can use the LOAD statement to import data,
            // for example, from HDFS into the table:
            // load data inpath '/tmp/employees.txt' overwrite into table employees_info;
            execDDL(connection, sqls[0]);
            System.out.println("Create table success!");

            // Query the table.
            execDML(connection, sqls[1]);

            // Delete the table.
            execDDL(connection, sqls[2]);
            System.out.println("Delete table success!");
        } finally {
            // Close the JDBC connection.
            if (null != connection) {
                connection.close();
            }
        }
    }

    public static void execDDL(Connection connection, String sql) throws SQLException {
        PreparedStatement statement = null;
        try {
            statement = connection.prepareStatement(sql);
            statement.execute();
        } finally {
            if (null != statement) {
                statement.close();
            }
        }
    }

    public static void execDML(Connection connection, String sql) throws SQLException {
        PreparedStatement statement = null;
        ResultSet resultSet = null;
        ResultSetMetaData resultMetaData = null;
        try {
            // Execute HiveQL.
            statement = connection.prepareStatement(sql);
            resultSet = statement.executeQuery();

            // Output the queried column names to the console.
            resultMetaData = resultSet.getMetaData();
            int columnCount = resultMetaData.getColumnCount();
            for (int i = 1; i <= columnCount; i++) {
                System.out.print(resultMetaData.getColumnLabel(i) + '\t');
            }
            System.out.println();

            // Output the query result to the console.
            while (resultSet.next()) {
                for (int i = 1; i <= columnCount; i++) {
                    System.out.print(resultSet.getString(i) + '\t');
                }
                System.out.println();
            }
        } finally {
            if (null != resultSet) {
                resultSet.close();
            }
            if (null != statement) {
                statement.close();
            }
        }
    }
}
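For reference, when auth is set to KERBEROS, the URL assembled by the code above takes roughly the following shape. All hosts and parameter values below are placeholders; the actual values come from hiveclient.properties:

jdbc:hive2://xxx.xxx.xxx.xxx:2181,xxx.xxx.xxx.xxx:2181,xxx.xxx.xxx.xxx:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2;sasl.qop=auth-conf;auth=KERBEROS;principal=hive/hadoop.hadoop.com@HADOOP.COM;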
Sample Code 2 of JDBC Secondary Development
This sample code provides the following functions:
- Creates Hive connections without providing the username and key file path in the JDBC URL; users perform the security login themselves.
- Runs HiveQL statements for creating, querying, and deleting tables.
When accessing ZooKeeper, the program needs a JAAS configuration file, for example, user.hive.jaas.conf, with the following content:
Client {
  com.sun.security.auth.module.Krb5LoginModule required
  useKeyTab=true
  keyTab="D:\\workspace\\jdbc-examples\\conf\\user.keytab"
  principal="xxx@HADOOP.COM"
  useTicketCache=false
  storeKey=true
  debug=true;
};
You need to modify the keyTab path (an absolute path) and principal in the configuration file based on the actual environment, and set the java.security.auth.login.config system property to the file path.
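For example, the system property can be passed as a JVM option when starting the program (the path below is illustrative; use the absolute path of your own JAAS file). Alternatively, set it in code with System.setProperty, as the sample below does:

-Djava.security.auth.login.config=D:\workspace\jdbc-examples\conf\user.hive.jaas.conf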
package com.huawei.bigdata.hive.example;

import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;

public class JDBCExamplePreLogin {
    private static final String HIVE_DRIVER = "org.apache.hive.jdbc.HiveDriver";

    /**
     * This example shows how to use the Hive JDBC API to run HiveQL commands.
     *
     * @throws ClassNotFoundException
     * @throws IllegalAccessException
     * @throws InstantiationException
     * @throws SQLException
     */
    public static void main(String[] args)
            throws InstantiationException, IllegalAccessException, ClassNotFoundException, SQLException, IOException {
        Properties clientInfo = null;
        String userdir = System.getProperty("user.dir") + File.separator + "conf" + File.separator;
        InputStream fileInputStream = null;
        try {
            clientInfo = new Properties();
            // hiveclient.properties is the client configuration file. If the multi-instance feature
            // is used, replace the file with the hiveclient.properties of the corresponding instance client.
            // hiveclient.properties is stored in the config directory of the decompressed installation
            // package of the corresponding instance client.
            String hiveclientProp = userdir + "hiveclient.properties";
            File propertiesFile = new File(hiveclientProp);
            fileInputStream = new FileInputStream(propertiesFile);
            clientInfo.load(fileInputStream);
        } catch (Exception e) {
            throw new IOException(e);
        } finally {
            if (fileInputStream != null) {
                fileInputStream.close();
                fileInputStream = null;
            }
        }
        // The format of zkQuorum is xxx.xxx.xxx.xxx:2181,xxx.xxx.xxx.xxx:2181,xxx.xxx.xxx.xxx:2181.
        // xxx.xxx.xxx.xxx is the service IP address of the node where ZooKeeper resides.
        // The default port is 2181.
        String zkQuorum = clientInfo.getProperty("zk.quorum");
        String auth = clientInfo.getProperty("auth");
        String sasl_qop = clientInfo.getProperty("sasl.qop");
        String zooKeeperNamespace = clientInfo.getProperty("zooKeeperNamespace");
        String serviceDiscoveryMode = clientInfo.getProperty("serviceDiscoveryMode");
        String principal = clientInfo.getProperty("principal");

        // Define HiveQL. Each HiveQL must be a single statement and cannot contain ";".
        String[] sqls = {"CREATE TABLE IF NOT EXISTS employees_info(id INT,name STRING)",
                "SELECT COUNT(*) FROM employees_info",
                "DROP TABLE employees_info"};

        // Build the JDBC URL.
        StringBuilder sBuilder = new StringBuilder("jdbc:hive2://").append(zkQuorum).append("/");

        if ("KERBEROS".equalsIgnoreCase(auth)) {
            // Set the system property java.security.krb5.conf to specify information about the
            // security service to be accessed.
            System.setProperty("java.security.krb5.conf", "conf/krb5.conf");
            // Set the JAAS configuration file. Change the values of keyTab and principal in the
            // user.hive.jaas.conf file based on the site requirements.
            System.setProperty("java.security.auth.login.config", "conf/user.hive.jaas.conf");

            Configuration conf = new Configuration();
            conf.set(HADOOP_SECURITY_AUTHENTICATION, "kerberos");
            conf.set(HADOOP_SECURITY_AUTHORIZATION, "true");

            String PRINCIPAL = "username.client.kerberos.principal";
            String KEYTAB = "username.client.keytab.file";
            // Set the path of the keytab file of the client.
            conf.set(KEYTAB, "conf/user.keytab");
            // Set the principal of the newly created user. The value is a username containing
            // a domain name. For example, if the username is user and the domain is HADOOP.COM,
            // the value is user@HADOOP.COM.
            conf.set(PRINCIPAL, "xxx@xxx");

            // Perform login authentication.
            UserGroupInformation.setConfiguration(conf);
            SecurityUtil.login(conf, KEYTAB, PRINCIPAL);

            // Security mode
            sBuilder.append(";serviceDiscoveryMode=")
                    .append(serviceDiscoveryMode)
                    .append(";zooKeeperNamespace=")
                    .append(zooKeeperNamespace)
                    .append(";sasl.qop=")
                    .append(sasl_qop)
                    .append(";auth=")
                    .append(auth)
                    .append(";principal=")
                    .append(principal)
                    .append(";");
        } else {
            // Normal mode
            sBuilder.append(";serviceDiscoveryMode=")
                    .append(serviceDiscoveryMode)
                    .append(";zooKeeperNamespace=")
                    .append(zooKeeperNamespace)
                    .append(";auth=none");
        }
        String url = sBuilder.toString();

        // Load the Hive JDBC driver.
        Class.forName(HIVE_DRIVER);

        Connection connection = null;
        try {
            // Obtain the JDBC connection.
            // In normal mode, the second parameter must be set to a correct username; otherwise,
            // the anonymous user is used for login.
            connection = DriverManager.getConnection(url, "", "");

            // Create a table.
            // After the table is created, you can use the LOAD statement to import data,
            // for example, from HDFS into the table:
            // load data inpath '/tmp/employees.txt' overwrite into table employees_info;
            execDDL(connection, sqls[0]);
            System.out.println("Create table success!");

            // Query the table.
            execDML(connection, sqls[1]);

            // Delete the table.
            execDDL(connection, sqls[2]);
            System.out.println("Delete table success!");
        } finally {
            // Close the JDBC connection.
            if (null != connection) {
                connection.close();
            }
        }
    }

    public static void execDDL(Connection connection, String sql) throws SQLException {
        PreparedStatement statement = null;
        try {
            statement = connection.prepareStatement(sql);
            statement.execute();
        } finally {
            if (null != statement) {
                statement.close();
            }
        }
    }

    public static void execDML(Connection connection, String sql) throws SQLException {
        PreparedStatement statement = null;
        ResultSet resultSet = null;
        ResultSetMetaData resultMetaData = null;
        try {
            // Execute HiveQL.
            statement = connection.prepareStatement(sql);
            resultSet = statement.executeQuery();

            // Output the queried column names to the console.
            resultMetaData = resultSet.getMetaData();
            int columnCount = resultMetaData.getColumnCount();
            for (int i = 1; i <= columnCount; i++) {
                System.out.print(resultMetaData.getColumnLabel(i) + '\t');
            }
            System.out.println();

            // Output the query result to the console.
            while (resultSet.next()) {
                for (int i = 1; i <= columnCount; i++) {
                    System.out.print(resultSet.getString(i) + '\t');
                }
                System.out.println();
            }
        } finally {
            if (null != resultSet) {
                resultSet.close();
            }
            if (null != statement) {
                statement.close();
            }
        }
    }
}
Sample Code of HCatalog Secondary Development
The following sample code demonstrates how to use the HCatInputFormat and HCatOutputFormat APIs provided by HCatalog to submit a MapReduce job.
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
// On older Hive/HCatalog releases, the package may be org.apache.hcatalog
// instead of org.apache.hive.hcatalog.
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class HCatalogExample extends Configured implements Tool {

    public static class Map extends Mapper<LongWritable, HCatRecord, IntWritable, IntWritable> {
        int age;

        @Override
        protected void map(LongWritable key, HCatRecord value,
                Mapper<LongWritable, HCatRecord, IntWritable, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // Read the first column of the input record as the age, and emit (age, 1).
            age = (Integer) value.get(0);
            context.write(new IntWritable(age), new IntWritable(1));
        }
    }

    public static class Reduce extends Reducer<IntWritable, IntWritable, IntWritable, HCatRecord> {
        @Override
        protected void reduce(IntWritable key, Iterable<IntWritable> values,
                Reducer<IntWritable, IntWritable, IntWritable, HCatRecord>.Context context)
                throws IOException, InterruptedException {
            // Count the records for each age and write an (age, count) record.
            int sum = 0;
            Iterator<IntWritable> iter = values.iterator();
            while (iter.hasNext()) {
                sum++;
                iter.next();
            }
            HCatRecord record = new DefaultHCatRecord(2);
            record.set(0, key.get());
            record.set(1, sum);
            context.write(null, record);
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        String[] otherArgs = args;

        String inputTableName = otherArgs[0];
        String outputTableName = otherArgs[1];
        String dbName = "default";

        @SuppressWarnings("deprecation")
        Job job = new Job(conf, "GroupByDemo");

        HCatInputFormat.setInput(job, dbName, inputTableName);
        job.setInputFormatClass(HCatInputFormat.class);
        job.setJarByClass(HCatalogExample.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(WritableComparable.class);
        job.setOutputValueClass(DefaultHCatRecord.class);

        OutputJobInfo outputjobInfo = OutputJobInfo.create(dbName, outputTableName, null);
        HCatOutputFormat.setOutput(job, outputjobInfo);
        HCatSchema schema = outputjobInfo.getOutputSchema();
        HCatOutputFormat.setSchema(job, schema);
        job.setOutputFormatClass(HCatOutputFormat.class);

        return (job.waitForCompletion(true) ? 0 : 1);
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new HCatalogExample(), args);
        System.exit(exitCode);
    }
}
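The mapper reads an integer from column 0 of each input record, and the reducer emits one (age, count) record per key. The job therefore expects the first column of the input table to be INT and the output table to have two INT columns. A minimal pair of tables might be created as follows; the table and column names here are placeholders, with the table names passed as the two command-line arguments:

CREATE TABLE t1(age INT);
CREATE TABLE t2(age INT, cnt INT);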