Updated on 2024-08-16 GMT+08:00

Analyzing Impala Data

Function Description

This section describes how to use a sample program to complete an analysis task, using JDBC APIs as an example of how to submit a data analysis task.

Sample Code

If you submit a data analysis task using Impala JDBC APIs, refer to JDBCExample.java in the sample program.
  1. Set the following variable to false to specify that the connected cluster uses normal mode (not security mode) for authentication.
     // Indicates whether the connected cluster uses security mode for authentication.
     // false selects normal mode.
       boolean isSecureVer = false;
  2. Define Impala SQL. Impala SQL must be a single statement and cannot contain ";".
       // Define the Impala SQL statements to run, in order: create the table, count its rows, drop the table.
       // Each entry must be a single statement and cannot contain the semicolon (;).
       String[] sqls = {"CREATE TABLE IF NOT EXISTS employees_info(id INT,name STRING)", 
                "SELECT COUNT(*) FROM employees_info", "DROP TABLE employees_info"}; 
  3. Build JDBC URL.
    // Build the JDBC URL: the jdbc:hive2:// scheme, the Impalad address, then the
    // authentication options as ';'-separated key=value pairs.
    StringBuilder sBuilder = new StringBuilder(
      "jdbc:hive2://").append("impalad_ip").append("/");

    // isSecureVer is the flag declared in step 1 (false = normal mode).
    if (isSecureVer) {
        // Security mode: pass the authentication type and principal taken from clientInfo.
        sBuilder.append(";auth=")
                .append(clientInfo.getAuth())
                .append(";principal=")
                .append(clientInfo.getPrincipal())
                .append(";");
    } else {
        // Normal mode: connect without SASL authentication.
        sBuilder.append(";auth=noSasl");
    }
    String url = sBuilder.toString();

    Note: If the client connects directly to a single Impalad instance, a fault on that instance will cause Impala access to fail.

  4. Load the Hive JDBC driver.
       // Load the Hive JDBC driver. Impala is accessed through it, which is why the
       // JDBC URL uses the jdbc:hive2:// scheme.
       // NOTE(review): HIVE_DRIVER is defined elsewhere in the sample program; presumably
       // it holds the driver class name — confirm against JDBCExample.java.
       Class.forName(HIVE_DRIVER);
  5. Enter a correct username, obtain the JDBC connection, confirm the Impala SQL type (DDL/DML), call APIs to run Impala SQL, return the queried column name and result to the console, and close the JDBC connection.
     
       // Obtain the JDBC connection. try-with-resources closes it automatically when the
       // block exits, whether the statements below succeed or throw.
       // If you set the second parameter to an incorrect username, the anonymous user will be used for login.
       try (Connection connection = DriverManager.getConnection(url, "userName", "")) {
         // Create a table.
         // To import data to a table after the table is created, you can use the LOAD statement.
         // For example, import data from HDFS to the table:
         //   load data inpath '/tmp/employees.txt' overwrite into table employees_info
         execDDL(connection, sqls[0]);
         System.out.println("Create table success!");

         // Query the table and print the column names and rows to the console.
         execDML(connection, sqls[1]);

         // Delete the table.
         execDDL(connection, sqls[2]);
         System.out.println("Delete table success!");
       }
      
     /**
      * Runs a single statement that returns no result set (for example CREATE TABLE or
      * DROP TABLE) on the given connection.
      *
      * <p>The statement is managed by try-with-resources, so it is always closed. If both
      * the execution and the close fail, the execution failure is the exception that
      * propagates and the close failure is attached to it as a suppressed exception.
      *
      * @param connection an open JDBC connection; it is not closed by this method
      * @param sql the statement text to prepare and execute
      * @throws SQLException if preparing, executing, or closing the statement fails
      */
     public static void execDDL(Connection connection, String sql)
       throws SQLException {
       try (PreparedStatement statement = connection.prepareStatement(sql)) {
         statement.execute();
       }
     }
      
       /**
        * Runs a query on the given connection and prints the result to the console.
        *
        * <p>The first output line holds the column labels and each following line holds one
        * row; every value is followed by a tab character.
        *
        * <p>The statement and result set are managed by try-with-resources: the result set
        * is closed first, then the statement, and the statement is still closed if closing
        * the result set fails. A failure while running or reading the query is the
        * exception that propagates; close failures are attached to it as suppressed
        * exceptions.
        *
        * @param connection an open JDBC connection; it is not closed by this method
        * @param sql the query text to prepare and execute
        * @throws SQLException if preparing, executing, reading, or closing fails
        */
       public static void execDML(Connection connection, String sql) throws SQLException {
         // Run the Impala SQL statement.
         try (PreparedStatement statement = connection.prepareStatement(sql);
              ResultSet resultSet = statement.executeQuery()) {
           // Output the queried column names to the console.
           ResultSetMetaData resultMetaData = resultSet.getMetaData();
           int columnCount = resultMetaData.getColumnCount();
           // JDBC column indexes are 1-based.
           for (int i = 1; i <= columnCount; i++) {
             System.out.print(resultMetaData.getColumnLabel(i) + '\t');
           }
           System.out.println();

           // Output the query result to the console, one row per line.
           while (resultSet.next()) {
             for (int i = 1; i <= columnCount; i++) {
               System.out.print(resultSet.getString(i) + '\t');
             }
             System.out.println();
           }
         }
       }