
Initializing the HDFS

Function

Initializing the Hadoop distributed file system (HDFS) is a prerequisite for using the application programming interfaces (APIs) provided by HDFS. HDFS initialization consists of the following steps (a minimal sketch is provided after the list):

  1. Load the HDFS service configuration file.
  2. Instantiate a FileSystem.
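
A minimal sketch of these two steps is shown below. It assumes the configuration files sit in a conf directory next to the application; the paths are placeholders, not the constants used in the sample project.

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;

 // 1. Load the HDFS service configuration files.
 Configuration conf = new Configuration();
 conf.addResource(new Path("conf/core-site.xml"));
 conf.addResource(new Path("conf/hdfs-site.xml"));

 // 2. Instantiate a FileSystem bound to this configuration.
 //    In security mode, complete Kerberos authentication (see below) before this call.
 FileSystem fSystem = FileSystem.get(conf);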

Obtain the keytab file for Kerberos security authentication in advance.

Configuration File Description

Table 1 lists the configuration files used to log in to HDFS. These files have already been imported to the conf directory of the hdfs-example-security project.

Table 1 Configuration files

File              Function
core-site.xml     Configures HDFS parameters.
hdfs-site.xml     Configures HDFS parameters.
smallfs-site.xml  Configures SmallFS parameters.
user.keytab       Provides HDFS user information for Kerberos security authentication.
krb5.conf         Contains Kerberos server configuration information.

  • Different clusters cannot share the same user.keytab and krb5.conf files.
  • The log4j.properties file in the conf directory can be modified as required.

Example Code

The following are code snippets. For the complete code, see the HdfsExample class in the com.huawei.bigdata.hdfs.examples package.

The initialization code is the same whether the application runs in Linux or Windows. Example code:
 // Complete initialization and authentication.
 confLoad();
 authentication();
 // Create an HdfsExample instance.
 HdfsExample hdfs_examples = new HdfsExample("/user/hdfs-examples", "test.txt");


 /**
  * If the application runs in Linux, the paths of core-site.xml and hdfs-site.xml
  * must be changed to the absolute paths of these files on the Linux client.
  */
 private static void confLoad() throws IOException {
   conf = new Configuration();
   // Load the configuration files.
   conf.addResource(new Path(PATH_TO_HDFS_SITE_XML));
   conf.addResource(new Path(PATH_TO_CORE_SITE_XML));
   // conf.addResource(new Path(PATH_TO_SMALL_SITE_XML));
 }

 /**
  * Security authentication.
  */
 private static void authentication() throws IOException {
   // security mode
   if ("kerberos".equalsIgnoreCase(conf.get("hadoop.security.authentication"))) {
     System.setProperty("java.security.krb5.conf", PATH_TO_KRB5_CONF);
     LoginUtil.login(PRNCIPAL_NAME, PATH_TO_KEYTAB, PATH_TO_KRB5_CONF, conf);
   }
 }
 /**
  * Create the example instance.
  */
 public HdfsExample(String path, String fileName) throws IOException  {
   this.DEST_PATH = path;
   this.FILE_NAME = fileName;
   instanceBuild();
 }

 private void instanceBuild() throws IOException {
   fSystem = FileSystem.get(conf);
 }
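
The snippets above rely on constants and fields (conf, fSystem, PATH_TO_CORE_SITE_XML, PATH_TO_HDFS_SITE_XML, PATH_TO_KRB5_CONF, PATH_TO_KEYTAB, PRNCIPAL_NAME, DEST_PATH, FILE_NAME) declared elsewhere in the HdfsExample class. The declarations below are illustrative only; use the actual paths of your client files and the principal of the user created for your application.

 // Illustrative declarations; the values here are placeholders.
 private static Configuration conf = null;
 private static final String PATH_TO_CORE_SITE_XML = "conf/core-site.xml";
 private static final String PATH_TO_HDFS_SITE_XML = "conf/hdfs-site.xml";
 private static final String PATH_TO_KRB5_CONF = "conf/krb5.conf";
 private static final String PATH_TO_KEYTAB = "conf/user.keytab";
 // Principal of the user created for this application (placeholder name).
 private static final String PRNCIPAL_NAME = "hdfsuser";
 private FileSystem fSystem;   // created in instanceBuild()
 private String DEST_PATH;     // target directory in HDFS
 private String FILE_NAME;     // target file name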
The login example code must be added for the first login, whether the application runs in Windows or Linux. For details, see the LoginUtil class in the com.huawei.hadoop.security package.
public synchronized static void login(String userPrincipal, String userKeytabPath, String krb5ConfPath, Configuration conf)
        throws IOException
    {
        // 1. Check the input parameters.

        if ((userPrincipal == null) || (userPrincipal.length() <= 0))
        {
            LOG.error("input userPrincipal is invalid.");
            throw new IOException("input userPrincipal is invalid.");
        }
        
        if ((userKeytabPath == null) || (userKeytabPath.length() <= 0))
        {
            LOG.error("input userKeytabPath is invalid.");
            throw new IOException("input userKeytabPath is invalid.");
        }
                
        if ((krb5ConfPath == null) || (krb5ConfPath.length() <= 0))
        {
            LOG.error("input krb5ConfPath is invalid.");
            throw new IOException("input krb5ConfPath is invalid.");
        }
        
        if (conf == null)
        {
            LOG.error("input conf is invalid.");
            throw new IOException("input conf is invalid.");
        }
        
        // 2. Check whether the keytab and krb5.conf files exist.

        File userKeytabFile = new File(userKeytabPath);
        if (!userKeytabFile.exists())
        {
            LOG.error("userKeytabFile(" + userKeytabFile.getAbsolutePath() + ") does not exsit.");
            throw new IOException("userKeytabFile(" + userKeytabFile.getAbsolutePath() + ") does not exsit.");
        }
        if (!userKeytabFile.isFile())
        {
            LOG.error("userKeytabFile(" + userKeytabFile.getAbsolutePath() + ") is not a file.");
            throw new IOException("userKeytabFile(" + userKeytabFile.getAbsolutePath() + ") is not a file.");
        }
        
        File krb5ConfFile = new File(krb5ConfPath);
        if (!krb5ConfFile.exists())
        {
            LOG.error("krb5ConfFile(" + krb5ConfFile.getAbsolutePath() + ") does not exsit.");
            throw new IOException("krb5ConfFile(" + krb5ConfFile.getAbsolutePath() + ") does not exsit.");
        }
        if (!krb5ConfFile.isFile())
        {
            LOG.error("krb5ConfFile(" + krb5ConfFile.getAbsolutePath() + ") is not a file.");
            throw new IOException("krb5ConfFile(" + krb5ConfFile.getAbsolutePath() + ") is not a file.");
        }
        
        // 3. Set and check the krb5 configuration.

        setKrb5Config(krb5ConfFile.getAbsolutePath());        
        setConfiguration(conf);
        
        // 4. Log in to Hadoop.

        loginHadoop(userPrincipal, userKeytabFile.getAbsolutePath());
        LOG.info("Login success!!!!!!!!!!!!!!");
}
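
The login method calls the setKrb5Config, setConfiguration, and loginHadoop helpers of the LoginUtil class. The following is only a plausible sketch of these helpers, assuming they wrap the standard Hadoop Kerberos login APIs; the actual implementation in com.huawei.hadoop.security.LoginUtil may differ.

// Imports assumed at the top of the class file:
// import java.io.IOException;
// import org.apache.hadoop.conf.Configuration;
// import org.apache.hadoop.security.UserGroupInformation;

private static void setKrb5Config(String krb5ConfFile)
{
    // Point the JVM at the Kerberos configuration file.
    System.setProperty("java.security.krb5.conf", krb5ConfFile);
}

private static void setConfiguration(Configuration conf)
{
    // Hand the Hadoop configuration (hadoop.security.authentication=kerberos) to UserGroupInformation.
    UserGroupInformation.setConfiguration(conf);
}

private static void loginHadoop(String userPrincipal, String userKeytabPath) throws IOException
{
    // Log in from the keytab; the resulting login user is cached by UserGroupInformation.
    UserGroupInformation.loginUserFromKeytab(userPrincipal, userKeytabPath);
}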