Updated on 2024-08-16 GMT+08:00

HDFS C APIs

Function Description

In the C language application development sample code, file operations include creating, reading, writing, appending, and deleting files. For details about related APIs, visit http://hadoop.apache.org/docs/r2.7.2/hadoop-project-dist/hadoop-hdfs/LibHdfs.html.

Sample Code

The following code snippets are used as an example. For complete code, see the HDFS C sample code in the MRS_Services_ClientConfig/HDFS/hdfs-c-example/hdfs_test.c file.

  1. Configure the HDFS NameNode parameters and create a connection to HDFS.
    hdfsFS fs = hdfsConnect("default", 0);
    fprintf(stderr, "hdfsConnect- SUCCESS!\n");
  2. Create an HDFS directory.
    const char* dir = "/nativeTest";
    int exitCode = hdfsCreateDirectory(fs, dir);
    if( exitCode == -1 ){
         fprintf(stderr, "Failed to create directory %s \n", dir );
         exit(-1);
    }
    fprintf(stderr, "hdfsCreateDirectory- SUCCESS! : %s\n", dir);
  3. Write data to a file.
    const char* file = "/nativeTest/testfile.txt";
    hdfsFile writeFile = openFile(fs, (char*)file, O_WRONLY |O_CREAT, 0, 0, 0);
    fprintf(stderr, "hdfsOpenFile- SUCCESS! for write : %s\n", file);
    
    if(!hdfsFileIsOpenForWrite(writeFile)){
        fprintf(stderr, "Failed to open %s for writing.\n", file);
        exit(-1);
    }
    
    char* buffer = "Hadoop HDFS Native file write!";
    
    hdfsWrite(fs, writeFile, (void*)buffer, strlen(buffer)+1);
    fprintf(stderr, "hdfsWrite- SUCCESS! : %s\n", file);
    
    printf("Flushing file data ....\n");
    if (hdfsFlush(fs, writeFile)) {
         fprintf(stderr, "Failed to 'flush' %s\n", file);
         exit(-1);
    }
    hdfsCloseFile(fs, writeFile);
    fprintf(stderr, "hdfsCloseFile- SUCCESS! : %s\n", file);
  4. Read a file.
    hdfsFile readFile = openFile(fs, (char*)file, O_RDONLY, 100, 0, 0);
    fprintf(stderr, "hdfsOpenFile- SUCCESS! for read : %s\n", file);
    
    if(!hdfsFileIsOpenForRead(readFile)){
        fprintf(stderr, "Failed to open %s for reading.\n", file);
        exit(-1);
    }
    
    buffer = (char *) malloc(100);
    tSize num_read = hdfsRead(fs, readFile, (void*)buffer, 100);
    fprintf(stderr, "hdfsRead- SUCCESS!, Byte read : %d, File contant : %s \n", num_read ,buffer);
    hdfsCloseFile(fs, readFile);
  5. Specify a position from which to start reading a file.
    buffer = (char *) malloc(100);
    readFile = openFile(fs, file, O_RDONLY, 100, 0, 0);
    if (hdfsSeek(fs, readFile, 10)) {
         fprintf(stderr, "Failed to 'seek' %s\n", file);
         exit(-1);
    }
    num_read = hdfsRead(fs, readFile, (void*)buffer, 100);
    fprintf(stderr, "hdfsSeek- SUCCESS!, Byte read : %d, File seek contant : %s \n", num_read ,buffer);
    hdfsCloseFile(fs, readFile);
  6. Copy a file.
    const char* destfile = "/nativeTest/testfile1.txt";
    if (hdfsCopy(fs, file, fs, destfile)) {
        fprintf(stderr, "File copy failed, src : %s, des : %s \n", file, destfile);
        exit(-1);
    }
    fprintf(stderr, "hdfsCopy- SUCCESS!, File copied, src : %s, des : %s \n", file, destfile);
  7. Move a file.
    const char* mvfile = "/nativeTest/testfile2.txt";
    if (hdfsMove(fs, destfile, fs, mvfile )) {
        fprintf(stderr, "File move failed, src : %s, des : %s \n", destfile , mvfile);
        exit(-1);
    }
    fprintf(stderr, "hdfsMove- SUCCESS!, File moved, src : %s, des : %s \n", destfile , mvfile);
  8. Rename a file.
    const char* renamefile = "/nativeTest/testfile3.txt";
    if (hdfsRename(fs, mvfile, renamefile)) {
         fprintf(stderr, "File rename failed, Old name : %s, New name : %s \n", mvfile, renamefile);
         exit(-1);
    }
    fprintf(stderr, "hdfsRename- SUCCESS!, File renamed, Old name : %s, New name : %s \n", mvfile, renamefile);
  9. Delete a file.
    if (hdfsDelete(fs, renamefile, 0)) {
        fprintf(stderr, "File delete failed : %s \n", renamefile);
        exit(-1);
    }
    fprintf(stderr, "hdfsDelete- SUCCESS!, File deleted : %s\n",renamefile);
  10. Set the number of file replicas.
    if (hdfsSetReplication(fs, file, 10)) {
        fprintf(stderr, "Failed to set replication : %s \n", file );
        exit(-1);
    }
    fprintf(stderr, "hdfsSetReplication- SUCCESS!, Set replication 10 for %s\n",file);
  11. Set the owner and group of a file.
    if (hdfsChown(fs, file, "root", "root")) {
       fprintf(stderr, "Failed to set chown : %s \n", file );
       exit(-1);
    }
    fprintf(stderr, "hdfsChown- SUCCESS!, Chown success for %s\n",file);
  12. Set permissions.
    if (hdfsChmod(fs, file, S_IRWXU | S_IRWXG | S_IRWXO)) {
       fprintf(stderr, "Failed to set chmod: %s \n", file );
       exit(-1);
    }
    fprintf(stderr, "hdfsChmod- SUCCESS!, Chmod success for %s\n",file);
  13. Set the file time.
    struct timeval now;
    gettimeofday(&now, NULL);
    if (hdfsUtime(fs, file, now.tv_sec, now.tv_sec)) {
       fprintf(stderr, "Failed to set time: %s \n", file );
       exit(-1);
    }
    fprintf(stderr, "hdfsUtime- SUCCESS!, Set time success for %s\n",file);
  14. Obtain file information.
    hdfsFileInfo *fileInfo = NULL;
    if((fileInfo = hdfsGetPathInfo(fs, file)) != NULL) {
       printFileInfo(fileInfo);
       hdfsFreeFileInfo(fileInfo, 1);
       fprintf(stderr, "hdfsGetPathInfo - SUCCESS!\n");
    }
  15. Traverse a directory.
    hdfsFileInfo *fileList = 0;
    int numEntries = 0;
    if((fileList = hdfsListDirectory(fs, dir, &numEntries)) != NULL) {
       int i = 0;
       for(i=0; i < numEntries; ++i) {
           printFileInfo(fileList+i);
       }
       hdfsFreeFileInfo(fileList, numEntries);
    }
    fprintf(stderr, "hdfsListDirectory- SUCCESS!, %s\n", dir);
  16. Stream builder API
    buffer = (char *) malloc(100);
    struct hdfsStreamBuilder *builder= hdfsStreamBuilderAlloc(fs, (char*)file, O_RDONLY);
    hdfsStreamBuilderSetBufferSize(builder,100);
    hdfsStreamBuilderSetReplication(builder,20);
    hdfsStreamBuilderSetDefaultBlockSize(builder,10485760);
    readFile = hdfsStreamBuilderBuild(builder);
    num_read = hdfsRead(fs, readFile, (void*)buffer, 100);
    fprintf(stderr, "hdfsStreamBuilderBuild- SUCCESS! File read success. Byte read : %d, File contant : %s \n", num_read ,buffer);
    struct hdfsReadStatistics *stats = NULL;
    hdfsFileGetReadStatistics(readFile, &stats);
    fprintf(stderr, "hdfsFileGetReadStatistics- SUCCESS! totalBytesRead : %" PRId64 ", totalLocalBytesRead : %" PRId64 ", totalShortCircuitBytesRead : %" PRId64 ", totalZeroCopyBytesRead : %" PRId64 "\n", stats->totalBytesRead , stats->totalLocalBytesRead, stats->totalShortCircuitBytesRead,  stats->totalZeroCopyBytesRead);
    hdfsFileFreeReadStatistics(stats);
    free(buffer);
  17. Disconnect HDFS.
    hdfsDisconnect(fs); 

Preparing an Operating Environment

Install the client on the node, for example, to the /opt/client directory. For details about how to install the client, see Client Management in the MapReduce Service User Guide.

  1. You have installed HDFS on the server and confirmed that HDFS is running properly.
  2. JDK 1.7 or 1.8 has been installed on the client.
  3. Obtain the MRS_HDFS_Client.tar installation package. Run the following commands to decompress the package:

    tar -xvf MRS_HDFS_Client.tar

    tar -xvf MRS_HDFS_ClientConfig.tar

    You are advised to install a client of the same version as the cluster on the server to avoid version incompatibility.

  4. Go to the MRS_HDFS_ClientConfig decompressed folder and run the following command to install the client:

    sh install.sh /opt/client

    In the preceding command, /opt/client is an example user-defined path.

  5. Go to the /opt/client client installation directory and run the following command to initialize the environment variables:

    source bigdata_env

Compiling and Running Applications on Linux

  1. Go to the Linux client directory and run the following commands to import public environment variables:

    cd /opt/client

    source bigdata_env

  2. Run the following command as user hdfs to perform command line authentication:
    kinit hdfs

    A kinit authentication is valid for 24 hours. If more than 24 hours have passed since the last authentication, run the kinit command again before running the sample application.

  3. Go to the /opt/client/HDFS/hadoop/hdfs-c-example directory and run the following commands to import the environment variables of client C:

    cd /opt/client/HDFS/hadoop/hdfs-c-example

    source component_env_C_example

  4. Run the following command to clear the generated target files and executable files:

    make clean

    The command output is as follows:

    [root@10-120-85-2 hdfs-c-example]# make clean
    rm -f hdfs_test.o
    rm -f hdfs_test
  5. Run the following command to generate a new target and an executable file:

    make (or make all)

    The command output is as follows:

    [root@10-120-85-2 hdfs-c-example]# make all
    cc -c -I/opt/client/HDFS/hadoop/include -Wall -o hdfs_test.o hdfs_test.c
    cc -o hdfs_test hdfs_test.o -lhdfs
  6. Run the following command to execute the program, which creates, reads, writes, appends, and deletes files:

    make run

    The command output is as follows:

    [root@10-120-85-2 hdfs-c-example]# make run
    ./hdfs_test
    hdfsConnect- SUCCESS!
    hdfsCreateDirectory- SUCCESS! : /nativeTest
    hdfsOpenFile- SUCCESS! for write : /nativeTest/testfile.txt
    hdfsWrite- SUCCESS! : /nativeTest/testfile.txt
    Flushing file data ....
    hdfsCloseFile- SUCCESS! : /nativeTest/testfile.txt
    hdfsOpenFile- SUCCESS! for read : /nativeTest/testfile.txt
    hdfsRead- SUCCESS!, Byte read : 31, File contant : Hadoop HDFS Native file write! 
    hdfsSeek- SUCCESS!, Byte read : 21, File seek contant : S Native fi|²* rite! 
    hdfsCopy- SUCCESS!, File copied, src : /nativeTest/testfile.txt, des : /nativeTest/testfile1.txt 
    hdfsMove- SUCCESS!, File moved, src : /nativeTest/testfile1.txt, des : /nativeTest/testfile2.txt 
    hdfsRename- SUCCESS!, File renamed, Old name : /nativeTest/testfile2.txt, New name : /nativeTest/testfile3.txt 
    hdfsDelete- SUCCESS!, File deleted : /nativeTest/testfile3.txt
    hdfsSetReplication- SUCCESS!, Set replication 10 for /nativeTest/testfile.txt
    hdfsChown- SUCCESS!, Chown success for /nativeTest/testfile.txt
    hdfsChmod- SUCCESS!, Chmod success for /nativeTest/testfile.txt
    hdfsUtime- SUCCESS!, Set time success for /nativeTest/testfile.txt
    Name: hdfs://hacluster/nativeTest/testfile.txt, Type: F, Replication: 10, BlockSize: 134217728, Size: 31, LastMod: 1480589792, Owner: root, Group: root, Permissions: 511 (rwxrwxrwx)
    hdfsGetPathInfo - SUCCESS!
    Name: hdfs://hacluster/nativeTest/testfile.txt, Type: F, Replication: 10, BlockSize: 134217728, Size: 31, LastMod: 1480589792, Owner: root, Group: root, Permissions: 511 (rwxrwxrwx)
    hdfsListDirectory- SUCCESS!, /nativeTest
    hdfsTruncateFile- SUCCESS!, /nativeTest/testfile.txt
    Block Size : 134217728 
    hdfsGetDefaultBlockSize- SUCCESS!
    Block Size : 134217728 for file /nativeTest/testfile.txt
    hdfsGetDefaultBlockSizeAtPath- SUCCESS!
    HDFS Capacity : 1569475438758
    hdfsGetCapacity- SUCCESS!
    HDFS Used : 1122248
    hdfsGetCapacity- SUCCESS!
    hdfsExists- SUCCESS! /nativeTest/testfile.txt
    hdfsConfGetStr- SUCCESS : hdfs://hacluster 
    hdfsStreamBuilderBuild- SUCCESS! File read success. Byte read : 31, File contant : Hadoop HDFS Native file write! 
    hdfsFileGetReadStatistics- SUCCESS! totalBytesRead : 31, totalLocalBytesRead : 31, totalShortCircuitBytesRead : 0, totalZeroCopyBytesRead : 0
    [root@10-120-85-2 hdfs-c-example]# make run
    ./hdfs_test
    hdfsConnect- SUCCESS!
    hdfsCreateDirectory- SUCCESS! : /nativeTest
    hdfsOpenFile- SUCCESS! for write : /nativeTest/testfile.txt
    hdfsWrite- SUCCESS! : /nativeTest/testfile.txt
    Flushing file data ....
    hdfsCloseFile- SUCCESS! : /nativeTest/testfile.txt
    hdfsOpenFile- SUCCESS! for read : /nativeTest/testfile.txt
    hdfsRead- SUCCESS!, Byte read : 31, File contant : Hadoop HDFS Native file write! 
    hdfsSeek- SUCCESS!, Byte read : 21, File seek contant : S Native file write! 
    hdfsPread- SUCCESS!, Byte read : 10, File pead contant : S Native f  
    hdfsCopy- SUCCESS!, File copied, src : /nativeTest/testfile.txt, des : /nativeTest/testfile1.txt 
    hdfsMove- SUCCESS!, File moved, src : /nativeTest/testfile1.txt, des : /nativeTest/testfile2.txt 
    hdfsRename- SUCCESS!, File renamed, Old name : /nativeTest/testfile2.txt, New name : /nativeTest/testfile3.txt 
    hdfsDelete- SUCCESS!, File deleted : /nativeTest/testfile3.txt
    hdfsSetReplication- SUCCESS!, Set replication 10 for /nativeTest/testfile.txt
    hdfsChown- SUCCESS!, Chown success for /nativeTest/testfile.txt
    hdfsChmod- SUCCESS!, Chmod success for /nativeTest/testfile.txt
    hdfsUtime- SUCCESS!, Set time success for /nativeTest/testfile.txt
    
    Name: hdfs://hacluster/nativeTest/testfile.txt, Type: F, Replication: 10, BlockSize: 134217728, Size: 31, LastMod: 1500345260, Owner: root, Group: root, Permissions: 511 (rwxrwxrwx)
    hdfsGetPathInfo - SUCCESS!
    
    Name: hdfs://hacluster/nativeTest/testfile.txt, Type: F, Replication: 10, BlockSize: 134217728, Size: 31, LastMod: 1500345260, Owner: root, Group: root, Permissions: 511 (rwxrwxrwx)
    hdfsListDirectory- SUCCESS!, /nativeTest
    hdfsTruncateFile- SUCCESS!, /nativeTest/testfile.txt
    Block Size : 134217728 
    hdfsGetDefaultBlockSize- SUCCESS!
    Block Size : 134217728 for file /nativeTest/testfile.txt
    hdfsGetDefaultBlockSizeAtPath- SUCCESS!
    HDFS Capacity : 102726873909
    hdfsGetCapacity- SUCCESS!
    HDFS Used : 4767076324
    hdfsGetCapacity- SUCCESS!
    hdfsExists- SUCCESS! /nativeTest/testfile.txt
    hdfsConfGetStr- SUCCESS : hdfs://hacluster 
    hdfsStreamBuilderBuild- SUCCESS! File read success. Byte read : 31, File contant : Hadoop HDFS Native file write! 
    hdfsFileGetReadStatistics- SUCCESS! totalBytesRead : 31, totalLocalBytesRead : 0, totalShortCircuitBytesRead : 0, totalZeroCopyBytesRead : 0
  7. (Optional) Enter the debug mode.

    make gdb

    Before running this command, you need to install GDB. For details, see Installing GDB.

    The command output is as follows:

    [root@10-120-85-2 hdfs-c-example]# make gdb
    gdb hdfs_test
    GNU gdb (GDB) SUSE (7.5.1-0.7.29)
    Copyright (C) 2012 Free Software Foundation, Inc.
    License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
    This is free software: you are free to change and redistribute it.
    There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
    and "show warranty" for details.
    This GDB was configured as "x86_64-suse-linux".
    For bug reporting instructions, please see:
    <http://www.gnu.org/software/gdb/bugs/>...
    Reading symbols from /opt/hadoop-client/HDFS/hadoop/hdfs-c-example/hdfs_test...done.
    (gdb) 
    [root@10-120-85-2 hdfs-c-example]# make gdb
    gdb hdfs_test
    GNU gdb (GDB) SUSE (7.5.1-0.7.29)
    Copyright (C) 2012 Free Software Foundation, Inc.
    License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
    This is free software: you are free to change and redistribute it.
    There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
    and "show warranty" for details.
    This GDB was configured as "x86_64-suse-linux".
    For bug reporting instructions, please see:
    <http://www.gnu.org/software/gdb/bugs/>...
    Reading symbols from /opt/client/HDFS/hadoop/hdfs-c-example/hdfs_test...done.
    (gdb)

Installing GDB

  1. Download the source code of the termcap package on which GDB depends.

    wget https://ftp.gnu.org/gnu/termcap/termcap-1.3.1.tar.gz

  2. Decompress the termcap source code.

    tar -zxvf termcap-1.3.1.tar.gz

  3. Compile and install termcap.

    cd termcap-1.3.1/

    ./configure && make && make install

  4. Download the GDB source code.

    cd ~

    wget https://ftp.gnu.org/gnu/gdb/gdb-7.6.1.tar.gz

  5. Decompress the GDB source code.

    tar -zxvf gdb-7.6.1.tar.gz

  6. Compile and install the GDB.

    cd gdb-7.6.1/

    ./configure && make && make install

  7. Check whether the GDB installation is successful.

    gdb --version

    If the GDB version information is displayed, the installation is successful.