更新时间:2024-08-03 GMT+08:00

HDFS C API接口介绍

功能简介

C语言应用开发代码样例中所涉及的文件操作主要包括创建文件、读写文件、追加文件、删除文件。完整和详细的接口请直接参考官网上的描述以了解其使用方法:http://hadoop.apache.org/docs/r2.7.2/hadoop-project-dist/hadoop-hdfs/LibHdfs.html

代码样例

下面代码片段仅为演示,具体代码请参见HDFS的C样例代码“MRS_Services_ClientConfig/HDFS/hdfs-c-example/hdfs_test.c”文件。

  1. 设置HDFS NameNode参数,建立HDFS文件系统连接。
    hdfsFS fs = hdfsConnect("default", 0);
    fprintf(stderr, "hdfsConnect- SUCCESS!\n");
  2. 创建HDFS目录。
    const char* dir = "/nativeTest";
    int exitCode = hdfsCreateDirectory(fs, dir);
    if( exitCode == -1 ){
         fprintf(stderr, "Failed to create directory %s \n", dir );
         exit(-1);
    }
    fprintf(stderr, "hdfsCreateDirectory- SUCCESS! : %s\n", dir);
  3. 写文件。
    const char* file = "/nativeTest/testfile.txt";
    hdfsFile writeFile = hdfsOpenFile(fs, (char*)file, O_WRONLY | O_CREAT, 0, 0, 0);
    fprintf(stderr, "hdfsOpenFile- SUCCESS! for write : %s\n", file);
    
    if(!hdfsFileIsOpenForWrite(writeFile)){
        fprintf(stderr, "Failed to open %s for writing.\n", file);
        exit(-1);
    }
    
    char* buffer = "Hadoop HDFS Native file write!";
    
    hdfsWrite(fs, writeFile, (void*)buffer, strlen(buffer)+1);
    fprintf(stderr, "hdfsWrite- SUCCESS! : %s\n", file);
    
    printf("Flushing file data ....\n");
    if (hdfsFlush(fs, writeFile)) {
         fprintf(stderr, "Failed to 'flush' %s\n", file);
         exit(-1);
    }
    hdfsCloseFile(fs, writeFile);
    fprintf(stderr, "hdfsCloseFile- SUCCESS! : %s\n", file);
  4. 读文件。
    hdfsFile readFile = hdfsOpenFile(fs, (char*)file, O_RDONLY, 100, 0, 0);
    fprintf(stderr, "hdfsOpenFile- SUCCESS! for read : %s\n", file);
    
    if(!hdfsFileIsOpenForRead(readFile)){
        fprintf(stderr, "Failed to open %s for reading.\n", file);
        exit(-1);
    }
    
    buffer = (char *) malloc(100);
    tSize num_read = hdfsRead(fs, readFile, (void*)buffer, 100);
    fprintf(stderr, "hdfsRead- SUCCESS!, Byte read : %d, File contant : %s \n", num_read ,buffer);
    hdfsCloseFile(fs, readFile);
  5. 指定位置开始读文件。
    buffer = (char *) malloc(100);
    readFile = hdfsOpenFile(fs, file, O_RDONLY, 100, 0, 0);
    if (hdfsSeek(fs, readFile, 10)) {
         fprintf(stderr, "Failed to 'seek' %s\n", file);
         exit(-1);
    }
    num_read = hdfsRead(fs, readFile, (void*)buffer, 100);
    fprintf(stderr, "hdfsSeek- SUCCESS!, Byte read : %d, File seek contant : %s \n", num_read ,buffer);
    hdfsCloseFile(fs, readFile);
  6. 拷贝文件。
    const char* destfile = "/nativeTest/testfile1.txt";
    if (hdfsCopy(fs, file, fs, destfile)) {
        fprintf(stderr, "File copy failed, src : %s, des : %s \n", file, destfile);
        exit(-1);
    }
    fprintf(stderr, "hdfsCopy- SUCCESS!, File copied, src : %s, des : %s \n", file, destfile);
  7. 移动文件。
    const char* mvfile = "/nativeTest/testfile2.txt";
    if (hdfsMove(fs, destfile, fs, mvfile )) {
        fprintf(stderr, "File move failed, src : %s, des : %s \n", destfile , mvfile);
        exit(-1);
    }
    fprintf(stderr, "hdfsMove- SUCCESS!, File moved, src : %s, des : %s \n", destfile , mvfile);
  8. 重命名文件。
    const char* renamefile = "/nativeTest/testfile3.txt";
    if (hdfsRename(fs, mvfile, renamefile)) {
         fprintf(stderr, "File rename failed, Old name : %s, New name : %s \n", mvfile, renamefile);
         exit(-1);
    }
    fprintf(stderr, "hdfsRename- SUCCESS!, File renamed, Old name : %s, New name : %s \n", mvfile, renamefile);
  9. 删除文件。
    if (hdfsDelete(fs, renamefile, 0)) {
        fprintf(stderr, "File delete failed : %s \n", renamefile);
        exit(-1);
    }
    fprintf(stderr, "hdfsDelete- SUCCESS!, File deleted : %s\n",renamefile);
  10. 设置副本数。
    if (hdfsSetReplication(fs, file, 10)) {
        fprintf(stderr, "Failed to set replication : %s \n", file );
        exit(-1);
    }
    fprintf(stderr, "hdfsSetReplication- SUCCESS!, Set replication 10 for %s\n",file);
  11. 设置用户、用户组。
    if (hdfsChown(fs, file, "root", "root")) {
       fprintf(stderr, "Failed to set chown : %s \n", file );
       exit(-1);
    }
    fprintf(stderr, "hdfsChown- SUCCESS!, Chown success for %s\n",file);
  12. 设置权限。
    if (hdfsChmod(fs, file, S_IRWXU | S_IRWXG | S_IRWXO)) {
       fprintf(stderr, "Failed to set chmod: %s \n", file );
       exit(-1);
    }
    fprintf(stderr, "hdfsChmod- SUCCESS!, Chmod success for %s\n",file);
  13. 设置文件时间。
    struct timeval now;
    gettimeofday(&now, NULL);
    if (hdfsUtime(fs, file, now.tv_sec, now.tv_sec)) {
       fprintf(stderr, "Failed to set time: %s \n", file );
       exit(-1);
    }
    fprintf(stderr, "hdfsUtime- SUCCESS!, Set time success for %s\n",file);
  14. 获取文件信息。
    hdfsFileInfo *fileInfo = NULL;
    if((fileInfo = hdfsGetPathInfo(fs, file)) != NULL) {
       printFileInfo(fileInfo);
       hdfsFreeFileInfo(fileInfo, 1);
       fprintf(stderr, "hdfsGetPathInfo - SUCCESS!\n");
    }
  15. 遍历目录。
    hdfsFileInfo *fileList = 0;
    int numEntries = 0;
    if((fileList = hdfsListDirectory(fs, dir, &numEntries)) != NULL) {
       int i = 0;
       for(i=0; i < numEntries; ++i) {
           printFileInfo(fileList+i);
       }
       hdfsFreeFileInfo(fileList, numEntries);
    }
    fprintf(stderr, "hdfsListDirectory- SUCCESS!, %s\n", dir);
  16. stream builder接口。
    buffer = (char *) malloc(100);
    struct hdfsStreamBuilder *builder= hdfsStreamBuilderAlloc(fs, (char*)file, O_RDONLY);
    hdfsStreamBuilderSetBufferSize(builder,100);
    hdfsStreamBuilderSetReplication(builder,20);
    hdfsStreamBuilderSetDefaultBlockSize(builder,10485760);
    readFile = hdfsStreamBuilderBuild(builder);
    num_read = hdfsRead(fs, readFile, (void*)buffer, 100);
    fprintf(stderr, "hdfsStreamBuilderBuild- SUCCESS! File read success. Byte read : %d, File contant : %s \n", num_read ,buffer);
    struct hdfsReadStatistics *stats = NULL;
    hdfsFileGetReadStatistics(readFile, &stats);
    fprintf(stderr, "hdfsFileGetReadStatistics- SUCCESS! totalBytesRead : %" PRId64 ", totalLocalBytesRead : %" PRId64 ", totalShortCircuitBytesRead : %" PRId64 ", totalZeroCopyBytesRead : %" PRId64 "\n", stats->totalBytesRead , stats->totalLocalBytesRead, stats->totalShortCircuitBytesRead,  stats->totalZeroCopyBytesRead);
    hdfsFileFreeReadStatistics(stats);
    free(buffer);
  17. 断开HDFS文件系统连接。
    hdfsDisconnect(fs); 

准备运行环境

在节点上安装客户端,例如安装到“/opt/client”目录,安装方法可参考《MapReduce服务用户指南》的“客户端管理”章节。

  1. 确认服务端HDFS组件已经安装,并正常运行。
  2. 客户端运行环境已安装1.7或1.8版本的JDK。
  3. 获取并解压缩安装“MRS_HDFS_Client.tar”包。执行如下命令解压。

    tar -xvf MRS_HDFS_Client.tar

    tar -xvf MRS_HDFS_ClientConfig.tar

    由于不兼容老版本客户端,建议用户获取与服务端集群相同版本的客户端安装包进行安装部署。

  4. 进入解压文件夹,即“MRS_HDFS_ClientConfig”,执行下列命令安装客户端。

    sh install.sh /opt/client

    其中“/opt/client”为用户自定义路径,此处仅为举例。

  5. 进入客户端安装目录“/opt/client”,执行下列命令初始化环境变量。

    source bigdata_env

Linux中编译并运行程序

  1. 进入Linux客户端目录,运行如下命令导入公共环境变量。

    cd /opt/client

    source bigdata_env

  2. 在该目录下用hdfs用户进行命令行认证。
    kinit hdfs

    kinit一次票据时效24小时。24小时后再次运行样例,需要重新执行kinit命令。

  3. 进入“/opt/client/HDFS/hadoop/hdfs-c-example”目录下,运行如下命令导入C客户端环境变量。

    cd /opt/client/HDFS/hadoop/hdfs-c-example

    source component_env_C_example

  4. 清除之前运行生成的目标文件和可执行文件,运行如下命令。

    make clean

    执行结果如下。

    [root@10-120-85-2 hdfs-c-example]# make clean
    rm -f hdfs_test.o
    rm -f hdfs_test
  5. 编译生成新的目标和可执行文件,运行如下命令。

    make(或make all)

    执行结果如下。

    [root@10-120-85-2 hdfs-c-example]# make all
    cc -c -I/opt/client/HDFS/hadoop/include -Wall -o hdfs_test.o hdfs_test.c
    cc -o hdfs_test hdfs_test.o -lhdfs
  6. 运行文件以实现创建文件、读写追加文件和删除文件的功能,运行如下命令。

    make run

    执行结果如下。

    [root@10-120-85-2 hdfs-c-example]# make run
    ./hdfs_test
    hdfsConnect- SUCCESS!
    hdfsCreateDirectory- SUCCESS! : /nativeTest
    hdfsOpenFile- SUCCESS! for write : /nativeTest/testfile.txt
    hdfsWrite- SUCCESS! : /nativeTest/testfile.txt
    Flushing file data ....
    hdfsCloseFile- SUCCESS! : /nativeTest/testfile.txt
    hdfsOpenFile- SUCCESS! for read : /nativeTest/testfile.txt
    hdfsRead- SUCCESS!, Byte read : 31, File contant : Hadoop HDFS Native file write! 
    hdfsSeek- SUCCESS!, Byte read : 21, File seek contant : S Native fi|²* rite! 
    hdfsCopy- SUCCESS!, File copied, src : /nativeTest/testfile.txt, des : /nativeTest/testfile1.txt 
    hdfsMove- SUCCESS!, File moved, src : /nativeTest/testfile1.txt, des : /nativeTest/testfile2.txt 
    hdfsRename- SUCCESS!, File renamed, Old name : /nativeTest/testfile2.txt, New name : /nativeTest/testfile3.txt 
    hdfsDelete- SUCCESS!, File deleted : /nativeTest/testfile3.txt
    hdfsSetReplication- SUCCESS!, Set replication 10 for /nativeTest/testfile.txt
    hdfsChown- SUCCESS!, Chown success for /nativeTest/testfile.txt
    hdfsChmod- SUCCESS!, Chmod success for /nativeTest/testfile.txt
    hdfsUtime- SUCCESS!, Set time success for /nativeTest/testfile.txt
    Name: hdfs://hacluster/nativeTest/testfile.txt, Type: F, Replication: 10, BlockSize: 134217728, Size: 31, LastMod: 1480589792, Owner: root, Group: root, Permissions: 511 (rwxrwxrwx)
    hdfsGetPathInfo - SUCCESS!
    Name: hdfs://hacluster/nativeTest/testfile.txt, Type: F, Replication: 10, BlockSize: 134217728, Size: 31, LastMod: 1480589792, Owner: root, Group: root, Permissions: 511 (rwxrwxrwx)
    hdfsListDirectory- SUCCESS!, /nativeTest
    hdfsTruncateFile- SUCCESS!, /nativeTest/testfile.txt
    Block Size : 134217728 
    hdfsGetDefaultBlockSize- SUCCESS!
    Block Size : 134217728 for file /nativeTest/testfile.txt
    hdfsGetDefaultBlockSizeAtPath- SUCCESS!
    HDFS Capacity : 1569475438758
    hdfsGetCapacity- SUCCESS!
    HDFS Used : 1122248
    hdfsGetCapacity- SUCCESS!
    hdfsExists- SUCCESS! /nativeTest/testfile.txt
    hdfsConfGetStr- SUCCESS : hdfs://hacluster 
    hdfsStreamBuilderBuild- SUCCESS! File read success. Byte read : 31, File contant : Hadoop HDFS Native file write! 
    hdfsFileGetReadStatistics- SUCCESS! totalBytesRead : 31, totalLocalBytesRead : 31, totalShortCircuitBytesRead : 0, totalZeroCopyBytesRead : 0
    [root@10-120-85-2 hdfs-c-example]# make run
    ./hdfs_test
    hdfsConnect- SUCCESS!
    hdfsCreateDirectory- SUCCESS! : /nativeTest
    hdfsOpenFile- SUCCESS! for write : /nativeTest/testfile.txt
    hdfsWrite- SUCCESS! : /nativeTest/testfile.txt
    Flushing file data ....
    hdfsCloseFile- SUCCESS! : /nativeTest/testfile.txt
    hdfsOpenFile- SUCCESS! for read : /nativeTest/testfile.txt
    hdfsRead- SUCCESS!, Byte read : 31, File contant : Hadoop HDFS Native file write! 
    hdfsSeek- SUCCESS!, Byte read : 21, File seek contant : S Native file write! 
    hdfsPread- SUCCESS!, Byte read : 10, File pead contant : S Native f  
    hdfsCopy- SUCCESS!, File copied, src : /nativeTest/testfile.txt, des : /nativeTest/testfile1.txt 
    hdfsMove- SUCCESS!, File moved, src : /nativeTest/testfile1.txt, des : /nativeTest/testfile2.txt 
    hdfsRename- SUCCESS!, File renamed, Old name : /nativeTest/testfile2.txt, New name : /nativeTest/testfile3.txt 
    hdfsDelete- SUCCESS!, File deleted : /nativeTest/testfile3.txt
    hdfsSetReplication- SUCCESS!, Set replication 10 for /nativeTest/testfile.txt
    hdfsChown- SUCCESS!, Chown success for /nativeTest/testfile.txt
    hdfsChmod- SUCCESS!, Chmod success for /nativeTest/testfile.txt
    hdfsUtime- SUCCESS!, Set time success for /nativeTest/testfile.txt
    
    Name: hdfs://hacluster/nativeTest/testfile.txt, Type: F, Replication: 10, BlockSize: 134217728, Size: 31, LastMod: 1500345260, Owner: root, Group: root, Permissions: 511 (rwxrwxrwx)
    hdfsGetPathInfo - SUCCESS!
    
    Name: hdfs://hacluster/nativeTest/testfile.txt, Type: F, Replication: 10, BlockSize: 134217728, Size: 31, LastMod: 1500345260, Owner: root, Group: root, Permissions: 511 (rwxrwxrwx)
    hdfsListDirectory- SUCCESS!, /nativeTest
    hdfsTruncateFile- SUCCESS!, /nativeTest/testfile.txt
    Block Size : 134217728 
    hdfsGetDefaultBlockSize- SUCCESS!
    Block Size : 134217728 for file /nativeTest/testfile.txt
    hdfsGetDefaultBlockSizeAtPath- SUCCESS!
    HDFS Capacity : 102726873909
    hdfsGetCapacity- SUCCESS!
    HDFS Used : 4767076324
    hdfsGetCapacity- SUCCESS!
    hdfsExists- SUCCESS! /nativeTest/testfile.txt
    hdfsConfGetStr- SUCCESS : hdfs://hacluster 
    hdfsStreamBuilderBuild- SUCCESS! File read success. Byte read : 31, File contant : Hadoop HDFS Native file write! 
    hdfsFileGetReadStatistics- SUCCESS! totalBytesRead : 31, totalLocalBytesRead : 0, totalShortCircuitBytesRead : 0, totalZeroCopyBytesRead : 0
  7. 进入debug模式(可选)。

    make gdb

    执行该命令之前需要安装GDB,安装步骤可参考“安装GDB”章节。

    执行结果如下。

    [root@10-120-85-2 hdfs-c-example]# make gdb
    gdb hdfs_test
    GNU gdb (GDB) SUSE (7.5.1-0.7.29)
    Copyright (C) 2012 Free Software Foundation, Inc.
    License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
    This is free software: you are free to change and redistribute it.
    There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
    and "show warranty" for details.
    This GDB was configured as "x86_64-suse-linux".
    For bug reporting instructions, please see:
    <http://www.gnu.org/software/gdb/bugs/>...
    Reading symbols from /opt/hadoop-client/HDFS/hadoop/hdfs-c-example/hdfs_test...done.
    (gdb) 
    [root@10-120-85-2 hdfs-c-example]# make gdb
    gdb hdfs_test
    GNU gdb (GDB) SUSE (7.5.1-0.7.29)
    Copyright (C) 2012 Free Software Foundation, Inc.
    License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
    This is free software: you are free to change and redistribute it.
    There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
    and "show warranty" for details.
    This GDB was configured as "x86_64-suse-linux".
    For bug reporting instructions, please see:
    <http://www.gnu.org/software/gdb/bugs/>...
    Reading symbols from /opt/client/HDFS/hadoop/hdfs-c-example/hdfs_test...done.
    (gdb)

安装GDB

  1. 下载GDB的依赖包termcap的源代码。

    wget https://ftp.gnu.org/gnu/termcap/termcap-1.3.1.tar.gz

  2. 解压termcap源码。

    tar -zxvf termcap-1.3.1.tar.gz

  3. 编译安装termcap。

    cd termcap-1.3.1/

    ./configure && make && make install

  4. 下载GDB源码。

    cd ~

    wget https://ftp.gnu.org/gnu/gdb/gdb-7.6.1.tar.gz

  5. 解压GDB源码

    tar -zxvf gdb-7.6.1.tar.gz

  6. 编译安装GDB。

    cd gdb-7.6.1/

    ./configure && make && make install

  7. 查看GDB是否安装成功。

    gdb --version

    打印出gdb版本信息即为安装成功。