Updated on 2022-11-18 GMT+08:00

Examples

Set Configuration parameters

To set up a connection between an HBase Client and the HBase Server, set the following parameters:

  • hbase.zookeeper.quorum: IP address of Zookeeper. If there are multiple Zookeeper nodes, separate multiple IP addresses by a comma (,).
  • hbase.zookeeper.property.clientPort: Port of Zookeeper.

The Configuration instance created by using HBaseConfiguration.create() will be automatically loaded with the configuration items in the following files:

  • core-default.xml
  • core-site.xml
  • hbase-default.xml
  • hbase-site.xml

Save these configuration files in Source Folder. To create a Source Folder, create a resource folder in the project, right-click the folder, and choose Mark Directory as > Resources Root.

The following table describes the parameters to be configured on the client.

Do not change the values of these parameters.

Parameter

Description

hbase.client.pause

Specifies the time to wait before a retry is performed when an exception occurs. The actual time is calculated based on this value and the number of retries.

hbase.client.retries.number

Specifies the number of retries to be performed when an exception occurs.

hbase.client.retries.longer.multiplier

This parameter is related to the number of retries.

hbase.client.rpc.maxattempts

Specifies the number of retries when the RPC request is not successfully sent.

hbase.regionserver.lease.period

This parameter (in ms) is related to the Scanner timeout period.

hbase.client.write.buffer

This parameter is invalid if AutoFlush is enabled. If AutoFlush is disabled, the HBase Client caches the data to be written. When the size of the data cached reaches the specified limit, the HBase Client initiates a write operation to the HBase cluster.

hbase.client.scanner.caching

Specifies the number of rows allowed for a next request during a Scan.

hbase.client.keyvalue.maxsize

Specifies the maximum size of a KeyValue.

hbase.htable.threads.max

Specifies the maximum number of threads related to data operations in an HTable instance.

hbase.client.prefetch.limit

Before reading or writing data, the client must obtain the address of the Region. Therefore, a client can have some Region addresses pre-cached. This parameter is related to the configuration of the number of Region addresses pre-cached.

Example:

// Create the Configuration instance. As described above, HBaseConfiguration.create() automatically loads
// the configuration items in core-default.xml, core-site.xml, hbase-default.xml and hbase-site.xml.
hbaseConfig = HBaseConfiguration.create();
// You do not need to set the following parameters if they are specified in the configuration files.
// IP addresses of the ZooKeeper nodes, separated by commas.
hbaseConfig.set("hbase.zookeeper.quorum", "172.16.100.1,172.16.100.2,172.16.100.3");
// Port of ZooKeeper.
hbaseConfig.set("hbase.zookeeper.property.clientPort", "2181");

Use HTablePool in multi-thread write operations

Use HTablePool for multiple data write threads. Observe the following when using HTablePool to perform multi-thread write operations:

  1. Enable multiple data write threads to share the same HTablePool instance.
  2. Specify maxSize of the HTableInterface instance when instantiating HTablePool. That is, instantiate the class using the following constructor function:

    public HTablePool(final Configuration config, final int maxSize)

    The value of maxSize can be determined based on Threads (the number of data write threads) and Tables (the number of user tables). Generally, maxSize cannot be greater than the product of Threads and Tables. (maxSize <= Threads x Tables)

  3. The client thread obtains an HTableInterface instance with the table name of tableName using HTablePool#getTable(tableName).
  4. An HTableInterface instance can be used by only one thread at a time.
  5. When an HTableInterface instance is no longer needed, call HTablePool#putTable(HTableInterface table) to release it.

Example:

/**
* A certain number of retries is required after a data write failure. The time to wait before each retry is
* determined based on the number of retries performed. This table holds one entry per retry.
* NOTE(review): presumably each entry is a multiplier applied to PAUSE_UNIT by getWaitTime() - confirm there.
*/
private static final int[] RETRIES_WAITTIME = {1, 1, 1, 2, 2, 4, 4, 8, 16, 32};
/**
* The number of write attempts made for one batch in run().
* It equals the number of entries in RETRIES_WAITTIME; keep the two in sync.
*/
private static final int RETRIES = 10;
/**
* The unit of the time to wait after a failure.
* NOTE(review): 1000 is presumably milliseconds (one second) - confirm against getWaitTime().
*/
private static final int PAUSE_UNIT = 1000;
/** Shared HBase client configuration; created on first use by initTablePool(). */
private static Configuration hadoopConfig;
/** HTablePool shared by all data write threads; created on first use by initTablePool(). */
private static HTablePool tablePool;
/** Names of the user tables, split from the comma-separated DemoConfig value; indexed by run(). */
private static String[] tables;
/**
* Lazily creates the shared HBase Configuration, the shared HTablePool and the table-name array.
* <p>
* The settings (ZooKeeper quorum and port, pool size, table list) are read from DemoConfig.
* The method is synchronized because every data write thread calls it from run(): without the lock
* two threads could both see a null pool and create separate HTablePool instances, or a thread could
* see the pool while the table-name array is still null.
* Calling it again after a successful initialization has no effect.
*/
public static synchronized void initTablePool()
{
    DemoConfig config = DemoConfig.getInstance();
    if (hadoopConfig == null)
    {
        hadoopConfig = HBaseConfiguration.create();
        hadoopConfig.set("hbase.zookeeper.quorum", config.getZookeepers());
        hadoopConfig.set("hbase.zookeeper.property.clientPort", config.getZookeeperPort());
    }
    if (tablePool == null)
    {
        // Resolve the table names first: if this step fails, tablePool stays null and the
        // next call retries the whole initialization instead of leaving tables unset forever.
        tables = config.getTables().split(",");
        tablePool = new HTablePool(hadoopConfig, config.getTablePoolMaxSize());
    }
}
/**
* Write loop of a data write thread.
* <p>
* Repeatedly takes a batch from DataStorage and writes it to the target table through an
* HTableInterface obtained from the shared HTablePool. A failed batch is attempted up to RETRIES
* times; after a partial failure only the rows reported as failed are submitted again.
* <p>
* The loop does not end on its own. The method returns only when the thread is interrupted while
* waiting between two attempts; the interrupt status is restored before returning.
*/
public void run()
{
    // Initialize HTablePool. Initialize this instance only once because it is shared by multiple threads.
    initTablePool();
    for (;;)
    {
        Map<String, Object> data = DataStorage.takeList();
        String tableName = tables[(Integer)data.get("table")];
        @SuppressWarnings("unchecked")
        List<Put> list = (List<Put>)data.get("list");
        // Index of all puts by row key. It is only used to look up the data that failed to be written,
        // because the server only reports the rows of the failed data.
        // The key is a ByteBuffer wrapper because a raw byte[] key is hashed and compared by identity,
        // so a lookup with an equal-content row key would never match.
        Map<java.nio.ByteBuffer, Put> rowPutMap = null;
        // Perform the operation again if it fails (even if only some of the data failed to be written).
        // Only the data that failed to be written is submitted each time.
        INNER_LOOP :
        for (int retry = 0; retry < RETRIES; retry++)
        {
            // Obtain an HTableInterface instance from HTablePool. It is released in the finally block.
            HTableInterface table = tablePool.getTable(tableName);
            try
            {
                table.put(list);
                // The operation is successful.
                break INNER_LOOP;
            }
            catch (IOException e)
            {
                // If the exception type is RetriesExhaustedWithDetailsException, some of the data failed to be
                // written. The exception occurs because the processes in the HBase cluster are abnormal or a
                // large number of Regions are being migrated.
                // Otherwise, all the data in the list is inserted again.
                if (e instanceof RetriesExhaustedWithDetailsException)
                {
                    RetriesExhaustedWithDetailsException ree =
                        (RetriesExhaustedWithDetailsException)e;
                    int failures = ree.getNumExceptions();
                    System.out.println("In this operation, [" + failures + "] data records failed to be inserted.");
                    // Build the index when a retry is performed upon the first failure.
                    if (rowPutMap == null)
                    {
                        rowPutMap = new HashMap<java.nio.ByteBuffer, Put>(list.size());
                        for (Put put : list)
                        {
                            rowPutMap.put(java.nio.ByteBuffer.wrap(put.getRow()), put);
                        }
                    }
                    // Clear the original data and then add the data that failed to be written.
                    list.clear();
                    for (int m = 0; m < failures; m++)
                    {
                        // ree.getRow(m) is the failed action; its getRow() is the row key.
                        Put failed = rowPutMap.get(java.nio.ByteBuffer.wrap(ree.getRow(m).getRow()));
                        // Never put null into the batch that is submitted again.
                        if (failed != null)
                        {
                            list.add(failed);
                        }
                    }
                }
                else
                {
                    // Do not hide the failure: the whole batch is kept and submitted again.
                    System.out.println("Write failed, all [" + list.size() + "] data records will be submitted again: " + e);
                }
            }
            finally
            {
                // Release the instance after using it.
                tablePool.putTable(table);
            }
            // An exception occurred: wait some time after releasing the HTableInterface instance.
            try
            {
                sleep(getWaitTime(retry));
            }
            catch (InterruptedException e1)
            {
                System.out.println("Interruped");
                // Restore the interrupt status and stop the thread instead of ignoring the request.
                Thread.currentThread().interrupt();
                return;
            }
        }
    }
}

Create a Put instance

HBase is a column-oriented database. One row of data may correspond to multiple column families, and one column family may correspond to multiple columns. Before data is written, the column (column family name and column name) must be specified.

A Put instance must be created before a row of data is written in an HBase table. The Put instance data consists of (Key, Value). The Value can contain multiple columns of values.

When a (Key, Value) record is added to a Put instance, the family, qualifier, and value added are byte arrays. Use the Bytes.toBytes method to convert character strings to byte arrays. Do not use the String.getBytes method, because this method cannot ensure correct data encoding. Errors occur when the Key or Value contains Chinese characters.

Example:

/** Name of the column family: privateInfo. */
private static final byte[] FAMILY_PRIVATE = Bytes.toBytes("privateInfo");
/** Qualifier of the "name" column in the privateInfo column family. */
private static final byte[] COLUMN_NAME = Bytes.toBytes("name");
/** Qualifier of the "address" column in the privateInfo column family. */
private static final byte[] COLUMN_ADDR = Bytes.toBytes("address");
/**
* Creates a Put instance for one row, with one column family (privateInfo) and two columns of data.
*
* @param rowKey row key of the row to be written
* @param name value stored in the "name" column
* @param address value stored in the "address" column
* @return the Put instance holding both column values for the row
*/
public Put createPut(String rowKey, String name, String address)
{
    final byte[] keyBytes = Bytes.toBytes(rowKey);
    final Put row = new Put(keyBytes);

    // Strings are converted with Bytes.toBytes so that the encoding is always the same.
    final byte[] nameBytes = Bytes.toBytes(name);
    row.add(FAMILY_PRIVATE, COLUMN_NAME, nameBytes);

    final byte[] addressBytes = Bytes.toBytes(address);
    row.add(FAMILY_PRIVATE, COLUMN_ADDR, addressBytes);

    return row;
}

Create an HBaseAdmin instance

Example:

/** Configuration passed to the constructor. */
private Configuration demoConf;
/** Admin client created from the configuration; stays null if it could not be created. */
private HBaseAdmin hbaseAdmin;

/**
* Constructor. Stores the given Configuration and creates the HBaseAdmin instance from it.
* If HBaseAdmin cannot be created, the stack trace is printed and hbaseAdmin is left as null.
*
* @param conf configuration used to connect to the HBase cluster
*/
public HBaseAdminDemo(Configuration conf)
{
    demoConf = conf;
    try
    {
        // Instantiate HBaseAdmin
        hbaseAdmin = new HBaseAdmin(demoConf);
    }
    catch (MasterNotRunningException masterDown)
    {
        masterDown.printStackTrace();
    }
    catch (ZooKeeperConnectionException zkFailure)
    {
        zkFailure.printStackTrace();
    }
}
/**
* Examples of using HBaseAdmin methods.
* For details about more methods, see the HBase interface documents.
*
* @throws MasterNotRunningException declared by the HBaseAdmin operations used here
* @throws ZooKeeperConnectionException declared by the HBaseAdmin operations used here
* @throws IOException declared by the HBaseAdmin operations used here
*/
public void demo() throws MasterNotRunningException, ZooKeeperConnectionException, IOException
{
    final String fullRegionName = "mrtest,jjj,1315449869513.fc41d70b84e9f6e91f9f01affdb06703.";
    final String encodedRegionName = "fc41d70b84e9f6e91f9f01affdb06703";
    final String table = "tableName";

    // Reallocate a Region.
    hbaseAdmin.unassign(Bytes.toBytes(fullRegionName), false);

    // Actively initiate Balance.
    hbaseAdmin.balancer();

    // Move a Region. The second parameter is HostName+StartCode of RegionServer, for example,
    // host187.example.com,60020,1289493121758. If this parameter is set to null, the Region will be moved at random.
    hbaseAdmin.move(Bytes.toBytes(encodedRegionName), null);

    // Check whether a table exists. The result is not used in this example.
    hbaseAdmin.tableExists(table);

    // Check whether a table is activated. The result is not used in this example.
    hbaseAdmin.isTableEnabled(table);
}
/**
* Method used to rapidly create a table.
* <p>
* Creates an HTableDescriptor instance, which contains the description of the HTable to be created,
* and adds one column family, described by an HColumnDescriptor instance. In this example, the
* column family name is "columnName" and its block size is set to 640 * 1024.
*
* @param tableName table name
* @return true if the table was created; false if the table already exists or if any exception
*         occurred (the stack trace is printed in that case)
*/
public boolean createTable(String tableName)
{
    try
    {
        if (hbaseAdmin.tableExists(tableName))
        {
            return false;
        }
        HTableDescriptor tableDesc = new HTableDescriptor(tableName);
        // Pass the family name as a String instead of calling String.getBytes(), which uses the
        // platform default charset and may encode the name differently on different machines.
        HColumnDescriptor fieldADesc = new HColumnDescriptor("columnName");
        fieldADesc.setBlocksize(640 * 1024);
        tableDesc.addFamily(fieldADesc);
        hbaseAdmin.createTable(tableDesc);
        return true;
    }
    catch (Exception e)
    {
        // Kept broad on purpose: callers rely on the boolean result rather than on exceptions.
        e.printStackTrace();
        return false;
    }
}