Updated on 2024-10-23 GMT+08:00

Synchronizing HBase Data from Spark to CarbonData (Java)

The following is a Java example in which Spark periodically synchronizes HBase data, exposed through an external Hive table, to a CarbonData table.

The following code snippets are used as an example.

For complete code, see com.huawei.spark.examples.HBaseExternalHivetoCarbon.

/**
 * Starts a Spark session and schedules a recurring task that copies changes from the
 * external Hive table into the CarbonData table, one time window per run.
 *
 * <p>Each run handles the half-open window {@code [timeStart, timeStart + TIMEWINDOW)}:
 * rows flagged {@code valid='0'} in that window are deleted from the CarbonData table by
 * key, then rows flagged {@code valid='1'} are inserted. The window start is advanced
 * only after both statements have completed.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if the Spark session cannot be created
 */
public static void main(String[] args) throws Exception {
    spark = SparkSession.builder().appName("HBaseExternalHiveToCarbon").getOrCreate();

    Timer timer = new Timer();
    // First run happens after one TIMEWINDOW, then repeats every TIMEWINDOW.
    timer.schedule(new TimerTask() {
        @Override
        public void run() {
            timeEnd = timeStart + TIMEWINDOW;

            queryTimeStart = transferDateToStr(timeStart);
            queryTimeEnd = transferDateToStr(timeEnd);

            // Run delete logic: drop keys that were marked invalid in this window.
            // The lower bound is inclusive so that a row whose modify_time equals a
            // window boundary belongs to exactly one window instead of none.
            cmdsb = new StringBuilder();
            cmdsb.append("delete from ")
                .append(carbonTableName)
                .append("  where key in (select key from ")
                .append(externalHiveTableName)
                .append(" where modify_time>='")
                .append(queryTimeStart)
                .append("' and modify_time<'")
                .append(queryTimeEnd)
                .append("' and valid='0')");
            spark.sql(cmdsb.toString());

            // Run insert logic: copy rows that are valid in the same window
            // (same inclusive-start / exclusive-end bounds as the delete above).
            cmdsb = new StringBuilder();
            cmdsb.append("insert into ")
                .append(carbonTableName)
                .append("  select * from ")
                .append(externalHiveTableName)
                .append(" where modify_time>='")
                .append(queryTimeStart)
                .append("'  and modify_time<'")
                .append(queryTimeEnd)
                .append("'  and valid='1'");
            spark.sql(cmdsb.toString());

            // The next window starts exactly where this one ended.
            timeStart = timeEnd;
        }
    }, TIMEWINDOW, TIMEWINDOW);
}