Updated on 2023-06-21 GMT+08:00

Creating a Dump Task

Initialize a DIS SDK client instance named dic. For details, see Initializing a DIS SDK Client Instance.

When using the DIS SDK to create a dump task, you need to specify the stream name, dump task name, dump interval, and dump destination.

Creating an OBS Dump Task

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
//Build the request used to create an OBS dump task.
CreateTransferTaskRequest request = new CreateTransferTaskRequest();

//Configure the stream name. You can create streams on the DIS console.
request.setStreamName(streamName);

//Create the OBS destination descriptor and configure the dump task name.
OBSDestinationDescriptorRequest descriptor = new OBSDestinationDescriptorRequest();
descriptor.setTransferTaskName(taskName);

//Configure the OBS bucket name and folder name. You can create OBS buckets and folders on the OBS console or client.
descriptor.setObsBucketPath("obs-dis");
descriptor.setFilePrefix("transfertask");

//Configure the dump interval, in seconds.
descriptor.setDeliverTimeInterval(900);

//(Optional) Create an IAM agency named dis_admin_agency on the DIS management page and use it to access specific cloud services. The first time you create an IAM agency, you must authorize it.
descriptor.setAgencyName("dis_admin_agency");

//(Optional) Configure the dump file format. By default, the value is Text. Other available options are Parquet and CarbonData.
descriptor.setDestinationFileType(DestinationFileTypeEnum.TEXT.getType());

//Configure the initial offset when data is pulled from the DIS stream. The value can be LATEST or TRIM_HORIZON. LATEST is the default value, indicating that data is read from the latest uploaded records in the stream. TRIM_HORIZON indicates that data is read from the earliest unexpired records in the stream.
descriptor.setConsumerStrategy(PartitionCursorTypeEnum.LATEST.name());

//Attach the OBS destination descriptor to the request.
request.setObsDestinationDescriptor(descriptor);

After configuring CreateTransferTaskRequest, you can call the createTransferTask method to create the dump task.

1
//Submit the configured request through the DIS SDK client instance dic to create the dump task.
dic.createTransferTask(request);

Creating an MRS Dump Task

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
//Build the request used to create an MRS dump task.
CreateTransferTaskRequest request = new CreateTransferTaskRequest();

//Configure the stream name. You can create streams on the DIS console.
request.setStreamName(streamName);

//Create the MRS destination descriptor and configure the dump task name.
MRSDestinationDescriptorRequest descriptor = new MRSDestinationDescriptorRequest();
descriptor.setTransferTaskName(taskName);

//Configure the MRS cluster name and ID. You can create and query cluster information on the MRS console. The cluster must be in non-security mode.
descriptor.setMrsClusterName("mrs_dis");
descriptor.setMrsClusterId("fe69a732-c7d3-4b0f-8cda-ec9eca0cf141");

//Configure the OBS bucket and folder that are used to temporarily store the data to be dumped to MRS and the dump failure data. You can create OBS buckets and folders on the OBS console.
descriptor.setObsBucketPath("obs-dis");
descriptor.setFilePrefix("transfertask");

//Configure the dump interval, in seconds.
descriptor.setDeliverTimeInterval(900);

//(Optional) Create an IAM agency named dis_admin_agency on the DIS management page and use it to access specific cloud services. The first time you create an IAM agency, you must authorize it.
descriptor.setAgencyName("dis_admin_agency");

//(Optional) Configure the dump file format. By default, the value is Text. Other available options are Parquet and CarbonData.
descriptor.setDestinationFileType(DestinationFileTypeEnum.TEXT.getType());

//Configure the initial offset when data is pulled from the DIS stream. The value can be LATEST or TRIM_HORIZON. LATEST is the default value, indicating that data is read from the latest uploaded records in the stream. TRIM_HORIZON indicates that data is read from the earliest unexpired records in the stream.
descriptor.setConsumerStrategy(PartitionCursorTypeEnum.LATEST.name());

//Attach the MRS destination descriptor to the request.
request.setMrsDestinationDescriptor(descriptor);

After configuring CreateTransferTaskRequest, you can call the createTransferTask method to create the dump task.

1
//Submit the configured request through the DIS SDK client instance dic to create the dump task.
dic.createTransferTask(request);

Creating a DLI Dump Task

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
//Build the request used to create a DLI dump task.
CreateTransferTaskRequest request = new CreateTransferTaskRequest();

//Configure the stream name. You can create streams on the DIS console.
request.setStreamName(streamName);

//Create the DLI destination descriptor (class UqueryDestinationDescriptorRequest) and configure the dump task name.
UqueryDestinationDescriptorRequest descriptor = new UqueryDestinationDescriptorRequest();
descriptor.setTransferTaskName(taskName);

//Configure the DLI database name and table name. You can create and query DLI databases and tables on the DLI console. The DLI table must be an internal table.
descriptor.setDliDatabaseName("dis_dli");
descriptor.setDliTableName("dis_test");

//Configure the OBS bucket and folder that are used to temporarily store the data to be dumped to DLI and the dump failure data. You can create OBS buckets and folders on the OBS console or client.
descriptor.setObsBucketPath("obs-dis");
descriptor.setFilePrefix("transfertask");

//Configure the dump interval, in seconds.
descriptor.setDeliverTimeInterval(900);

//(Optional) Create an IAM agency named dis_admin_agency on the DIS management page and use it to access specific cloud services. The first time you create an IAM agency, you must authorize it.
descriptor.setAgencyName("dis_admin_agency");

//Configure the initial offset when data is pulled from the DIS stream. The value can be LATEST or TRIM_HORIZON. LATEST is the default value, indicating that data is read from the latest uploaded records in the stream. TRIM_HORIZON indicates that data is read from the earliest unexpired records in the stream.
descriptor.setConsumerStrategy(PartitionCursorTypeEnum.LATEST.name());

//Attach the DLI destination descriptor to the request.
request.setDliDestinationDescriptor(descriptor);

After configuring CreateTransferTaskRequest, you can call the createTransferTask method to create the dump task.

1
//Submit the configured request through the DIS SDK client instance dic to create the dump task.
dic.createTransferTask(request);

Creating a DWS Dump Task

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
//Build the request used to create a DWS dump task.
CreateTransferTaskRequest request = new CreateTransferTaskRequest();

//Configure the stream name. You can create streams on the DIS console.
request.setStreamName(streamName);

//Create the DWS destination descriptor and configure the dump task name.
DwsDestinationDescriptorRequest descriptor = new DwsDestinationDescriptorRequest();
descriptor.setTransferTaskName(taskName);

//Configure the DWS cluster name, cluster ID, database, schema, table, delimiter, and the user name and password. You can create and query clusters on the DWS console, and create tables using its client or other methods.
descriptor.setDwsClusterName("dis_test");
descriptor.setDwsClusterId("92f90f6a-de4d-4689-82f6-320c328b0062");
descriptor.setDwsDatabaseName("postgres");
descriptor.setDwsSchema("dbadmin");
descriptor.setDwsTableName("distable01");
descriptor.setDwsDelimiter("|");
descriptor.setUserName("dbadmin");
//"xxxx" is a placeholder; replace it with the actual password.
descriptor.setUserPassword("xxxx");

//Configure the name and ID of the KMS key used to encrypt the DWS password to keep user data secure. You can create and query KMS keys on the KMS console.
descriptor.setKmsUserKeyName("qiyinshan");
descriptor.setKmsUserKeyId("9521c600-64a8-4971-ad36-7bbfa6d00c41");

//Configure the OBS bucket and folder that are used to temporarily store the data to be dumped to DWS and the dump failure data. You can create OBS buckets and folders on the OBS console or client.
descriptor.setObsBucketPath("obs-dis");
descriptor.setFilePrefix("transfertask");

//Configure the dump interval, in seconds.
descriptor.setDeliverTimeInterval(900);

//(Optional) Create an IAM agency named dis_admin_agency on the DIS management page and use it to access specific cloud services. The first time you create an IAM agency, you must authorize it.
descriptor.setAgencyName("dis_admin_agency");

//Configure the initial offset when data is pulled from the DIS stream. The value can be LATEST or TRIM_HORIZON. LATEST is the default value, indicating that data is read from the latest uploaded records in the stream. TRIM_HORIZON indicates that data is read from the earliest unexpired records in the stream.
descriptor.setConsumerStrategy(PartitionCursorTypeEnum.LATEST.name());

//Attach the DWS destination descriptor to the request.
request.setDwsDestinationDescriptor(descriptor);

After configuring CreateTransferTaskRequest, you can call the createTransferTask method to create the dump task.

1
//Submit the configured request through the DIS SDK client instance dic to create the dump task.
dic.createTransferTask(request);

Creating a CloudTable Dump Task

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
//Build the request used to create a CloudTable dump task.
CreateTransferTaskRequest request = new CreateTransferTaskRequest();

//Configure the stream name. You can create streams on the DIS console.
request.setStreamName(streamName);

//Create the CloudTable destination descriptor and configure the dump task name.
CloudtableDestinationDescriptorRequest descriptor = new CloudtableDestinationDescriptorRequest();
descriptor.setTransferTaskName(taskName);

//Configure the CloudTable cluster name, cluster ID, and table name. You can create and query clusters on the CloudTable console, and create tables using its client or other methods.
descriptor.setCloudtableClusterName("dis_test");
descriptor.setCloudtableClusterId("92f90f6a-de4d-4689-82f6-320c328b0062");
descriptor.setCloudtableTableName("dis");

//To configure CloudtableSchema, see CloudTable User Guide.
//NOTE(review): the setValue(...) strings look like attribute paths into the stream records - confirm against the CloudTable User Guide.
CloudtableSchema cloudtableSchema = new CloudtableSchema();
//Row key schema: two String fields, "id" and "group.users.id".
List<SchemaField> rowKeySchema = new ArrayList<>();
SchemaField field1 = new SchemaField();
field1.setValue("id");
field1.setType("String");
rowKeySchema.add(field1);
SchemaField rField1 = new SchemaField();
rField1.setValue("group.users.id");
rField1.setType("String");
rowKeySchema.add(rField1);
//Columns schema: each field sets a column family name, a qualifier name, a value, and a type.
List<SchemaField> columnsSchema = new ArrayList<>();
SchemaField field2 = new SchemaField();
field2.setColumnFamilyName("user");
field2.setQualifierName("id");
field2.setValue("group.users.id");
field2.setType("String");
SchemaField field3 = new SchemaField();
field3.setColumnFamilyName("user");
field3.setQualifierName("age");
field3.setValue("group.users.age");
field3.setType("Int");
columnsSchema.add(field2);
columnsSchema.add(field3);
//Assemble the schema and set it on the descriptor.
cloudtableSchema.setRowKeySchema(rowKeySchema);
cloudtableSchema.setColumnsSchema(columnsSchema);
descriptor.setCloudtableSchema(cloudtableSchema);

//Configure the OBS bucket and folder that are used to store dump failure data. You can create OBS buckets and folders on the OBS console or client.
descriptor.setObsBackupBucketPath("obs-dis");
descriptor.setBackupfilePrefix("transfertask");

//(Optional) Create an IAM agency named dis_admin_agency on the DIS management page and use it to access specific cloud services. The first time you create an IAM agency, you must authorize it.
descriptor.setAgencyName("dis_admin_agency");

//Configure the initial offset when data is pulled from the DIS stream. The value can be LATEST or TRIM_HORIZON. LATEST is the default value, indicating that data is read from the latest uploaded records in the stream. TRIM_HORIZON indicates that data is read from the earliest unexpired records in the stream.
descriptor.setConsumerStrategy(PartitionCursorTypeEnum.LATEST.name());

//Attach the CloudTable destination descriptor to the request.
request.setCloudtableDestinationDescriptor(descriptor);

After configuring CreateTransferTaskRequest, you can call the createTransferTask method to create the dump task.

1
//Submit the configured request through the DIS SDK client instance dic to create the dump task.
dic.createTransferTask(request);

Creating a CloudTable OpenTSDB Dump Task

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
//Build the request used to create a CloudTable OpenTSDB dump task.
CreateTransferTaskRequest request = new CreateTransferTaskRequest();

//Configure the stream name. You can create streams on the DIS console.
request.setStreamName(streamName);

//Create the CloudTable destination descriptor and configure the dump task name.
CloudtableDestinationDescriptorRequest descriptor = new CloudtableDestinationDescriptorRequest();
descriptor.setTransferTaskName(taskName);

//Configure the CloudTable OpenTSDB cluster name and cluster ID. You can create and query clusters on the CloudTable console.
descriptor.setCloudtableClusterName("dlf_test");
descriptor.setCloudtableClusterId("92f90f6a-de4d-4689-82f6-320c328b0062");

//To configure OpenTSDBSchema, see CloudTable User Guide.
//Metric schema: a list containing one String field.
List<SchemaField> metricSchema = new ArrayList<>();
SchemaField field1 = new SchemaField();
field1.setValue("group.users.id");
field1.setType("String");
metricSchema.add(field1);
//Timestamp schema: a String field with the format yyyy/MM/dd HH:mm:ss.
//NOTE(review): a column family name is also set on the timestamp field - confirm whether OpenTSDB requires it.
SchemaField timestampSchema = new SchemaField();
timestampSchema.setColumnFamilyName("user");
timestampSchema.setFormat("yyyy/MM/dd HH:mm:ss");
timestampSchema.setValue("group.users.birthday");
timestampSchema.setType("String");
//Value schema: a single Int field.
SchemaField valueSchema = new SchemaField();
valueSchema.setValue("group.users.age");
valueSchema.setType("Int");
//Tags schema: each tag field sets a name, a value, and a type.
List<SchemaField> tagsSchema = new ArrayList<>();
SchemaField field2 = new SchemaField();
field2.setName("group.users.id");
field2.setValue("group.users.id");
field2.setType("String");
SchemaField field3 = new SchemaField();
field3.setName("age");
field3.setValue("group.users.age");
field3.setType("Int");
tagsSchema.add(field2);
tagsSchema.add(field3);
//Assemble the OpenTSDB schema; the descriptor takes a list of schemas, here holding a single entry.
OpenTSDBSchema openTSDBSchema = new OpenTSDBSchema();
openTSDBSchema.setMetricSchema(metricSchema);
openTSDBSchema.setTimestampSchema(timestampSchema);
openTSDBSchema.setValueSchema(valueSchema);
openTSDBSchema.setTagsSchema(tagsSchema);
List<OpenTSDBSchema> openTSDBSchemaList = new ArrayList<>();
openTSDBSchemaList.add(openTSDBSchema);
descriptor.setOpentsdbSchema(openTSDBSchemaList);

//Configure the OBS bucket and folder that are used to store dump failure data. You can create OBS buckets and folders on the OBS console or client.
descriptor.setObsBackupBucketPath("obs-dis");
descriptor.setBackupfilePrefix("transfertask");

//(Optional) Create an IAM agency named dis_admin_agency on the DIS management page and use it to access specific cloud services. The first time you create an IAM agency, you must authorize it.
descriptor.setAgencyName("dis_admin_agency");

//Configure the initial offset when data is pulled from the DIS stream. The value can be LATEST or TRIM_HORIZON. LATEST is the default value, indicating that data is read from the latest uploaded records in the stream. TRIM_HORIZON indicates that data is read from the earliest unexpired records in the stream.
descriptor.setConsumerStrategy(PartitionCursorTypeEnum.LATEST.name());

//Attach the CloudTable destination descriptor to the request.
request.setCloudtableDestinationDescriptor(descriptor);

After configuring CreateTransferTaskRequest, you can call the createTransferTask method to create the dump task.

1
//Submit the configured request through the DIS SDK client instance dic to create the dump task.
dic.createTransferTask(request);