Consumption Modes for Data Download
Similar to Kafka, the DIS Kafka Adapter currently supports three consumption modes.
assign Mode
The user manually specifies which partitions the consumer instance consumes. In this mode there is no group management mechanism: partitions are not reassigned when the number of consumers in the group changes or when the stream is scaled up or down. A code example is shown below:
package com.huaweicloud.dis.demo.adapter;

import com.huaweicloud.dis.DISConfig;
import com.huaweicloud.dis.adapter.kafka.clients.consumer.*;
import com.huaweicloud.dis.adapter.kafka.common.PartitionInfo;
import com.huaweicloud.dis.adapter.kafka.common.TopicPartition;
import com.huaweicloud.dis.adapter.kafka.common.serialization.StringDeserializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;

public class DISKafkaConsumerAssignDemo {
    private static final Logger LOGGER = LoggerFactory.getLogger(DISKafkaConsumerAssignDemo.class);

    public static void main(String[] args) {
        // Hard-coding the AK/SK in source code is a serious security risk. Store them encrypted
        // in a configuration file or in environment variables and decrypt them only when needed.
        // This example reads the AK/SK from environment variables: set HUAWEICLOUD_SDK_AK and
        // HUAWEICLOUD_SDK_SK in your local environment before running it.
        String ak = System.getenv("HUAWEICLOUD_SDK_AK");
        String sk = System.getenv("HUAWEICLOUD_SDK_SK");
        // Your project ID
        String projectId = "YOU_PROJECT_ID";
        // Your DIS stream name
        String streamName = "YOU_STREAM_NAME";
        // Consumer group ID, used to record the offset
        String groupId = "YOU_GROUP_ID";
        // DIS region
        String region = "your region";

        Properties props = new Properties();
        props.setProperty(DISConfig.PROPERTY_AK, ak);
        props.setProperty(DISConfig.PROPERTY_SK, sk);
        props.setProperty(DISConfig.PROPERTY_PROJECT_ID, projectId);
        props.setProperty(DISConfig.PROPERTY_REGION_ID, region);
        props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        props.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, OffsetResetStrategy.LATEST.name());

        // By default no endpoint needs to be set and the service is accessed via its domain name.
        // To use a specific endpoint, uncomment the following line and set the endpoint.
        // props.setProperty(DISConfig.PROPERTY_ENDPOINT, "https://dis-${region}.myhuaweicloud.com");

        Consumer<String, String> consumer = new DISKafkaConsumer<>(props);

        List<TopicPartition> topicPartitions = new ArrayList<>();
        for (PartitionInfo partitionInfo : consumer.partitionsFor(streamName)) {
            topicPartitions.add(new TopicPartition(partitionInfo.topic(), partitionInfo.partition()));
        }

        // Use assign mode and specify the partitions to consume.
        consumer.assign(topicPartitions);

        while (true) {
            try {
                ConsumerRecords<String, String> records = consumer.poll(Long.MAX_VALUE);
                if (!records.isEmpty()) {
                    for (TopicPartition partition : records.partitions()) {
                        List<ConsumerRecord<String, String>> partitionRecords = records.records(partition);
                        for (ConsumerRecord<String, String> record : partitionRecords) {
                            LOGGER.info("Value [{}], Partition [{}], Offset [{}], Key [{}]",
                                    record.value(), record.partition(), record.offset(), record.key());
                        }
                    }

                    // After the data has been processed, commit the current offset asynchronously
                    // (commitSync can be used for a synchronous commit instead).
                    consumer.commitAsync(new OffsetCommitCallback() {
                        @Override
                        public void onComplete(Map<TopicPartition, OffsetAndMetadata> map, Exception e) {
                            if (e == null) {
                                LOGGER.debug("Success to commit offset [{}]", map);
                            } else {
                                LOGGER.error("Failed to commit offset [{}]", e.getMessage(), e);
                            }
                        }
                    });
                }
            } catch (Exception e) {
                LOGGER.info(e.getMessage(), e);
            }
        }
    }
}
After running the program above, if data is sent to the stream, logs like the following are printed:
09:36:45.071 INFO c.h.d.a.k.c.DISKafkaConsumer - create DISKafkaConsumer successfully
09:36:49.842 INFO c.h.d.d.a.DISKafkaConsumerAssignDemo - Value [Hello world[sync]. 0], Partition [0], Offset [134], Key [769066]
09:36:49.963 INFO c.h.d.d.a.DISKafkaConsumerAssignDemo - Value [Hello world[sync]. 1], Partition [0], Offset [135], Key [700005]
09:36:50.145 INFO c.h.d.d.a.DISKafkaConsumerAssignDemo - Value [Hello world[sync]. 2], Partition [0], Offset [136], Key [338044]
09:36:51.093 INFO c.h.d.d.a.DISKafkaConsumerAssignDemo - Value [Hello world[sync]. 3], Partition [0], Offset [137], Key [537495]
09:36:51.093 INFO c.h.d.d.a.DISKafkaConsumerAssignDemo - Value [Hello world[sync]. 4], Partition [0], Offset [138], Key [980016]
09:36:51.093 INFO c.h.d.d.a.DISKafkaConsumerAssignDemo - Value [Hello world[async]. 0], Partition [0], Offset [139], Key [182772]
09:36:51.093 INFO c.h.d.d.a.DISKafkaConsumerAssignDemo - Value [Hello world[async]. 1], Partition [0], Offset [140], Key [830271]
09:36:51.093 INFO c.h.d.d.a.DISKafkaConsumerAssignDemo - Value [Hello world[async]. 2], Partition [0], Offset [141], Key [927054]
09:36:51.093 INFO c.h.d.d.a.DISKafkaConsumerAssignDemo - Value [Hello world[async]. 3], Partition [0], Offset [142], Key [268122]
09:36:51.093 INFO c.h.d.d.a.DISKafkaConsumerAssignDemo - Value [Hello world[async]. 4], Partition [0], Offset [143], Key [992787]
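Because assign mode does not use group management, the consumer can also be pinned to a subset of partitions and positioned at an explicit offset before polling. The following minimal sketch reuses the consumer and streamName variables from the sample above (add an import for java.util.Collections if you splice it into the demo); partition 0 and offset 100 are illustrative values, and seek() is assumed to behave as in the standard Kafka Consumer interface that the adapter implements.

// Illustrative only: assumes the stream has a partition 0 and that offset 100 is still retained.
TopicPartition partition0 = new TopicPartition(streamName, 0);

// Assign only this partition; no group management or rebalancing is involved.
consumer.assign(Collections.singletonList(partition0));

// Start reading from an explicit offset instead of the committed / auto-reset position.
consumer.seek(partition0, 100L);

// Poll once with a finite timeout and report how many records came back.
ConsumerRecords<String, String> records = consumer.poll(10000L);
LOGGER.info("Fetched [{}] records starting from offset 100", records.count());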
subscribe Mode
The user only needs to specify the stream name; there is no need to specify concrete partitions. When the number of consumers changes or the stream is scaled up or down, the server triggers the group management mechanism and automatically assigns partitions to each consumer, ensuring that each partition is consumed by exactly one consumer.
A code example is shown below:
package com.huaweicloud.dis.demo.adapter;

import com.huaweicloud.dis.DISConfig;
import com.huaweicloud.dis.adapter.kafka.clients.consumer.*;
import com.huaweicloud.dis.adapter.kafka.common.TopicPartition;
import com.huaweicloud.dis.adapter.kafka.common.serialization.StringDeserializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;

public class DISKafkaConsumerSubscribeDemo {
    private static final Logger LOGGER = LoggerFactory.getLogger(DISKafkaConsumerSubscribeDemo.class);

    public static void main(String[] args) {
        // Hard-coding the AK/SK in source code is a serious security risk. Store them encrypted
        // in a configuration file or in environment variables and decrypt them only when needed.
        // This example reads the AK/SK from environment variables: set HUAWEICLOUD_SDK_AK and
        // HUAWEICLOUD_SDK_SK in your local environment before running it.
        String ak = System.getenv("HUAWEICLOUD_SDK_AK");
        String sk = System.getenv("HUAWEICLOUD_SDK_SK");
        // Your project ID
        String projectId = "YOU_PROJECT_ID";
        // Your DIS stream name
        String streamName = "YOU_STREAM_NAME";
        // Consumer group ID, used to record the offset
        String groupId = "YOU_GROUP_ID";
        // DIS region
        String region = "your region";

        Properties props = new Properties();
        props.setProperty(DISConfig.PROPERTY_AK, ak);
        props.setProperty(DISConfig.PROPERTY_SK, sk);
        props.setProperty(DISConfig.PROPERTY_PROJECT_ID, projectId);
        props.setProperty(DISConfig.PROPERTY_REGION_ID, region);
        props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        props.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, OffsetResetStrategy.LATEST.name());

        // By default no endpoint needs to be set and the service is accessed via its domain name.
        // To use a specific endpoint, uncomment the following line and set the endpoint.
        // props.setProperty(DISConfig.PROPERTY_ENDPOINT, "https://dis-${region}.myhuaweicloud.com");

        Consumer<String, String> consumer = new DISKafkaConsumer<>(props);

        // Use subscribe mode and specify only the name of the stream to consume.
        consumer.subscribe(Collections.singleton(streamName));

        while (true) {
            try {
                ConsumerRecords<String, String> records = consumer.poll(Long.MAX_VALUE);
                if (!records.isEmpty()) {
                    for (TopicPartition partition : records.partitions()) {
                        List<ConsumerRecord<String, String>> partitionRecords = records.records(partition);
                        for (ConsumerRecord<String, String> record : partitionRecords) {
                            LOGGER.info("Value [{}], Partition [{}], Offset [{}], Key [{}]",
                                    record.value(), record.partition(), record.offset(), record.key());
                        }
                    }

                    // After the data has been processed, commit the current offset asynchronously
                    // (commitSync can be used for a synchronous commit instead).
                    consumer.commitAsync(new OffsetCommitCallback() {
                        @Override
                        public void onComplete(Map<TopicPartition, OffsetAndMetadata> map, Exception e) {
                            if (e == null) {
                                LOGGER.debug("Success to commit offset [{}]", map);
                            } else {
                                LOGGER.error("Failed to commit offset [{}]", e.getMessage(), e);
                            }
                        }
                    });
                }
            } catch (Exception e) {
                LOGGER.info(e.getMessage(), e);
            }
        }
    }
}
After the program starts, it sends a heartbeat request (Heartbeat) every 10 seconds and then sends a request to join the consumer group (JoinGroup). The server then starts assigning partitions to the consumers in the group; this takes about 20 seconds. Once the assignment is complete, the consumer sends a synchronization request (SyncGroup) to obtain the assignment result. When the log shows Heartbeat {"state":"STABLE"}, the whole consumer group has finished assignment and data can be consumed normally.
The key logs in this process are described as follows:
- Heartbeat {"state":"JOINING"}
Heartbeat is a heartbeat request, sent every 10 seconds to keep the connection to the server alive. If the server receives no heartbeat for more than 1 minute, it considers the consumer offline and reassigns the consumer group. A heartbeat result of JOINING means the consumer needs to rejoin the consumer group; STABLE means the consumer group is stable.
- JoinGroup
If the heartbeat result is not STABLE, the consumer sends a joinGroup request to notify the server that it wants to join the consumer group. After receiving the request, the server reassigns the consumer group and returns syncDelayedTimeMs, which indicates how long the assignment will take. The client can wait for syncDelayedTimeMs and then send a synchronization request (SyncGroup) to obtain the assignment result.
- SyncGroup
This request is used to obtain the assignment result. The assignment in the response contains the stream names and partitions that the consumer should consume.
Run the sample program, wait for the consumer group assignment to complete, and then send data to the stream. The complete logs are as follows:
09:42:37.296 INFO c.h.d.a.k.c.DISKafkaConsumer - create DISKafkaConsumer successfully
09:42:37.354 INFO c.h.d.a.k.c.Coordinator - Heartbeat {"state":"JOINING"}
09:42:37.363 INFO c.h.d.a.k.c.Coordinator - joinGroupRequest {"groupId":"ding","clientId":"consumer-c2d43144-0823-4eea-aaa8-7af95c536144","interestedStream":["liuhao12"]}
09:42:37.406 INFO c.h.d.a.k.c.Coordinator - joinGroupResponse {"state":"OK","syncDelayedTimeMs":21000}
09:42:58.408 INFO c.h.d.a.k.c.Coordinator - syncGroup {"groupId":"ding","clientId":"consumer-c2d43144-0823-4eea-aaa8-7af95c536144","generation":-1}
09:42:58.451 INFO c.h.d.a.k.c.Coordinator - syncGroup {"state":"OK","generation":33,"assignment":{"dis-test":[0]}}
09:42:58.488 INFO c.h.d.a.k.c.Coordinator - Heartbeat {"state":"STABLE"}
09:43:08.960 INFO c.h.d.a.k.c.Coordinator - Heartbeat {"state":"STABLE"}
09:46:56.227 INFO c.h.d.d.a.DISKafkaConsumerSubscribeDemo - Value [Hello world[sync]. 0], Partition [0], Offset [144], Key [799704]
09:46:56.327 INFO c.h.d.d.a.DISKafkaConsumerSubscribeDemo - Value [Hello world[sync]. 1], Partition [0], Offset [145], Key [683757]
09:46:56.449 INFO c.h.d.d.a.DISKafkaConsumerSubscribeDemo - Value [Hello world[sync]. 2], Partition [0], Offset [146], Key [439062]
09:46:56.535 INFO c.h.d.d.a.DISKafkaConsumerSubscribeDemo - Value [Hello world[sync]. 3], Partition [0], Offset [147], Key [374939]
09:46:56.654 INFO c.h.d.d.a.DISKafkaConsumerSubscribeDemo - Value [Hello world[sync]. 4], Partition [0], Offset [148], Key [321528]
09:46:56.749 INFO c.h.d.d.a.DISKafkaConsumerSubscribeDemo - Value [Hello world[async]. 0], Partition [0], Offset [149], Key [964841]
09:46:56.749 INFO c.h.d.d.a.DISKafkaConsumerSubscribeDemo - Value [Hello world[async]. 1], Partition [0], Offset [150], Key [520262]
09:46:56.749 INFO c.h.d.d.a.DISKafkaConsumerSubscribeDemo - Value [Hello world[async]. 2], Partition [0], Offset [151], Key [619119]
09:46:56.749 INFO c.h.d.d.a.DISKafkaConsumerSubscribeDemo - Value [Hello world[async]. 3], Partition [0], Offset [152], Key [257094]
09:46:56.749 INFO c.h.d.d.a.DISKafkaConsumerSubscribeDemo - Value [Hello world[async]. 4], Partition [0], Offset [153], Key [310331]
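The demos above commit asynchronously; as their inline comments note, commitSync can be used instead. The sketch below shows one way to commit explicit offsets synchronously for each partition inside the processing loop. It reuses the consumer and records variables from the demo (plus java.util.Collections), and the offset-map form of commitSync is assumed to follow the standard Kafka Consumer contract that the adapter implements.

for (TopicPartition partition : records.partitions()) {
    List<ConsumerRecord<String, String>> partitionRecords = records.records(partition);
    // The committed offset is the offset of the next record to be read, hence the +1.
    long nextOffset = partitionRecords.get(partitionRecords.size() - 1).offset() + 1;
    // Blocks until the commit succeeds or an exception is thrown.
    consumer.commitSync(Collections.singletonMap(partition, new OffsetAndMetadata(nextOffset)));
}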
subscribePattern Mode
subscribePattern builds on subscribe: instead of specifying concrete stream names, the user supplies a wildcard pattern. For example, stream.* matches stream1, stream2, stream_123, and so on. Existing, newly created, and deleted streams are automatically picked up or dropped by the consumer group, as long as their names match the pattern.
A code example is shown below:
package com.huaweicloud.dis.demo.adapter;

import com.huaweicloud.dis.DISConfig;
import com.huaweicloud.dis.adapter.kafka.clients.consumer.*;
import com.huaweicloud.dis.adapter.kafka.common.TopicPartition;
import com.huaweicloud.dis.adapter.kafka.common.serialization.StringDeserializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Pattern;

public class DISKafkaConsumerSubscribePatternDemo {
    private static final Logger LOGGER = LoggerFactory.getLogger(DISKafkaConsumerSubscribePatternDemo.class);

    public static void main(String[] args) {
        // Hard-coding the AK/SK in source code is a serious security risk. Store them encrypted
        // in a configuration file or in environment variables and decrypt them only when needed.
        // This example reads the AK/SK from environment variables: set HUAWEICLOUD_SDK_AK and
        // HUAWEICLOUD_SDK_SK in your local environment before running it.
        String ak = System.getenv("HUAWEICLOUD_SDK_AK");
        String sk = System.getenv("HUAWEICLOUD_SDK_SK");
        // Your project ID
        String projectId = "YOU_PROJECT_ID";
        // Wildcard pattern matching the DIS streams to consume, for example "stream.*"
        String streamNamePattern = "stream.*";
        // Consumer group ID, used to record the offset
        String groupId = "YOU_GROUP_ID";
        // DIS region
        String region = "your region";

        Properties props = new Properties();
        props.setProperty(DISConfig.PROPERTY_AK, ak);
        props.setProperty(DISConfig.PROPERTY_SK, sk);
        props.setProperty(DISConfig.PROPERTY_PROJECT_ID, projectId);
        props.setProperty(DISConfig.PROPERTY_REGION_ID, region);
        props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        props.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, OffsetResetStrategy.LATEST.name());

        // By default no endpoint needs to be set and the service is accessed via its domain name.
        // To use a specific endpoint, uncomment the following line and set the endpoint.
        // props.setProperty(DISConfig.PROPERTY_ENDPOINT, "https://dis-${region}.myhuaweicloud.com");

        Consumer<String, String> consumer = new DISKafkaConsumer<>(props);

        // Use subscribePattern mode and specify a wildcard pattern.
        consumer.subscribe(Pattern.compile(streamNamePattern), new ConsumerRebalanceListener() {
            @Override
            public void onPartitionsRevoked(Collection<TopicPartition> collection) {
                LOGGER.info("onPartitionsRevoked [{}]", collection);
            }

            @Override
            public void onPartitionsAssigned(Collection<TopicPartition> collection) {
                LOGGER.info("onPartitionsAssigned [{}]", collection);
            }
        });

        while (true) {
            try {
                ConsumerRecords<String, String> records = consumer.poll(Long.MAX_VALUE);
                if (!records.isEmpty()) {
                    for (TopicPartition partition : records.partitions()) {
                        List<ConsumerRecord<String, String>> partitionRecords = records.records(partition);
                        for (ConsumerRecord<String, String> record : partitionRecords) {
                            LOGGER.info("Value [{}], Partition [{}], Offset [{}], Key [{}]",
                                    record.value(), record.partition(), record.offset(), record.key());
                        }
                    }

                    // After the data has been processed, commit the current offset asynchronously
                    // (commitSync can be used for a synchronous commit instead).
                    consumer.commitAsync(new OffsetCommitCallback() {
                        @Override
                        public void onComplete(Map<TopicPartition, OffsetAndMetadata> map, Exception e) {
                            if (e == null) {
                                LOGGER.debug("Success to commit offset [{}]", map);
                            } else {
                                LOGGER.error("Failed to commit offset [{}]", e.getMessage(), e);
                            }
                        }
                    });
                }
            } catch (Exception e) {
                LOGGER.info(e.getMessage(), e);
            }
        }
    }
}
After the program starts, it still takes about 20 seconds for the server to complete the assignment before data can be consumed. The logs from running the sample code are shown below:
10:10:36.420 INFO c.h.d.a.k.c.DISKafkaConsumer - create DISKafkaConsumer successfully
10:10:36.481 INFO c.h.d.a.k.c.Coordinator - Heartbeat {"state":"JOINING"}
10:10:36.486 INFO c.h.d.a.k.c.Coordinator - joinGroupRequest {"groupId":"ding","clientId":"consumer-cad967ba-70ab-4e02-b184-f60b95fe3256","streamPattern":"stream.*"}
10:10:36.697 INFO c.h.d.a.k.c.Coordinator - joinGroupResponse {"state":"OK","subscription":["stream_hello","stream_world"],"syncDelayedTimeMs":21000}
10:10:57.699 INFO c.h.d.a.k.c.Coordinator - syncGroup {"groupId":"ding","clientId":"consumer-cad967ba-70ab-4e02-b184-f60b95fe3256","generation":-1}
10:10:57.746 INFO c.h.d.a.k.c.Coordinator - syncGroup {"state":"OK","generation":34,"assignment":{"stream_hello":[0],"stream_world":[0]}}
10:10:57.770 INFO c.h.d.d.a.DISKafkaConsumerSubscribePatternDemo - onPartitionsAssigned [[stream_hello-0, stream_world-0]]
10:10:57.770 INFO c.h.d.a.k.c.Coordinator - Heartbeat {"state":"STABLE"}
10:11:08.466 INFO c.h.d.a.k.c.Coordinator - Heartbeat {"state":"STABLE"}
10:11:09.992 INFO c.h.d.d.a.DISKafkaConsumerSubscribePatternDemo - Value [Hello world[sync]. 0], Partition [0], Offset [154], Key [181881]
10:11:09.993 INFO c.h.d.d.a.DISKafkaConsumerSubscribePatternDemo - Value [Hello world[sync]. 1], Partition [0], Offset [155], Key [483023]
10:11:09.993 INFO c.h.d.d.a.DISKafkaConsumerSubscribePatternDemo - Value [Hello world[sync]. 2], Partition [0], Offset [156], Key [32453]
10:11:10.093 INFO c.h.d.d.a.DISKafkaConsumerSubscribePatternDemo - Value [Hello world[sync]. 3], Partition [0], Offset [157], Key [111948]
10:11:10.180 INFO c.h.d.d.a.DISKafkaConsumerSubscribePatternDemo - Value [Hello world[sync]. 4], Partition [0], Offset [158], Key [822860]
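All three demos poll in an endless loop. In a real application the consumer should be shut down cleanly so that the last offsets are committed and client resources are released. The following is a minimal sketch only, assuming the adapter follows the standard KafkaConsumer wakeup()/close() contract; the exact exception thrown by poll() after wakeup() is not specified here and may differ.

// Trigger shutdown from another thread, for example a JVM shutdown hook:
// wakeup() makes a blocked poll() return by throwing an exception (assumed Kafka-style behavior).
Runtime.getRuntime().addShutdownHook(new Thread(consumer::wakeup));

try {
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(Long.MAX_VALUE);
        // ... process records and commit offsets as in the demos above ...
    }
} catch (Exception e) {
    // Expected once wakeup() has been called; fall through to the cleanup below.
} finally {
    consumer.commitSync(); // commit the final offsets synchronously
    consumer.close();      // release the consumer and its resources
}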