更新时间:2025-12-10 GMT+08:00
分享

配置SQL作业源表并行度

本章节适用于MRS 3.6.0-LTS及以后版本。

在SQL作业中,可以通过“scan.parallelism”参数为源表(Source)设置自定义并行度,以调整作业性能。

  • SQL示例:
    -- Kafka source table reading CSV records from topic 'test_source'.
    -- Text columns use STRING: a bare VARCHAR in Flink SQL means VARCHAR(1).
    CREATE TABLE KafkaSource (
      `user_id` STRING,
      `user_name` STRING,
      `age` INT
    ) WITH (
      'connector' = 'kafka',
      'topic' = 'test_source',
      -- Placeholder value: replace with the real "broker service IP:port" list.
      'properties.bootstrap.servers' = 'Kafka的Broker实例业务IP:Kafka端口号',
      'properties.group.id' = 'testGroup',
      'scan.startup.mode' = 'earliest-offset',
      -- Custom parallelism for this source table.
      'scan.parallelism' = '2',
      'format' = 'csv'
    );
    -- Kafka sink table writing CSV records to topic 'test_sink'.
    -- Text columns use STRING: a bare VARCHAR in Flink SQL means VARCHAR(1).
    CREATE TABLE KafkaSink (
      `user_id` STRING,
      `user_name` STRING,
      `age` INT
    ) WITH (
      'connector' = 'kafka',
      'topic' = 'test_sink',
      -- Placeholder value: replace with the real "broker service IP:port" list.
      'properties.bootstrap.servers' = 'Kafka的Broker实例业务IP:Kafka端口号',
      'value.format' = 'csv'
    );
    -- Write the rows read from KafkaSource into KafkaSink.
    -- Columns are named explicitly (instead of SELECT *) so the mapping
    -- onto the sink columns is visible in the statement itself.
    INSERT INTO KafkaSink
    SELECT
      `user_id`,
      `user_name`,
      `age`
    FROM KafkaSource;
  • Hints方式示例:
    -- Same copy from KafkaSource to KafkaSink, but 'scan.parallelism' is
    -- supplied through an OPTIONS hint on the source table reference
    -- rather than in the table's WITH clause.
    -- Columns are named explicitly (instead of SELECT *) so the mapping
    -- onto the sink columns is visible in the statement itself.
    INSERT INTO KafkaSink
    SELECT
      `user_id`,
      `user_name`,
      `age`
    FROM KafkaSource /*+ OPTIONS('scan.parallelism'='2') */;

相关文档