更新时间:2026-06-12 GMT+08:00
分享

Flink场景

MOR表参数简化并托管表服务

-- Simplified MOR sink table whose table services are handed over to LDMS.
-- The Chinese values below are placeholders; replace them before running the statement.
CREATE TABLE hudi_mor (业务字段)
WITH (
    -- Hudi connector, storage location and table type
    'connector' = 'hudi',
    'path' = 'hdfs://hacluster/tmp/hudi/stream_mor',
    'table.type' = 'MERGE_ON_READ',

    -- Record key (placeholder for the primary key field) and bucket count
    'hoodie.datasource.write.recordkey.field' = '主键字段',
    'hoodie.bucket.index.num.buckets' = '20',

    -- Let LDMS manage this table; Hive sync must be configured together with it
    'hoodie.managed.by.ldms' = 'true',

    -- Hive sync: target table/database and metastore connection (placeholders)
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.db' = '要同步到Hive的数据库名',
    'hive_sync.table' = '要同步到Hive的表名',
    'hive_sync.metastore.uris' = 'Hive客户端hive-site.xml文件中hive.metastore.uris的值',
    'properties.hive.metastore.kerberos.principal' = 'Hive客户端hive-site.xml文件中hive.metastore.kerberos.principal的值'
)

在Flink写MOR表的场景下,如果MRS集群中有LDMS服务,可以在Sink Hudi表的WITH属性中添加hoodie.managed.by.ldms配置(必须同时配置Hive同步)。该配置会自动将当前MOR表设置为只生成Compaction计划,并关闭Clean和Archive,由LDMS服务托管该表的Compaction、Clean和Archive维护操作。此外,该配置还会自动补充以下参数:将write.precombine.field设置为主键字段(联合主键时取第一个主键字段作为write.precombine.field)、将索引设置为BUCKET、将写入方式设置为LSM、将Parquet文件的压缩格式设置为zstd,并开启Log文件的Log Index。这些能力可以提升读写性能并优化存储。上述建表语句实现的效果等价于:

-- Fully expanded form of the simplified MOR table: every option that
-- hoodie.managed.by.ldms fills in automatically is written out explicitly.
-- The Chinese values below are placeholders; replace them before running the statement.
CREATE TABLE hudi_mor (业务字段)
WITH (
    -- Hudi connector, storage location and table type
    'connector' = 'hudi',
    'path' = 'hdfs://hacluster/tmp/hudi/stream_mor',
    'table.type' = 'MERGE_ON_READ',

    -- Record key and precombine field both use the primary key field (placeholder)
    'hoodie.datasource.write.recordkey.field' = '主键字段',
    'write.precombine.field' = '主键字段',

    -- BUCKET index with 20 buckets
    'index.type' = 'BUCKET',
    'hoodie.bucket.index.num.buckets' = '20',

    -- LSM write style, zstd-compressed Parquet files, Log Index on log files
    'hoodie.lsm.style' = 'true',
    'hoodie.parquet.compression.codec' = 'zstd',
    'hoodie.log.index.enable' = 'true',
    'hoodie.datasource.write.hive_style_partitioning' = 'true',

    -- Table services are managed by LDMS: the job only schedules compaction
    -- plans, while clean and archive are switched off on the writer side
    'hoodie.managed.by.ldms' = 'true',
    'compaction.delta_commits' = '10',
    'compaction.schedule.enabled' = 'true',
    'compaction.async.enabled' = 'false',
    'clean.async.enabled' = 'false',
    'hoodie.clean.automatic' = 'false',
    'hoodie.archive.automatic' = 'false',

    -- Hive sync: target table/database and metastore connection (placeholders)
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.db' = '要同步到Hive的数据库名',
    'hive_sync.table' = '要同步到Hive的表名',
    'hive_sync.metastore.uris' = 'Hive客户端hive-site.xml文件中hive.metastore.uris的值',
    'properties.hive.metastore.kerberos.principal' = 'Hive客户端hive-site.xml文件中hive.metastore.kerberos.principal的值'
)

COW表参数简化并托管表服务

-- Simplified COW sink table whose Clean/Archive services are handed over to LDMS.
-- The Chinese values below are placeholders; replace them before running the statement.
CREATE TABLE hudi_cow (业务字段,PRIMARY KEY (主键字段) NOT ENFORCED)
WITH (
    -- Hudi connector and storage location
    'connector' = 'hudi',
    'path' = 'hdfs://hacluster/tmp/hudi/stream_cow',

    -- Let LDMS manage this table; Hive sync must be configured together with it
    'hoodie.managed.by.ldms' = 'true',

    -- Hive sync: target table/database and metastore connection (placeholders)
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.db' = '要同步到Hive的数据库名',
    'hive_sync.table' = '要同步到Hive的表名',
    'hive_sync.metastore.uris' = 'Hive客户端hive-site.xml文件中hive.metastore.uris的值',
    'properties.hive.metastore.kerberos.principal' = 'Hive客户端hive-site.xml文件中hive.metastore.kerberos.principal的值'
)

在Flink写COW表的场景下,如果MRS集群中有LDMS服务,可以在Sink Hudi表的WITH属性中添加hoodie.managed.by.ldms配置(必须同时配置Hive同步)。该配置会自动关闭COW表的Clean和Archive,由LDMS服务托管该表的Clean和Archive维护操作。此外,该配置还会自动补充以下参数:将write.precombine.field设置为主键字段(联合主键时取第一个主键字段作为write.precombine.field)、将索引设置为MEMORY、将Parquet文件的压缩格式设置为zstd。上述建表语句实现的效果等价于:

-- Fully expanded form of the simplified COW table: every option that
-- hoodie.managed.by.ldms fills in automatically is written out explicitly.
-- The Chinese values below are placeholders; replace them before running the statement.
create table hudi_cow (业务字段,PRIMARY KEY (主键字段) NOT ENFORCED)
with (
'connector'='hudi',
'path' = 'hdfs://hacluster/tmp/hudi/stream_cow',
'write.operation' = 'upsert', -- defaults to upsert (update) mode; set to 'insert' to enable append mode
-- MEMORY index, precombine on the primary key field, zstd-compressed Parquet files
'index.type' = 'MEMORY',
'write.precombine.field'='主键字段',
'hoodie.parquet.compression.codec'='zstd',
-- Clean and Archive are switched off on the writer side and managed by LDMS
'hoodie.managed.by.ldms'='true',
'clean.async.enabled'='false',
'hoodie.clean.automatic'='false',
'hoodie.archive.automatic'='false',
-- Hive sync: target table/database and metastore connection (placeholders)
'hive_sync.enable' = 'true',
'hive_sync.table' = '要同步到Hive的表名',
'hive_sync.db' = '要同步到Hive的数据库名',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'Hive客户端hive-site.xml文件中hive.metastore.uris的值',
'properties.hive.metastore.kerberos.principal' = 'Hive客户端hive-site.xml文件中hive.metastore.kerberos.principal的值'
)

Append模式参数简化并托管表服务

Append模式写入时使用COW表,并搭配高性能小文件合并工具(参见使用指南15.2.4.17)和LDMS来自动托管Clean/Archive。

-- Append-mode COW sink table whose Clean/Archive services are handed over to LDMS.
-- The Chinese values below are placeholders; replace them before running the statement.
CREATE TABLE hudi_cow (业务字段,PRIMARY KEY (主键字段) NOT ENFORCED)
WITH (
    -- Hudi connector and storage location
    'connector' = 'hudi',
    'path' = 'hdfs://hacluster/tmp/hudi/stream_cow',

    -- 'insert' switches the writer to append mode
    'write.operation' = 'insert',

    -- Let LDMS manage this table; Hive sync must be configured together with it
    'hoodie.managed.by.ldms' = 'true',

    -- Hive sync: target table/database and metastore connection (placeholders)
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.db' = '要同步到Hive的数据库名',
    'hive_sync.table' = '要同步到Hive的表名',
    'hive_sync.metastore.uris' = 'Hive客户端hive-site.xml文件中hive.metastore.uris的值',
    'properties.hive.metastore.kerberos.principal' = 'Hive客户端hive-site.xml文件中hive.metastore.kerberos.principal的值'
)

相关文档