更新时间:2026-03-23 GMT+08:00
分享

rangebucket

  1. 创建SLICEGROUP规则生成SLICE和底层bucket的映射关系:

    CREATE SLICEGROUP slicegroupname DISTRIBUTE BY RANGE(column_type) (slice_less_than_item) BUCKETCNT bucketcnt; 

    语法详情请参见《开发指南》中“SQL参考 > SQL语法 > C > CREATE SLICEGROUP”章节。

  2. 创建rangebucket表时,只需要绑定到对应的SLICEGROUOP规则上即可:

    create table table_name (a int, b int)distribute by range(a) to slicegroup sg;

rangebucket使用示例:
--创建表 准备数据
CREATE SLICEGROUP sg1 distribute by range(int) 
(
 slice s1 values less than (5) DATANODE datanode1,
 slice s2 values less than (10) DATANODE datanode2,
 slice s3 values less than (20) DATANODE datanode3,
 slice s4 values less than (maxvalue) datanode datanode4
) bucketcnt 128;

CREATE TABLE t1 (a int , b int, c int)  with (storage_type=astore) distribute by range(a) to slicegroup sg1;
NOTICE:  bucket table need segment storage, set segment to on by default
insert into t1 values(generate_series(1, 40), generate_series(1, 40));
analyze t1;
rangebucket提供了SLICE指定的语法如下所示,指定查询SLICE s1和 s3,计划对应dn1 和 dn3。
-- slice指定
SET enable_fast_query_shipping = on;
explain(costs off, verbose on) select * from t1 slice by (s1, s3) order by a;
                       QUERY PLAN                        
---------------------------------------------------------
 Streaming (type: GATHER)
   Output: a, b, c
   Merge Sort Key: t1.a
   Node/s: (GenGroup) datanode1, datanode3
   ->  Sort
         Output: a, b, c
         Exec Nodes: (group1) datanode1, datanode3
         Sort Key: t1.a
         ->  Seq Scan on rangebucket_pruning.t1
               Output: a, b, c
               Distribute Key: a
               Exec Nodes: (group1) datanode1, datanode3
               Selected Buckets: 2048 2050 
(13 rows)
rangebucket对于分布列的约束条件能进行正确的剪枝,支持关于分布列的各种表达式剪枝。
-- 剪枝
SET enable_fast_query_shipping = off;
explain (costs false,verbose on) select count(*) from t1 where a = 1;
                      QUERY PLAN                      
------------------------------------------------------
 Aggregate
   Output: pg_catalog.count(*)
   Exec Nodes: (GenGroup) datanode1
   ->  Streaming (type: GATHER)
         Output: (count(*))
         Node/s: (GenGroup) datanode1
         ->  Aggregate
               Output: count(*)
               Exec Nodes: (group1) datanode1
               ->  Seq Scan on rangebucket_pruning.t1
                     Output: a, b, c
                     Distribute Key: a
                     Exec Nodes: (group1) datanode1
                     Filter: (t1.a = 1)
                     Selected Buckets: 2048 (15 rows)

-- or表达式
explain (costs false,verbose on) select count(*) from t1 where a = 1 or a = 11;
                          QUERY PLAN                           
---------------------------------------------------------------
 Aggregate
   Output: pg_catalog.count(*)
   Exec Nodes: (GenGroup) datanode1, datanode3
   ->  Streaming (type: GATHER)
         Output: (count(*))
         Node/s: (GenGroup) datanode1, datanode3
         ->  Aggregate
               Output: count(*)
               Exec Nodes: (group1) datanode1, datanode3
               ->  Seq Scan on rangebucket_pruning.t1
                     Output: a, b, c
                     Distribute Key: a
                     Exec Nodes: (group1) datanode1, datanode3
                     Filter: ((t1.a = 1) OR (t1.a = 11))
                     Selected Buckets: 2048 2050 
(15 rows)

相关文档