更新时间:2024-06-07 GMT+08:00

分区级统计信息

指定单分区统计信息收集

当前分区表支持指定单分区统计信息收集,已收集统计信息的分区会在再次收集时自动更新维护。该功能适用于列表分区、哈希分区和范围分区。

gaussdb=# CREATE TABLE only_first_part(id int,name varchar)PARTITION BY RANGE (id) 
 (PARTITION id11 VALUES LESS THAN (1000000),
PARTITION id22 VALUES LESS THAN (2000000),
PARTITION max_id1 VALUES LESS THAN (MAXVALUE));

gaussdb=# INSERT INTO only_first_part SELECT generate_series(1,5000),'test';

gaussdb=# ANALYZE only_first_part PARTITION (id11);
gaussdb=# ANALYZE only_first_part PARTITION (id22);
gaussdb=# ANALYZE only_first_part PARTITION (max_id1);

gaussdb=# SELECT relname, relpages, reltuples FROM pg_partition WHERE relname IN ('id11', 'id22', 'max_id1');
 relname | relpages | reltuples 
---------+----------+-----------
 id11    |       20 |      5000
 id22    |        0 |         0
 max_id1 |        0 |         0
(3 rows)

gaussdb=# \x
gaussdb=# SELECT * FROM pg_stats WHERE tablename ='only_first_part' AND partitionname ='id11';
-[ RECORD 1 ]----------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
schemaname             | public
tablename              | only_first_part
attname                | name
inherited              | f
null_frac              | 0
avg_width              | 5
n_distinct             | 1
n_dndistinct           | 0
most_common_vals       | {test}
most_common_freqs      | {1}
histogram_bounds       | 
correlation            | 1
most_common_elems      | 
most_common_elem_freqs | 
elem_count_histogram   | 
partitionname          | id11
subpartitionname       | 
-[ RECORD 2 ]----------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
schemaname             | public
tablename              | only_first_part
attname                | id
inherited              | f
null_frac              | 0
avg_width              | 4
n_distinct             | -1
n_dndistinct           | 0
most_common_vals       | 
most_common_freqs      | 
histogram_bounds       | {1,50,100,150,200,250,300,350,400,450,500,550,600,650,700,750,800,850,900,950,1000,1050,1100,1150,1200,1250,1300,1350,1400,1450,1500,1550,1600,1650,1700,1750,1800,1850,1900,1950,2000,2050,2100,2150,2200,2250,2300,2350,2400,2450,2500,2550,2600,2650,2700,2750,2800,2850,2900,2950,3000,3050,3100,3150,3200,3250,3300,3350,3400,3450,3500,3550,3600,3650,3700,3750,3800,3850,3900,3950,4000,4050,4100,4150,4200,4250,4300,4350,4400,4450,4500,4550,4600,4650,4700,4750,4800,4850,4900,4950,5000}
correlation            | 1
most_common_elems      | 
most_common_elem_freqs | 
elem_count_histogram   | 
partitionname          | id11
subpartitionname       |

gaussdb=# \x
-- 删除分区表
gaussdb=# DROP TABLE only_first_part;

优化器使用指定分区统计信息

优化器优先使用指定分区的统计信息。如果指定分区未收集统计信息,优化器使用改写分区子句剪枝优化,请参见通过改写分区子句剪枝优化

gaussdb=# CREATE TABLE only_first_part_two
(
    c1 INT,
    c2 BIGINT
)
PARTITION BY LIST(c2)
(
    PARTITION p_1 VALUES (10000, 20000),
    PARTITION p_2 VALUES (300000, 400000, 500000),
    PARTITION p_3 VALUES (DEFAULT)
);

gaussdb=# EXPLAIN SELECT * FROM only_first_part_two PARTITION (p_2);

                                        QUERY PLAN                                        
------------------------------------------------------------------------------------------
 Partition Iterator  (cost=0.00..29.45 rows=1945 width=12)
   Iterations: 1
   ->  Partitioned Seq Scan on only_first_part_two  (cost=0.00..29.45 rows=1945 width=12)
         Selected Partitions:  2
(4 rows)

gaussdb=# EXPLAIN SELECT * FROM only_first_part_two PARTITION (p_1) WHERE c2 = 1;
                                       QUERY PLAN                                       
----------------------------------------------------------------------------------------
 Partition Iterator  (cost=0.00..34.31 rows=10 width=12)
   Iterations: 0
   ->  Partitioned Seq Scan on only_first_part_two  (cost=0.00..34.31 rows=10 width=12)
         Filter: (c2 = 1)
         Selected Partitions:  NONE
(5 rows)
-- 删除分区表
gaussdb=# DROP TABLE only_first_part_two;

通过改写分区子句剪枝优化

当没有分区级统计信息时,在优化器行数估算模块,通过在逻辑上对分区子句进行伪谓词的改写,利用改写后的伪谓词影响选择率的计算和整表的统计信息获取一个比较准确的行数估算值。

  • 特性只作用于选择率的计算。
  • 特性适用于一级分区和二级分区。
  • 特性只支持范围分区(range partition)、间隔分区(interval partition)和列表分区(list partition)。
  • 对于范围分区和间隔分区,特性只支持单列分区键的改写,不支持多列分区键的改写。
  • 对于列表分区,出于性能考虑,设置列表指定分区的枚举值个数的阈值为40个。
    • 当指定分区的列表枚举值个数超过40时,本特性不再适用。
    • 对于default分区,其列表枚举值个数是所有非default分区的枚举值个数的总和。

示例1:对于范围/间隔分区的改写

gaussdb=# CREATE TABLE test_int4_maxvalue(id INT, name VARCHAR)
PARTITION BY RANGE(id) 
(
    PARTITION id1 VALUES LESS THAN(1000), 
    PARTITION id2 VALUES LESS THAN(2000),
    PARTITION max_id VALUES LESS THAN(MAXVALUE)
);
gaussdb=# INSERT INTO test_int4_maxvalue SELECT GENERATE_SERIES(1,5000),'test';
gaussdb=# ANALYZE test_int4_maxvalue WITH GLOBAL;

-- 查询指定分区id1
gaussdb=# EXPLAIN SELECT * FROM test_int4_maxvalue PARTITION(id1);
                                       QUERY PLAN                                       
----------------------------------------------------------------------------------------
 Partition Iterator  (cost=0.00..51.00 rows=1000 width=9)
   Iterations: 1
   ->  Partitioned Seq Scan on test_int4_maxvalue  (cost=0.00..51.00 rows=1000 width=9)
         Selected Partitions:  1
(4 rows)

-- 查询指定分区max_id
gaussdb=# EXPLAIN SELECT * FROM test_int4_maxvalue PARTITION(max_id);
                                       QUERY PLAN                                       
----------------------------------------------------------------------------------------
 Partition Iterator  (cost=0.00..51.00 rows=3000 width=9)
   Iterations: 1
   ->  Partitioned Seq Scan on test_int4_maxvalue  (cost=0.00..51.00 rows=3000 width=9)
         Selected Partitions:  3
(4 rows)

-- 删除分区表
gaussdb=# DROP TABLE test_int4_maxvalue;
示例2:对于列表分区的改写
gaussdb=# CREATE TABLE test_default
(
    c1 INT, 
    c2 BIGINT
)
PARTITION BY LIST(c2) 
(
    PARTITION p_1 VALUES (10000, 20000), 
    PARTITION p_2 VALUES (300000, 400000, 500000),
    PARTITION p_3 VALUES (DEFAULT)
);
gaussdb=# INSERT INTO test_default SELECT GENERATE_SERIES(1, 1000), 10000;
gaussdb=# INSERT INTO test_default SELECT GENERATE_SERIES(1001, 2000), 600000;
gaussdb=# ANALYZE test_default WITH GLOBAL;

-- 查询指定分区p_1
gaussdb=# EXPLAIN SELECT * FROM test_default PARTITION(p_1);
                                    QUERY PLAN                                     
-----------------------------------------------------------------------------------
 Partition Iterator  (cost=0.00..28.00 rows=1000 width=12)
   Iterations: 1
   ->  Partitioned Seq Scan on test_default  (cost=0.00..28.00 rows=1000 width=12)
         Selected Partitions:  1
(4 rows)

-- 查询指定分区p_3
gaussdb=# EXPLAIN SELECT * FROM test_default PARTITION(p_3);
                                    QUERY PLAN                                     
-----------------------------------------------------------------------------------
 Partition Iterator  (cost=0.00..28.00 rows=1000 width=12)
   Iterations: 1
   ->  Partitioned Seq Scan on test_default  (cost=0.00..28.00 rows=1000 width=12)
         Selected Partitions:  3
(4 rows)

-- 删除分区表
gaussdb=# DROP TABLE test_default;