更新时间:2025-12-12 GMT+08:00

案例:With-Recursive under multi-nodegroup scenario is not shippable

问题根因

查询语句中所涉及的表分属于多个不同的逻辑集群中,这种场景的处理方案一般是通过中间表的方式让所有表在一个逻辑集群,或者把WITH RECURSIVE语句隔离出来以避免影响这个语句的性能。

案例1:通过临时表把WITH RECURSIVE语句中的非当前逻辑集群的表转储到当前逻辑集群中

适用场景:语句中包含WITH RECURSIVE语句,且WITH RECURSIVE只有少部分(1~2个)表分属于其它逻辑集群,这些表数据量不大或者经过过滤条件之后结果集不大。

原始语句如下,语句中目标表dwltax.dwl_tax_tom_cit_tp_account_tmp在逻辑集群"LC_DW1"中,来源表dwtaxdi.dwi_tax_cit_tp_account_r_i中涉及的表来源于逻辑集群“LC_DL1”。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
INSERT INTO dwltax.dwl_tax_tom_cit_tp_account_tmp(account_id,parentid) 
SELECT 
    S.account_id,S.parentid 
FROM (SELECT 
        DISTINCT S.ACCOUNT_ID, S.PARENTID
    FROM (
        WITH RECURSIVE tmp AS (
            SELECT 1 AS LV,
                '/'||T.PARENTID AS path,
                account_ID AS account_ID,
                PARENTID AS PARENTID ,
                ROW_number() over() PATH_NUM,
                T.DEL_FLAG
            FROM dwtaxdi.dwi_tax_cit_tp_account_r_i t
            WHERE T.DEL_FLAG = 'N'
            AND T.ISLEAF_FLAG = 1

            UNION ALL 

            SELECT 1+lv AS LV,
                p.path||'/'||t.PARENTID AS path,
                p.account_ID,
                t.PARENTID AS PARENTID,
                ROW_number() over() PATH_NUM,
                T.DEL_FLAG
            FROM dwtaxdi.dwi_tax_cit_tp_account_r_i t
            INNER JOIN tmp p ON p.PARENTID = t.account_ID
        )

        SELECT lv, 
            '/' ||account_ID||path|| '/'AS ITEM_ID_PATH,
            account_ID,
            PARENTID,
            PATH_NUM
        FROM tmp
        WHERE DEL_FLAG='N'
    ) S
) S

改写后语句

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
CREATE TEMP TABLE DWI_TAX_CIT_TP_ACCOUNT_R_i -- 把来源表数据导入临时表,临时表在逻辑集群“LC_DW1”
WITH(orientation=column) 
TO GROUP "LC_DW1" 
AS SELECT * 
FROM dwtaxdi.DWI_TAX_CIT_TP_ACCOUNT_R_i;

INSERT INTO dwltax.dwl_tax_tom_cit_tp_account_tmp(account_id,parentid)  -- 来源表和目标表都在逻辑集群“LC_DW1”
SELECT 
    S.account_id,S.parentid 
FROM (SELECT 
        DISTINCT S.ACCOUNT_ID, S.PARENTID
    FROM (
        WITH RECURSIVE tmp AS (
            SELECT 1 AS LV,
                '/'||T.PARENTID AS path,
                account_ID AS account_ID,
                PARENTID AS PARENTID ,
                ROW_number() over() PATH_NUM,
                T.DEL_FLAG
            FROM dwi_tax_cit_tp_account_r_i t
            WHERE T.DEL_FLAG = 'N'
            AND T.ISLEAF_FLAG = 1

            UNION ALL 

            SELECT 1+lv AS LV,
                p.path||'/'||t.PARENTID AS path,
                p.account_ID,
                t.PARENTID AS PARENTID,
                ROW_number() over() PATH_NUM,
                T.DEL_FLAG
            FROM dwi_tax_cit_tp_account_r_i t
            INNER JOIN tmp p ON p.PARENTID = t.account_ID
        )

        SELECT lv, 
            '/' ||account_ID||path|| '/'AS ITEM_ID_PATH,
            account_ID,
            PARENTID,
            PATH_NUM
        FROM tmp
        WHERE DEL_FLAG='N'
    ) S
) S

修改点对比

优化后的执行计划

案例2:把WITH RECURSIVE语句部分的结果转储到临时表

适用场景:当WITH RECURSIVE部分结果集比较小,且上一种整改方案的结果集仍比较大的时候,常见的有以下场景

  • WITH RECURSIVE语句涉及到较多其它逻辑集中的表,转储比较麻烦。
  • 当前语句没有直接调用WITH RECURSIVE语句,但是调用的视图中包含WITH RECURSIVE逻辑。

原始语句

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
INSERT INTO dwltax.bif_rpt_item_htp_tax_t(PERIOD_ID,REPORT_ITEM_ID,CRT_CYCLE_ID,LAST_UPD_CYCLE_ID,UPD_JOB_INSTANCE_ID, DEL_FLAG, CRT_JOB_INSTANCE_ID, UCCID )
SELECT
    DISTINCT T.PERIOD_ID ,158004503,-1,-1,-1,'N',-1,T.UCCID
FROM dwltax.dwl_tax_tom_taxpln_htp_tag_v T   
INNER JOIN  dwltax.dwl_tax_tom_cit_tp_account_v  TAX_E1502044 ON T.ACCOUNT_ID =  TAX_E1502044.ACCOUNT_ID  
INNER JOIN  dwltax.dwl_tax_tom_cit_tp_rf_tag_v  TAX_E1502045 ON T.RF_ID =  TAX_E1502045.RF_ID 
WHERE  1 = 1  
AND T.SOURCE_SYSTEM = 'HWHTPLOC01'
AND TAX_E1502044.PARENTID_ACCOUNT_CODE IN ('TARFAdjustmentsManual','TARFAdjustmentsExpenseDeferredReclass')
AND TAX_E1502045.PARENTID_RF_CODE = 'TARFTotal'
AND T.DATACATEGORY_CODE IN ('TARFDeferredNonCurrentLossNR')
AND T.PERIOD_ID=202308

分析发现如上语句的dwltax.dwl_tax_tom_cit_tp_rf_tag_v包含WITH RECURSIVE逻辑。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
CREATE OR REPLACE VIEW dwltax.dwl_tax_tom_cit_tp_rf_tag_v AS  SELECT d2.rf_id AS parent_rf_id, 
    d2.label AS parentid_rf_code, d1.rf_id, d1.label AS rf_code, 
    d1.userdefined1, d1.userdefined2, d1.userdefined3, d1.en_us, d1.en_gb, 
    d2.en_us AS parent_en_us, d2.en_gb AS parent_en_gb, d2.source_system
FROM (SELECT 
        DISTINCT r1.rf_id, r1.parentid
    FROM (SELECT 
                DISTINCT s.rf_id, s.parentid
            FROM (WITH RECURSIVE tmp AS (SELECT 1 AS lv, 
                    '/'::text || t.parentid::text AS path, 
                    t.rf_id, t.parentid
                FROM dwtaxdi.dwi_tax_cit_tp_rf_r_i t1
                RIGHT JOIN dwtaxdi.dwi_tax_cit_tp_rf_r_i t ON t.rf_id = t1.rf_id AND t.parentid = t1.parentid AND t1.del_flag::text = 'N'::text
                WHERE t.del_flag::text = 'N'::text AND t.isleaf_flag = 1::numeric AND 1 = 1 AND 1 = 1 AND 1 = 1

                UNION ALL

                SELECT 1 + p.lv AS lv, 
                    (p.path || '/'::text) || t.parentid::text AS path, 
                    p.rf_id, t.parentid
                FROM tmp p, dwtaxdi.dwi_tax_cit_tp_rf_r_i t1
                RIGHT JOIN dwtaxdi.dwi_tax_cit_tp_rf_r_i t ON t.rf_id = t1.rf_id AND t.parentid = t1.parentid AND t1.del_flag::text = 'N'::text
                WHERE p.parentid = t.rf_id AND t.del_flag::text = 'N'::text AND 1 = 1 AND 1 = 1 AND 1 = 1
            )

            SELECT (('/'::text || tmp.rf_id::text) || tmp.path) || '/'::text AS item_id_path, 
                tmp.rf_id, tmp.parentid
            FROM tmp
        ) s

        UNION ALL

        SELECT r.rf_id, r.rf_id AS parentid
        FROM dwtaxdi.dwi_tax_cit_tp_rf_r_i r
        WHERE r.isleaf_flag = 1::numeric AND r.del_flag::text = 'N'::text) r1
) r, dwtaxdi.dwi_tax_cit_tp_rf_i d1, dwtaxdi.dwi_tax_cit_tp_rf_i d2
WHERE r.rf_id = d1.rf_id AND r.parentid = d2.rf_id;

视图dwltax.dwl_tax_tom_cit_tp_rf_tag_v的结果集只有1w行左右,可以尝试把事务dwltax.dwl_tax_tom_cit_tp_rf_tag_v的结果集转储到中间表,从而保证主查询可以下推。

优化后语句

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
-- 8.2.1及以上版本
CREATE TEMP TABLE dwl_tax_tom_cit_tp_rf_tag_v WITH(orientation=column, enable_hstore= true) 
AS
SELECT * FROM dwltax.dwl_tax_tom_cit_tp_rf_tag_v

-- 8.2.0及以下版本
CREATE TEMP TABLE dwl_tax_tom_cit_tp_rf_tag_v WITH(orientation=row) 
AS
SELECT * FROM dwltax.dwl_tax_tom_cit_tp_rf_tag_v

INSERT /*+ set global(enable_force_vector_engine on)*/INTO  
-- 当dwl_tax_tom_cit_tp_rf_tag_v属性为orientation=column, enable_hstore= true时可以不加hint
dwltax.bif_rpt_item_htp_tax_t(PERIOD_ID,REPORT_ITEM_ID,CRT_CYCLE_ID,LAST_UPD_CYCLE_ID,UPD_JOB_INSTANCE_ID, DEL_FLAG, CRT_JOB_INSTANCE_ID, UCCID )
SELECT
    DISTINCT T.PERIOD_ID ,158004503,-1,-1,-1,'N',-1,T.UCCID
FROM dwltax.dwl_tax_tom_taxpln_htp_tag_v T   
INNER JOIN  dwltax.dwl_tax_tom_cit_tp_account_v  TAX_E1502044 ON T.ACCOUNT_ID =  TAX_E1502044.ACCOUNT_ID  
INNER JOIN  pg_temp.dwl_tax_tom_cit_tp_rf_tag_v  TAX_E1502045 ON T.RF_ID =  TAX_E1502045.RF_ID 
WHERE  1 = 1  
AND T.SOURCE_SYSTEM = 'HWHTPLOC01'
AND TAX_E1502044.PARENTID_ACCOUNT_CODE IN ('TARFAdjustmentsManual','TARFAdjustmentsExpenseDeferredReclass')
AND TAX_E1502045.PARENTID_RF_CODE = 'TARFTotal'
AND T.DATACATEGORY_CODE IN ('TARFDeferredNonCurrentLossNR')
AND T.PERIOD_ID=202308

修改点对比