更新时间:2024-11-15 GMT+08:00

SELECT

ANALYZE

Teradata的SELECT命令(缩写关键字为SEL)用于指定从哪一列中检索数据。

在GaussDB(DWS)中使用ANALYZE来收集优化器统计信息,这些统计信息将用于查询性能。

输入:ANALYZE,使用INSERT

1
2
INSERT INTO employee(empno,ename)  Values (1,'John');
COLLECT STAT on employee;

输出

1
2
3
INSERT INTO employee( empno, ename)                                            
SELECT 1 ,'John';
ANALYZE employee;

输入:ANALYZE,使用UPDATE

1
2
3
UPD employee SET ename = 'Jane'
        WHERE ename = 'John';
COLLECT STAT on employee;

输出

1
2
3
UPDATE employee SET ename = 'Jane'
 WHERE ename = 'John';
ANALYZE employee;

输入:ANALYZE,使用DELETE

1
2
DEL FROM employee WHERE ID > 10;
COLLECT STAT on employee;

输出

1
2
DELETE FROM employee WHERE ID > 10;
ANALYZE employee;

子句顺序

从Teradata迁移SELECT语句时,各子句(FROM、WHERE、HAVING和GROUP BY)可按任意顺序排列。如果语句的FROM子句之前包含作为ALIAS的QUALIFY子句,则DSC不会迁移该语句。

可以使用tdMigrateALIAS参数来配置ALIAS的迁移。

输入:子句顺序

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
SELECT expr1 AS alias1
      , expr2 AS alias2 
      , expr3 AS alias3
      , MAX( expr4 ), ...
   FROM tab1 T1 INNER JOIN tab2 T2
     ON T1.c1 = T2.c2 ...
    AND T3.c5 = '010'
    AND ...
  WHERE T1.c7 = '000'
    AND ...
 HAVING alias1 <> 'IC'
         AND alias2 <> 'IC'
         AND alias3 <> ''
  GROUP BY 1, 2, 3 ;

输出

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
SELECT
          expr1 AS "alias1"
          ,expr2 AS "alias2"
          ,expr3 AS "alias3"
          ,MAX( expr4 )
          ,...
     FROM
          tab1 T1 INNER JOIN tab2 T2
               ON T1.c1 = T2.c2 ...
          AND T3.c5 = '010'
          AND ...
     WHERE
          T1.c7 = '000'
          AND ...
     GROUP BY
          1 ,2 ,3
     HAVING
          alias1 <> 'IC'
          AND alias2 <> 'IC'
          AND alias3 <> '' ;

输入:子句顺序

1
2
3
4
5
6
7
8
SELECT
          TOP 10 *
     GROUP BY
          DeptNo
     WHERE
          empID < 100
FROM
          tbl_employee;

输出

1
2
3
4
5
6
7
8
9
SELECT
          *
     FROM
          tbl_employee
     WHERE
          empID < 100
     GROUP BY
          DeptNo LIMIT 10
;

如果输入脚本的FROM子句之前包含作为ALIAS的QUALIFY子句,DSC将不会迁移该语句,也不会逐字复制输入的语句。

输入:子句顺序,在FROM子句之前使用QUALIFY作为ALIAS

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
SELECT
          *
     FROM
          table1
     WHERE
          abc = (
               SELECT
                         col1 AS qualify
                    FROM
                         TABLE
                         WHERE
                              col1 = 5
          )
;

输出

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
SELECT
          *
     FROM
          table1
     WHERE
          abc = (
               SELECT
                         col1 AS qualify
                    FROM
                         TABLE
                         WHERE
                              col1 = 5
          )
;

扩展Group By子句

如果用户希望数据库根据expr(s)的值对选定的行进行分组,则可指定GROUP BY子句。 如果此子句包含CUBE,ROLLUP或GROUPING SETS扩展,则除了常规分组之外,数据库还会生成超级聚合分组。这些特性在GaussDB(DWS)中不可用,使用UNION ALL操作符可以实现类似的功能。

可以使用extendedGroupByClause参数来配置扩展GROUP BY子句的迁移。

输入:扩展Group By子句,使用CUBE

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
SELECT expr1 AS alias1
      , expr2 AS alias2 
      , expr3 AS alias3
      , MAX( expr4 ), ...
   FROM tab1 T1 INNER JOIN tab2 T2
     ON T1.c1 = T2.c2 ...
    AND T3.c5 = '010'
    AND ...
  WHERE T1.c7 = '000'
    AND ...
 HAVING alias1 <> 'IC'
         AND alias2 <> 'IC'
         AND alias3 <> ''
  GROUP BY 1, 2, 3 ;

输出

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
SELECT
          expr1 AS "alias1"
          ,expr2 AS "alias2"
          ,expr3 AS "alias3"
          ,MAX( expr4 )
          ,...
     FROM
          tab1 T1 INNER JOIN tab2 T2
               ON T1.c1 = T2.c2 ...
          AND T3.c5 = '010'
          AND ...
     WHERE
          T1.c7 = '000'
          AND ...
     GROUP BY
          1 ,2 ,3
     HAVING
          alias1 <> 'IC'
          AND alias2 <> 'IC'
          AND alias3 <> '' ;

输入:扩展Group By子句,使用ROLLUP

1
2
3
4
5
SELECT d.dname, e.job, MAX(e.sal)
  FROM emp e RIGHT OUTER JOIN dept d
    ON e.deptno=d.deptno
WHERE e.job IS NOT NULL
GROUP BY ROLLUP (d.dname, e.job);

输出

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
SELECT dname, job, ColumnAlias1
  FROM ( SELECT MAX(e.sal) AS ColumnAlias1, d.dname, e.job
           FROM emp e RIGHT OUTER JOIN dept d
             ON e.deptno = d.deptno
          WHERE e.job IS NOT NULL
          GROUP BY d.dname ,e.job
          UNION ALL 
         SELECT MAX(e.sal) AS ColumnAlias1, d.dname, NULL AS                    
                 job
           FROM emp e RIGHT OUTER JOIN dept d
             ON e.deptno = d.deptno
          WHERE e.job IS NOT NULL
          GROUP BY d.dname
          UNION ALL 
         SELECT MAX( e.sal ) AS ColumnAlias1, NULL AS dname,  
                     NULL AS job
           FROM emp e RIGHT OUTER JOIN dept d
             ON e.deptno = d.deptno
          WHERE e.job IS NOT NULL
        );

输入:扩展Group By子句,使用GROUPING SETS

1
2
3
4
5
SELECT d.dname, e.job, MAX(e.sal)
FROM emp e RIGHT OUTER JOIN dept d
ON e.deptno=d.deptno
WHERE e.job IS NOT NULL
GROUP BY GROUPING SETS(d.dname, e.job);

输出

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
SELECT dname, job, ColumnAlias1
  FROM ( SELECT MAX(e.sal) AS ColumnAlias1
              , d.dname, NULL AS job
           FROM emp e RIGHT OUTER JOIN dept d
             ON e.deptno = d.deptno
          WHERE e.job IS NOT NULL
          GROUP BY d.dname
          UNION ALL 
         SELECT MAX(e.sal) AS ColumnAlias1
              , NULL AS dname, e.job
           FROM emp e RIGHT OUTER JOIN dept d
             ON e.deptno = d.deptno
          WHERE e.job IS NOT NULL
          GROUP BY e.job
        );

SELECT AS

GaussDB(DWS)变量名不区分大小写,TD变量名区分大小写。为保证TD脚本迁移前后正确,需要在SELECT语句变量定义中保留原变量名大小写形式。因此,转换后变量采用AS "变量名"形式定义。

输入示例

SELECT  TRIM('${JOB_NAME}')                                                                    AS JOB_NAME         
       ,CASE WHEN LENGTH(trim(STRTOK('${JOB_NAME}','-',4)))=2
            THEN trim(STRTOK('${JOB_NAME}','-',4))
			 ELSE ''
			 END                                                                                     AS EDW_BANK_NM      
       ,TRIM('${TX_DATE}')                                                                     AS TX_DATE          
       ,USER                                                                                   AS ETL_USER         
       ,CAST( CURRENT_TIMESTAMP(0) AS VARCHAR(19))                                             AS CURR_STIME       
       ,'${ETL_DATA}'                                                                          AS ETL_DATA         
       ,'T61_INDV_CUST_ACCT_ORG_AUM'                                                           AS TARGET_TABLE     
       ,'CAST(''8999-12-31'' AS DATE)'                                                         AS MAXDATE           
;
.IF ERRORCODE <> 0 THEN .QUIT 12

输出示例

SELECT
          TRIM( '${job_name}' ) AS "JOB_NAME"
          ,CASE
                    WHEN LENGTH( TRIM( split_part ( '${job_name}' ,'-' ,4 ) ) ) = 2 THEN TRIM( split_part ( '${job_name}' ,'-' ,4 ) )
               ELSE ''
          END AS "EDW_BANK_NM"
          ,TRIM( '${tx_date}' ) AS "TX_DATE"
          ,USER AS "ETL_USER"
          ,CAST( CURRENT_TIMESTAMP( 0 ) AS VARCHAR( 19 ) ) AS "CURR_STIME"
          ,'${etl_data}' AS "ETL_DATA"
          ,'T61_INDV_CUST_ACCT_ORG_AUM' AS "TARGET_TABLE"
          ,'CAST(''8999-12-31'' AS DATE)' AS "MAXDATE" ;
\if ${ERROR} != 'false' 
 \q 12
\endif 

;

嵌套AS表达式定义,必须拆解多条语句实现

输入示例

SELECT  TRIM('${JOB_NAME}')                                                                    AS JOB_NAME         
       ,'CAST(''0001-01-02'' AS DATE)'                                                         AS ILLDATE          
       ,'T61_INDV_CUST_HOLD_PROD_IND_AUM'                                                      AS TARGET_TABLE         
       ,0                                                                                      AS NULLNUMBER       
       ,'CAST(''00:00:00.999'' AS TIME(3))'                                                    AS NULLTIME         
       ,'CAST(''0001-01-01 00:00:00.000000'' AS TIMESTAMP(6))'                                 AS NULLTIMESTAMP    
       ,'VT_'||TARGET_TABLE                                                                    AS VT_TABLE         
       ,'V'||SUBSTR(TARGET_TABLE,2,CHAR(TARGET_TABLE)-1)                                       AS TARGET_TABLE_V   
       ,'${GDM_DETAIL_DDL}'                                                                    AS V_TDDLDB         
       ,'${GDM_DETAIL_VIEW}'                                                                   AS V_TARGETDB       
       ,'${UDF}'                                                                               AS V_PUB_UDF        

;
.IF ERRORCODE <> 0 THEN .QUIT 12

输出示例

SELECT
          TRIM( '${job_name}' ) AS "JOB_NAME"
          ,'CAST(''0001-01-02'' AS DATE)' AS "ILLDATE"
          ,'T61_INDV_CUST_HOLD_PROD_IND_AUM' AS "TARGET_TABLE"
          ,0 AS "NULLNUMBER"
          ,'CAST(''00:00:00.999'' AS TIME(3))' AS "NULLTIME"
          ,'CAST(''0001-01-01 00:00:00.000000'' AS TIMESTAMP(6))' AS "NULLTIMESTAMP"
          ,'${gdm_detail_ddl}' AS "V_TDDLDB"
          ,'${gdm_detail_view}' AS "V_TARGETDB"
          ,'${udf}' AS "V_PUB_UDF" ;
SELECT
          'VT_' || '${TARGET_TABLE}' AS "VT_TABLE" ;
SELECT
          'V' || SUBSTR( '${TARGET_TABLE}' ,2 ,LENGTH( '${TARGET_TABLE}' ) - 1 ) AS "TARGET_TABLE_V" ;
\if ${ERROR} != 'false' 
 \q 12
\endif 

;

TOP子句

DSC支持迁移使用动态参数的TOP语句。Teradata的TOP子句在GaussDB(DWS)中迁移为LIMIT。

  • 对于包含WITH TIES的TOP语句,需要指定ORDER BY子句,否则工具不会迁移该语句,只会原样复制。
  • 使用TOP和动态参数时:
    • 按照以下形式输入动态参数:
      1
       TOP :<parameter_name>
      

      可使用的字符包括:小写英文字母(a-z)、大写英文字母(A-Z)、数字(0-9)、下划线(_)

输入:SELECT...TOP

1
2
3
4
SELECT TOP 1 c1, COUNT (*) cnt 
  FROM tab1 
 GROUP BY c1 
 ORDER BY cnt;	

输出

1
2
3
4
5
SELECT c1, COUNT( * ) cnt
  FROM tab1
 GROUP BY c1
 ORDER BY cnt 
 LIMIT 1;

输入:SELECT...TOP PERCENT

1
2
3
4
SELECT TOP 10 PERCENT c1, c2 
  FROM employee
 WHERE ...
 ORDER BY c2 DESC;

输出

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
WITH top_percent AS (
      SELECT c1, c2
        FROM employee
       WHERE ...
       ORDER BY c2 DESC
                    ) 
SELECT *
  FROM top_percent 
 LIMIT (SELECT CEIL(COUNT( * ) * 10 / 100)
          FROM top_percent);

输入:SELECT...TOP,使用动态参数

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
SELECT
           TOP :Limit WITH TIES c1
          ,SUM (c2) sc2
     FROM
          tab1
     WHERE
          c3 > 10
     GROUP BY
          c1
     ORDER BY
          c1
;

输出

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
WITH top_ties AS (
     SELECT
                c1
               ,SUM (c2) sc2
               ,rank (
               ) OVER( ORDER BY c1 ) AS TOP_RNK
          FROM
               tab1
          WHERE
               c3 > 10
          GROUP BY
               c1
) SELECT
          c1
          ,sc2
     FROM
          top_ties
     WHERE
          TOP_RNK <= :Limit
     ORDER BY
          TOP_RNK
;

输入:SELECT...TOP,使用动态参数和TIES

1
2
3
4
5
6
7
 SELECT
           TOP :Limit WITH TIES Customer_ID
   FROM
          Customer_t
   ORDER BY
          Customer_ID
;

输出

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
WITH top_ties AS (
     SELECT
               Customer_ID
               ,rank (
               ) OVER( order by Customer_id) AS TOP_RNK
          FROM
               Customer_t
) SELECT
          Customer_ID
     FROM
          top_ties
     WHERE
          TOP_RNK <= :Limit
     ORDER BY
          TOP_RNK
;

输入:SELECT...TOP PERCENT,使用动态参数

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
SELECT
          TOP :Input_Limit PERCENT WITH TIES c1
          ,SUM (c2) sc2
     FROM
          tab1
     GROUP BY
          c1
     ORDER BY
          c1
;

输出

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
WITH top_percent_ties AS (
     SELECT
               c1
               ,SUM (c2) sc2
               ,rank (
               ) OVER( ORDER BY c1 ) AS TOP_RNK
          FROM
               tab1
          GROUP BY
               c1
) SELECT
          c1
          ,sc2
     FROM
          top_percent_ties
     WHERE
          TOP_RNK <= (
               SELECT
                         CEIL(COUNT( * ) * :Input_Limit / 100)
                    FROM
                         top_percent_ties
          )
     ORDER BY
          TOP_RNK
;

SAMPLE子句

Teradata的SAMPLE子句在GaussDB(DWS)中迁移为LIMIT。

工具仅支持在SAMPLE子句中使用单个正整数。

输入:SELECT...SAMPLE

1
2
3
4
SELECT c1, c2, c3 
  FROM tab1 
 WHERE c1 > 1000
SAMPLE 1;

输出

1
2
3
4
SELECT c1, c2, c3
  FROM tab1
 WHERE c1 > 1000
 LIMIT 1;