SELECT
ANALYZE
Teradata的SELECT命令(缩写关键字为SEL)用于指定从哪一列中检索数据。
在GaussDB(DWS)中使用ANALYZE来收集优化器统计信息,这些统计信息将用于查询性能。
输入:ANALYZE,使用INSERT
1 2 |
INSERT INTO employee(empno,ename) Values (1,'John'); COLLECT STAT on employee; |
输出
1 2 3 |
INSERT INTO employee( empno, ename) SELECT 1 ,'John'; ANALYZE employee; |
输入:ANALYZE,使用UPDATE
1 2 3 |
UPD employee SET ename = 'Jane' WHERE ename = 'John'; COLLECT STAT on employee; |
输出
1 2 3 |
UPDATE employee SET ename = 'Jane' WHERE ename = 'John'; ANALYZE employee; |
输入:ANALYZE,使用DELETE
1 2 |
DEL FROM employee WHERE ID > 10; COLLECT STAT on employee; |
输出
1 2 |
DELETE FROM employee WHERE ID > 10; ANALYZE employee; |
子句顺序
从Teradata迁移SELECT语句时,各子句(FROM、WHERE、HAVING和GROUP BY)可按任意顺序排列。如果语句的FROM子句之前包含作为ALIAS的QUALIFY子句,则DSC不会迁移该语句。
可以使用tdMigrateALIAS参数来配置ALIAS的迁移。
输入:子句顺序
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
SELECT expr1 AS alias1 , expr2 AS alias2 , expr3 AS alias3 , MAX( expr4 ), ... FROM tab1 T1 INNER JOIN tab2 T2 ON T1.c1 = T2.c2 ... AND T3.c5 = '010' AND ... WHERE T1.c7 = '000' AND ... HAVING alias1 <> 'IC' AND alias2 <> 'IC' AND alias3 <> '' GROUP BY 1, 2, 3 ; |
输出
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
SELECT expr1 AS "alias1" ,expr2 AS "alias2" ,expr3 AS "alias3" ,MAX( expr4 ) ,... FROM tab1 T1 INNER JOIN tab2 T2 ON T1.c1 = T2.c2 ... AND T3.c5 = '010' AND ... WHERE T1.c7 = '000' AND ... GROUP BY 1 ,2 ,3 HAVING alias1 <> 'IC' AND alias2 <> 'IC' AND alias3 <> '' ; |
输入:子句顺序
1 2 3 4 5 6 7 8 |
SELECT TOP 10 * GROUP BY DeptNo WHERE empID < 100 FROM tbl_employee; |
输出
1 2 3 4 5 6 7 8 9 |
SELECT * FROM tbl_employee WHERE empID < 100 GROUP BY DeptNo LIMIT 10 ; |
如果输入脚本的FROM子句之前包含作为ALIAS的QUALIFY子句,DSC将不会迁移该语句,也不会逐字复制输入的语句。
输入:子句顺序,在FROM子句之前使用QUALIFY作为ALIAS
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
SELECT * FROM table1 WHERE abc = ( SELECT col1 AS qualify FROM TABLE WHERE col1 = 5 ) ; |
输出
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
SELECT * FROM table1 WHERE abc = ( SELECT col1 AS qualify FROM TABLE WHERE col1 = 5 ) ; |
扩展Group By子句
如果用户希望数据库根据expr(s)的值对选定的行进行分组,则可指定GROUP BY子句。 如果此子句包含CUBE,ROLLUP或GROUPING SETS扩展,则除了常规分组之外,数据库还会生成超级聚合分组。这些特性在GaussDB(DWS)中不可用,使用UNION ALL操作符可以实现类似的功能。
可以使用extendedGroupByClause参数来配置扩展GROUP BY子句的迁移。
输入:扩展Group By子句,使用CUBE
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
SELECT expr1 AS alias1 , expr2 AS alias2 , expr3 AS alias3 , MAX( expr4 ), ... FROM tab1 T1 INNER JOIN tab2 T2 ON T1.c1 = T2.c2 ... AND T3.c5 = '010' AND ... WHERE T1.c7 = '000' AND ... HAVING alias1 <> 'IC' AND alias2 <> 'IC' AND alias3 <> '' GROUP BY 1, 2, 3 ; |
输出
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
SELECT expr1 AS "alias1" ,expr2 AS "alias2" ,expr3 AS "alias3" ,MAX( expr4 ) ,... FROM tab1 T1 INNER JOIN tab2 T2 ON T1.c1 = T2.c2 ... AND T3.c5 = '010' AND ... WHERE T1.c7 = '000' AND ... GROUP BY 1 ,2 ,3 HAVING alias1 <> 'IC' AND alias2 <> 'IC' AND alias3 <> '' ; |
输入:扩展Group By子句,使用ROLLUP
1 2 3 4 5 |
SELECT d.dname, e.job, MAX(e.sal) FROM emp e RIGHT OUTER JOIN dept d ON e.deptno=d.deptno WHERE e.job IS NOT NULL GROUP BY ROLLUP (d.dname, e.job); |
输出
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
SELECT dname, job, ColumnAlias1 FROM ( SELECT MAX(e.sal) AS ColumnAlias1, d.dname, e.job FROM emp e RIGHT OUTER JOIN dept d ON e.deptno = d.deptno WHERE e.job IS NOT NULL GROUP BY d.dname ,e.job UNION ALL SELECT MAX(e.sal) AS ColumnAlias1, d.dname, NULL AS job FROM emp e RIGHT OUTER JOIN dept d ON e.deptno = d.deptno WHERE e.job IS NOT NULL GROUP BY d.dname UNION ALL SELECT MAX( e.sal ) AS ColumnAlias1, NULL AS dname, NULL AS job FROM emp e RIGHT OUTER JOIN dept d ON e.deptno = d.deptno WHERE e.job IS NOT NULL ); |
输入:扩展Group By子句,使用GROUPING SETS
1 2 3 4 5 |
SELECT d.dname, e.job, MAX(e.sal) FROM emp e RIGHT OUTER JOIN dept d ON e.deptno=d.deptno WHERE e.job IS NOT NULL GROUP BY GROUPING SETS(d.dname, e.job); |
输出
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
SELECT dname, job, ColumnAlias1 FROM ( SELECT MAX(e.sal) AS ColumnAlias1 , d.dname, NULL AS job FROM emp e RIGHT OUTER JOIN dept d ON e.deptno = d.deptno WHERE e.job IS NOT NULL GROUP BY d.dname UNION ALL SELECT MAX(e.sal) AS ColumnAlias1 , NULL AS dname, e.job FROM emp e RIGHT OUTER JOIN dept d ON e.deptno = d.deptno WHERE e.job IS NOT NULL GROUP BY e.job ); |
SELECT AS
GaussDB(DWS)变量名不区分大小写,TD变量名区分大小写。为保证TD脚本迁移前后正确,需要在SELECT语句变量定义中保留原变量名大小写形式。因此,转换后变量采用AS "变量名"形式定义。
输入示例
SELECT TRIM('${JOB_NAME}') AS JOB_NAME ,CASE WHEN LENGTH(trim(STRTOK('${JOB_NAME}','-',4)))=2 THEN trim(STRTOK('${JOB_NAME}','-',4)) ELSE '' END AS EDW_BANK_NM ,TRIM('${TX_DATE}') AS TX_DATE ,USER AS ETL_USER ,CAST( CURRENT_TIMESTAMP(0) AS VARCHAR(19)) AS CURR_STIME ,'${ETL_DATA}' AS ETL_DATA ,'T61_INDV_CUST_ACCT_ORG_AUM' AS TARGET_TABLE ,'CAST(''8999-12-31'' AS DATE)' AS MAXDATE ; .IF ERRORCODE <> 0 THEN .QUIT 12
输出示例
SELECT TRIM( '${job_name}' ) AS "JOB_NAME" ,CASE WHEN LENGTH( TRIM( split_part ( '${job_name}' ,'-' ,4 ) ) ) = 2 THEN TRIM( split_part ( '${job_name}' ,'-' ,4 ) ) ELSE '' END AS "EDW_BANK_NM" ,TRIM( '${tx_date}' ) AS "TX_DATE" ,USER AS "ETL_USER" ,CAST( CURRENT_TIMESTAMP( 0 ) AS VARCHAR( 19 ) ) AS "CURR_STIME" ,'${etl_data}' AS "ETL_DATA" ,'T61_INDV_CUST_ACCT_ORG_AUM' AS "TARGET_TABLE" ,'CAST(''8999-12-31'' AS DATE)' AS "MAXDATE" ; \if ${ERROR} != 'false' \q 12 \endif ;
嵌套AS表达式定义,必须拆解多条语句实现
输入示例
SELECT TRIM('${JOB_NAME}') AS JOB_NAME ,'CAST(''0001-01-02'' AS DATE)' AS ILLDATE ,'T61_INDV_CUST_HOLD_PROD_IND_AUM' AS TARGET_TABLE ,0 AS NULLNUMBER ,'CAST(''00:00:00.999'' AS TIME(3))' AS NULLTIME ,'CAST(''0001-01-01 00:00:00.000000'' AS TIMESTAMP(6))' AS NULLTIMESTAMP ,'VT_'||TARGET_TABLE AS VT_TABLE ,'V'||SUBSTR(TARGET_TABLE,2,CHAR(TARGET_TABLE)-1) AS TARGET_TABLE_V ,'${GDM_DETAIL_DDL}' AS V_TDDLDB ,'${GDM_DETAIL_VIEW}' AS V_TARGETDB ,'${UDF}' AS V_PUB_UDF ; .IF ERRORCODE <> 0 THEN .QUIT 12
输出示例
SELECT TRIM( '${job_name}' ) AS "JOB_NAME" ,'CAST(''0001-01-02'' AS DATE)' AS "ILLDATE" ,'T61_INDV_CUST_HOLD_PROD_IND_AUM' AS "TARGET_TABLE" ,0 AS "NULLNUMBER" ,'CAST(''00:00:00.999'' AS TIME(3))' AS "NULLTIME" ,'CAST(''0001-01-01 00:00:00.000000'' AS TIMESTAMP(6))' AS "NULLTIMESTAMP" ,'${gdm_detail_ddl}' AS "V_TDDLDB" ,'${gdm_detail_view}' AS "V_TARGETDB" ,'${udf}' AS "V_PUB_UDF" ; SELECT 'VT_' || '${TARGET_TABLE}' AS "VT_TABLE" ; SELECT 'V' || SUBSTR( '${TARGET_TABLE}' ,2 ,LENGTH( '${TARGET_TABLE}' ) - 1 ) AS "TARGET_TABLE_V" ; \if ${ERROR} != 'false' \q 12 \endif ;
TOP子句
DSC支持迁移使用动态参数的TOP语句。Teradata的TOP子句在GaussDB(DWS)中迁移为LIMIT。
- 对于包含WITH TIES的TOP语句,需要指定ORDER BY子句,否则工具不会迁移该语句,只会原样复制。
- 使用TOP和动态参数时:
- 按照以下形式输入动态参数:
1
TOP :<parameter_name>
可使用的字符包括:小写英文字母(a-z)、大写英文字母(A-Z)、数字(0-9)、下划线(_)
- 按照以下形式输入动态参数:
输入:SELECT...TOP
1 2 3 4 |
SELECT TOP 1 c1, COUNT (*) cnt FROM tab1 GROUP BY c1 ORDER BY cnt; |
输出
1 2 3 4 5 |
SELECT c1, COUNT( * ) cnt FROM tab1 GROUP BY c1 ORDER BY cnt LIMIT 1; |
输入:SELECT...TOP PERCENT
1 2 3 4 |
SELECT TOP 10 PERCENT c1, c2 FROM employee WHERE ... ORDER BY c2 DESC; |
输出
1 2 3 4 5 6 7 8 9 10 |
WITH top_percent AS ( SELECT c1, c2 FROM employee WHERE ... ORDER BY c2 DESC ) SELECT * FROM top_percent LIMIT (SELECT CEIL(COUNT( * ) * 10 / 100) FROM top_percent); |
输入:SELECT...TOP,使用动态参数
1 2 3 4 5 6 7 8 9 10 11 12 |
SELECT TOP :Limit WITH TIES c1 ,SUM (c2) sc2 FROM tab1 WHERE c3 > 10 GROUP BY c1 ORDER BY c1 ; |
输出
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
WITH top_ties AS ( SELECT c1 ,SUM (c2) sc2 ,rank ( ) OVER( ORDER BY c1 ) AS TOP_RNK FROM tab1 WHERE c3 > 10 GROUP BY c1 ) SELECT c1 ,sc2 FROM top_ties WHERE TOP_RNK <= :Limit ORDER BY TOP_RNK ; |
输入:SELECT...TOP,使用动态参数和TIES
1 2 3 4 5 6 7 |
SELECT TOP :Limit WITH TIES Customer_ID FROM Customer_t ORDER BY Customer_ID ; |
输出
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
WITH top_ties AS ( SELECT Customer_ID ,rank ( ) OVER( order by Customer_id) AS TOP_RNK FROM Customer_t ) SELECT Customer_ID FROM top_ties WHERE TOP_RNK <= :Limit ORDER BY TOP_RNK ; |
输入:SELECT...TOP PERCENT,使用动态参数
1 2 3 4 5 6 7 8 9 10 |
SELECT TOP :Input_Limit PERCENT WITH TIES c1 ,SUM (c2) sc2 FROM tab1 GROUP BY c1 ORDER BY c1 ; |
输出
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
WITH top_percent_ties AS ( SELECT c1 ,SUM (c2) sc2 ,rank ( ) OVER( ORDER BY c1 ) AS TOP_RNK FROM tab1 GROUP BY c1 ) SELECT c1 ,sc2 FROM top_percent_ties WHERE TOP_RNK <= ( SELECT CEIL(COUNT( * ) * :Input_Limit / 100) FROM top_percent_ties ) ORDER BY TOP_RNK ; |
SAMPLE子句
Teradata的SAMPLE子句在GaussDB(DWS)中迁移为LIMIT。
工具仅支持在SAMPLE子句中使用单个正整数。
输入:SELECT...SAMPLE
1 2 3 4 |
SELECT c1, c2, c3 FROM tab1 WHERE c1 > 1000 SAMPLE 1; |
输出
1 2 3 4 |
SELECT c1, c2, c3 FROM tab1 WHERE c1 > 1000 LIMIT 1; |