更新时间:2024-06-29 GMT+08:00

分析函数

在Teradata中,分析函数统称为有序分析函数,它们为数据挖掘、分析和商业智能提供了强大的分析能力。

ORDER BY中的分析函数

输入:ORDER BY子句中的分析函数

1
2
3
4
SELECT customer_id, customer_name, RANK(customer_id, customer_address DESC) 
   FROM customer_t
 WHERE  customer_state = 'CA'
 ORDER BY RANK(customer_id, customer_address DESC);

输出

1
2
3
4
SELECT customer_id, customer_name, RANK() over(order by customer_id, customer_address DESC) 
   FROM customer_t
 WHERE  customer_state = 'CA'
 ORDER BY RANK() over(order by customer_id DESC, customer_address DESC) ;

输入:GROUP BY子句中的分析函数

1
2
3
4
5
6
7
SELECT customer_city, customer_state, postal_code
     , rank(postal_code)
     , rank() over(partition by customer_state order by postal_code)
    , rank() over(order by postal_code)
  FROM Customer_T 
  GROUP BY customer_state
  ORDER BY customer_state;

输出

1
2
3
4
5
6
SELECT customer_city, customer_state, postal_code
     , rank() over(PARTITION BY customer_state ORDER BY postal_code DESC) 
     , rank() over(partition by customer_state order by postal_code)
     , rank() over(order by postal_code)
  FROM Customer_T 
  ORDER BY customer_state;

PARTITION BY中的分析函数

当输入脚本的PARTITION BY子句中包含数值时,迁移脚本将原样保留该数值。

输入:PARTITION BY子句中的分析函数(包含数值)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
SELECT
          Customer_id
          ,customer_name
          ,rank (
          ) over( partition BY 1 ORDER BY Customer_id )
          ,rank (customer_name)
     FROM
          Customer_t
     GROUP BY
          1
;

输出

SELECT
          Customer_id
          ,customer_name
          ,rank (
          ) over( partition BY 1 ORDER BY Customer_id )
          ,rank (
          ) over( PARTITION BY Customer_id ORDER BY customer_name DESC )
     FROM
          Customer_t
;

窗口函数

窗口函数在查询结果中执行跨行计算。DSC支持以下Teradata窗口函数:

DSC仅支持QUALIFY子句使用一个窗口函数。如果QUALIFY使用多个窗口函数,可能会导致迁移失败。

CSUM

累计函数(CSUM)为一列数值计算运行或累计总数,建议在QUALIFY语句中使用ALIAS。

输入:CSUM,使用GROUP_ID
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
INSERT INTO GSIS_SUM.DW_DAT71 (
   col1
  ,PROD_GROUP
)
   SELECT 
      CSUM(1, T1.col1)
      ,T1.PROD_GROUP
     FROM tab1  T1
    WHERE T1.col1 = 'ABC'
;

输出

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
INSERT
     INTO
          GSIS_SUM.DW_DAT71 (
               col1
               ,PROD_GROUP
          ) SELECT
                    SUM (1) over( ORDER BY T1.col1 ROWS UNBOUNDED PRECEDING )
                    ,T1.PROD_GROUP
               FROM
                    tab1 T1
               WHERE
                    T1.col1 = 'ABC'
;
输入:CSUM,使用GROUP_ID
1
2
3
4
5
SELECT  top 10
      CSUM(1, T1.Test_GROUP)
      ,T1.col1
  FROM  $[schema}.  T1
 WHERE T1.Test_GROUP = 'Test_group' group by Test_group order by Test_Group;

输出

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
SELECT
       SUM (1) over( partition BY Test_group ORDER BY T1.Test_GROUP ROWS UNBOUNDED PRECEDING )
       ,T1.col1
  FROM
       $[schema}. T1
 WHERE
       T1.Test_GROUP = 'Test_group'
 ORDER BY
       Test_Group LIMIT 10
;
输入:CSUM,使用GROUP BY和QUALIFY
1
2
3
4
SELECT c1, c2, c3, CSUM(c4, c3) 
  FROM tab1 
QUALIFY ROW_NUMBER(c4) = 1  
GROUP BY 1, 2;

输出

1
2
3
4
5
6
7
SELECT c1, c2, c3, ColumnAlias1
  FROM ( SELECT c1, c2, c3
              , SUM (c4) OVER(PARTITION BY 1 ,2 ORDER BY c3 ROWS UNBOUNDED PRECEDING) AS ColumnAlias1
              , ROW_NUMBER( ) OVER(PARTITION BY 1, 2 ORDER BY c4) AS ROW_NUM1
           FROM tab1
       ) Q1
   WHERE Q1.ROW_NUM1 = 1;

MDIFF

MDIFF函数基于预定的查询宽度计算一列的移动差分值。查询宽度即所指定的行数。建议在QUALIFY语句中使用ALIAS。

输入:MDIFF,使用QUALIFY

1
2
3
4
5
6
SELECT DT_A.Acct_ID, DT_A.Trade_Date, DT_A.Stat_PBU_ID
      , CAST( MDIFF( Stat_PBU_ID_3, 1, DT_A.Trade_No ASC ) AS DECIMAL(20,0) ) AS MDIFF_Stat_PBU_ID
   FROM Trade_His DT_A
  WHERE Trade_Date >= CAST( '20170101' AS DATE FORMAT 'YYYYMMDD' ) 
  GROUP BY DT_A.Acct_ID, DT_A.Trade_Date
 QUALIFY MDIFF_Stat_PBU_ID <> 0 OR MDIFF_Stat_PBU_ID IS NULL;

输出

1
2
3
4
5
6
7
SELECT Acct_ID, Trade_Date, Stat_PBU_ID, MDIFF_Stat_PBU_ID
   FROM (SELECT DT_A.Acct_ID, DT_A.Trade_Date, DT_A.Stat_PBU_ID
         , CAST( (Stat_PBU_ID_3 - (LAG(Stat_PBU_ID_3, 1, NULL) OVER (PARTITION BY DT_A.Acct_ID, DT_A.Trade_Date ORDER BY DT_A.Trade_No ASC)))  AS MDIFF_Stat_PBU_ID
           FROM Trade_His DT_A
          WHERE Trade_Date >= CAST( '20170101' AS DATE)
                )
 WHERE MDIFF_Stat_PBU_ID <> 0 OR MDIFF_Stat_PBU_ID IS NULL;

RANK

RANK(col1, col2...)

输入:RANK,使用GROUP BY

1
2
3
4
SELECT  c1, c2, c3, RANK(c4, c1 DESC, c3) AS Rank1 
  FROM  tab1 
 WHERE  ... 
 GROUP BY c1;

输出

1
2
3
SELECT c1, c2, c3, RANK() OVER (PARTITION BY c1 ORDER BY c4, c1 DESC ,c3) AS Rank1
  FROM tab1
 WHERE ...;

ROW_NUMBER

ROW_NUMBER(col1, col2...)

输入:ROW NUMBER,使用GROUP BY和QUALIFY

1
2
3
4
SELECT c1, c2, c3, ROW_NUMBER(c4, c3) 
   FROM tab1 
QUALIFY RANK(c4) = 1  
  GROUP BY 1, 2;

输出

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
SELECT
      c1
     ,c2
     ,c3
     ,ColumnAlias1
  FROM
      (
        SELECT
                c1
               ,c2
               ,c3
               ,ROW_NUMBER( ) over( PARTITION BY 1 ,2 ORDER BY c4 ,c3 ) AS ColumnAlias1
               ,RANK (
               ) over( PARTITION BY 1 ,2 ORDER BY c4 ) AS ROW_NUM1
          FROM
              tab1
      ) Q1
 WHERE
      Q1.ROW_NUM1 = 1
;

COMPRESS(使用*****)

输入

ORDCADBRN VARCHAR(6) CHARACTER SET LATIN CASESPECIFIC TITLE '    ' COMPRESS '******'

输出

ORDCADBRN VARCHAR( 6 ) /* CHARACTER SET LATIN*/ /* CASESPECIFIC*/ /*TITLE '    '*/ /* COMPRESS  '******' */