GROUP BY
GROUP BY
GROUP BY将SELECT语句的输出行划分成包含匹配值的分组。简单的GROUP BY可以包含由输入列组成的任何表达式,也可以是按位置选择输出列的序号。
以下查询是等效的:
SELECT count(*), nationkey FROM customer GROUP BY 2; SELECT count(*), nationkey FROM customer GROUP BY nationkey;
GROUP BY可以按未出现在SELECT语句输出中的输入列名对输出进行分组。
例如:
SELECT count(*) FROM customer GROUP BY mktsegment; GROUPING SETS
可以指定多个列进行分组,结果列中不属于分组列的将被设置为NUll。具有复杂分组语法(GROUPING SETS、CUBE或ROLLUP)的查询只从基础数据源读取一次,而使用UNION ALL的查询将读取基础数据三次。这就是当数据源不具有确定性时,使用UNION ALL的查询可能会产生不一致的结果的原因。
--创建一个航运表 create table shipping(origin_state varchar(25),origin_zip integer,destination_state varchar(25) ,destination_zip integer,package_weight integer); --插入数据 insert into shipping values ('California',94131,'New Jersey',8648,13), ('California',94131,'New Jersey',8540,42), ('California',90210,'Connecticut',6927,1337), ('California',94131,'Colorado',80302,5), ('New York',10002,'New Jersey',8540,3), ('New Jersey',7081,'Connecticut',6708,225); --执行查询Grouping sets SELECT origin_state, origin_zip, destination_state, sum( package_weight ) FROM shipping GROUP BY GROUPING SETS ( ( origin_state ), ( origin_state, origin_zip ), ( destination_state )); --这个的查询在逻辑上等同于多个分组查询的union all SELECT origin_state, NULL,NULL,sum( package_weight ) FROM shipping GROUP BY origin_state UNION ALL SELECT origin_state,origin_zip,NULL,sum( package_weight ) FROM shipping GROUP BY origin_state,origin_zip UNION ALL SELECT NULL,NULL,destination_state,sum( package_weight ) FROM shipping GROUP BY destination_state; --结果 origin_state | origin_zip | destination_state | _col3 --------------|------------|-------------------|------- New Jersey | NULL | NULL | 225 California | 94131 | NULL | 60 California | NULL | NULL | 1397 New York | 10002 | NULL | 3 NULL | NULL | New Jersey | 58 NULL | NULL | Connecticut | 1562 California | 90210 | NULL | 1337 New York | NULL | NULL | 3 NULL | NULL | Colorado | 5 New Jersey | 7081 | NULL | 225 (10 rows)
CUBE
为给定的列生成所有可能的分组,比如 (origin_state, destination_state) 的可能分组为:(origin_state, destination_state),(origin_state),(destination_state),()。
SELECT origin_state, destination_state, sum( package_weight ) FROM shipping GROUP BY CUBE ( origin_state, destination_state ); --等同于 SELECT origin_state, destination_state, sum( package_weight ) FROM shipping GROUP BY GROUPING SETS ( ( origin_state, destination_state ), ( origin_state ), ( destination_state ), ());
ROLLUP
为给定的列集生成部分可能的分类汇总:
SELECT origin_state, origin_zip, sum( package_weight ) FROM shipping GROUP BY ROLLUP ( origin_state, origin_zip ); --等同于 SELECT origin_state, origin_zip, sum( package_weight ) FROM shipping GROUP BY GROUPING SETS ((origin_state,origin_zip ),( origin_state ),());
Group by子句目前不支持使用列的别名,例如:
select count(userid) as num ,dept as aaa from salary group by aaa having sum(sal)>2000;
报错如下:
Query 20210630_084610_00018_wc8n9@default@HetuEngine failed: line 1:63: Column 'aaa' cannot be resolved