GROUP BY
GROUP BY
GROUP BY将SELECT语句的输出行划分成包含匹配值的分组。简单的GROUP BY可以包含由输入列组成的任何表达式,也可以是按位置选择输出列的序号。
以下查询是等效的:
SELECT count(*), nationkey FROM customer GROUP BY 2; SELECT count(*), nationkey FROM customer GROUP BY nationkey;
GROUP BY可以按未出现在SELECT语句输出中的输入列名对输出进行分组。
例如:
SELECT count(*) FROM customer GROUP BY mktsegment; GROUPING SETS
可以指定多个列进行分组,结果列中不属于分组列的将被设置为NUll。具有复杂分组语法(GROUPING SETS、CUBE或ROLLUP)的查询只从基础数据源读取一次,而使用UNION ALL的查询将读取基础数据三次。这就是当数据源不具有确定性时,使用UNION ALL的查询可能会产生不一致的结果的原因。
--创建一个航运表
create table shipping(origin_state varchar(25),origin_zip integer,destination_state varchar(25) ,destination_zip integer,package_weight integer);
--插入数据
insert into shipping values ('California',94131,'New Jersey',8648,13),
('California',94131,'New Jersey',8540,42),
('California',90210,'Connecticut',6927,1337),
('California',94131,'Colorado',80302,5),
('New York',10002,'New Jersey',8540,3),
('New Jersey',7081,'Connecticut',6708,225);
--执行查询Grouping sets
SELECT
origin_state,
origin_zip,
destination_state,
sum( package_weight )
FROM shipping
GROUP BY GROUPING SETS (
( origin_state ),
( origin_state, origin_zip ),
( destination_state ));
--这个的查询在逻辑上等同于多个分组查询的union all
SELECT origin_state, NULL,NULL,sum( package_weight ) FROM shipping GROUP BY origin_state UNION ALL SELECT origin_state,origin_zip,NULL,sum( package_weight ) FROM shipping GROUP BY origin_state,origin_zip UNION ALL SELECT NULL,NULL,destination_state,sum( package_weight ) FROM shipping GROUP BY destination_state;
--结果
origin_state | origin_zip | destination_state | _col3
--------------|------------|-------------------|-------
New Jersey | NULL | NULL | 225
California | 94131 | NULL | 60
California | NULL | NULL | 1397
New York | 10002 | NULL | 3
NULL | NULL | New Jersey | 58
NULL | NULL | Connecticut | 1562
California | 90210 | NULL | 1337
New York | NULL | NULL | 3
NULL | NULL | Colorado | 5
New Jersey | 7081 | NULL | 225
(10 rows)
CUBE
为给定的列生成所有可能的分组,比如 (origin_state, destination_state) 的可能分组为:(origin_state, destination_state),(origin_state),(destination_state),()。
SELECT origin_state, destination_state, sum( package_weight ) FROM shipping GROUP BY CUBE ( origin_state, destination_state ); --等同于 SELECT origin_state, destination_state, sum( package_weight ) FROM shipping GROUP BY GROUPING SETS ( ( origin_state, destination_state ), ( origin_state ), ( destination_state ), ());
ROLLUP
为给定的列集生成部分可能的分类汇总:
SELECT origin_state, origin_zip, sum( package_weight ) FROM shipping GROUP BY ROLLUP ( origin_state, origin_zip ); --等同于 SELECT origin_state, origin_zip, sum( package_weight ) FROM shipping GROUP BY GROUPING SETS ((origin_state,origin_zip ),( origin_state ),());
Group by 子句目前不支持使用列的别名,例如:
select count(userid) as num ,dept as aaa from salary group by aaa having sum(sal)>2000;
报错如下:
Query 20210630_084610_00018_wc8n9@default@HetuEngine failed: line 1:63: Column 'aaa' cannot be resolved