分组查询
分组查询通常用于配合聚合函数,查询分类统计的信息。常见的聚合函数有总行数count()、求和sum()、平均值avg()、最小值min()、最大值max()。一般和GROUP BY联合使用。
若想对分组的查询结果进行统计,则可以将WITH ROLLUP添加到GROUP BY语句中,WITH ROLLUP可以对多列分组查询结果进行统计。
-- 建表并插入数据。
m_db=# CREATE TABLE student (id INT,name VARCHAR(20),class INT);
m_db=# INSERT INTO student VALUES (1, 'Scott', 1), (2, 'Ben', 1);
m_db=# INSERT INTO student VALUES (3, 'Jack', 2),(4, 'Anna', 2),(5, 'Judy', 2);
m_db=# INSERT INTO student VALUES (6, 'Sally', 3), (7, 'Jordan', 3);
--使用聚合函数COUNT()统计每个班级有多少人。
m_db=# SELECT class, count(*) AS cnt FROM student GROUP BY class;
class | cnt
-------+-----
2 | 3
1 | 2
3 | 2
(3 rows)
--使用WITH ROLLUP语法对按班级分组统计后的人数进行汇总,汇总数据在最后一列。
m_db=# SELECT class, count(*) AS cnt FROM student GROUP BY class WITH ROLLUP;
class | cnt
-------+-----
1 | 2
2 | 3
3 | 2
| 7
(4 rows)
--使用WITH ROLLUP语法对按班级,姓名分组统计后的人数进行汇总,汇总数据在最后一列。
解释输出:对每个班级的姓名分组的后面,产生一个额外的统计行,意为每个班的人数。最后在其他行后面,产生一个额外的统计行,意为所有班的总人数。
m_db=# SELECT class,name, count(*) AS cnt FROM student GROUP BY class,name WITH ROLLUP;
class | name | cnt
-------+--------+-----
1 | Ben | 1
1 | Scott | 1
1 | | 2
2 | Anna | 1
2 | Jack | 1
2 | Judy | 1
2 | | 3
3 | Jordan | 1
3 | Sally | 1
3 | | 2
| | 7
(11 rows)
--使用HAVING子句过滤人数少于3个的班级数据。
m_db=# SELECT class, COUNT(*) AS num FROM student GROUP BY class HAVING COUNT(*) < 3;
class | num
-------+-----
1 | 2
3 | 2
(2 rows)
--删除。
m_db=# DROP TABLE student;
-- 建表并插入数据。
m_db=# CREATE TABLE test (
id int primary key NOT NULL,
id_2 int NOT NULL,
s1 varchar(10),
s2 int,
s3 varchar(12)
);
m_db=# ALTER TABLE test ADD CONSTRAINT unique_id_2 UNIQUE(id_2);
m_db=# INSERT INTO test value(1,101,'a',1000,'aa');
m_db=# INSERT INTO test value(2,102,'b',2000,'aaa');
m_db=# INSERT INTO test value(3,103,'c',3000,'aaaa');
m_db=# INSERT INTO test value(4,104,'b',4000,'aaa');
m_db=# INSERT INTO test value(5,105,'c',5000,'aaaaa');
m_db=# CREATE TABLE tb1(a int, b int, c int);
m_db=# INSERT INTO tb1 VALUES(1, 4, 11);
m_db=# INSERT INTO tb1 VALUES(2, 5, 12);
m_db=# INSERT INTO tb1 VALUES(2, 6, 13);
m_db=# INSERT INTO tb1 VALUES(2, 2, 13);
m_db=# CREATE TABLE tb2(a int, b int);
m_db=# INSERT INTO tb2 VALUES(2, 7);
m_db=# INSERT INTO tb2 VALUES(2, 8);
m_db=# INSERT INTO tb2 VALUES(3, 9);
-- 设置SQL MODE。
m_db=# SET SQL_MODE = 'ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION';
--SELECT列表中非聚合函数列与GROUP BY字段一致时,不会报错。
m_db=# SELECT s1, s3 FROM test GROUP BY s1,s3;
s1 | s3
----+-------
c | aaaaa
b | aaa
c | aaaa
a | aa
(4 rows)
m_db=# SELECT s1, s3 FROM test GROUP BY s3;
ERROR: Column test.s1 must appear in the GROUP BY clause or be used in an aggregate function.
LINE 1: SELECT s1, s3 FROM test GROUP BY s3;
^
--SELECT列表中非聚合函数列与GROUP BY字段不一致时,分两种情况:
--1)GROUP BY列表包含主键或唯一非空键时,SQL语句不会报错。
m_db=# SELECT s1, MAX(LENGTH(s3)) FROM test GROUP BY id;
s1 | max
----+-----
c | 4
c | 5
a | 2
b | 3
b | 3
(5 rows)
--2)SELECT列表中非聚合函数列,都出现GROUP BY列表或WHERE列表中。且WHERE子句中的列需要等于某一常量。
m_db=# SELECT s1, SUM(s2) FROM test WHERE s1 = 'b' GROUP BY s1;
s1 | sum
----+------
b | 6000
(1 row)
--对于同时含有JOIN子句和GROUP BY子句时:
--1)LEFT JOIN语法的ON子句中,若条件为两列等值判断,如t1.col1=t2.col2 ...,则GROUP BY子句中可无右连接表列,不报错。如t1 LEFT JOIN t2,GROUP BY 子句只需包含t1.col1。
m_db=# SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a;
a | a
---+---
1 |
2 | 2
(2 rows)
m_db=# SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb2.a;
ERROR: Column tb1.a must appear in the GROUP BY clause or be used in an aggregate function.
LINE 1: SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a ...
^
--2)RIGHT JOIN语法的ON子句中,若条件为两列等值判断,如t1.col1=t2.col2 ...,则GROUP BY子句中可无左连接表列,不报错。如t1 RIGHT JOIN t2,GROUP BY 子句只需包含t2.col2。
m_db=# SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb2.a;
a | a
---+---
| 3
2 | 2
(2 rows)
m_db=# SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a;
ERROR: Column tb2.a must appear in the GROUP BY clause or be used in an aggregate function.
LINE 1: SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a...
^
--3)INNER JOIN/CROSS JOIN/STRAIGHT_JOIN语法的ON子句中,若条件为两列等值判断,如t1.col1=t2.col2 ...,则GROUP BY子句中包含任一列即可,不报错。
m_db=# SELECT tb1.a, tb2.a FROM tb1 INNER JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a;
a | a
---+---
2 | 2
(1 row)
m_db=# SELECT tb1.a, tb2.a FROM tb1 INNER JOIN tb2 ON tb1.a = tb2.a GROUP BY tb2.a;
a | a
---+---
2 | 2
(1 row)
--删除。
m_db=# DROP TABLE test, tb1, tb2;