更新时间:2025-05-29 GMT+08:00

分组查询

分组查询通常用于配合聚集函数,查询分类统计的信息。常见的聚集函数有总行数count()、求和sum()、平均值avg()、最小值min()、最大值max()。一般和GROUP BY联合使用。

-- 建表并插入数据。
gaussdb=# CREATE TABLE student (id INT,name VARCHAR(20),class INT);
gaussdb=# INSERT INTO student VALUES (1, 'Scott', 1), (2, 'Ben', 1);
gaussdb=# INSERT INTO student VALUES (3, 'Jack', 2),(4, 'Anna', 2),(5, 'Judy', 2);
gaussdb=# INSERT INTO student VALUES (6, 'Sally', 3), (7, 'Jordan', 3);

--使用聚集函数COUNT()统计每个班级有多少人。
gaussdb=# SELECT class, count(*) AS cnt FROM student GROUP BY class; 
 class | cnt 
-------+-----
     2 |   3
     1 |   2
     3 |   2
(3 rows)

--使用HAVING子句过滤人数少于3个的班级数据。
gaussdb=# SELECT class, COUNT(*) AS num FROM student GROUP BY class HAVING COUNT(*) < 3;
 class | num 
-------+-----
     1 |   2
     3 |   2
(2 rows)

--删除。
gaussdb=# DROP TABLE student;
sql_mode中包含ONLY_FULL_GROUP_BY选项,用于校验GROUP BY列表与SELECT列表。
--建表并插入数据。
gaussdb=# CREATE TABLE test (
          id int  primary key NOT NULL,
          id_2 int unique NOT NULL,
          s1 varchar(10),
          s2 int,
          s3 varchar(12)
        );
gaussdb=# INSERT INTO test value(1,101,'a',1000,'aa');
gaussdb=# INSERT INTO test value(2,102,'b',2000,'aaa');
gaussdb=# INSERT INTO test value(3,103,'c',3000,'aaaa');
gaussdb=# INSERT INTO test value(4,104,'b',4000,'aaa');
gaussdb=# INSERT INTO test value(5,105,'c',5000,'aaaaa');
gaussdb=# CREATE TABLE tb1(a int, b int, c int);
gaussdb=# INSERT INTO tb1 VALUES(1, 4, 11);
gaussdb=# INSERT INTO tb1 VALUES(2, 5, 12);
gaussdb=# INSERT INTO tb1 VALUES(2, 6, 13);
gaussdb=# INSERT INTO tb1 VALUES(2, 2, 13);
gaussdb=# CREATE TABLE tb2(a int, b int);
gaussdb=# INSERT INTO tb2 VALUES(2, 7);
gaussdb=# INSERT INTO tb2 VALUES(2, 8);
gaussdb=# INSERT INTO tb2 VALUES(3, 9);

--设置sql_mode。
gaussdb=# SET sql_mode = 'ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION';

--SELECT列表中非聚集函数列与GROUP BY字段一致时,不会报错。
gaussdb=# SELECT s1, s3 FROM test GROUP BY s1,s3 ORDER BY s1,s3;
 s1 |  s3   
----+-------
 a  | aa
 b  | aaa
 c  | aaaa
 c  | aaaaa
(4 rows)

gaussdb=# SELECT s1, s3 FROM test GROUP BY s3 ORDER BY s3;
ERROR:  Column "test.s1" must appear in the GROUP BY clause or be used in an aggregate function.
LINE 1: SELECT s1, s3 FROM test GROUP BY s3 ORDER BY s3;
               ^
--SELECT列表中非聚集函数列与GROUP BY字段不一致时,分两种情况:
--情况一:GROUP BY列表包含主键或唯一非空键时,SQL语句不会报错。
gaussdb=# SELECT s1, MAX(LENGTH(s3)) FROM test GROUP BY id ORDER BY id;
 s1 | max 
----+-----
 a  |   2
 b  |   3
 c  |   4
 b  |   3
 c  |   5
(5 rows)

--情况二:SELECT列表中非聚集函数列,都出现GROUP BY列表或WHERE列表中,且WHERE子句中的列需要等于某一常量。
gaussdb=# SELECT s1, SUM(s2) FROM test WHERE s1 = 'b' GROUP BY s1;
 s1 | sum  
----+------
 b  | 6000
(1 row)

--对于同时含有JOIN子句和GROUP BY子句时,存在以下情况:
--情况一:LEFT JOIN语法的ON子句中,若条件为两列等值判断,如t1.col1=t2.col2 ...,则GROUP BY子句中可不存在右连接表列,不会报错。如t1 LEFT JOIN t2,GROUP BY 子句只需包含t1.col1。
gaussdb=# SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a ORDER BY tb1.a;
 a | a 
---+---
 1 |  
 2 | 2
(2 rows)

gaussdb=# SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb2.a ORDER BY tb2.a;
ERROR:  Column "tb1.a" must appear in the GROUP BY clause or be used in an aggregate function.
LINE 1: SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a ...
               ^
--情况二:RIGHT JOIN语法的ON子句中,若条件为两列等值判断,如t1.col1=t2.col2 ...,则GROUP BY子句中可不存在左连接表列,不会报错。如t1 RIGHT JOIN t2,GROUP BY 子句只需包含t2.col2。
gaussdb=# SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb2.a ORDER BY tb2.a;
 a | a 
---+---
 2 | 2
   | 3

(2 rows)

gaussdb=# SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a ORDER BY tb1.a;
ERROR:  Column "tb2.a" must appear in the GROUP BY clause or be used in an aggregate function.
LINE 1: SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a...
                      ^
--情况三:INNER JOIN/CROSS JOIN/STRAIGHT_JOIN语法的ON子句中,若条件为两列等值判断,如t1.col1=t2.col2 ...,则GROUP BY子句中包含任一列即可,不会报错。
gaussdb=# SELECT tb1.a, tb2.a FROM tb1 INNER JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a ORDER BY tb1.a;
 a | a 
---+---
 2 | 2
(1 row)

--设置sql_mode,取消ONLY_FULL_GROUP_BY选项。
gaussdb=# SET SQL_MODE = 'STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION';

--SELECT列表中非聚集函数列与GROUP BY字段一致时,不会报错。
gaussdb=# SELECT s1, s3 FROM test GROUP BY s1,s3 ORDER BY s1,s3;
 s1 |  s3   
----+-------
 a  | aa
 b  | aaa
 c  | aaaa
 c  | aaaaa
(4 rows)

gaussdb=# SELECT s1, s3 FROM test GROUP BY s3 ORDER BY s3;
 s1 |  s3   
----+-------
 a  | aa
 b  | aaa
 c  | aaaa
 c  | aaaaa
(4 rows)

--SELECT列表中非聚集函数列与GROUP BY字段不一致时,分两种情况:
--情况一:GROUP BY列表包含主键或唯一非空键时,SQL语句不会报错。
gaussdb=# SELECT s1, MAX(LENGTH(s3)) FROM test GROUP BY id ORDER BY id;
 s1 | max 
----+-----
 a  |   2
 b  |   3
 c  |   4
 b  |   3
 c  |   5
(5 rows)

--情况二:SELECT列表中非聚集函数列,都出现GROUP BY列表或WHERE列表中,且WHERE子句中的列需要等于某一常量。
gaussdb=# SELECT s1, SUM(s2) FROM test WHERE s1 = 'b' GROUP BY s1;
 s1 | sum  
----+------
 b  | 6000
(1 row)

--对于同时含有JOIN子句和GROUP BY子句时,存在以下情况:
--情况一:LEFT JOIN语法的ON子句中,若条件为两列等值判断,如t1.col1=t2.col2 ...,则GROUP BY子句中可不存在右连接表列,不会报错。如t1 LEFT JOIN t2,GROUP BY 子句只需包含t1.col1。
gaussdb=# SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a ORDER BY tb1.a;
 a | a 
---+---
 1 |  
 2 | 2
(2 rows)

gaussdb=# SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb2.a ORDER BY tb2.a;
 a | a 
---+---
 2 | 2
 1 |  
(2 rows)

--情况二:RIGHT JOIN语法的ON子句中,若条件为两列等值判断,如t1.col1=t2.col2 ...,则GROUP BY子句中可不存在左连接表列,不会报错。如t1 LEFT JOIN t2,GROUP BY 子句只需包含t2.col2。
gaussdb=# SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb2.a ORDER BY tb2.a;
 a | a 
---+---
 2 | 2
   | 3

(2 rows)

gaussdb=# SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a ORDER BY tb1.a;
 a | a 
---+---
 2 | 2
   | 3
(2 rows)

--情况三:INNER JOIN/CROSS JOIN/STRAIGHT_JOIN语法的ON子句中,若条件为两列等值判断,如t1.col1=t2.col2 ...,则GROUP BY子句中包含任一列即可,不报错。
gaussdb=# SELECT tb1.a, tb2.a FROM tb1 INNER JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a ORDER BY tb1.a;
 a | a 
---+---
 2 | 2
(1 row)

--删除。
gaussdb=# DROP TABLE test, tb1, tb2;