Updated on 2025-05-29 GMT+08:00

Group Queries

A group query is usually used together with aggregate functions to query category statistics. Common aggregate functions include count(), sum(), avg(), min(), and max(). GROUP BY is often used in group queries.

-- Create a table and insert data into the table.
gaussdb=# CREATE TABLE student (id INT,name VARCHAR(20),class INT);
gaussdb=# INSERT INTO student VALUES (1, 'Scott', 1), (2, 'Ben', 1);
gaussdb=# INSERT INTO student VALUES (3, 'Jack', 2),(4, 'Anna', 2),(5, 'Judy', 2);
gaussdb=# INSERT INTO student VALUES (6, 'Sally', 3), (7, 'Jordan', 3);

-- Use the aggregate function COUNT() to count the number of people in each class.
gaussdb=# SELECT class, count(*) AS cnt FROM student GROUP BY class; 
 class | cnt 
-------+-----
     2 |   3
     1 |   2
     3 |   2
(3 rows)

-- Use the HAVING clause to retrieve data of classes with fewer than three people.
gaussdb=# SELECT class, COUNT(*) AS num FROM student GROUP BY class HAVING COUNT(*) < 3;
 class | num 
-------+-----
     1 |   2
     3 |   2
(2 rows)

-- Drop.
gaussdb=# DROP TABLE student;
sql_mode contains the ONLY_FULL_GROUP_BY option, which is used to verify the GROUP BY list and SELECT list.
-- Create a table and insert data into the table.
gaussdb=# CREATE TABLE test (
          id int  primary key NOT NULL,
          id_2 int unique NOT NULL,
          s1 varchar(10),
          s2 int,
          s3 varchar(12)
        );
gaussdb=# INSERT INTO test value(1,101,'a',1000,'aa');
gaussdb=# INSERT INTO test value(2,102,'b',2000,'aaa');
gaussdb=# INSERT INTO test value(3,103,'c',3000,'aaaa');
gaussdb=# INSERT INTO test value(4,104,'b',4000,'aaa');
gaussdb=# INSERT INTO test value(5,105,'c',5000,'aaaaa');
gaussdb=# CREATE TABLE tb1(a int, b int, c int);
gaussdb=# INSERT INTO tb1 VALUES(1, 4, 11);
gaussdb=# INSERT INTO tb1 VALUES(2, 5, 12);
gaussdb=# INSERT INTO tb1 VALUES(2, 6, 13);
gaussdb=# INSERT INTO tb1 VALUES(2, 2, 13);
gaussdb=# CREATE TABLE tb2(a int, b int);
gaussdb=# INSERT INTO tb2 VALUES(2, 7);
gaussdb=# INSERT INTO tb2 VALUES(2, 8);
gaussdb=# INSERT INTO tb2 VALUES(3, 9);

-- Set <b>sql_mode</b>.
gaussdb=# SET sql_mode = 'ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION';

-- If the non-aggregate function column in the SELECT list is consistent with the column for GROUP BY, no error is reported.
gaussdb=# SELECT s1, s3 FROM test GROUP BY s1,s3 ORDER BY s1,s3;
 s1 |  s3   
----+-------
 a  | aa
 b  | aaa
 c  | aaaa
 c  | aaaaa
(4 rows)

gaussdb=# SELECT s1, s3 FROM test GROUP BY s3 ORDER BY s3;
ERROR:  Column "test.s1" must appear in the GROUP BY clause or be used in an aggregate function.
LINE 1: SELECT s1, s3 FROM test GROUP BY s3 ORDER BY s3;
               ^
-- If the non-aggregate function column in the SELECT list is inconsistent with the column for GROUP BY, the following two cases exist:
-- Situation 1: If the GROUP BY list contains a primary key or a unique non-null key, the SQL statement does not report an error.
gaussdb=# SELECT s1, MAX(LENGTH(s3)) FROM test GROUP BY id ORDER BY id;
 s1 | max 
----+-----
 a  |   2
 b  |   3
 c  |   4
 b  |   3
 c  |   5
(5 rows)

-- Situation 2: All non-aggregate function columns in the SELECT list are displayed in the GROUP BY or WHERE list. Columns in the WHERE clause must be equal to a constant.
gaussdb=# SELECT s1, SUM(s2) FROM test WHERE s1 = 'b' GROUP BY s1;
 s1 | sum  
----+------
 b  | 6000
(1 row)

-- If both the JOIN and GROUP BY clauses are used, the following situations may occur:
-- Situation 1: In the ON clause of the LEFT JOIN syntax, if the condition is to check whether two columns are equal, for example, t1.col1=t2.col2 ..., the GROUP BY clause can contain no right join table column and no error will be reported. For example, in t1 LEFT JOIN t2, the GROUP BY clause only needs to contain t1.col1.
gaussdb=# SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a ORDER BY tb1.a;
 a | a 
---+---
 1 |  
 2 | 2
(2 rows)

gaussdb=# SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb2.a ORDER BY tb2.a;
ERROR:  Column "tb1.a" must appear in the GROUP BY clause or be used in an aggregate function.
LINE 1: SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a ...
               ^
-- Situation 2: In the ON clause of the RIGHT JOIN syntax, if the condition is to check whether two columns are equal, for example, t1.col1=t2.col2 ..., the GROUP BY clause can contain no left join table column and no error will be reported. For example, in t1 RIGHT JOIN t2, the GROUP BY clause only needs to contain t2.col2.
gaussdb=# SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb2.a ORDER BY tb2.a;
 a | a 
---+---
 2 | 2
   | 3

(2 rows)

gaussdb=# SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a ORDER BY tb1.a;
ERROR:  Column "tb2.a" must appear in the GROUP BY clause or be used in an aggregate function.
LINE 1: SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a...
                      ^
-- Situation 3: In the ON clause of the INNER JOIN/CROSS JOIN/STRAIGHT_JOIN syntax, if the condition is to check whether two columns are equal, for example, t1.col1=t2.col2 ..., the GROUP BY clause can contain either column and no error will be reported.
gaussdb=# SELECT tb1.a, tb2.a FROM tb1 INNER JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a ORDER BY tb1.a;
 a | a 
---+---
 2 | 2
(1 row)

-- Disable ONLY_FULL_GROUP_BY in sql_mode.
gaussdb=# SET SQL_MODE = 'STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION';

-- If the non-aggregate function column in the SELECT list is consistent with the column for GROUP BY, no error is reported.
gaussdb=# SELECT s1, s3 FROM test GROUP BY s1,s3 ORDER BY s1,s3;
 s1 |  s3   
----+-------
 a  | aa
 b  | aaa
 c  | aaaa
 c  | aaaaa
(4 rows)

gaussdb=# SELECT s1, s3 FROM test GROUP BY s3 ORDER BY s3;
 s1 |  s3   
----+-------
 a  | aa
 b  | aaa
 c  | aaaa
 c  | aaaaa
(4 rows)

-- If the non-aggregate function column in the SELECT list is inconsistent with the column for GROUP BY, the following two cases exist:
-- Situation 1: If the GROUP BY list contains a primary key or a unique non-null key, the SQL statement does not report an error.
gaussdb=# SELECT s1, MAX(LENGTH(s3)) FROM test GROUP BY id ORDER BY id;
 s1 | max 
----+-----
 a  |   2
 b  |   3
 c  |   4
 b  |   3
 c  |   5
(5 rows)

-- Situation 2: All non-aggregate function columns in the SELECT list are displayed in the GROUP BY or WHERE list. Columns in the WHERE clause must be equal to a constant.
gaussdb=# SELECT s1, SUM(s2) FROM test WHERE s1 = 'b' GROUP BY s1;
 s1 | sum  
----+------
 b  | 6000
(1 row)

-- If both the JOIN and GROUP BY clauses are used, the following situations may occur:
-- Situation 1: In the ON clause of the LEFT JOIN syntax, if the condition is to check whether two columns are equal, for example, t1.col1=t2.col2 ..., the GROUP BY clause can contain no right join table column and no error will be reported. For example, in t1 LEFT JOIN t2, the GROUP BY clause only needs to contain t1.col1.
gaussdb=# SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a ORDER BY tb1.a;
 a | a 
---+---
 1 |  
 2 | 2
(2 rows)

gaussdb=# SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb2.a ORDER BY tb2.a;
 a | a 
---+---
 2 | 2
 1 |  
(2 rows)

-- Situation 2: In the ON clause of the RIGHT JOIN syntax, if the condition is to check whether two columns are equal, for example, t1.col1=t2.col2 ..., the GROUP BY clause can contain no left join table column and no error will be reported. For example, in t1 LEFT JOIN t2, the GROUP BY clause only needs to contain t2.col2.
gaussdb=# SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb2.a ORDER BY tb2.a;
 a | a 
---+---
 2 | 2
   | 3

(2 rows)

gaussdb=# SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a ORDER BY tb1.a;
 a | a 
---+---
 2 | 2
   | 3
(2 rows)

-- Situation 3: In the ON clause of the INNER JOIN/CROSS JOIN/STRAIGHT_JOIN syntax, if the condition is to check whether two columns are equal, for example, t1.col1=t2.col2 ..., the GROUP BY clause can contain either column and no error will be reported.
gaussdb=# SELECT tb1.a, tb2.a FROM tb1 INNER JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a ORDER BY tb1.a;
 a | a 
---+---
 2 | 2
(1 row)

-- Drop.
gaussdb=# DROP TABLE test, tb1, tb2;