Updated on 2025-10-23 GMT+08:00

Group Queries

A group query is usually used together with aggregate functions to query category statistics. Common aggregate functions include count(), sum(), avg(), min(), and max(). GROUP BY is often used in group queries.

To collect statistics on grouped query results, add WITH ROLLUP to the GROUP BY statement. WITH ROLLUP can collect statistics on grouped query results of multiple columns.

-- Create a table and insert data into the table.
m_db=# CREATE TABLE student (id INT,name VARCHAR(20),class INT);
m_db=# INSERT INTO student VALUES (1, 'Scott', 1), (2, 'Ben', 1);
m_db=# INSERT INTO student VALUES (3, 'Jack', 2),(4, 'Anna', 2),(5, 'Judy', 2);
m_db=# INSERT INTO student VALUES (6, 'Sally', 3), (7, 'Jordan', 3);

-- Use the aggregate function COUNT() to count the number of people in each class.
m_db=# SELECT class, count(*) AS cnt FROM student GROUP BY class; 
 class | cnt 
-------+-----
     2 |   3
     1 |   2
     3 |   2
(3 rows)

-- Use the WITH ROLLUP syntax to summarize the number of people grouped by class. The summary data is in the last column.
m_db=# SELECT class, count(*) AS cnt FROM student GROUP BY class WITH ROLLUP;
 class | cnt 
-------+-----
     1 |   2
     2 |   3
     3 |   2
       |   7
(4 rows)

-- Use the WITH ROLLUP syntax to summarize the number of people grouped by class and name. The summary data is in the last column.
Output interpretation: An additional statistics row is generated under each class, indicating the number of people in each class. An additional statistics row is generated at the end, indicating the total number of people in all classes.
m_db=# SELECT class,name, count(*) AS cnt FROM student GROUP BY class,name WITH ROLLUP;
 class |  name  | cnt 
-------+--------+-----
     1 | Ben    |   1
     1 | Scott  |   1
     1 |        |   2
     2 | Anna   |   1
     2 | Jack   |   1
     2 | Judy   |   1
     2 |        |   3
     3 | Jordan |   1
     3 | Sally  |   1
     3 |        |   2
       |        |   7
(11 rows)

-- Use the HAVING clause to retrieve data of classes with less than three people.
m_db=# SELECT class, COUNT(*) AS num FROM student GROUP BY class HAVING COUNT(*) < 3;
 class | num 
-------+-----
     1 |   2
     3 |   2
(2 rows)

-- Drop.
m_db=# DROP TABLE student;
sql_mode contains the ONLY_FULL_GROUP_BY option, which is used to verify the GROUP BY list and SELECT list.
-- Create a table and insert data into the table.
m_db=# CREATE TABLE test (
          id int  primary key NOT NULL,
          id_2 int NOT NULL,
          s1 varchar(10),
          s2 int,
          s3 varchar(12)
        );
m_db=# ALTER TABLE test ADD CONSTRAINT unique_id_2 UNIQUE(id_2);
m_db=# INSERT INTO test value(1,101,'a',1000,'aa');
m_db=# INSERT INTO test value(2,102,'b',2000,'aaa');
m_db=# INSERT INTO test value(3,103,'c',3000,'aaaa');
m_db=# INSERT INTO test value(4,104,'b',4000,'aaa');
m_db=# INSERT INTO test value(5,105,'c',5000,'aaaaa');
m_db=# CREATE TABLE tb1(a int, b int, c int);
m_db=# INSERT INTO tb1 VALUES(1, 4, 11);
m_db=# INSERT INTO tb1 VALUES(2, 5, 12);
m_db=# INSERT INTO tb1 VALUES(2, 6, 13);
m_db=# INSERT INTO tb1 VALUES(2, 2, 13);
m_db=# CREATE TABLE tb2(a int, b int);
m_db=# INSERT INTO tb2 VALUES(2, 7);
m_db=# INSERT INTO tb2 VALUES(2, 8);
m_db=# INSERT INTO tb2 VALUES(3, 9);

-- Set an SQL mode.
m_db=# SET SQL_MODE = 'ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION';

-- If the non-aggregate function column in the SELECT list is consistent with the GROUP BY column, no error is reported.
m_db=# SELECT s1, s3 FROM test GROUP BY s1,s3;
 s1 |  s3   
----+-------
 c  | aaaaa
 b  | aaa
 c  | aaaa
 a  | aa
(4 rows)

m_db=# SELECT s1, s3 FROM test GROUP BY s3;
ERROR:  Column test.s1 must appear in the GROUP BY clause or be used in an aggregate function.
LINE 1: SELECT s1, s3 FROM test GROUP BY s3;
               ^
-- If the non-aggregate function column in the SELECT list is inconsistent with the GROUP BY column, the following two cases exist:
-- 1. If the GROUP BY list contains a primary key or a unique not-null key, the SQL statement does not report an error.
m_db=# SELECT s1, MAX(LENGTH(s3)) FROM test GROUP BY id;
 s1 | max 
----+-----
 c  |   4
 c  |   5
 a  |   2
 b  |   3
 b  |   3
(5 rows)

-- 2. All non-aggregate function columns in the SELECT list are displayed in the GROUP BY or WHERE list. Columns in the WHERE clause must be equal to a constant.
m_db=# SELECT s1, SUM(s2) FROM test WHERE s1 = 'b' GROUP BY s1;
 s1 | sum  
----+------
 b  | 6000
(1 row)

-- If both the JOIN and GROUP BY clauses are used:
-- 1. In the ON clause of the LEFT JOIN syntax, if the condition is to check whether two columns are equal, for example, t1.col1=t2.col2, the GROUP BY clause does not contain the right join table column and no error is reported. For example, in t1 LEFT JOIN t2, the GROUP BY clause contains only t1.col1.
m_db=# SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a;
 a | a 
---+---
 1 |  
 2 | 2
(2 rows)

m_db=# SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb2.a;
ERROR:  Column tb1.a must appear in the GROUP BY clause or be used in an aggregate function.
LINE 1: SELECT tb1.a, tb2.a FROM tb1 LEFT JOIN tb2 ON tb1.a = tb2.a ...
               ^
-- 2. In the ON clause of the RIGHT JOIN syntax, if the condition is to check whether two columns are equal, for example, t1.col1=t2.col2, the GROUP BY clause does not contain the left join table column and no error is reported. For example, in t1 RIGHT JOIN t2, the GROUP BY clause contains only t2.col2.
m_db=# SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb2.a;
 a | a 
---+---
   | 3
 2 | 2
(2 rows)

m_db=# SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a;
ERROR:  Column tb2.a must appear in the GROUP BY clause or be used in an aggregate function.
LINE 1: SELECT tb1.a, tb2.a FROM tb1 RIGHT JOIN tb2 ON tb1.a = tb2.a...
                      ^
-- 3. In the ON clause of the INNER JOIN/CROSS JOIN/STRAIGHT_JOIN syntax, if the condition is to check whether two columns are equal, for example, t1.col1=t2.col2, the GROUP BY clause can contain any column and no error is reported.
m_db=# SELECT tb1.a, tb2.a FROM tb1 INNER JOIN tb2 ON tb1.a = tb2.a GROUP BY tb1.a;
 a | a 
---+---
 2 | 2
(1 row)

m_db=# SELECT tb1.a, tb2.a FROM tb1 INNER JOIN tb2 ON tb1.a = tb2.a GROUP BY tb2.a;
 a | a 
---+---
 2 | 2
(1 row)

-- Drop.
m_db=# DROP TABLE test, tb1, tb2;