更新时间:2024-09-13 GMT+08:00
GaussDB(DWS)常用运维命令集
本章节仅列出运维常用命令,其中查看的系统对象可根据实际情况灵活变通,查询返回的具体字段含义,请参考《开发指南》中关于对应系统表、系统视图、系统函数的介绍。
查看运维状态类
前提条件:正常连接上DWS集群。
- 查看当前业务整体运行情况。
1
select coorname, usename, client_addr, sysdate-query_start as duration, state, enqueue,waiting, pid, query_id, substr(query,1,60) from pgxc_stat_activity where usename != 'Ruby' and usename != 'omm' and state = 'active' order by duration desc;
- 查看当前业务整体并发情况。
1
select usename,coorname,enqueue,state,count(*) from pgxc_stat_activity where usename <> 'omm' and usename <> 'Ruby' group by 1,2,3,4 order by 4,5 desc limit 30;
- 查看当前集群内部整体等待状态。
1
select wait_status,wait_event,count(*) as cnt from pgxc_thread_wait_status where wait_status <> 'wait cmd' and wait_status <> 'synchronize quit' and wait_status <> 'none' and wait_status <> 'wait stream task' group by 1,2 order by 3 desc limit 50;
- 查看当前集群资源池业务运行信息(配置资源管控场景)。
1
select s.resource_pool as rpname, count(1) as session_cnt,sum(case when a.state = 'active' then 1 else 0 end) as active_cnt,sum(case when s.enqueue ='global' then 1 else 0 end) as global_wait,sum(case when s.lane = 'fast' and s.status = 'running' then 1 else 0 end) as fast_run,sum(case when s.lane = 'fast' and s.status = 'pending' and s.enqueue not in ('global','none') then 1 else 0 end) as fast_wait,sum(case when s.lane = 'slow' and s.status = 'running' then 1 else 0 end) as slow_run,sum(case when s.lane = 'slow' and s.status = 'pending' and s.enqueue not in ('global','none') then 1 else 0 end) as slow_wait,sum(case when s.status = 'running' then s.statement_mem else 0 end) as est_mem from pg_catalog.pgxc_session_wlmstat s,pg_catalog.pgxc_stat_activity a where s.threadid=a.pid(+) and s.attribute != 'internal' and s.resource_pool != 'root' group by 1;
- 查看当前集群动态内存水位。
1
select a.nodename,a.memorymbytes as dynamic_used_memory,b.memorymbytes as max_dynamic_memory, dynamic_used_memory/max_dynamic_memory*100 as used_rate from pgxc_total_memory_detail a join pgxc_total_memory_detail b on a.nodename=b.nodename where a.memorytype = 'dynamic_used_memory' and b.memorytype = 'max_dynamic_memory' order by a.memorymbytes desc;
- 查看各类线程内存使用情况(从•查看当前集群动态内存水位。查询到动态内存高水位的CN/DN节点,通过连接该节点查询)。
1
select b.state, sum(totalsize) as totalsize, sum(freesize) as freesize, sum(usedsize) as usedsize from pv_session_memory_detail a , pg_stat_activity b where split_part(a.sessid,'.',2) = b.pid group by b.state order by totalsize desc limit 20;
- 查看当前实例每个session使用内存(从•查看当前集群动态内存水位。查询到动态内存高水位的CN/DN节点,通过连接该节点查询)。
1
select split_part(pv_session_memory_detail.sessid,'.',2) pid,pg_size_pretty(sum(totalsize)) total_size,count(*) context_count from pv_session_memory_detail group by pid order by sum(totalsize) desc;
- 查看当前实例每个SQL使用内存(从•查看当前集群动态内存水位。查询到动态内存高水位的CN/DN节点,通过连接该节点查询)。
1
select sessid, contextname, level,parent, pg_size_pretty(totalsize) as total ,pg_size_pretty(freesize) as freesize, pg_size_pretty(usedsize) as usedsize, datname,query_id, query from pv_session_memory_detail a , pg_stat_activity b where split_part(a.sessid,'.',2) = b.pid order by totalsize desc limit 100;
应急恢复类
应急类操中涉及业务影响的操作均需要和客户确认后实施,禁止自行直接操作。
前提条件:正常连接上DWS集群。
- 单语句查杀,被查杀语句pid一般通过•查看当前业务整体运行情况。获取的pid字段信息。
1 2
execute direct on(cn_name) 'select pg_cancel_backend(被查杀语句pid)'; execute direct on(cn_name) 'select pg_terminate_backend(被查杀语句pid)';
- 批量拼接查杀语句(仅拼接查杀命令,不执行查杀命令)。
1
select 'execute direct on(' || coorname || ') ''select pg_terminate_backend(' || pid || ')'';',sysdate-query_start as dur, substr(query,1,60) from pgxc_stat_activity where usename != 'omm' and usename != 'Ruby' and state = 'active' order by dur desc limit 30;
- 清理空闲连接。
1 2
clean connection to all for database xxx; --清理idle的空闲连接 select * from pgxc_clean_free_conn(); --清理pooler缓存连接
- 修复CCN计数(连接CCN执行)。
1 2
select * from pg_stat_get_workload_struct_info(); --保留CCN信息 select gs_wlm_node_recover(true); --修复CCN计数
- 锁定异常用户。
1 2
alter user usename account lock; --锁用户 alter user usename account unlock; --解锁用户
- 业务加入黑名单操作。
1 2 3
select * from gs_append_blocklist(unique_sql_id); --加黑名单 select * from gs_blocklist_query; --查询现有黑名单 select gs_remove_blocklist(unique_sql_id); --解除黑名单
业务分析类
前提条件:正常连接上DWS集群。
- 查看正在执行SQL的等待视图,实际queryid一般通过•查看当前业务整体运行情况。获取的query_id字段信息。
1
select * from pgxc_thread_wait_status where query_id = 实际queryid order by node_name,wait_status,wait_event;
- 查看正在运行SQL的运行过程信息,实际queryid一般通过•查看当前业务整体运行情况。获取的query_id字段信息。
1
select * from pgxc_wlm_session_statistics where queryid = 实际queryid ;
- 查看历史SQL的运行信息,实际queryid一般通过•查看当前业务整体运行情况。获取的query_id字段信息。
1
select * from pgxc_wlm_session_info where queryid = 实际queryid;
- 查看单表倾斜信息。
1
select * from table_distribution('schema_name','table_name');
- 查看单表脏页率信息。
1
select c.oid AS relid, n.nspname AS schemaname, c.relname, pg_stat_get_tuples_inserted(c.oid) AS n_tup_ins, pg_stat_get_tuples_updated(c.oid) AS n_tup_upd, pg_stat_get_tuples_deleted(c.oid) AS n_tup_del, pg_stat_get_live_tuples(c.oid) AS n_live_tup, pg_stat_get_dead_tuples(c.oid) AS n_dead_tup, cast( (n_dead_tup / (n_live_tup + n_dead_tup + 0.0001) * 100) AS numeric(5,2)) AS dirty_page_rate from pg_class c LEFT JOIN pg_namespace n ON n.oid = c.relnamespace where c.oid = 'schema_name.table_name'::regclass::oid;
- 查看表定义、索引信息。
1
select pg_get_tabledef('schema_name.table_name');
- 查看表大小。
1
select pg_size_pretty(pg_table_size('schema_name.table_name'));
- 查看表的创建、修改和最近一次analyze时间。
1
select * from pg_object where object_oid='schema_name.table_name'::regclass;
- 查看详细脏数据信息。
1 2 3 4 5 6 7
start transaction read only; set enable_show_any_tuples = true; set enable_indexscan = off; set enable_bitmapscan = off; select ctid,xmin,xmax,pgxc_is_committed(xmin),pgxc_is_committed(xmax),oid,* from schema_name.table_name; select xmin,xmax,ctid, * from pgxc_node; rollback;
父主题: GaussDB(DWS)集群运维