更新时间:2024-07-25 GMT+08:00
采集诊断仪表盘模板
采集诊断仪表盘模板支持查看ICAgent采集监控、查看ICAgent整体状态和查看ICAgent异常监控。
前提条件
在LTS控制台配置中心页面的“ICAgent采集开关”页签,开启ICAgent诊断开关,请参考设置ICAgent日志采集开关。
查看ICAgent采集监控
- 登录云日志服务控制台,在左侧导航栏中选择“仪表盘”。
- 在仪表盘模板下方,选择“采集诊断仪表盘模板 > ICAgent采集监控”,查看图表详情。
- 过滤日志组ID,所关联的查询分析语句如下所示:
select loggroup from log where report_topic = 'icagent_profile' or report_topic = 'icagent_alarm' group by loggroup
- 过滤日志流ID,所关联的查询分析语句如下所示:
select logstream from log where report_topic = 'icagent_profile' or report_topic = 'icagent_alarm' group by logstream
- 原始数据流量图表所关联的查询分析语句如下所示:
SELECT case when diff [ 1 ] is null then '0' when diff [ 1 ] > 1024 and diff [ 1 ] <= 1024*1024 then concat(round(diff [ 1 ]*1.0/1024,4),' KB') when diff [ 1 ] > 1024*1024 and diff [ 1 ] < 1024*1024*1024 then concat(round(diff [ 1 ]*1.0/1024/1024,4),' MB') when diff [ 1 ] > 1024*1024*1024 and diff [ 1 ] < 1024*1024*1024*1024 then concat(round(diff [ 1 ]*1.0/1024/1024/1024,4),' GB') when diff [ 1 ] > 1024*1024*1024*1024 then concat(round(diff [ 1 ]*1.0/1024/1024/1024/1024,4),' TB') else concat(round(diff [ 1 ]*1.0,2),' B') END AS "原始流量", case when diff [ 3 ] is null then '昨日无数据' else round(diff [ 3 ] - 1,2) END AS "同比昨日" FROM ( SELECT report_topic, compare ( traffic, 86400 ) AS diff FROM ( SELECT report_topic, sum( read_bytes ) AS traffic FROM log WHERE report_topic = 'icagent_profile' GROUP BY report_topic ) GROUP BY report_topic)
- 采集文件数图表所关联的查询分析语句如下所示:
select diff[1] as "采集文件数" , case when diff[3] is not null then round(diff[3] -1 ,2) else '昨日无数据' end as "同比昨天" from (select compare(uv,86400) as diff from (select report_topic,count(distinct concat(file_name,host_ip)) as uv from log where report_topic = 'icagent_profile' group by report_topic) group by report_topic)
- 采集机器数/同比昨天图表所关联的查询分析语句如下所示:
select diff[1] as "采集机器数" , case when diff[3] is not null then round(diff[3] -1 ,2) else '昨日无数据' end as "同比昨天" from (select compare(uv,86400) as diff from (select report_topic,count(distinct host_ip) as uv from log where report_topic = 'icagent_profile' group by report_topic) group by report_topic)
- 数据发送流量图表所关联的查询分析语句如下所示:
SELECT "time", case when traffic is null then 0 else round(traffic*1.0/1024/1024,2) END AS "发送流量 MB" from (SELECT time_format( time_floor ( __time, 'PT5M' ), 'yyyy-MM-dd HH:mm' ) AS "time", sum( read_bytes ) AS "traffic" FROM log WHERE report_topic = 'icagent_profile' GROUP BY "time")
- ICAgent写入次数图表所关联的查询分析语句如下所示:
select time_floor(__time,'PT5M') as "time",sum(read_count) as "写入次数" where report_topic = 'icagent_profile' group by "time"
- 采集机器数图表所关联的查询分析语句如下所示:
select time_floor(__time,'PT5M') as "time" , count(distinct host_ip) as "采集机器数" where report_topic = 'icagent_profile' group by "time"
- 采集文件分布图表所关联的查询分析语句如下所示:
SELECT file_name AS "采集路径", host_ip AS "IP", case when traffic is null then '0' when traffic > 1024 and traffic <= 1024*1024 then concat(round(traffic*1.0/1024,2),' KB') when traffic > 1024*1024 and traffic < 1024*1024*1024 then concat(round(traffic*1.0/1024/1024,2),' MB') when traffic > 1024*1024*1024 and traffic < 1024*1024*1024*1024 then concat(round(traffic*1.0/1024/1024/1024,2),' GB') when traffic > 1024*1024*1024*1024 then concat(round(traffic*1.0/1024/1024/1024/1024,2),' TB') else concat(round(traffic*1.0,2),' B') END AS "采集流量" FROM (SELECT file_name, host_ip, sum( read_bytes ) AS "traffic" WHERE "report_topic" = 'icagent_profile' GROUP BY file_name, host_ip)
- 过滤日志组ID,所关联的查询分析语句如下所示:
查看ICAgent整体状态
- 登录云日志服务控制台,在左侧导航栏中选择“仪表盘”。
- 在仪表盘模板下方,选择“采集诊断仪表盘模板 > ICAgent整体状态”,查看图表详情。
- 活跃ICAgent数图表所关联的查询分析语句如下所示:
select diff[1] as "活跃ICAgent数",case when diff[2] is not null then diff[2] else '昨日无数据' end as "昨日活跃ICAgent数" from (select report_topic,compare(uv,86400) as diff from (SELECT report_topic,COUNT(DISTINCT ip) as uv FROM log where report_topic = 'icagent_status' group by report_topic) group by report_topic)
- 发送延迟/次数趋势图表所关联的查询分析语句如下所示:
select time_floor(__time,'PT5M') as "time" , sum("metric.lts_cost.below_100_ms") as "below_100_ms" , sum("metric.lts_cost.100to500ms") as "100to500ms" , sum("metric.lts_cost.500msto1s") as "500msto1s" , sum("metric.lts_cost.1sto10s") as "1sto10s", sum("metric.lts_cost.10ston") as "10ston" from log where "report_topic" = 'icagent_status' group by "time"
- 运行状态分布图表所关联的查询分析语句如下所示:
select status,count(DISTINCT ip) as pv from log where report_topic = 'icagent_status' group by status
- CPU趋势图表所关联的查询分析语句如下所示:
select ip,time_floor(__time,'PT5M') as "time",avg("metric.cpu_usage") as "CPU占用率" from log where report_topic = 'icagent_status' and "metric.cpu_usage" is not null group by "time",ip order by "time"
- ICAgent整体状态图表所关联的查询分析语句如下所示:
select host_name as "主机名",ip as "IP" , version as "版本号" , os as "操作系统" , time_format(MILLIS_TO_TIMESTAMP(ANY_VALUE("metric.start_time")),'yyyy/MM/dd HH:mm:ss ZZZ') as "启动时间",avg("metric.cpu_usage") as "CPU",avg("metric.mem_used")*1.0 as "内存(MB)",status as "运行状态" where report_topic = 'icagent_status' group by host_name,ip,version,os,status
- 活跃ICAgent数图表所关联的查询分析语句如下所示:
查看ICAgent异常监控
- 登录云日志服务控制台,在左侧导航栏中选择“仪表盘”。
- 在仪表盘模板下方,选择“采集诊断仪表盘模板 > ICAgent异常监控”,查看图表详情。
- 过滤日志组ID,所关联的查询分析语句如下所示:
select loggroup from log where report_topic = 'icagent_profile' or report_topic = 'icagent_alarm' group by loggroup limit 10000
- 过滤日志流ID,所关联的查询分析语句如下所示:
select logstream from log where report_topic = 'icagent_profile' or report_topic = 'icagent_alarm' group by logstream limit 10000
- 关键错误数图表所关联的查询分析语句如下所示:
select diff[1] as "错误数", case when diff[3] is not null then round(diff[3] - 1 , 2) else '昨日无数据' end as "错误数对比昨天" from (select report_topic,compare(pv , 86400) as diff from (select report_topic,count(1) as pv from log where report_topic = 'icagent_alarm' group by report_topic) group by report_topic)
- 丢弃超大行图表所关联的查询分析语句如下所示:
select diff[1] as "丢弃行数" , case when diff[3] is not null then round(diff[3] - 1 , 2) else '昨日无数据' end as "丢弃行数对比昨天" from (select report_topic,compare(pv , 86400) as diff from (select report_topic,count(1) as pv from log where report_topic = 'icagent_alarm' and alarm_type = 'DISCARD_BIG_LINE' group by report_topic) group by report_topic)
- 请求LTS失败图表所关联的查询分析语句如下所示:
select diff[1] as "请求失败数" , case when diff[3] is not null then round(diff[3] - 1 , 2) else '昨日无数据' end as "请求失败数对比昨天" from (select report_topic,compare(pv , 86400) as diff from (select report_topic,count(1) as pv from log where report_topic = 'icagent_alarm' and alarm_type = 'HTTP_REQUEST_ALARM' group by report_topic) group by report_topic)
- 文件超过上限问题数图表所关联的查询分析语句如下所示:
select diff[1] as "文件超过上限问题数" ,case when diff[3] is not null then round(diff[3] - 1 , 2) else '昨日无数据' end as "文件超过上限问题数对比昨天" from (select report_topic,compare(pv , 86400) as diff from (select report_topic,count(1) as pv from log where report_topic = 'icagent_alarm' and alarm_type = 'COLLECT_FILE_EXCEED' group by report_topic) group by report_topic)
- 关键错误数(必需处理)图表所关联的查询分析语句如下所示:
select TIME_FORMAT(MILLIS_TO_TIMESTAMP(ANY_VALUE(report_time/1000000)),'yyyy/MM/dd HH:mm:ss ZZ') as "最近发生时间" , loggroup as "日志组ID" ,logstream as "日志流ID" , host_ip as "IP" , alarm_type as "告警类型", os as "系统类型" , alarm_message as "告警详情" where report_topic = 'icagent_alarm' group by loggroup,logstream,host_ip,alarm_type,os,alarm_message limit 10000
- 丢弃超大行详情图表所关联的查询分析语句如下所示:
select TIME_FORMAT(MILLIS_TO_TIMESTAMP(ANY_VALUE(report_time/1000000)),'yyyy/MM/dd HH:mm:ss ZZ') as "最近发生时间" , loggroup as "日志组ID" ,logstream as "日志流ID" , host_ip as "IP" , alarm_type as "告警类型", os as "系统类型" , alarm_message as "告警详情" where report_topic = 'icagent_alarm' and alarm_type = 'DISCARD_BIG_LINE' group by loggroup,logstream,host_ip,alarm_type,os,alarm_message limit 10000
- 请求LTS失败详情图表所关联的查询分析语句如下所示:
select TIME_FORMAT(MILLIS_TO_TIMESTAMP(ANY_VALUE(report_time/1000000)),'yyyy/MM/dd HH:mm:ss ZZ') as "最近发生时间" , loggroup as "日志组ID" ,logstream as "日志流ID" , host_ip as "IP" , alarm_type as "告警类型", os as "系统类型" , alarm_message as "告警详情" where report_topic = 'icagent_alarm' and alarm_type = 'HTTP_REQUEST_ALARM' group by loggroup,logstream,host_ip,alarm_type,os,alarm_message limit 10000
- 文件超过上限问题数详情图表所关联的查询分析语句如下所示:
select TIME_FORMAT(MILLIS_TO_TIMESTAMP(ANY_VALUE(report_time/1000000)),'yyyy/MM/dd HH:mm:ss ZZ') as "最近发生时间" , loggroup as "日志组ID" ,logstream as "日志流ID" , host_ip as "IP" , alarm_type as "告警类型", os as "系统类型" , alarm_message as "告警详情" where report_topic = 'icagent_alarm' and alarm_type = 'COLLECT_FILE_EXCEED' group by loggroup,logstream,host_ip,alarm_type,os,alarm_message limit 10000
- 过滤日志组ID,所关联的查询分析语句如下所示:
父主题: 日志仪表盘模板