更新时间:2024-07-25 GMT+08:00
分享

采集诊断仪表盘模板

采集诊断仪表盘模板支持查看ICAgent采集监控、查看ICAgent整体状态和查看ICAgent异常监控。

前提条件

在LTS控制台配置中心页面的“ICAgent采集开关”页签,开启ICAgent诊断开关,请参考设置ICAgent日志采集开关。

查看ICAgent采集监控

  1. 登录云日志服务控制台,在左侧导航栏中选择“仪表盘”。
  2. 在仪表盘模板下方,选择“采集诊断仪表盘模板 > ICAgent采集监控”,查看图表详情。

    • 过滤日志组ID,所关联的查询分析语句如下所示:
      select loggroup from log where report_topic = 'icagent_profile' or report_topic = 'icagent_alarm' group by loggroup
    • 过滤日志流ID,所关联的查询分析语句如下所示:
      select logstream from log where report_topic = 'icagent_profile' or report_topic = 'icagent_alarm' group by logstream
    • 原始数据流量图表所关联的查询分析语句如下所示:
      SELECT
      	case 
      		when diff [ 1 ] is null then '0' 
      		when diff [ 1 ] > 1024 and diff [ 1 ] <= 1024*1024 then concat(round(diff [ 1 ]*1.0/1024,4),' KB')
      		when diff [ 1 ] > 1024*1024 and diff [ 1 ] <= 1024*1024*1024 then  concat(round(diff [ 1 ]*1.0/1024/1024,4),' MB')
      		when diff [ 1 ] > 1024*1024*1024 and diff [ 1 ] <= 1024*1024*1024*1024 then  concat(round(diff [ 1 ]*1.0/1024/1024/1024,4),' GB')
      		when diff [ 1 ] > 1024*1024*1024*1024 then  concat(round(diff [ 1 ]*1.0/1024/1024/1024/1024,4),' TB')
      		else concat(round(diff [ 1 ]*1.0,2),' B') 
      	END AS "原始流量",
      	case 
      		when diff [ 3 ] is null then '昨日无数据' 
      		else round(diff [ 3 ] - 1,2)
      	END AS "同比昨日" 
      FROM
      	(
      	SELECT
      		report_topic,
      		compare ( traffic, 86400 ) AS diff 
      	FROM
      		( SELECT report_topic, sum( read_bytes ) AS traffic FROM log WHERE report_topic = 'icagent_profile' GROUP BY report_topic ) 
      GROUP BY
      	report_topic)
    • 采集文件数图表所关联的查询分析语句如下所示:
      select diff[1] as "采集文件数" , case when diff[3] is not null then round(diff[3] -1 ,2) else '昨日无数据' end as "同比昨天" from  (select compare(uv,86400) as diff from (select report_topic,count(distinct concat(file_name,host_ip)) as uv from log where report_topic = 'icagent_profile' group by report_topic) group by report_topic)
    • 采集机器数/同比昨天图表所关联的查询分析语句如下所示:
      select diff[1] as "采集机器数" ,  case when diff[3] is not null then round(diff[3] -1 ,2) else '昨日无数据' end as "同比昨天" from (select compare(uv,86400) as diff from (select report_topic,count(distinct host_ip) as uv from log where report_topic = 'icagent_profile' group by report_topic) group by report_topic)
    • 数据发送流量图表所关联的查询分析语句如下所示:
      SELECT
      	"time",
      		case 
      		when traffic is null then 0 
      		else round(traffic*1.0/1024/1024,2)
      	END AS "发送流量 MB"
      	from
      	(SELECT
      		time_format( time_floor ( __time, 'PT5M' ), 'yyyy-MM-dd HH:mm' ) AS "time",
      		sum( read_bytes ) AS "traffic" 
      	FROM
      		log 
      	WHERE
      		report_topic = 'icagent_profile' 
      	GROUP BY
      		"time")
    • ICAgent写入次数图表所关联的查询分析语句如下所示:
      select time_floor(__time,'PT5M') as "time",sum(read_count) as "写入次数" where report_topic = 'icagent_profile'  group by "time"
    • 采集机器数图表所关联的查询分析语句如下所示:
      select time_floor(__time,'PT5M') as "time" , count(distinct host_ip) as "采集机器数" where report_topic = 'icagent_profile' group by "time"
    • 采集文件分布图表所关联的查询分析语句如下所示:
      SELECT
      		file_name AS "采集路径",
      		host_ip AS "IP",
      		case 
      			when traffic is null then '0' 
      			when traffic > 1024 and traffic <= 1024*1024 then concat(round(traffic*1.0/1024,2),' KB')
      			when traffic > 1024*1024 and traffic <= 1024*1024*1024 then  concat(round(traffic*1.0/1024/1024,2),' MB')
      			when traffic > 1024*1024*1024 and traffic <= 1024*1024*1024*1024 then  concat(round(traffic*1.0/1024/1024/1024,2),' GB')
      			when traffic > 1024*1024*1024*1024 then  concat(round(traffic*1.0/1024/1024/1024/1024,2),' TB')
      		else concat(round(traffic*1.0,2),' B') 
      	END AS "采集流量"
      	FROM
      	(SELECT
      		file_name,
      		host_ip,
      		sum( read_bytes ) AS "traffic" 
      	WHERE
      		"report_topic" = 'icagent_profile' 
      	GROUP BY
      		file_name,
      		host_ip)

查看ICAgent整体状态

  1. 登录云日志服务控制台,在左侧导航栏中选择“仪表盘”。
  2. 在仪表盘模板下方,选择“采集诊断仪表盘模板 > ICAgent整体状态”,查看图表详情。

    • 活跃ICAgent数图表所关联的查询分析语句如下所示:
      select diff[1] as "活跃ICAgent数",case when diff[2] is not null then diff[2] else '昨日无数据' end as "昨日活跃ICAgent数" from  (select report_topic,compare(uv,86400) as diff from (SELECT   report_topic,COUNT(DISTINCT ip) as uv FROM log where report_topic = 'icagent_status' group by report_topic) group by report_topic)
    • 发送延迟/次数趋势图表所关联的查询分析语句如下所示:
      select time_floor(__time,'PT5M') as "time" , sum("metric.lts_cost.below_100_ms") as "below_100_ms" , sum("metric.lts_cost.100to500ms") as "100to500ms" , sum("metric.lts_cost.500msto1s") as "500msto1s" , sum("metric.lts_cost.1sto10s") as "1sto10s", sum("metric.lts_cost.10ston") as "10ston" from log where "report_topic" = 'icagent_status' group by "time"
    • 运行状态分布图表所关联的查询分析语句如下所示:
      select status,count(DISTINCT ip) as pv from log where report_topic = 'icagent_status' group by status
    • CPU趋势图表所关联的查询分析语句如下所示:
      select ip,time_floor(__time,'PT5M') as "time",avg("metric.cpu_usage") as "CPU占用率" from log where report_topic = 'icagent_status' and "metric.cpu_usage" is not null group by "time",ip order by "time"
    • ICAgent整体状态图表所关联的查询分析语句如下所示:
      select host_name as "主机名",ip as "IP" , version as "版本号" , os as "操作系统" , time_format(MILLIS_TO_TIMESTAMP(ANY_VALUE("metric.start_time")),'yyyy/MM/dd HH:mm:ss ZZZ') as "启动时间",avg("metric.cpu_usage") as "CPU",avg("metric.mem_used")*1.0 as "内存(MB)",status as "运行状态" where report_topic = 'icagent_status' group by host_name,ip,version,os,status

查看ICAgent异常监控

  1. 登录云日志服务控制台,在左侧导航栏中选择“仪表盘”。
  2. 在仪表盘模板下方,选择“采集诊断仪表盘模板 > ICAgent异常监控”,查看图表详情。

    • 过滤日志组ID,所关联的查询分析语句如下所示:
      select loggroup from log where report_topic = 'icagent_profile' or report_topic = 'icagent_alarm' group by loggroup limit 10000
    • 过滤日志流ID,所关联的查询分析语句如下所示:
      select logstream from log where report_topic = 'icagent_profile' or report_topic = 'icagent_alarm' group by logstream limit 10000
    • 关键错误数图表所关联的查询分析语句如下所示:
      select diff[1] as "错误数", case when diff[3] is not null then round(diff[3] - 1 , 2) else '昨日无数据' end as "错误数对比昨天" from (select report_topic,compare(pv , 86400) as diff from (select report_topic,count(1) as pv from log where report_topic = 'icagent_alarm' group by report_topic) group by report_topic)
    • 丢弃超大行图表所关联的查询分析语句如下所示:
      select diff[1] as "丢弃行数" ,  case when diff[3] is not null then round(diff[3] - 1 , 2) else '昨日无数据' end as "丢弃行数对比昨天" from (select report_topic,compare(pv , 86400) as diff from (select report_topic,count(1) as pv from log where report_topic = 'icagent_alarm' and alarm_type = 'DISCARD_BIG_LINE' group by report_topic) group by report_topic)
    • 请求LTS失败图表所关联的查询分析语句如下所示:
      select diff[1] as "请求失败数" , case when diff[3] is not null then round(diff[3] - 1 , 2) else '昨日无数据' end as "请求失败数对比昨天" from (select report_topic,compare(pv , 86400) as diff from (select report_topic,count(1) as pv from log where report_topic = 'icagent_alarm' and alarm_type = 'HTTP_REQUEST_ALARM' group by report_topic) group by report_topic)
    • 文件超过上限问题数图表所关联的查询分析语句如下所示:
      select diff[1] as "文件超过上限问题数" ,case when diff[3] is not null then round(diff[3] - 1 , 2) else '昨日无数据' end as "文件超过上限问题数对比昨天" from (select  report_topic,compare(pv , 86400) as diff from (select  report_topic,count(1) as pv from log where report_topic = 'icagent_alarm' and alarm_type = 'COLLECT_FILE_EXCEED' group by report_topic) group by report_topic)
    • 关键错误数(必需处理)图表所关联的查询分析语句如下所示:
      select TIME_FORMAT(MILLIS_TO_TIMESTAMP(ANY_VALUE(report_time/1000000)),'yyyy/MM/dd HH:mm:ss ZZ') as "最近发生时间" , loggroup as "日志组ID" ,logstream as "日志流ID" ,  host_ip as "IP" , alarm_type as "告警类型", os as "系统类型" , alarm_message as "告警详情" where report_topic = 'icagent_alarm' group by loggroup,logstream,host_ip,alarm_type,os,alarm_message limit 10000
    • 丢弃超大行详情图表所关联的查询分析语句如下所示:
      select TIME_FORMAT(MILLIS_TO_TIMESTAMP(ANY_VALUE(report_time/1000000)),'yyyy/MM/dd HH:mm:ss ZZ') as "最近发生时间" , loggroup as "日志组ID" ,logstream as "日志流ID" ,  host_ip as "IP" , alarm_type as "告警类型", os as "系统类型" , alarm_message as "告警详情" where report_topic = 'icagent_alarm' and alarm_type = 'DISCARD_BIG_LINE' group by loggroup,logstream,host_ip,alarm_type,os,alarm_message limit 10000
    • 请求LTS失败详情图表所关联的查询分析语句如下所示:
      select TIME_FORMAT(MILLIS_TO_TIMESTAMP(ANY_VALUE(report_time/1000000)),'yyyy/MM/dd HH:mm:ss ZZ') as "最近发生时间" , loggroup as "日志组ID" ,logstream as "日志流ID" ,  host_ip as "IP" , alarm_type as "告警类型", os as "系统类型" , alarm_message as "告警详情" where report_topic = 'icagent_alarm'  and alarm_type = 'HTTP_REQUEST_ALARM' group by loggroup,logstream,host_ip,alarm_type,os,alarm_message limit 10000
    • 文件超过上限问题数详情图表所关联的查询分析语句如下所示:
      select TIME_FORMAT(MILLIS_TO_TIMESTAMP(ANY_VALUE(report_time/1000000)),'yyyy/MM/dd HH:mm:ss ZZ') as "最近发生时间" , loggroup as "日志组ID" ,logstream as "日志流ID" ,  host_ip as "IP" , alarm_type as "告警类型", os as "系统类型" , alarm_message as "告警详情" where report_topic = 'icagent_alarm'  and alarm_type = 'COLLECT_FILE_EXCEED' group by loggroup,logstream,host_ip,alarm_type,os,alarm_message limit 10000

相关文档