更新时间:2025-09-08 GMT+08:00
分享

采集诊断仪表盘模板

采集诊断仪表盘模板支持查看ICAgent采集监控、查看ICAgent整体状态和查看ICAgent异常监控。

前提条件

在LTS控制台配置中心页面的“ICAgent采集开关”页签,开启ICAgent诊断开关,请参考设置ICAgent日志采集开关。

查看ICAgent采集监控

  1. 登录云日志服务控制台,进入“日志管理”页面。
  2. 在左侧导航栏中选择“仪表盘”。
  3. 在仪表盘模板下方,选择“采集诊断仪表盘模板 > ICAgent采集监控”,查看图表详情。

    • 过滤日志组ID,所关联的查询分析语句如下所示:
      * | SELECT
            loggroup
        FROM log
        WHERE report_topic = 'icagent_profile'
            OR report_topic = 'icagent_alarm'
        GROUP BY loggroup
    • 过滤日志流ID,所关联的查询分析语句如下所示:
      * | SELECT
            logstream
        FROM log
        WHERE report_topic = 'icagent_profile'
            OR report_topic = 'icagent_alarm'
        GROUP BY logstream
    • 原始数据流量图表所关联的查询分析语句如下所示:
      * | SELECT
       case
        when diff [ 1 ] is null then '0'
        when diff [ 1 ] > 1024 and diff [ 1 ] <= 1024*1024 then concat(cast(round(diff [ 1 ]*1.0/1024,4) as decimal(12,4)),' KB')
        when diff [ 1 ] > 1024*1024 and diff [ 1 ] <= 1024*1024*1024 then  concat(cast(round(diff [ 1 ]*1.0/1024/1024,4) as decimal(12,4)),' MB')
        when diff [ 1 ] > 1024*1024*1024 and diff [ 1 ] <= cast(1024 as bigint)*1024*1024*1024 then  concat(cast(round(diff [ 1 ]*1.0/1024/1024/1024,4) as decimal(12,4)),' GB')
        when diff [ 1 ] > cast(1024 as bigint)*1024*1024*1024 then  concat(cast(round(diff [ 1 ]*1.0/1024/1024/1024/1024,4) as decimal(12,4)),' TB')
        else concat(cast(round(diff [ 1 ]*1.0,4) as decimal(12,4)),' B')
       END AS "原始流量",
       case
        when diff [ 3 ] is null then '昨日无数据'
        else cast(cast(round(diff [ 3 ] - 1,2) as decimal(12,2)) as varchar)
       END AS "同比昨日"
      FROM
       (
       SELECT
        report_topic,
        compare ( traffic, 86400 ) AS diff
       FROM
        ( SELECT report_topic, sum( read_bytes ) AS traffic FROM log WHERE report_topic = 'icagent_profile' GROUP BY report_topic )
      GROUP BY
       report_topic)
    • 采集文件数图表所关联的查询分析语句如下所示:
      * | SELECT
            diff[1] AS "采集文件数",
            CASE
                WHEN diff[3] IS NOT NULL THEN CAST(CAST(round(diff[3] - 1, 2) AS decimal(12,2)) AS varchar)
                ELSE '昨日无数据'
            END AS "同比昨天"
        FROM (
            SELECT
                compare(uv, 86400) AS diff
            FROM (
                SELECT
                    report_topic,
                    count(DISTINCT concat(file_name, host_ip)) AS uv
                FROM log
                WHERE report_topic = 'icagent_profile'
                GROUP BY report_topic
            )
            GROUP BY report_topic
        )
    • 采集机器数/同比昨天图表所关联的查询分析语句如下所示:
      * | SELECT
            diff[1] AS "采集机器数",
            CASE
                WHEN diff[3] IS NOT NULL THEN CAST(CAST(round(diff[3] - 1, 2) AS decimal(12,2)) AS varchar)
                ELSE '昨日无数据'
            END AS "同比昨天"
        FROM (
            SELECT
                compare(uv, 86400) AS diff
            FROM (
                SELECT
                    report_topic,
                    count(DISTINCT host_ip) AS uv
                FROM log
                WHERE report_topic = 'icagent_profile'
                GROUP BY report_topic
            )
            GROUP BY report_topic
        )
    • 数据发送流量图表所关联的查询分析语句如下所示:
      report_topic = "icagent_profile"  | SELECT
            "time",
            CASE
                WHEN traffic IS NULL THEN 0
                ELSE round(traffic*1.0/1024/1024, 2)
            END AS "发送流量 MB"
        FROM (
            SELECT
                time_floor(__time, 'PT5M') AS "time",
                sum(read_bytes) AS "traffic"
            FROM log
            GROUP BY "time"
        )
        ORDER BY "time"
    • ICAgent写入次数图表所关联的查询分析语句如下所示:
      * | select time_floor(__time,'PT5M') as "time",sum(read_count) as "写入次数" where report_topic = 'icagent_profile'  group by "time" order by "time"
    • 采集机器数图表所关联的查询分析语句如下所示:
      * | SELECT
            time_floor(__time, 'PT5M') AS "time",
            count(DISTINCT host_ip) AS "采集机器数"
        WHERE report_topic = 'icagent_profile'
        GROUP BY "time"
        ORDER BY "time"
    • 采集文件分布图表所关联的查询分析语句如下所示:
      * | SELECT
        file_name AS "采集路径",
        host_ip AS "IP",
        case
         when traffic is null then '0'
         when traffic > 1024 and traffic <= 1024*1024 then concat(cast(round(traffic*1.0/1024,4) as decimal(12,4)),' KB')
         when traffic > 1024*1024 and traffic <= 1024*1024*1024 then  concat(cast(round(traffic*1.0/1024/1024,4) as decimal(12,4)),' MB')
         when traffic > 1024*1024*1024 and traffic <= cast(1024 as bigint)*1024*1024*1024 then  concat(cast(round(traffic*1.0/1024/1024/1024,4) as decimal(12,4)),' GB')
         when traffic > cast(1024 as bigint)*1024*1024*1024 then  concat(cast(round(traffic*1.0/1024/1024/1024/1024,4) as decimal(12,4)),' TB')
        else concat(cast(round(traffic*1.0,4) as decimal(12,4)),' B')
       END AS "采集流量"
       FROM
       (SELECT
        file_name,
        host_ip,
        sum( read_bytes ) AS "traffic"
       WHERE
        "report_topic" = 'icagent_profile'
       GROUP BY
        file_name,
        host_ip)

查看ICAgent整体状态

  1. 在仪表盘模板下方,选择“采集诊断仪表盘模板 > ICAgent整体状态”,查看图表详情。

    • 活跃ICAgent数图表所关联的查询分析语句如下所示:
      * | SELECT
            diff[1] AS "活跃ICAgent数",
            CASE
                WHEN diff[2] IS NOT NULL THEN CAST(diff[2] AS varchar)
                ELSE '昨日无数据'
            END AS "昨日活跃ICAgent数"
        FROM (
            SELECT
                report_topic,
                compare(uv, 86400) AS diff
            FROM (
                SELECT
                    report_topic,
                    COUNT(DISTINCT ip) AS uv
                FROM log
                WHERE report_topic = 'icagent_status'
                GROUP BY report_topic
            )
            GROUP BY report_topic
        )
    • 发送延迟/次数趋势图表所关联的查询分析语句如下所示:
      * | SELECT
            time_floor(__time, 'PT5M') AS "time",
            sum("metric.lts_cost.below_100_ms") AS "below_100_ms",
            sum("metric.lts_cost.100to500ms") AS "100to500ms",
            sum("metric.lts_cost.500msto1s") AS "500msto1s",
            sum("metric.lts_cost.1sto10s") AS "1sto10s",
            sum("metric.lts_cost.10ston") AS "10ston"
        FROM log
        WHERE "report_topic" = 'icagent_status'
        GROUP BY "time"
        ORDER BY "time"
    • 运行状态分布图表所关联的查询分析语句如下所示:
      * | SELECT
            status,
            count(DISTINCT ip) AS pv
        FROM log
        WHERE report_topic = 'icagent_status'
        GROUP BY status
    • CPU趋势图表所关联的查询分析语句如下所示:
      * | SELECT
            ip,
            time_floor(__time, 'PT5M') AS "time",
            avg("metric.cpu_usage") AS "CPU占用率"
        FROM log
        WHERE report_topic = 'icagent_status'
            AND "metric.cpu_usage" IS NOT NULL
        GROUP BY "time", ip
        ORDER BY "time"
    • ICAgent整体状态图表所关联的查询分析语句如下所示:
      * | SELECT
            host_name AS "主机名",
            ip AS "IP",
            version AS "版本号",
            os AS "操作系统",
            MILLIS_TO_TIMESTAMP(ANY_VALUE("metric.start_time")) AS "启动时间",
            avg("metric.cpu_usage") AS "CPU",
            avg("metric.mem_used")*1.0/1024 AS "内存(KB)",
            status AS "运行状态"
        WHERE report_topic = 'icagent_status'
        GROUP BY host_name, ip, version, os, status
        ORDER BY "启动时间" DESC

查看ICAgent异常监控

  1. 在仪表盘模板下方,选择“采集诊断仪表盘模板 > ICAgent异常监控”,查看图表详情。

    • 过滤日志组ID,所关联的查询分析语句如下所示:
      * | SELECT
            loggroup
        FROM log
        WHERE report_topic = 'icagent_profile'
            OR report_topic = 'icagent_alarm'
        GROUP BY loggroup
        LIMIT 10000
    • 过滤日志流ID,所关联的查询分析语句如下所示:
      * | SELECT
            logstream
        FROM log
        WHERE report_topic = 'icagent_profile'
            OR report_topic = 'icagent_alarm'
        GROUP BY logstream
        LIMIT 10000
    • 关键错误数图表所关联的查询分析语句如下所示:
      * | SELECT
            diff[1] AS "错误数",
            CASE
                WHEN diff[3] IS NOT NULL THEN CAST(CAST(round(diff[3] - 1, 4) AS decimal(12,4)) AS varchar)
                ELSE '昨日无数据'
            END AS "错误数对比昨天"
        FROM (
            SELECT
                report_topic,
                compare(pv, 86400) AS diff
            FROM (
                SELECT
                    report_topic,
                    count(1) AS pv
                FROM log
                WHERE report_topic = 'icagent_alarm'
                GROUP BY report_topic
            )
            GROUP BY report_topic
        )
    • 丢弃超大行图表所关联的查询分析语句如下所示:
      * | select diff[1] as "丢弃行数" ,  case when diff[3] is not null then cast(cast(round(diff[3] - 1 , 4) as decimal(12,4)) as varchar) else '昨日无数据' end as "丢弃行数对比昨天" from (select report_topic,compare(pv , 86400) as diff from (select report_topic,count(1) as pv from log where report_topic = 'icagent_alarm' and alarm_type = 'DISCARD_BIG_LINE' group by report_topic) group by report_topic)
    • 请求LTS失败图表所关联的查询分析语句如下所示:
      * | SELECT
            diff[1] AS "请求失败数",
            CASE
                WHEN diff[3] IS NOT NULL THEN CAST(CAST(round(diff[3] - 1, 4) AS decimal(12,4)) AS varchar)
                ELSE '昨日无数据'
            END AS "请求失败数对比昨天"
        FROM (
            SELECT
                report_topic,
                compare(pv, 86400) AS diff
            FROM (
                SELECT
                    report_topic,
                    count(1) AS pv
                FROM log
                WHERE report_topic = 'icagent_alarm'
                    AND alarm_type = 'HTTP_REQUEST_ALARM'
                GROUP BY report_topic
            )
            GROUP BY report_topic
        )
    • 文件超过上限问题数图表所关联的查询分析语句如下所示:
      * | SELECT
            diff[1] AS "文件超过上限问题数",
            CASE
                WHEN diff[3] IS NOT NULL THEN CAST(CAST(round(diff[3] - 1, 4) AS decimal(12,4)) AS varchar)
                ELSE '昨日无数据'
            END AS "文件超过上限问题数对比昨天"
        FROM (
            SELECT
                report_topic,
                compare(pv, 86400) AS diff
            FROM (
                SELECT
                    report_topic,
                    count(1) AS pv
                FROM log
                WHERE report_topic = 'icagent_alarm'
                    AND alarm_type = 'COLLECT_FILE_EXCEED'
                GROUP BY report_topic
            )
            GROUP BY report_topic
        )
    • 关键错误数(必需处理)图表所关联的查询分析语句如下所示:
      * | SELECT
            MILLIS_TO_TIMESTAMP(ANY_VALUE(report_time/1000000)) AS "最近发生时间",
            loggroup AS "日志组ID",
            logstream AS "日志流ID",
            host_ip AS "IP",
            alarm_type AS "告警类型",
            os AS "系统类型",
            alarm_message AS "告警详情"
        WHERE report_topic = 'icagent_alarm'
        GROUP BY loggroup, logstream, host_ip, alarm_type, os, alarm_message
        ORDER BY "最近发生时间" DESC
        LIMIT 10000
    • 丢弃超大行详情图表所关联的查询分析语句如下所示:
      * | SELECT
            MILLIS_TO_TIMESTAMP(ANY_VALUE(report_time/1000000)) AS "最近发生时间",
            loggroup AS "日志组ID",
            logstream AS "日志流ID",
            host_ip AS "IP",
            alarm_type AS "告警类型",
            os AS "系统类型",
            alarm_message AS "告警详情"
        WHERE report_topic = 'icagent_alarm'
            AND alarm_type = 'DISCARD_BIG_LINE'
        GROUP BY loggroup, logstream, host_ip, alarm_type, os, alarm_message
        ORDER BY "最近发生时间" DESC
        LIMIT 10000
    • 请求LTS失败详情图表所关联的查询分析语句如下所示:
      * | SELECT
            MILLIS_TO_TIMESTAMP(ANY_VALUE(report_time/1000000)) AS "最近发生时间",
            loggroup AS "日志组ID",
            logstream AS "日志流ID",
            host_ip AS "IP",
            alarm_type AS "告警类型",
            os AS "系统类型",
            alarm_message AS "告警详情"
        WHERE report_topic = 'icagent_alarm'
            AND alarm_type = 'HTTP_REQUEST_ALARM'
        GROUP BY loggroup, logstream, host_ip, alarm_type, os, alarm_message
        ORDER BY "最近发生时间" DESC
        LIMIT 10000
    • 文件超过上限问题数详情图表所关联的查询分析语句如下所示:
      * | SELECT
            MILLIS_TO_TIMESTAMP(ANY_VALUE(report_time/1000000)) AS "最近发生时间",
            loggroup AS "日志组ID",
            logstream AS "日志流ID",
            host_ip AS "IP",
            alarm_type AS "告警类型",
            os AS "系统类型",
            alarm_message AS "告警详情"
        WHERE report_topic = 'icagent_alarm'
            AND alarm_type = 'COLLECT_FILE_EXCEED'
        GROUP BY loggroup, logstream, host_ip, alarm_type, os, alarm_message
        ORDER BY "最近发生时间" DESC
        LIMIT 10000

相关文档