通过计算cpu空闲使用率 用1-空闲率 则为cpu使用率
node_cpu_seconds_total 表示机器自上次启动以来,各 CPU 在各种模式下累计使用的时间(秒),是一个只增不减的计数器
由于cpu资源会不断的被机器利用,其中对应了很多模式,包括内核进程、用户进程、io等待、中断等使用模式
可以通过查询node_cpu_seconds 指标来看各种对应的模式
cpu空闲对应的模式:mode="idle":空闲的cpu时间
irate(node_cpu_seconds_total{mode="idle"}[5m])
avg:聚合函数 求平均值
avg by(instance) (irate(node_cpu_seconds_total{mode="idle"}[5m]))
通过1-cpu空闲率得出每个机器的平均使用率
1- (avg by(instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])))
转换为 100%
100 - (avg by (instance) (irate(node_cpu_seconds_total{instance="xxx", mode="idle"}[5m])) * 100)
查询集群所有机器的cpu平均使用率
avg( 1 - (avg(irate(node_cpu_seconds_total{mode="idle"}[1m]))))
avg by (instance, mode) (irate(node_cpu_seconds_total{instance="xxx"}[5m])) * 100
node_load1{instance="xxx"} // 1分钟负载
node_load5{instance="xxx"} // 5分钟负载
node_load15{instance="xxx"} // 15分钟负载
100-(node_memory_MemFree_bytes{instance="192.168.119.69:9796"}+node_memory_Cached_bytes{instance="192.168.119.69:9796"}+node_memory_Buffers_bytes{instance="192.168.119.69:9796"})/node_memory_MemTotal_bytes{instance="192.168.119.69:9796"}*100
(node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)/node_memory_MemTotal_bytes * 100
100 - node_filesystem_free_bytes{mountpoint = "/",instance="xxx",fstype!~"rootfs|selinuxfs|autofs|rpc_pipefs|tmpfs|udev|none|devpts|sysfs|debugfs|fuse.*"} /
node_filesystem_size_bytes{mountpoint = "/",instance="xxx",fstype!~"rootfs|selinuxfs|autofs|rpc_pipefs|tmpfs|udev|none|devpts|sysfs|debugfs|fuse.*"} * 100
或者你也可以直接使用 {fstype="xxx"} 来指定想查看的磁盘信息
// 下行带宽(receive 为接收/下载方向)
sum by (instance) (irate(node_network_receive_bytes_total{instance="xxx",device!~"bond.*?|lo"}[5m])/128)
// 上行带宽(transmit 为发送/上传方向)
sum by (instance) (irate(node_network_transmit_bytes_total{instance="xxx",device!~"bond.*?|lo"}[5m])/128)
// 入包量(包数应使用 _packets_total 指标,而非 _bytes_total)
sum by (instance) (rate(node_network_receive_packets_total{instance="xxx",device!="lo"}[5m]))
// 出包量
sum by (instance) (rate(node_network_transmit_packets_total{instance="xxx",device!="lo"}[5m]))
写
irate(node_disk_writes_completed_total{instance=~"^192.168.119.68:9796",device=~"[a-z]*[a-z]"}[5m])
//IOPS读 查询条件
String IOPSRead = "sum without(device) (node_disk_reads_completed_total{instance=~\"^" + instance + "\"})";
node_disk_reads_completed_total/node_disk_read_time_seconds_total
读
irate(node_disk_reads_completed_total{instance=~"^192.168.119.68:9796",device=~"[a-z]*[a-z]"}[5m])
sum by(component) (irate(apiserver_request_total[5m]))
avg by(component)((sum by(component)(apiserver_request_duration_seconds_sum))/(sum by(component)(apiserver_request_duration_seconds_count)))
sum (scheduler_schedule_attempts_total)
sum (scheduler_schedule_attempts_total{result!="scheduled"})
1- (avg(irate(node_cpu_seconds_total{mode="idle"}[5m])))
100 * (1 - sum(node_memory_MemAvailable_bytes) / sum(node_memory_MemTotal_bytes))
(sum(node_filesystem_size_bytes{device!~"rootfs|HarddiskVolume.+",node=~"^.*$"})
- sum(node_filesystem_free_bytes{device!~"rootfs|HarddiskVolume.+",node=~"^.*$"})
) / sum(node_filesystem_size_bytes{device!~"rootfs|HarddiskVolume.+",node=~"^.*$"}) * 100
sum (machine_cpu_cores{node=~"^.*$"})
(1 - (avg(irate(node_cpu_seconds_total{mode="idle",node=~"^.*$"}[5m])))) * sum(machine_cpu_cores{node=~"^.*$"})
sum (machine_memory_bytes{node=~"^.*$"})
sum(node_memory_MemTotal_bytes{node=~"^.*$"}) - sum(node_memory_MemAvailable_bytes{node=~"^.*$"})
sum(node_filesystem_size_bytes{device!~"rootfs|HarddiskVolume.+",node=~"^.*$"})
sum(node_filesystem_size_bytes{device!~"rootfs|HarddiskVolume.+",node=~"^.*$"})
- sum(node_filesystem_free_bytes{device!~"rootfs|HarddiskVolume.+",node=~"^.*$"})
sum (kube_node_status_capacity_pods)
不用---> sum(kube_pod_status_phase{namespace=~".*", phase=~".*"})
sum(kubelet_running_pod_count)
sum(kube_pod_status_phase{namespace=~".*", phase="Running"})