配置 prometheus.yml,默认配置 https://prometheus.io/docs/prometheus/latest/getting_started/
# Create the host directory for the Prometheus config. The path must match the
# directory bind-mounted by `docker run` (-v /opt/prometheus:/etc/prometheus).
# -p makes the step safe to re-run when the directory already exists.
mkdir -p /opt/prometheus
cd /opt/prometheus/
vim prometheus.yml
默认 prometheus.yml
global:
  scrape_interval: 15s  # By default, scrape targets every 15 seconds.

  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'codelab-monitor'

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries
  # scraped from this config.
  - job_name: 'prometheus'

    # Override the global default and scrape targets from this job every
    # 5 seconds.
    scrape_interval: 5s

    static_configs:
      - targets: ['localhost:9090']
拉取镜像
# Download the prom/prometheus image; the tag is spelled out explicitly
# ("latest" is what an untagged pull resolves to).
docker pull prom/prometheus:latest
开始安装
# Start Prometheus in the background (-d), publish port 9090 on the host, and
# bind-mount the host directory /opt/prometheus at /etc/prometheus inside the
# container. The container is named "prometheus" and restarts automatically.
docker run -d \
  -p 9090:9090 \
  -v /opt/prometheus:/etc/prometheus \
  --name prometheus \
  --restart=always \
  prom/prometheus
-p 映射端口
-v 挂载文件
--name 实例命名
--restart=always Docker 重启后容器自动启动
浏览器 ip 加端口 9090 即可访问
配置 alertmanager.yml,默认配置参考 https://prometheus.io/docs/alerting/latest/configuration/
# Create the host directory that holds alertmanager.yml.
# -p makes the step safe to re-run when the directory already exists.
mkdir -p /opt/alertmanager
cd /opt/alertmanager/
vim alertmanager.yml
默认 alertmanager.yml
global:
  resolve_timeout: 5m

# Root route: every alert is delivered to the 'web.hook' receiver, and alerts
# that share the same alertname are grouped into one notification.
route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'

receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://127.0.0.1:5001/'

# Mute 'warning' alerts while a 'critical' alert carrying the same values for
# the alertname, dev and instance labels is firing.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
Alertmanager 容器启动后,浏览器 ip 加端口 9093 即可访问
拉取镜像
# Pull the bitnami/alertmanager image at its "latest" tag.
docker pull bitnami/alertmanager:latest
开始安装
# Start Alertmanager in the background (-d), publish port 9093 on the host, and
# bind-mount the host's alertmanager.yml over the config file path used inside
# the container. Every line except the last must end with "\" so the shell
# treats the whole thing as a single command.
docker run -d \
  -p 9093:9093 \
  --name alertmanager \
  -v /opt/alertmanager/alertmanager.yml:/opt/bitnami/alertmanager/conf/config.yml \
  --restart=always \
  bitnami/alertmanager:latest
浏览器 ip 加端口 9093 即可访问
prometheus.yml 示例
global:
  scrape_interval: 15s
  external_labels:
    monitor: 'codelab-monitor'

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    scrape_interval: 5s
    static_configs:
      - targets: ['localhost:9090']

  # Hosts to monitor; replace the placeholder addresses with real ones.
  - job_name: 'service'
    static_configs:
      - targets: ['192.168.xxx.0:9100']
      - targets: ['192.168.xxx.1:9100']

# Where Prometheus sends firing alerts.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            # Placeholder: replace with the Alertmanager address as host:port,
            # e.g. 192.168.1.10:9093. It is quoted because an unquoted value
            # starting with "[" is parsed as a YAML flow sequence.
            - '[服务器 ip]:9093'

# Alerting rule files; a relative path is resolved next to prometheus.yml.
rule_files:
  - 'rules.yml'
alerting.alertmanagers.static_configs.targets:AlertManager 部署的服务器 [ ip:端口 ]
rule_files: 新建 rules.yml,路径和 prometheus.yml 在相同的地方
rules.yml 示例
groups:
  # CPU usage per instance: 100 minus the average idle percentage over 5m.
  # The job selector must match a job_name defined in prometheus.yml
  # ('service' in the example configuration).
  - name: CPU-rule
    rules:
      - alert: High-CPU-80
        expr: '100 - avg(irate(node_cpu_seconds_total{job="service",mode="idle"}[5m])) by (instance) * 100 > 80'
        for: 1m
        labels:
          severity: warning
        annotations:
          description: "{{$labels.instance}}: Client CPU is above 80% (current value is: {{ $value }})"

      - alert: High-CPU-90
        expr: '100 - avg(irate(node_cpu_seconds_total{job="service",mode="idle"}[5m])) by (instance) * 100 > 90'
        for: 1m
        labels:
          severity: warning
        annotations:
          description: "{{$labels.instance}}: Client CPU is above 90% (current value is: {{ $value }})"

  # Memory usage: share of total memory that is not available, in percent.
  - name: Memory-rule
    rules:
      - alert: HighMemory-80
        expr: '(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 80'
        for: 1m
        labels:
          severity: warning
        annotations:
          description: "jobname:{{$labels.job}}, instance:{{$labels.instance}}, Client num is above 80%, current value is: {{ $value }}"

      - alert: HighMemory-90
        expr: '(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 90'
        for: 1m
        labels:
          severity: warning
        annotations:
          description: "jobname:{{$labels.job}}, instance:{{$labels.instance}}, Client num is above 90%, current value is: {{ $value }}"

  # JVM heap usage after GC; the metric is multiplied by 100 before comparing
  # against the percentage threshold.
  - name: jvm-rule
    rules:
      - alert: High-jvm-80
        expr: 'jvm_memory_usage_after_gc_percent{} * 100 > 80'
        for: 1m
        labels:
          severity: warning
        annotations:
          description: "jobname:{{$labels.job}}, application :{{$labels.application }}, jvm num is above 80%, current value is: {{ $value }}"

      - alert: High-jvm-90
        expr: 'jvm_memory_usage_after_gc_percent{} * 100 > 90'
        for: 1m
        labels:
          severity: warning
        annotations:
          description: "jobname:{{$labels.job}}, application :{{$labels.application }}, jvm num is above 90%, current value is: {{ $value }}"
name:类似于分组,示例中分了 cpu、内存、jvm
alert:自定义命名
expr:触发告警的 PromQL 表达式(指标条件)
for:持续时间,条件持续满足该时长后才触发告警
labels:标签,自定义
annotations:注解,自定义
指标状态 
监控目标状态,status - targets

alertmanager.yml 示例
# SMTP settings used for every e-mail notification.
global:
  smtp_smarthost: 'smtp.exmail.qq.com:465'
  smtp_from: 'fat@qq.com'
  smtp_auth_username: 'fat@qq.com'
  # Placeholder credential: replace with the real SMTP password or
  # authorization code, and keep the real value out of version control.
  smtp_auth_password: '111'
  smtp_require_tls: false

# Root route: every alert is delivered to the 'mail' receiver.
route:
  receiver: 'mail'

receivers:
  - name: 'mail'
    email_configs:
      - to: 'fat1@qq.com'
      - to: 'fat2@qq.com'
