root@consumer:/apps/prometheus# vim rules/record-rule-mysql.yaml

# Prometheus recording rules for MySQL replication metrics.
groups:
  - name: mysqld_rules
    rules:
      # Pre-compute replica lag in seconds as a ready-made time series,
      # subtracting the configured `mysql_slave_status_sql_delay` so an
      # intentional replication delay is not counted as lag.
      - record: instance:mysql_slave_lag_seconds
        expr: mysql_slave_status_seconds_behind_master - mysql_slave_status_sql_delay

root@consumer:/apps/prometheus/rules# cat record-node-rules.yaml

# Prometheus recording rules: node CPU / memory / root filesystem
# percentages plus two MySQL series.
groups:
  - name: custom_rules
    # Evaluation interval for every rule in this group.
    interval: 5s
    rules:
      # Percentage of non-idle CPU per instance: 100 * (1 - average idle rate).
      - record: instance:node_cpu:avg_rate5m
        expr: (1 - avg(irate(node_cpu_seconds_total{job="node", mode="idle"}[5m])) by (instance)) * 100

      # Buffers + cache + free memory as a percentage of total memory.
      # (Series name previously misspelled as `instace:...`.)
      - record: instance:node_memory_MemFree_percent
        expr: 100 * (node_memory_Buffers_bytes + node_memory_Cached_bytes + node_memory_MemFree_bytes) / node_memory_MemTotal_bytes

      # Free space on the root filesystem as a percentage of its size.
      - record: instance:root:node_filesystem_free_percent
        expr: 100 * node_filesystem_free_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}

      # Record slave lag via heartbeat method
      - record: instance:mysql_heartbeat_lag_seconds
        expr: mysql_heartbeat_now_timestamp_seconds - mysql_heartbeat_stored_timestamp_seconds

      # Per-second rate of commit and rollback commands over 5m, summed
      # across the `command` label.
      - record: job:mysql_transactions:rate5m
        expr: sum without (command) (rate(mysql_global_status_commands_total{command=~"(commit|rollback)"}[5m]))

root@consumer:/apps/prometheus/rules# cat record-node-rules.yaml

# Prometheus recording rules: node CPU / memory / root filesystem percentages.
groups:
  - name: custom_rules
    # Evaluation interval for every rule in this group.
    interval: 5s
    rules:
      # Percentage of non-idle CPU per instance: 100 * (1 - average idle rate).
      - record: instance:node_cpu:avg_rate5m
        expr: (1 - avg(irate(node_cpu_seconds_total{job="node", mode="idle"}[5m])) by (instance)) * 100

      # Buffers + cache + free memory as a percentage of total memory.
      # (Series name previously misspelled as `instace:...`.)
      - record: instance:node_memory_MemFree_percent
        expr: 100 * (node_memory_Buffers_bytes + node_memory_Cached_bytes + node_memory_MemFree_bytes) / node_memory_MemTotal_bytes

      # Free space on the root filesystem as a percentage of its size.
      - record: instance:root:node_filesystem_free_percent
        expr: 100 * node_filesystem_free_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}

在Prometheus配置文件(prometheus.yml)的 rule_files 中引用上面的两个规则文件

安装grafana wget https://mirrors.tuna.tsinghua.edu.cn/grafana/apt/pool/main/g/grafana/grafana_10.0.1_amd64.deb

dpkg 安装

修改配置为中文

添加源

添加模版

查看

部署alertmanager wget https://github.com/prometheus/alertmanager/releases/download/v0.26.0/alertmanager-0.26.0.linux-amd64.tar.gz

root@consumer:~# tar xf alertmanager-0.26.0.linux-amd64.tar.gz

root@consumer:~# mv alertmanager-0.26.0.linux-amd64 /usr/local/alertmanager

vi /lib/systemd/system/alertmanager.service

# systemd unit that runs Alertmanager from /usr/local/alertmanager.
[Unit]
Description=Prometheus Alertmanager
# Order startup after the network target.
After=network.target

[Service]
ExecStart=/usr/local/alertmanager/alertmanager --config.file="/usr/local/alertmanager/alertmanager.yml"

[Install]
# `systemctl enable` hooks the unit into this target; the name must be
# spelled exactly `multi-user.target`.
WantedBy=multi-user.target

启动

systemctl daemon-reload

systemctl start alertmanager.service

systemctl status alertmanager.service

systemctl enable alertmanager.service

访问alertmanager界面

http://192.168.1.73:9093/#/status

alertmanager配置发件和收件信息

cat alertmanager.yml

# Alertmanager configuration: send every alert by e-mail through QQ SMTP.
global:
  # An alert that carries no end time and is NOT refreshed within 5m is
  # treated as resolved.
  resolve_timeout: 5m

  # SMTP mail server settings.
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_from: '2368756722@qq.com'
  smtp_auth_username: '2368756722@qq.com'
  # NOTE(review): credential published in plain text - rotate it and keep it
  # out of shared files (newer Alertmanager releases accept
  # `smtp_auth_password_file`; confirm for your version).
  smtp_auth_password: 'yipmyirnobcsecej'
  # Whether TLS must be negotiated when talking to the SMTP server.
  smtp_require_tls: false
  # Hostname this client identifies itself with to the SMTP server.
  smtp_hello: "qq.com"

route:
  group_by: ['alertname']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 1h
  # Default receiver for all alerts.
  receiver: 'email'

receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://127.0.0.1:5001/'
  - name: 'email'
    email_configs:
      - to: '15115440657@163.com'
        # Also notify when an alert is resolved.
        send_resolved: true

inhibit_rules:
  # Mute `warning` alerts while a `critical` alert with the same
  # alertname/dev/instance labels is firing.
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

Prometheus添加rule文件 root@consumer:/apps/prometheus# ls rules2

Prometheus查看

配置文件添加

cat alertmanager.yml

# Alertmanager configuration: e-mail notifications rendered with a custom
# template file.
global:
  # An alert that carries no end time and is NOT refreshed within 5m is
  # treated as resolved.
  resolve_timeout: 5m

  # SMTP mail server settings.
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_from: '2368756722@qq.com'
  smtp_auth_username: '2368756722@qq.com'
  # NOTE(review): credential published in plain text - rotate it and keep it
  # out of shared files (newer Alertmanager releases accept
  # `smtp_auth_password_file`; confirm for your version).
  smtp_auth_password: 'yipmyirnobcsecej'
  # Whether TLS must be negotiated when talking to the SMTP server.
  smtp_require_tls: false
  # Hostname this client identifies itself with to the SMTP server.
  smtp_hello: "qq.com"

route:
  group_by: ['alertname']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 1h
  # Default receiver for all alerts.
  receiver: 'email'

# Template files to load; enables the custom alert e-mail template.
templates:
  - '/usr/local/alertmanager/email_template.tmpl'

receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://127.0.0.1:5001/'
  - name: 'email'
    email_configs:
      - to: '15115440657@163.com'
        headers:
          subject: "{{ .Status | toUpper }} {{ .CommonLabels.env }}:{{ .CommonLabels.cluster }} {{ .CommonLabels.alertname }}"
        # Render the body with the "email.to.html" template defined in the
        # template file listed above.
        html: '{{ template "email.to.html" . }}'
        # Also notify when an alert is resolved.
        send_resolved: true

inhibit_rules:
  # Mute `warning` alerts while a `critical` alert with the same
  # alertname/dev/instance labels is firing.
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

邮件告警模版

root@consumer:/usr/local/alertmanager# cat email_template.tmpl

{{/* E-mail body for Alertmanager notifications: one section per firing alert, then one per resolved alert. */}}
{{/* Each loop ranges over its own subset (.Alerts.Firing / .Alerts.Resolved) so an alert is never listed under the wrong state. */}}
{{ define "email.to.html" }}
{{- if gt (len .Alerts.Firing) 0 -}}
{{ range .Alerts.Firing }}
=========start==========
告警程序: prometheus_alert
告警级别: {{ .Labels.severity }}
告警类型: {{ .Labels.alertname }}
告警主机: {{ .Labels.instance }}
告警主题: {{ .Annotations.summary }}
告警详情: {{ .Annotations.description }}
触发时间: {{ .StartsAt.Format "2006-01-02 15:04:05" }}
=========end==========
{{ end }}{{ end -}}
{{- if gt (len .Alerts.Resolved) 0 -}}
{{ range .Alerts.Resolved }}
=========start==========
告警程序: prometheus_alert
告警级别: {{ .Labels.severity }}
告警类型: {{ .Labels.alertname }}
告警主机: {{ .Labels.instance }}
告警主题: {{ .Annotations.summary }}
告警详情: {{ .Annotations.description }}
触发时间: {{ .StartsAt.Format "2006-01-02 15:04:05" }}
恢复时间: {{ .EndsAt.Format "2006-01-02 15:04:05" }}
=========end==========
{{ end }}{{ end -}}
{{- end }}

精彩链接

评论可见,请评论后查看内容,谢谢!!!
 您阅读本篇文章共花了: