1. 邮件方式发送告警
1.1 配置 SMTP 账户
- 首先进入 qq 邮箱后台,然后点击设置
- 选择【帐户】,开启 IMAP/SMTP 服务,并获取授权码(即后文 smtp_auth_password 使用的授权密码)
1.2 创建configmap
yaml
# Alertmanager main configuration plus the WeChat and e-mail notification
# templates, shipped together as one ConfigMap.
# The `templates` glob in alertmanager.yml expects the *.tmpl keys to appear as
# files under /etc/alertmanager/ (assumes the ConfigMap is mounted there;
# confirm against the Alertmanager Deployment).
# NOTE(review): smtp_auth_password and api_secret are credentials and a
# ConfigMap stores them in plain text; consider a Secret for real deployments.
apiVersion: v1
kind: ConfigMap
metadata:
  name: alertmanager-config
  namespace: monitor
data:
  alertmanager.yml: |-
    global:
      resolve_timeout: 1m
      smtp_smarthost: 'smtp.qq.com:465'  # SMTP host:port of the mail provider
      smtp_from: '104xxx@qq.com'  # sender address (From header)
      smtp_auth_username: '104xxx@qq.com'  # SMTP login user
      smtp_auth_password: 'djuxxxxxbeee'  # the mailbox's third-party authorization code, not the account password
      smtp_require_tls: false  # some providers need STARTTLS; not needed for this test setup (465 is an implicit-TLS port)
      smtp_hello: 'qq.com'  # hostname announced in HELO/EHLO; must be a bare domain, without '@'
    templates:
      - '/etc/alertmanager/*.tmpl'
    route:
      group_by: ['env','instance','type','group','job','alertname','cluster']
      group_wait: 10s
      group_interval: 2m
      repeat_interval: 10m
      receiver: 'email'
      routes:
        # Sibling routes are evaluated in order and evaluation stops at the
        # first match unless `continue: true` is set. All three routes match
        # the same label, so the first two must continue; otherwise only
        # 'email' would ever be notified.
        - receiver: 'email'
          match:
            severity: critical
          continue: true
        - receiver: 'wechat'
          match:
            severity: critical
          continue: true
        - receiver: 'webhook'
          match:
            severity: critical
    receivers:
      - name: 'email'
        email_configs:
          - to: 'hxopexxx@163.com'
            send_resolved: true
            html: '{{ template "email.to.html" . }}'
            # Appends "恢复" to the subject when the notification carries resolved alerts.
            headers: { Subject: "系统监控告警{{- if gt (len .Alerts.Resolved) 0 -}}恢复{{ end }}" }
      #- name: 'devops'
      #  email_configs:
      #  - to: 'hxopensource@163.com,xxx@qq.com'
      #    send_resolved: true
      #    html: '{{ template "email.to.html" . }}'
      - name: 'wechat'
        wechat_configs:
          - corp_id: 'wwe158cxxx'
            to_party: '1'
            to_user: '@all'
            agent_id: '1000007'  # quoted: this is a string ID, not a number
            api_secret: '0UATPXAb10hW0Kbzxxxzl5BTIapn_rs'
            send_resolved: true
      - name: 'webhook'
        webhook_configs:
          - url: 'http://webhook-dingtalk.monitor.svc.cluster.local:8060/dingtalk/webhook1/send'
            send_resolved: true
    inhibit_rules:
      # A firing critical alert mutes warning alerts that share these label values.
      - source_match:
          severity: 'critical'
        target_match:
          severity: 'warning'
        equal: ['alertname', 'dev', 'instance']
  wechat.tmpl: |-
    {{/* Redefines "wechat.default.message". Only the first firing alert and the
         first resolved alert of a notification are rendered. Timestamps are
         shifted by 28800e9 ns (8 hours) before formatting. */}}
    {{ define "wechat.default.message" }}
    {{- if gt (len .Alerts.Firing) 0 -}}
    {{- range $index, $alert := .Alerts.Firing -}}
    {{- if eq $index 0 }}
    ========= 监控报警 =========
    告警状态:{{ $alert.Status }}
    告警级别:{{ $alert.Labels.severity }}
    告警类型:{{ $alert.Labels.alertname }}
    故障主机: {{ $alert.Labels.instance }}
    告警主题: {{ $alert.Annotations.summary }}
    告警详情: {{ $alert.Annotations.message }}{{ $alert.Annotations.description}};
    触发阀值:{{ $alert.Annotations.value }}
    故障时间: {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
    ========= = end = =========
    {{- end }}
    {{- end }}
    {{- end }}
    {{- if gt (len .Alerts.Resolved) 0 -}}
    {{- range $index, $alert := .Alerts.Resolved -}}
    {{- if eq $index 0 }}
    ========= 告警恢复 =========
    告警类型:{{ $alert.Labels.alertname }}
    告警状态:{{ $alert.Status }}
    告警主题: {{ $alert.Annotations.summary }}
    告警详情: {{ $alert.Annotations.message }}{{ $alert.Annotations.description}};
    故障时间: {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
    恢复时间: {{ ($alert.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
    {{- if gt (len $alert.Labels.instance) 0 }}
    实例信息: {{ $alert.Labels.instance }}
    {{- end }}
    ========= = end = =========
    {{- end }}
    {{- end }}
    {{- end }}
    {{- end }}
  email.tmpl: |-
    {{/* "email.to.html" is the HTML body referenced by the 'email' receiver.
         Firing and resolved alerts are iterated separately so that a mixed
         notification lists each alert under the correct heading. Timestamps
         get the same +8h shift (28800e9 ns) that wechat.tmpl applies. */}}
    {{ define "email.from" }}xxx.com{{ end }}
    {{ define "email.to" }}xxx.com{{ end }}
    {{ define "email.to.html" }}
    {{- if gt (len .Alerts.Firing) 0 -}}
    {{ range .Alerts.Firing }}
    ========= 监控报警 =========<br>
    告警程序: prometheus_alert <br>
    告警级别: {{ .Labels.severity }} <br>
    告警类型: {{ .Labels.alertname }} <br>
    告警主机: {{ .Labels.instance }} <br>
    告警主题: {{ .Annotations.summary }} <br>
    告警详情: {{ .Annotations.description }} <br>
    触发时间: {{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }} <br>
    ========= = end = =========<br>
    {{ end }}{{ end -}}
    {{- if gt (len .Alerts.Resolved) 0 -}}
    {{ range .Alerts.Resolved }}
    ========= 告警恢复 =========<br>
    告警程序: prometheus_alert <br>
    告警级别: {{ .Labels.severity }} <br>
    告警类型: {{ .Labels.alertname }} <br>
    告警主机: {{ .Labels.instance }} <br>
    告警主题: {{ .Annotations.summary }} <br>
    告警详情: {{ .Annotations.description }} <br>
    触发时间: {{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }} <br>
    恢复时间: {{ (.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }} <br>
    ========= = end = =========<br>
    {{ end }}{{ end -}}
    {{- end }}
# Alertmanager main configuration plus the WeChat and e-mail notification
# templates, shipped together as one ConfigMap.
# The `templates` glob in alertmanager.yml expects the *.tmpl keys to appear as
# files under /etc/alertmanager/ (assumes the ConfigMap is mounted there;
# confirm against the Alertmanager Deployment).
# NOTE(review): smtp_auth_password and api_secret are credentials and a
# ConfigMap stores them in plain text; consider a Secret for real deployments.
apiVersion: v1
kind: ConfigMap
metadata:
  name: alertmanager-config
  namespace: monitor
data:
  alertmanager.yml: |-
    global:
      resolve_timeout: 1m
      smtp_smarthost: 'smtp.qq.com:465'  # SMTP host:port of the mail provider
      smtp_from: '104xxx@qq.com'  # sender address (From header)
      smtp_auth_username: '104xxx@qq.com'  # SMTP login user
      smtp_auth_password: 'djuxxxxxbeee'  # the mailbox's third-party authorization code, not the account password
      smtp_require_tls: false  # some providers need STARTTLS; not needed for this test setup (465 is an implicit-TLS port)
      smtp_hello: 'qq.com'  # hostname announced in HELO/EHLO; must be a bare domain, without '@'
    templates:
      - '/etc/alertmanager/*.tmpl'
    route:
      group_by: ['env','instance','type','group','job','alertname','cluster']
      group_wait: 10s
      group_interval: 2m
      repeat_interval: 10m
      receiver: 'email'
      routes:
        # Sibling routes are evaluated in order and evaluation stops at the
        # first match unless `continue: true` is set. All three routes match
        # the same label, so the first two must continue; otherwise only
        # 'email' would ever be notified.
        - receiver: 'email'
          match:
            severity: critical
          continue: true
        - receiver: 'wechat'
          match:
            severity: critical
          continue: true
        - receiver: 'webhook'
          match:
            severity: critical
    receivers:
      - name: 'email'
        email_configs:
          - to: 'hxopexxx@163.com'
            send_resolved: true
            html: '{{ template "email.to.html" . }}'
            # Appends "恢复" to the subject when the notification carries resolved alerts.
            headers: { Subject: "系统监控告警{{- if gt (len .Alerts.Resolved) 0 -}}恢复{{ end }}" }
      #- name: 'devops'
      #  email_configs:
      #  - to: 'hxopensource@163.com,xxx@qq.com'
      #    send_resolved: true
      #    html: '{{ template "email.to.html" . }}'
      - name: 'wechat'
        wechat_configs:
          - corp_id: 'wwe158cxxx'
            to_party: '1'
            to_user: '@all'
            agent_id: '1000007'  # quoted: this is a string ID, not a number
            api_secret: '0UATPXAb10hW0Kbzxxxzl5BTIapn_rs'
            send_resolved: true
      - name: 'webhook'
        webhook_configs:
          - url: 'http://webhook-dingtalk.monitor.svc.cluster.local:8060/dingtalk/webhook1/send'
            send_resolved: true
    inhibit_rules:
      # A firing critical alert mutes warning alerts that share these label values.
      - source_match:
          severity: 'critical'
        target_match:
          severity: 'warning'
        equal: ['alertname', 'dev', 'instance']
  wechat.tmpl: |-
    {{/* Redefines "wechat.default.message". Only the first firing alert and the
         first resolved alert of a notification are rendered. Timestamps are
         shifted by 28800e9 ns (8 hours) before formatting. */}}
    {{ define "wechat.default.message" }}
    {{- if gt (len .Alerts.Firing) 0 -}}
    {{- range $index, $alert := .Alerts.Firing -}}
    {{- if eq $index 0 }}
    ========= 监控报警 =========
    告警状态:{{ $alert.Status }}
    告警级别:{{ $alert.Labels.severity }}
    告警类型:{{ $alert.Labels.alertname }}
    故障主机: {{ $alert.Labels.instance }}
    告警主题: {{ $alert.Annotations.summary }}
    告警详情: {{ $alert.Annotations.message }}{{ $alert.Annotations.description}};
    触发阀值:{{ $alert.Annotations.value }}
    故障时间: {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
    ========= = end = =========
    {{- end }}
    {{- end }}
    {{- end }}
    {{- if gt (len .Alerts.Resolved) 0 -}}
    {{- range $index, $alert := .Alerts.Resolved -}}
    {{- if eq $index 0 }}
    ========= 告警恢复 =========
    告警类型:{{ $alert.Labels.alertname }}
    告警状态:{{ $alert.Status }}
    告警主题: {{ $alert.Annotations.summary }}
    告警详情: {{ $alert.Annotations.message }}{{ $alert.Annotations.description}};
    故障时间: {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
    恢复时间: {{ ($alert.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}
    {{- if gt (len $alert.Labels.instance) 0 }}
    实例信息: {{ $alert.Labels.instance }}
    {{- end }}
    ========= = end = =========
    {{- end }}
    {{- end }}
    {{- end }}
    {{- end }}
  email.tmpl: |-
    {{/* "email.to.html" is the HTML body referenced by the 'email' receiver.
         Firing and resolved alerts are iterated separately so that a mixed
         notification lists each alert under the correct heading. Timestamps
         get the same +8h shift (28800e9 ns) that wechat.tmpl applies. */}}
    {{ define "email.from" }}xxx.com{{ end }}
    {{ define "email.to" }}xxx.com{{ end }}
    {{ define "email.to.html" }}
    {{- if gt (len .Alerts.Firing) 0 -}}
    {{ range .Alerts.Firing }}
    ========= 监控报警 =========<br>
    告警程序: prometheus_alert <br>
    告警级别: {{ .Labels.severity }} <br>
    告警类型: {{ .Labels.alertname }} <br>
    告警主机: {{ .Labels.instance }} <br>
    告警主题: {{ .Annotations.summary }} <br>
    告警详情: {{ .Annotations.description }} <br>
    触发时间: {{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }} <br>
    ========= = end = =========<br>
    {{ end }}{{ end -}}
    {{- if gt (len .Alerts.Resolved) 0 -}}
    {{ range .Alerts.Resolved }}
    ========= 告警恢复 =========<br>
    告警程序: prometheus_alert <br>
    告警级别: {{ .Labels.severity }} <br>
    告警类型: {{ .Labels.alertname }} <br>
    告警主机: {{ .Labels.instance }} <br>
    告警主题: {{ .Annotations.summary }} <br>
    告警详情: {{ .Annotations.description }} <br>
    触发时间: {{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }} <br>
    恢复时间: {{ (.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }} <br>
    ========= = end = =========<br>
    {{ end }}{{ end -}}
    {{- end }}
- 配置文件热更新
# Ask the running Alertmanager to re-read its configuration via POST /-/reload.
curl --request POST 'http://alertmanager.ikubernetes.net/-/reload'
# Ask the running Alertmanager to re-read its configuration via POST /-/reload.
curl --request POST 'http://alertmanager.ikubernetes.net/-/reload'
- 测试发送邮件
bash
# Push a hand-made critical test alert into Alertmanager.
# Uses the v2 API: the v1 alerts endpoint was removed in Alertmanager 0.27.
# The alertname label is included so that notification templates which print
# it have a value to show.
curl -XPOST -H 'Content-Type: application/json' \
  http://alertmanager.ikubernetes.net/api/v2/alerts \
  -d '[{"labels":{"alertname":"TestAlert","severity":"critical"},"annotations":{"summary":"This is a testalert"}}]'
# Push a hand-made critical test alert into Alertmanager.
# Uses the v2 API: the v1 alerts endpoint was removed in Alertmanager 0.27.
# The alertname label is included so that notification templates which print
# it have a value to show.
curl -XPOST -H 'Content-Type: application/json' \
  http://alertmanager.ikubernetes.net/api/v2/alerts \
  -d '[{"labels":{"alertname":"TestAlert","severity":"critical"},"annotations":{"summary":"This is a testalert"}}]'
- 效果
1.3 效果
- 恢复告警
2. 模版
2.1
yaml
{{/*
  Renders one markdown section per entry of .alerts: a recovery section when
  the entry's status is "resolved", a firing section otherwise.
  GetCSTtime is a helper supplied by the rendering tool; it is not defined here.
  This comment is attached to the first action so it adds no output.
*/}}{{ $externalURL := .externalURL }}{{ range $alert := .alerts }}
{{ if eq $alert.status "resolved" }}
#### [Prometheus恢复信息]({{ $alert.generatorURL }})
##### <font color="#FF0000">告警名称</font>:[{{ $alert.labels.alertname }}]({{ $externalURL }})
##### <font color="#FF0000">告警级别</font>:{{ $alert.labels.severity }}
##### <font color="#FF0000">触发时间</font>:{{ GetCSTtime $alert.startsAt }}
##### <font color="#02b340">恢复时间</font>:{{ GetCSTtime $alert.endsAt }}
##### <font color="#FF0000">故障实例</font>:{{ $alert.labels.instance }}
##### <font color="#FF0000">告警详情</font>:{{ $alert.annotations.description }}
{{ else }}
#### [Prometheus告警信息]({{ $alert.generatorURL }})
##### <font color="#FF0000">告警名称</font>:[{{ $alert.labels.alertname }}]({{ $externalURL }})
##### <font color="#FF0000">告警级别</font>:{{ $alert.labels.severity }}
##### <font color="#FF0000">触发时间</font>:{{ GetCSTtime $alert.startsAt }}
##### <font color="#FF0000">故障实例</font>:{{ $alert.labels.instance }}
##### <font color="#FF0000">告警详情</font>:{{ $alert.annotations.description }}
{{ end }}
{{ end }}
{{/*
  Renders one markdown section per entry of .alerts: a recovery section when
  the entry's status is "resolved", a firing section otherwise.
  GetCSTtime is a helper supplied by the rendering tool; it is not defined here.
  This comment is attached to the first action so it adds no output.
*/}}{{ $externalURL := .externalURL }}{{ range $alert := .alerts }}
{{ if eq $alert.status "resolved" }}
#### [Prometheus恢复信息]({{ $alert.generatorURL }})
##### <font color="#FF0000">告警名称</font>:[{{ $alert.labels.alertname }}]({{ $externalURL }})
##### <font color="#FF0000">告警级别</font>:{{ $alert.labels.severity }}
##### <font color="#FF0000">触发时间</font>:{{ GetCSTtime $alert.startsAt }}
##### <font color="#02b340">恢复时间</font>:{{ GetCSTtime $alert.endsAt }}
##### <font color="#FF0000">故障实例</font>:{{ $alert.labels.instance }}
##### <font color="#FF0000">告警详情</font>:{{ $alert.annotations.description }}
{{ else }}
#### [Prometheus告警信息]({{ $alert.generatorURL }})
##### <font color="#FF0000">告警名称</font>:[{{ $alert.labels.alertname }}]({{ $externalURL }})
##### <font color="#FF0000">告警级别</font>:{{ $alert.labels.severity }}
##### <font color="#FF0000">触发时间</font>:{{ GetCSTtime $alert.startsAt }}
##### <font color="#FF0000">故障实例</font>:{{ $alert.labels.instance }}
##### <font color="#FF0000">告警详情</font>:{{ $alert.annotations.description }}
{{ end }}
{{ end }}
2.2
yaml
# ConfigMap data entry holding the "email.html" notification template.
# Firing and resolved alerts are iterated through .Alerts.Firing and
# .Alerts.Resolved so that a notification containing both kinds lists each
# alert only under its own heading. The recovery line prints EndsAt.
# Timestamps are shifted by 28800e9 ns (8 hours) before formatting.
template_email.tmpl: |-
  {{ define "email.html" }}
  {{- if gt (len .Alerts.Firing) 0 -}}{{ range .Alerts.Firing }}
  @报警<br>
  <strong>实例:</strong> {{ .Labels.instance }}<br>
  <strong>概述:</strong> {{ .Annotations.summary }}<br>
  <strong>详情:</strong> {{ .Annotations.description }}<br>
  <strong>时间:</strong> {{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}<br>
  {{ end }}{{ end -}}<br>
  {{- if gt (len .Alerts.Resolved) 0 -}}{{ range .Alerts.Resolved }}<br>
  @恢复<br>
  <strong>实例:</strong> {{ .Labels.instance }}<br>
  <strong>信息:</strong> {{ .Annotations.summary }}<br>
  <strong>恢复:</strong> {{ (.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}<br>
  {{ end }}{{ end -}}
  {{- end }}
# ConfigMap data entry holding the "email.html" notification template.
# Firing and resolved alerts are iterated through .Alerts.Firing and
# .Alerts.Resolved so that a notification containing both kinds lists each
# alert only under its own heading. The recovery line prints EndsAt.
# Timestamps are shifted by 28800e9 ns (8 hours) before formatting.
template_email.tmpl: |-
  {{ define "email.html" }}
  {{- if gt (len .Alerts.Firing) 0 -}}{{ range .Alerts.Firing }}
  @报警<br>
  <strong>实例:</strong> {{ .Labels.instance }}<br>
  <strong>概述:</strong> {{ .Annotations.summary }}<br>
  <strong>详情:</strong> {{ .Annotations.description }}<br>
  <strong>时间:</strong> {{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}<br>
  {{ end }}{{ end -}}<br>
  {{- if gt (len .Alerts.Resolved) 0 -}}{{ range .Alerts.Resolved }}<br>
  @恢复<br>
  <strong>实例:</strong> {{ .Labels.instance }}<br>
  <strong>信息:</strong> {{ .Annotations.summary }}<br>
  <strong>恢复:</strong> {{ (.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}<br>
  {{ end }}{{ end -}}
  {{- end }}