1. domain_exporter
1.1 介绍
检查域名状况
https://github.com/caarlos0/domain_exporter
指标
指标名称 | 指标类型 | 指标含义 |
---|---|---|
domain_expiry_days | gauge | 显示了域名距离过期还剩余的天数 |
domain_probe_success | gauge | 对域名检测是否成功。1 表示成功,0 表示失败 |
domain_probe_duration_seconds | gauge | 完成一次 WHOIS 查询所需要的时间,单位为秒。如果延迟过高,说明连接 WHOIS 服务器的响应比较慢 |
1.2 部署
1.容器方式
bash
# Start domain_exporter as a detached container that restarts automatically
# and publishes the exporter's port 9222 on the host.
# Run this once only: the container name is fixed, so a second `docker run`
# with the same --name fails with a name conflict.
docker run -d --restart=always --name domain_exporter -p 9222:9222 registry.cn-zhangjiakou.aliyuncs.com/hsuing/domain_exporter:v1.23.0
2.k8s方式
请看k8s章节
3.二进制方式
- 创建目录
bash
# Create the install directory. -p also creates /opt/prometheus when it does
# not exist yet and makes the command safe to run again.
mkdir -p /opt/prometheus/domain_exporter
- 创建用户
bash
# Create the system account (no login shell) that the service runs as.
# The id check keeps this step re-runnable when the account already exists.
id -u prometheus >/dev/null 2>&1 || useradd -rs /bin/false prometheus
# Give the install directory to that account. The explicit user:group form
# replaces the legacy "user." dot separator and matches the User=/Group=
# values used in the systemd unit.
chown -R prometheus:prometheus /opt/prometheus/domain_exporter
- 下载
bash
# Download the v1.23.0 linux/amd64 release archive.
wget https://github.com/caarlos0/domain_exporter/releases/download/v1.23.0/domain_exporter_1.23.0_linux_amd64.tar.gz
# Extract it into the install directory.
tar zxvf domain_exporter_1.23.0_linux_amd64.tar.gz -C /opt/prometheus/domain_exporter
4. 配置systemd
bash
# Write the systemd unit for domain_exporter.
# The quoted "EOF" delimiter disables shell expansion inside the heredoc, so
# $MAINPID reaches the unit file literally and is resolved by systemd.
# The service runs as the prometheus user and listens on port 9222.
cat >/etc/systemd/system/domain_exporter.service <<"EOF"
[Unit]
Description=domain_exporter
Documentation=https://github.com/caarlos0/domain_exporter
After=network.target

[Service]
Type=simple
User=prometheus
Group=prometheus
WorkingDirectory=/opt/prometheus/domain_exporter
ExecStart=/opt/prometheus/domain_exporter/domain_exporter --bind=":9222"
ExecReload=/bin/kill -HUP $MAINPID
TimeoutStopSec=20s
KillMode=process
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF
- 启动
bash
# Make systemd pick up the newly written unit file.
systemctl daemon-reload
# Enable the service at boot and start it immediately.
systemctl enable --now domain_exporter
- 测试
bash
# Probe one domain through the exporter. The URL is quoted because "?" is a
# glob character: unquoted, some shells (e.g. zsh) refuse to run the command.
curl 'http://localhost:9222/probe?target=www.baidu.com'
# Sample output follows:
# HELP domain_expiry_days time in days until the domain expires
# TYPE domain_expiry_days gauge
domain_expiry_days{domain="baidu.com"} 655
# HELP domain_probe_duration_seconds returns how long the probe took to complete in seconds
# TYPE domain_probe_duration_seconds gauge
domain_probe_duration_seconds{domain="baidu.com"} 3.30020302
# HELP domain_probe_success whether the probe was successful or not
# TYPE domain_probe_success gauge
domain_probe_success{domain="baidu.com"} 1
1.3 配置prometheus
yaml
# Scrape job for domain_exporter; place this entry under `scrape_configs:`.
# Each target is a domain name, not a scrape address: the relabel rules pass
# it to the exporter as the `target` query parameter of /probe and then point
# the actual scrape at the exporter itself.
# Job names must be unique, so define this job only once.
- job_name: "domain_exporter"
  metrics_path: /probe
  static_configs:
    - targets:
        - baidu.com
        - jd.com
  relabel_configs:
    # Copy the listed domain into the ?target=<domain> URL parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Keep the domain as the instance label on the resulting series.
    - source_labels: [__param_target]
      target_label: instance
    # Send the scrape to the exporter. The port must be the one the exporter
    # listens on (9222 in the deployment steps); adjust host/port as needed.
    - target_label: __address__
      replacement: "192.168.137.131:9222"  # domain_exporter address
- 热更
bash
# Ask Prometheus to reload its configuration without a restart.
# The /-/reload endpoint only works when Prometheus was started with
# --web.enable-lifecycle; otherwise send SIGHUP to the process or restart it.
curl -X POST http://localhost:9090/-/reload
1.4 配置告警
yaml
# Alerting rules for domain monitoring.
# A rule file has exactly one top-level `groups:` key; define it once.
groups:
  - name: domain
    rules:
      # NOTE: probe_http_duration_seconds is not among the metrics shown in
      # the domain_exporter /probe output; it is a blackbox_exporter metric,
      # so this rule only fires when blackbox_exporter is scraped as well.
      - alert: 域名SlowHttp
        expr: avg_over_time(probe_http_duration_seconds[1m]) > 10
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: 'HTTP请求超时 (instance {{ $labels.instance }})'
          # Double quotes are required so that \n is read as a line break.
          description: "HTTP请求超时超过10秒\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # The WHOIS probe has been failing for two hours.
      - alert: 域名检测失败
        expr: domain_probe_success == 0
        for: 2h
        labels:
          severity: warning
        annotations:
          summary: '{{ $labels.instance }} ,域名检测'
          description: '{{ $labels.domain }}, 域名检测失败,请及时查看!!!'

      # Fewer than 15 days of registration remain.
      - alert: 域名过期 <15天
        expr: domain_expiry_days < 15
        for: 2h
        labels:
          severity: warning
        annotations:
          summary: '{{ $labels.instance }},域名过期'
          description: '{{ $labels.domain }},将在15天内过期,当前剩余天数:{{ $value }},请及时查看!!!'

      # Fewer than 5 days of registration remain.
      - alert: 域名过期 <5天
        expr: domain_expiry_days < 5
        for: 2h
        labels:
          severity: warning
        annotations:
          summary: '{{ $labels.instance }},域名过期'
          description: '{{ $labels.domain }},将在5天内过期,当前剩余天数:{{ $value }},请及时查看!!!'

      # NOTE: probe_ssl_earliest_cert_expiry is also a blackbox_exporter
      # metric; domain_exporter does not expose certificate data.
      - alert: 域名证书已过期
        expr: probe_ssl_earliest_cert_expiry - time() <= 0
        for: 30m
        labels:
          severity: critical
        annotations:
          summary: '域名证书已过期 (instance {{ $labels.instance }})'
          # Double quotes are required so that \n is read as a line break.
          description: "域名证书已过期\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"