Skip to content

1. domain_exporter

1.1 介绍

检查域名状况

https://github.com/caarlos0/domain_exporter

指标

| 指标名称 | 指标类型 | 指标含义 |
| --- | --- | --- |
| domain_expiry_days | gauge | 显示了域名距离过期还剩余的天数 |
| domain_probe_success | gauge | 对域名检测是否成功。1 表示成功,0 表示失败 |
| domain_probe_duration_seconds | gauge | 完成一次 WHOIS 查询所需要的时间,单位为秒。如果延迟过高,说明连接 WHOIS 服务器的响应比较慢 |

1.2 部署

1.容器方式

bash
docker run -d --restart=always --name domain_exporter -p 9222:9222 registry.cn-zhangjiakou.aliyuncs.com/hsuing/domain_exporter:v1.23.0
docker run -d --restart=always --name domain_exporter -p 9222:9222 registry.cn-zhangjiakou.aliyuncs.com/hsuing/domain_exporter:v1.23.0

2.k8s方式

请看k8s章节

3.二进制方式

  • 创建目录
bash
# Create the install directory; -p also creates /opt/prometheus if it is missing
mkdir -p /opt/prometheus/domain_exporter
# Create the install directory; -p also creates /opt/prometheus if it is missing
mkdir -p /opt/prometheus/domain_exporter
  • 创建用户
bash
# Create a system account (-r) with /bin/false as its shell (-s) to run the exporter
useradd -rs /bin/false prometheus

# Give the install directory to that account; explicit user:group replaces the obsolescent "user." form
chown -R prometheus:prometheus /opt/prometheus/domain_exporter
# Create a system account (-r) with /bin/false as its shell (-s) to run the exporter
useradd -rs /bin/false prometheus

# Give the install directory to that account; explicit user:group replaces the obsolescent "user." form
chown -R prometheus:prometheus /opt/prometheus/domain_exporter
  • 下载
bash
# Download the v1.23.0 linux_amd64 release tarball
wget https://github.com/caarlos0/domain_exporter/releases/download/v1.23.0/domain_exporter_1.23.0_linux_amd64.tar.gz

# Extract the archive into the install directory
tar zxvf domain_exporter_1.23.0_linux_amd64.tar.gz -C /opt/prometheus/domain_exporter
# Download the v1.23.0 linux_amd64 release tarball
wget https://github.com/caarlos0/domain_exporter/releases/download/v1.23.0/domain_exporter_1.23.0_linux_amd64.tar.gz

# Extract the archive into the install directory
tar zxvf domain_exporter_1.23.0_linux_amd64.tar.gz -C /opt/prometheus/domain_exporter

4. 配置systemd

bash
# Write the systemd unit for domain_exporter.
# The quoted "EOF" delimiter disables shell expansion, so $MAINPID reaches the unit file literally.
# Documentation= points at the exporter's own repository, and the unit waits for
# network-online.target because the exporter needs working outbound networking for WHOIS lookups.
cat >/etc/systemd/system/domain_exporter.service <<"EOF"
[Unit]
Description=domain_exporter
Documentation=https://github.com/caarlos0/domain_exporter
Wants=network-online.target
After=network-online.target

[Service]
Type=simple
User=prometheus
Group=prometheus
WorkingDirectory=/opt/prometheus/domain_exporter
ExecStart=/opt/prometheus/domain_exporter/domain_exporter --bind=":9222"
ExecReload=/bin/kill -HUP $MAINPID
TimeoutStopSec=20s
KillMode=process
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF
# Write the systemd unit for domain_exporter.
# The quoted "EOF" delimiter disables shell expansion, so $MAINPID reaches the unit file literally.
# Documentation= points at the exporter's own repository, and the unit waits for
# network-online.target because the exporter needs working outbound networking for WHOIS lookups.
cat >/etc/systemd/system/domain_exporter.service <<"EOF"
[Unit]
Description=domain_exporter
Documentation=https://github.com/caarlos0/domain_exporter
Wants=network-online.target
After=network-online.target

[Service]
Type=simple
User=prometheus
Group=prometheus
WorkingDirectory=/opt/prometheus/domain_exporter
ExecStart=/opt/prometheus/domain_exporter/domain_exporter --bind=":9222"
ExecReload=/bin/kill -HUP $MAINPID
TimeoutStopSec=20s
KillMode=process
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF
  • 启动
bash
# Make systemd re-read unit files so the new service is known
systemctl daemon-reload
# Enable the service at boot and start it right away (--now)
systemctl enable --now domain_exporter
# Make systemd re-read unit files so the new service is known
systemctl daemon-reload
# Enable the service at boot and start it right away (--now)
systemctl enable --now domain_exporter
  • 测试
bash
# Probe one domain through the exporter; the URL is quoted so the shell does not glob-expand "?"
curl 'http://localhost:9222/probe?target=www.baidu.com'

# HELP domain_expiry_days time in days until the domain expires
# TYPE domain_expiry_days gauge
domain_expiry_days{domain="baidu.com"} 655
# HELP domain_probe_duration_seconds returns how long the probe took to complete in seconds
# TYPE domain_probe_duration_seconds gauge
domain_probe_duration_seconds{domain="baidu.com"} 3.30020302
# HELP domain_probe_success whether the probe was successful or not
# TYPE domain_probe_success gauge
domain_probe_success{domain="baidu.com"} 1
# Probe one domain through the exporter; the URL is quoted so the shell does not glob-expand "?"
curl 'http://localhost:9222/probe?target=www.baidu.com'

# HELP domain_expiry_days time in days until the domain expires
# TYPE domain_expiry_days gauge
domain_expiry_days{domain="baidu.com"} 655
# HELP domain_probe_duration_seconds returns how long the probe took to complete in seconds
# TYPE domain_probe_duration_seconds gauge
domain_probe_duration_seconds{domain="baidu.com"} 3.30020302
# HELP domain_probe_success whether the probe was successful or not
# TYPE domain_probe_success gauge
domain_probe_success{domain="baidu.com"} 1

1.3 配置prometheus

yaml
  # Probe-style scrape job: every entry under targets is a domain to check,
  # not a host to scrape. The relabel rules move the domain into the
  # ?target= query parameter and point the real scrape at the exporter.
  - job_name: "domain_exporter"
    metrics_path: /probe
    static_configs:
      - targets:
          - baidu.com
          - jd.com
    relabel_configs:
      # Pass the listed domain as the ?target= query parameter.
      - source_labels: [__address__]
        target_label: __param_target
      # Keep the domain as the instance label.
      - source_labels: [__param_target]
        target_label: instance
      # Scrape the exporter itself. The port must be the one the exporter is
      # published on (9222 in the deployment steps of this guide).
      - target_label: __address__
        replacement: "192.168.137.131:9222"  # domain_exporter address
  # Probe-style scrape job: every entry under targets is a domain to check,
  # not a host to scrape. The relabel rules move the domain into the
  # ?target= query parameter and point the real scrape at the exporter.
  - job_name: "domain_exporter"
    metrics_path: /probe
    static_configs:
      - targets:
          - baidu.com
          - jd.com
    relabel_configs:
      # Pass the listed domain as the ?target= query parameter.
      - source_labels: [__address__]
        target_label: __param_target
      # Keep the domain as the instance label.
      - source_labels: [__param_target]
        target_label: instance
      # Scrape the exporter itself. The port must be the one the exporter is
      # published on (9222 in the deployment steps of this guide).
      - target_label: __address__
        replacement: "192.168.137.131:9222"  # domain_exporter address
  • 热更
bash
# Ask the running Prometheus to reload its configuration.
# NOTE: the /-/reload endpoint is only served when Prometheus is started with --web.enable-lifecycle.
curl -X POST http://localhost:9090/-/reload
# Ask the running Prometheus to reload its configuration.
# NOTE: the /-/reload endpoint is only served when Prometheus is started with --web.enable-lifecycle.
curl -X POST http://localhost:9090/-/reload

1.4 配置告警

yaml
    # Alerting rules for domain monitoring.
    # NOTE(review): probe_http_duration_seconds and probe_ssl_earliest_cert_expiry
    # are not among the domain_exporter metrics listed in section 1.1; they are
    # blackbox_exporter metrics, so those two rules only fire if that exporter
    # is scraped as well.
    groups:
    - name: domain
      rules:
      # Average HTTP probe duration over the last minute is above 10 seconds.
      - alert: 域名SlowHttp
        expr: avg_over_time(probe_http_duration_seconds[1m]) > 10
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: HTTP请求超时 (instance {{ $labels.instance }})
          description: "HTTP请求超时超过10秒\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

      # The WHOIS probe has been failing for two hours.
      - alert: 域名检测失败
        expr: domain_probe_success == 0
        for: 2h
        labels:
          severity: warning
        annotations:
          summary: '{{ $labels.instance }} ,域名检测'
          description: '{{ $labels.domain }}, 域名检测失败,请及时查看!!!'

      # Fewer than 15 days remain before the domain registration expires.
      - alert: 域名过期 <15天
        expr: domain_expiry_days < 15
        for: 2h
        labels:
          severity: warning
        annotations:
          summary: '{{ $labels.instance }},域名过期'
          description: '{{ $labels.domain }},将在15天内过期,当前剩余天数:{{ $value }},请及时查看!!!'

      # Fewer than 5 days remain before the domain registration expires.
      - alert: 域名过期 <5天
        expr: domain_expiry_days < 5
        for: 2h
        labels:
          severity: warning
        annotations:
          summary: '{{ $labels.instance }},域名过期'
          description: '{{ $labels.domain }},将在5天内过期,当前剩余天数:{{ $value }},请及时查看!!!'

      # The earliest certificate expiry timestamp is already in the past.
      - alert: 域名证书已过期
        expr: probe_ssl_earliest_cert_expiry - time() <= 0
        for: 30m
        labels:
          severity: critical
        annotations:
          summary: 域名证书已过期 (instance {{ $labels.instance }})
          description: "域名证书已过期\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
    # Alerting rules for domain monitoring.
    # NOTE(review): probe_http_duration_seconds and probe_ssl_earliest_cert_expiry
    # are not among the domain_exporter metrics listed in section 1.1; they are
    # blackbox_exporter metrics, so those two rules only fire if that exporter
    # is scraped as well.
    groups:
    - name: domain
      rules:
      # Average HTTP probe duration over the last minute is above 10 seconds.
      - alert: 域名SlowHttp
        expr: avg_over_time(probe_http_duration_seconds[1m]) > 10
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: HTTP请求超时 (instance {{ $labels.instance }})
          description: "HTTP请求超时超过10秒\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

      # The WHOIS probe has been failing for two hours.
      - alert: 域名检测失败
        expr: domain_probe_success == 0
        for: 2h
        labels:
          severity: warning
        annotations:
          summary: '{{ $labels.instance }} ,域名检测'
          description: '{{ $labels.domain }}, 域名检测失败,请及时查看!!!'

      # Fewer than 15 days remain before the domain registration expires.
      - alert: 域名过期 <15天
        expr: domain_expiry_days < 15
        for: 2h
        labels:
          severity: warning
        annotations:
          summary: '{{ $labels.instance }},域名过期'
          description: '{{ $labels.domain }},将在15天内过期,当前剩余天数:{{ $value }},请及时查看!!!'

      # Fewer than 5 days remain before the domain registration expires.
      - alert: 域名过期 <5天
        expr: domain_expiry_days < 5
        for: 2h
        labels:
          severity: warning
        annotations:
          summary: '{{ $labels.instance }},域名过期'
          description: '{{ $labels.domain }},将在5天内过期,当前剩余天数:{{ $value }},请及时查看!!!'

      # The earliest certificate expiry timestamp is already in the past.
      - alert: 域名证书已过期
        expr: probe_ssl_earliest_cert_expiry - time() <= 0
        for: 30m
        labels:
          severity: critical
        annotations:
          summary: 域名证书已过期 (instance {{ $labels.instance }})
          description: "域名证书已过期\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"