Skip to content

1. redis_exporter

1.1 介绍

Prometheus exporter for ValKey (Redis-compatible) metrics, including cluster deployments. Supports ValKey and Redis 2.x, 3.x, 4.x, 5.x, 6.x, and 7.x.

1.2 部署

  • 下载
bash
# Download the redis_exporter v1.66.0 release tarball (linux/amd64).
wget https://github.com/oliver006/redis_exporter/releases/download/v1.66.0/redis_exporter-v1.66.0.linux-amd64.tar.gz
  • 解压
bash
# Create the install directory (no error if it already exists).
mkdir -p /opt/redis_exporter

# Extract only the exporter binary; --strip-components=1 drops the top-level
# release directory so the binary lands directly in /opt/redis_exporter/.
tar zxvf redis_exporter-v1.66.0.linux-amd64.tar.gz --strip-components=1 -C /opt/redis_exporter/ redis_exporter-v1.66.0.linux-amd64/redis_exporter
  • 配置systemd
bash
# Write the systemd unit for redis_exporter.
# The heredoc delimiter is quoted ('EOF') so the body is written verbatim.
# With an unquoted delimiter the invoking shell expands $MAINPID (normally to
# an empty string), leaving "ExecReload=/bin/kill -HUP " without a PID.
# NOTE(review): the Redis address and password below are environment-specific
# and end up in the unit file and the process arguments; replace them with
# your own values.
tee /lib/systemd/system/redis_exporter.service <<'EOF'

[Unit]
Description=redis_exporter
Documentation=https://github.com/oliver006/redis_exporter
After=network.target
[Service]
Type=simple
User=root
ExecStart=/opt/redis_exporter/redis_exporter -redis.addr 10.103.236.200:9379 -redis.password 0f4649985edfdf11ae10a  -web.listen-address :9121
ExecReload=/bin/kill -HUP $MAINPID
Restart=always
[Install]
WantedBy=multi-user.target
EOF
  • 启动
bash
# Make systemd pick up the newly written unit file.
systemctl daemon-reload

# Start the exporter now.
systemctl start redis_exporter.service

# Check that the service is running.
systemctl status redis_exporter.service

# Start the exporter automatically at boot.
systemctl enable redis_exporter.service
  • 访问

http://ip:9121/metrics

1.3 配置Prometheus采集

  • 配置
yaml
###################### redis ######################
    # Add each job only once: Prometheus refuses to load a configuration
    # that contains two scrape configs with the same job_name.
    #
    # Scrape the Redis instance through the exporter's /scrape endpoint.
    - job_name: 'redis-cluster'
      scrape_interval: 15s
      scrape_timeout: 15s
      static_configs:
      - targets:
        - redis://10.103.236.200:9379
      metrics_path: /scrape
      relabel_configs:
      # Pass the redis:// target as the "target" URL parameter.
      - source_labels: [__address__]
        target_label: __param_target
      # Keep the Redis address as the instance label.
      - source_labels: [__param_target]
        target_label: instance
      # Send the actual HTTP request to the exporter.
      - target_label: __address__
        replacement: 10.103.236.200:9121
    # Scrape the exporter's own /metrics endpoint.
    - job_name: 'redis_exporter'
      scrape_interval: 15s
      scrape_timeout: 15s
      static_configs:
      - targets:
        - 10.103.236.200:9121
    ###################### redis ######################

ip和端口根据环境进行修改

  • 热更服务
bash
 # Ask Prometheus to reload its configuration (one request is enough).
 # The /-/reload endpoint is only served when Prometheus runs with
 # --web.enable-lifecycle.
 curl -XPOST  http://prometheus.ikubernetes.net/-/reload

1.报警规则

yaml
 ############## Redis_rules ###########
  # Define the redis.rules key only once; a repeated mapping key is invalid
  # YAML and most parsers silently keep just the last value.
  redis.rules: |
    groups:
    - name: "Redis.rules"
      rules:
      - alert: Redis_Down
        expr: redis_up == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: Redis down (instance {{ $labels.instance }})
          description: "Redis instance is down\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      - alert: Redis_DisconnectedSlaves
        expr: count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: Redis disconnected slaves (instance {{ $labels.instance }})
          description: "Redis not replicating for all slaves. Consider reviewing the redis replication status.\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      # delta() over a 1m window is only negative for about a minute after a
      # replica is lost, so any long "for" would keep this alert from ever
      # firing. Fire immediately instead.
      - alert: Redis_Replication_Broken
        expr: delta(redis_connected_slaves[1m]) < 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Redis replication broken (instance {{ $labels.instance }})
          description: "Redis instance lost a slave\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      - alert: Redis_Cluster_Flapping
        expr: changes(redis_connected_slaves[1m]) > 1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: Redis cluster flapping (instance {{ $labels.instance }})
          description: "Changes have been detected in Redis replica connection. This can occur when replica nodes lose connection to the master and reconnect (a.k.a flapping).\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      - alert: Redis_MissingBackup
        expr: time() - redis_rdb_last_save_timestamp_seconds > 60 * 60 * 24
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Redis missing backup (instance {{ $labels.instance }})
          description: "Redis has not been backuped for 24 hours\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      # System memory usage does not depend on maxmemory being configured,
      # so this rule carries no redis_memory_max_bytes guard.
      - alert: Redis_OutOfSystemMemory
        expr: redis_memory_used_bytes / redis_total_system_memory_bytes * 100 > 90
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Redis out of system memory (instance {{ $labels.instance }})
          description: "Redis is running out of system memory (> 90%)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      # redis_memory_max_bytes is 0 when maxmemory is not configured; without
      # the guard the division yields +Inf and the alert fires permanently.
      - alert: Redis_OutOfConfiguredMaxmemory
        expr: redis_memory_used_bytes / redis_memory_max_bytes * 100 > 90 and on(instance) redis_memory_max_bytes > 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Redis out of configured maxmemory (instance {{ $labels.instance }})
          description: "Redis is running out of configured maxmemory (> 90%)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      - alert: Redis_TooManyConnections
        expr: redis_connected_clients > 1000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Redis too many connections (instance {{ $labels.instance }})
          description: "Redis instance has too many connections\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      # The description states the same threshold as the expression (< 1).
      - alert: Redis_NotEnoughConnections
        expr: redis_connected_clients < 1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Redis not enough connections (instance {{ $labels.instance }})
          description: "Redis instance should have more connections (>= 1)\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"
      - alert: Redis_RejectedConnections
        expr: increase(redis_rejected_connections_total[1m]) > 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Redis rejected connections (instance {{ $labels.instance }})
          description: "Some connections to Redis has been rejected\n  VALUE = {{ $value }}\n  LABELS = {{ $labels }}"

参考:

https://github.com/bdossantos/prometheus-alert-rules/blob/master/rules/redis.yml

1.4 grafana

image-20241124160018023

https://github.com/ops-center/grafana-dashboards/tree/master/redis

https://appscode.com/blog/post/monitor-redis-with-grafana-dashboard-in-aws/

https://logit.io/blog/post/top-grafana-dashboards-and-visualisations/

2. k8s环境

2.1 创建secrets

bash
# Create the credentials Secret once; running "kubectl create" a second time
# fails because the Secret already exists.
# Replace USER and PASSWORD with the real Redis credentials.
kubectl create -n monitor secret generic redis-exporter-auth \
  --from-literal=user=USER \
  --from-literal=password=PASSWORD

1.单独redis_exporter

yaml
# Standalone redis_exporter Deployment that scrapes an existing Redis service.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis-exporter
  # A secretKeyRef can only resolve Secrets in the Pod's own namespace, and
  # redis-exporter-auth is created in "monitor".
  namespace: monitor
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis-exporter
  template:
    metadata:
      labels:
        app: redis-exporter
      annotations:
        prometheus.io/port: "9121"
        prometheus.io/scrape: "true"
    spec:
      containers:
      - name: redis-exporter
        # NOTE(review): the original host read "regiregistry.cn-zhangjiakou...",
        # which looks like a typo for the Alibaba Cloud registry endpoint;
        # confirm against your registry.
        image: registry.cn-zhangjiakou.aliyuncs.com/hsuing/redis_exporter:v6
        ports:
        - containerPort: 9121
        env:
        - name: TZ
          value: "Asia/Shanghai"
        # Address of the Redis instance to monitor; adjust to your service.
        - name: REDIS_ADDR
          value: 'redis://redis:6379'
        # Credentials are read from the redis-exporter-auth Secret.
        - name: REDIS_USER
          valueFrom:
            secretKeyRef:
              name: redis-exporter-auth
              key: user
        - name: REDIS_PASSWORD
          valueFrom:
            secretKeyRef:
              name: redis-exporter-auth
              key: password
        resources:
          limits:
            memory: "256Mi"
            cpu: "256m"

2.redis和redis_exporter

yaml
# Redis with redis_exporter running as a sidecar in the same Pod.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis-cart
spec:
  selector:
    matchLabels:
      app: redis-cart
  template:
    metadata:
      labels:
        app: redis-cart
    spec:
      containers:
      - name: redis
        image: redis:alpine
        ports:
        - containerPort: 6379
        readinessProbe:
          periodSeconds: 5
          tcpSocket:
            port: 6379
        livenessProbe:
          periodSeconds: 5
          tcpSocket:
            port: 6379
        volumeMounts:
        - mountPath: /data
          name: redis-data
        resources:
          limits:
            memory: 256Mi
            cpu: 125m
          requests:
            cpu: 70m
            memory: 200Mi
      - name: redis-exporter
        # Pinned to the release used elsewhere in this guide instead of the
        # floating "latest" tag, so restarts do not silently change versions.
        image: oliver006/redis_exporter:v1.66.0
        env:
        - name: TZ
          value: "Asia/Shanghai"
        # Run unprivileged with all capabilities dropped.
        securityContext:
          runAsUser: 59000
          runAsGroup: 59000
          allowPrivilegeEscalation: false
          capabilities:
            drop:
            - ALL
        resources:
          requests:
            cpu: 100m
            memory: 100Mi
        ports:
        - containerPort: 9121
          name: metrics
        readinessProbe:
          periodSeconds: 5
          tcpSocket:
            port: 9121
      volumes:
      # emptyDir: Redis data does not survive Pod rescheduling.
      - name: redis-data
        emptyDir: {}
---
# Exposes both the Redis port and the exporter's metrics port.
apiVersion: v1
kind: Service
metadata:
  name: redis-cart
  labels:
    app: redis-cart
spec:
  type: ClusterIP
  selector:
    app: redis-cart
  ports:
  - name: redis
    port: 6379
    targetPort: 6379
  - name: metrics
    port: 9121
    targetPort: 9121

https://blog.csdn.net/sinat_14840559/article/details/127676664

https://sysdig.com/blog/redis-prometheus/

https://redis.io/docs/latest/integrate/prometheus-with-redis-cloud/

https://wiki.eryajf.net/pages/2497.html#_5-配置-grafana-的模板