1. redis_exporter
1.1 介绍
Prometheus exporter for Valkey metrics (Redis-compatible), including cluster deployments. Supports Valkey and Redis 2.x, 3.x, 4.x, 5.x, 6.x, and 7.x.
1.2 部署
- 下载
bash
# Download the v1.66.0 linux-amd64 release tarball (run once; a second wget
# would save a redundant copy as *.tar.gz.1).
wget https://github.com/oliver006/redis_exporter/releases/download/v1.66.0/redis_exporter-v1.66.0.linux-amd64.tar.gz
- 解压
bash
# Create the install directory and extract only the redis_exporter binary into it.
# --strip-components=1 drops the top-level archive directory so the binary lands
# directly at /opt/redis_exporter/redis_exporter.
mkdir -p /opt/redis_exporter
tar zxvf redis_exporter-v1.66.0.linux-amd64.tar.gz --strip-components=1 -C /opt/redis_exporter/ redis_exporter-v1.66.0.linux-amd64/redis_exporter
- 配置systemd
bash
# Write the systemd unit for redis_exporter.
# The heredoc delimiter is quoted ('EOF') so the shell does NOT expand $MAINPID
# while writing the file; with an unquoted delimiter the unit would end up with
# an empty "ExecReload=/bin/kill -HUP " line.
# NOTE(review): the Redis password is passed on the command line, so it is
# visible in the unit file and in the process list; consider supplying it via
# the REDIS_PASSWORD environment variable instead.
tee /lib/systemd/system/redis_exporter.service <<'EOF'
[Unit]
Description=redis_exporter
Documentation=https://github.com/oliver006/redis_exporter
After=network.target
[Service]
Type=simple
User=root
ExecStart=/opt/redis_exporter/redis_exporter -redis.addr 10.103.236.200:9379 -redis.password 0f4649985edfdf11ae10a -web.listen-address :9121
ExecReload=/bin/kill -HUP $MAINPID
Restart=always
[Install]
WantedBy=multi-user.target
EOF
- 启动
bash
# Pick up the new unit file, then enable the service at boot and start it now.
systemctl daemon-reload
systemctl enable --now redis_exporter.service
# Check last so the output reflects the final state of the service.
systemctl status redis_exporter.service
- 访问：http://10.103.236.200:9121/metrics
1.3 配置Prometheus采集
- 配置
yaml
###################### redis ######################
# Entries for the `scrape_configs:` list in prometheus.yml.
# Each job_name must be unique; listing the same job twice makes Prometheus
# reject the configuration.

# Scrape the Redis instance through the exporter's /scrape endpoint.
- job_name: 'redis-cluster'
  scrape_interval: 15s
  scrape_timeout: 15s
  static_configs:
    - targets:
        - redis://10.103.236.200:9379
  metrics_path: /scrape
  relabel_configs:
    # Pass the Redis URL to the exporter as the `target` query parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Keep the Redis URL as the `instance` label on the resulting series.
    - source_labels: [__param_target]
      target_label: instance
    # Send the HTTP request itself to the exporter, not to Redis.
    - target_label: __address__
      replacement: 10.103.236.200:9121

# Scrape the exporter's own /metrics endpoint.
- job_name: 'redis_exporter'
  scrape_interval: 15s
  scrape_timeout: 15s
  static_configs:
    - targets:
        - 10.103.236.200:9121
###################### redis ######################
ip和端口根据环境进行修改
- 热更服务
bash
# Ask Prometheus to reload its configuration (one request is enough).
# The /-/reload endpoint only works when Prometheus runs with --web.enable-lifecycle.
curl -XPOST http://prometheus.ikubernetes.net/-/reload
1.报警规则
yaml
############## Redis_rules ###########
# Alerting rules for redis_exporter metrics, stored as one literal block under
# the `redis.rules` key.
redis.rules: |
  groups:
    - name: "Redis.rules"
      rules:
        - alert: Redis_Down
          expr: redis_up == 0
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: Redis down (instance {{ $labels.instance }})
            description: "Redis instance is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        - alert: Redis_DisconnectedSlaves
          expr: count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 1
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: Redis disconnected slaves (instance {{ $labels.instance }})
            description: "Redis not replicating for all slaves. Consider reviewing the redis replication status.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        # delta() over a 1m window is negative only for about one minute after a
        # replica drops, so the condition can never hold for 10m; fire immediately.
        - alert: Redis_Replication_Broken
          expr: delta(redis_connected_slaves[1m]) < 0
          for: 0m
          labels:
            severity: critical
          annotations:
            summary: Redis replication broken (instance {{ $labels.instance }})
            description: "Redis instance lost a slave\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        - alert: Redis_Cluster_Flapping
          expr: changes(redis_connected_slaves[1m]) > 1
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: Redis cluster flapping (instance {{ $labels.instance }})
            description: "Changes have been detected in Redis replica connection. This can occur when replica nodes lose connection to the master and reconnect (a.k.a flapping).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        - alert: Redis_MissingBackup
          expr: time() - redis_rdb_last_save_timestamp_seconds > 60 * 60 * 24
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: Redis missing backup (instance {{ $labels.instance }})
            description: "Redis has not been backuped for 24 hours\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        # System-memory pressure is independent of maxmemory, so no maxmemory guard here.
        - alert: Redis_OutOfSystemMemory
          expr: redis_memory_used_bytes / redis_total_system_memory_bytes * 100 > 90
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: Redis out of system memory (instance {{ $labels.instance }})
            description: "Redis is running out of system memory (> 90%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        # Guard against maxmemory = 0 (unlimited): dividing by 0 yields +Inf,
        # which would otherwise always satisfy "> 90".
        - alert: Redis_OutOfConfiguredMaxmemory
          expr: redis_memory_used_bytes / redis_memory_max_bytes * 100 > 90 and on(instance) redis_memory_max_bytes > 0
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: Redis out of configured maxmemory (instance {{ $labels.instance }})
            description: "Redis is running out of configured maxmemory (> 90%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        - alert: Redis_TooManyConnections
          expr: redis_connected_clients > 1000
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: Redis too many connections (instance {{ $labels.instance }})
            description: "Redis instance has too many connections\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        # The description states the same threshold as the expression (fires below 1 client).
        - alert: Redis_NotEnoughConnections
          expr: redis_connected_clients < 1
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: Redis not enough connections (instance {{ $labels.instance }})
            description: "Redis instance should have more connections (>= 1)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
        - alert: Redis_RejectedConnections
          expr: increase(redis_rejected_connections_total[1m]) > 0
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: Redis rejected connections (instance {{ $labels.instance }})
            description: "Some connections to Redis has been rejected\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
参考:
https://github.com/bdossantos/prometheus-alert-rules/blob/master/rules/redis.yml
1.4 grafana
https://github.com/ops-center/grafana-dashboards/tree/master/redis
https://appscode.com/blog/post/monitor-redis-with-grafana-dashboard-in-aws/
https://logit.io/blog/post/top-grafana-dashboards-and-visualisations/
2. k8s环境
2.1 创建secrets
bash
# Create the credentials Secret once; repeating `kubectl create` for the same
# name fails with AlreadyExists. Replace USER / PASSWORD with the real values.
kubectl create -n monitor secret generic redis-exporter-auth \
  --from-literal=user=USER \
  --from-literal=password=PASSWORD
1.单独redis_exporter
yaml
# Standalone redis_exporter Deployment that connects to the `redis` service.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis-exporter
  # Must match the namespace of the redis-exporter-auth Secret, because
  # secretKeyRef can only resolve Secrets in the pod's own namespace.
  namespace: monitor
spec:
  selector:
    matchLabels:
      app: redis-exporter
  replicas: 1
  template:
    metadata:
      labels:
        app: redis-exporter
      annotations:
        prometheus.io/port: "9121"
        prometheus.io/scrape: "true"
    spec:
      containers:
        - name: redis-exporter
          # Registry host corrected from the misspelled "regiregistry.".
          image: registry.cn-zhangjiakou.aliyuncs.com/hsuing/redis_exporter:v6
          ports:
            - containerPort: 9121
          env:
            - name: TZ
              value: "Asia/Shanghai"
            - name: REDIS_ADDR
              value: 'redis://redis:6379'
            # Credentials come from the redis-exporter-auth Secret.
            - name: REDIS_USER
              valueFrom:
                secretKeyRef:
                  name: redis-exporter-auth
                  key: user
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: redis-exporter-auth
                  key: password
          resources:
            limits:
              memory: "256Mi"
              cpu: "256m"
2.redis和redis_exporter
yaml
# Redis with a redis_exporter sidecar in the same pod, plus a Service that
# exposes both the Redis port and the metrics port.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis-cart
spec:
  selector:
    matchLabels:
      app: redis-cart
  template:
    metadata:
      labels:
        app: redis-cart
    spec:
      containers:
        - name: redis
          image: redis:alpine
          ports:
            - containerPort: 6379
          readinessProbe:
            periodSeconds: 5
            tcpSocket:
              port: 6379
          livenessProbe:
            periodSeconds: 5
            tcpSocket:
              port: 6379
          volumeMounts:
            - mountPath: /data
              name: redis-data
          resources:
            limits:
              memory: 256Mi
              cpu: 125m
            requests:
              cpu: 70m
              memory: 200Mi
        - name: redis-exporter
          # Pinned to the release used elsewhere in this document rather than
          # the mutable `latest` tag, so rollouts are reproducible.
          image: oliver006/redis_exporter:v1.66.0
          env:
            - name: TZ
              value: "Asia/Shanghai"
          securityContext:
            runAsUser: 59000
            runAsGroup: 59000
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
          resources:
            requests:
              cpu: 100m
              memory: 100Mi
          ports:
            - containerPort: 9121
              name: metrics
          readinessProbe:
            periodSeconds: 5
            tcpSocket:
              port: 9121
      volumes:
        # emptyDir: the data lives only as long as the pod does.
        - name: redis-data
          emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
  name: redis-cart
  labels:
    app: redis-cart
spec:
  type: ClusterIP
  selector:
    app: redis-cart
  ports:
    - name: redis
      port: 6379
      targetPort: 6379
    - name: metrics
      port: 9121
      targetPort: 9121
https://blog.csdn.net/sinat_14840559/article/details/127676664
https://sysdig.com/blog/redis-prometheus/
https://redis.io/docs/latest/integrate/prometheus-with-redis-cloud/