You've already forked nrpe
add redis
This commit is contained in:
@@ -45,3 +45,14 @@ nrpe_mysql_longqueries_critical: 1200
|
||||
|
||||
nrpe_proc_age_warning: 400
|
||||
nrpe_proc_age_critical: 600
|
||||
|
||||
# Redis memory usage thresholds, in percent of maxmemory
# (passed as -w / -c to check_redis_health -x memory).
nrpe_redis_memory_warning: 80
|
||||
nrpe_redis_memory_critical: 90
|
||||
# Number of connected clients (check_redis_health -x clients).
nrpe_redis_connected_clients_warning: 200
|
||||
nrpe_redis_connected_clients_critical: 500
|
||||
# Cache hit rate in percent (check_redis_health -x hitrate). Lower is worse,
# so the critical value is below the warning value.
nrpe_redis_hitrate_warning: 80
|
||||
nrpe_redis_hitrate_critical: 50
|
||||
# Memory fragmentation ratio (check_redis_health -x frag).
# NOTE(review): quoted presumably so the decimal text is templated verbatim - confirm.
nrpe_redis_fragments_warning: '1.5'
|
||||
nrpe_redis_fragments_critical: '2.0'
|
||||
# Replication lag in seconds, compared with master_last_io_seconds_ago
# (check_redis_health -x replication).
nrpe_redis_replication_lag_warning: 10
|
||||
nrpe_redis_replication_lag_critical: 60
|
||||
211
files/nrpe/check_redis_health
Executable file
211
files/nrpe/check_redis_health
Executable file
@@ -0,0 +1,211 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Nagios plugin to check for redis health metrics.
|
||||
#
|
||||
# Copyright (c) 2026, GitHub Copilot
|
||||
#
|
||||
# This script is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This script is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
# Nagios plugin exit codes. They are fixed by the plugin API, so they are
# declared read-only to guard against accidental reassignment.
readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

# --- Default values (overridden by the command-line options) ---
CHECK_TYPE=""   # check selected with -x
WARN=0          # warning threshold (-w)
CRIT=0          # critical threshold (-c)
AUTH=""         # Redis password (-a); empty means no password is exported
|
||||
|
||||
# --- Help ---
#######################################
# Print the usage text on stdout, then terminate the plugin with the Nagios
# UNKNOWN status.
# Globals:   STATE_UNKNOWN (read)
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   never returns; exits with STATE_UNKNOWN (3 if it is unset)
#######################################
usage() {
  cat << EOF
Usage: $0 -x [type] -w <warning> -c <critical> [-a <password>]

Types de check (-x):
ping : Test de connexion simple (PONG)
memory : Utilisation mémoire en % (Ex: -w 80 -c 90)
frag : Ratio de fragmentation (Ex: -w 1.5 -c 2.0)
hitrate : Taux d'efficacité du cache en % (Ex: -w 80 -c 50)
persistence : État des sauvegardes RDB/AOF
replication : Statut Master/Slave et lag (Ex: -w 10 -c 60)
clients : Nombre de clients connectés (Ex: -w 200 -c 500)

Optionnelle:
-a : Mot de passe Redis (sera passé via REDISCLI_AUTH)
EOF
  # A bare `exit` would reuse cat's status (0 = OK) if STATE_UNKNOWN were
  # unset, so fall back to the literal UNKNOWN code.
  exit "${STATE_UNKNOWN:-3}"
}
|
||||
|
||||
# --- Argument parsing ---
#######################################
# Parse the plugin options, validate them, and export the Redis password.
# Globals:   CHECK_TYPE, WARN, CRIT, AUTH (written), REDISCLI_AUTH (exported)
# Arguments: the script's command-line arguments ("$@")
# Outputs:   a one-line UNKNOWN reason on stdout for bad thresholds
# Returns:   0 on success; leaves through usage() on -h, an unknown option,
#            a missing/unknown check type, or malformed thresholds
#######################################
parse_args() {
  local opt OPTIND=1
  local re_int='^[0-9]+$'
  local re_dec='^[0-9]+([.][0-9]+)?$'
  local re_threshold=""

  # '?' must not be used as an option letter in a getopts optstring; -h asks
  # for help, and any unknown option (including -?) still lands in '*'.
  while getopts "x:w:c:a:h" opt; do
    case "$opt" in
      x) CHECK_TYPE="$OPTARG" ;;
      w) WARN="$OPTARG" ;;
      c) CRIT="$OPTARG" ;;
      a) AUTH="$OPTARG" ;;
      *) usage ;;
    esac
  done

  # Reject a missing or misspelled check type here, before any instance
  # detection, so a bad -x is reported as UNKNOWN on every host.
  case "$CHECK_TYPE" in
    ping | persistence) ;;                                   # no thresholds
    frag) re_threshold=$re_dec ;;                            # decimal ratio
    memory | hitrate | replication | clients) re_threshold=$re_int ;;
    *) usage ;;
  esac

  # The integer checks compare with `[ -ge ]`, which errors out on anything
  # but an integer and would leave the check silently OK.
  if [ -n "$re_threshold" ]; then
    if ! [[ ${WARN:-0} =~ $re_threshold && ${CRIT:-0} =~ $re_threshold ]]; then
      echo "UNKNOWN: seuils -w/-c invalides pour le check '$CHECK_TYPE'"
      usage
    fi
  fi

  # Hand the password to redis-cli through the environment.
  # NOTE(review): -a still exposes the password in this script's own argv.
  if [ -n "$AUTH" ]; then
    export REDISCLI_AUTH="$AUTH"
  fi
  return 0
}

parse_args "$@"
|
||||
|
||||
# Overall plugin status and number of instances examined so far.
EXIT_CODE=$STATE_OK
INSTANCES_FOUND=0

# Per-state result lists (comma-separated "Port N (...)" entries).
CRIT_LIST=""
WARN_LIST=""
OK_LIST=""

# 1. Port detection

# Read `ss -tlnp` / `netstat -tlnp` output on stdin and print the unique port
# (text after the last ':' of column 4) of every line mentioning redis-server.
redis_listen_ports() {
  awk '/redis-server/ { n = split($4, part, ":"); print part[n] }' | sort -u
}

if command -v ss > /dev/null 2>&1; then
  PORTS=$(ss -tlnp | redis_listen_ports)
elif command -v netstat > /dev/null 2>&1; then
  PORTS=$(netstat -tlnp | redis_listen_ports)
else
  # Last resort: take any 4-5 digit number following a ':' on the process
  # lines of redis-server, dropping lines that mention grep itself.
  PORTS=$(ps -ef | grep 'redis-server' | grep -v grep \
    | grep -oP '(?<=:)\d{4,5}' | sort -u)
fi

# No listening redis-server found: report OK and stop.
if [ -z "$PORTS" ]; then
  echo "OK: Aucun Redis détecté"
  exit $STATE_OK
fi
|
||||
|
||||
# 2. Run the selected check against every detected instance

# Print the value of field $2 from the `redis-cli info` output given in $1
# ("name:value" lines ending in CRLF). The name must match exactly; prints
# nothing when the field is absent.
redis_info_field() {
  printf '%s\n' "$1" | tr -d '\r' \
    | awk -F: -v key="$2" '$1 == key { print $2; exit }'
}

# Succeed when $1 is an unsigned integer.
is_uint() { [[ $1 =~ ^[0-9]+$ ]]; }

# Succeed when $1 is an optionally negative integer.
is_int() { [[ $1 =~ ^-?[0-9]+$ ]]; }

# Succeed when $1 is an unsigned decimal number such as 1 or 1.25.
is_decimal() {
  local re='^[0-9]+([.][0-9]+)?$'
  [[ $1 =~ $re ]]
}

# Succeed when decimal $1 >= decimal $2. Uses awk so that bc is not needed.
float_ge() { awk -v a="$1" -v b="$2" 'BEGIN { exit !(a + 0 >= b + 0) }'; }

# Append message $2 to the comma-separated global list whose name is $1.
add_result() {
  local list_name=$1 item=$2
  if [ -z "${!list_name}" ]; then
    printf -v "$list_name" '%s' "$item"
  else
    printf -v "$list_name" '%s, %s' "${!list_name}" "$item"
  fi
}

# Raise the global EXIT_CODE to $1 unless it is already at least that high.
raise_exit_code() {
  if [ "${EXIT_CODE:-0}" -lt "$1" ]; then EXIT_CODE=$1; fi
}

#######################################
# Run check CHECK_TYPE on every port listed in PORTS and record the results.
# A metric that cannot be read or parsed is reported as CRITICAL instead of
# being silently counted as OK.
# Globals:   PORTS, CHECK_TYPE, WARN, CRIT, STATE_* (read);
#            EXIT_CODE, INSTANCES_FOUND, CRIT_LIST, WARN_LIST, OK_LIST (written)
# Arguments: none
# Returns:   0; leaves through usage() on an unknown CHECK_TYPE
#######################################
run_checks() {
  local PORT MSG CURR_STATUS VAL_STR
  local info used max used_h max_h value frag hits misses total
  local rdb aof role link lag slave_count clients max_clients

  for PORT in $PORTS; do
    # Skip privileged ports (the original 6379 exception could never apply,
    # since 6379 is above 1024).
    if [ "$PORT" -lt 1024 ]; then continue; fi

    INSTANCES_FOUND=$((INSTANCES_FOUND + 1))

    # An instance that does not answer PING is reported CRITICAL right away.
    if ! redis-cli -p "$PORT" PING 2>/dev/null | grep -q "PONG"; then
      add_result CRIT_LIST "Port $PORT (Erreur Connexion/Auth)"
      raise_exit_code "$STATE_CRITICAL"
      continue
    fi

    CURR_STATUS=$STATE_OK
    VAL_STR=""

    case "$CHECK_TYPE" in
      ping)
        VAL_STR="PONG"
        ;;
      memory)
        info=$(redis-cli -p "$PORT" info memory 2>/dev/null)
        used=$(redis_info_field "$info" used_memory)
        max=$(redis_info_field "$info" maxmemory)
        used_h=$(redis_info_field "$info" used_memory_human)
        max_h=$(redis_info_field "$info" maxmemory_human)
        if ! is_uint "$used"; then
          VAL_STR="Erreur lecture INFO memory"
          CURR_STATUS=$STATE_CRITICAL
        elif is_uint "$max" && [ "$max" -gt 0 ]; then
          value=$((used * 100 / max))
          VAL_STR="${value}% (Used:${used_h}, Max:${max_h})"
          if [ "$value" -ge "$CRIT" ]; then
            CURR_STATUS=$STATE_CRITICAL
          elif [ "$value" -ge "$WARN" ]; then
            CURR_STATUS=$STATE_WARNING
          fi
        else
          # maxmemory is 0 or not reported: there is no limit to compare with.
          VAL_STR="NoLimit (Used:${used_h})"
        fi
        ;;
      frag)
        info=$(redis-cli -p "$PORT" info memory 2>/dev/null)
        frag=$(redis_info_field "$info" mem_fragmentation_ratio)
        VAL_STR="Ratio:$frag"
        if ! is_decimal "$frag"; then
          VAL_STR="Erreur lecture INFO memory"
          CURR_STATUS=$STATE_CRITICAL
        elif float_ge "$frag" "$CRIT"; then
          CURR_STATUS=$STATE_CRITICAL
        elif float_ge "$frag" "$WARN"; then
          CURR_STATUS=$STATE_WARNING
        fi
        ;;
      hitrate)
        info=$(redis-cli -p "$PORT" info stats 2>/dev/null)
        hits=$(redis_info_field "$info" keyspace_hits)
        misses=$(redis_info_field "$info" keyspace_misses)
        if ! is_uint "$hits" || ! is_uint "$misses"; then
          VAL_STR="Erreur lecture INFO stats"
          CURR_STATUS=$STATE_CRITICAL
        else
          total=$((hits + misses))
          # With no lookups yet the hit rate counts as perfect.
          value=100
          if [ "$total" -gt 0 ]; then value=$((hits * 100 / total)); fi
          VAL_STR="${value}% (Hits:$hits, Misses:$misses)"
          # Lower is worse for a hit rate, hence -le.
          if [ "$value" -le "$CRIT" ]; then
            CURR_STATUS=$STATE_CRITICAL
          elif [ "$value" -le "$WARN" ]; then
            CURR_STATUS=$STATE_WARNING
          fi
        fi
        ;;
      persistence)
        info=$(redis-cli -p "$PORT" info persistence 2>/dev/null)
        rdb=$(redis_info_field "$info" rdb_last_bgsave_status)
        aof=$(redis_info_field "$info" aof_last_write_status)
        VAL_STR="RDB:$rdb, AOF:$aof"
        if [ "$rdb" != "ok" ] || [ "$aof" != "ok" ]; then
          CURR_STATUS=$STATE_CRITICAL
        fi
        ;;
      replication)
        info=$(redis-cli -p "$PORT" info replication 2>/dev/null)
        role=$(redis_info_field "$info" role)
        if [ -z "$role" ]; then
          VAL_STR="Erreur lecture INFO replication"
          CURR_STATUS=$STATE_CRITICAL
        elif [ "$role" == "slave" ]; then
          link=$(redis_info_field "$info" master_link_status)
          lag=$(redis_info_field "$info" master_last_io_seconds_ago)
          VAL_STR="Slave, Link:$link, Lag:${lag}s"
          # A lag that is not an integer is CRITICAL, like a link that is down.
          if [ "$link" != "up" ] || ! is_int "$lag" \
            || [ "$lag" -ge "$CRIT" ]; then
            CURR_STATUS=$STATE_CRITICAL
          elif [ "$lag" -ge "$WARN" ]; then
            CURR_STATUS=$STATE_WARNING
          fi
        else
          slave_count=$(redis_info_field "$info" connected_slaves)
          VAL_STR="Master, Slaves:$slave_count"
        fi
        ;;
      clients)
        info=$(redis-cli -p "$PORT" info clients 2>/dev/null)
        clients=$(redis_info_field "$info" connected_clients)
        max_clients=$(redis-cli -p "$PORT" config get maxclients 2>/dev/null \
          | tail -n1)
        if ! is_uint "$clients"; then
          VAL_STR="Erreur lecture INFO clients"
          CURR_STATUS=$STATE_CRITICAL
        else
          VAL_STR="$clients clients"
          # Show the limit only when CONFIG GET returned a number, so that an
          # error reply never ends up in the plugin output.
          if is_uint "$max_clients"; then
            VAL_STR="$VAL_STR (Max:$max_clients)"
          fi
          if [ "$clients" -ge "$CRIT" ]; then
            CURR_STATUS=$STATE_CRITICAL
          elif [ "$clients" -ge "$WARN" ]; then
            CURR_STATUS=$STATE_WARNING
          fi
        fi
        ;;
      *) usage ;;
    esac

    # Store the result in the list matching its state.
    MSG="Port $PORT ($VAL_STR)"
    case "$CURR_STATUS" in
      "$STATE_CRITICAL")
        add_result CRIT_LIST "$MSG"
        raise_exit_code "$STATE_CRITICAL"
        ;;
      "$STATE_WARNING")
        add_result WARN_LIST "$MSG"
        raise_exit_code "$STATE_WARNING"
        ;;
      "$STATE_OK")
        add_result OK_LIST "$MSG"
        ;;
    esac
  done
  return 0
}

run_checks
|
||||
|
||||
# Build the plugin output line and exit with the aggregated status.

# Everything healthy: one summary line listing every instance.
if ((EXIT_CODE == STATE_OK)); then
  echo "OK: All $INSTANCES_FOUND instance(s) are healthy ($CHECK_TYPE). $OK_LIST"
  exit $STATE_OK
fi

FINAL_MSG=""
if ((EXIT_CODE == STATE_WARNING)); then
  FINAL_MSG="WARNING: $WARN_LIST"
elif ((EXIT_CODE == STATE_CRITICAL)); then
  # Critical entries come first; warnings, if any, follow after " - ".
  FINAL_MSG="CRITICAL: ${CRIT_LIST}"
  if [ -n "$WARN_LIST" ]; then
    if [ -n "$CRIT_LIST" ]; then
      FINAL_MSG+=" - "
    fi
    FINAL_MSG+="WARNING: ${WARN_LIST}"
  fi
fi

echo "$FINAL_MSG"
exit "$EXIT_CODE"
|
||||
@@ -104,3 +104,22 @@ command[check_k8s_replicasets]=/usr/bin/sudo /usr/lib/nagios/plugins/check_k8s_r
|
||||
command[check_k8s_pod_restarts]=/usr/bin/sudo /usr/lib/nagios/plugins/check_k8s_pod_restarts
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
{# Redis checks. A command that takes -w/-c is rendered only when BOTH of its thresholds are defined, so it is never templated with an undefined value. #}
{% if nrpe_redis_memory_warning is defined or nrpe_redis_memory_critical is defined or nrpe_redis_persistence is defined %}
# redis
command[check_redis_health]=/usr/bin/sudo /usr/lib/nagios/plugins/check_redis_health -x ping
{% if nrpe_redis_memory_warning is defined and nrpe_redis_memory_critical is defined %}
command[check_redis_memory]=/usr/bin/sudo /usr/lib/nagios/plugins/check_redis_health -x memory -w {{ nrpe_redis_memory_warning }} -c {{ nrpe_redis_memory_critical }}
{% endif %}
command[check_redis_persistence]=/usr/bin/sudo /usr/lib/nagios/plugins/check_redis_health -x persistence
{% if nrpe_redis_connected_clients_warning is defined and nrpe_redis_connected_clients_critical is defined %}
command[check_redis_health_clients]=/usr/bin/sudo /usr/lib/nagios/plugins/check_redis_health -x clients -w {{ nrpe_redis_connected_clients_warning }} -c {{ nrpe_redis_connected_clients_critical }}
{% endif %}
{% if nrpe_redis_hitrate_warning is defined and nrpe_redis_hitrate_critical is defined %}
command[check_redis_health_hitrate]=/usr/bin/sudo /usr/lib/nagios/plugins/check_redis_health -x hitrate -w {{ nrpe_redis_hitrate_warning }} -c {{ nrpe_redis_hitrate_critical }}
{% endif %}
{% if nrpe_redis_fragments_warning is defined and nrpe_redis_fragments_critical is defined %}
command[check_redis_health_frag]=/usr/bin/sudo /usr/lib/nagios/plugins/check_redis_health -x frag -w {{ nrpe_redis_fragments_warning }} -c {{ nrpe_redis_fragments_critical }}
{% endif %}
{% if nrpe_redis_replication_lag_warning is defined and nrpe_redis_replication_lag_critical is defined %}
command[check_redis_health_replication]=/usr/bin/sudo /usr/lib/nagios/plugins/check_redis_health -x replication -w {{ nrpe_redis_replication_lag_warning }} -c {{ nrpe_redis_replication_lag_critical }}
{% endif %}
{% endif %}
|
||||
@@ -12,3 +12,4 @@ nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/check_k8s_pki_certs
|
||||
nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/check_k8s_pv_pvc
|
||||
nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/check_k8s_replicasets
|
||||
nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/check_k8s_pod_restarts
|
||||
nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/check_redis_health
|
||||
Reference in New Issue
Block a user