You've already forked nrpe
add redis
This commit is contained in:
@@ -45,3 +45,14 @@ nrpe_mysql_longqueries_critical: 1200
|
|||||||
|
|
||||||
nrpe_proc_age_warning: 400
|
nrpe_proc_age_warning: 400
|
||||||
nrpe_proc_age_critical: 600
|
nrpe_proc_age_critical: 600
|
||||||
|
|
||||||
|
nrpe_redis_memory_warning: 80
|
||||||
|
nrpe_redis_memory_critical: 90
|
||||||
|
nrpe_redis_connected_clients_warning: 200
|
||||||
|
nrpe_redis_connected_clients_critical: 500
|
||||||
|
nrpe_redis_hitrate_warning: 80
|
||||||
|
nrpe_redis_hitrate_critical: 50
|
||||||
|
nrpe_redis_fragments_warning: '1.5'
|
||||||
|
nrpe_redis_fragments_critical: '2.0'
|
||||||
|
nrpe_redis_replication_lag_warning: 10
|
||||||
|
nrpe_redis_replication_lag_critical: 60
|
||||||
211
files/nrpe/check_redis_health
Executable file
211
files/nrpe/check_redis_health
Executable file
@@ -0,0 +1,211 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Nagios plugin to check for redis health metrics.
|
||||||
|
#
|
||||||
|
# Copyright (c) 2026, GitHub Copilot
|
||||||
|
#
|
||||||
|
# This script is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This script is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
# Nagios plugin exit codes. They are never reassigned by the script, so they
# are declared read-only to catch accidental overwrites.
readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

# --- Default values (replaced by the -x / -w / -c / -a options when given) ---
CHECK_TYPE=""
WARN=0
CRIT=0
AUTH=""
|
||||||
|
|
||||||
|
# --- Help ---
# usage
#   Print the command-line help on stdout and terminate the script with the
#   Nagios UNKNOWN status. This function never returns.
#
#   The synopsis marks -x as mandatory (the script refuses to run without a
#   check type) and -w / -c as optional (the ping and persistence checks do
#   not read any threshold).
usage() {
  cat << EOF
Usage: $0 -x <type> [-w <warning>] [-c <critical>] [-a <password>]

Types de check (-x):
ping : Test de connexion simple (PONG)
memory : Utilisation mémoire en % (Ex: -w 80 -c 90)
frag : Ratio de fragmentation (Ex: -w 1.5 -c 2.0)
hitrate : Taux d'efficacité du cache en % (Ex: -w 80 -c 50)
persistence : État des sauvegardes RDB/AOF
replication : Statut Master/Slave et lag (Ex: -w 10 -c 60)
clients : Nombre de clients connectés (Ex: -w 200 -c 500)

Optionnelle:
-a : Mot de passe Redis (sera passé via REDISCLI_AUTH)
EOF
  # Fall back to 3 (UNKNOWN) if the constant is not set in this shell.
  exit "${STATE_UNKNOWN:-3}"
}
|
||||||
|
|
||||||
|
# --- Argument parsing ---

# is_uint VALUE
#   Succeed (status 0) when VALUE is made only of decimal digits.
is_uint() {
  [[ "$1" =~ ^[0-9]+$ ]]
}

# is_decimal VALUE
#   Succeed when VALUE is a non-negative integer or decimal number
#   (for example 2 or 1.5).
is_decimal() {
  [[ "$1" =~ ^[0-9]+(\.[0-9]+)?$ ]]
}

# '?' is not a valid getopts option character, so it is not listed in the
# option string; "-?" and every other unknown option still reach the
# catch-all branch below and print the help.
while getopts "x:w:c:a:" opt; do
  case "$opt" in
    x) CHECK_TYPE="$OPTARG" ;;
    w) WARN="$OPTARG" ;;
    c) CRIT="$OPTARG" ;;
    a) AUTH="$OPTARG" ;;
    *) usage ;;
  esac
done

if [ -z "$CHECK_TYPE" ]; then usage; fi

# Validate the check type and its thresholds before touching any instance, so
# that a typo is reported as UNKNOWN instead of surfacing later as a shell
# error (or not at all when no Redis instance is running).
case "$CHECK_TYPE" in
  ping | persistence)
    # These checks do not use -w / -c.
    ;;
  memory | hitrate | replication | clients)
    if ! is_uint "$WARN" || ! is_uint "$CRIT"; then
      echo "UNKNOWN: -w/-c must be non-negative integers for check '$CHECK_TYPE' (got -w '$WARN' -c '$CRIT')"
      exit "${STATE_UNKNOWN:-3}"
    fi
    ;;
  frag)
    if ! is_decimal "$WARN" || ! is_decimal "$CRIT"; then
      echo "UNKNOWN: -w/-c must be non-negative numbers for check '$CHECK_TYPE' (got -w '$WARN' -c '$CRIT')"
      exit "${STATE_UNKNOWN:-3}"
    fi
    ;;
  *)
    usage
    ;;
esac

# Hand the password to redis-cli through the environment.
# NOTE(review): a password given with -a is still visible in this script's own
# argument list; leaving -a out and presetting REDISCLI_AUTH in the
# environment avoids that, since the variable is only overwritten when -a is
# used.
if [ -n "$AUTH" ]; then
  export REDISCLI_AUTH="$AUTH"
fi
|
||||||
|
|
||||||
|
# Number of instances examined and worst status seen so far.
INSTANCES_FOUND=0
EXIT_CODE=$STATE_OK

# Per-severity accumulators for the per-port result messages.
OK_LIST=""
WARN_LIST=""
CRIT_LIST=""

# 1. Port detection
# Collect the distinct listening ports owned by a redis-server process. The
# first available tool wins: ss, then netstat, then a scan of the process
# list. For ss/netstat the port is whatever follows the last ':' of the local
# address column (4th field).
if command -v ss > /dev/null 2>&1; then
  PORTS=$(
    ss -tlnp |
      awk '/redis-server/ { n = split($4, part, ":"); print part[n] }' |
      sort -u
  )
elif command -v netstat > /dev/null 2>&1; then
  PORTS=$(
    netstat -tlnp |
      awk '/redis-server/ { n = split($4, part, ":"); print part[n] }' |
      sort -u
  )
else
  # Last resort: take 4-5 digit numbers preceded by ':' from the redis-server
  # lines of the process list.
  PORTS=$(
    ps -ef |
      grep 'redis-server' |
      grep -v grep |
      grep -oP '(?<=:)\d{4,5}' |
      sort -u
  )
fi

# Nothing to check on this host: report OK and stop.
if [[ -z "$PORTS" ]]; then
  echo "OK: Aucun Redis détecté"
  exit $STATE_OK
fi
|
||||||
|
|
||||||
|
# 2. Run the requested check on every detected instance

# info_field TEXT KEY
#   Print the value of the "KEY:value" line found in TEXT (the reply of a
#   redis-cli INFO call), without the trailing carriage return. KEY must match
#   from the start of the line. Only the first match is printed; nothing is
#   printed when KEY is absent.
info_field() {
  printf '%s\n' "$1" |
    awk -v key="$2" '
      index($0, key ":") == 1 {
        sub(/\r$/, "")
        print substr($0, length(key) + 2)
        exit
      }'
}

# is_uint VALUE
#   Succeed when VALUE is made only of decimal digits.
is_uint() {
  [[ "$1" =~ ^[0-9]+$ ]]
}

# is_int VALUE
#   Succeed when VALUE is an optionally negative integer.
is_int() {
  [[ "$1" =~ ^-?[0-9]+$ ]]
}

# float_ge A B
#   Succeed when A >= B, compared as numbers. Returns 2 (treated as "not
#   reached" by callers) when either argument is not a plain number. The
#   comparison is done by awk, so bc is not required.
float_ge() {
  local num_re='^-?[0-9]+(\.[0-9]+)?$'
  [[ "$1" =~ $num_re && "$2" =~ $num_re ]] || return 2
  awk -v a="$1" -v b="$2" 'BEGIN { exit !((a + 0) >= (b + 0)) }'
}

# join_msg CURRENT NEW
#   Print NEW appended to the comma-separated list CURRENT (or NEW alone when
#   CURRENT is empty).
join_msg() {
  if [ -z "$1" ]; then
    printf '%s' "$2"
  else
    printf '%s, %s' "$1" "$2"
  fi
}

for PORT in $PORTS; do
  # Ignore tokens that are not port numbers, and ports below 1024.
  if ! is_uint "$PORT" || [ "$PORT" -lt 1024 ]; then continue; fi

  INSTANCES_FOUND=$((INSTANCES_FOUND + 1))
  CURR_STATUS=$STATE_OK
  VAL_STR=""

  if ! redis-cli -p "$PORT" PING 2>/dev/null | grep -q "PONG"; then
    # The instance did not answer PONG: it counts as critical and the
    # requested check is not attempted.
    CURR_STATUS=$STATE_CRITICAL
    VAL_STR="Erreur Connexion/Auth"
  else
    case "$CHECK_TYPE" in
      ping)
        VAL_STR="PONG"
        ;;
      memory)
        MEM_INFO=$(redis-cli -p "$PORT" info memory 2>/dev/null)
        USED=$(info_field "$MEM_INFO" used_memory)
        MAX=$(info_field "$MEM_INFO" maxmemory)
        USED_H=$(info_field "$MEM_INFO" used_memory_human)
        MAX_H=$(info_field "$MEM_INFO" maxmemory_human)
        if ! is_uint "$USED" || ! is_uint "$MAX"; then
          # The metric could not be read: flag it instead of reporting OK.
          VAL_STR="Erreur lecture INFO memory"
          CURR_STATUS=$STATE_CRITICAL
        elif [ "$MAX" -gt 0 ]; then
          VALUE=$((USED * 100 / MAX))
          VAL_STR="${VALUE}% (Used:${USED_H}, Max:${MAX_H})"
          if [ "$VALUE" -ge "$CRIT" ]; then
            CURR_STATUS=$STATE_CRITICAL
          elif [ "$VALUE" -ge "$WARN" ]; then
            CURR_STATUS=$STATE_WARNING
          fi
        else
          # maxmemory is 0: there is no limit to compute a percentage against.
          VAL_STR="NoLimit (Used:${USED_H})"
        fi
        ;;
      frag)
        FRAG=$(info_field "$(redis-cli -p "$PORT" info memory 2>/dev/null)" mem_fragmentation_ratio)
        VAL_STR="Ratio:$FRAG"
        if ! float_ge "$FRAG" 0 && ! float_ge 0 "$FRAG"; then
          # Neither >= 0 nor <= 0: the value is missing or not a number.
          VAL_STR="Erreur lecture INFO memory"
          CURR_STATUS=$STATE_CRITICAL
        elif float_ge "$FRAG" "$CRIT"; then
          CURR_STATUS=$STATE_CRITICAL
        elif float_ge "$FRAG" "$WARN"; then
          CURR_STATUS=$STATE_WARNING
        fi
        ;;
      hitrate)
        STATS=$(redis-cli -p "$PORT" info stats 2>/dev/null)
        HITS=$(info_field "$STATS" keyspace_hits)
        MISSES=$(info_field "$STATS" keyspace_misses)
        if ! is_uint "$HITS" || ! is_uint "$MISSES"; then
          VAL_STR="Erreur lecture INFO stats"
          CURR_STATUS=$STATE_CRITICAL
        else
          TOTAL=$((HITS + MISSES))
          # With no lookups at all the hit rate is reported as 100%.
          VALUE=100
          if [ "$TOTAL" -gt 0 ]; then VALUE=$((HITS * 100 / TOTAL)); fi
          VAL_STR="${VALUE}% (Hits:$HITS, Misses:$MISSES)"
          # Lower is worse here, hence the <= comparisons.
          if [ "$VALUE" -le "$CRIT" ]; then
            CURR_STATUS=$STATE_CRITICAL
          elif [ "$VALUE" -le "$WARN" ]; then
            CURR_STATUS=$STATE_WARNING
          fi
        fi
        ;;
      persistence)
        PERS=$(redis-cli -p "$PORT" info persistence 2>/dev/null)
        RDB=$(info_field "$PERS" rdb_last_bgsave_status)
        AOF=$(info_field "$PERS" aof_last_write_status)
        VAL_STR="RDB:$RDB, AOF:$AOF"
        if [ "$RDB" != "ok" ] || [ "$AOF" != "ok" ]; then
          CURR_STATUS=$STATE_CRITICAL
        fi
        ;;
      replication)
        REPL=$(redis-cli -p "$PORT" info replication 2>/dev/null)
        ROLE=$(info_field "$REPL" role)
        if [ "$ROLE" == "slave" ]; then
          LINK=$(info_field "$REPL" master_link_status)
          LAG=$(info_field "$REPL" master_last_io_seconds_ago)
          VAL_STR="Slave, Link:$LINK, Lag:${LAG}s"
          # A lag that cannot be read is treated like a broken link.
          if [ "$LINK" != "up" ] || ! is_int "$LAG" || [ "$LAG" -ge "$CRIT" ]; then
            CURR_STATUS=$STATE_CRITICAL
          elif [ "$LAG" -ge "$WARN" ]; then
            CURR_STATUS=$STATE_WARNING
          fi
        elif [ -z "$ROLE" ]; then
          VAL_STR="Erreur lecture INFO replication"
          CURR_STATUS=$STATE_CRITICAL
        else
          SLAVE_COUNT=$(info_field "$REPL" connected_slaves)
          VAL_STR="Master, Slaves:$SLAVE_COUNT"
        fi
        ;;
      clients)
        CLIENTS=$(info_field "$(redis-cli -p "$PORT" info clients 2>/dev/null)" connected_clients)
        MAX_CLIENTS=$(redis-cli -p "$PORT" config get maxclients 2>/dev/null | tail -n1)
        if ! is_uint "$CLIENTS"; then
          VAL_STR="Erreur lecture INFO clients"
          CURR_STATUS=$STATE_CRITICAL
        else
          VAL_STR="$CLIENTS clients"
          if [ -n "$MAX_CLIENTS" ]; then VAL_STR="$VAL_STR (Max:$MAX_CLIENTS)"; fi
          if [ "$CLIENTS" -ge "$CRIT" ]; then
            CURR_STATUS=$STATE_CRITICAL
          elif [ "$CLIENTS" -ge "$WARN" ]; then
            CURR_STATUS=$STATE_WARNING
          fi
        fi
        ;;
      *)
        usage
        ;;
    esac
  fi

  # Store the result in the list matching its severity and raise the global
  # exit code if this instance is worse than what was seen so far.
  MSG="Port $PORT ($VAL_STR)"
  case "$CURR_STATUS" in
    "$STATE_CRITICAL")
      CRIT_LIST=$(join_msg "$CRIT_LIST" "$MSG")
      if [ "$EXIT_CODE" -lt "$STATE_CRITICAL" ]; then EXIT_CODE=$STATE_CRITICAL; fi
      ;;
    "$STATE_WARNING")
      WARN_LIST=$(join_msg "$WARN_LIST" "$MSG")
      if [ "$EXIT_CODE" -lt "$STATE_WARNING" ]; then EXIT_CODE=$STATE_WARNING; fi
      ;;
    "$STATE_OK")
      OK_LIST=$(join_msg "$OK_LIST" "$MSG")
      ;;
  esac
done
|
||||||
|
|
||||||
|
# Build the plugin output line and exit with the aggregated status.

# Everything healthy: one summary line followed by the per-port details.
if [[ "$EXIT_CODE" -eq $STATE_OK ]]; then
  echo "OK: All $INSTANCES_FOUND instance(s) are healthy ($CHECK_TYPE). $OK_LIST"
  exit $STATE_OK
fi

FINAL_MSG=""
if [[ "$EXIT_CODE" -eq $STATE_CRITICAL ]]; then
  # Critical entries first, then any warnings, separated by " - " when both
  # lists are populated.
  FINAL_MSG="CRITICAL: ${CRIT_LIST}"
  if [[ -n "$WARN_LIST" ]]; then
    FINAL_MSG+="${CRIT_LIST:+ - }WARNING: ${WARN_LIST}"
  fi
elif [[ "$EXIT_CODE" -eq $STATE_WARNING ]]; then
  FINAL_MSG="WARNING: $WARN_LIST"
fi

echo "$FINAL_MSG"
exit "$EXIT_CODE"
|
||||||
@@ -104,3 +104,22 @@ command[check_k8s_replicasets]=/usr/bin/sudo /usr/lib/nagios/plugins/check_k8s_r
|
|||||||
command[check_k8s_pod_restarts]=/usr/bin/sudo /usr/lib/nagios/plugins/check_k8s_pod_restarts
|
command[check_k8s_pod_restarts]=/usr/bin/sudo /usr/lib/nagios/plugins/check_k8s_pod_restarts
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
{# Redis checks. Every thresholded command is emitted only when BOTH of its variables are defined, so a partially defined pair never renders an undefined value. #}
{% if nrpe_redis_memory_warning is defined or nrpe_redis_memory_critical is defined or nrpe_redis_persistence is defined %}
# redis
command[check_redis_health]=/usr/bin/sudo /usr/lib/nagios/plugins/check_redis_health -x ping
{% if nrpe_redis_memory_warning is defined and nrpe_redis_memory_critical is defined %}
command[check_redis_memory]=/usr/bin/sudo /usr/lib/nagios/plugins/check_redis_health -x memory -w {{ nrpe_redis_memory_warning }} -c {{ nrpe_redis_memory_critical }}
{% endif %}
command[check_redis_persistence]=/usr/bin/sudo /usr/lib/nagios/plugins/check_redis_health -x persistence
{% if nrpe_redis_connected_clients_warning is defined and nrpe_redis_connected_clients_critical is defined %}
command[check_redis_health_clients]=/usr/bin/sudo /usr/lib/nagios/plugins/check_redis_health -x clients -w {{ nrpe_redis_connected_clients_warning }} -c {{ nrpe_redis_connected_clients_critical }}
{% endif %}
{% if nrpe_redis_hitrate_warning is defined and nrpe_redis_hitrate_critical is defined %}
command[check_redis_health_hitrate]=/usr/bin/sudo /usr/lib/nagios/plugins/check_redis_health -x hitrate -w {{ nrpe_redis_hitrate_warning }} -c {{ nrpe_redis_hitrate_critical }}
{% endif %}
{% if nrpe_redis_fragments_warning is defined and nrpe_redis_fragments_critical is defined %}
command[check_redis_health_frag]=/usr/bin/sudo /usr/lib/nagios/plugins/check_redis_health -x frag -w {{ nrpe_redis_fragments_warning }} -c {{ nrpe_redis_fragments_critical }}
{% endif %}
{% if nrpe_redis_replication_lag_warning is defined and nrpe_redis_replication_lag_critical is defined %}
command[check_redis_health_replication]=/usr/bin/sudo /usr/lib/nagios/plugins/check_redis_health -x replication -w {{ nrpe_redis_replication_lag_warning }} -c {{ nrpe_redis_replication_lag_critical }}
{% endif %}
{% endif %}
|
||||||
@@ -12,3 +12,4 @@ nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/check_k8s_pki_certs
|
|||||||
nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/check_k8s_pv_pvc
|
nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/check_k8s_pv_pvc
|
||||||
nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/check_k8s_replicasets
|
nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/check_k8s_replicasets
|
||||||
nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/check_k8s_pod_restarts
|
nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/check_k8s_pod_restarts
|
||||||
|
nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/check_redis_health
|
||||||
Reference in New Issue
Block a user