Files
nrpe/files/nrpe/check_k8s_pod_restarts
T
2025-12-31 15:17:51 +01:00

49 lines
1.6 KiB
Bash
Executable File

#!/usr/bin/env bash
# check_k8s_pod_restarts
#
# Nagios/NRPE plugin: checks whether any pod restarts (Kubernetes events with
# reason "Killing") were recorded in the last X minutes, across all namespaces.
#
# Exit status: 0=OK, 2=CRITICAL, 3=UNKNOWN
#
# Usage:
#   sudo /usr/lib/nagios/plugins/check_k8s_pod_restarts [minutes]
#
# Arguments:
#   minutes  look-back window as a non-negative integer (default: 5)
#
# Requires kubectl and GNU date (`date -d`).
# All plugin output goes to stdout, which is what the monitoring side displays.

MINUTES=${1:-5}

# Validate early so a bad argument is reported as such instead of surfacing
# later as a misleading date error (or, for negative values, as a cutoff that
# lies in the future).
if [[ ! "$MINUTES" =~ ^[0-9]+$ ]]; then
  printf 'UNKNOWN - invalid minutes value "%s" (expected a non-negative integer)\n' "$MINUTES"
  exit 3
fi

# Require kubectl
if ! command -v kubectl >/dev/null 2>&1; then
  echo "UNKNOWN - kubectl not found"
  exit 3
fi

# Cutoff as epoch seconds (GNU date)
if ! cutoff=$(date -d "$MINUTES minutes ago" +%s 2>/dev/null); then
  echo "UNKNOWN - date parsing failed (on macOS use gdate from coreutils)"
  exit 3
fi

# Fetch the events up front so that a kubectl failure (API unreachable,
# missing permissions, ...) is reported as UNKNOWN rather than silently
# looking like "no restarts".
# stderr is discarded on purpose: on an empty result kubectl prints
# "No resources found" there, which must not end up in the parsed data.
if ! events=$(kubectl get events --all-namespaces \
  --field-selector reason=Killing \
  -o custom-columns='NAMESPACE:.metadata.namespace,NAME:.involvedObject.name,LAST:.lastTimestamp,MESSAGE:.message' \
  --no-headers 2>/dev/null); then
  echo "UNKNOWN - kubectl get events failed"
  exit 3
fi

matches=()
# custom-columns output is padded with spaces (not tabs), so split on the
# default IFS: the first three columns never contain whitespace and the last
# variable receives the remainder of the line, i.e. the full message.
while read -r ns pod last msg; do
  # Skip empty or incomplete lines
  [[ -z "$last" ]] && continue
  # Convert the timestamp to epoch seconds; rows whose timestamp cannot be
  # parsed (for example "<none>") are skipped.
  if ! ts=$(date -d "$last" +%s 2>/dev/null); then
    continue
  fi
  if (( ts >= cutoff )); then
    # Store the final display line directly; the message is capped at 300
    # characters to keep the plugin output bounded.
    matches+=("${ns}/${pod} at ${last} : ${msg:0:300}")
  fi
done <<< "$events"

if (( ${#matches[@]} == 0 )); then
  echo "OK - no pod restarts in the last ${MINUTES} minutes"
  exit 0
fi

echo "CRITICAL - ${#matches[@]} pod restarts in the last ${MINUTES} minutes:"
printf ' - %s\n' "${matches[@]}"
exit 2