#!/usr/bin/env bash
# check_k8s_pv_pvc
# Checks the state of Kubernetes PersistentVolumes (PV) and PersistentVolumeClaims (PVC).
# Exit codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
#
# Usage examples:
#   sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --crit 1                      # CRITICAL if >=1 problem
#   sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --ignore-ns kube-system       # ignore kube-system
#   sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --pvc-age-min 10 --crit 2     # ignore PVCs younger than 10 min, CRIT if >=2
#   sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --no-pv                       # PVCs only (both PV and PVC are checked by default)
#
set -euo pipefail

# Defaults. WARN, CRIT and PVC_AGE_MIN may be pre-set through the environment;
# the namespace lists and the PV/PVC toggles are only changed by command-line
# options.
WARN=${WARN:-0}
CRIT=${CRIT:-1}
IGNORE_NS=""
INCLUDE_NS=""
PVC_AGE_MIN=${PVC_AGE_MIN:-5}   # minutes: PVCs younger than this are ignored
CHECK_PV=1
CHECK_PVC=1

# These three values are later evaluated in arithmetic context, where a
# non-numeric string is treated as an expression. Refuse anything that is not
# a plain non-negative integer and answer UNKNOWN instead.
for _cfg_name in WARN CRIT PVC_AGE_MIN; do
  if [[ ! "${!_cfg_name}" =~ ^[0-9]+$ ]]; then
    echo "UNKNOWN - ${_cfg_name} must be a non-negative integer (got '${!_cfg_name}')"
    exit 3
  fi
  # Force base 10 so a value such as "08" is not rejected later as bad octal.
  printf -v "$_cfg_name" '%d' "$((10#${!_cfg_name}))"
done
unset _cfg_name
|
|
|
|
#######################################
# Print the command-line help.
# Arguments: none
# Outputs:   help text on stdout; $0 is expanded as the program name
#######################################
print_usage() {
  cat <<EOF
Usage: $0 [options]
Options:
  --warn N              seuil WARN si >= N objets en erreur (default 0)
  --crit M              seuil CRIT si >= M objets en erreur (default 1)
  --ignore-ns a,b,c     namespaces à ignorer (comma separated)
  --namespaces a,b      limiter aux namespaces donnés (comma separated)
  --pvc-age-min N       ignore PVC créés il y a moins de N minutes (default 5)
  --no-pv               disable PV checks
  --no-pvc              disable PVC checks
  -h, --help            affiche cette aide
EOF
}
|
|
|
|
#######################################
# Report a command-line error as Nagios UNKNOWN and stop.
# Arguments: $1 - message
# Outputs:   "UNKNOWN - <message>" on stdout
# Returns:   does not return; exits 3
#######################################
_cli_fail() {
  echo "UNKNOWN - $1"
  exit 3
}

#######################################
# Make sure an option is followed by a value.
# Arguments: $1 - option name, $2 - number of arguments left (including $1)
#######################################
_need_value() {
  (( $2 >= 2 )) || _cli_fail "option $1 requires a value"
}

#######################################
# Make sure an option value is a plain non-negative integer.
# The thresholds are later evaluated in arithmetic context, so arbitrary
# strings must never get that far.
# Arguments: $1 - option name, $2 - value
#######################################
_require_uint() {
  [[ "$2" =~ ^[0-9]+$ ]] \
    || _cli_fail "option $1 expects a non-negative integer, got '$2'"
}

# Parse command-line options. Every value option is checked for presence
# first: with `set -u`, touching a missing "$2" would abort with exit code 1,
# which Nagios would read as WARNING.
while (( $# > 0 )); do
  case "$1" in
    --warn)
      _need_value "$1" "$#"
      _require_uint "$1" "$2"
      WARN=$((10#$2))   # base 10 forced: "08" must not be parsed as octal
      shift 2
      ;;
    --crit)
      _need_value "$1" "$#"
      _require_uint "$1" "$2"
      CRIT=$((10#$2))
      shift 2
      ;;
    --pvc-age-min)
      _need_value "$1" "$#"
      _require_uint "$1" "$2"
      PVC_AGE_MIN=$((10#$2))
      shift 2
      ;;
    --ignore-ns)
      _need_value "$1" "$#"
      IGNORE_NS="$2"
      shift 2
      ;;
    --namespaces)
      _need_value "$1" "$#"
      INCLUDE_NS="$2"
      shift 2
      ;;
    --no-pv)
      CHECK_PV=0
      shift
      ;;
    --no-pvc)
      CHECK_PVC=0
      shift
      ;;
    -h|--help)
      print_usage
      exit 3
      ;;
    *)
      echo "Unknown arg: $1"
      print_usage
      exit 3
      ;;
  esac
done
|
|
|
|
# Every check below shells out to kubectl; if it cannot be resolved there is
# nothing to evaluate, so answer UNKNOWN immediately.
command -v kubectl >/dev/null 2>&1 || {
  echo "UNKNOWN - kubectl not found"
  exit 3
}
|
|
|
|
#######################################
# Turn a comma-separated namespace list into an anchored ERE alternation.
# Surrounding whitespace is trimmed and empty entries are dropped, so
# "a, b,,c" yields "^a$|^b$|^c$". Entries are inserted as-is (not escaped).
# Arguments: $1 - comma-separated list (may be empty)
# Outputs:   the pattern on stdout; nothing when the list has no entry
#######################################
_ns_list_to_pattern() {
  local list="$1"
  local -a entries=()
  local entry pattern=""

  [[ -n "$list" ]] || return 0

  IFS=',' read -ra entries <<< "$list"
  for entry in "${entries[@]}"; do
    entry="${entry#"${entry%%[![:space:]]*}"}"   # strip leading whitespace
    entry="${entry%"${entry##*[![:space:]]}"}"   # strip trailing whitespace
    [[ -n "$entry" ]] || continue
    pattern+="${pattern:+|}^${entry}\$"
  done

  printf '%s' "$pattern"
}

# Namespace filters used by ns_allowed: an empty pattern means "no filter".
ignore_pattern=$(_ns_list_to_pattern "${IGNORE_NS:-}")
include_pattern=$(_ns_list_to_pattern "${INCLUDE_NS:-}")

# A --namespaces value made only of separators/blanks would otherwise turn
# into "no filter" and silently widen the check to every namespace.
if [[ -n "${INCLUDE_NS:-}" && -z "$include_pattern" ]]; then
  echo "UNKNOWN - --namespaces contains no namespace name"
  exit 3
fi

# Reference time (epoch seconds) for the PVC age filter.
now_s=$(date +%s)

# Collected problem descriptions. Initialised explicitly so that reading the
# array under `set -u` never trips on an unbound variable.
problems=()
|
|
|
|
#######################################
# Decide whether a namespace passes the include/ignore filters.
# Uses the shell's own ERE matching instead of `echo | egrep`: egrep is
# obsolescent (recent GNU grep prints a warning on stderr) and the pipeline
# cost two processes per checked object.
# Globals:   include_pattern, ignore_pattern (read; empty means "no filter")
# Arguments: $1 - namespace name
# Returns:   0 if the namespace must be checked, 1 if it is filtered out
#######################################
ns_allowed() {
  local ns="$1"
  local include_re="${include_pattern-}"
  local ignore_re="${ignore_pattern-}"

  # An include list is exclusive: anything not matching it is out.
  if [[ -n "$include_re" && ! "$ns" =~ $include_re ]]; then
    return 1
  fi

  # The ignore list wins even over an explicit include.
  if [[ -n "$ignore_re" && "$ns" =~ $ignore_re ]]; then
    return 1
  fi

  return 0
}
|
|
|
|
#######################################
# Evaluate one PVC record and record a problem if the claim is unhealthy.
# The record is split by hand on TAB: `IFS=$'\t' read` would collapse
# consecutive tabs and shift the fields whenever volumeName is empty.
# Globals:   problems (appended); now_s, PVC_AGE_MIN (read); calls ns_allowed
# Arguments: $1 - "namespace<TAB>name<TAB>phase<TAB>volumeName<TAB>creationTimestamp"
# Returns:   0 always
#######################################
_check_pvc_record() {
  local rest="$1"
  local -a f=()
  while [[ "$rest" == *$'\t'* ]]; do
    f+=("${rest%%$'\t'*}")
    rest="${rest#*$'\t'}"
  done
  f+=("$rest")

  local ns="${f[0]-}" name="${f[1]-}" phase="${f[2]-}"
  local vol="${f[3]-}" created="${f[4]-}"
  local created_s age_min

  # Namespace filter.
  ns_allowed "$ns" || return 0

  # Skip freshly created PVCs to avoid noise during normal provisioning.
  # An unparsable timestamp counts as epoch 0, i.e. "old", so it is not skipped.
  if [[ -n "$created" ]] && (( PVC_AGE_MIN > 0 )); then
    created_s=$(date -d "$created" +%s 2>/dev/null || echo 0)
    age_min=$(( (now_s - created_s) / 60 ))
    (( age_min >= PVC_AGE_MIN )) || return 0
  fi

  # Anything other than Bound (Pending, Lost, ...) is a problem; a Bound claim
  # must additionally reference a volume.
  if [[ "$phase" != "Bound" ]]; then
    problems+=("PVC ${ns}/${name} phase=${phase} created=${created}")
  elif [[ -z "$vol" || "$vol" == "null" ]]; then
    problems+=("PVC ${ns}/${name} Bound but no volumeName assigned")
  fi
  return 0
}

# 1) Check PVCs
if (( CHECK_PVC == 1 )); then
  # A failing kubectl (API unreachable, access denied, ...) must not look like
  # "no PVC, everything fine": report UNKNOWN instead of continuing.
  if ! pvc_raw=$(kubectl get pvc -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"\t"}{.metadata.name}{"\t"}{.status.phase}{"\t"}{.spec.volumeName}{"\t"}{.metadata.creationTimestamp}{"\n"}{end}' 2>/dev/null); then
    echo "UNKNOWN - kubectl get pvc failed (cluster unreachable or access denied?)"
    exit 3
  fi

  while IFS= read -r pvc_line; do
    [[ -n "$pvc_line" ]] || continue
    _check_pvc_record "$pvc_line"
  done <<< "$pvc_raw"
fi
|
|
|
|
# "namespace/name" of every existing PVC; filled once before PVs are checked so
# that the existence test for a bound claim needs no kubectl call per volume.
declare -A known_pvcs=()

#######################################
# Evaluate one PV record and record a problem if the volume is unhealthy.
# Released/Failed volumes are reported whether or not claimRef is still set:
# Kubernetes keeps claimRef on a Released volume, so testing the phase only
# for claim-less volumes would miss them.
# Available (unbound) volumes are deliberately not reported; add a branch for
# phase "Available" here to treat them as orphaned.
# Globals:   problems (appended); known_pvcs (read); calls ns_allowed
# Arguments: $1 - "name<TAB>phase<TAB>capacity<TAB>claimNs<TAB>claimName<TAB>reclaimPolicy"
# Returns:   0 always
#######################################
_check_pv_record() {
  local rest="$1"
  local -a f=()
  while [[ "$rest" == *$'\t'* ]]; do
    f+=("${rest%%$'\t'*}")
    rest="${rest#*$'\t'}"
  done
  f+=("$rest")

  # f[2] (capacity) is part of the record but not needed by any check.
  local name="${f[0]-}" phase="${f[1]-}"
  local claim_ns="${f[3]-}" claim_name="${f[4]-}" reclaim="${f[5]-}"

  if [[ "$claim_ns" == "null" ]]; then claim_ns=""; fi
  if [[ "$claim_name" == "null" ]]; then claim_name=""; fi

  # A volume tied to a claim follows the namespace filter of that claim.
  if [[ -n "$claim_ns" ]]; then
    ns_allowed "$claim_ns" || return 0
  fi

  case "$phase" in
    Released|Failed)
      if [[ -n "$claim_ns" ]]; then
        problems+=("PV ${name} phase=${phase} reclaim=${reclaim} (claim ${claim_ns}/${claim_name})")
      else
        problems+=("PV ${name} phase=${phase} reclaim=${reclaim} (no claim)")
      fi
      ;;
    Bound)
      if [[ -z "$claim_ns" || -z "$claim_name" ]]; then
        problems+=("PV ${name} Bound but missing claimRef (phase=${phase})")
      elif [[ -z "${known_pvcs["${claim_ns}/${claim_name}"]+x}" ]]; then
        problems+=("PV ${name} Bound to ${claim_ns}/${claim_name} but PVC resource not found")
      fi
      ;;
  esac
  return 0
}

# 2) Check PVs
if (( CHECK_PV == 1 )); then
  # A failing kubectl must surface as UNKNOWN, never as an empty (healthy) list.
  if ! pv_raw=$(kubectl get pv -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.status.phase}{"\t"}{.spec.capacity.storage}{"\t"}{.spec.claimRef.namespace}{"\t"}{.spec.claimRef.name}{"\t"}{.spec.persistentVolumeReclaimPolicy}{"\n"}{end}' 2>/dev/null); then
    echo "UNKNOWN - kubectl get pv failed (cluster unreachable or access denied?)"
    exit 3
  fi

  if [[ -n "$pv_raw" ]]; then
    # One listing of all claims replaces one `kubectl get pvc` per bound PV and
    # keeps a kubectl error from being mistaken for "PVC not found".
    if ! pvc_index=$(kubectl get pvc -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"/"}{.metadata.name}{"\n"}{end}' 2>/dev/null); then
      echo "UNKNOWN - kubectl get pvc failed while verifying PV claims"
      exit 3
    fi
    while IFS= read -r pvc_key; do
      [[ -n "$pvc_key" ]] || continue
      known_pvcs["$pvc_key"]=1
    done <<< "$pvc_index"

    while IFS= read -r pv_line; do
      [[ -n "$pv_line" ]] || continue
      _check_pv_record "$pv_line"
    done <<< "$pv_raw"
  fi
fi
|
|
|
|
# Final verdict: map the number of collected problems to a Nagios state.
n_problems=${#problems[@]}

# Nothing collected: plain OK.
(( n_problems > 0 )) || {
  echo "OK - PV/PVC checks passed"
  exit 0
}

# CRIT is tested first so it wins when both thresholds are reached.
if (( n_problems >= CRIT )); then
  verdict="CRITICAL"
  rc=2
elif (( n_problems >= WARN )); then
  verdict="WARNING"
  rc=1
else
  echo "OK - ${n_problems} PV/PVC problems but below thresholds"
  exit 0
fi

echo "${verdict} - ${n_problems} PV/PVC problems: ${problems[*]}"
exit "$rc"