You've already forked nrpe
add k8s check & config
This commit is contained in:
135
files/nrpe/check_k8s_replicasets
Normal file
135
files/nrpe/check_k8s_replicasets
Normal file
@@ -0,0 +1,135 @@
|
||||
#!/usr/bin/env bash
# check_k8s_replicasets
# Checks Kubernetes ReplicaSets and flags those with readyReplicas < spec.replicas
# Return codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
#
# Usage:
# sudo /usr/lib/nagios/plugins/check_k8s_replicasets [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES]
#

# Strict mode: abort on errors, on unset variables and on failing pipeline stages.
set -o errexit -o nounset -o pipefail

# Thresholds keep any non-empty value already present in the environment.
: "${WARN:=0}"   # number of failing ReplicaSets that triggers WARNING
: "${CRIT:=1}"   # number of failing ReplicaSets that triggers CRITICAL (1 => a single RS is CRITICAL)

# Namespace filters and minimum age always start empty/zero.
IGNORE_NS=""
INCLUDE_NS=""
AGE_MIN=0
|
||||
# Print the command-line help text on stdout.
# Takes no arguments; $0 is expanded so the synopsis shows how the script was invoked.
print_usage() {
  printf '%s\n' \
    "Usage: $0 [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES]" \
    "--warn N : seuil warn si >=N ReplicaSets en erreur (default 0)" \
    "--crit M : seuil crit si >=M ReplicaSets en erreur (default 1)" \
    "--ignore-ns LIST : comma separated namespaces to ignore (default none)" \
    "--namespaces LIST: comma separated namespaces to check only (default all)" \
    "--age-min N : ignore ReplicaSets created less than N minutes ago (avoid flapping during rollout)"
}
|
||||
|
||||
#######################################
# Parse command-line options and validate the numeric settings.
# Globals:   WARN, CRIT, IGNORE_NS, INCLUDE_NS, AGE_MIN (written)
# Arguments: the script's own arguments ("$@")
# Outputs:   an explanatory message on stdout when the arguments are rejected
# Returns:   0 on success; exits the script with 3 (UNKNOWN) on --help,
#            on an unknown option, on a missing option value, or on a
#            non-numeric WARN / CRIT / AGE_MIN.
#######################################
parse_args() {
  local opt var val

  while (( $# > 0 )); do
    opt=$1
    case "$opt" in
      --warn|--crit|--ignore-ns|--namespaces|--age-min)
        # Without this guard a trailing option would expand an unset $2
        # under `set -u` and abort with status 1, which the monitoring
        # system would read as WARNING instead of UNKNOWN.
        if (( $# < 2 )); then
          echo "UNKNOWN - option ${opt} requires a value"
          exit 3
        fi
        case "$opt" in
          --warn)       WARN=$2 ;;
          --crit)       CRIT=$2 ;;
          --ignore-ns)  IGNORE_NS=$2 ;;
          --namespaces) INCLUDE_NS=$2 ;;
          --age-min)    AGE_MIN=$2 ;;
        esac
        shift 2
        ;;
      -h|--help)
        print_usage
        exit 3
        ;;
      *)
        echo "Unknown arg: $1"
        print_usage
        exit 3
        ;;
    esac
  done

  # The thresholds are used in arithmetic contexts later on, so reject
  # anything that is not a plain non-negative integer. This also covers
  # WARN / CRIT values inherited from the environment.
  for var in WARN CRIT AGE_MIN; do
    val=${!var:-0}
    if [[ ! "$val" =~ ^[0-9]+$ ]]; then
      echo "UNKNOWN - ${var} must be a non-negative integer (got '${val}')"
      exit 3
    fi
    # Force base 10 so values such as "08" are not parsed as invalid octal.
    printf -v "$var" '%d' "$((10#$val))"
  done
}

parse_args "$@"
|
||||
|
||||
# The whole check relies on kubectl; report UNKNOWN (3) when it is not on PATH.
command -v kubectl >/dev/null 2>&1 || {
  echo "UNKNOWN - kubectl not found"
  exit 3
}
|
||||
|
||||
#######################################
# Turn a comma-separated namespace list into an anchored ERE alternation,
# e.g. "kube-system, dev" -> "^kube-system$|^dev$".
# Whitespace around each entry is trimmed and empty entries are dropped, so
# "a, b,,c" no longer produces patterns that can never match ("^ b$") or
# that only match the empty string ("^$").
# Entries are inserted into the regex verbatim (not escaped).
# Arguments: $1 - comma-separated namespace list (may be empty)
# Outputs:   the pattern on stdout, without trailing newline; nothing when
#            the list holds no usable entry
# Returns:   0
#######################################
build_ns_pattern() {
  local list=${1:-}
  local pattern="" item
  local -a items=()

  IFS=',' read -ra items <<< "$list"

  # The ${arr[@]+...} form keeps `set -u` quiet on an empty array (bash < 4.4).
  for item in "${items[@]+"${items[@]}"}"; do
    item="${item#"${item%%[![:space:]]*}"}"   # strip leading whitespace
    item="${item%"${item##*[![:space:]]}"}"   # strip trailing whitespace
    [[ -n "$item" ]] || continue
    pattern+="${pattern:+|}^${item}\$"
  done

  printf '%s' "$pattern"
}

# Regex filters for namespace exclusion / inclusion; empty means "no filter".
ignore_pattern=$(build_ns_pattern "${IGNORE_NS:-}")
include_pattern=$(build_ns_pattern "${INCLUDE_NS:-}")
|
||||
|
||||
# Initialise the failures array up front so that reading it never trips
# `set -u` ("unbound variable") when no ReplicaSet is failing.
failures=()

# Collect one tab-separated record per ReplicaSet:
#   namespace, name, desired (spec.replicas), ready (status.readyReplicas), creationTimestamp
# A field missing from the object comes back empty and is normalised later.
#
# The exit status of kubectl is checked explicitly: if the query fails, an
# empty result must NOT be mistaken for "every ReplicaSet is healthy".
# Report UNKNOWN instead of a false OK.
kubectl_rc=0
rs_output=$(kubectl get rs -A -o jsonpath='{range .items[*]}{.metadata.namespace}{"\t"}{.metadata.name}{"\t"}{.spec.replicas}{"\t"}{.status.readyReplicas}{"\t"}{.metadata.creationTimestamp}{"\n"}{end}' 2>/dev/null) || kubectl_rc=$?
if (( kubectl_rc != 0 )); then
  echo "UNKNOWN - kubectl get rs failed (exit code ${kubectl_rc})"
  exit 3
fi

# An empty result yields a single empty element, which is skipped as a blank line downstream.
mapfile -t lines <<< "$rs_output"
|
||||
|
||||
# Reference time (epoch seconds) for the age filter, taken once for the whole run.
now_s=$(date +%s)

#######################################
# Evaluate one ReplicaSet record and append it to `failures` when it has
# fewer ready replicas than desired.
# Globals:   include_pattern, ignore_pattern, AGE_MIN, now_s (read)
#            failures (appended to)
# Arguments: $1 - record "ns<TAB>name<TAB>desired<TAB>ready<TAB>created"
# Returns:   0 always (a filtered-out record is not an error)
#######################################
check_replicaset_line() {
  local tab=$'\t'
  # Pad with separators so that every field split below finds a tab even
  # when trailing fields are missing.
  local rest="${1}${tab}${tab}${tab}${tab}${tab}"
  local ns name desired ready created created_s age_min

  # Split with parameter expansion rather than `IFS=$'\t' read`: tab is an
  # IFS whitespace character, so `read` would merge consecutive tabs and an
  # empty readyReplicas field would shift the timestamp into its place.
  ns=${rest%%"$tab"*};      rest=${rest#*"$tab"}
  name=${rest%%"$tab"*};    rest=${rest#*"$tab"}
  desired=${rest%%"$tab"*}; rest=${rest#*"$tab"}
  ready=${rest%%"$tab"*};   rest=${rest#*"$tab"}
  created=${rest%%"$tab"*}

  # Missing numeric fields count as zero.
  desired=${desired:-0}
  ready=${ready:-0}

  # Namespace filtering: the include list (if any) must match, the ignore list must not.
  if [[ -n "$include_pattern" && ! "$ns" =~ $include_pattern ]]; then
    return 0
  fi
  if [[ -n "$ignore_pattern" && "$ns" =~ $ignore_pattern ]]; then
    return 0
  fi

  # Age filtering: skip ReplicaSets younger than AGE_MIN minutes.
  if [[ -n "$created" ]] && (( AGE_MIN > 0 )); then
    # If the timestamp cannot be parsed, fall back to epoch 0 so the
    # ReplicaSet looks old and is still checked rather than skipped.
    created_s=$(date -d "$created" +%s 2>/dev/null) || created_s=0
    age_min=$(( (now_s - created_s) / 60 ))
    if (( age_min < AGE_MIN )); then
      return 0
    fi
  fi

  # Only ReplicaSets that want at least one replica can be failing
  # (zero-scale ReplicaSets are skipped).
  if (( desired > 0 && ready < desired )); then
    failures+=("${ns}/${name} (desired=${desired},ready=${ready})")
  fi
  return 0
}

# The ${arr[@]+...} form keeps `set -u` quiet on an empty array (bash < 4.4).
for line in "${lines[@]+"${lines[@]}"}"; do
  # Skip empty lines if any
  [[ -n "$line" ]] || continue
  check_replicaset_line "$line"
done
|
||||
|
||||
# Number of ReplicaSets that were found with fewer ready replicas than desired.
count=${#failures[@]}

# Nothing is failing: plain OK.
(( count > 0 )) || {
  echo "OK - all ReplicaSets report ready==desired"
  exit 0
}

# Some ReplicaSets are failing but neither threshold is reached: still OK.
if (( count < CRIT && count < WARN )); then
  echo "OK - ${count} ReplicaSets not fully ready but below thresholds"
  exit 0
fi

# At least one threshold is reached; CRITICAL takes precedence over WARNING.
if (( count >= CRIT )); then
  status_label="CRITICAL"
  exit_code=2
else
  status_label="WARNING"
  exit_code=1
fi

echo "${status_label} - ${count} ReplicaSets not fully ready: ${failures[*]}"
exit "$exit_code"
|
||||
Reference in New Issue
Block a user