check scripts - English translation

This commit is contained in:
Ludovic Cartier
2026-06-01 17:49:46 +02:00
parent 1ab1166c1e
commit 3892502c27
8 changed files with 111 additions and 111 deletions
+13 -13
View File
@@ -1,7 +1,7 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# check_etcd_health # check_etcd_health
# Verifie la santé d'etcd et (optionnel) la creation/verifieation des snapshots. # Checks etcd health and (optionally) snapshot creation/verification.
# Retourne : 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN # Returns: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
# #
# Usage example: # Usage example:
# sudo /usr/lib/nagios/plugins/check_etcd_health \ # sudo /usr/lib/nagios/plugins/check_etcd_health \
@@ -10,10 +10,10 @@
# --test-snapshot --snapshot-dir /var/backups/etcd --snapshot-max-age 24 # --test-snapshot --snapshot-dir /var/backups/etcd --snapshot-max-age 24
# #
# Notes: # Notes:
# - Par securite, execute ce script sur un master (ou via NRPE/SSH) avec un utilisateur ayant acces aux clefs. # - For security, run this script on a master (or via NRPE/SSH) with a user having access to the keys.
# - --snapshot-max-age en heures (defaut 24). Mettre 0 pour desactiver la verification d'age. # - --snapshot-max-age in hours (default 24). Set to 0 to disable age verification.
# - --test-snapshot creerera un snapshot temporaire pour valider la creation + verification via `etcdctl snapshot status`. # - --test-snapshot will create a temporary snapshot to validate creation + verification via `etcdctl snapshot status`.
# - Si --keep-snapshot-on-failure est active, le snapshot temporaire sera conserve en cas d'erreur pour debug. # - If --keep-snapshot-on-failure is enabled, the temporary snapshot will be kept on error for debugging.
ETCDCTL=${ETCDCTL:-/usr/local/bin/etcdctl} ETCDCTL=${ETCDCTL:-/usr/local/bin/etcdctl}
@@ -21,14 +21,14 @@ print_usage() {
cat <<EOF cat <<EOF
Usage: $0 --endpoints ENDPOINTS --cacert CA --cert CERT --key KEY [options] Usage: $0 --endpoints ENDPOINTS --cacert CA --cert CERT --key KEY [options]
Options: Options:
--warn-db-mb N avertissement si DB >= N MB (default 1024) --warn-db-mb N warn if DB >= N MB (default 1024)
--crit-db-mb M critique si DB >= M MB (default 1800) --crit-db-mb M critical if DB >= M MB (default 1800)
--timeout SECS etcdctl timeout (default 10) --timeout SECS etcdctl timeout (default 10)
--test-snapshot tenter de creer un snapshot temporaire et verifier son status --test-snapshot attempt to create a temporary snapshot and verify its status
--snapshot-dir DIR repertoire pour snapshots temporaires (default /var/backups/etcd) --snapshot-dir DIR directory for temporary snapshots (default /var/backups/etcd)
--keep-snapshot-on-failure conserver le snapshot temporaire si creation echoue (default false) --keep-snapshot-on-failure keep temporary snapshot on failure (default false)
--snapshot-max-age HRS verifier qu'il existe un snapshot plus recent que HRS heures (default 24). Mettre 0 pour desactiver. --snapshot-max-age HRS check that a snapshot newer than HRS hours exists (default 24). Set 0 to disable.
-h, --help affiche cette aide -h, --help show this help
EOF EOF
} }
+7 -7
View File
@@ -1,19 +1,19 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# check_k8s_deployments # check_k8s_deployments
# Vérifie les Deployments Kubernetes: availableReplicas < spec.replicas # Checks Kubernetes Deployments: availableReplicas < spec.replicas
# Retour: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN # Returns: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
# #
# Usage: # Usage:
# sudo /usr/lib/nagios/plugins/check_k8s_deployments [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES] # sudo /usr/lib/nagios/plugins/check_k8s_deployments [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES]
# #
# Exemples: # Examples:
# sudo /usr/lib/nagios/plugins/check_k8s_deployments --crit 1 # sudo /usr/lib/nagios/plugins/check_k8s_deployments --crit 1
# sudo /usr/lib/nagios/plugins/check_k8s_deployments --ignore-ns kube-system,monitoring # sudo /usr/lib/nagios/plugins/check_k8s_deployments --ignore-ns kube-system,monitoring
# #
set -euo pipefail set -euo pipefail
WARN=${WARN:-0} # nombre de deploys en erreur pour WARNING WARN=${WARN:-0} # number of failed deploys for WARNING
CRIT=${CRIT:-1} # nombre de deploys en erreur pour CRITICAL par défaut (1 => tout problème -> CRITICAL) CRIT=${CRIT:-1} # number of failed deploys for CRITICAL by default (1 => any issue -> CRITICAL)
IGNORE_NS="" IGNORE_NS=""
INCLUDE_NS="" INCLUDE_NS=""
AGE_MIN=0 AGE_MIN=0
@@ -21,8 +21,8 @@ AGE_MIN=0
print_usage() { print_usage() {
cat <<EOF cat <<EOF
Usage: $0 [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES] Usage: $0 [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES]
--warn N : seuil warn si >=N déploiements en erreur (default 0) --warn N : warn threshold if >=N deployments in error (default 0)
--crit M : seuil crit si >=M déploiements en erreur (default 1) --crit M : crit threshold if >=M deployments in error (default 1)
--ignore-ns LIST : comma separated namespaces to ignore (default none) --ignore-ns LIST : comma separated namespaces to ignore (default none)
--namespaces LIST: comma separated namespaces to check only (default all) --namespaces LIST: comma separated namespaces to check only (default all)
--age-min N : ignore deployments created less than N minutes ago (avoid flapping during rollout) --age-min N : ignore deployments created less than N minutes ago (avoid flapping during rollout)
+15 -15
View File
@@ -1,14 +1,14 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# check_k8s_jobs_cronjobs # check_k8s_jobs_cronjobs
# Vérifie l'état des Kubernetes Jobs et CronJobs. # Checks the state of Kubernetes Jobs and CronJobs.
# Exit codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN # Exit codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
# #
# Fonctions principales : # Main features:
# - détecte Jobs avec des échecs (.status.failed > 0) ou des Jobs "actifs" trop vieux # - detects Jobs with failures (.status.failed > 0) or active Jobs that are too old
# - recherche d'événements récents (type=Warning) liés aux Jobs dans les X dernières minutes # - searches for recent events (type=Warning) related to Jobs in the last X minutes
# - vérifie pour les CronJobs que lastScheduleTime n'est pas trop ancien (configurable) si non suspendu # - checks for CronJobs that lastScheduleTime is not too old (configurable) if not suspended
# #
# Usage (exemples) : # Usage (examples):
# sudo /usr/lib/nagios/plugins/check_k8s_jobs_cronjobs --crit 1 --recent-minutes 5 # sudo /usr/lib/nagios/plugins/check_k8s_jobs_cronjobs --crit 1 --recent-minutes 5
# sudo /usr/lib/nagios/plugins/check_k8s_jobs_cronjobs --ignore-ns kube-system --cron-max-age 120 # sudo /usr/lib/nagios/plugins/check_k8s_jobs_cronjobs --ignore-ns kube-system --cron-max-age 120
# #
@@ -28,15 +28,15 @@ print_usage() {
cat <<EOF cat <<EOF
Usage: $0 [options] Usage: $0 [options]
Options: Options:
--warn N seuil WARN si >= N objets en erreur (default 0) --warn N warn threshold if >= N objects in error (default 0)
--crit M seuil CRIT si >= M objets en erreur (default 1) --crit M crit threshold if >= M objects in error (default 1)
--ignore-ns ns1,ns2 namespaces à ignorer --ignore-ns ns1,ns2 namespaces to ignore
--namespaces ns1,ns2 limiter aux namespaces donnés (comma separated) --namespaces ns1,ns2 limit to given namespaces (comma separated)
--age-min MINUTES considérer un job "actif" normal si démarré moins de MINUTES (default 60) --age-min MINUTES consider an active job normal if started less than MINUTES ago (default 60)
--recent-minutes MIN chercher événements de Job (Warning) dans les MIN dernières minutes (default 5) --recent-minutes MIN look for Job events (Warning) in the last MIN minutes (default 5)
--check-cron activer la vérification des CronJobs (default ON) --check-cron enable CronJob verification (default ON)
--cron-max-age MINUTES si lastScheduleTime > MINUTES => alerter (default 60). Mettre 0 pour désactiver. --cron-max-age MINUTES alert if lastScheduleTime > MINUTES (default 60). Set 0 to disable.
-h, --help : affiche l'aide -h, --help : show help
EOF EOF
} }
+6 -6
View File
@@ -1,6 +1,6 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# check_k8s_pki_certs # check_k8s_pki_certs
# Vérifie les certificats PEM sous /etc/kubernetes/pki (par défaut) et alerte si expiration <= warn_days (30j par défaut). # Checks PEM certificates under /etc/kubernetes/pki (by default) and alerts if expiry <= warn_days (30d by default).
# Exit codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN # Exit codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
# #
# Usage: # Usage:
@@ -19,11 +19,11 @@ print_usage() {
Usage: $0 [--path PATH] [--warn-days N] [--crit-days M] [--recursive] [-h|--help] Usage: $0 [--path PATH] [--warn-days N] [--crit-days M] [--recursive] [-h|--help]
Options: Options:
--path PATH répertoire à scanner (default: $PKI_PATH) --path PATH directory to scan (default: $PKI_PATH)
--warn-days N seuil warning en jours (default: $WARN_DAYS) --warn-days N warning threshold in days (default: $WARN_DAYS)
--crit-days M seuil critical en jours (default: $CRIT_DAYS) --crit-days M critical threshold in days (default: $CRIT_DAYS)
--recursive scanner récursivement PATH et sous-dirs --recursive scan PATH and subdirectories recursively
-h, --help affiche cette aide -h, --help show this help
EOF EOF
} }
+12 -12
View File
@@ -1,13 +1,13 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# check_k8s_pv_pvc # check_k8s_pv_pvc
# Vérifie l'état des PersistentVolumes (PV) et PersistentVolumeClaims (PVC) Kubernetes. # Checks the state of Kubernetes PersistentVolumes (PV) and PersistentVolumeClaims (PVC).
# Exit codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN # Exit codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
# #
# Usage examples: # Usage examples:
# sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --crit 1 # CRITICAL si >=1 problème # sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --crit 1 # CRITICAL if >=1 issue
# sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --ignore-ns kube-system # ignorer kube-system # sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --ignore-ns kube-system # ignore kube-system
# sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --pvc-age-min 10 --crit 2 # ignorer PVC récents <10min, CRIT si >=2 # sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --pvc-age-min 10 --crit 2 # ignore recent PVCs <10min, CRIT if >=2
# sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --check-pv --check-pvc # (par défaut les 2 sont vérifiés) # sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --check-pv --check-pvc # (both checked by default)
# #
set -euo pipefail set -euo pipefail
@@ -16,7 +16,7 @@ WARN=${WARN:-0}
CRIT=${CRIT:-1} CRIT=${CRIT:-1}
IGNORE_NS="" IGNORE_NS=""
INCLUDE_NS="" INCLUDE_NS=""
PVC_AGE_MIN=${PVC_AGE_MIN:-5} # en minutes : ignore PVC créés il y a moins de X minutes (défaut 5) PVC_AGE_MIN=${PVC_AGE_MIN:-5} # in minutes: ignore PVCs created less than X minutes ago (default 5)
CHECK_PV=1 CHECK_PV=1
CHECK_PVC=1 CHECK_PVC=1
@@ -24,14 +24,14 @@ print_usage() {
cat <<EOF cat <<EOF
Usage: $0 [options] Usage: $0 [options]
Options: Options:
--warn N seuil WARN si >= N objets en erreur (default 0) --warn N warn threshold if >= N objects in error (default 0)
--crit M seuil CRIT si >= M objets en erreur (default 1) --crit M crit threshold if >= M objects in error (default 1)
--ignore-ns a,b,c namespaces à ignorer (comma separated) --ignore-ns a,b,c namespaces to ignore (comma separated)
--namespaces a,b limiter aux namespaces donnés (comma separated) --namespaces a,b limit to given namespaces (comma separated)
--pvc-age-min N ignore PVC créés il y a moins de N minutes (default 5) --pvc-age-min N ignore PVCs created less than N minutes ago (default 5)
--no-pv disable PV checks --no-pv disable PV checks
--no-pvc disable PVC checks --no-pvc disable PVC checks
-h, --help affiche cette aide -h, --help show this help
EOF EOF
} }
+6 -6
View File
@@ -1,15 +1,15 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# check_k8s_replicasets # check_k8s_replicasets
# Vérifie les ReplicaSets Kubernetes : readyReplicas < spec.replicas # Checks Kubernetes ReplicaSets: readyReplicas < spec.replicas
# Retour: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN # Returns: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
# #
# Usage: # Usage:
# sudo /usr/lib/nagios/plugins/check_k8s_replicasets [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES] # sudo /usr/lib/nagios/plugins/check_k8s_replicasets [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES]
# #
set -euo pipefail set -euo pipefail
WARN=${WARN:-0} # nombre de RS en erreur pour WARNING WARN=${WARN:-0} # number of failed RS for WARNING
CRIT=${CRIT:-1} # nombre de RS en erreur pour CRITICAL par défaut (1 => 1 RS -> CRITICAL) CRIT=${CRIT:-1} # number of failed RS for CRITICAL by default (1 => 1 RS -> CRITICAL)
IGNORE_NS="" IGNORE_NS=""
INCLUDE_NS="" INCLUDE_NS=""
AGE_MIN=0 AGE_MIN=0
@@ -17,8 +17,8 @@ AGE_MIN=0
print_usage() { print_usage() {
cat <<EOF cat <<EOF
Usage: $0 [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES] Usage: $0 [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES]
--warn N : seuil warn si >=N ReplicaSets en erreur (default 0) --warn N : warn threshold if >=N ReplicaSets in error (default 0)
--crit M : seuil crit si >=M ReplicaSets en erreur (default 1) --crit M : crit threshold if >=M ReplicaSets in error (default 1)
--ignore-ns LIST : comma separated namespaces to ignore (default none) --ignore-ns LIST : comma separated namespaces to ignore (default none)
--namespaces LIST: comma separated namespaces to check only (default all) --namespaces LIST: comma separated namespaces to check only (default all)
--age-min N : ignore ReplicaSets created less than N minutes ago (avoid flapping during rollout) --age-min N : ignore ReplicaSets created less than N minutes ago (avoid flapping during rollout)
+30 -30
View File
@@ -3,9 +3,9 @@
# Nagios/Icinga2 plugin to check Proxmox Backup Server (PBS) backups. # Nagios/Icinga2 plugin to check Proxmox Backup Server (PBS) backups.
# #
# Checks: # Checks:
# 1. Connectivité à l'API PBS → WARNING si injoignable # 1. PBS API connectivity → WARNING if unreachable
# 2. Présence d'un backup du jour → CRITICAL si absent # 2. Backup present for today → CRITICAL if missing
# 3. Statut de vérification du backup → WARNING si non vérifié / échec # 3. Backup verification status → WARNING if unverified / failed
STATE_OK=0 STATE_OK=0
STATE_WARNING=1 STATE_WARNING=1
@@ -26,20 +26,20 @@ usage() {
Usage: $0 -H <host> -T <api-token> -s <store> -n <backup-id> [-t <type>] [-P <port>] [-N <namespace>] [-k] Usage: $0 -H <host> -T <api-token> -s <store> -n <backup-id> [-t <type>] [-P <port>] [-N <namespace>] [-k]
Options: Options:
-H Hôte PBS (IP ou FQDN) -H PBS host (IP or FQDN)
-T API token au format user@realm!tokenid:secret -T API token in format user@realm!tokenid:secret
-s Nom du datastore PBS -s PBS datastore name
-n Backup ID à vérifier (backup-id : nom d'hôte, ou ID numérique pour vm/ct) -n Backup ID to check (backup-id: hostname, or numeric ID for vm/ct)
-t Type de backup : host (défaut), vm, ct -t Backup type: host (default), vm, ct
-P Port de l'API PBS (défaut : 8007) -P PBS API port (default: 8007)
-N Namespace PBS (optionnel) -N PBS namespace (optional)
-k Ignorer les erreurs de certificat SSL -k Ignore SSL certificate errors
Exemple (host) : Example (host):
$0 -H pbs.example.com -T backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx \ $0 -H pbs.example.com -T backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx \
-s main -n myhost -k -s main -n myhost -k
Exemple (vm) : Example (vm):
$0 -H pbs.example.com -T backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx \ $0 -H pbs.example.com -T backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx \
-s main -n 100 -t vm -k -s main -n 100 -t vm -k
EOF EOF
@@ -65,54 +65,54 @@ if [[ -z "$PBS_HOST" || -z "$PBS_TOKEN" || -z "$PBS_DATASTORE" || -z "$BACKUP_ID
fi fi
if [[ "$BACKUP_TYPE" != "host" && "$BACKUP_TYPE" != "vm" && "$BACKUP_TYPE" != "ct" ]]; then if [[ "$BACKUP_TYPE" != "host" && "$BACKUP_TYPE" != "vm" && "$BACKUP_TYPE" != "ct" ]]; then
echo "UNKNOWN: type de backup invalide '${BACKUP_TYPE}' (valeurs acceptées : host, vm, ct)" echo "UNKNOWN: invalid backup type '${BACKUP_TYPE}' (accepted values: host, vm, ct)"
exit $STATE_UNKNOWN exit $STATE_UNKNOWN
fi fi
# --- Options curl --- # --- curl options ---
CURL_OPTS=(-sf --max-time 15) CURL_OPTS=(-sf --max-time 15)
[[ "$SKIP_SSL" -eq 1 ]] && CURL_OPTS+=(-k) [[ "$SKIP_SSL" -eq 1 ]] && CURL_OPTS+=(-k)
# --- URL de l'API --- # --- API URL ---
BASE_URL="https://${PBS_HOST}:${PBS_PORT}/api2/json" BASE_URL="https://${PBS_HOST}:${PBS_PORT}/api2/json"
SNAPSHOTS_URL="${BASE_URL}/admin/datastore/${PBS_DATASTORE}/snapshots?backup-type=${BACKUP_TYPE}&backup-id=${BACKUP_ID}" SNAPSHOTS_URL="${BASE_URL}/admin/datastore/${PBS_DATASTORE}/snapshots?backup-type=${BACKUP_TYPE}&backup-id=${BACKUP_ID}"
[[ -n "$NAMESPACE" ]] && SNAPSHOTS_URL+="&ns=${NAMESPACE}" [[ -n "$NAMESPACE" ]] && SNAPSHOTS_URL+="&ns=${NAMESPACE}"
# --- Appel API --- # --- API call ---
# PBS utilise PBSAPIToken (différent de PVEAPIToken utilisé par Proxmox VE) # PBS uses PBSAPIToken (different from PVEAPIToken used by Proxmox VE)
RESPONSE=$(curl "${CURL_OPTS[@]}" \ RESPONSE=$(curl "${CURL_OPTS[@]}" \
-H "Authorization: PBSAPIToken=${PBS_TOKEN}" \ -H "Authorization: PBSAPIToken=${PBS_TOKEN}" \
"$SNAPSHOTS_URL" 2>&1) "$SNAPSHOTS_URL" 2>&1)
CURL_EXIT=$? CURL_EXIT=$?
if [[ $CURL_EXIT -ne 0 ]]; then if [[ $CURL_EXIT -ne 0 ]]; then
echo "WARNING: Impossible de contacter l'API PBS (${PBS_HOST}:${PBS_PORT}) - code curl : ${CURL_EXIT}" echo "WARNING: Cannot reach PBS API (${PBS_HOST}:${PBS_PORT}) - curl exit code: ${CURL_EXIT}"
exit $STATE_WARNING exit $STATE_WARNING
fi fi
# --- Validation JSON --- # --- JSON validation ---
if ! echo "$RESPONSE" | jq -e . >/dev/null 2>&1; then if ! echo "$RESPONSE" | jq -e . >/dev/null 2>&1; then
echo "WARNING: Réponse invalide de l'API PBS (réponse non-JSON)" echo "WARNING: Invalid response from PBS API (non-JSON response)"
exit $STATE_WARNING exit $STATE_WARNING
fi fi
# Vérifier que l'API n'a pas retourné une erreur applicative # Check that the API did not return an application error
API_ERROR=$(echo "$RESPONSE" | jq -r '.errors // empty' 2>/dev/null) API_ERROR=$(echo "$RESPONSE" | jq -r '.errors // empty' 2>/dev/null)
if [[ -n "$API_ERROR" ]]; then if [[ -n "$API_ERROR" ]]; then
echo "WARNING: Erreur API PBS : ${API_ERROR}" echo "WARNING: PBS API error: ${API_ERROR}"
exit $STATE_WARNING exit $STATE_WARNING
fi fi
# --- Extraction des snapshots --- # --- Snapshot extraction ---
SNAPSHOTS=$(echo "$RESPONSE" | jq -r '.data // []') SNAPSHOTS=$(echo "$RESPONSE" | jq -r '.data // []')
TOTAL_COUNT=$(echo "$SNAPSHOTS" | jq 'length') TOTAL_COUNT=$(echo "$SNAPSHOTS" | jq 'length')
if [[ "$TOTAL_COUNT" -eq 0 ]]; then if [[ "$TOTAL_COUNT" -eq 0 ]]; then
echo "CRITICAL: Aucun backup trouvé pour '${BACKUP_ID}' dans le datastore '${PBS_DATASTORE}'" echo "CRITICAL: No backup found for '${BACKUP_ID}' in datastore '${PBS_DATASTORE}'"
exit $STATE_CRITICAL exit $STATE_CRITICAL
fi fi
# --- Filtrage des backups du jour (heure locale) --- # --- Filter today's backups (local time) ---
TODAY_START=$(date -d "today 00:00:00" +%s) TODAY_START=$(date -d "today 00:00:00" +%s)
TOMORROW_START=$(date -d "tomorrow 00:00:00" +%s) TOMORROW_START=$(date -d "tomorrow 00:00:00" +%s)
@@ -125,20 +125,20 @@ TODAY_COUNT=$(echo "$TODAY_SNAPSHOTS" | jq 'length')
if [[ "$TODAY_COUNT" -eq 0 ]]; then if [[ "$TODAY_COUNT" -eq 0 ]]; then
LATEST_EPOCH=$(echo "$SNAPSHOTS" | jq '[.[]["backup-time"]] | max') LATEST_EPOCH=$(echo "$SNAPSHOTS" | jq '[.[]["backup-time"]] | max')
LATEST_DATE=$(date -d "@${LATEST_EPOCH}" "+%Y-%m-%d %H:%M") LATEST_DATE=$(date -d "@${LATEST_EPOCH}" "+%Y-%m-%d %H:%M")
echo "CRITICAL: Aucun backup aujourd'hui pour '${BACKUP_ID}' — dernier backup connu : ${LATEST_DATE}" echo "CRITICAL: No backup today for '${BACKUP_ID}' — last known backup: ${LATEST_DATE}"
exit $STATE_CRITICAL exit $STATE_CRITICAL
fi fi
# --- Backup le plus récent du jour --- # --- Most recent backup of the day ---
LATEST=$(echo "$TODAY_SNAPSHOTS" | jq 'sort_by(.["backup-time"]) | last') LATEST=$(echo "$TODAY_SNAPSHOTS" | jq 'sort_by(.["backup-time"]) | last')
BACKUP_EPOCH=$(echo "$LATEST" | jq -r '.["backup-time"]') BACKUP_EPOCH=$(echo "$LATEST" | jq -r '.["backup-time"]')
BACKUP_TIME=$(date -d "@${BACKUP_EPOCH}" "+%Y-%m-%d %H:%M") BACKUP_TIME=$(date -d "@${BACKUP_EPOCH}" "+%Y-%m-%d %H:%M")
VERIFY_STATE=$(echo "$LATEST" | jq -r '.verification.state // "unverified"') VERIFY_STATE=$(echo "$LATEST" | jq -r '.verification.state // "unverified"')
if [[ "$VERIFY_STATE" != "ok" ]]; then if [[ "$VERIFY_STATE" != "ok" ]]; then
echo "WARNING: Backup '${BACKUP_ID}' du ${BACKUP_TIME} présent mais statut de vérification : ${VERIFY_STATE}" echo "WARNING: Backup '${BACKUP_ID}' from ${BACKUP_TIME} present but verification status: ${VERIFY_STATE}"
exit $STATE_WARNING exit $STATE_WARNING
fi fi
echo "OK: Backup '${BACKUP_ID}' du ${BACKUP_TIME} présent et vérifié" echo "OK: Backup '${BACKUP_ID}' from ${BACKUP_TIME} present and verified"
exit $STATE_OK exit $STATE_OK
+22 -22
View File
@@ -23,33 +23,33 @@ STATE_WARNING=1
STATE_CRITICAL=2 STATE_CRITICAL=2
STATE_UNKNOWN=3 STATE_UNKNOWN=3
# --- Valeurs par défaut --- # --- Default values ---
CHECK_TYPE="" CHECK_TYPE=""
WARN=0 WARN=0
CRIT=0 CRIT=0
AUTH="" AUTH=""
# --- Aide --- # --- Usage ---
usage() { usage() {
cat << EOF cat << EOF
Usage: $0 -x [type] -w <warning> -c <critical> [-a <password>] Usage: $0 -x [type] -w <warning> -c <critical> [-a <password>]
Types de check (-x): Check types (-x):
ping : Test de connexion simple (PONG) ping : Simple connection test (PONG)
memory : Utilisation mémoire en % (Ex: -w 80 -c 90) memory : Memory usage in % (e.g. -w 80 -c 90)
frag : Ratio de fragmentation (Ex: -w 1.5 -c 2.0) frag : Fragmentation ratio (e.g. -w 1.5 -c 2.0)
hitrate : Taux d'efficacité du cache en % (Ex: -w 80 -c 50) hitrate : Cache hit rate in % (e.g. -w 80 -c 50)
persistence : État des sauvegardes RDB/AOF persistence : RDB/AOF backup status
replication : Statut Master/Slave et lag (Ex: -w 10 -c 60) replication : Master/Slave status and lag (e.g. -w 10 -c 60)
clients : Nombre de clients connectés (Ex: -w 200 -c 500) clients : Number of connected clients (e.g. -w 200 -c 500)
Optionnelle: Optional:
-a : Mot de passe Redis (sera passé via REDISCLI_AUTH) -a : Redis password (passed via REDISCLI_AUTH)
EOF EOF
exit $STATE_UNKNOWN exit $STATE_UNKNOWN
} }
# --- Parsing des arguments --- # --- Argument parsing ---
while getopts "x:w:c:a:?" opt; do while getopts "x:w:c:a:?" opt; do
case "$opt" in case "$opt" in
x) CHECK_TYPE="$OPTARG" ;; x) CHECK_TYPE="$OPTARG" ;;
@@ -62,7 +62,7 @@ done
if [ -z "$CHECK_TYPE" ]; then usage; fi if [ -z "$CHECK_TYPE" ]; then usage; fi
# Export du mot de passe pour redis-cli # Export password for redis-cli
if [ -n "$AUTH" ]; then if [ -n "$AUTH" ]; then
export REDISCLI_AUTH="$AUTH" export REDISCLI_AUTH="$AUTH"
fi fi
@@ -70,12 +70,12 @@ fi
EXIT_CODE=$STATE_OK EXIT_CODE=$STATE_OK
INSTANCES_FOUND=0 INSTANCES_FOUND=0
# Listes pour stocker les résultats # Lists to store results
CRIT_LIST="" CRIT_LIST=""
WARN_LIST="" WARN_LIST=""
OK_LIST="" OK_LIST=""
# 1. Détection des ports # 1. Port detection
if command -v ss &> /dev/null; then if command -v ss &> /dev/null; then
PORTS=$(ss -tlnp | grep 'redis-server' | awk '{print $4}' | awk -F: '{print $NF}' | sort -u) PORTS=$(ss -tlnp | grep 'redis-server' | awk '{print $4}' | awk -F: '{print $NF}' | sort -u)
elif command -v netstat &> /dev/null; then elif command -v netstat &> /dev/null; then
@@ -85,16 +85,16 @@ else
fi fi
if [ -z "$PORTS" ]; then if [ -z "$PORTS" ]; then
echo "OK: Aucun Redis tecté" echo "OK: No Redis instance detected"
exit $STATE_OK exit $STATE_OK
fi fi
# 2. Exécution du check # 2. Run check
for PORT in $PORTS; do for PORT in $PORTS; do
# On ignore les ports < 1024 sauf le 6379 standard # Skip ports < 1024 except standard 6379
if [ "$PORT" -lt 1024 ] && [ "$PORT" -ne 6379 ]; then continue; fi if [ "$PORT" -lt 1024 ] && [ "$PORT" -ne 6379 ]; then continue; fi
# Vérification si l'instance répond # Check if instance responds
if ! redis-cli -p "$PORT" PING 2>/dev/null | grep -q "PONG"; then if ! redis-cli -p "$PORT" PING 2>/dev/null | grep -q "PONG"; then
MSG="Port $PORT (Erreur Connexion/Auth)" MSG="Port $PORT (Erreur Connexion/Auth)"
if [ -z "$CRIT_LIST" ]; then CRIT_LIST="$MSG"; else CRIT_LIST="$CRIT_LIST, $MSG"; fi if [ -z "$CRIT_LIST" ]; then CRIT_LIST="$MSG"; else CRIT_LIST="$CRIT_LIST, $MSG"; fi
@@ -172,7 +172,7 @@ for PORT in $PORTS; do
*) usage ;; *) usage ;;
esac esac
# Stockage du résultat # Store result
MSG="Port $PORT ($VAL_STR)" MSG="Port $PORT ($VAL_STR)"
case "$CURR_STATUS" in case "$CURR_STATUS" in
$STATE_CRITICAL) $STATE_CRITICAL)
@@ -189,7 +189,7 @@ for PORT in $PORTS; do
esac esac
done done
# Construction du message de sortie # Build output message
if [ "$EXIT_CODE" -eq $STATE_OK ]; then if [ "$EXIT_CODE" -eq $STATE_OK ]; then
echo "OK: All $INSTANCES_FOUND instance(s) are healthy ($CHECK_TYPE). $OK_LIST" echo "OK: All $INSTANCES_FOUND instance(s) are healthy ($CHECK_TYPE). $OK_LIST"
exit $STATE_OK exit $STATE_OK