You've already forked nrpe
check scripts - English translation
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# check_etcd_health
|
# check_etcd_health
|
||||||
# Verifie la santé d'etcd et (optionnel) la creation/verifieation des snapshots.
|
# Checks etcd health and (optionally) snapshot creation/verification.
|
||||||
# Retourne : 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
|
# Returns: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
|
||||||
#
|
#
|
||||||
# Usage example:
|
# Usage example:
|
||||||
# sudo /usr/lib/nagios/plugins/check_etcd_health \
|
# sudo /usr/lib/nagios/plugins/check_etcd_health \
|
||||||
@@ -10,10 +10,10 @@
|
|||||||
# --test-snapshot --snapshot-dir /var/backups/etcd --snapshot-max-age 24
|
# --test-snapshot --snapshot-dir /var/backups/etcd --snapshot-max-age 24
|
||||||
#
|
#
|
||||||
# Notes:
|
# Notes:
|
||||||
# - Par securite, execute ce script sur un master (ou via NRPE/SSH) avec un utilisateur ayant acces aux clefs.
|
# - For security, run this script on a master (or via NRPE/SSH) with a user having access to the keys.
|
||||||
# - --snapshot-max-age en heures (defaut 24). Mettre 0 pour desactiver la verification d'age.
|
# - --snapshot-max-age in hours (default 24). Set to 0 to disable age verification.
|
||||||
# - --test-snapshot creerera un snapshot temporaire pour valider la creation + verification via `etcdctl snapshot status`.
|
# - --test-snapshot will create a temporary snapshot to validate creation + verification via `etcdctl snapshot status`.
|
||||||
# - Si --keep-snapshot-on-failure est active, le snapshot temporaire sera conserve en cas d'erreur pour debug.
|
# - If --keep-snapshot-on-failure is enabled, the temporary snapshot will be kept on error for debugging.
|
||||||
|
|
||||||
ETCDCTL=${ETCDCTL:-/usr/local/bin/etcdctl}
|
ETCDCTL=${ETCDCTL:-/usr/local/bin/etcdctl}
|
||||||
|
|
||||||
@@ -21,14 +21,14 @@ print_usage() {
|
|||||||
cat <<EOF
|
cat <<EOF
|
||||||
Usage: $0 --endpoints ENDPOINTS --cacert CA --cert CERT --key KEY [options]
|
Usage: $0 --endpoints ENDPOINTS --cacert CA --cert CERT --key KEY [options]
|
||||||
Options:
|
Options:
|
||||||
--warn-db-mb N avertissement si DB >= N MB (default 1024)
|
--warn-db-mb N warn if DB >= N MB (default 1024)
|
||||||
--crit-db-mb M critique si DB >= M MB (default 1800)
|
--crit-db-mb M critical if DB >= M MB (default 1800)
|
||||||
--timeout SECS etcdctl timeout (default 10)
|
--timeout SECS etcdctl timeout (default 10)
|
||||||
--test-snapshot tenter de creer un snapshot temporaire et verifier son status
|
--test-snapshot attempt to create a temporary snapshot and verify its status
|
||||||
--snapshot-dir DIR repertoire pour snapshots temporaires (default /var/backups/etcd)
|
--snapshot-dir DIR directory for temporary snapshots (default /var/backups/etcd)
|
||||||
--keep-snapshot-on-failure conserver le snapshot temporaire si creation echoue (default false)
|
--keep-snapshot-on-failure keep temporary snapshot on failure (default false)
|
||||||
--snapshot-max-age HRS verifier qu'il existe un snapshot plus recent que HRS heures (default 24). Mettre 0 pour desactiver.
|
--snapshot-max-age HRS check that a snapshot newer than HRS hours exists (default 24). Set 0 to disable.
|
||||||
-h, --help affiche cette aide
|
-h, --help show this help
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,19 +1,19 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# check_k8s_deployments
|
# check_k8s_deployments
|
||||||
# Vérifie les Deployments Kubernetes: availableReplicas < spec.replicas
|
# Checks Kubernetes Deployments: availableReplicas < spec.replicas
|
||||||
# Retour: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
|
# Returns: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# sudo /usr/lib/nagios/plugins/check_k8s_deployments [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES]
|
# sudo /usr/lib/nagios/plugins/check_k8s_deployments [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES]
|
||||||
#
|
#
|
||||||
# Exemples:
|
# Examples:
|
||||||
# sudo /usr/lib/nagios/plugins/check_k8s_deployments --crit 1
|
# sudo /usr/lib/nagios/plugins/check_k8s_deployments --crit 1
|
||||||
# sudo /usr/lib/nagios/plugins/check_k8s_deployments --ignore-ns kube-system,monitoring
|
# sudo /usr/lib/nagios/plugins/check_k8s_deployments --ignore-ns kube-system,monitoring
|
||||||
#
|
#
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
WARN=${WARN:-0} # nombre de deploys en erreur pour WARNING
|
WARN=${WARN:-0} # number of failed deploys for WARNING
|
||||||
CRIT=${CRIT:-1} # nombre de deploys en erreur pour CRITICAL par défaut (1 => tout problème -> CRITICAL)
|
CRIT=${CRIT:-1} # number of failed deploys for CRITICAL by default (1 => any issue -> CRITICAL)
|
||||||
IGNORE_NS=""
|
IGNORE_NS=""
|
||||||
INCLUDE_NS=""
|
INCLUDE_NS=""
|
||||||
AGE_MIN=0
|
AGE_MIN=0
|
||||||
@@ -21,8 +21,8 @@ AGE_MIN=0
|
|||||||
print_usage() {
|
print_usage() {
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
Usage: $0 [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES]
|
Usage: $0 [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES]
|
||||||
--warn N : seuil warn si >=N déploiements en erreur (default 0)
|
--warn N : warn threshold if >=N deployments in error (default 0)
|
||||||
--crit M : seuil crit si >=M déploiements en erreur (default 1)
|
--crit M : crit threshold if >=M deployments in error (default 1)
|
||||||
--ignore-ns LIST : comma separated namespaces to ignore (default none)
|
--ignore-ns LIST : comma separated namespaces to ignore (default none)
|
||||||
--namespaces LIST: comma separated namespaces to check only (default all)
|
--namespaces LIST: comma separated namespaces to check only (default all)
|
||||||
--age-min N : ignore deployments created less than N minutes ago (avoid flapping during rollout)
|
--age-min N : ignore deployments created less than N minutes ago (avoid flapping during rollout)
|
||||||
|
|||||||
@@ -1,14 +1,14 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# check_k8s_jobs_cronjobs
|
# check_k8s_jobs_cronjobs
|
||||||
# Vérifie l'état des Kubernetes Jobs et CronJobs.
|
# Checks the state of Kubernetes Jobs and CronJobs.
|
||||||
# Exit codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
|
# Exit codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
|
||||||
#
|
#
|
||||||
# Fonctions principales :
|
# Main features:
|
||||||
# - détecte Jobs avec des échecs (.status.failed > 0) ou des Jobs "actifs" trop vieux
|
# - detects Jobs with failures (.status.failed > 0) or active Jobs that are too old
|
||||||
# - recherche d'événements récents (type=Warning) liés aux Jobs dans les X dernières minutes
|
# - searches for recent events (type=Warning) related to Jobs in the last X minutes
|
||||||
# - vérifie pour les CronJobs que lastScheduleTime n'est pas trop ancien (configurable) si non suspendu
|
# - checks for CronJobs that lastScheduleTime is not too old (configurable) if not suspended
|
||||||
#
|
#
|
||||||
# Usage (exemples) :
|
# Usage (examples):
|
||||||
# sudo /usr/lib/nagios/plugins/check_k8s_jobs_cronjobs --crit 1 --recent-minutes 5
|
# sudo /usr/lib/nagios/plugins/check_k8s_jobs_cronjobs --crit 1 --recent-minutes 5
|
||||||
# sudo /usr/lib/nagios/plugins/check_k8s_jobs_cronjobs --ignore-ns kube-system --cron-max-age 120
|
# sudo /usr/lib/nagios/plugins/check_k8s_jobs_cronjobs --ignore-ns kube-system --cron-max-age 120
|
||||||
#
|
#
|
||||||
@@ -28,15 +28,15 @@ print_usage() {
|
|||||||
cat <<EOF
|
cat <<EOF
|
||||||
Usage: $0 [options]
|
Usage: $0 [options]
|
||||||
Options:
|
Options:
|
||||||
--warn N seuil WARN si >= N objets en erreur (default 0)
|
--warn N warn threshold if >= N objects in error (default 0)
|
||||||
--crit M seuil CRIT si >= M objets en erreur (default 1)
|
--crit M crit threshold if >= M objects in error (default 1)
|
||||||
--ignore-ns ns1,ns2 namespaces à ignorer
|
--ignore-ns ns1,ns2 namespaces to ignore
|
||||||
--namespaces ns1,ns2 limiter aux namespaces donnés (comma separated)
|
--namespaces ns1,ns2 limit to given namespaces (comma separated)
|
||||||
--age-min MINUTES considérer un job "actif" normal si démarré moins de MINUTES (default 60)
|
--age-min MINUTES consider an active job normal if started less than MINUTES ago (default 60)
|
||||||
--recent-minutes MIN chercher événements de Job (Warning) dans les MIN dernières minutes (default 5)
|
--recent-minutes MIN look for Job events (Warning) in the last MIN minutes (default 5)
|
||||||
--check-cron activer la vérification des CronJobs (default ON)
|
--check-cron enable CronJob verification (default ON)
|
||||||
--cron-max-age MINUTES si lastScheduleTime > MINUTES => alerter (default 60). Mettre 0 pour désactiver.
|
--cron-max-age MINUTES alert if lastScheduleTime > MINUTES (default 60). Set 0 to disable.
|
||||||
-h, --help : affiche l'aide
|
-h, --help : show help
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# check_k8s_pki_certs
|
# check_k8s_pki_certs
|
||||||
# Vérifie les certificats PEM sous /etc/kubernetes/pki (par défaut) et alerte si expiration <= warn_days (30j par défaut).
|
# Checks PEM certificates under /etc/kubernetes/pki (by default) and alerts if expiry <= warn_days (30d by default).
|
||||||
# Exit codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
|
# Exit codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
@@ -19,11 +19,11 @@ print_usage() {
|
|||||||
Usage: $0 [--path PATH] [--warn-days N] [--crit-days M] [--recursive] [-h|--help]
|
Usage: $0 [--path PATH] [--warn-days N] [--crit-days M] [--recursive] [-h|--help]
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
--path PATH répertoire à scanner (default: $PKI_PATH)
|
--path PATH directory to scan (default: $PKI_PATH)
|
||||||
--warn-days N seuil warning en jours (default: $WARN_DAYS)
|
--warn-days N warning threshold in days (default: $WARN_DAYS)
|
||||||
--crit-days M seuil critical en jours (default: $CRIT_DAYS)
|
--crit-days M critical threshold in days (default: $CRIT_DAYS)
|
||||||
--recursive scanner récursivement PATH et sous-dirs
|
--recursive scan PATH and subdirectories recursively
|
||||||
-h, --help affiche cette aide
|
-h, --help show this help
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+12
-12
@@ -1,13 +1,13 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# check_k8s_pv_pvc
|
# check_k8s_pv_pvc
|
||||||
# Vérifie l'état des PersistentVolumes (PV) et PersistentVolumeClaims (PVC) Kubernetes.
|
# Checks the state of Kubernetes PersistentVolumes (PV) and PersistentVolumeClaims (PVC).
|
||||||
# Exit codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
|
# Exit codes: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
|
||||||
#
|
#
|
||||||
# Usage examples:
|
# Usage examples:
|
||||||
# sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --crit 1 # CRITICAL si >=1 problème
|
# sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --crit 1 # CRITICAL if >=1 issue
|
||||||
# sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --ignore-ns kube-system # ignorer kube-system
|
# sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --ignore-ns kube-system # ignore kube-system
|
||||||
# sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --pvc-age-min 10 --crit 2 # ignorer PVC récents <10min, CRIT si >=2
|
# sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --pvc-age-min 10 --crit 2 # ignore recent PVCs <10min, CRIT if >=2
|
||||||
# sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --check-pv --check-pvc # (par défaut les 2 sont vérifiés)
|
# sudo /usr/lib/nagios/plugins/check_k8s_pv_pvc --check-pv --check-pvc # (both checked by default)
|
||||||
#
|
#
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
@@ -16,7 +16,7 @@ WARN=${WARN:-0}
|
|||||||
CRIT=${CRIT:-1}
|
CRIT=${CRIT:-1}
|
||||||
IGNORE_NS=""
|
IGNORE_NS=""
|
||||||
INCLUDE_NS=""
|
INCLUDE_NS=""
|
||||||
PVC_AGE_MIN=${PVC_AGE_MIN:-5} # en minutes : ignore PVC créés il y a moins de X minutes (défaut 5)
|
PVC_AGE_MIN=${PVC_AGE_MIN:-5} # in minutes: ignore PVCs created less than X minutes ago (default 5)
|
||||||
CHECK_PV=1
|
CHECK_PV=1
|
||||||
CHECK_PVC=1
|
CHECK_PVC=1
|
||||||
|
|
||||||
@@ -24,14 +24,14 @@ print_usage() {
|
|||||||
cat <<EOF
|
cat <<EOF
|
||||||
Usage: $0 [options]
|
Usage: $0 [options]
|
||||||
Options:
|
Options:
|
||||||
--warn N seuil WARN si >= N objets en erreur (default 0)
|
--warn N warn threshold if >= N objects in error (default 0)
|
||||||
--crit M seuil CRIT si >= M objets en erreur (default 1)
|
--crit M crit threshold if >= M objects in error (default 1)
|
||||||
--ignore-ns a,b,c namespaces à ignorer (comma separated)
|
--ignore-ns a,b,c namespaces to ignore (comma separated)
|
||||||
--namespaces a,b limiter aux namespaces donnés (comma separated)
|
--namespaces a,b limit to given namespaces (comma separated)
|
||||||
--pvc-age-min N ignore PVC créés il y a moins de N minutes (default 5)
|
--pvc-age-min N ignore PVCs created less than N minutes ago (default 5)
|
||||||
--no-pv disable PV checks
|
--no-pv disable PV checks
|
||||||
--no-pvc disable PVC checks
|
--no-pvc disable PVC checks
|
||||||
-h, --help affiche cette aide
|
-h, --help show this help
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,15 +1,15 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# check_k8s_replicasets
|
# check_k8s_replicasets
|
||||||
# Vérifie les ReplicaSets Kubernetes : readyReplicas < spec.replicas
|
# Checks Kubernetes ReplicaSets: readyReplicas < spec.replicas
|
||||||
# Retour: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
|
# Returns: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
|
||||||
#
|
#
|
||||||
# Usage:
|
# Usage:
|
||||||
# sudo /usr/lib/nagios/plugins/check_k8s_replicasets [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES]
|
# sudo /usr/lib/nagios/plugins/check_k8s_replicasets [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES]
|
||||||
#
|
#
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
WARN=${WARN:-0} # nombre de RS en erreur pour WARNING
|
WARN=${WARN:-0} # number of failed RS for WARNING
|
||||||
CRIT=${CRIT:-1} # nombre de RS en erreur pour CRITICAL par défaut (1 => 1 RS -> CRITICAL)
|
CRIT=${CRIT:-1} # number of failed RS for CRITICAL by default (1 => 1 RS -> CRITICAL)
|
||||||
IGNORE_NS=""
|
IGNORE_NS=""
|
||||||
INCLUDE_NS=""
|
INCLUDE_NS=""
|
||||||
AGE_MIN=0
|
AGE_MIN=0
|
||||||
@@ -17,8 +17,8 @@ AGE_MIN=0
|
|||||||
print_usage() {
|
print_usage() {
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
Usage: $0 [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES]
|
Usage: $0 [--warn N] [--crit M] [--ignore-ns ns1,ns2] [--namespaces ns1,ns2] [--age-min MINUTES]
|
||||||
--warn N : seuil warn si >=N ReplicaSets en erreur (default 0)
|
--warn N : warn threshold if >=N ReplicaSets in error (default 0)
|
||||||
--crit M : seuil crit si >=M ReplicaSets en erreur (default 1)
|
--crit M : crit threshold if >=M ReplicaSets in error (default 1)
|
||||||
--ignore-ns LIST : comma separated namespaces to ignore (default none)
|
--ignore-ns LIST : comma separated namespaces to ignore (default none)
|
||||||
--namespaces LIST: comma separated namespaces to check only (default all)
|
--namespaces LIST: comma separated namespaces to check only (default all)
|
||||||
--age-min N : ignore ReplicaSets created less than N minutes ago (avoid flapping during rollout)
|
--age-min N : ignore ReplicaSets created less than N minutes ago (avoid flapping during rollout)
|
||||||
|
|||||||
+30
-30
@@ -3,9 +3,9 @@
|
|||||||
# Nagios/Icinga2 plugin to check Proxmox Backup Server (PBS) backups.
|
# Nagios/Icinga2 plugin to check Proxmox Backup Server (PBS) backups.
|
||||||
#
|
#
|
||||||
# Checks:
|
# Checks:
|
||||||
# 1. Connectivité à l'API PBS → WARNING si injoignable
|
# 1. PBS API connectivity → WARNING if unreachable
|
||||||
# 2. Présence d'un backup du jour → CRITICAL si absent
|
# 2. Backup present for today → CRITICAL if missing
|
||||||
# 3. Statut de vérification du backup → WARNING si non vérifié / échec
|
# 3. Backup verification status → WARNING if unverified / failed
|
||||||
|
|
||||||
STATE_OK=0
|
STATE_OK=0
|
||||||
STATE_WARNING=1
|
STATE_WARNING=1
|
||||||
@@ -26,20 +26,20 @@ usage() {
|
|||||||
Usage: $0 -H <host> -T <api-token> -s <store> -n <backup-id> [-t <type>] [-P <port>] [-N <namespace>] [-k]
|
Usage: $0 -H <host> -T <api-token> -s <store> -n <backup-id> [-t <type>] [-P <port>] [-N <namespace>] [-k]
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
-H Hôte PBS (IP ou FQDN)
|
-H PBS host (IP or FQDN)
|
||||||
-T API token au format user@realm!tokenid:secret
|
-T API token in format user@realm!tokenid:secret
|
||||||
-s Nom du datastore PBS
|
-s PBS datastore name
|
||||||
-n Backup ID à vérifier (backup-id : nom d'hôte, ou ID numérique pour vm/ct)
|
-n Backup ID to check (backup-id: hostname, or numeric ID for vm/ct)
|
||||||
-t Type de backup : host (défaut), vm, ct
|
-t Backup type: host (default), vm, ct
|
||||||
-P Port de l'API PBS (défaut : 8007)
|
-P PBS API port (default: 8007)
|
||||||
-N Namespace PBS (optionnel)
|
-N PBS namespace (optional)
|
||||||
-k Ignorer les erreurs de certificat SSL
|
-k Ignore SSL certificate errors
|
||||||
|
|
||||||
Exemple (host) :
|
Example (host):
|
||||||
$0 -H pbs.example.com -T backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx \
|
$0 -H pbs.example.com -T backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx \
|
||||||
-s main -n myhost -k
|
-s main -n myhost -k
|
||||||
|
|
||||||
Exemple (vm) :
|
Example (vm):
|
||||||
$0 -H pbs.example.com -T backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx \
|
$0 -H pbs.example.com -T backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx \
|
||||||
-s main -n 100 -t vm -k
|
-s main -n 100 -t vm -k
|
||||||
EOF
|
EOF
|
||||||
@@ -65,54 +65,54 @@ if [[ -z "$PBS_HOST" || -z "$PBS_TOKEN" || -z "$PBS_DATASTORE" || -z "$BACKUP_ID
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ "$BACKUP_TYPE" != "host" && "$BACKUP_TYPE" != "vm" && "$BACKUP_TYPE" != "ct" ]]; then
|
if [[ "$BACKUP_TYPE" != "host" && "$BACKUP_TYPE" != "vm" && "$BACKUP_TYPE" != "ct" ]]; then
|
||||||
echo "UNKNOWN: type de backup invalide '${BACKUP_TYPE}' (valeurs acceptées : host, vm, ct)"
|
echo "UNKNOWN: invalid backup type '${BACKUP_TYPE}' (accepted values: host, vm, ct)"
|
||||||
exit $STATE_UNKNOWN
|
exit $STATE_UNKNOWN
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# --- Options curl ---
|
# --- curl options ---
|
||||||
CURL_OPTS=(-sf --max-time 15)
|
CURL_OPTS=(-sf --max-time 15)
|
||||||
[[ "$SKIP_SSL" -eq 1 ]] && CURL_OPTS+=(-k)
|
[[ "$SKIP_SSL" -eq 1 ]] && CURL_OPTS+=(-k)
|
||||||
|
|
||||||
# --- URL de l'API ---
|
# --- API URL ---
|
||||||
BASE_URL="https://${PBS_HOST}:${PBS_PORT}/api2/json"
|
BASE_URL="https://${PBS_HOST}:${PBS_PORT}/api2/json"
|
||||||
SNAPSHOTS_URL="${BASE_URL}/admin/datastore/${PBS_DATASTORE}/snapshots?backup-type=${BACKUP_TYPE}&backup-id=${BACKUP_ID}"
|
SNAPSHOTS_URL="${BASE_URL}/admin/datastore/${PBS_DATASTORE}/snapshots?backup-type=${BACKUP_TYPE}&backup-id=${BACKUP_ID}"
|
||||||
[[ -n "$NAMESPACE" ]] && SNAPSHOTS_URL+="&ns=${NAMESPACE}"
|
[[ -n "$NAMESPACE" ]] && SNAPSHOTS_URL+="&ns=${NAMESPACE}"
|
||||||
|
|
||||||
# --- Appel API ---
|
# --- API call ---
|
||||||
# PBS utilise PBSAPIToken (différent de PVEAPIToken utilisé par Proxmox VE)
|
# PBS uses PBSAPIToken (different from PVEAPIToken used by Proxmox VE)
|
||||||
RESPONSE=$(curl "${CURL_OPTS[@]}" \
|
RESPONSE=$(curl "${CURL_OPTS[@]}" \
|
||||||
-H "Authorization: PBSAPIToken=${PBS_TOKEN}" \
|
-H "Authorization: PBSAPIToken=${PBS_TOKEN}" \
|
||||||
"$SNAPSHOTS_URL" 2>&1)
|
"$SNAPSHOTS_URL" 2>&1)
|
||||||
CURL_EXIT=$?
|
CURL_EXIT=$?
|
||||||
|
|
||||||
if [[ $CURL_EXIT -ne 0 ]]; then
|
if [[ $CURL_EXIT -ne 0 ]]; then
|
||||||
echo "WARNING: Impossible de contacter l'API PBS (${PBS_HOST}:${PBS_PORT}) - code curl : ${CURL_EXIT}"
|
echo "WARNING: Cannot reach PBS API (${PBS_HOST}:${PBS_PORT}) - curl exit code: ${CURL_EXIT}"
|
||||||
exit $STATE_WARNING
|
exit $STATE_WARNING
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# --- Validation JSON ---
|
# --- JSON validation ---
|
||||||
if ! echo "$RESPONSE" | jq -e . >/dev/null 2>&1; then
|
if ! echo "$RESPONSE" | jq -e . >/dev/null 2>&1; then
|
||||||
echo "WARNING: Réponse invalide de l'API PBS (réponse non-JSON)"
|
echo "WARNING: Invalid response from PBS API (non-JSON response)"
|
||||||
exit $STATE_WARNING
|
exit $STATE_WARNING
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Vérifier que l'API n'a pas retourné une erreur applicative
|
# Check that the API did not return an application error
|
||||||
API_ERROR=$(echo "$RESPONSE" | jq -r '.errors // empty' 2>/dev/null)
|
API_ERROR=$(echo "$RESPONSE" | jq -r '.errors // empty' 2>/dev/null)
|
||||||
if [[ -n "$API_ERROR" ]]; then
|
if [[ -n "$API_ERROR" ]]; then
|
||||||
echo "WARNING: Erreur API PBS : ${API_ERROR}"
|
echo "WARNING: PBS API error: ${API_ERROR}"
|
||||||
exit $STATE_WARNING
|
exit $STATE_WARNING
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# --- Extraction des snapshots ---
|
# --- Snapshot extraction ---
|
||||||
SNAPSHOTS=$(echo "$RESPONSE" | jq -r '.data // []')
|
SNAPSHOTS=$(echo "$RESPONSE" | jq -r '.data // []')
|
||||||
TOTAL_COUNT=$(echo "$SNAPSHOTS" | jq 'length')
|
TOTAL_COUNT=$(echo "$SNAPSHOTS" | jq 'length')
|
||||||
|
|
||||||
if [[ "$TOTAL_COUNT" -eq 0 ]]; then
|
if [[ "$TOTAL_COUNT" -eq 0 ]]; then
|
||||||
echo "CRITICAL: Aucun backup trouvé pour '${BACKUP_ID}' dans le datastore '${PBS_DATASTORE}'"
|
echo "CRITICAL: No backup found for '${BACKUP_ID}' in datastore '${PBS_DATASTORE}'"
|
||||||
exit $STATE_CRITICAL
|
exit $STATE_CRITICAL
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# --- Filtrage des backups du jour (heure locale) ---
|
# --- Filter today's backups (local time) ---
|
||||||
TODAY_START=$(date -d "today 00:00:00" +%s)
|
TODAY_START=$(date -d "today 00:00:00" +%s)
|
||||||
TOMORROW_START=$(date -d "tomorrow 00:00:00" +%s)
|
TOMORROW_START=$(date -d "tomorrow 00:00:00" +%s)
|
||||||
|
|
||||||
@@ -125,20 +125,20 @@ TODAY_COUNT=$(echo "$TODAY_SNAPSHOTS" | jq 'length')
|
|||||||
if [[ "$TODAY_COUNT" -eq 0 ]]; then
|
if [[ "$TODAY_COUNT" -eq 0 ]]; then
|
||||||
LATEST_EPOCH=$(echo "$SNAPSHOTS" | jq '[.[]["backup-time"]] | max')
|
LATEST_EPOCH=$(echo "$SNAPSHOTS" | jq '[.[]["backup-time"]] | max')
|
||||||
LATEST_DATE=$(date -d "@${LATEST_EPOCH}" "+%Y-%m-%d %H:%M")
|
LATEST_DATE=$(date -d "@${LATEST_EPOCH}" "+%Y-%m-%d %H:%M")
|
||||||
echo "CRITICAL: Aucun backup aujourd'hui pour '${BACKUP_ID}' — dernier backup connu : ${LATEST_DATE}"
|
echo "CRITICAL: No backup today for '${BACKUP_ID}' — last known backup: ${LATEST_DATE}"
|
||||||
exit $STATE_CRITICAL
|
exit $STATE_CRITICAL
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# --- Backup le plus récent du jour ---
|
# --- Most recent backup of the day ---
|
||||||
LATEST=$(echo "$TODAY_SNAPSHOTS" | jq 'sort_by(.["backup-time"]) | last')
|
LATEST=$(echo "$TODAY_SNAPSHOTS" | jq 'sort_by(.["backup-time"]) | last')
|
||||||
BACKUP_EPOCH=$(echo "$LATEST" | jq -r '.["backup-time"]')
|
BACKUP_EPOCH=$(echo "$LATEST" | jq -r '.["backup-time"]')
|
||||||
BACKUP_TIME=$(date -d "@${BACKUP_EPOCH}" "+%Y-%m-%d %H:%M")
|
BACKUP_TIME=$(date -d "@${BACKUP_EPOCH}" "+%Y-%m-%d %H:%M")
|
||||||
VERIFY_STATE=$(echo "$LATEST" | jq -r '.verification.state // "unverified"')
|
VERIFY_STATE=$(echo "$LATEST" | jq -r '.verification.state // "unverified"')
|
||||||
|
|
||||||
if [[ "$VERIFY_STATE" != "ok" ]]; then
|
if [[ "$VERIFY_STATE" != "ok" ]]; then
|
||||||
echo "WARNING: Backup '${BACKUP_ID}' du ${BACKUP_TIME} présent mais statut de vérification : ${VERIFY_STATE}"
|
echo "WARNING: Backup '${BACKUP_ID}' from ${BACKUP_TIME} present but verification status: ${VERIFY_STATE}"
|
||||||
exit $STATE_WARNING
|
exit $STATE_WARNING
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "OK: Backup '${BACKUP_ID}' du ${BACKUP_TIME} présent et vérifié"
|
echo "OK: Backup '${BACKUP_ID}' from ${BACKUP_TIME} present and verified"
|
||||||
exit $STATE_OK
|
exit $STATE_OK
|
||||||
|
|||||||
@@ -23,33 +23,33 @@ STATE_WARNING=1
|
|||||||
STATE_CRITICAL=2
|
STATE_CRITICAL=2
|
||||||
STATE_UNKNOWN=3
|
STATE_UNKNOWN=3
|
||||||
|
|
||||||
# --- Valeurs par défaut ---
|
# --- Default values ---
|
||||||
CHECK_TYPE=""
|
CHECK_TYPE=""
|
||||||
WARN=0
|
WARN=0
|
||||||
CRIT=0
|
CRIT=0
|
||||||
AUTH=""
|
AUTH=""
|
||||||
|
|
||||||
# --- Aide ---
|
# --- Usage ---
|
||||||
usage() {
|
usage() {
|
||||||
cat << EOF
|
cat << EOF
|
||||||
Usage: $0 -x [type] -w <warning> -c <critical> [-a <password>]
|
Usage: $0 -x [type] -w <warning> -c <critical> [-a <password>]
|
||||||
|
|
||||||
Types de check (-x):
|
Check types (-x):
|
||||||
ping : Test de connexion simple (PONG)
|
ping : Simple connection test (PONG)
|
||||||
memory : Utilisation mémoire en % (Ex: -w 80 -c 90)
|
memory : Memory usage in % (e.g. -w 80 -c 90)
|
||||||
frag : Ratio de fragmentation (Ex: -w 1.5 -c 2.0)
|
frag : Fragmentation ratio (e.g. -w 1.5 -c 2.0)
|
||||||
hitrate : Taux d'efficacité du cache en % (Ex: -w 80 -c 50)
|
hitrate : Cache hit rate in % (e.g. -w 80 -c 50)
|
||||||
persistence : État des sauvegardes RDB/AOF
|
persistence : RDB/AOF backup status
|
||||||
replication : Statut Master/Slave et lag (Ex: -w 10 -c 60)
|
replication : Master/Slave status and lag (e.g. -w 10 -c 60)
|
||||||
clients : Nombre de clients connectés (Ex: -w 200 -c 500)
|
clients : Number of connected clients (e.g. -w 200 -c 500)
|
||||||
|
|
||||||
Optionnelle:
|
Optional:
|
||||||
-a : Mot de passe Redis (sera passé via REDISCLI_AUTH)
|
-a : Redis password (passed via REDISCLI_AUTH)
|
||||||
EOF
|
EOF
|
||||||
exit $STATE_UNKNOWN
|
exit $STATE_UNKNOWN
|
||||||
}
|
}
|
||||||
|
|
||||||
# --- Parsing des arguments ---
|
# --- Argument parsing ---
|
||||||
while getopts "x:w:c:a:?" opt; do
|
while getopts "x:w:c:a:?" opt; do
|
||||||
case "$opt" in
|
case "$opt" in
|
||||||
x) CHECK_TYPE="$OPTARG" ;;
|
x) CHECK_TYPE="$OPTARG" ;;
|
||||||
@@ -62,7 +62,7 @@ done
|
|||||||
|
|
||||||
if [ -z "$CHECK_TYPE" ]; then usage; fi
|
if [ -z "$CHECK_TYPE" ]; then usage; fi
|
||||||
|
|
||||||
# Export du mot de passe pour redis-cli
|
# Export password for redis-cli
|
||||||
if [ -n "$AUTH" ]; then
|
if [ -n "$AUTH" ]; then
|
||||||
export REDISCLI_AUTH="$AUTH"
|
export REDISCLI_AUTH="$AUTH"
|
||||||
fi
|
fi
|
||||||
@@ -70,12 +70,12 @@ fi
|
|||||||
EXIT_CODE=$STATE_OK
|
EXIT_CODE=$STATE_OK
|
||||||
INSTANCES_FOUND=0
|
INSTANCES_FOUND=0
|
||||||
|
|
||||||
# Listes pour stocker les résultats
|
# Lists to store results
|
||||||
CRIT_LIST=""
|
CRIT_LIST=""
|
||||||
WARN_LIST=""
|
WARN_LIST=""
|
||||||
OK_LIST=""
|
OK_LIST=""
|
||||||
|
|
||||||
# 1. Détection des ports
|
# 1. Port detection
|
||||||
if command -v ss &> /dev/null; then
|
if command -v ss &> /dev/null; then
|
||||||
PORTS=$(ss -tlnp | grep 'redis-server' | awk '{print $4}' | awk -F: '{print $NF}' | sort -u)
|
PORTS=$(ss -tlnp | grep 'redis-server' | awk '{print $4}' | awk -F: '{print $NF}' | sort -u)
|
||||||
elif command -v netstat &> /dev/null; then
|
elif command -v netstat &> /dev/null; then
|
||||||
@@ -85,16 +85,16 @@ else
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -z "$PORTS" ]; then
|
if [ -z "$PORTS" ]; then
|
||||||
echo "OK: Aucun Redis détecté"
|
echo "OK: No Redis instance detected"
|
||||||
exit $STATE_OK
|
exit $STATE_OK
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# 2. Exécution du check
|
# 2. Run check
|
||||||
for PORT in $PORTS; do
|
for PORT in $PORTS; do
|
||||||
# On ignore les ports < 1024 sauf le 6379 standard
|
# Skip ports < 1024 except standard 6379
|
||||||
if [ "$PORT" -lt 1024 ] && [ "$PORT" -ne 6379 ]; then continue; fi
|
if [ "$PORT" -lt 1024 ] && [ "$PORT" -ne 6379 ]; then continue; fi
|
||||||
|
|
||||||
# Vérification si l'instance répond
|
# Check if instance responds
|
||||||
if ! redis-cli -p "$PORT" PING 2>/dev/null | grep -q "PONG"; then
|
if ! redis-cli -p "$PORT" PING 2>/dev/null | grep -q "PONG"; then
|
||||||
MSG="Port $PORT (Erreur Connexion/Auth)"
|
MSG="Port $PORT (Erreur Connexion/Auth)"
|
||||||
if [ -z "$CRIT_LIST" ]; then CRIT_LIST="$MSG"; else CRIT_LIST="$CRIT_LIST, $MSG"; fi
|
if [ -z "$CRIT_LIST" ]; then CRIT_LIST="$MSG"; else CRIT_LIST="$CRIT_LIST, $MSG"; fi
|
||||||
@@ -172,7 +172,7 @@ for PORT in $PORTS; do
|
|||||||
*) usage ;;
|
*) usage ;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
# Stockage du résultat
|
# Store result
|
||||||
MSG="Port $PORT ($VAL_STR)"
|
MSG="Port $PORT ($VAL_STR)"
|
||||||
case "$CURR_STATUS" in
|
case "$CURR_STATUS" in
|
||||||
$STATE_CRITICAL)
|
$STATE_CRITICAL)
|
||||||
@@ -189,7 +189,7 @@ for PORT in $PORTS; do
|
|||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
# Construction du message de sortie
|
# Build output message
|
||||||
if [ "$EXIT_CODE" -eq $STATE_OK ]; then
|
if [ "$EXIT_CODE" -eq $STATE_OK ]; then
|
||||||
echo "OK: All $INSTANCES_FOUND instance(s) are healthy ($CHECK_TYPE). $OK_LIST"
|
echo "OK: All $INSTANCES_FOUND instance(s) are healthy ($CHECK_TYPE). $OK_LIST"
|
||||||
exit $STATE_OK
|
exit $STATE_OK
|
||||||
|
|||||||
Reference in New Issue
Block a user