From b7a057581811514392854da68f3b685eadae26b3 Mon Sep 17 00:00:00 2001
From: Ludovic Cartier
Date: Sun, 31 May 2026 18:53:17 +0200
Subject: [PATCH] add check_pbs_backup

---
 README.md                   |  31 +++++++
 files/nrpe/check_pbs_backup | 168 ++++++++++++++++++++++++++++++++++++
 templates/nrpe.j2           |   7 ++
 3 files changed, 206 insertions(+)
 create mode 100755 files/nrpe/check_pbs_backup

diff --git a/README.md b/README.md
index 9b8cd15..bf96907 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,7 @@ This Ansible role installs and configures NRPE plugins for monitoring various sy
   - pod restarts
   - pv & pvc
   - replicasets
+- pbs (proxmox backup server)
 - raid
   - mdadm
   - 3ware
@@ -66,6 +67,7 @@ The following checks are deployed to `/usr/lib/nagios/plugins/` (or configured p
 - `check_needrestart`
 - `check_nvme_smart`
 - `check_nvme_temperature`
+- `check_pbs_backup`
 - `check_postfix_mailqueue`
 - `check_postgresql`
 - `check_proc_age`
@@ -135,6 +137,14 @@ The following checks are deployed to `/usr/lib/nagios/plugins/` (or configured p
 | `nrpe_ntp_host` | `europe.pool.ntp.org` | `check_ntp` | NTP host to check. |
 | `nrpe_ntp_warning` | `10` | `check_ntp` | Warning threshold for NTP offset. |
 | `nrpe_ntp_critical` | `15` | `check_ntp` | Critical threshold for NTP offset. |
+| `nrpe_pbs.host` | `-` | `check_pbs_backup` | Hôte PBS (IP ou FQDN). |
+| `nrpe_pbs.token` | `-` | `check_pbs_backup` | API token PBS au format `user@realm!tokenid:secret`. |
+| `nrpe_pbs.store` | `-` | `check_pbs_backup` | Nom du datastore PBS. |
+| `nrpe_pbs.backups` | `-` | `check_pbs_backup` | Liste des backup-id à vérifier. |
+| `nrpe_pbs.type` | `host` | `check_pbs_backup` | Type de backup : `host`, `vm` ou `ct` (optionnel). |
+| `nrpe_pbs.port` | `8007` | `check_pbs_backup` | Port de l'API PBS (optionnel). |
+| `nrpe_pbs.namespace` | `-` | `check_pbs_backup` | Namespace PBS (optionnel). |
+| `nrpe_pbs.ssl_insecure` | `false` | `check_pbs_backup` | Ignorer les erreurs de certificat SSL (optionnel). |
 
 ## Example Playbooks
 
@@ -165,6 +175,27 @@ The following checks are deployed to `/usr/lib/nagios/plugins/` (or configured p
         nrpe_disk_usage_critical: 85
 ```
 
+### PBS Backups
+
+```yaml
+---
+- hosts: myserver
+  roles:
+    - role: nrpe
+      vars:
+        nrpe_pbs:
+          host: pbs.example.com
+          token: "backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
+          store: main
+          backups:
+            - myhost
+            - otherhost
+          ssl_insecure: true  # optionnel, si certificat auto-signé
+          namespace: mynamespace  # optionnel
+```
+
+> L'API token doit avoir le privilège `DatastoreAudit` sur le datastore concerné.
+
 ## License
 
 MIT
\ No newline at end of file
diff --git a/files/nrpe/check_pbs_backup b/files/nrpe/check_pbs_backup
new file mode 100755
index 0000000..f4b0ae7
--- /dev/null
+++ b/files/nrpe/check_pbs_backup
@@ -0,0 +1,168 @@
+#!/bin/bash
+#
+# Nagios/Icinga2 plugin to check Proxmox Backup Server (PBS) backups.
+#
+# Checks:
+#   1. PBS API reachability              -> WARNING if unreachable
+#   2. A backup taken today exists       -> CRITICAL if missing
+#   3. Verification state of that backup -> WARNING if unverified / failed
+#
+# Requires: curl, jq, GNU date.
+
+readonly STATE_OK=0
+readonly STATE_WARNING=1
+readonly STATE_CRITICAL=2
+readonly STATE_UNKNOWN=3
+
+PBS_HOST=""
+PBS_PORT=8007
+PBS_TOKEN=""
+PBS_STORE=""
+BACKUP_ID=""
+BACKUP_TYPE="host"
+NAMESPACE=""
+SKIP_SSL=0
+
+# Print the help text and exit with UNKNOWN (also used for invalid options).
+usage() {
+    # Unquoted heredoc: $0 is expanded, and "\\" prints one literal backslash.
+    cat << EOF
+Usage: $0 -H <host> -T <token> -s <store> -n <backup-id> [-t <type>] [-P <port>] [-N <namespace>] [-k]
+
+Options:
+  -H <host>       Hôte PBS (IP ou FQDN)
+  -T <token>      API token au format user@realm!tokenid:secret
+  -s <store>      Nom du datastore PBS
+  -n <backup-id>  Backup ID à vérifier (backup-id : nom d'hôte, ou ID numérique pour vm/ct)
+  -t <type>       Type de backup : host (défaut), vm, ct
+  -P <port>       Port de l'API PBS (défaut : 8007)
+  -N <namespace>  Namespace PBS (optionnel)
+  -k              Ignorer les erreurs de certificat SSL
+
+Exemple (host) :
+  $0 -H pbs.example.com -T backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx \\
+      -s main -n myhost -k
+
+Exemple (vm) :
+  $0 -H pbs.example.com -T backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx \\
+      -s main -n 100 -t vm -k
+EOF
+    exit $STATE_UNKNOWN
+}
+
+while getopts "H:T:s:n:t:P:N:k" opt; do
+    case "$opt" in
+        H) PBS_HOST="$OPTARG" ;;
+        T) PBS_TOKEN="$OPTARG" ;;
+        s) PBS_STORE="$OPTARG" ;;
+        n) BACKUP_ID="$OPTARG" ;;
+        t) BACKUP_TYPE="$OPTARG" ;;
+        P) PBS_PORT="$OPTARG" ;;
+        N) NAMESPACE="$OPTARG" ;;
+        k) SKIP_SSL=1 ;;
+        *) usage ;;
+    esac
+done
+
+if [[ -z "$PBS_HOST" || -z "$PBS_TOKEN" || -z "$PBS_STORE" || -z "$BACKUP_ID" ]]; then
+    usage
+fi
+
+if [[ "$BACKUP_TYPE" != "host" && "$BACKUP_TYPE" != "vm" && "$BACKUP_TYPE" != "ct" ]]; then
+    echo "UNKNOWN: type de backup invalide '${BACKUP_TYPE}' (valeurs acceptées : host, vm, ct)"
+    exit $STATE_UNKNOWN
+fi
+
+if ! [[ "$PBS_PORT" =~ ^[0-9]+$ ]]; then
+    echo "UNKNOWN: port invalide '${PBS_PORT}'"
+    exit $STATE_UNKNOWN
+fi
+
+# A missing tool is a plugin/setup problem, not a PBS problem: report UNKNOWN
+# instead of a misleading connectivity or JSON warning further down.
+for tool in curl jq; do
+    if ! command -v "$tool" >/dev/null 2>&1; then
+        echo "UNKNOWN: commande requise introuvable : ${tool}"
+        exit $STATE_UNKNOWN
+    fi
+done
+
+# --- curl options ---
+# -G together with --data-urlencode sends the parameters as a URL-encoded
+# query string, so backup ids or namespaces with reserved characters are safe.
+CURL_OPTS=(-sf -G --max-time 15)
+[[ "$SKIP_SSL" -eq 1 ]] && CURL_OPTS+=(-k)
+CURL_OPTS+=(--data-urlencode "backup-type=${BACKUP_TYPE}")
+CURL_OPTS+=(--data-urlencode "backup-id=${BACKUP_ID}")
+[[ -n "$NAMESPACE" ]] && CURL_OPTS+=(--data-urlencode "ns=${NAMESPACE}")
+
+# --- API URL ---
+BASE_URL="https://${PBS_HOST}:${PBS_PORT}/api2/json"
+SNAPSHOTS_URL="${BASE_URL}/admin/datastore/${PBS_STORE}/snapshots"
+
+# --- API call ---
+# PBS expects the PBSAPIToken scheme (not PVEAPIToken, which is Proxmox VE's).
+# stderr is discarded so it can never be mixed into the JSON body; the curl
+# exit code is what gets reported on failure.
+RESPONSE=$(curl "${CURL_OPTS[@]}" \
+    -H "Authorization: PBSAPIToken=${PBS_TOKEN}" \
+    "$SNAPSHOTS_URL" 2>/dev/null)
+CURL_EXIT=$?
+
+if [[ $CURL_EXIT -ne 0 ]]; then
+    echo "WARNING: Impossible de contacter l'API PBS (${PBS_HOST}:${PBS_PORT}) - code curl : ${CURL_EXIT}"
+    exit $STATE_WARNING
+fi
+
+# --- JSON validation ---
+if ! jq -e . >/dev/null 2>&1 <<< "$RESPONSE"; then
+    echo "WARNING: Réponse invalide de l'API PBS (réponse non-JSON)"
+    exit $STATE_WARNING
+fi
+
+# Make sure the API did not return an application-level error.
+API_ERROR=$(jq -r '.errors // empty' 2>/dev/null <<< "$RESPONSE")
+if [[ -n "$API_ERROR" ]]; then
+    echo "WARNING: Erreur API PBS : ${API_ERROR}"
+    exit $STATE_WARNING
+fi
+
+# --- Snapshot extraction ---
+SNAPSHOTS=$(jq -r '.data // []' <<< "$RESPONSE")
+TOTAL_COUNT=$(jq 'length' <<< "$SNAPSHOTS")
+
+if [[ "$TOTAL_COUNT" -eq 0 ]]; then
+    echo "CRITICAL: Aucun backup trouvé pour '${BACKUP_ID}' dans le datastore '${PBS_STORE}'"
+    exit $STATE_CRITICAL
+fi
+
+# --- Keep only today's backups (local time) ---
+TODAY_START=$(date -d "today 00:00:00" +%s)
+TOMORROW_START=$(date -d "tomorrow 00:00:00" +%s)
+
+TODAY_SNAPSHOTS=$(jq \
+    --argjson ts "$TODAY_START" \
+    --argjson te "$TOMORROW_START" \
+    '[.[] | select(.["backup-time"] >= $ts and .["backup-time"] < $te)]' <<< "$SNAPSHOTS")
+TODAY_COUNT=$(jq 'length' <<< "$TODAY_SNAPSHOTS")
+
+if [[ "$TODAY_COUNT" -eq 0 ]]; then
+    LATEST_EPOCH=$(jq '[.[]["backup-time"]] | max' <<< "$SNAPSHOTS")
+    LATEST_DATE=$(date -d "@${LATEST_EPOCH}" "+%Y-%m-%d %H:%M")
+    echo "CRITICAL: Aucun backup aujourd'hui pour '${BACKUP_ID}' — dernier backup connu : ${LATEST_DATE}"
+    exit $STATE_CRITICAL
+fi
+
+# --- Most recent backup of the day ---
+LATEST=$(jq 'sort_by(.["backup-time"]) | last' <<< "$TODAY_SNAPSHOTS")
+BACKUP_EPOCH=$(jq -r '.["backup-time"]' <<< "$LATEST")
+BACKUP_TIME=$(date -d "@${BACKUP_EPOCH}" "+%Y-%m-%d %H:%M")
+VERIFY_STATE=$(jq -r '.verification.state // "unverified"' <<< "$LATEST")
+
+if [[ "$VERIFY_STATE" != "ok" ]]; then
+    echo "WARNING: Backup '${BACKUP_ID}' du ${BACKUP_TIME} présent mais statut de vérification : ${VERIFY_STATE}"
+    exit $STATE_WARNING
+fi
+
+echo "OK: Backup '${BACKUP_ID}' du ${BACKUP_TIME} présent et vérifié"
+exit $STATE_OK
diff --git a/templates/nrpe.j2 b/templates/nrpe.j2
index 32cf893..9374e01 100644
--- a/templates/nrpe.j2
+++ b/templates/nrpe.j2
@@ -162,3 +162,10 @@ command[check_ceph_mon]=/usr/lib/nagios/plugins/check_ceph mon
 command[check_ceph_cap]=/usr/lib/nagios/plugins/check_ceph cap
 command[check_ceph_pg]=/usr/lib/nagios/plugins/check_ceph pg
 {% endif %}
+
+{% if nrpe_pbs is defined %}
+# pbs backups
+{% for backup_id in nrpe_pbs.backups %}
+command[check_pbs_backup_{{ backup_id }}]=/usr/lib/nagios/plugins/check_pbs_backup -H {{ nrpe_pbs.host }} -T {{ nrpe_pbs.token }} -s {{ nrpe_pbs.store }} -n {{ backup_id }}{{ ' -t ' + nrpe_pbs.type if nrpe_pbs.type is defined else '' }}{{ ' -P ' + nrpe_pbs.port|string if nrpe_pbs.port is defined else '' }}{{ ' -N ' + nrpe_pbs.namespace if nrpe_pbs.namespace is defined else '' }}{{ ' -k' if nrpe_pbs.ssl_insecure | default(false) else '' }}
+{% endfor %}
+{% endif %}
\ No newline at end of file