add check_pbs_backup

This commit is contained in:
Ludovic Cartier
2026-05-31 18:53:17 +02:00
parent 68b4fedeb9
commit b7a0575818
3 changed files with 181 additions and 0 deletions
+30
View File
@@ -34,6 +34,7 @@ This Ansible role installs and configures NRPE plugins for monitoring various sy
- pod restarts - pod restarts
- pv & pvc - pv & pvc
- replicasets - replicasets
- pbs (proxmox backup server)
- raid - raid
- mdadm - mdadm
- 3ware - 3ware
@@ -66,6 +67,7 @@ The following checks are deployed to `/usr/lib/nagios/plugins/` (or configured p
- `check_needrestart` - `check_needrestart`
- `check_nvme_smart` - `check_nvme_smart`
- `check_nvme_temperature` - `check_nvme_temperature`
- `check_pbs_backup`
- `check_postfix_mailqueue` - `check_postfix_mailqueue`
- `check_postgresql` - `check_postgresql`
- `check_proc_age` - `check_proc_age`
@@ -135,6 +137,13 @@ The following checks are deployed to `/usr/lib/nagios/plugins/` (or configured p
| `nrpe_ntp_host` | `europe.pool.ntp.org` | `check_ntp` | NTP host to check. | | `nrpe_ntp_host` | `europe.pool.ntp.org` | `check_ntp` | NTP host to check. |
| `nrpe_ntp_warning` | `10` | `check_ntp` | Warning threshold for NTP offset. | | `nrpe_ntp_warning` | `10` | `check_ntp` | Warning threshold for NTP offset. |
| `nrpe_ntp_critical` | `15` | `check_ntp` | Critical threshold for NTP offset. | | `nrpe_ntp_critical` | `15` | `check_ntp` | Critical threshold for NTP offset. |
| `nrpe_pbs.host` | `-` | `check_pbs_backup` | PBS host (IP or FQDN). |
| `nrpe_pbs.token` | `-` | `check_pbs_backup` | PBS API token, in the form `user@realm!tokenid:secret`. |
| `nrpe_pbs.store` | `-` | `check_pbs_backup` | Name of the PBS datastore. |
| `nrpe_pbs.backups` | `-` | `check_pbs_backup` | List of backup IDs to check (one NRPE command is generated per ID). |
| `nrpe_pbs.type` | `host` | `check_pbs_backup` | Backup type: `host`, `vm` or `ct` (optional). |
| `nrpe_pbs.port` | `8007` | `check_pbs_backup` | PBS API port (optional). |
| `nrpe_pbs.namespace` | `-` | `check_pbs_backup` | PBS namespace (optional). |
| `nrpe_pbs.ssl_insecure` | `false` | `check_pbs_backup` | Ignore SSL certificate errors (optional). |
## Example Playbooks ## Example Playbooks
@@ -165,6 +174,27 @@ The following checks are deployed to `/usr/lib/nagios/plugins/` (or configured p
nrpe_disk_usage_critical: 85 nrpe_disk_usage_critical: 85
``` ```
### PBS Backups
```yaml
---
- hosts: myserver
roles:
- role: nrpe
vars:
nrpe_pbs:
host: pbs.example.com
token: "backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
store: main
backups:
- myhost
- otherhost
ssl_insecure: true # optional, for a self-signed certificate
namespace: mynamespace # optional
```
> The API token must be granted the `DatastoreAudit` role (privilege `Datastore.Audit`) on the datastore being checked.
## License ## License
MIT MIT
+144
View File
@@ -0,0 +1,144 @@
#!/bin/bash
#
# Nagios/Icinga2 plugin to check Proxmox Backup Server (PBS) backups.
#
# Checks:
# 1. Connectivité à l'API PBS → WARNING si injoignable
# 2. Présence d'un backup du jour → CRITICAL si absent
# 3. Statut de vérification du backup → WARNING si non vérifié / échec
# Exit statuses understood by Nagios/Icinga for a plugin result.
STATE_OK=0 STATE_WARNING=1 STATE_CRITICAL=2 STATE_UNKNOWN=3

# Connection settings; host, token and datastore have no usable default.
PBS_HOST=
PBS_TOKEN=
PBS_STORE=
PBS_PORT=8007

# What to look for: backup-id (mandatory), backup type and optional namespace.
BACKUP_ID=
BACKUP_TYPE=host
NAMESPACE=

# 1 disables TLS certificate verification (set by -k).
SKIP_SSL=0
#######################################
# Print the command-line help and terminate with the UNKNOWN plugin status.
# Globals:   STATE_UNKNOWN (read; 3 is used when it is unset)
# Arguments: none
# Outputs:   help text on stdout
# Returns:   never returns; exits the script
#######################################
usage() {
  # The here-document is unquoted so that $0 expands. In that mode a backslash
  # placed right before a newline is a line continuation and disappears, so
  # the examples write "\\" to print a literal trailing backslash.
  # The example token is single-quoted because "!" triggers history expansion
  # when the command is pasted into an interactive bash.
  cat << EOF
Usage: $0 -H <host> -T <api-token> -s <store> -n <backup-id> [-t <type>] [-P <port>] [-N <namespace>] [-k]
Options:
-H Hôte PBS (IP ou FQDN)
-T API token au format user@realm!tokenid:secret
-s Nom du datastore PBS
-n Backup ID à vérifier (backup-id : nom d'hôte, ou ID numérique pour vm/ct)
-t Type de backup : host (défaut), vm, ct
-P Port de l'API PBS (défaut : 8007)
-N Namespace PBS (optionnel)
-k Ignorer les erreurs de certificat SSL
Exemple (host) :
$0 -H pbs.example.com -T 'backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' \\
    -s main -n myhost -k
Exemple (vm) :
$0 -H pbs.example.com -T 'backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' \\
    -s main -n 100 -t vm -k
EOF
  exit "${STATE_UNKNOWN:-3}"
}
# --- Command-line parsing ---
# Each recognised option overwrites one of the defaults. getopts reports an
# unknown option or a missing option argument as "?", which lands in the
# catch-all arm and shows the help.
while getopts "H:T:s:n:t:P:N:k" opt; do
  case "$opt" in
    H) PBS_HOST="$OPTARG" ;;
    T) PBS_TOKEN="$OPTARG" ;;
    s) PBS_STORE="$OPTARG" ;;
    n) BACKUP_ID="$OPTARG" ;;
    t) BACKUP_TYPE="$OPTARG" ;;
    P) PBS_PORT="$OPTARG" ;;
    N) NAMESPACE="$OPTARG" ;;
    k) SKIP_SSL=1 ;;
    *) usage ;;
  esac
done

# Host, token, datastore and backup-id are mandatory.
if [[ -z "$PBS_HOST" || -z "$PBS_TOKEN" || -z "$PBS_STORE" || -z "$BACKUP_ID" ]]; then
  usage
fi

# Only the three backup types listed in the help are accepted.
if [[ "$BACKUP_TYPE" != "host" && "$BACKUP_TYPE" != "vm" && "$BACKUP_TYPE" != "ct" ]]; then
  echo "UNKNOWN: type de backup invalide '${BACKUP_TYPE}' (valeurs acceptées : host, vm, ct)"
  exit $STATE_UNKNOWN
fi

# The port ends up inside the request URL. Reject anything that is not a TCP
# port number here, so a typo is reported as a configuration problem (UNKNOWN)
# rather than as an unreachable API. The length cap keeps the arithmetic test
# from overflowing; "10#" prevents a leading zero from being read as octal.
if ! [[ "$PBS_PORT" =~ ^[0-9]{1,5}$ ]] || (( 10#$PBS_PORT < 1 || 10#$PBS_PORT > 65535 )); then
  echo "UNKNOWN: port invalide '${PBS_PORT}' (entier entre 1 et 65535 attendu)"
  exit $STATE_UNKNOWN
fi
# --- curl options ---
# -s: no progress output, -f: non-zero exit on an HTTP error status,
# --max-time: give up after 15 seconds in total.
CURL_OPTS=(-sf --max-time 15)
if [[ "$SKIP_SSL" -eq 1 ]]; then
  # -k: accept the server certificate without verification.
  CURL_OPTS+=(-k)
fi

# --- API URL ---
BASE_URL="https://${PBS_HOST}:${PBS_PORT}/api2/json"
SNAPSHOTS_URL="${BASE_URL}/admin/datastore/${PBS_STORE}/snapshots"

# Query parameters are passed separately instead of being pasted into the URL:
# with -G, curl appends every --data-urlencode item to the query string and
# percent-encodes the part after "=", so a value containing "&", "#", spaces
# or similar cannot alter the request.
QUERY_OPTS=(
  -G
  --data-urlencode "backup-type=${BACKUP_TYPE}"
  --data-urlencode "backup-id=${BACKUP_ID}"
)
if [[ -n "$NAMESPACE" ]]; then
  QUERY_OPTS+=(--data-urlencode "ns=${NAMESPACE}")
fi

# --- API call ---
# PBS expects the PBSAPIToken scheme (not PVEAPIToken, which Proxmox VE uses).
# stderr is discarded rather than captured so that RESPONSE only ever holds
# the HTTP body that is parsed as JSON afterwards.
RESPONSE=$(curl "${CURL_OPTS[@]}" "${QUERY_OPTS[@]}" \
  -H "Authorization: PBSAPIToken=${PBS_TOKEN}" \
  "$SNAPSHOTS_URL" 2>/dev/null)
CURL_EXIT=$?

# Any curl failure (network, TLS, timeout, HTTP error) is reported as WARNING
# together with curl's exit code.
if [[ $CURL_EXIT -ne 0 ]]; then
  echo "WARNING: Impossible de contacter l'API PBS (${PBS_HOST}:${PBS_PORT}) - code curl : ${CURL_EXIT}"
  exit $STATE_WARNING
fi
# --- Dependency check ---
# Everything below parses the reply with jq. Without this guard a missing jq
# binary would make the JSON validation fail and be reported as an invalid
# PBS reply, which points the operator at the wrong system.
if ! command -v jq >/dev/null 2>&1; then
  echo "UNKNOWN: la commande 'jq' est requise mais introuvable"
  exit $STATE_UNKNOWN
fi

# --- JSON validation ---
# jq -e fails on unparsable input and also when the document is null/false.
if ! jq -e . >/dev/null 2>&1 <<< "$RESPONSE"; then
  echo "WARNING: Réponse invalide de l'API PBS (réponse non-JSON)"
  exit $STATE_WARNING
fi

# Check that the API did not return an application-level error: a non-empty
# "errors" member in the reply is reported as WARNING.
API_ERROR=$(jq -r '.errors // empty' 2>/dev/null <<< "$RESPONSE")
if [[ -n "$API_ERROR" ]]; then
  echo "WARNING: Erreur API PBS : ${API_ERROR}"
  exit $STATE_WARNING
fi
# --- Snapshot extraction ---
# Keep the "data" member of the reply, falling back to an empty list when it
# is absent or null, then count its entries.
SNAPSHOTS=$(jq -r '.data // []' <<< "$RESPONSE")
TOTAL_COUNT=$(jq 'length' <<< "$SNAPSHOTS")

# No snapshot at all for this backup-id in the datastore: CRITICAL.
if [[ "$TOTAL_COUNT" -eq 0 ]]; then
  echo "CRITICAL: Aucun backup trouvé pour '${BACKUP_ID}' dans le datastore '${PBS_STORE}'"
  exit $STATE_CRITICAL
fi
# --- Keep only today's snapshots (local time) ---
# The day is the half-open window [today 00:00, tomorrow 00:00), expressed in
# epoch seconds so it can be compared with each snapshot's "backup-time".
DAY_BEGIN=$(date -d "today 00:00:00" +%s)
DAY_END=$(date -d "tomorrow 00:00:00" +%s)
TODAY_SNAPSHOTS=$(jq --argjson ts "$DAY_BEGIN" --argjson te "$DAY_END" \
  'map(select(.["backup-time"] >= $ts and .["backup-time"] < $te))' \
  <<< "$SNAPSHOTS")
TODAY_COUNT=$(jq 'length' <<< "$TODAY_SNAPSHOTS")

# Nothing was backed up today: CRITICAL, quoting the date of the newest
# snapshot that does exist to help the operator.
if [[ "$TODAY_COUNT" -eq 0 ]]; then
  NEWEST_EPOCH=$(jq 'map(.["backup-time"]) | max' <<< "$SNAPSHOTS")
  NEWEST_DATE=$(date -d "@${NEWEST_EPOCH}" "+%Y-%m-%d %H:%M")
  echo "CRITICAL: Aucun backup aujourd'hui pour '${BACKUP_ID}' — dernier backup connu : ${NEWEST_DATE}"
  exit $STATE_CRITICAL
fi
# --- Most recent snapshot of the day ---
# Order today's snapshots by "backup-time" and keep the last one.
LATEST=$(jq 'sort_by(.["backup-time"]) | .[-1]' <<< "$TODAY_SNAPSHOTS")
BACKUP_EPOCH=$(jq -r '."backup-time"' <<< "$LATEST")
BACKUP_TIME=$(date -d "@${BACKUP_EPOCH}" "+%Y-%m-%d %H:%M")

# A snapshot without verification data is treated as "unverified".
VERIFY_STATE=$(jq -r '.verification.state // "unverified"' <<< "$LATEST")

# Only the "ok" verification state yields OK; every other state is a WARNING.
if [[ "$VERIFY_STATE" == "ok" ]]; then
  echo "OK: Backup '${BACKUP_ID}' du ${BACKUP_TIME} présent et vérifié"
  exit $STATE_OK
fi

echo "WARNING: Backup '${BACKUP_ID}' du ${BACKUP_TIME} présent mais statut de vérification : ${VERIFY_STATE}"
exit $STATE_WARNING
+7
View File
@@ -162,3 +162,10 @@ command[check_ceph_mon]=/usr/lib/nagios/plugins/check_ceph mon
command[check_ceph_cap]=/usr/lib/nagios/plugins/check_ceph cap command[check_ceph_cap]=/usr/lib/nagios/plugins/check_ceph cap
command[check_ceph_pg]=/usr/lib/nagios/plugins/check_ceph pg command[check_ceph_pg]=/usr/lib/nagios/plugins/check_ceph pg
{% endif %} {% endif %}
{% if nrpe_pbs is defined %}
{# One NRPE command is emitted per entry of nrpe_pbs.backups. Every user-supplied value goes through the `quote` filter because the command line is interpreted by a shell when NRPE runs it: the API token contains "!" and ":", and any value with a space or shell metacharacter would otherwise be split or expanded. `bool` makes a string such as "false" behave as false. #}
# pbs backups
{% for backup_id in nrpe_pbs.backups %}
command[check_pbs_backup_{{ backup_id }}]=/usr/lib/nagios/plugins/check_pbs_backup -H {{ nrpe_pbs.host | quote }} -T {{ nrpe_pbs.token | quote }} -s {{ nrpe_pbs.store | quote }} -n {{ backup_id | string | quote }}{{ ' -t ' + (nrpe_pbs.type | quote) if nrpe_pbs.type is defined else '' }}{{ ' -P ' + (nrpe_pbs.port | string | quote) if nrpe_pbs.port is defined else '' }}{{ ' -N ' + (nrpe_pbs.namespace | quote) if nrpe_pbs.namespace is defined else '' }}{{ ' -k' if (nrpe_pbs.ssl_insecure | default(false) | bool) else '' }}
{% endfor %}
{% endif %}