#!/bin/bash
#
# Nagios/Icinga2 plugin to check Proxmox Backup Server (PBS) backups.
#
# Checks:
#   1. PBS API connectivity                   → WARNING if unreachable
#   2. Backup present for today               → CRITICAL if missing
#   3. Backup verification status             → WARNING if unverified / failed

# Plugin exit codes used by every exit path of this script.
# They are never reassigned, so they are locked as read-only constants.
readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

# Option defaults; each one is overwritten by the matching command-line flag.
PBS_HOST=""          # -H  PBS host (required)
PBS_PORT=8007        # -P  PBS API port
PBS_TOKEN=""         # -T  API token (required)
PBS_DATASTORE=""     # -s  datastore name (required)
BACKUP_ID=""         # -n  backup ID (required)
BACKUP_TYPE="host"   # -t  host, vm or ct
NAMESPACE=""         # -N  namespace (optional)
SKIP_SSL=0           # -k  set to 1 to pass -k to curl

# Print the command-line help on stdout and exit with STATE_UNKNOWN.
# Called for unknown options and when a required option is missing.
#
# The here-document delimiter is unquoted so that $0 expands to the script
# path. In that mode a backslash followed by a newline is removed, so the
# line-continuation backslashes of the examples are written as "\\" to be
# printed literally. The example token is single-quoted because it contains
# '!', which an interactive bash would otherwise treat as history expansion.
usage() {
    cat << EOF
Usage: $0 -H <host> -T <api-token> -s <store> -n <backup-id> [-t <type>] [-P <port>] [-N <namespace>] [-k]

Options:
  -H  PBS host (IP or FQDN)
  -T  API token in format user@realm!tokenid:secret
  -s  PBS datastore name
  -n  Backup ID to check (backup-id: hostname, or numeric ID for vm/ct)
  -t  Backup type: host (default), vm, ct
  -P  PBS API port (default: 8007)
  -N  PBS namespace (optional)
  -k  Ignore SSL certificate errors

Example (host):
  $0 -H pbs.example.com -T 'backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' \\
     -s main -n myhost -k

Example (vm):
  $0 -H pbs.example.com -T 'backup@pbs!monitoring:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' \\
     -s main -n 100 -t vm -k
EOF
    exit "$STATE_UNKNOWN"
}

# Copy each recognised flag into its configuration variable.
# getopts reports unknown flags and missing option arguments as '?',
# which lands in the catch-all arm and ends in usage().
while getopts "H:T:s:n:t:P:N:k" flag; do
    case "$flag" in
        H)
            PBS_HOST=$OPTARG
            ;;
        T)
            PBS_TOKEN=$OPTARG
            ;;
        s)
            PBS_DATASTORE=$OPTARG
            ;;
        n)
            BACKUP_ID=$OPTARG
            ;;
        t)
            BACKUP_TYPE=$OPTARG
            ;;
        P)
            PBS_PORT=$OPTARG
            ;;
        N)
            NAMESPACE=$OPTARG
            ;;
        k)
            SKIP_SSL=1
            ;;
        *)
            usage
            ;;
    esac
done

# --- Argument validation ---
# Host, token, datastore and backup ID are all mandatory.
if [[ -z "$PBS_HOST" || -z "$PBS_TOKEN" || -z "$PBS_DATASTORE" || -z "$BACKUP_ID" ]]; then
    usage
fi

# Only the three backup types listed in the usage text are accepted.
case "$BACKUP_TYPE" in
    host|vm|ct) ;;
    *)
        echo "UNKNOWN: invalid backup type '${BACKUP_TYPE}' (accepted values: host, vm, ct)"
        exit $STATE_UNKNOWN
        ;;
esac

# The port is interpolated into the request URL. Reject anything that is not
# a TCP port number here, so that a typo is reported as a configuration
# error (UNKNOWN) instead of an unreachable API (WARNING) later on.
# The regex runs first, so the arithmetic only ever sees 1-5 digits;
# the 10# prefix keeps leading zeros from being read as octal.
if [[ ! "$PBS_PORT" =~ ^[0-9]{1,5}$ ]] || (( 10#$PBS_PORT < 1 || 10#$PBS_PORT > 65535 )); then
    echo "UNKNOWN: invalid port '${PBS_PORT}' (expected an integer between 1 and 65535)"
    exit $STATE_UNKNOWN
fi

# --- Dependency check ---
# Without it a missing curl binary (exit code 127) would be reported
# below as an unreachable API.
if ! command -v curl >/dev/null 2>&1; then
    echo "UNKNOWN: curl is required but was not found in PATH"
    exit $STATE_UNKNOWN
fi

# --- curl options ---
# -s: no progress meter, -f: exit with code 22 on HTTP status >= 400
CURL_OPTS=(-sf --max-time 15)
if [[ "$SKIP_SSL" -eq 1 ]]; then
    CURL_OPTS+=(-k)
fi

# --- API URL ---
BASE_URL="https://${PBS_HOST}:${PBS_PORT}/api2/json"
SNAPSHOTS_URL="${BASE_URL}/admin/datastore/${PBS_DATASTORE}/snapshots"

# Query parameters are handed to curl individually: -G appends them to the
# URL as a query string and --data-urlencode percent-encodes each value, so
# characters such as '&', '#' or spaces in the backup ID or namespace cannot
# corrupt the request.
QUERY_OPTS=(-G
    --data-urlencode "backup-type=${BACKUP_TYPE}"
    --data-urlencode "backup-id=${BACKUP_ID}")
if [[ -n "$NAMESPACE" ]]; then
    QUERY_OPTS+=(--data-urlencode "ns=${NAMESPACE}")
fi

# --- API call ---
# PBS uses PBSAPIToken (different from PVEAPIToken used by Proxmox VE)
# stderr is discarded so that RESPONSE only ever holds the response body.
RESPONSE=$(curl "${CURL_OPTS[@]}" "${QUERY_OPTS[@]}" \
    -H "Authorization: PBSAPIToken=${PBS_TOKEN}" \
    "$SNAPSHOTS_URL" 2>/dev/null)
CURL_EXIT=$?

# Exit code 22 means the server was reached but answered with an HTTP error,
# which is a different problem from a connection failure.
if [[ $CURL_EXIT -eq 22 ]]; then
    echo "WARNING: PBS API (${PBS_HOST}:${PBS_PORT}) returned an HTTP error (check token, permissions and datastore name) - curl exit code: ${CURL_EXIT}"
    exit $STATE_WARNING
elif [[ $CURL_EXIT -ne 0 ]]; then
    echo "WARNING: Cannot reach PBS API (${PBS_HOST}:${PBS_PORT}) - curl exit code: ${CURL_EXIT}"
    exit $STATE_WARNING
fi

# --- Dependency check ---
# Without it a missing jq binary would be reported below as a non-JSON
# response from the API.
if ! command -v jq >/dev/null 2>&1; then
    echo "UNKNOWN: jq is required but was not found in PATH"
    exit $STATE_UNKNOWN
fi

# --- JSON validation ---
# -e makes jq exit non-zero when the last output is null/false or when
# nothing could be parsed at all.
if ! jq -e . <<<"$RESPONSE" >/dev/null 2>&1; then
    echo "WARNING: Invalid response from PBS API (non-JSON response)"
    exit $STATE_WARNING
fi

# The lookups that follow (.errors here, .data in the extraction step) only
# work on a JSON object. Any other top-level value would make them fail
# silently and end in a misleading "No backup found".
if ! jq -e 'type == "object"' <<<"$RESPONSE" >/dev/null 2>&1; then
    echo "WARNING: Unexpected response from PBS API (top-level JSON value is not an object)"
    exit $STATE_WARNING
fi

# Check that the API did not return an application error
API_ERROR=$(jq -r '.errors // empty' <<<"$RESPONSE" 2>/dev/null)
if [[ -n "$API_ERROR" ]]; then
    echo "WARNING: PBS API error: ${API_ERROR}"
    exit $STATE_WARNING
fi

# --- Snapshot extraction ---
# Take the list under .data; a missing, null or false .data is treated as
# an empty list, which is reported as CRITICAL right away.
SNAPSHOTS=$(jq -r '.data // []' <<<"$RESPONSE")
TOTAL_COUNT=$(jq 'length' <<<"$SNAPSHOTS")

if [[ "$TOTAL_COUNT" -eq 0 ]]; then
    printf '%s\n' "CRITICAL: No backup found for '${BACKUP_ID}' in datastore '${PBS_DATASTORE}'"
    exit "$STATE_CRITICAL"
fi

# --- Filter today's backups (local time) ---
# Half-open window [today 00:00, tomorrow 00:00) expressed in epoch seconds.
TODAY_START=$(date -d "today 00:00:00" +%s)
TOMORROW_START=$(date -d "tomorrow 00:00:00" +%s)

# Keep only the snapshots whose backup-time falls inside that window.
TODAY_SNAPSHOTS=$(jq \
    --argjson ts "$TODAY_START" \
    --argjson te "$TOMORROW_START" \
    'map(select(.["backup-time"] >= $ts and .["backup-time"] < $te))' \
    <<<"$SNAPSHOTS")
TODAY_COUNT=$(jq 'length' <<<"$TODAY_SNAPSHOTS")

# Nothing from today: report the newest snapshot overall as a hint.
if [[ "$TODAY_COUNT" -eq 0 ]]; then
    LATEST_EPOCH=$(jq 'map(.["backup-time"]) | max' <<<"$SNAPSHOTS")
    LATEST_DATE=$(date -d "@${LATEST_EPOCH}" "+%Y-%m-%d %H:%M")
    printf '%s\n' "CRITICAL: No backup today for '${BACKUP_ID}' — last known backup: ${LATEST_DATE}"
    exit "$STATE_CRITICAL"
fi

# --- Most recent backup of the day ---
# Sort today's snapshots by backup-time and take the final element.
LATEST=$(jq 'sort_by(.["backup-time"]) | .[-1]' <<<"$TODAY_SNAPSHOTS")
BACKUP_EPOCH=$(jq -r '.["backup-time"]' <<<"$LATEST")
BACKUP_TIME=$(date -d "@${BACKUP_EPOCH}" "+%Y-%m-%d %H:%M")
# A snapshot without a verification state is reported as "unverified".
VERIFY_STATE=$(jq -r '.verification.state // "unverified"' <<<"$LATEST")

# Only the state "ok" counts as success; anything else is a WARNING.
if [[ "$VERIFY_STATE" == "ok" ]]; then
    echo "OK: Backup '${BACKUP_ID}' from ${BACKUP_TIME} present and verified"
    exit $STATE_OK
fi

echo "WARNING: Backup '${BACKUP_ID}' from ${BACKUP_TIME} present but verification status: ${VERIFY_STATE}"
exit $STATE_WARNING
