From 2bb48bf32ac57fcc9663e7ab9d4c970c41fcab36 Mon Sep 17 00:00:00 2001
From: Ludovic Cartier
Date: Mon, 16 Mar 2026 15:47:43 +0100
Subject: [PATCH] add pvesr & pve quorum check

---
 files/nrpe/check_pve_quorum | 59 ++++++++++++++++++++++++++++++++++++++
 files/nrpe/check_pvesr      | 44 ++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+)
 create mode 100644 files/nrpe/check_pve_quorum
 create mode 100644 files/nrpe/check_pvesr

diff --git a/files/nrpe/check_pve_quorum b/files/nrpe/check_pve_quorum
new file mode 100644
index 0000000..25a8e6a
--- /dev/null
+++ b/files/nrpe/check_pve_quorum
@@ -0,0 +1,59 @@
+#!/bin/bash
+#
+# NRPE check: Proxmox VE cluster quorum, based on the output of "pvecm status".
+#
+# Usage: check_pve_quorum [EXPECTED_VOTES]    (EXPECTED_VOTES defaults to 3)
+#
+# Results:
+#   OK       - cluster is quorate with at least EXPECTED_VOTES votes
+#   WARNING  - cluster is quorate but has fewer votes than expected
+#   CRITICAL - cluster is not quorate
+#   UNKNOWN  - invalid argument, or pvecm output could not be parsed
+
+# Nagios Exit Codes
+readonly STATE_OK=0
+readonly STATE_WARNING=1
+readonly STATE_CRITICAL=2
+readonly STATE_UNKNOWN=3
+
+# Expected vote count; the optional first argument overrides the default.
+EXPECTED_VOTES=${1:-3}
+if ! [[ "$EXPECTED_VOTES" =~ ^[0-9]+$ ]]; then
+  echo "UNKNOWN - Invalid expected votes value: $EXPECTED_VOTES"
+  exit "$STATE_UNKNOWN"
+fi
+
+# The exit status of pvecm is deliberately ignored: a failed run leaves the
+# fields below empty, which is reported as UNKNOWN.
+QUORUM_INFO=$(pvecm status)
+
+# "Quorate:" carries the quorum state (Yes/No).
+HAS_QUORUM=$(awk '/Quorate:/ {print $2}' <<<"$QUORUM_INFO")
+
+# "Total votes:" carries the current vote count.
+VOTES=$(awk '/Total votes:/ {print $3}' <<<"$QUORUM_INFO")
+
+# Reject missing or non-numeric values before they reach the arithmetic below.
+if [[ -z "$HAS_QUORUM" ]] || ! [[ "$VOTES" =~ ^[0-9]+$ ]]; then
+  echo "UNKNOWN - Unable to parse pvecm status output"
+  exit "$STATE_UNKNOWN"
+fi
+
+# Force base 10 so a leading zero is never read as octal.
+EXPECTED_VOTES=$((10#$EXPECTED_VOTES))
+VOTES=$((10#$VOTES))
+
+if [[ "$HAS_QUORUM" != "Yes" ]]; then
+  echo "CRITICAL - Cluster lost QUORUM! (Quorate: $HAS_QUORUM, Votes: $VOTES) | votes=$VOTES"
+  exit "$STATE_CRITICAL"
+fi
+
+if ((VOTES < EXPECTED_VOTES)); then
+  # Status text and perfdata are printed on a single line.
+  echo "WARNING - Quorum OK but partial ($VOTES/$EXPECTED_VOTES votes). $((EXPECTED_VOTES - VOTES)) vote(s) missing. | votes=$VOTES"
+  exit "$STATE_WARNING"
+fi
+
+echo "OK - Cluster healthy ($VOTES/$EXPECTED_VOTES votes) | votes=$VOTES"
+exit "$STATE_OK"
diff --git a/files/nrpe/check_pvesr b/files/nrpe/check_pvesr
new file mode 100644
index 0000000..0c15079
--- /dev/null
+++ b/files/nrpe/check_pvesr
@@ -0,0 +1,44 @@
+#!/bin/bash
+#
+# NRPE check: Proxmox VE storage replication, based on "pvesr status".
+#
+# Results:
+#   OK       - no job configured, or every job is healthy
+#   CRITICAL - at least one job reports an error or a non-zero fail count
+#   UNKNOWN  - pvesr status could not be run
+
+# Nagios Exit Codes
+readonly STATE_OK=0
+readonly STATE_WARNING=1
+readonly STATE_CRITICAL=2
+readonly STATE_UNKNOWN=3
+
+# Capture the output first so a failing pvesr is not mistaken for "no job".
+if ! RAW_STATUS=$(pvesr status); then
+  echo "UNKNOWN - Unable to run pvesr status"
+  exit "$STATE_UNKNOWN"
+fi
+
+# Drop the header line and any blank line.
+STATUS_OUTPUT=$(awk 'NR > 1 && NF' <<<"$RAW_STATUS")
+
+if [[ -z "$STATUS_OUTPUT" ]]; then
+  echo "OK - No replication job configured."
+  exit "$STATE_OK"
+fi
+
+# Last column is the state, the one before it the fail count. A job is
+# healthy when its state is OK or SYNCING and its fail count is 0; the job ID
+# (first column) is printed otherwise.
+# NOTE(review): SYNCING is assumed to mean "run in progress" - confirm.
+ERRORS=$(awk '($NF != "OK" && $NF != "SYNCING") || $(NF-1) > 0 {print $1}' <<<"$STATUS_OUTPUT")
+
+if [[ -n "$ERRORS" ]]; then
+  COUNT=$(grep -c '' <<<"$ERRORS")
+  ERROR_LIST=$(paste -sd, - <<<"$ERRORS" | sed 's/,/, /g')
+  echo "CRITICAL - $COUNT replication(s) failed : $ERROR_LIST | replication_errors=$COUNT"
+  exit "$STATE_CRITICAL"
+fi
+
+COUNT_OK=$(grep -c '' <<<"$STATUS_OUTPUT")
+echo "OK - All replication jobs are operational ($COUNT_OK jobs) | replication_errors=0"
+exit "$STATE_OK"