You've already forked nrpe
add pvesr & pve quorum check
This commit is contained in:
@@ -0,0 +1,35 @@
|
|||||||
|
#!/bin/bash
#
# Nagios/NRPE plugin: Proxmox VE cluster quorum check.
#
# Parses the output of `pvecm status` and reports:
#   CRITICAL - the "Quorate:" field is anything other than "Yes"
#   WARNING  - quorate, but "Total votes:" is below the expected count
#   OK       - quorate and all expected votes are present
#   UNKNOWN  - the output could not be parsed or an argument is invalid
#
# Usage: <script> [EXPECTED_VOTES]
#   EXPECTED_VOTES  votes a fully healthy cluster should report (default: 3)

# Nagios Exit Codes
readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

# Optional first argument; falls back to the historical hard-coded value.
EXPECTED_VOTES=${1:-3}
if ! [[ "$EXPECTED_VOTES" =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN - Invalid expected vote count: $EXPECTED_VOTES"
  exit "$STATE_UNKNOWN"
fi

# NOTE(review): the exit status of pvecm is deliberately not used to bail out.
# It may be non-zero when the cluster is not quorate (to be confirmed), and
# that case must still reach the CRITICAL branch below. A command that could
# not run at all produces no parseable output and ends up as UNKNOWN.
QUORUM_INFO=$(pvecm status)

# "Quorate:" line carries the status (Yes/No) in its second field.
# Only the first matching line is used so the value is always a single word.
HAS_QUORUM=$(awk '/Quorate:/ {print $2; exit}' <<<"$QUORUM_INFO")

# "Total votes:" line carries the current vote count in its third field.
VOTES=$(awk '/Total votes:/ {print $3; exit}' <<<"$QUORUM_INFO")

if [[ -z "$HAS_QUORUM" || -z "$VOTES" ]]; then
  echo "UNKNOWN - Unable to parse pvecm status output"
  exit "$STATE_UNKNOWN"
fi

if [[ "$HAS_QUORUM" != "Yes" ]]; then
  echo "CRITICAL - Cluster lost QUORUM! (Quorate: $HAS_QUORUM, Votes: $VOTES) | votes=$VOTES"
  exit "$STATE_CRITICAL"
fi

# A non-numeric vote count would make the comparison below fail and fall
# through to the OK branch, so reject it explicitly.
if ! [[ "$VOTES" =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN - Unexpected vote count in pvecm status output: $VOTES"
  exit "$STATE_UNKNOWN"
fi

# 10# forces base 10 so a value with leading zeros is not read as octal.
if (( 10#$VOTES < 10#$EXPECTED_VOTES )); then
  missing=$(( 10#$EXPECTED_VOTES - 10#$VOTES ))
  if (( missing == 1 )); then
    detail="One member is missing."
  else
    detail="$missing votes are missing."
  fi
  echo "WARNING - Quorum OK but partial ($VOTES/$EXPECTED_VOTES votes). $detail | votes=$VOTES"
  exit "$STATE_WARNING"
else
  echo "OK - Cluster healthy ($VOTES/$EXPECTED_VOTES votes) | votes=$VOTES"
  exit "$STATE_OK"
fi
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
#!/bin/bash
#
# Nagios/NRPE plugin: Proxmox VE storage replication (pvesr) check.
#
# Runs `pvesr status`, skips the first (header) line and inspects every
# remaining row:
#   CRITICAL - at least one row whose last field is not "OK" or whose
#              second-to-last field is greater than 0
#   OK       - every row passes, or there are no rows at all
#   UNKNOWN  - pvesr is not installed or `pvesr status` itself failed
#
# Usage: <script>   (takes no arguments)

# Nagios Exit Codes
readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

if ! command -v pvesr >/dev/null 2>&1; then
  echo "UNKNOWN - pvesr command not found"
  exit "$STATE_UNKNOWN"
fi

# Capture the command's own exit status. Piping straight into tail would hide
# it, and a failed call (empty output) would then be reported as
# "no replication job configured".
raw_output=$(pvesr status)
rc=$?
if (( rc != 0 )); then
  echo "UNKNOWN - 'pvesr status' failed (exit code $rc)"
  exit "$STATE_UNKNOWN"
fi

# Drop the header line.
STATUS_OUTPUT=$(tail -n +2 <<<"$raw_output")

if [[ -z "$STATUS_OUTPUT" ]]; then
  echo "OK - No replication job configured."
  exit "$STATE_OK"
fi

# Collect the first field (job ID) of every failing row.
# The NF guard skips blank lines, for which $(NF-1) would be invalid.
# NOTE(review): a job that is currently running may report a state other than
# "OK" (e.g. "SYNCING") and would be flagged here - confirm whether that
# should be tolerated.
mapfile -t failed_jobs < <(
  awk 'NF && ($NF != "OK" || $(NF-1) > 0) {print $1}' <<<"$STATUS_OUTPUT"
)

if (( ${#failed_jobs[@]} > 0 )); then
  COUNT=${#failed_jobs[@]}
  # Join the job IDs with ", ".
  printf -v ERROR_LIST '%s, ' "${failed_jobs[@]}"
  ERROR_LIST=${ERROR_LIST%, }
  echo "CRITICAL - $COUNT replication(s) failed : $ERROR_LIST | replication_errors=$COUNT"
  exit "$STATE_CRITICAL"
else
  COUNT_OK=$(wc -l <<<"$STATUS_OUTPUT")
  echo "OK - All replication jobs are operational ($COUNT_OK jobs) | replication_errors=0"
  exit "$STATE_OK"
fi
|
||||||
Reference in New Issue
Block a user