You've already forked nrpe
add pvesr & pve quorum check
This commit is contained in:
@@ -0,0 +1,35 @@
|
||||
#!/bin/bash
#
# Nagios/NRPE plugin: check the quorum state of a Proxmox VE cluster.
#
# Runs `pvecm status` and inspects its "Quorate:" and "Total votes:" lines.
#
# Usage: <this script> [EXPECTED_VOTES]
#   EXPECTED_VOTES  vote count when every member is present (default: 3)
#
# Output: one status line on stdout, with "votes=<n>" perfdata where a
#         vote count is available.
#
# Exit status:
#   0 OK       - quorate and total votes >= expected votes
#   1 WARNING  - quorate but fewer votes than expected
#   2 CRITICAL - the "Quorate:" value is anything other than "Yes"
#   3 UNKNOWN  - output could not be parsed, or a vote count is not a number
#
# `set -e` is deliberately not used: an unplanned abort would exit with
# status 1, which Nagios interprets as WARNING.

# Nagios Exit Codes
readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

# Optional first argument overrides the historical hard-coded value of 3.
EXPECTED_VOTES=${1:-3}

if ! [[ "$EXPECTED_VOTES" =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN - Expected votes must be a non-negative integer (got: $EXPECTED_VOTES)"
  exit "$STATE_UNKNOWN"
fi

# The exit status of pvecm is not used as a health signal; the decision is
# based only on the text it prints. Empty output ends in UNKNOWN below.
QUORUM_INFO=$(pvecm status)

# Take only the first matching line so the values are always a single word.
# "Quorate:" carries the status (Yes/No).
HAS_QUORUM=$(awk '/^[[:space:]]*Quorate:/ {print $2; exit}' <<<"$QUORUM_INFO")

# "Total votes:" carries the current vote count.
VOTES=$(awk '/^[[:space:]]*Total votes:/ {print $3; exit}' <<<"$QUORUM_INFO")

if [[ -z "$HAS_QUORUM" || -z "$VOTES" ]]; then
  echo "UNKNOWN - Unable to parse pvecm status output"
  exit "$STATE_UNKNOWN"
fi

# Loss of quorum is reported first, before validating the vote count, so a
# malformed count can never mask a CRITICAL condition.
if [[ "$HAS_QUORUM" != "Yes" ]]; then
  echo "CRITICAL - Cluster lost QUORUM! (Quorate: $HAS_QUORUM, Votes: $VOTES) | votes=$VOTES"
  exit "$STATE_CRITICAL"
fi

# A non-numeric count would make the comparison below fail and fall through
# to the OK branch, so reject it explicitly.
if ! [[ "$VOTES" =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN - Unexpected vote count in pvecm status output: $VOTES"
  exit "$STATE_UNKNOWN"
fi

# 10# forces base 10 so a value with a leading zero is not read as octal.
if (( 10#$VOTES < 10#$EXPECTED_VOTES )); then
  MISSING_VOTES=$(( 10#$EXPECTED_VOTES - 10#$VOTES ))
  echo "WARNING - Quorum OK but partial ($VOTES/$EXPECTED_VOTES votes). $MISSING_VOTES vote(s) missing. | votes=$VOTES"
  exit "$STATE_WARNING"
fi

echo "OK - Cluster healthy ($VOTES/$EXPECTED_VOTES votes) | votes=$VOTES"
exit "$STATE_OK"
|
||||
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
#
# Nagios/NRPE plugin: check Proxmox VE storage replication jobs.
#
# Runs `pvesr status`, skips the header line and inspects each job line:
# the last column is read as the job state and the second-to-last column
# as the fail count.
#
# Output: one status line on stdout with "replication_errors=<n>" perfdata
#         (no perfdata when no job is configured or the status is unknown).
#
# Exit status:
#   0 OK       - no job configured, or every job is healthy
#   2 CRITICAL - at least one job has a failing state or a fail count > 0
#   3 UNKNOWN  - `pvesr status` itself failed
#
# `set -e` is deliberately not used: an unplanned abort would exit with
# status 1, which Nagios interprets as WARNING.

# Nagios Exit Codes
readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

# Capture the output separately from any filtering so the exit status of
# pvesr is visible. Otherwise a failed call (missing binary, insufficient
# privileges, ...) produces empty output and would be reported as
# "no replication job configured".
if ! RAW_STATUS=$(pvesr status); then
  echo "UNKNOWN - Unable to run pvesr status"
  exit "$STATE_UNKNOWN"
fi

# Drop the header (first line) and any blank lines; blank lines would make
# the $(NF-1) field reference below invalid.
STATUS_OUTPUT=$(awk 'NR > 1 && NF' <<<"$RAW_STATUS")

if [[ -z "$STATUS_OUTPUT" ]]; then
  echo "OK - No replication job configured."
  exit "$STATE_OK"
fi

# Collect the job IDs (first column) of unhealthy jobs.
# NOTE(review): "SYNCING" is assumed to be the state pvesr prints for a job
# that is currently running; it is tolerated as long as the fail count is 0.
# Confirm against the installed pvesr version.
mapfile -t FAILED_JOBS < <(
  awk '($NF != "OK" && $NF != "SYNCING") || $(NF-1) > 0 {print $1}' \
    <<<"$STATUS_OUTPUT"
)

if (( ${#FAILED_JOBS[@]} > 0 )); then
  COUNT=${#FAILED_JOBS[@]}
  # Join the IDs with ", " and strip the trailing separator.
  ERROR_LIST=$(printf '%s, ' "${FAILED_JOBS[@]}")
  ERROR_LIST=${ERROR_LIST%, }
  echo "CRITICAL - $COUNT replication(s) failed : $ERROR_LIST | replication_errors=$COUNT"
  exit "$STATE_CRITICAL"
fi

COUNT_OK=$(wc -l <<<"$STATUS_OUTPUT")
echo "OK - All replication jobs are operational ($COUNT_OK jobs) | replication_errors=0"
exit "$STATE_OK"
|
||||
Reference in New Issue
Block a user