#!/bin/bash
#
# =============================================================================
#
# Nagios plugin to check disk space usage.
#
# Author: GitHub Copilot
# Version: 1.0
#
# =============================================================================
#
# This script checks the disk space utilization of every filesystem reported
# by 'df -hP'. It is designed to be used as a Nagios/Icinga check command.
#
# The "Use%" column of each filesystem is compared against configurable
# warning and critical thresholds. A filesystem whose usage is at or above a
# threshold is reported; the plugin then exits with the matching status and
# prints a human-readable table of the problematic filesystems.
#
# Usage:
#   check_disk_usage [-w PERCENT] [-c PERCENT] [-h]
#
# Thresholds are managed via the -w (warning) and -c (critical) flags.
#
# Nagios Exit Codes:
#   0 - OK:       All filesystems are below the warning threshold.
#   1 - WARNING:  One or more filesystems are at or above the warning threshold.
#   2 - CRITICAL: One or more filesystems are at or above the critical threshold.
#   3 - UNKNOWN:  The script encountered an error (e.g., bad arguments or
#                 the 'df' command failed). Also used for -h.
#
# Known limitation: rows are split on whitespace, so a *device* name that
# contains spaces shifts the columns; such a row normally ends up with a
# non-numeric "Use%" field and is skipped. Mount points containing spaces are
# handled correctly.
#
# =============================================================================

# NOTE: 'set -e' / 'set -u' are deliberately NOT enabled. An unexpected abort
# would exit with the shell's own status (typically 1), which Nagios would
# interpret as WARNING. Every failure path below exits with an explicit code.

# Nagios exit codes
readonly OK=0
readonly WARNING=1
readonly CRITICAL=2
readonly UNKNOWN=3

# Default thresholds (percent used). Kept separate from the effective values so
# the help text always shows the real defaults, even after -w/-c were parsed.
readonly DEFAULT_WARN_THRESHOLD=80
readonly DEFAULT_CRIT_THRESHOLD=90

# Effective thresholds; may be overridden by -w / -c.
WARN_THRESHOLD=$DEFAULT_WARN_THRESHOLD
CRIT_THRESHOLD=$DEFAULT_CRIT_THRESHOLD

# Width of one formatted table row: 30+10+10+10+10+30 plus 5 separators.
readonly ROW_WIDTH=105

# --- Functions ---

#######################################
# Print help/usage information and exit.
# Outputs: usage text on stdout (NRPE relays stdout to the server).
# Returns: never returns; exits with UNKNOWN.
#######################################
usage() {
  echo "Usage: $0 [-w PERCENT] [-c PERCENT] [-h]"
  echo " -w: Warning threshold percentage (e.g., 80). Default: ${DEFAULT_WARN_THRESHOLD}%"
  echo " -c: Critical threshold percentage (e.g., 90). Default: ${DEFAULT_CRIT_THRESHOLD}%"
  echo " -h: Display this help message"
  exit "$UNKNOWN"
}

#######################################
# Print one table row with fixed column widths.
# Arguments: exactly six column values, in table order.
# Outputs:   the formatted row on stdout.
#######################################
print_row() {
  printf '%-30s %-10s %-10s %-10s %-10s %-30s\n' "$@"
}

#######################################
# Print one raw 'df -P' data line as a table row.
# Arguments: $1 - a single data line from 'df -hP'.
# Outputs:   the formatted row on stdout.
#######################################
print_df_line() {
  local fs size used avail pct mount
  # 'read' assigns the remainder of the line to the last variable, so a mount
  # point with embedded spaces stays in one column. Nothing is glob-expanded.
  read -r fs size used avail pct mount <<< "$1"
  print_row "$fs" "$size" "$used" "$avail" "$pct" "$mount"
}

#######################################
# Entry point: parse options, classify filesystems, report, exit.
# Globals:   WARN_THRESHOLD, CRIT_THRESHOLD (read and written)
# Arguments: the script's command-line arguments.
# Outputs:   Nagios summary line, plus a detail table when not OK.
# Returns:   never returns; exits with a Nagios status code.
#######################################
main() {
  # --- Argument Parsing ---
  local opt
  while getopts "w:c:h" opt; do
    case "$opt" in
      w)
        WARN_THRESHOLD=$OPTARG
        ;;
      c)
        CRIT_THRESHOLD=$OPTARG
        ;;
      h)
        usage
        ;;
      *)
        usage
        ;;
    esac
  done

  # Validate that thresholds are non-negative integers.
  if ! [[ "$WARN_THRESHOLD" =~ ^[0-9]+$ && "$CRIT_THRESHOLD" =~ ^[0-9]+$ ]]; then
    echo "UNKNOWN: Warning and critical thresholds must be integers."
    exit "$UNKNOWN"
  fi

  # --- Main Logic ---

  # Get the disk usage data. Testing the assignment directly keeps the status
  # check attached to the command it belongs to.
  local df_output
  if ! df_output=$(df -hP); then
    echo "UNKNOWN: 'df -hP' command failed to execute."
    exit "$UNKNOWN"
  fi

  # Raw df lines of the filesystems that crossed a threshold.
  local -a critical_filesystems=()
  local -a warning_filesystems=()
  local line pct

  # The here-string feeds the loop without a pipeline, so the arrays filled
  # inside it remain visible afterwards.
  {
    read -r _ # discard the df header line
    while read -r line; do
      # Fifth column is "Use%"; strip the trailing percent sign.
      read -r _ _ _ _ pct _ <<< "$line"
      pct=${pct%\%}

      # Skip rows without a numeric usage (e.g. "-" on pseudo-filesystems).
      [[ "$pct" =~ ^[0-9]+$ ]] || continue

      # '[' compares as plain decimal, so a threshold such as "08" is accepted
      # (arithmetic contexts would reject it as invalid octal).
      if [ "$pct" -ge "$CRIT_THRESHOLD" ]; then
        critical_filesystems+=("$line")
      elif [ "$pct" -ge "$WARN_THRESHOLD" ]; then
        warning_filesystems+=("$line")
      fi
    done
  } <<< "$df_output"

  # Worst finding wins: CRITICAL outranks WARNING outranks OK.
  local final_status=$OK
  local summary_message="OK: All filesystems are within thresholds."
  if ((${#critical_filesystems[@]} > 0)); then
    final_status=$CRITICAL
    summary_message="CRITICAL: Filesystem usage has exceeded critical threshold."
  elif ((${#warning_filesystems[@]} > 0)); then
    final_status=$WARNING
    summary_message="WARNING: Filesystem usage has exceeded warning threshold."
  fi

  # --- Output Generation ---

  # First line is the one-line summary Nagios shows in the service list.
  echo "$summary_message"

  # If there are any issues, print the detailed table (critical rows first).
  if [ "$final_status" -ne "$OK" ]; then
    echo "" # blank line between summary and table
    print_row "Filesystem" "Size" "Used" "Avail" "Use%" "Mounted on"

    # Separator rule exactly as wide as one formatted row.
    local rule
    printf -v rule '%*s' "$ROW_WIDTH" ''
    printf '%s\n' "${rule// /-}"

    local item
    for item in "${critical_filesystems[@]}" "${warning_filesystems[@]}"; do
      print_df_line "$item"
    done
  fi

  exit "$final_status"
}

main "$@"