add check disk usage (bash edition)

2025-09-18 17:44:52 +02:00
parent c3248d2351
commit 1b3c286e44
1 changed files with 148 additions and 0 deletions
@@ -0,0 +1,148 @@
+#!/bin/bash
+#
+# =============================================================================
+#
+# Nagios plugin to check disk space usage.
+#
+# Author: GitHub Copilot
+# Version: 1.0
+#
+# =============================================================================
+#
+# This script checks the disk space utilization of all mounted filesystems.
+# It is designed to be used as a Nagios/Icinga check command.
+#
+# The script compares the percentage of used space against configurable
+# warning and critical thresholds. If any filesystems exceed these thresholds,
+# it will exit with the appropriate status and display a human-readable
+# table of the problematic filesystems.
+#
+# Thresholds are managed via the -w (warning) and -c (critical) flags.
+#
+# Nagios Exit Codes:
+# 0 - OK: All filesystems are within thresholds.
+# 1 - WARNING: One or more filesystems have reached or exceeded the warning threshold.
+# 2 - CRITICAL: One or more filesystems have reached or exceeded the critical threshold.
+# 3 - UNKNOWN: The script encountered an error (e.g., bad arguments or
+#              the 'df' command failed).
+#
+# =============================================================================
+
+# Nagios exit codes. These never change at runtime, so they are marked
+# readonly to guard against accidental reassignment later in the script.
+readonly OK=0
+readonly WARNING=1
+readonly CRITICAL=2
+readonly UNKNOWN=3
+
+# Default thresholds (percentage of used space). Deliberately NOT readonly:
+# the -w and -c command-line options overwrite them.
+WARN_THRESHOLD=80
+CRIT_THRESHOLD=90
+
+# --- Functions ---
+
+# Print the command-line synopsis and the option descriptions to stdout, then
+# terminate the script with the UNKNOWN status.
+# The defaults shown are whatever WARN_THRESHOLD / CRIT_THRESHOLD hold at the
+# moment the function is called.
+usage() {
+    printf '%s\n' \
+        "Usage: $0 [-w <warning_threshold>] [-c <critical_threshold>]" \
+        "  -w: Warning threshold percentage (e.g., 80). Default: ${WARN_THRESHOLD}%" \
+        "  -c: Critical threshold percentage (e.g., 90). Default: ${CRIT_THRESHOLD}%" \
+        "  -h: Display this help message"
+    exit $UNKNOWN
+}
+
+# --- Argument Parsing ---
+
+# -w and -c each take a value that replaces the corresponding default
+# threshold. -h, unknown options and options with a missing argument all
+# print the usage text, which also terminates the script.
+while getopts "w:c:h" opt; do
+    case "${opt}" in
+        w)
+            WARN_THRESHOLD=${OPTARG}
+            ;;
+        c)
+            CRIT_THRESHOLD=${OPTARG}
+            ;;
+        h)
+            usage
+            ;;
+        *)
+            usage
+            ;;
+    esac
+done
+
+# Validate the thresholds. They are percentages, so only integers from 0 to
+# 100 are accepted (leading zeros are tolerated, e.g. "080").
+# A bounded pattern is used instead of a bare "all digits" test because an
+# arbitrarily long digit string would pass that test but make the numeric
+# comparisons further down fail with "integer expression expected", so the
+# affected threshold would silently never trigger.
+threshold_pattern='^0*(100|[0-9]{1,2})$'
+if ! [[ "$WARN_THRESHOLD" =~ $threshold_pattern ]] || ! [[ "$CRIT_THRESHOLD" =~ $threshold_pattern ]]; then
+    echo "UNKNOWN: Warning and critical thresholds must be integers between 0 and 100."
+    exit $UNKNOWN
+fi
+
+# --- Main Logic ---
+
+# Final exit status, assuming OK until a problem is found
+final_status=$OK
+# Summary message for the first line of output
+summary_message="OK: All filesystems are within thresholds."
+
+# Arrays to store problematic filesystems
+declare -a critical_filesystems
+declare -a warning_filesystems
+
+# Get the disk usage data. Exit if the command fails.
+df_output=$(df -hP)
+if [ $? -ne 0 ]; then
+    echo "UNKNOWN: 'df -hP' command failed to execute."
+    exit $UNKNOWN
+fi
+
+
+# Use process substitution to read df output line by line, skipping the header
+# This avoids creating a subshell for the loop, so variables can be modified.
+while read -r line; do
+    # Extract usage percentage and mount point
+    usage_percent=$(echo "$line" | awk '{print $5}' | sed 's/%//')
+
+    # Skip non-numeric usage percentages (e.g., for certain pseudo-filesystems)
+    if ! [[ "$usage_percent" =~ ^[0-9]+$ ]]; then
+        continue
+    fi
+
+    # Check against thresholds
+    if [ "$usage_percent" -ge "$CRIT_THRESHOLD" ]; then
+        critical_filesystems+=("$line")
+        final_status=$CRITICAL
+        summary_message="CRITICAL: Filesystem usage has exceeded critical threshold."
+    elif [ "$usage_percent" -ge "$WARN_THRESHOLD" ]; then
+        warning_filesystems+=("$line")
+        if [ "$final_status" -ne "$CRITICAL" ]; then
+            final_status=$WARNING
+            summary_message="WARNING: Filesystem usage has exceeded warning threshold."
+        fi
+    fi
+done < <(echo "${df_output}" | tail -n +2)
+
+
+# --- Output Generation ---
+
+# Column layout shared by the table header and every data row.
+table_format="%-30s %-10s %-10s %-10s %-10s %-30s\n"
+
+# Print one stored df line as an aligned table row.
+# Arguments: $1 - a raw line of 'df -hP' output
+# Outputs:   the formatted row on stdout
+#
+# The line is split with a pattern anchored on the "<digits>%" column instead
+# of being left to unquoted word splitting. That way a filesystem name or
+# mount point containing spaces stays in a single column, and glob characters
+# in a path are never expanded by the shell.
+print_fs_row() {
+    local row=$1
+    local field='[^[:space:]]+'
+    local gap='[[:space:]]+'
+    # Groups: 1 filesystem, 2 size, 3 used, 4 available, 5 use%, 6 mount point
+    local row_pattern="^(.*[^[:space:]])${gap}(${field})${gap}(${field})${gap}(${field})${gap}([0-9]+%)${gap}(.*)\$"
+
+    if [[ "$row" =~ $row_pattern ]]; then
+        # shellcheck disable=SC2059 -- the format is a fixed string defined above
+        printf "$table_format" "${BASH_REMATCH[@]:1:6}"
+    else
+        # Unexpected layout: show the line unmodified rather than dropping it.
+        printf '%s\n' "$row"
+    fi
+}
+
+# Print the one-line summary for Nagios
+echo "$summary_message"
+
+# If there are any issues, print the detailed table
+if [ "$final_status" -ne "$OK" ]; then
+    echo "" # Add a newline for better formatting
+    # shellcheck disable=SC2059 -- the format is a fixed string defined above
+    printf "$table_format" "Filesystem" "Size" "Used" "Avail" "Use%" "Mounted on"
+    printf "%s\n" "---------------------------------------------------------------------------------------------------------------"
+
+    # Critical filesystems are listed first, followed by the warning ones.
+    # An empty array simply contributes no rows.
+    for item in "${critical_filesystems[@]}" "${warning_filesystems[@]}"; do
+        print_fs_row "$item"
+    done
+fi
+
+exit "$final_status"