#!/bin/bash
#
# =============================================================================
#
# Nagios plugin to check disk space and inode usage.
#
# Author: GitHub Copilot
# Version: 1.2
#
# =============================================================================
#
# This script checks both block and inode utilization of all mounted filesystems.
# It is designed to be used as a Nagios/Icinga check command.
#
# The script compares usage percentages against configurable thresholds.
# It can also exclude specific mount points or filesystem types.
#
# Block Usage Flags:
#   -w <%>: Warning threshold for block usage.
#   -c <%>: Critical threshold for block usage.
#
# Inode Usage Flags:
#   -W <%>: Warning threshold for inode usage.
#   -C <%>: Critical threshold for inode usage.
#
# Exclusion Flags (can be used multiple times):
#   -e <mount>: Exclude a mount point by exact match (e.g., -e /tmp).
#   -E <regex>: Exclude mount points matching a regex pattern (e.g., -E '/run/user/.*').
#   -x <type>:  Exclude a filesystem type (e.g., -x tmpfs).
#
# Nagios Exit Codes:
#   0 - OK: All checked filesystems are within thresholds.
#   1 - WARNING: Usage has exceeded a warning threshold.
#   2 - CRITICAL: Usage has exceeded a critical threshold.
#   3 - UNKNOWN: The script encountered an error.
#
# =============================================================================

# --- Nagios Exit Codes ---
OK=0
WARNING=1
CRITICAL=2
UNKNOWN=3

# --- Defaults ---
WARN_THRESHOLD=80
CRIT_THRESHOLD=90
INODE_WARN_THRESHOLD=80
INODE_CRIT_THRESHOLD=90
declare -a EXCLUDE_MOUNTS=()
declare -a EXCLUDE_MOUNTS_REGEX=()
declare -a EXCLUDE_TYPES=()

# --- Check state (written by the functions below, read by report) ---
final_status=$OK
summary_message="OK: All filesystems are within thresholds."
declare -a critical_alerts=()
declare -a warning_alerts=()
declare -A inode_usage_map=()

# --- Functions ---

#######################################
# Print the help text on stdout and exit with the UNKNOWN status.
# Globals: the four *_THRESHOLD defaults, UNKNOWN (read)
#######################################
usage() {
  echo "Usage: $0 [-w <percent>] [-c <percent>] [-W <percent>] [-C <percent>] [-e <mount>] [-E <regex>] [-x <fstype>]"
  echo "  Block Usage Thresholds (%):"
  echo "    -w: Warning threshold. Default: ${WARN_THRESHOLD}%"
  echo "    -c: Critical threshold. Default: ${CRIT_THRESHOLD}%"
  echo "  Inode Usage Thresholds (%):"
  echo "    -W: Warning threshold. Default: ${INODE_WARN_THRESHOLD}%"
  echo "    -C: Critical threshold. Default: ${INODE_CRIT_THRESHOLD}%"
  echo "  Exclusions (can be specified multiple times):"
  echo "    -e: Mount point to exclude by exact match (e.g., -e /tmp)."
  echo "    -E: Regex for mount points to exclude (e.g., -E '/run/user/.*')."
  echo "    -x: Filesystem type to exclude (e.g., -x tmpfs)."
  echo "  Help:"
  echo "    -h: Display this help message."
  exit "$UNKNOWN"
}

#######################################
# Parse command-line flags into the threshold and exclusion globals, then
# validate the thresholds. Exits UNKNOWN on any malformed input.
# Arguments: the script's positional parameters
#######################################
parse_args() {
  local opt name

  # getopts (rather than a hand-rolled "shift 2" loop) so that a flag given
  # without its value is reported instead of looping forever.
  OPTIND=1
  while getopts ":w:c:W:C:e:E:x:h" opt; do
    case "$opt" in
      w) WARN_THRESHOLD=$OPTARG ;;
      c) CRIT_THRESHOLD=$OPTARG ;;
      W) INODE_WARN_THRESHOLD=$OPTARG ;;
      C) INODE_CRIT_THRESHOLD=$OPTARG ;;
      e) EXCLUDE_MOUNTS+=("$OPTARG") ;;
      E) EXCLUDE_MOUNTS_REGEX+=("$OPTARG") ;;
      x) EXCLUDE_TYPES+=("$OPTARG") ;;
      h) usage ;;
      :)
        echo "Option -$OPTARG requires an argument."
        usage
        ;;
      \?)
        echo "Unknown option: -$OPTARG"
        usage
        ;;
    esac
  done
  shift $((OPTIND - 1))

  # Stray positional arguments are rejected, as before.
  if (( $# > 0 )); then
    echo "Unknown option: $1"
    usage
  fi

  # Thresholds are later used in arithmetic comparisons, where bash would
  # evaluate arbitrary text as an expression; accept plain digits only.
  for name in WARN_THRESHOLD CRIT_THRESHOLD INODE_WARN_THRESHOLD INODE_CRIT_THRESHOLD; do
    if ! [[ "${!name}" =~ ^[0-9]+$ ]]; then
      echo "UNKNOWN: Warning and critical thresholds must be integers."
      exit "$UNKNOWN"
    fi
    # Force base 10 so a value such as "08" is not rejected as bad octal.
    printf -v "$name" '%d' "$((10#${!name}))"
  done
}

#######################################
# Decide whether a filesystem is excluded from the check.
# Arguments: $1 - mount point, $2 - filesystem type
# Returns:   0 if excluded by -e, -E or -x; 1 otherwise
#######################################
is_excluded() {
  local mount=$1 fstype=$2 candidate

  for candidate in "${EXCLUDE_MOUNTS[@]}"; do
    if [[ "$mount" == "$candidate" ]]; then return 0; fi
  done
  for candidate in "${EXCLUDE_MOUNTS_REGEX[@]}"; do
    # Unquoted on purpose: the right-hand side must act as a regex.
    if [[ "$mount" =~ $candidate ]]; then return 0; fi
  done
  for candidate in "${EXCLUDE_TYPES[@]}"; do
    if [[ "$fstype" == "$candidate" ]]; then return 0; fi
  done
  return 1
}

#######################################
# Test a usage figure against a threshold.
# Arguments: $1 - usage value (may be empty or non-numeric, e.g. "-")
#            $2 - validated integer threshold
# Returns:   0 if $1 is an integer and >= $2; non-zero otherwise
#######################################
at_or_above() {
  [[ "$1" =~ ^[0-9]+$ ]] && (( 10#$1 >= $2 ))
}

#######################################
# Print one row of the report table (eight columns) followed by a newline.
# Arguments: the eight column values
#######################################
format_row() {
  printf '%-30s %-15s %-10s %-10s %-10s %-10s %-10s %-30s\n' "$@"
}

#######################################
# Fill inode_usage_map (mount point -> IUse% without the % sign) from
# 'df -iP'. Filesystems whose IUse% is not numeric are left out.
# Exits UNKNOWN if df fails.
#######################################
load_inode_usage() {
  local df_output header fs inodes iused ifree pct_field mount pct

  if ! df_output=$(df -iP); then
    echo "UNKNOWN: 'df -iP' failed."
    exit "$UNKNOWN"
  fi

  {
    read -r header # skip the column titles
    # The last variable receives the remainder of the line, so mount points
    # containing spaces stay intact.
    while read -r fs inodes iused ifree pct_field mount; do
      pct=${pct_field%\%}
      if [[ -n "$mount" && "$pct" =~ ^[0-9]+$ ]]; then
        inode_usage_map["$mount"]=$pct
      fi
    done
  } <<<"$df_output"
}

#######################################
# Walk 'df -hPT', skip excluded filesystems, and record every filesystem
# whose block or inode usage reaches a threshold. Updates final_status,
# summary_message, critical_alerts and warning_alerts.
# Exits UNKNOWN if df fails.
#######################################
check_filesystems() {
  local df_output header fs fstype size used avail pct_field mount
  local block_pct inode_pct inode_display severity row

  if ! df_output=$(df -hPT); then
    echo "UNKNOWN: 'df -hPT' failed."
    exit "$UNKNOWN"
  fi

  {
    read -r header # skip the column titles
    while read -r fs fstype size used avail pct_field mount; do
      # Blank or short lines carry no mount point; nothing to check.
      [[ -n "$mount" ]] || continue
      if is_excluded "$mount" "$fstype"; then continue; fi

      block_pct=${pct_field%\%}
      inode_pct=${inode_usage_map["$mount"]:-}

      severity=""
      if at_or_above "$block_pct" "$CRIT_THRESHOLD" \
        || at_or_above "$inode_pct" "$INODE_CRIT_THRESHOLD"; then
        severity="critical"
      elif at_or_above "$block_pct" "$WARN_THRESHOLD" \
        || at_or_above "$inode_pct" "$INODE_WARN_THRESHOLD"; then
        severity="warning"
      fi
      [[ -n "$severity" ]] || continue

      # No inode figure for this mount: show a bare N/A, not "N/A%".
      inode_display="N/A"
      if [[ -n "$inode_pct" ]]; then
        inode_display="${inode_pct}%"
      fi
      row=$(format_row "$fs" "$fstype" "$size" "$used" "$avail" \
        "$pct_field" "$inode_display" "$mount")

      if [[ "$severity" == "critical" ]]; then
        critical_alerts+=("$row")
        final_status=$CRITICAL
        summary_message="CRITICAL: Filesystem usage has exceeded critical threshold."
      else
        warning_alerts+=("$row")
        # A warning must never downgrade an earlier critical result.
        if (( final_status != CRITICAL )); then
          final_status=$WARNING
          summary_message="WARNING: Filesystem usage has exceeded warning threshold."
        fi
      fi
    done
  } <<<"$df_output"
}

#######################################
# Print the one-line Nagios summary and, when the status is not OK, a table
# of offending filesystems (critical rows first, then warning rows).
#######################################
report() {
  local row

  echo "$summary_message"

  if (( final_status != OK )); then
    echo ""
    format_row "Filesystem" "Type" "Size" "Used" "Avail" "Use%" "IUse%" "Mounted on"
    printf "%s\n" "----------------------------------------------------------------------------------------------------------------------------------"
    for row in "${critical_alerts[@]}" "${warning_alerts[@]}"; do
      printf '%s\n' "$row"
    done
  fi
}

#######################################
# Entry point: parse flags, gather usage data, report, and exit with the
# Nagios status code.
#######################################
main() {
  parse_args "$@"
  load_inode_usage
  check_filesystems
  report
  exit "$final_status"
}

main "$@"