Files
nrpe/files/nrpe/check_disk_usage
2025-09-19 09:18:44 +02:00

177 lines
6.3 KiB
Bash
Executable File

#!/bin/bash
#
# =============================================================================
#
# Nagios plugin to check disk space and inode usage.
#
# Author: GitHub Copilot
# Version: 1.2
#
# =============================================================================
#
# This script checks both block and inode utilization of all mounted filesystems.
# It is designed to be used as a Nagios/Icinga check command.
#
# The script compares usage percentages against configurable thresholds.
# It can also exclude specific mount points or filesystem types.
#
# Block Usage Flags:
# -w <%>: Warning threshold for block usage.
# -c <%>: Critical threshold for block usage.
#
# Inode Usage Flags:
# -W <%>: Warning threshold for inode usage.
# -C <%>: Critical threshold for inode usage.
#
# Exclusion Flags (can be used multiple times):
# -e <mount>: Exclude a mount point by exact match (e.g., -e /tmp).
# -E <regex>: Exclude mount points matching a regex pattern (e.g., -E '/run/user/.*').
# -x <type>: Exclude a filesystem type (e.g., -x tmpfs).
#
# Nagios Exit Codes:
# 0 - OK: All checked filesystems are within thresholds.
# 1 - WARNING: Usage has exceeded a warning threshold.
# 2 - CRITICAL: Usage has exceeded a critical threshold.
# 3 - UNKNOWN: The script encountered an error.
#
# =============================================================================
# Exit statuses reported back to Nagios/Icinga.
OK=0 WARNING=1 CRITICAL=2 UNKNOWN=3

# --- Defaults ---
# Percentage thresholds for block usage (overridden by -w / -c).
WARN_THRESHOLD=80 CRIT_THRESHOLD=90
# Percentage thresholds for inode usage (overridden by -W / -C).
INODE_WARN_THRESHOLD=80 INODE_CRIT_THRESHOLD=90

# Exclusion lists, appended to by -e (exact mount point), -E (mount point
# regex) and -x (filesystem type).
declare -a EXCLUDE_MOUNTS EXCLUDE_MOUNTS_REGEX EXCLUDE_TYPES
# --- Functions ---
# usage
# Print the command-line help to stdout and terminate the plugin with the
# UNKNOWN status (falls back to 3 if UNKNOWN is not set).
# Globals read: WARN_THRESHOLD, CRIT_THRESHOLD, INODE_WARN_THRESHOLD,
#               INODE_CRIT_THRESHOLD, UNKNOWN
# Never returns.
usage() {
  # Every flag is optional (thresholds have defaults), so each one is shown in
  # brackets; the exclusion flags may be repeated.
  cat <<EOF
Usage: $0 [-w <warn>] [-c <crit>] [-W <iwarn>] [-C <icrit>] [-e <mount>]... [-E <regex>]... [-x <type>]... [-h]
 Block Usage Thresholds (%):
 -w: Warning threshold. Default: ${WARN_THRESHOLD}%
 -c: Critical threshold. Default: ${CRIT_THRESHOLD}%
 Inode Usage Thresholds (%):
 -W: Warning threshold. Default: ${INODE_WARN_THRESHOLD}%
 -C: Critical threshold. Default: ${INODE_CRIT_THRESHOLD}%
 Exclusions (can be specified multiple times):
 -e: Mount point to exclude by exact match (e.g., -e /tmp).
 -E: Regex for mount points to exclude (e.g., -E '/run/user/.*').
 -x: Filesystem type to exclude (e.g., -x tmpfs).
 Help:
 -h: Display this help message.
EOF
  exit "${UNKNOWN:-3}"
}
# --- Argument Parsing ---
# parse_args ARG...
# Apply the command-line flags to the global configuration.
#   -w/-c/-W/-C <n>  set WARN_THRESHOLD / CRIT_THRESHOLD /
#                    INODE_WARN_THRESHOLD / INODE_CRIT_THRESHOLD
#   -e/-E/-x <val>   append to EXCLUDE_MOUNTS / EXCLUDE_MOUNTS_REGEX /
#                    EXCLUDE_TYPES
#   -h               show the help via usage()
# Relies on usage() terminating the script. Unknown flags and flags without a
# value end in usage(); a malformed threshold exits with $UNKNOWN.
parse_args() {
  local opt value
  while (($# > 0)); do
    opt=$1
    case "$opt" in
      -h)
        usage
        ;;
      -w | -c | -W | -C | -e | -E | -x)
        # 'shift 2' with a single remaining parameter fails without shifting,
        # which would keep this loop spinning forever; reject it up front.
        if (($# < 2)); then
          echo "Option $opt requires an argument."
          usage
        fi
        value=$2
        shift 2
        ;;
      *)
        echo "Unknown option: $opt"
        usage
        ;;
    esac

    case "$opt" in
      -e) EXCLUDE_MOUNTS+=("$value") ;;
      -E) EXCLUDE_MOUNTS_REGEX+=("$value") ;;
      -x) EXCLUDE_TYPES+=("$value") ;;
      *)
        # Thresholds are later compared with '[[ ... -ge ... ]]', which
        # evaluates its operands as arithmetic expressions. Only plain digits
        # (optionally followed by one '%') are accepted so that no arbitrary
        # text ever reaches that evaluation.
        value=${value%'%'}
        if [[ ! "$value" =~ ^[0-9]+$ ]]; then
          echo "UNKNOWN: Threshold for $opt must be a non-negative integer percentage."
          exit "$UNKNOWN"
        fi
        # Force base 10 so that a value such as 080 is not parsed as octal.
        value=$((10#$value))
        case "$opt" in
          -w) WARN_THRESHOLD=$value ;;
          -c) CRIT_THRESHOLD=$value ;;
          -W) INODE_WARN_THRESHOLD=$value ;;
          -C) INODE_CRIT_THRESHOLD=$value ;;
        esac
        ;;
    esac
  done
}
parse_args "$@"
# --- Main Logic ---
# Overall result: starts at OK and is escalated while filesystems are checked.
final_status=$OK
summary_message="OK: All filesystems are within thresholds."
# Raw 'df -hPT' lines of the filesystems that tripped a threshold.
declare -a critical_alerts warning_alerts
# Inode usage percentage keyed by mount point, filled from 'df -iP'.
declare -A inode_usage_map

# mount_is_excluded MOUNT FSTYPE
# Succeeds when MOUNT equals an EXCLUDE_MOUNTS entry, matches one of the
# EXCLUDE_MOUNTS_REGEX patterns, or FSTYPE equals an EXCLUDE_TYPES entry.
mount_is_excluded() {
  local mount=$1 fstype=$2 entry
  for entry in "${EXCLUDE_MOUNTS[@]}"; do
    [[ "$mount" == "$entry" ]] && return 0
  done
  for entry in "${EXCLUDE_MOUNTS_REGEX[@]}"; do
    [[ "$mount" =~ $entry ]] && return 0
  done
  for entry in "${EXCLUDE_TYPES[@]}"; do
    [[ "$fstype" == "$entry" ]] && return 0
  done
  return 1
}

# reaches_threshold VALUE LIMIT
# Succeeds when VALUE consists of digits only and is greater than or equal to
# LIMIT. Empty or non-numeric values never match.
reaches_threshold() {
  [[ "$1" =~ ^[0-9]+$ && "$1" -ge "$2" ]]
}

# 1. Get inode usage and store it in a map
if ! df_inode_output=$(df -iP); then
  echo "UNKNOWN: 'df -iP' failed."
  exit $UNKNOWN
fi
{
  read -r _ # discard the header line
  # Field 5 is taken as the inode percentage and field 6 as the mount point;
  # anything after the sixth field is ignored, so a mount point containing
  # whitespace is recorded by its first word only.
  while read -r _ _ _ _ iuse_percent mount_point _; do
    iuse_percent=${iuse_percent/[%]/}
    if [[ -n "$mount_point" && "$iuse_percent" =~ ^[0-9]+$ ]]; then
      inode_usage_map["$mount_point"]=$iuse_percent
    fi
  done
} <<<"$df_inode_output"

# 2. Get block usage and perform checks
if ! df_block_output=$(df -hPT); then
  echo "UNKNOWN: 'df -hPT' failed."
  exit $UNKNOWN
fi
{
  read -r _ # discard the header line
  while read -r line; do
    [[ -n "$line" ]] || continue
    # Fields 2, 6 and 7 are taken as filesystem type, block usage percentage
    # and mount point (first word only, as above).
    read -ra fields <<<"$line"
    fs_type=${fields[1]}
    block_percent=${fields[5]/[%]/}
    mount_point=${fields[6]}

    # --- Exclusion Logic ---
    mount_is_excluded "$mount_point" "$fs_type" && continue

    # --- Threshold Checks ---
    # A filesystem is reported once, at the highest severity reached by either
    # its block usage or its inode usage.
    inode_percent=${inode_usage_map["$mount_point"]}
    if reaches_threshold "$block_percent" "$CRIT_THRESHOLD" \
      || reaches_threshold "$inode_percent" "$INODE_CRIT_THRESHOLD"; then
      critical_alerts+=("$line")
      final_status=$CRITICAL
      summary_message="CRITICAL: Filesystem usage has exceeded critical threshold."
    elif reaches_threshold "$block_percent" "$WARN_THRESHOLD" \
      || reaches_threshold "$inode_percent" "$INODE_WARN_THRESHOLD"; then
      warning_alerts+=("$line")
      # Never downgrade a CRITICAL result raised by an earlier filesystem.
      [ "$final_status" -eq "$CRITICAL" ] || {
        final_status=$WARNING
        summary_message="WARNING: Filesystem usage has exceeded warning threshold."
      }
    fi
  done
} <<<"$df_block_output"
# --- Output Generation ---
# print_table_row COL1 ... COL8
# Print one left-aligned, fixed-width row of the alert table.
print_table_row() {
  printf '%-30s %-15s %-10s %-10s %-10s %-10s %-10s %-30s\n' "$@"
}

# The summary is always printed first; the detail table follows only when at
# least one filesystem raised a warning or critical alert.
echo "$summary_message"
if [ "$final_status" -ne "$OK" ]; then
  echo ""
  print_table_row "Filesystem" "Type" "Size" "Used" "Avail" "Use%" "IUse%" "Mounted on"
  printf '%s\n' "----------------------------------------------------------------------------------------------------------------------------------"
  # Critical entries are listed before warning entries.
  for item in "${critical_alerts[@]}" "${warning_alerts[@]}"; do
    # Split the stored df line into its first seven fields; extra words (a
    # mount point containing whitespace) are dropped, so the mount point is
    # its first word only.
    read -r row_fs row_type row_size row_used row_avail row_pcent row_mount _ <<<"$item"
    # Append '%' only to a real value so a missing inode figure is shown as
    # "N/A" rather than "N/A%". The emptiness check on the mount point avoids
    # an invalid (empty) associative-array subscript.
    inode_display="N/A"
    if [[ -n "$row_mount" && -n "${inode_usage_map[$row_mount]:-}" ]]; then
      inode_display="${inode_usage_map[$row_mount]}%"
    fi
    print_table_row "$row_fs" "$row_type" "$row_size" "$row_used" "$row_avail" \
      "$row_pcent" "$inode_display" "$row_mount"
  done
fi
exit "$final_status"