Update check_disk_usage: add inode usage checks, mount point/filesystem type exclusions, and regex-based mount point exclusion
This commit is contained in:
Binary file not shown.
@@ -2,29 +2,37 @@
|
|||||||
#
|
#
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
#
|
#
|
||||||
# Nagios plugin to check disk space usage.
|
# Nagios plugin to check disk space and inode usage.
|
||||||
#
|
#
|
||||||
# Author: GitHub Copilot
|
# Author: GitHub Copilot
|
||||||
# Version: 1.0
|
# Version: 1.2
|
||||||
#
|
#
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
#
|
#
|
||||||
# This script checks the disk space utilization of all mounted filesystems.
|
# This script checks both block and inode utilization of all mounted filesystems.
|
||||||
# It is designed to be used as a Nagios/Icinga check command.
|
# It is designed to be used as a Nagios/Icinga check command.
|
||||||
#
|
#
|
||||||
# The script compares the percentage of used space against configurable
|
# The script compares usage percentages against configurable thresholds.
|
||||||
# warning and critical thresholds. If any filesystems exceed these thresholds,
|
# It can also exclude specific mount points or filesystem types.
|
||||||
# it will exit with the appropriate status and display a human-readable
|
|
||||||
# table of the problematic filesystems.
|
|
||||||
#
|
#
|
||||||
# Thresholds are managed via the -w (warning) and -c (critical) flags.
|
# Block Usage Flags:
|
||||||
|
# -w <%>: Warning threshold for block usage.
|
||||||
|
# -c <%>: Critical threshold for block usage.
|
||||||
|
#
|
||||||
|
# Inode Usage Flags:
|
||||||
|
# -W <%>: Warning threshold for inode usage.
|
||||||
|
# -C <%>: Critical threshold for inode usage.
|
||||||
|
#
|
||||||
|
# Exclusion Flags (can be used multiple times):
|
||||||
|
# -e <mount>: Exclude a mount point by exact match (e.g., -e /tmp).
|
||||||
|
# -E <regex>: Exclude mount points matching an extended regex. The match is unanchored, so use ^ and $ to match the whole path (e.g., -E '^/run/user/').
|
||||||
|
# -x <type>: Exclude a filesystem type (e.g., -x tmpfs).
|
||||||
#
|
#
|
||||||
# Nagios Exit Codes:
|
# Nagios Exit Codes:
|
||||||
# 0 - OK: All filesystems are within thresholds.
|
# 0 - OK: All checked filesystems are within thresholds.
|
||||||
# 1 - WARNING: One or more filesystems have exceeded the warning threshold.
|
# 1 - WARNING: Block or inode usage has reached or exceeded a warning threshold.
|
||||||
# 2 - CRITICAL: One or more filesystems have exceeded the critical threshold.
|
# 2 - CRITICAL: Block or inode usage has reached or exceeded a critical threshold.
|
||||||
# 3 - UNKNOWN: The script encountered an error (e.g., bad arguments or
|
# 3 - UNKNOWN: The script encountered an error (e.g., an unknown option or a failed 'df' command); also returned after printing help (-h).
|
||||||
# the 'df' command failed).
|
|
||||||
#
|
#
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
@@ -34,115 +42,135 @@ WARNING=1
|
|||||||
CRITICAL=2
|
CRITICAL=2
|
||||||
UNKNOWN=3
|
UNKNOWN=3
|
||||||
|
|
||||||
# Default Thresholds (in percentage)
|
# --- Defaults ---
|
||||||
WARN_THRESHOLD=80
|
WARN_THRESHOLD=80
|
||||||
CRIT_THRESHOLD=90
|
CRIT_THRESHOLD=90
|
||||||
|
INODE_WARN_THRESHOLD=80
|
||||||
|
INODE_CRIT_THRESHOLD=90
|
||||||
|
declare -a EXCLUDE_MOUNTS
|
||||||
|
declare -a EXCLUDE_MOUNTS_REGEX
|
||||||
|
declare -a EXCLUDE_TYPES
|
||||||
|
|
||||||
# --- Functions ---
|
# --- Functions ---
|
||||||
|
|
||||||
# Function to display help/usage information
|
|
||||||
usage() {
|
usage() {
|
||||||
echo "Usage: $0 [-w <warning_threshold>] [-c <critical_threshold>]"
|
echo "Usage: $0 -w <warn> -c <crit> [-W <iwarn>] [-C <icrit>] [-e <mount>] [-E <regex>] [-x <type>]"
|
||||||
echo " -w: Warning threshold percentage (e.g., 80). Default: ${WARN_THRESHOLD}%"
|
echo " Block Usage Thresholds (%):"
|
||||||
echo " -c: Critical threshold percentage (e.g., 90). Default: ${CRIT_THRESHOLD}%"
|
echo " -w: Warning threshold. Default: ${WARN_THRESHOLD}%"
|
||||||
echo " -h: Display this help message"
|
echo " -c: Critical threshold. Default: ${CRIT_THRESHOLD}%"
|
||||||
|
echo " Inode Usage Thresholds (%):"
|
||||||
|
echo " -W: Warning threshold. Default: ${INODE_WARN_THRESHOLD}%"
|
||||||
|
echo " -C: Critical threshold. Default: ${INODE_CRIT_THRESHOLD}%"
|
||||||
|
echo " Exclusions (can be specified multiple times):"
|
||||||
|
echo " -e: Mount point to exclude by exact match (e.g., -e /tmp)."
|
||||||
|
echo " -E: Regex for mount points to exclude (e.g., -E '/run/user/.*')."
|
||||||
|
echo " -x: Filesystem type to exclude (e.g., -x tmpfs)."
|
||||||
|
echo " Help:"
|
||||||
|
echo " -h: Display this help message."
|
||||||
exit $UNKNOWN
|
exit $UNKNOWN
|
||||||
}
|
}
|
||||||
|
|
||||||
# --- Argument Parsing ---
|
# --- Argument Parsing ---
|
||||||
|
while [[ $# -gt 0 ]]; do
|
||||||
while getopts "w:c:h" opt; do
|
case "$1" in
|
||||||
case ${opt} in
|
-w) WARN_THRESHOLD="$2"; shift 2 ;;
|
||||||
w)
|
-c) CRIT_THRESHOLD="$2"; shift 2 ;;
|
||||||
WARN_THRESHOLD=${OPTARG}
|
-W) INODE_WARN_THRESHOLD="$2"; shift 2 ;;
|
||||||
;;
|
-C) INODE_CRIT_THRESHOLD="$2"; shift 2 ;;
|
||||||
c)
|
-e) EXCLUDE_MOUNTS+=("$2"); shift 2 ;;
|
||||||
CRIT_THRESHOLD=${OPTARG}
|
-E) EXCLUDE_MOUNTS_REGEX+=("$2"); shift 2 ;;
|
||||||
;;
|
-x) EXCLUDE_TYPES+=("$2"); shift 2 ;;
|
||||||
h)
|
-h) usage ;;
|
||||||
usage
|
*) echo "Unknown option: $1"; usage ;;
|
||||||
;;
|
|
||||||
*)
|
|
||||||
usage
|
|
||||||
;;
|
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
# Validate that thresholds are numbers
|
|
||||||
if ! [[ "$WARN_THRESHOLD" =~ ^[0-9]+$ ]] || ! [[ "$CRIT_THRESHOLD" =~ ^[0-9]+$ ]]; then
|
|
||||||
echo "UNKNOWN: Warning and critical thresholds must be integers."
|
|
||||||
exit $UNKNOWN
|
|
||||||
fi
|
|
||||||
|
|
||||||
# --- Main Logic ---
|
# --- Main Logic ---
|
||||||
|
|
||||||
# Final exit status, assuming OK until a problem is found
|
|
||||||
final_status=$OK
|
final_status=$OK
|
||||||
# Summary message for the first line of output
|
|
||||||
summary_message="OK: All filesystems are within thresholds."
|
summary_message="OK: All filesystems are within thresholds."
|
||||||
|
declare -a critical_alerts
|
||||||
|
declare -a warning_alerts
|
||||||
|
declare -A inode_usage_map
|
||||||
|
|
||||||
# Arrays to store problematic filesystems
|
# 1. Get inode usage and store it in a map
|
||||||
declare -a critical_filesystems
|
df_inode_output=$(df -iP)
|
||||||
declare -a warning_filesystems
|
if [ $? -ne 0 ]; then echo "UNKNOWN: 'df -iP' failed."; exit $UNKNOWN; fi
|
||||||
|
|
||||||
# Get the disk usage data. Exit if the command fails.
|
|
||||||
df_output=$(df -hP)
|
|
||||||
if [ $? -ne 0 ]; then
|
|
||||||
echo "UNKNOWN: 'df -hP' command failed to execute."
|
|
||||||
exit $UNKNOWN
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
|
||||||
# Use process substitution to read df output line by line, skipping the header
|
|
||||||
# This avoids creating a subshell for the loop, so variables can be modified.
|
|
||||||
while read -r line; do
|
while read -r line; do
|
||||||
# Extract usage percentage and mount point
|
[ -z "$line" ] && continue
|
||||||
usage_percent=$(echo "$line" | awk '{print $5}' | sed 's/%//')
|
mount_point=$(echo "$line" | awk '{print $6}')
|
||||||
|
iuse_percent=$(echo "$line" | awk '{print $5}' | sed 's/%//')
|
||||||
# Skip non-numeric usage percentages (e.g., for certain pseudo-filesystems)
|
if [[ "$mount_point" && "$iuse_percent" =~ ^[0-9]+$ ]]; then
|
||||||
if ! [[ "$usage_percent" =~ ^[0-9]+$ ]]; then
|
inode_usage_map["$mount_point"]=$iuse_percent
|
||||||
continue
|
|
||||||
fi
|
fi
|
||||||
|
done < <(echo "${df_inode_output}" | tail -n +2)
|
||||||
|
|
||||||
# Check against thresholds
|
# 2. Get block usage and perform checks
|
||||||
if [ "$usage_percent" -ge "$CRIT_THRESHOLD" ]; then
|
df_block_output=$(df -hPT)
|
||||||
critical_filesystems+=("$line")
|
if [ $? -ne 0 ]; then echo "UNKNOWN: 'df -hPT' failed."; exit $UNKNOWN; fi
|
||||||
|
|
||||||
|
while read -r line; do
|
||||||
|
[ -z "$line" ] && continue
|
||||||
|
|
||||||
|
fs_type=$(echo "$line" | awk '{print $2}')
|
||||||
|
block_percent=$(echo "$line" | awk '{print $6}' | sed 's/%//')
|
||||||
|
mount_point=$(echo "$line" | awk '{print $7}')
|
||||||
|
|
||||||
|
# --- Exclusion Logic ---
|
||||||
|
is_excluded=false
|
||||||
|
# a) Check for exact mount point match
|
||||||
|
for excluded_mount in "${EXCLUDE_MOUNTS[@]}"; do
|
||||||
|
if [[ "$mount_point" == "$excluded_mount" ]]; then is_excluded=true; break; fi
|
||||||
|
done
|
||||||
|
[ "$is_excluded" = true ] && continue
|
||||||
|
|
||||||
|
# b) Check for regex mount point match
|
||||||
|
for pattern in "${EXCLUDE_MOUNTS_REGEX[@]}"; do
|
||||||
|
if [[ "$mount_point" =~ $pattern ]]; then is_excluded=true; break; fi
|
||||||
|
done
|
||||||
|
[ "$is_excluded" = true ] && continue
|
||||||
|
|
||||||
|
# c) Check for filesystem type match
|
||||||
|
for excluded_type in "${EXCLUDE_TYPES[@]}"; do
|
||||||
|
if [[ "$fs_type" == "$excluded_type" ]]; then is_excluded=true; break; fi
|
||||||
|
done
|
||||||
|
[ "$is_excluded" = true ] && continue
|
||||||
|
|
||||||
|
# --- Threshold Checks ---
|
||||||
|
inode_percent=${inode_usage_map["$mount_point"]}
|
||||||
|
|
||||||
|
if [[ "$block_percent" =~ ^[0-9]+$ && "$block_percent" -ge "$CRIT_THRESHOLD" ]] || \
|
||||||
|
[[ "$inode_percent" =~ ^[0-9]+$ && "$inode_percent" -ge "$INODE_CRIT_THRESHOLD" ]]; then
|
||||||
|
critical_alerts+=("$line")
|
||||||
final_status=$CRITICAL
|
final_status=$CRITICAL
|
||||||
summary_message="CRITICAL: Filesystem usage has exceeded critical threshold."
|
summary_message="CRITICAL: Filesystem usage has exceeded critical threshold."
|
||||||
elif [ "$usage_percent" -ge "$WARN_THRESHOLD" ]; then
|
elif [[ "$block_percent" =~ ^[0-9]+$ && "$block_percent" -ge "$WARN_THRESHOLD" ]] || \
|
||||||
warning_filesystems+=("$line")
|
[[ "$inode_percent" =~ ^[0-9]+$ && "$inode_percent" -ge "$INODE_WARN_THRESHOLD" ]]; then
|
||||||
|
warning_alerts+=("$line")
|
||||||
if [ "$final_status" -ne "$CRITICAL" ]; then
|
if [ "$final_status" -ne "$CRITICAL" ]; then
|
||||||
final_status=$WARNING
|
final_status=$WARNING
|
||||||
summary_message="WARNING: Filesystem usage has exceeded warning threshold."
|
summary_message="WARNING: Filesystem usage has exceeded warning threshold."
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
done < <(echo "${df_output}" | tail -n +2)
|
done < <(echo "${df_block_output}" | tail -n +2)
|
||||||
|
|
||||||
|
|
||||||
# --- Output Generation ---
|
# --- Output Generation ---
|
||||||
|
|
||||||
# Print the one-line summary for Nagios
|
|
||||||
echo "$summary_message"
|
echo "$summary_message"
|
||||||
|
|
||||||
# If there are any issues, print the detailed table
|
|
||||||
if [ "$final_status" -ne "$OK" ]; then
|
if [ "$final_status" -ne "$OK" ]; then
|
||||||
echo "" # Add a newline for better formatting
|
echo ""
|
||||||
printf "%-30s %-10s %-10s %-10s %-10s %-30s\n" "Filesystem" "Size" "Used" "Avail" "Use%" "Mounted on"
|
printf "%-30s %-15s %-10s %-10s %-10s %-10s %-10s %-30s\n" "Filesystem" "Type" "Size" "Used" "Avail" "Use%" "IUse%" "Mounted on"
|
||||||
printf "%s\n" "---------------------------------------------------------------------------------------------------------------"
|
printf "%s\n" "----------------------------------------------------------------------------------------------------------------------------------"
|
||||||
|
|
||||||
# Print critical filesystems
|
all_alerts=("${critical_alerts[@]}" "${warning_alerts[@]}")
|
||||||
if [ ${#critical_filesystems[@]} -gt 0 ]; then
|
for item in "${all_alerts[@]}"; do
|
||||||
for item in "${critical_filesystems[@]}"; do
|
mount_point=$(echo "$item" | awk '{print $7}')
|
||||||
printf "%-30s %-10s %-10s %-10s %-10s %-30s\n" $(echo "$item")
|
inode_percent=${inode_usage_map["$mount_point"]:-"N/A"}
|
||||||
done
|
formatted_item=$(echo "$item" | awk -v iuse="$inode_percent%" '{printf "%-30s %-15s %-10s %-10s %-10s %-10s %-10s %-30s", $1, $2, $3, $4, $5, $6, iuse, $7}')
|
||||||
fi
|
echo "$formatted_item"
|
||||||
|
done
|
||||||
# Print warning filesystems
|
|
||||||
if [ ${#warning_filesystems[@]} -gt 0 ]; then
|
|
||||||
for item in "${warning_filesystems[@]}"; do
|
|
||||||
printf "%-30s %-10s %-10s %-10s %-10s %-30s\n" $(echo "$item")
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
exit $final_status
|
exit $final_status
|
||||||
|
|||||||
Reference in New Issue
Block a user