#!/bin/bash
#
# Nagios/Icinga plugin: report the temperature of an NVMe device as read
# from `smartctl -A`.
#
# Usage: <script> -d <device> [-w <warning>] [-c <critical>]
#   -d  device to query (e.g., /dev/nvme0); required
#   -w  warning threshold in whole degrees (default: 70)
#   -c  critical threshold in whole degrees (default: 80)
#
# Exit status: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN.
#
# All messages, including errors, are written to stdout on purpose: that is
# the stream the monitoring server displays for a plugin.
#
# Strict mode (set -e / pipefail) is deliberately NOT enabled. smartctl's
# exit status is a bit mask that can be non-zero even when the attribute
# table was printed, and an unplanned non-zero exit from this script would be
# interpreted as a monitoring state instead of an error.

# Nagios Exit Codes
readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

# Print the command-line synopsis.
usage() {
  echo "Usage: $0 -d <device> -w <warning> -c <critical>"
}

# Print the message given in $1 and terminate with the UNKNOWN state.
unknown() {
  echo "$1"
  exit "$STATE_UNKNOWN"
}

# Succeed only if $1 consists solely of decimal digits.
is_uint() {
  [[ "$1" =~ ^[0-9]+$ ]]
}

# Parse options, read the temperature and report the resulting state.
main() {
  # Default values
  local disk=""
  local warning=70
  local critical=80
  local opt temp perfdata
  local OPTIND=1

  # Retrieve arguments
  while getopts "d:w:c:" opt; do
    case "$opt" in
      d) disk=$OPTARG ;;
      w) warning=$OPTARG ;;
      c) critical=$OPTARG ;;
      *)
        usage
        exit "$STATE_UNKNOWN"
        ;;
    esac
  done

  # Check if disk is specified
  if [[ -z "$disk" ]]; then
    unknown "ERROR: Device (e.g., /dev/nvme0) not specified."
  fi

  # Reject non-numeric thresholds up front. Without this, the integer
  # comparisons below fail with a shell error, every branch is skipped and
  # the plugin would wrongly report OK.
  if ! is_uint "$warning"; then
    unknown "ERROR: Warning threshold must be a non-negative integer (got '$warning')."
  fi
  if ! is_uint "$critical"; then
    unknown "ERROR: Critical threshold must be a non-negative integer (got '$critical')."
  fi

  # Check if smartctl is installed
  if ! command -v smartctl > /dev/null 2>&1; then
    unknown "ERROR: smartctl is not installed."
  fi

  # Extract temperature: second field of the first line containing
  # 'Temperature:' (case-insensitive). Only the first match is used so that
  # several matching lines cannot produce a multi-line value. awk keeps
  # reading to end of input so smartctl is never cut off mid-write.
  temp=$(smartctl -A "$disk" \
    | awk '!seen && tolower($0) ~ /temperature:/ { print $2; seen = 1 }')

  # Check if a numeric value was retrieved
  if ! is_uint "$temp"; then
    unknown "ERROR: Could not read temperature on $disk (check sudo permissions)."
  fi

  # Performance data: value;warn;crit;min;max
  perfdata="temp=${temp};${warning};${critical};0;100"

  # Nagios output logic. `[ ... -ge ... ]` is used rather than `[[ ]]` or
  # `(( ))` because it compares operands as decimal integers; the arithmetic
  # forms would treat a value with a leading zero (e.g. 08) as invalid octal.
  if [ "$temp" -ge "$critical" ]; then
    echo "CRITICAL - NVMe Temperature: ${temp}°C (Threshold: ${critical}) | ${perfdata}"
    exit "$STATE_CRITICAL"
  elif [ "$temp" -ge "$warning" ]; then
    echo "WARNING - NVMe Temperature: ${temp}°C (Threshold: ${warning}) | ${perfdata}"
    exit "$STATE_WARNING"
  else
    echo "OK - NVMe Temperature: ${temp}°C | ${perfdata}"
    exit "$STATE_OK"
  fi
}

main "$@"