#!/usr/bin/env bash
# check_k8s_pod_restarts
# Checks whether any pod restarts (events with reason "Killing") occurred in the last X minutes.
# Exit codes: 0=OK, 2=CRITICAL, 3=UNKNOWN
#
# Usage: 
#  sudo /usr/lib/nagios/plugins/check_k8s_pod_restarts [minutes]
#
# Look-back window in minutes: first positional argument, defaulting to 5.
MINUTES=${1:-5}

# Only plain non-negative integers are accepted, so that a typo is reported as
# such instead of surfacing later as a confusing "date parsing failed" error.
if ! [[ "$MINUTES" =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN - invalid minutes value '${MINUTES}' (expected a non-negative integer)"
  exit 3
fi

# Require kubectl
if ! command -v kubectl >/dev/null 2>&1; then
  echo "UNKNOWN - kubectl not found"
  exit 3
fi

# Cutoff as an epoch timestamp (relies on GNU date's -d option).
if ! cutoff=$(date -d "$MINUTES minutes ago" +%s 2>/dev/null); then
  echo "UNKNOWN - date parsing failed (on macOS use gdate from coreutils)"
  exit 3
fi

# One event per line: namespace, object name, lastTimestamp, message, separated
# by real TAB characters. jsonpath is used instead of custom-columns because the
# latter pads columns with spaces, which a tab-delimited read cannot split.
events_template='{range .items[*]}{.metadata.namespace}{"\t"}{.involvedObject.name}{"\t"}{.lastTimestamp}{"\t"}{.message}{"\n"}{end}'

# Fetch first and check the exit status explicitly: a failing kubectl (API
# unreachable, missing permissions, ...) must yield UNKNOWN, not a false OK.
# stderr is discarded on purpose so warnings cannot leak into the event list.
events=$(kubectl get events --all-namespaces --field-selector reason=Killing \
  -o "jsonpath=${events_template}" 2>/dev/null)
kubectl_rc=$?
if (( kubectl_rc != 0 )); then
  echo "UNKNOWN - kubectl get events failed (exit code ${kubectl_rc})"
  exit 3
fi

# Collect one ready-to-print detail line per event newer than the cutoff.
matches=()
while IFS=$'\t' read -r ns pod last msg; do
  # Blank lines, and lines lacking enough fields, leave $last empty.
  [[ -z "$last" ]] && continue
  # Convert the event timestamp to epoch; events whose timestamp GNU date
  # cannot parse are skipped rather than aborting the whole check.
  ts=$(date -d "$last" +%s 2>/dev/null) || continue
  (( ts >= cutoff )) || continue
  # Cap the message at 300 characters to keep the plugin output bounded.
  matches+=(" - ${ns}/${pod} at ${last} : ${msg:0:300}")
done <<< "$events"

if (( ${#matches[@]} == 0 )); then
  echo "OK - no pod restarts in the last ${MINUTES} minutes"
  exit 0
fi

# First line is the summary; the following lines carry the per-event detail.
echo "CRITICAL - ${#matches[@]} pod restarts in the last ${MINUTES} minutes:"
printf '%s\n' "${matches[@]}"
exit 2