diff --git a/defaults/main.yml b/defaults/main.yml
index 2fe3548..a558eca 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -4,6 +4,14 @@ nrpe_allowed_hosts: '127.0.0.1,51.158.69.165,49.12.224.53'
 nrpe_load_warning: "{{ ansible_processor_cores|int }}"
 nrpe_load_critical: "{{ (ansible_processor_cores * 2)|int }}"
 
+# check_procs -w/-c thresholds for the total process count
+nrpe_check_total_procs_warning: 500
+nrpe_check_total_procs_critical: 800
+
+# check_procs -w/-c thresholds for zombie processes (-s Z)
+nrpe_check_zombie_procs_warning: 5
+nrpe_check_zombie_procs_critical: 10
+
 nrpe_disk_usage_warning: 80
 nrpe_disk_usage_critical: 90
 
@@ -34,3 +42,11 @@ nrpe_postgresql_backend_critical: 90
 nrpe_mysql_host: localhost
 nrpe_mysql_user: nagios
 nrpe_mysql_password: changeme_
+# check_mysql_longqueries -w/-c thresholds
+nrpe_mysql_longqueries_warning: 600
+nrpe_mysql_longqueries_critical: 1200
+
+# check_proc_age -w/-c thresholds; the command is only rendered when
+# nrpe_proc_age_process is set
+nrpe_proc_age_warning: 400
+nrpe_proc_age_critical: 600
diff --git a/files/nrpe/check_process b/files/nrpe/check_process
deleted file mode 100755
index 98aa774..0000000
--- a/files/nrpe/check_process
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-
-rc=0
-for proc in cron rsyslogd ntpd munin-node; do
-  sudo /usr/lib/nagios/plugins/check_procs -C $proc -c 1:
-  rc=$(($rc|$?))
-done
diff --git a/templates/nrpe.j2 b/templates/nrpe.j2
index 3b55e91..182cdaa 100644
--- a/templates/nrpe.j2
+++ b/templates/nrpe.j2
@@ -5,16 +5,17 @@ dont_blame_nrpe=1
 # base
 command[check_load]=/usr/lib/nagios/plugins/check_load -w {{ nrpe_load_warning }} -c {{ nrpe_load_critical }}
 command[check_memory]=/usr/lib/nagios/plugins/check_memory -w {{ nrpe_memory_warning }} -c {{ nrpe_memory_critical }} -W {{ nrpe_swap_warning }} -C {{ nrpe_swap_critical }}
-command[check_zombie_procs]=/usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z
-command[check_total_procs]=/usr/lib/nagios/plugins/check_procs -w 500 -c 800
-command[check_process]=/usr/lib/nagios/plugins/check_process
+command[check_zombie_procs]=/usr/lib/nagios/plugins/check_procs -w {{ nrpe_check_zombie_procs_warning }} -c {{ nrpe_check_zombie_procs_critical }} -s Z
+command[check_total_procs]=/usr/lib/nagios/plugins/check_procs -w {{ nrpe_check_total_procs_warning }} -c {{ nrpe_check_total_procs_critical }}
 command[check_dns]=/usr/lib/nagios/plugins/check_dns -H google.com
 {% if nrpe_ssl_host is defined %}
 command[check_ssl]=/usr/lib/nagios/plugins/check_http --sni '{{ nrpe_ssl_host }}' -C 14,3
 {% endif %}
-command[check_eth]=/usr/lib/nagios/plugins/check_eth -i {{ ansible_default_ipv4.interface }} -w {{ nrpe_eth_warning }} -c {{ nrpe_eth_critical }}
-command[check_proc_fail2ban]=/usr/lib/nagios/plugins/check_procs -a fail2ban -w 1: -c 1:
-command[check_proc_age]=/usr/lib/nagios/plugins/check_proc_age -p -w 400 -c 600
+command[check_bandwidth]=/usr/lib/nagios/plugins/check_eth -i {{ ansible_default_ipv4.interface }} -w {{ nrpe_bandwidth_warning }} -c {{ nrpe_bandwidth_critical }}
+command[check_systemd_fail2ban]=/usr/lib/nagios/plugins/check_systemd_service fail2ban
+{% if nrpe_proc_age_process is defined %}
+command[check_proc_age]=/usr/lib/nagios/plugins/check_proc_age -p {{ nrpe_proc_age_process }} -w {{ nrpe_proc_age_warning }} -c {{ nrpe_proc_age_critical }}
+{% endif %}
 command[check_systemd_failed]=/usr/lib/nagios/plugins/check_systemd_failed
 command[check_needrestart]=/usr/lib/nagios/plugins/check_needrestart
 
@@ -32,22 +33,20 @@ command[check_rw_root]=/usr/lib/nagios/plugins/check_rofs /
 {% if nrpe_mysql is defined %}
 # mysql
 command[check_mysql]=/usr/lib/nagios/plugins/check_mysql -u {{ nrpe_mysql_user }} -p{{ nrpe_mysql_password }} -H {{ nrpe_mysql_host }} -d mysql
-command[check_mysql_longqueries]=/usr/lib/nagios/plugins/check_mysql_longqueries -u {{ nrpe_mysql_user }} -p {{ nrpe_mysql_password }} -H {{ nrpe_mysql_host }} -w 600 -c 1200
+command[check_mysql_longqueries]=/usr/lib/nagios/plugins/check_mysql_longqueries -u {{ nrpe_mysql_user }} -p {{ nrpe_mysql_password }} -H {{ nrpe_mysql_host }} -w {{ nrpe_mysql_longqueries_warning }} -c {{ nrpe_mysql_longqueries_critical }}
 {% endif %}
 
 {% if nrpe_postgresql is defined %}
 # postgresql
 command[check_pgsql_port]=/usr/lib/nagios/plugins/check_tcp -p {{ nrpe_postgresql_port }} -H {{ nrpe_postgresql_host }}
-command[check_proc_postgresql]=/usr/lib/nagios/plugins/check_systemd_service postgresql
+command[check_systemd_postgresql]=/usr/lib/nagios/plugins/check_systemd_service postgresql
 command[check_pgsql_connection]=/usr/lib/nagios/plugins/check_postgresql -H {{ nrpe_postgresql_host }} -p {{ nrpe_postgresql_port }} --dbuser={{ nrpe_postgresql_user }} --dbpass={{ nrpe_postgresql_password }} --action=connection
 command[check_pgsql_backends]=/usr/lib/nagios/plugins/check_postgresql -H {{ nrpe_postgresql_host }} -p {{ nrpe_postgresql_port }} --dbuser={{ nrpe_postgresql_user }} --dbpass={{ nrpe_postgresql_password }} --action=backends -w {{ nrpe_postgresql_backend_warning }} -c {{ nrpe_postgresql_backend_critical }}
 {% endif %}
 
-{% if nrpe_mail is defined %}
+{% if nrpe_smtp_host is defined or nrpe_mail_service is defined %}
 # mail
 command[check_smtp]=/usr/lib/nagios/plugins/check_tcp -p 25 -H {{ nrpe_smtp_host }}
-{% endif %}
-{% if nrpe_mail_service is defined %}
 {% if nrpe_mail_service == 'postfix' %}
 command[check_mailq]=/usr/bin/sudo /usr/lib/nagios/plugins/check_postfix_mailqueue -w {{ nrpe_mailq_warning }} -c {{ nrpe_mailq_critical }}
 {% elif nrpe_mail_service == 'exim' %}
@@ -55,6 +54,8 @@ command[check_mailq]=/usr/bin/sudo /usr/lib/nagios/plugins/check_exim_mailqueue
 {% endif %}
 {% endif %}
 
+{% if nrpe_raid_soft is defined or nrpe_raid_3ware is defined or nrpe_raid is defined %}
+# raid
 {% if nrpe_raid_soft is defined %}
 command[check_mdadm]=/usr/lib/nagios/plugins/check_mdadm
 {% endif %}
@@ -64,17 +65,19 @@ command[check_3ware]=/usr/bin/sudo /usr/lib/nagios/plugins/check_3ware
 {% if nrpe_raid is defined %}
 command[check_3ware]=/usr/bin/sudo /usr/lib/nagios/plugins/check_raid
 {% endif %}
+{% endif %}
 
 {% if nrpe_docker_container is defined %}
+# docker containers
 {% for container in nrpe_docker_container %}
 command[check_docker_{{ container }}]=/usr/lib/nagios/plugins/check_docker --containers {{ container }}.* --status running --restarts 2:5 --present
 {% endfor %}
 {% endif %}
 
-{% if nrpe_process is defined %}
-# process
-{% for process in nrpe_process %}
-command[check_proc_{{ process }}]=/usr/lib/nagios/plugins/check_systemd_service {{ process }}
+{% if nrpe_service is defined %}
+# systemd service
+{% for service in nrpe_service %}
+command[check_systemd_{{ service }}]=/usr/lib/nagios/plugins/check_systemd_service {{ service }}
 {% endfor %}
 {% endif %}
 
@@ -82,9 +85,9 @@ command[check_proc_{{ process }}]=/usr/lib/nagios/plugins/check_systemd_service
 # kubernetes
 {% if nrpe_kubernetes is defined %}
 ## nodes
-command[check_proc_kubelet]=/usr/lib/nagios/plugins/check_systemd_service kubelet
-command[check_proc_etcd]=/usr/lib/nagios/plugins/check_systemd_service etcd
-command[check_proc_containerd]=/usr/lib/nagios/plugins/check_systemd_service containerd
+command[check_systemd_kubelet]=/usr/lib/nagios/plugins/check_systemd_service kubelet
+command[check_systemd_etcd]=/usr/lib/nagios/plugins/check_systemd_service etcd
+command[check_systemd_containerd]=/usr/lib/nagios/plugins/check_systemd_service containerd
 {% endif %}
 {% if nrpe_kubernetes_manager is defined %}
 ## manager / control plane