initial commit

2024-12-16 19:24:54 +01:00
parent 27957d4418
commit 85029a0f01
20 changed files with 14349 additions and 0 deletions
--- a/defaults/main.yml
+++ b/defaults/main.yml
@ -0,0 +1,20 @@
+---
+nrpe_allowed_hosts: '127.0.0.1,212.85.154.82,51.158.69.165'
+
+nrpe_load_warning: '`cat /proc/cpuinfo |grep -c processor`'
+nrpe_load_critical: '`echo "$(($(cat /proc/cpuinfo |grep -c processor) * 2 ))"`'
+
+nrpe_memory_warning: 80
+nrpe_memory_critical: 90
+
+nrpe_swap_warning: 40
+nrpe_swap_critical: 60
+
+nrpe_exim_warning: 10
+nrpe_exim_critical: 20
+
+nrpe_postfix_warning: 10
+nrpe_postfix_critical: 20
+
+nrpe_eth_warning: '12M'
+nrpe_eth_critical: '15M'
--- a/files/nrpe/check_3ware
+++ b/files/nrpe/check_3ware
@ -0,0 +1,344 @@
+#!/usr/bin/perl
+
+#  -------------------------------------------------------
+#             -=- <check_3ware-raid.pl> -=-
+#  -------------------------------------------------------
+#
+#  Description : yet another plugin to check your 3ware RAID
+#  controller
+#
+#  Version : 0.1
+#  -------------------------------------------------------
+#  In :
+#     - see the How to use section
+#
+#  Out :
+#     - only print on the standard output
+#
+#  Features :
+#     - perfdata output
+#
+#  Fix Me/Todo :
+#     - too many things ;) but let me know what do you think about it
+#
+# ####################################################################
+
+# ####################################################################
+# GPL v3
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+# ####################################################################
+
+# ####################################################################
+# How to use :
+# ------------
+#
+# 1 to use this script you first have to install tw_cli. You can find
+#   the source here : http://www.3ware.com/support/download.asp
+#   just follow the instructions to compile and deploy it
+#
+# 2 then you just have to run the following command :
+#	$ ./check_3ware-raid.pl --help
+#
+# If you need to use this script with NRPE you just have to do the
+# following steps :
+#
+# 1 allow your user to run the script with the sudo rights. Just add
+#   something like that in your /etc/sudoers (use visudo) :
+#     nagios ALL=(ALL) NOPASSWD: /<path-to>/check_3ware-raid.pl
+#
+# 2 then just add this kind of line in your NRPE config file :
+#   command[check_3ware]=/usr/bin/sudo /<path-to>/check_3ware-raid.pl
+#
+# 3 don't forget to restart your NRPE daemon
+#
+# ####################################################################
+
+# ####################################################################
+# Changelog :
+# -----------
+#
+# --------------------------------------------------------------------
+#   Date:28/11/2009   Version:0.1     Author:Erwan Ben Souiden
+#   >> creation
+# ####################################################################
+
+# ####################################################################
+#            Don't touch anything under this line!
+#        You shall not pass - Gandalf is watching you
+# ####################################################################
+
+use strict;
+use warnings;
+use Getopt::Long qw(:config no_ignore_case);
+
+# Generic variables
+# -----------------
+my $version = '0.1';
+my $author = 'Erwan Labynocle Ben Souiden';
+my $a_mail = 'erwan@aleikoum.net';
+my $script_name = 'check_3ware-raid.pl';
+my $verbose_value = 0;
+my $version_value = 0;
+my $more_value = 0;
+my $help_value = 0;
+my $perfdata_value = 0;
+# Nagios plugin exit codes, indexed by state name.
+my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4);
+
+# Plugin default variables
+# ------------------------
+my $display = 'CHECK 3ware RAID - ';
+my ($critical,$warning) = (2,1);
+my $tw_cli_path = '/usr/sbin/tw_cli';
+my ($id_controller,$action) = ("",'disk_check');
+
+# Command line parsing
+# --------------------
+# Every option keeps its short and long spelling; they are declared as aliases
+# of a single specification. A command line that Getopt::Long rejects (unknown
+# option, non-integer threshold, ...) now ends the plugin with UNKNOWN instead
+# of silently running the check with the default values.
+GetOptions (
+    'P|path-tw_cli=s' => \ $tw_cli_path,
+    'w|warning=i' => \ $warning,
+    'c|critical=i' => \ $critical,
+    'a|action=s' => \ $action,
+    'C|controller=s' => \ $id_controller,
+    'm|more' => \ $more_value,
+    'V|version' => \ $version_value,
+    'h|H|help' => \ $help_value,
+    'D|display=s' => \ $display,
+    'p|perfdata' => \ $perfdata_value,
+    'v|verbose' => \ $verbose_value
+) or do {
+    print $display.'UNKNOWN - invalid command line, run with --help to list the supported options'."\n";
+    exit $ERRORS{"UNKNOWN"};
+};
+
+print_usage() if ($help_value);
+print_version() if ($version_value);
+
+
+# Syntax check of your specified options
+# --------------------------------------
+
+print "DEBUG : action : $action, path-tw_cli : $tw_cli_path\n" if ($verbose_value);
+if (($action eq "") or ($tw_cli_path eq "")) {
+    print $display.'one or more following arguments are missing :action/path-tw_cli'."\n";
+    exit $ERRORS{"UNKNOWN"};
+}
+
+print "DEBUG : check if $tw_cli_path exists and is executable\n" if ($verbose_value);
+if(! -x $tw_cli_path) {
+    print $display."$tw_cli_path".' is not executable by you'."\n";
+    exit $ERRORS{"UNKNOWN"};
+}
+
+print "DEBUG : warning threshold : $warning, critical threshold : $critical\n" if ($verbose_value);
+if (($critical < 0) or ($warning < 0) or ($critical < $warning)) {
+    print $display.'the thresholds must be integers and the critical threshold higher or equal than the warning threshold'."\n";
+    exit $ERRORS{"UNKNOWN"};
+}
+
+print "DEBUG : controller : $id_controller\n" if ($verbose_value);
+if ($id_controller ne "") {
+    if (check_controller("$tw_cli_path",$id_controller) != 0) {
+        print $display.'UNKNOWN - problem with the controller '."$id_controller ".'may be it does not exist'."\n";
+        exit $ERRORS{"UNKNOWN"};
+    }
+}
+
+# Core script
+# -----------
+my ($return,$return_more,$plugstate) = ("","","OK");
+
+# Either the single controller given with -C, or every controller tw_cli knows.
+# The test is the same string comparison as the validation above, so a
+# controller id that is false in boolean context is not mistaken for "none".
+my @controller_list;
+if ($id_controller eq "") {
+    @controller_list = list_all_controller("$tw_cli_path");
+    if (! @controller_list) {
+        print $display.'UNKNOWN - problem to have the controllers list'."\n";
+        exit $ERRORS{"UNKNOWN"};
+    }
+}
+else {
+    push(@controller_list,$id_controller);
+}
+
+print "DEBUG : action = $action\n" if ($verbose_value);
+
+# disk_check action
+# -----------------
+# Counters are accumulated over every controller; the summary, the perfdata
+# and the state are produced once, after the loop, so a host with several
+# controllers gets one coherent status line instead of one glued per controller.
+if ($action eq 'disk_check') {
+    my ($c_ok,$c_other) = (0,0);
+    foreach my $controller (@controller_list) {
+        foreach my $line (tw_cli_show($tw_cli_path,$controller)) {
+            next if ($line !~ /^(p\d+)\s+(\S+)\s/);
+            my ($disk,$status) = ($1,$2);
+            print "DEBUG : disk $disk/status $status\n" if ($verbose_value);
+            if ($status eq "OK") {
+                $c_ok++;
+            }
+            elsif ($status ne "NOT-PRESENT") {
+                # empty slots are neither OK nor a problem
+                $c_other++;
+            }
+            $return_more .= " ($disk,$status)";
+        }
+    }
+    $return .= "$c_ok disk(s) detected as OK";
+    $return .= " and $c_other with potential problem" if ($c_other);
+    $return .= " -$return_more" if ($more_value);
+    $return .= " | disksOK=$c_ok disksNOK=$c_other" if ($perfdata_value);
+    $plugstate = "WARNING" if ($c_other >= $warning);
+    $plugstate = "CRITICAL" if ($c_other >= $critical);
+}
+
+# unit action
+# -----------
+elsif ($action eq 'unit_check') {
+    my ($c_ok,$c_rebuild,$c_other) = (0,0,0);
+    foreach my $controller (@controller_list) {
+        foreach my $line (tw_cli_show($tw_cli_path,$controller)) {
+            next if ($line !~ /^(u\d+)\s+(\S+)\s+(\S+)/);
+            my ($unit,$type,$status) = ($1,$2,$3);
+            print "DEBUG : disk $unit/type $type/status $status\n" if ($verbose_value);
+            if ($status eq "OK") {
+                $c_ok++;
+            }
+            # NOTE(review): tw_cli appears to report rebuilds as REBUILDING or
+            # REBUILD-PAUSED rather than the bare word REBUILD, so every status
+            # starting with REBUILD is counted here - confirm against the CLI guide.
+            elsif ($status =~ /^REBUILD/) {
+                $c_rebuild++;
+            }
+            else {
+                $c_other++;
+            }
+            $return_more .= " ($unit,$type,$status)";
+        }
+    }
+    $return .= "$c_ok unit(s) detected as OK";
+    $return .= " and $c_rebuild as REBUILD" if ($c_rebuild);
+    $return .= " and $c_other with potential problem" if ($c_other);
+    $return .= " -$return_more" if ($more_value);
+    $return .= " | unitOK=$c_ok unitREBUILD=$c_rebuild unitNOK=$c_other" if ($perfdata_value);
+    $plugstate = "WARNING" if ($c_rebuild);
+    $plugstate = "CRITICAL" if ($c_other);
+}
+
+else {
+    $return .= "action must be unit_check|disk_check";
+    $action = "";
+    $plugstate = "UNKNOWN";
+}
+
+print $display.$action." - ".$plugstate." - ".$return."\n";
+exit $ERRORS{$plugstate};
+
+# helper : run "tw_cli /<controller> show" and return its output lines
+# ---------------------------------------------------------------------
+# The command is started with the list form of a pipe open, so neither the
+# binary path nor the controller id is ever interpreted by a shell. The plugin
+# ends with UNKNOWN when the command cannot be started.
+sub tw_cli_show {
+    my ($tw_cli,$controller) = @_;
+    open(my $cli_fh, '-|', $tw_cli, "/$controller", 'show') or do {
+        print $display."UNKNOWN - unable to run $tw_cli : $!\n";
+        exit $ERRORS{"UNKNOWN"};
+    };
+    my @lines = <$cli_fh>;
+    close($cli_fh);
+    return @lines;
+}
+
+# ####################################################################
+# function 1 :  display the help
+# ------------------------------
+sub print_usage {
+    # Prints the help screen on the standard output and ends the plugin.
+    # The heredoc interpolates $script_name, $version, $author, $display and
+    # $a_mail, so the text reflects any --display value given on the command
+    # line. The sub never returns: it exits with the UNKNOWN code.
+    print <<EOT;
+$script_name version $version by $author
+
+This plugin checks state of your physical disks and logical units of a 3ware RAID card.
+
+Usage: /<path-to>/$script_name [-a unit_check|disk_check] [-p] [-D "$display"] [-v] [-m] [-c 2] [-w 1] [-C /c1]
+
+Options:
+ -h, --help
+    Print detailed help screen
+ -V, --version
+    Print version information
+ -D, --display=STRING
+    to modify the output display...
+    default is "CHECK 3ware RAID - "
+ -P, --path-tw_cli=STRING
+    specify the path to the tw_cli binary
+    default value is /usr/sbin/tw_cli
+ -a, --action=STRING
+    specify the action : unit_check|disk_check
+    default is disk_check
+    disk_check : display state of all physical disks
+    unit_check : display state of all logical unit
+ -C, --controller=STRING
+    allow you to specify only one controller to check
+    the default behavior is to check each time every controller
+ -c, --critical=INT
+    specify a critical threshold for the number of disks in a non-OK state.
+    default is 2
+    only for the disk_check action
+ -w, --warning=INT
+    specify a warning threshold for the number of disks in a non-OK state.
+    default is 1
+    only for the disk_check action
+ -m, --more
+    Print a longer output. By default, the output is not complet because
+    Nagios may truncate it. This option is just for you
+ -p, --perfdata
+    If you want to activate the perfdata output
+ -v, --verbose
+    Show details for command-line debugging (Nagios may truncate the output)
+
+Send email to $a_mail if you have questions
+regarding use of this software. To submit patches or suggest improvements,
+send email to $a_mail
+This plugin has been created by $author
+
+Hope you will enjoy it ;)
+
+Remember :
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+EOT
+    # Help is not a check result, hence UNKNOWN rather than OK.
+    exit $ERRORS{"UNKNOWN"};
+}
+
+# function 2 :  display version information
+# -----------------------------------------
+sub print_version {
+    # One line "<script name> version <version>" on the standard output, then
+    # the plugin stops with the UNKNOWN code (no check has been performed).
+    my $banner = join(' ', $script_name, 'version', $version);
+    print $banner, "\n";
+    exit $ERRORS{"UNKNOWN"};
+}
+
+# function 3 : check if controller exists
+# ---------------------------------------
+sub check_controller {
+    # Runs "<tw_cli> /<controller> show" with its output discarded and returns
+    # the wait status ($?) of the command: 0 when tw_cli accepted the
+    # controller, non-zero otherwise, -1 when no process could be created.
+    #
+    # The command is exec'ed directly with an argument list. The controller id
+    # comes from the command line and the plugin is meant to be run through
+    # sudo, so it must never reach a shell.
+    my ($tw_cli_path,$id_controller) = @_;
+
+    require POSIX;
+
+    my $pid = fork();
+    return -1 if (! defined $pid);
+
+    if ($pid == 0) {
+        # child : silence both streams, then become tw_cli
+        open(STDOUT, '>', '/dev/null');
+        open(STDERR, '>', '/dev/null');
+        exec { $tw_cli_path } $tw_cli_path, "/$id_controller", 'show'
+            or POSIX::_exit(127);   # same code a shell gives for "command not found"
+    }
+
+    waitpid($pid, 0);
+    return $?;
+}
+
+# function 4 : return the controllers list
+# ----------------------------------------
+sub list_all_controller {
+    # Returns the controller ids (c0, c1, ...) listed by "<tw_cli> show".
+    # The list is empty when the command cannot be started or ends with a
+    # non-zero status; the caller treats an empty list as an error.
+    #
+    # tw_cli is started with the list form of a pipe open: no shell is
+    # involved, so a path containing spaces or shell metacharacters is passed
+    # through untouched.
+    my ($tw_cli_path) = @_;
+    my @controller_list;
+
+    open(my $cli_fh, '-|', $tw_cli_path, 'show') or return @controller_list;
+    my @cmd_output = <$cli_fh>;
+    close($cli_fh);                       # sets $? to the status of tw_cli
+    return @controller_list if ($? != 0);
+
+    foreach my $line (@cmd_output) {
+        push(@controller_list,$1) if ($line =~ /^(c\d+)\s/);
+    }
+    return @controller_list;
+}
--- a/files/nrpe/check_disk_advanced
+++ b/files/nrpe/check_disk_advanced
--- a/files/nrpe/check_dns
+++ b/files/nrpe/check_dns
--- a/files/nrpe/check_docker
+++ b/files/nrpe/check_docker
@ -0,0 +1,985 @@
+#!/usr/bin/env python3
+# logging.basicConfig(level=logging.DEBUG)
+import math
+from collections import deque, namedtuple, UserDict, defaultdict
+from sys import argv
+
+import argparse
+import json
+import logging
+import os
+import re
+import socket
+import stat
+import traceback
+from concurrent import futures
+from datetime import datetime, timezone
+from functools import lru_cache
+from http.client import HTTPConnection
+from urllib import request
+from urllib.error import HTTPError, URLError
+from urllib.request import AbstractHTTPHandler, HTTPHandler, HTTPSHandler, OpenerDirector, HTTPRedirectHandler, \
+    Request, HTTPBasicAuthHandler
+
+logger = logging.getLogger()
+__author__ = 'Tim Laurence'
+__copyright__ = "Copyright 2018"
+__credits__ = ['Tim Laurence']
+__license__ = "GPL"
+__version__ = "2.1.0"
+
+'''
+nrpe compatible check for docker containers.
+
+Requires Python 3
+
+Note: I really would have preferred to have used requests for all the network connections but that would have added a
+dependency.
+'''
+
+DEFAULT_SOCKET = '/var/run/docker.sock'
+DEFAULT_TIMEOUT = 10.0
+DEFAULT_PORT = 2375
+DEFAULT_MEMORY_UNITS = 'B'
+DEFAULT_HEADERS = [('Accept', 'application/vnd.docker.distribution.manifest.v2+json')]
+DEFAULT_PUBLIC_REGISTRY = 'registry-1.docker.io'
+
+# The second value is the power to raise the base to.
+UNIT_ADJUSTMENTS_TEMPLATE = {
+    '%': 0,
+    'B': 0,
+    'KB': 1,
+    'MB': 2,
+    'GB': 3,
+    'TB': 4
+}
+unit_adjustments = None
+
+# Reduce message to a single OK unless a check fails.
+no_ok = False
+
+# Suppress performance data reporting
+no_performance = False
+
+OK_RC = 0
+WARNING_RC = 1
+CRITICAL_RC = 2
+UNKNOWN_RC = 3
+
+# These hold the final results
+rc = -1
+messages = []
+performance_data = []
+
+ImageName = namedtuple('ImageName', "registry name tag full_name")
+
+
+class ThresholdSpec(UserDict):
+    def __init__(self, warn, crit, units=''):
+        super().__init__(warn=warn, crit=crit, units=units)
+
+    def __getattr__(self, item):
+        return self[item]
+
+
+# How much threading can we do? We are generally not CPU bound so I am using this as a worst-case cap
+DEFAULT_PARALLELISM = 10
+
+# Holds list of all threads
+threads = []
+
+# This is used during testing
+DISABLE_THREADING = False
+
+
+# Hacked up urllib to handle sockets
+#############################################################################################
+# Docker runs an HTTP connection over a socket. http.client knows how to deal with these
+# but lacks some niceties. Urllib wraps that and makes up for some of the deficiencies but
+# cannot fix the fact http.client can't read from socket files. In order to take advantage of
+# urllib and http.client's  capabilities the class below tweaks HttpConnection and passes it
+# to urllib registering for socket:// connections
+
+class SocketFileHandler(AbstractHTTPHandler):
+    class SocketFileToHttpConnectionAdaptor(HTTPConnection):
+        def __init__(self, socket_file, timeout=DEFAULT_TIMEOUT):
+            super().__init__(host='', port=0, timeout=timeout)
+            self.socket_file = socket_file
+
+        def connect(self):
+            self.sock = socket.socket(family=socket.AF_UNIX, type=socket.SOCK_STREAM, proto=0, fileno=None)
+            self.sock.settimeout(self.timeout)
+            self.sock.connect(self.socket_file)
+
+    def socket_open(self, req):
+        socket_file, path = req.selector.split(':', 1)
+        req.host = socket_file
+        req.selector = path
+        return self.do_open(self.SocketFileToHttpConnectionAdaptor, req)
+
+
+# Tokens are not cached because I expect the callers to cache the responses
+class Oauth2TokenAuthHandler(HTTPBasicAuthHandler):
+    auth_failure_tracker = defaultdict(int)
+
+    def http_response(self, request, response):
+        code, hdrs = response.code, response.headers
+
+        www_authenticate_header = response.headers.get('www-authenticate', None)
+        if code == 401 and www_authenticate_header:
+            scheme = www_authenticate_header.split()[0]
+            if scheme.lower() == 'bearer':
+                return self.process_oauth2(request, response, www_authenticate_header)
+
+        return response
+
+    https_response = http_response
+
+    @staticmethod
+    def _get_outh2_token(www_authenticate_header):
+        auth_fields = dict(re.findall(r"""(?:(?P<key>[^ ,=]+)="([^"]+)")""", www_authenticate_header))
+
+        auth_url = "{realm}?scope={scope}&service={service}".format(
+            realm=auth_fields['realm'],
+            scope=auth_fields['scope'],
+            service=auth_fields['service'],
+        )
+        token_request = Request(auth_url)
+        token_request.add_header("Content-Type", "application/x-www-form-urlencoded; charset=utf-8")
+        token_response = request.urlopen(token_request)
+        return process_urllib_response(token_response)['token']
+
+    def process_oauth2(self, request, response, www_authenticate_header):
+
+        # This keep infinite auth loops from happening
+        full_url = request.full_url
+        self.auth_failure_tracker[full_url] += 1
+        if self.auth_failure_tracker[full_url] > 1:
+            raise HTTPError(full_url, 401, "Stopping Oauth2 failure loop for {}".format(full_url),
+                            response.headers, response)
+
+        auth_token = self._get_outh2_token(www_authenticate_header)
+
+        request.add_unredirected_header('Authorization', 'Bearer ' + auth_token)
+        return self.parent.open(request, timeout=request.timeout)
+
+
+# Got some help from this example https://gist.github.com/FiloSottile/2077115
+class HeadRequest(Request):
+    def get_method(self):
+        return "HEAD"
+
+
+# Module-wide opener used by get_url() and head_url(). It starts from an empty OpenerDirector, so only
+# the handlers added below take part in a request.
+better_urllib_get = OpenerDirector()
+# Copy, so the opener does not share the DEFAULT_HEADERS list object.
+better_urllib_get.addheaders = DEFAULT_HEADERS.copy()
+better_urllib_get.add_handler(HTTPHandler())
+better_urllib_get.add_handler(HTTPSHandler())
+better_urllib_get.add_handler(HTTPRedirectHandler())
+# Handles requests for unix socket files (see SocketFileHandler.socket_open).
+better_urllib_get.add_handler(SocketFileHandler())
+# Answers 401 "Bearer" challenges (see Oauth2TokenAuthHandler.http_response).
+better_urllib_get.add_handler(Oauth2TokenAuthHandler())
+
+
+class RegistryError(Exception):
+    def __init__(self, response):
+        self.response_obj = response
+
+
+# Util functions
+#############################################################################################
+def parse_thresholds(spec, include_units=True, units_required=True):
+    """
+    Given a spec string break it up into ':' separated chunks. Convert strings to ints as it makes sense
+
+    :param spec: The threshold specification being parsed
+    :param include_units: Specifies that units should be processed and returned if present
+    :param units_required: Mark spec as invalid if the units are missing.
+    :return: A list containing the thresholds in order of warn, crit, and units(if included and present)
+    """
+    parts = deque(spec.split(':'))
+    if not all(parts):
+        raise ValueError("Blanks are not allowed in a threshold specification: {}".format(spec))
+
+    # Warn
+    warn = int(parts.popleft())
+    # Crit
+    crit = int(parts.popleft())
+
+    units = ''
+    if include_units:
+        if len(parts):
+            # units
+            units = parts.popleft()
+        elif units_required:
+            raise ValueError("Missing units in {}".format(spec))
+
+    if len(parts) != 0:
+        raise ValueError("Too many threshold specifiers in {}".format(spec))
+
+    return ThresholdSpec(warn=warn, crit=crit, units=units)
+
+
+def pretty_time(seconds):
+    remainder = seconds
+    result = []
+    if remainder > 24 * 60 * 60:
+        days, remainder = divmod(remainder, 24 * 60 * 60)
+        result.append("{}d".format(int(days)))
+    if remainder > 60 * 60:
+        hours, remainder = divmod(remainder, 60 * 60)
+        result.append("{}h".format(int(hours)))
+    if remainder > 60:
+        minutes, remainder = divmod(remainder, 60)
+        result.append("{}min".format(int(minutes)))
+    result.append("{}s".format(int(remainder)))
+    return result
+
+
+def evaluate_numeric_thresholds(container, value, thresholds, name, short_name,
+                                min=None, max=None, greater_than=True):
+    rounder = lambda x: round(x, 2)
+
+    INTEGER_UNITS = ['B', '%', '']
+
+    # Some units don't have decimal places
+    rounded_value = int(value) if thresholds.units in INTEGER_UNITS else rounder(value)
+
+    perf_string = "{container}_{short_name}={value}{units};{warn};{crit}".format(
+        container=container,
+        short_name=short_name,
+        value=rounded_value,
+        **thresholds)
+    if min is not None:
+        rounded_min = math.floor(min) if thresholds.units in INTEGER_UNITS else rounder(min)
+        perf_string += ';{}'.format(rounded_min)
+        if max is not None:
+            rounded_max = math.ceil(max) if thresholds.units in INTEGER_UNITS else rounder(max)
+            perf_string += ';{}'.format(rounded_max)
+
+    global performance_data
+    performance_data.append(perf_string)
+
+    if thresholds.units == 's':
+        nice_time = ' '.join(pretty_time(rounded_value)[:2])
+        results_str = "{} {} is {}".format(container, name, nice_time)
+    else:
+        results_str = "{} {} is {}{}".format(container, name, rounded_value, thresholds.units)
+
+    if greater_than:
+        comparator = lambda value, threshold: value >= threshold
+    else:
+        comparator = lambda value, threshold: value <= threshold
+
+    if comparator(value, thresholds.crit):
+        critical(results_str)
+    elif comparator(value, thresholds.warn):
+        warning(results_str)
+    else:
+        ok(results_str)
+
+
+@lru_cache(maxsize=None)
+def get_url(url):
+    logger.debug("get_url: {}".format(url))
+    response = better_urllib_get.open(url, timeout=timeout)
+    logger.debug("get_url: {} {}".format(url, response.status))
+    return process_urllib_response(response), response.status
+
+
+@lru_cache(maxsize=None)
+def head_url(url):
+    # Follow redirects
+    response = better_urllib_get.open(HeadRequest(url), timeout=timeout)
+    logger.debug("{} {}".format(url, response.status))
+    return response
+
+
+def process_urllib_response(response):
+    response_bytes = response.read()
+    body = response_bytes.decode('utf-8')
+    # logger.debug("BODY: {}".format(body))
+    return json.loads(body)
+
+
+def get_container_info(name):
+    content, _ = get_url(daemon + '/containers/{container}/json'.format(container=name))
+    return content
+
+
+def get_image_info(name):
+    content, _ = get_url(daemon + '/images/{image}/json'.format(image=name))
+    return content
+
+
+def get_state(container):
+    return get_container_info(container)['State']
+
+
+def get_stats(container):
+    content, _ = get_url(daemon + '/containers/{container}/stats?stream=0'.format(container=container))
+    return content
+
+
+def get_ps_name(name_list):
+    # Pick the name that starts with a '/' but doesn't contain a '/' and return that value
+    for name in name_list:
+        if '/' not in name[1:] and name[0] == '/':
+            return name[1:]
+    else:
+        raise NameError("Error when trying to identify 'ps' name in {}".format(name_list))
+
+
+def get_containers(names, require_present):
+    containers_list, _ = get_url(daemon + '/containers/json?all=1')
+
+    all_container_names = set(get_ps_name(x['Names']) for x in containers_list)
+
+    if 'all' in names:
+        return all_container_names
+
+    filtered = set()
+    for matcher in names:
+        found = False
+        for candidate in all_container_names:
+            if re.match("^{}$".format(matcher), candidate):
+                filtered.add(candidate)
+                found = True
+        # If we don't find a container that matches out regex
+        if require_present and not found:
+            critical("No containers match {}".format(matcher))
+
+    return filtered
+
+
+def get_container_digest(container):
+    # find registry and tag
+    inspection = get_container_info(container)
+    image_id = inspection['Image']
+    image_info = get_image_info(image_id)
+    try:
+        return image_info['RepoDigests'][0].split('@')[1]
+    except IndexError:
+        return None
+
+
+def get_container_image_urls(container):
+    inspection = get_container_info(container)
+    image_id = inspection['Image']
+    image_info = get_image_info(image_id)
+    return image_info['RepoTags']
+
+
+def normalize_image_name_to_manifest_url(image_name, insecure_registries):
+    parsed_url = parse_image_name(image_name)
+
+    lower_insecure = [reg.lower() for reg in insecure_registries]
+
+    # Registry query url
+    scheme = 'http' if parsed_url.registry.lower() in lower_insecure else 'https'
+    url = '{scheme}://{registry}/v2/{image_name}/manifests/{image_tag}'.format(scheme=scheme,
+                                                                               registry=parsed_url.registry,
+                                                                               image_name=parsed_url.name,
+                                                                               image_tag=parsed_url.tag)
+    return url, parsed_url.registry
+
+
+# Auth servers seem picky about being hit too hard. Can't figure out why. ;)
+# As result it is best to single thread this check
+# This is based on https://docs.docker.com/registry/spec/auth/token/#requesting-a-token
+def get_digest_from_registry(url):
+    logger.debug("get_digest_from_registry")
+    # query registry
+    # TODO: Handle logging in if needed
+    registry_info = head_url(url=url)
+
+    digest = registry_info.getheader('Docker-Content-Digest', None)
+    if digest is None:
+        raise RegistryError(response=registry_info)
+    return digest
+
+
+def set_rc(new_rc):
+    global rc
+    rc = new_rc if new_rc > rc else rc
+
+
+def ok(message):
+    set_rc(OK_RC)
+    messages.append('OK: ' + message)
+
+
+def warning(message):
+    set_rc(WARNING_RC)
+    messages.append('WARNING: ' + message)
+
+
+def critical(message):
+    set_rc(CRITICAL_RC)
+    messages.append('CRITICAL: ' + message)
+
+
+def unknown(message):
+    set_rc(UNKNOWN_RC)
+    messages.append('UNKNOWN: ' + message)
+
+
+def require_running(name):
+    def inner_decorator(func):
+        def wrapper(container, *args, **kwargs):
+            container_state = get_state(container)
+            state = normalize_state(container_state)
+            if state.lower() == "running":
+                func(container, *args, **kwargs)
+            else:
+                # container is not running, can't perform check
+                critical('{container} is not "running", cannot check {check}"'.format(container=container,
+                                                                                      check=name))
+
+        return wrapper
+
+    return inner_decorator
+
+
+def multithread_execution(disable_threading=DISABLE_THREADING):
+    def inner_decorator(func):
+        def wrapper(container, *args, **kwargs):
+            if DISABLE_THREADING:
+                func(container, *args, **kwargs)
+            else:
+                threads.append(parallel_executor.submit(func, container, *args, **kwargs))
+
+        return wrapper
+
+    return inner_decorator
+
+
+def singlethread_execution(disable_threading=DISABLE_THREADING):
+    def inner_decorator(func):
+        def wrapper(container, *args, **kwargs):
+            if DISABLE_THREADING:
+                func(container, *args, **kwargs)
+            else:
+                threads.append(serial_executor.submit(func, container, *args, **kwargs))
+
+        return wrapper
+
+    return inner_decorator
+
+
+def parse_image_name(image_name):
+    """
+    Parses image names into their constituent parts.
+    :param image_name:
+    :return: ImageName
+    """
+
+    # These are based on information found here
+    #   https://docs.docker.com/engine/reference/commandline/tag/#extended-description
+    #   https://github.com/docker/distribution/blob/master/reference/regexp.go
+    host_segment_re = '[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?'
+    hostname_re = r'({host_segment}\.)+{host_segment}'.format(host_segment=host_segment_re)
+    registry_re = r'((?P<registry>({hostname_re}(:\d+)?|{host_segment_re}:\d+))/)'.format(
+        host_segment_re=host_segment_re, hostname_re=hostname_re)
+    name_component_ends_re = '[a-z0-9]'
+    name_component_middle_re = '[a-z0-9._-]'  # Ignoring spec limit of two _
+    name_component_re = '({end}{middle}*{end}|{end})'.format(end=name_component_ends_re,
+                                                             middle=name_component_middle_re)
+    image_name_re = "(?P<image_name>({name_component}/)*{name_component})".format(name_component=name_component_re)
+    image_tag_re = '(?P<image_tag>[a-zA-Z0-9_][a-zA-Z0-9_.-]*)'
+    full_re = '^{registry}?{image_name}(:{image_tag})?$'.format(registry=registry_re, image_name=image_name_re,
+                                                                image_tag=image_tag_re)
+    parsed = re.match(full_re, image_name)
+
+    registry = parsed.group('registry') if parsed.group('registry') else DEFAULT_PUBLIC_REGISTRY
+
+    image_name = parsed.group('image_name')
+    image_name = image_name if '/' in image_name or registry != DEFAULT_PUBLIC_REGISTRY else 'library/' + image_name
+
+    image_tag = parsed.group('image_tag')
+    image_tag = image_tag if image_tag else 'latest'
+
+    full_image_name = "{registry}/{image_name}:{image_tag}".format(
+        registry=registry,
+        image_name=image_name,
+        image_tag=image_tag)
+
+    return ImageName(registry=registry, name=image_name, tag=image_tag, full_name=full_image_name)
+
+
+def normalize_state(status_info):
+    # Ugh, docker used to report state in as silly way then they figured out how to do it better.
+    # This tries the simpler new way and if that doesn't work fails back to the old way
+
+    # On new docker engines the status holds whatever the current state is, running, stopped, paused, etc.
+    if "Status" in status_info:
+        return status_info['Status']
+
+    status = 'Exited'
+    if status_info["Restarting"]:
+        status = 'Restarting'
+    elif status_info["Paused"]:
+        status = 'Paused'
+    elif status_info["Dead"]:
+        status = 'Dead'
+    elif status_info["Running"]:
+        return "Running"
+    return status
+
+
+# Checks
+#############################################################################################
+
+@multithread_execution()
+@require_running(name='memory')
+def check_memory(container, thresholds):
+    if not thresholds.units in unit_adjustments:
+        unknown("Memory units must be one of  {}".format(list(unit_adjustments.keys())))
+        return
+
+    inspection = get_stats(container)
+
+    # Subtracting cache to match what `docker stats` does.
+    adjusted_usage = inspection['memory_stats']['usage'] - inspection['memory_stats']['stats']['total_cache']
+    if thresholds.units == '%':
+        max = 100
+        usage = int(100 * adjusted_usage / inspection['memory_stats']['limit'])
+    else:
+        max = inspection['memory_stats']['limit'] / unit_adjustments[thresholds.units]
+        usage = adjusted_usage / unit_adjustments[thresholds.units]
+
+    evaluate_numeric_thresholds(container=container, value=usage, thresholds=thresholds, name='memory',
+                                short_name='mem', min=0, max=max)
+
+
+@multithread_execution()
+def check_status(container, desired_state):
+    normized_desired_state = desired_state.lower()
+    normalized_state = normalize_state(get_state(container)).lower()
+    if normized_desired_state != normalized_state:
+        critical("{} state is not {}".format(container, desired_state))
+        return
+    ok("{} status is {}".format(container, desired_state))
+
+
+@multithread_execution()
+@require_running('health')
+def check_health(container):
+    state = get_state(container)
+    if "Health" in state and "Status" in state["Health"]:
+        health = state["Health"]["Status"]
+        message = "{} is {}".format(container, health)
+        if health == 'healthy':
+            ok(message)
+        elif health == 'unhealthy':
+            critical(message)
+        else:
+            unknown(message)
+    else:
+        unknown('{} has no health check data'.format(container))
+
+
+@multithread_execution()
+@require_running('uptime')
+def check_uptime(container, thresholds):
+    inspection = get_container_info(container)['State']['StartedAt']
+    only_secs = inspection[0:19]
+    start = datetime.strptime(only_secs, "%Y-%m-%dT%H:%M:%S")
+    start = start.replace(tzinfo=timezone.utc)
+    now = datetime.now(timezone.utc)
+    uptime = (now - start).total_seconds()
+
+    graph_padding = 2
+    thresholds.units = 's'
+    evaluate_numeric_thresholds(container=container, value=uptime, thresholds=thresholds, name='uptime',
+                                short_name='up', min=0, max=graph_padding, greater_than=False)
+
+
+@multithread_execution()
+@require_running('restarts')
+def check_restarts(container, thresholds):
+    inspection = get_container_info(container)
+
+    restarts = int(inspection['RestartCount'])
+    graph_padding = 2
+    evaluate_numeric_thresholds(container=container, value=restarts, thresholds=thresholds, name='restarts',
+                                short_name='re', min=0, max=graph_padding)
+
+
+@singlethread_execution()
+def check_version(container, insecure_registries):
+    image_digest = get_container_digest(container)
+    if image_digest is None:
+        unknown('Checksum missing for "{}", try doing a pull'.format(container))
+        return
+
+    image_urls = get_container_image_urls(container=container)
+    if len(image_urls) > 1:
+        unknown('"{}" has multiple tags/names. Unsure which one to use to check the version.'.format(container))
+        return
+    elif len(image_urls) == 0:
+        unknown('"{}" has last no repository tag. Is this anywhere else?'.format(container))
+        return
+
+    url, registry = normalize_image_name_to_manifest_url(image_urls[0], insecure_registries)
+
+    try:
+        registry_hash = get_digest_from_registry(url)
+    except URLError as e:
+        if hasattr(e.reason, 'reason') and e.reason.reason == 'UNKNOWN_PROTOCOL':
+            unknown(
+                "TLS error connecting to registry {} for {}, should you use the '--insecure-registry' flag?" \
+                    .format(registry, container))
+            return
+        elif hasattr(e.reason, 'strerror') and e.reason.strerror == 'nodename nor servname provided, or not known':
+            unknown(
+                "Cannot reach registry for {} at {}".format(container, url))
+            return
+        else:
+            raise e
+    except RegistryError as e:
+        unknown("Cannot check version, couldn't retrieve digest for {} while checking {}.".format(container, url))
+        return
+
+    if registry_hash == image_digest:
+        ok("{}'s version matches registry".format(container))
+        return
+    critical("{}'s version does not match registry".format(container))
+
+
+def calculate_cpu_capacity_precentage(info, stats):
+    host_config = info['HostConfig']
+
+    if 'online_cpus' in stats['cpu_stats']:
+        num_cpus = stats['cpu_stats']['online_cpus']
+    else:
+        num_cpus = len(stats['cpu_stats']['cpu_usage']['percpu_usage'])
+
+    # Identify limit system being used
+    # --cpus
+    if 'NanoCpus' in host_config and host_config['NanoCpus'] != 0:
+        period = 1000000000
+        quota = host_config['NanoCpus']
+    # --cpu-quota
+    elif 'CpuQuota' in host_config and host_config['CpuQuota'] != 0:
+        period = 100000 if host_config['CpuPeriod'] == 0 else host_config['CpuPeriod']
+        quota = host_config['CpuQuota']
+    # unlimited
+    else:
+        period = 1
+        quota = num_cpus
+
+    if period * num_cpus < quota:
+        # This handles the case where the quota is actually bigger than amount available by all the cpus.
+        available_limit_ratio = 1
+    else:
+        available_limit_ratio = (period * num_cpus) / quota
+
+    cpu_delta = stats['cpu_stats']['cpu_usage']['total_usage'] - stats['precpu_stats']['cpu_usage']['total_usage']
+    system_delta = stats['cpu_stats']['system_cpu_usage'] - stats['precpu_stats']['system_cpu_usage']
+    usage = (cpu_delta / system_delta) * available_limit_ratio
+    usage = round(usage * 100, 0)
+    return usage
+
+
+@multithread_execution()
+@require_running('cpu')
+def check_cpu(container, thresholds):
+    info = get_container_info(container)
+
+    stats = get_stats(container=container)
+
+    usage = calculate_cpu_capacity_precentage(info=info, stats=stats)
+
+    max = 100
+    thresholds.units = '%'
+    evaluate_numeric_thresholds(container=container, value=usage, thresholds=thresholds, name='cpu', short_name='cpu',
+                                min=0, max=max)
+
+
+def process_args(args):
+    """Parse the command line and publish the connection settings as globals.
+
+    Builds the argparse parser for every supported option, prints the help
+    text when ``args`` is empty, then parses ``args``. As a side effect it
+    sets the module globals ``timeout``, ``daemon`` and ``connection_type``.
+
+    :param args: list of command line arguments (without the program name)
+    :return: the ``argparse.Namespace`` produced by ``parse_args``
+    """
+    parser = argparse.ArgumentParser(description='Check docker containers.')
+
+    # Connect to local socket or ip address
+    connection_group = parser.add_mutually_exclusive_group()
+    connection_group.add_argument('--connection',
+                                  dest='connection',
+                                  action='store',
+                                  default=DEFAULT_SOCKET,
+                                  type=str,
+                                  metavar='[/<path to>/docker.socket|<ip/host address>:<port>]',
+                                  help='Where to find docker daemon socket. (default: %(default)s)')
+
+    connection_group.add_argument('--secure-connection',
+                                  dest='secure_connection',
+                                  action='store',
+                                  type=str,
+                                  metavar='[<ip/host address>:<port>]',
+                                  help='Where to find TLS protected docker daemon socket.')
+
+    # Unit base: both flags write to the same destination, 1024 is the default
+    base_group = parser.add_mutually_exclusive_group()
+    base_group.add_argument('--binary_units',
+                            dest='units_base',
+                            action='store_const',
+                            const=1024,
+                            help='Use a base of 1024 when doing calculations of KB, MB, GB, & TB (This is default)')
+
+    base_group.add_argument('--decimal_units',
+                            dest='units_base',
+                            action='store_const',
+                            const=1000,
+                            help='Use a base of 1000 when doing calculations of KB, MB, GB, & TB')
+    parser.set_defaults(units_base=1024)
+
+    # Connection timeout
+    parser.add_argument('--timeout',
+                        dest='timeout',
+                        action='store',
+                        type=float,
+                        default=DEFAULT_TIMEOUT,
+                        help='Connection timeout in seconds. (default: %(default)s)')
+
+    # Container name
+    parser.add_argument('--containers',
+                        dest='containers',
+                        action='store',
+                        nargs='+',
+                        type=str,
+                        default=['all'],
+                        help='One or more RegEx that match the names of the container(s) to check. If omitted all containers are checked. (default: %(default)s)')
+
+    # Presence: modifies how --containers is matched
+    parser.add_argument('--present',
+                        dest='present',
+                        default=False,
+                        action='store_true',
+                        help='Modifies --containers so that each RegEx must match at least one container.')
+
+    # Threads
+    parser.add_argument('--threads',
+                        dest='threads',
+                        default=DEFAULT_PARALLELISM,
+                        action='store',
+                        type=int,
+                        help='This + 1 is the maximum number of concurent threads/network connections. (default: %(default)s)')
+
+    # CPU
+    parser.add_argument('--cpu',
+                        dest='cpu',
+                        action='store',
+                        type=str,
+                        metavar='WARN:CRIT',
+                        help='Check cpu usage percentage taking into account any limits. Valid values are 0 - 100.')
+
+    # Memory
+    parser.add_argument('--memory',
+                        dest='memory',
+                        action='store',
+                        type=str,
+                        metavar='WARN:CRIT:UNITS',
+                        help='Check memory usage taking into account any limits. Valid values for units are %%,B,KB,MB,GB.')
+
+    # Status
+    parser.add_argument('--status',
+                        dest='status',
+                        action='store',
+                        type=str,
+                        help='Desired container status (running, exited, etc).')
+
+    # Health
+    parser.add_argument('--health',
+                        dest='health',
+                        default=None,
+                        action='store_true',
+                        help="Check container's health check status")
+
+    # Uptime
+    parser.add_argument('--uptime',
+                        dest='uptime',
+                        action='store',
+                        type=str,
+                        metavar='WARN:CRIT',
+                        help='Minimum container uptime in seconds. Use when infrequent crashes are tolerated.')
+
+    # Version
+    parser.add_argument('--version',
+                        dest='version',
+                        default=None,
+                        action='store_true',
+                        help='Check if the running images are the same version as those in the registry. Useful for finding stale images. Does not support login.')
+
+    # Insecure registries (used together with --version)
+    parser.add_argument('--insecure-registries',
+                        dest='insecure_registries',
+                        action='store',
+                        nargs='+',
+                        type=str,
+                        default=[],
+                        help='List of registries to connect to with http(no TLS). Useful when using "--version" with images from insecure registries.')
+
+    # Restart
+    parser.add_argument('--restarts',
+                        dest='restarts',
+                        action='store',
+                        type=str,
+                        metavar='WARN:CRIT',
+                        help='Container restart thresholds.')
+
+    # no-ok
+    parser.add_argument('--no-ok',
+                        dest='no_ok',
+                        action='store_true',
+                        help='Make output terse suppressing OK messages. If all checks are OK return a single OK.')
+
+    # no-performance
+    parser.add_argument('--no-performance',
+                        dest='no_performance',
+                        action='store_true',
+                        help='Suppress performance data. Reduces output when performance data is not being used.')
+
+    # -V prints the plugin version (--version is taken by the image version check)
+    parser.add_argument('-V', action='version', version='%(prog)s {}'.format(__version__))
+
+    # With no arguments at all, show the help text before parsing.
+    if len(args) == 0:
+        parser.print_help()
+
+    parsed_args = parser.parse_args(args=args)
+
+    global timeout
+    timeout = parsed_args.timeout
+
+    # Derive the daemon URL and the connection type: a TLS endpoint takes
+    # precedence, a --connection value starting with '/' is a socket path,
+    # anything else is treated as a plain http host:port.
+    global daemon
+    global connection_type
+    if parsed_args.secure_connection:
+        daemon = 'https://' + parsed_args.secure_connection
+        connection_type = 'https'
+    elif parsed_args.connection:
+        if parsed_args.connection[0] == '/':
+            daemon = 'socket://' + parsed_args.connection + ':'
+            connection_type = 'socket'
+        else:
+            daemon = 'http://' + parsed_args.connection
+            connection_type = 'http'
+
+    return parsed_args
+
+
+def no_checks_present(parsed_args):
+    # Look for all functions whose name starts with 'check_'
+    checks = [key[6:] for key in globals().keys() if key.startswith('check_')]
+    # Act like --present is a check though it is not implemented like one
+    return all(getattr(parsed_args, check) is None for check in checks) and not parsed_args.present
+
+
+def socketfile_permissions_failure(parsed_args):
+    if connection_type == 'socket':
+        return not (os.path.exists(parsed_args.connection)
+                    and stat.S_ISSOCK(os.stat(parsed_args.connection).st_mode)
+                    and os.access(parsed_args.connection, os.R_OK)
+                    and os.access(parsed_args.connection, os.W_OK))
+    else:
+        return False
+
+
+def print_results():
+    if no_ok:
+        # Remove all the "OK"s
+        filtered_messages = [message for message in messages if not message.startswith('OK: ')]
+        if len(filtered_messages) == 0:
+            messages_concat = 'OK'
+        else:
+            messages_concat = '; '.join(filtered_messages)
+
+    else:
+        messages_concat = '; '.join(messages)
+
+    if no_performance or len(performance_data) == 0:
+        print(messages_concat)
+    else:
+        perfdata_concat = ' '.join(performance_data)
+        print(messages_concat + '|' + perfdata_concat)
+
+
+def perform_checks(raw_args):
+    args = process_args(raw_args)
+
+    global parallel_executor
+    parallel_executor = futures.ThreadPoolExecutor(max_workers=args.threads)
+    global serial_executor
+    serial_executor = futures.ThreadPoolExecutor(max_workers=1)
+
+    global unit_adjustments
+    unit_adjustments = {key: args.units_base ** value for key, value in UNIT_ADJUSTMENTS_TEMPLATE.items()}
+
+    global no_ok
+    no_ok = args.no_ok
+
+    global no_performance
+    no_performance = args.no_ok
+
+    if socketfile_permissions_failure(args):
+        unknown("Cannot access docker socket file. User ID={}, socket file={}".format(os.getuid(), args.connection))
+        return
+
+    if args.containers == ["all"] and args.present:
+        unknown("You can not use --present without --containers")
+        return
+
+    if no_checks_present(args):
+        unknown("No checks specified.")
+        return
+
+    # Here is where all the work happens
+    #############################################################################################
+    containers = get_containers(args.containers, args.present)
+
+    if len(containers) == 0 and not args.present:
+        unknown("No containers names found matching criteria")
+        return
+
+    for container in containers:
+
+        # Check status
+        if args.status:
+            check_status(container, args.status)
+
+        # Check version
+        if args.version:
+            check_version(container, args.insecure_registries)
+
+        # below are checks that require a 'running' status
+
+        # Check status
+        if args.health:
+            check_health(container)
+
+        # Check cpu usage
+        if args.cpu:
+            check_cpu(container, parse_thresholds(args.cpu, units_required=False))
+
+        # Check memory usage
+        if args.memory:
+            check_memory(container, parse_thresholds(args.memory, units_required=False))
+
+        # Check uptime
+        if args.uptime:
+            check_uptime(container, parse_thresholds(args.uptime, include_units=False))
+
+        # Check restart count
+        if args.restarts:
+            check_restarts(container, parse_thresholds(args.restarts, include_units=False))
+
+
+def main():
+    try:
+        perform_checks(argv[1:])
+
+        # get results to let exceptions in threads bubble out
+        [x.result() for x in futures.as_completed(threads)]
+
+    except Exception as e:
+        traceback.print_exc()
+        unknown("Exception raised during check': {}".format(repr(e)))
+    print_results()
+    exit(rc)
+
+
+if __name__ == '__main__':
+    main()
--- a/files/nrpe/check_eth
+++ b/files/nrpe/check_eth
@ -0,0 +1,181 @@
+#!/usr/bin/perl -w
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+use constant BITS  => 8;
+use constant BYTES => 1;
+
+# Command line values; each stays empty until GetOptions fills it in.
+my $iface     = "";
+my $bandwidth = "";
+my $warning   = "";
+my $critical  = "";
+my $percent   = "";
+
+GetOptions(
+    "i|interface=s" => \$iface,
+    "w|warning=s"   => \$warning,
+    "c|critical=s"  => \$critical,
+    "b|bandwidth=s" => \$bandwidth,
+    "p|percent"     => \$percent
+);
+
+# Multiplier applied to the speeds before printing (BYTES = report bytes/s).
+my $bitmod = BYTES;
+
+# Prefix of the per-interface state file ("$tmpfile-<interface>").
+my $tmpfile = "/tmp/traffic";
+my $output  = "";
+my $line    = "";
+
+# Nagios exit codes.
+my %status = ( 'OK'       => 0,
+               'WARNING'  => 1,
+               'CRITICAL' => 2,
+               'UNKNOWN'  => 3
+           );
+my $exit_status = $status{OK};
+
+# Current and previous counter samples.
+my %data = ( 'time'    => 0, 'last_time'    => 0,
+             'rxbytes' => 0, 'last_rxbytes' => 0,
+             'txbytes' => 0, 'last_txbytes' => 0
+         );
+
+# Computed throughput in bytes per second and the sampling interval.
+my %speed = ( 'tx' => 0,
+              'rx' => 0,
+              'interval' => 1
+          );
+
+# Validate the arguments; every failure prints the help and exits UNKNOWN.
+usage() if ( !$iface || !$warning || !$critical );
+if ( $percent ) {
+    # Percentage mode: thresholds are 0-100 and a bandwidth is mandatory.
+    usage() if ( !$bandwidth || $bandwidth !~ /^\d+[kKmMgG]$/ );
+    usage() if ( $warning !~ /^\d{1,3}$/ || $warning>100 || $critical !~ /^\d{1,3}$/ || $critical>100 );
+    $bandwidth = human2bytes($bandwidth);
+    # A value such as "0M" passes the pattern above but converts to 0, which
+    # would later abort the plugin with a division by zero.
+    usage() if ( !$bandwidth );
+} else {
+    # Absolute mode: thresholds are speeds with a K/M/G suffix.
+    $warning = human2bytes($warning);
+    $critical = human2bytes($critical);
+    usage() if ( !$warning || !$critical )
+}
+usage() if ( $warning > $critical );
+
+# Read the cumulative receive/transmit byte counters of $iface from
+# /proc/net/dev. Every failure prints a status line before exiting UNKNOWN,
+# so the monitoring server never receives an empty result.
+my $net_fh;
+if ( !open( $net_fh, '<', '/proc/net/dev' ) ) {
+    print "UNKNOWN - can't open /proc/net/dev: $!\n";
+    exit $status{UNKNOWN};
+}
+
+my $iface_found = 0;
+while ( my $row = <$net_fh> ) {
+    chomp( $row );
+    # \Q..\E keeps characters of the interface name (e.g. the dot in
+    # "eth0.100") from being treated as regex metacharacters.
+    # Capture 1 is the first receive column, capture 2 the first transmit column.
+    if ( $row =~ /^\s*\Q$iface\E\:\s*(\d+)(?:\s*(?:\d+)){7}\s*(\d+)(?:\s*(?:\d+)){7}\s*$/ ) {
+        $data{time} = time - 1;
+        $data{rxbytes} = $1;
+        $data{txbytes} = $2;
+        $iface_found = 1;
+        last;
+    }
+}
+close( $net_fh );
+
+if ( !$iface_found ) {
+    print "UNKNOWN - interface $iface not found in /proc/net/dev\n";
+    exit $status{UNKNOWN};
+}
+
+# Load the previous sample from the per-interface state file, then store the
+# current one for the next run. Both steps are best effort: without a usable
+# state file the previous sample is treated as equal to the current one.
+my $statefile = "$tmpfile-$iface";
+
+if ( open( my $in_fh, '<', $statefile ) ) {
+    my $previous = <$in_fh>;
+    close( $in_fh );
+    # Accept only a well-formed "time:rx:tx" record; an empty or corrupt
+    # file is ignored instead of producing undef/non-numeric warnings.
+    if ( defined $previous && $previous =~ /^(\d+):(\d+):(\d+)\s*$/ ) {
+        ( $data{last_time}, $data{last_rxbytes}, $data{last_txbytes} ) = ( $1, $2, $3 );
+    }
+}
+
+if ( open( my $out_fh, '>', $statefile ) ) {
+    print( $out_fh "$data{time}:$data{rxbytes}:$data{txbytes}\n" );
+    close( $out_fh );
+}
+
+# No previous sample, or counters that went backwards (reboot, counter wrap):
+# fall back to the current values, so the computed speed is 0.
+$data{last_time} = $data{time} if ( !$data{last_time} || $data{last_time} > $data{time} );
+$data{last_rxbytes} = $data{rxbytes} if ( !$data{last_rxbytes} || $data{last_rxbytes} > $data{rxbytes} );
+$data{last_txbytes} = $data{txbytes} if ( !$data{last_txbytes} || $data{last_txbytes} > $data{txbytes} );
+
+# Average throughput since the previous sample.
+$speed{interval} = $data{time} - $data{last_time} + 1;
+for my $dir ( 'rx', 'tx' ) {
+    $speed{$dir} = ( $data{"${dir}bytes"} - $data{"last_${dir}bytes"} ) / $speed{interval};
+}
+
+# Human readable part of the status line.
+my $speed_unit = ( $bitmod == BITS ) ? "b" : "B";
+$output = "RX Bytes: ". bytes2human($data{rxbytes}) ."B, TX Bytes: ". bytes2human($data{txbytes}) ."B; ";
+$output .= sprintf( "RX Speed: %s%sps, TX Speed: %s%sps; ",
+           bytes2human($speed{rx}*$bitmod), $speed_unit, bytes2human($speed{tx}*$bitmod), $speed_unit );
+
+# Values compared with the thresholds: percent of the bandwidth with -p,
+# the raw speeds otherwise.
+my ( $rx_load, $tx_load ) = ( $percent )
+    ? ( ( $speed{rx} / $bandwidth ) * 100, ( $speed{tx} / $bandwidth ) * 100 )
+    : ( $speed{rx}, $speed{tx} );
+
+my $verdict = "OK";
+if ( $rx_load > $critical || $tx_load > $critical ) {
+    $exit_status = $status{CRITICAL};
+    $verdict = "CRITICAL";
+} elsif ( $rx_load > $warning || $tx_load > $warning ) {
+    $exit_status = $status{WARNING};
+    $verdict = "WARNING";
+}
+$output .= $verdict;
+
+# Performance data: thresholds are converted back to absolute values with -p.
+my $warn_abs = ( $percent ) ? $warning*$bandwidth/100 : $warning;
+my $crit_abs = ( $percent ) ? $critical*$bandwidth/100 : $critical;
+
+$output .= " bandwidth utilization";
+$output .= sprintf( " | rx=%.0f;%2.0f;%2.0f tx=%.0f;%2.0f;%2.0f",
+           $speed{rx}*$bitmod, $warn_abs, $crit_abs,
+           $speed{tx}*$bitmod, $warn_abs, $crit_abs );
+
+print "$output\n";
+exit( $exit_status );
+
+
+# helper function
+# Format a byte count with a binary unit prefix ('', K, M, G or T).
+#
+# Returns 0 for a false value; otherwise the value divided by 1024 as many
+# times as possible, rounded to a whole number, followed by the unit prefix.
+# Values beyond the terabyte range stay expressed in T.
+sub bytes2human {
+    my $bytes = shift;
+    return 0 if !$bytes;
+
+    my @units = ( '','K','M','G','T' );
+    my $offset = 0;
+
+    # ">=" so that exactly 1024 becomes 1K; the bound on $offset keeps huge
+    # values from indexing past the last unit (undef warning under -w).
+    while ( $bytes >= 1024 && $offset < $#units ) {
+        $bytes = $bytes / 1024;
+        $offset++;
+    }
+    return sprintf( "%2.0f%s", $bytes, $units[$offset] );
+}
+
+# Convert a value such as "12M" or "100k" into bytes.
+#
+# An upper-case suffix (K/M/G/T) is a multiple of 1024 bytes. A lower-case
+# k/m/g suffix means bits, so the result is divided by BITS. A value without
+# a suffix is taken as bytes. Returns 0 for anything else, which the callers
+# treat as invalid input.
+sub human2bytes {
+    my $value = shift;
+    # Only digits plus an optional known suffix are accepted. The previous
+    # "(\w)" also matched digits and unknown letters, after which the unit
+    # search ran off the end of the list and returned number * 1024**6.
+    return 0 if ( !defined $value || $value !~ /^(\d+)([kKmMgGtT]?)$/ );
+    my ($number, $scale) = ($1,$2);
+
+    my $bitmod = ( $scale =~ /[kmg]/ ) ? BITS : BYTES;
+    my %power_of = ( '' => 0, 'K' => 1, 'M' => 2, 'G' => 3, 'T' => 4 );
+
+    return $number * ( 1024 ** $power_of{ uc $scale } ) / $bitmod;
+}
+
+# Print the command line help and exit with the UNKNOWN status.
+sub usage {
+    print <<EOU;
+
+    Usage: $0 -i <interface> -w <warn> -c <critical> [-p -b <bandwidth>]
+
+    -i, --interface STRING
+        Network interface name (example: eth0)
+    -w, --warning STRING
+        Warning interface speed level (K/M/G Bps, k/m/g bps)
+        If using with -p value should be in percentage (1-100)
+    -c, --critical STRING
+        Critical interface speed level (K/M/G Bps, k/m/g bps)
+        If using with -p value should be in percentage (1-100)
+    -p
+        Calculate warning and critical levels in percentage based on interface bandwidth
+    -b, --bandwidth STRING
+        Interface bandwidth value (K/M/G Bps, k/m/g bps)
+
+EOU
+    # NOTE(review): this removes "$tmpfile" itself, while the state written by
+    # the script lives in "$tmpfile-<interface>" - confirm which was intended.
+    unlink($tmpfile);
+    exit $status{UNKNOWN};
+}
--- a/files/nrpe/check_exim_mailqueue
+++ b/files/nrpe/check_exim_mailqueue
@ -0,0 +1,139 @@
+#!/bin/sh
+###############################################
+#
+# Nagios script to check Exim mail queue status
+#
+# Copyright 2007, 2008 Ian Yates
+#
+# NOTE: Depending on your config, the nagios user will probably be
+#       needed to be added to the exim group for this script to function correctly
+#
+# See usage for command line switches
+#
+# You need to add the following to /etc/sudoers:
+# nagios  ALL=NOPASSWD:/usr/local/exim/bin/exim
+#
+# Created: 2006-07-31 (i.yates@uea.ac.uk)
+# Updated: 2007-04-30 (i.yates@uea.ac.uk) - Linux/sudo tweaks
+# Updated: 2008-03-26 (i.yates@uea.ac.uk) - Fixed bug in critical/warning level checking which could result in erroneous results.
+# Updated: 2008-11-27 (i.yates@uea.ac.uk) - Added GPLv3 licence
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+###############################################
+
+. /usr/lib/nagios/plugins/utils.sh
+
+VERSION="1.3"
+
+EXIM=/usr/sbin/exim
+SUDO=/usr/bin/sudo
+
+FLAG_VERBOSE=FALSE
+LEVEL_WARN=""
+LEVEL_CRIT=""
+RESULT=""
+EXIT_STATUS=$STATE_OK
+
+
+###############################################
+#
+## FUNCTIONS
+#
+
+## Print usage
+usage() {
+	# Each printf emits exactly one line of the help text.
+	printf ' check_eximailqueue %s - Nagios Exim mail queue check script\n' "$VERSION"
+	printf '\n'
+	printf ' Usage: check_eximailqueue -w <warning queue size> -c <critical queue size> [ -v ] [ -h ]\n'
+	printf '\n'
+	printf '\t\t -w  Queue size at which a warning is triggered\n'
+	printf '\t\t -c  Queue size at which a critical is triggered\n'
+	printf '\t\t -v  Verbose output (ignored for now)\n'
+	printf '\t\t -h  Show this page\n'
+	printf '\n'
+}
+
+## Process command line options
+doopts() {
+	if ( `test 0 -lt $#` )
+	then
+		while getopts w:c:vh myarg "$@"
+		do
+			case $myarg in
+				h|\?)
+					usage
+					exit;;
+				w)
+					LEVEL_WARN=$OPTARG;;
+				c)
+					LEVEL_CRIT=$OPTARG;;
+				v)
+					FLAG_VERBOSE=TRUE;;
+				*)	# Default
+					usage
+					exit;;
+			esac
+		done
+	else
+		usage
+		exit
+	fi
+}
+
+
+# Write output and return result
+theend() {
+	# Print the result line verbatim and exit with the matching status.
+	# Quoting keeps the text from being word-split or glob-expanded.
+	printf '%s\n' "$RESULT"
+	exit "$EXIT_STATUS"
+}
+
+
+#
+## END FUNCTIONS
+#
+
+#############################################
+#
+## MAIN
+#
+
+
+# Handle command line options
+doopts $@
+
+# Do the do
+OUTPUT=`$SUDO -u root $EXIM -bpc`
+if test -z "$OUTPUT" ; then
+	RESULT="Mailqueue WARNING - query returned no output!"
+	EXIT_STATUS=$STATE_WARNING
+else
+	if test "$OUTPUT" -lt "$LEVEL_WARN" ; then
+		RESULT="Mailqueue OK - $OUTPUT messages on queue"
+		EXIT_STATUS=$STATE_OK
+	else
+		if test "$OUTPUT" -ge "$LEVEL_CRIT" ; then
+			RESULT="Mailqueue CRITICAL - $OUTPUT messages on queue"
+			EXIT_STATUS=$STATE_CRITICAL
+		else
+			if test "$OUTPUT" -ge "$LEVEL_WARN" ; then
+				RESULT="Mailqueue WARNING - $OUTPUT messages on queue"
+				EXIT_STATUS=$STATE_WARNING
+			fi
+		fi
+	fi
+fi
+
+# Quit and return information and exit status
+theend
--- a/files/nrpe/check_mdadm
+++ b/files/nrpe/check_mdadm
@ -0,0 +1,42 @@
+#!/bin/bash
+#
+# Created by Sebastian Grewe, Jammicron Technology
+#
+
+# Get count of raid arrays
+RAID_DEVICES=`grep ^md -c /proc/mdstat`
+
+# Get count of degraded arrays
+#RAID_STATUS=`grep "\[.*_.*\]" /proc/mdstat -c`
+RAID_STATUS=`egrep "\[.*(=|>|\.).*\]" /proc/mdstat -c`
+
+# Is an array currently recovering, get percentage of recovery
+RAID_RECOVER=`grep recovery /proc/mdstat | awk '{print $4}'`
+RAID_RESYNC=`grep resync /proc/mdstat | awk '{print $4}'`
+RAID_CHECK=`grep check /proc/mdstat | awk '{print $4}'`
+
+# Check raid status
+# RAID recovers --> Warning
+if [[ $RAID_RECOVER ]]; then
+STATUS="WARNING - Checked $RAID_DEVICES arrays, recovering : $RAID_RECOVER"
+EXIT=1
+elif [[ $RAID_RESYNC ]]; then
+STATUS="WARNING - Checked $RAID_DEVICES arrays, resync : $RAID_RESYNC"
+EXIT=1
+elif [[ $RAID_CHECK ]]; then
+STATUS="OK - Checked $RAID_DEVICES arrays, check : $RAID_CHECK"
+EXIT=0
+# RAID ok
+elif [[ $RAID_STATUS == "0" ]]; then
+STATUS="OK - Checked $RAID_DEVICES arrays."
+EXIT=0
+# All else critical, better save than sorry
+else
+EXTEND_RAID_STATUS=`egrep "\[.*(=|>|\.|_).*\]" /proc/mdstat | awk '{print $2}' | uniq -c | xargs echo`
+STATUS="WARNING- Checked $RAID_DEVICES arrays, $RAID_STATUS have failed check: $EXTEND_RAID_STATUS "
+EXIT=1
+fi
+
+# Status and quit
+echo $STATUS
+exit $EXIT
--- a/files/nrpe/check_memory
+++ b/files/nrpe/check_memory
@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+
+#Set script name
+SCRIPT=`basename ${BASH_SOURCE[0]}`
+
+#Set default values
+optMW=95
+optMC=98
+optSW=95
+optSC=98
+
+# help function
+function printHelp {
+  echo -e \\n"Help for $SCRIPT"\\n
+  echo -e "Basic usage: $SCRIPT -w {warning} -c {critical} -W {warning} -C {critical}"\\n
+  echo "Command switches are optional, default values for warning is 95% and critical is 98%"
+  echo "-w - Sets warning value for Memory Usage. Default is 95%"
+  echo "-c - Sets critical value for Memory Usage. Default is 98%"
+  echo "-W - Sets warning value for Swap Usage. Default is 95%"
+  echo "-C - Sets critical value for Swap Usage. Default is 98%"
+  echo -e "-h  - Displays this help message"\\n
+  echo -e "Example: $SCRIPT -w 80 -c 90 -W 40 -C 60"\\n
+  echo -e \\n\\n"Author: Lukasz Gogolin, lukasz.gogolin@gmail.com"
+  echo -e "Git: http://bitbucket.org/lgogolin/nagios_plugins"
+  exit 1
+}
+
+# regex to check is OPTARG an integer
+re='^[0-9]+$'
+
+while getopts :w:c:W:C:h FLAG; do
+  case $FLAG in
+    w)
+      if ! [[ $OPTARG =~ $re ]] ; then
+        echo "error: Not a number" >&2; exit 1
+      else
+        optMW=$OPTARG
+      fi
+      ;;
+    c)
+      if ! [[ $OPTARG =~ $re ]] ; then
+        echo "error: Not a number" >&2; exit 1
+      else
+        optMC=$OPTARG
+      fi
+      ;;
+    W)
+      if ! [[ $OPTARG =~ $re ]] ; then
+        echo "error: Not a number" >&2; exit 1
+      else
+        optSW=$OPTARG
+      fi
+      ;;
+    C)
+      if ! [[ $OPTARG =~ $re ]] ; then
+        echo "error: Not a number" >&2; exit 1
+      else
+        optSC=$OPTARG
+      fi
+      ;;
+    h)
+      printHelp
+      ;;
+    \?)
+      echo -e \\n"Option - $OPTARG not allowed."
+      printHelp
+      exit 2
+      ;;
+  esac
+done
+
+shift $((OPTIND-1))
+
+
+
+
+
+array=( $(cat /proc/meminfo | egrep 'MemTotal|MemFree|Buffers|Cached|SwapTotal|SwapFree' |awk '{print $1 " " $2}' |tr '\n' ' ' |tr -d ':' |awk '{ printf("%i %i %i %i %i %i %i", $2, $4, $6, $8, $10, $12, $14) }') )
+
+memTotal_k=${array[0]}
+memTotal_b=$(($memTotal_k*1024))
+memFree_k=${array[1]}
+memFree_b=$(($memFree_k*1024))
+memBuffer_k=${array[2]}
+memBuffer_b=$(($memBuffer_k*1024))
+memCache_k=${array[3]}
+memCache_b=$(($memCache_k*1024))
+memTotal_m=$(($memTotal_k/1024))
+memFree_m=$(($memFree_k/1024))
+memBuffer_m=$(($memBuffer_k/1024))
+memCache_m=$(($memCache_k/1024))
+memUsed_b=$(($memTotal_b-$memFree_b-$memBuffer_b-$memCache_b))
+memUsed_m=$(($memTotal_m-$memFree_m-$memBuffer_m-$memCache_m))
+memUsedPrc=$((($memUsed_b*100)/$memTotal_b))
+
+swapTotal_k=${array[5]}
+swapTotal_b=$(($swapTotal_k*1024))
+swapFree_k=${array[6]}
+swapFree_b=$(($swapFree_k*1024))
+swapUsed_k=$(($swapTotal_k-$swapFree_k))
+swapUsed_b=$(($swapUsed_k*1024))
+swapTotal_m=$(($swapTotal_k/1024))
+swapFree_m=$(($swapFree_k/1024))
+swapUsed_m=$(($swapTotal_m-$swapFree_m))
+
+if [ $swapTotal_k -eq 0 ]; then
+    swapUsedPrc=0
+else
+    swapUsedPrc=$((($swapUsed_k*100)/$swapTotal_k))
+fi
+
+message="[MEMORY] Total: $memTotal_m MB - Used: $memUsed_m MB - $memUsedPrc% [SWAP] Total: $swapTotal_m MB - Used: $swapUsed_m MB - $swapUsedPrc% | MTOTAL=$memTotal_b;;;; MUSED=$memUsed_b;;;; MCACHE=$memCache_b;;;; MBUFFER=$memBuffer_b;;;; STOTAL=$swapTotal_b;;;; SUSED=$swapUsed_b;;;;"
+
+
+if [ $memUsedPrc -ge $optMC ] || [ $swapUsedPrc -ge $optSC ]; then
+  echo -e $message
+  $(exit 2)
+elif [ $memUsedPrc -ge $optMW ] || [ $swapUsedPrc -ge $optSW ]; then
+  echo -e $message
+  $(exit 1)
+else
+  echo -e $message
+  $(exit 0)
+fi
--- a/files/nrpe/check_mysql_longqueries
+++ b/files/nrpe/check_mysql_longqueries
@ -0,0 +1,237 @@
+#!/usr/bin/perl
+# $Id$
+#
+# check_mysql_longqueries plugin for Nagios
+#
+# Copyright (C) 2009  Vincent Rivellino <vrivellino@paybycash.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+#
+#
+# Checks MySQL's processlist to see if there are queries running longer than
+# defined thresholds.
+#
+# Requires the following modules:
+#        DBI
+#        Monitoring::Plugin
+#
+# Copyright Notice: GPLv2
+#
+# CHANGES
+#
+# 30 Jan 2009 - Vincent Rivellino <vrivellino@paybycash.com>
+#               Initial version released.
+#
+# 02 Mar 2020 - Ludovic Cartier <ludovic.cartier@brainsys.io>
+#		Replace Nagios::Plugin by Monitoring::Plugin
+#		need debian package libmonitoring-plugin-perl
+#
+
+use warnings;
+use strict;
+use DBI;
+use Monitoring::Plugin;
+
+
+## setup Monitoring::Plugin
+## usage, version and license are the texts handed to the plugin constructor
+my $np = Monitoring::Plugin->new(
+	usage   => "Usage: %s [-v|--verbose] [-H <host>] [-P <port>] [-S <socket>] [-u <user>] [-p <password>] -w <warn time> -c <crit time>",
+	version => "1.0",
+	license => join( "\n",
+		"Copyright (C) 2009  Vincent Rivellino <vrivellino\@paybycash.com>",
+		"This plugin comes with ABSOLUTELY NO WARRANTY.  This is free software, and you",
+		"are welcome to redistribute it under the conditions of version 2 of the GPL." ),
+);
+
+## add command line arguments
+## each entry is the named-argument list of one add_arg() call; the entries
+## are registered in the order they are written here
+my @arg_definitions = (
+	[ spec => 'host|H=s',
+	  help => "-H, --host\n   MySQL server host" ],
+	[ spec => 'port|P=i',
+	  help => "-P, --port\n   MySQL server port" ],
+	[ spec => 'socket|S=s',
+	  help => "-S, --socket\n   MySQL server socket" ],
+	[ spec => 'user|u=s',
+	  help => "-u, --user\n   database user (must have privilege to SHOW PROCESSLIST)" ],
+	[ spec => 'password|p=s',
+	  help => "-p, --password\n   database password" ],
+	[ spec => 'warn|w=i',
+	  help => "-w, --warn\n   Query time in seconds to generate a WARNING",
+	  required => 1 ],
+	[ spec => 'crit|c=i',
+	  help => "-c, --crit\n   Query time in seconds to generate a CRITICAL",
+	  required => 1 ],
+	[ spec => 'db=s',
+	  help => "--db\n   Only check queries running on this database\n   To specify more than one, separate with commas." ],
+	[ spec => 'skip_db=s',
+	  help => "--skip_db\n   Don't check queries running on this database\n   To specify more than one, separate with commas." ],
+	[ spec => 'clientuser=s',
+	  help => "--clientuser\n   Only check queries running by this MySQL user\n   To specify more than one, separate with commas." ],
+	[ spec => 'skip_clientuser=s',
+	  help => "--skip_clientuser\n   Don't check queries running by this MySQL user\n   To specify more than one, separate with commas." ],
+	[ spec => 'clienthost=s',
+	  help => "--clienthost\n   Only check queries running from this client host\n   To specify more than one, separate with commas." ],
+	[ spec => 'skip_clienthost=s',
+	  help => "--skip_clienthost\n   Don't check queries running from this client host\n   To specify more than one, separate with commas." ],
+);
+
+foreach my $definition (@arg_definitions) {
+	$np->add_arg( @{$definition} );
+}
+
+
+## parse the command line arguments
+$np->getopts;
+my $verbose = $np->opts->verbose || 0;
+
+## at verbosity 2 and above, dump the value of every option
+if ( $verbose >= 2 ) {
+	my $string_row = "    %-23s %s\n";
+	my $number_row = "    %-23s %d\n";
+
+	print "Plugin options:\n";
+	printf( $number_row, "verbose:", $verbose );
+
+	# connection options: shown as strings, empty when not given
+	foreach my $name ( qw(host port socket user password) ) {
+		printf( $string_row, "$name:", $np->opts->$name || '' );
+	}
+
+	# thresholds: shown as integers
+	foreach my $name ( qw(warn crit) ) {
+		printf( $number_row, "$name:", $np->opts->$name );
+	}
+
+	# include / exclude filters: shown as strings, empty when not given
+	foreach my $name ( qw(db skip_db clientuser skip_clientuser clienthost skip_clienthost) ) {
+		printf( $string_row, "$name:", $np->opts->$name || '' );
+	}
+}
+
+# extract restrictions from args - will grep() these lists
+# The separator must be a real pattern (/,/): a quoted '/,/' is compiled as a
+# regex that only matches the literal three characters "/,/", which left
+# comma separated values unsplit.
+my @db     = split( /,/, $np->opts->db      || '' );
+my @skipdb = split( /,/, $np->opts->skip_db || '' );
+my @clientuser     = split( /,/, $np->opts->clientuser      || '' );
+my @skipclientuser = split( /,/, $np->opts->skip_clientuser || '' );
+my @clienthost     = split( /,/, $np->opts->clienthost      || '' );
+my @skipclienthost = split( /,/, $np->opts->skip_clienthost || '' );
+
+## arm the alarm with the plugin timeout value
+alarm $np->opts->timeout;
+
+## setup the dsn - no need to specify a database
+my $dsn = 'DBI:mysql:';
+my $target_host = $np->opts->host;
+
+if ( $target_host and $target_host ne 'localhost' ) {
+	## a real host name was given: connect via host and (if defined) port
+	$dsn .= ';host=' . $target_host;
+	$dsn .= ';port=' . $np->opts->port
+		if $np->opts->port;
+}
+elsif ( $np->opts->socket ) {
+	## localhost (by name) or no host at all: use the local socket when one
+	## was given
+	$dsn .= ';mysql_socket=' . $np->opts->socket;
+}
+
+## print dsn if really verbose
+if ( $verbose >= 2 ) {
+	print "DSN: '$dsn'  USER: '", $np->opts->user || '', "' PASS: '", $np->opts->password || '', "'\n";
+}
+
+## connect to the database server; DBI errors are reported through the
+## plugin as UNKNOWN instead of being raised or printed by DBI
+my %connect_attributes = ( RaiseError => 0, PrintError => 0, AutoCommit => 1 );
+my $dbh = DBI->connect( $dsn, $np->opts->user || '', $np->opts->password || '', \%connect_attributes )
+	or $np->nagios_exit( UNKNOWN, "Could not connect to database: $DBI::errstr" );
+
+## get the list of running queries
+my $sth = $dbh->prepare( 'SHOW FULL PROCESSLIST' )
+	or $np->nagios_exit( UNKNOWN, "Could not prepare statement: " . $dbh->errstr );
+$sth->execute();
+$np->nagios_exit( UNKNOWN, $sth->errstr ) if $sth->err;
+
+## bind each row result to a hash (keys are the lower-cased column names)
+my %row;
+$sth->bind_columns( \( @row{ @{$sth->{NAME_lc} } } ));
+
+
+## use these to keep track of the longest-running query
+my $longquery_info = '';
+my $longquery_time = 0;
+
+## process the results
+my $count = 0;
+while ( $sth->fetch ) {
+	$count++;
+
+	# skip if time is zero or NULL
+	next unless $row{'time'};
+
+	# skip ignorable results
+	next if $row{'user'} eq 'system user';
+	next if $row{'command'} =~ m/(Sleep|Binlog Dump|Ping|Processlist)/i;
+
+	# extract connection info (the client port is stripped from the host)
+	my $db = $row{'db'} || '';
+	my $user = $row{'user'} || '';
+	my $host = $row{'host'} || '';
+	$host =~ s/:\d+$//;
+
+	# skip if connection info does or doesn't match the criteria.
+	# Values are compared with eq so that names containing regex
+	# metacharacters match literally.  An "only" list keeps the row when ANY
+	# entry matches; a "skip" list drops the row when ANY entry matches.
+	next if $np->opts->db and !grep { $_ eq $db } @db;
+	next if $np->opts->skip_db and grep { $_ eq $db } @skipdb;
+
+	next if $np->opts->clientuser and !grep { $_ eq $user } @clientuser;
+	next if $np->opts->skip_clientuser and grep { $_ eq $user } @skipclientuser;
+
+	next if $np->opts->clienthost and !grep { $_ eq $host } @clienthost;
+	next if $np->opts->skip_clienthost and grep { $_ eq $host } @skipclienthost;
+
+	# only save the longest running query
+	if ( $row{'time'} > $longquery_time ) {
+		$longquery_time = $row{'time'};
+		$longquery_info = "TIME: $row{'time'}";
+		foreach my $k ( sort keys %row ) {
+			next if $k eq 'time' or $k eq 'info';
+			$longquery_info .= " $k=" . ( $row{$k} || 'NULL' );
+		}
+		# the query text goes last
+		$longquery_info .= " INFO=" . ( $row{'info'} || 'NULL' );
+	}
+}
+
+# the db handle is no longer needed
+$dbh->disconnect;
+
+# Escalate only when a long query was recorded and its time reaches a
+# threshold: crit is tested before warn.
+if ( $longquery_info ) {
+	if ( $longquery_time >= $np->opts->crit ) {
+		$np->nagios_exit( CRITICAL, $longquery_info );
+	}
+	if ( $longquery_time >= $np->opts->warn ) {
+		$np->nagios_exit( WARNING, $longquery_info );
+	}
+}
+
+# nothing recorded, or the longest query stayed below both thresholds
+$np->nagios_exit( OK, "No long running queries found ($count threads checked)" );
--- a/files/nrpe/check_postfix_mailqueue
+++ b/files/nrpe/check_postfix_mailqueue
@ -0,0 +1,140 @@
+#!/bin/bash
+###################################################################
+# check_postfix_mailqueue is developped with GPL Licence 2.0
+#
+# GPL License: http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt
+#
+# Developped by : Bjoern Bongermino
+#
+###################################################################
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+####################################################################
+
+# Uncomment to enable debugging
+# set -x
+
+# Identification strings used by the version and help output
+PROGNAME=$(basename $0)
+VERSION="Version 1.0"
+AUTHOR="Bjoern Bongermino (http://www.bongermino.de)"
+
+# Thresholds for deferred mails; 0 means "not given on the command line"
+warning=0
+critical=0
+
+# Nagios plugin exit codes
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+# Print program name, version and author on one line.
+print_version() {
+    printf '%s %s %s\n' "$PROGNAME" "$VERSION" "$AUTHOR"
+}
+
+# Print the version line followed by the usage text, then exit with STATE_OK.
+print_help() {
+    print_version
+    cat <<EOF
+
+$PROGNAME - Checks postfix mailqueue statistic
+
+$PROGNAME is a Nagios plugin which generates statistics
+for the postfix mailqueue and checks for corrupt messages.
+The following values will be checked:
+maildrop: Localy posted mail
+incoming: Processed local mail and received from network
+active: Mails being delivered (should be small)
+deferred: Stuck mails (that will be retried later)
+corrupt: Messages found to not be in correct format (shold be 0)
+hold: Recent addition, messages put on hold indefinitly - delete of free
+
+Usage: $PROGNAME -w WARN-Level -c CRIT-Level
+
+Options:
+  -w)
+     Warning level for deferred mails
+  -c)
+     Critical level for deferred mail
+  -h)
+     This help
+  -v)
+     Version
+EOF
+    exit $STATE_OK
+}
+
+# Check for parameters
+while test -n "$1"; do
+    case "$1" in
+		-h)
+			print_help
+			exit $STATE_OK;;
+		-v)
+			print_version
+			exit $STATE_OK;;
+		-w)
+			warning=$2
+			shift
+			;;
+		-c)
+			critical=$2
+			shift
+			;;
+		*)
+			check_postfix_mailqueue
+			;;
+	esac
+	shift
+done
+
+# Count the queue files of every postfix queue.
+# Sets the global variables deferred, active, maildrop, incoming, corrupt and
+# hold to the number of regular files found below the directory of the same
+# name (0 when the directory does not exist).
+# Exits with STATE_CRITICAL when the spool directory cannot be entered.
+check_postfix_mailqueue() {
+    local queue
+
+    # Can be set via environment (spooldir), but default is fetched by
+    # postconf (if available, else /var/spool/postfix)
+    if command -v postconf > /dev/null ; then
+        SPOOLDIR=${spooldir:-$(postconf -h queue_directory)}
+    else
+        SPOOLDIR=${spooldir:-/var/spool/postfix}
+    fi
+
+    # quoted so that a path containing blanks is still handled as one directory
+    cd "$SPOOLDIR" >/dev/null 2>/dev/null || {
+        echo -n "Cannot cd to $SPOOLDIR"
+        exit $STATE_CRITICAL
+    }
+
+    # Get values: one counter per queue, stored in the variable named after it
+    for queue in deferred active maildrop incoming corrupt hold; do
+        printf -v "$queue" '%s' "$( (test -d "$queue" && find "$queue" -type f) | wc -l )"
+    done
+}
+
+# Collect the counters, then build the human readable part and the perfdata.
+check_postfix_mailqueue
+values="Deferred mails=$deferred Active deliveries=$active Locally posted mails=$maildrop Incoming mails=$incoming Corrupt mails=$corrupt Mails on hold=$hold"
+perfdata="deferred=$deferred;; active=$active;; maildrop=$maildrop;; incoming=$incoming;; corrupt=$corrupt;; hold=$hold;;"
+
+# Any corrupt message is CRITICAL, whatever the thresholds are.
+if [ $corrupt -gt 0 ]; then
+	echo -n "Postfix Mailqueue CRITICAL - $corrupt corrupt messages found! | $perfdata"
+	exit $STATE_CRITICAL
+fi
+
+# The deferred counter is only evaluated when both thresholds are above 0;
+# otherwise the result stays OK.
+state="OK"
+rc=$STATE_OK
+if [ $warning -gt 0 ] && [ $critical -gt 0 ]; then
+   if [ $deferred -gt $critical ]; then
+      state="CRITICAL"
+      rc=$STATE_CRITICAL
+   elif [ $deferred -gt $warning ]; then
+      state="WARNING"
+      rc=$STATE_WARNING
+   fi
+fi
+
+echo -n "Postfix Mailqueue $state - $values | $perfdata"
+exit $rc
--- a/files/nrpe/check_postgresql
+++ b/files/nrpe/check_postgresql
--- a/files/nrpe/check_proc_age
+++ b/files/nrpe/check_proc_age
@ -0,0 +1,101 @@
+#! /bin/bash
+
+# Nagios plugin
+# created 09.01.2011 by symphonic.mushroom@gmail.com
+# modified 04.24.2012 by symphonic.mushroom@gmail.com with the advices from formwandler
+# modified 07.22.2017 by symphonic.mushroom@gmail.com with the help from Toby Wahlers toby@100.rpm.com
+# check if processes matching to a pattern are exceeding a given elapsed time
+# return a Nagios exit code depending on the result
+# 0 = OK
+# 1 = WARNING
+# 2 = CRITICAL
+# 3 = UNKNOWN
+
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# print the usage text and leave with exit code 3
+print_help() {
+        cat <<EOF
+This Nagios plugin check if processes matching to a pattern are exceeding a given elapsed time
+Usage : $0 -p <process_name> -w <seconds> -c <seconds> 
+ -p parameter : name of the monitoring process. For granularity, quote commands with spaces.
+ -w parameter : minimal elapsed time for status WARNING on NAGIOS, in seconds.
+ -c parameter : minimal elapsed time for status CRITICAL on NAGIOS, in seconds.
+returned performance data : number of process; oldest time in minutes; warning time in minutes; critical time in minutes; 0;
+EOF
+        exit 3
+}
+
+# check if there is at least one argument
+if [ -z $1 ]
+        then echo "Missing arguments"
+        echo "try \'$0 --help\' for help"
+        exit 3
+fi
+
+# print help
+if [[ ( $1 = "--help" || $1 = "-h" ) ]]
+        then print_help
+        exit 3
+fi
+
+# assign value to arguments
+# print an error in case of unkown argument
+while getopts ":w:c:p:" options
+do
+    case $options in
+        w ) warning=$OPTARG ;;
+        c ) critical=$OPTARG ;;
+        p ) proc=$OPTARG ;;
+        * ) echo "Unknown argument"
+        echo "try \'$0 --help\' for help"
+        exit 3 ;;
+    esac
+done
+
+# check if all arguments are present
+if [[ ( -z $warning || -z $critical || -z $proc ) ]]
+        then echo "Missing argument"
+        echo "try \'$0 --help\' for help"
+        exit 3
+fi
+
+#calculate number of process
+nbproc=$(ps -A -o args | grep -w "$proc" | grep -v $0 | grep -v grep | wc -l)
+if [ $nbproc -gt 0 ]
+        then
+
+#calculate age of oldest process
+        ageproc=$(ps -A -o etime,comm,args | grep "$proc" | grep -v $0 | grep -v grep | gawk '{split($1,t,":");split(t[1],td,"-");if (td[2]) {ta=td[1]*86400; t[1]=td[2]} else {ta=0}; if (t[3]) {$1=(t[1]*60+t[2])*60+t[3]+ta} else {$1=t[1]*60+t[2]};if (NR==1) {maxi=$1;} else {if ($1>maxi){maxi=$1;}}};END {print maxi}')
+        case $ageproc in
+                ?|[0-5]? ) maxage=$ageproc" Seconds";;
+                ??|???|[0-2]???|3[0-5]?? ) maxage=$(($ageproc/60))" Minutes";;
+                * ) maxage=$(($ageproc/3600))" Hours "$(($ageproc % 3600 / 60))" minutes";;
+        esac
+         msg="there are $nbproc process $proc, oldest has got $maxage age"
+         perfmaxage=$(($ageproc/60))
+         perfdata="Processes=${nbproc:-0} MaxAge=${perfmaxage:-0}Minutes;$(($warning/60));$(($critical/60));0;"
+                if [ $ageproc -gt $critical ]
+                        then echo "CRITICAL: $msg | $perfdata"
+                        exit 2
+                elif [ $ageproc -gt $warning ]
+                        then echo "WARNING: $msg | $perfdata"
+                        exit 1
+                else echo "OK: $msg | $perfdata"
+                exit 0
+                fi
+        else
+        echo "OK: there is no process matching $proc"
+        exit 0
+fi
--- a/files/nrpe/check_process
+++ b/files/nrpe/check_process
@ -0,0 +1,7 @@
+#!/bin/bash
+
+rc=0
+for proc in cron rsyslogd ntpd munin-node; do
+    sudo /usr/lib/nagios/plugins/check_procs -C $proc -c 1:
+    rc=$(($rc|$?))
+done
--- a/files/nrpe/check_rofs
+++ b/files/nrpe/check_rofs
@ -0,0 +1,33 @@
+#!/bin/bash
+# checks for read_only fs
+# @Author Joerg 'johe' Stephan <johe.stephan@googlemail.com>
+#
+
+# Nagios exit codes
+E_SUCCESS="0"
+E_WARNING="1"
+E_CRITICAL="2"
+E_UNKNOWN="3"
+
+# The mountpoint to test is mandatory
+if [ -z "$1" ]; then
+        echo "Usage: check_rofs.sh <mountpoint>"
+        exit ${E_UNKNOWN}
+fi
+tfs=$1
+
+# Look the mountpoint up in /proc/mounts. When several entries share the
+# mountpoint, the last one is kept (presumably the one mounted on top).
+# The loop reads from a redirection rather than a pipe so that it runs in the
+# main shell.
+found=0
+found_options=""
+while read -r diskid mountpoint fs options rub1 rub2; do
+        if [ "$mountpoint" = "$tfs" ]; then
+                found=1
+                found_options=$options
+        fi
+done < /proc/mounts
+
+if [ $found -eq 0 ]; then
+        echo "Mountpoint $tfs not found in /proc/mounts"
+        exit ${E_UNKNOWN}
+fi
+
+# Compare whole comma separated options, so that an option that merely
+# contains "rw" or "ro" cannot be mistaken for the flag itself.
+case ",$found_options," in
+        *,rw,*)
+                echo "The Filesystem mounted on $tfs is writeable"
+                exit ${E_SUCCESS}
+                ;;
+        *,ro,*)
+                echo "The Filesystem mounted on $tfs is NOT writeable"
+                exit ${E_CRITICAL}
+                ;;
+        *)
+                echo "Test result empty (For any reason)"
+                exit ${E_WARNING}
+                ;;
+esac
--- a/files/nrpe/check_systemd_service
+++ b/files/nrpe/check_systemd_service
@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Copyright © 2016, 2017 Mohamed El Morabity <melmorabity@fedoraproject.com>
+#
+# This module is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# This software is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+# Load the STATE_* exit codes from utils.sh located next to this plugin.
+# Expansions are quoted so that a plugin path containing blanks still works.
+PLUGINDIR=$(dirname "$0")
+. "$PLUGINDIR/utils.sh"
+
+
+# Exactly one argument is expected: the name of the service to check
+if [[ $# -ne 1 ]]; then
+    echo "Usage: ${0##*/} <service name>"
+    exit $STATE_UNKNOWN
+fi
+
+service=$1
+
+
+# "systemctl is-enabled" prints the enablement state; no output at all is
+# treated as "the service does not exist"
+status=$(systemctl is-enabled "$service" 2>/dev/null)
+r=$?
+if [[ -z "$status" ]]; then
+    echo "ERROR: service $service doesn't exist"
+    exit $STATE_CRITICAL
+fi
+
+# a non-zero status with some output: report the state that was printed
+if [[ $r -ne 0 ]]; then
+    echo "ERROR: service $service is $status"
+    exit $STATE_CRITICAL
+fi
+
+
+# finally make sure the service is currently active
+if ! systemctl --quiet is-active "$service"; then
+    echo "ERROR: service $service is not running"
+    exit $STATE_CRITICAL
+fi
+
+echo "OK: service $service is running"
+exit $STATE_OK
--- a/tasks/main.yml
+++ b/tasks/main.yml
@ -0,0 +1,3 @@
+---
+# Entry point of the role: statically pull in the NRPE installation tasks.
+# import_tasks replaces the bare "include" keyword, which recent ansible-core
+# releases no longer accept.
+- name: "monitoring | install nrpe"
+  import_tasks: nrpe.yml
--- a/tasks/nrpe.yml
+++ b/tasks/nrpe.yml
@ -0,0 +1,42 @@
+---
+# Install the NRPE agent, its configuration, the check plugins and the
+# sudo rules.
+- name: nrpe | apt update cache
+  apt:
+    update_cache: yes
+    cache_valid_time: 86400 #One day
+
+# The package list is handed to a single apt call instead of looping over
+# the items one by one.
+- name: nrpe | install nrpe packages
+  apt:
+    name:
+      - nagios-nrpe-server
+      - libmonitoring-plugin-perl
+      - monitoring-plugins-standard
+      - libdbd-mysql-perl
+    update_cache: true
+    state: present
+
+- name: nrpe | copy nrpe configuration
+  template:
+    src: "nrpe.j2"
+    dest: "/etc/nagios/nrpe.d/brainsys.cfg"
+    mode: "0644"
+    force: yes
+    backup: yes
+
+# File modes are quoted so that they are always read as octal strings.
+- name: nrpe | copy nrpe plugins
+  copy:
+    src: nrpe/
+    dest: /usr/lib/nagios/plugins
+    mode: "0755"
+
+- name: nrpe | restart nagios-nrpe-server
+  systemd:
+    state: restarted
+    name: nagios-nrpe-server
+
+# The rendered file is checked by visudo before it is installed.
+- name: nrpe | allow nagios user to specific sudo
+  template:
+    src: nrpe.sudoers.j2
+    dest: /etc/sudoers.d/nrpe
+    validate: 'visudo -cf %s'
+    mode: "0440"
--- a/templates/nrpe.j2
+++ b/templates/nrpe.j2
@ -0,0 +1,51 @@
+# NRPE settings and command definitions. The {{ '{{ ... }}' }} placeholders are
+# filled in from the nrpe_* role variables and gathered facts.
+allowed_hosts={{ nrpe_allowed_hosts }}
+dont_blame_nrpe=1
+
+# system checks
+command[check_load]=/usr/lib/nagios/plugins/check_load -w {{ nrpe_load_warning }} -c {{ nrpe_load_critical }}
+command[check_memory]=/usr/lib/nagios/plugins/check_memory -w {{ nrpe_memory_warning }} -c {{ nrpe_memory_critical }} -W {{ nrpe_swap_warning }} -C {{ nrpe_swap_critical }}
+command[check_mailq]=/usr/bin/sudo /usr/lib/nagios/plugins/check_postfix_mailqueue -w {{ nrpe_postfix_warning }} -c {{ nrpe_postfix_critical }}
+command[check_smtp]=/usr/lib/nagios/plugins/check_tcp -p 25
+command[check_zombie_procs]=/usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z
+command[check_total_procs]=/usr/lib/nagios/plugins/check_procs -w 500 -c 800
+command[check_process]=/usr/lib/nagios/plugins/check_process
+command[check_dns]=/usr/lib/nagios/plugins/check_dns -H google.com
+command[check_ssl]=/usr/lib/nagios/plugins/check_http --sni 'www.brainsys.io' -C 14,3
+command[check_eth]=/usr/lib/nagios/plugins/check_eth -i {{ ansible_default_ipv4.interface }} -w {{ nrpe_eth_warning }} -c {{ nrpe_eth_critical }}
+command[check_proc_fail2ban]=/usr/lib/nagios/plugins/check_procs -a fail2ban -w 1: -c 1:
+# NOTE(review): "<proc>" below looks like an unfilled placeholder for the
+# process name -- confirm the intended value.
+command[check_proc_age]=/usr/lib/nagios/plugins/check_proc_age -p <proc> -w 400 -c 600
+
+# disk
+# -w space warning / -c space critical / -W inode warning / -K inode critical / -C reset after
+command[check_disk_advanced]=/usr/lib/nagios/plugins/check_disk_advanced -x /lib/init/rw -x /sys -x /dev/shm -X tmpfs -X nsfs -X proc -X sysfs -X devtmpfs -X overlay -X tracefs -w 10% -c 3% -W 10% -K 3% -H
+command[check_disk_root]=/usr/lib/nagios/plugins/check_disk -w 30% -W 30% -c 10% -K 10% -p /
+command[check_rw_root]=/usr/lib/nagios/plugins/check_rofs /
+command[check_disk_data]=/usr/lib/nagios/plugins/check_disk -w 30% -W 30% -c 10% -K 10% -p /data
+command[check_rw_data]=/usr/lib/nagios/plugins/check_rofs /data
+
+# mysql
+# NOTE(review): the database password is written in clear text here and
+# repeated on both lines -- consider moving it to a role variable.
+command[check_mysql]=/usr/lib/nagios/plugins/check_mysql -u nagios -pBu[VetFeifoipVithlok2odHabrAiltAjHavciUjRi -d mysql -H 127.0.0.1
+command[check_mysql_longqueries]=/usr/lib/nagios/plugins/check_mysql_longqueries -u nagios -pBu[VetFeifoipVithlok2odHabrAiltAjHavciUjRi -H 127.0.0.1 -w 600 -c 1200
+
+# postgresql
+# NOTE(review): same remark as for mysql, the password is in clear text.
+command[check_pgsql_port]=/usr/lib/nagios/plugins/check_tcp -p 5432
+command[check_pgsql_connection]=/usr/lib/nagios/plugins/check_postgresql -H 127.0.0.1 -p 5432 --dbuser=nagios --dbpass=uDUTHt14FC3w4cE9vRk4XyZFD3KWlx --action=connection
+command[check_pgsql_backends]=/usr/lib/nagios/plugins/check_postgresql -H 127.0.0.1 -p 5432 --dbuser=nagios --dbpass=uDUTHt14FC3w4cE9vRk4XyZFD3KWlx --action=backends -w 175 -c 190
+
+# raid
+command[check_mdadm]=/usr/lib/nagios/plugins/check_mdadm
+command[check_3ware]=/usr/bin/sudo /usr/lib/nagios/plugins/check_3ware
+
+# services
+# one check_systemd_service call per unit name
+command[check_proc_docker]=/usr/lib/nagios/plugins/check_systemd_service docker
+command[check_proc_haproxy]=/usr/lib/nagios/plugins/check_systemd_service haproxy
+command[check_proc_nginx]=/usr/lib/nagios/plugins/check_systemd_service nginx
+command[check_proc_php5.6]=/usr/lib/nagios/plugins/check_systemd_service php5.6-fpm
+command[check_proc_php7.0]=/usr/lib/nagios/plugins/check_systemd_service php7.0-fpm
+command[check_proc_php7.1]=/usr/lib/nagios/plugins/check_systemd_service php7.1-fpm
+command[check_proc_php7.2]=/usr/lib/nagios/plugins/check_systemd_service php7.2-fpm
+command[check_proc_php7.3]=/usr/lib/nagios/plugins/check_systemd_service php7.3-fpm
+command[check_proc_php7.4]=/usr/lib/nagios/plugins/check_systemd_service php7.4-fpm
+command[check_proc_php8.0]=/usr/lib/nagios/plugins/check_systemd_service php8.0-fpm
+command[check_proc_php8.1]=/usr/lib/nagios/plugins/check_systemd_service php8.1-fpm
+command[check_proc_mysql]=/usr/lib/nagios/plugins/check_systemd_service mysql
+command[check_proc_postgresql]=/usr/lib/nagios/plugins/check_systemd_service postgresql
--- a/templates/nrpe.sudoers.j2
+++ b/templates/nrpe.sudoers.j2
@ -0,0 +1,2 @@
+# Commands the nagios user may run through sudo without a password; the
+# threshold arguments are rendered from the nrpe_* role variables.
+# NOTE(review): the NRPE configuration also runs check_3ware through sudo and
+# the check_process plugin runs check_procs through sudo, but no rule for
+# either is present here -- confirm whether they are granted elsewhere.
+nagios  ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/check_postfix_mailqueue -w {{ nrpe_postfix_warning }} -c {{ nrpe_postfix_critical }}
+nagios  ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/check_exim_mailqueue -w {{ nrpe_exim_warning }} -c {{ nrpe_exim_critical }}