Table des matières

, , , , , , ,

Supervision - sonde Nagios - Mémoire Linux et CPU

Voir :

Voir SNMP :

Voir aussi :

Sonde Nagios supervision de la RAM & CPU via SNMPv3

Voir :

Voir aussi :

check_snmp_mem_cpu.sh

#! /bin/bash
 
# Creative Commons CC0 Public Domain Licence
#
# Nagios probe that queries a host over SNMPv3 (memory, disk, CPU, swap, iowait,
# users, processes, load). See usage() for the command-line options.
 
# Strict mode: stop on errors, on unset variables and on failures inside pipelines.
set -euo pipefail
IFS=$' \t\n'
# Force the C locale for every command started by this script.
export LC_ALL=C
 
SCRIPT_NAME=$(basename "$0")
 
SNMP_HOST=localhost # DEFAULT
#SNMP_PASSWD='P@ssw0rd'  # YOU SHOULD USE ~/.snmp/snmp.conf FOR CREDS INSTEAD OF ARGUMENT PARAM
#SNMP_USER=nagios
 
# Delay, in seconds, between the two samples taken by the iowait check
declare -i INTERVAL
INTERVAL=5
 
exec 6>&1 # Link file descriptor #6 with stdout. Saves stdout.
 
# FIX IGNORED ~/.snmp/snmp.conf WHEN 'sudo -u nagios check_snmp_host.sh'
# HOME is reset to the home directory recorded in the passwd database for the
# user running the script.
HOME=$(getent passwd "$(whoami)" | cut -d: -f6)
 
# DEFAULT VALUES
# Warning/critical thresholds applied when -w / -c are not given.
# mem, disk, cpu, swap and iowait are percentages; users and procs are counts.
DEFAULT_MEM_WARNING_THRESHOLD=80
DEFAULT_MEM_CRITICAL_THRESHOLD=90
DEFAULT_DISK_WARNING_THRESHOLD=80
DEFAULT_DISK_CRITICAL_THRESHOLD=90
DEFAULT_CPU_WARNING_THRESHOLD=95
DEFAULT_CPU_CRITICAL_THRESHOLD=99
DEFAULT_SWAP_WARNING_THRESHOLD=80
DEFAULT_SWAP_CRITICAL_THRESHOLD=90
DEFAULT_IOWAIT_WARNING_THRESHOLD=20
DEFAULT_IOWAIT_CRITICAL_THRESHOLD=30
DEFAULT_USERS_WARNING_THRESHOLD=100
DEFAULT_USERS_CRITICAL_THRESHOLD=200
DEFAULT_PROCS_WARNING_THRESHOLD=600
DEFAULT_PROCS_CRITICAL_THRESHOLD=1000
# Load thresholds hold three comma-separated values: 1, 5 and 15 minute averages.
DEFAULT_LOAD_WARNING_THRESHOLD=100,100,100
DEFAULT_LOAD_CRITICAL_THRESHOLD=200,200,200
# Regular expression of storage descriptions skipped by the disk check (see -e).
PART_EXCLUDE=".*cdrom.*"

# SNMP object identifiers. Every OID is written in absolute form (leading dot)
# so that none of them depends on how the SNMP tools resolve relative OIDs.
OID_STORAGE_DESC=.1.3.6.1.2.1.25.2.3.1.3     # HOST-RESOURCES-MIB hrStorageDescr
OID_STORAGE_SIZE=.1.3.6.1.2.1.25.2.3.1.5     # HOST-RESOURCES-MIB hrStorageSize
OID_STORAGE_USED=.1.3.6.1.2.1.25.2.3.1.6     # HOST-RESOURCES-MIB hrStorageUsed
OID_CPU_PERCENT_USED=.1.3.6.1.2.1.25.3.3.1.2 # HOST-RESOURCES-MIB hrProcessorLoad
OID_SWAP_TOTAL=.1.3.6.1.4.1.2021.4.3.0       # UCD-SNMP-MIB memTotalSwap
OID_SWAP_FREE=.1.3.6.1.4.1.2021.4.4.0        # UCD-SNMP-MIB memAvailSwap
#OID_SWAP_OUT=.1.3.6.1.4.1.2021.11.4
OID_LOADAVG=.1.3.6.1.4.1.2021.10.1.5         # UCD-SNMP-MIB laLoadInt
OID_CPU_IOWAIT=.1.3.6.1.4.1.2021.11.54.0     # UCD-SNMP-MIB ssCpuRawWait
OID_USERS_COUNT=.1.3.6.1.2.1.25.1.5.0        # HOST-RESOURCES-MIB hrSystemNumUsers
OID_PROCS_COUNT=.1.3.6.1.2.1.25.4.2.1.2      # HOST-RESOURCES-MIB hrSWRunName
 
# exit_malfunction MESSAGE...
# Prints "<LABEL> UNKNOWN - ERROR <MESSAGE>" on the stdout saved in file
# descriptor 6 and terminates with exit code 3. LABEL falls back to UNDEF when
# it is unset or empty.
exit_malfunction() {
	local reason="$*"

	# Point stdout back at the descriptor saved in fd 6, then release fd 6.
	exec 1>&6 6>&-
	printf '%s UNKNOWN - ERROR %s\n' "${LABEL:-UNDEF}" "$reason"
	exit 3
}
 
# NAGIOS EXIT STATUS
# STATE_OK=0 ; STATE_WARNING=1 ; STATE_CRITICAL=2 ; STATE_UNKNOWN=3; STATE_DEPENDENT=4
# Start in the UNKNOWN state; each get_* check overwrites both variables once it
# has reached a verdict.
STATUS_MSG=UNKNOWN
typeset -i STATUS_CODE=3
# Safety net registered for EXIT and ERR: meant to report an UNKNOWN result when
# the script stops with a non-zero status while STATUS_CODE is still 3.
# NOTE(review): `$?` in the second test is expanded after the first `[` has run,
# so it holds that test's status (0 whenever the second test is reached) rather
# than the status that fired the trap — confirm whether this handler can ever
# call exit_malfunction as written.
trap 'if [ $STATUS_CODE -eq 3 ] && [ $? -ne 0 ] ; then
  exit_malfunction "Unexpected error"
fi' EXIT ERR
 
# command_exists NAME
# Succeeds silently when NAME can be resolved by `command -v`; otherwise
# reports the missing dependency through exit_malfunction, which ends the
# script.
#
# exit_malfunction is called in the current shell (not in a subshell) so that
# its `exit` really terminates the caller instead of only a child process.
command_exists() {
	if ! command -v "$1" >/dev/null 2>&1; then
		exit_malfunction "I require $1 but it's not installed. Aborting."
	fi
}
 
# The checks below call both snmpget and snmpwalk: stop early if either is missing.
command_exists snmpget
command_exists snmpwalk
 
# calc EXPRESSION...
# Joins its arguments into a single awk expression and prints the value awk
# computes for it.
calc() {
	local expression="$*"

	awk "BEGIN { print ${expression} }"
}
 
# get_nproc
# Prints the number of lines returned by an SNMP walk of OID_CPU_PERCENT_USED
# on SNMP_HOST; get_iowait and get_load use that figure as the CPU count.
# Calls exit_malfunction when the pipeline fails.
get_nproc() {
	if ! snmpwalk -r 2 -v 3 "$SNMP_HOST" "$OID_CPU_PERCENT_USED" | wc -l; then
		exit_malfunction "SNMP failed"
	fi
}
 
# get_mem
# Reports physical memory usage, with buffers and cached memory subtracted from
# the used figure.
#
# Globals read:    SNMP_HOST, OID_STORAGE_DESC, OID_STORAGE_USED,
#                  OID_STORAGE_SIZE, LABEL, MEM_WARNING_THRESHOLD,
#                  MEM_CRITICAL_THRESHOLD
# Globals written: STATUS_MSG, STATUS_CODE
# Outputs:         one status line followed by perfdata, on stdout
# Exits:           through exit_malfunction when an SNMP query fails or when no
#                  "Physical memory" storage entry is found
get_mem() {
	# Every counter starts at 0 so that a storage entry missing on the agent
	# (for instance no "Memory buffers" row) does not abort the arithmetic
	# below with an unbound variable under `set -u`.
	local -i USED=0
	local -i SIZE=0
	local -i i
	local -i MEM_PHYS_USED=0
	local -i MEM_PHYS_SIZE=0
	local -i MEM_BUFF_USED=0
	local -i MEM_CACH_USED=0
	local -i MEM_TOTA_USED
	local REAL_MEM_PHYS_USED_PERCENT
	local DESC

	# Only storage indexes 1 to 10 are inspected.
	for i in {10..1}; do
		DESC="$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_STORAGE_DESC}.${i}" | cut -d= -f2 | cut -d: -f2)" || exit_malfunction "SNMP failed"

		# Fetch the counters only for memory-related entries.
		if echo "$DESC" | grep -q -i -e "memory"; then
			USED="$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_STORAGE_USED}.${i}" | cut -d= -f2 | cut -d: -f2)" || exit_malfunction "SNMP failed"
			SIZE="$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_STORAGE_SIZE}.${i}" | cut -d= -f2 | cut -d: -f2)" || exit_malfunction "SNMP failed"
		fi

		if echo "$DESC" | grep -q -i -e "Physical memory"; then
			MEM_PHYS_USED="$USED"
			MEM_PHYS_SIZE="$SIZE"
		elif echo "$DESC" | grep -q -i -e "Memory buffers"; then
			MEM_BUFF_USED="$USED"
		elif echo "$DESC" | grep -q -i -e "Cached memory"; then
			MEM_CACH_USED="$USED"
		fi
	done

	# Without a physical memory size the percentage would divide by zero.
	if [ "$MEM_PHYS_SIZE" -le 0 ]; then
		exit_malfunction "Physical memory entry not found"
	fi

	# BECAUSE FREE MEMORY DONT MEAN AVAILABLE MEMORY
	MEM_TOTA_USED=$((MEM_PHYS_USED - MEM_BUFF_USED - MEM_CACH_USED))
	REAL_MEM_PHYS_USED_PERCENT=$(calc $MEM_TOTA_USED \* 100/"$MEM_PHYS_SIZE")
	REAL_MEM_PHYS_USED_PERCENT=$(printf "%.2f" "$REAL_MEM_PHYS_USED_PERCENT")

	# Thresholds are compared against the integer part of the percentage.
	if [ "${REAL_MEM_PHYS_USED_PERCENT%.*}" -ge "${MEM_CRITICAL_THRESHOLD}" ]; then
		STATUS_MSG="CRITICAL"
		STATUS_CODE=2
	elif [ "${REAL_MEM_PHYS_USED_PERCENT%.*}" -ge "${MEM_WARNING_THRESHOLD}" ]; then
		STATUS_MSG="WARNING"
		STATUS_CODE=1
	else
		STATUS_MSG="OK"
		STATUS_CODE=0
	fi

	# The "MB" figures divide the raw counters by 1024.
	# NOTE(review): this assumes the agent reports memory in 1 KiB units — confirm.
	echo "$LABEL $STATUS_MSG - ${REAL_MEM_PHYS_USED_PERCENT}% used ($((MEM_TOTA_USED / 1024)) MB / $((MEM_PHYS_SIZE / 1024)) MB)|TOTAL=$((MEM_PHYS_SIZE / 1024));USED=$((MEM_PHYS_USED / 1024));CACHE=$((MEM_CACH_USED / 1024));BUFFER=$((MEM_BUFF_USED / 1024))"

}
 
# get_disk
# Checks the fill level of storage entries whose description looks like a mount
# point.
#
# Two modes, selected by PART:
#   * PART empty: every description starting with "/" and not matching
#     PART_EXCLUDE is checked. All entries are evaluated and the most severe
#     non-OK one is reported (a WARNING found first no longer hides a CRITICAL
#     found later); healthy entries are traced on stderr. "DISK ALL OK" is
#     printed when nothing is above the thresholds.
#   * PART set: only the entry whose description equals PART (an optional
#     trailing slash is tolerated) is checked and reported.
#
# Entries reporting a size of 0 are skipped: their percentage cannot be computed.
#
# Globals read:    SNMP_HOST, OID_STORAGE_DESC, OID_STORAGE_USED,
#                  OID_STORAGE_SIZE, LABEL, PART, PART_EXCLUDE,
#                  DISK_WARNING_THRESHOLD, DISK_CRITICAL_THRESHOLD
# Globals written: STATUS_MSG, STATUS_CODE
# Exits:           through exit_malfunction when an SNMP query fails or when no
#                  usable entry matches
get_disk() {
	local -i PART_USED
	local -i PART_SIZE
	local -i PART_CODE
	local -i WORST_CODE=0
	local -i MATCHED=0
	local PART_MSG
	local PART_USED_PERCENT
	local WORST_MSG=''
	local WORST_LINE=''
	local LINE
	local LISTE
	local REGEX_PART
	local REGEX_EXCLUDE
	local DESC
	local i

	# Numeric indexes of every storage entry known to the agent.
	LISTE=$(snmpwalk -r 2 -v 3 "$SNMP_HOST" "$OID_STORAGE_DESC" | cut -d'=' -f1 | tr -d ' ' | grep -o '[0-9]\+$') || exit_malfunction "SNMP failed"

	if [ "${PART:-}" = "" ]; then
		REGEX_PART='^/'
		REGEX_EXCLUDE="^${PART_EXCLUDE}$"
	else
		REGEX_PART="^${PART%/}/?$"
		REGEX_EXCLUDE='^$'
	fi

	for i in $LISTE; do
		DESC="$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_STORAGE_DESC}.${i}" | cut -d= -f2 | cut -d: -f2 | xargs)" || exit_malfunction "SNMP failed"

		# Keep only descriptions that pass the exclusion filter and match the selection.
		if ! echo "$DESC" | grep -E -v -i -e "$REGEX_EXCLUDE" | grep -q -E -e "$REGEX_PART"; then
			continue
		fi

		PART_USED="$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_STORAGE_USED}.${i}" | cut -d= -f2 | cut -d: -f2)" || exit_malfunction "SNMP failed"
		PART_SIZE="$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_STORAGE_SIZE}.${i}" | cut -d= -f2 | cut -d: -f2)" || exit_malfunction "SNMP failed"

		# A size of 0 would make the percentage a division by zero.
		if [ "$PART_SIZE" -le 0 ]; then
			continue
		fi
		MATCHED=1

		PART_USED_PERCENT=$(calc $PART_USED \* 100/"$PART_SIZE")
		PART_USED_PERCENT=$(printf "%.2f" "$PART_USED_PERCENT")

		# Thresholds are compared against the integer part of the percentage.
		if [ "${PART_USED_PERCENT%.*}" -ge "${DISK_CRITICAL_THRESHOLD}" ]; then
			PART_MSG="CRITICAL"
			PART_CODE=2
		elif [ "${PART_USED_PERCENT%.*}" -ge "${DISK_WARNING_THRESHOLD}" ]; then
			PART_MSG="WARNING"
			PART_CODE=1
		else
			PART_MSG="OK"
			PART_CODE=0
		fi
		LINE="$LABEL $PART_MSG - DISK ${DESC} ${PART_USED_PERCENT}%|PART_USED=${PART_USED};PART_SIZE=${PART_SIZE}"

		if [ "${PART:-}" != "" ]; then
			# Single partition requested: report it as is.
			STATUS_MSG="$PART_MSG"
			STATUS_CODE=$PART_CODE
			echo "$LINE"
		elif [ "$PART_CODE" -eq 0 ]; then
			echo "DEBUG: $LINE" >&2
		elif [ "$PART_CODE" -gt "$WORST_CODE" ]; then
			# Remember the first entry seen at the highest severity so far.
			WORST_CODE=$PART_CODE
			WORST_MSG="$PART_MSG"
			WORST_LINE="$LINE"
		fi
	done

	if [ "$MATCHED" -eq 0 ]; then
		exit_malfunction "PART: ${PART:-} not found"
	fi

	if [ "${PART:-}" = "" ]; then
		if [ "$WORST_CODE" -ne 0 ]; then
			STATUS_MSG="$WORST_MSG"
			STATUS_CODE=$WORST_CODE
			echo "$WORST_LINE"
		else
			STATUS_MSG="OK"
			STATUS_CODE=0
			echo "$LABEL $STATUS_MSG - DISK ALL OK"
		fi
	fi

}
 
# get_cpu
# Walks OID_CPU_PERCENT_USED on SNMP_HOST, keeps the highest value returned and
# compares it with CPU_WARNING_THRESHOLD / CPU_CRITICAL_THRESHOLD.
# Sets STATUS_MSG and STATUS_CODE, prints the status line and its perfdata.
# Calls exit_malfunction when the SNMP pipeline fails.
get_cpu() {
	local -i busiest_core

	busiest_core="$(snmpwalk -r 2 -v 3 "$SNMP_HOST" "${OID_CPU_PERCENT_USED}" | cut -d= -f2 | cut -d: -f2 | sort -n | tail -1)" || exit_malfunction "SNMP failed"

	# Assume the healthy state, then escalate when a threshold is reached.
	STATUS_MSG="OK"
	STATUS_CODE=0
	if [ "${busiest_core}" -ge "${CPU_CRITICAL_THRESHOLD}" ]; then
		STATUS_MSG="CRITICAL"
		STATUS_CODE=2
	elif [ "${busiest_core}" -ge "${CPU_WARNING_THRESHOLD}" ]; then
		STATUS_MSG="WARNING"
		STATUS_CODE=1
	fi

	echo "$LABEL $STATUS_MSG - MAX CORE USAGE: ${busiest_core}%|MAX_CORE_USAGE_PERCENT=${busiest_core}"
}
 
# get_swap
# Reports the share of swap space in use.
#
# Globals read:    SNMP_HOST, OID_SWAP_TOTAL, OID_SWAP_FREE, LABEL,
#                  SWAP_WARNING_THRESHOLD, SWAP_CRITICAL_THRESHOLD
# Globals written: STATUS_MSG, STATUS_CODE
# Outputs:         one status line followed by perfdata, on stdout
# Exits:           through exit_malfunction when an SNMP query fails
get_swap() {
	local -i SWAP_TOTAL
	local -i SWAP_FREE
	local -i SWAP_USED
	local SWAP_USED_PERCENT

	# Only the first word after the type is kept: depending on the MIBs loaded
	# by the SNMP tools the value may be followed by a unit ("2097148 kB"),
	# which would otherwise break the integer assignment.
	SWAP_TOTAL="$(snmpwalk -r 2 -v 3 "$SNMP_HOST" "${OID_SWAP_TOTAL}" | cut -d= -f2 | cut -d: -f2 | awk '{ print $1 }')" || exit_malfunction "SNMP failed"
	SWAP_FREE="$(snmpwalk -r 2 -v 3 "$SNMP_HOST" "${OID_SWAP_FREE}" | cut -d= -f2 | cut -d: -f2 | awk '{ print $1 }')" || exit_malfunction "SNMP failed"
	SWAP_USED=$((SWAP_TOTAL - SWAP_FREE))

	if [ "$SWAP_TOTAL" -gt 0 ]; then
		SWAP_USED_PERCENT=$(calc $SWAP_USED \* 100/"$SWAP_TOTAL")
		SWAP_USED_PERCENT=$(printf "%.2f" "$SWAP_USED_PERCENT")
	else
		# Host without swap: nothing can be in use, and the ratio above would
		# divide by zero.
		SWAP_USED_PERCENT="0.00"
	fi

	# Thresholds are compared against the integer part of the percentage.
	if [ "${SWAP_USED_PERCENT%.*}" -ge "${SWAP_CRITICAL_THRESHOLD}" ]; then
		STATUS_MSG="CRITICAL"
		STATUS_CODE=2
	elif [ "${SWAP_USED_PERCENT%.*}" -ge "${SWAP_WARNING_THRESHOLD}" ]; then
		STATUS_MSG="WARNING"
		STATUS_CODE=1
	else
		STATUS_MSG="OK"
		STATUS_CODE=0
	fi

	# NOTE(review): the "MB" figures divide the raw values by 1024, which
	# assumes the agent reports swap in kB — confirm.
	echo "$LABEL $STATUS_MSG - ${SWAP_USED_PERCENT}% used ( $((SWAP_USED / 1024)) MB / $((SWAP_TOTAL / 1024)) MB)|SWAP_TOTAL=${SWAP_TOTAL};SWAP_FREE=${SWAP_FREE}"
}
 
# get_iowait
# Reads OID_CPU_IOWAIT twice, INTERVAL seconds apart, and turns the difference
# into a figure per CPU and per second (divided by get_nproc and by INTERVAL),
# which is then compared with IOWAIT_WARNING_THRESHOLD / IOWAIT_CRITICAL_THRESHOLD.
# Sets STATUS_MSG, STATUS_CODE and IOWAIT_PERCENT; prints the status line.
# Calls exit_malfunction when an SNMP query fails.
get_iowait() {
	local -i first_sample
	local -i second_sample
	local -i delta

	first_sample=$(snmpget -Oqv -v3 "$SNMP_HOST" "$OID_CPU_IOWAIT") || exit_malfunction "SNMP failed"
	sleep $INTERVAL
	second_sample=$(snmpget -Oqv -v3 "$SNMP_HOST" "$OID_CPU_IOWAIT") || exit_malfunction "SNMP failed"
	delta=$((second_sample - first_sample))

	IOWAIT_PERCENT=$(calc $delta / "$(get_nproc)" / $INTERVAL)
	IOWAIT_PERCENT=$(printf "%.2f" "$IOWAIT_PERCENT")

	# Assume the healthy state, then escalate on the integer part of the figure.
	STATUS_MSG="OK"
	STATUS_CODE=0
	if [ "${IOWAIT_PERCENT%.*}" -ge "${IOWAIT_CRITICAL_THRESHOLD}" ]; then
		STATUS_MSG="CRITICAL"
		STATUS_CODE=2
	elif [ "${IOWAIT_PERCENT%.*}" -ge "${IOWAIT_WARNING_THRESHOLD}" ]; then
		STATUS_MSG="WARNING"
		STATUS_CODE=1
	fi

	echo "$LABEL $STATUS_MSG - ${IOWAIT_PERCENT}%|IOWAIT_PERCENT=${IOWAIT_PERCENT}"
}
 
# get_users_count
# Reads OID_USERS_COUNT on SNMP_HOST and compares the value with
# USERS_WARNING_THRESHOLD / USERS_CRITICAL_THRESHOLD.
# Sets STATUS_MSG and STATUS_CODE, prints the status line and its perfdata.
# Calls exit_malfunction when the SNMP pipeline fails.
get_users_count() {
	local -i logged_in

	logged_in="$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_USERS_COUNT}" | cut -d= -f2 | cut -d: -f2)" || exit_malfunction "SNMP failed"

	# Assume the healthy state, then escalate when a threshold is reached.
	STATUS_MSG="OK"
	STATUS_CODE=0
	if [ "${logged_in}" -ge "${USERS_CRITICAL_THRESHOLD}" ]; then
		STATUS_MSG="CRITICAL"
		STATUS_CODE=2
	elif [ "${logged_in}" -ge "${USERS_WARNING_THRESHOLD}" ]; then
		STATUS_MSG="WARNING"
		STATUS_CODE=1
	fi

	echo "$LABEL $STATUS_MSG - ${logged_in} users currently logged in|users=${logged_in}"
}
 
# get_procs
# Counts the lines returned by an SNMP walk of OID_PROCS_COUNT on SNMP_HOST and
# compares that count with PROCS_WARNING_THRESHOLD / PROCS_CRITICAL_THRESHOLD.
# Sets STATUS_MSG and STATUS_CODE, prints the status line and its perfdata.
# Calls exit_malfunction when the SNMP pipeline fails.
get_procs() {
	local -i process_total

	process_total=$(snmpwalk -r 2 -v 3 "$SNMP_HOST" "${OID_PROCS_COUNT}" | wc -l) || exit_malfunction "SNMP failed"

	# Assume the healthy state, then escalate when a threshold is reached.
	STATUS_MSG="OK"
	STATUS_CODE=0
	if [ "${process_total}" -ge "${PROCS_CRITICAL_THRESHOLD}" ]; then
		STATUS_MSG="CRITICAL"
		STATUS_CODE=2
	elif [ "${process_total}" -ge "${PROCS_WARNING_THRESHOLD}" ]; then
		STATUS_MSG="WARNING"
		STATUS_CODE=1
	fi

	echo "$LABEL $STATUS_MSG - ${process_total} processes|processes=${process_total}"
}
 
# get_load
# Reports the 1, 5 and 15 minute load averages divided by the number of CPUs.
#
# LOAD_WARNING_THRESHOLD and LOAD_CRITICAL_THRESHOLD each hold three
# comma-separated integers (1, 5 and 15 minute values). The state is raised as
# soon as one of the three averages reaches its threshold.
#
# Globals read:    SNMP_HOST, OID_LOADAVG, LABEL, LOAD_WARNING_THRESHOLD,
#                  LOAD_CRITICAL_THRESHOLD
# Globals written: STATUS_MSG, STATUS_CODE
# Outputs:         one status line followed by perfdata, on stdout
# Exits:           through exit_malfunction when an SNMP query fails or when no
#                  CPU is reported
get_load() {
	local LOADAVG_1M_PERCENT
	local LOADAVG_5M_PERCENT
	local LOADAVG_15M_PERCENT
	local -i NPROC

	LOADAVG_1M_PERCENT=$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_LOADAVG}.1" | cut -d= -f2 | cut -d: -f2) || exit_malfunction "SNMP failed"
	LOADAVG_5M_PERCENT=$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_LOADAVG}.2" | cut -d= -f2 | cut -d: -f2) || exit_malfunction "SNMP failed"
	LOADAVG_15M_PERCENT=$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_LOADAVG}.3" | cut -d= -f2 | cut -d: -f2) || exit_malfunction "SNMP failed"

	# The CPU count is the same for the three averages: query it once.
	# get_nproc reports SNMP failures itself, so only its exit code is
	# propagated here.
	NPROC=$(get_nproc) || exit 3
	if [ "$NPROC" -le 0 ]; then
		exit_malfunction "SNMP returned no CPU entry"
	fi

	# Integer division: the reported figures carry no decimals.
	LOADAVG_1M_PERCENT=$((LOADAVG_1M_PERCENT / NPROC))
	LOADAVG_5M_PERCENT=$((LOADAVG_5M_PERCENT / NPROC))
	LOADAVG_15M_PERCENT=$((LOADAVG_15M_PERCENT / NPROC))

	local -i LOAD_WARN_1M
	local -i LOAD_WARN_5M
	local -i LOAD_WARN_15M
	local -i LOAD_CRIT_1M
	local -i LOAD_CRIT_5M
	local -i LOAD_CRIT_15M
	IFS=',' read -r LOAD_WARN_1M LOAD_WARN_5M LOAD_WARN_15M <<<"$LOAD_WARNING_THRESHOLD"
	IFS=',' read -r LOAD_CRIT_1M LOAD_CRIT_5M LOAD_CRIT_15M <<<"$LOAD_CRITICAL_THRESHOLD"

	if [ "${LOADAVG_1M_PERCENT%.*}" -ge "${LOAD_CRIT_1M}" ] || [ "${LOADAVG_5M_PERCENT%.*}" -ge "${LOAD_CRIT_5M}" ] || [ "${LOADAVG_15M_PERCENT%.*}" -ge "${LOAD_CRIT_15M}" ]; then
		STATUS_MSG="CRITICAL"
		STATUS_CODE=2
	elif [ "${LOADAVG_1M_PERCENT%.*}" -ge "${LOAD_WARN_1M}" ] || [ "${LOADAVG_5M_PERCENT%.*}" -ge "${LOAD_WARN_5M}" ] || [ "${LOADAVG_15M_PERCENT%.*}" -ge "${LOAD_WARN_15M}" ]; then
		STATUS_MSG="WARNING"
		STATUS_CODE=1
	else
		STATUS_MSG="OK"
		STATUS_CODE=0
	fi

	echo "$LABEL $STATUS_MSG - LOAD AVERAGE: ${LOADAVG_1M_PERCENT}%, ${LOADAVG_5M_PERCENT}%, ${LOADAVG_15M_PERCENT}%|LOAD1=$LOADAVG_1M_PERCENT;LOAD5=$LOADAVG_5M_PERCENT;LOAD15=$LOADAVG_15M_PERCENT"
}
 
# usage
# Prints the command-line synopsis and a few examples on stderr.
# The synopsis lists every check type accepted by main, including "load".
usage() {
	cat >&2 <<EOF
Usage: $SCRIPT_NAME -t <mem|disk|cpu|swap|iowait|users|procs|load> [ -H <destination> ] [ -w <warnlevel> ] [ -c <critlevel> ]
       $SCRIPT_NAME -t disk                                        [ -H <destination> ] [ -w <warnlevel> ] [ -c <critlevel> ] [ -p <mount_point> ]
       $SCRIPT_NAME -t disk                                        [ -H <destination> ] [ -w <warnlevel> ] [ -c <critlevel> ] [ -e <exclude_mount_point_regex> ]
 
Examples: 
  $SCRIPT_NAME -t disk -p /var/lib
  $SCRIPT_NAME -t disk -e '/mnt/.*'
  $SCRIPT_NAME -t load -w 110,105,100 -c 160,130,100
 
EOF
}
 
# main
# Dispatches on TYPE: sets LABEL, resolves the warning/critical thresholds
# (WARNING_THRESHOLD / CRITICAL_THRESHOLD when provided, the DEFAULT_* value of
# the selected check otherwise) and runs the matching get_* function.
# For every check except "load" the resolved thresholds are integer variables
# local to main, which the get_* function reads while main is running.
# An unknown or missing TYPE prints the usage and exits with status 3.
main() {
	case "${TYPE:-}" in
	mem)
		LABEL=MEMORY
		local -i MEM_CRITICAL_THRESHOLD MEM_WARNING_THRESHOLD
		MEM_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_MEM_CRITICAL_THRESHOLD}
		MEM_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_MEM_WARNING_THRESHOLD}
		get_mem
		;;
	disk)
		LABEL=DISK
		local -i DISK_CRITICAL_THRESHOLD DISK_WARNING_THRESHOLD
		DISK_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_DISK_CRITICAL_THRESHOLD}
		DISK_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_DISK_WARNING_THRESHOLD}
		get_disk
		;;
	cpu)
		LABEL=CPU
		local -i CPU_CRITICAL_THRESHOLD CPU_WARNING_THRESHOLD
		CPU_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_CPU_CRITICAL_THRESHOLD}
		CPU_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_CPU_WARNING_THRESHOLD}
		get_cpu
		;;
	swap)
		LABEL=SWAP
		local -i SWAP_CRITICAL_THRESHOLD SWAP_WARNING_THRESHOLD
		SWAP_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_SWAP_CRITICAL_THRESHOLD}
		SWAP_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_SWAP_WARNING_THRESHOLD}
		get_swap
		;;
	iowait)
		LABEL=IOWAIT
		local -i IOWAIT_CRITICAL_THRESHOLD IOWAIT_WARNING_THRESHOLD
		IOWAIT_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_IOWAIT_CRITICAL_THRESHOLD}
		IOWAIT_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_IOWAIT_WARNING_THRESHOLD}
		get_iowait
		;;
	users)
		LABEL=USERS
		local -i USERS_CRITICAL_THRESHOLD USERS_WARNING_THRESHOLD
		USERS_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_USERS_CRITICAL_THRESHOLD}
		USERS_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_USERS_WARNING_THRESHOLD}
		get_users_count
		;;
	procs)
		LABEL=PROCS
		local -i PROCS_CRITICAL_THRESHOLD PROCS_WARNING_THRESHOLD
		PROCS_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_PROCS_CRITICAL_THRESHOLD}
		PROCS_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_PROCS_WARNING_THRESHOLD}
		get_procs
		;;
	load)
		# Load thresholds are comma-separated triplets, hence plain strings.
		LABEL=LOAD
		LOAD_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_LOAD_CRITICAL_THRESHOLD}
		LOAD_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_LOAD_WARNING_THRESHOLD}
		get_load
		;;
	*)
		LABEL=UNDEF
		usage
		exit 3
		;;
	esac
}
 
# parse_args ARG...
# Reads the command-line options into the global variables used by main:
#   -t/--type TYPE, -c/--critical CRITICAL_THRESHOLD,
#   -w/--warning WARNING_THRESHOLD, -H/--hostname SNMP_HOST,
#   -p/--partition PART, -e/--exclude-partition PART_EXCLUDE.
# -h/--help prints the usage and exits 0. "--" stops the parsing. Parsing also
# stops at the first empty argument.
#
# An option that needs a value but is given last on the command line is
# reported through exit_malfunction (UNKNOWN, exit 3) instead of aborting on an
# unbound "$1" without any plugin output.
# NOTE(review): an unrecognised option still exits with status 1, which Nagios
# reads as WARNING — confirm whether UNKNOWN (3) would be preferable.
parse_args() {
	while [ "${1-}" != "" ]; do
		case $1 in
		-h | --help)
			usage
			exit 0
			;;
		-t | --type | -c | --critical | -w | --warning | -H | --hostname | -p | --partition | -e | --exclude-partition)
			if [ $# -lt 2 ]; then
				exit_malfunction "Option $1 requires an argument"
			fi
			case $1 in
			-t | --type) TYPE=$2 ;;
			-c | --critical) CRITICAL_THRESHOLD=$2 ;;
			-w | --warning) WARNING_THRESHOLD=$2 ;;
			-H | --hostname) SNMP_HOST=$2 ;;
			-p | --partition) PART=$2 ;;
			-e | --exclude-partition) PART_EXCLUDE=$2 ;;
			esac
			# Drop the option name; the value is dropped by the shift below.
			shift
			;;
		--) # End of all options
			shift
			break
			;;
		-*)
			echo "$SCRIPT_NAME: invalid option" >&2
			echo "Try '$SCRIPT_NAME --help' for more information." >&2
			exit 1
			;;
		*)
			usage
			;;
		esac
		shift
	done
}

parse_args "$@"
 
# Run the selected check, then hand the status code it set back to the caller.
main
exit $STATUS_CODE

Conf SNMPv3 pour tester ce script :

/etc/snmp/snmpd.conf

skipNFSInHostResources 1
 
# rwuser: a SNMPv3 read-write user
#   arguments:  user [noauth|auth|priv] [restriction_oid]
 
rwuser nagios auth
rwuser nagios

/usr/share/snmp/snmpd.conf

rwuser nagios auth
createUser nagios MD5 P@ssw0rd

Problème de timeout avec snmpget

On peut rencontrer des problèmes de timeout SNMP. Pour pallier ce problème, il peut être utile d'ajouter skipNFSInHostResources 1 dans /etc/snmp/snmpd.conf

Sonde Nagios supervision de la RAM en local via la commande free

Source : https://exchange.nagios.org/directory/Plugins/System-Metrics/Memory/check_mem_ng-2Esh/details

check_mem_ng.sh

#!/bin/bash
################################################################################
#This plugin is loosely inspired by check_mem v1.1 from Lukasz Gogolin
#https://exchange.nagios.org/directory/Plugins/System-Metrics/Memory/check_mem-2Esh/details
#I did a code cleanup and added a check of "free" version, because since
#procps-ng 3.3.0 (RHEL 7+) check_mem.sh doesn't work anymore (buff/cache column)
################################################################################
# 1.1 - Added -l flag and LEGACY_PERFDATA var to put legacy perfdata mode
################################################################################
#Nagios Constants
#Fallback exit codes, possibly redefined by utils.sh when it is sourced below
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
#$0 with its last path component (a slash or backslash followed by the file
#name) removed; left unchanged when $0 contains no separator
SCRIPTPATH=`echo $0 | /bin/sed -e 's,[\\/][^\\/][^\\/]*$,,'`
if [[ -f ${SCRIPTPATH}/utils.sh ]]; then
        . ${SCRIPTPATH}/utils.sh # use nagios utils to set real STATE_* return values
fi
 
#Useful functions
#Prints the script name ($0) and VERSION, followed by an empty line
printversion(){
        printf '%s %s\n\n' "$0" "$VERSION"
}
 
#Prints the version banner followed by the help text, one line per argument
printusage() {
        printversion
        printf '%s\n' \
                "Usage:" \
                "  check_mem_ng.sh [-w <warnlevel>] [-c <critlevel>] [-v] [-l]" \
                "    checks local host available memory" \
                "    warnlevel and critlevel is percentage value without %" \
                "    defaults being respectively 80 et 90" \
                "    add -v for verbose (debuging purpose)" \
                "    add -l for legacy perfdata mode (or change LEGACY_PERFDATA variable in script)" \
                "  check_mem_ng.sh -V" \
                "    prints version" \
                "  check_mem_ng.sh -h" \
                "    prints help (this message)"
}
 
 
#Verbose helper: prints "NAME : value" for each variable listed below, then an
#empty line. Unset variables are shown with an empty value.
#Values are read through indirect expansion and printed verbatim: no eval, no
#word splitting and no filename expansion (a threshold of "*" is shown as "*").
printvariables() {
        local name
        echo "Variables:"
        #Add all your variables at the end of the "for" line to display them in verbose
        for name in WARNING_THRESHOLD CRITICAL_THRESHOLD FINAL_STATE FINAL_COMMENT LEGACY_PERFDATA FREE_OUTPUT TOTAL_MEM FREE_MEM BUFFCACHE_MEM BUFF_MEM CACHE_MEM USED_MEM TOTAL_MEM_MB USED_MEM_MB WARNING_THRESHOLD_B CRITICAL_THRESHOLD_B USED_MEM_PRC ENABLE_PERFDATA VERSION
        do
                printf '%s : %s\n' "$name" "${!name-}"
        done
        echo
}
 
#Start from the UNKNOWN verdict; the check overwrites both values once it has
#compared the memory usage with the thresholds
FINAL_COMMENT="UNKNOWN: Unplaned exit. You should check that everything is alright"
FINAL_STATE=${STATE_UNKNOWN}

#Default values
VERSION="1.1"
VERBOSE=0
ENABLE_PERFDATA=1
CRITICAL_THRESHOLD=90
WARNING_THRESHOLD=80

#####FORCE LEGACY MODE#####
#put 1 to force legacy perfdata mode without using "-l" flag (no configuration change in nrpe.cfg)
LEGACY_PERFDATA=0
#####FORCE LEGACY MODE#####
 
#Process arguments

#parse_options ARG...
#Handles -c <critlevel>, -w <warnlevel>, -l (legacy perfdata), -v (verbose),
#-h (help, exits STATE_OK) and -V (version, exits STATE_UNKNOWN).
#Thresholds must be non-negative integers: anything else is rejected with an
#UNKNOWN result, because a non-numeric threshold would make the later integer
#comparisons fail and let the check fall through to an OK verdict.
parse_options() {
        local opt
        local OPTIND=1
        while getopts ":c:hlvVw:" opt; do
                case $opt in
                        c | w)
                                case $OPTARG in
                                        '' | *[!0-9]*)
                                                echo "UNKNOWN: Option -$opt expects a non-negative integer, got '$OPTARG'"
                                                exit $STATE_UNKNOWN
                                                ;;
                                esac
                                if [ "$opt" = "c" ]; then
                                        CRITICAL_THRESHOLD=$OPTARG
                                else
                                        WARNING_THRESHOLD=$OPTARG
                                fi
                                ;;
                        h)
                                printusage
                                exit $STATE_OK
                                ;;
                        l)
                                LEGACY_PERFDATA=1
                                ;;
                        v)
                                echo "Verbose mode ON"
                                echo
                                VERBOSE=1
                                ;;
                        V)
                                printversion
                                exit $STATE_UNKNOWN
                                ;;
                        \?)
                                echo "UNKNOWN: Invalid option: -$OPTARG"
                                exit $STATE_UNKNOWN
                                ;;
                        :)
                                echo "UNKNOWN: Option -$OPTARG requires an argument."
                                exit $STATE_UNKNOWN
                                ;;
                esac
        done
}

parse_options "$@"
 
#Real check begins here

#_is_uint VALUE: succeeds when VALUE is a non-empty string of decimal digits
_is_uint() {
        case $1 in
                '' | *[!0-9]*) return 1 ;;
        esac
        return 0
}

#check_memory
#Reads the "Mem:" line of "free -b", derives the used memory (total - free -
#buffers/cache) and compares its percentage with WARNING_THRESHOLD and
#CRITICAL_THRESHOLD.
#
#Globals read:    WARNING_THRESHOLD, CRITICAL_THRESHOLD, LEGACY_PERFDATA,
#                 ENABLE_PERFDATA, STATE_*
#Globals written: FINAL_STATE, FINAL_COMMENT, PERFDATA and the intermediate
#                 figures shown in verbose mode (FREE_OUTPUT, TOTAL_MEM, ...)
#Returns:         0 when a verdict was reached; 1 when the output of free or the
#                 thresholds are unusable, in which case FINAL_STATE and
#                 FINAL_COMMENT describe an UNKNOWN result
check_memory() {
        local col6 col7 wide_output

        PERFDATA=""

        #The C locale keeps the "Mem:" label stable whatever the system language
        FREE_OUTPUT=$(LC_ALL=C free -b | grep 'Mem:')
        read -r _ TOTAL_MEM _ FREE_MEM _ col6 col7 _ <<<"$FREE_OUTPUT"

        #Without usable figures the comparisons below would fail and the check
        #would silently end on OK: report UNKNOWN instead
        if ! _is_uint "$TOTAL_MEM" || ! _is_uint "$FREE_MEM" || ! _is_uint "$col6" || [ "$TOTAL_MEM" -eq 0 ]; then
                FINAL_STATE=$STATE_UNKNOWN
                FINAL_COMMENT="UNKNOWN: Unable to read memory figures from 'free -b'"
                return 1
        fi
        if ! _is_uint "$WARNING_THRESHOLD" || ! _is_uint "$CRITICAL_THRESHOLD"; then
                FINAL_STATE=$STATE_UNKNOWN
                FINAL_COMMENT="UNKNOWN: Thresholds must be non-negative integers"
                return 1
        fi

        if LC_ALL=C free -V | grep -q 'procps-ng'; then
                #procps-ng, free will display buff/cache as one column
                BUFFCACHE_MEM=$col6
                if [ "$LEGACY_PERFDATA" -eq 1 ] ; then
                        #Get BUFFER and CACHE separately for legacy output: the
                        #wide layout (-w) prints them in two columns. Fall back
                        #to the standard line when -w is not supported.
                        wide_output=$(LC_ALL=C free -b -w 2>/dev/null | grep 'Mem:')
                        FREE_OUTPUT2=${wide_output:-$FREE_OUTPUT}
                        read -r _ _ _ _ _ BUFF_MEM CACHE_MEM _ <<<"$FREE_OUTPUT2"
                fi
        else
                #procps, free will display buff/cache as two separate columns
                if ! _is_uint "$col7"; then
                        FINAL_STATE=$STATE_UNKNOWN
                        FINAL_COMMENT="UNKNOWN: Unable to read memory figures from 'free -b'"
                        return 1
                fi
                BUFF_MEM=$col6
                CACHE_MEM=$col7
                BUFFCACHE_MEM=$((BUFF_MEM + CACHE_MEM))
        fi

        #Deduce used memory from total/free/buffer+cache
        USED_MEM=$((TOTAL_MEM - FREE_MEM - BUFFCACHE_MEM))

        #Convert in MB for pseudo "human readable" plugin output. Todo, real human readable?
        TOTAL_MEM_MB=$((TOTAL_MEM / 1048576))
        USED_MEM_MB=$((USED_MEM / 1048576))
        #10# forces base 10 so that a threshold such as "080" is not read as octal
        WARNING_THRESHOLD_B=$((10#$WARNING_THRESHOLD * TOTAL_MEM / 100))
        CRITICAL_THRESHOLD_B=$((10#$CRITICAL_THRESHOLD * TOTAL_MEM / 100))

        #Convert in percents used memory
        USED_MEM_PRC=$(((USED_MEM * 100) / TOTAL_MEM))

        #Check if used memory is above thresholds
        if [ "$USED_MEM_PRC" -ge "$CRITICAL_THRESHOLD" ]; then
                FINAL_STATE=$STATE_CRITICAL
                FINAL_COMMENT="CRITICAL: Memory above critical threshold. Total: ${TOTAL_MEM_MB} MB - Used: ${USED_MEM_MB} MB - ${USED_MEM_PRC}% used!"
        elif [ "$USED_MEM_PRC" -ge "$WARNING_THRESHOLD" ]; then
                FINAL_STATE=$STATE_WARNING
                FINAL_COMMENT="WARNING: Memory above warning threshold. Total: ${TOTAL_MEM_MB} MB - Used: ${USED_MEM_MB} MB - ${USED_MEM_PRC}% used!"
        else
                FINAL_STATE=$STATE_OK
                FINAL_COMMENT="OK: Memory below thresholds. Total: ${TOTAL_MEM_MB} MB - Used: ${USED_MEM_MB} MB - ${USED_MEM_PRC}% used."
        fi

        #Perfdata processing
        if [ "$ENABLE_PERFDATA" -eq 1 ] ; then
                #Added check_mem.sh legacy perfdata mode, to ease migration
                if [ "$LEGACY_PERFDATA" -eq 1 ] ; then
                        PERFDATA=" | TOTAL=$TOTAL_MEM;;;; USED=$USED_MEM;;;; CACHE=$CACHE_MEM;;;; BUFFER=$BUFF_MEM;;;;"
                else
                        PERFDATA=" | Memory_Used=${USED_MEM}B;$WARNING_THRESHOLD_B;$CRITICAL_THRESHOLD_B;0;$TOTAL_MEM;"
                fi
        fi
        return 0
}

#A failure is already recorded in FINAL_STATE / FINAL_COMMENT, which the end of
#the script prints and returns
check_memory || :
 
#Script end, display verbose information
if [[ $VERBOSE -eq 1 ]] ; then
        printvariables
fi

#Print the verdict and its perfdata verbatim: the expansion is quoted so that
#the text goes through neither word splitting nor filename expansion
printf '%s\n' "${FINAL_COMMENT}${PERFDATA}"
exit $FINAL_STATE