{{tag>Brouillon Mémoire SNMP Nagios Supervision Script Bash CPU}}

# Supervision - sonde Nagios - Mémoire Linux et CPU

Voir :
* [[Mémoire Linux]]

Voir SNMP :
* http://www.net-snmp.org/docs/mibs/host.html
* https://mibs.observium.org/mib/HOST-RESOURCES-MIB/
* https://docs.centreon.com/fr/pp/integrations/plugin-packs/procedures/operatingsystems-linux-snmp/

Voir aussi :
* https://packages.debian.org/sid/all/nagios-snmp-plugins/filelist
* https://wiki.monitoring-fr.org/nagios/plugins/check_multi.html

## Sonde Nagios supervision de la RAM & CPU via SNMPv3

Voir :
* [[Notes supervision Nagios|Exemple de conf Nagios pour check_snmp_mem.sh]]

Voir aussi :
* https://github.com/ynlamy/check_snmp_memory/blob/master/check_snmp_memory

''check_snmp_mem_cpu.sh''
~~~bash
#! /bin/bash

# Creative Commons CC0 Public Domain Licence
#
# Nagios plugin: queries a host over SNMPv3 and checks memory, disk, CPU,
# swap, iowait, logged-in users, process count or load average.
# SNMPv3 credentials are expected in ~/.snmp/snmp.conf.

set -euo pipefail
IFS=$' \t\n'
export LC_ALL=C

SCRIPT_NAME=$(basename "$0")

SNMP_HOST=localhost # DEFAULT
#SNMP_PASSWD='P@ssw0rd' # YOU SHOULD USE ~/.snmp/snmp.conf FOR CREDS INSTEAD OF ARGUMENT PARAM
#SNMP_USER=nagios

# Delay in seconds between the two samples taken by the iowait probe
declare -i INTERVAL
INTERVAL=5

exec 6>&1 # Link file descriptor #6 with stdout. Saves stdout.

# FIX IGNORED ~/.snmp/snmp.conf WHEN 'sudo -u nagios check_snmp_host.sh'
HOME=$(getent passwd "$(whoami)" | cut -d: -f6)

# DEFAULT VALUES
DEFAULT_MEM_WARNING_THRESHOLD=80
DEFAULT_MEM_CRITICAL_THRESHOLD=90

DEFAULT_DISK_WARNING_THRESHOLD=80
DEFAULT_DISK_CRITICAL_THRESHOLD=90

DEFAULT_CPU_WARNING_THRESHOLD=95
DEFAULT_CPU_CRITICAL_THRESHOLD=99

DEFAULT_SWAP_WARNING_THRESHOLD=80
DEFAULT_SWAP_CRITICAL_THRESHOLD=90

DEFAULT_IOWAIT_WARNING_THRESHOLD=20
DEFAULT_IOWAIT_CRITICAL_THRESHOLD=30

DEFAULT_USERS_WARNING_THRESHOLD=100
DEFAULT_USERS_CRITICAL_THRESHOLD=200

DEFAULT_PROCS_WARNING_THRESHOLD=600
DEFAULT_PROCS_CRITICAL_THRESHOLD=1000

DEFAULT_LOAD_WARNING_THRESHOLD=100,100,100
DEFAULT_LOAD_CRITICAL_THRESHOLD=200,200,200

PART_EXCLUDE=".*cdrom.*"

# HOST-RESOURCES-MIB: hrStorageDescr / hrStorageSize / hrStorageUsed
OID_STORAGE_DESC=.1.3.6.1.2.1.25.2.3.1.3
OID_STORAGE_SIZE=.1.3.6.1.2.1.25.2.3.1.5
OID_STORAGE_USED=.1.3.6.1.2.1.25.2.3.1.6

# HOST-RESOURCES-MIB: hrProcessorLoad (one row per core)
OID_CPU_PERCENT_USED=.1.3.6.1.2.1.25.3.3.1.2

# UCD-SNMP-MIB: memTotalSwap / memAvailSwap
OID_SWAP_TOTAL=.1.3.6.1.4.1.2021.4.3.0
OID_SWAP_FREE=.1.3.6.1.4.1.2021.4.4.0
#OID_SWAP_OUT=.1.3.6.1.4.1.2021.11.4

# UCD-SNMP-MIB: laLoadInt (load average x 100).
# Leading dot added so the OID is absolute, like every other OID here.
OID_LOADAVG=.1.3.6.1.4.1.2021.10.1.5

# UCD-SNMP-MIB: ssCpuRawWait
OID_CPU_IOWAIT=.1.3.6.1.4.1.2021.11.54.0

# HOST-RESOURCES-MIB: hrSystemNumUsers / hrSWRunName (one row per process)
OID_USERS_COUNT=.1.3.6.1.2.1.25.1.5.0
OID_PROCS_COUNT=.1.3.6.1.2.1.25.4.2.1.2

#######################################
# Print a Nagios UNKNOWN line on the saved stdout and exit with status 3.
# Globals:   LABEL (read, defaults to UNDEF)
# Arguments: $* - error description appended to the status line
# Outputs:   "<LABEL> UNKNOWN - ERROR <message>" on the original stdout
# Returns:   never; exits 3
#######################################
exit_malfunction() {
  # Drop the traps first: the 'exit 3' below would otherwise re-enter the
  # EXIT trap and report the same failure a second time.
  trap - EXIT ERR
  exec 1>&6 6>&- # Restore stdout and close file descriptor #6
  echo "${LABEL:-UNDEF} UNKNOWN - ERROR $*"
  exit 3
}

# NAGIOS EXIT STATUS
# STATE_OK=0 ; STATE_WARNING=1 ; STATE_CRITICAL=2 ; STATE_UNKNOWN=3; STATE_DEPENDENT=4
STATUS_MSG=UNKNOWN
typeset -i STATUS_CODE=3

# Safety net: if the script dies before a probe has set a status, report
# UNKNOWN instead of leaking an arbitrary exit code to Nagios.
# $? must be captured first, because the '[' test that follows overwrites it.
trap '_TRAP_RC=$?
if [ "$STATUS_CODE" -eq 3 ] && [ "$_TRAP_RC" -ne 0 ]; then
  exit_malfunction "Unexpected error"
fi' EXIT ERR

#######################################
# Abort with UNKNOWN when a required external command is missing.
# Arguments: $1 - command name
#######################################
command_exists() {
  # No subshell here: exit_malfunction must terminate the script itself.
  if ! command -v "$1" >/dev/null 2>&1; then
    exit_malfunction "I require $1 but it's not installed. Aborting."
  fi
}

command_exists snmpget
command_exists snmpwalk

#######################################
# Evaluate an arithmetic expression with awk (floating point).
# Arguments: $* - expression, pasted verbatim into the awk program, so it
#                 must only ever be built from trusted numeric values
# Outputs:   the result on stdout
#######################################
calc() { awk "BEGIN { print $* }"; }

#######################################
# Count the rows returned for OID_CPU_PERCENT_USED (one per core).
# Outputs: number of rows on stdout
#######################################
get_nproc() {
  snmpwalk -r 2 -v 3 "$SNMP_HOST" "$OID_CPU_PERCENT_USED" | wc -l || exit_malfunction "SNMP failed"
}

#######################################
# Check physical memory usage, not counting buffers and cache.
# Globals:   SNMP_HOST, OID_STORAGE_*, LABEL, MEM_WARNING_THRESHOLD,
#            MEM_CRITICAL_THRESHOLD (read); STATUS_MSG, STATUS_CODE (written)
# Outputs:   one Nagios status line with perfdata on stdout
#######################################
get_mem() {
  local DESC
  local -i i
  # Everything starts at 0 so that a host without a "Memory buffers" or
  # "Cached memory" row does not trip 'set -u' in the arithmetic below.
  local -i USED=0
  local -i SIZE=0
  local -i MEM_PHYS_USED=0
  local -i MEM_PHYS_SIZE=0
  local -i MEM_BUFF_USED=0
  local -i MEM_CACH_USED=0
  local -i MEM_TOTA_USED=0
  local REAL_MEM_PHYS_USED_PERCENT

  # NOTE(review): assumes the memory rows sit at storage indexes 1..10 - confirm per agent.
  for i in {10..1}; do
    # DESC="$(snmpget -r 2 -v 3 -a MD5 -A "$SNMP_PASSWD" -l authNoPriv -u "$SNMP_USER" "$SNMP_HOST" "${OID_STORAGE_DESC}.${i}" | cut -d= -f2 | cut -d: -f2)" || exit_malfunction
    DESC="$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_STORAGE_DESC}.${i}" | cut -d= -f2 | cut -d: -f2)" || exit_malfunction "SNMP failed"

    if grep -q -i -e "memory" <<<"$DESC"; then
      USED="$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_STORAGE_USED}.${i}" | cut -d= -f2 | cut -d: -f2)" || exit_malfunction "SNMP failed"
      SIZE="$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_STORAGE_SIZE}.${i}" | cut -d= -f2 | cut -d: -f2)" || exit_malfunction "SNMP failed"
    fi

    if grep -q -i -e "Physical memory" <<<"$DESC"; then
      MEM_PHYS_USED="$USED"
      MEM_PHYS_SIZE="$SIZE"
    elif grep -q -i -e "Memory buffers" <<<"$DESC"; then
      MEM_BUFF_USED="$USED"
    elif grep -q -i -e "Cached memory" <<<"$DESC"; then
      MEM_CACH_USED="$USED"
    fi
  done

  # Without a physical-memory row the percentage would divide by zero.
  if [ "$MEM_PHYS_SIZE" -le 0 ]; then
    exit_malfunction "Physical memory not found"
  fi

  # BECAUSE FREE MEMORY DONT MEAN AVAILABLE MEMORY
  MEM_TOTA_USED=$((MEM_PHYS_USED - MEM_BUFF_USED - MEM_CACH_USED))

  REAL_MEM_PHYS_USED_PERCENT=$(calc "$MEM_TOTA_USED" \* 100/"$MEM_PHYS_SIZE")
  REAL_MEM_PHYS_USED_PERCENT=$(printf "%.2f" "$REAL_MEM_PHYS_USED_PERCENT")

  # Thresholds are compared against the integer part of the percentage.
  if [ "${REAL_MEM_PHYS_USED_PERCENT%.*}" -ge "${MEM_CRITICAL_THRESHOLD}" ]; then
    STATUS_MSG="CRITICAL"
    STATUS_CODE=2
  elif [ "${REAL_MEM_PHYS_USED_PERCENT%.*}" -ge "${MEM_WARNING_THRESHOLD}" ]; then
    STATUS_MSG="WARNING"
    STATUS_CODE=1
  else
    STATUS_MSG="OK"
    STATUS_CODE=0
  fi

  # NOTE(review): the /1024 "MB" conversion assumes 1 KiB allocation units - confirm.
  # Perfdata metrics are separated by spaces; ';' would be read as thresholds.
  echo "$LABEL $STATUS_MSG - ${REAL_MEM_PHYS_USED_PERCENT}% used ($((MEM_TOTA_USED / 1024)) MB / $((MEM_PHYS_SIZE / 1024)) MB)|TOTAL=$((MEM_PHYS_SIZE / 1024)) USED=$((MEM_PHYS_USED / 1024)) CACHE=$((MEM_CACH_USED / 1024)) BUFFER=$((MEM_BUFF_USED / 1024))"
}

#######################################
# Check disk usage of one partition (PART set) or of every storage entry
# whose description starts with '/' and does not match PART_EXCLUDE.
# With PART unset, the most severe partition is reported.
# Globals:   SNMP_HOST, OID_STORAGE_*, LABEL, PART, PART_EXCLUDE,
#            DISK_WARNING_THRESHOLD, DISK_CRITICAL_THRESHOLD (read);
#            STATUS_MSG, STATUS_CODE (written)
# Outputs:   Nagios status line(s) on stdout; per-partition DEBUG lines
#            for OK partitions on stderr when scanning all partitions
#######################################
get_disk() {
  local -i PART_USED
  local -i PART_SIZE
  local -i WORST_CODE=0
  local WORST_MSG="OK"
  local WORST_LINE=""
  local LISTE
  local REGEX_PART
  local REGEX_EXCLUDE
  local DESC
  local LINE
  local i
  local PART_USED_PERCENT=""

  LISTE=$(snmpwalk -r 2 -v 3 "$SNMP_HOST" "$OID_STORAGE_DESC" | cut -d'=' -f1 | tr -d ' ' | grep -o '[0-9]\+$') || exit_malfunction "SNMP failed"

  if [ "${PART:-}" = "" ]; then
    REGEX_PART='^/'
    REGEX_EXCLUDE="^${PART_EXCLUDE}$"
  else
    # Accept the requested mount point with or without a trailing slash.
    REGEX_PART="^${PART%/}/?$"
    REGEX_EXCLUDE='^$'
  fi

  for i in $LISTE; do
    DESC="$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_STORAGE_DESC}.${i}" | cut -d= -f2 | cut -d: -f2 | xargs)" || exit_malfunction "SNMP failed"

    if ! grep -E -v -i -e "$REGEX_EXCLUDE" <<<"$DESC" | grep -q -E -e "$REGEX_PART"; then
      continue
    fi

    PART_USED="$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_STORAGE_USED}.${i}" | cut -d= -f2 | cut -d: -f2)" || exit_malfunction "SNMP failed"
    PART_SIZE="$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_STORAGE_SIZE}.${i}" | cut -d= -f2 | cut -d: -f2)" || exit_malfunction "SNMP failed"

    if [ "$PART_SIZE" -gt 0 ]; then
      PART_USED_PERCENT=$(calc "$PART_USED" \* 100/"$PART_SIZE")
      PART_USED_PERCENT=$(printf "%.2f" "$PART_USED_PERCENT")
    else
      # A zero-sized entry cannot fill up; avoid dividing by zero.
      PART_USED_PERCENT="0.00"
    fi

    if [ "${PART_USED_PERCENT%.*}" -ge "${DISK_CRITICAL_THRESHOLD}" ]; then
      STATUS_MSG="CRITICAL"
      STATUS_CODE=2
    elif [ "${PART_USED_PERCENT%.*}" -ge "${DISK_WARNING_THRESHOLD}" ]; then
      STATUS_MSG="WARNING"
      STATUS_CODE=1
    else
      STATUS_MSG="OK"
      STATUS_CODE=0
    fi

    # Perfdata metrics are separated by spaces; ';' would be read as thresholds.
    LINE="$LABEL $STATUS_MSG - DISK ${DESC} ${PART_USED_PERCENT}%|PART_USED=${PART_USED} PART_SIZE=${PART_SIZE}"

    if [ "${PART:-}" = "" ]; then
      # Keep scanning: a WARNING seen first must not hide a later CRITICAL.
      if [ "$STATUS_CODE" -gt "$WORST_CODE" ]; then
        WORST_CODE=$STATUS_CODE
        WORST_MSG=$STATUS_MSG
        WORST_LINE=$LINE
      fi
      if [ "$STATUS_CODE" -eq 0 ]; then
        echo "DEBUG: $LINE" >&2
      fi
    else
      echo "$LINE"
    fi
  done

  # Still empty means no storage entry matched.
  if [ "$PART_USED_PERCENT" = "" ]; then
    exit_malfunction "PART: ${PART:-} not found"
  fi

  if [ "${PART:-}" = "" ]; then
    STATUS_CODE=$WORST_CODE
    STATUS_MSG=$WORST_MSG
    if [ "$WORST_CODE" -eq 0 ]; then
      echo "$LABEL $STATUS_MSG - DISK ALL OK"
    else
      echo "$WORST_LINE"
    fi
  fi
}

#######################################
# Check the load of the busiest CPU core.
# Globals:   SNMP_HOST, OID_CPU_PERCENT_USED, LABEL, CPU_WARNING_THRESHOLD,
#            CPU_CRITICAL_THRESHOLD (read); STATUS_MSG, STATUS_CODE (written)
# Outputs:   one Nagios status line with perfdata on stdout
#######################################
get_cpu() {
  local -i MAX_CPU_PERCENT

  # One row per core; keep the highest value.
  MAX_CPU_PERCENT="$(snmpwalk -r 2 -v 3 "$SNMP_HOST" "${OID_CPU_PERCENT_USED}" | cut -d= -f2 | cut -d: -f2 | sort -n | tail -1)" || exit_malfunction "SNMP failed"

  if [ "${MAX_CPU_PERCENT}" -ge "${CPU_CRITICAL_THRESHOLD}" ]; then
    STATUS_MSG="CRITICAL"
    STATUS_CODE=2
  elif [ "${MAX_CPU_PERCENT}" -ge "${CPU_WARNING_THRESHOLD}" ]; then
    STATUS_MSG="WARNING"
    STATUS_CODE=1
  else
    STATUS_MSG="OK"
    STATUS_CODE=0
  fi

  echo "$LABEL $STATUS_MSG - MAX CORE USAGE: ${MAX_CPU_PERCENT}%|MAX_CORE_USAGE_PERCENT=${MAX_CPU_PERCENT}"
}

#######################################
# Check swap usage. A host without swap is reported OK.
# Globals:   SNMP_HOST, OID_SWAP_TOTAL, OID_SWAP_FREE, LABEL,
#            SWAP_WARNING_THRESHOLD, SWAP_CRITICAL_THRESHOLD (read);
#            STATUS_MSG, STATUS_CODE (written)
# Outputs:   one Nagios status line with perfdata on stdout
#######################################
get_swap() {
  local -i SWAP_TOTAL
  local -i SWAP_FREE
  local -i SWAP_USED
  local SWAP_USED_PERCENT

  SWAP_TOTAL="$(snmpwalk -r 2 -v 3 "$SNMP_HOST" "${OID_SWAP_TOTAL}" | cut -d= -f2 | cut -d: -f2)" || exit_malfunction "SNMP failed"
  SWAP_FREE="$(snmpwalk -r 2 -v 3 "$SNMP_HOST" "${OID_SWAP_FREE}" | cut -d= -f2 | cut -d: -f2)" || exit_malfunction "SNMP failed"

  # No swap configured: nothing can be used, and the percentage below
  # would divide by zero.
  if [ "$SWAP_TOTAL" -eq 0 ]; then
    STATUS_MSG="OK"
    STATUS_CODE=0
    echo "$LABEL $STATUS_MSG - no swap configured|SWAP_TOTAL=0 SWAP_FREE=0"
    return 0
  fi

  SWAP_USED=$((SWAP_TOTAL - SWAP_FREE))
  SWAP_USED_PERCENT=$(calc "$SWAP_USED" \* 100/"$SWAP_TOTAL")
  SWAP_USED_PERCENT=$(printf "%.2f" "$SWAP_USED_PERCENT")

  if [ "${SWAP_USED_PERCENT%.*}" -ge "${SWAP_CRITICAL_THRESHOLD}" ]; then
    STATUS_MSG="CRITICAL"
    STATUS_CODE=2
  elif [ "${SWAP_USED_PERCENT%.*}" -ge "${SWAP_WARNING_THRESHOLD}" ]; then
    STATUS_MSG="WARNING"
    STATUS_CODE=1
  else
    STATUS_MSG="OK"
    STATUS_CODE=0
  fi

  # Perfdata metrics are separated by spaces; ';' would be read as thresholds.
  echo "$LABEL $STATUS_MSG - ${SWAP_USED_PERCENT}% used ( $((SWAP_USED / 1024)) MB / $((SWAP_TOTAL / 1024)) MB)|SWAP_TOTAL=${SWAP_TOTAL} SWAP_FREE=${SWAP_FREE}"
}

#######################################
# Check CPU iowait: the raw iowait counter is sampled twice, INTERVAL
# seconds apart, and the delta is divided by core count and interval.
# Globals:   SNMP_HOST, OID_CPU_IOWAIT, INTERVAL, LABEL,
#            IOWAIT_WARNING_THRESHOLD, IOWAIT_CRITICAL_THRESHOLD (read);
#            STATUS_MSG, STATUS_CODE (written)
# Outputs:   one Nagios status line with perfdata on stdout
#######################################
get_iowait() {
  local -i IOWAIT_FIRST
  local -i IOWAIT_SECOND
  local -i NPROC
  local IOWAIT_PERCENT

  # get_nproc prints its own UNKNOWN line on failure, but from inside the
  # command substitution it cannot stop this shell, so stop here.
  NPROC=$(get_nproc) || exit 3
  if [ "$NPROC" -le 0 ]; then
    exit_malfunction "No CPU found via SNMP"
  fi

  IOWAIT_FIRST=$(snmpget -Oqv -v3 "$SNMP_HOST" "$OID_CPU_IOWAIT") || exit_malfunction "SNMP failed"
  sleep "$INTERVAL"
  IOWAIT_SECOND=$(snmpget -Oqv -v3 "$SNMP_HOST" "$OID_CPU_IOWAIT") || exit_malfunction "SNMP failed"

  # NOTE(review): this is a percentage only if the counter ticks 100 times per second - confirm.
  IOWAIT_PERCENT=$(calc $((IOWAIT_SECOND - IOWAIT_FIRST)) / "$NPROC" / "$INTERVAL")
  IOWAIT_PERCENT=$(printf "%.2f" "$IOWAIT_PERCENT")

  if [ "${IOWAIT_PERCENT%.*}" -ge "${IOWAIT_CRITICAL_THRESHOLD}" ]; then
    STATUS_MSG="CRITICAL"
    STATUS_CODE=2
  elif [ "${IOWAIT_PERCENT%.*}" -ge "${IOWAIT_WARNING_THRESHOLD}" ]; then
    STATUS_MSG="WARNING"
    STATUS_CODE=1
  else
    STATUS_MSG="OK"
    STATUS_CODE=0
  fi

  echo "$LABEL $STATUS_MSG - ${IOWAIT_PERCENT}%|IOWAIT_PERCENT=${IOWAIT_PERCENT}"
}

#######################################
# Check the number of logged-in users.
# Globals:   SNMP_HOST, OID_USERS_COUNT, LABEL, USERS_WARNING_THRESHOLD,
#            USERS_CRITICAL_THRESHOLD (read); STATUS_MSG, STATUS_CODE (written)
# Outputs:   one Nagios status line with perfdata on stdout
#######################################
get_users_count() {
  local -i USERS_COUNT

  USERS_COUNT="$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_USERS_COUNT}" | cut -d= -f2 | cut -d: -f2)" || exit_malfunction "SNMP failed"

  if [ "${USERS_COUNT}" -ge "${USERS_CRITICAL_THRESHOLD}" ]; then
    STATUS_MSG="CRITICAL"
    STATUS_CODE=2
  elif [ "${USERS_COUNT}" -ge "${USERS_WARNING_THRESHOLD}" ]; then
    STATUS_MSG="WARNING"
    STATUS_CODE=1
  else
    STATUS_MSG="OK"
    STATUS_CODE=0
  fi

  echo "$LABEL $STATUS_MSG - ${USERS_COUNT} users currently logged in|users=${USERS_COUNT}"
}

#######################################
# Check the number of running processes (rows under OID_PROCS_COUNT).
# Globals:   SNMP_HOST, OID_PROCS_COUNT, LABEL, PROCS_WARNING_THRESHOLD,
#            PROCS_CRITICAL_THRESHOLD (read); STATUS_MSG, STATUS_CODE (written)
# Outputs:   one Nagios status line with perfdata on stdout
#######################################
get_procs() {
  local -i PROCS_COUNT

  PROCS_COUNT=$(snmpwalk -r 2 -v 3 "$SNMP_HOST" "${OID_PROCS_COUNT}" | wc -l) || exit_malfunction "SNMP failed"

  if [ "${PROCS_COUNT}" -ge "${PROCS_CRITICAL_THRESHOLD}" ]; then
    STATUS_MSG="CRITICAL"
    STATUS_CODE=2
  elif [ "${PROCS_COUNT}" -ge "${PROCS_WARNING_THRESHOLD}" ]; then
    STATUS_MSG="WARNING"
    STATUS_CODE=1
  else
    STATUS_MSG="OK"
    STATUS_CODE=0
  fi

  echo "$LABEL $STATUS_MSG - ${PROCS_COUNT} processes|processes=${PROCS_COUNT}"
}

#######################################
# Check the 1/5/15 minute load averages, each divided by the core count.
# Globals:   SNMP_HOST, OID_LOADAVG, LABEL, LOAD_WARNING_THRESHOLD,
#            LOAD_CRITICAL_THRESHOLD (read, "w1,w5,w15" integer triples);
#            STATUS_MSG, STATUS_CODE (written)
# Outputs:   one Nagios status line with perfdata on stdout
#######################################
get_load() {
  local RAW_1M
  local RAW_5M
  local RAW_15M
  local -i NPROC
  local -i LOADAVG_1M_PERCENT
  local -i LOADAVG_5M_PERCENT
  local -i LOADAVG_15M_PERCENT
  local -i LOAD_WARN_1M
  local -i LOAD_WARN_5M
  local -i LOAD_WARN_15M
  local -i LOAD_CRIT_1M
  local -i LOAD_CRIT_5M
  local -i LOAD_CRIT_15M
  local THRESHOLD

  # A missing field would silently become 0 and make the check always alert.
  for THRESHOLD in "$LOAD_WARNING_THRESHOLD" "$LOAD_CRITICAL_THRESHOLD"; do
    if ! [[ "$THRESHOLD" =~ ^[0-9]+,[0-9]+,[0-9]+$ ]]; then
      exit_malfunction "Load thresholds must look like 100,100,100 (got '$THRESHOLD')"
    fi
  done
  IFS=',' read -r LOAD_WARN_1M LOAD_WARN_5M LOAD_WARN_15M <<<"$LOAD_WARNING_THRESHOLD"
  IFS=',' read -r LOAD_CRIT_1M LOAD_CRIT_5M LOAD_CRIT_15M <<<"$LOAD_CRITICAL_THRESHOLD"

  # Walk the CPU table once. get_nproc prints its own UNKNOWN line on
  # failure, but cannot stop this shell from inside a substitution.
  NPROC=$(get_nproc) || exit 3
  if [ "$NPROC" -le 0 ]; then
    exit_malfunction "No CPU found via SNMP"
  fi

  RAW_1M=$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_LOADAVG}.1" | cut -d= -f2 | cut -d: -f2) || exit_malfunction "SNMP failed"
  RAW_5M=$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_LOADAVG}.2" | cut -d= -f2 | cut -d: -f2) || exit_malfunction "SNMP failed"
  RAW_15M=$(snmpget -r 2 -v 3 "$SNMP_HOST" "${OID_LOADAVG}.3" | cut -d= -f2 | cut -d: -f2) || exit_malfunction "SNMP failed"

  # Integer division, as before.
  LOADAVG_1M_PERCENT=$((RAW_1M / NPROC))
  LOADAVG_5M_PERCENT=$((RAW_5M / NPROC))
  LOADAVG_15M_PERCENT=$((RAW_15M / NPROC))

  if [ "$LOADAVG_1M_PERCENT" -ge "$LOAD_CRIT_1M" ] || [ "$LOADAVG_5M_PERCENT" -ge "$LOAD_CRIT_5M" ] || [ "$LOADAVG_15M_PERCENT" -ge "$LOAD_CRIT_15M" ]; then
    STATUS_MSG="CRITICAL"
    STATUS_CODE=2
  elif [ "$LOADAVG_1M_PERCENT" -ge "$LOAD_WARN_1M" ] || [ "$LOADAVG_5M_PERCENT" -ge "$LOAD_WARN_5M" ] || [ "$LOADAVG_15M_PERCENT" -ge "$LOAD_WARN_15M" ]; then
    STATUS_MSG="WARNING"
    STATUS_CODE=1
  else
    STATUS_MSG="OK"
    STATUS_CODE=0
  fi

  # Perfdata metrics are separated by spaces; ';' would be read as thresholds.
  echo "$LABEL $STATUS_MSG - LOAD AVERAGE: ${LOADAVG_1M_PERCENT}%, ${LOADAVG_5M_PERCENT}%, ${LOADAVG_15M_PERCENT}%|LOAD1=$LOADAVG_1M_PERCENT LOAD5=$LOADAVG_5M_PERCENT LOAD15=$LOADAVG_15M_PERCENT"
}

#######################################
# Print the command-line help on stderr.
# NOTE(review): the <...> placeholders were lost from the page and have
# been reconstructed from the option parser - confirm the wording.
#######################################
usage() {
  cat >&2 <<EOF
Usage:
  $SCRIPT_NAME -t <mem|cpu|swap|iowait|users|procs|load> [ -H <hostname> ] [ -w <warning> ] [ -c <critical> ]
  $SCRIPT_NAME -t disk [ -H <hostname> ] [ -w <warning> ] [ -c <critical> ] [ -p <partition> ]
  $SCRIPT_NAME -t disk [ -H <hostname> ] [ -w <warning> ] [ -c <critical> ] [ -e <exclude-regex> ]

Examples:
  $SCRIPT_NAME -t disk -p /var/lib
  $SCRIPT_NAME -t disk -e '/mnt/.*'
  $SCRIPT_NAME -t load -w 110,105,100 -c 160,130,100
EOF
}

#######################################
# Run the probe selected by TYPE, using -w/-c values or the defaults.
# Globals: TYPE, WARNING_THRESHOLD, CRITICAL_THRESHOLD, DEFAULT_* (read);
#          LABEL and the per-probe *_THRESHOLD variables (written)
#######################################
main() {
  case "${TYPE:-}" in
    mem)
      LABEL=MEMORY
      typeset -i MEM_CRITICAL_THRESHOLD
      typeset -i MEM_WARNING_THRESHOLD
      MEM_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_MEM_CRITICAL_THRESHOLD}
      MEM_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_MEM_WARNING_THRESHOLD}
      get_mem
      ;;
    disk)
      LABEL=DISK
      typeset -i DISK_CRITICAL_THRESHOLD
      typeset -i DISK_WARNING_THRESHOLD
      DISK_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_DISK_CRITICAL_THRESHOLD}
      DISK_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_DISK_WARNING_THRESHOLD}
      get_disk
      ;;
    cpu)
      LABEL=CPU
      typeset -i CPU_CRITICAL_THRESHOLD
      typeset -i CPU_WARNING_THRESHOLD
      CPU_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_CPU_CRITICAL_THRESHOLD}
      CPU_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_CPU_WARNING_THRESHOLD}
      get_cpu
      ;;
    swap)
      LABEL=SWAP
      typeset -i SWAP_CRITICAL_THRESHOLD
      typeset -i SWAP_WARNING_THRESHOLD
      SWAP_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_SWAP_CRITICAL_THRESHOLD}
      SWAP_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_SWAP_WARNING_THRESHOLD}
      get_swap
      ;;
    iowait)
      LABEL=IOWAIT
      typeset -i IOWAIT_CRITICAL_THRESHOLD
      typeset -i IOWAIT_WARNING_THRESHOLD
      IOWAIT_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_IOWAIT_CRITICAL_THRESHOLD}
      IOWAIT_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_IOWAIT_WARNING_THRESHOLD}
      get_iowait
      ;;
    users)
      LABEL=USERS
      typeset -i USERS_CRITICAL_THRESHOLD
      typeset -i USERS_WARNING_THRESHOLD
      USERS_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_USERS_CRITICAL_THRESHOLD}
      USERS_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_USERS_WARNING_THRESHOLD}
      get_users_count
      ;;
    procs)
      LABEL=PROCS
      typeset -i PROCS_CRITICAL_THRESHOLD
      typeset -i PROCS_WARNING_THRESHOLD
      PROCS_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_PROCS_CRITICAL_THRESHOLD}
      PROCS_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_PROCS_WARNING_THRESHOLD}
      get_procs
      ;;
    load)
      LABEL=LOAD
      LOAD_CRITICAL_THRESHOLD=${CRITICAL_THRESHOLD:-$DEFAULT_LOAD_CRITICAL_THRESHOLD}
      LOAD_WARNING_THRESHOLD=${WARNING_THRESHOLD:-$DEFAULT_LOAD_WARNING_THRESHOLD}
      get_load
      ;;
    *)
      LABEL=UNDEF
      usage
      exit 3
      ;;
  esac
}

#######################################
# Abort with UNKNOWN when an option that needs a value is last on the line.
# Arguments: $1 - option name, $2 - count of remaining arguments
#######################################
require_value() {
  # Without this, 'shift; VAR=$1' trips 'set -u' and exits 1 with no output.
  if [ "$2" -lt 2 ]; then
    exit_malfunction "Option $1 requires an argument"
  fi
}

while [ "${1-}" != "" ]; do
  case $1 in
    -h | --help)
      usage
      exit 0
      ;;
    -t | --type)
      require_value "$1" "$#"
      shift
      TYPE=$1
      ;;
    -c | --critical)
      require_value "$1" "$#"
      shift
      CRITICAL_THRESHOLD=$1
      ;;
    -w | --warning)
      require_value "$1" "$#"
      shift
      WARNING_THRESHOLD=$1
      ;;
    -H | --hostname)
      require_value "$1" "$#"
      shift
      SNMP_HOST=$1
      ;;
    -p | --partition)
      require_value "$1" "$#"
      shift
      PART=$1
      ;;
    -e | --exclude-partition)
      require_value "$1" "$#"
      shift
      PART_EXCLUDE=$1
      ;;
    --) # End of all options
      shift
      break
      ;;
    -*)
      echo "$SCRIPT_NAME: invalid option" >&2
      echo "Try '$SCRIPT_NAME --help' for more information." >&2
      # A usage error is UNKNOWN (3) for Nagios; exit 1 would read as WARNING.
      exit_malfunction "invalid option $1"
      ;;
    *)
      # Stray positional argument: show the help and stop.
      usage
      exit 3
      ;;
  esac
  shift
done

main

exit $STATUS_CODE
~~~

Conf SNMPv3 pour tester ce script :

''/etc/snmp/snmpd.conf''
~~~bash
skipNFSInHostResources 1

# rwuser: a SNMPv3 read-write user
#   arguments:  user [noauth|auth|priv] [restriction_oid]
rwuser nagios auth
rwuser nagios
~~~

''/usr/share/snmp/snmpd.conf''
~~~bash
rwuser nagios auth
createUser nagios MD5 P@ssw0rd
~~~

### Pb Timeout snmpget

On peut avoir des problèmes de Timeout SNMP.
Pour pallier ce problème il peut être utile de mettre ''skipNFSInHostResources 1'' dans /etc/snmp/snmpd.conf

## Sonde Nagios supervision de la RAM en local via la commande free

Source : https://exchange.nagios.org/directory/Plugins/System-Metrics/Memory/check_mem_ng-2Esh/details

''check_mem_ng.sh''
~~~bash
#!/bin/bash
################################################################################
#This plugin is loosely inspired by check_mem v1.1 from Lukasz Gogolin
#https://exchange.nagios.org/directory/Plugins/System-Metrics/Memory/check_mem-2Esh/details
#I did a code cleanup and added a check of "free" version, because since
#procps-ng 3.3.0 (RHEL 7+) check_mem.sh doesn't work anymore (buff/cache column)
################################################################################
# 1.1 - Added -l flag and LEGACY_PERFATA var to put legacy perfdata mode
################################################################################

#Nagios Constants
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

SCRIPTPATH=$(dirname -- "$0")
if [[ -f "${SCRIPTPATH}/utils.sh" ]]; then
  . "${SCRIPTPATH}/utils.sh" # use nagios utils to set real STATE_* return values
fi

#Useful functions

# Print the script name and version, followed by a blank line.
printversion() {
  echo "$0 $VERSION"
  echo
}

# Print the help text.
# NOTE(review): <warnlevel>/<critlevel> were lost from the page and are
# restored from the explanatory line below them - confirm.
printusage() {
  printversion
  echo "Usage:"
  echo "  check_mem_ng.sh [-w <warnlevel>] [-c <critlevel>] [-v] [-l]"
  echo "      checks local host available memory"
  echo "      warnlevel and critlevel is percentage value without %"
  echo "      defaults being respectively 80 et 90"
  echo "      add -v for verbose (debuging purpose)"
  echo "      add -l for legacy perfdata mode (or change LEGACY_PERFDATA variable in script)"
  echo "  check_mem_ng.sh -V"
  echo "      prints version"
  echo "  check_mem_ng.sh -h"
  echo "      prints help (this message)"
}

# Dump the working variables (verbose mode).
printvariables() {
  local i
  echo "Variables:"
  #Add all your variables at the end of the "for" line to display them in verbose
  for i in WARNING_THRESHOLD CRITICAL_THRESHOLD FINAL_STATE FINAL_COMMENT LEGACY_PERFDATA FREE_OUTPUT TOTAL_MEM FREE_MEM BUFFCACHE_MEM BUFF_MEM CACHE_MEM USED_MEM TOTAL_MEM_MB USED_MEM_MB WARNING_THRESHOLD_B CRITICAL_THRESHOLD_B USED_MEM_PRC ENABLE_PERFDATA VERSION; do
    # Indirect expansion replaces eval; unset variables print as empty.
    printf '%s : %s\n' "$i" "${!i-}"
  done
  echo
}

#Set to unknown in case of unplanned exit
FINAL_STATE=$STATE_UNKNOWN
FINAL_COMMENT="UNKNOWN: Unplaned exit. You should check that everything is alright"

#Default values
WARNING_THRESHOLD=80
CRITICAL_THRESHOLD=90
ENABLE_PERFDATA=1
VERSION="1.1"
VERBOSE=0
PERFDATA=""

#####FORCE LEGACY MODE#####
#put 1 to force legacy perfdata mode without using "-l" flag (no configuration change in nrpe.cfg)
LEGACY_PERFDATA=0
#####FORCE LEGACY MODE#####

#Process arguments
while getopts ":c:hlvVw:" opt; do
  case $opt in
    c)
      CRITICAL_THRESHOLD=$OPTARG
      ;;
    h)
      printusage
      exit $STATE_OK
      ;;
    l)
      LEGACY_PERFDATA=1
      ;;
    v)
      echo "Verbose mode ON"
      echo
      VERBOSE=1
      ;;
    V)
      printversion
      exit $STATE_UNKNOWN
      ;;
    w)
      WARNING_THRESHOLD=$OPTARG
      ;;
    \?)
      echo "UNKNOWN: Invalid option: -$OPTARG"
      exit $STATE_UNKNOWN
      ;;
    :)
      echo "UNKNOWN: Option -$OPTARG requires an argument."
      exit $STATE_UNKNOWN
      ;;
  esac
done

#Thresholds feed integer arithmetic below, so reject anything else early
if ! [[ "$WARNING_THRESHOLD" =~ ^[0-9]+$ && "$CRITICAL_THRESHOLD" =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN: thresholds must be integer percentages"
  exit $STATE_UNKNOWN
fi

#Real check begins here
#LC_ALL=C keeps the "Mem:" row label stable whatever the system locale
FREE_OUTPUT=$(LC_ALL=C free -b | grep Mem:)
TOTAL_MEM=$(awk '{print $2}' <<<"$FREE_OUTPUT")
FREE_MEM=$(awk '{print $4}' <<<"$FREE_OUTPUT")

#Without a usable total the percentage below would divide by zero
if ! [[ "$TOTAL_MEM" =~ ^[0-9]+$ ]] || [ "$TOTAL_MEM" -eq 0 ]; then
  echo "UNKNOWN: unable to parse the output of 'free -b'"
  exit $STATE_UNKNOWN
fi

if LC_ALL=C free -V 2>/dev/null | grep -q procps-ng; then
  #procps-ng, free will display buff/cache as one column
  BUFFCACHE_MEM=$(awk '{print $6}' <<<"$FREE_OUTPUT")
  if [ "$LEGACY_PERFDATA" -eq 1 ]; then
    #Get BUFFER and CACHE separately for legacy output.
    #Wide mode (-w) is what splits them into two columns; plain 'free -b'
    #would put buff/cache and available in columns 6 and 7.
    FREE_OUTPUT2=$(LC_ALL=C free -b -w | grep Mem:)
    BUFF_MEM=$(awk '{print $6}' <<<"$FREE_OUTPUT2")
    CACHE_MEM=$(awk '{print $7}' <<<"$FREE_OUTPUT2")
  fi
else
  #procps, free will display buff/cache as two separate columns
  BUFF_MEM=$(awk '{print $6}' <<<"$FREE_OUTPUT")
  CACHE_MEM=$(awk '{print $7}' <<<"$FREE_OUTPUT")
  BUFFCACHE_MEM=$((BUFF_MEM + CACHE_MEM))
fi

#Deduce used memory from total/free/buffer+cache
USED_MEM=$((TOTAL_MEM - FREE_MEM - BUFFCACHE_MEM))

#Convert in MB for pseudo "human readable" plugin output. Todo, real human readable?
#Shell arithmetic gives the same truncated integers as bc, without needing bc
TOTAL_MEM_MB=$((TOTAL_MEM / 1048576))
USED_MEM_MB=$((USED_MEM / 1048576))
WARNING_THRESHOLD_B=$((WARNING_THRESHOLD * TOTAL_MEM / 100))
CRITICAL_THRESHOLD_B=$((CRITICAL_THRESHOLD * TOTAL_MEM / 100))

#Convert in percents used memory
USED_MEM_PRC=$(((USED_MEM * 100) / TOTAL_MEM))

#Check if used memory is above thresholds
if [ "$USED_MEM_PRC" -ge "$CRITICAL_THRESHOLD" ]; then
  FINAL_STATE=$STATE_CRITICAL
  FINAL_COMMENT="CRITICAL: Memory above critical threshold. Total: ${TOTAL_MEM_MB} MB - Used: ${USED_MEM_MB} MB - ${USED_MEM_PRC}% used!"
elif [ "$USED_MEM_PRC" -ge "$WARNING_THRESHOLD" ]; then
  FINAL_STATE=$STATE_WARNING
  FINAL_COMMENT="WARNING: Memory above warning threshold. Total: ${TOTAL_MEM_MB} MB - Used: ${USED_MEM_MB} MB - ${USED_MEM_PRC}% used!"
else
  FINAL_STATE=$STATE_OK
  FINAL_COMMENT="OK: Memory below thresholds. Total: ${TOTAL_MEM_MB} MB - Used: ${USED_MEM_MB} MB - ${USED_MEM_PRC}% used."
fi

#Perfdata processing
if [ "$ENABLE_PERFDATA" -eq 1 ]; then
  #Added check_mem.sh legacy perfdata mode, to ease migration
  if [ "$LEGACY_PERFDATA" -eq 1 ]; then
    PERFDATA=" | TOTAL=$TOTAL_MEM;;;; USED=$USED_MEM;;;; CACHE=$CACHE_MEM;;;; BUFFER=$BUFF_MEM;;;;"
  else
    PERFDATA=" | Memory_Used=${USED_MEM}B;$WARNING_THRESHOLD_B;$CRITICAL_THRESHOLD_B;0;$TOTAL_MEM;"
  fi
fi

#Script end, display verbose information
if [[ $VERBOSE -eq 1 ]]; then
  printvariables
fi

echo "${FINAL_COMMENT}${PERFDATA}"
exit "$FINAL_STATE"
~~~