Outils pour utilisateurs

Outils du site


tech:notes_supervision_consommation_cpu

Ceci est une ancienne révision du document !


Notes supervision consommation CPU

A superviser

  • Nombre total de processus
  • Nombre total de threads : ps -efL | wc -l
  • Load average
  • IOWAIT

IOWAIT

Voir : https://kb.vander.host/operating-systems/how-to-monitor-disk-performance-iowait-on-linux/

top
sar
iostat -d 2 %iowait
iostat -c 5 100
snmpget -Oqv -v3 localhost .1.3.6.1.4.1.2021.11.54.0
./centreon_plugins.pl --plugin=os::linux::snmp::plugin --hostname=localhost --snmp-version=3 --snmp-username "nagios" --authprotocol MD5 --authpassphrase "P@ssw0rd" --mode cpu-detailed --warning-wait=15 --critical-wait=25

Script check_cpu_stats.sh

Source : https://github.com/Napsty/check_cpu_stats/blob/main/check_cpu_stats.sh

check_cpu_stats.sh
#!/bin/bash
# ==============================================================================
# CPU Utilization Statistics plugin for Nagios 
#
# Original author:  Steve Bosek
# Creation date:    8 September 2007
# Description:      Monitoring plugin (script) to check cpu utilization statistics.
#                   This script has been designed and written on Unix platforms
#                   requiring iostat as external program.
#                   The script is used to query 6 of the key cpu statistics
#                   (user,system,iowait,steal,nice,idle) at the same time.
# History/Changes:  HISTORY moved out of plugin into Git repository / README.md
# License:          GNU General Public License v3.0 (GPL3), see LICENSE in Git repository
#
# Copyright 2007-2009,2011 Steve Bosek
# Copyright 2008 Bas van der Doorn
# Copyright 2008 Philipp Lemke
# Copyright 2016 Philipp Dallig
# Copyright 2022-2023 Claudio Kuenzler
#
# Usage:   ./check_cpu_stats.sh [-w <user,system,iowait>] [-c <user,system,iowait>] ( [-i <report interval>] [-n <report number> ] [-b <N,processname>])
#
# Example: ./check_cpu_stats.sh
#          ./check_cpu_stats.sh -w 70,40,30 -c 90,60,40
#          ./check_cpu_stats.sh -w 70,40,30 -c 90,60,40 -i 3 -n 5 -b '1,apache2' -b '1,running process'
# ========================================================================================
# -----------------------------------------------------------------------------------------
# Plugin description
# Name under which the plugin was invoked, shown in the usage text.
# "$0" is quoted so that a script path containing blanks is not split into
# several basename arguments (which would yield a truncated/wrong name).
PROGNAME=$(basename "$0")
RELEASE="Revision 3.1.5"

# Paths to commands used in this script.  These may have to be modified to match your system setup.
export PATH=$PATH:/usr/local/bin:/usr/bin:/bin # Set path
IOSTAT="iostat"
#Needed for HP-UX
SAR="/usr/bin/sar"

# Nagios return codes
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

# Plugin default parameters value if not defined.
# Each value can be pre-seeded through the environment; the ":=" expansion
# only assigns the default when the variable is unset or empty.
LIST_WARNING_THRESHOLD=${LIST_WARNING_THRESHOLD:="70,40,30"}
LIST_CRITICAL_THRESHOLD=${LIST_CRITICAL_THRESHOLD:="90,60,40"}
INTERVAL_SEC=${INTERVAL_SEC:="1"}
NUM_REPORT=${NUM_REPORT:="3"}
# -----------------------------------------------------------------------------------------
# Check required commands
# HP-UX collects its statistics with sar; every other platform needs iostat.
if [ "$(uname)" = "HP-UX" ]; then
  # Quoted so that an empty SAR value is reported instead of passing the test.
  if [ ! -x "$SAR" ]; then
    echo "UNKNOWN: sar not found or is not executable by the nagios user."
    exit $STATE_UNKNOWN
  fi
else
  for cmd in iostat; do
    # Test the exit status of "command -v" directly; wrapping it in a command
    # substitution would try to execute its (discarded) output.
    if ! command -v "${cmd}" >/dev/null 2>&1; then
      echo "UNKNOWN: ${cmd} does not exist, please check if command exists and PATH is correct"
      exit ${STATE_UNKNOWN}
    fi
  done
fi
# -----------------------------------------------------------------------------------------
# Functions plugin usage
print_release() {
  # Emit the version banner held in RELEASE, then terminate the plugin
  # with the UNKNOWN state.
  printf '%s\n' "$RELEASE"
  exit ${STATE_UNKNOWN}
}
 
print_usage() {
  # Write the option summary to stdout (one printf argument per output line,
  # empty arguments produce the blank separator lines) and exit with status 0.
  printf '%s\n' \
    "" \
    "$PROGNAME $RELEASE - Monitoring plugin to check CPU Utilization" \
    "" \
    "Usage: check_cpu_stats.sh [-w] [-c] [-i] [-n] [-b]+" \
    "" \
    "  -w  Warning threshold in % for warn_user,warn_system,warn_iowait CPU (default : 70,40,30)" \
    "  -c  Critical threshold in % for crit_user,crit_system,crit_iowait CPU (default : 90,60,40)" \
    "  -i  Interval in seconds for iostat (default : 1)" \
    "  -n  Number of reports for iostat (default : 3)" \
    "  -b  The plugin will exit OK when condition matches (number of CPUs and process running), expects an input of N,process (e.g. 4,apache2). Can be used multiple times: -b 1,puppet -b 4,apache2 -b 4,containerd. Works only under Linux." \
    "  -v  Show version" \
    "  -h  Show this page" \
    "" \
    "Usage: $PROGNAME" \
    "Usage: $PROGNAME --help" \
    ""
  exit 0
}
 
print_help() {
  # Print the usage summary followed by a one-line description of the plugin,
  # then exit with status 0.
  #
  # print_usage ends with "exit", which would terminate this function before
  # the description is printed. Running it in a subshell confines that exit
  # to the subshell so the remaining lines are actually reached.
  ( print_usage )
  echo ""
  echo "This plugin will check cpu utilization (user,system,iowait,idle in %)"
  echo ""
  exit 0
}
# -----------------------------------------------------------------------------------------
# Parse parameters
# Parse the command line and store the option values in the global settings.
# Arguments: the plugin's command-line arguments ("$@").
# Globals written: LIST_WARNING_THRESHOLD, LIST_CRITICAL_THRESHOLD,
#                  INTERVAL_SEC, NUM_REPORT, BAIL (array, one entry per -b).
# Exits: via print_help (-h, --help) or print_release (-v); with
#        STATE_UNKNOWN after showing the help when an option is invalid or
#        lacks its argument.
parse_parameters() {
  local Input

  if [ "${1}" = "--help" ]; then print_help; exit $STATE_UNKNOWN; fi

  while getopts "c:w:i:n:b:hv" Input
  do
    case ${Input} in
    w)      LIST_WARNING_THRESHOLD=${OPTARG};;
    c)      LIST_CRITICAL_THRESHOLD=${OPTARG};;
    i)      INTERVAL_SEC=${OPTARG};;
    n)      NUM_REPORT=${OPTARG};;
    b)      BAIL+=("${OPTARG}");;
    h)      print_help;;
    v)      print_release;;
    # A bad invocation must not look like a healthy check result: show the
    # help in a subshell (print_help itself exits) and leave with UNKNOWN.
    *)      ( print_help ); exit $STATE_UNKNOWN;;
    esac
  done
}

parse_parameters "$@"
# -----------------------------------------------------------------------------------------
# List to Table for warning threshold
# Split "user,system,iowait" into an array. The commas are turned into blanks
# and read -a does the word splitting, so no pathname expansion can happen on
# the user-supplied value.
read -r -a TAB_WARNING_THRESHOLD <<< "${LIST_WARNING_THRESHOLD//,/ }"
if [ "${#TAB_WARNING_THRESHOLD[@]}" -ne "3" ]; then
  echo "ERROR : Bad count parameter in Warning Threshold"
  exit $STATE_WARNING
fi
USER_WARNING_THRESHOLD=${TAB_WARNING_THRESHOLD[0]}
SYSTEM_WARNING_THRESHOLD=${TAB_WARNING_THRESHOLD[1]}
IOWAIT_WARNING_THRESHOLD=${TAB_WARNING_THRESHOLD[2]}

# List to Table for critical threshold
read -r -a TAB_CRITICAL_THRESHOLD <<< "${LIST_CRITICAL_THRESHOLD//,/ }"
if [ "${#TAB_CRITICAL_THRESHOLD[@]}" -ne "3" ]; then
  echo "ERROR : Bad count parameter in CRITICAL Threshold"
  exit $STATE_WARNING
fi
USER_CRITICAL_THRESHOLD=${TAB_CRITICAL_THRESHOLD[0]}
SYSTEM_CRITICAL_THRESHOLD=${TAB_CRITICAL_THRESHOLD[1]}
IOWAIT_CRITICAL_THRESHOLD=${TAB_CRITICAL_THRESHOLD[2]}

# Every threshold is later used in integer comparisons. A non-integer value
# would make those comparisons fail with an error and be silently skipped,
# so reject it here instead of producing a misleading result.
for _threshold in "${TAB_WARNING_THRESHOLD[@]}" "${TAB_CRITICAL_THRESHOLD[@]}"; do
  if ! [[ ${_threshold} =~ ^[0-9]+$ ]]; then
    echo "UNKNOWN: Threshold '${_threshold}' is not a non-negative integer"
    exit $STATE_UNKNOWN
  fi
done

# Each warning threshold must be strictly below its critical counterpart.
# NOTE: the exit status of this and the two count checks above is kept as
# STATE_WARNING for backward compatibility.
if [ "${TAB_WARNING_THRESHOLD[0]}" -ge "${TAB_CRITICAL_THRESHOLD[0]}" ] \
  || [ "${TAB_WARNING_THRESHOLD[1]}" -ge "${TAB_CRITICAL_THRESHOLD[1]}" ] \
  || [ "${TAB_WARNING_THRESHOLD[2]}" -ge "${TAB_CRITICAL_THRESHOLD[2]}" ]; then
  echo "ERROR : Critical CPU Threshold lower as Warning CPU Threshold "
  exit $STATE_WARNING
fi
# -----------------------------------------------------------------------------------------
# CPU Utilization Statistics Unix Plateform ( Linux,AIX,Solaris are supported )
# Collect the CPU figures for the current platform and build NAGIOS_DATA
# (human readable text, then "|", then the performance data).
# Sets CPU_USER, CPU_SYSTEM, CPU_IOWAIT and CPU_IDLE on every platform, plus
# CPU_NICE and (when reported) CPU_STEAL on Linux.
case "$(uname)" in
  Linux )
      # Last non-empty line of the iostat output, with decimal commas turned
      # into dots and runs of blanks turned into ';' field separators.
      CPU_REPORT=$(iostat -c "$INTERVAL_SEC" "$NUM_REPORT" | sed -e 's/,/./g' | tr -s ' ' ';' | sed '/^$/d' | tail -1)
      # Number of ';' separators in that line; 6 or more means a steal column
      # precedes the idle column.
      CPU_REPORT_SECTIONS=$(echo "${CPU_REPORT}" | grep -o ';' | wc -l)
      CPU_USER=$(echo "$CPU_REPORT" | cut -d ";" -f 2)
      CPU_NICE=$(echo "$CPU_REPORT" | cut -d ";" -f 3)
      CPU_SYSTEM=$(echo "$CPU_REPORT" | cut -d ";" -f 4)
      CPU_IOWAIT=$(echo "$CPU_REPORT" | cut -d ";" -f 5)
      if [ "${CPU_REPORT_SECTIONS}" -ge 6 ]; then
        CPU_STEAL=$(echo "$CPU_REPORT" | cut -d ";" -f 6)
        CPU_IDLE=$(echo "$CPU_REPORT" | cut -d ";" -f 7)
        NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}%, iowait=${CPU_IOWAIT}%, idle=${CPU_IDLE}%, nice=${CPU_NICE}%, steal=${CPU_STEAL}% | CpuUser=${CPU_USER}%;${TAB_WARNING_THRESHOLD[0]};${TAB_CRITICAL_THRESHOLD[0]};0; CpuSystem=${CPU_SYSTEM}%;${TAB_WARNING_THRESHOLD[1]};${TAB_CRITICAL_THRESHOLD[1]};0; CpuIowait=${CPU_IOWAIT}%;${TAB_WARNING_THRESHOLD[2]};${TAB_CRITICAL_THRESHOLD[2]};0; CpuIdle=${CPU_IDLE}%;0;0;0; CpuNice=${CPU_NICE}%;0;0;0; CpuSteal=${CPU_STEAL}%;0;0;0;"
      else
        CPU_IDLE=$(echo "$CPU_REPORT" | cut -d ";" -f 6)
        NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}%, iowait=${CPU_IOWAIT}%, idle=${CPU_IDLE}%, nice=${CPU_NICE}%, steal=0.00% | CpuUser=${CPU_USER}%;${TAB_WARNING_THRESHOLD[0]};${TAB_CRITICAL_THRESHOLD[0]};0; CpuSystem=${CPU_SYSTEM}%;${TAB_WARNING_THRESHOLD[1]};${TAB_CRITICAL_THRESHOLD[1]};0; CpuIowait=${CPU_IOWAIT}%;${TAB_WARNING_THRESHOLD[2]};${TAB_CRITICAL_THRESHOLD[2]};0; CpuIdle=${CPU_IDLE}%;0;0;0; CpuNice=${CPU_NICE}%;0;0;0; CpuSteal=0.0%;0;0;0;"
      fi

      # Bail out possible under certain situations: each -b entry is
      # "N,process"; exit OK when the host has N CPUs and the process runs.
      if [[ ${#BAIL[*]} -gt 0 ]]; then
        BC_CPU=$(nproc)
        for (( o = 0; o < ${#BAIL[*]}; o++ )); do
          BAIL_CPU[o]=$(echo "${BAIL[o]}" | awk -F',' '{print $1}')
          BAIL_PROCESS[o]=$(echo "${BAIL[o]}" | awk -F',' '{print $2}')
          # Count the matching process lines. A PID list cannot be used here:
          # with several instances it becomes "pid1 pid2 ...", which is not a
          # valid operand for the numeric test below.
          BC_PROCESS=$(ps aux | grep -- "${BAIL_PROCESS[o]}" | grep -Ev "(grep|check_cpu_stats)" | wc -l)
          # An entry without a process name would match every process, so it
          # is ignored.
          if [[ -n ${BAIL_PROCESS[o]} && ${BAIL_CPU[o]} -eq ${BC_CPU} && ${BC_PROCESS} -gt 0 ]]; then
            echo "CPU STATISTICS OK - bailing out because of matched bailout patterns - ${NAGIOS_DATA}"
            exit $STATE_OK
          fi
        done
      fi

      ;;
  AIX ) CPU_REPORT=$(iostat -t "$INTERVAL_SEC" "$NUM_REPORT" | sed -e 's/,/./g' | tr -s ' ' ';' | tail -1)
      CPU_USER=$(echo "$CPU_REPORT" | cut -d ";" -f 4)
      CPU_SYSTEM=$(echo "$CPU_REPORT" | cut -d ";" -f 5)
      CPU_IOWAIT=$(echo "$CPU_REPORT" | cut -d ";" -f 7)
      CPU_IDLE=$(echo "$CPU_REPORT" | cut -d ";" -f 6)
      NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}%, iowait=${CPU_IOWAIT}%, idle=${CPU_IDLE}%, nice=0.00%, steal=0.00% | CpuUser=${CPU_USER}%;${TAB_WARNING_THRESHOLD[0]};${TAB_CRITICAL_THRESHOLD[0]};0; CpuSystem=${CPU_SYSTEM}%;${TAB_WARNING_THRESHOLD[1]};${TAB_CRITICAL_THRESHOLD[1]};0; CpuIowait=${CPU_IOWAIT}%;${TAB_WARNING_THRESHOLD[2]};${TAB_CRITICAL_THRESHOLD[2]};0; CpuIdle=${CPU_IDLE}%;0;0;0; CpuNice=0.0%;0;0;0; CpuSteal=0.0%;0;0;0;"
            ;;
  SunOS ) CPU_REPORT=$(iostat -c "$INTERVAL_SEC" "$NUM_REPORT" | tail -1)
          CPU_USER=$(echo "$CPU_REPORT" | awk '{ print $1 }')
          CPU_SYSTEM=$(echo "$CPU_REPORT" | awk '{ print $2 }')
          CPU_IOWAIT=$(echo "$CPU_REPORT" | awk '{ print $3 }')
          CPU_IDLE=$(echo "$CPU_REPORT" | awk '{ print $4 }')
          NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}%, iowait=${CPU_IOWAIT}%, idle=${CPU_IDLE}%, nice=0.00%, steal=0.00% | CpuUser=${CPU_USER}%;${TAB_WARNING_THRESHOLD[0]};${TAB_CRITICAL_THRESHOLD[0]};0; CpuSystem=${CPU_SYSTEM}%;${TAB_WARNING_THRESHOLD[1]};${TAB_CRITICAL_THRESHOLD[1]};0; CpuIowait=${CPU_IOWAIT}%;${TAB_WARNING_THRESHOLD[2]};${TAB_CRITICAL_THRESHOLD[2]};0; CpuIdle=${CPU_IDLE}%;0;0;0; CpuNice=0.0%;0;0;0; CpuSteal=0.0%;0;0;0;"
          ;;
  HP-UX) CPU_REPORT=$("$SAR" "$INTERVAL_SEC" "$NUM_REPORT" | grep Average)
          CPU_USER=$(echo "$CPU_REPORT" | awk '{ print $2 }')
          CPU_SYSTEM=$(echo "$CPU_REPORT" | awk '{ print $3 }')
          CPU_IOWAIT=$(echo "$CPU_REPORT" | awk '{ print $4 }')
          CPU_IDLE=$(echo "$CPU_REPORT" | awk '{ print $5 }')
          # CpuIowait carries the "%" unit like every other metric and like
          # the other platforms' performance data.
          NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}% iowait=${CPU_IOWAIT}% idle=${CPU_IDLE}% nice=0.00% steal=0.00% | CpuUser=${CPU_USER}%;${TAB_WARNING_THRESHOLD[0]};${TAB_CRITICAL_THRESHOLD[0]};0; CpuSystem=${CPU_SYSTEM}%;${TAB_WARNING_THRESHOLD[1]};${TAB_CRITICAL_THRESHOLD[1]};0; CpuIowait=${CPU_IOWAIT}%;${TAB_WARNING_THRESHOLD[2]};${TAB_CRITICAL_THRESHOLD[2]};0; CpuIdle=${CPU_IDLE}%;0;0;0; CpuNice=0.0%;0;0;0; CpuSteal=0.0%;0;0;0;"
          ;;
  #  MacOS X test
  # Darwin ) CPU_REPORT=`iostat -w $INTERVAL_SEC -c $NUM_REPORT | tail -1`
    #   CPU_USER=`echo $CPU_REPORT | awk '{ print $4 }'`
    #   CPU_SYSTEM=`echo $CPU_REPORT | awk '{ print $5 }'`
    #   CPU_IDLE=`echo $CPU_REPORT | awk '{ print $6 }'`
    #   NAGIOS_DATA="user=${CPU_USER}% system=${CPU_SYSTEM}% iowait=0.00% idle=${CPU_IDLE}% nice=0.00% steal=0.00% | CpuUser=${CPU_USER}%;${TAB_WARNING_THRESHOLD[0]};${TAB_CRITICAL_THRESHOLD[0]};0; CpuSystem=${CPU_SYSTEM}%;${TAB_WARNING_THRESHOLD[1]};${TAB_CRITICAL_THRESHOLD[1]};0; CpuIowait=0.0%;0;0;0; CpuIdle=${CPU_IDLE}%;0;0;0; CpuNice=0.0%;0;0;0; CpuSteal=0.0%;0;0;0;"
    #   ;;
  *)  echo "UNKNOWN: $(uname) not yet supported by this plugin. Coming soon !"
      exit $STATE_UNKNOWN
      ;;
esac
# -----------------------------------------------------------------------------------------
# Add for integer shell issue
# For each metric, keep only the text before the first "." so the value can
# be used in the shell's integer comparisons; the result is stored in the
# matching CPU_<METRIC>_MAJOR variable.
for _cpu_metric in USER SYSTEM IOWAIT IDLE; do
  _cpu_source="CPU_${_cpu_metric}"
  printf -v "CPU_${_cpu_metric}_MAJOR" '%s' "$(echo ${!_cpu_source} | cut -d "." -f 1)"
done
unset _cpu_metric _cpu_source
# -----------------------------------------------------------------------------------------
# Return
# Map the measured values to a plugin state and exit with it.
#
# The three compared values must be plain integers. If the statistics command
# failed or printed something unexpected they are empty or non-numeric; the
# comparisons below would then all fail with an error and the check would
# fall through to "OK". Report UNKNOWN instead.
for _cpu_value in "${CPU_USER_MAJOR}" "${CPU_SYSTEM_MAJOR}" "${CPU_IOWAIT_MAJOR}"; do
  if ! [[ ${_cpu_value} =~ ^[0-9]+$ ]]; then
    echo "CPU STATISTICS UNKNOWN: unable to parse CPU utilization values"
    exit $STATE_UNKNOWN
  fi
done

# Critical wins over warning. The warning branch is only reached when no
# critical threshold was hit, so it needs no upper-bound test.
if [ "${CPU_USER_MAJOR}" -ge "$USER_CRITICAL_THRESHOLD" ] \
  || [ "${CPU_SYSTEM_MAJOR}" -ge "$SYSTEM_CRITICAL_THRESHOLD" ] \
  || [ "${CPU_IOWAIT_MAJOR}" -ge "$IOWAIT_CRITICAL_THRESHOLD" ]; then
    echo "CPU STATISTICS CRITICAL : ${NAGIOS_DATA}"
    exit $STATE_CRITICAL
elif [ "${CPU_USER_MAJOR}" -ge "$USER_WARNING_THRESHOLD" ] \
  || [ "${CPU_SYSTEM_MAJOR}" -ge "$SYSTEM_WARNING_THRESHOLD" ] \
  || [ "${CPU_IOWAIT_MAJOR}" -ge "$IOWAIT_WARNING_THRESHOLD" ]; then
    echo "CPU STATISTICS WARNING : ${NAGIOS_DATA}"
    exit $STATE_WARNING
else
    echo "CPU STATISTICS OK : ${NAGIOS_DATA}"
    exit $STATE_OK
fi
tech/notes_supervision_consommation_cpu.1742825205.txt.gz · Dernière modification : de 127.0.0.1

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki