nagios|icinga 监控特定端口TCP连接数

来源:互联网 发布:mysql排错指南 pdf 编辑:程序博客网 时间:2024/05/29 09:49

用法

  • 将脚本放在nagios-plugins目录下
    /usr/local/nagios/libexec/check_max_conns.sh
  • 添加自定义命令到 nrpe.cfg
    command[check_conns_args]=/usr/local/nagios/libexec/check_max_conns.sh -s $ARG1$ -w $ARG2$ -c $ARG3$
  • 重启 NRPE 使配置生效(如果 NRPE 由 xinetd 托管,直接执行 service xinetd restart 即可)
    /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

效果图

  • 最大连接数超过3000报警,4000严重
  • 当前连接数864

shell 插件 (check_max_conns.sh)

#!/bin/bash
#
# check_max_conns.sh -- Nagios/Icinga plugin that counts the TCP connections
# reported by `netstat`, optionally filtered by address, port and TCP state,
# and compares the total against warning/critical thresholds.
#
# Usage:
#   check_max_conns.sh [-H IP] [-p PORT] [-s STATE] [-l] -w WARN [-c CRIT]
#
# Special mode: with `-w 0` the check is inverted into a "no connections"
# alarm -- WARNING when the total is 0, OK otherwise (-c may be omitted).
#
# Output (stdout): one status line followed by performance data, e.g.
#   TCP status is OK - Total:5; ESTABLISHED:5; |Total_connections=5;10;20;0;4096
#
# Exit codes follow the Nagios convention (0 OK, 1 WARNING, 2 CRITICAL,
# 3 UNKNOWN).
# NOTE(review): invalid command-line arguments exit with WARNING (1) exactly
# as the script has always done; the Nagios plugin guidelines recommend
# UNKNOWN (3) for that case -- change only if no caller relies on it.

set -u

# Nagios plugin exit codes.
readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

# Fixed min/max values reported in the performance data.
readonly min=0
readonly max=4096

# Directory used by the optional -l logging.
readonly LOG_DIR='/var/log/tcp'

# Option values and shared state (filled in by main and its helpers).
warning=''
critical=''
port=''
ip=''
state=''
log_status='off'
netstat_opts='-nt'        # switched to -ntl when listening sockets are wanted
state_regex=''            # awk regex applied to the netstat state column
info=''                   # human readable summary, e.g. "Total:5; ESTABLISHED:5; "
total_connections_int=0

#######################################
# Print the usage text to stderr and exit with STATE_WARNING.
#######################################
help() {
  local command=${0##*/}
  cat >&2 <<EOF
NAME
        ${command} -- check network status
SYNOPSIS
        ${command} [OPTION]
DESCRIPTION
        -H IP ADDRESS (local or foreign)
        -p PORT (local or foreign)
        -s [ESTABLISHED|SYN_SENT|SYN_RECV|FIN_WAIT|FIN_WAIT1|FIN_WAIT2|TIME_WAIT|CLOSE|CLOSE_WAIT|LAST_ACK|CLOSING|LISTEN]
           (the legacy spellings SYN_SEND and TIMED_WAIT are still accepted)
        -l append the result to ${LOG_DIR}/tcp_stat_<date>.log
        -w warning
        -c critical
USAGE:
Total connections:
        $0 -w 1000 -c 2000
Port:
        $0 -p PORT -w 1000 -c 2000
Host and Port:
        $0 -H HOST -p PORT -w 1000 -c 2000
Status:
        $0 -H HOST -p PORT -s ESTABLISHED -w 1000 -c 2000
EOF
  exit "${STATE_WARNING}"
}

#######################################
# Abort with STATE_WARNING unless the argument consists of decimal digits.
# Arguments: $1 - value to validate
#######################################
check_num() {
  local num_str=$1
  local -r digits_re='^[0-9]+$'
  if [[ ! "${num_str}" =~ ${digits_re} ]]; then
    printf '%s not a positive integers!\n' "${num_str}" >&2
    exit "${STATE_WARNING}"
  fi
}

#######################################
# Abort with STATE_WARNING unless the argument is a dotted-quad IPv4
# address whose four octets are all in 0..255.
# Arguments: $1 - address to validate
#######################################
check_ip() {
  local ip_str=$1
  local -r ipv4_re='^[0-9]{1,3}(\.[0-9]{1,3}){3}$'
  local octet
  local valid=1
  local -a octets=()

  if [[ "${ip_str}" =~ ${ipv4_re} ]]; then
    IFS=. read -r -a octets <<<"${ip_str}"
    for octet in "${octets[@]}"; do
      # 10# forces base 10 so octets such as "08" are not parsed as octal.
      if (( 10#${octet} > 255 )); then
        valid=0
      fi
    done
  else
    valid=0
  fi

  if (( valid == 0 )); then
    printf '%s not a ip!\n' "${ip_str}" >&2
    exit "${STATE_WARNING}"
  fi
}

#######################################
# Translate the -s argument into netstat options and a state filter.
# Globals:   netstat_opts, state_regex (written)
# Arguments: $1 - TCP state name; an empty value means "no filter"
#######################################
check_state() {
  local stat_str=$1
  [[ -n "${stat_str}" ]] || return 0

  case "${stat_str}" in
    LISTEN)
      # Listening sockets are only listed with -l.
      netstat_opts='-ntl'
      state_regex='^LISTEN$'
      ;;
    FIN_WAIT)
      # Historical shorthand covering both FIN_WAIT1 and FIN_WAIT2.
      state_regex='^FIN_WAIT[12]$'
      ;;
    SYN_SEND)
      # Legacy spelling: netstat prints SYN_SENT.
      state_regex='^SYN_SENT$'
      ;;
    TIMED_WAIT)
      # Legacy spelling: netstat prints TIME_WAIT.
      state_regex='^TIME_WAIT$'
      ;;
    ESTABLISHED|SYN_SENT|SYN_RECV|FIN_WAIT1|FIN_WAIT2|TIME_WAIT|CLOSE|CLOSE_WAIT|LAST_ACK|CLOSING)
      state_regex="^${stat_str}\$"
      ;;
    *)
      echo "This script only support [ESTABLISHED|SYN_SENT|SYN_RECV|FIN_WAIT|FIN_WAIT1|FIN_WAIT2|TIME_WAIT|CLOSE|CLOSE_WAIT|LAST_ACK|CLOSING|LISTEN]" >&2
      exit "${STATE_WARNING}"
      ;;
  esac
}

#######################################
# Append the current summary to ${LOG_DIR}/tcp_stat_<date>.log.
# Best effort: failures are reported on stderr but never change the
# plugin result. Only root creates the directory and fixes its ownership.
# Globals: info (read)
#######################################
logging() {
  local now_date log_name log
  now_date=$(date +'%F %T')
  log_name=${now_date%% *}

  if [[ "$(id -u)" == '0' ]]; then
    [[ -d "${LOG_DIR}" ]] || mkdir -p "${LOG_DIR}"
    chown -R nagios:nagios "${LOG_DIR}"
  fi

  log="${LOG_DIR}/tcp_stat_${log_name}.log"
  # Semicolons are stripped so the log line stays whitespace separated.
  printf '%s %s\n' "${now_date}" "${info//;/}" >>"${log}"
  if [[ -f "${log}" ]]; then
    chown nagios:nagios "${log}"
  fi
  return 0
}

#######################################
# Print the plugin status line with performance data.
# Globals:   info, total_connections_int, warning, critical, min, max (read)
# Arguments: $1 - status word (OK, Warning, Critical)
#######################################
message() {
  local stat=$1
  printf 'TCP status is %s - %s|Total_connections=%s;%s;%s;%s;%s\n' \
    "${stat}" "${info}" "${total_connections_int}" \
    "${warning}" "${critical}" "${min}" "${max}"
}

#######################################
# Run netstat, apply the address/port/state filters and fill in the
# summary. Filtering is done on exact fields inside awk, so no
# user-supplied text is ever evaluated by the shell.
# Globals: netstat_opts, state_regex, ip, port (read)
#          info, total_connections_int (written)
#######################################
collect_connections() {
  local netstat_out
  local -r total_re='Total:([0-9]+)'

  if ! command -v netstat >/dev/null 2>&1; then
    echo "TCP status is Unknown - netstat command not found!"
    exit "${STATE_UNKNOWN}"
  fi
  if ! netstat_out=$(netstat "${netstat_opts}"); then
    echo "TCP status is Unknown - netstat ${netstat_opts} failed!"
    exit "${STATE_UNKNOWN}"
  fi

  info=$(awk -v want_ip="${ip}" -v want_port="${port}" -v state_re="${state_regex}" '
    BEGIN { OFS = ":"; ORS = "; " }

    # True when the "address:port" endpoint matches the requested
    # address and/or port. The split is on the last colon, and an
    # IPv4-mapped "::ffff:" prefix is ignored.
    function endpoint_matches(addr,    pos, host, prt) {
      pos = match(addr, /:[^:]*$/)
      if (pos == 0) return 0
      host = substr(addr, 1, pos - 1)
      prt = substr(addr, pos + 1)
      sub(/^::ffff:/, "", host)
      if (want_ip != "" && (host "") != (want_ip "")) return 0
      if (want_port != "" && (prt "") != (want_port "")) return 0
      return 1
    }

    /^tcp/ {
      if (state_re != "" && $NF !~ state_re) next
      if ((want_ip != "" || want_port != "") &&
          !endpoint_matches($4) && !endpoint_matches($5)) next
      stats[$NF] += 1
      sum++
    }

    END {
      print "Total", sum
      for (name in stats) print name, stats[name]
    }
  ' <<<"${netstat_out}")

  # Nothing matched: awk printed an empty total.
  [[ "${info}" =~ [0-9] ]] || info='Total:0'

  if [[ "${info}" =~ ${total_re} ]]; then
    total_connections_int=$((10#${BASH_REMATCH[1]}))
  else
    echo "Total connections is not a number!"
    exit "${STATE_UNKNOWN}"
  fi
}

#######################################
# Parse the options, collect the counters and report the plugin status.
# Arguments: the script's command-line arguments
#######################################
main() {
  local opt
  while getopts 'w:c:p:H:s:l' opt; do
    case "${opt}" in
      w)
        warning=${OPTARG}
        check_num "${warning}"
        ;;
      c)
        critical=${OPTARG}
        check_num "${critical}"
        ;;
      p)
        port=${OPTARG}
        check_num "${port}"
        ;;
      H)
        ip=${OPTARG}
        check_ip "${ip}"
        ;;
      s)
        state=${OPTARG}
        check_state "${state}"
        ;;
      l)
        log_status='on'
        ;;
      *)
        help
        ;;
    esac
  done
  shift $((OPTIND - 1))

  # Stray positional arguments or a missing -w are usage errors.
  if (( $# > 0 )) || [[ -z "${warning}" ]]; then
    help
  fi

  # The numeric comparisons below use `[ ]` on purpose: it reads operands
  # as decimal, so thresholds with leading zeros (e.g. 08) stay valid.
  if [[ -n "${critical}" ]]; then
    if [ "${warning}" -ge "${critical}" ]; then
      echo "-w ${warning} must lower than -c ${critical}!" >&2
      exit "${STATE_UNKNOWN}"
    fi
  elif [[ "${warning}" == '0' ]]; then
    # "-w 0" selects the inverted mode where -c is optional.
    critical=${warning}
  else
    echo "Critical can not be empty!" >&2
    exit "${STATE_UNKNOWN}"
  fi

  collect_connections

  if [[ "${log_status}" == 'on' ]]; then
    logging
  fi

  # Inverted mode: alert when there are no connections at all.
  if [[ "${warning}" == '0' ]]; then
    if (( total_connections_int == 0 )); then
      message "Warning"
      exit "${STATE_WARNING}"
    fi
    message "OK"
    exit "${STATE_OK}"
  fi

  if [ "${total_connections_int}" -lt "${warning}" ]; then
    message "OK"
    exit "${STATE_OK}"
  elif [ "${total_connections_int}" -ge "${critical}" ]; then
    message "Critical"
    exit "${STATE_CRITICAL}"
  elif [ "${total_connections_int}" -ge "${warning}" ]; then
    message "Warning"
    exit "${STATE_WARNING}"
  fi

  # Only reached when the thresholds could not be compared
  # (e.g. values too large for the shell's integer type).
  echo "TCP status is Unknown - cannot compare ${total_connections_int} with -w ${warning} -c ${critical}"
  exit "${STATE_UNKNOWN}"
}

main "$@"
0 0