nagios应用2- 监控linux服务器

来源:互联网 发布:mac如何上传文件到qq群 编辑:程序博客网 时间:2024/05/17 07:28

二、监控linux服务器

1.  NRPE模式

1.1配置被监控机

 

创建nagios用户

# useradd nagios

#passwd nagios

 

 

安装nagios-plugins

# tar zxf nagios-plugins-1.4.16.tar.gz

                  # cd nagios-plugins-1.4.16

                  #./configure --with-nagios-user=nagios --with-nagios-group=nagios --prefix=/usr/local/nagios

                  #make

                  #make install

           #chown -R nagios.nagios /usr/local/nagios/

 

安装NRPE

           # tar zxf nrpe-2.13.tar.gz

           #cd nrpe-2.13

           #./configure

           #make all

           # make install-plugin

# make install-daemon

# make install-daemon-config

#chown -R nagios.nagios /usr/local/nagios/

                  

启动NRPE

#/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d

 

将NRPE添加到系统启动脚本中

# echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d" >> /etc/rc.d/rc.local

 

重启NRPE

#rm -rf /etc/xinetd.d/nrpe               删除nrpe文件

#/etc/init.d/xinetd restart                重启xinetd服务

# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d          重启NRPE

 

配置NRPE

#su - nagios

$ vi /usr/local/nagios/etc/nrpe.cfg

###########################################################################

server_port=5666            #NRPE端口

server_address=0.0.0.0              #本机IP或0.0.0.0全部网络接口

nrpe_user=nagios            #NRPE用户

nrpe_group=nagios           #NRPE用户组

allowed_hosts=127.0.0.1,192.168.8.211   #监控服务器IP

command[check_sda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda1 #监控/boot分区状态

command[check_sda3]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda3     #监控根分区状态

command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%

command[check_javaRAM]=/usr/local/nagios/libexec/check_javaRAM.sh 1000 1500

command[check_traffic]=/usr/local/nagios/libexec/check_traffic.sh -V 2c -C public -H 127.0.0.1 -I 2 -w 2000,3000 -c 4000,5000 -K -B

###########################################################################

 

#df -h  #在使用nagios用户查看硬盘分区情况时,如果出现Permission denied的情况,需要把nagios用户加入sudo用户列表并可免密码运行

# vi /etc/sudoers

              Cmnd_Alias NAGIOS_CMD = /bin/df           #指定一个命令

              nagios ALL=NOPASSWD: NAGIOS_CMD           #设定无密码执行


1.2 配置监控服务器

安装NRPE并设置,同被监控机设置


配置nagios监控文件

#vi /usr/local/nagios/etc/objects/192.168.1.13-NRPE.cfg

###########################################################################

        define host{

              use           linux-server

              host_name     192.168.1.13-NRPE

              alias         192.168.1.13 NRPE

              address       192.168.1.13

              hostgroups    linux-servers

       }

       

      define service{

                use                           generic-service

               host_name                     192.168.1.13-NRPE

               service_description          check_users

               check_command                 nrpe!check_users

         }

###########################################################################

 

#/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg    检查nagios配置

#service nagios restart     没有错误重启nagios服务



2.   被动模式

 

2.1  配置监控服务器

 

安装NSCA

# tar zxf nsca-2.9.1.tar.gz

# cd nsca-2.9.1

# ./configure

# make all

 

 ***************************************************************


出现如下提示,说明没有libmcrypt环境:

checking for libmcrypt - version >= 2.4.11... no

*** Could not run libmcrypt test program, checking why...

 

解决方法:

#yum install libmcrypt.x86_64 libmcrypt-devel.x86_64 -y

确定/etc/ld.so.conf中有“include ld.so.conf.d/*.conf”

#ldconfig

#make devclean

# ./configure                       重新编译

***************************************************************



# cp src/nsca /usr/local/nagios/bin/

# cp sample-config/nsca.cfg /usr/local/nagios/etc

# chown nagios.nagios /usr/local/nagios/bin/nsca

# chown nagios.nagios /usr/local/nagios/etc/nsca.cfg

# cp init-script /etc/init.d/nsca

# chmod a+x /etc/init.d/nsca

# chkconfig --add nsca

 

 

在监控服务器上配置NSCA

# vi /usr/local/nagios/etc/nsca.cfg

         server_port=5667                                         #端口

         server_address=192.168.8.211                #Nagios的对外IP

         nsca_user=nagios                                         #Nagios用户

         nsca_group=nagios                                      #Nagios用户组

         debug=1                                                          #debug选项即log选项,写入message

         aggregate_writes=1                                    #能够支持更大的监控量,建议开启

max_packet_age=30                                   #数据包过期时间,单位秒

password=password                                   #连接密码

decryption_method=1                                 #数据传输加密方式

 

 

2.2  配置被监控机

 

安装SEND_NSCA

         #cp send_nsca /usr/local/nagios/bin/

# cp send_nsca.cfg /usr/local/nagios/etc

# chown nagios.nagios /usr/local/nagios/bin/send_nsca

# chown nagios.nagios /usr/local/nagios/etc/send_nsca.cfg

 

编辑一个SEND_NSCA执行脚本

#mkdir /usr/local/nagios/include/nsca                               创建一个目录

# vi /usr/local/nagios/libexec/nsca_check.sh

###########################################################################

         NSER=192.168.8.211

SERVICE_NAME01="partition"

SERVICE_NAME02="load"

SERVICE_NAME03="swap"

SERVICE_NAME04="ssh"

CHECK_PATH="/usr/local/nagios/include/nsca"

       /bin/bash "$CHECK_PATH"/nsca_check_"$SERVICE_NAME01".sh | /usr/local/nagios/bin/send_nsca -H "$NSER" -to 60 -c /usr/local/nagios/etc/send_nsca.cfg

       /bin/bash "$CHECK_PATH"/nsca_check_"$SERVICE_NAME02".sh | /usr/local/nagios/bin/send_nsca -H "$NSER" -to 60 -c /usr/local/nagios/etc/send_nsca.cfg

       /bin/bash "$CHECK_PATH"/nsca_check_"$SERVICE_NAME03".sh | /usr/local/nagios/bin/send_nsca -H "$NSER" -to 60 -c /usr/local/nagios/etc/send_nsca.cfg

       /bin/bash "$CHECK_PATH"/nsca_check_"$SERVICE_NAME04".sh | /usr/local/nagios/bin/send_nsca -H "$NSER" -to 60 -c /usr/local/nagios/etc/send_nsca.cfg

###########################################################################

 

设定一个计划任务,每5分钟执行一下脚本

#su - nagios

$crontab -e

                   */5 * * * * /bin/bash /usr/local/nagios/libexec/nsca_check.sh


编辑nsca脚本

$vi /usr/local/nagios/include/nsca/nsca_check_partition.sh

###########################################################################

#!/bin/bash

#Nsca Common Check Script

hostname=$(hostname)

Plugin_path="/usr/local/nagios/libexec"

 

# Partition

Partitions=$(df -h|awk -F'% ' '{print $2}'|sed '1d'|grep -v '^$\|boot\|shm\|mnt')

for partition in $Partitions

do

        result=$($Plugin_path/check_disk -w 10% -c 5% -p $partition)

        status=$?

        output=$(echo "$result"|awk -F';' '{print $1}')

        echo -e "$hostname\tpartition\t$status\t$output"

done

###########################################################################

 

 

$vi /usr/local/nagios/include/nsca/nsca_check_load.sh

###########################################################################

 #!/bin/bash

#Nsca Common Check Script

 hostname=$(hostname)

Plugin_path="/usr/local/nagios/libexec"

 

#Load

 result=$($Plugin_path/check_load -w 15,10,5 -c 30,25,20)

status=$?

output=$(echo "$result"|awk -F'|' '{print $1}')

 echo -e "$hostname\tload\t$status\t$output"

###########################################################################

 

 

$vi /usr/local/nagios/include/nsca/nsca_check_ssh.sh

###########################################################################

       #!/bin/bash

       #Nsca Common Check Script

       hostname=$(hostname)

       Plugin_path="/usr/local/nagios/libexec"

 

       #Ssh

       result=$(/etc/init.d/sshd status)

       status=$?

        echo -e "$hostname\tssh\t$status\t$result"

###########################################################################

 

 

$vi /usr/local/nagios/include/nsca/nsca_check_swap.sh

##########################################################################

       #!/bin/bash

       #Nsca Common Check Script

       hostname=$(hostname)

       Plugin_path="/usr/local/nagios/libexec"

        

       #Swap

       result=$($Plugin_path/check_swap -w 50% -c 20%)

       status=$?

       output=$(echo "$result"|awk -F'|' '{print $1}')

       echo -e "$hostname\tswap\t$status\t$output"

 ###########################################################################

 

2.3 配置nagios监控文件

    配置nagios.cfg,修改:

    $vi /usr/local/nagios/etc/nagios.cfg

              check_external_commands = 1

              command_check_interval= -1

 

    配置templates.cfg,修改:

    $vi /usr/local/nagios/etc/objects/templates.cfg

        define service{

             name                        passive_service

               use                         generic-service

               active_checks_enabled           0

               passive_checks_enabled                  1

              flap_detection_enabled          1

              register                        0

              is_volatile                     0

              check_period                    24x7

              max_check_attempts              3

              normal_check_interval           5

               retry_check_interval            1

              check_freshness                 0

              contact_groups                admins

               check_command                check_dummy!0

              notifications_enabled                       1

               notification_interval               30

              notification_period              24x7

              notification_options          w,u,c,r

              stalking_options               w,c,u

         }

 

    配置commands.cfg,修改:

    $vi /usr/local/nagios/etc/objects/commands.cfg

        define command{

        command_name    check_dummy

        command_line    $USER1$/check_dummy $ARG1$

        }

 

    配置监控文件

    $vi /usr/local/nagios/etc/objects/192.168.1.13-NSCA.cfg

        ###########################################################################

                define host{

                                use                          linux-server          

                                host_name           centos_test01    

                                alias                        192.168.1.13 NSCA            

                                address                 192.168.1.13       

                                hostgroups           linux-servers

                }

 

                define service {

                       use                             passive_service

                      host_name                      centos_test01

                      service_description             partition

                }

 

                define service {

                      use                             passive_service

                      host_name                       centos_test01

                      service_description             load

                }

 

                define service {

                      use                             passive_service

                      host_name                       centos_test01

                      service_description             swap

                }

 

                define service {

                      use                              passive_service

                      host_name                       centos_test01

                      service_description              ssh

              }

###########################################################################.

 

注:

1.  Send_nsca发来的数据格式为:

centos_test01             load        0              OK - load average: 0.17, 0.16, 0.17

被监控主机名【tab】被监控项目名【tab】状态码【tab】备注信息

这个格式由Send_nsca决定的,不可更改

 

2.  监控文件中的host_name必须与Send_nsca数据中的被监控主机名一致,即与真实的被监控主机名相同。

监控文件中的service_description必须与Send_nsca数据中的被监控项目名一致。

原创粉丝点击