nagios 笔记

来源:互联网 发布:安全员c证网络查询 编辑:程序博客网 时间:2024/06/05 19:19

nagios安装

apt-get install nagios3
#Nagios web administration password
#123123

默认安装apache2 监听80端口
访问 http://10.10.100.54/nagios3/
默认管理员是nagiosadmin 密码自己设置的
密码文件/etc/nagios3/htpasswd.users

修改管理员密码

htpasswd -c /etc/nagios3/htpasswd.users admin
New password: 123123
Re-type new password: 123123

设置管理员权限

vim /etc/nagios3/cgi.cfg
use_authentication=1
authorized_for_system_information=admin
authorized_for_configuration_information=admin
authorized_for_system_commands=admin
authorized_for_all_services=admin
authorized_for_all_hosts=admin
authorized_for_all_service_commands=admin
authorized_for_all_host_commands=admin

手动执行任务

vim /etc/nagios3/nagios.cfg
#允许手动在页面上执行任务 **System>Scheduling Queue**
check_external_commands=1

错误

#有可能报错
Error: Could not stat() command file '/var/lib/nagios3/rw/nagios.cmd'!
sudo /etc/init.d/nagios3 stop
sudo dpkg-statoverride --update --add nagios www-data 2710 /var/lib/nagios3/rw
sudo dpkg-statoverride --update --add nagios nagios 751 /var/lib/nagios3
sudo /etc/init.d/nagios3 start

重启服务就可以看到页面的Current Status>Hosts已经默认监控本机

添加需要监控的主机

vim /etc/nagios3/conf.d/hosts.cfg #默认没这文件
define host {
    use generic-host
    host_name lvs ; 主机名称
    alias lvs ; 主机别名不设置默认为host_name
    address 10.10.100.100 ; 需要监控主机的IP
    check_interval 1 ; 检查的间隔 1分钟
}

定义主机组

vim /etc/nagios3/conf.d/hostgroups_nagios2.cfg
# Some generic hostgroup definitions

# A simple wildcard hostgroup
define hostgroup {
        hostgroup_name  all
        alias           All Servers
        members         * ; 所有的主机的组
        }

# A list of your Debian GNU/Linux servers
define hostgroup {
        hostgroup_name  debian-servers
        alias           Debian GNU/Linux Servers
        members         localhost
        }

# A list of your web servers
define hostgroup {
        hostgroup_name  http-servers
        alias           HTTP servers
        members         localhost,lvs ; 指定组的主机,按,分割,可以添加多个
        }

# A list of your ssh-accessible servers
define hostgroup {
        hostgroup_name  ssh-servers
        alias           SSH servers
        members         localhost
        }

#添加ftp监控
define hostgroup {
        hostgroup_name  ftp-servers
        alias           FTP Servers
        members         lvs
}

定义服务项

vim /etc/nagios3/conf.d/services_nagios2.cfg
# check that web services are running
define service {
        hostgroup_name                  http-servers
        service_description             HTTP
        check_command                   check_http
        use                             generic-service
        notification_interval           0 ; set > 0 if you want to be renotified
}

# check that ssh services are running
define service {
        hostgroup_name                  ssh-servers
        service_description             SSH
        check_command                   check_ssh
        use                             generic-service
        notification_interval           0 ; set > 0 if you want to be renotified
}

# check that ftp services are running
define service {
        hostgroup_name                  ftp-servers ; 这必须在hostgroups_nagios2.cfg有这个主机组
        service_description             FTP
        check_command                   check_ftp ; 检查FTP,插件默认路径为/usr/lib/nagios/plugins/
        use                             generic-service
        notification_interval           1 ; 重复通知的间隔(分钟),0表示只通知一次、不再重复通知
}

NRPE

我要监控远程主机的 CPU、硬盘空间、内存等等
Nagios 提供了一个外挂插件,叫 NRPE
它可以让 nagios server 在固定时间去抓 nagios client 被监控的项目回来判断是否ok。 和zabbix agent功能类似

远端

#在需要监控的机器上安装
apt-get install nagios-nrpe-server
vim /etc/nagios/nrpe.cfg
......
#Nagios服务器端的地址
allowed_hosts=10.10.100.54
......
#添加监控项
#当前登录的用户大于1警告(warning),大于2危急(critical)
command[check_users]=/usr/lib/nagios/plugins/check_users -w 1 -c 2
#同load average
#当1分钟多于15个进程等待,5分钟多于10个,15分钟多于5个则为警告状态
#当1分钟多于30个进程等待,5分钟多于25个,15分钟多于20个则为危急状态
command[check_load]=/usr/lib/nagios/plugins/check_load -w 15,10,5 -c 30,25,20
#如果空闲空间小于40%就是警告阈值
#如果空闲空间小于10%就是危急阈值
#-p分区
command[check_hda1]=/usr/lib/nagios/plugins/check_disk -w 40% -c 10% -p /dev/sda1
#检查进程
command[check_zombie_procs]=/usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/lib/nagios/plugins/check_procs -w 150 -c 200
......
#重启服务
service nagios-nrpe-server restart

监控端

apt-get install nagios-nrpe-plugin
#检查通讯是否正常
/usr/lib/nagios/plugins/check_nrpe -H 10.10.100.100
NRPE v2.15
vim /etc/nagios-plugins/config/check_nrpe.cfg
.......
#添加
define service {
        use                             generic-service
        hostgroup_name                  all
        service_description             NRPE check_hda1 ; 和被监控端(远端)nrpe.cfg 中的 command[check_hda1] 对应
        check_command                   check_nrpe_1arg!check_hda1
        notification_interval           0
}

define service {
        use                             generic-service
        hostgroup_name                  all
        service_description             NRPE check_total_procs
        check_command                   check_nrpe_1arg!check_total_procs
        notification_interval           0
}

define service {
        use                             generic-service
        hostgroup_name                  all
        service_description             NRPE check_users
        check_command                   check_nrpe_1arg!check_users
        notification_interval           0
}

重启服务后就可以看到效果

通知

定义联系人

vim /etc/nagios3/conf.d/contacts_nagios2.cfg
define contact{
        contact_name                    hu ; 联系人称呼
        service_notification_period     24x7 ; 当服务出现异常时,发送通知的时间段,这个时间段"24x7"在timeperiods_nagios2.cfg文件中定义
        host_notification_period        24x7 ; 当主机出现异常时,发送通知的时间段,这个时间段"24x7"在timeperiods_nagios2.cfg文件中定义
        service_notification_options    w,u,c,r ; 这个定义的是"通知可以被发出的情况"。w(warning)表示警告状态,u(unknown)表示不明状态,c(critical)表示紧急状态,r(recovery)表示恢复状态。也就是在服务出现警告状态、未知状态、紧急状态和重新恢复状态时都发送通知给使用者。
        host_notification_options       d,r ; 定义主机在什么状态下需要发送通知给使用者,d(down)表示宕机状态,r(recovery)表示重新恢复状态。
        service_notification_commands   notify-service-by-email ; 服务故障时,发送通知的方式,可以是邮件和短信,这里发送的方式是邮件,在commands.cfg文件中定义
        host_notification_commands      notify-host-by-email ; 主机故障时,发送通知的方式,可以是邮件和短信,这里发送的方式是邮件,在commands.cfg文件中定义
        email                           xxx@xxx.com ; 接收通知邮件的邮箱
}

定义联系人组

define contactgroup{
        contactgroup_name       hus
        members                 hu ; 多个人用,分割
        }

定义服务器异常的联系人

vim /etc/nagios-plugins/config/check_nrpe.cfg
define service {
        use                             generic-service
        hostgroup_name                  all
        service_description             NRPE check_hda1
        check_command                   check_nrpe_1arg!check_hda1
        contact_groups                  hus ; 如果有异常通知hus组的人
}

添加发送邮件

vim /etc/nagios3/commands.cfg
#测试使用sendEmail发送QQ邮件
define command{
        command_name    notify-host-by-email ; contact里面定义的host_notification_commands
        command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" |sendEmail -f 9656951@qq.com -t $CONTACTEMAIL$ -s smtp.qq.com -u "** 主机: $HOSTALIAS$ is $HOSTSTATE$ **" -xu 9656951@qq.com -xp xxxxxx
        }

define command{
        command_name    notify-service-by-email ; contact里面定义的service_notification_commands
        command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$"  |sendEmail -f 9656951@qq.com -t $CONTACTEMAIL$ -s smtp.qq.com -u "** 主机: $HOSTALIAS$ 服务: $SERVICEDESC$ is $SERVICESTATE$ **" -xu 9656951@qq.com -xp xxx
        }
1 0
原创粉丝点击