ganglia与nagios组合使用

来源:互联网 发布:淘宝宝贝描述制作流程 编辑:程序博客网 时间:2024/06/07 00:20

1.复制check_ganglia.py到/usr/lib64/nagios/plugins,并赋予可执行权限(chmod +x /usr/lib64/nagios/plugins/check_ganglia.py),因为nagios会直接执行该脚本

check_ganglia.py(自行修改的,官方的有BUG)

#!/usr/bin/env python
"""Nagios plugin: compare one Ganglia metric of one host against thresholds.

The script connects to a gmond TCP port, reads the XML document gmond sends,
extracts the requested metric of the requested host and exits with a Nagios
status code: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN.

The code runs unchanged on Python 2.6+ and Python 3.
"""
import sys
import getopt
import socket
import xml.parsers.expat


class GParser:
    """Pulls the value of a single metric of a single host out of gmond XML."""

    def __init__(self, host, metric):
        self.inhost = 0
        self.inmetric = 0
        self.value = None
        self.host = host
        self.metric = metric

    def parse(self, file):
        """Parse the XML text in ``file`` and return the metric as a float.

        Raises Exception('Host/value not found') when the host/metric pair
        does not occur in the document.
        """
        p = xml.parsers.expat.ParserCreate()
        # Bind the handler to this instance. The original referenced a
        # module-level name `parser`, which only exists when the script's
        # main section happens to have created it.
        p.StartElementHandler = self.start_element
        p.Parse(file)
        if self.value is None:
            raise Exception('Host/value not found')
        return float(self.value)

    def start_element(self, name, attrs):
        """expat callback: remember VAL of the wanted METRIC inside the wanted HOST."""
        if name == "HOST":
            # Re-evaluate on every HOST element so that metrics of hosts
            # following the requested one cannot overwrite its value.
            self.inhost = 1 if attrs["NAME"] == self.host else 0
        elif self.inhost == 1 and name == "METRIC":
            if attrs["NAME"] == self.metric:
                self.value = attrs["VAL"]


def usage():
    """Print the command-line synopsis and exit with status 3 (UNKNOWN)."""
    print("""Usage: check_ganglia \
-h|--host= -m|--metric= -w|--warning= \
-c|--critical= [-s|--server=] [-p|--port=] """)
    sys.exit(3)


def check_threshold(value, warning, critical):
    """Return ``(label, exit_code)`` for ``value``.

    When ``critical > warning`` larger values are worse; otherwise the
    comparison is inverted and smaller values are worse.
    """
    if critical > warning:
        if value >= critical:
            return ("CRITICAL", 2)
        if value >= warning:
            return ("WARNING", 1)
    else:
        if critical >= value:
            return ("CRITICAL", 2)
        if warning >= value:
            return ("WARNING", 1)
    return ("OK", 0)


def fetch_xml(server, port):
    """Read everything gmond at ``server:port`` sends and return it.

    The socket is always closed, also when connecting or reading fails.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect((server, port))
        # Binary mode: on Python 3 this hands raw bytes to expat instead of
        # text decoded with the process locale.
        stream = s.makefile("rb")
        try:
            data = stream.read()
        finally:
            stream.close()
    finally:
        s.close()
    # The original joined the lines with spaces instead of newlines; kept.
    return data.replace(b"\n", b" ")


def main(argv):
    """Parse ``argv``, query gmond, print the Nagios status line and exit."""
    ganglia_host = '127.0.0.1'
    ganglia_port = 8649
    host = None
    metric = None
    warning = None
    critical = None

    try:
        options, args = getopt.getopt(
            argv,
            "h:m:w:c:s:p:",
            ["host=", "metric=", "warning=", "critical=", "server=", "port="],
        )
    except getopt.GetoptError as err:
        print("check_gmond: %s" % err)
        usage()

    try:
        for o, a in options:
            if o in ("-h", "--host"):
                host = a
            elif o in ("-m", "--metric"):
                metric = a
            elif o in ("-w", "--warning"):
                warning = float(a)
            elif o in ("-c", "--critical"):
                critical = float(a)
            elif o in ("-p", "--port"):
                ganglia_port = int(a)
            elif o in ("-s", "--server"):
                ganglia_host = a
    except ValueError as err:
        # An uncaught traceback exits with status 1, which Nagios would
        # report as WARNING; a bad argument must be UNKNOWN (3) instead.
        print("check_gmond: %s" % err)
        usage()

    if critical is None or warning is None or metric is None or host is None:
        usage()

    try:
        value = GParser(host, metric).parse(fetch_xml(ganglia_host, ganglia_port))
    except Exception as err:
        print("CHECKGANGLIA UNKNOWN: Error while getting value \"%s\"" % (err))
        sys.exit(3)

    label, code = check_threshold(value, warning, critical)
    print("CHECKGANGLIA %s: %s is %.2f" % (label, metric, value))
    sys.exit(code)


if __name__ == "__main__":
    main(sys.argv[1:])

2.创建/etc/nagios/objects/ganglia-services.cfg

# Nagios objects for checking Ganglia metrics with check_ganglia.py.
# Inline notes use ';' because '#' starts a comment only at the beginning
# of a line in Nagios object files.

define host {
    use         linux-server
    host_name   1.1.1.1     ; any name works; flume on host 1 is monitored, so host 1's IP is used
    address     1.1.1.1     ; flume on host 1 is monitored, so host 1's IP is used
}

define hostgroup {
    hostgroup_name  ganglia-servers
    alias           nagios server
    members         *
}

define servicegroup {
    servicegroup_name   ganglia-metrics
    alias               Ganglia Metrics
}

define command {
    command_name    check_ganglia
    command_line    /usr/lib64/nagios/plugins/check_ganglia.py -h mg -m $ARG1$ -w $ARG2$ -c $ARG3$ ; for -h, run the script by hand to see whether the IP or the host name is the right value
}

# Template shared by all Ganglia metric services.
define service {
    use                     generic-service
    name                    ganglia-service
    hostgroup_name          ganglia-servers
    service_groups          ganglia-metrics
    notifications_enabled   0
    register                0   ; template only: 'register' is not inherited and this object has no service_description
}

# Monitors flume.CHANNEL.memoryChannel.EventPutSuccessCount. To add another
# metric, copy this definition and change service_description and check_command.
define service {
    max_check_attempts      5
    normal_check_interval   3
    retry_check_interval    2
    check_period            24x7
    notification_interval   60
    notification_period     24x7
    notification_options    w,u,c,r
    contact_groups          admins
    use                     ganglia-service
    service_description     FLUME发送event数量 ; text shown in the web UI
    check_command           check_ganglia!flume.CHANNEL.memoryChannel.EventPutSuccessCount!10!50 ; metric name can be copied from the graph title in ganglia
}

3.修改contacts.cfg

vi /etc/nagios/objects/contacts.cfg

# Contact that receives the notification mails.
define contact {
    contact_name                    nagiosadmin             ; Short name of user
    use                             generic-contact         ; Inherit default values from generic-contact template (defined above)
    alias                           Nagios Admin            ; Full name of user
    service_notification_period     workhours
    host_notification_period        workhours
    service_notification_options    w,u,c,r
    host_notification_options       d,u,r
    service_notification_commands   notify-service-by-email
    host_notification_commands      notify-host-by-email
    email                           12345@qq.com            ; when copying this definition, only the recipient address needs changing
}

# Group referenced by contact_groups in the service definitions.
define contactgroup {
    contactgroup_name   admins
    alias               bfire
    members             nagiosadmin
}

4.修改nagios.cfg

vi /etc/nagios/nagios.cfg

加入cfg_file=/etc/nagios/objects/ganglia-services.cfg

5.重启nagios和apache

# Restart nagios, then apache.
service nagios restart
service httpd restart

6.网页设置(http://ip/ganglia)

这里写图片描述

这里写图片描述

7.查看nagios日志

more /var/log/nagios/nagios.log
这里写图片描述
日志中出现 SERVICE NOTIFICATION 记录,表示 nagios 已触发通知并执行了发邮件命令(邮件能否送达还取决于第8步的邮件配置)。
这里写图片描述

8.邮件配置

# Remove sendmail and restart postfix.
yum remove sendmail
service postfix restart
## Send a test mail
echo "how are you today" | mail -s "test" 12345@qq.com

其他相关文章:

1. ganglia安装和配置

2. nagios安装和配置

0 0
原创粉丝点击