ganglia与nagios组合使用
来源:互联网 发布:淘宝宝贝描述制作流程 编辑:程序博客网 时间:2024/06/07 00:20
1.复制check_ganglia.py到/usr/lib64/nagios/plugins
check_ganglia.py(自行修改的,官方的有BUG)
#!/usr/bin/env python
"""Nagios plugin that checks one Ganglia metric of one host against thresholds.

The script connects to a gmond/gmetad XML port, reads the full XML dump,
extracts the ``VAL`` attribute of the requested ``METRIC`` inside the
requested ``HOST`` element and prints a one-line status.  The exit code is
0 (OK), 1 (WARNING), 2 (CRITICAL) or 3 (UNKNOWN / usage error).

The syntax is kept compatible with both Python 2.6+ and Python 3.
"""
import sys
import getopt
import socket
import xml.parsers.expat


class GParser:
    """Expat-based extractor for a single host/metric value."""

    def __init__(self, host, metric):
        """Remember which HOST ``NAME`` and METRIC ``NAME`` to look for."""
        self.inhost = 0      # 1 while the parser is inside the wanted HOST
        self.inmetric = 0    # kept for compatibility; not used by the parser
        self.value = None    # raw VAL string of the matched metric, if any
        self.host = host
        self.metric = metric

    def parse(self, file):
        """Parse the XML text ``file`` and return the metric value as float.

        Raises ``Exception('Host/value not found')`` when no matching
        HOST/METRIC pair was seen in the document.
        """
        p = xml.parsers.expat.ParserCreate()
        # Bind the handler to this instance; the original referenced a
        # module-level ``parser`` that only exists when run as a script.
        p.StartElementHandler = self.start_element
        p.Parse(file)
        if self.value is None:
            raise Exception('Host/value not found')
        return float(self.value)

    def start_element(self, name, attrs):
        """Expat start-tag callback: track the current HOST, grab the METRIC."""
        if name == "HOST":
            # Re-evaluate on every HOST so that metrics of later hosts are
            # not mistaken for metrics of the requested one.
            self.inhost = 1 if attrs.get("NAME") == self.host else 0
        elif self.inhost == 1 and name == "METRIC":
            if attrs.get("NAME") == self.metric:
                self.value = attrs["VAL"]


def usage():
    """Print the command-line synopsis and exit with status 3 (UNKNOWN)."""
    print("Usage: check_ganglia "
          "-h|--host= -m|--metric= -w|--warning= "
          "-c|--critical= [-s|--server=] [-p|--port=]\n")
    sys.exit(3)


def check_thresholds(value, warning, critical):
    """Return ``(exit_code, label)`` for ``value`` given the two thresholds.

    When ``critical > warning`` higher values are worse (``>=`` comparisons).
    Otherwise the scale is inverted and lower values are worse: the value is
    CRITICAL when ``critical >= value`` and WARNING when ``warning >= value``.
    """
    if critical > warning:
        if value >= critical:
            return 2, "CRITICAL"
        if value >= warning:
            return 1, "WARNING"
        return 0, "OK"
    if critical >= value:
        return 2, "CRITICAL"
    if warning >= value:
        return 1, "WARNING"
    return 0, "OK"


def fetch_xml(server, port):
    """Read the whole XML dump from ``server:port`` as one line of text."""
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect((server, port))
        stream = s.makefile("r")
        try:
            # Newlines are replaced by spaces, as the original loop did.
            return stream.read().replace("\n", " ")
        finally:
            stream.close()
    finally:
        # Always release the socket, including on connect/read errors.
        s.close()


def main(argv=None):
    """Parse the command line, query Ganglia, print the status and exit."""
    if argv is None:
        argv = sys.argv[1:]

    ganglia_host = '127.0.0.1'
    ganglia_port = 8649
    host = None
    metric = None
    warning = None
    critical = None

    try:
        options, args = getopt.getopt(
            argv,
            "h:m:w:c:s:p:",
            ["host=", "metric=", "warning=", "critical=", "server=", "port="],
        )
    except getopt.GetoptError as err:
        print("check_gmond: %s" % err)
        usage()

    try:
        for o, a in options:
            if o in ("-h", "--host"):
                host = a
            elif o in ("-m", "--metric"):
                metric = a
            elif o in ("-w", "--warning"):
                warning = float(a)
            elif o in ("-c", "--critical"):
                critical = float(a)
            elif o in ("-p", "--port"):
                ganglia_port = int(a)
            elif o in ("-s", "--server"):
                ganglia_host = a
    except ValueError as err:
        # A traceback would exit with status 1, which Nagios reads as WARNING.
        print("check_gmond: %s" % err)
        usage()

    if critical is None or warning is None or metric is None or host is None:
        usage()

    try:
        value = GParser(host, metric).parse(fetch_xml(ganglia_host, ganglia_port))
    except Exception as err:
        print("CHECKGANGLIA UNKNOWN: Error while getting value \"%s\"" % (err))
        sys.exit(3)

    code, label = check_thresholds(value, warning, critical)
    print("CHECKGANGLIA %s: %s is %.2f" % (label, metric, value))
    sys.exit(code)


if __name__ == "__main__":
    main()
2.创建/etc/nagios/objects/ganglia-services.cfg
define host {
    use                     linux-server
    host_name               1.1.1.1             ; 名字随便起,监控的是1上的flume,就写1的ip
    address                 1.1.1.1             ; 名字随便起,监控的是1上的flume,就写1的ip
}

define hostgroup {
    hostgroup_name          ganglia-servers
    alias                   nagios server
    members                 *
}

define servicegroup {
    servicegroup_name       ganglia-metrics
    alias                   Ganglia Metrics
}

define command {
    command_name            check_ganglia
    command_line            /usr/lib64/nagios/plugins/check_ganglia.py -h mg -m $ARG1$ -w $ARG2$ -c $ARG3$    ; -h 这个需要在命令行上执行脚本看用ip还是主机名合适
}

define service {
    use                     generic-service
    name                    ganglia-service
    hostgroup_name          ganglia-servers
    service_groups          ganglia-metrics
    notifications_enabled   0
}

# 监控flume.CHANNEL.memoryChannel.EventPutSuccessCount,其他复制的改两个地方就行【service_description和check_command】
define service {
    max_check_attempts      5
    normal_check_interval   3
    retry_check_interval    2
    check_period            24x7
    notification_interval   60
    notification_period     24x7
    notification_options    w,u,c,r
    contact_groups          admins
    use                     ganglia-service
    service_description     FLUME发送event数量    ; 网页上显示用的
    check_command           check_ganglia!flume.CHANNEL.memoryChannel.EventPutSuccessCount!10!50    ; 直接从ganglia标题上复制就行
}
3.修改contacts.cfg
vi /etc/nagios/objects/contacts.cfg
define contact{
    contact_name                    nagiosadmin             ; Short name of user
    use                             generic-contact         ; Inherit default values from generic-contact template (defined above)
    alias                           Nagios Admin            ; Full name of user
    service_notification_period     workhours
    host_notification_period        workhours
    service_notification_options    w,u,c,r
    host_notification_options       d,u,r
    service_notification_commands   notify-service-by-email
    host_notification_commands      notify-host-by-email
    email                           12345@qq.com            ; 【复制以后只改接收邮箱地址就行】
}

define contactgroup{
    contactgroup_name               admins
    alias                           bfire
    members                         nagiosadmin
}
4.修改nagios.cfg
vi /etc/nagios/nagios.cfg
加入cfg_file=/etc/nagios/objects/ganglia-services.cfg
5.重启nagios和apache
service nagios restart
service httpd restart
6.网页设置(http://ip/ganglia)
7.查看nagios日志
more /var/log/nagios/nagios.log
SERVICE NOTIFICATION代表邮件发送成功。
8.邮件配置
yum remove sendmail
service postfix restart
## 发送测试邮件
echo "how are you today" | mail -s "test" 12345@qq.com
其他相关文章:
1. ganglia安装和配置
2. nagios安装和配置
0 0
- ganglia与nagios组合使用
- Ganglia与Nagios介绍
- 完美集群监控组合ganglia和nagios
- Hadoop监控之Nagios 与 Ganglia 整合
- Nagios/Ganglia与Splunk成就云计算监控体系
- Ganglia 和 Nagios
- Nagios、Ganglia和Splunk
- Nagios Ganglia网络监控
- nagios+ganglia监控系统
- Ganglia集成Nagios组合监视企业集群并设置邮件报警
- [转]Ganglia 和 Nagios,第 2 部分: 使用 Nagios 监视企业集群
- Ganglia 和 Nagios,第 2 部分: 使用 Nagios 监视企业集群
- Ganglia 和 Nagios,第 2 部分: 使用 Nagios 监视企业集群
- Ganglia 和 Nagios,第 2 部分: 使用 Nagios 监视企业集群
- Ganglia API安装与使用
- 集成 Nagios 报告 Ganglia 指标
- ganglia集成nagios集群监控
- nagios+ganglia监控Hadoop集群
- poj1001这题坑太多,套路很深,宝宝心里苦
- java验证码生成代码
- 哈希表KV形式的二次探测
- TF-IDF提取关键词
- 微信群发接口发送视频获取media_id返回null
- ganglia与nagios组合使用
- angularjs学习记录--tab选项卡效果
- 深入理解Java:内省(Introspector)
- for循环里的值添加到数组后在循环外打印数组为空
- shell 字符串操作
- 当前线程查看
- tomcat配置
- NAT原理与NAT穿越
- 通用java程序linux启动脚本精简版