ES节点简单监控

来源:互联网 发布:电脑淘宝保证金怎么交 编辑:程序博客网 时间:2024/05/07 05:35

本文通过 Elasticsearch 提供的 RESTful API,对集群各节点的 JVM 堆内存使用率和系统负载做简单监控。

#!/usr/bin/python
# coding: utf8
"""Simple Elasticsearch node monitor.

Polls the cluster REST API for every node, extracts indexing / JVM / OS load
figures and sends an alert mail (through ``mailSender``, defined in the mail
snippet of this script) for each node whose heap usage or load average is at
or above its threshold.

The code runs unchanged on Python 2 and Python 3.
"""
import datetime
import json
import os
import re
import smtplib
import socket
import string
import sys
import time
import urllib
from contextlib import closing

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen() into urllib.request.
    import urllib.request as urllib2

DOMAIN_NAME = "es.op.xxx.com"
NODES_URL = "http://" + DOMAIN_NAME + ":9200/_cluster/state/nodes,master_node"
NODE_STAT_URL = "http://" + DOMAIN_NAME + ":9200/_nodes/%s/stats?human=true"
HEAP_THRESHOLD = 90
LOAD_THRESHOLD = 30


def main():
    """Check every node and mail an alert for each one over a threshold."""
    nodes_name = get_nodes()
    for node_id, node_name in nodes_name.items():
        node_info = get_node_info(node_id)
        indices, jvm, load = extract_node_info(node_info)
        heap_ok = jvm['heap_used_percent'] < HEAP_THRESHOLD
        load_ok = _reference_load(load) < LOAD_THRESHOLD
        if heap_ok and load_ok:
            continue
        # Single pre-formatted argument so the call prints the same text on
        # Python 2 (print statement) and Python 3 (print function).
        print("%s %s %s %s %s" % (time.ctime(), node_name, jvm, load, indices))
        mailSender(_build_alert(node_name, indices, jvm, load))


def _reference_load(load):
    """Return the load value compared against LOAD_THRESHOLD.

    This is the third entry of *load* (the original script's ``load[2]``).
    When fewer entries are present the last one is used, and an empty list
    counts as 0, so a missing ``load_average`` no longer raises IndexError.
    """
    if not load:
        return 0
    return load[2] if len(load) > 2 else load[-1]


def _build_alert(node_name, indices, jvm, load):
    """Build the unicode body of the alert mail for one node."""
    lines = [
        u"机器名: " + node_name,
        u"当前索引: " + str(indices['current_index']),
        u"当前查询: " + str(indices['current_query']),
        u"当前合并: " + str(indices['current_merge']),
        u"堆使用: " + str(jvm['heap_used'])
        + " (" + str(jvm['heap_used_percent']) + "%)",
        u"Old GC: " + str(jvm['old_gc_count']),
        u"Young GC: " + str(jvm['young_gc_count']),
        u"OS Load: " + ','.join(str(x) for x in load),
    ]
    # Every line, including the last, is newline-terminated as before.
    return u"\n".join(lines) + u"\n"


def _fetch_json(url):
    """GET *url* and return the parsed JSON body, closing the response."""
    with closing(urllib2.urlopen(url)) as page:
        return json.loads(page.read().decode('utf-8'))


def get_nodes():
    """Return a ``{node_id: node_name}`` dict for all nodes in the cluster."""
    res = _fetch_json(NODES_URL)
    nodes = res.get("nodes") or {}
    nodes_name = {}
    for node_id, node_info in nodes.items():
        name = node_info.get('name') or node_id
        # json.loads() normally yields text already; calling .decode() on it
        # fails for non-ASCII names, so only decode genuine byte strings.
        if isinstance(name, bytes):
            name = name.decode('utf-8')
        nodes_name[node_id] = name
    return nodes_name


def get_node_info(node_id):
    """Return the stats dict of *node_id*, or None if the reply lacks it."""
    res = _fetch_json(NODE_STAT_URL % node_id)
    return res.get("nodes", {}).get(node_id)


def extract_node_info(node_info):
    """Pick the monitored figures out of one node's stats document.

    *node_info* may be None or partially filled; every missing figure falls
    back to 0 and a missing load average to an empty list.

    Returns a tuple ``(indices, jvm, load)``:
      * indices -- dict with current_index / current_query / current_merge
      * jvm     -- dict with heap, pool and GC-count figures
      * load    -- list taken from ``os.load_average``
    """
    node_info = node_info or {}

    indices_stats = node_info.get('indices', {})
    indices = {
        'current_index': indices_stats.get('indexing', {}).get('index_current', 0),
        'current_query': indices_stats.get('search', {}).get('query_current', 0),
        'current_merge': indices_stats.get('merges', {}).get('current', 0),
    }

    org_jvm_info = node_info.get('jvm', {})
    mem = org_jvm_info.get('mem', {})
    pools = mem.get('pools', {})
    collectors = org_jvm_info.get('gc', {}).get('collectors', {})
    jvm = {
        'heap_used': mem.get('heap_used', 0),
        'heap_used_percent': mem.get('heap_used_percent', 0),
        'non_heap_used': mem.get('non_heap_used', 0),
        'old_used': pools.get('old', {}).get('used', 0),
        'young_used': pools.get('young', {}).get('used', 0),
        'survivor_used': pools.get('survivor', {}).get('used', 0),
        'young_gc_count': collectors.get('young', {}).get('collection_count', 0),
        'old_gc_count': collectors.get('old', {}).get('collection_count', 0),
    }

    load = node_info.get('os', {}).get('load_average', [])
    # NOTE(review): some Elasticsearch versions appear to report a single
    # number here instead of a list -- confirm against the target cluster.
    if not isinstance(load, (list, tuple)):
        load = [load]
    return indices, jvm, load


if __name__ == "__main__":
    main()

对于堆内存使用率或系统负载(Load)超过阈值的机器,发送报警邮件:

import smtplib
from email.header import Header
from email.mime.text import MIMEText

# Mail settings. HEAP_THRESHOLD and LOAD_THRESHOLD come from the monitor
# script this snippet belongs to.
sender = 'monitor@xxx.com'
receiver = ['lz@xxx.com', 'xxx@xxx.com', 'xxx@xxx.com']
subject = ('ES Cluster Monitor: Heap(' + str(HEAP_THRESHOLD) + '%)'
           + ' Load(' + str(LOAD_THRESHOLD) + ')')
smtpServer = 'smtp.exmail.qq.com'
userName = 'monitor@xxx.com'
# NOTE(review): credential is hard-coded in the source; consider loading it
# from the environment or a protected config file.
password = 'xxx'
charset = 'gb2312'


def mailSender(content):
    """Send *content* as a plain-text alert mail to every address in receiver.

    The message is encoded with the module-level ``charset``. Errors raised
    by smtplib propagate to the caller; the SMTP session is always ended.
    """
    msg = MIMEText(content, 'plain', charset)
    msg['From'] = sender
    msg['To'] = ','.join(receiver)
    msg['Subject'] = Header(subject, charset)

    smtp = smtplib.SMTP()
    smtp.connect(smtpServer)
    try:
        smtp.login(userName, password)
        smtp.sendmail(sender, receiver, msg.as_string())
    finally:
        # Runs even when login or sendmail raises, so the connection is
        # not left open.
        smtp.quit()

报警邮件如下:

机器名: idc02-xxx-es-06
当前索引: 2360563
当前查询: 0
当前合并: 2
堆使用: 29.1gb (92%)
Old GC: 5523
Young GC: 1360285
OS Load: 2.95,2.82,2.21
0 0
原创粉丝点击