openfalcon - agent - fastdfs

来源:互联网 发布:淘宝买太多了会怎么样 编辑:程序博客网 时间:2024/05/29 04:52

过去我们监控fastdfs是使用的sh脚本,报警策略是看uptime是不是一直在直线上升,否则就会报警。

随着openfalcon被大家越来越喜欢,所以各种插件应运而生,但是fastdfs的监控目前市面上没有找到。所以就撸了一个。

  • GitHub仓库地址 https://github.com/zzlyzq/openfalcon-agent-fastdfs/
#!/usr/bin/python#--encoding:utf8import osimport sysimport reimport pprintimport timeimport jsonimport requestsimport timeimport astimport yaml# 定义falcon上报数据的时候用到的变量falconTs = int(time.time())falconEndpoint = "cluster-fastdfs"falconTimeStamp = 60falconPayload=[]falconAgentUrl="http://127.0.0.1:1988/v1/push"# 对于monitor监控到的数据,用以下的变量去存放采集到的数据serverinfo={}currentGroupNumber=""currentStorageNumber=""# 执行命令cmdLine = "/home/machtalk/opt/fastdfs/usr/bin/fdfs_monitor /home/machtalk/opt/fastdfs/etc/fdfs/client.conf"cmdResult = os.popen(cmdLine).readlines()# 定义函数,由于采集到的数据有好几种格式# 100, 数值# 5.05, 版本号# 2016-10-10 10:10:10, 时间格式# 10.10.10.10, ip地址格式# ACTIVE, 存货代表1,其他使用0# 该函数会把这些转换为float类型或者整形。def falconValue(value):    result = re.findall("(\d+\-\d+\-\d+ \d+\:\d+\:\d+)",value)    if len(result) != 0:        timeString = result[0]        timeTP = time.strptime(timeString,"%Y-%m-%d %H:%M:%S")        timeStamp = time.mktime(timeTP)        return int(timeStamp)    result = re.findall("(\d+\.\d+\.\d+\.\d+$)",value)    if len(result) == 1:        result=result[0].split(".")        #print result        #print int(float(result[0])) * (2**24) + int(float(result[1])) * (2**16) + int(float(result[2])) * (2**8) + int(float(result[3]))        return int(float(result[0])) * (2**24) + int(float(result[1])) * (2**16) + int(float(result[2])) * (2**8) + int(float(result[3]))    result = re.findall("(\d+) MB$",value)    if len(result) == 1:            return float(result[0]) * 1024 * 1024    result = re.findall("(ACTIVE)",value)    if len(result) == 1:        return 1    result = re.findall("(IP_CHANGED)",value)    if len(result) == 1:        return -1    result = re.findall("(\d+\.\d+$)",value)    if len(result) == 1:        return value    result = re.findall("(\d+$)",value)    if len(result) == 1:        return value    else:        print "异常"        return -1# 默认采用GAUGE的形式,如果有COUNTER类型,尤其是时间类型,那么就加入下面的列表def falconType(value):    counterTypeList = []    counterTypeList.append("up time")    
counterTypeList.append("join time")    counterTypeList.append("last_heart_beat_time")    counterTypeList.extend(["success_append_count","success_create_link_count","success_delete_count","success_delete_link_count","success_download_count","success_file_open_count","success_file_read_count","success_file_write_count","success_get_meta_count","success_modify_count","success_set_meta_count","success_truncate_count","success_upload_count"])    if value in counterTypeList:        return "COUNTER"    return "GAUGE"# 根据之前的cmdline执行结果,进行处理for line in cmdResult:    # check server_count 和 server_index    result1=re.findall("server_count=(\d+), server_index=(\w+)",line)    if len(result1)==1:        serverinfo['server_count'] = result1[0][0]        serverinfo['server_index'] = result1[0][1]        payloadString="""{ "endpoint": "%s", "metric": "%s", "timestamp": %s, "step": %s, "value": %s, "counterType": "%s", "tags": "%s"} """%(falconEndpoint, "server_count", falconTs, falconTimeStamp, falconValue(serverinfo['server_count']),"GAUGE","")        falconPayload.append(yaml.load(payloadString))        payloadString="""{ "endpoint": "%s", "metric": "%s", "timestamp": %s, "step": %s, "value": %s, "counterType": "%s", "tags": "%s"} """%(falconEndpoint, "server_index", falconTs, falconTimeStamp, falconValue(serverinfo['server_index']),"GAUGE","")        falconPayload.append(yaml.load(payloadString))        continue    # check group count    result2=re.findall("group count: (\d+)",line)     if len(result2) == 1:    serverinfo['group_count'] = result2[0]        payloadString="""{ "endpoint": "%s", "metric": "%s", "timestamp": %s, "step": %s, "value": %s, "counterType": "%s", "tags": "%s"} """%(falconEndpoint, "group_count", falconTs, falconTimeStamp, falconValue(serverinfo['group_count']),"GAUGE","")        falconPayload.append(yaml.load(payloadString))        #print serverinfo        continue    # 如果遇到Group 1    result3=re.findall("Group (\d+)",line)    if len(result3) == 1:        
currentGroupNumber="%s"%result3[0]        serverinfo[currentGroupNumber] = {}        #print "找到currentGroupNumber%s"%(currentGroupNumber)        continue    # 开始解析Group下面的    groupInfoList = ["group name", "disk total space", "disk free space", "trunk free space", "storage server count", "active server count", "storage server port", "storage HTTP port", "store path count", "subdir count per path", "current write server index", "current trunk file id"]    for groupInfo in groupInfoList:        result = re.findall("%s = (.+)"%(groupInfo),line)        if len(result) ==1:            serverinfo[currentGroupNumber][groupInfo] = result[0]            payloadString="""{ "endpoint": "%s", "metric": "%s", "timestamp": %s, "step": %s, "value": %s, "counterType": "%s", "tags": "%s"} """%(falconEndpoint, groupInfo, falconTs, falconTimeStamp, falconValue(serverinfo[currentGroupNumber][groupInfo]),falconType(groupInfo),"group="+currentGroupNumber)            falconPayload.append(yaml.load(payloadString))            break    # Storage 1:    result16 = re.findall("Storage (\d+):",line)    if len(result16) == 1:        print result16        currentStorageNumber = result16[0]        serverinfo[currentGroupNumber][currentStorageNumber]={}        #print "遇到了新的Storage:%s"%(currentStorageNumber)        continue    # 使用列表去处理    storage_item_list=["id","ip_addr","http domain","version","join time","up time","total storage","free storage","upload 
priority","store_path_count","subdir_count_per_path","storage_port","storage_http_port","current_write_path","source","if_trunk_server","connection.alloc_count","connection.current_count","connection.max_count","total_upload_count","success_upload_count","total_append_count","success_append_count","total_modify_count","success_modify_count","total_truncate_count","success_truncate_count","total_set_meta_count","success_set_meta_count","total_delete_count","success_delete_count","total_download_count","success_download_count","total_get_meta_count","success_get_meta_count","total_create_link_count","success_create_link_count","total_delete_link_count","success_delete_link_count","total_upload_bytes","success_upload_bytes","total_append_bytes","success_append_bytes","total_modify_bytes","success_modify_bytes","stotal_download_bytes","success_download_bytes","total_sync_in_bytes","success_sync_in_bytes","total_sync_out_bytes","success_sync_out_bytes","total_file_open_count","success_file_open_count","total_file_read_count","success_file_read_count","total_file_write_count","success_file_write_count","last_heart_beat_time","last_source_update","last_sync_update","last_synced_timestamp", 
"connection.alloc_count","connection.current_count","connection.max_count","total_upload_count","success_upload_count","total_append_count","success_append_count","total_modify_count","success_modify_count","total_truncate_count","success_truncate_count","total_set_meta_count","success_set_meta_count","total_delete_count","success_delete_count","total_download_count","success_download_count","total_get_meta_count","success_get_meta_count","total_create_link_count","success_create_link_count","total_delete_link_count","success_delete_link_count","total_upload_bytes","success_upload_bytes","total_append_bytes","success_append_bytes","total_modify_bytes","success_modify_bytes","stotal_download_bytes","success_download_bytes","total_sync_in_bytes","success_sync_in_bytes","total_sync_out_bytes","success_sync_out_bytes","total_file_open_count","success_file_open_count","total_file_read_count","success_file_read_count","total_file_write_count","success_file_write_count","last_heart_beat_time","last_source_update","last_sync_update","last_synced_timestamp"]    for storage_item in storage_item_list:        print "开始寻找"+storage_item        result = re.findall("^\s+%s = ([\S ]+)"%storage_item,line)        if len(result) == 1:        #print "发现匹配",            #print line            #print result            serverinfo[currentGroupNumber][currentStorageNumber][storage_item] = result[0]            payloadString="""{ "endpoint": "%s", "metric": "%s", "timestamp": %s, "step": %s, "value": %s, "counterType": "%s", "tags": "%s"} """%(falconEndpoint, storage_item, falconTs, falconTimeStamp, falconValue(serverinfo[currentGroupNumber][currentStorageNumber][storage_item]),falconType(storage_item),"group="+currentGroupNumber+",storage="+currentStorageNumber)            falconPayload.append(yaml.load(payloadString))            break# 以下主要用于打印测试,pprint这个不错,可以格式化列表或者字典#print serverinfo#print len(serverinfo)#pp = pprint.PrettyPrinter(indent = 
4)#pp.pprint(serverinfo)#pp.pprint(falconPayload)#print type(falconPayload)#print len(falconPayload)#print type(falconPayload[0])#print json.dumps(falconPayload)r = requests.post(falconAgentUrl, data=json.dumps(falconPayload))print r.text

 结构

  • 采用fdfs_monitor得到当前fastdfs集群的结果
  • 读取monitor返回的结果
    • 采集整理变量(server_count, server_index, group count等)
    • 采集group的整体信息(group name, disk total space, disk free space等)
    • 采集group下面的storage的信息(id, ip_addr, join time, up time, total storage, free storage等)
  • 采用python上报方式,将指标整理成dict和list后上报

监控指标

  • active server count/group=1 (某个group里面,活跃的节点)

这里写图片描述

  • disk free space/group=1 (描述某一个组里面的空闲空间大小)

这里写图片描述

  • group_count (fastdfs集群group的总数量)
    这里写图片描述

  • last_heart_beat_time/group=1,storage=1 (某个storage心跳状态)
    这里写图片描述

  • 某个storage操作计数器
    这里写图片描述

  • 某个storage是否ACTIVE

这里写图片描述

0 0
原创粉丝点击