Apache Eagle上线问题解决

来源:互联网 发布:720云全景源码 编辑:程序博客网 时间:2024/05/18 11:48

之前研究了一下apache eagle,并在测试环境进行了部署测试,这次整理一下正式上线出现的问题和解决办法。

1、新增短信告警。
原生的eagle的告警方式包括邮件、kafka以及系统中记录,我自己在源码中新加了发短信的功能,重新编译后使用。新增和修改的代码如下:
(1)修改了NotificationConstants.java文件,目录在源码包下的
apache-eagle-0.4.0-incubating-src/eagle-core/eagle-alert/eagle-alert-notification-plugin/src/main/java/org/apache/eagle/notification/base中。修改完的代码如下:

 /**
  * String constants shared by the alert notification plugins: the key that
  * selects a notification type, the supported type values, and the
  * per-type configuration keys.
  */
 public class NotificationConstants {

     /** Configuration key whose value names the notification type. */
     public static final String NOTIFICATION_TYPE = "notificationType";

     // supported notification type values
     public static final String EMAIL_NOTIFICATION = "email";
     public static final String PHONEMESSAGE_NOTIFICATION = "phonemessage";
     public static final String KAFKA_STORE = "kafka";
     public static final String EAGLE_STORE = "eagleStore";

     // email specific constants
     public static final String SUBJECT = "subject";
     public static final String SENDER = "sender";
     public static final String RECIPIENTS = "recipients";
     public static final String TPL_FILE_NAME = "tplFileName";

     // kafka specific constants
     public static final String TOPIC = "topic";
     public static final String BROKER_LIST = "kafka_broker";

     // phonemessage specific constants
     /** Configuration key holding the receiver of the phone message. */
     public static final String RECEIVE = "receive";
 }

(2)新增类AlertPhoneMessagePlugin.java。放在apache-eagle-0.4.0-incubating-src/eagle-core/eagle-alert/eagle-alert-notification-plugin/src/main/java/org/apache/eagle/notification/plugin下。完整代码如下:

import java.util.List;import java.util.Map;import java.util.Vector;import java.util.concurrent.ConcurrentHashMap;import org.apache.commons.lang3.builder.HashCodeBuilder;import org.apache.eagle.alert.entity.AlertAPIEntity;import org.apache.eagle.alert.entity.AlertDefinitionAPIEntity;import org.apache.eagle.notification.base.NotificationConstants;import org.apache.eagle.notification.base.NotificationStatus;import org.apache.eagle.notification.phoneMessage.AlertPhoneMessageGenerator;import org.apache.eagle.notification.phoneMessage.AlertPhoneMessageGeneratorBuilder;import org.apache.eagle.notification.utils.NotificationPluginUtils;import org.apache.eagle.policy.common.Constants;import org.slf4j.Logger;import org.slf4j.LoggerFactory;import com.typesafe.config.Config;public class AlertPhoneMessagePlugin implements NotificationPlugin{    private static final Logger LOG = LoggerFactory.getLogger(AlertPhoneMessagePlugin.class);    private List<NotificationStatus> statusList = new Vector<>();    private Map<String, List<AlertPhoneMessageGenerator>> phoneMessageGenerators = new ConcurrentHashMap<>();    private Config config;    @Override    public void init(Config config, List<AlertDefinitionAPIEntity> initAlertDefs) throws Exception {        LOG.info(" Creating PhoneMessage Generator... ");        this.config = config;        for( AlertDefinitionAPIEntity entity : initAlertDefs ) {            List<Map<String,String>>  configMaps = NotificationPluginUtils.deserializeNotificationConfig(entity.getNotificationDef());            this.update(entity.getTags().get(Constants.POLICY_ID), configMaps, false);        }    }    @Override    public void update(String policyId, List<Map<String, String>> notificationConfCollection, boolean isPolicyDelete)            throws Exception {        if( isPolicyDelete ){            LOG.info(" Policy been deleted.. 
Removing reference from Notification Plugin ");            this.phoneMessageGenerators.remove(policyId);            return;        }        Vector<AlertPhoneMessageGenerator> generators = new Vector<>();        for(Map<String, String> notificationConf: notificationConfCollection) {            String notificationType = notificationConf.get(NotificationConstants.NOTIFICATION_TYPE);            if(notificationType == null || notificationType.equalsIgnoreCase(NotificationConstants.PHONEMESSAGE_NOTIFICATION)) {                AlertPhoneMessageGenerator generator = createPhoneMessageGenerator(notificationConf);                generators.add(generator);            }        }        if(generators.size() != 0) {            this.phoneMessageGenerators.put(policyId, generators);            LOG.info("created/updated phonemessage generators for policy " + policyId);        }    }    @Override    public void onAlert(AlertAPIEntity alertEntity) throws Exception {        String policyId = alertEntity.getTags().get(Constants.POLICY_ID);        List<AlertPhoneMessageGenerator> generators = this.phoneMessageGenerators.get(policyId);        for(AlertPhoneMessageGenerator generator: generators) {            boolean isSuccess = generator.sendAlertPhoneMessage(alertEntity);            NotificationStatus status = new NotificationStatus();            if( !isSuccess ) {                status.errorMessage = "Failed to send email";                status.successful = false;            }else {                status.errorMessage = "";                status.successful = true;            }            this.statusList.add(status);        }    }    @Override    public List<NotificationStatus> getStatusList() {        return this.statusList;    }    private AlertPhoneMessageGenerator createPhoneMessageGenerator(Map<String, String> notificationConfig) {        AlertPhoneMessageGenerator gen = AlertPhoneMessageGeneratorBuilder.newBuilder().                
withEagleProps(this.config.getObject("eagleProps")).                withReceove(notificationConfig.get(NotificationConstants.RECEIVE)).                build();        return gen;    }    @Override    public int hashCode(){        return new HashCodeBuilder().append(getClass().getCanonicalName()).toHashCode();    }    @Override    public boolean equals(Object o){        if(o == this)            return true;        if(!(o instanceof AlertPhoneMessagePlugin))            return false;        return true;    }}

(3)新建文件夹phoneMessage。放在apache-eagle-0.4.0-incubating-src/eagle-core/eagle-alert/eagle-alert-notification-plugin/src/main/java/org/apache/eagle/notification目录下。进入新建文件夹下新增两个类,分别是AlertPhoneMessageGeneratorBuilder.java和AlertPhoneMessageGenerator.java。完整代码如下:
AlertPhoneMessageGeneratorBuilder.java:

import com.typesafe.config.ConfigObject;

/**
 * Fluent builder for {@link AlertPhoneMessageGenerator}.
 *
 * <p>The builder creates one generator up front and configures that same
 * instance; {@link #build()} returns it, so a builder should not be reused
 * for a second generator.
 */
public class AlertPhoneMessageGeneratorBuilder {

    private final AlertPhoneMessageGenerator generator;

    private AlertPhoneMessageGeneratorBuilder() {
        generator = new AlertPhoneMessageGenerator();
    }

    /**
     * @return a fresh builder wrapping a new, unconfigured generator
     */
    public static AlertPhoneMessageGeneratorBuilder newBuilder() {
        return new AlertPhoneMessageGeneratorBuilder();
    }

    /**
     * Sets the receiver of the phone message.
     *
     * @param receive receiver value passed on to the generator
     * @return this builder
     */
    public AlertPhoneMessageGeneratorBuilder withReceive(String receive) {
        generator.setReceive(receive);
        return this;
    }

    /**
     * Misspelled predecessor of {@link #withReceive(String)}, kept so existing callers keep compiling.
     *
     * @param receive receiver value passed on to the generator
     * @return this builder
     * @deprecated use {@link #withReceive(String)}
     */
    @Deprecated
    public AlertPhoneMessageGeneratorBuilder withReceove(String receive) {
        return withReceive(receive);
    }

    /**
     * Sets the Eagle properties object on the generator.
     *
     * @param eagleProps configuration object passed on to the generator
     * @return this builder
     */
    public AlertPhoneMessageGeneratorBuilder withEagleProps(ConfigObject eagleProps) {
        generator.setEagleProps(eagleProps);
        return this;
    }

    /**
     * @return the generator configured through this builder
     */
    public AlertPhoneMessageGenerator build() {
        return this.generator;
    }
}

AlertPhoneMessageGenerator.java:

import org.apache.eagle.alert.entity.AlertAPIEntity;
import org.apache.eagle.notification.plugin.SendAlert;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.typesafe.config.ConfigObject;

/**
 * Turns an alert into a phone message and hands it to {@link SendAlert}.
 *
 * <p>The message text is the part of the alert context that starts at the
 * {@code "alertMessage"} key and ends just before the {@code "alertEvent"} key.
 */
public class AlertPhoneMessageGenerator {

    private static final Logger LOG = LoggerFactory.getLogger(AlertPhoneMessageGenerator.class);

    /** Key that marks where the message text begins inside the alert context. */
    private static final String MESSAGE_START_MARKER = "\"alertMessage\"";

    /** Key that marks where the message text ends inside the alert context. */
    private static final String MESSAGE_END_MARKER = "\"alertEvent\"";

    private ConfigObject eagleProps;
    private String receive;

    public ConfigObject getEagleProps() {
        return eagleProps;
    }

    public void setEagleProps(ConfigObject eagleProps) {
        this.eagleProps = eagleProps;
    }

    public String getReceive() {
        return receive;
    }

    public void setReceive(String receive) {
        this.receive = receive;
    }

    /**
     * Sends the alert to the configured receiver.
     *
     * @param alertEntity the alert whose context supplies the message text
     * @return the result reported by {@link SendAlert}, or {@code false} when
     *         the alert has no context to send
     */
    public boolean sendAlertPhoneMessage(AlertAPIEntity alertEntity) {
        return sendAlertPhoneMessage(alertEntity, receive);
    }

    private boolean sendAlertPhoneMessage(AlertAPIEntity alertEntity, String receive) {
        LOG.info("going to send message.....");
        String context = alertEntity.getAlertContext();
        if (context == null) {
            // Nothing to extract a message from; report failure instead of throwing.
            LOG.warn("alert context is null, phone message not sent");
            return false;
        }
        SendAlert send = new SendAlert();
        return send.send(receive, "monitor", extractMessage(context));
    }

    /**
     * Cuts the message text out of the alert context.
     *
     * <p>Returns the text from the start marker up to one character before the
     * end marker. When the start marker is missing the whole context is
     * returned; when the end marker does not follow the start marker the text
     * from the start marker to the end of the context is returned. This keeps
     * an unexpected context layout from raising
     * {@link StringIndexOutOfBoundsException}.
     */
    private static String extractMessage(String context) {
        int start = context.indexOf(MESSAGE_START_MARKER);
        if (start < 0) {
            return context;
        }
        // One character before the end marker, so the separator in front of it is dropped too.
        int end = context.indexOf(MESSAGE_END_MARKER, start) - 1;
        if (end <= start) {
            return context.substring(start);
        }
        return context.substring(start, end);
    }
}

在AlertPhoneMessageGenerator.java类的sendAlertPhoneMessage方法中调用了SendAlert类的send方法发送短信,其中的str变量对alertEntity.getAlertContext()得到的字符串做了处理,作为发送短信的内容。由于发短信方式各公司不同,所以就不贴出SendAlert类的代码了。

(4)在eagle所使用的数据库中找到alertnotifications_alertnotifications这个表,新增一条记录。
uuid:WSdQ7H_____62aP_YA4exQXAReU
notificationType:phoneMessage
enable:1
description:send alert to phone
className:org.apache.eagle.notification.plugin.AlertPhoneMessagePlugin
fields:[{"name":"receive"}] (与NotificationConstants类中新增的RECEIVE字段值对应)
这样添加完后,在eagle页面中的增加告警模块就增加了发送短信的设置,可在页面中填写短信发送的号码。
这里写图片描述

2、hive任务监控告警storm任务出错。
现象就是时不时在storm页面中,任务的Spouts报错,查看storm日志,报错是connection refused

2017-04-21T10:09:31.538+0800 o.a.e.j.c.RMResourceFetcher [INFO] Going to fetch job detail information for application_1492370390007_208973 , url: http://rm1:50030/proxy/application_1492370390007_208973/ws/v1/mapreduce/jobs?anonymous=true
2017-04-21T10:09:31.541+0800 o.a.e.j.c.RunningJobCrawlerImpl [ERROR] Got an exception when fetching resource, jobId: job_1492370390007_208973
java.net.ConnectException: Connection refused
        at java.net.PlainSocketImpl.socketConnect(Native Method) ~[na:1.7.0_80]
        at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:339) ~[na:1.7.0_80]
        at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:200) ~[na:1.7.0_80]
        at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:182) ~[na:1.7.0_80]
        at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392) ~[na:1.7.0_80]
        at java.net.Socket.connect(Socket.java:579) ~[na:1.7.0_80]
        at sun.net.NetworkClient.doConnect(NetworkClient.java:175) ~[na:1.7.0_80]
        at sun.net.www.http.HttpClient.openServer(HttpClient.java:432) ~[na:1.7.0_80]
        at sun.net.www.http.HttpClient.openServer(HttpClient.java:527) ~[na:1.7.0_80]
        at sun.net.www.http.HttpClient.<init>(HttpClient.java:211) ~[na:1.7.0_80]
        at sun.net.www.http.HttpClient.New(HttpClient.java:308) ~[na:1.7.0_80]
        at sun.net.www.http.HttpClient.New(HttpClient.java:326) ~[na:1.7.0_80]
        at sun.net.www.protocol.http.HttpURLConnection.getNewHttpClient(HttpURLConnection.java:997) ~[na:1.7.0_80]
        at sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:933) ~[na:1.7.0_80]
        at sun.net.www.protocol.http.HttpURLConnection.connect(HttpURLConnection.java:851) ~[na:1.7.0_80]
        at sun.net.www.protocol.http.HttpURLConnection.followRedirect(HttpURLConnection.java:2411) ~[na:1.7.0_80]
        at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1558) ~[na:1.7.0_80]
        at org.apache.eagle.jobrunning.util.InputStreamUtils.openGZIPInputStream(InputStreamUtils.java:39) ~[stormjar.jar:na]
        at org.apache.eagle.jobrunning.util.InputStreamUtils.getInputStream(InputStreamUtils.java:51) ~[stormjar.jar:na]
        at org.apache.eagle.jobrunning.util.InputStreamUtils.getInputStream(InputStreamUtils.java:59) ~[stormjar.jar:na]
        at org.apache.eagle.jobrunning.crawler.RMResourceFetcher.doFetchRunningJobInfo(RMResourceFetcher.java:140) ~[stormjar.jar:na]
        at org.apache.eagle.jobrunning.crawler.RMResourceFetcher.getResource(RMResourceFetcher.java:257) ~[stormjar.jar:na]
        at org.apache.eagle.jobrunning.crawler.RunningJobCrawlerImpl.crawl(RunningJobCrawlerImpl.java:328) ~[stormjar.jar:na]
        at org.apache.eagle.jobrunning.storm.JobRunningSpout.nextTuple(JobRunningSpout.java:124) [stormjar.jar:na]
        at org.apache.eagle.datastream.storm.SpoutProxy.nextTuple(SpoutProxy.scala:42) [stormjar.jar:na]
        at backtype.storm.daemon.executor$fn__6579$fn__6594$fn__6623.invoke(executor.clj:565) [storm-core-0.9.5.jar:0.9.5]
        at backtype.storm.util$async_loop$fn__459.invoke(util.clj:463) [storm-core-0.9.5.jar:0.9.5]
        at clojure.lang.AFn.run(AFn.java:24) [clojure-1.5.1.jar:na]
        at java.lang.Thread.run(Thread.java:745) [na:1.7.0_80]

原因是,有些任务运行时间很短暂。当eagle去请求 http://rm1:50030/proxy/application_1492370390007_208973/ws/v1/mapreduce/jobs?anonymous=true 这个地址时,如果这个任务已经运行完了,信息会到jobhistory上,该请求会直接转发到jobhistory地址,而且走的是hostname而不是ip,该计算节点没有配置jobhistory的hostname地址,所以无法请求到信息。如果这个任务没运行完,则返回的是xml文件,不会重定向到jobhistory。所以出现了时不时出错的现象。

0 0