Storm Trident+DRPC实例

来源:互联网 发布:易娱网络待遇好吗 编辑:程序博客网 时间:2024/06/09 22:19

需求

使用Trident统计每个用户(按IP区分)使用过的浏览器数量
通过DRPC查看结果

开发过程

实现IBatchSpout批量读取日志文件

public class MyBatchSpout implements IBatchSpout {    Fields fields;    HashMap<Long, List<List<Object>>> batches = new HashMap();    public MyBatchSpout(Fields fields) {        this.fields = fields;    }    @Override    public void open(Map map, TopologyContext topologyContext) {    }    @Override    public void emitBatch(long batchId, TridentCollector tridentCollector) {        List<List<Object>> batch = (List) this.batches.get(Long.valueOf(batchId));        if (null == batch) {            batch = new ArrayList<List<Object>>();            //读取日志文件列表            String dataDir = "logs/";            File file = new File(dataDir);            Collection<File> listFiles = FileUtils.listFiles(file, new String[]{"log"}, true);            for (File f : listFiles) {                List<String> readLines = null;                try {                    readLines = FileUtils.readLines(f);                    for (String line : readLines) {                        batch.add(new Values(line));                    }                } catch (IOException e) {                    e.printStackTrace();                }                // 文件已经处理完成,在末尾添加done和时间戳,避免重复读取                try {                    File srcFile = f.getAbsoluteFile();                    File destFile = new File(srcFile + ".done." 
+ System.currentTimeMillis());                    FileUtils.moveFile(srcFile, destFile);                } catch (Exception e) {                    e.printStackTrace();                }                this.batches.put(batchId, batch);            }        }        for (List<Object> list : batch) {            tridentCollector.emit(list);        }    }    @Override    public void ack(long batchId) {        this.batches.remove(Long.valueOf(batchId));    }    @Override    public void close() {    }    @Override    public Map<String, Object> getComponentConfiguration() {        Config conf = new Config();        conf.setMaxTaskParallelism(1);        return conf;    }    @Override    public Fields getOutputFields() {        return this.fields;    }}

实现BaseFunction,从每行日志中分割出IP地址和浏览器信息,合并成一个新字段IP_browser

/**
 * Extracts the client IP and the browser string from one raw log line and emits
 * them joined as a single {@code IP_browser} value ({@code <ip>_<browser>}).
 *
 * <p>The line is split on double quotes: the IP is the first space-separated token
 * of the first segment and the browser is the sixth segment (presumably the
 * user-agent of a combined-format access log; confirm against the real log files).
 * Lines whose browser is {@code "-"} and lines that are too short to contain both
 * parts are skipped.
 */
public static class Split extends BaseFunction {
    /** Index of the browser segment after splitting the line on double quotes. */
    private static final int BROWSER_SEGMENT = 5;

    /**
     * @param tridentTuple     input tuple whose first value is the raw log line
     * @param tridentCollector collector receiving at most one {@code IP_browser} value
     */
    @Override
    public void execute(TridentTuple tridentTuple, TridentCollector tridentCollector) {
        String line = tridentTuple.getString(0);
        if (line == null) {
            return;
        }

        String[] words = line.split("\"");
        if (words.length <= BROWSER_SEGMENT) {
            // Malformed or truncated line: skip it rather than fail the whole batch
            // with an ArrayIndexOutOfBoundsException.
            return;
        }

        String[] leadingTokens = words[0].split(" ");
        if (leadingTokens.length == 0) {
            // First segment consists of spaces only, so there is no IP token.
            return;
        }

        String IP = leadingTokens[0];
        String browser = words[BROWSER_SEGMENT];
        if (!browser.equals("-")) {
            String IP_browser = IP + "_" + browser;
            tridentCollector.emit(new Values(IP_browser));
        }
    }
}

实现BaseAggregator,剔除重复的IP_browser,只发送新字段IP(数量由后续的Count聚合统计)

/**
 * Aggregator that collapses the {@code IP_browser} tuples it receives into the set
 * of distinct IPs and emits each IP once when the aggregation completes.
 *
 * <p>The aggregation state is a map used as a set: the key is the IP, the value is
 * always {@code 1} and is never read.
 */
public static class Aggregate extends BaseAggregator<Map<String, Integer>> {

    /** Creates the empty state for a new aggregation. */
    @Override
    public Map<String, Integer> init(Object batchId, TridentCollector collector) {
        Map<String, Integer> seenIps = new HashMap<String, Integer>();
        return seenIps;
    }

    /** Records the IP part (the text before the first {@code _}) of the incoming tuple. */
    @Override
    public void aggregate(Map<String, Integer> seenIps, TridentTuple tuple, TridentCollector collector) {
        String ipAndBrowser = tuple.getStringByField("IP_browser");
        String ip = ipAndBrowser.split("_")[0];
        seenIps.put(ip, 1);
    }

    /** Emits one single-value tuple per distinct IP recorded in the state. */
    @Override
    public void complete(Map<String, Integer> seenIps, TridentCollector collector) {
        for (String ip : seenIps.keySet()) {
            collector.emit(new Values(ip));
        }
    }
}

创建TridentTopology,提供用于构建Trident实时计算程序的一些接口
创建一个TridentState对象,通过newStream方法从指定的Spout创建一个新的数据输入流
日志拆分合并新字段->聚合去重复->count统计数量
创建DRPC流,函数命名为browserFunction

/**
 * Builds the Trident topology: log lines are split into {@code IP_browser} values,
 * de-duplicated per {@code IP_browser} group, and counted per IP into an in-memory
 * state that the {@code browserFunction} DRPC stream queries.
 *
 * @param drpc the local DRPC server the {@code browserFunction} stream is bound to
 * @return the assembled topology, ready to be submitted
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    // Field descriptors shared by the processing stream and the DRPC stream.
    Fields lineField = new Fields("line");
    Fields ipBrowserField = new Fields("IP_browser");
    Fields ipField = new Fields("IP");
    Fields countField = new Fields("count");

    TridentTopology topology = new TridentTopology();
    MyBatchSpout spout = new MyBatchSpout(lineField);

    // Processing stream: line -> IP_browser -> distinct IP per group -> count per IP.
    TridentState tridentState = topology
            .newStream("spout", spout)
            .parallelismHint(4)
            .each(lineField, new Split(), ipBrowserField)
            .parallelismHint(4)
            .groupBy(ipBrowserField)
            .partitionAggregate(ipBrowserField, new Aggregate(), ipField)
            .toStream()
            .parallelismHint(4)
            .groupBy(ipField)
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), countField);

    // DRPC stream: look up the count stored for the requested key and drop misses.
    topology
            .newDRPCStream("browserFunction", drpc)
            .stateQuery(tridentState, new Fields("args"), new MapGet(), countField)
            .each(countField, new FilterNull());

    return topology.build();
}

本地模式运行DRPC

Config conf = new Config();        conf.setMaxSpoutPending(20);        if (args.length == 0) {            LocalDRPC drpc = new LocalDRPC();            //创建LocalDRPC对象在进程内模拟一个DRPC服务器(类似于LocalCluster在进程内模拟一个Storm集群)            LocalCluster cluster = new LocalCluster();            //创建LocalCluster对象在本地模式运行topology            cluster.submitTopology("wordCounter", conf, buildTopology(drpc));            for (int i = 0; i < 100; i++) {                System.out.println("DRPC RESULT: " + drpc.execute("browserFunction", "114.112.141.6"));                Thread.sleep(1000);            }

运行结果

(运行结果截图,原文图片缺失)