WordCountTopology的实现
来源:互联网 发布:ftp服务器端软件 编辑:程序博客网 时间:2024/06/05 01:13
- 流程图如下:
2. 编写SentenceSpout
package com.ibeifeng.bigdata.storm.topo;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Map;
import java.util.Random;

/**
 * Spout that emits random sentences, simulating a data source.
 *
 * <p>Each call to {@link #nextTuple()} picks one sentence from a fixed pool;
 * sentences containing the word "error" are logged instead of emitted.
 * Created by ad on 2016/12/11.
 */
//public class SentenceSpout implements IRichSpout{
public class SentenceSpout extends BaseRichSpout {

    private static final Logger logger = LoggerFactory.getLogger(SentenceSpout.class);

    /** Tuple emitter, provided by the framework in {@link #open}. */
    private SpoutOutputCollector collector;

    /**
     * Reused across calls: creating a new {@code Random} inside
     * {@code nextTuple()} on every invocation is wasteful and can reduce
     * randomness quality when calls are frequent.
     */
    private final Random random = new Random();

    /** Fixed pool of sentences used to simulate an external data source. */
    private static final String[] SENTENCES = {
            "hadoop yarn mapreduce spark",
            "flume hadoop hive spark",
            "oozie yarn spark storm",
            "storm yarn mapreduce error",
            "error flume storm spark"
    };

    /**
     * Declares the field names of the tuples this spout emits, in order.
     *
     * @param declarer receives the output schema ("sentence")
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sentence"));
    }

    /**
     * Component-specific configuration overrides.
     *
     * @return {@code null}: no special configuration for this spout
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    /**
     * Initialization hook, called exactly once when the spout instance is
     * created on a worker.
     *
     * @param conf      topology configuration
     * @param context   task/topology context
     * @param collector emitter used to send tuples downstream
     */
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        // Keep the emitter for use in nextTuple().
        this.collector = collector;
    }

    /**
     * Called before the spout is shut down; NOT guaranteed to run.
     * In cluster mode the supervisor may {@code kill -9} the worker process,
     * in which case this never executes. In local mode a clean stop command
     * does invoke it.
     */
    @Override
    public void close() {
        // Cleanup work would go here.
    }

    /** Called when the spout is (re)activated. */
    @Override
    public void activate() {
    }

    /** Called when the spout is temporarily deactivated. */
    @Override
    public void deactivate() {
    }

    /**
     * Core spout method, invoked in a loop by the framework:
     * 1) fetch data from the source (simulated here by random selection),
     * 2) lightly process it,
     * 3) wrap it in a tuple and emit it to downstream bolts.
     */
    @Override
    public void nextTuple() {
        // Randomly pick one sentence (simulates reading from a data source).
        String sentence = SENTENCES[random.nextInt(SENTENCES.length)];
        if (sentence.contains("error")) {
            logger.error("记录有问题:" + sentence);
        } else {
            // Wrap into a tuple and emit.
            this.collector.emit(new Values(sentence));
        }
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            // Restore the interrupt status so the framework can observe the
            // interruption, instead of swallowing it with printStackTrace().
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Called when the tuple identified by {@code msgId} was fully processed.
     * Unused: this spout emits unanchored tuples (no message id).
     *
     * @param msgId unique id of the successfully processed tuple
     */
    @Override
    public void ack(Object msgId) {
    }

    /**
     * Same as {@link #ack} but called on processing failure.
     *
     * @param msgId unique id of the failed tuple
     */
    @Override
    public void fail(Object msgId) {
    }
}
- 编写SplitBolt
package com.ibeifeng.bigdata.storm.topo;import backtype.storm.task.OutputCollector;import backtype.storm.task.TopologyContext;import backtype.storm.topology.IRichBolt;import backtype.storm.topology.OutputFieldsDeclarer;import backtype.storm.tuple.Fields;import backtype.storm.tuple.Tuple;import backtype.storm.tuple.Values;import java.util.Map;/** * Bolt开发 * Created by ad on 2016/12/11. */public class SplitBolt implements IRichBolt{ /** * bolt组件中发射器 */ private OutputCollector collector; /** * Bolt组件的初始化方法 * * @param stormConf * @param context * @param collector */ @Override public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) { this.collector = collector; } /** * 每接收到前面组件发射过来的tuple就调用一次 * * bolt对数据处理逻辑写在该方法中 * 处理完后的数据封装成tuple(value部分),继续发射给后面的组件 * 或者执行比如写到数据库、打印到文件等等操作(终点) * * @param input */ @Override public void execute(Tuple input) { String sentence = input.getStringByField("sentence"); if(sentence != null && !"".equals(sentence)){ String[] words = sentence.split(" "); for (String word: words){ this.collector.emit(new Values(word)); } } } /** * cleanup方法在bolt被关闭的时候调用, 它应该清理所有被打开的资源。 * 但是集群不保证这个方法一定会被执行。比如执行task的机器down掉了, * 那么根本就没有办法来调用那个方法。cleanup设计的时候是被用来在 * local mode的时候才被调用(也就是说在一个进程里面模拟整个storm集群), * 并且你想在关闭一些topology的时候避免资源泄漏 */ @Override public void cleanup() { } /** * declareOutputFields定义一个叫做”word”的字段的 * 该bolt/spout输出的字段个数,供下游使用,在该bolt中的execute方法中, * emit发射的字段个数必须和声明的相同,否则报错:Tuple created with wrong * number of fields. Expected 2 fields but got 1 fields * @param declarer */ @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word")); } @Override public Map<String, Object> getComponentConfiguration() { return null; }}
- 编写CountBolt
package com.ibeifeng.bigdata.storm.topo;import backtype.storm.task.OutputCollector;import backtype.storm.task.TopologyContext;import backtype.storm.topology.OutputFieldsDeclarer;import backtype.storm.topology.base.BaseRichBolt;import backtype.storm.tuple.Fields;import backtype.storm.tuple.Tuple;import backtype.storm.tuple.Values;import java.util.HashMap;import java.util.Map;/** * 单词计数 * Created by ad on 2016/12/11. */public class CountBolt extends BaseRichBolt { private Map<String,Integer> counts; /** * bolt组件中发射器 */ private OutputCollector collector; /** * Bolt组件的初始化方法 * * @param stormConf * @param context * @param collector */ @Override public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) { this.collector = collector; this.counts = new HashMap<>(); } @Override public void execute(Tuple input) { String word = input.getStringByField("word"); // 单词的累计 int count = 1; if(counts.containsKey(word)){ count = counts.get(word) + 1; } counts.put(word, count); this.collector.emit(new Values(word, count)); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word","count")); }}
- 编写PrintBolt
package com.ibeifeng.bigdata.storm.topo;import backtype.storm.task.OutputCollector;import backtype.storm.task.TopologyContext;import backtype.storm.topology.OutputFieldsDeclarer;import backtype.storm.topology.base.BaseRichBolt;import backtype.storm.tuple.Tuple;import java.util.Map;/** * Created by ad on 2016/12/11. */public class PrintBolt extends BaseRichBolt{ @Override public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) { } @Override public void execute(Tuple input) { String word = input.getStringByField("word"); Integer count = input.getIntegerByField("count"); System.err.println("单词:" + word + ", ----> 累计出现次数:"+ count); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { }}
6.编写测试程序WordCountTopology
package com.ibeifeng.bigdata.storm.topo;import backtype.storm.Config;import backtype.storm.LocalCluster;import backtype.storm.StormSubmitter;import backtype.storm.generated.AlreadyAliveException;import backtype.storm.generated.InvalidTopologyException;import backtype.storm.topology.TopologyBuilder;import backtype.storm.tuple.Fields;/** * wordcountTopology * Created by ad on 2016/12/11. */public class WordCountTopology { private static final String SPOUT_ID = "sentenceSpout"; private static final String SPLIT_BOLT = "splitBolt"; private static final String COUNT_BOLT = "countBolt"; private static final String PRINT_BOLT = "printBolt"; public static void main(String[] args) { // 构造Topology TopologyBuilder builder = new TopologyBuilder(); builder.setSpout(SPOUT_ID,new SentenceSpout()); // 指定 Spout // 指定 SentenceSpout 向SplitBolt发射tuple 随机分组 builder.setBolt(SPLIT_BOLT, new SplitBolt()) //.localOrShuffleGrouping(SPOUT_ID) .shuffleGrouping(SPOUT_ID); builder.setBolt(COUNT_BOLT, new CountBolt()).fieldsGrouping(SPLIT_BOLT, new Fields("word")); builder.setBolt(PRINT_BOLT, new PrintBolt()) .globalGrouping(COUNT_BOLT); // 全局分组 Config conf = new Config(); if(args == null || args.length == 0){ // 本地执行 LocalCluster localCluster = new LocalCluster(); localCluster.submitTopology("wordcount", conf ,builder.createTopology()); }else{ // 提交到集群上执行 conf.setNumWorkers(1); // 指定使用多少个进程来执行该Topology try { StormSubmitter.submitTopology(args[0],conf, builder.createTopology()); } catch (AlreadyAliveException e) { e.printStackTrace(); } catch (InvalidTopologyException e) { e.printStackTrace(); } } }}
0 0
- WordCountTopology的实现
- WordCountTopology 执行日志分析
- 使用IntelliJ IDEA导入storm-starter并测试WordCountTopology运行出现 A JNI error has occurred的原因和解决
- Storm 本地模式运行WordCountTopology
- Strom学习01--例子WordCountTopology
- Apache Storm技术实战之1 -- WordCountTopology
- Apache Storm技术实战之1 -- WordCountTopology
- 三子棋的实现
- JAVA实现的时钟实现
- 模态框的的实现
- 杀毒软件的简单实现的简单实现
- android的实现电话号码的实现
- 音频的实现音乐声音的实现
- malloc的实现、内存池的实现
- 异形窗体的实现
- 个性化的分页实现
- ASP数据库连接的实现
- Struts单选框的实现
- [Leetcode] Subsets
- 编写Python程序时10个常见的错误
- Spring 中 LazyConnectionDataSourceProxy 代理类的源码解析
- css中zoom与transform:scale()的区别
- 关于跳转到空白页面进行新浪微博分享,当点击取消保存到草稿时无法监听,返回空白页问题的解决方案
- WordCountTopology的实现
- python 2.7 : 引用模块时出现TypeError:'module' object is not callable
- linux shell 指令 诸如-d, -f, -e之类的判断表达式
- ubuntu下hive安装(整合hadoop,mysql)
- MyBatis sqlMap 的解析过程
- Design包之材料设计
- Android BLE Scan failed, reason app registration failed for UUID
- centos7下rabbitmq3.6.6安装
- 自己动手部署区块链-hyperledger/fabric-01