storm框架跑wordcount
来源:互联网 发布:ajax请求json数据 编辑:程序博客网 时间:2024/05/21 14:57
1.定义一个spout类引入数据:
MySpout.java
////
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import java.util.Map;
/**
 * Spout that feeds the word-count topology: every call to {@link #nextTuple()}
 * emits the same fixed English sentence as a one-field tuple.
 *
 * Created by ahu_lichang on 2017/5/18.
 */
public class MySpout extends BaseRichSpout {

    // The sentence emitted on every nextTuple() call.
    private static final String SENTENCE = "i am lilei love hanmeimei";

    // Name of the single field carried by each emitted tuple.
    private static final String OUTPUT_FIELD = "sentence";

    // Collects the tuples emitted by this spout; assigned in open().
    SpoutOutputCollector collector;

    /**
     * Initialization hook: keeps the collector so nextTuple() can emit through it.
     *
     * @param conf      configuration passed in by the framework (unused here)
     * @param context   topology context passed in by the framework (unused here)
     * @param collector collector used to emit this spout's tuples
     */
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Emits one tuple holding the fixed sentence. The Storm framework calls
     * this method over and over, as if inside a while(true) loop.
     */
    public void nextTuple() {
        // NOTE(review): emits on every call with no pause or message id;
        // confirm whether throttling is wanted outside of a demo.
        Values sentenceTuple = new Values(SENTENCE);
        collector.emit(sentenceTuple);
    }

    /**
     * Declares the output schema: a single field named "sentence".
     * A message source may emit several streams, which can be thought of as
     * several kinds of data; this spout declares only one.
     *
     * @param declarer receives the field declaration
     */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        Fields outputFields = new Fields(OUTPUT_FIELD);
        declarer.declare(outputFields);
    }
}
2.定义一个bolt用来分词:
MySplitBolt.java
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import java.util.Map;
/**
 * Bolt that splits each incoming sentence into words and emits one
 * (word, 1) tuple per word.
 */
public class MySplitBolt extends BaseRichBolt {

    // Collector used to emit word tuples and to ack the input; assigned in prepare().
    OutputCollector collector;

    /**
     * Initialization hook: keeps the collector for use in execute().
     *
     * @param stormConf configuration passed in by the framework (unused here)
     * @param context   topology context passed in by the framework (unused here)
     * @param collector collector used to emit and ack tuples
     */
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Called by the Storm framework once per incoming tuple. The first value
     * of the tuple is the sentence; it is cut into words on runs of
     * whitespace, and each non-empty word is emitted together with the count 1.
     *
     * @param input tuple whose first value is the sentence to split
     */
    public void execute(Tuple input) {
        String line = input.getString(0);
        if (line != null) {
            // Split on whitespace runs so that repeated, leading or trailing
            // spaces never produce an empty "word".
            for (String word : line.trim().split("\\s+")) {
                if (word.length() > 0) {
                    // Anchor to the input so the word tuples are tied to the
                    // sentence they came from when tuple tracking is enabled.
                    collector.emit(input, new Values(word, 1));
                }
            }
        }
        // BaseRichBolt does not ack automatically; every processed tuple
        // must be acked explicitly.
        collector.ack(input);
    }

    /**
     * Declares the output schema: the word and its count.
     *
     * @param declarer receives the field declaration
     */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "num"));
    }
}
3.定义一个bolt用来对单词个数进行累加(sum):
MyCountBolt.java
////
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
import java.util.HashMap;
import java.util.Map;
/**
 * Terminal bolt of the word-count topology: keeps a running total per word
 * and prints the totals to standard output after every tuple.
 *
 * Created by ahu_lichang on 2017/5/18.
 */
public class MyCountBolt extends BaseRichBolt {

    // Collector used to ack processed tuples; assigned in prepare().
    OutputCollector collector;

    // Running result: key = word, value = number of occurrences seen so far.
    Map<String, Integer> map = new HashMap<String, Integer>();

    /**
     * Initialization hook: keeps the collector for use in execute().
     *
     * @param stormConf configuration passed in by the framework (unused here)
     * @param context   topology context passed in by the framework (unused here)
     * @param collector collector used to ack tuples
     */
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Adds the count carried by the tuple to the total of its word, then
     * prints the current thread id with the word, and the whole count map.
     *
     * @param input tuple whose first value is the word and second value its count
     */
    public void execute(Tuple input) {
        String word = input.getString(0);
        Integer num = input.getInteger(1);
        System.out.println(Thread.currentThread().getId() + " word:" + word);

        // Single lookup instead of containsKey() followed by get().
        Integer previous = map.get(word);
        if (previous == null) {
            map.put(word, num);
        } else {
            map.put(word, previous + num);
        }
        System.out.println("count:" + map);

        // BaseRichBolt does not ack automatically; every processed tuple
        // must be acked explicitly.
        collector.ack(input);
    }

    /**
     * Declares nothing: this bolt emits no tuples.
     *
     * @param declarer unused
     */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // no output
    }
}
4.定义一个主类,用来组装拓扑、设置并行度并提交任务:
WordCountTopologyMain.java
/////
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
/**
 * Entry point that wires the spout and the two bolts into a topology, sets
 * their parallelism and submits the topology to the cluster.
 */
public class WordCountTopologyMain {

    // Topology name used when none is given on the command line.
    private static final String DEFAULT_TOPOLOGY_NAME = "mywordcount";

    /**
     * Builds and submits the word-count topology.
     *
     * @param args optional; {@code args[0]}, when present and non-empty, is
     *             used as the topology name, otherwise "mywordcount" is used
     * @throws AlreadyAliveException    declared by the submit call
     * @throws InvalidTopologyException declared by the submit call
     */
    public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
        // 1. Prepare a TopologyBuilder. Storm supports several languages;
        //    in Java a topology is created through TopologyBuilder.
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        // MySpout emits a sentence; parallelism 2.
        topologyBuilder.setSpout("mySpout", new MySpout(), 2);
        // MySplitBolt cuts each line of text into words; parallelism 2,
        // fed from the spout with shuffle grouping.
        topologyBuilder.setBolt("mybolt1", new MySplitBolt(), 2).shuffleGrouping("mySpout");
        // MyCountBolt accumulates the frequency of each word; parallelism 4,
        // fed from the split bolt with fields grouping on "word".
        topologyBuilder.setBolt("mybolt2", new MyCountBolt(), 4).fieldsGrouping("mybolt1", new Fields("word"));

        // 2. Create the configuration used to start the topology.
        Config config = new Config();
        // Number of worker processes the cluster should allocate to this topology.
        config.setNumWorkers(2);

        // 3. Submit the job. There are two modes: local mode and cluster mode.
        //    The name comes from the first command-line argument when given,
        //    so the same jar can be submitted under different names; with no
        //    argument the original fixed name is kept.
        String topologyName = DEFAULT_TOPOLOGY_NAME;
        if (args != null && args.length > 0 && args[0].length() > 0) {
            topologyName = args[0];
        }
        StormSubmitter.submitTopology(topologyName, config, topologyBuilder.createTopology());
        // For local mode, submit through a LocalCluster instance instead:
        //   new LocalCluster().submitTopology(topologyName, config, topologyBuilder.createTopology());
    }
}
- storm框架跑wordcount
- storm运行wordcount例子
- storm wordcount 运
- storm wordcount实例
- Storm实战之WordCount
- Storm实战之WordCount
- Storm实战之WordCount
- Storm实战之WordCount
- 基于storm的wordCount
- Storm实战之WordCount
- Storm之WordCount
- 【Storm初探】wordcount demo
- storm之wordCount
- Storm WordCount 代码
- Storm的wordcount实战示例
- storm程序-单词统计wordcount
- Storm-9.1.0 WordCount jar 实例
- Storm入门程序——WordCount
- uml类图
- Android灯光系统框架分析
- Activity启动模式的那点事
- 计算机网络--网络层
- python基础-条件分支和循环
- storm框架跑wordcount
- 费用流[网络流24题] 餐巾
- html5小栗子---canvas时钟
- QT-程序生成详细信息
- HDU6053 TrickGCD
- 剑指offer:对称二叉树
- Pandas读取csv文件Error总结
- 解题报告:HDU_5663 Hillan and the girl 莫比乌斯反演
- brew使用