基于storm的wordCount
来源:互联网 发布:网络传销的基本特点是 编辑:程序博客网 时间:2024/05/21 09:35
前段时间买了本《Storm分布式实时计算模式》,看了几天,今天工作之余就想着写个 wordCount,中途踩了几处雷,现在把代码贴出来,让大家少踩几个坑。
1:pom文件
<!-- Build-wide settings: UTF-8 everywhere, and a single place to pin the Storm version. -->
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
    <storm.version>0.9.4</storm.version>
</properties>

<dependencies>
    <!-- compile scope means storm-core is packaged into the shaded jar built below. -->
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-core</artifactId>
        <version>${storm.version}</version>
        <scope>compile</scope>
    </dependency>
</dependencies>

<!-- Extra repository to resolve artifacts from: releases only, snapshots disabled. -->
<repositories>
    <repository>
        <id>nexus</id>
        <name>local private nexus</name>
        <url>http://maven.oschina.net/content/groups/public/</url>
        <releases>
            <enabled>true</enabled>
        </releases>
        <snapshots>
            <enabled>false</enabled>
        </snapshots>
    </repository>
</repositories>

<build>
    <plugins>
        <!-- Runs the shade goal during the package phase to build a single jar; junit is left out of it. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>2.3</version>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <artifactSet>
                            <excludes>
                                <exclude>junit:junit</exclude>
                            </excludes>
                        </artifactSet>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
2:spout
package com.tengrong.wordCount.batch.spout;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

import java.util.Map;

/**
 * Spout that cycles through a fixed list of sentences and emits each one as a
 * single-field tuple named "sentence".
 *
 * Created by wangmao on 16/12/6.
 */
public class SentenceSpout extends BaseRichSpout {

    /** Pause after each emitted sentence, in milliseconds, to throttle the stream. */
    private static final long EMIT_INTERVAL_MILLIS = 100L;

    private SpoutOutputCollector collector;

    private String[] sentences = {
            "hello wm",
            "banyan qBao",
            "my Word"
    };

    /** Position in {@link #sentences} of the next sentence to emit. */
    private int index = 0;

    /** Declares the single output field, "sentence". */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("sentence"));
    }

    /** Stores the collector that {@link #nextTuple()} emits through. */
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Emits exactly one sentence and returns.
     *
     * The method must return so the caller can invoke it again; an endless loop
     * here would never hand control back, and swallowing the interrupt inside
     * such a loop would make it impossible to stop.
     */
    public void nextTuple() {
        this.collector.emit(new Values(sentences[index]));
        // Wrap around so the sentence list is replayed forever.
        index = (index + 1) % sentences.length;
        try {
            Thread.sleep(EMIT_INTERVAL_MILLIS);
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the calling thread instead of hiding it.
            Thread.currentThread().interrupt();
        }
    }
}
3:splitSentence
package com.tengrong.wordCount.batch.bolt;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

import java.util.Map;

/**
 * Bolt that reads the "sentence" field of each incoming tuple, splits it on
 * single spaces and emits one "word" tuple per non-empty token.
 *
 * Created by wangmao on 16/12/6.
 */
public class SplitSentenceBolt extends BaseRichBolt {

    private OutputCollector collector;

    /** Declares the single output field, "word". */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }

    /** Stores the collector that {@link #execute(Tuple)} emits and acks through. */
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Splits the incoming sentence and emits each word anchored to the input
     * tuple, then acks the input.
     *
     * @param input tuple carrying a String field named "sentence"
     */
    public void execute(Tuple input) {
        String sentence = input.getStringByField("sentence");
        for (String word : sentence.split(" ")) {
            // Consecutive or leading spaces yield empty tokens; they are not words.
            if (word.length() == 0) {
                continue;
            }
            this.collector.emit(input, new Values(word));
        }
        // BaseRichBolt does not ack automatically, so ack once the sentence is fully split.
        this.collector.ack(input);
    }
}
4:wordCountBolt
package com.tengrong.wordCount.batch.bolt;import backtype.storm.task.OutputCollector;import backtype.storm.task.TopologyContext;import backtype.storm.topology.OutputFieldsDeclarer;import backtype.storm.topology.base.BaseRichBolt;import backtype.storm.tuple.Fields;import backtype.storm.tuple.Tuple;import backtype.storm.tuple.Values;import java.util.HashMap;import java.util.Map;/** * Created by wangmao on 16/12/6. */public class WordCountBolt extends BaseRichBolt { private OutputCollector collector; private HashMap<String,Long> counts=null; public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("word","count")); } public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) { this.collector=collector; this.counts=new HashMap<String,Long>(); } public void execute(Tuple input) { String word=input.getStringByField("word"); Long count=this.counts.get(word); if(count==null){ count=0L; } count++; this.counts.put(word,count); this.collector.emit(new Values(word,count)); }}
5:reportBolt
package com.tengrong.wordCount.batch.bolt;import backtype.storm.task.OutputCollector;import backtype.storm.task.TopologyContext;import backtype.storm.topology.OutputFieldsDeclarer;import backtype.storm.topology.base.BaseRichBolt;import backtype.storm.tuple.Tuple;import java.util.*;/** * Created by wangmao on 16/12/6. */public class ReportBolt extends BaseRichBolt { private HashMap<String,Long> counts=null; public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) { this.counts=new HashMap<String,Long>(); } public void execute(Tuple input) { String word=input.getStringByField("word"); Long count=input.getLongByField("count"); this.counts.put(word,count); } @Override public void cleanup() { System.out.println("----begin-----"); List<String> keys=new ArrayList<String>(); keys.addAll(this.counts.keySet()); Collections.sort(keys); for(String key:keys){ System.out.println(key+":"+this.counts.get(key)); } System.out.println("----end-----"); } public void declareOutputFields(OutputFieldsDeclarer declarer) { }}
6:topology
package com.tengrong.wordCount.batch.topology;import backtype.storm.Config;import backtype.storm.LocalCluster;import backtype.storm.topology.TopologyBuilder;import backtype.storm.tuple.Fields;import com.tengrong.wordCount.batch.spout.SentenceSpout;import com.tengrong.wordCount.batch.bolt.*;/** * Created by wangmao on 16/12/6. */public class WordCountTopology { private static final String SENTENCE_SPOUT_ID="sentences-spout"; private static final String SPLIT_BOLT_ID="split-bolt"; private static final String COUNT_BOLT_ID="count-bolt"; private static final String REPORT_BOLT_ID="report-bolt"; private static final String TOPOLOGY_NAME="word-count-topology"; public static void main(String args[]) { SentenceSpout spout=new SentenceSpout(); SplitSentenceBolt splitSentenceBolt=new SplitSentenceBolt(); WordCountBolt wordCountBolt=new WordCountBolt(); ReportBolt reportBolt=new ReportBolt(); TopologyBuilder builder=new TopologyBuilder(); builder.setSpout(SENTENCE_SPOUT_ID,spout); builder.setBolt(SPLIT_BOLT_ID,splitSentenceBolt).shuffleGrouping(SENTENCE_SPOUT_ID); builder.setBolt(COUNT_BOLT_ID,wordCountBolt).fieldsGrouping(SPLIT_BOLT_ID,new Fields("word")); builder.setBolt(REPORT_BOLT_ID,reportBolt).globalGrouping(COUNT_BOLT_ID); Config config=new Config(); LocalCluster cluster=new LocalCluster(); cluster.submitTopology(TOPOLOGY_NAME,config,builder.createTopology()); try { Thread.sleep(10000); }catch (Exception e){ } cluster.killTopology(TOPOLOGY_NAME); cluster.shutdown(); }}
7:运行topology
执行 mvn install 生成 jar
java -cp storm-1.0-SNAPSHOT.jar com.tengrong.wordCount.batch.topology.WordCountTopology
----begin-----
Word:25
banyan:26
hello:26
my:25
qBao:26
wm:26
----end-----
源码可以在我的下载页面里下载
1808568908@qq.com,交流学习
0 0
- 基于storm的wordCount
- Storm的wordcount实战示例
- Storm+Kafka+Hbase的wordcount统计
- Storm的wordcount代码编写与分析
- 基于storm的Window
- storm运行wordcount例子
- storm wordcount 运
- storm wordcount实例
- Storm实战之WordCount
- Storm实战之WordCount
- Storm实战之WordCount
- Storm实战之WordCount
- Storm实战之WordCount
- Storm之WordCount
- 【Storm初探】wordcount demo
- storm框架跑wordcount
- storm之wordCount
- Storm WordCount 代码
- Arduino 004 Windows上给Arduino IDE添加模块库
- linux命令——scp 两台linux机器间文件或目录传输
- 每天学习openstack(4)
- Mysql数据库主从心得整理
- 无源晶振时钟计算
- 基于storm的wordCount
- 点击图片,背景音乐播放/停止,播放图标/静音图标随之更换
- Android Studio 配置系列(一):自定义代码注释
- JMS 之 ActiveMQ 第二篇
- 使用java程序update oracle 中 blob 字段
- android使用微信与支付宝支付在小米miui系统上ui线程被异常kill的bug修复
- http://www.cnblogs.com/simeone/p/4026173.html
- 矩阵的乘法
- 程序员面试题一