使用Storm框架运行WordCount示例

来源:互联网 发布:ajax请求json数据 编辑:程序博客网 时间:2024/05/21 14:57

1.定义一个spout类引入数据:

MySpout.java

////

import backtype.storm.spout.SpoutOutputCollector;
 import backtype.storm.task.TopologyContext;
 import backtype.storm.topology.OutputFieldsDeclarer;
 import backtype.storm.topology.base.BaseRichSpout;
 import backtype.storm.tuple.Fields;
 import backtype.storm.tuple.Values;
 
 import java.util.Map;
 
 /**
  * Created by ahu_lichang on 2017/5/18.
  */
 public class MySpout extends BaseRichSpout {
     //用来收集Spout输出的Tuple
     SpoutOutputCollector collector;
 
     //初始化方法
     public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
         this.collector = collector;
     }
 
     //storm 框架在 while(true) 调用nextTuple方法
     public void nextTuple() {
         collector.emit(new Values("i am lilei love hanmeimei"));
     }
 
     //消息源可以发射多条消息流stream.多条消息流可以理解为多种类型的数据
     public void declareOutputFields(OutputFieldsDeclarer declarer) {
         declarer.declare(new Fields("sentence"));
     }
 }

2.定义一个bolt用来分词:

MySplitBolt.java

import backtype.storm.task.OutputCollector;
 import backtype.storm.task.TopologyContext;
 import backtype.storm.topology.OutputFieldsDeclarer;
 import backtype.storm.topology.base.BaseRichBolt;
 import backtype.storm.tuple.Fields;
 import backtype.storm.tuple.Tuple;
 import backtype.storm.tuple.Values;
 
 import java.util.Map;
 
 public class MySplitBolt extends BaseRichBolt {
     OutputCollector collector;
 
     //初始化方法
     public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
         this.collector = collector;
     }
 
     // 被storm框架 while(true) 循环调用  传入参数tuple
     //input内容是句子,execute方法将句子切割成单词发出
     public void execute(Tuple input) {
         String line = input.getString(0);
         String[] arrWords = line.split(" ");
         for (String word : arrWords) {
             collector.emit(new Values(word, 1));
         }
     }
 
     public void declareOutputFields(OutputFieldsDeclarer declarer) {
         declarer.declare(new Fields("word", "num"));
     }
 }


3.定义一个bolt用来对单词个数进行累加求和:

MyCountBolt.java

////

import backtype.storm.task.OutputCollector;
 import backtype.storm.task.TopologyContext;
 import backtype.storm.topology.OutputFieldsDeclarer;
 import backtype.storm.topology.base.BaseRichBolt;
 import backtype.storm.tuple.Tuple;
 
 import java.util.HashMap;
 import java.util.Map;
 
 /**
  * Created by ahu_lichang on 2017/5/18.
  */
 public class MyCountBolt extends BaseRichBolt {
     OutputCollector collector;
     //用来保存最后计算的结果key=单词,value=单词个数
     Map<String, Integer> map = new HashMap<String, Integer>();
 
     public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
         this.collector = collector;
     }
 
     public void execute(Tuple input) {
         String word = input.getString(0);
         Integer num = input.getInteger(1);
         System.out.println(Thread.currentThread().getId() + "    word:" + word);
         if (map.containsKey(word)) {
             Integer count = map.get(word);
             map.put(word, count + num);
         } else {
             map.put(word, num);
         }
         System.out.println("count:" + map);
     }
 
     public void declareOutputFields(OutputFieldsDeclarer declarer) {
         //不输出
     }
 }


4.定义一个主类用来组装拓扑、设置并行度并提交任务:

WordCountTopologyMain.java

/////

import backtype.storm.Config;
 import backtype.storm.LocalCluster;
 import backtype.storm.StormSubmitter;
 import backtype.storm.generated.AlreadyAliveException;
 import backtype.storm.generated.InvalidTopologyException;
 import backtype.storm.topology.TopologyBuilder;
 import backtype.storm.tuple.Fields;
 
 public class WordCountTopologyMain {
     public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
         //1、准备一个TopologyBuilder
         //storm框架支持多语言,在Java环境下创建一个拓扑,需要使用TopologyBuilder
         TopologyBuilder topologyBuilder = new TopologyBuilder();
         //MySpout类,在已知的英文句子中,所及发送一条句子出去
         topologyBuilder.setSpout("mySpout", new MySpout(), 2);
         //MySplitBolt类,主要是将一行一行的文本内容切割成单词
         topologyBuilder.setBolt("mybolt1", new MySplitBolt(), 2).shuffleGrouping("mySpout");
         //MyCountBolt类,负责对单词的频率进行累加
         topologyBuilder.setBolt("mybolt2", new MyCountBolt(), 4).fieldsGrouping("mybolt1", new Fields("word"));


         //2、创建一个configuration,用来指定当前topology 需要的worker的数量
         //启动topology的配置信息
         Config config = new Config();
         //定义你希望集群分配多少个工作进程给你来执行这个topology
         config.setNumWorkers(2);
 
         //3、提交任务  -----两种模式 本地模式和集群模式
         //这里将拓扑名称写死了mywordcount,所以在集群上打包运行的时候,不用写拓扑名称了!也可用arg[0]
         StormSubmitter.submitTopology("mywordcount", config, topologyBuilder.createTopology());
       //  LocalCluster localCluster = new LocalCluster();
       //  localCluster.submitTopology("mywordcount",config,topologyBuilder.createTopology());
     }
 }




原创粉丝点击