A Storm example

package com.zsb.test.spout;

import java.util.Arrays;
import java.util.Map;

import backtype.storm.spout.MultiScheme;
import backtype.storm.spout.SchemeAsMultiScheme;
import storm.kafka.BrokerHosts;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;

/**
 * Kafka data source.
 * Date: 2017-03-13 <br>
 * @author zhoushanbin
 */
public class KafkaSpoutAgent {

    public static final String KAFKA_SPOUT_ID = "kafkaSpout";

    /** Kafka topic */
    private static final String KAFKA_SPOUT_TOPIC = "kafka.spout.topic";

    /** ZooKeeper server addresses (comma-separated, without ports) */
    private static final String KAFKA_SPOUT_ZK_SERVER = "kafka.spout.zk.server";

    /** ZooKeeper port */
    private static final String KAFKA_SPOUT_ZK_PORT = "kafka.spout.zk.port";

    /** Topology name */
    private static final String APP_TOPOLOGY_NAME = "app.topology.name";

    /** Root ZooKeeper node under which the spout stores its consumer offsets */
    private static final String STORM_KAFKA_INPUT = "/stormkfkinput";

    /** true: consume from the beginning of the topic; false: resume from the offset recorded in ZooKeeper */
    private static final String KAFKA_SPOUT_CONSUMER_FROM_START = "kafka.spout.consumer.fromstart";

    private KafkaSpout kafkaSpout;

    /**
     * Builds the KafkaSpout; when scheme is null, StringScheme is used.
     * @param conf topology configuration
     * @param scheme deserialization scheme, may be null
     */
    public KafkaSpoutAgent(Map<String, Object> conf, MultiScheme scheme) {
        String topic = (String) conf.get(KAFKA_SPOUT_TOPIC);
        String zkServerStr = String.valueOf(conf.get(KAFKA_SPOUT_ZK_SERVER)).replace(" ", "");
        String zkPort = (String) conf.get(KAFKA_SPOUT_ZK_PORT);
        String id = conf.get(APP_TOPOLOGY_NAME) + "_" + topic;

        // Turn "host1,host2" into "host1:port,host2:port", the format ZkHosts expects.
        StringBuilder zkAddr = new StringBuilder();
        for (String server : zkServerStr.split(",", -1)) {
            if (zkAddr.length() > 0) {
                zkAddr.append(",");
            }
            zkAddr.append(server).append(":").append(zkPort);
        }

        BrokerHosts brokerHosts = new ZkHosts(zkAddr.toString());
        SpoutConfig spoutConf = new SpoutConfig(brokerHosts, topic, STORM_KAFKA_INPUT, id);
        spoutConf.forceFromStart = Boolean.parseBoolean(((String) conf.get(KAFKA_SPOUT_CONSUMER_FROM_START)).trim());
        spoutConf.zkServers = Arrays.asList(zkServerStr.split(",", -1));
        spoutConf.zkPort = Integer.parseInt(zkPort);
        spoutConf.scheme = (scheme == null) ? new SchemeAsMultiScheme(new StringScheme()) : scheme;

        this.setKafkaSpout(new KafkaSpout(spoutConf));
    }

    public KafkaSpout getKafkaSpout() {
        return kafkaSpout;
    }

    public void setKafkaSpout(KafkaSpout kafkaSpout) {
        this.kafkaSpout = kafkaSpout;
    }
}
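For clarity, here is a minimal usage sketch of the agent (the topic, hosts, and names below are hypothetical placeholders, not from the original post). Passing null for the scheme falls back to StringScheme, per the constructor above; note how the comma-separated host list and the port combine into the host:port string that ZkHosts expects.

import java.util.HashMap;
import java.util.Map;

import com.zsb.test.spout.KafkaSpoutAgent;

import storm.kafka.KafkaSpout;

public class KafkaSpoutAgentExample {

    public static KafkaSpout buildSpout() {
        Map<String, Object> conf = new HashMap<String, Object>();
        conf.put("kafka.spout.topic", "demo_topic");            // hypothetical topic
        conf.put("kafka.spout.zk.server", "10.0.0.1,10.0.0.2"); // becomes "10.0.0.1:2181,10.0.0.2:2181" for ZkHosts
        conf.put("kafka.spout.zk.port", "2181");
        conf.put("app.topology.name", "demo");                  // consumer id becomes "demo_demo_topic"
        conf.put("kafka.spout.consumer.fromstart", "false");    // resume from the offset stored in ZooKeeper
        return new KafkaSpoutAgent(conf, null).getKafkaSpout(); // null scheme => StringScheme
    }
}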
package com.zsb.test.entry;

import java.util.Map;

import com.zsb.test.spout.KafkaSpoutAgent;

import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.spout.MultiScheme;
import backtype.storm.topology.TopologyBuilder;

/**
 * Topology flow definition (Storm 0.9.6).
 * Date: 2017-03-13 <br>
 * @author zhoushanbin
 */
public abstract class FlowDefine {

    private TopologyBuilder builder;
    private Map<String, Object> conf;

    public FlowDefine(Map<String, Object> conf) {
        this.conf = conf;
        builder = new TopologyBuilder();
    }

    public abstract void defineFlow();

    public void addKafkaSpout(MultiScheme scheme) {
        String prefix = KafkaSpoutAgent.class.getSimpleName().toUpperCase();
        builder.setSpout(KafkaSpoutAgent.KAFKA_SPOUT_ID,
                new KafkaSpoutAgent(conf, scheme).getKafkaSpout(),
                Integer.parseInt((String) conf.get(prefix + "_PARALLELISM")))
               .setNumTasks(Integer.parseInt((String) conf.get(prefix + "_TASK_NUM")));
    }

    public void start() throws AlreadyAliveException, InvalidTopologyException {
        defineFlow();
        if ("local".equals(conf.get("storm.run.type"))) {
            // Local mode
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology((String) conf.get("app.topology.name"), conf, builder.createTopology());
        } else {
            // Cluster mode
            StormSubmitter.submitTopology((String) conf.get("app.topology.name"), conf, builder.createTopology());
        }
    }

    public TopologyBuilder getTopologyBuilder() {
        return builder;
    }

    public int getBoltParallelism(Class<?> clazz) {
        return Integer.parseInt((String) conf.get(clazz.getSimpleName().toUpperCase() + "_PARALLELISM"));
    }

    public int getBoltTaskNum(Class<?> clazz) {
        return Integer.parseInt((String) conf.get(clazz.getSimpleName().toUpperCase() + "_TASK_NUM"));
    }
}
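One convention worth calling out: FlowDefine derives its configuration keys from a component's simple class name, upper-cased, plus a fixed suffix. A short illustration (the values here are examples, not from the post):

import java.util.HashMap;
import java.util.Map;

public class ParallelismConfExample {

    public static Map<String, Object> build() {
        Map<String, Object> conf = new HashMap<String, Object>();
        // Key derivation used by getBoltParallelism / getBoltTaskNum:
        // MyTest1Bolt.class -> "MYTEST1BOLT_PARALLELISM" and "MYTEST1BOLT_TASK_NUM"
        conf.put("MYTEST1BOLT_PARALLELISM", "2"); // number of executors (threads)
        conf.put("MYTEST1BOLT_TASK_NUM", "4");    // number of task instances spread over those executors
        return conf;
    }
}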



package com.zsb.test.entry;

import java.util.HashMap;
import java.util.Map;

import com.zsb.test.bolt.MyTest11Bolt;
import com.zsb.test.bolt.MyTest12Bolt;
import com.zsb.test.bolt.MyTest1Bolt;
import com.zsb.test.bolt.MyTest21Bolt;
import com.zsb.test.bolt.MyTest22Bolt;
import com.zsb.test.bolt.MyTest2Bolt;
import com.zsb.test.spout.KafkaSpoutAgent;

import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.tuple.Fields;

public class StartFlow extends FlowDefine {

    public StartFlow(Map<String, Object> conf) {
        super(conf);
    }

    @Override
    public void defineFlow() {
        //this.addKafkaSpout(new KeyValueSchemeAsMultiScheme(new StringKeyValueScheme()));
        this.addKafkaSpout(null);

        // shuffleGrouping(componentId): tuples from the upstream component are
        // distributed randomly (but evenly) across the downstream bolt's tasks.
        getTopologyBuilder().setBolt(MyTest1Bolt.class.getSimpleName(), new MyTest1Bolt(), getBoltParallelism(MyTest1Bolt.class))
                .shuffleGrouping(KafkaSpoutAgent.KAFKA_SPOUT_ID)
                .setNumTasks(getBoltTaskNum(MyTest1Bolt.class));

        //getTopologyBuilder().setBolt(MyTest2Bolt.class.getSimpleName(), new MyTest2Bolt(), getBoltParallelism(MyTest2Bolt.class))
        //        .shuffleGrouping(KafkaSpoutAgent.KAFKA_SPOUT_ID)
        //        .setNumTasks(getBoltTaskNum(MyTest2Bolt.class));

        getTopologyBuilder().setBolt(MyTest11Bolt.class.getSimpleName(), new MyTest11Bolt(), getBoltParallelism(MyTest11Bolt.class))
                .shuffleGrouping(MyTest1Bolt.class.getSimpleName(), "MyTest11Stream") // subscribe to a named stream
                .setNumTasks(getBoltTaskNum(MyTest11Bolt.class));

        getTopologyBuilder().setBolt(MyTest12Bolt.class.getSimpleName(), new MyTest12Bolt(), getBoltParallelism(MyTest12Bolt.class))
                .shuffleGrouping(MyTest1Bolt.class.getSimpleName()) // subscribe to the default stream
                .setNumTasks(getBoltTaskNum(MyTest12Bolt.class));

        //getTopologyBuilder().setBolt(MyTest21Bolt.class.getSimpleName(), new MyTest21Bolt(), getBoltParallelism(MyTest21Bolt.class))
        //        .shuffleGrouping(MyTest2Bolt.class.getSimpleName(), "MyTest11Stream") // subscribe to a named stream
        //        // route by the hash of new Fields("testField")
        //        .fieldsGrouping(MyTest2Bolt.class.getSimpleName(), new Fields("testField"))
        //        .setNumTasks(getBoltTaskNum(MyTest21Bolt.class));

        /**
        getTopologyBuilder().setBolt(MyTest22Bolt.class.getSimpleName(), new MyTest22Bolt(), getBoltParallelism(MyTest22Bolt.class))
                .shuffleGrouping(MyTest2Bolt.class.getSimpleName()) // subscribe to the default stream
                .setNumTasks(getBoltTaskNum(MyTest22Bolt.class));
        **/
    }

    public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
        Map<String, Object> conf = new HashMap<String, Object>();
        conf.put("kafka.spout.topic", "system_monitor_topic");
        conf.put("kafka.spout.zk.server", "127.0.0.1");
        conf.put("kafka.spout.zk.port", "2181");
        conf.put("app.topology.name", "MyTest");
        conf.put("kafka.spout.consumer.fromstart", "false");
        conf.put("KAFKASPOUTAGENT_PARALLELISM", "1");
        conf.put("KAFKASPOUTAGENT_TASK_NUM", "1");
        conf.put("MYTEST1BOLT_PARALLELISM", "1");
        conf.put("MYTEST1BOLT_TASK_NUM", "1");
        conf.put("MYTEST2BOLT_PARALLELISM", "1");
        conf.put("MYTEST2BOLT_TASK_NUM", "1");
        conf.put("MYTEST12BOLT_PARALLELISM", "1");
        conf.put("MYTEST12BOLT_TASK_NUM", "1");
        conf.put("MYTEST11BOLT_PARALLELISM", "1");
        conf.put("MYTEST11BOLT_TASK_NUM", "1");
        conf.put("MYTEST22BOLT_PARALLELISM", "1");
        conf.put("MYTEST22BOLT_TASK_NUM", "1");
        conf.put("MYTEST21BOLT_PARALLELISM", "12");
        conf.put("MYTEST21BOLT_TASK_NUM", "5");
        conf.put("storm.run.type", "local");
        conf.put("topology.workers", Integer.valueOf(1));
        // topology.max.spout.pending caps the number of tuples the spout may have
        // in flight (emitted but not yet acked). Once the limit is reached the
        // spout pauses until downstream bolts ack enough tuples to drop below it.
        // This setting only takes effect with reliable (anchored and acked) processing.
        conf.put("topology.max.spout.pending", Integer.valueOf(2));
        // Disable the user-defined classloader.
        conf.put("topology.enable.classloader", Boolean.valueOf("false"));
        new StartFlow(conf).start();
    }
}

package com.zsb.test.bolt;

import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.IBasicBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

// BaseRichBolt requires explicit ack/fail; IBasicBolt acks automatically.
public class MyTest1Bolt implements IBasicBolt {

    private static final long serialVersionUID = -6115493059000161669L;

    private static final Logger LOG = LoggerFactory.getLogger(MyTest1Bolt.class);

    /*** The BaseRichBolt-style equivalent, kept for reference:
    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        LOG.info("conf=[{}]", stormConf.toString());
        // initialization goes here
    }

    @Override
    public void execute(Tuple input) {
        LOG.info("tuple class=[{}]", input.getClass().getName());
        LOG.info("input1=[{}]", input.getValueByField("str"));
        collector.emit(new Values(input));
        collector.ack(input);
    }
    ***/

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        //declarer.declare(new Fields("MyTest1Bolt"));
        declarer.declareStream("default", new Fields("MyTest1Bolt"));
        declarer.declareStream("MyTest11Stream", new Fields("MyTest1Bolt"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map stormConf, TopologyContext context) {
    }

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        //LOG.info("TEST1=[{}]", input.getValue(0));
        /***
        LOG.info("####################TEST1 ST#############");
        LOG.info("tuple class=[{}]", input.getClass().getName());
        LOG.info("values from upstream=[{}]", input.getValues());
        LOG.info("fields from upstream=[{}]", input.getFields());
        LOG.info("message id assigned by the topology: msgId=[{}]", input.getMessageId());
        LOG.info("SourceComponent=[{}]", input.getSourceComponent());
        LOG.info("SourceGlobalStreamid=[{}]", input.getSourceGlobalStreamid());
        LOG.info("SourceStreamId=[{}]", input.getSourceStreamId());
        LOG.info("SourceTask=[{}]", input.getSourceTask());
        LOG.info("####################TEST1 END#############");
        **/
        // An emit may specify a stream id; without one the tuple goes to the "default" stream.
        collector.emit("MyTest11Stream", new Values(input));
    }

    @Override
    public void cleanup() {
    }
}
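Since the comment above contrasts the two bolt styles, here is a minimal sketch (an assumed illustration, not from the original post) of the same bolt written against BaseRichBolt, where the output must be anchored and the input acked explicitly:

package com.zsb.test.bolt;

import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

public class MyTest1RichBolt extends BaseRichBolt {

    private static final long serialVersionUID = 1L;

    private OutputCollector collector;

    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void execute(Tuple input) {
        // Anchor the emitted tuple to the input so the acker can track the tuple tree.
        collector.emit("MyTest11Stream", input, new Values(input.getValue(0)));
        collector.ack(input); // explicit ack; a BasicBolt would do this automatically
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream("MyTest11Stream", new Fields("MyTest1Bolt"));
    }
}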

The other bolts can follow the same pattern as MyTest1Bolt; a sketch of one possible MyTest11Bolt follows.
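This is an assumed implementation, not from the original post: it simply logs what arrives on the "MyTest11Stream" it subscribes to in StartFlow, and declares no output since nothing consumes it.

package com.zsb.test.bolt;

import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.IBasicBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;

public class MyTest11Bolt implements IBasicBolt {

    private static final long serialVersionUID = 1L;

    private static final Logger LOG = LoggerFactory.getLogger(MyTest11Bolt.class);

    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map stormConf, TopologyContext context) {
    }

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        // Terminal bolt in this example: just log the payload and its source stream.
        LOG.info("MyTest11Bolt received [{}] from stream [{}]", input.getValue(0), input.getSourceStreamId());
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // No downstream subscribers, so no streams to declare.
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    @Override
    public void cleanup() {
    }
}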