Storm整合HDFS
来源:互联网 发布:吴裕泰茶叶推荐 知乎 编辑:程序博客网 时间:2024/06/05 02:52
1.pom文件
<!-- Dependencies: Storm core (provided by the cluster at runtime), Hadoop
     client libraries, and the storm-hdfs connector. hadoop-client/hadoop-hdfs
     are excluded from storm-hdfs so the explicitly declared 2.7.1 versions win. -->
<dependencies>
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-core</artifactId>
        <version>0.10.0</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.7.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.7.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-hdfs</artifactId>
        <version>0.10.0</version>
        <exclusions>
            <exclusion>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-client</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-hdfs</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
</dependencies>

<!-- Build an uber-jar with "mvn clean package". The shade plugin merges
     META-INF/services files (needed for Hadoop FileSystem SPI lookup). -->
<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>1.4</version>
            <configuration>
                <createDependencyReducedPom>true</createDependencyReducedPom>
            </configuration>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <transformers>
                            <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
                            <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                <mainClass></mainClass>
                            </transformer>
                        </transformers>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
2.整合代码
package cn.bfire.storm;

import java.util.Map;
import java.util.UUID;

import org.apache.storm.hdfs.bolt.HdfsBolt;
import org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat;
import org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat;
import org.apache.storm.hdfs.bolt.format.FileNameFormat;
import org.apache.storm.hdfs.bolt.format.RecordFormat;
import org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy;
import org.apache.storm.hdfs.bolt.sync.CountSyncPolicy;
import org.apache.storm.hdfs.bolt.sync.SyncPolicy;

import backtype.storm.Config;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

/**
 * Storm topology that emits an increasing integer stream, keeps a running sum,
 * and writes the results to HDFS through storm-hdfs's {@code HdfsBolt}.
 *
 * <p>The HDFS output directory must be writable by the Storm worker user.
 * For example, if the output directory is /storm, run: "hdfs dfs -chmod 777 /storm".
 *
 * <p>Usage: storm jar &lt;jar&gt; cn.bfire.storm.NumAddTopology &lt;hdfs-output-path&gt;
 */
public class NumAddTopology {

    public static void main(String[] args) {
        // Fix: validate the required argument instead of failing later with a
        // bare ArrayIndexOutOfBoundsException on args[0].
        if (args.length < 1) {
            System.err.println("Usage: storm jar <jar> cn.bfire.storm.NumAddTopology <hdfs-output-path>");
            System.exit(1);
        }

        // Field delimiter for each record written to HDFS ("sum|uuid").
        RecordFormat format = new DelimitedRecordFormat().withFieldDelimiter("|");
        // Sync (hflush) to HDFS after every 1000 tuples.
        SyncPolicy syncPolicy = new CountSyncPolicy(1000);
        // Rotate to a new output file once the current one reaches 100 MB.
        FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(100.0f, FileSizeRotationPolicy.Units.MB);
        // Output directory inside HDFS, taken from the command line.
        FileNameFormat fileNameFormat = new DefaultFileNameFormat().withPath(args[0]);
        // HDFS NameNode address.
        HdfsBolt hdfsBolt = new HdfsBolt().withFsUrl("hdfs://node1:8020").withFileNameFormat(fileNameFormat)
                .withRecordFormat(format).withRotationPolicy(rotationPolicy).withSyncPolicy(syncPolicy);

        // Wiring: data_spout -> sum_bolt -> hdfs_bolt, shuffle-grouped throughout.
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("data_spout", new DataSpout());
        builder.setBolt("sum_bolt", new SumBolt()).shuffleGrouping("data_spout");
        builder.setBolt("hdfs_bolt", hdfsBolt).shuffleGrouping("sum_bolt");

        try {
            StormSubmitter.submitTopology("num_add", new Config(), builder.createTopology());
        } catch (Exception e) {
            // Fix: report the failure and exit non-zero instead of swallowing it
            // with a bare printStackTrace() and a successful exit status.
            System.err.println("Failed to submit topology 'num_add': " + e);
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Spout that emits an unbounded stream of increasing integers on field "num".
     * Emits are unanchored (no message id), so these tuples are not tracked/replayed.
     */
    private static class DataSpout extends BaseRichSpout {
        private static final long serialVersionUID = 1L;

        private SpoutOutputCollector collector;

        // Next value to emit; lives in the worker that hosts this spout instance.
        private int i = 0;

        @Override
        public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
            this.collector = collector;
        }

        @Override
        public void nextTuple() {
            System.out.println("num:" + i);
            collector.emit(new Values(i++));
            // NOTE(review): no throttling — this emits as fast as Storm calls
            // nextTuple(). Re-enable a sleep for demos to avoid flooding HDFS.
            // Utils.sleep(1000);
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("num"));
        }
    }

    /**
     * Bolt that accumulates a running sum of the incoming "num" field and emits
     * ("sum", "uuid") downstream; the random uuid makes each output row unique.
     */
    private static class SumBolt extends BaseRichBolt {
        private static final long serialVersionUID = 1L;

        private OutputCollector collector;

        // Running total across all tuples seen by this bolt instance.
        private int sum = 0;

        @Override
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            this.collector = collector;
        }

        @Override
        public void execute(Tuple input) {
            Integer num = input.getIntegerByField("num");
            sum += num;
            collector.emit(new Values(sum, UUID.randomUUID().toString()));
            // Fix: ack the input tuple. BaseRichBolt does not auto-ack; without
            // this, anchored tuples would time out and be replayed.
            collector.ack(input);
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("sum", "uuid"));
        }
    }
}
3.打包代码
用 cmd 进入项目根目录(例如:D:\workspace\storm_test),执行:
mvn clean package
4.上传jar包到服务器
jar包位置
D:\workspace\storm_test\target\storm-0.0.1-SNAPSHOT.jar
5.运行程序
storm jar storm-0.0.1-SNAPSHOT.jar cn.bfire.storm.NumAddTopology /storm
0 0
- Storm整合HDFS
- Kafka+Storm+HDFS整合
- Kafka+Storm+HDFS整合
- Kafka+Storm+HDFS整合实践
- Kafka+Storm+HDFS整合实践
- Kafka+Storm+HDFS整合实践
- Kafka+Storm+HDFS整合实践
- Kafka+Storm+HDFS整合实践
- Kafka+Storm+HDFS整合实践
- Kafka+Storm+HDFS整合实践
- Kafka+Storm+HDFS整合实践
- Kafka+Storm+HDFS整合实践
- Kafka+Storm+HDFS整合架构
- Kafka+Storm+HDFS整合实践
- Kafka+Storm+HDFS整合实践
- Kafka+Storm+HDFS整合实践
- Kafka+Storm+HDFS整合实践
- flume+kafka+storm+hdfs整合
- Java for Web学习笔记(二十):Session(4)在集群中使用Session
- C#中dynamic的正确用法
- oracle配置
- URL重定向 UrlRewrite
- 分布式文件系统 fastdfs 源码分析 之 文件上传流程分析
- Storm整合HDFS
- 简单的改变元素样式
- Docker是用来干什么的?
- VF01-billing拆分逻辑详解
- Testin内测管理——可能是你最需要的内测解决方案
- 软件版本号规范
- ubuntu下安装docker
- mysql数据库的时候无法输入中文
- Android 5.0 自定义dialog 背景不透明解决方法