Spark Streaming---HDFSwordcount
来源:互联网 发布:等离子数控切割机编程 编辑:程序博客网 时间:2024/06/05 23:46
package com.spark.streaming;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

import scala.Tuple2;

/**
 * Spark Streaming word count over text files read from an HDFS directory.
 *
 * <p>Every 5-second batch, the lines delivered by
 * {@code textFileStream} for the input directory are split into
 * space-separated words, the occurrences of each word within the batch are
 * summed, and the resulting counts are printed.
 *
 * <p>No master URL is set on the {@link SparkConf}; it is presumably supplied
 * externally (for example by {@code spark-submit}).
 */
public class HDFSWordcount {

    /**
     * Builds the streaming pipeline, starts it and blocks until the streaming
     * context terminates.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("HDFSWordcount");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

        try {
            JavaDStream<String> lines =
                    jssc.textFileStream("hdfs://node12:8020/Spark/Streaming/WordCount");

            // One input line -> zero or more words.
            JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Iterable<String> call(String line) throws Exception {
                    return splitWords(line);
                }
            });

            // word -> (word, 1)
            JavaPairDStream<String, Integer> pairs =
                    words.mapToPair(new PairFunction<String, String, Integer>() {
                        private static final long serialVersionUID = 1L;

                        @Override
                        public Tuple2<String, Integer> call(String word) throws Exception {
                            return new Tuple2<String, Integer>(word, 1);
                        }
                    });

            // Sum the 1s per word within each batch.
            JavaPairDStream<String, Integer> wordcounts =
                    pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
                        private static final long serialVersionUID = 1L;

                        @Override
                        public Integer call(Integer v1, Integer v2) throws Exception {
                            return v1 + v2;
                        }
                    });

            wordcounts.print();

            jssc.start();
            jssc.awaitTermination();
        } finally {
            // Close the context even when start()/awaitTermination() throws,
            // so a failed run does not leave it open.
            jssc.close();
        }
    }

    /**
     * Splits a line on single spaces and drops empty tokens.
     *
     * <p>{@code String.split(" ")} returns empty strings for blank lines and
     * for leading or consecutive spaces; without the filter the empty string
     * would be counted as a word.
     *
     * @param line one line of input text
     * @return the non-empty space-separated tokens of {@code line}, in order
     */
    private static List<String> splitWords(String line) {
        List<String> result = new ArrayList<String>();
        for (String token : line.split(" ")) {
            if (!token.isEmpty()) {
                result.add(token);
            }
        }
        return result;
    }
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
阅读全文
0 0
- Spark Streaming---HDFSwordcount
- Spark Streaming---HDFSwordcount
- Spark组件之Spark Streaming学习4--HdfsWordCount 学习
- Spark2.x学习笔记:17、Spark Streaming之HdfsWordCount 学习
- Spark Streaming
- spark streaming
- Spark/Streaming
- Spark Streaming
- spark streaming
- Spark Streaming
- Spark Streaming
- Spark Streaming
- Spark Streaming
- spark streaming
- Spark Streaming
- Spark Streaming
- Spark Streaming
- Spark Streaming
- 菜鸟前端日记10 (原生JS--数组的操作方法)
- 高级篇 II: 作业的执行
- Makefile赋值符号的使用——"=" ":=" "?=" "+="
- 解决 IntelliJ IDEA 编辑器中 @Override 报错问题
- Swift与JS交互
- Spark Streaming---HDFSwordcount
- 如何使用TensorFlow追踪千年隼号
- 正则表达式要点(简要)
- 后起之秀百度云为何能在竞争激烈的云市场上打响进攻战
- 哪几种方式可用来实现线程间通知和唤醒
- nvidia GPU 性能查看
- Java 多线程
- 模板整理: 图论---最小生成树
- python之字符串格式化(format)用法说明