Spark算子(五)
来源:互联网 发布:如何开启3724端口 编辑:程序博客网 时间:2024/06/12 23:11
Point 1:LineCount
package com.spark.operator;import org.apache.spark.SparkConf;import org.apache.spark.api.java.JavaPairRDD;import org.apache.spark.api.java.JavaRDD;import org.apache.spark.api.java.JavaSparkContext;import org.apache.spark.api.java.function.Function2;import org.apache.spark.api.java.function.PairFunction;import org.apache.spark.api.java.function.VoidFunction;import scala.Tuple2;/** * Created by Aaron on 2017/7/18. *///计算一个文本不重复的行数有多少public class LineCount { public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("LineCount") .setMaster("local"); JavaSparkContext sc = new JavaSparkContext(conf); JavaRDD<String> lines = sc.textFile("text.txt"); // 现在这个文本的元素是每一行,然后我们把每一行变成 line => (line ,1) JavaPairRDD<String, Integer> pairlines = lines.mapToPair(new PairFunction<String, String, Integer>() { private static final long serialVersionUID = 1L; @Override public Tuple2<String, Integer> call(String line) throws Exception { return new Tuple2<String, Integer>(line, 1); } }); JavaPairRDD<String, Integer> results = pairlines.reduceByKey(new Function2<Integer, Integer, Integer>() { private static final long serialVersionUID = 1L; @Override public Integer call(Integer v1, Integer v2) throws Exception { return v1+v2; } }); results.foreach(new VoidFunction<Tuple2<String,Integer>>() { private static final long serialVersionUID = 1L; @Override public void call(Tuple2<String, Integer> result) throws Exception { System.out.println(result._1 + " : " + result._2); } }); sc.close(); }}
Point 2:JoinOperator
package com.spark.operator;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

import scala.Tuple2;

import java.util.Arrays;
import java.util.List;

/**
 * Spark example: demonstrates the {@code join} operator on two pair RDDs.
 *
 * <p>Joins a (studentId, name) RDD with a (studentId, score) RDD; because a
 * student id appears twice in the score list, the inner join emits one
 * (id, (name, score)) tuple per matching score, then prints each result.
 */
public class JoinOperator {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("JoinOperator").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            List<Tuple2<String, String>> studentsList = Arrays.asList(
                    new Tuple2<String, String>("1", "xuruyun"),
                    new Tuple2<String, String>("2", "wangfei"),
                    new Tuple2<String, String>("3", "lixin"));

            List<Tuple2<String, String>> scoreList = Arrays.asList(
                    new Tuple2<String, String>("1", "100"),
                    new Tuple2<String, String>("2", "90"),
                    new Tuple2<String, String>("3", "80"),
                    new Tuple2<String, String>("1", "70"),
                    new Tuple2<String, String>("2", "60"),
                    new Tuple2<String, String>("3", "50"));

            JavaPairRDD<String, String> students = sc.parallelizePairs(studentsList);
            JavaPairRDD<String, String> scores = sc.parallelizePairs(scoreList);

            // Inner join on the student id: (id, (name, score)).
            JavaPairRDD<String, Tuple2<String, String>> result = students.join(scores);

            result.foreach(new VoidFunction<Tuple2<String, Tuple2<String, String>>>() {
                // Anonymous Spark functions must be serializable; declare the id
                // explicitly, consistent with the other operators in this file.
                private static final long serialVersionUID = 1L;

                @Override
                public void call(Tuple2<String, Tuple2<String, String>> tuple) throws Exception {
                    System.out.println("id:" + tuple._1);
                    System.out.println("name:" + tuple._2._1);
                    System.out.println("score:" + tuple._2._2);
                }
            });
        } finally {
            // FIX: the original never closed the context, leaking the local
            // Spark driver resources; every other example in this file closes it.
            sc.close();
        }
    }
}
Point 3:IntersectionOperator
package com.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

/**
 * Spark example: demonstrates the {@code intersection} operator.
 *
 * <p>Builds two single-partition RDDs of names and prints the elements common
 * to both (only "xurunyun" given the sample data), then closes the context.
 */
public class IntersectionOperator {

    public static void main(String[] args) {
        // FIX: app name was "SampleOperator" (copy-paste leftover); the file's
        // convention is appName == class name.
        SparkConf conf = new SparkConf().setAppName("IntersectionOperator")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<String> names = Arrays
                .asList("xurunyun", "liangyongqi", "wangfei", "yasaka");
        List<String> names1 = Arrays
                .asList("xurunyun", "liangyongqi2", "wangfei3", "yasaka4");

        JavaRDD<String> nameRDD = sc.parallelize(names, 1);
        JavaRDD<String> nameRDD1 = sc.parallelize(names1, 1);

        // Print each element present in both RDDs.
        nameRDD.intersection(nameRDD1).foreach(new VoidFunction<String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(String name) throws Exception {
                System.out.println(name);
            }
        });

        sc.close();
    }
}
阅读全文
0 0
- Spark算子(五)
- Spark算子(一)
- Spark算子(二)
- Spark算子(三)
- Spark算子(四)
- Spark算子(六)
- Spark算子(七)
- Spark算子(八)
- Spark算子(九)
- Spark算子执行流程详解之五
- 大数据算子(spark)
- spark算子实战(二)
- spark RDD算子(五)之键值对聚合操作 combineByKey
- Spark 算子
- spark算子
- spark 算子
- Spark算子
- spark算子
- 4款饮食单
- Python3.5 微信图片-日期命名
- 琴弦文字
- Hibernate——ManyToOne双向关联
- laravel 报错 laravel Undefined offset: 1
- Spark算子(五)
- JavaScript实现深拷贝与浅拷贝
- jquery函数总结
- 【分享】pom仓库地址
- RandomAccessFile的用法
- react学习笔记
- oracle简单操作
- ToastUtils工具类
- (转载)poi 列自适应