Spark Operators (5)


Point 1: LineCount

package com.spark.operator;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

/**
 * Created by Aaron on 2017/7/18.
 */
// Count how many times each distinct line appears in a text file.
public class LineCount {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("LineCount")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaRDD<String> lines = sc.textFile("text.txt");

        // Each element is one line of the file; map every line to (line, 1).
        JavaPairRDD<String, Integer> pairLines = lines.mapToPair(new PairFunction<String, String, Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Tuple2<String, Integer> call(String line) throws Exception {
                return new Tuple2<String, Integer>(line, 1);
            }
        });

        // Sum the counts for identical lines.
        JavaPairRDD<String, Integer> results = pairLines.reduceByKey(new Function2<Integer, Integer, Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        });

        // Print each distinct line together with its occurrence count.
        results.foreach(new VoidFunction<Tuple2<String, Integer>>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(Tuple2<String, Integer> result) throws Exception {
                System.out.println(result._1 + " : " + result._2);
            }
        });

        sc.close();
    }
}
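The mapToPair-then-reduceByKey pattern above is the classic word-count shape applied to whole lines. The same tally can be written more compactly with countByValue(), which aggregates on the executors and returns the result to the driver as a java.util.Map. A minimal sketch under the same assumptions as above (local master, input file text.txt; the class name LineCountByValue is mine), suitable only when the number of distinct lines fits in driver memory:

package com.spark.operator;

import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

// Sketch: the same per-line tally via countByValue(), which ships the
// resulting counts to the driver as a Map. Fine for small outputs only.
public class LineCountByValue {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("LineCountByValue").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaRDD<String> lines = sc.textFile("text.txt");

        // countByValue() is equivalent to mapping each line to (line, 1),
        // reducing by key, and collecting the result as a Map.
        Map<String, Long> counts = lines.countByValue();
        for (Map.Entry<String, Long> entry : counts.entrySet()) {
            System.out.println(entry.getKey() + " : " + entry.getValue());
        }

        sc.close();
    }
}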

Point 2: JoinOperator

package com.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

/**
 * Created by Administrator on 2017/07/18.
 */
public class JoinOperator {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("JoinOperator").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // (id, name) pairs.
        List<Tuple2<String, String>> studentsList = Arrays.asList(
                new Tuple2<String, String>("1", "xuruyun"),
                new Tuple2<String, String>("2", "wangfei"),
                new Tuple2<String, String>("3", "lixin"));

        // (id, score) pairs; note that every id appears twice.
        List<Tuple2<String, String>> scoreList = Arrays.asList(
                new Tuple2<String, String>("1", "100"),
                new Tuple2<String, String>("2", "90"),
                new Tuple2<String, String>("3", "80"),
                new Tuple2<String, String>("1", "70"),
                new Tuple2<String, String>("2", "60"),
                new Tuple2<String, String>("3", "50"));

        JavaPairRDD<String, String> students = sc.parallelizePairs(studentsList);
        JavaPairRDD<String, String> scores = sc.parallelizePairs(scoreList);

        // Inner join by key: one output record per matching (name, score) combination.
        JavaPairRDD<String, Tuple2<String, String>> result = students.join(scores);
        result.foreach(new VoidFunction<Tuple2<String, Tuple2<String, String>>>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(Tuple2<String, Tuple2<String, String>> tuple) throws Exception {
                System.out.println("id:" + tuple._1);
                System.out.println("name:" + tuple._2._1);
                System.out.println("score:" + tuple._2._2);
            }
        });

        sc.close();
    }
}
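join is an inner join: only ids present in both RDDs appear in the result, and because every id above carries two scores, each student is printed twice. To also keep left-side keys that have no match, use leftOuterJoin, where the right-side value comes back wrapped in an Optional. A minimal sketch, assuming Spark 2.x (where Optional is org.apache.spark.api.java.Optional; Spark 1.x uses Guava's Optional instead); the class name LeftOuterJoinOperator and the extra id 4 are mine:

package com.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.Optional;
import scala.Tuple2;

// Sketch: leftOuterJoin keeps left-side keys with no match on the right;
// the right-side value arrives wrapped in an Optional.
public class LeftOuterJoinOperator {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("LeftOuterJoinOperator").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaPairRDD<String, String> students = sc.parallelizePairs(Arrays.asList(
                new Tuple2<String, String>("1", "xuruyun"),
                new Tuple2<String, String>("4", "noscore"))); // id 4 has no score

        JavaPairRDD<String, String> scores = sc.parallelizePairs(Arrays.asList(
                new Tuple2<String, String>("1", "100")));

        // id 1 -> (xuruyun, Optional[100]); id 4 -> (noscore, empty Optional).
        List<Tuple2<String, Tuple2<String, Optional<String>>>> rows =
                students.leftOuterJoin(scores).collect();
        for (Tuple2<String, Tuple2<String, Optional<String>>> row : rows) {
            String score = row._2._2.isPresent() ? row._2._2.get() : "none";
            System.out.println("id:" + row._1 + " name:" + row._2._1 + " score:" + score);
        }

        sc.close();
    }
}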

Point 3: IntersectionOperator

package com.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

public class IntersectionOperator {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("IntersectionOperator")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<String> names = Arrays
                .asList("xurunyun", "liangyongqi", "wangfei", "yasaka");
        List<String> names1 = Arrays
                .asList("xurunyun", "liangyongqi2", "wangfei3", "yasaka4");
        JavaRDD<String> nameRDD = sc.parallelize(names, 1);
        JavaRDD<String> nameRDD1 = sc.parallelize(names1, 1);

        // intersection returns the elements present in both RDDs,
        // with duplicates removed; here only "xurunyun" survives.
        nameRDD.intersection(nameRDD1).foreach(new VoidFunction<String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(String name) throws Exception {
                System.out.println(name);
            }
        });

        sc.close();
    }
}
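intersection returns the elements common to both RDDs and removes duplicates, so this example prints only xurunyun. Its set-difference counterpart is subtract, which keeps the elements of the first RDD that do not appear in the second. A minimal sketch with the same data (the class name SubtractOperator is mine):

package com.spark.operator;

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

// Sketch: subtract is the set-difference counterpart of intersection,
// returning elements of the first RDD that are absent from the second.
public class SubtractOperator {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("SubtractOperator").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaRDD<String> nameRDD = sc.parallelize(
                Arrays.asList("xurunyun", "liangyongqi", "wangfei", "yasaka"), 1);
        JavaRDD<String> nameRDD1 = sc.parallelize(
                Arrays.asList("xurunyun", "liangyongqi2", "wangfei3", "yasaka4"), 1);

        // Prints liangyongqi, wangfei, yasaka (order not guaranteed).
        for (String name : nameRDD.subtract(nameRDD1).collect()) {
            System.out.println(name);
        }

        sc.close();
    }
}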