Spark算子(八)

来源:互联网 发布:苹果root软件 编辑:程序博客网 时间:2024/05/24 15:35

Point 1: CartesianOperator

package com.spark.operator;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;

/**
 * Demonstrates the {@code cartesian} transformation: pairs every element of one
 * RDD with every element of another, producing the full cross product
 * (here: every top garment combined with every pair of trousers).
 *
 * Created by Administrator on 2017/07/20.
 */
public class CartesianOperator {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("CartesianOperator").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<String> clothes = Arrays.asList("T恤衫", "夹克", "皮大衣", "衬衫", "毛衣");
        List<String> trousers = Arrays.asList("西裤", "内裤", "铅笔裤", "皮裤", "牛仔裤");

        JavaRDD<String> clothesRDD = sc.parallelize(clothes);
        JavaRDD<String> trousersRDD = sc.parallelize(trousers);

        // Cross product: 5 x 5 = 25 (garment, trousers) pairs.
        JavaPairRDD<String, String> pairs = clothesRDD.cartesian(trousersRDD);
        for (Tuple2<String, String> result : pairs.collect()) {
            System.out.println(result);
        }

        // Release the SparkContext (was missing; the sibling examples close theirs).
        sc.close();
    }
}

Point 2: CollectOperator

package com.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

/**
 * Demonstrates the {@code collect} action: doubles each number in a small RDD
 * with {@code map}, then pulls the full result back to the driver.
 */
public class CollectOperator {
    public static void main(String[] args) {
        // Bug fix: the app name was copy-pasted as "ReduceOperator".
        SparkConf conf = new SparkConf().setAppName("CollectOperator")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // A small list of numbers (1 through 5) to transform.
        List<Integer> numberList = Arrays.asList(1, 2, 3, 4, 5);
        JavaRDD<Integer> numbers = sc.parallelize(numberList);
        JavaRDD<Integer> doubleNumbers = numbers.map(new Function<Integer, Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Integer call(Integer v) throws Exception {
                return v * 2;
            }
        });

        // collect() ships every partition's data from the cluster back to the
        // driver. For large datasets this causes heavy network transfer and can
        // OOM the driver, so foreach/take are usually preferred; collect is used
        // here only because the dataset is tiny.
        List<Integer> doubleNumberList = doubleNumbers.collect();
        for (Integer num : doubleNumberList) {
            System.out.println(num);
        }

        sc.close();
    }
}

Point 3: CogroupOperator

package com.spark.operator;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;

/**
 * Demonstrates the {@code cogroup} transformation: groups two pair-RDDs by key,
 * yielding for each key a tuple of (all student names, all scores) that share
 * that key.
 *
 * Created by Administrator on 2017/07/20.
 */
public class CogroupOperator {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("CogroupOperator").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<Tuple2<String, String>> studentsList = Arrays.asList(
                new Tuple2<String, String>("1", "xuruyun"),
                new Tuple2<String, String>("2", "wangfei"),
                new Tuple2<String, String>("3", "lixin"));

        List<Tuple2<String, String>> scoreList = Arrays.asList(
                new Tuple2<String, String>("1", "100"),
                new Tuple2<String, String>("2", "90"),
                new Tuple2<String, String>("3", "80"),
                new Tuple2<String, String>("1", "70"),
                new Tuple2<String, String>("2", "60"),
                new Tuple2<String, String>("3", "50"));

        JavaPairRDD<String, String> students = sc.parallelizePairs(studentsList);
        JavaPairRDD<String, String> scores = sc.parallelizePairs(scoreList);

        // cogroup joins by key; each key maps to the pair of Iterables of all
        // matching values from each side. NOTE(review): for a key present on only
        // one side, the other Iterable is empty, not null — the original comment
        // claiming "null" was misleading.
        JavaPairRDD<String, Tuple2<Iterable<String>, Iterable<String>>> result =
                students.cogroup(scores);

        result.foreach(new VoidFunction<Tuple2<String, Tuple2<Iterable<String>, Iterable<String>>>>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(Tuple2<String, Tuple2<Iterable<String>, Iterable<String>>> tuple)
                    throws Exception {
                System.out.println("id:" + tuple._1);
                System.out.println("name:" + tuple._2._1);
                System.out.println("score:" + tuple._2._2);
            }
        });

        // Release the SparkContext (was missing; CollectOperator closes its context).
        sc.close();
    }
}
原创粉丝点击