Spark算子(九)

来源:互联网 发布:淘宝怎么盗图不被投诉 编辑:程序博客网 时间:2024/06/06 10:40

Point 1: CoalesceOperator

package com.spark.operator;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

/**
 * Demonstrates the {@code coalesce} operator: parallelizes 10 elements over
 * 6 partitions, prints each element tagged with its partition index, then
 * coalesces down to 3 partitions and prints the elements again so the new
 * partition layout can be observed.
 *
 * Created by Administrator on 2017/07/21.
 */
public class CoalesceOperator {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("CoalesceOperator")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            List<String> list = Arrays.asList("yyy-1", "yyy-2", "yyy-3", "yyy-4", "yyy-5",
                    "yyy-6", "yyy-7", "yyy-8", "yyy-9", "yyy-10");
            // Spread the 10 elements across 6 partitions.
            JavaRDD<String> staffRDD = sc.parallelize(list, 6);

            // Tag every element with the 1-based index of the partition it lives in.
            JavaRDD<String> staffRDD2 = staffRDD.mapPartitionsWithIndex(
                    new Function2<Integer, Iterator<String>, Iterator<String>>() {
                        @Override
                        public Iterator<String> call(Integer index, Iterator<String> iterator) throws Exception {
                            List<String> tagged = new ArrayList<String>();
                            while (iterator.hasNext()) {
                                String staff = iterator.next();
                                tagged.add("部门[" + (index + 1) + "]" + staff);
                            }
                            return tagged.iterator();
                        }
                    }, true);
            for (String result1 : staffRDD2.collect()) {
                System.out.println(result1);
            }

            // coalesce(3): merge 6 partitions into 3 without a shuffle
            // (narrow dependency; cheaper than repartition for shrinking).
            JavaRDD<String> staffRDD3 = staffRDD2.coalesce(3);

            // Pass the elements through unchanged; the printed order reflects
            // the new 3-partition layout.
            JavaRDD<String> staffRDD4 = staffRDD3.mapPartitionsWithIndex(
                    new Function2<Integer, Iterator<String>, Iterator<String>>() {
                        @Override
                        public Iterator<String> call(Integer index, Iterator<String> iterator) throws Exception {
                            List<String> out = new ArrayList<String>();
                            while (iterator.hasNext()) {
                                out.add(iterator.next());
                            }
                            return out.iterator();
                        }
                    }, true);
            for (String result2 : staffRDD4.collect()) {
                System.out.println(result2);
            }
        } finally {
            // FIX: the original never closed the context, leaking the local
            // SparkContext. The sibling AggregateByKeyOperator closes its
            // context, so this also restores consistency between the examples.
            sc.close();
        }
    }
}

Point 2: AggregateByKeyOperator

package com.spark.operator;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;

/**
 * Word count built on {@code aggregateByKey}: each line of the input file is
 * split into words, each word mapped to {@code (word, 1)}, and the counts are
 * combined with a zero value of 0 and integer addition used both within and
 * across partitions.
 *
 * Created by Administrator on 2017/07/21.
 */
public class AggregateByKeyOperator {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("AggregateByKeyOperator")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaRDD<String> lines = sc.textFile("./data/text.txt");

        // Tokenize each line on single spaces.
        FlatMapFunction<String, String> splitIntoWords = new FlatMapFunction<String, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Iterable<String> call(String line) throws Exception {
                return Arrays.asList(line.split(" "));
            }
        };

        // Pair every word with an initial count of 1.
        PairFunction<String, String, Integer> wordToOne = new PairFunction<String, String, Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Tuple2<String, Integer> call(String word) throws Exception {
                return new Tuple2<String, Integer>(word, 1);
            }
        };

        // Stateless addition; safe to use as both the per-partition sequence
        // function and the cross-partition combine function.
        Function2<Integer, Integer, Integer> add = new Function2<Integer, Integer, Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        };

        JavaRDD<String> words = lines.flatMap(splitIntoWords);
        JavaPairRDD<String, Integer> pairs = words.mapToPair(wordToOne);
        JavaPairRDD<String, Integer> result = pairs.aggregateByKey(0, add, add);

        List<Tuple2<String, Integer>> counts = result.collect();
        for (Tuple2<String, Integer> wordcount : counts) {
            System.out.println(wordcount);
        }

        sc.close();
    }
}