Spark Operators (Part 1)

Point 1: UnionOperator

union concatenates two RDDs end to end: it neither shuffles data nor removes duplicates, so the output below still contains both copies of "xurunyun".

package com.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

public class UnionOperator {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("UnionOperator")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<String> names = Arrays
                .asList("xurunyun", "liangyongqi", "wangfei", "yasaka");
        List<String> names1 = Arrays
                .asList("xurunyun", "liangyongqi2", "wangfei3", "yasaka4");
        JavaRDD<String> nameRDD = sc.parallelize(names, 2);
        JavaRDD<String> nameRDD1 = sc.parallelize(names1, 2);

        // Concatenate the two RDDs and print every element.
        nameRDD.union(nameRDD1).foreach(new VoidFunction<String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public void call(String arg0) throws Exception {
                System.out.println(arg0);
            }
        });

        sc.close();
    }
}
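Because union keeps duplicates, removing them takes an explicit distinct(), which, unlike union, does trigger a shuffle. A minimal sketch, assuming it is placed before sc.close() in the example above and reusing nameRDD and nameRDD1:

// union yields 8 elements; distinct() drops the duplicate "xurunyun".
JavaRDD<String> unioned = nameRDD.union(nameRDD1);
JavaRDD<String> deduped = unioned.distinct();
System.out.println(deduped.count()); // prints 7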

Point 2: TakeSample

takeSample is an action: it samples the requested number of elements and returns them to the driver as a List. The first argument chooses sampling with or without replacement.

package com.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class TakeSample {
    // takeSample = take + sample
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("TakeSample")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<String> names = Arrays
                .asList("xuruyun", "liangyongqi", "wangfei", "xuruyun");
        JavaRDD<String> nameRDD = sc.parallelize(names, 1);

        // false = sample without replacement, 2 = sample size.
        List<String> list = nameRDD.takeSample(false, 2);
        for (String name : list) {
            System.out.println(name);
        }

        sc.close();
    }
}
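takeSample also has a three-argument overload that takes a seed, which makes the sample reproducible across runs. A minimal sketch, assuming it is placed before sc.close() in the example above and reusing nameRDD:

// true = sample with replacement, so the same name may be drawn twice;
// the fixed seed 42 makes repeated runs return the same sample.
List<String> seeded = nameRDD.takeSample(true, 3, 42L);
for (String name : seeded) {
    System.out.println(name);
}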

Point 3: take

take(n) is an action that returns the first n elements of the RDD to the driver, in partition order and without sorting; unlike collect, it does not pull back the whole dataset.

package com.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class TakeOperator {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("TakeOperator")
                .setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // A small collection of numbers; take(3) returns the first three.
        List<Integer> numberList = Arrays.asList(1, 2, 3, 4, 5);
        JavaRDD<Integer> numbers = sc.parallelize(numberList);

        List<Integer> top3Numbers = numbers.take(3);
        for (Integer num : top3Numbers) {
            System.out.println(num);
        }

        sc.close();
    }
}
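Since take does not sort, the variable name top3Numbers above is a bit misleading; for the largest or smallest elements Spark provides top and takeOrdered. A minimal sketch, assuming it is placed before sc.close() in the example above and reusing numbers:

// top(n) returns the n largest elements, takeOrdered(n) the n smallest,
// both using Integer's natural ordering. Both are driver-side actions.
List<Integer> largest = numbers.top(3);          // [5, 4, 3]
List<Integer> smallest = numbers.takeOrdered(3); // [1, 2, 3]
System.out.println(largest);
System.out.println(smallest);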