Spark算子(一)
来源:互联网 发布:程序员杂志 2016 pdf 编辑:程序博客网 时间:2024/06/05 02:06
Point 1: UnionOperator
package com.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

/**
 * Demonstrates the {@code union} transformation: concatenates two RDDs into
 * one. Duplicates are kept (union is not a set union — follow with
 * {@code distinct()} to deduplicate) and partition counts add up (2 + 2 = 4).
 */
public class UnionOperator {

    public static void main(String[] args) {
        // Fixed: appName was "SampleOperator", copy-pasted from another example.
        SparkConf conf = new SparkConf().setAppName("UnionOperator")
                .setMaster("local");
        // JavaSparkContext implements Closeable; try-with-resources guarantees
        // it is closed even if the job throws (the original leaked it then).
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            List<String> names = Arrays
                    .asList("xurunyun", "liangyongqi", "wangfei", "yasaka");
            List<String> names1 = Arrays
                    .asList("xurunyun", "liangyongqi2", "wangfei3", "yasaka4");

            JavaRDD<String> nameRDD = sc.parallelize(names, 2);
            JavaRDD<String> nameRDD1 = sc.parallelize(names1, 2);

            // Prints all 8 elements; "xurunyun" appears twice because union
            // keeps duplicates.
            nameRDD.union(nameRDD1).foreach(new VoidFunction<String>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void call(String name) throws Exception {
                    System.out.println(name);
                }
            });
        }
    }
}
Point 2: TakeSample
package com.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

/**
 * Demonstrates the {@code takeSample} action — conceptually "take + sample":
 * it draws a random sample of a fixed size and returns it to the driver as a
 * local list (unlike {@code sample}, which is a transformation returning an
 * RDD with an approximate fraction).
 */
public class TakeSample {

    public static void main(String[] args) {
        // Fixed: appName was "SampleOperator", copy-pasted from another example;
        // named after this class for consistency with the other demos.
        SparkConf conf = new SparkConf().setAppName("TakeSample")
                .setMaster("local");
        // try-with-resources closes the context even if the job throws
        // (the original leaked it on failure).
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            List<String> names = Arrays
                    .asList("xuruyun", "liangyongqi", "wangfei", "xuruyun");
            JavaRDD<String> nameRDD = sc.parallelize(names, 1);

            // false = sample without replacement; 2 = number of elements wanted.
            List<String> list = nameRDD.takeSample(false, 2);
            for (String name : list) {
                System.out.println(name);
            }
        }
    }
}
Point 3: take
package com.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

/**
 * Demonstrates the {@code take} action: returns the first n elements of the
 * RDD to the driver as a local list, without scanning the whole dataset.
 */
public class TakeOperator {

    public static void main(String[] args) {
        // Fixed: appName was "ReduceOperator", copy-pasted from a reduce example.
        SparkConf conf = new SparkConf().setAppName("TakeOperator")
                .setMaster("local");
        // try-with-resources closes the context even if the job throws
        // (the original leaked it on failure).
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // A small list of 1..5; take(3) fetches just the first three
            // elements. (The original comment claimed "1 to 10, accumulated
            // via reduce" — stale text copied from the reduce example.)
            List<Integer> numberList = Arrays.asList(1, 2, 3, 4, 5);
            JavaRDD<Integer> numbers = sc.parallelize(numberList);

            List<Integer> top3Numbers = numbers.take(3);
            for (Integer num : top3Numbers) {
                System.out.println(num);
            }
        }
    }
}
阅读全文
0 0
- Spark算子(一)
- Spark算子补充<一>
- Spark算子讲解(一)
- Spark算子讲解(一)
- Spark算子(二)
- Spark算子(三)
- Spark算子(四)
- Spark算子(五)
- Spark算子(六)
- Spark算子(七)
- Spark算子(八)
- Spark算子(九)
- spark RDD算子(一) parallelize,makeRDD,textFile
- spark学习-22-Spark算子Transformations和Action使用大全(Transformations章(一))
- “戏”说Spark-Spark核心-RDD转换操作算子详解(一)
- 大数据算子(spark)
- spark算子实战(二)
- 大数据:Spark 算子(一)排序算子sortByKey来看大数据平台下如何做排序
- CSS实战note
- phpExcel 笔记
- 01. 波兰计法,逆波兰记法
- ios-description方法
- CentOS 7 安装 maven
- Spark算子(一)
- Redis复制和哨兵
- 对集合进行排序
- Tensorflow中矩阵运算函数
- 02. 哈夫曼算法与文本压缩、解压
- 保证设计水准的八个标准
- rocketmq stats.Log 的统计 cmd awk
- 百度陆奇内部最新演讲:成为优秀工程师的五个方法
- Java字符串知识简单汇总