Spark算子(九)
来源:互联网 发布:淘宝怎么盗图不被投诉 编辑:程序博客网 时间:2024/06/06 10:40
Point 1: CoalesceOperator
package com.spark.operator;import org.apache.spark.SparkConf;import org.apache.spark.api.java.JavaRDD;import org.apache.spark.api.java.JavaSparkContext;import org.apache.spark.api.java.function.Function2;import java.util.ArrayList;import java.util.Arrays;import java.util.Iterator;import java.util.List;/** * Created by Administrator on 2017/07/21. */public class CoalesceOperator { public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("CoalesceOperator").setMaster("local"); JavaSparkContext sc = new JavaSparkContext(conf); List<String> list = Arrays.asList("yyy-1","yyy-2","yyy-3","yyy-4","yyy-5","yyy-6","yyy-7","yyy-8","yyy-9","yyy-10"); JavaRDD<String> staffRDD = sc.parallelize(list,6); JavaRDD<String> staffRDD2 = staffRDD.mapPartitionsWithIndex(new Function2<Integer, Iterator<String>, Iterator<String>>() { @Override public Iterator<String> call(Integer index, Iterator<String> iterator) throws Exception { List<String> list = new ArrayList<String>(); while(iterator.hasNext()){ String staff = iterator.next(); list.add("部门["+(index+1)+"]"+staff); } return list.iterator(); } },true); for (String result1 : staffRDD2.collect()){ System.out.println(result1); } JavaRDD<String> staffRDD3 = staffRDD2.coalesce(3); JavaRDD<String> staffRDD4 = staffRDD3.mapPartitionsWithIndex(new Function2<Integer, Iterator<String>, Iterator<String>>() { @Override public Iterator<String> call(Integer index, Iterator<String> iterator) throws Exception { List<String> list = new ArrayList<String>(); while(iterator.hasNext()){ String staff = iterator.next(); list.add(staff); } return list.iterator(); } },true); for (String result2 : staffRDD4.collect()){ System.out.println(result2); } }}
Point 2: AggregateByKeyOperator
package com.spark.operator;import org.apache.spark.SparkConf;import org.apache.spark.api.java.JavaPairRDD;import org.apache.spark.api.java.JavaRDD;import org.apache.spark.api.java.JavaSparkContext;import org.apache.spark.api.java.function.FlatMapFunction;import org.apache.spark.api.java.function.Function2;import org.apache.spark.api.java.function.PairFunction;import scala.Tuple2;import java.util.Arrays;import java.util.List;/** * Created by Administrator on 2017/07/21. */public class AggregateByKeyOperator { public static void main(String[] args) { SparkConf conf = new SparkConf() .setAppName("AggregateByKeyOperator") .setMaster("local"); JavaSparkContext sc = new JavaSparkContext(conf); JavaRDD<String> lines = sc.textFile("./data/text.txt"); JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() { private static final long serialVersionUID = 1L; @Override public Iterable<String> call(String line) throws Exception { return Arrays.asList(line.split(" ")); } }); JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() { private static final long serialVersionUID = 1L; @Override public Tuple2<String, Integer> call(String word) throws Exception { return new Tuple2<String,Integer>(word ,1); } }); JavaPairRDD<String, Integer> result = pairs.aggregateByKey(0 , new Function2<Integer,Integer,Integer>(){ private static final long serialVersionUID = 1L; @Override public Integer call(Integer v1, Integer v2) throws Exception { return v1+v2; } }, new Function2<Integer,Integer,Integer>(){ private static final long serialVersionUID = 1L; @Override public Integer call(Integer v1, Integer v2) throws Exception { return v1+v2; } }); List<Tuple2<String , Integer>> list = result.collect(); for (Tuple2<String , Integer> wordcount : list){ System.out.println(wordcount); } sc.close(); }}
阅读全文
0 0
- Spark算子(九)
- Spark算子(一)
- Spark算子(二)
- Spark算子(三)
- Spark算子(四)
- Spark算子(五)
- Spark算子(六)
- Spark算子(七)
- Spark算子(八)
- 大数据算子(spark)
- spark算子实战(二)
- Spark 算子
- spark算子
- spark 算子
- Spark算子
- spark算子
- spark RDD算子(九)之基本的Action操作 first, take, collect, count, countByValue, reduce, aggregate, fold,top
- HALCON 算子函数(九) Matching
- [LeetCode] 565. Array Nesting
- synchronized和lock的用法区别
- String s=new String("abc")创建了2个对象的原因
- oracle最大连接数修改
- Python中lambda表达式学习
- Spark算子(九)
- leetcode 541. Reverse String II
- OpenResty--搭建简单的CC防护
- ActiveMQ在Win7的搭建与使用
- Android PullToRefresh 分析之扩展RecyclerView
- mysql分类统计cash when then
- 根据工作日历推算工作日的算法
- Java —— EJB 到底是什么
- POJ 2186 Popular Cows 【Tarjan+缩点】