Spark RDD 转化
来源:互联网 发布:sqlalchemy sql语句 编辑:程序博客网 时间:2024/06/04 00:50
package com.fei.simple_project;import java.util.Arrays;import org.apache.spark.SparkConf;import org.apache.spark.api.java.JavaDoubleRDD;import org.apache.spark.api.java.JavaPairRDD;import org.apache.spark.api.java.JavaRDD;import org.apache.spark.api.java.JavaSparkContext;import org.apache.spark.api.java.function.DoubleFunction;import org.apache.spark.api.java.function.FlatMapFunction;import org.apache.spark.api.java.function.Function;import org.apache.spark.storage.StorageLevel;import scala.Tuple2;public class App { public static void main( String[] args ) { SparkConf conf = new SparkConf().setAppName("Simple Application"); JavaSparkContext sc = new JavaSparkContext(conf); //使用parallelize创建RDD JavaRDD<Integer> lines1 = sc.parallelize(Arrays.asList(1,2,3,4)); lines1.persist(StorageLevel.MEMORY_ONLY()); System.out.println("222222222Count is:"+lines1.count()); JavaRDD<String> lines2 = sc.parallelize(Arrays.asList("pandas", "like","i like pandas")); lines2.persist(StorageLevel.MEMORY_ONLY()); System.out.println("333333333333Count is:"+lines2.count()); //转化操作 //map, 每个元素返回一个对象 JavaRDD<String> str1RDD = lines2.map(new Function<String, String>(){public String call(String x) {return x.split(" ")[0];} }); System.out.println("44444444444likeRDD Count is:"+str1RDD.count()+" "+ str1RDD.first()); for(String it:str1RDD.collect()){ System.out.println(it+" "); } //flat map,所有元素放入一个对象中返回,返回可迭代内容 JavaRDD<String> flatMapRDD = lines2.flatMap(new FlatMapFunction<String, String>(){public Iterable<String> call(String x) throws Exception {return Arrays.asList(x.split(" "));} }) ; System.out.println("55555555flatMapRDD Count is:"+flatMapRDD.count()+" "+ flatMapRDD.first()); for(String it:flatMapRDD.collect()){ System.out.println(it+" "); } //distinct,集合去重,不常用,开销大 JavaRDD<String> distinctRDD = flatMapRDD.distinct(); System.out.println("6666666666distinctRDD Count is:"+distinctRDD.count()+" "+ distinctRDD.first()); for(String it:distinctRDD.collect()){ System.out.println(it+" "); } 
//union,合并,包括重复 JavaRDD<String> unionRDD = flatMapRDD.union(str1RDD); System.out.println("777777777777777unionRDD Count is:"+unionRDD.count()+" "+ unionRDD.first()); for(String it:unionRDD.collect()){ System.out.println(it+" "); } //intersection,返回共有的 JavaRDD<String> intersectionRDD = flatMapRDD.intersection(str1RDD); System.out.println("88888888888intersectionRDD Count is:"+intersectionRDD.count()+" "+ intersectionRDD.first()); for(String it:intersectionRDD.collect()){ System.out.println(it+" "); } //subtract,返回只在第一个中 JavaRDD<String> subtractRDD = flatMapRDD.subtract(str1RDD); System.out.println("9999999999subtractRDD Count is:"+subtractRDD.count()); for(String it:subtractRDD.collect()){ System.out.println(it+" "); } //cartesian,返回笛卡尔积 JavaPairRDD<String, String> cartesianRDD = flatMapRDD.cartesian(str1RDD); System.out.println("aaaaaaaaacartesianRDD Count is:"+cartesianRDD.count()+" "+ cartesianRDD.first()); for(Tuple2<String, String> it:cartesianRDD.collect()){ System.out.println(it._1+" "+it._2+"\n"); } //mapToDouble,RDD类型转换 //其他转换还有:flatMapToDouble,flatMapToPair, mapToPair JavaDoubleRDD doubleRDD = lines1.mapToDouble(new DoubleFunction<Integer>(){public double call(Integer x) throws Exception {return (double)x*x;} }); System.out.println("aaaaaaaaadoubleRDD Count is:"+doubleRDD.count()+" "+ doubleRDD.first()); for(double it:doubleRDD.collect()){ System.out.println(it+" "); } //mean是平均数 System.out.println("9999999999doubleRDD mean is:"+doubleRDD.mean()); }}
输出结果:
222222222Count is:4
333333333333Count is:3
44444444444likeRDD Count is:3 pandas
pandas
like
i
55555555flatMapRDD Count is:5 pandas
pandas
like
i
like
pandas
6666666666distinctRDD Count is:3 pandas
pandas
i
like
777777777777777unionRDD Count is:8 pandas
pandas
like
i
like
pandas
pandas
like
i
88888888888intersectionRDD Count is:3 pandas
pandas
i
like
9999999999subtractRDD Count is:0
aaaaaaaaacartesianRDD Count is:15 (pandas,pandas)
pandas pandas
pandas like
pandas i
like pandas
like like
like i
i pandas
like pandas
pandas pandas
i like
like like
pandas like
i i
like i
pandas i
aaaaaaaaadoubleRDD Count is:4 1.0
1.0
4.0
9.0
16.0
9999999999doubleRDD mean is:7.5
0 0
- Spark RDD 转化
- Spark RDD 转化与行动基础
- spark中RDD的转化操作和行动操作
- 【spark RDD】RDD编程
- Spark/RDD
- Spark-rdd
- spark RDD
- Spark RDD
- Spark RDD
- spark rdd
- Spark RDD
- Spark rdd
- spark-RDD
- Spark RDD
- spark RDD
- Spark RDD
- spark rdd
- Spark RDD
- POJ 3085:Quick Change
- 决策树与随机森林的R语言实现
- x265-1.8版本-common/pixel.cpp注释
- MySql避免重复插入记录的几种方法
- 通过画五角星,回顾啦三角函数
- Spark RDD 转化
- Android应该掌握的高级技巧
- POJ 3100:Root of the Problem
- x265-1.8版本-common/predict.h注释
- C++Primer学习之三auto和decltype
- POJ 3650:The Seven Percent Solution
- 安卓开发——9patch图的使用
- linux部署及linux命令
- x265-1.8版本-common/scalinglist.cpp注释