Simple Spark Functions in Java/Scala
1. Spark functions written in Java
package com.css.ideaSpark;
import org.apache.spark.Accumulator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.*;
import org.apache.spark.broadcast.Broadcast;
import scala.Tuple2;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
public class TransformationOperation {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setMaster("local")
                .setAppName("TransformationOperation");
        conf.set("spark.testing.memory", "2147480000");
        /*SparkWordCountFunction(conf);
        ParallellizeFunction(conf);
        FilterFunction(conf);
        FileSizeFunction(conf);
        GroupBykeyFunction(conf);
        reduceByKeyFunction(conf);
        SortBykeyFunction(conf);
        JoinByKeyFunction(conf);
        congroupFunction(conf);
        BroadcastVariable(conf);
        accumulableFunction(conf);
        secondSordFunction(conf);
        TopnFunction(conf);*/
        GroupTopnFunction(conf);
    }

    // wordcount
    public static void SparkWordCountFunction(SparkConf conf) {
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaRDD<String> lines = sc.textFile("/opt/modules/hadoop-2.5.0-cdh5.3.6/input.txt");
        JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            private static final long serialVersionUID = 1L;
            public Iterable<String> call(String line) throws Exception {
                return Arrays.asList(line.split(" "));
            }
        });
        JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
            private static final long serialVersionUID = 1L;
            public Tuple2<String, Integer> call(String word) throws Exception {
                return new Tuple2<String, Integer>(word, 1);
            }
        });
        JavaPairRDD<String, Integer> wordCounts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
            private static final long serialVersionUID = 1L;
            @Override
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        });
        wordCounts.foreach(new VoidFunction<Tuple2<String, Integer>>() {
            private static final long serialVersionUID = 1L;
            public void call(Tuple2<String, Integer> wordCount) throws Exception {
                System.out.println(wordCount._1 + "------" + wordCount._2 + " times.");
            }
        });
        sc.close();
    }

    public static void ParallellizeFunction(SparkConf conf) {
        JavaSparkContext sc = new JavaSparkContext(conf);
        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5);
        JavaRDD<Integer> numberRDD = sc.parallelize(numbers);
        int sum = numberRDD.reduce(new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        });
        System.out.println(sum);
        sc.close();
    }

    public static void FilterFunction(SparkConf conf) {
        JavaSparkContext sc = new JavaSparkContext(conf);
        List<Integer> list = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        JavaRDD<Integer> numRdd = sc.parallelize(list);
        JavaRDD<Integer> selectedNumRdd = numRdd.filter(new Function<Integer, Boolean>() {
            @Override
            public Boolean call(Integer num) throws Exception {
                return num % 2 == 0;
            }
        });
        selectedNumRdd.foreach(new VoidFunction<Integer>() {
            @Override
            public void call(Integer num) throws Exception {
                System.out.println(num);
            }
        });
        sc.close();
    }

    public static void FileSizeFunction(SparkConf conf) {
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaRDD<String> lines = sc.textFile("/opt/modules/hadoop-2.5.0-cdh5.3.6/input.txt");
        JavaRDD<Integer> lineRdd = lines.map(new Function<String, Integer>() {
            @Override
            public Integer call(String line) throws Exception {
                return line.length();
            }
        });
        int filesize = lineRdd.reduce(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        });
        System.out.println(filesize);
        sc.close();
    }

    public static void GroupBykeyFunction(SparkConf conf) {
        JavaSparkContext sc = new JavaSparkContext(conf);
        List<Tuple2<String, Integer>> scoredata = Arrays.asList(
                new Tuple2<String, Integer>("class1", 70),
                new Tuple2<String, Integer>("class2", 80),
                new Tuple2<String, Integer>("class3", 90),
                new Tuple2<String, Integer>("class2", 60),
                new Tuple2<String, Integer>("class1", 80));
        JavaPairRDD<String, Integer> scoreRdd = sc.parallelizePairs(scoredata);
        JavaPairRDD<String, Iterable<Integer>> scoreGroupRdd = scoreRdd.groupByKey();
        scoreGroupRdd.foreach(new VoidFunction<Tuple2<String, Iterable<Integer>>>() {
            @Override
            public void call(Tuple2<String, Iterable<Integer>> tuple) throws Exception {
                System.out.println("class: " + tuple._1);
                Iterator<Integer> it = tuple._2.iterator();
                while (it.hasNext()) {
                    System.out.println(it.next());
                }
                System.out.println("==============================");
            }
        });
        sc.close();
    }

    public static void reduceByKeyFunction(SparkConf conf) {
        JavaSparkContext sc = new JavaSparkContext(conf);
        List<Tuple2<String, Integer>> scores = Arrays.asList(
                new Tuple2<String, Integer>("class1", 80),
                new Tuple2<String, Integer>("class1", 90),
                new Tuple2<String, Integer>("class2", 80),
                new Tuple2<String, Integer>("class1", 60),
                new Tuple2<String, Integer>("class2", 80));
        JavaPairRDD<String, Integer> scoreRdd = sc.parallelizePairs(scores);
        JavaPairRDD<String, Integer> scoreReduceByKeyRdd = scoreRdd.reduceByKey(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        });
        scoreReduceByKeyRdd.foreach(new VoidFunction<Tuple2<String, Integer>>() {
            @Override
            public void call(Tuple2<String, Integer> ScoreTuple) throws Exception {
                System.out.println("class : " + ScoreTuple._1 + " scoreSum: " + ScoreTuple._2);
            }
        });
        sc.close();
    }

    public static void SortBykeyFunction(SparkConf conf) {
        JavaSparkContext sc = new JavaSparkContext(conf);
        List<Tuple2<Integer, String>> scores = Arrays.asList(
                new Tuple2<Integer, String>(90, "name1"),
                new Tuple2<Integer, String>(80, "name2"),
                new Tuple2<Integer, String>(88, "name3"),
                new Tuple2<Integer, String>(60, "name4"),
                new Tuple2<Integer, String>(50, "name5"));
        JavaPairRDD<Integer, String> scoresRdd = sc.parallelizePairs(scores);
        JavaPairRDD<Integer, String> scoresSortByKeyRdd = scoresRdd.sortByKey(false);
        scoresSortByKeyRdd.foreach(new VoidFunction<Tuple2<Integer, String>>() {
            @Override
            public void call(Tuple2<Integer, String> ScoreTuple) throws Exception {
                System.out.println("score: " + ScoreTuple._1 + " name: " + ScoreTuple._2);
            }
        });
        sc.close();
    }

    public static void JoinByKeyFunction(SparkConf conf) {
        JavaSparkContext sc = new JavaSparkContext(conf);
        List<Tuple2<Integer, String>> studensInfo = Arrays.asList(
                new Tuple2<Integer, String>(1, "zhangsan"),
                new Tuple2<Integer, String>(2, "lisi"),
                new Tuple2<Integer, String>(3, "wangwu"),
                new Tuple2<Integer, String>(4, "zhaoliu"));
        List<Tuple2<Integer, String>> studentsScore = Arrays.asList(
                new Tuple2<Integer, String>(1, "99"),
                new Tuple2<Integer, String>(2, "88"),
                new Tuple2<Integer, String>(3, "77"),
                new Tuple2<Integer, String>(4, "66"));
        JavaPairRDD<Integer, String> studentIfoRdd = sc.parallelizePairs(studensInfo);
        JavaPairRDD<Integer, String> studentsScoreRdd = sc.parallelizePairs(studentsScore);
        JavaPairRDD<Integer, Tuple2<String, String>> joinInfoRdd = studentIfoRdd.join(studentsScoreRdd);
        joinInfoRdd.foreach(new VoidFunction<Tuple2<Integer, Tuple2<String, String>>>() {
            @Override
            public void call(Tuple2<Integer, Tuple2<String, String>> t) throws Exception {
                System.out.println("id: " + t._1);
                System.out.println("name: " + t._2._1);
                System.out.println("score: " + t._2._2);
                System.out.println("======================================");
            }
        });
        sc.close();
    }

    public static void congroupFunction(SparkConf conf) {
        JavaSparkContext sc = new JavaSparkContext(conf);
        List<Tuple2<Integer, String>> studensInfo = Arrays.asList(
                new Tuple2<Integer, String>(1, "zhangsan"),
                new Tuple2<Integer, String>(2, "lisi"),
                new Tuple2<Integer, String>(3, "wangwu"),
                new Tuple2<Integer, String>(4, "zhaoliu"));
        List<Tuple2<Integer, String>> studentsScore = Arrays.asList(
                new Tuple2<Integer, String>(1, "99"),
                new Tuple2<Integer, String>(2, "88"),
                new Tuple2<Integer, String>(3, "77"),
                new Tuple2<Integer, String>(4, "66"),
                new Tuple2<Integer, String>(1, "78"),
                new Tuple2<Integer, String>(3, "78"),
                new Tuple2<Integer, String>(2, "78"));
        JavaPairRDD<Integer, String> studentIfoRdd = sc.parallelizePairs(studensInfo);
        JavaPairRDD<Integer, String> studentsScoreRdd = sc.parallelizePairs(studentsScore);
        JavaPairRDD<Integer, Tuple2<Iterable<String>, Iterable<String>>> joinInfoRdd = studentIfoRdd.cogroup(studentsScoreRdd);
        joinInfoRdd.foreach(new VoidFunction<Tuple2<Integer, Tuple2<Iterable<String>, Iterable<String>>>>() {
            @Override
            public void call(Tuple2<Integer, Tuple2<Iterable<String>, Iterable<String>>> t) throws Exception {
                System.out.println("id: " + t._1);
                System.out.println("name: " + t._2._1);
                System.out.println("score: " + t._2._2);
                System.out.println("=================================");
            }
        });
        sc.close();
    }

    public static void BroadcastVariable(SparkConf conf) {
        JavaSparkContext sc = new JavaSparkContext(conf);
        List<Integer> numss = Arrays.asList(1, 2, 3, 4, 5);
        JavaRDD<Integer> numsRdd = sc.parallelize(numss);
        final Broadcast broadcastvariable = sc.broadcast(2);
        numsRdd.foreach(new VoidFunction<Integer>() {
            @Override
            public void call(Integer data) throws Exception {
                System.out.println(Integer.valueOf(broadcastvariable.getValue().toString()) * data);
            }
        });
        sc.close();
    }

    public static void accumulableFunction(SparkConf conf) {
        JavaSparkContext sc = new JavaSparkContext(conf);
        final Accumulator<Integer> accumulator = sc.accumulator(0);
        List<Integer> numss = Arrays.asList(1, 2, 3, 4, 5);
        JavaRDD<Integer> numsRdd = sc.parallelize(numss);
        numsRdd.foreach(new VoidFunction<Integer>() {
            @Override
            public void call(Integer data) throws Exception {
                accumulator.add(data);
            }
        });
        System.out.println(accumulator.value());
        sc.close();
    }

    public static void secondSordFunction(SparkConf conf) {
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaRDD<String> linesRdd = sc.textFile("/opt/modules/hadoop-2.5.0-cdh5.3.6/data.txt");
        JavaPairRDD<MyKey, String> myKeyPairsRdd = linesRdd.mapToPair(new PairFunction<String, MyKey, String>() {
            @Override
            public Tuple2<MyKey, String> call(String line) throws Exception {
                int firstkey = Integer.valueOf(line.split(" ")[0]);
                int secondkey = Integer.valueOf(line.split(" ")[1]);
                MyKey mykey = new MyKey(firstkey, secondkey);
                return new Tuple2<MyKey, String>(mykey, line);
            }
        });
        JavaPairRDD<MyKey, String> sortedRdd = myKeyPairsRdd.sortByKey();
        JavaRDD<String> resultRDd = sortedRdd.map(new Function<Tuple2<MyKey, String>, String>() {
            @Override
            public String call(Tuple2<MyKey, String> myKeyStringTuple2) throws Exception {
                return myKeyStringTuple2._2;
            }
        });
        resultRDd.foreach(new VoidFunction<String>() {
            @Override
            public void call(String line) throws Exception {
                System.out.println(line);
            }
        });
        sc.close();
    }

    public static void TopnFunction(SparkConf conf) {
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaRDD<String> linesRdd = sc.textFile("/opt/modules/hadoop-2.5.0-cdh5.3.6/data2.txt");
        JavaPairRDD<Integer, String> PairsRdd = linesRdd.mapToPair(new PairFunction<String, Integer, String>() {
            @Override
            public Tuple2<Integer, String> call(String line) throws Exception {
                return new Tuple2<Integer, String>(Integer.valueOf(line), line);
            }
        });
        JavaPairRDD<Integer, String> topnRdd = PairsRdd.sortByKey(false);
        JavaRDD<String> resultRdd = topnRdd.map(new Function<Tuple2<Integer, String>, String>() {
            @Override
            public String call(Tuple2<Integer, String> Tuple2) throws Exception {
                return Tuple2._2;
            }
        });
        Iterator<String> It = resultRdd.take(3).iterator();
        while (It.hasNext()) {
            System.out.println(It.next());
        }
        sc.close();
    }

    public static void GroupTopnFunction(SparkConf conf) {
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaRDD<String> linesRdd = sc.textFile("/opt/modules/hadoop-2.5.0-cdh5.3.6/data3.txt");
        JavaPairRDD<String, Integer> pairRDD = linesRdd.mapToPair(new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String line) throws Exception {
                return new Tuple2<String, Integer>(line.split(" ")[0], Integer.valueOf(line.split(" ")[1]));
            }
        });
        JavaPairRDD<String, Iterable<Integer>> groupRdd = pairRDD.groupByKey();
        JavaPairRDD<String, Iterable<Integer>> groupTop3Rdd = groupRdd.mapToPair(
                new PairFunction<Tuple2<String, Iterable<Integer>>, String, Iterable<Integer>>() {
            @Override
            public Tuple2<String, Iterable<Integer>> call(Tuple2<String, Iterable<Integer>> TuplePara) throws Exception {
                String className = TuplePara._1;
                Iterator<Integer> It = TuplePara._2.iterator();
                Integer top3[] = new Integer[3];
                while (It.hasNext()) {
                    Integer score = It.next();
                    for (int i = 0; i < 3; i++) {
                        if (top3[i] == null) {
                            top3[i] = score;
                            break;
                        } else if (score > top3[i]) {
                            for (int j = 2; j > i; j--) {
                                top3[j] = top3[j - 1];
                            }
                            top3[i] = score;
                            break;
                        }
                    }
                }
                return new Tuple2<String, Iterable<Integer>>(className, Arrays.asList(top3));
            }
        });
        groupTop3Rdd.foreach(new VoidFunction<Tuple2<String, Iterable<Integer>>>() {
            @Override
            public void call(Tuple2<String, Iterable<Integer>> TuplePara) throws Exception {
                System.out.println("className: " + TuplePara._1);
                Iterator<Integer> iterator = TuplePara._2.iterator();
                while (iterator.hasNext()) {
                    System.out.println("score: " + iterator.next());
                }
            }
        });
        sc.close();
    }
}
// Self-defined key used to implement secondary sort
package com.css.ideaSpark;
import scala.math.Ordered;
import java.io.Serializable;
public class MyKey implements Ordered<MyKey>, Serializable {
    private int firstKey;
    private int secondKey;

    public MyKey(int firstKey, int secondKey) {
        this.firstKey = firstKey;
        this.secondKey = secondKey;
    }

    public int getFirstKey() {
        return firstKey;
    }

    public int getSecondKey() {
        return secondKey;
    }

    public void setFirstKey(int firstKey) {
        this.firstKey = firstKey;
    }

    public void setSecondKey(int secondKey) {
        this.secondKey = secondKey;
    }

    @Override
    public boolean $less(MyKey that) {
        if (this.getFirstKey() < that.getFirstKey()) {
            return true;
        } else if ((this.getFirstKey() == that.getFirstKey()) && (this.getSecondKey() < that.getSecondKey())) {
            return true;
        } else {
            return false;
        }
    }

    @Override
    public boolean $greater(MyKey that) {
        if (this.getFirstKey() > that.getFirstKey()) {
            return true;
        } else if ((this.getFirstKey() == that.getFirstKey()) && (this.getSecondKey() > that.getSecondKey())) {
            return true;
        } else {
            return false;
        }
    }

    @Override
    public boolean $less$eq(MyKey that) {
        if ($less(that) || this.getFirstKey() == that.getFirstKey() && this.getSecondKey() == that.getSecondKey()) {
            return true;
        } else {
            return false;
        }
    }

    @Override
    public boolean $greater$eq(MyKey that) {
        if ($greater(that) || this.getFirstKey() == that.getFirstKey() && this.getSecondKey() == that.getSecondKey()) {
            return true;
        } else {
            return false;
        }
    }

    @Override
    public int compare(MyKey that) {
        if (this.getFirstKey() != that.getFirstKey()) {
            return this.getFirstKey() - that.getFirstKey();
        } else {
            return this.getSecondKey() - that.getSecondKey();
        }
    }

    @Override
    public int compareTo(MyKey that) {
        if (this.getFirstKey() != that.getFirstKey()) {
            return this.getFirstKey() - that.getFirstKey();
        } else {
            return this.getSecondKey() - that.getSecondKey();
        }
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        MyKey myKey = (MyKey) o;
        if (firstKey != myKey.firstKey) return false;
        return secondKey == myKey.secondKey;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + firstKey;
        result = prime * result + secondKey;
        return result;
    }
}
2. Spark functions written in Scala
package com.css.scala
import org.apache.spark.{SparkConf, SparkContext}
import scala.util.control.Breaks
object TransformationOperation {
def main(args: Array[String]): Unit = {
val conf = new SparkConf()
  .setMaster("local")
  .setAppName("TransformationOperation")
/* ScalaWordCountFunction(conf)
ParallelizeFunction(conf)
FilterFunction(conf)
FileSizeFunction(conf)
GroupBykeyFunction(conf)
ReduceByKeyFunction(conf)
SortByKeyFunction(conf)
JoinBykeyFunction(conf)
CoGroupFunction(conf)
broadcastVariable(conf)
accmulableFunction(conf)
secondSordFunction(conf)
TopnFunction(conf)*/
GroupTopnFunction(conf)
}
def ScalaWordCountFunction(conf: SparkConf):Unit={
  val sc = new SparkContext(conf)
  val lines = sc.textFile("/opt/modules/hadoop-2.5.0-cdh5.3.6/input.txt")
  val words = lines.flatMap(line => line.split(" "))
  val pairs = words.map(word => (word, 1))
  val wordCounts = pairs.reduceByKey(_ + _)
  wordCounts.foreach(wordCount => println(wordCount._1 + " appeared " + wordCount._2 + " times"))
  // lines.flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_).collect().foreach(println)
  sc.stop()
}
def ParallelizeFunction(conf:SparkConf): Unit ={
  val sc = new SparkContext(conf)
  val numbers = Array(1, 2, 3, 4, 5)
  val numberRdd = sc.parallelize(numbers)
  val sum = numberRdd.reduce(_ + _)
  println(sum)
  sc.stop()
}
def FilterFunction(conf:SparkConf): Unit ={
  val sc = new SparkContext(conf)
  val nums = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
  val numRdd = sc.parallelize(nums)
  // numRdd.filter(num => num % 2 == 0)
  numRdd.filter(_ % 2 == 0).foreach(println(_))
  sc.stop()
}
def FileSizeFunction(conf :SparkConf): Unit ={
  val sc = new SparkContext(conf)
  val linesRdd = sc.textFile("/opt/modules/hadoop-2.5.0-cdh5.3.6/input.txt")
  val fileSZ = linesRdd.map(line => line.length).reduce(_ + _)
  println(fileSZ)
  sc.stop()
}
def GroupBykeyFunction(conf :SparkConf): Unit ={
  val sc = new SparkContext(conf)
  val scores = Array(("class1", 77), ("class2", 80), ("class3", 90), ("class1", 87), ("class3", 78))
  val scoresRdd = sc.parallelize(scores)
  scoresRdd.groupByKey().foreach(score => {
    println("class: " + score._1)
    score._2.foreach(scoredata => println(scoredata))
    println("=============================")
  })
  sc.stop()
}
def ReduceByKeyFunction(conf:SparkConf): Unit ={
val sc= new SparkContext(conf)
  val scores = Array(new Tuple2("class1", 80),
    new Tuple2("class1", 80),
    new Tuple2("class2", 80),
    new Tuple2("class2", 80),
    new Tuple2("class1", 80))
  val scoresRdd = sc.parallelize(scores)
  scoresRdd.reduceByKey(_ + _).foreach(classScore => {
    println("class: " + classScore._1 + " scoreSum " + classScore._2)
  })
  sc.stop()
}
def SortByKeyFunction(conf:SparkConf): Unit ={
  val sc = new SparkContext(conf)
  val scores = Array(new Tuple2(90, "name1"),
    new Tuple2(80, "name2"),
    new Tuple2(88, "name3"),
    new Tuple2(89, "name4"),
    new Tuple2(80, "name5"),
    new Tuple2(90, "name6"),
    new Tuple2(99, "name7"))
  val scoresRdd = sc.parallelize(scores)
  scoresRdd.sortBy(_._1, false).foreach(Score => {
    println("score: " + Score._1 + " name " + Score._2)
  })
  sc.stop()
}
def JoinBykeyFunction(conf :SparkConf): Unit ={
val sc= new SparkContext(conf)
  val studentsInfo = Array(new Tuple2(1, "name1"),
    new Tuple2(2, "name2"),
    new Tuple2(3, "name3"),
    new Tuple2(4, "name4"),
    new Tuple2(5, "name5"))
  val studentsScore = Array(new Tuple2(1, "99"),
    new Tuple2(2, "88"),
    new Tuple2(3, "77"),
    new Tuple2(4, "66"),
    new Tuple2(2, "97"))
  val studentsInfoRdd = sc.parallelize(studentsInfo)
  val studentsScoreRdd = sc.parallelize(studentsScore)
  val joinRdd = studentsInfoRdd.join(studentsScoreRdd)
  joinRdd.foreach(joinPara => {
    println("ID: " + joinPara._1)
    println("name: " + joinPara._2._1)
    println("score: " + joinPara._2._2)
    println("============================")
  })
  sc.stop()
}
def CoGroupFunction(conf:SparkConf): Unit ={
  val sc = new SparkContext(conf)
  val studentInfo = Array(new Tuple2(1, "name1"),
    new Tuple2(2, "name2"),
    new Tuple2(3, "name3"),
    new Tuple2(4, "name4"),
    new Tuple2(4, "name5"))
  val studentScore = Array(new Tuple2(1, "77"),
    new Tuple2(2, "88"),
    new Tuple2(3, "99"),
    new Tuple2(4, "66"),
    new Tuple2(1, "77"),
    new Tuple2(2, "88"),
    new Tuple2(3, "99"),
    new Tuple2(4, "66"))
  val studentInfoRdd = sc.parallelize(studentInfo)
  val studentScoreRdd = sc.parallelize(studentScore)
  val cogroupRdd = studentInfoRdd.cogroup(studentScoreRdd)
  cogroupRdd.foreach(congroupValue => {
    println("id: " + congroupValue._1)
    println("name: " + congroupValue._2._1)
    println("score: " + congroupValue._2._2)
  })
  sc.stop()
}
def broadcastVariable(conf: SparkConf): Unit = {
  val sc = new SparkContext(conf)
  val broadcastvaliale = sc.broadcast(2)
  val arrNum = Array(1, 2, 3, 4, 5)
  val arrNumRdd = sc.parallelize(arrNum)
  arrNumRdd.foreach(data => println(broadcastvaliale.value * data))
  sc.stop()
}
def accmulableFunction(conf: SparkConf): Unit = {
  val sc = new SparkContext(conf)
  val accmulableVariable = sc.accumulator(0)
  val arrNum = Array(1, 2, 3, 4, 5)
  val arrNumRdd = sc.parallelize(arrNum)
  arrNumRdd.foreach(data => {
    accmulableVariable += data
    if (data == 5) println(accmulableVariable)
  })
  sc.stop()
}
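// Note: accmulableFunction above prints the accumulator inside foreach, which only behaves as
// expected in local mode; the accumulator value should be read on the driver after the action.
// A minimal sketch of the same sum with the newer AccumulatorV2 API (assumes Spark 2.x); the
// method name longAccumulatorSketch is illustrative and is not called from main.
def longAccumulatorSketch(conf: SparkConf): Unit = {
  val sc = new SparkContext(conf)
  val acc = sc.longAccumulator("sum")                        // driver-registered LongAccumulator
  sc.parallelize(Array(1, 2, 3, 4, 5)).foreach(x => acc.add(x))
  println(acc.value)                                         // read on the driver, after the action
  sc.stop()
}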
def secondSordFunction(conf :SparkConf): Unit ={
val sc= new SparkContext(conf)val linesRdd= sc.textFile("/opt/modules/hadoop-2.5.0-cdh5.3.6/data.txt")val pairSortKey=linesRdd.map(line=>( new SecondSortKey(line.split(" ")(0).toInt,line.split(" ")(1).toInt), line));val sortPairRdd = pairSortKey.sortByKey()val sortResultRdd= sortPairRdd.map(line=>line._2)sortResultRdd.foreach(x=>println(x))sc.stop()
}
def TopnFunction(conf:SparkConf): Unit ={
val sc= new SparkContext(conf)
  val linesRdd = sc.textFile("/opt/modules/hadoop-2.5.0-cdh5.3.6/data2.txt")
  val pairRdd = linesRdd.map(line => (line.toInt, line))
  val sortPairRdd = pairRdd.sortByKey(false)
  val sortResultRdd = sortPairRdd.map(line => line._2)
  val top3 = sortResultRdd.take(3)
  top3.foreach(x => println(x))
  sc.stop()
}
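// Sketch (not in the original post): RDD.top(n) returns the n largest elements directly, so the
// global top 3 above can also be written without an explicit sortByKey. topnWithTop is an
// illustrative name and is not called from main.
def topnWithTop(conf: SparkConf): Unit = {
  val sc = new SparkContext(conf)
  val top3 = sc.textFile("/opt/modules/hadoop-2.5.0-cdh5.3.6/data2.txt").map(_.toInt).top(3)
  top3.foreach(println)
  sc.stop()
}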
def GroupTopnFunction(conf:SparkConf): Unit ={
val sc= new SparkContext(conf)val linesRdd= sc.textFile("/opt/modules/hadoop-2.5.0-cdh5.3.6/data3.txt")val pairRdd= linesRdd.map(line=>(line.split(" ")(0),line.split(" ")(1))).groupByKey() .map(score=>{ val iterator=score._2.iterator val top3= new Array[Integer](3) val loop= new Breaks while (iterator.hasNext){ val temp = iterator.next() loop.breakable( for(i<- 0 until 3 ){ if(top3(i)==null){ top3(i)=temp.toInt loop.break() }else if(temp.toInt > top3(i)){ for(j<-Range(2,0,-1) if(j>i)){ top3(j)=top3(j-1) } top3(i)=temp.toInt loop.break() } } ) } println(top3(0)) println(top3(1)) println(top3(2)) new Tuple2(score._1,top3)})pairRdd .foreach(classScore=>{ println("class: "+classScore._1) val iterator=classScore._2.iterator while (iterator.hasNext){ println("score: "+iterator.next()) }})sc.stop()
}
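// A more compact per-class top-3 (a sketch, not from the original post): sort each group's values
// in descending order and keep the first three, avoiding the manual insertion loop above.
// groupTopnCompact is an illustrative name and is not called from main.
def groupTopnCompact(conf: SparkConf): Unit = {
  val sc = new SparkContext(conf)
  val linesRdd = sc.textFile("/opt/modules/hadoop-2.5.0-cdh5.3.6/data3.txt")
  linesRdd.map(line => (line.split(" ")(0), line.split(" ")(1).toInt))
    .groupByKey()
    .mapValues(scores => scores.toList.sortBy(-_).take(3))   // descending sort, keep top 3
    .foreach(classScore => println("class: " + classScore._1 + " top3: " + classScore._2))
  sc.stop()
}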
def test(): Unit ={
var i=0;
for(j<-Range(2,0,-1) if(j>i)){
println("j="+j+"i="+i)
}
}
}
// Self-defined key used to implement secondary sort
package com.css.scala
class SecondSortKey(val firstKey: Int, val secondKey: Int) extends Ordered[SecondSortKey] with Serializable {
override def compare(that: SecondSortKey): Int = {
if(this.firstKey!=that.firstKey) {
this.firstKey-that.firstKey
}else{
this.secondKey-that.secondKey
}
}
}
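For reference, a minimal usage sketch of SecondSortKey (not part of the original post): it assumes the same data layout as secondSordFunction above, i.e. two space-separated integers per line, and replaces data.txt with a small in-memory dataset. The object name SecondSortKeyDemo is illustrative.
import org.apache.spark.{SparkConf, SparkContext}
object SecondSortKeyDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("SecondSortKeyDemo")
    val sc = new SparkContext(conf)
    // In-memory stand-in for data.txt: "firstKey secondKey" per line.
    val lines = sc.parallelize(Array("1 5", "2 3", "1 2", "2 8"))
    val sorted = lines
      .map(line => (new SecondSortKey(line.split(" ")(0).toInt, line.split(" ")(1).toInt), line))
      .sortByKey()       // uses SecondSortKey.compare: first key, then second key
      .map(_._2)
    sorted.collect().foreach(println)   // expected order: "1 2", "1 5", "2 3", "2 8"
    sc.stop()
  }
}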