Spark二次排序(Java+Scala)
来源:互联网 发布:知乎注册要用真名 编辑:程序博客网 时间:2024/06/05 11:43
1.基础排序算法
// Word count ordered by descending frequency (spark-shell snippet; `sc` is the shell's SparkContext).
// sortByKey only sorts on the key, so each (word, count) pair is flipped to (count, word),
// sorted in descending order, and flipped back before being collected to the driver.
sc.textFile("/data/putfile.txt").
  flatMap(line => line.split(" ")).
  map(word => (word, 1)).
  reduceByKey((left, right) => left + right, 1).
  map { case (word, count) => (count, word) }.
  sortByKey(false).
  map { case (count, word) => (word, count) }.
  collect
2.二次排序算法(Java实现)
import java.io.Serializable;import scala.math.Ordered;public class SecondarySortKey implements Ordered<SecondarySortKey>,Serializable { //需要二次排序的Key private int first; private int seconde; /** * 二次排序的构造方法 * @param first * @param seconde */ public SecondarySortKey(int first, int seconde) { super(); this.first = first; this.seconde = seconde; } @Override public boolean $greater(SecondarySortKey other) { // TODO Auto-generated method stub if(this.first> other.getFirst()){ return true; }else if(this.first==other.getFirst()&&this.seconde>other.seconde){ return true; } return false; } @Override public boolean $greater$eq(SecondarySortKey other) { // TODO Auto-generated method stub if(this.$greater(other)){ return true; }else if(this.first==other.getFirst()&&this.seconde==other.getSeconde()){ return true; } return false; } @Override public boolean $less(SecondarySortKey other) { // TODO Auto-generated method stub if(this.first<other.getFirst()){ return true; }else if(this.first==other.getFirst()&&this.seconde<other.getSeconde()){ return true; } return false; } @Override public boolean $less$eq(SecondarySortKey other) { // TODO Auto-generated method stub if(this.$less(other)){ return true; }else if(this.first==other.getFirst()&&this.seconde==other.getSeconde()){ return true; } return false; } @Override public int compare(SecondarySortKey other) { // TODO Auto-generated method stub if(this.first - other.getFirst()!=0){ return this.first-other.getFirst(); }else{ return this.seconde-other.getSeconde(); } } @Override public int compareTo(SecondarySortKey other) { // TODO Auto-generated method stub if(this.first-other.getFirst()!=0){ return this.first-other.getFirst(); }else{ return this.seconde-other.getSeconde(); } } @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + first; result = prime * result + seconde; return result; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; 
if (getClass() != obj.getClass()) return false; SecondarySortKey other = (SecondarySortKey) obj; if (first != other.first) return false; if (seconde != other.seconde) return false; return true; } public int getFirst() { return first; } public void setFirst(int first) { this.first = first; } public int getSeconde() { return seconde; } public void setSeconde(int seconde) { this.seconde = seconde; }}
import org.apache.spark.SparkConf;import org.apache.spark.api.java.JavaPairRDD;import org.apache.spark.api.java.JavaRDD;import org.apache.spark.api.java.JavaSparkContext;import org.apache.spark.api.java.function.Function;import org.apache.spark.api.java.function.PairFunction;import org.apache.spark.api.java.function.VoidFunction;import scala.Tuple2;/** * 二次排序:具体的实现步骤 * 第一步:按照Ordered和Serializable接口实现自定义排序的Key * 第二步:将要进行二次排序的文件加载进来生成<key,value>类型的RDD * 第三步:使用SortByKey基于自定义的Key进行二次排序 * 第四步:去除调排序的key,只保留排序的结果 * * @author Shuai.Zh * */public class SecondarySortApp { public static void main(String[] args) { // TODO Auto-generated method stub SparkConf conf=new SparkConf(); conf.setAppName("SecondarySort"); conf.setMaster("local"); JavaSparkContext sc=new JavaSparkContext(conf); JavaRDD<String> lines=sc.textFile("C:\\Users\\Administrator\\Desktop\\1.txt"); JavaPairRDD<SecondarySortKey, String> pairs=lines.mapToPair(new PairFunction<String, SecondarySortKey, String>() { @Override public Tuple2<SecondarySortKey, String> call(String line) throws Exception { // TODO Auto-generated method stub String[] splited=line.split(" "); SecondarySortKey key=new SecondarySortKey(Integer.valueOf(splited[0]),Integer.valueOf(splited[1])); return new Tuple2<SecondarySortKey, String>(key, line); } }); JavaPairRDD<SecondarySortKey, String> sorted=pairs.sortByKey();//完成二次排序 //过滤掉排序后自定的key,保留排序的结果 JavaRDD<String> secondarySorted=sorted.map(new Function<Tuple2<SecondarySortKey,String>, String>() { @Override public String call(Tuple2<SecondarySortKey, String> sortedCount) throws Exception { // TODO Auto-generated method stub return sortedCount._2; } }); secondarySorted.foreach(new VoidFunction<String>() { @Override public void call(String sorted) throws Exception { // TODO Auto-generated method stub System.out.println(sorted); } });; }}
3.二次排序算法(Scala实现)
import org.apache.spark.SparkConf

/**
 * Composite key for secondary sorting: ordered by `first`, with ties broken
 * by `second`.
 *
 * Created by Administrator on 2017/6/1.
 *
 * @param first  primary sort field
 * @param second secondary sort field, consulted only when `first` is equal
 */
class SecondarySortkey(val first: Int, val second: Int)
  extends Ordered[SecondarySortkey] with Serializable {

  /**
   * Compares by `first`, then by `second`.
   *
   * Uses `Integer.compare` rather than subtraction: a difference such as
   * `Int.MinValue - 1` overflows and would report the wrong sign.
   *
   * @param other key to compare against
   * @return a negative value, zero, or a positive value as this key is less
   *         than, equal to, or greater than `other`
   */
  def compare(other: SecondarySortkey): Int = {
    val byFirst = Integer.compare(this.first, other.first)
    if (byFirst != 0) byFirst else Integer.compare(this.second, other.second)
  }
}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Secondary sort driver: sorts the lines of a text file in descending order
 * of the two integers at the start of each line.
 *
 * Created by Administrator on 2017/6/1.
 */
object SecondarySortApp {

  /** Input file used when no path is given on the command line. */
  private val DefaultInputPath = "C:\\Users\\Administrator\\Desktop\\1.txt"

  /**
   * Reads the input file, keys each line by its first two space-separated
   * integers, sorts by that key in descending order, and prints the lines.
   *
   * @param args optional; `args(0)` overrides the default input path
   */
  def main(args: Array[String]): Unit = {
    val inputPath = if (args.nonEmpty) args(0) else DefaultInputPath
    val conf = new SparkConf().setAppName("sort").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val lines = sc.textFile(inputPath)
      val pairWithSortKey = lines.map { line =>
        // Split once per line instead of once per field.
        val fields = line.split(" ")
        (new SecondarySortkey(fields(0).toInt, fields(1).toInt), line)
      }
      // `false` requests descending order.
      val sorted = pairWithSortKey.sortByKey(false)
      val sortedResult = sorted.map(sortedLine => sortedLine._2)
      sortedResult.collect().foreach(println)
    } finally {
      // Stop the context even if the job fails.
      sc.stop()
    }
  }
}
阅读全文
0 0
- Spark二次排序(Java+Scala)
- Spark基础排序+二次排序(java+scala)
- Spark Scala 二次排序
- Spark Scala 二次排序
- Spark——二次排序(scala)
- Spark:Scala实现二次排序
- Spark Java 二次排序
- Spark Scala 实现二次排序和相加
- spark二次排序简单例子(JAVA)
- Spark:Java实现 二次排序
- 二次排序(Scala版)
- SPARK排序算法,使用Scala开发 二次排序 自定义KEY值,相比JAVA的罗嗦,Scala优雅简洁!!!
- Spark 中的二次排序Java实现
- Spark的高级排序(二次排序)
- Spark的高级排序(二次排序)
- Scala之二次排序
- Spark应用(二) 二次排序
- Spark中的二次排序
- Adny Rubin
- 从零开始学习OpenCL开发(二)一个最简单的示例与简单性能分析
- iOS 25个性能优化/内存优化常用方法
- 解决PKIX path building failed的问题-验证可以解决问题
- Spark性能优化:shuffle调优
- Spark二次排序(Java+Scala)
- iOS 下载 解压 使用 zip
- 区块链技术浅析之一:区块链是什么
- 如何使用Android自带的资源
- AndroidStudio mac常用快捷键
- 金蝶BOS框架,EntityViewInfo的使用
- 计算机网络之物理层
- Java遍历Map的方法
- HTML4.0和HTML5 事件之间的差异