Spark 自定义排序

来源:互联网 发布:公知精英是什么意思 编辑:程序博客网 时间:2024/06/03 16:45



Spark 自定义排序

第一种:让排序键(样例类 Girl)实现 Ordered 特质

package day02

import org.apache.spark.{SparkConf, SparkContext}

// Sort rule: primarily by faveValue; when faveValue ties, compare age.
// Input tuple layout: (name, faveValue, age, id)
object User_D_Sort {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("UrlCount").setMaster("local[2]")
    val sc = new SparkContext(conf)
    val rdd1 = sc.parallelize(
      List(("yuihatano", 90, 28, 1), ("angelababy", 90, 27, 2), ("JuJingYi", 95, 22, 3)))
    // Ascending by the Girl ordering below; pass false for descending.
    val rdd2 = rdd1.sortBy(x => Girl(x._2, x._3), true)
    println(rdd2.collect().toBuffer)
    sc.stop() // release the SparkContext (consistent with the CustomSort variant)
  }
}

// First approach: the sort key itself implements Ordered.
// favevalue ascending; for equal favevalue, age descending (older ranks lower).
case class Girl(favevalue: Int, age: Int) extends Ordered[Girl] with Serializable {
  override def compare(that: Girl): Int = {
    // Integer.compare avoids the overflow that plain subtraction
    // (e.g. this.favevalue - that.favevalue) can produce for extreme Int values.
    if (this.favevalue == that.favevalue) Integer.compare(that.age, this.age)
    else Integer.compare(this.favevalue, that.favevalue)
  }
}

第二种:通过隐式的 Ordering[Girl](隐式转换)提供排序规则,样例类本身不实现 Ordered


import org.apache.spark.{SparkConf, SparkContext}

/** Second approach: the ordering is supplied via an implicit Ordering[Girl]
  * brought into scope at the call site, instead of Girl extending Ordered.
  */
object OrderContext {
  // Contract fix: the original returned 1 even when both faceValue and age
  // were equal, which violates the Ordering contract (compare(x, x) must be
  // 0) and can corrupt sort results. Integer.compare also avoids the
  // overflow risk of subtraction-based comparisons.
  implicit val girlOrdering: Ordering[Girl] = new Ordering[Girl] {
    override def compare(x: Girl, y: Girl): Int = {
      val byFace = Integer.compare(x.faceValue, y.faceValue)
      // Same faceValue: younger age ranks higher (reversed age comparison,
      // matching the original -1/1 branches for unequal ages).
      if (byFace != 0) byFace else Integer.compare(y.age, x.age)
    }
  }
}

// Sort rule: primarily by faceValue, then by age.
// Input tuple layout: (name, faceValue, age, id)
object CustomSort {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("CustomSort").setMaster("local[2]")
    val sc = new SparkContext(conf)
    val rdd1 = sc.parallelize(
      List(("yuihatano", 90, 28, 1), ("angelababy", 90, 27, 2), ("JuJingYi", 95, 22, 3)))
    import OrderContext._
    val rdd2 = rdd1.sortBy(x => Girl(x._2, x._3), false)
    println(rdd2.collect().toBuffer)
    sc.stop()
  }
}

// Plain data holder: the sort rule lives in OrderContext.girlOrdering.
// NOTE(fix): the original ended with an unterminated Scaladoc comment (it was
// closed with a lone slash), which made this file fail to compile.
case class Girl(faceValue: Int, age: Int) extends Serializable