Scala 外部排序算法

来源:互联网 发布:知乎手机如何发文章 编辑:程序博客网 时间:2024/06/05 04:32

Scala 外部排序算法


源代码

/**
  * Created by wilbur on 6/27/17.
  */
import java.io._
import java.nio.file.{Files, StandardCopyOption}
import java.util
import scala.util.control.Breaks._

/**
  * External merge sort for a text file that holds one integer per line.
  *
  * The input is read in chunks of `ChunkSize` records. Each chunk is sorted in
  * memory and written to its own run file; the run files are then merged
  * pairwise, round after round, until a single sorted file named `result`
  * remains in the working directory.
  */
object externalSort {

  /** Source data file read by `main`. */
  private val InputPath = "/home/user/inputdata"

  /** Directory that receives the intermediate run files and the final result. */
  private val TempDir = "/project/sort/temp/"

  /** Number of records sorted in memory per run file. */
  private val ChunkSize = 10000000

  /** Name of the final, fully sorted file inside the working directory. */
  private val ResultName = "result"

  /**
    * Sorts `InputPath` into `TempDir`/result and prints the elapsed time.
    *
    * An input without any record produces no run file and therefore no result
    * file.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val timebegin = System.currentTimeMillis()
    val fileNames = splitIntoRuns(InputPath, ChunkSize, TempDir)
    mergeSort(fileNames) // merge every run file into the final result
    val timeover = System.currentTimeMillis()
    println(s"The merge runtime is  ${(timeover - timebegin) / 1000.0}  s\n\n\n")
  }

  /**
    * Sorts the first `size` entries of `nums` and writes exactly those entries,
    * one per line, to a fresh file inside `dir`.
    *
    * Only the first `size` slots are written: the buffer is reused between
    * chunks, so the slots behind `size` hold stale values from an earlier chunk.
    *
    * @param nums buffer holding the records; its first `size` slots get sorted in place
    * @param size number of valid records at the front of `nums`
    * @param dir  directory in which the run file is created
    * @return path of the run file that was written
    * @throws IllegalArgumentException if `size` is negative or exceeds `nums.length`
    * @throws IOException              if the run file cannot be created or written
    */
  def sortAndSave(nums: Array[Int], size: Int, dir: String = TempDir): String = {
    require(size >= 0 && size <= nums.length, s"size $size is outside 0..${nums.length}")
    qsort(nums, 0, size - 1)
    val runFile = newTempFile(dir)
    val writer = new BufferedWriter(new FileWriter(runFile))
    try {
      var i = 0
      while (i < size) {
        writeInt(writer, nums(i))
        i += 1
      }
    } finally writer.close()
    runFile.getPath
  }

  /**
    * Merges the sorted run files pairwise, recursing until one file is left,
    * which is then moved to `dir`/result (replacing an existing result).
    *
    * Every input file is consumed: a merged pair is deleted once its merge has
    * been written successfully, and a leftover odd run is carried unchanged into
    * the next round. An empty list is a no-op.
    *
    * @param fileNames paths of run files, each sorted ascending with one integer per line
    * @param dir       directory for intermediate files and the final result
    * @throws IOException           if a file cannot be read, written or moved
    * @throws NumberFormatException if a run file contains a line that is not an integer
    */
  def mergeSort(fileNames: util.ArrayList[String], dir: String = TempDir): Unit = {
    val merged = new util.ArrayList[String]()
    var i = 0
    while (i < fileNames.size()) {
      if (i + 1 < fileNames.size()) {
        val first = new File(fileNames.get(i))
        val second = new File(fileNames.get(i + 1))
        val target = newTempFile(dir)
        mergePair(first, second, target)
        // Delete the inputs only after the merged output was written without error.
        first.delete()
        second.delete()
        merged.add(target.getPath)
      } else {
        // Odd run out: nothing to merge with, so hand it to the next round as is.
        merged.add(fileNames.get(i))
      }
      i += 2
    }
    if (merged.size() > 1) {
      mergeSort(merged, dir)
    } else if (merged.size() == 1) {
      Files.move(
        new File(merged.get(0)).toPath,
        new File(dir, ResultName).toPath,
        StandardCopyOption.REPLACE_EXISTING)
    }
  }

  /**
    * Sorts `inputData(left)` through `inputData(right)` (both inclusive) in
    * ascending order, in place. Does nothing when `left >= right`.
    *
    * Delegates to `java.util.Arrays.sort`, which avoids the quadratic running
    * time and the deep recursion a first-element-pivot quicksort suffers on
    * already sorted input.
    *
    * @param inputData array to sort in place
    * @param left      index of the first element of the range
    * @param right     index of the last element of the range
    */
  def qsort(inputData: Array[Int], left: Int, right: Int): Unit =
    if (left < right) util.Arrays.sort(inputData, left, right + 1)

  /** Reads `inputPath`, cutting it into sorted run files of at most `chunkSize` records each. */
  private def splitIntoRuns(inputPath: String, chunkSize: Int, dir: String): util.ArrayList[String] = {
    val runs = new util.ArrayList[String]()
    val buffer = new Array[Int](chunkSize) // reused for every chunk
    val reader = new BufferedReader(new FileReader(inputPath))
    try {
      var count = 0
      var line = reader.readLine()
      while (line != null) {
        val text = line.trim
        if (text.nonEmpty) { // blank lines carry no record
          buffer(count) = text.toInt
          count += 1
          if (count == chunkSize) {
            runs.add(sortAndSave(buffer, count, dir))
            count = 0
          }
        }
        line = reader.readLine()
      }
      // Flush the final partial chunk; skip it when it is empty.
      if (count > 0) runs.add(sortAndSave(buffer, count, dir))
    } finally reader.close()
    runs
  }

  /** Merges the two sorted files `first` and `second` into `target`. Either input may be empty. */
  private def mergePair(first: File, second: File, target: File): Unit = {
    val readerA = new BufferedReader(new FileReader(first))
    try {
      val readerB = new BufferedReader(new FileReader(second))
      try {
        val writer = new BufferedWriter(new FileWriter(target))
        try {
          val a = intLines(readerA)
          val b = intLines(readerB)
          while (a.hasNext && b.hasNext) {
            // On a tie the value from the second file goes first, as before.
            if (a.head < b.head) writeInt(writer, a.next())
            else writeInt(writer, b.next())
          }
          // At most one of the two still has records left.
          a.foreach(value => writeInt(writer, value))
          b.foreach(value => writeInt(writer, value))
        } finally writer.close()
      } finally readerB.close()
    } finally readerA.close()
  }

  /** Iterates over the remaining lines of `reader`, parsed as integers, with one-element lookahead. */
  private def intLines(reader: BufferedReader): BufferedIterator[Int] =
    Iterator.continually(reader.readLine()).takeWhile(_ != null).map(_.toInt).buffered

  /** Writes `value` followed by a line separator; unlike `PrintWriter`, I/O errors are thrown. */
  private def writeInt(writer: BufferedWriter, value: Int): Unit = {
    writer.write(Integer.toString(value))
    writer.newLine()
  }

  /** Creates a new, empty, uniquely named file inside `dir`. */
  private def newTempFile(dir: String): File = {
    var candidate = new File(dir, System.nanoTime().toString)
    // createNewFile is atomic, so an existing name is never reused.
    while (!candidate.createNewFile()) {
      candidate = new File(dir, System.nanoTime().toString)
    }
    candidate
  }
}
原创粉丝点击