// Sorting strategy for big data: divide and conquer, then multi-way merge.

// Source: Internet. Editor: 程序博客网 (programming blog site). Retrieved: 2024/06/14 20:52.

package test;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;

/**
 * @author wenhuan
 * skype:jasonwenhuan
*/
public class HugeDataSort {
 public final static String ORIGINALPATH ="E:/bigdatatest/bigData.txt";
 public final static String TEMPFILEPATH ="E:/bigdatatest/";
 public final static String LASTFILEPATH ="E:/bigdatatest/";
 public final static String LASTFILENAME ="last.txt";
 public final static int BIGDATALENGTH = 10000000;
 public final static int TEMPFILELENGTH = 1000000;
 public static int rewriteTime = 1;
 private static File tempFiles[];
 public static int writeTime = 0;
 public static int threadNumber = 2;

 public static void main(String[] args) throws IOException {
generateDate();
splitBigFileToLittleFile();
unitAllTempFileAndDeleteTempFile();
}

 public static void generateDate() throws IOException {
 BufferedWriter writer = new BufferedWriter(new FileWriter(ORIGINALPATH ));
 Random random = new Random();
 for (int i = 0; i < BIGDATALENGTH; i++) {
 writer.write(String. valueOf(random.nextInt(BIGDATALENGTH)) +"n");
}
writer.close();
}

 public static void splitBigFileToLittleFile() throws IOException {
 BufferedReader br = new BufferedReader(new FileReader(ORIGINALPATH ));
 tempFiles = new File[BIGDATALENGTH / TEMPFILELENGTH];
 for (int i = 0; i < tempFiles. length; i++) {
 tempFiles[i] = new File(TEMPFILEPATH +"sortTempFile"+ i +".txt");

 BufferedWriter writer = new BufferedWriter(new FileWriter(
tempFiles[i]));
 List<Integer> smallLine = new ArrayList<Integer>();
 for (int j = 0; j < TEMPFILELENGTH; j++) {
 String text = null;
 if ((text = br.readLine()) != null) {
 smallLine.add(Integer. parseInt(text));
}
}
 Collections. sort(smallLine);
 for (Integer line : smallLine) {
 writer.write(String. valueOf(line)
 + System.getProperty("line.separator"));
}
writer.close();
}
}

 public static void multiWaysMergeSort(String[] files) throws IOException {

 if (files.length == 1) {
 String lastFilePath = LASTFILEPATH + LASTFILENAME ;
copyFile(files[0],lastFilePath,false);
deleteFile(files[0]);
return;
}

 /*List<String> listFiles = Arrays.asList(files);
 int filesEveryThread = tempFiles.length/threadNumber;
 for( int j=0;j<threadNumber;j++){
 int from = 0;
 int to = 0;
 from = filesEveryThread * j;
 if (j == threadNumber - 1) {
 to = listFiles.size();
 } else {
 to = threadNumber * (j + 1);
}
 List<String> list = listFiles.subList(from, to);
}*/

 for (int i = 0; i < files.length; i++) {
 if(i == files.length -1){
renameFile(files[i],i);
break;
}
 BufferedReader br1 = new BufferedReader(new FileReader(files[i]));
 BufferedReader br2 = new BufferedReader(new FileReader(files[i+1]));
 BufferedWriter writer = new BufferedWriter(new FileWriter(TEMPFILEPATH +"last_"+ rewriteTime +"_"+ i +".txt"));
 String s1 = br1.readLine();
 String s2 = br2.readLine();

 while (s1 != null || s2 != null) {
 int mergeResult = -1;
 if(s1 != null && s2 != null){
 mergeResult = merge(Integer.parseInt(s1.toString()),
 Integer. parseInt(s2.toString()));
}

 if (mergeResult == 0) {
writer.write(s2);
 writer.write(System. getProperty("line.separator"));
 s2 = br2.readLine();
}
 if (mergeResult == 1) {
writer.write(s1);
 writer.write(System. getProperty("line.separator"));
 s1 = br1.readLine();
 s2 = br2.readLine();
}
 if(mergeResult == 2){
writer.write(s1);
 writer.write(System. getProperty("line.separator"));
 s1 = br1.readLine();
}
 if(s1 == null && s2 != null){
writer.write(s2);
 writer.write(System. getProperty("line.separator"));
 s2 = br2.readLine();
}
 if(s2 == null && s1 != null){
writer.write(s1);
 writer.write(System. getProperty("line.separator"));
 s1 = br1.readLine();
}
 System. out.println("write time :"+ writeTime++);
}
br1.close();
br2.close();
deleteFile(files[i]);
deleteFile(files[i+1]);
i++;
writer.close();
}
rewriteTime++;
 multiWaysMergeSort(getTempFiles ("last_"));

}

 public static int merge(int a, int b) {
 if (a > b) {
 return 0;
 } else if (a == b) {
 return 1;
 } else {
 return 2;
}
}

 public static void unitAllTempFileAndDeleteTempFile() throws IOException {
 String[] files = getTempFiles("sortTempFile");
multiWaysMergeSort(files);
}

 public static String[] getTempFiles(final String startName) {
 File f = new File(TEMPFILEPATH );
 String[] files = f.list( new FilenameFilter() {
@Override
 public boolean accept(File dir, String name) {
 return name.startsWith(startName == null ?"": startName);
}
});
 String[] retFiles = new String[files.length ];
 for (int i = 0; i < files.length; i++) {
 retFiles[i] = TEMPFILEPATH + files[i];
}
 return retFiles;
}

 public static void copyFile(String org, String dst, boolean useBuffer) {
 FileInputStream fis = null;
 FileOutputStream fos = null;
 BufferedOutputStream bos = null;

 try {
 fis = new FileInputStream(org);
 fos = new FileOutputStream(dst);
 bos = new BufferedOutputStream(new FileOutputStream(dst));
 int length = 0;
 byte[] bytes = new byte[1024];
 while ((length = fis.read(bytes)) != -1) {
 if (useBuffer) {
 bos.write(bytes, 0, length);
 } else {
 fos.write(bytes, 0, length);
}
}

 } catch (FileNotFoundException e) {
e.printStackTrace();
 } catch (IOException e) {
e.printStackTrace();
 } finally {
 if (fos != null) {
 try {
fos.close();
 } catch (IOException e) {
}
}
 if (bos != null) {
 try {
bos.close();
 } catch (IOException e) {
}
}

 if (fis != null) {
 try {
fis.close();
 } catch (IOException e) {
}
}
}
}

 public static boolean deleteFile(String filePath){
 boolean flag = false;
 File f = new File(filePath);

if(f.exists()){
f.delete();
 flag = true;
}
 return flag;
}

 public static boolean renameFile(String fileName, int i){
 File file = new File(fileName);
 return file.renameTo(new File(TEMPFILEPATH +"last_"+ rewriteTime +"_"+ i +".txt"));
}

 class MyThread implements Runnable{
@Override
 public void run() {

}
}
}

 

// 0 0