通过 hash 取模将大文件拆分为多个小文件,便于排序或求 TopN

来源:互联网 发布:证券软件下载 编辑:程序博客网 时间:2024/06/08 16:41

声明:代码参考自某篇文章(原文地址已记不清),在其基础上修改为按 hash 取模将数据分散写入不同的小文件。

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.*;

/**
 * Sorts the integers stored in a large text file that cannot be read into memory at once.
 *
 * <p>The pipeline has four stages, each exposed as a static method:
 * <ol>
 *   <li>{@link #createData()} writes the big input file (one integer per line);</li>
 *   <li>{@link #separateFile()} distributes the lines into {@code data0.txt} ..
 *       {@code data19.txt} by hash code modulo the part count;</li>
 *   <li>{@link #everySingleFileSort()} sorts every part file in memory;</li>
 *   <li>{@link #mergeFile()} merges the sorted part files into one sorted output file.</li>
 * </ol>
 *
 * @author wangyuyuan
 */
public class LargeDataSortTest {

    /** Directory (with trailing separator) that holds the input, part and output files. */
    private static final String DATA_DIR =
            "F:" + File.separator + "dataTest" + File.separator;

    /** Number of part files the big file is split into. */
    private static final int PART_COUNT = 20;

    /** The big unsorted input file. */
    static File file = new File("F:" + File.separator + "dataTest" + File.separator + "data.txt");

    /** The final, fully sorted output file. */
    static File file1 = new File("F:" + File.separator + "dataTest" + File.separator + "dataSorted.txt");

    /**
     * Entry point. Only the split stage is executed; {@link #createData()},
     * {@link #everySingleFileSort()} and {@link #mergeFile()} are not invoked here
     * (they were commented out in the original program) and must be called explicitly
     * to run the full pipeline.
     *
     * @param args ignored
     * @throws Exception if the split stage fails
     */
    public static void main(String[] args) throws Exception {
        System.out.println("大文件写入成功");
        separateFile();
        System.out.println("文件拆分成功");
    }

    /**
     * Writes 1,000,000 random integers in {@code [0, Integer.MAX_VALUE)} to {@link #file},
     * one per line, each terminated by {@code "\r\n"}.
     *
     * @throws IOException if the file cannot be created or written
     */
    public static void createData() throws IOException {
        try (BufferedWriter out = new BufferedWriter(new FileWriter(file))) {
            Random random = new Random();
            for (int i = 0; i < 1000000; i++) {
                out.write(random.nextInt(Integer.MAX_VALUE) + "\r\n");
            }
        }
    }

    /**
     * Splits {@link #file} into {@code PART_COUNT} part files. Each number goes to the part
     * whose index is the number's hash code modulo {@code PART_COUNT}.
     *
     * @throws IOException if the input cannot be read or a part file cannot be written
     * @throws NumberFormatException if a non-blank line is not a valid integer
     */
    public static void separateFile() throws IOException {
        List<BufferedWriter> parts = new ArrayList<BufferedWriter>(PART_COUNT);
        try (BufferedReader in = new BufferedReader(new FileReader(file))) {
            for (int i = 0; i < PART_COUNT; i++) {
                parts.add(new BufferedWriter(new FileWriter(partPath(i))));
            }
            Integer number;
            while ((number = readNextInt(in)) != null) {
                // floorMod keeps the index in [0, PART_COUNT) even for negative hash codes;
                // the plain % operator would yield a negative index for negative numbers.
                int bucket = Math.floorMod(number.hashCode(), PART_COUNT);
                parts.get(bucket).write(number + "\r\n");
            }
        } finally {
            // Part writers are closed (and therefore flushed) on every path.
            closeAll(parts);
        }
    }

    /**
     * Sorts every part file in place: loads its numbers, sorts them ascending and
     * rewrites the file through {@link #numbersWrite(LinkedList, String)}.
     *
     * @throws Exception if a part file cannot be read, parsed or rewritten
     */
    public static void everySingleFileSort() throws Exception {
        for (int i = 0; i < PART_COUNT; i++) {
            String path = partPath(i);
            LinkedList<Integer> numbers = new LinkedList<Integer>();
            try (BufferedReader in = new BufferedReader(new FileReader(path))) {
                Integer number;
                while ((number = readNextInt(in)) != null) {
                    numbers.add(number);
                }
            }
            // The reader is closed before the same path is reopened for writing.
            Collections.sort(numbers);
            numbersWrite(numbers, path);
        }
    }

    /**
     * Writes the given numbers to {@code path}, one per line terminated by {@code "\r\n"},
     * replacing any existing content.
     *
     * @param numbers the numbers to write, in iteration order
     * @param path    the destination file path
     * @throws IOException if the file cannot be created or written
     */
    public static void numbersWrite(LinkedList<Integer> numbers, String path) throws IOException {
        try (BufferedWriter out = new BufferedWriter(new FileWriter(path))) {
            for (Integer number : numbers) {
                out.write(number + "\r\n");
            }
        }
    }

    /**
     * Merges the (already sorted) part files into {@link #file1} with a k-way merge:
     * the heap holds the next unread number of every part file, the smallest is written
     * out and replaced by the next number from the same part file.
     *
     * @throws Exception if a part file cannot be read or parsed, or the output cannot be written
     */
    public static void mergeFile() throws Exception {
        PriorityQueue<Obj> heap = new PriorityQueue<Obj>(PART_COUNT, new Obj());
        List<BufferedReader> parts = new ArrayList<BufferedReader>(PART_COUNT);
        try (BufferedWriter out = new BufferedWriter(new FileWriter(file1))) {
            // Seed the heap with the first number of every non-empty part file.
            for (int i = 0; i < PART_COUNT; i++) {
                BufferedReader part = new BufferedReader(new FileReader(partPath(i)));
                parts.add(part);
                Integer first = readNextInt(part);
                if (first != null) {
                    heap.add(new Obj(first, i));
                }
            }
            while (!heap.isEmpty()) {
                Obj smallest = heap.poll();
                out.write(smallest.a + "\r\n");
                // Refill from the part file the written number came from.
                Integer next = readNextInt(parts.get(smallest.b));
                if (next != null) {
                    heap.add(new Obj(next, smallest.b));
                }
            }
        } finally {
            closeAll(parts);
        }
    }

    /** Returns the path of part file number {@code index}. */
    private static String partPath(int index) {
        return DATA_DIR + "data" + index + ".txt";
    }

    /**
     * Reads the next integer from {@code reader}, skipping blank lines.
     *
     * @return the parsed number, or {@code null} at end of input
     */
    private static Integer readNextInt(BufferedReader reader) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            String text = line.trim();
            if (!text.isEmpty()) {
                return Integer.valueOf(text);
            }
        }
        return null;
    }

    /**
     * Closes every resource, even if some of them fail to close; the first failure is
     * rethrown with the later ones attached as suppressed exceptions.
     */
    private static void closeAll(List<? extends Closeable> resources) throws IOException {
        IOException failure = null;
        for (Closeable resource : resources) {
            try {
                resource.close();
            } catch (IOException e) {
                if (failure == null) {
                    failure = e;
                } else {
                    failure.addSuppressed(e);
                }
            }
        }
        if (failure != null) {
            throw failure;
        }
    }
}

/**
 * A number read from a part file together with the index of that part file.
 * An instance also serves as the comparator that orders such pairs by number.
 */
class Obj implements Comparator<Obj> {
    /** {@code a} is the number; {@code b} is the index of the part file it was read from. */
    int a, b;

    /** Creates an instance intended to be used only as a comparator. */
    Obj() {
    }

    /**
     * @param a the number
     * @param b the index of the part file the number was read from
     */
    Obj(int a, int b) {
        this.a = a;
        this.b = b;
    }

    /**
     * Orders two entries by their number, ascending. Uses {@link Integer#compare(int, int)}
     * because subtracting the values overflows when they have opposite signs.
     */
    @Override
    public int compare(Obj o1, Obj o2) {
        return Integer.compare(o1.a, o2.a);
    }
}