hash取模将大文件转成小文件,可排序,可求TopN
来源:互联网 发布:证券软件下载 编辑:程序博客网 时间:2024/06/08 16:41
声明:本文代码参考自网上的一篇文章(原文地址已无法找回),并在其基础上修改为:按 hash 取模把大文件中的数据分散写入多个小文件。
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.*;

/**
 * Sorts a file of integers that is too large to load into memory at once.
 *
 * <p>The work is split into stages that can be run independently:
 * <ol>
 *   <li>{@link #createData()} writes a test input file of random integers;</li>
 *   <li>{@link #separateFile()} distributes the numbers over {@code BUCKET_COUNT}
 *       smaller files by hash modulo;</li>
 *   <li>{@link #everySingleFileSort()} sorts each small file in memory;</li>
 *   <li>{@link #mergeFile()} k-way merges the sorted small files into one output file.</li>
 * </ol>
 *
 * @author wangyuyuan
 */
public class LargeDataSortTest {

    /** Number of small files the input is split into. */
    static final int BUCKET_COUNT = 20;

    /** Line terminator written after every number. */
    private static final String LINE_END = "\r\n";

    /** Directory that holds the input, the bucket files and the sorted output. */
    private static final String DATA_DIR = "F:" + File.separator + "dataTest";

    /** The large unsorted input file. */
    static File file = new File(DATA_DIR + File.separator + "data.txt");

    /** The fully sorted output file produced by {@link #mergeFile()}. */
    static File file1 = new File(DATA_DIR + File.separator + "dataSorted.txt");

    /**
     * Entry point. Only the splitting stage is currently enabled.
     *
     * @param args unused
     * @throws Exception if reading the input or writing a bucket file fails
     */
    public static void main(String[] args) throws Exception {
        // Disabled stages (enable as needed):
        //   createData();           generate the input file before splitting
        //   everySingleFileSort();  sort every bucket file after splitting
        //   mergeFile();            merge the sorted buckets into dataSorted.txt
        System.out.println("大文件写入成功");
        separateFile();
        System.out.println("文件拆分成功");
    }

    /**
     * Writes 1,000,000 random non-negative integers, one per line, to {@link #file}.
     *
     * @throws IOException if the file cannot be created or written
     */
    public static void createData() throws IOException {
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) {
            Random random = new Random();
            for (int i = 0; i < 1000000; i++) {
                bw.write(random.nextInt(Integer.MAX_VALUE) + LINE_END);
            }
        }
    }

    /**
     * Maps a number to the index of the bucket file it belongs to.
     *
     * <p>The hash code of an {@code Integer} is its own value, so the value is used
     * directly. {@link Math#floorMod(int, int)} keeps the result in
     * {@code [0, bucketCount)} even for negative values, where the {@code %}
     * operator would yield a negative index.
     *
     * @param value       the number to place
     * @param bucketCount number of buckets; must be positive
     * @return bucket index in {@code [0, bucketCount)}
     */
    static int bucketIndex(int value, int bucketCount) {
        return Math.floorMod(value, bucketCount);
    }

    /** Returns the path of the bucket file with the given index. */
    private static String bucketPath(int index) {
        return DATA_DIR + File.separator + "data" + index + ".txt";
    }

    /**
     * Reads the next number from {@code reader}, skipping blank lines.
     *
     * @return the parsed number, or {@code null} once the end of the stream is reached
     * @throws NumberFormatException if a non-blank line is not a valid integer
     */
    private static Integer readNextNumber(BufferedReader reader) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            String trimmed = line.trim();
            if (!trimmed.isEmpty()) {
                return Integer.valueOf(trimmed);
            }
        }
        return null;
    }

    /** Closes every resource, rethrowing the first failure with later ones suppressed. */
    private static void closeAll(List<? extends java.io.Closeable> resources) throws IOException {
        IOException failure = null;
        for (java.io.Closeable resource : resources) {
            try {
                resource.close();
            } catch (IOException e) {
                if (failure == null) {
                    failure = e;
                } else {
                    failure.addSuppressed(e);
                }
            }
        }
        if (failure != null) {
            throw failure;
        }
    }

    /**
     * Splits {@link #file} into {@code BUCKET_COUNT} bucket files, routing each number
     * by {@link #bucketIndex(int, int)}. Existing bucket files are overwritten.
     *
     * @throws IOException if the input cannot be read or a bucket file cannot be written
     */
    public static void separateFile() throws IOException {
        List<BufferedWriter> writers = new ArrayList<BufferedWriter>(BUCKET_COUNT);
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            for (int i = 0; i < BUCKET_COUNT; i++) {
                writers.add(new BufferedWriter(new FileWriter(bucketPath(i))));
            }
            Integer number;
            // readLine() returning null is the reliable end-of-file signal; ready() is not.
            while ((number = readNextNumber(br)) != null) {
                writers.get(bucketIndex(number, BUCKET_COUNT)).write(number + LINE_END);
            }
        } finally {
            // Closing the buffered writers flushes them and closes the underlying files.
            closeAll(writers);
        }
    }

    /**
     * Sorts every bucket file in ascending order, rewriting each file in place.
     *
     * @throws Exception if a bucket file cannot be read or rewritten
     */
    public static void everySingleFileSort() throws Exception {
        for (int i = 0; i < BUCKET_COUNT; i++) {
            String path = bucketPath(i);
            LinkedList<Integer> numbers = new LinkedList<Integer>();
            // The reader is closed before the same file is reopened for writing.
            try (BufferedReader br = new BufferedReader(new FileReader(path))) {
                Integer number;
                while ((number = readNextNumber(br)) != null) {
                    numbers.add(number);
                }
            }
            Collections.sort(numbers);
            numbersWrite(numbers, path);
        }
    }

    /**
     * Writes the given numbers, one per line and in list order, to {@code path},
     * replacing any existing content.
     *
     * @param numbers the numbers to write
     * @param path    destination file path
     * @throws IOException if the file cannot be written
     */
    public static void numbersWrite(LinkedList<Integer> numbers, String path) throws IOException {
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(path))) {
            for (Integer num : numbers) {
                bw.write(num + LINE_END);
            }
        }
    }

    /**
     * Merges the (already sorted) bucket files into {@link #file1} with a k-way merge.
     *
     * <p>The priority queue holds at most one pending number per bucket file. The
     * smallest is written out and replaced by the next number from the same file,
     * until every file is exhausted.
     *
     * @throws Exception if a bucket file cannot be read or the output cannot be written
     */
    public static void mergeFile() throws Exception {
        PriorityQueue<Obj> queue = new PriorityQueue<Obj>(BUCKET_COUNT, new Obj());
        List<BufferedReader> readers = new ArrayList<BufferedReader>(BUCKET_COUNT);
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(file1))) {
            for (int i = 0; i < BUCKET_COUNT; i++) {
                readers.add(new BufferedReader(new FileReader(bucketPath(i))));
            }
            // Seed the queue with the first number of every non-empty bucket file.
            for (int i = 0; i < BUCKET_COUNT; i++) {
                Integer first = readNextNumber(readers.get(i));
                if (first != null) {
                    queue.add(new Obj(first, i));
                }
            }
            while (!queue.isEmpty()) {
                Obj smallest = queue.poll();
                bw.write(smallest.a + LINE_END);
                // Refill from the file the written number came from.
                Integer next = readNextNumber(readers.get(smallest.b));
                if (next != null) {
                    queue.add(new Obj(next, smallest.b));
                }
            }
        } finally {
            closeAll(readers);
        }
    }
}

/**
 * A number read during the merge together with the index of the bucket file it came
 * from. An instance also serves as the comparator that orders such entries by number.
 */
class Obj implements Comparator<Obj> {
    /** The number read from a bucket file. */
    int a;
    /** Index of the bucket file the number was read from. */
    int b;

    /** Creates an instance intended for use as a comparator only. */
    Obj() {
    }

    /**
     * @param a the number
     * @param b index of the source bucket file
     */
    Obj(int a, int b) {
        this.a = a;
        this.b = b;
    }

    /**
     * Orders entries by ascending number. Uses {@link Integer#compare(int, int)}
     * because subtracting the operands overflows for values that are far apart.
     */
    @Override
    public int compare(Obj o1, Obj o2) {
        return Integer.compare(o1.a, o2.a);
    }
}
阅读全文
0 0
- hash取模将大文件转成小文件,可排序,可求TopN
- 将pvr.ccz 和 plist 文件转成 小图 python
- TopN排序和TopN 数组排序
- image 文件转成 PDF文件
- 读文件转成byte
- app文件转成ipa
- tif转成jpg文件
- 文件转成base64
- .p12文件转成.pem
- Day20 实现TopN 排序
- 文件转成bety[] 图片转成 hex值
- 一个可序列化的C#对象,如何转成一个XML格式的文件或字符串
- 快速排序 (解决小划分文件)
- Lucene小练十四(文件排序)
- 文件的hash值
- 使用hash拆分文件
- 获取文件HASH算法
- 快速文件hash
- 转载:Intellij IDEA连接Git
- GitHub上README.md教程
- iptables介绍与实战
- Lua自定义面向对象
- linux命令之系统服务控制练习
- hash取模将大文件转成小文件,可排序,可求TopN
- 项目csdn图片地址
- [UVA 122] Trees on the level 二叉树好难+BFS
- java的volatile关键字之非线程安全
- Add two numbers
- PTA 7-22(排序) 模拟EXCEL排序(25 分)25分代码 结构体排序
- Linux 基础操作(七)————系统服务的控制
- Removed Interval HDU
- (四)系统虚拟化关键技术