JAVA多线程读取同一个文件,加速对文件内容的获取

来源:互联网 发布:淘宝秒杀抢购 编辑:程序博客网 时间:2024/06/06 02:22

  前几天,朋友托我帮个忙,问我能不能用多线程的方式,读取一个文件,获取里面的内容。他大概想做的事情,就是读取文件里面每一行的内容,然后分析一下,再插入到数据库这样。但是,由于他那个记录内容的文件实在是太大了,虽然他弄成了单生产者-多消费者的模型,整体的处理速度还是非常的慢,因为读取速度不够快。所以,他就问我要怎么多线程读取同一个文件里面的内容,形成多生产者-多消费者的模型,从而提高速度。

  因此就有了下面这段 demo 式的代码:只要传入文件路径、读取文件的线程数、分隔符、回调这4个参数即可,并且还配上了测试代码。


 下面是我本地跑出来的测试结果(测试文件,是一个190MB大的文件):

3线程(本机2核4线程) 耗时 231498毫秒
2线程 耗时 278592毫秒
单线程 耗时397115毫秒
cpu线程数(4线程)耗时245657 毫秒


package demo.demo;

import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.security.InvalidParameterException;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Reads one file with several threads at once.
 *
 * <p>The file is cut into {@code threadCount} consecutive byte ranges, each range ending right
 * after a separator byte so that no record straddles two ranges. Every range is scanned by its
 * own reader thread, and each record found is handed to a {@link StringCallback}.
 */
public class ThreadReadFileHelper {

    /** Location of the demo data file used by {@link #main(String[])} and {@link #writeData()}. */
    private static final String TEST_FILE_PATH = "C:\\Users\\lianghaohui\\Desktop\\test.txt";

    /** Number of records written by {@link #writeData()} and awaited by {@link #main(String[])}. */
    private static final int LINE_COUNT = 1000000;

    /** Size of the byte buffer each reader thread uses for one bulk read. */
    private static final int BUFFER_SIZE = 64 * 1024;

    /** Counts the records delivered to the demo callback in {@link #main(String[])}. */
    private static final AtomicInteger atomicInteger = new AtomicInteger(0);

    /**
     * Generates the demo data file: {@link #LINE_COUNT} lines, each made of 1 to 10 random UUIDs
     * and terminated by {@code '\n'}. Not called by {@link #main(String[])}; invoke it once by
     * hand to create the fixture.
     *
     * @throws FileNotFoundException if the data file cannot be created or opened for writing
     * @throws IOException if writing fails
     */
    private static void writeData() throws FileNotFoundException, IOException {
        Random random = new Random();
        // try-with-resources closes the stream even when a write fails; buffering avoids one
        // system call per line.
        try (OutputStream out = new BufferedOutputStream(new FileOutputStream(TEST_FILE_PATH))) {
            for (int n = 0; n < LINE_COUNT; n++) {
                int count = random.nextInt(10) + 1;
                StringBuilder builder = new StringBuilder();
                for (int i = 0; i < count; i++) {
                    builder.append(UUID.randomUUID().toString());
                }
                builder.append("\n");
                out.write(builder.toString().getBytes(StandardCharsets.UTF_8));
            }
        }
        System.out.println("ok");
    }

    // Timings recorded by the original author for this demo (milliseconds):
    // 231498 - 3 threads (machine with 2 cores / 4 hardware threads)
    // 278592 - 2 threads
    // 397115 - single thread
    // 245657 - one thread per hardware thread (4 threads)

    /**
     * Demo entry point: reads the data file with one reader thread per available processor,
     * prints a running record count, and prints the elapsed time once {@link #LINE_COUNT}
     * records have been delivered.
     *
     * @param args ignored
     * @throws Exception if the data file cannot be opened or split
     */
    public static void main(String[] args) throws Exception {
        long beginTime = System.currentTimeMillis();
        ThreadReadFileHelper helper = new ThreadReadFileHelper();
        helper.read(TEST_FILE_PATH, Runtime.getRuntime().availableProcessors(), '\n',
                new StringCallback("UTF-8") {
                    @Override
                    void callback(String data) {
                        int count = atomicInteger.incrementAndGet();
                        System.out.println(count);
                        if (count == LINE_COUNT) {
                            System.out.println("总耗时毫秒:" + (System.currentTimeMillis() - beginTime));
                            System.out.println(data);
                        }
                    }
                });
        // Single-threaded baseline kept by the original author for comparison:
        // RandomAccessFile randomAccessFile = new RandomAccessFile("C:\\Users\\lianghaohui\\Desktop\\test.txt", "r");
        // while (true) {
        //     if (randomAccessFile.readLine() == null) {
        //         System.out.println("总耗时毫秒:" + (System.currentTimeMillis() - beginTime));
        //         break;
        //     } else {
        //         int count = atomicInteger.incrementAndGet();
        //         System.out.println(count);
        //     }
        // }
        // randomAccessFile.close();
    }

    /**
     * Splits the file into {@code threadCount} byte ranges aligned on {@code separator} and
     * starts one reader task per range. The method returns as soon as the tasks are submitted;
     * records are delivered to {@code callback} asynchronously.
     *
     * <p>The separator is matched against single bytes of the file, so it must be a character
     * whose value fits in one byte (such as {@code '\n'}).
     *
     * @param path path of the file to read
     * @param threadCount number of reader threads; ranges may be empty when the file has fewer
     *     records than threads
     * @param separator record separator
     * @param callback receiver of every record, without its separator
     * @throws InvalidParameterException if {@code threadCount < 1}, {@code path} is null or
     *     empty, or {@code callback} is null
     * @throws IOException if the file cannot be opened or scanned while computing the ranges
     */
    public void read(String path, int threadCount, char separator, StringCallback callback) throws IOException {
        if (threadCount < 1) {
            throw new InvalidParameterException("The threadCount can not be less than 1");
        }
        if (path == null || path.isEmpty()) {
            throw new InvalidParameterException("The path can not be null or empty");
        }
        if (callback == null) {
            throw new InvalidParameterException("The callback can not be null");
        }

        long[] beginIndexes = new long[threadCount];
        long[] endIndexes = new long[threadCount];
        try (RandomAccessFile file = new RandomAccessFile(path, "r")) {
            long fileLength = file.length();
            long gap = fileLength / threadCount;
            long cursor = 0;
            for (int n = 0; n < threadCount; n++) {
                beginIndexes[n] = cursor;
                if (n + 1 == threadCount) {
                    // The last range always runs to the end of the file.
                    endIndexes[n] = fileLength;
                } else {
                    // Jump ahead by the nominal range size (never past EOF), then extend the
                    // range to just after the next separator so records stay whole.
                    cursor = Math.min(cursor + gap, fileLength);
                    cursor += bytesThroughNextSeparator(cursor, file, separator);
                    endIndexes[n] = cursor;
                }
            }
        }

        ExecutorService readers = Executors.newFixedThreadPool(threadCount);
        try {
            for (int n = 0; n < threadCount; n++) {
                final long begin = beginIndexes[n];
                final long end = endIndexes[n];
                readers.execute(() -> {
                    try {
                        readData(begin, end, path, separator, callback);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                });
            }
        } finally {
            // Already submitted tasks still run to completion; shutting down only lets the pool
            // threads end afterwards so they do not keep the JVM alive.
            readers.shutdown();
        }
    }

    /**
     * Counts the bytes from {@code beginIndex} up to and including the next separator.
     *
     * @return the number of bytes to skip to land just after the next separator, or the number
     *     of bytes left in the file when no separator follows (0 at or beyond end of file)
     */
    private long bytesThroughNextSeparator(long beginIndex, RandomAccessFile randomAccessFile, char separator)
            throws IOException {
        randomAccessFile.seek(beginIndex);
        long count = 0;
        int read;
        // Stop at end of file as well: read() returns -1 there, which never equals a separator.
        while ((read = randomAccessFile.read()) != -1) {
            count++;
            if (read == separator) {
                break;
            }
        }
        return count;
    }

    /**
     * Scans the byte range {@code [begin, end)} of the file and passes every record in it to
     * the callback. Bytes left over after the last separator of the range (a final record
     * without a trailing separator) are delivered as a record too.
     */
    private void readData(long begin, long end, String path, char separator, StringCallback callback)
            throws IOException {
        System.out.println("开始工作" + Thread.currentThread().getName());
        if (begin >= end) {
            return; // Empty range: nothing to read.
        }
        try (RandomAccessFile file = new RandomAccessFile(path, "r")) {
            file.seek(begin);
            byte[] buffer = new byte[BUFFER_SIZE];
            ByteArrayOutputStream record = new ByteArrayOutputStream();
            long remaining = end - begin;
            while (remaining > 0) {
                int wanted = (int) Math.min(buffer.length, remaining);
                int got = file.read(buffer, 0, wanted);
                if (got < 0) {
                    break; // The file became shorter than it was when the ranges were computed.
                }
                remaining -= got;
                int recordStart = 0;
                for (int i = 0; i < got; i++) {
                    if ((buffer[i] & 0xFF) == separator) {
                        record.write(buffer, recordStart, i - recordStart);
                        callback.callback0(record.toByteArray());
                        record.reset();
                        recordStart = i + 1;
                    }
                }
                // Keep the unfinished tail; it continues in the next buffer.
                record.write(buffer, recordStart, got - recordStart);
            }
            if (record.size() > 0) {
                callback.callback0(record.toByteArray());
            }
        }
    }

    /**
     * Receives the records found by {@link ThreadReadFileHelper#read}. Records are decoded with
     * the charset given at construction and passed to {@link #callback(String)} one at a time
     * on a single worker thread owned by this object.
     */
    public static abstract class StringCallback {

        /** Idle time after which the worker thread ends, so it does not keep the JVM alive. */
        private static final long WORKER_IDLE_SECONDS = 1L;

        private final Charset charset;
        private final ExecutorService executorService;

        /**
         * @param charsetName name of the charset used to decode the raw bytes of each record
         * @throws IllegalArgumentException if {@code charsetName} is null, malformed, or names a
         *     charset that is not available
         */
        public StringCallback(String charsetName) {
            this.charset = Charset.forName(charsetName);
            // At most one worker thread, so records are handled one at a time; the thread is
            // allowed to time out when idle and is recreated on demand.
            ThreadPoolExecutor executor = new ThreadPoolExecutor(
                    1, 1, WORKER_IDLE_SECONDS, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
            executor.allowCoreThreadTimeOut(true);
            this.executorService = executor;
        }

        /** Queues one raw record for decoding and delivery on the worker thread. */
        private void callback0(byte[] data) {
            executorService.execute(() -> callback(new String(data, charset)));
        }

        /**
         * Handles one decoded record.
         *
         * @param data the record text, without its separator
         */
        abstract void callback(String data);
    }
}


原创粉丝点击