找出多个文本中频率高的单词(2)

来源:互联网 发布:js模仿360加速球效果 编辑:程序博客网 时间:2024/05/22 02:06

接上篇,我打算用 concurrent 包里的 CountDownLatch 类去实现。


还是直接上代码吧:

Main.java

package com.anders.thread;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Entry point of the word-frequency program.
 *
 * <p>Starts a fixed-size pool of {@link SingleThreadStatistics} workers, blocks on a
 * {@link CountDownLatch} until every worker has counted down, then merges the per-worker
 * word counts into one map and prints it.
 */
public class Main {

    /**
     * Runs the workers, waits for them, and prints the merged counts.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Pool size comes from the "ThreadNumber" property (PropertiesUtil is defined elsewhere).
        int threadNumber = Integer.parseInt(PropertiesUtil.get("ThreadNumber"));
        ExecutorService es = Executors.newFixedThreadPool(threadNumber);
        SingleThreadStatistics[] threads = new SingleThreadStatistics[threadNumber];
        try {
            CountDownLatch doneSignals = new CountDownLatch(threadNumber);
            // Author's note: written for the case where there are more files than threads;
            // if there are fewer files than threads, an extra check can be added.
            for (int i = 0; i < threadNumber; i++) {
                threads[i] = new SingleThreadStatistics(doneSignals);
                es.execute(threads[i]);
            }
            // Wait until every worker has signalled completion before reading its map.
            doneSignals.await();
            Map<String, Integer> map = mergeThreadMap(threads);
            display(map);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } finally {
            es.shutdown();
        }
    }

    /**
     * Sums the word counts of all workers into a single map.
     *
     * @param threads the workers whose maps are merged
     * @return a new map from word to the total count across all workers
     */
    private static Map<String, Integer> mergeThreadMap(SingleThreadStatistics[] threads) {
        Map<String, Integer> merged = new HashMap<String, Integer>();
        for (SingleThreadStatistics worker : threads) {
            for (Map.Entry<String, Integer> entry : worker.getMap().entrySet()) {
                String word = entry.getKey();
                Integer soFar = merged.get(word);
                int total = (soFar == null) ? entry.getValue() : soFar + entry.getValue();
                merged.put(word, total);
            }
        }
        return merged;
    }

    /**
     * Prints one line per entry to standard output: the word, a separator, and its count.
     *
     * @param map the word counts to print
     */
    private static void display(Map<String, Integer> map) {
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            System.out.print(entry.getKey());
            System.out.println("   ," + entry.getValue());
        }
    }
}

SingleThreadStatistics.java

package com.anders.thread;

import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;

/**
 * Worker task that keeps taking files from {@link FileManager} and accumulates their word
 * counts into its own private map, then counts down the supplied latch when it is done.
 */
public class SingleThreadStatistics implements Runnable {

    /** Word counts gathered by this worker only; written solely from {@link #run()}. */
    private final Map<String, Integer> map = new HashMap<String, Integer>();

    /** Latch that is counted down exactly once when {@link #run()} exits. */
    private final CountDownLatch doneSignals;

    /**
     * @param doneSignals latch to count down when this worker has finished
     */
    public SingleThreadStatistics(CountDownLatch doneSignals) {
        this.doneSignals = doneSignals;
    }

    /**
     * Processes files until {@link FileManager#getFile()} returns {@code null}.
     *
     * <p>The latch is counted down in a {@code finally} block so that a thread waiting on it
     * is released even if file handling throws an unchecked exception or error.
     */
    @Override
    public void run() {
        try {
            while (true) {
                File file = FileManager.getFile();
                if (file == null) {
                    break;
                }
                FileManager.parseFile(file, map);
            }
        } finally {
            doneSignals.countDown();
        }
    }

    // --------getter/setter------------

    /**
     * @return the live word-count map of this worker (not a copy)
     */
    public Map<String, Integer> getMap() {
        return map;
    }
}

FileManager.java

package com.anders.thread;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Manages the input files and hands each one out to a single worker thread.
 *
 * @author Anders
 */
public class FileManager {

    /** Files to process, collected once when the class is initialised. */
    private static final List<File> fileList;

    /** Position of the next file to hand out; only touched inside {@link #getFile()}. */
    private static int index = 0;

    static {
        // The directory is looked up as a class-path resource named by the "DirName" property.
        String dirPath = PropertiesUtil.get("DirName");
        URL resource = FileManager.class.getClassLoader().getResource(dirPath);
        if (resource == null) {
            // A missing resource would otherwise throw inside the static initialiser and make
            // the whole class unusable; treat it like a missing directory instead.
            System.err.println("Resource directory not found on class path: " + dirPath);
            fileList = Collections.emptyList();
        } else {
            fileList = getFiles(resource.getPath());
        }
    }

    /**
     * Returns the next unprocessed file.
     *
     * <p>The exhaustion check and the index increment happen under the same class-level lock,
     * so two callers never receive the same file.
     *
     * @return the next file, or {@code null} when every file has been handed out
     */
    public synchronized static File getFile() {
        if (index == fileList.size()) {
            return null;
        }
        File file = fileList.get(index);
        index++;
        return file;
    }

    /**
     * Lists the files directly inside {@code dirPath} whose names fully match the regular
     * expression stored in the "FileType" property.
     *
     * @param dirPath path of the directory to scan
     * @return the matching files; empty if the path is not a readable directory
     */
    private static List<File> getFiles(String dirPath) {
        File dir = new File(dirPath);
        if (!dir.exists() || !dir.isDirectory()) {
            return Collections.emptyList();
        }
        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null when the directory cannot be read.
            return Collections.emptyList();
        }
        // Keep only files whose name matches the configured pattern (e.g. names ending in txt).
        Pattern pattern = Pattern.compile(PropertiesUtil.get("FileType"));
        List<File> list = new ArrayList<File>();
        for (File file : files) {
            Matcher matcher = pattern.matcher(file.getName());
            if (matcher.matches()) {
                list.add(file);
            }
        }
        return list;
    }

    /**
     * Reads {@code file} through a {@link FileChannel} and {@link ByteBuffer} and adds the
     * number of occurrences of every English word it contains to {@code map}.
     *
     * <p>A word is a maximal run of ASCII letters; case is preserved. Failures are printed
     * and otherwise ignored, so one unreadable file does not stop the caller.
     *
     * @param file the file to read
     * @param map  word-to-count map that is updated in place
     */
    public static void parseFile(File file, Map<String, Integer> map) {
        FileInputStream ins = null;
        try {
            ins = new FileInputStream(file);
            FileChannel channel = ins.getChannel();
            ByteBuffer buffer = ByteBuffer.allocate(1024);
            // Holds the word currently being assembled; it survives across buffer refills so
            // that a word straddling a 1024-byte boundary is not cut in two.
            StringBuilder pending = new StringBuilder();
            while (true) {
                buffer.clear();
                int r = channel.read(buffer);
                if (r == -1) {
                    break;
                }
                buffer.flip();
                buffer2word(buffer, pending, map);
            }
            // The file may end in a letter, leaving one last word that was never terminated.
            flushWord(pending, map);
        } catch (Exception e) {
            // Deliberately best-effort: report the problem and let the caller move on.
            e.printStackTrace();
        } finally {
            try {
                if (ins != null) {
                    // Closing the stream also closes its associated channel.
                    ins.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Consumes the readable bytes of {@code buffer}, appending letters to {@code pending} and
     * recording the pending word whenever a non-letter byte is met.
     */
    private static void buffer2word(ByteBuffer buffer, StringBuilder pending,
            Map<String, Integer> map) {
        while (buffer.hasRemaining()) {
            byte b = buffer.get();
            if (isEnglishChar(b)) {
                pending.append((char) b);
            } else {
                flushWord(pending, map);
            }
        }
    }

    /** Records the pending word, if any, and resets the builder; empty words are skipped. */
    private static void flushWord(StringBuilder pending, Map<String, Integer> map) {
        if (pending.length() > 0) {
            word2map(pending.toString(), map);
            pending.setLength(0);
        }
    }

    /** Increments the count of {@code word} in {@code map}, starting at 1 for a new word. */
    private static void word2map(String word, Map<String, Integer> map) {
        Integer count = map.get(word);
        map.put(word, count == null ? 1 : count + 1);
    }

    /** Tells whether {@code b} is an ASCII letter, {@code A-Z} or {@code a-z} inclusive. */
    private static boolean isEnglishChar(byte b) {
        return (b >= 'A' && b <= 'Z') || (b >= 'a' && b <= 'z');
    }
}


config.properties

ThreadNumber=3
DirName=txt
FileType=.*.txt

以上是使用 CountDownLatch 实现的。当然,也可以使用 Future + ExecutorService 实现,下一篇会介绍这种方式。


其实我觉得最重要的代码是  FileManager里的

/**
 * Hands out the next file that has not been given to any caller yet.
 *
 * <p>The method is synchronized on the class, so the "all files taken?" check and the
 * advance of {@code index} happen as one step.
 *
 * @return the next file, or {@code null} once {@code index} has reached the end of the list
 */
public synchronized static File getFile() {
    if (index != fileList.size()) {
        // Read the current slot, then move on to the next one.
        return fileList.get(index++);
    }
    return null;
}
这部分代码,因为只要  每个thread 分别得到不同的文件,就可以了。

还有很重要的一点:判断 index 是否已经取完所有文件的检查,要和 index++ 放在同一个同步块里面,否则会引起线程安全问题。


原创粉丝点击