找出多个文本中频率高的单词(2)
来源:互联网 发布:js模仿360加速球效果 编辑:程序博客网 时间:2024/05/22 02:06
接上篇,这次我打算用 java.util.concurrent 包里的 CountDownLatch 类来实现。
还是直接上代码吧:
Main.java
package com.anders.thread;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Entry point of the word-frequency counter.
 *
 * <p>Starts {@code ThreadNumber} workers on a fixed thread pool, waits on a
 * {@link CountDownLatch} until every worker has finished, then merges the
 * per-worker word counts into one map and prints it.
 */
public class Main {

    /**
     * Runs the workers, waits for them, and prints the merged word counts.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        int threadNumber = Integer.parseInt(PropertiesUtil.get("ThreadNumber"));
        ExecutorService es = Executors.newFixedThreadPool(threadNumber);
        SingleThreadStatistics[] threads = new SingleThreadStatistics[threadNumber];
        try {
            CountDownLatch doneSignals = new CountDownLatch(threadNumber);
            // Original note: written for the case of more files than threads.
            // Each worker keeps asking FileManager.getFile() until it returns
            // null, so a surplus worker simply finds nothing and counts down.
            for (int i = 0; i < threadNumber; i++) {
                threads[i] = new SingleThreadStatistics(doneSignals);
                es.execute(threads[i]);
            }
            // Reading the workers' maps is only safe after the latch opens.
            doneSignals.await();
            Map<String, Integer> map = mergeThreadMap(threads);
            display(map);
        } catch (InterruptedException e) {
            // Restore the interrupt status so code further up the stack can
            // still observe that this thread was asked to stop.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } finally {
            es.shutdown();
        }
    }

    /**
     * Sums the per-worker word counts into a single map.
     *
     * @param threads the finished workers whose maps are merged
     * @return a new map from word to its total count across all workers
     */
    private static Map<String, Integer> mergeThreadMap(SingleThreadStatistics[] threads) {
        Map<String, Integer> merged = new HashMap<String, Integer>();
        for (SingleThreadStatistics worker : threads) {
            for (Map.Entry<String, Integer> entry : worker.getMap().entrySet()) {
                String word = entry.getKey();
                Integer total = merged.get(word);
                if (total == null) {
                    merged.put(word, entry.getValue());
                } else {
                    merged.put(word, total + entry.getValue());
                }
            }
        }
        return merged;
    }

    /**
     * Prints every entry as {@code word ,count}, one per line, in the map's
     * iteration order.
     *
     * @param map the word counts to print
     */
    private static void display(Map<String, Integer> map) {
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            System.out.print(entry.getKey());
            System.out.println(" ," + entry.getValue());
        }
    }
}
SingleThreadStatistics.java
package com.anders.thread;

import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;

/**
 * Worker that repeatedly takes a file from {@link FileManager} and counts its
 * words into a map owned by this worker, then signals completion on the
 * latch it was given.
 */
public class SingleThreadStatistics implements Runnable {
    private final Map<String, Integer> map = new HashMap<String, Integer>();
    private final CountDownLatch doneSignals;

    /**
     * @param doneSignals latch that is counted down once when this worker ends
     */
    public SingleThreadStatistics(CountDownLatch doneSignals) {
        this.doneSignals = doneSignals;
    }

    /**
     * Processes files until {@link FileManager#getFile()} returns
     * {@code null}, then counts the latch down.
     */
    @Override
    public void run() {
        try {
            while (true) {
                File file = FileManager.getFile();
                if (file == null) {
                    break;
                }
                FileManager.parseFile(file, map);
            }
        } finally {
            // Count down even if the loop dies with an unexpected error;
            // otherwise the thread waiting on the latch would block forever.
            doneSignals.countDown();
        }
    }

    // --------getter/setter------------

    /**
     * @return the word counts collected by this worker (the live map, not a copy)
     */
    public Map<String, Integer> getMap() {
        return map;
    }
}
FileManager.java
package com.anders.thread;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Manages the input files and hands each one out to a single worker thread.
 *
 * <p>The file list is built once, when the class is initialised, from the
 * classpath directory named by the {@code DirName} property, filtered by the
 * {@code FileType} regular expression.
 *
 * @author Anders
 */
public class FileManager {
    /** Size in bytes of the chunk read from a file at a time. */
    private static final int BUFFER_SIZE = 1024;

    private static final List<File> fileList;
    private static int index = 0;

    static {
        String dirName = PropertiesUtil.get("DirName");
        String path = resolveDirectory(dirName);
        if (path == null) {
            // Degrade to "no files" instead of failing class initialisation.
            System.err.println("FileManager: directory not found on classpath: " + dirName);
            fileList = Collections.emptyList();
        } else {
            fileList = getFiles(path);
        }
    }

    /**
     * Returns the next file that has not been handed out yet.
     *
     * <p>The bounds check and the increment of the shared index happen under
     * the same class lock, so no file is returned to two callers.
     *
     * @return the next file, or {@code null} when all files have been handed out
     */
    public synchronized static File getFile() {
        if (index >= fileList.size()) {
            return null;
        }
        File file = fileList.get(index);
        index++;
        return file;
    }

    /**
     * Resolves a classpath directory name to a file-system path.
     *
     * @param dirName resource name of the directory; may be {@code null}
     * @return the path, or {@code null} if the name is null or not on the classpath
     */
    private static String resolveDirectory(String dirName) {
        if (dirName == null) {
            return null;
        }
        URL resource = FileManager.class.getClassLoader().getResource(dirName);
        if (resource == null) {
            return null;
        }
        try {
            // URL.getPath() is still percent-encoded (a space stays "%20");
            // going through a URI yields the decoded file-system path.
            return new File(resource.toURI()).getPath();
        } catch (URISyntaxException e) {
            return resource.getPath();
        } catch (IllegalArgumentException e) {
            // Not a plain file: URI (for example a resource inside a jar).
            return resource.getPath();
        }
    }

    /**
     * Lists the regular files in a directory whose names fully match the
     * {@code FileType} regular expression.
     *
     * @param dirPath path of the directory to scan
     * @return the matching files; empty if the path is not a readable directory
     */
    private static List<File> getFiles(String dirPath) {
        File dir = new File(dirPath);
        if (!dir.exists() || !dir.isDirectory()) {
            return Collections.emptyList();
        }
        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null when the directory cannot be read.
            return Collections.emptyList();
        }
        // Keep only files whose name matches the configured pattern
        // (the original note says: files ending in txt).
        Pattern pattern = Pattern.compile(PropertiesUtil.get("FileType"));
        List<File> list = new ArrayList<File>();
        for (File file : files) {
            Matcher matcher = pattern.matcher(file.getName());
            if (file.isFile() && matcher.matches()) {
                list.add(file);
            }
        }
        return list;
    }

    /**
     * Reads a file in fixed-size chunks through a {@link FileChannel} and adds
     * the count of every word in it to {@code map}.
     *
     * <p>A word is a maximal run of ASCII letters; counting is case-sensitive.
     * Failures are printed and not rethrown, so a bad file does not stop the
     * calling worker.
     *
     * @param file the file to read
     * @param map  word counts to update; must be safe for the caller to mutate
     */
    public static void parseFile(File file, Map<String, Integer> map) {
        FileInputStream ins = null;
        try {
            ins = new FileInputStream(file);
            FileChannel channel = ins.getChannel();
            ByteBuffer buffer = ByteBuffer.allocate(BUFFER_SIZE);
            // Carries a word that is still open when a chunk ends, so words
            // spanning a chunk boundary are neither split nor dropped.
            StringBuilder pending = new StringBuilder();
            while (channel.read(buffer) != -1) {
                buffer.flip();
                buffer2word(buffer, pending, map);
                buffer.clear();
            }
            // The file may end in the middle of a word.
            flushWord(pending, map);
        } catch (Exception e) {
            // Deliberately broad: this is best-effort per file.
            e.printStackTrace();
        } finally {
            try {
                if (ins != null) {
                    // Closing the stream also closes its channel.
                    ins.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Consumes the remaining bytes of {@code buffer}, extending {@code pending}
     * with letters and recording a word each time a non-letter ends one.
     */
    private static void buffer2word(ByteBuffer buffer, StringBuilder pending,
            Map<String, Integer> map) {
        while (buffer.hasRemaining()) {
            byte b = buffer.get();
            if (isEnglishChar(b)) {
                pending.append((char) b);
            } else {
                flushWord(pending, map);
            }
        }
    }

    /** Records the pending word, if any, and resets the builder. */
    private static void flushWord(StringBuilder pending, Map<String, Integer> map) {
        if (pending.length() > 0) {
            word2map(pending.toString(), map);
            pending.setLength(0);
        }
    }

    /** Increments the count of {@code word} in {@code map}, starting at 1. */
    private static void word2map(String word, Map<String, Integer> map) {
        Integer count = map.get(word);
        if (count == null) {
            map.put(word, 1);
        } else {
            map.put(word, count + 1);
        }
    }

    /** Tells whether the byte is an ASCII letter, {@code A-Z} or {@code a-z} inclusive. */
    private static boolean isEnglishChar(byte b) {
        return (b >= 'A' && b <= 'Z') || (b >= 'a' && b <= 'z');
    }
}
ThreadNumber=3
DirName=txt
FileType=.*.txt
以上是使用 CountDownLatch 实现的。当然也可以使用 Future + ExecutorService 来实现,下一篇就介绍这种做法。
其实我觉得最重要的代码是 FileManager里的
public synchronized static File getFile() {
    if (index == fileList.size()) {
        return null;
    }
    File file = fileList.get(index);
    index++;
    return file;
}
这部分代码,因为只要每个 thread 分别得到不同的文件就可以了。
还有很重要的一点:判断 index 是否已经到达文件列表末尾的检查,必须和 index++ 放在同一个同步块里面,否则会引起线程安全问题(例如两个线程拿到同一个文件,或者下标越界)。
- 找出多个文本中频率高的单词(2)
- 找出多个文本中频率高的单词(1)
- 找出多个文本中频率高的单词(3)
- 找出文件中最高频率的前k个单词
- 查找文本中n个出现频率最高的单词
- 查找文本中n个出现频率最高的单词
- 统计文本中各单词出现的频率(JavaWeb)
- linux shell查找文本中n个出现频率最高的单词
- 输入一行文本,其中包含多个单词,找出最长的单词长度
- 【C语言助教】输入一行文本,其中包含多个单词,找出最长的单词长度
- 分析一个英文txt文本中单词出现的频率
- python实现统计文本中单词出现的频率
- 统计文本中每个单词出现的频率(附C++完整程序)
- 统计一TXT文档中单词出现频率,输出频率最高的10个单词
- 统计文本中单词使用频率
- 找出文本中存在的坏单词-后缀trie
- 统计文本中英文单词的出现频率
- Python查找文本频率最高的单词
- TCP/IP详解学习笔记(7)-广播和多播,IGMP协议 .
- Java开发常用方法汇总
- cygwin $HOME 为cygdrive/c/Users/xxx而不是/etc/passwd中倒数第二列所指家目录/home/yyy
- Wget用法以及参数解释
- 虚拟机安装linux后 分辨率设置方法
- 找出多个文本中频率高的单词(2)
- SSH框架结合iReport报表开发错误解决
- linux系统下找不到.so文件->解决方法
- 多个txt文件合并
- POJ 1753 Flip Game (递归枚举)
- Column 'name' cannot be null Query: insert into category (id,name,description) values(?,?,?) Paramet
- Java杂记——当枚举遇到switch
- SSH登陆到终端的时候去掉Your default context is root提示
- 分布式