Multiple Threads: Word Frequency
来源:互联网 发布:unity3d 粒子特效爆炸 编辑:程序博客网 时间:2024/05/18 13:29
Directory and File:
data3 (dir)
    file1.txt (file)
    file2.txt (file)
    file3.txt (file)
filter (dir)
    filter.txt (file)
File contents:
file1.txt
Free Shipping Mini Car Auto12v Fresh Air Purifier Oxygen Bar
freeshipping freeshipping
file2.txt
freeshipping new Electromagnetic parking sensor no holes need to be drilled
freeshipping
freeshipping
file3.txt
DC 12V 1 to 3 Car Cigarette Lighter Socket Power Adapter Splitter with 1 USB Port free shipping #9622 [aaa bbb] ccc{ ddd}
freeshipping
Filter contents:
filter.txt
sensor bbb lighter auto12v usb oxygen ddd parking cigarette port 1
free
shipping
no
need
Java Code:
WordsAnalysis.java
package com.algorithms.multiple.threads.frequency.word;/** * Created with IntelliJ IDEA. * User: 1O1O * Date: 2015-04-01 * Time: 19:31 PM * :)~ * MULTIPLE-THREADS-WORD-FREQUENCY:WORD-FREQUENCY */import java.io.*;import java.text.SimpleDateFormat;import java.util.*;public class WordsAnalysis { private static final String FILTER_WORDS_FILE_PATH = "//Users//robot//TEMP//testData//filter//filter.txt"; private static Set<String> filterWordsSet = new HashSet<String>(); /** * @param args * @throws IOException */ public static void main(String[] args) throws IOException { loadFilterWords(); File f = new File("//Users//robot//TEMP//testData//data3"); File[] fs = f.listFiles(); // 分成两半 List<File> files1 = new ArrayList<File>(); for (int i = 0; i < fs.length/2; i++) { files1.add(fs[i]); } List<File> files2 = new ArrayList<File>(); for (int i = fs.length/2; i < fs.length; i++) { files2.add(fs[i]); } // 工作线程总数 int threadCount = 0; // 共享数据 AllCountModel acm = new AllCountModel(); acm.setThreadCount(++threadCount); CountWordsThread tt1 = new CountWordsThread(files1, acm); // 1号线程 System.out.println("Thread 1: start!"); tt1.start(); acm.setThreadCount(++threadCount); CountWordsThread tt2 = new CountWordsThread(files2, acm); // 2号线程 System.out.println("Thread 2: start!"); tt2.start(); MonitorThread mt = new MonitorThread(acm); // 监视线程 System.out.println("Thread Monitor: start!"); mt.start(); } /** * * @param file * @param wordsMap * @return * @throws IOException */ public Map<String, Integer> countWords(File file, Map<String, Integer> wordsMap) throws IOException{ String text = readFile(file).toLowerCase();// 将所有字母化为小写 text = text.replaceAll("[`~!@#$%^&*()+=|{}':;',//\\[//\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。,、?]|\\s+|\t|\r", " "); String words[] = text.split("\\s+");// 取出单词,并将单词存入数组中 for (int i = 0; i < words.length; i++) { String word = words[i].trim(); // 重现的单词 if(wordsMap.containsKey(word) && !filterWordsSet.contains(word)){ // 计数 wordsMap.put(word, (wordsMap.get(word) + 1)); 
}else if(!wordsMap.containsKey(word) && !filterWordsSet.contains(word)){ // 第一次出现的新单词 wordsMap.put(word, 1); } } return wordsMap; } public static List<Map.Entry<String, Integer>> hashSort(Map<String, Integer> dataHash) { List<Map.Entry<String, Integer>> list_Data = new ArrayList<Map.Entry<String, Integer>>(dataHash.entrySet()); Collections.sort(list_Data, new Comparator<Map.Entry<String, Integer>>() { public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) { if (o2.getValue() != null && o1.getValue() != null && o2.getValue().compareTo(o1.getValue()) > 0) { return 1; } else { return -1; } } }); return list_Data; } /** * 打印结果 * @param AllCountModel 共享的结果集 */ public static void show(AllCountModel acm){ System.out.println("Number of threads left: "+acm.getThreadCount()); Map<String, Integer> dataHash = acm.getDataHash(); List<Map.Entry<String, Integer>> dataList = hashSort(dataHash); System.out.println("Start: write word and frequency"); int size = dataList.size(); int number = 1; SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");//设置日期格式 System.out.println("========================"+df.format(new Date())+"=========================");// new Date()为获取当前系统时间 System.out.println(String.format("%-15s", "word number") + String.format("%-30s", "word") + String.format("%-15s","frequency")); for (int i = 0; i < size; i++) { String word = dataList.get(i).getKey(); int frequency = dataList.get(i).getValue(); System.out.print(String.format("%-15d", number++)); System.out.print(String.format("%-30s", word)); System.out.print(String.format("%-15d", frequency)); System.out.println(); } System.out.println("End: write word and frequency"); } public static void loadFilterWords() { String filterWordsText = readFileByPath(FILTER_WORDS_FILE_PATH); String words[] = filterWordsText.split("\\s+|\\t|\\r|\\n");// 取出单词,并将单词存入数组中 System.out.println("Number of filter words: "+words.length); for(String word : words){ filterWordsSet.add(word); } } /** * read 
content from filePath and return content * @param filePath */ public static String readFileByPath(String filePath) { File file = new File(filePath); StringBuffer result = new StringBuffer(); BufferedReader reader = null; try { reader = new BufferedReader(new FileReader(file)); String tempString = null; while ((tempString = reader.readLine()) != null) { result.append(" "); result.append(tempString); } reader.close(); } catch (IOException e) { e.printStackTrace(); } finally { if (reader != null) { try { reader.close(); } catch (IOException e1) { } } } return result.toString(); } public static String readFile(File file) { //File file = new File(filePath); StringBuffer result = new StringBuffer(); BufferedReader reader = null; try { reader = new BufferedReader(new FileReader(file)); String tempString = null; while ((tempString = reader.readLine()) != null) { result.append(" "); result.append(tempString); } reader.close(); } catch (IOException e) { e.printStackTrace(); } finally { if (reader != null) { try { reader.close(); } catch (IOException e1) { } } } return result.toString(); }}
AllCountModel.java
package com.algorithms.multiple.threads.frequency.word;/** * Created with IntelliJ IDEA. * User: 1O1O * Date: 2015-04-01 * Time: 19:31 PM * :)~ * MULTIPLE-THREADS-WORD-FREQUENCY:WORD-FREQUENCY */import java.util.HashMap;import java.util.Map;public class AllCountModel { // 在运行的线程总数 private int threadCount; //所有线程共有的结构:dataHash,用于存储最终的结果集 private static Map<String, Integer> dataHash = new HashMap<String, Integer>(); public int getThreadCount() { return threadCount; } public void setThreadCount(int threadCount) { this.threadCount = threadCount; } public Map<String, Integer> getDataHash(){ return dataHash; } public void setDataHash(Map<String, Integer> wordsMap){ for (String key : wordsMap.keySet()) { if ((this.dataHash.get(key) != null)) { // 将单个线程中生成的map数据映射到公共的dataHash:value对应单词出现的频率,单词已在dataHash中存在,则value相加 int value = ((Integer) this.dataHash.get(key)).intValue()+((Integer) wordsMap.get(key)).intValue(); this.dataHash.put(key, new Integer(value)); } else if((this.dataHash.get(key) == null) ){ // 将单个线程中生成的map数据映射到公共的dataHash:value对应单词出现的频率,单词未在dataHash中存在,则赋为该线程的value值 this.dataHash.put(key, ((Integer) wordsMap.get(key)).intValue()); } } }}
CountWordsThread.java
package com.algorithms.multiple.threads.frequency.word;/** * Created with IntelliJ IDEA. * User: 1O1O * Date: 2015-04-01 * Time: 19:31 PM * :)~ * MULTIPLE-THREADS-WORD-FREQUENCY:WORD-FREQUENCY */import java.io.File;import java.io.IOException;import java.util.ArrayList;import java.util.List;import java.util.HashMap;import java.util.Map;public class CountWordsThread extends Thread{ private List<File> files = new ArrayList<File>(); private Map<String, Integer> wordsMap = new HashMap<String, Integer>(); private AllCountModel allCountModel; // 每一个线程都传入不一样的files,所以不用担心这个对象的同步冲突 public CountWordsThread(List<File> files, AllCountModel allCountModel){ this.files = files; this.allCountModel = allCountModel; } public void run() { WordsAnalysis wa = new WordsAnalysis(); // 解析传入的全部文件 for (File file : files) { try { // 解析文件内容 wordsMap = wa.countWords(file, wordsMap); } catch (IOException e) { e.printStackTrace(); } } // 锁住共享数据(必须这么做,否则共享的数据会紊乱) synchronized (allCountModel) { // 更新线程总数 allCountModel.setThreadCount(allCountModel.getThreadCount() - 1); System.out.println("Thread: stop!"); // 更新结果集 allCountModel.setDataHash(wordsMap); } }}
MonitorThread.java
package com.algorithms.multiple.threads.frequency.word;/** * Created with IntelliJ IDEA. * User: 1O1O * Date: 2015-04-01 * Time: 19:31 PM * :)~ * MULTIPLE-THREADS-WORD-FREQUENCY:WORD-FREQUENCY */public class MonitorThread extends Thread{ // 共享数据 private AllCountModel acm; public MonitorThread(AllCountModel acm){ this.acm = acm; } public void run() { while(true){ try { // 隔段时间检查一次 sleep(500); } catch (InterruptedException e) { e.printStackTrace(); } // 线程全部工作完毕 if(0 >= acm.getThreadCount()){ // 打印出结果 WordsAnalysis.show(acm); System.out.println("Thread Monitor: end!"); return; } } }}
Outputs:
Number of filter words: 16
Thread 1: start!
Thread 2: start!
Thread Monitor: start!
Thread: stop!
Thread: stop!
Number of threads left: 0
Start: write word and frequency
========================2015-04-01 19:49:12=========================
word number    word                          frequency
1              freeshipping                  6
2              to                            2
3              car                           2
4              be                            1
5              mini                          1
6              holes                         1
7              bar                           1
8              9622                          1
9              air                           1
10             ccc                           1
11             power                         1
12             socket                        1
13             drilled                       1
14             fresh                         1
15             electromagnetic               1
16             new                           1
17             3                             1
18             splitter                      1
19             purifier                      1
20             adapter                       1
21             12v                           1
22             aaa                           1
23             with                          1
24             dc                            1
End: write word and frequency
Thread Monitor: end!
0 0
- Multiple Threads: Word Frequency
- Multiple Files: Word Frequency
- Word Frequency
- Word Frequency
- Word Frequency
- Word Frequency
- Word-frequency filter
- [leetcode][bash] Word Frequency
- [Leetcode Shell]Word Frequency
- leetcode-192 Word Frequency
- LeetCode 192 Word Frequency
- 【Leetcode Shell】Word Frequency
- Leetcode: Word Frequency
- [leetcode]Word Frequency
- 192 - Word Frequency
- leetcode192. Word Frequency
- leetcode 192. Word Frequency
- 192. Word Frequency
- 5.10
- iPhone开发之UIPageControl
- Activity的四种加载模式s
- 聊聊web的底层-TCP
- activity和service通信
- Multiple Threads: Word Frequency
- HDU 2602 解题报告
- Android中点击按钮后隐藏输入法
- BCD 十六进制 十进制 相关转换
- 【THOI 2012】 社交网络结构洞
- .Net与JavaMVC思想比较
- JavaEE之捣蛋的Filter
- Linux tcpdump
- 【BZOJ 1857】 [Scoi2010]传送带