Multiple Threads: Word Frequency

来源:互联网 发布:unity3d 粒子特效爆炸 编辑:程序博客网 时间:2024/05/18 13:29

Directory and File:

data3(dir)
    file1.txt(file)
    file2.txt(file)
    file3.txt(file)
filter(dir)
    filter.txt(file)

File contents:

file1.txt

Free Shipping  Mini Car Auto12v  Fresh Air Purifier Oxygen Bar
freeshipping freeshipping

file2.txt

freeshipping new  Electromagnetic parking sensor no holes need to be drilled
freeshipping
freeshipping

file3.txt

DC 12V 1 to 3 Car Cigarette Lighter Socket Power Adapter Splitter with 1 USB Port  free shipping  #9622 [aaa bbb] ccc{ ddd}
freeshipping

Filter contents:

filter.txt

sensor            bbb            lighter              auto12v              usb             oxygen             ddd             parking              cigarette             port 1
free
shipping
no
need

Java Code:

WordsAnalysis.java

package com.algorithms.multiple.threads.frequency.word;/** * Created with IntelliJ IDEA. * User: 1O1O * Date: 2015-04-01 * Time: 19:31 PM * :)~ * MULTIPLE-THREADS-WORD-FREQUENCY:WORD-FREQUENCY */import java.io.*;import java.text.SimpleDateFormat;import java.util.*;public class WordsAnalysis {    private static final String FILTER_WORDS_FILE_PATH = "//Users//robot//TEMP//testData//filter//filter.txt";    private static Set<String> filterWordsSet = new HashSet<String>();    /**     * @param args     * @throws IOException     */    public static void main(String[] args) throws IOException {        loadFilterWords();        File f = new File("//Users//robot//TEMP//testData//data3");        File[] fs = f.listFiles();        // 分成两半        List<File> files1 = new ArrayList<File>();        for (int i = 0; i < fs.length/2; i++) {            files1.add(fs[i]);        }        List<File> files2 = new ArrayList<File>();        for (int i = fs.length/2; i < fs.length; i++) {            files2.add(fs[i]);        }        // 工作线程总数        int threadCount = 0;        // 共享数据        AllCountModel acm = new AllCountModel();        acm.setThreadCount(++threadCount);        CountWordsThread tt1 = new CountWordsThread(files1, acm);        // 1号线程        System.out.println("Thread 1: start!");        tt1.start();        acm.setThreadCount(++threadCount);        CountWordsThread tt2 = new CountWordsThread(files2, acm);        // 2号线程        System.out.println("Thread 2: start!");        tt2.start();        MonitorThread mt = new MonitorThread(acm);        // 监视线程        System.out.println("Thread Monitor: start!");        mt.start();    }    /**     *     * @param file     * @param wordsMap     * @return     * @throws IOException     */    public Map<String, Integer> countWords(File file, Map<String, Integer> wordsMap) throws IOException{        String text = readFile(file).toLowerCase();// 将所有字母化为小写        text = 
text.replaceAll("[`~!@#$%^&*()+=|{}':;',//\\[//\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。,、?]|\\s+|\t|\r", " ");        String words[] = text.split("\\s+");// 取出单词,并将单词存入数组中        for (int i = 0; i < words.length; i++) {            String word = words[i].trim();            // 重现的单词            if(wordsMap.containsKey(word) && !filterWordsSet.contains(word)){                // 计数                wordsMap.put(word, (wordsMap.get(word) + 1));            }else if(!wordsMap.containsKey(word) && !filterWordsSet.contains(word)){                // 第一次出现的新单词                wordsMap.put(word, 1);            }        }        return wordsMap;    }    public static List<Map.Entry<String, Integer>> hashSort(Map<String, Integer> dataHash) {        List<Map.Entry<String, Integer>> list_Data = new ArrayList<Map.Entry<String, Integer>>(dataHash.entrySet());        Collections.sort(list_Data, new Comparator<Map.Entry<String, Integer>>() {            public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {                if (o2.getValue() != null && o1.getValue() != null && o2.getValue().compareTo(o1.getValue()) > 0) {                    return 1;                } else {                    return -1;                }            }        });        return list_Data;    }    /**     * 打印结果     * @param AllCountModel 共享的结果集     */    public static void show(AllCountModel acm){        System.out.println("Number of threads left: "+acm.getThreadCount());        Map<String, Integer> dataHash = acm.getDataHash();        List<Map.Entry<String, Integer>> dataList = hashSort(dataHash);        System.out.println("Start: write word and frequency");        int size = dataList.size();        int number = 1;        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");//设置日期格式        System.out.println("========================"+df.format(new Date())+"=========================");// new Date()为获取当前系统时间        System.out.println(String.format("%-15s", "word number") + 
String.format("%-30s", "word") + String.format("%-15s","frequency"));        for (int i = 0; i < size; i++) {            String word = dataList.get(i).getKey();            int frequency = dataList.get(i).getValue();            System.out.print(String.format("%-15d", number++));            System.out.print(String.format("%-30s", word));            System.out.print(String.format("%-15d", frequency));            System.out.println();        }        System.out.println("End: write word and frequency");    }    public static void loadFilterWords() {        String filterWordsText = readFileByPath(FILTER_WORDS_FILE_PATH);        String words[] = filterWordsText.split("\\s+|\\t|\\r|\\n");// 取出单词,并将单词存入数组中        System.out.println("Number of filter words: "+words.length);        for(String word : words){            filterWordsSet.add(word);        }    }    /**     * read content from filePath and return content     * @param filePath     */    public static String readFileByPath(String filePath) {        File file = new File(filePath);        StringBuffer result = new StringBuffer();        BufferedReader reader = null;        try {            reader = new BufferedReader(new FileReader(file));            String tempString = null;            while ((tempString = reader.readLine()) != null) {                result.append(" ");                result.append(tempString);            }            reader.close();        } catch (IOException e) {            e.printStackTrace();        } finally {            if (reader != null) {                try {                    reader.close();                } catch (IOException e1) {                }            }        }        return result.toString();    }    public static String readFile(File file) {        //File file = new File(filePath);        StringBuffer result = new StringBuffer();        BufferedReader reader = null;        try {            reader = new BufferedReader(new FileReader(file));            String tempString = null;   
         while ((tempString = reader.readLine()) != null) {                result.append(" ");                result.append(tempString);            }            reader.close();        } catch (IOException e) {            e.printStackTrace();        } finally {            if (reader != null) {                try {                    reader.close();                } catch (IOException e1) {                }            }        }        return result.toString();    }}

AllCountModel.java

package com.algorithms.multiple.threads.frequency.word;/** * Created with IntelliJ IDEA. * User: 1O1O * Date: 2015-04-01 * Time: 19:31 PM * :)~ * MULTIPLE-THREADS-WORD-FREQUENCY:WORD-FREQUENCY */import java.util.HashMap;import java.util.Map;public class AllCountModel {    // 在运行的线程总数    private int threadCount;    //所有线程共有的结构:dataHash,用于存储最终的结果集    private static Map<String, Integer> dataHash = new HashMap<String, Integer>();    public int getThreadCount() {        return threadCount;    }    public void setThreadCount(int threadCount) {        this.threadCount = threadCount;    }    public Map<String, Integer> getDataHash(){        return dataHash;    }    public void setDataHash(Map<String, Integer> wordsMap){        for (String key : wordsMap.keySet()) {            if ((this.dataHash.get(key) != null)) {                // 将单个线程中生成的map数据映射到公共的dataHash:value对应单词出现的频率,单词已在dataHash中存在,则value相加                int value = ((Integer) this.dataHash.get(key)).intValue()+((Integer) wordsMap.get(key)).intValue();                this.dataHash.put(key, new Integer(value));            } else if((this.dataHash.get(key) == null) ){                // 将单个线程中生成的map数据映射到公共的dataHash:value对应单词出现的频率,单词未在dataHash中存在,则赋为该线程的value值                this.dataHash.put(key, ((Integer) wordsMap.get(key)).intValue());            }        }    }}

CountWordsThread.java

package com.algorithms.multiple.threads.frequency.word;/** * Created with IntelliJ IDEA. * User: 1O1O * Date: 2015-04-01 * Time: 19:31 PM * :)~ * MULTIPLE-THREADS-WORD-FREQUENCY:WORD-FREQUENCY */import java.io.File;import java.io.IOException;import java.util.ArrayList;import java.util.List;import java.util.HashMap;import java.util.Map;public class CountWordsThread extends Thread{    private List<File> files = new ArrayList<File>();    private Map<String, Integer> wordsMap = new HashMap<String, Integer>();    private AllCountModel allCountModel;    // 每一个线程都传入不一样的files,所以不用担心这个对象的同步冲突    public CountWordsThread(List<File> files, AllCountModel allCountModel){        this.files = files;        this.allCountModel = allCountModel;    }    public void run() {        WordsAnalysis wa = new WordsAnalysis();        // 解析传入的全部文件        for (File file : files) {            try {                // 解析文件内容                wordsMap = wa.countWords(file, wordsMap);            } catch (IOException e) {                e.printStackTrace();            }        }        // 锁住共享数据(必须这么做,否则共享的数据会紊乱)        synchronized (allCountModel) {            // 更新线程总数            allCountModel.setThreadCount(allCountModel.getThreadCount() - 1);            System.out.println("Thread: stop!");            // 更新结果集            allCountModel.setDataHash(wordsMap);        }    }}

MonitorThread.java

package com.algorithms.multiple.threads.frequency.word;/** * Created with IntelliJ IDEA. * User: 1O1O * Date: 2015-04-01 * Time: 19:31 PM * :)~ * MULTIPLE-THREADS-WORD-FREQUENCY:WORD-FREQUENCY */public class MonitorThread extends Thread{    // 共享数据    private AllCountModel acm;    public MonitorThread(AllCountModel acm){        this.acm = acm;    }    public void run() {        while(true){            try {                // 隔段时间检查一次                sleep(500);            } catch (InterruptedException e) {                e.printStackTrace();            }            // 线程全部工作完毕            if(0 >= acm.getThreadCount()){                // 打印出结果                WordsAnalysis.show(acm);                System.out.println("Thread Monitor: end!");                return;            }        }    }}

Outputs:

Number of filter words: 16
Thread 1: start!
Thread 2: start!
Thread Monitor: start!
Thread: stop!
Thread: stop!
Number of threads left: 0
Start: write word and frequency
========================2015-04-01 19:49:12=========================
word number    word                          frequency
1              freeshipping                  6
2              to                            2
3              car                           2
4              be                            1
5              mini                          1
6              holes                         1
7              bar                           1
8              9622                          1
9              air                           1
10             ccc                           1
11             power                         1
12             socket                        1
13             drilled                       1
14             fresh                         1
15             electromagnetic               1
16             new                           1
17             3                             1
18             splitter                      1
19             purifier                      1
20             adapter                       1
21             12v                           1
22             aaa                           1
23             with                          1
24             dc                            1
End: write word and frequency
Thread Monitor: end!
0 0
原创粉丝点击