Java实现词频统计(Wordcount)-Map或Hashtable的value排序

来源:互联网 发布:javascript初级教程 编辑:程序博客网 时间:2024/04/30 14:27

我们在文本操作时,经常需要用到词频统计,并对统计后的词频进行排序,然后输出。
以下是我写的一个实现,供大家参考:

package com.qian;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

/*
 * author: Qian Yang, School of Management, Hefei University of Technology
 * email: 1563178220@qq.com
 */

/**
 * Word-frequency counter.
 *
 * <p>Reads a UTF-8 text file, splits every line on single spaces, counts how
 * often each token occurs, writes the raw counts to an output file and prints
 * the counts to standard output sorted by frequency, highest first.
 */
public class WordCount {

    /** Input file whose tokens are counted. */
    private static final String INPUT_PATH =
            "E:\\钱洋个人\\预测数据\\文本合并\\user_content_combine";

    /** Output file that receives one "token count" line per distinct token. */
    private static final String OUTPUT_PATH =
            "E:\\钱洋个人\\预测数据\\wordcount\\user_content_count.txt";

    /** Character encoding used for both the input and the output file. */
    private static final String ENCODING = "utf-8";

    /**
     * Runs the count: input file -> counts file -> sorted listing on stdout.
     *
     * @param args ignored
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    public static void main(String args[]) throws IOException {
        Hashtable<String, Integer> wordCount;

        BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(new File(INPUT_PATH)), ENCODING));
        try {
            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream(new File(OUTPUT_PATH)), ENCODING));
            try {
                wordCount = countWords(reader);
                writeCounts(wordCount, writer);
            } finally {
                // Closing flushes the buffer; without it the tail of the
                // output (or all of it, for small results) never reaches disk.
                writer.close();
            }
        } finally {
            reader.close();
        }

        for (Map.Entry<String, Integer> mapping : sortByCountDescending(wordCount)) {
            System.out.println(mapping.getKey() + ":" + mapping.getValue());
        }
    }

    /**
     * Counts the tokens of every line supplied by {@code reader}.
     *
     * <p>Lines are split on a single space character, so consecutive spaces
     * and blank lines produce empty-string tokens, which are counted like any
     * other token.
     *
     * @param reader source of the text; read until end of stream, not closed here
     * @return table mapping each token to its number of occurrences
     * @throws IOException if reading fails
     */
    private static Hashtable<String, Integer> countWords(BufferedReader reader)
            throws IOException {
        Hashtable<String, Integer> counts = new Hashtable<String, Integer>();
        String line;
        while ((line = reader.readLine()) != null) {
            String[] tokens = line.split(" ");
            for (int i = 0; i < tokens.length; i++) {
                Integer seen = counts.get(tokens[i]);
                int updated = (seen == null) ? 1 : seen.intValue() + 1;
                counts.put(tokens[i], Integer.valueOf(updated));
            }
        }
        return counts;
    }

    /**
     * Writes one {@code "token count\r\n"} line per entry, in the table's own
     * iteration order.
     *
     * @param counts token counts to write
     * @param writer destination; not closed here
     * @throws IOException if writing fails
     */
    private static void writeCounts(Map<String, Integer> counts, BufferedWriter writer)
            throws IOException {
        for (Entry<String, Integer> entry : counts.entrySet()) {
            String key = entry.getKey();
            int value = entry.getValue();
            writer.append(key + " " + value + "\r\n");
        }
    }

    /**
     * Copies the entries of {@code counts} into a list ordered by count,
     * largest first.
     *
     * @param counts token counts to sort; not modified
     * @return new list of the map's entries in descending order of value
     */
    private static List<Map.Entry<String, Integer>> sortByCountDescending(
            Map<String, Integer> counts) {
        // A map cannot be sorted by value directly, so sort a list of its entries.
        List<Map.Entry<String, Integer>> list =
                new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
            // Operands are swapped to obtain descending order.
            public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {
                return o2.getValue().compareTo(o1.getValue());
            }
        });
        return list;
    }
}
0 0
原创粉丝点击