统计文本英文单词总个数,并列出每个单词的个数

来源:互联网 发布:软件开发外包公司 编辑:程序博客网 时间:2024/05/01 03:05
package test;/* * Task :统计文本英文单词总个数,并列出每个单词的个数 * * Date:2014.02.26 * *Author:璀若星辰 * */import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.InputStreamReader;import java.util.ArrayList;import java.util.Collections;import java.util.Comparator;import java.util.List;import java.util.Map;import java.util.Map.Entry;import java.util.TreeMap;import java.util.regex.Matcher;import java.util.regex.Pattern;public class IO_Word {  public static List<String>Io_word(String str)throws Exception{    File file = new File(str);    int n = 0;//文章中单词总数    TreeMap<Object, Integer> myTreeMap = new TreeMap<Object, Integer>();//存放键值对    Object word = null;//文章中的单词    Object num = null;//出现的次数    FileInputStream fis = new FileInputStream(file);    try{      InputStreamReader isr = new InputStreamReader(fis, "gb2312");      try{         BufferedReader br = new BufferedReader(isr);         try{           List<String> all = new ArrayList<String>();           String temp = br.readLine();           while (temp !=null){             all.add(temp);             temp = br.readLine();           }           //System.out.println("all="+all.size());          // System.out.println(all.get(0));           Pattern expression = Pattern.compile("[a-zA-Z]+");//定义正则表达式匹配单词           String string1 = all.toString().toLowerCase();//转换成小写           Matcher matcher = expression.matcher(string1);//定义string1的匹配器           while(matcher.find()){             word = matcher.group();//得到一个单词—树映射的键             //System.out.println("word="+word);             n++;             if(myTreeMap.containsKey(word)){               num = myTreeMap.get(word);//得到单词出现的次数               Integer count = (Integer)num;               myTreeMap.put(word, new Integer(count.intValue()+1));             }else {               myTreeMap.put(word, new Integer(1));//否则单词第一次出现,添加到映射中             }           }           System.out.println("统计分析如下:");           System.out.println("txt文章中单词总数"+ n +"个");           
/*Iterator<Object> iter = myTreeMap.keySet().iterator();//得到树映射键集合的迭代器           while(iter.hasNext()){             key = iter.next();             System.out.println(((String)key+"-"+myTreeMap.get(key)));           }*/           List<Map.Entry<Object, Integer>> list = new ArrayList<Map.Entry<Object,Integer>>(myTreeMap.entrySet());           System.out.println("list="+list.size());           Collections.sort(list,new Comparator<Map.Entry<Object, Integer>>(){            public int compare(Map.Entry<Object, Integer>zj,  Map.Entry<Object, Integer> zz) {              return (zz.getValue() - zj.getValue());            }           });           for (Entry<Object, Integer> entry : list) {            System.out.println(entry.getKey() + "-" + entry.getValue() );          }           return all;         }finally{           br.close();         }      }finally{        isr.close();      }    }finally{      fis.close();    }  }  public static void main(String[] args) {  try {      IO_Word.Io_word("D:/abc.txt");    } catch (Exception e) {      e.printStackTrace();    }}} 

 运行结果如下:


0 0
原创粉丝点击