用 Java IO、正则表达式和 HashMap 统计文本文件中每个单词的出现次数

来源:互联网 发布:linux查询当前时间 编辑:程序博客网 时间:2024/06/05 19:48
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;

/**
 * Counts how often each word occurs in a text file and prints one
 * {@code word<TAB>count} line per distinct word to standard output.
 *
 * <p>A "word" is a maximal run of the characters {@code [a-zA-Z0-9_]}; every
 * other character acts as a separator. Words are compared case-insensitively
 * (they are lower-cased before counting).
 */
public class WordCount {

    /** File read when no path is given on the command line. */
    private static final String DEFAULT_INPUT = "filetest.txt";

    /** One or more non-word characters; compiled once instead of once per line. */
    private static final Pattern NON_WORD = Pattern.compile("\\W+");

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse.
     *             When absent, {@code filetest.txt} in the working directory is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;

        // FileReader decodes the file with the JVM's default charset.
        try (BufferedReader in = new BufferedReader(new FileReader(path))) {
            Map<String, Integer> counts = countWords(in);

            // Copy into a TreeMap so the report is in a stable, alphabetical order
            // rather than in unspecified hash order.
            for (Map.Entry<String, Integer> entry : new TreeMap<>(counts).entrySet()) {
                System.out.printf("%s\t%d\n", entry.getKey(), entry.getValue());
            }
        } catch (IOException e) {
            // Report the failure instead of silently producing no output.
            System.err.println("WordCount: cannot read '" + path + "': " + e.getMessage());
        }
    }

    /**
     * Reads {@code in} line by line until end of stream and tallies every word.
     *
     * @param in source of the text; not closed by this method
     * @return a map from lower-cased word to its number of occurrences; starts
     *         empty, so it contains only words actually present in the input
     * @throws IOException if reading from {@code in} fails
     */
    private static Map<String, Integer> countWords(BufferedReader in) throws IOException {
        Map<String, Integer> counts = new HashMap<>();
        String line;
        while ((line = in.readLine()) != null) {
            for (String token : NON_WORD.split(line)) {
                // A line that starts with a separator yields a leading empty token.
                if (token.isEmpty()) {
                    continue;
                }
                // Locale.ROOT keeps case folding independent of the user's locale.
                String word = token.toLowerCase(Locale.ROOT);
                Integer seen = counts.get(word);
                counts.put(word, seen == null ? 1 : seen + 1);
            }
        }
        return counts;
    }
}

 输出:

 

常见的正则表达式符号:

 

原创粉丝点击