用 Java IO、正则表达式和 HashMap 统计一个文本中的单词数量
来源:互联网 发布:linux查询当前时间 编辑:程序博客网 时间:2024/06/05 19:48
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Counts how often each word occurs in a text file and prints one
 * {@code word<TAB>count} line per distinct word to standard output.
 *
 * <p>Words are the runs of characters between matches of the regex {@code \W+};
 * with the default regex flags that means runs of ASCII letters, digits and
 * underscores. Counting is case-insensitive: every word is lower-cased before
 * it is tallied.
 */
public class WordCount {

    /** File that is read when no path is given on the command line. */
    private static final String DEFAULT_FILE = "filetest.txt";

    /** Separator between words: one or more non-word characters. Compiled once. */
    private static final Pattern NON_WORD = Pattern.compile("\\W+");

    /**
     * Reads the input file, counts its words and prints the result.
     *
     * <p>The map starts out empty, so the output contains only words that
     * actually occur in the file. The order of the printed lines follows the
     * {@link HashMap} iteration order and is therefore unspecified.
     *
     * @param args optional; {@code args[0]} is the path of the file to read.
     *             When absent, {@code filetest.txt} in the working directory is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;

        Map<String, Integer> counts;
        try (BufferedReader in = new BufferedReader(new FileReader(path))) {
            counts = countWords(in);
        } catch (IOException e) {
            // Report the failure instead of silently printing nothing.
            System.err.println("Cannot read " + path + ": " + e.getMessage());
            return;
        }

        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            System.out.printf("%s\t%d\n", entry.getKey(), entry.getValue());
        }
    }

    /** Tallies the lower-cased words of every line supplied by {@code in}. */
    private static Map<String, Integer> countWords(BufferedReader in) throws IOException {
        Map<String, Integer> counts = new HashMap<>();
        String line;
        while ((line = in.readLine()) != null) {
            for (String token : NON_WORD.split(line)) {
                // A line that is empty or starts with a separator yields an empty first token.
                if (token.isEmpty()) {
                    continue;
                }
                // Locale.ROOT keeps the lower-casing independent of the default locale.
                String word = token.toLowerCase(Locale.ROOT);
                Integer seen = counts.get(word);
                counts.put(word, seen == null ? 1 : seen + 1);
            }
        }
        return counts;
    }
}
输出:
常见的正则表达式符号: