统计文章的单词各数

来源:互联网 发布:众筹平台数据统计 编辑:程序博客网 时间:2024/05/16 09:17

给一篇文章,对这篇文章的单词个数进行统计.

package com.lr.string;import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.IOException;import java.io.InputStreamReader;import java.util.ArrayList;import java.util.Collections;import java.util.Comparator;import java.util.HashMap;import java.util.List;import java.util.Map;import java.util.StringTokenizer;public class WordCount {public static void main(String[] args) {String str = null;int WordCount = 0;Map<String, Integer> map = new HashMap<String, Integer>();List<Map.Entry<String, Integer>> listMap = null;try {BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(new File("src/com/lr/string/WordCount.java"))));while ((str = br.readLine()) != null) {StringTokenizer token = new StringTokenizer(str);while (token.hasMoreTokens()) {String word = token.nextToken("+*!:@=\\\"\';?><,./\n\t {()");if (map.containsKey(word)) {map.put(word, map.get(word) + 1);} else {map.put(word, 1);}WordCount++;}}// whilelistMap = new ArrayList<Map.Entry<String, Integer>>(map.entrySet());Collections.sort(listMap,new Comparator<Map.Entry<String, Integer>>() {@Overridepublic int compare(Map.Entry<String, Integer> map1,Map.Entry<String, Integer> map2) {return (map1.getValue().compareTo(map2.getValue()));}});for (Map.Entry<String, Integer> m : listMap) {System.out.println(m.getKey() + ":" + m.getValue());}} catch (FileNotFoundException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}}


结果:

getKey:1for:1args:1package:1println:1else:1static:1get:1compare:1nextToken:1class:1compareTo:10:1String[]:1readLine:1try:1sort:1entrySet:1if:1src:1System:1return:1n:1void:1main:1hasMoreTokens:1containsKey:1t:1Override:1out:1InputStreamReader:2com:2br:2Collections:2int:21:2FileNotFoundException:2map2:2map1:2printStackTrace:2put:2IOException:2lr:2Comparator:2catch:2ArrayList:2List:2HashMap:2File:2FileInputStream:2string:2StringTokenizer:3str:3getValue:3while:3token:3null:3m:3BufferedReader:3public:3listMap:4WordCount:4e:4word:5Entry:6map:6io:6util:7Map:8new:8Integer:8String:10}:12import:13java:14


 

原创粉丝点击