Java实现的一个词频统计程序

来源:互联网 发布:中国红十字会知乎 编辑:程序博客网 时间:2024/05/18 03:04
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * Word-frequency demo: counts how often each whitespace-separated word occurs
 * in a fixed set of sample sentences and prints one {@code word:count} line
 * per distinct word to standard output.
 */
public class WordCount {

    /**
     * Tallies the words of the built-in sample sentences and prints the result.
     *
     * <p>Lines are printed in the iteration order of the backing {@link HashMap},
     * so the order is not alphabetical and not the order of first occurrence.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String[] text = new String[] {
            "the weather is good ",
            "today is good",
            "today has good weather",
            "good weather is good"
        };

        // Maps each word to the number of times it has been seen so far.
        Map<String, Integer> counts = new HashMap<String, Integer>();

        for (String sentence : text) {
            // Trim and split on runs of whitespace so that leading or repeated
            // blanks never produce an empty-string "word".
            for (String word : sentence.trim().split("\\s+")) {
                if (word.isEmpty()) {
                    // A blank sentence still yields a single empty token.
                    continue;
                }
                Integer seen = counts.get(word);
                counts.put(word, seen == null ? 1 : seen + 1);
            }
        }

        // Iterating entries avoids a second lookup per key.
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }
    }
}
改进版(从文件逐行读取文本,先过滤标点符号,再统计词频):
package jdbcExc;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

/**
 * Word-frequency counter that reads a text file line by line, replaces
 * punctuation with spaces, splits each line into tokens and prints one
 * {@code word:<TAB>count} line per distinct word to standard output.
 */
public class WordCount1 {

    /** File that is read when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT = "F:\\file80.txt";

    /**
     * Characters that are replaced by a space before tokenizing. Compiled once
     * instead of on every input line.
     *
     * <p>NOTE(review): inside the character class, {@code !--} appears to be
     * parsed as a range from '!' to '-', which would also cover
     * {@code " # $ % & ' ( ) * + ,} -- confirm this is the intended set.
     */
    private static final Pattern PUNCTUATION = Pattern.compile("[【】、.。,\"!--;:?\'\\]]");

    /**
     * Counts the words of a text file and prints the frequency table.
     *
     * <p>The file is decoded by {@link FileReader}; no character set is named
     * here. If the file cannot be opened or read, the stack trace is printed
     * and no table is produced.
     *
     * @param args optional; {@code args[0]} is the path of the file to process,
     *             otherwise {@code F:\file80.txt} is used
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;

        // HashMap holding the <word : frequency> mapping.
        Map<String, Integer> counts = new HashMap<String, Integer>();

        try {
            // Open the file to process; it is closed in the finally block
            // whether or not reading succeeds.
            BufferedReader reader = new BufferedReader(new FileReader(path));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    // Blank out punctuation so it never sticks to a word.
                    String cleaned = PUNCTUATION.matcher(line).replaceAll(" ");

                    // StringTokenizer splits on whitespace and never returns
                    // empty tokens (see the JDK documentation).
                    StringTokenizer tokens = new StringTokenizer(cleaned);
                    while (tokens.hasMoreTokens()) {
                        String word = tokens.nextToken();
                        Integer seen = counts.get(word);
                        counts.put(word, seen == null ? 1 : seen + 1);
                    }
                }
            } finally {
                reader.close();
            }

            // Walk the map and print the result; only reached when the whole
            // file was read without an I/O error.
            for (Map.Entry<String, Integer> entry : counts.entrySet()) {
                System.out.println(entry.getKey() + ":\t" + entry.getValue());
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
0 0
原创粉丝点击