英文单词频率统计工具
来源:互联网 发布:浙江师范行知学院地址 编辑:程序博客网 时间:2024/04/26 18:25
记得有一段时间因为想要考研,需要背单词,当然我想先记一些常用的单词,但是哪些单词才是常用的呢?现在外面有很多分频词汇的册子,我也买过,但是总是不放心.于是决定自己写一个程序来统计一下单词的出现频率.这个程序就是那天晚上写的,还比较管用:我们只要把要分析的英文文章放到一个指定的目录下面,它就可以自动地统计这个目录下面所有英文资料中各个单词出现的频率,并排序后输出.你不妨试试看哦,哈哈.编程是一种乐趣.
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.Hashtable;
/**
 * Growable list of words paired with their occurrence counts, which can be
 * ordered by descending frequency.
 * <p>
 * Despite its name this class is unrelated to {@code java.util.Set}: it does
 * not reject duplicate words.
 * @author ZhuTao HUST 2006.6.2-2006.6.3
 * Email:greenkugua@sina.com.cn.
 * QQ:307356132
 * @version 1.0
 */
class Set{
    /** One word together with the number of times it appeared. */
    private static final class Entry implements Comparable<Entry>{
        final String word;
        final int times;

        Entry(String word,int times){
            this.word = word;
            this.times = times;
        }

        /**
         * Orders entries by descending count. Entries with equal counts are
         * ordered alphabetically (null words last) so that the sorted output
         * is reproducible from run to run.
         */
        public int compareTo(Entry other){
            if(this.times != other.times)
                return this.times > other.times ? -1 : 1;
            if(this.word == null)
                return other.word == null ? 0 : 1;
            if(other.word == null)
                return -1;
            return this.word.compareTo(other.word);
        }
    }

    /** Number of entries stored so far; only entries[0..num) are valid. */
    private int num;
    private Entry[] entries;

    /**
     * Creates an empty list.
     * @param size the initial capacity; the list grows on demand when more
     * elements than this are added.
     * @throws NegativeArraySizeException if size is negative.
     */
    public Set(int size){
        this.num = 0;
        this.entries = new Entry[size];
    }

    /**
     * Appends a word and its count to the list.
     * @param word is the word you want to add.
     * @param times is the number of times it appears.
     */
    public void addElement(String word,int times)
    {
        if(this.num == this.entries.length)
        {
            // Grow instead of failing once the initial capacity is used up.
            Entry[] bigger = new Entry[this.entries.length*2+1];
            System.arraycopy(this.entries,0,bigger,0,this.num);
            this.entries = bigger;
        }
        this.entries[this.num] = new Entry(word,times);
        this.num++;
    }

    /**
     * Sorts the stored words by descending frequency; words with the same
     * frequency are put in alphabetical order.
     */
    public void sort()
    {
        // Only the filled prefix is sorted; unused slots hold null.
        Arrays.sort(this.entries,0,this.num);
    }

    /**
     * Prints the number of stored words followed by one "word : count" line
     * per entry, in the current order, to standard output.
     */
    public void showResult()
    {
        System.out.println("总共有" +this.num+"个单词,它们的出现频率降序排列如下:");
        for(int i = 0;i<this.num;i++)
            System.out.println(this.entries[i].word+" : "+this.entries[i].times);
    }

    /**
     * Returns the number of words stored.
     * @return The number of words.
     */
    public int getCount()
    {
        return this.num;
    }

    /**
     * Returns the word stored at position i.
     * @param i is the zero-based position of the word you want to get.
     * @return the word at position i.
     * @throws ArrayIndexOutOfBoundsException if i is negative or not less
     * than {@link #getCount()}.
     */
    public String getWordAt(int i)
    {
        return this.entryAt(i).word;
    }

    /**
     * Returns the frequency of the word stored at position i.
     * @param i is the zero-based position of the word.
     * @return the frequency of the word at position i.
     * @throws ArrayIndexOutOfBoundsException if i is negative or not less
     * than {@link #getCount()}.
     */
    public int getFrequency(int i)
    {
        return this.entryAt(i).times;
    }

    /** Looks up a stored entry, rejecting positions outside the filled part. */
    private Entry entryAt(int i)
    {
        if(i < 0 || i >= this.num)
            throw new ArrayIndexOutOfBoundsException(i);
        return this.entries[i];
    }
}
/**
* This class is designed to annalyse
* word's frequency of English articles.
* @author ZhuTao HUST.
* 2006.6.2-2006.6.3
* Email:greenkugua@sina.com.cn.
* QQ:307356132
* @version:1.0
*/
public class EnglishWord {
private Set resultSet;
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
EnglishWord analyser = new EnglishWord("G://test","G://tao.txt");
}
public EnglishWord(String filepath,String savepath)
{
String content = this.readFile(filepath);
Hashtable table = this.getWordList(content);
this.initSet(table);
this.resultSet.sort();
this.resultSet.showResult();
this.saveResult(savepath);
}
/**
* This method is used to analyse the times of each word appears in
* the file.
* @param content is the content of file you.
* @return the word map.Which contains words and times it appeared.
*/
public Hashtable getWordList(String content)
{
Hashtable wordList = new Hashtable();
int i = 0;
for(;i<content.length();i++)
{
char ch = content.charAt(i);
if((ch>'Z'&&ch<'a')||ch<'A'||ch>'z');
else break;
}
boolean flag = true;
String word = new String();
char ch ;
for(;i<content.length();i++)
{
ch = content.charAt(i);
if((ch>='A'&&ch<='Z')||(ch>='a'&&ch<='z')||ch == '/''){
word+=ch;
flag = true;
}
else{
//如果已经包含该单词,就计数加一;
if(ch == '-'){i+=2;continue;}
if(flag)
{
if(wordList.containsKey(word))
{
Integer num = (Integer)wordList.get(word);
int t = num.intValue()+1;
wordList.put(word,new Integer(t));
}
else
{
wordList.put(word,new Integer(1));
}
}
flag = false;
word = "";
}
}
if(!word.equals(""))
{
if(wordList.containsKey(word))
{
Integer num = (Integer)wordList.get(word);
int t = num.intValue()+1;
wordList.put(word,new Integer(t));
}
else
{
wordList.put(word,new Integer(1));
}
}
return wordList;
}
/**
* @see This method is used to read content from file.
* @param String filepath is the path and name of the file
* which you want to analyse.
* @return return the content of file in String form.
*/
public String readFile(String filepath)
{
try{
File file= new File(filepath);
if(file.isDirectory())
{
String[] list = file.list();
String str = new String();
System.out.println("文件的个数:"+list.length+" 文件列表如下:");
for(int i =0;i<list.length;i++)
{
System.out.println(list[i]);
FileInputStream read = new FileInputStream(filepath+'//'+list[i]);
byte[]data = new byte[read.available()];
read.read(data);
read.close();
String content = new String(data);
str +=content;
}
return str;
}
else{
FileInputStream read = new FileInputStream(filepath);
byte[]data = new byte[read.available()];
read.read(data);
read.close();
String content = new String(data);
return content;
}
}catch(IOException e){
System.out.println(e);
return null;
}
}
/**
* This method is designed to init a set of word and the times it appeared.
* @param wordList
*/
public void initSet(Hashtable wordList)
{
Enumeration e = wordList.keys();
this.resultSet = new Set(wordList.size());
while(e.hasMoreElements())
{
Object key = e.nextElement();
String word = key.toString();
Integer num = (Integer)wordList.get(key);
int times = num.intValue();
this.resultSet.addElement(word,times);
}
}
/**
* This method is designed to show the word list.
* @param wordList
*/
public void showTable(Hashtable wordList)
{
Enumeration e = wordList.keys();
while(e.hasMoreElements())
{
Object key = e.nextElement();
String word = key.toString();
Integer num = (Integer)wordList.get(key);
int times = num.intValue();
System.out.println(word+" : "+times);
}
}
/**
* This method is designed to write the result into file.
* @param filepath
*/
public void saveResult(String filepath)
{
try{
FileOutputStream write = new FileOutputStream(filepath);
for(int i = 0;i<this.resultSet.getCount();i++)
{
String word = this.resultSet.getWordAt(i);
int times = this.resultSet.getFrequency(i);
write.write(this.format(word,times));
}
write.close();
}catch(IOException e)
{
System.out.println(e);
}
}
/**@author ZhuTao
* This method is designed to format information
* to byte stream.
* @param word
* @param times
* @return a byte stream.
*/
public byte[] format(String word,int times)
{
String str = new String (word+" : "+times+"/r/n");
byte []data = new byte[str.length()];
for(int i = 0;i<str.length();i++)
data[i] = (byte)str.charAt(i);
return data;
}
}
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.File;
import java.util.Enumeration;
import java.util.Hashtable;
/**
 * Growable list of words paired with their occurrence counts, which can be
 * ordered by descending frequency.
 * <p>
 * Despite its name this class is unrelated to {@code java.util.Set}: it does
 * not reject duplicate words.
 * @author ZhuTao HUST 2006.6.2-2006.6.3
 * Email:greenkugua@sina.com.cn.
 * QQ:307356132
 * @version 1.0
 */
class Set{
    /** One word together with the number of times it appeared. */
    private static final class Entry implements Comparable<Entry>{
        final String word;
        final int times;

        Entry(String word,int times){
            this.word = word;
            this.times = times;
        }

        /**
         * Orders entries by descending count. Entries with equal counts are
         * ordered alphabetically (null words last) so that the sorted output
         * is reproducible from run to run.
         */
        public int compareTo(Entry other){
            if(this.times != other.times)
                return this.times > other.times ? -1 : 1;
            if(this.word == null)
                return other.word == null ? 0 : 1;
            if(other.word == null)
                return -1;
            return this.word.compareTo(other.word);
        }
    }

    /** Number of entries stored so far; only entries[0..num) are valid. */
    private int num;
    private Entry[] entries;

    /**
     * Creates an empty list.
     * @param size the initial capacity; the list grows on demand when more
     * elements than this are added.
     * @throws NegativeArraySizeException if size is negative.
     */
    public Set(int size){
        this.num = 0;
        this.entries = new Entry[size];
    }

    /**
     * Appends a word and its count to the list.
     * @param word is the word you want to add.
     * @param times is the number of times it appears.
     */
    public void addElement(String word,int times)
    {
        if(this.num == this.entries.length)
        {
            // Grow instead of failing once the initial capacity is used up.
            Entry[] bigger = new Entry[this.entries.length*2+1];
            System.arraycopy(this.entries,0,bigger,0,this.num);
            this.entries = bigger;
        }
        this.entries[this.num] = new Entry(word,times);
        this.num++;
    }

    /**
     * Sorts the stored words by descending frequency; words with the same
     * frequency are put in alphabetical order.
     */
    public void sort()
    {
        // Only the filled prefix is sorted; unused slots hold null.
        Arrays.sort(this.entries,0,this.num);
    }

    /**
     * Prints the number of stored words followed by one "word : count" line
     * per entry, in the current order, to standard output.
     */
    public void showResult()
    {
        System.out.println("总共有" +this.num+"个单词,它们的出现频率降序排列如下:");
        for(int i = 0;i<this.num;i++)
            System.out.println(this.entries[i].word+" : "+this.entries[i].times);
    }

    /**
     * Returns the number of words stored.
     * @return The number of words.
     */
    public int getCount()
    {
        return this.num;
    }

    /**
     * Returns the word stored at position i.
     * @param i is the zero-based position of the word you want to get.
     * @return the word at position i.
     * @throws ArrayIndexOutOfBoundsException if i is negative or not less
     * than {@link #getCount()}.
     */
    public String getWordAt(int i)
    {
        return this.entryAt(i).word;
    }

    /**
     * Returns the frequency of the word stored at position i.
     * @param i is the zero-based position of the word.
     * @return the frequency of the word at position i.
     * @throws ArrayIndexOutOfBoundsException if i is negative or not less
     * than {@link #getCount()}.
     */
    public int getFrequency(int i)
    {
        return this.entryAt(i).times;
    }

    /** Looks up a stored entry, rejecting positions outside the filled part. */
    private Entry entryAt(int i)
    {
        if(i < 0 || i >= this.num)
            throw new ArrayIndexOutOfBoundsException(i);
        return this.entries[i];
    }
}
/**
* This class is designed to annalyse
* word's frequency of English articles.
* @author ZhuTao HUST.
* 2006.6.2-2006.6.3
* Email:greenkugua@sina.com.cn.
* QQ:307356132
* @version:1.0
*/
public class EnglishWord {
private Set resultSet;
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
EnglishWord analyser = new EnglishWord("G://test","G://tao.txt");
}
public EnglishWord(String filepath,String savepath)
{
String content = this.readFile(filepath);
Hashtable table = this.getWordList(content);
this.initSet(table);
this.resultSet.sort();
this.resultSet.showResult();
this.saveResult(savepath);
}
/**
* This method is used to analyse the times of each word appears in
* the file.
* @param content is the content of file you.
* @return the word map.Which contains words and times it appeared.
*/
public Hashtable getWordList(String content)
{
Hashtable wordList = new Hashtable();
int i = 0;
for(;i<content.length();i++)
{
char ch = content.charAt(i);
if((ch>'Z'&&ch<'a')||ch<'A'||ch>'z');
else break;
}
boolean flag = true;
String word = new String();
char ch ;
for(;i<content.length();i++)
{
ch = content.charAt(i);
if((ch>='A'&&ch<='Z')||(ch>='a'&&ch<='z')||ch == '/''){
word+=ch;
flag = true;
}
else{
//如果已经包含该单词,就计数加一;
if(ch == '-'){i+=2;continue;}
if(flag)
{
if(wordList.containsKey(word))
{
Integer num = (Integer)wordList.get(word);
int t = num.intValue()+1;
wordList.put(word,new Integer(t));
}
else
{
wordList.put(word,new Integer(1));
}
}
flag = false;
word = "";
}
}
if(!word.equals(""))
{
if(wordList.containsKey(word))
{
Integer num = (Integer)wordList.get(word);
int t = num.intValue()+1;
wordList.put(word,new Integer(t));
}
else
{
wordList.put(word,new Integer(1));
}
}
return wordList;
}
/**
* @see This method is used to read content from file.
* @param String filepath is the path and name of the file
* which you want to analyse.
* @return return the content of file in String form.
*/
public String readFile(String filepath)
{
try{
File file= new File(filepath);
if(file.isDirectory())
{
String[] list = file.list();
String str = new String();
System.out.println("文件的个数:"+list.length+" 文件列表如下:");
for(int i =0;i<list.length;i++)
{
System.out.println(list[i]);
FileInputStream read = new FileInputStream(filepath+'//'+list[i]);
byte[]data = new byte[read.available()];
read.read(data);
read.close();
String content = new String(data);
str +=content;
}
return str;
}
else{
FileInputStream read = new FileInputStream(filepath);
byte[]data = new byte[read.available()];
read.read(data);
read.close();
String content = new String(data);
return content;
}
}catch(IOException e){
System.out.println(e);
return null;
}
}
/**
* This method is designed to init a set of word and the times it appeared.
* @param wordList
*/
public void initSet(Hashtable wordList)
{
Enumeration e = wordList.keys();
this.resultSet = new Set(wordList.size());
while(e.hasMoreElements())
{
Object key = e.nextElement();
String word = key.toString();
Integer num = (Integer)wordList.get(key);
int times = num.intValue();
this.resultSet.addElement(word,times);
}
}
/**
* This method is designed to show the word list.
* @param wordList
*/
public void showTable(Hashtable wordList)
{
Enumeration e = wordList.keys();
while(e.hasMoreElements())
{
Object key = e.nextElement();
String word = key.toString();
Integer num = (Integer)wordList.get(key);
int times = num.intValue();
System.out.println(word+" : "+times);
}
}
/**
* This method is designed to write the result into file.
* @param filepath
*/
public void saveResult(String filepath)
{
try{
FileOutputStream write = new FileOutputStream(filepath);
for(int i = 0;i<this.resultSet.getCount();i++)
{
String word = this.resultSet.getWordAt(i);
int times = this.resultSet.getFrequency(i);
write.write(this.format(word,times));
}
write.close();
}catch(IOException e)
{
System.out.println(e);
}
}
/**@author ZhuTao
* This method is designed to format information
* to byte stream.
* @param word
* @param times
* @return a byte stream.
*/
public byte[] format(String word,int times)
{
String str = new String (word+" : "+times+"/r/n");
byte []data = new byte[str.length()];
for(int i = 0;i<str.length();i++)
data[i] = (byte)str.charAt(i);
return data;
}
}
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.File;
import java.util.Enumeration;
import java.util.Hashtable;
/**
 * Growable list of words paired with their occurrence counts, which can be
 * ordered by descending frequency.
 * <p>
 * Despite its name this class is unrelated to {@code java.util.Set}: it does
 * not reject duplicate words.
 * @author ZhuTao HUST 2006.6.2-2006.6.3
 * Email:greenkugua@sina.com.cn.
 * QQ:307356132
 * @version 1.0
 */
class Set{
    /** One word together with the number of times it appeared. */
    private static final class Entry implements Comparable<Entry>{
        final String word;
        final int times;

        Entry(String word,int times){
            this.word = word;
            this.times = times;
        }

        /**
         * Orders entries by descending count. Entries with equal counts are
         * ordered alphabetically (null words last) so that the sorted output
         * is reproducible from run to run.
         */
        public int compareTo(Entry other){
            if(this.times != other.times)
                return this.times > other.times ? -1 : 1;
            if(this.word == null)
                return other.word == null ? 0 : 1;
            if(other.word == null)
                return -1;
            return this.word.compareTo(other.word);
        }
    }

    /** Number of entries stored so far; only entries[0..num) are valid. */
    private int num;
    private Entry[] entries;

    /**
     * Creates an empty list.
     * @param size the initial capacity; the list grows on demand when more
     * elements than this are added.
     * @throws NegativeArraySizeException if size is negative.
     */
    public Set(int size){
        this.num = 0;
        this.entries = new Entry[size];
    }

    /**
     * Appends a word and its count to the list.
     * @param word is the word you want to add.
     * @param times is the number of times it appears.
     */
    public void addElement(String word,int times)
    {
        if(this.num == this.entries.length)
        {
            // Grow instead of failing once the initial capacity is used up.
            Entry[] bigger = new Entry[this.entries.length*2+1];
            System.arraycopy(this.entries,0,bigger,0,this.num);
            this.entries = bigger;
        }
        this.entries[this.num] = new Entry(word,times);
        this.num++;
    }

    /**
     * Sorts the stored words by descending frequency; words with the same
     * frequency are put in alphabetical order.
     */
    public void sort()
    {
        // Only the filled prefix is sorted; unused slots hold null.
        Arrays.sort(this.entries,0,this.num);
    }

    /**
     * Prints the number of stored words followed by one "word : count" line
     * per entry, in the current order, to standard output.
     */
    public void showResult()
    {
        System.out.println("总共有" +this.num+"个单词,它们的出现频率降序排列如下:");
        for(int i = 0;i<this.num;i++)
            System.out.println(this.entries[i].word+" : "+this.entries[i].times);
    }

    /**
     * Returns the number of words stored.
     * @return The number of words.
     */
    public int getCount()
    {
        return this.num;
    }

    /**
     * Returns the word stored at position i.
     * @param i is the zero-based position of the word you want to get.
     * @return the word at position i.
     * @throws ArrayIndexOutOfBoundsException if i is negative or not less
     * than {@link #getCount()}.
     */
    public String getWordAt(int i)
    {
        return this.entryAt(i).word;
    }

    /**
     * Returns the frequency of the word stored at position i.
     * @param i is the zero-based position of the word.
     * @return the frequency of the word at position i.
     * @throws ArrayIndexOutOfBoundsException if i is negative or not less
     * than {@link #getCount()}.
     */
    public int getFrequency(int i)
    {
        return this.entryAt(i).times;
    }

    /** Looks up a stored entry, rejecting positions outside the filled part. */
    private Entry entryAt(int i)
    {
        if(i < 0 || i >= this.num)
            throw new ArrayIndexOutOfBoundsException(i);
        return this.entries[i];
    }
}
/**
* This class is designed to annalyse
* word's frequency of English articles.
* @author ZhuTao HUST.
* 2006.6.2-2006.6.3
* Email:greenkugua@sina.com.cn.
* QQ:307356132
* @version:1.0
*/
public class EnglishWord {
private Set resultSet;
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
EnglishWord analyser = new EnglishWord("G://test","G://tao.txt");
}
public EnglishWord(String filepath,String savepath)
{
String content = this.readFile(filepath);
Hashtable table = this.getWordList(content);
this.initSet(table);
this.resultSet.sort();
this.resultSet.showResult();
this.saveResult(savepath);
}
/**
* This method is used to analyse the times of each word appears in
* the file.
* @param content is the content of file you.
* @return the word map.Which contains words and times it appeared.
*/
public Hashtable getWordList(String content)
{
Hashtable wordList = new Hashtable();
int i = 0;
for(;i<content.length();i++)
{
char ch = content.charAt(i);
if((ch>'Z'&&ch<'a')||ch<'A'||ch>'z');
else break;
}
boolean flag = true;
String word = new String();
char ch ;
for(;i<content.length();i++)
{
ch = content.charAt(i);
if((ch>='A'&&ch<='Z')||(ch>='a'&&ch<='z')||ch == '/''){
word+=ch;
flag = true;
}
else{
//如果已经包含该单词,就计数加一;
if(ch == '-'){i+=2;continue;}
if(flag)
{
if(wordList.containsKey(word))
{
Integer num = (Integer)wordList.get(word);
int t = num.intValue()+1;
wordList.put(word,new Integer(t));
}
else
{
wordList.put(word,new Integer(1));
}
}
flag = false;
word = "";
}
}
if(!word.equals(""))
{
if(wordList.containsKey(word))
{
Integer num = (Integer)wordList.get(word);
int t = num.intValue()+1;
wordList.put(word,new Integer(t));
}
else
{
wordList.put(word,new Integer(1));
}
}
return wordList;
}
/**
* @see This method is used to read content from file.
* @param String filepath is the path and name of the file
* which you want to analyse.
* @return return the content of file in String form.
*/
public String readFile(String filepath)
{
try{
File file= new File(filepath);
if(file.isDirectory())
{
String[] list = file.list();
String str = new String();
System.out.println("文件的个数:"+list.length+" 文件列表如下:");
for(int i =0;i<list.length;i++)
{
System.out.println(list[i]);
FileInputStream read = new FileInputStream(filepath+'//'+list[i]);
byte[]data = new byte[read.available()];
read.read(data);
read.close();
String content = new String(data);
str +=content;
}
return str;
}
else{
FileInputStream read = new FileInputStream(filepath);
byte[]data = new byte[read.available()];
read.read(data);
read.close();
String content = new String(data);
return content;
}
}catch(IOException e){
System.out.println(e);
return null;
}
}
/**
* This method is designed to init a set of word and the times it appeared.
* @param wordList
*/
public void initSet(Hashtable wordList)
{
Enumeration e = wordList.keys();
this.resultSet = new Set(wordList.size());
while(e.hasMoreElements())
{
Object key = e.nextElement();
String word = key.toString();
Integer num = (Integer)wordList.get(key);
int times = num.intValue();
this.resultSet.addElement(word,times);
}
}
/**
* This method is designed to show the word list.
* @param wordList
*/
public void showTable(Hashtable wordList)
{
Enumeration e = wordList.keys();
while(e.hasMoreElements())
{
Object key = e.nextElement();
String word = key.toString();
Integer num = (Integer)wordList.get(key);
int times = num.intValue();
System.out.println(word+" : "+times);
}
}
/**
* This method is designed to write the result into file.
* @param filepath
*/
public void saveResult(String filepath)
{
try{
FileOutputStream write = new FileOutputStream(filepath);
for(int i = 0;i<this.resultSet.getCount();i++)
{
String word = this.resultSet.getWordAt(i);
int times = this.resultSet.getFrequency(i);
write.write(this.format(word,times));
}
write.close();
}catch(IOException e)
{
System.out.println(e);
}
}
/**@author ZhuTao
* This method is designed to format information
* to byte stream.
* @param word
* @param times
* @return a byte stream.
*/
public byte[] format(String word,int times)
{
String str = new String (word+" : "+times+"/r/n");
byte []data = new byte[str.length()];
for(int i = 0;i<str.length();i++)
data[i] = (byte)str.charAt(i);
return data;
}
}
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.File;
import java.util.Enumeration;
import java.util.Hashtable;
/**
 * Growable list of words paired with their occurrence counts, which can be
 * ordered by descending frequency.
 * <p>
 * Despite its name this class is unrelated to {@code java.util.Set}: it does
 * not reject duplicate words.
 * @author ZhuTao HUST 2006.6.2-2006.6.3
 * Email:greenkugua@sina.com.cn.
 * QQ:307356132
 * @version 1.0
 */
class Set{
    /** One word together with the number of times it appeared. */
    private static final class Entry implements Comparable<Entry>{
        final String word;
        final int times;

        Entry(String word,int times){
            this.word = word;
            this.times = times;
        }

        /**
         * Orders entries by descending count. Entries with equal counts are
         * ordered alphabetically (null words last) so that the sorted output
         * is reproducible from run to run.
         */
        public int compareTo(Entry other){
            if(this.times != other.times)
                return this.times > other.times ? -1 : 1;
            if(this.word == null)
                return other.word == null ? 0 : 1;
            if(other.word == null)
                return -1;
            return this.word.compareTo(other.word);
        }
    }

    /** Number of entries stored so far; only entries[0..num) are valid. */
    private int num;
    private Entry[] entries;

    /**
     * Creates an empty list.
     * @param size the initial capacity; the list grows on demand when more
     * elements than this are added.
     * @throws NegativeArraySizeException if size is negative.
     */
    public Set(int size){
        this.num = 0;
        this.entries = new Entry[size];
    }

    /**
     * Appends a word and its count to the list.
     * @param word is the word you want to add.
     * @param times is the number of times it appears.
     */
    public void addElement(String word,int times)
    {
        if(this.num == this.entries.length)
        {
            // Grow instead of failing once the initial capacity is used up.
            Entry[] bigger = new Entry[this.entries.length*2+1];
            System.arraycopy(this.entries,0,bigger,0,this.num);
            this.entries = bigger;
        }
        this.entries[this.num] = new Entry(word,times);
        this.num++;
    }

    /**
     * Sorts the stored words by descending frequency; words with the same
     * frequency are put in alphabetical order.
     */
    public void sort()
    {
        // Only the filled prefix is sorted; unused slots hold null.
        Arrays.sort(this.entries,0,this.num);
    }

    /**
     * Prints the number of stored words followed by one "word : count" line
     * per entry, in the current order, to standard output.
     */
    public void showResult()
    {
        System.out.println("总共有" +this.num+"个单词,它们的出现频率降序排列如下:");
        for(int i = 0;i<this.num;i++)
            System.out.println(this.entries[i].word+" : "+this.entries[i].times);
    }

    /**
     * Returns the number of words stored.
     * @return The number of words.
     */
    public int getCount()
    {
        return this.num;
    }

    /**
     * Returns the word stored at position i.
     * @param i is the zero-based position of the word you want to get.
     * @return the word at position i.
     * @throws ArrayIndexOutOfBoundsException if i is negative or not less
     * than {@link #getCount()}.
     */
    public String getWordAt(int i)
    {
        return this.entryAt(i).word;
    }

    /**
     * Returns the frequency of the word stored at position i.
     * @param i is the zero-based position of the word.
     * @return the frequency of the word at position i.
     * @throws ArrayIndexOutOfBoundsException if i is negative or not less
     * than {@link #getCount()}.
     */
    public int getFrequency(int i)
    {
        return this.entryAt(i).times;
    }

    /** Looks up a stored entry, rejecting positions outside the filled part. */
    private Entry entryAt(int i)
    {
        if(i < 0 || i >= this.num)
            throw new ArrayIndexOutOfBoundsException(i);
        return this.entries[i];
    }
}
/**
* This class is designed to annalyse
* word's frequency of English articles.
* @author ZhuTao HUST.
* 2006.6.2-2006.6.3
* Email:greenkugua@sina.com.cn.
* QQ:307356132
* @version:1.0
*/
public class EnglishWord {
private Set resultSet;
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
EnglishWord analyser = new EnglishWord("G://test","G://tao.txt");
}
public EnglishWord(String filepath,String savepath)
{
String content = this.readFile(filepath);
Hashtable table = this.getWordList(content);
this.initSet(table);
this.resultSet.sort();
this.resultSet.showResult();
this.saveResult(savepath);
}
/**
* This method is used to analyse the times of each word appears in
* the file.
* @param content is the content of file you.
* @return the word map.Which contains words and times it appeared.
*/
public Hashtable getWordList(String content)
{
Hashtable wordList = new Hashtable();
int i = 0;
for(;i<content.length();i++)
{
char ch = content.charAt(i);
if((ch>'Z'&&ch<'a')||ch<'A'||ch>'z');
else break;
}
boolean flag = true;
String word = new String();
char ch ;
for(;i<content.length();i++)
{
ch = content.charAt(i);
if((ch>='A'&&ch<='Z')||(ch>='a'&&ch<='z')||ch == '/''){
word+=ch;
flag = true;
}
else{
//如果已经包含该单词,就计数加一;
if(ch == '-'){i+=2;continue;}
if(flag)
{
if(wordList.containsKey(word))
{
Integer num = (Integer)wordList.get(word);
int t = num.intValue()+1;
wordList.put(word,new Integer(t));
}
else
{
wordList.put(word,new Integer(1));
}
}
flag = false;
word = "";
}
}
if(!word.equals(""))
{
if(wordList.containsKey(word))
{
Integer num = (Integer)wordList.get(word);
int t = num.intValue()+1;
wordList.put(word,new Integer(t));
}
else
{
wordList.put(word,new Integer(1));
}
}
return wordList;
}
/**
* @see This method is used to read content from file.
* @param String filepath is the path and name of the file
* which you want to analyse.
* @return return the content of file in String form.
*/
public String readFile(String filepath)
{
try{
File file= new File(filepath);
if(file.isDirectory())
{
String[] list = file.list();
String str = new String();
System.out.println("文件的个数:"+list.length+" 文件列表如下:");
for(int i =0;i<list.length;i++)
{
System.out.println(list[i]);
FileInputStream read = new FileInputStream(filepath+'//'+list[i]);
byte[]data = new byte[read.available()];
read.read(data);
read.close();
String content = new String(data);
str +=content;
}
return str;
}
else{
FileInputStream read = new FileInputStream(filepath);
byte[]data = new byte[read.available()];
read.read(data);
read.close();
String content = new String(data);
return content;
}
}catch(IOException e){
System.out.println(e);
return null;
}
}
/**
* This method is designed to init a set of word and the times it appeared.
* @param wordList
*/
public void initSet(Hashtable wordList)
{
Enumeration e = wordList.keys();
this.resultSet = new Set(wordList.size());
while(e.hasMoreElements())
{
Object key = e.nextElement();
String word = key.toString();
Integer num = (Integer)wordList.get(key);
int times = num.intValue();
this.resultSet.addElement(word,times);
}
}
/**
* This method is designed to show the word list.
* @param wordList
*/
public void showTable(Hashtable wordList)
{
Enumeration e = wordList.keys();
while(e.hasMoreElements())
{
Object key = e.nextElement();
String word = key.toString();
Integer num = (Integer)wordList.get(key);
int times = num.intValue();
System.out.println(word+" : "+times);
}
}
/**
* This method is designed to write the result into file.
* @param filepath
*/
public void saveResult(String filepath)
{
try{
FileOutputStream write = new FileOutputStream(filepath);
for(int i = 0;i<this.resultSet.getCount();i++)
{
String word = this.resultSet.getWordAt(i);
int times = this.resultSet.getFrequency(i);
write.write(this.format(word,times));
}
write.close();
}catch(IOException e)
{
System.out.println(e);
}
}
/**@author ZhuTao
* This method is designed to format information
* to byte stream.
* @param word
* @param times
* @return a byte stream.
*/
public byte[] format(String word,int times)
{
String str = new String (word+" : "+times+"/r/n");
byte []data = new byte[str.length()];
for(int i = 0;i<str.length();i++)
data[i] = (byte)str.charAt(i);
return data;
}
}
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.File;
import java.util.Enumeration;
import java.util.Hashtable;
/**
 * Growable list of words paired with their occurrence counts, which can be
 * ordered by descending frequency.
 * <p>
 * Despite its name this class is unrelated to {@code java.util.Set}: it does
 * not reject duplicate words.
 * @author ZhuTao HUST 2006.6.2-2006.6.3
 * Email:greenkugua@sina.com.cn.
 * QQ:307356132
 * @version 1.0
 */
class Set{
    /** One word together with the number of times it appeared. */
    private static final class Entry implements Comparable<Entry>{
        final String word;
        final int times;

        Entry(String word,int times){
            this.word = word;
            this.times = times;
        }

        /**
         * Orders entries by descending count. Entries with equal counts are
         * ordered alphabetically (null words last) so that the sorted output
         * is reproducible from run to run.
         */
        public int compareTo(Entry other){
            if(this.times != other.times)
                return this.times > other.times ? -1 : 1;
            if(this.word == null)
                return other.word == null ? 0 : 1;
            if(other.word == null)
                return -1;
            return this.word.compareTo(other.word);
        }
    }

    /** Number of entries stored so far; only entries[0..num) are valid. */
    private int num;
    private Entry[] entries;

    /**
     * Creates an empty list.
     * @param size the initial capacity; the list grows on demand when more
     * elements than this are added.
     * @throws NegativeArraySizeException if size is negative.
     */
    public Set(int size){
        this.num = 0;
        this.entries = new Entry[size];
    }

    /**
     * Appends a word and its count to the list.
     * @param word is the word you want to add.
     * @param times is the number of times it appears.
     */
    public void addElement(String word,int times)
    {
        if(this.num == this.entries.length)
        {
            // Grow instead of failing once the initial capacity is used up.
            Entry[] bigger = new Entry[this.entries.length*2+1];
            System.arraycopy(this.entries,0,bigger,0,this.num);
            this.entries = bigger;
        }
        this.entries[this.num] = new Entry(word,times);
        this.num++;
    }

    /**
     * Sorts the stored words by descending frequency; words with the same
     * frequency are put in alphabetical order.
     */
    public void sort()
    {
        // Only the filled prefix is sorted; unused slots hold null.
        Arrays.sort(this.entries,0,this.num);
    }

    /**
     * Prints the number of stored words followed by one "word : count" line
     * per entry, in the current order, to standard output.
     */
    public void showResult()
    {
        System.out.println("总共有" +this.num+"个单词,它们的出现频率降序排列如下:");
        for(int i = 0;i<this.num;i++)
            System.out.println(this.entries[i].word+" : "+this.entries[i].times);
    }

    /**
     * Returns the number of words stored.
     * @return The number of words.
     */
    public int getCount()
    {
        return this.num;
    }

    /**
     * Returns the word stored at position i.
     * @param i is the zero-based position of the word you want to get.
     * @return the word at position i.
     * @throws ArrayIndexOutOfBoundsException if i is negative or not less
     * than {@link #getCount()}.
     */
    public String getWordAt(int i)
    {
        return this.entryAt(i).word;
    }

    /**
     * Returns the frequency of the word stored at position i.
     * @param i is the zero-based position of the word.
     * @return the frequency of the word at position i.
     * @throws ArrayIndexOutOfBoundsException if i is negative or not less
     * than {@link #getCount()}.
     */
    public int getFrequency(int i)
    {
        return this.entryAt(i).times;
    }

    /** Looks up a stored entry, rejecting positions outside the filled part. */
    private Entry entryAt(int i)
    {
        if(i < 0 || i >= this.num)
            throw new ArrayIndexOutOfBoundsException(i);
        return this.entries[i];
    }
}
/**
* This class is designed to annalyse
* word's frequency of English articles.
* @author ZhuTao HUST.
* 2006.6.2-2006.6.3
* Email:greenkugua@sina.com.cn.
* QQ:307356132
* @version:1.0
*/
public class EnglishWord {
private Set resultSet;
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
EnglishWord analyser = new EnglishWord("G://test","G://tao.txt");
}
public EnglishWord(String filepath,String savepath)
{
String content = this.readFile(filepath);
Hashtable table = this.getWordList(content);
this.initSet(table);
this.resultSet.sort();
this.resultSet.showResult();
this.saveResult(savepath);
}
/**
* This method is used to analyse the times of each word appears in
* the file.
* @param content is the content of file you.
* @return the word map.Which contains words and times it appeared.
*/
public Hashtable getWordList(String content)
{
Hashtable wordList = new Hashtable();
int i = 0;
for(;i<content.length();i++)
{
char ch = content.charAt(i);
if((ch>'Z'&&ch<'a')||ch<'A'||ch>'z');
else break;
}
boolean flag = true;
String word = new String();
char ch ;
for(;i<content.length();i++)
{
ch = content.charAt(i);
if((ch>='A'&&ch<='Z')||(ch>='a'&&ch<='z')||ch == '/''){
word+=ch;
flag = true;
}
else{
//如果已经包含该单词,就计数加一;
if(ch == '-'){i+=2;continue;}
if(flag)
{
if(wordList.containsKey(word))
{
Integer num = (Integer)wordList.get(word);
int t = num.intValue()+1;
wordList.put(word,new Integer(t));
}
else
{
wordList.put(word,new Integer(1));
}
}
flag = false;
word = "";
}
}
if(!word.equals(""))
{
if(wordList.containsKey(word))
{
Integer num = (Integer)wordList.get(word);
int t = num.intValue()+1;
wordList.put(word,new Integer(t));
}
else
{
wordList.put(word,new Integer(1));
}
}
return wordList;
}
/**
* @see This method is used to read content from file.
* @param String filepath is the path and name of the file
* which you want to analyse.
* @return return the content of file in String form.
*/
public String readFile(String filepath)
{
try{
File file= new File(filepath);
if(file.isDirectory())
{
String[] list = file.list();
String str = new String();
System.out.println("文件的个数:"+list.length+" 文件列表如下:");
for(int i =0;i<list.length;i++)
{
System.out.println(list[i]);
FileInputStream read = new FileInputStream(filepath+'//'+list[i]);
byte[]data = new byte[read.available()];
read.read(data);
read.close();
String content = new String(data);
str +=content;
}
return str;
}
else{
FileInputStream read = new FileInputStream(filepath);
byte[]data = new byte[read.available()];
read.read(data);
read.close();
String content = new String(data);
return content;
}
}catch(IOException e){
System.out.println(e);
return null;
}
}
/**
* This method is designed to init a set of word and the times it appeared.
* @param wordList
*/
public void initSet(Hashtable wordList)
{
Enumeration e = wordList.keys();
this.resultSet = new Set(wordList.size());
while(e.hasMoreElements())
{
Object key = e.nextElement();
String word = key.toString();
Integer num = (Integer)wordList.get(key);
int times = num.intValue();
this.resultSet.addElement(word,times);
}
}
/**
* This method is designed to show the word list.
* @param wordList
*/
public void showTable(Hashtable wordList)
{
Enumeration e = wordList.keys();
while(e.hasMoreElements())
{
Object key = e.nextElement();
String word = key.toString();
Integer num = (Integer)wordList.get(key);
int times = num.intValue();
System.out.println(word+" : "+times);
}
}
/**
* This method is designed to write the result into file.
* @param filepath
*/
public void saveResult(String filepath)
{
try{
FileOutputStream write = new FileOutputStream(filepath);
for(int i = 0;i<this.resultSet.getCount();i++)
{
String word = this.resultSet.getWordAt(i);
int times = this.resultSet.getFrequency(i);
write.write(this.format(word,times));
}
write.close();
}catch(IOException e)
{
System.out.println(e);
}
}
/**@author ZhuTao
* This method is designed to format information
* to byte stream.
* @param word
* @param times
* @return a byte stream.
*/
public byte[] format(String word,int times)
{
String str = new String (word+" : "+times+"/r/n");
byte []data = new byte[str.length()];
for(int i = 0;i<str.length();i++)
data[i] = (byte)str.charAt(i);
return data;
}
}
- 英文单词频率统计工具
- 统计英文单词
- Java 统计英文单词
- 英文单词词频统计代码
- 统计英文单词词频
- 英文单词词频统计
- 英文单词统计程序
- C红黑树统计英文单词数量
- C++英文单词统计小程序
- 汉字频率统计
- 频率统计(map)
- 字词出现频率统计
- 统计单词频率-map
- ACM 统计频率
- 统计频率和频数
- Java统计单词频率
- 统计字符频率
- JAVA,数字频率统计
- JAVA测试题
- 4.8~4.10
- DNN资料
- 基于LSB的信息隐藏实现
- AES算法的JAVA实现
- 英文单词频率统计工具
- 虚拟文件系统的实现
- Thinking in C++读书笔记--13.4继承与组合
- C语言教务管理系统源代码
- ATM教程:数据链路层
- 微软宣布Vista开发正式结束 代码已交付生产
- 郑重声明:寻找学习伙伴
- ORACLE的锁机制
- SQLSERVER 中的锁机制