黑马程序员——统计文件中每个英文单词出现的次数

来源：互联网发布：java runtime class 编辑：程序博客网时间：2024/05/01 12:36

------- android培训、java培训、期待与您交流！ ----------

需求：有一个文本文件，其中存有英文单词和中文字符。现在编写一个程序，统计文件中每个英文单词出现的次数，并将结果保存到文本文件中。保存的格式为：单词=出现次数，例如there=21，are=6。

分析：

1.数据源：读取的有英文单词的文本文件，是磁盘上的文件，所以数据源是硬盘上的纯文本文件，为了提高读取的效率，下面使用BufferedReader来读取文件；

2.数据目的：英文单词出现的次数保存到文本文件中，所以数据目的是硬盘上的纯文本文件，为了提高写入的效率，下面使用BufferedWriter将数据写入到文件中；

3.统计单词次数的格式为：单词=次数，这种格式符合键值对的形式，Map集合中的元素就是键值对的形式。所以，可以将单词作为键，单词出现的次数作为值，插入到Map集合中。

代码如下，该代码涉及到IO、JavaAPI等内容：

package com.itheima.entranceExam.blog;import java.io.BufferedReader;import java.io.BufferedWriter;import java.io.File;import java.io.FileReader;import java.io.FileWriter;import java.io.IOException;import java.util.Iterator;import java.util.Map;import java.util.Set;import java.util.TreeMap;//文件工具类class FileUtil {private static BufferedReader bufr = null;private static BufferedWriter bufw = null;// 创建TreeMap集合对象private static TreeMap<String, Integer> treeMap = new TreeMap<String, Integer>();// 统计英文单词出现的次数：参数in表示需要统计英文单词出现次数的文本文件,out表示将统计的结果输出到的文本文件public static void countWordTimes(File in, File out) {//计数器int count = 0;try {//创建缓冲输入流对象bufr = new BufferedReader(new FileReader(in));//字符数组，保存英文单词String[] words = new String[1024];String len = null;// 循环读取文本文件中的内容，一次读取一行数据while ((len = bufr.readLine()) != null) {//调用方法，将字符串分割成英文单词，返回这些单词组成的字符串数组words = englishWors(len);//变量字符串数组for (int i = 0; i < words.length; i++) {String key = words[i];//获取TreeMap对象中的键值Integer value = treeMap.get(key);if (!(value == null))count = value;count++;//单词作为键，单词出现的次数作为键值，插入到TreeMap集合中treeMap.put(key, count);count = 0;// 归零,避免本次键值加到他键的键值上}}} catch (Exception e) {throw new RuntimeException("缓冲输入流对象失败！");} finally {if (bufr != null) {try {bufr.close();//关闭缓冲输入流对象} catch (IOException e) {throw new RuntimeException("关闭缓冲输入流对象失败！");}}}writeToFile(out, treeMap);}//将Map集合中的键值对写入到文件public static void writeToFile(File out, TreeMap<String, Integer> treeMap) {try {//创建缓冲输出流对象bufw = new BufferedWriter(new FileWriter(out));//将TreeMap集合转换成Set集合，因为Set集合有iterator迭代器Set<Map.Entry<String, Integer>> entrySet = treeMap.entrySet();//迭代Set集合Iterator<Map.Entry<String, Integer>> it = entrySet.iterator();//遍历集合while (it.hasNext()) {Map.Entry<String, Integer> entry = it.next();//获取集合中的键String key = entry.getKey();//获取集合中的键值int value = entry.getValue();//将键和键值组成的键值对封装成字符串String len = new String(key+" = "+Integer.toString(value));bufw.write(len);//将字符串len写入到文件中bufw.newLine();//换行bufw.flush();//刷新缓冲区}} catch (IOException 
e) {throw new RuntimeException("缓冲输出流对象创建失败！");}finally {if (bufw != null) {try {bufw.close();//关闭缓冲输出流对象} catch (IOException e) {throw new RuntimeException("关闭缓冲输出流失败！");}}}}// 取出字符串中的英文单词public static String[] englishWors(String len) {//所有字母小写化len = len.toLowerCase(); //匹配非英文字符为空格len = len.replaceAll("[^A-Za-z]", " "); //将1到多个空格匹配为一个空格len = len.replaceAll("\\s+", " "); //以空格为分隔符分割字符串，分割结果存入字符串数组中作为单词String[] words = len.split("\\s+"); return words;}}//主函数public class WordsCount {public static void main(String[] args) throws IOException {// 文件分隔符String separator = File.separator;// 将硬盘上的文本文件初始化为文件对象，in是需要读取的文本文件，该文件中有英文单词File in = new File("D:" + separator + "JavaTest" + separator+ "Article.txt");//将硬盘上的文本文件初始化为文件对象，out文件用于存储统计的结果File out = new File("D:" + separator + "JavaTest" + separator+ "Result.txt");//调用统计英文单词次数的方法FileUtil.countWordTimes(in, out);//用记事本打开文本文件Runtime.getRuntime().exec("notepad "+out+"");}}