分析一个文本文件中各个单词出现的频率,把频率最高的10个词打印出来

来源:互联网 发布:手机淘宝首页登陆 编辑:程序博客网 时间:2024/05/17 07:46

作业要求:分析一个文本文件中各个单词出现的频率,把频率最高的10个词打印出来


源代码提交:

#include <time.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <string>
using namespace std;
#define M 20000
// Capacity of the word table: main declares `sq word[M]`, so this is the
// largest number of distinct words that the table has room for.


// One entry of the word-frequency table.
struct sq
{
    // NUL-terminated word text; room for 18 characters plus the terminator.
    char danci[19];
    // How many times this word has been seen.
    int count;
};


void main()
{
double start, finish;
start = (double)clock();
sq word[M];
sq t_word;
double s, f;
int K, n = 0, i, j;
char infile[10];
s = (double)clock();
cout << "***********请输入文件路径:***********" << endl;
cin >> infile;//文件路径输入    
f = (double)clock();//
cout << "输入文件路径的时间:" << (f - s) / 1000 << "s" << endl;


FILE *fp;
char ch;
//fp=fopen("d://pro.txt","r");
if ((fp = fopen(infile, "r")) == NULL)
{
cout << "无法打开文件!" << endl;
exit(0);
}
s = (double)clock();
while (!feof(fp))
{
ch = getc(fp);
if (ch == ' ' || ch == 10)
{//虑空
continue;
}
if ((ch >= 'a'&&ch <= 'z') || (ch >= 'A'&&ch <= 'Z'))              //发现一个单词
{
K = 0;
t_word.count = 1;
while ((ch >= 'a'&&ch <= 'z') || (ch >= 'A'&&ch <= 'Z') || (ch == '\''))
{
if (ch >= 'A'&&ch <= 'Z')
ch += 32;//转换成小写
t_word.danci[K++] = ch;
ch = getc(fp);
}
t_word.danci[K++] = '\0';


//一个单词结束
j = n;


for (i = 0; i<j; i++) //与前面的单词比较
{
if (strcmp(t_word.danci, word[i].danci) == 0)
{
word[i].count++;
break;
}


}


if (n == 0 || i == j)
{
word[n] = t_word;
n++;
}


}
}
f = (double)clock();
cout << "读文件,分出单词并统计的时间:" << (f - s) / 1000 << "s" << endl;


s = (double)clock();
//输出频率最高的十个单词
sq frequency_max[11];
sq temp;
for (i = 0; i<10; i++)
{
frequency_max[i] = word[i];//初始化频率最高的十个单词为前十个单词
}
//前十个排序


for (j = 0; j<10; j++)
for (i = 0; i<10 - j; i++)
if (frequency_max[i].count<frequency_max[i + 1].count)
{
temp = frequency_max[i];
frequency_max[i] = frequency_max[i + 1];
frequency_max[i + 1] = temp;
}
for (i = 10; i<n; i++)
{
if (frequency_max[9].count<word[i].count)
{
int a = 8;
while (frequency_max[a].count<word[i].count&&a >= 0)
{
a--;
}


for (j = 9; j>a + 1; j--)
{
frequency_max[j] = frequency_max[j - 1];
}
if (a<0)
frequency_max[0] = word[i];
else
frequency_max[j] = word[i];
}
}
f = (double)clock();
cout << "冒泡排序,搜索频率最高的10个单词的时间:" << (f - s) << "ms" << endl;
for (i = 0; i<10; i++)
{
cout << setiosflags(ios::left) << setw(10) << frequency_max[i].danci << frequency_max[i].count << endl;
}
finish = (double)clock();
cout << "总运行时间:" << (finish - start) / 1000 << "s" << endl;

}


结果截图:



性能分析:







0 0
原创粉丝点击