软件工程个人项目--词频统计

来源:互联网 发布:交大知行大厦附近租房 编辑:程序博客网 时间:2024/04/29 16:08

姓名:鞠凡     班级:0411202       学号:2012211565

一.题目

     题目:分析一个文本文件中各个单词出现的频率,并把出现频率最高的10个单词打印出来,文本文件大小约为30KB~300KB。

二.程序结果

三.VS性能分析

1.程序总计用时大概在4秒左右,峰值占用CPU5%,还算比较有效率。


2.热路径

可见main函数占比最高


3.执行单个工作最多的函数


4.可以查看每个函数的情况


5.函数调用的情况



四.源代码

#include "stdafx.h"
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <fcntl.h>
#include <io.h>

#define N 20                 /* size of a word buffer, including the NUL */
#define M 100000             /* capacity of wonu[] and initial text buffer size */
#define STACK_INIT_SIZE 100  /* initial capacity of the traversal stack */

/* Number of rows printed by sort(): the assignment asks for the top 10. */
enum { TOP_WORDS = 10 };

/* Node of the binary search tree: one distinct word and its count. */
typedef struct ntree {
    char a[N];              /* NUL-terminated word */
    int i;                  /* number of occurrences */
    struct ntree *left;
    struct ntree *right;
} tree;

/* Flat record used for the frequency-ordered listing. */
typedef struct WordNumber {
    char a[N];              /* NUL-terminated word */
    int i;                  /* number of occurrences */
    struct WordNumber *left;   /* unused; kept for layout compatibility */
    struct WordNumber *right;  /* unused; kept for layout compatibility */
} WN;

/* Stack of tree node pointers used by the iterative traversal. */
typedef struct {
    tree **base;            /* start of the storage */
    tree **top;             /* one past the last pushed element */
    int stacksize;          /* capacity in elements */
} sqstack;

int sign = 0;   /* number of entries stored in wonu[] */
int sum = 0;    /* total number of word occurrences visited */
int n;          /* display mode: 0 = dictionary order, 1 = by frequency */
char ko[N];     /* all-zero buffer (file-scope objects are zero-initialised) */
WN wonu[M];     /* words in dictionary order, filled by traverse() */

int traverse(tree *);
int initstack(sqstack *S);
tree *push(sqstack *s, tree *p);
tree *pop(sqstack *s);
int createtree(tree *, char *, char *, long, long);
int jfcmp(char *, char *, int);
int sort();

/*
 * Reads the whole stream into a heap buffer and stores the number of bytes
 * kept in *len. Every whitespace character (newline, tab, ...) is stored as
 * a single space so that line breaks separate words; other non-printable
 * bytes are dropped. The buffer grows on demand, so the file size is not
 * limited by M. The caller owns the returned buffer and must free() it.
 * Terminates the program with exit(-1) when memory runs out.
 */
static char *read_text(FILE *fp, long *len)
{
    long cap = M;
    long used = 0;
    int ch;     /* int, not char: EOF must stay distinguishable from data */
    /* Casts on malloc/realloc are kept so the file also builds as C++. */
    char *buf = (char *)malloc((size_t)cap);

    if (buf == NULL)
        exit(-1);

    while ((ch = fgetc(fp)) != EOF) {
        if (isspace(ch))
            ch = ' ';
        else if (!isprint(ch))
            continue;

        if (used == cap) {
            char *tmp = (char *)realloc(buf, (size_t)cap * 2);
            if (tmp == NULL) {
                free(buf);
                exit(-1);
            }
            buf = tmp;
            cap *= 2;
        }
        buf[used++] = (char)ch;
    }

    *len = used;
    return buf;
}

/*
 * Extracts the next word from t[*fg .. at) into wd (a buffer of N bytes).
 * Letters are collected, every other character is skipped, and a space ends
 * the word once at least one letter has been collected. Letters beyond
 * N - 1 are discarded so wd is always NUL-terminated. On return *fg indexes
 * the terminating space, or equals at when the text is exhausted.
 * Returns 1 when a word was stored in wd, 0 when no letters were left.
 */
static int next_word(const char *t, long *fg, long at, char *wd)
{
    int len = 0;

    memset(wd, 0, N);
    while (*fg < at) {
        /* ctype functions require a value representable as unsigned char */
        unsigned char c = (unsigned char)t[*fg];

        if (isalpha(c)) {
            if (len < N - 1)
                wd[len++] = (char)c;
        } else if (c == ' ' && len > 0) {
            break;
        }
        (*fg)++;
    }
    return len > 0;
}

/* Allocates a leaf node holding wd with a count of 1; exits on failure. */
static tree *new_node(const char *wd)
{
    tree *p = (tree *)malloc(sizeof *p);

    if (p == NULL)
        exit(-1);
    memset(p->a, 0, N);
    strncpy(p->a, wd, N - 1);
    p->i = 1;
    p->left = NULL;
    p->right = NULL;
    return p;
}

/* Adds one occurrence of wd to the search tree rooted at r. */
static void insert_word(tree *r, char *wd)
{
    tree *p = r;

    for (;;) {
        int c = jfcmp(wd, p->a, N);
        tree **link;

        if (c == 0) {
            p->i++;
            return;
        }
        link = (c < 0) ? &p->left : &p->right;
        if (*link == NULL) {
            *link = new_node(wd);
            return;
        }
        p = *link;
    }
}

/* qsort comparator: higher count first; equal counts keep dictionary order. */
static int by_count_desc(const void *lhs, const void *rhs)
{
    const WN *a = (const WN *)lhs;
    const WN *b = (const WN *)rhs;

    if (a->i != b->i)
        return (a->i < b->i) ? 1 : -1;
    return strncmp(a->a, b->a, N);
}

/*
 * Entry point: reads f:\word.txt, counts every word in a binary search
 * tree, lists the words in dictionary order (mode 0) or the most frequent
 * ones (mode 1), then prints the total number of word occurrences.
 */
int main()
{
    tree root;
    FILE *fp;
    long at, fg;
    char *t;
    char wd[N];

    printf("                       准备扫描文章\n");
    fp = fopen("f:\\word.txt", "rt");
    if (fp == NULL) {
        printf("文件不存在\n");
        return 0;
    }
    t = read_text(fp, &at);
    fclose(fp);

    printf("          按字典顺序查看单词统计结果按0\n");
    printf("          按单词出现频率顺序查看统计结果按1\n          输入数字:");
    if (scanf("%d", &n) != 1)
        n = 0;  /* unreadable input: fall back to dictionary order */

    /* The first word seeds the root; a text without words builds no tree. */
    fg = 0;
    if (next_word(t, &fg, at, wd)) {
        memset(root.a, 0, N);
        strncpy(root.a, wd, N - 1);
        root.i = 1;
        root.left = NULL;
        root.right = NULL;

        wd[0] = '\0';  /* nothing pending: createtree scans on from fg */
        createtree(&root, wd, t, fg, at);
        traverse(&root);
        if (n == 1)
            sort();
    }

    printf("在此文章中出现的单词数目是%d\n", sum);
    free(t);
    return 0;  /* tree nodes stay allocated until the process exits */
}

/*
 * Builds the binary search tree.
 *   r      existing root node
 *   wd     scratch buffer of N bytes; when it holds a non-empty word on
 *          entry, that word is inserted first
 *   t      text buffer
 *   fg     index in t where scanning continues
 *   at     number of valid bytes in t
 * Every remaining word in t[fg .. at), including the last one, is inserted.
 * Always returns 0.
 */
int createtree(tree *r, char *wd, char *t, long fg, long at)
{
    if (wd[0] != '\0')
        insert_word(r, wd);
    while (next_word(t, &fg, at, wd))
        insert_word(r, wd);
    return 0;
}

/*
 * Compares at most n leading bytes of two NUL-terminated strings in
 * dictionary (byte value) order.
 * Returns -1 when a sorts before b, 1 when it sorts after, 0 when equal
 * (also 0 when n is not positive).
 */
int jfcmp(char *a, char *b, int n)
{
    int r;

    if (n <= 0)
        return 0;
    r = strncmp(a, b, (size_t)n);
    return (r > 0) - (r < 0);
}

/*
 * In-order traversal of the tree, implemented without recursion.
 * For every node: prints it when n == 0, adds its count to sum, and appends
 * a copy to wonu[] at index sign (entries beyond M are not stored).
 * The tree is left unmodified. Always returns 0.
 */
int traverse(tree *r)
{
    sqstack l;
    tree *p = r;

    initstack(&l);
    while (p != NULL || l.top != l.base) {
        /* descend to the leftmost unvisited node, remembering the path */
        while (p != NULL) {
            push(&l, p);
            p = p->left;
        }
        p = pop(&l);

        if (n == 0) {
            printf("%-6d", sign);
            printf("%-20s次数-->", p->a);
            printf("%-6d\n", p->i);
        }
        sum += p->i;
        if (sign < M) {
            memset(wonu[sign].a, 0, N);
            strncpy(wonu[sign].a, p->a, N - 1);
            wonu[sign].i = p->i;
            sign++;
        }

        p = p->right;
    }
    free(l.base);
    return 0;
}

/*
 * Allocates storage for STACK_INIT_SIZE node pointers and marks the stack
 * empty. Exits with -1 when allocation fails. Always returns 0.
 */
int initstack(sqstack *S)
{
    S->base = (tree **)malloc(STACK_INIT_SIZE * sizeof *S->base);
    if (!S->base)
        exit(-1);
    S->top = S->base;
    S->stacksize = STACK_INIT_SIZE;
    return 0;
}

/*
 * Pushes p, doubling the storage first when the stack is full.
 * Returns p; exits with -1 when the stack cannot grow.
 */
tree *push(sqstack *s, tree *p)
{
    if (s->top - s->base >= s->stacksize) {
        int grown = s->stacksize * 2;
        tree **tmp = (tree **)realloc(s->base, (size_t)grown * sizeof *tmp);

        if (tmp == NULL)
            exit(-1);
        s->top = tmp + s->stacksize;  /* the old block may have moved */
        s->base = tmp;
        s->stacksize = grown;
    }
    *s->top++ = p;
    return p;
}

/* Pops and returns the top element, or NULL (0) when the stack is empty. */
tree *pop(sqstack *s)
{
    if (s->top == s->base)
        return 0;
    return *(--s->top);
}

/*
 * Orders wonu[0 .. sign) by descending count (ties stay in dictionary
 * order) and prints the TOP_WORDS most frequent words, or fewer when the
 * text has fewer distinct words. Always returns 0.
 */
int sort()
{
    int i;
    int shown = (sign < TOP_WORDS) ? sign : TOP_WORDS;

    if (sign > 1)
        qsort(wonu, (size_t)sign, sizeof wonu[0], by_count_desc);
    for (i = 0; i < shown; i++) {
        printf("%-20s次数-->%d\n", wonu[i].a, wonu[i].i);
    }
    return 0;
}

五、心得体会

 通过这次个人项目的完成,自己对VS的熟练程度又上了一个档次。特别是刚开始做性能分析时只是应付任务,以至于进程中出现了“搜狗拼音”都没有察觉。被老师指出后才惭愧地重新分析,并仔细查看分析的结果,这样才有助于代码的不断优化。


0 0
原创粉丝点击