《数据结构学习与实验指导》5-8:迷你搜索引擎
来源:互联网 发布:电路图绘制软件 编辑:程序博客网 时间:2024/05/17 03:34
实验内容:实现一种简单的搜索引擎,快速满足多达10^5条关键字查询请求。
输入说明:输入首先给出正整数N(<=100),为文件总数。随后按以下格式给出每个文件的内容:第一行给出文件的标题,随后给出不超过100行的文件正文,最后一行中只给出一个字符“#”,表示文件结束。每行不超过50个字符。在N个文件内容结束之后,给出查询总数M(<=10^5),随后M行,每行给出不超过10个英文单词,其间以空格分隔,每个单词不超过10个英文字母,不区分大小写。
输出说明:针对每一条查询,首先在一行中输出包含全部该查询单词的文件总数;如果总数为0,则输出“Not Found”。如果有找到符合条件的文件,则按输入的先后顺序输出这些文件,格式为:第一行输出文件标题;随后顺序输出包含查询单词的那些行内容。注意不能把相同的一行重复输出。
测试用例:
A00
Gold
silver truck
#
A01
Shipment of gold
damaged
in a fire
#
A02
Delivery
of silver
arrived in
a silver
truck
#
A03
Shipment of gold
arrived in
a truck
#
2
what ever
silver truck
0
Not Found
2
A00
silver truck
A02
of silver
a silver
truck
#include <iostream>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>

/*
 * Mini search engine.
 *
 * Reads N files (a title line, body lines, then a line holding only "#"),
 * indexes every space-separated word of every body line in an open-addressing
 * hash table, then answers M queries. For each query it prints the number of
 * files whose body contains all query words and, for each such file, its
 * title followed by every body line containing at least one query word.
 * Word matching is case-insensitive.
 */

#define FileMaxN 100        /* maximum number of files */
#define LineMaxLength 51    /* 50 characters per file line + terminating NUL */
#define LineMaxN 100        /* maximum number of body lines per file */
#define P 100003            /* number of hash table slots */
#define QueryMaxLength 128  /* query line: 10 words * 10 letters + 9 spaces, with slack */

typedef struct Line {
    char value[LineMaxLength];
} *PLine;

typedef struct File {
    char title[LineMaxLength];
    int size;                       /* number of body lines stored */
    struct Line line[LineMaxN];
} *PFile;

typedef struct FileList {
    struct File file[FileMaxN];
    int size;
} *PFileList;

/* One occurrence of a word: the file and the body line it appears in. */
typedef struct Record {
    int fileIndex;
    int lineIndex;
    struct Record *next;
} *PRecord;

/* One hash table slot; size == 0 marks the slot as empty. */
typedef struct Node {
    char value[LineMaxLength];      /* the lower-cased word */
    int size;                       /* number of records in the list */
    PRecord root;                   /* first record, or NULL */
    PRecord tail;                   /* last record, or NULL */
} *PNode;

typedef struct HTable {
    struct Node arr[P];
    int size;                       /* number of distinct words stored */
} *PHTable;

int N;

PFileList initFileList();
void format(char *p);
int hash(const char s[]);
PHTable initTable();
void insertTable(PHTable table, const char s[], int fileIndex, int lineIndex);
void findWordFileIndex(PHTable table, const char s[], int flag[]);
void findAllFilesByLine(PHTable table, const char s[], int flag[]);
void findAllFileLinesByLine(PHTable table, const char s[], int fileIndex, int flag[]);
void printAllFileLinesByLine(PHTable table, const char s[], PFileList fileList, int fileIndex);

static void *allocOrDie(size_t count, size_t size);
static int readLine(char *buf, size_t cap);
static int nextWord(const char **cursor, char *word, size_t cap);
static int findSlot(PHTable table, const char *s);

int main() {
    if (scanf("%d\n", &N) != 1 || N < 0 || N > FileMaxN) {
        fprintf(stderr, "invalid file count\n");
        return 1;
    }

    PFileList fileList = initFileList();
    PHTable table = initTable();

    char line[LineMaxLength];
    char word[LineMaxLength];
    for (int i = 0; i < N; i++) {
        PFile pFile = &fileList->file[i];
        pFile->size = 0;
        if (!readLine(pFile->title, sizeof pFile->title)) {
            break;  /* input ended early; remaining files stay empty */
        }
        while (readLine(line, sizeof line) && strcmp(line, "#") != 0) {
            if (pFile->size >= LineMaxN) {
                continue;  /* over the per-file limit: consume but do not store */
            }
            const char *cursor = line;
            while (nextWord(&cursor, word, sizeof word)) {
                insertTable(table, word, i, pFile->size);
            }
            strcpy(pFile->line[pFile->size].value, line);
            pFile->size++;
        }
    }

    int M;
    if (scanf("%d\n", &M) != 1) {
        return 0;
    }

    /* Queries may be longer than a file line, so they get their own buffer. */
    char query[QueryMaxLength];
    int flag[FileMaxN];
    for (int i = 0; i < M; i++) {
        if (!readLine(query, sizeof query)) {
            break;
        }
        for (int j = 0; j < N; j++) {
            flag[j] = 1;
        }
        findAllFilesByLine(table, query, flag);

        int sum = 0;
        for (int j = 0; j < N; j++) {
            if (flag[j] == 1) {
                sum++;
            }
        }
        if (sum == 0) {
            printf("0\nNot Found\n");
        } else {
            printf("%d\n", sum);
        }
        for (int j = 0; j < N; j++) {
            if (flag[j] == 1) {
                printAllFileLinesByLine(table, query, fileList, j);
            }
        }
    }
    return 0;
}

/* Allocates count * size zeroed bytes; terminates the program on failure. */
static void *allocOrDie(size_t count, size_t size) {
    void *mem = calloc(count, size);
    if (mem == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(1);
    }
    return mem;
}

/*
 * Reads one line from stdin into buf (at most cap - 1 characters), dropping
 * the line terminator. Any excess characters of an over-long line are
 * discarded. Returns 1 on success, 0 on end of input (buf is then empty).
 */
static int readLine(char *buf, size_t cap) {
    if (fgets(buf, (int) cap, stdin) == NULL) {
        buf[0] = '\0';
        return 0;
    }
    size_t len = strlen(buf);
    if (len > 0 && buf[len - 1] == '\n') {
        buf[--len] = '\0';
    } else {
        /* No newline in the buffer: skip the remainder of the line. */
        int c;
        while ((c = getchar()) != '\n' && c != EOF) {
        }
    }
    if (len > 0 && buf[len - 1] == '\r') {
        buf[--len] = '\0';
    }
    return 1;
}

/*
 * Extracts the next space-delimited word starting at *cursor into word
 * (lower-cased, truncated to cap - 1 characters) and advances *cursor past
 * it. Runs of spaces are skipped, so empty tokens are never produced.
 * Returns 1 if a word was extracted, 0 when the string is exhausted.
 */
static int nextWord(const char **cursor, char *word, size_t cap) {
    const char *p = *cursor;
    while (*p == ' ') {
        p++;
    }
    if (*p == '\0') {
        *cursor = p;
        return 0;
    }
    size_t len = 0;
    while (*p != '\0' && *p != ' ') {
        if (len + 1 < cap) {
            word[len++] = *p;
        }
        p++;
    }
    word[len] = '\0';
    format(word);
    *cursor = p;
    return 1;
}

/* Allocates the zero-initialised file list and records the file count N. */
PFileList initFileList() {
    PFileList list = (PFileList) allocOrDie(1, sizeof(struct FileList));
    list->size = N;
    return list;
}

/* Converts the string p to lower case in place. */
void format(char *p) {
    for (; *p != '\0'; p++) {
        /* <cctype> functions require a value representable as unsigned char. */
        unsigned char c = (unsigned char) *p;
        if (isupper(c)) {
            *p = (char) tolower(c);
        }
    }
}

/* Returns the home slot of s in the table, in the range [0, P). */
int hash(const char s[]) {
    unsigned int h = 0;
    /* Reducing modulo P at every step keeps h * 33 + c far below overflow. */
    for (const unsigned char *p = (const unsigned char *) s; *p != '\0'; p++) {
        h = (h * 33u + *p) % P;
    }
    return (int) h;
}

/* Allocates a hash table in which every slot is empty. */
PHTable initTable() {
    PHTable table = (PHTable) allocOrDie(1, sizeof(struct HTable));
    table->size = 0;
    for (int i = 0; i < P; i++) {
        table->arr[i].size = 0;
        table->arr[i].root = NULL;
        table->arr[i].tail = NULL;
    }
    return table;
}

/*
 * Linear probing from hash(s). Returns the index of the slot that holds s,
 * or of the first empty slot met if s is absent, or -1 if every slot was
 * probed without finding either.
 */
static int findSlot(PHTable table, const char *s) {
    int slot = hash(s);
    for (int probes = 0; probes < P; probes++) {
        PNode node = &table->arr[slot];
        if (node->size == 0 || strcmp(node->value, s) == 0) {
            return slot;
        }
        slot = (slot + 1) % P;
    }
    return -1;
}

/*
 * Records that word s occurs in line lineIndex of file fileIndex.
 *
 * Occurrences must be inserted grouped by (file, line), as main does: a
 * repeated word on the same line is then always the tail record, so only
 * the tail needs to be compared to avoid duplicates. Words that do not fit
 * a slot, or that arrive when the table is full, are silently dropped.
 */
void insertTable(PHTable table, const char s[], int fileIndex, int lineIndex) {
    if (strlen(s) >= LineMaxLength) {
        return;
    }
    int slot = findSlot(table, s);
    if (slot < 0) {
        return;
    }
    PNode node = &table->arr[slot];
    if (node->size == 0) {
        strcpy(node->value, s);
        table->size++;
    }

    PRecord last = node->tail;
    if (last != NULL && last->fileIndex == fileIndex && last->lineIndex == lineIndex) {
        return;
    }

    PRecord rec = (PRecord) allocOrDie(1, sizeof(struct Record));
    rec->fileIndex = fileIndex;
    rec->lineIndex = lineIndex;
    rec->next = NULL;
    if (last == NULL) {
        node->root = rec;
    } else {
        last->next = rec;
    }
    node->tail = rec;
    node->size++;
}

/* Clears flag[i] for every file i in [0, N) that does not contain word s. */
void findWordFileIndex(PHTable table, const char s[], int flag[]) {
    int present[FileMaxN] = {0};

    int slot = findSlot(table, s);
    if (slot >= 0 && table->arr[slot].size != 0) {
        for (PRecord p = table->arr[slot].root; p != NULL; p = p->next) {
            present[p->fileIndex] = 1;
        }
    }
    for (int i = 0; i < N; i++) {
        if (!present[i]) {
            flag[i] = 0;
        }
    }
}

/* Clears flag[i] for every file that lacks at least one word of query s. */
void findAllFilesByLine(PHTable table, const char s[], int flag[]) {
    char word[QueryMaxLength];
    const char *cursor = s;
    while (nextWord(&cursor, word, sizeof word)) {
        findWordFileIndex(table, word, flag);
    }
}

/* Sets flag[line] for every line of file fileIndex that contains word s. */
void findAllFileLinesByLine(PHTable table, const char s[], int fileIndex, int flag[]) {
    int slot = findSlot(table, s);
    if (slot < 0 || table->arr[slot].size == 0) {
        return;
    }
    for (PRecord p = table->arr[slot].root; p != NULL; p = p->next) {
        if (p->fileIndex == fileIndex) {
            flag[p->lineIndex] = 1;
        }
    }
}

/*
 * Prints the title of file fileIndex, then each of its body lines that
 * contains at least one word of query s, in file order and without repeats.
 */
void printAllFileLinesByLine(PHTable table, const char s[], PFileList fileList, int fileIndex) {
    PFile file = &fileList->file[fileIndex];
    printf("%s\n", file->title);

    int lineFlag[LineMaxN] = {0};
    char word[QueryMaxLength];
    const char *cursor = s;
    while (nextWord(&cursor, word, sizeof word)) {
        findAllFileLinesByLine(table, word, fileIndex, lineFlag);
    }

    for (int i = 0; i < file->size; i++) {
        if (lineFlag[i] == 1) {
            /* Pass the character array, not the enclosing struct, to %s. */
            printf("%s\n", file->line[i].value);
        }
    }
}
阅读全文
0 0
- 《数据结构学习与实验指导》5-8:迷你搜索引擎
- 《数据结构学习与实验指导》3-8:堆栈模拟队列
- 《数据结构学习与实验指导》4-8:目录树
- 《数据结构学习与实验指导》4-5:家谱处理
- 《数据结构学习与实验指导》5-3:电话聊天狂人
- <数据结构学习与实验指导>2-1:简单计算器
- 《数据结构学习与实验指导》2-1:简单计算器
- 《数据结构学习与实验指导》2-6:数列求和
- 《数据结构学习与实验指导》2-7:素因子分解
- 《数据结构学习与实验指导》2-9:装箱问题模拟
- 《数据结构学习与实验指导》2-10:海盗分赃
- 《数据结构学习与实验指导》3-1:一元多项式求导
- 《数据结构学习与实验指导》3-6:表达式转换
- 《数据结构学习与实验指导》4-1:还原二叉树
- 《数据结构学习与实验指导》4-2:树种统计
- 《数据结构学习与实验指导》4-3:朋友圈
- 《数据结构学习与实验指导》4-4:Windows消息队列
- 《数据结构学习与实验指导》4-6:搜索树判断
- mysql二进制包安装
- java线程池深入二
- CentOS之SSH的安装与连接-yellowcong
- 建造第一个Tensorflow在Android上的用例
- 计算字符串最后一个单词的长度,单词以空格隔开。(华为在线编程题目)
- 《数据结构学习与实验指导》5-8:迷你搜索引擎
- js选择图片后直接展示
- html基础
- Anaconda基础使用指南
- 用python爬取微信公众号文章
- 最大公约数(公因子)多种java实现方法
- Scala 用Option[T] 避免NullPointerException
- 欢迎使用CSDN-markdown编辑器
- [杂题] Codechef SnackDown 2017 Onsite Final #MINIMAX Minimax