《数据结构学习与实验指导》5-8:迷你搜索引擎

来源:互联网 发布:电路图绘制软件 编辑:程序博客网 时间:2024/05/17 03:34

实验内容:实现一种简单的搜索引擎,快速满足多达10^5条关键字查询请求。
输入说明:输入首先给出正整数N(<=100),为文件总数。随后按以下格式给出每个文件的内容:第一行给出文件的标题,随后给出不超过100行的文件正文,最后一行中只给出一个字符“#”,表示文件结束。每行不超过50个字符。在N个文件内容结束之后,给出查询总数M(<=10^5),随后M行,每行给出不超过10个英文单词,其间以空格分隔,每个单词不超过10个英文字母,不区分大小写。
输出说明:针对每一条查询,首先在一行中输出包含全部该查询单词的文件总数;如果总数为0,则输出“Not Found”。如果找到符合条件的文件,则按输入的先后顺序输出这些文件,格式为:第一行输出文件标题;随后顺序输出包含查询单词的那些行内容。注意不能把相同的一行重复输出。
测试用例:

输入:
4
A00
Gold
silver truck
#
A01
Shipment of gold
damaged
in a fire
#
A02
Delivery
of silver
arrived in
a silver
truck
#
A03
Shipment of gold
arrived in
a truck
#
2
what ever
silver truck
输出:
0
Not Found
2
A00
silver truck
A02
of silver
a silver
truck
/*
 * Mini search engine.
 *
 * Reads N documents (a title line, body lines, and a terminating "#" line),
 * builds an inverted index (word -> list of (file, line) records) in an
 * open-addressing hash table, then answers M queries.  For every query the
 * program prints the number of files that contain ALL query words (followed
 * by "Not Found" when that number is 0), then, for each matching file in
 * input order, its title and every line that contains at least one query
 * word (each line at most once, in file order).
 *
 * Words are whitespace-separated tokens compared case-insensitively.
 */
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>

#define FileMaxN 100        /* maximum number of files */
#define LineMaxLength 51    /* 50 characters per file line + NUL */
#define LineMaxN 100        /* maximum number of body lines per file */
#define P 100003            /* hash table capacity (prime) */
/* Longest legal query is 10 words x 10 letters + 9 spaces = 109 characters;
 * leave slack for the terminator and stray trailing whitespace. */
#define QueryMaxLength 128

/* One stored body line of a file. */
typedef struct Line {
    char value[LineMaxLength];
} *PLine;

/* A document: title, number of stored lines, and the lines themselves. */
typedef struct File {
    char title[LineMaxLength];
    int size;
    struct Line line[LineMaxN];
} *PFile;

/* All documents read from the input. */
typedef struct FileList {
    struct File file[FileMaxN];
    int size;
} *PFileList;

/* One occurrence of a word: which file and which line of that file. */
typedef struct Record {
    int fileIndex;
    int lineIndex;
    struct Record *next;
} *PRecord;

/* A hash table slot: the word, its number of records (0 means the slot is
 * empty), and a record list that starts with a dummy head node. */
typedef struct Node {
    char value[LineMaxLength];
    int size;
    PRecord root;
} *PNode;

/* Open-addressing (linear probing) hash table of words. */
typedef struct HTable {
    struct Node arr[P];
    int size;
} *PHTable;

/* Number of files; set once in main() and read by the lookup helpers. */
int N;

PFileList initFileList();
void format(char *p);
int hash(char s[]);
PHTable initTable();
void insertTable(PHTable table, char s[], int fileIndex, int lineIndex);
void findWordFileIndex(PHTable table, char s[], int flag[]);
void findAllFilesByLine(PHTable table, char s[], int flag[]);
void findAllFileLinesByLine(PHTable table, char s[], int fileIndex, int flag[]);
void printAllFileLinesByLine(PHTable table, char s[], PFileList fileList, int fileIndex);

/* Allocates `bytes` bytes or terminates the program with a diagnostic. */
static void *checkedMalloc(size_t bytes) {
    void *mem = malloc(bytes);
    if (mem == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    return mem;
}

/*
 * Reads one line from stdin into buf (capacity cap, including the NUL).
 * The line terminator (LF or CRLF) is removed.  Characters that do not fit
 * are discarded so that the next call starts on the next input line.
 * Returns 1 on success, 0 on end of input (buf is then the empty string).
 */
static int readLine(char *buf, size_t cap) {
    if (fgets(buf, (int) cap, stdin) == NULL) {
        buf[0] = '\0';
        return 0;
    }
    size_t len = strlen(buf);
    if (len > 0 && buf[len - 1] == '\n') {
        buf[--len] = '\0';
    } else {
        /* Buffer filled before the newline (or EOF without one): drop the rest. */
        int c;
        while ((c = getchar()) != EOF && c != '\n') {
        }
    }
    while (len > 0 && buf[len - 1] == '\r') {
        buf[--len] = '\0';
    }
    return 1;
}

/*
 * Extracts the next whitespace-delimited token starting at *cursor, copies
 * it lower-cased into word (capacity cap, truncating if necessary) and
 * advances *cursor past it.  Empty tokens produced by leading, trailing or
 * repeated separators are skipped.  Returns 1 if a token was produced,
 * 0 when the rest of the string contains no token.
 */
static int nextWord(char **cursor, char *word, size_t cap) {
    char *p = *cursor;
    while (*p != '\0' && isspace((unsigned char) *p)) {
        p++;
    }
    if (*p == '\0') {
        *cursor = p;
        return 0;
    }
    size_t len = 0;
    while (*p != '\0' && !isspace((unsigned char) *p)) {
        if (len + 1 < cap) {
            word[len++] = *p;
        }
        p++;
    }
    word[len] = '\0';
    format(word);
    *cursor = p;
    return 1;
}

/*
 * Linear-probes the table for s.  Returns the index of the slot that holds
 * s, or of the first empty slot on its probe sequence, or -1 when the table
 * is full and s is absent (so the probe can never loop forever).
 */
static int findSlot(PHTable table, char s[]) {
    int start = hash(s);
    int idx = start;
    do {
        if (table->arr[idx].size == 0 || strcmp(table->arr[idx].value, s) == 0) {
            return idx;
        }
        idx = (idx + 1) % P;
    } while (idx != start);
    return -1;
}

int main() {
    if (scanf("%d\n", &N) != 1 || N < 0 || N > FileMaxN) {
        fprintf(stderr, "invalid file count\n");
        return EXIT_FAILURE;
    }
    PFileList fileList = initFileList();
    PHTable table = initTable();

    char line[LineMaxLength];
    char word[LineMaxLength];
    for (int i = 0; i < N; i++) {
        PFile pFile = &(fileList->file[i]);
        pFile->size = 0;
        if (!readLine(pFile->title, sizeof pFile->title)) {
            break; /* input ended early; remaining files stay empty */
        }
        while (readLine(line, sizeof line) && strcmp(line, "#") != 0) {
            if (pFile->size >= LineMaxN) {
                continue; /* over the limit: keep consuming up to '#', store nothing */
            }
            int lineIndex = pFile->size++;
            strcpy(pFile->line[lineIndex].value, line);
            char *cursor = line;
            while (nextWord(&cursor, word, sizeof word)) {
                insertTable(table, word, i, lineIndex);
            }
        }
    }

    int M;
    if (scanf("%d\n", &M) != 1) {
        M = 0;
    }
    /* Queries can be longer than a file line, so they get their own buffer. */
    char query[QueryMaxLength];
    for (int i = 0; i < M; i++) {
        if (!readLine(query, sizeof query)) {
            break;
        }
        int flag[FileMaxN];
        for (int j = 0; j < N; j++) {
            flag[j] = 1;
        }
        findAllFilesByLine(table, query, flag);

        int sum = 0;
        for (int j = 0; j < N; j++) {
            if (flag[j] == 1) {
                sum++;
            }
        }
        if (sum == 0) {
            printf("0\nNot Found\n");
        } else {
            printf("%d\n", sum);
        }
        for (int j = 0; j < N; j++) {
            if (flag[j] == 1) {
                printAllFileLinesByLine(table, query, fileList, j);
            }
        }
    }
    return 0;
}

/* Allocates a zero-initialised file list whose size is the global N. */
PFileList initFileList() {
    PFileList list = (PFileList) calloc(1, sizeof(struct FileList));
    if (list == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    list->size = N;
    return list;
}

/* Lower-cases the NUL-terminated string p in place. */
void format(char *p) {
    for (; *p != '\0'; p++) {
        /* <cctype> functions require a value representable as unsigned char. */
        *p = (char) tolower((unsigned char) *p);
    }
}

/*
 * Returns a bucket index in [0, P) for the string s.  The arithmetic is
 * unsigned so that neither overflow nor bytes >= 0x80 can yield a negative
 * index.
 */
int hash(char s[]) {
    unsigned int h = 0;
    for (const unsigned char *p = (const unsigned char *) s; *p != '\0'; p++) {
        h = h * 33u + *p;
    }
    return (int) (h % P);
}

/* Allocates an empty hash table; every slot gets a dummy list head. */
PHTable initTable() {
    PHTable table = (PHTable) checkedMalloc(sizeof(struct HTable));
    table->size = 0;
    for (int i = 0; i < P; i++) {
        table->arr[i].value[0] = '\0';
        table->arr[i].size = 0;
        table->arr[i].root = (PRecord) checkedMalloc(sizeof(struct Record));
        table->arr[i].root->fileIndex = -1;
        table->arr[i].root->lineIndex = -1;
        table->arr[i].root->next = NULL;
    }
    return table;
}

/*
 * Records that word s occurs on line lineIndex of file fileIndex.
 * A (file, line) pair is stored at most once per word.  Words that do not
 * fit in a slot, and insertions into a full table, are ignored.
 */
void insertTable(PHTable table, char s[], int fileIndex, int lineIndex) {
    if (strlen(s) >= LineMaxLength) {
        return;
    }
    int slot = findSlot(table, s);
    if (slot < 0) {
        return;
    }
    PNode node = &(table->arr[slot]);
    if (node->size == 0) {
        strcpy(node->value, s);
        table->size++;
    }

    /* Walk to the tail, stopping early if this occurrence is already stored. */
    PRecord tail = node->root;
    while (tail->next != NULL) {
        tail = tail->next;
        if (tail->fileIndex == fileIndex && tail->lineIndex == lineIndex) {
            return;
        }
    }
    PRecord rec = (PRecord) checkedMalloc(sizeof(struct Record));
    rec->fileIndex = fileIndex;
    rec->lineIndex = lineIndex;
    rec->next = NULL;
    tail->next = rec;
    node->size++;
}

/*
 * Clears flag[i] for every file i in [0, N) that does not contain word s;
 * flags of files that do contain it are left unchanged.
 */
void findWordFileIndex(PHTable table, char s[], int flag[]) {
    int present[FileMaxN] = {0};
    int slot = findSlot(table, s);
    if (slot >= 0 && table->arr[slot].size != 0) {
        for (PRecord r = table->arr[slot].root->next; r != NULL; r = r->next) {
            if (r->fileIndex >= 0 && r->fileIndex < FileMaxN) {
                present[r->fileIndex] = 1;
            }
        }
    }
    for (int i = 0; i < N; i++) {
        if (!present[i]) {
            flag[i] = 0;
        }
    }
}

/*
 * Intersects flag[] with the set of files that contain every word of the
 * query line s.  The caller initialises flag[0..N) to 1.
 */
void findAllFilesByLine(PHTable table, char s[], int flag[]) {
    char word[LineMaxLength];
    char *cursor = s;
    while (nextWord(&cursor, word, sizeof word)) {
        findWordFileIndex(table, word, flag);
    }
}

/*
 * Sets flag[line] = 1 for every line of file fileIndex on which word s
 * occurs.  flag must have at least as many entries as the file has lines.
 */
void findAllFileLinesByLine(PHTable table, char s[], int fileIndex, int flag[]) {
    int slot = findSlot(table, s);
    if (slot < 0 || table->arr[slot].size == 0) {
        return;
    }
    for (PRecord r = table->arr[slot].root->next; r != NULL; r = r->next) {
        if (r->fileIndex == fileIndex) {
            flag[r->lineIndex] = 1;
        }
    }
}

/*
 * Prints the title of file fileIndex followed by each of its lines that
 * contains at least one word of the query line s, in file order and
 * without repeating a line.
 */
void printAllFileLinesByLine(PHTable table, char s[], PFileList fileList, int fileIndex) {
    PFile file = &(fileList->file[fileIndex]);
    printf("%s\n", file->title);

    int flag[LineMaxN] = {0};
    char word[LineMaxLength];
    char *cursor = s;
    while (nextWord(&cursor, word, sizeof word)) {
        findAllFileLinesByLine(table, word, fileIndex, flag);
    }
    for (int i = 0; i < file->size; i++) {
        if (flag[i] == 1) {
            /* Pass the character array, not the enclosing struct, to %s. */
            printf("%s\n", file->line[i].value);
        }
    }
}
阅读全文
0 0