PAT 基于词频的文件相似度 (set) -- 解题报告

来源：互联网发布：自选股软件下载编辑：程序博客网时间：2024/06/05 14:40

解题思路

每个文件内的单词存放到单独的一个 set 中。询问时直接遍历其中一个 set（必须是 size 较小的那个，否则会超时在最后一个测试点），用 count() 查找另一个 set 中存不存在这个单词即可。做法类似 PAT 上另一道题目「集合相似度」（连示例输出都几乎是一样的）。

参考代码

#include <cstdio>#include <cstring>#include <algorithm>#include <set>#include <string>using namespace std;int main(int argc, char const *argv[]) {    int n, m, a, b;    char s[2000], tmp[2000];    set<string> st[101];    scanf("%d%*c", &n);    for(int i=1; i<=n; ++i) {        while(gets(s)) {            if(!strcmp(s, "#")) break;            int last = 0;            for(int j=0; s[j]; ++j) {                if(s[j]>='A' && s[j]<='Z') s[j] += 32;                // 分割单词，符合要求的存进 set                if(s[j]<'a' || s[j]>'z') {                    s[j] = 0;                    strcpy(tmp, s+last);                    last = j+1;                    tmp[10] = 0;                    if(strlen(tmp) >= 3) st[i].insert(tmp);                }            }            strcpy(tmp, s+last);            tmp[10] = 0;            if(strlen(tmp) >= 3) st[i].insert(tmp);        }    }    scanf("%d", &m);    while(m--) {        scanf("%d %d", &a, &b);        int cnt = 0;        set<string>::iterator it;        // !important: 遍历 size 较小的 set        if(st[a].size() > st[b].size()) swap(a, b);        for(it=st[a].begin(); it!=st[a].end(); ++it) {            if(st[b].count(*it)) cnt++;        }        printf("%.1f%%\n", 100.0*cnt/(st[a].size()+st[b].size()-cnt));    }    return 0;}

1 0