编程珠玑 Pearls(15)

来源:互联网 发布:国外问卷调查赚钱知乎 编辑:程序博客网 时间:2024/06/06 17:45

[TOC]

15.1 为文档中包含的单词生成一个列表

#include <iostream>
#include <set>
#include <string>

using namespace std;

// Reads whitespace-separated words from standard input and prints every
// distinct word exactly once, one per line, in the set's iteration order.
int main()
{
    set<string> words;

    // operator>> skips leading whitespace, so each extraction is one token;
    // inserting a token that is already present leaves the set unchanged.
    for (string token; cin >> token; )
        words.insert(token);

    for (const string& w : words)
        cout << w << "\n";

    return 0;
}

对文档中每个单词出现的次数做统计

/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */

/* wordfreq.cpp -- List all words in input file, with counts */

#include <iostream>
#include <map>
#include <string>

using namespace std;

// Counts how often each whitespace-separated word occurs on standard input
// and prints "<word> <count>" lines in the map's key order.
int main()
{
    map<string, int> counts;

    // operator[] value-initialises a missing entry to 0 before the increment.
    for (string token; cin >> token; )
        ++counts[token];

    for (const auto& entry : counts)
        cout << entry.first << " " << entry.second << "\n";

    return 0;
}

使用自定义的散列表,对文档中每个单词出现的次数做统计

/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */

/* wordfreq.c -- list of words in file, with counts */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One entry of a hash chain: a word, its occurrence count, and the next
   entry in the same bucket. */
typedef struct node *nodeptr;
typedef struct node {
    char *word;
    int count;
    nodeptr next;
} node;

#define NHASH 29989
#define MULT 31
nodeptr bin[NHASH];     /* bucket heads; NULL means an empty bucket */

/* Maps a NUL-terminated string to a bucket index in [0, NHASH). */
unsigned int hash(char *p)
{
    unsigned int h = 0;
    for ( ; *p; p++)
        h = MULT * h + *p;
    return h % NHASH;
}

/* malloc wrapper that terminates the program instead of returning NULL, so
   the pool allocators below never hand out a null pointer. */
static void *xmalloc(size_t nbytes)
{
    void *mem = malloc(nbytes);
    if (mem == NULL) {
        fprintf(stderr, "wordfreq: out of memory\n");
        exit(EXIT_FAILURE);
    }
    return mem;
}

#define NODEGROUP 1000
int nodesleft = 0;
nodeptr freenode;

/* Returns an uninitialised node, carving nodes out of blocks of NODEGROUP
   so that malloc is called once per NODEGROUP nodes. Nodes are never freed. */
nodeptr nmalloc(void)
{
    if (nodesleft == 0) {
        /* The cast keeps the file valid when compiled as C++ as well. */
        freenode = (nodeptr)xmalloc(NODEGROUP * sizeof(node));
        nodesleft = NODEGROUP;
    }
    nodesleft--;
    return freenode++;
}

#define CHARGROUP 10000
int charsleft = 0;
char *freechar;

/* Returns n bytes of character storage taken from a pooled block. When the
   current block has fewer than n bytes left, a fresh block of n+CHARGROUP
   bytes is started and the remainder of the old one is abandoned. */
char *smalloc(int n)
{
    if (charsleft < n) {
        freechar = (char *)xmalloc((size_t)n + CHARGROUP);
        charsleft = n + CHARGROUP;
    }
    charsleft -= n;
    freechar += n;
    return freechar - n;
}

/* Increments the count for word s, inserting it at the head of its bucket
   with a count of 1 if it has not been seen before. s is copied. */
void incword(char *s)
{
    nodeptr p;
    int h = hash(s);
    for (p = bin[h]; p != NULL; p = p->next)
        if (strcmp(s, p->word) == 0) {
            (p->count)++;
            return;
        }
    p = nmalloc();
    p->count = 1;
    p->word = smalloc((int)strlen(s) + 1);
    strcpy(p->word, s);
    p->next = bin[h];
    bin[h] = p;
}

/* Reads whitespace-separated words from stdin and prints "<word> <count>"
   for each distinct word, in bucket order. */
int main(void)
{
    int i;
    nodeptr p;
    char buf[100];

    for (i = 0; i < NHASH; i++)
        bin[i] = NULL;

    /* The field width leaves room for the terminating NUL, so a token longer
       than 99 characters is split into pieces instead of overrunning buf. */
    while (scanf("%99s", buf) == 1)
        incword(buf);

    for (i = 0; i < NHASH; i++)
        for (p = bin[i]; p != NULL; p = p->next)
            printf("%s %d\n", p->word, p->count);
    return 0;
}

15.2 短语

/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */

/* longdup.c -- Print longest string duplicated M times */

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

/* qsort comparison function: each argument points at an element of a[],
   i.e. at a char* suffix pointer; the suffixes are ordered with strcmp.
   The (const void *, const void *) signature is the one qsort requires. */
int pstrcmp(const void *p, const void *q)
{
    return strcmp(*(char * const *)p, *(char * const *)q);
}

/* Returns the length of the common prefix of the two strings. */
int comlen(char *p, char *q)
{
    int i = 0;
    while (*p && (*p++ == *q++))
        i++;
    return i;
}

#define M 1
#define MAXN 5000000
char c[MAXN], *a[MAXN];     /* c: input text; a[i]: pointer to a suffix of c */

/* Reads all of stdin (at most MAXN-1 bytes), sorts the pointers to every
   suffix, and prints the longest prefix shared by two suffixes that are M
   positions apart in sorted order. Prints an empty line when the input is
   too short to contain such a pair.

   Define LONGDUP_DEBUG at compile time to also dump the suffix array before
   and after sorting. That dump prints every suffix in full, so it is only
   practical for tiny inputs. */
int main(void)
{
    int i, ch, n = 0, maxi = 0, maxlen = -1;

    /* Test for room before consuming a byte; one slot stays free for the
       terminating NUL written below. */
    while (n < MAXN - 1 && (ch = getchar()) != EOF) {
        a[n] = &c[n];
        c[n++] = (char)ch;
    }
    c[n] = 0;

#ifdef LONGDUP_DEBUG
    for (i = 0; i < n; i++)
        printf("a[%d] = %s", i, a[i]);
#endif

    qsort(a, n, sizeof(char *), pstrcmp);

#ifdef LONGDUP_DEBUG
    for (i = 0; i < n; i++)
        printf("a[%d] = %s\n", i, a[i]);
#endif

    for (i = 0; i < n - M; i++) {
        int len = comlen(a[i], a[i + M]);
        if (len > maxlen) {
            maxlen = len;
            maxi = i;
        }
    }

    /* maxlen is still -1 when the loop above never ran (n <= M); in that
       case a[maxi] may not have been set, so do not dereference it. */
    if (maxlen < 0)
        printf("\n");
    else
        printf("%.*s\n", maxlen, a[maxi]);
    return 0;
}

15.3 生成文本

/* Copyright (C) 2000 Lucent Technologies */
/* Modified from markov.c in 'Programming Pearls' by Jon Bentley */

/* markovlet.c -- generate letter-level random text from input text
    Alg: Store text in an array on input
         Scan complete text for each output character
            (Randomly select one matching k-gram)
 */

#include <stdio.h>
#include <stdlib.h>

#define MAXCHARS 5000000
char x[MAXCHARS];       /* the input text, NUL-terminated */

/* Reads stdin (at most MAXCHARS-1 bytes) and writes up to 2000 generated
   characters to stdout. Each output character is the one that follows a
   randomly chosen occurrence of the current k-character context. Produces
   no output when the input is shorter than k characters. */
int main(void)
{
    int c, i, eqsofar, max, n = 0, k = 5;
    char *p, *nextp = NULL, *q;

    /* Test for room before consuming a byte; one slot stays free for the
       terminating NUL. */
    while (n < MAXCHARS - 1 && (c = getchar()) != EOF)
        x[n++] = (char)c;
    x[n] = 0;

    /* With fewer than k characters there is no k-gram to match, and the
       scan below would leave nextp unset. */
    if (n < k)
        return 0;

    p = x;
    srand(1);
    for (max = 2000; max > 0; max--) {
        eqsofar = 0;
        for (q = x; q < x + n - k + 1; q++) {
            for (i = 0; i < k && *(p+i) == *(q+i); i++)
                ;
            if (i == k)
                /* The j-th match found replaces the current choice when
                   rand() % j == 0, so one match is selected in a single
                   pass without storing the candidates. */
                if (rand() % ++eqsofar == 0)
                    nextp = q;
        }
        c = *(nextp+k);
        if (c == 0)     /* chosen k-gram ends the text: nothing follows it */
            break;
        putchar(c);
        p = nextp+1;    /* slide the context forward by one character */
    }
    return 0;
}
/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */

/* markov.c -- generate random text from input document */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXCHARS 4300000
#define MAXWORDS 800000
char inputchars[MAXCHARS];  /* all words, stored back to back, NUL-separated */
char *word[MAXWORDS];       /* word[i] points at the start of the i-th word */
int nword = 0;
int k = 2;                  /* number of words in a phrase */

/* Compares the k consecutive NUL-terminated words starting at p with those
   starting at q. Returns 0 when all k words are equal, otherwise the
   difference of the first differing characters. */
int wordncmp(char *p, char* q)
{
    int n = k;
    for ( ; *p == *q; p++, q++)
        if (*p == 0 && --n == 0)
            return 0;
    return *p - *q;
}

/* qsort comparison function: the arguments point at elements of word[].
   The (const void *, const void *) signature is the one qsort requires. */
int sortcmp(const void *p, const void *q)
{
    return wordncmp(*(char * const *)p, *(char * const *)q);
}

/* Returns a pointer just past the n-th NUL byte at or after p, i.e. the
   start of the word n positions further on. */
char *skip(char *p, int n)
{
    for ( ; n > 0; p++)
        if (*p == 0)
            n--;
    return p;
}

/* Reads one whitespace-delimited token from stdin into dst, which has room
   for cap bytes including the terminating NUL. Returns 1 on success and 0
   at end of input or when the token does not fit; in the latter case dst
   may hold a partial, unterminated token. */
static int readword(char *dst, size_t cap)
{
    int ch;
    size_t len = 0;

    while ((ch = getchar()) != EOF && isspace(ch))
        ;
    if (ch == EOF)
        return 0;
    do {
        if (len + 1 >= cap)
            return 0;
        dst[len++] = (char)ch;
    } while ((ch = getchar()) != EOF && !isspace(ch));
    dst[len] = 0;
    return 1;
}

/* Reads words from stdin, prints the first k of them, then prints up to
   10000 generated words, each one chosen at random among the words that
   follow an occurrence of the current k-word phrase in the input. */
int main(void)
{
    int i, wordsleft = 10000, l, m, u;
    char *phrase, *p = NULL;
    size_t used = 0;        /* bytes of inputchars occupied so far */

    word[0] = inputchars;
    /* Stop while there is still one word[] slot and k bytes of inputchars
       left: both are needed for the sentinel written after the loop. */
    while (nword < MAXWORDS - 1
           && readword(word[nword], MAXCHARS - used - k)) {
        used += strlen(word[nword]) + 1;
        word[nword+1] = inputchars + used;
        nword++;
    }
    /* Sentinel: k empty words after the last real word, so that phrases
       starting near the end of the input are still k words long. */
    for (i = 0; i < k; i++)
        word[nword][i] = 0;

    /* Without any input word[1] was never set, and there is nothing to
       generate from. */
    if (nword == 0)
        return 0;

    /* Only word[0..nword] have been assigned. */
    for (i = 0; i < k && i <= nword; i++)
        printf("%s\n", word[i]);

    qsort(word, nword, sizeof(word[0]), sortcmp);

    phrase = inputchars;
    for ( ; wordsleft > 0; wordsleft--) {
        /* Binary search for the first sorted entry that is >= phrase. */
        l = -1;
        u = nword;
        while (l+1 != u) {
            m = (l + u) / 2;
            if (wordncmp(word[m], phrase) < 0)
                l = m;
            else
                u = m;
        }
        /* Walk the run of entries equal to phrase; the i-th one (counting
           from 0) replaces the choice when rand() % (i+1) == 0. */
        for (i = 0; wordncmp(phrase, word[u+i]) == 0; i++)
            if (rand() % (i+1) == 0)
                p = word[u+i];
        phrase = skip(p, 1);
        if (strlen(skip(phrase, k-1)) == 0)     /* reached the sentinel */
            break;
        printf("%s\n", skip(phrase, k-1));
    }
    return 0;
}
/* Copyright (C) 1999 Lucent Technologies */
/* From 'Programming Pearls' by Jon Bentley */

/* markovhash.c -- generate random text, sped up with hash tables */

/* For storage efficiency (and also to minimize changes from markov.c),
   the hash table is implemented in the integer array next.
   If bin[i]=j, then word[j] is the first element in the list,
   word[next[j]] is the next element, and so on.
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXCHARS 4300000
char inputchars[MAXCHARS];  /* all words, stored back to back, NUL-separated */
#define MAXWORDS 800000
char *word[MAXWORDS];       /* word[i] points at the start of the i-th word */
int nword = 0;
int k = 2;                  /* number of words in a phrase */

int next[MAXWORDS];         /* next[j]: following index in j's chain, or -1 */
#define NHASH 499979
int bin[NHASH];             /* bin[h]: first word index in bucket h, or -1 */
#define MULT 31

/* Hashes the k consecutive NUL-terminated words starting at ptr, including
   their terminators, to a bucket index in [0, NHASH). */
unsigned int hash(char *ptr)
{
    unsigned int h = 0;
    /* Explicit cast: bytes are hashed as unsigned values. */
    unsigned char *p = (unsigned char *)ptr;
    int n;
    for (n = k; n > 0; p++) {
        h = MULT * h + *p;
        if (*p == 0)
            n--;
    }
    return h % NHASH;
}

/* Compares the k consecutive NUL-terminated words starting at p with those
   starting at q. Returns 0 when all k words are equal, otherwise the
   difference of the first differing characters. */
int wordncmp(char *p, char* q)
{
    int n = k;
    for ( ; *p == *q; p++, q++)
        if (*p == 0 && --n == 0)
            return 0;
    return *p - *q;
}

/* Returns a pointer just past the n-th NUL byte at or after p, i.e. the
   start of the word n positions further on. */
char *skip(char *p, int n)
{
    for ( ; n > 0; p++)
        if (*p == 0)
            n--;
    return p;
}

/* Reads one whitespace-delimited token from stdin into dst, which has room
   for cap bytes including the terminating NUL. Returns 1 on success and 0
   at end of input or when the token does not fit; in the latter case dst
   may hold a partial, unterminated token. */
static int readword(char *dst, size_t cap)
{
    int ch;
    size_t len = 0;

    while ((ch = getchar()) != EOF && isspace(ch))
        ;
    if (ch == EOF)
        return 0;
    do {
        if (len + 1 >= cap)
            return 0;
        dst[len++] = (char)ch;
    } while ((ch = getchar()) != EOF && !isspace(ch));
    dst[len] = 0;
    return 1;
}

/* Reads words from stdin, prints the first k of them, then prints up to
   10000 generated words, each one chosen at random among the words that
   follow an occurrence of the current k-word phrase in the input. Phrase
   occurrences are located through the bin/next hash chains. */
int main(void)
{
    int i, wordsleft = 10000, j;
    char *phrase, *p = NULL;
    size_t used = 0;        /* bytes of inputchars occupied so far */

    word[0] = inputchars;
    /* Stop while there is still one word[] slot and k bytes of inputchars
       left: both are needed for the sentinel written after the loop. */
    while (nword < MAXWORDS - 1
           && readword(word[nword], MAXCHARS - used - k)) {
        used += strlen(word[nword]) + 1;
        word[nword+1] = inputchars + used;
        nword++;
    }
    /* Sentinel: k empty words after the last real word. */
    for (i = 0; i < k; i++)
        word[nword][i] = 0;

    /* Without any input word[1] was never set, and there is nothing to
       generate from. */
    if (nword == 0)
        return 0;

    for (i = 0; i < NHASH; i++)
        bin[i] = -1;
    /* Insert every position that starts a complete k-word phrase at the
       head of its bucket's chain. */
    for (i = 0; i <= nword - k; i++) { /* check */
        j = hash(word[i]);
        next[i] = bin[j];
        bin[j] = i;
    }

    /* Only word[0..nword] have been assigned. */
    for (i = 0; i < k && i <= nword; i++)
        printf("%s\n", word[i]);

    /* With fewer than k words no phrase was inserted above, so the lookup
       below would find nothing and leave p unset. */
    if (nword < k)
        return 0;

    phrase = inputchars;
    for ( ; wordsleft > 0; wordsleft--) {
        i = 0;
        /* Walk the bucket; the i-th entry equal to phrase replaces the
           choice when rand() % i == 0. rand() is only called for equal
           entries because of the short-circuit &&. */
        for (j = bin[hash(phrase)]; j >= 0; j = next[j])
            if ((wordncmp(phrase, word[j]) == 0)
                && (rand() % (++i) == 0))
                p = word[j];
        phrase = skip(p, 1);
        if (strlen(skip(phrase, k-1)) == 0)     /* reached the sentinel */
            break;
        printf("%s\n", skip(phrase, k-1));
    }
    return 0;
}
原创粉丝点击