淘宝研究院的面试题

来源：互联网发布：ubuntu怎么下载qq 编辑：程序博客网时间：2024/05/21 08:41

声明：本博文之所有权属于当年小丑，转载注明出处。没有当年小丑的书面许可，不得用于商业利益。

1. 有无序的实数列V[N]，要求求里面大小相邻的实数的差的最大值，关键是要求线性空间和线性时间

2. 25匹赛马，5个跑道，也就是说每次有5匹马可以同时比赛。问最少比赛多少次可以知道跑得最快的5匹马

3. 有一个函数int getNum()，每运行一次可以从一个数组V[N]里面取出一个数，N未知，当数取完的时候，函数返回NULL。现在要求写一个函数int get()，这个函数运行一次可以从V[N]里随机取出一个数，而这个数必须是符合1/N平均分布的，也就是说V[N]里面任意一个数都有1/N的机会被取出，要求空间复杂度为O（1）

分析请见这里

第一题：参考了帖子的思想：鸽巢原理。用桶来做可以保证时间和空间都是线性的。我的实现中在hash表中添加两个字段，分别保存映射到同一个桶的元素中的最大值和最小值，也就是把输入放到桶中后的每个桶实际的上界和下界。这样做可以不必将输入的元素放到桶里去（我的实现中放进去了，这是多余的）。

产生数据：

/* file: data.c
 *
 * Test-data generator: reads an element count n from in.txt and writes n,
 * followed by n pseudo-random integers (tab separated, ten per line), to
 * outf.txt.
 */
#include <stdio.h>
#include <stdlib.h> /* srand, rand, EXIT_FAILURE */
#include <time.h>

int main()
{
    FILE * filein, * fileout;
    int n, i;

    /* Rebind stdin/stdout to the files so plain scanf/printf can be used. */
    filein = freopen ("in.txt", "r", stdin);
    if (NULL == filein){
        perror ("in.txt");
        return EXIT_FAILURE;
    }
    fileout = freopen ("outf.txt", "w", stdout);
    if (NULL == fileout){
        perror ("outf.txt");
        fclose (filein);
        return EXIT_FAILURE;
    }

    /* Seed from the clock so every run produces a different data set. */
    srand ((unsigned int) time (NULL));

    /* Without this check a malformed in.txt would leave n uninitialised. */
    if (1 != scanf ("%d", &n) || n < 0){
        fprintf (stderr, "in.txt must start with a non-negative element count\n");
        fclose (filein);
        fclose (fileout);
        return EXIT_FAILURE;
    }

    printf ("%d\n", n);
    for (i = 0; i < n; i++){
        printf ("%d\t", rand ());
        /* break the line after every tenth value */
        if (9 == i % 10){
            puts ("");
        }
    }
    puts ("");

    fclose (filein);
    /* fclose flushes the buffered output, so a failure here means lost data. */
    if (EOF == fclose (fileout)){
        perror ("outf.txt");
        return EXIT_FAILURE;
    }
    return 0;
}

/* file: max_neighbor_real_diff.c */
/* compute the max differences of two neighbor elements in a reals sequence */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* find the max difference between two neighbour floats */
float max_diff (float flt[], int n, float * f_max, float * f_min);

/*
 * Reads "n v1 v2 ... vn" from stdin (from outf.txt when built with DEBUG1),
 * echoes the array ten values per line and prints the largest gap between
 * two values that are adjacent in sorted order.
 */
int main()
{
    float * a;
    float f, f_max, f_min;
    int i;
    int n;
#ifdef DEBUG1
    FILE * filein = freopen ("outf.txt", "r", stdin);
    assert (NULL!=filein);
#endif

    /* max_diff needs at least two values; reject bad input at run time
       instead of relying on assert, which disappears under NDEBUG. */
    if (1 != scanf ("%d", &n) || n < 2){
        fprintf (stderr, "expected an element count of at least 2\n");
        return EXIT_FAILURE;
    }

    a = malloc (sizeof *a * (size_t) n);
    if (NULL == a){
        fprintf (stderr, "out of memory\n");
        return EXIT_FAILURE;
    }
    for (i = 0; i < n; i++){
        if (1 != scanf ("%f", &a[i])){
            fprintf (stderr, "could not read element %d\n", i);
            free (a);
            return EXIT_FAILURE;
        }
    }

    puts ("array:\n");
    for (i = 0; i < n; i++){
        printf ("%6.1f\t", a[i]);
        if (9 == i % 10){
            puts ("");
        }
    }
    puts ("");

    f = max_diff (a, n, &f_max, &f_min);
    printf ("after calcluating:\nmax_diff = %6.1f\nf_max = %6.1f\nf_min = %6.1f\n",f, f_max, f_min);

    free (a);
#ifdef DEBUG1
    fclose (filein);
#endif
    return 0;
}

/*
 * max_diff - largest difference between two values that are neighbours in
 * sorted order, computed with buckets instead of a sort.
 *
 * The n values are spread over n buckets whose width is the average gap
 * (max - min) / (n - 1).  Two values in the same bucket differ by less than
 * that width, while the largest gap is at least that width, so the answer is
 * always found between the largest value of one non-empty bucket and the
 * smallest value of the next non-empty bucket.
 *
 * flt   : input values (not modified); n must be at least 2
 * f_max : receives the upper value of the widest gap
 * f_min : receives the lower value of the widest gap
 * returns the width of that gap; when all values are equal the result is 0
 *         and both outputs are set to that common value.
 *
 * Aborts the program if memory cannot be allocated.
 */
float max_diff (float flt[], int n, float * f_max, float * f_min)
{
    /* one stored input value; nodes of a bucket form a singly linked list */
    struct hash_item_node{
        float f;
        struct hash_item_node * next_node;
    };
    /* one bucket: its value range plus the chain of values mapped to it */
    struct hash_table_item{
        float fmax; /* maximal element */
        float fmin; /* minimal element */
        struct hash_item_node * next_item; /* NULL while the bucket is empty */
    };
    struct hash_table_item * hash_table;
    struct hash_item_node * node;
    struct hash_item_node * next;
    float left_max, right_min, tmp_diff, max_dif;
    float max, min;
    double avg_diff;
    int idx;
    int prev;
    int i;

    assert (1<n);

    /* find the maximal and minimal elements */
    max = min = flt[0];
    for (i = 1; i < n; i++){
        if (max < flt[i]){
            max = flt[i];
        }
        if (min > flt[i]){
            min = flt[i];
        }
    }

    /* All values equal: the bucket width would be 0 and the index
       computation below would divide by zero, so answer directly. */
    if (max == min){
        *f_max = max;
        *f_min = min;
        return 0.0f;
    }

    /* average difference of all the elements; double keeps the division from
       overflowing or underflowing for extreme float ranges */
    avg_diff = ((double) max - (double) min) / (n - 1);

    /* create and initialize the hash table */
    hash_table = malloc (sizeof *hash_table * (size_t) n);
    if (NULL == hash_table){
        fprintf (stderr, "max_diff: out of memory\n");
        abort ();
    }
    for (i = 0; i < n; i++){
        hash_table[i].fmax = 0.0f;
        hash_table[i].fmin = 0.0f;
        hash_table[i].next_item = NULL;
    }

    /* put the array into the hash table */
    for (i = 0; i < n; i++){
        idx = (int) (((double) flt[i] - (double) min) / avg_diff);
        if (idx > n - 1){
            idx = n - 1; /* guard against rounding pushing max past the end */
        }

        node = malloc (sizeof *node);
        if (NULL == node){
            fprintf (stderr, "max_diff: out of memory\n");
            abort ();
        }
        node->f = flt[i];

        if (NULL == hash_table[idx].next_item){
            /* first value in this bucket defines both bounds */
            hash_table[idx].fmax = flt[i];
            hash_table[idx].fmin = flt[i];
        }
        else{
            if (hash_table[idx].fmax < flt[i]){
                hash_table[idx].fmax = flt[i];
            }
            if (hash_table[idx].fmin > flt[i]){
                hash_table[idx].fmin = flt[i];
            }
        }
        /* push the node at the head of the bucket's chain */
        node->next_node = hash_table[idx].next_item;
        hash_table[idx].next_item = node;
    }

    /* scan the non-empty buckets in order and keep the widest gap between
       one bucket's maximum and the next bucket's minimum */
    max_dif = 0.0f;
    left_max = right_min = min; /* defined even if no positive gap is seen */
    prev = -1;                  /* index of the previous non-empty bucket */
    for (i = 0; i < n; i++){
        if (NULL == hash_table[i].next_item){
            continue;
        }
        if (0 <= prev){
            tmp_diff = hash_table[i].fmin - hash_table[prev].fmax;
            if (max_dif < tmp_diff){
                max_dif = tmp_diff;
                left_max = hash_table[prev].fmax;
                right_min = hash_table[i].fmin;
            }
        }
        prev = i;
    }

    /* memory free */
    for (i = 0; i < n; i++){
        node = hash_table[i].next_item;
        while (NULL != node){
            next = node->next_node;
            free (node);
            node = next;
        }
    }
    free (hash_table);

    /* return */
    *f_max = right_min;
    *f_min = left_max;
    return max_dif;
}

删去冗余：

/*
 * max_diff - largest difference between two values that are neighbours in
 * sorted order, computed with buckets instead of a sort.
 *
 * The n values are spread over n buckets whose width is the average gap
 * (max - min) / (n - 1).  Two values in the same bucket differ by less than
 * that width, while the largest gap is at least that width, so only the
 * minimum and maximum of every bucket have to be remembered: the answer lies
 * between the maximum of one non-empty bucket and the minimum of the next
 * non-empty bucket.
 *
 * flt   : input values (not modified); n must be at least 2
 * f_max : receives the upper value of the widest gap
 * f_min : receives the lower value of the widest gap
 * returns the width of that gap; when all values are equal the result is 0
 *         and both outputs are set to that common value.
 *
 * Aborts the program if the bucket table cannot be allocated.
 */
float max_diff (float flt[], int n, float * f_max, float * f_min)
{
    struct hash_table_item{
        float fmax; /* maximal element */
        float fmin; /* minimal element */
        int used;   /* 0 while no value has been mapped to this bucket */
    };
    struct hash_table_item * hash_table;
    float left_max, right_min, tmp_diff, max_dif;
    float max, min;
    double avg_diff;
    int idx;
    int prev;
    int i;

    assert (1<n);

    /* find the maximal and minimal elements */
    max = min = flt[0];
    for (i = 1; i < n; i++){
        if (max < flt[i]){
            max = flt[i];
        }
        if (min > flt[i]){
            min = flt[i];
        }
    }

    /* All values equal: the bucket width would be 0 and the index
       computation below would divide by zero, so answer directly. */
    if (max == min){
        *f_max = max;
        *f_min = min;
        return 0.0f;
    }

    /* average difference of all the elements; double keeps the division from
       overflowing or underflowing for extreme float ranges */
    avg_diff = ((double) max - (double) min) / (n - 1);

    /* calloc zero-fills the table, so every bucket starts out unused */
    hash_table = calloc ((size_t) n, sizeof *hash_table);
    if (NULL == hash_table){
        fprintf (stderr, "max_diff: out of memory\n");
        abort ();
    }

    /* fold every value into the bounds of its bucket */
    for (i = 0; i < n; i++){
        idx = (int) (((double) flt[i] - (double) min) / avg_diff);
        if (idx > n - 1){
            idx = n - 1; /* guard against rounding pushing max past the end */
        }
        if (!hash_table[idx].used){
            hash_table[idx].fmax = flt[i];
            hash_table[idx].fmin = flt[i];
            hash_table[idx].used = 1;
        }
        else{
            if (hash_table[idx].fmax < flt[i]){
                hash_table[idx].fmax = flt[i];
            }
            if (hash_table[idx].fmin > flt[i]){
                hash_table[idx].fmin = flt[i];
            }
        }
    }

    /* scan the non-empty buckets in order and keep the widest gap between
       one bucket's maximum and the next bucket's minimum */
    max_dif = 0.0f;
    left_max = right_min = min; /* defined even if no positive gap is seen */
    prev = -1;                  /* index of the previous non-empty bucket */
    for (i = 0; i < n; i++){
        if (!hash_table[i].used){
            continue;
        }
        if (0 <= prev){
            tmp_diff = hash_table[i].fmin - hash_table[prev].fmax;
            if (max_dif < tmp_diff){
                max_dif = tmp_diff;
                left_max = hash_table[prev].fmax;
                right_min = hash_table[i].fmin;
            }
        }
        prev = i;
    }

    free (hash_table);

    /* return */
    *f_max = right_min;
    *f_min = left_max;
    return max_dif;
}

array:

19265.0    834.0  28849.0  10302.0    379.0  14280.0  18890.0  17461.0   5647.0   9698.0
19511.0  26108.0   1994.0  21693.0   6082.0  20042.0  29786.0   3037.0  13359.0   2781.0
  854.0   8719.0  29798.0  23822.0    639.0  14495.0   5332.0  28328.0   5958.0  14028.0
 6414.0  18682.0  10638.0   6521.0  11447.0  16939.0  27570.0  27429.0   2981.0  21710.0
 9162.0  30749.0  13631.0   5558.0   6698.0   4848.0  28457.0  15372.0  20780.0  32036.0
23332.0  28574.0  27184.0  18471.0   9263.0  20858.0  23520.0  17032.0   1778.0  29372.0
23993.0  17767.0  14685.0  11684.0  11463.0  30251.0  30093.0   5356.0   3503.0  20595.0
11247.0  27992.0  12275.0   3150.0   8359.0  25998.0  22130.0  13486.0  30771.0     40.0
26926.0    538.0  30539.0  25608.0  21586.0  19876.0  31244.0   1771.0  22644.0  12315.0
26198.0  26177.0  12311.0   8577.0   7539.0  16957.0  11892.0  11455.0  12537.0   3880.0

after calcluating:
max_diff = 1615.0
f_max = 25608.0
f_min = 23993.0

结果和如下的排序之后比较的结果一样：

/* file: max_real_diff_by_qsort.c */
/* reference implementation: sort the values, then take the widest gap
   between consecutive elements */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* find the max difference between two neighbour floats */
float max_diff (float flt[], int n, float * f_max, float * f_min);

/* qsort comparison callback for float elements, ascending order */
int f_cmp (const void * a, const void * b);

/*
 * Reads "n v1 v2 ... vn" from stdin (from outf.txt when built with DEBUG1),
 * echoes the array ten values per line, sorts it and prints the largest
 * difference between two consecutive sorted values.
 */
int main()
{
    float * a;
    float f;
    float gap;
    int i;
    int n;
#ifdef DEBUG1
    FILE * filein = freopen ("outf.txt", "r", stdin);
    assert (NULL!=filein);
#endif

    /* at least two values are needed to form one gap */
    if (1 != scanf ("%d", &n) || n < 2){
        fprintf (stderr, "expected an element count of at least 2\n");
        return EXIT_FAILURE;
    }

    a = malloc (sizeof *a * (size_t) n);
    if (NULL == a){
        fprintf (stderr, "out of memory\n");
        return EXIT_FAILURE;
    }
    for (i = 0; i < n; i++){
        if (1 != scanf ("%f", &a[i])){
            fprintf (stderr, "could not read element %d\n", i);
            free (a);
            return EXIT_FAILURE;
        }
    }

    puts ("array:\n");
    for (i = 0; i < n; i++){
        printf ("%6.1f\t", a[i]);
        if (9 == i % 10){
            puts ("");
        }
    }
    puts ("");

    qsort (a, (size_t) n, sizeof *a, f_cmp);

    /* widest gap between consecutive sorted values */
    f = a[1] - a[0];
    for (i = 1; i < n - 1; i++){
        gap = a[i + 1] - a[i];
        if (f < gap){
            f = gap;
        }
    }
    printf ("f = %6.1f\n",f);

    free (a);
#ifdef DEBUG1
    fclose (filein);
#endif
    return 0;
}

/*
 * Orders two floats for qsort: negative, zero or positive when *a is less
 * than, equal to or greater than *b.  The comparison is exact; a tolerance
 * would make "equal" non-transitive, which qsort does not allow.
 */
int f_cmp (const void * a, const void * b)
{
    const float x = *(const float *) a;
    const float y = *(const float *) b;

    return (x > y) - (x < y);
}

第二题：8次可以找出跑的最快的五匹马。

初始化：25匹马随机均分为5组，每一组的组号用字母表示，同一组内的马用数字来编号，这些马被标记为

A1,A2,A3,A4,A5

B1,B2,B3,B4,B5

C1,C2,C3,C4,C5

D1,D2,D3,D4,D5

E1,E2,E3,E4,E5

第一轮 ~ 第五轮：每一组马进行组内比赛，并根据比赛的平均速度进行排序，假设结果是

A1>A2>A3>A4>A5

B1>B2>B3>B4>B5

C1>C2>C3>C4>C5

D1>D2>D3>D4>D5

E1>E2>E3>E4>E5

第六轮：A1,B1,C1,D1,E1进行比赛，假设结果为：

A1>B1>C1>D1>E1

第七轮：A2,A3,B2,C2,D1进行比赛，比赛结果可以根据D1的名次分为以下三大类：

7.1 假设D1是第一名：意味着D1快于A2,B2,C2；那么

第八轮：D2,E1和第七轮的第二名（共3匹马）比赛，跑得最快的五匹马为：本次比赛的第一名和A1,B1,C1,D1；

7.2 假设D1是第二名：那么可以分三种情况讨论，

7.2.1 假如第一名是A2，那么跑得最快的五匹马是A1,A2,B1,C1,D1；

第八轮：无需比赛。

7.2.2 假如第一名是B2：

第八轮：D1和B3比赛，跑得最快的五匹马为：本次比赛的第一名和A1,B1,B2,C1；

7.2.3 假如第一名是C2：

第八轮：D1和C3比赛，跑得最快的五匹马为：本次比赛的第一名和A1,B1,C1,C2；

7.3 假设D1不是第一名，也不是第二名，那么根据第一名和第二名可能出现的结果，可以分为七类不同的情形：

D1不是第一名和第二名：

| 第1名和第2名 | 第8轮比赛的方案 | 最快的5匹马 |
| --- | --- | --- |
| A2,A3 | A4,A5,B1,B2,C1 | A1,A2,A3 + 前2名 |
| A2,B2 | A3,B3,C1 | A1,A2,B1,B2 + 第1名 |
| A2,C2 | ---- | A1,A2,B1,C1,C2 |
| B2,A2 | A3,B3,B4 + 第7轮的2、3名 | A1,B1,B2 + 前2名 |
| B2,C2 | B3,B4,C1,C2 | A1,B1,B2 + 前2名 |
| C2,A2 | A2,C3 | A1,B1,C1,C2 + 第1名 |
| C2,B2 | B2,C3 | A1,B1,C1,C2 + 第1名 |

注：---- 表示无需比赛。

综上所述，8次比赛就可以找出最快的5匹马。

第三题：结合第485楼的回答来进行分析。

据说这道题改编自微软的面试题。本质上等价于问这样一个问题：一个箱子里有N-1个蓝球和1个红球，每次抓取一个球。问题：第1次抓到红球的概率是多少？第k次抓到红球的概率是多少？第N次抓到红球的概率是多少？

答：第1次抓到红球的概率：P(1)=1/N

第k次抓到红球的概率：P(k)=1/N=(N-1)/N * (N-2)/(N-1) * ... * (N-k+1)/(N-k+2) * 1/(N-k+1)

第N次抓到红球的概率：P(N)=1/N=(N-1)/N * (N-2)/(N-1) * ... * 1/2 * 1

解决方案：

由于已经给定函数getNum ()：每次返回数组的一个数，取完了数之后则返回NULL。先定义变量，通过循环调用getNum () 来确定元素的个数（数组的数据结构应该不会被改变）。

然后，构造一个随机函数int random ( int k)，返回 0~(k-1) 之间的一个整数，我们假设产生0~(k-1)之间任意一个数的概率都是1/k。那么函数 int get () 可以这样写：

/* array V */
/*
 * get - return one element of V chosen through random().
 *
 * On the first call getNum() is drained to count the elements; the count is
 * cached in a static so later calls, when getNum() is already exhausted, reuse
 * it instead of counting 0.  Uniformity rests entirely on random(n) returning
 * each of 0..n-1 with probability 1/n, as the accompanying text assumes.
 *
 * NOTE(review): the end marker of getNum() is NULL, i.e. 0 when read as an
 * int, so an element whose value is 0 cannot be told apart from the end of
 * the data and would cut the count short.
 */
int get ()
{
    static int n = -1; /* element count; -1 means "not counted yet" */

    if (n < 0){
        int i = 0;
        /* determine the number of elements */
        while (0 != getNum ()){
            i++;
        }
        n = i;
    }

    /* Nothing to pick from: avoid random (0) and an out-of-range index and
       hand back the same "no data" value getNum() uses. */
    if (0 == n){
        return 0;
    }

    /* return one element */
    return V[random (n)];
}

以上的操作的概率是由随机函数来保证的，并且，我们假设数组V是对我们开放访问并且 int getNum () 函数不修改其值。随机函数 int random (int k) 返回的每一个整数值映射到了数组V的某个元素，从而保证每个元素被取到的概率是1/N。