运用简单的bloomfilter算法生成100万个不重复的随机数

来源:互联网 发布:淘宝直通车没有展现 编辑:程序博客网 时间:2024/06/05 14:55

本文只是简单地体会一下 Bloom filter 算法的基本原理,设计并实现一个生成100万个不重复随机数的程序。

选择3个分布均匀的质数,作为3个哈希函数的模数。质数的选择还是挺有讲究的:不能太小,必须能够充分利用 Bloom filter 的位空间,否则整个位空间都被置为1了,还没有找到100万个不重复的随机数。不多说,上代码。

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Size of the Bloom filter in bits; the filter occupies MAXNUM / 8 bytes. */
#define MAXNUM 10000000

/*
 * Moduli of the three hash functions. Each is smaller than MAXNUM, so every
 * bit position they produce lies inside the filter.
 */
enum {
    HASH_PRIME_A = 524287,
    HASH_PRIME_B = 1046527,
    HASH_PRIME_C = 3967
};

/*
 * Hash a value by taking its remainder modulo select_number.
 * select_number must be non-zero.
 */
int hash_fuction(int dst, int select_number)
{
    return dst % select_number;
}

/* Return true when bit number `bit` of the filter is set. */
static bool filter_test_bit(const unsigned char *filter, int bit)
{
    return (filter[bit >> 3] & (1u << (bit % 8))) != 0;
}

/* Set bit number `bit` of the filter. */
static void filter_set_bit(unsigned char *filter, int bit)
{
    filter[bit >> 3] = (unsigned char)(filter[bit >> 3] | (1u << (bit % 8)));
}

/*
 * Generate generate_number pairwise-distinct pseudo-random integers in the
 * range [0, maxValue), using a three-hash Bloom filter to reject candidates
 * that may already have been produced.
 *
 * Candidates are drawn with rand(); the caller seeds the generator (srand)
 * if a different sequence per run is wanted.
 *
 * Returns a heap-allocated array of generate_number ints that the caller
 * must free(), or NULL when an argument is negative, when generate_number
 * exceeds maxValue (there are not enough distinct values in the range), or
 * when memory cannot be allocated.
 *
 * NOTE: a Bloom filter can report false positives, so some never-produced
 * values are rejected as well. If the filter becomes so full that no
 * reachable candidate has an unset bit left, the search loop does not
 * terminate; keep generate_number comfortably below HASH_PRIME_B.
 */
int *byte_bloomfilter_random(int generate_number, int maxValue)
{
    /* generate_number > maxValue also covers maxValue == 0 with a non-empty
     * request, so the modulo below never divides by zero. */
    if (generate_number < 0 || maxValue < 0 || generate_number > maxValue) {
        return NULL;
    }

    size_t count = (size_t)generate_number;
    unsigned char *bloomfilter = calloc((size_t)MAXNUM / 8, sizeof *bloomfilter);
    /* Allocate at least one element so an empty request is not confused
     * with an allocation failure. */
    int *dst = malloc((count > 0 ? count : 1) * sizeof *dst);

    if (bloomfilter == NULL || dst == NULL) {
        free(bloomfilter);
        free(dst);
        return NULL;
    }

    for (size_t i = 0; i < count; i++) {
        for (;;) {
            /* Multiply in 64-bit unsigned arithmetic: rand() * rand() as int
             * overflows (undefined behaviour) when RAND_MAX is large and
             * could yield a negative candidate. */
            unsigned long long product =
                (unsigned long long)rand() * (unsigned long long)rand();
            int candidate = (int)(product % (unsigned long long)maxValue);

            /* Three bit positions, one per prime modulus. */
            int bit_a = hash_fuction(candidate, HASH_PRIME_A);
            int bit_b = hash_fuction(candidate, HASH_PRIME_B);
            int bit_c = hash_fuction(candidate, HASH_PRIME_C);

            bool maybe_seen = filter_test_bit(bloomfilter, bit_a)
                           && filter_test_bit(bloomfilter, bit_b)
                           && filter_test_bit(bloomfilter, bit_c);

            /* At least one unset bit proves the candidate is new. */
            if (!maybe_seen) {
                dst[i] = candidate;
                filter_set_bit(bloomfilter, bit_a);
                filter_set_bit(bloomfilter, bit_b);
                filter_set_bit(bloomfilter, bit_c);
                break;
            }
        }
    }

    free(bloomfilter);
    return dst;
}


1 0