海量数据处理的top K个数的问题

来源:互联网 发布:mac版的ps怎么破解 编辑:程序博客网 时间:2024/06/03 22:02

这个问题有两种的思路,第一种是将整个的数组建堆,(时间复杂度是O(N)),再分K次从堆的顶端取值,取完值之后将堆尾的元素放到堆首来下虑(K*logN),总的时间复杂度是(O(N+K*logN)).

//对原始的数据原地建最大堆,时间为O(N),然后提取K次,每次提取时,取第一个值再下沉,k*logN,总的时间复杂度是O(N+k*logN)#define LENGTH 9#define GET    5void HeapAdjust(int *Array,int hole,int len){int child,tmp=Array[hole];for(;2*hole+1<len;hole = child){child = 2*hole+1;if(child+1<len&&Array[child+1]>Array[child])child++;if(Array[child]>tmp)Array[hole]=Array[child];elsebreak;}Array[hole] = tmp;}void swap(int &a,int &b){int c = a;a = b;b = c;}int GetMin(int *a,int len,int k){int min = a[0];swap(a[0],a[len-1]);HeapAdjust(a,0,len-1);return min;}void BuildHeap(int *a,int len,int k){for(int i=len/2-1;i>=0;i--){//建立一个堆,时间复杂度为O(n)HeapAdjust(a,i,len);}int j = len;//k次循环来取最小的数据,for(int i=k;i>=0;--i,--j){printf("The %dth number is %d\n",i,GetMin(a,j,i));}}void print(int *a,int len){for(unsigned int i=0;i<len;i++)printf("%d    ",a[i]);printf("\n");}//建立大小为k的堆来不断插入int main(){int *q = new int[LENGTH];for(size_t i=0;i<LENGTH;i++)q[i]=rand()%LENGTH;print(q,LENGTH);BuildHeap(q,LENGTH,GET);print(q,GET);return 0;}

第二种方案是,首先建立一个大小为k 的堆,将后面的数据每次与堆首的元素进行比较,在求最大的k的数时候,建立的是最小堆,因此当所取的值比堆首的元素大时,取代堆首的元素,并进行下虑。整个的时间复杂度是K+(N-K)logK;

#define LENGTH 9#define GET    5void HeapAdjust(int *Array,int hole,int len){int child,tmp=Array[hole];for(;2*hole+1<len;hole = child){child = 2*hole+1;if(child+1<len&&Array[child+1]>Array[child])child++;if(Array[child]>tmp)Array[hole]=Array[child];elsebreak;}Array[hole] = tmp;}void swap(int &a,int &b){int c = a;a = b;b = c;}int GetMin(int *a,int len,int k){int min = a[0];swap(a[0],a[len-1]);HeapAdjust(a,0,len-1);return min;}void BuildKHeap(int *a,int Len,int k){    int i;for(i=k/2-1;i>=0;--i){HeapAdjust(a,i,k);}for(i=k;i<LENGTH;i++){if(a[i]<a[0]){a[0]=a[i];HeapAdjust(a,0,k);}}}void print(int *a,int len){for(unsigned int i=0;i<len;i++)printf("%d    ",a[i]);printf("\n");}//建立大小为k的堆来不断插入int main(){int *q = new int[LENGTH];for(size_t i=0;i<LENGTH;i++)q[i]=rand()%LENGTH;print(q,LENGTH);BuildHeap(q,LENGTH,GET);print(q,GET);return 0;}


阅读全文
0 0