面试题: 求N个数中前k个大的数(大数据)

来源:互联网 发布:线切割图形编程事例 编辑:程序博客网 时间:2024/04/30 12:16

解题思路:一般思路就是将N个数排序后,取前k个数就ok。但是如果N个数是几十亿个数,加载不到内存怎么办?这就需要另外一种思路了,那就是利用堆。

具体的思路是:先建一个k个数的小堆,然后从k+1个数往后的值与堆顶元素比较,若此数比堆顶元素大,就将堆顶元素用这个数替换,然后重新调整堆,以此向后重复上述过程,直到将N个数比较完成,那么此时组成这个堆的k个元素就是前k个大的数。

下边是具体实现的代码:

#pragma once #include<iostream>#include<vector>using namespace std;template<class T>class Heap{public:Heap(const T* a, int k){_CreateHeap(a, k);}void Push(const T& x){_a.push_back(x);_AdjustUp(_a.size() - 1);}T& Top(){return _a[0];}void BuildHeap(const T* a, int k){for (int i = (k - 2) / 2; i >= 0; i--){_AdjustDown(i);}}void Print(){for (int i = 0; i < _a.size(); i++){cout << _a[i] << " ";}cout << endl;}public:void _CreateHeap(const T* a, int k){for (int i = 0; i < k; i++){_a.push_back(a[i]);}//建小堆for (int i = (k - 2) / 2; i >=0; i--){_AdjustDown(i);}}void _AdjustDown(int parent)//向下调整{int child = parent * 2 + 1;while (child < _a.size()){if (child + 1 < _a.size() && _a[child] > _a[child + 1]){child++;}if (_a[child] < _a[parent]){swap(_a[child], _a[parent]);parent = child;child = parent * 2 + 1;}else{break;}}}//void _AdjustUp(int child)//向上调整//{//int parent = (child - 1) / 2;//while (child >= 0)//{//if (_a[child] > _a[parent])//{//swap(_a[child], _a[parent]);//child = parent;//parent = (child - 1) / 2;//}//else//{//break;//}//}//}private:vector<T> _a;};
#include"HeapSort.h"void GetTopK(int* a, int k, int size){//建堆Heap<int> hp(a, k);//hp.Print();//k个数后面的数,比堆顶元素大,则插入for (int i = k; i < size; i++){if (hp.Top() < a[i]){hp.Top() = a[i];hp._AdjustDown(0);}}hp.Print();}void Test(){int a[] = { 1, 0, 3, 5, 8, 2, 6, 7, 4, 9, 10, 26, 35, 20, 30, 40 };int len = sizeof(a) / sizeof(a[0]);int k = 6;for (int i = 0; i < len; i++){cout << a[i] << " ";}cout << endl;cout << "最大的前" << k << "个数为:" << endl;GetTopK(a,6,len);}



0 0
原创粉丝点击