(3)寻找最小的K个数

来源:互联网 发布:数据库读写分离方案 编辑:程序博客网 时间:2024/06/18 17:46
/*
 * Find the K smallest numbers.
 *
 * Candidate approaches:
 *   1. Sort, then take the first K elements: O(n log n).
 *   2. Maintain an array of size K: O(K + (n - K) * K).
 *   3. Maintain a max-heap of size K:
 *      O(K log K + (n - K) log K) = O(n log K).
 *   4. Partition-based selection, like finding the K-th element of an array.
 *   5. Counting sort.
 *   6. Build a min-heap over the whole array, then extract K times.
 *   7. Open question from the author: in a min-heap, is every element of
 *      level K smaller than every element of level K + 1?
 */
#ifndef K_SMALLEST_H
#define K_SMALLEST_H

#include <cstddef>
#include <cstring>
#include <iostream>
#include <string>

// Kept for backward compatibility: existing includers rely on this directive.
using namespace std;

// Test driver for the K-smallest routines.
void TestKSmallest();

// Approach 7: the caller has already built a min-heap over the n numbers
// (see BuildHeap); take the first k, sinking the root by at most
// k, k-1, k-2, ... levels after each extraction.
void KSmallest(int arr[], int n, int res[], int k);

// Builds a min-heap in arr[1..n]; arr[0] is used as scratch storage.
void BuildHeap(int arr[], int n);

// Sifts element number `num` up to restore the min-heap property.
void ShiftUp(int arr[], int n, int num);

// Sinks the root to restore the min-heap property. The only difference from a
// conventional heap is that the number of levels descended is capped at k.
void ShiftDown(int arr[], int n, int k);

// Swaps two integers; does nothing when the values are already equal.
inline void MySwap(int &num1, int &num2)
{
    if (num1 != num2)
    {
        const int tmp = num1;
        num1 = num2;
        num2 = tmp;
    }
}

// Prints the first n elements of arr, each followed by a space, then a newline.
inline void PrintVec(int arr[], int n)
{
    for (int i = 0; i < n; ++i)
    {
        std::cout << arr[i] << " ";
    }
    std::cout << std::endl;
}

// Quick-select based variant: writes the k smallest elements of arr into res.
void KSmallest_2(int arr[], int n, int res[], int k);

// Partitions arr[start..end] around arr[start] and reports the pivot's final
// index through `partition`.
void QuickSelect(int arr[], int start, int end, int &partition);

// ---------------------------------------------------------------------------
// Top-K most frequent words (hand-rolled hash table + min-heap).
// Open question from the author: how large should the hash table be?
// ---------------------------------------------------------------------------

// Typed constants instead of macros, so an identifier named K elsewhere is
// not silently rewritten by the preprocessor.
const int HASH_LEN = 2807303;  // number of hash buckets
const int WORD_LEN = 30;       // word buffer length
const int K = 10;              // how many top words to keep

// One entry of a bucket chain: an owned copy of the word, its occurrence
// count, and the next entry in the same bucket.
struct HashNode
{
    char *word;      // owned, NUL-terminated copy of the word
    int count;       // number of occurrences seen so far
    HashNode *next;  // next node in the same bucket, or NULL

    // Copies `str` (which must be a valid NUL-terminated string) into a
    // freshly allocated buffer and starts the count at 1.
    HashNode(const char *str) : word(NULL), count(1), next(NULL)
    {
        const std::size_t len = std::strlen(str);
        word = new char[len + 1];
        std::memcpy(word, str, len + 1);  // includes the terminating NUL
    }

    // Releases the owned word buffer; without this every node leaked it.
    ~HashNode()
    {
        delete[] word;
    }

private:
    // Non-copyable: a member-wise copy would free `word` twice.
    HashNode(const HashNode &);
    HashNode &operator=(const HashNode &);
};

// Bucket heads and the heap are defined in the implementation file:
//   HashNode *head[HASH_LEN];   // each head links the nodes that share a hash
//   HashNode *minHeap[K + 1];

// Hash function mapping a word to a bucket index.
unsigned HashFunction(const char *word);

// Inserts a word into the table (or bumps its count if already present).
void InsertWord(const char *word);

// Reads words from the input file and inserts each of them into the table.
void BuildHeadFromFile(char *file);

// Top-K strategy: build a min-heap from the first K nodes, then scan the
// remaining (N - K) nodes and adjust the heap; the heap is ordered by word
// frequency and stores pointers to the nodes.

// Builds a min-heap over the first n heap slots.
void BuildMinHeap(int n);

// Sift-up on the node heap.
void ShiftUpNode(int n, int num);

// Scans the hash table and ends up with the populated minHeap.
void ScanHashToBuildHeap();

// Idea (not implemented): split the "fill the first K slots" step out.
// void InitMinHeap();

// Sinks the root of the node heap to restore the min-heap property.
void ShiftDownNode(int n);

// Frees the memory held by the hash table.
void ReleaseNode();

// TODO (author's note): implement a red-black tree some day, to get over the
// fear of tackling hard things.

#endif

#include <algorithm>#include <iostream>#include <string>#include <fstream>#include "3_KSmallest.h"using namespace std;/*1.排序,然后遍历前K个O(nlogn)2.维护一个K大小的数组O(K+(n-K)*n)3.维护一个K大小的最大堆O(K*logK + (n-k)*logK) = O(nlogK)4.类似于在一个数组中找第K大的数的算法5.计数排序6.将数组建立一个最小堆,然后取K个7.性质:最小堆:第K层最小堆一定全部小于第(K+1)层吗???编程之美编程珠玑算法导论*/// 定义hash的节点头。每一个节点头连着一串的hash值相等的节点链HashNode *head[HASH_LEN];HashNode *minHeap[K + 1];void TestKSmallest(){/*int arr[] = {0, 5, 3, 7, 4};BuildHeap(arr, 4);int res[3] = {0};KSmallest(arr, 4, res, 3);*/int arr[] = {4, 3, 2, 7, 5};int partition = 0;//QuickSelect(arr, 0, 4, partition);//cout << partition << endl;int res[5] = {0};KSmallest_2(arr, 5, res, 1);KSmallest_2(arr, 5, res, 2);KSmallest_2(arr, 5, res, 3);KSmallest_2(arr, 5, res, 4);KSmallest_2(arr, 5, res, 5);}// 利用方法7.// 首先对N个数建立最小堆,然后取前k个,每次下沉的层数为k、k-1、k-2...void KSmallest(int arr[], int n, int res[], int k){for (int i = 0; i < k; ++i){res[i] = arr[1];arr[1] = arr[n--];ShiftDown(arr, n, k - i);}}// 建立最小堆,这里的数组arr[0],不保存实际数据,而用作临时存储单元void BuildHeap(int arr[], int n){for (int i = 1; i <= n; ++i){ShiftUp(arr, i, i);}}// 元素num上浮,建立最小堆// 一共n个元素,要调整的是第num个void ShiftUp(int arr[], int n, int num){arr[0] = arr[num];int current = num;while (current > 1) // 直到根节点{int father = current / 2;if (arr[0] < arr[father]){arr[current] = arr[father];current = father;}else{break;}}arr[current] = arr[0];}// 根节点下沉,维持最小堆的性质,与传统堆唯一的不同是:限定了下沉的层数kvoid ShiftDown(int arr[], int n, int k){arr[0] = arr[1];int current = 1;// 一方面保证最多下沉k次,另一方面保证堆不会超出范围while (k-- && (current <= n / 2)){int child = current << 1;// 左子节点if ((child + 1 <= n) && arr[child + 1] < arr[child])// 如果右子节点更小{child++;}if (arr[child] < arr[0]){arr[current] = arr[child];current = child;}else{break;}}arr[current] = arr[0];}// 设置res多此一举了!// 因为已经对arr滴前K个排过序了!可以保证结果一定在前K中void KSmallest_2(int arr[], int n, int res[], int k){int start = 0;int end = n - 1;int resIndex = 0;while (k != 0){int partition = 0;QuickSelect(arr, start, end, partition);int num = partition - start + 1;if (num == 
k){copy(arr + start, arr + partition + 1, res + resIndex);break;}else if (num < k){copy(arr + start, arr + partition + 1, res + resIndex);resIndex += num;k -= num;start = partition + 1;} else{end = partition;}}}// 这里我直接用start、end做了指针,不好!// 可以单独申请两个指针,而不要动start、endvoid QuickSelect(int arr[], int start, int end, int &partition){int initStart = start;int part = arr[start++];while (start <= end){while (arr[start] <= part){start++;}while (arr[end] > part){end--;}if (start < end){MySwap(arr[start], arr[end]);}}MySwap(arr[initStart], arr[end]);partition = end;}unsigned HashFunction(const char *word){unsigned hashNum = 0;while (*word){hashNum = hashNum * 31 + *word++;if (hashNum > HASH_LEN){hashNum = hashNum % HASH_LEN;}}return hashNum;}void InsertWord(const char *word){unsigned index = HashFunction(word);HashNode *pHashLine = head[index];while (pHashLine != NULL){if (strcmp(word, pHashLine->word) == 0){pHashLine->count++;return;}pHashLine = pHashLine->next;}HashNode newNode(word);newNode.next = head[index];head[index] = &newNode;}void BuildHeadFromFile(char *file){ifstream infile(file);if (!infile){cerr << "open file failed" << endl;exit(-1);}char word[WORD_LEN];while (infile >> word){InsertWord(word);}}void BuildMinHeap(int n){for (int i = 1; i <= n; ++i){ShiftUpNode(i, i);}}void ShiftUpNode(int n, int num){minHeap[0] = minHeap[num];int current = num;while (current > 1) // 直到根节点{int father = current / 2;if (minHeap[0]->count < minHeap[father]->count){minHeap[current] = minHeap[father];current = father;}else{break;}}minHeap[current] = minHeap[0];}void ScanHashToBuildHeap(){int minHeapIndex = 1;// 1. 
首先填充K个minHeapfor (int i = 0; i < HASH_LEN; ++i){HashNode *pHashNode = head[i];while (pHashNode != NULL){if (minHeapIndex <= K){minHeap[minHeapIndex] = pHashNode;if (minHeapIndex == K){BuildMinHeap(K);}else{minHeapIndex++;}}else{// 如果次数大于minHeap[1],则将其放入minHeap[1],并调整最小堆if (pHashNode->count > minHeap[1]->count){minHeap[1] = pHashNode;ShiftDownNode(K);}}pHashNode = pHashNode->next;}}}void ShiftDownNode(int n){minHeap[0] = minHeap[1];int current = 1;// 保证堆不会超出范围while ((current <= n / 2)){int child = current << 1;// 左子节点if ((child + 1 <= n) && minHeap[child + 1]->count < minHeap[child]->count)// 如果右子节点更小{child++;}if (minHeap[child]->count < minHeap[0]->count){minHeap[current] = minHeap[child];current = child;}else{break;}}minHeap[current] = minHeap[0];}void ReleaseNode(){for (int i = 0; i < HASH_LEN; ++i){HashNode *pHashNode = head[i];while (pHashNode != NULL){HashNode *pTmp = pHashNode->next;delete pHashNode;pHashNode = pTmp;}}}