1 hash

来源:互联网 发布:里程碑时间轴js 编辑:程序博客网 时间:2024/05/21 09:36

寻找热门查询,300万个查询字符串中统计最热门的10个查询

发表于2013/11/1 14:29:26  326人阅读

分类: 面试篇 C++

寻找热门查询,300万个查询字符串中统计最热门的10个查询


  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
// <hash_map> is a non-standard legacy extension header; it is kept only for
// code that still refers to hash_map.
#include <hash_map>
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

/// A query string paired with the number of times it was seen.
struct QueryInfo {
    std::string s;  ///< The query text.
    int times;      ///< Occurrence count recorded for `s`.

    /// Builds a record for query `s1` with count `t`.
    ///
    /// `s1` is taken by value and moved into the member, so a caller passing
    /// a temporary pays for no additional string copy.
    QueryInfo(std::string s1, int t) : s(std::move(s1)), times(t) {}
};

// Ordering predicate on occurrence counts.
// Returns 1 when q1 was seen strictly fewer times than q2, and 0 otherwise
// (including when both counts are equal).
int QueryCompare(const QueryInfo &q1, const QueryInfo &q2)
{
    const bool seenLessOften = q1.times < q2.times;
    return seenLessOften ? 1 : 0;
}
/// Counts how often each string occurs in `s` and prints the ten most
/// frequent ones to std::cout, one "<query> <count>" pair per line.
///
/// Lines are printed from most to least frequent; queries with equal counts
/// are ordered lexicographically (smaller string first), which also decides
/// which of several tied queries make the cut. When `s` holds fewer than ten
/// distinct strings, all of them are printed. An empty `s` prints nothing.
///
/// @param s  The queries to analyse; it is only read.
void FindHottest(std::vector<std::string> &s)
{
    // How many results are reported.
    const std::size_t kTopCount = 10;

    // Pass 1: tally occurrences. operator[] value-initialises a missing
    // counter to 0, so a separate find() is unnecessary.
    std::unordered_map<std::string, int> table;
    for (const std::string &query : s)
        ++table[query];

    // The heap stores pointers to table entries instead of copies of the
    // strings. The table is not modified after this point, so the pointers
    // stay valid.
    using Entry = const std::pair<const std::string, int> *;

    // True when `a` ranks above `b`: higher count first, ties broken by the
    // lexicographically smaller query so the result is deterministic.
    auto ranksAbove = [](Entry a, Entry b) {
        if (a->second != b->second)
            return a->second > b->second;
        return a->first < b->first;
    };

    // Pass 2: keep the best kTopCount entries seen so far. Using ranksAbove
    // as the heap ordering puts the LOWEST-ranked kept entry at the front,
    // which is the one a better candidate has to displace.
    std::vector<Entry> heap;
    heap.reserve(kTopCount);
    for (const auto &entry : table)
    {
        if (heap.size() < kTopCount)
        {
            heap.push_back(&entry);
            std::push_heap(heap.begin(), heap.end(), ranksAbove);
        }
        else if (ranksAbove(&entry, heap.front()))
        {
            // Move the weakest entry to the back, overwrite it with the
            // candidate, then restore the heap property.
            std::pop_heap(heap.begin(), heap.end(), ranksAbove);
            heap.back() = &entry;
            std::push_heap(heap.begin(), heap.end(), ranksAbove);
        }
    }

    // sort_heap orders ascending with respect to ranksAbove, i.e. the
    // highest-ranked query comes first.
    std::sort_heap(heap.begin(), heap.end(), ranksAbove);

    for (Entry e : heap)
        std::cout << e->first << " " << e->second << '\n';
}

/// Manual smoke test: runs FindHottest on a fixed list of 22 sample strings
/// and lets it print its result. Nothing is asserted here.
///
/// In the sample data "hello" and "sdkfj" each occur twice; the entries with
/// a leading space (" sdkfj", " sldfwe") are distinct strings in their own
/// right.
void TestHeap()
{
    const std::string t[] = {"hello", "helloo", "kadhf", "sakdf","weenr","sdicjsdnf","sdkjjfks","sdkfj" ,"hello", "sdkfj", "sdfiwemcn", "wiersnfk", "weiosdkfn" ," sdkfj"," sldfwe", "a", "v", "d", "we", "c", "sdewr", "sdf"};

    // Element count derived from the array itself rather than a named type.
    const std::size_t count = sizeof(t) / sizeof(t[0]);

    std::vector<std::string> s(t, t + count);
    FindHottest(s);
}
0 0