快速查询大量数据的hash_table
来源:互联网 发布:php 并且符号怎么打 编辑:程序博客网 时间:2024/04/27 20:20
记得有一次面试题目:求一张图片中出现次数最多的RGB值。当时我将RGB值作为键,将该RGB值出现的次数作为值放到std::map中,但百万级的数据插入显得有些慢,感觉不够好。前些日子我要处理100万条数据,使用了hash_table,并对hash_table做了一些研究。今天针对这个题目,花了1个多小时又写了一个hash_table:将一张图片中的所有RGB值插入hash_table后,查询速度在毫秒级,很适合处理百万级的数据。
头文件:
// Hash functor for int keys: hashes the decimal text of the value
// (optional leading '-', then the digits, most significant first) with a
// multiply-then-xor step per character using the multiplier 16777619.
struct hashFun
{
    // Returns the hash of nVal. Equal inputs always give equal results.
    size_t operator()(int nVal) const
    {
        // Render the decimal text right-aligned in szVal, writing backwards from
        // the last byte (which stays '\0'). This avoids the non-standard _itoa.
        char szVal[64] = {0};
        char* psz = szVal + sizeof(szVal) - 1;

        // Take the magnitude in unsigned arithmetic so the most negative int
        // does not overflow when negated.
        unsigned int uMagnitude = nVal < 0
            ? 0u - static_cast<unsigned int>(nVal)
            : static_cast<unsigned int>(nVal);
        do
        {
            *--psz = static_cast<char>('0' + uMagnitude % 10);
            uMagnitude /= 10;
        } while (uMagnitude != 0);
        if (nVal < 0)
            *--psz = '-';

        unsigned long _hash = 0;
        for (; *psz != 0; ++psz)
        {
            _hash *= 16777619;
            // Mix exactly one character per step. Reading the char through an
            // `unsigned short&` would pull in the following byte as well and
            // violates aliasing/alignment rules.
            _hash ^= static_cast<unsigned long>(static_cast<unsigned char>(*psz));
        }
        return _hash;
    }
};
// One entry of a bucket's chain: a distinct inserted value, its hash, and how
// many times it has been inserted.
struct tableNode
{
int nValue; // the inserted value
DWORD nKey; // hash of nValue (CMyHasTable::Insert fills it from hashFun)
int nCount; // number of times this value has been inserted
tableNode* pNext; // next node in the same bucket's chain
};
// One bucket of the table: the head of its chain plus cached statistics.
// Arrays of BoxVec are zero-filled with memset right after allocation.
struct BoxVec
{
int nCount; // number of distinct values in this bucket (different values may share a bucket)
int nMaxNum; // occurrence count recorded for the most frequent value in this bucket
int nVal; // the value that nMaxNum was recorded for
tableNode* pHead; // first node of the bucket's chain
};
// Chained hash table that counts how many times each int value is inserted
// and reports the value seen most often.
//
// A bucket index is computed as `key & (bucketCount - 1)`, which only covers
// all buckets when bucketCount is a power of two; the size table handed to
// the constructor is expected to contain such values.
class CMyHasTable
{
public:
    // pBoxSize  : array of bucket counts, one per growth level. Only the
    //             pointer is stored, so the array must outlive this object.
    // nBoxCount : number of entries in pBoxSize.
    // nLevel    : index into pBoxSize of the level to start at.
    CMyHasTable(const int* pBoxSize, int nBoxCount, int nLevel);
    ~CMyHasTable();
private:
    // The destructor frees m_pBoxs and every chain node, so a member-wise copy
    // would make two objects free the same memory. Copying is therefore
    // disabled: declared private and intentionally left undefined.
    CMyHasTable(const CMyHasTable&);
    CMyHasTable& operator=(const CMyHasTable&);
private:
    int m_nBoxCount;        // number of growth levels (entries in m_pBoxSize)
    const int* m_pBoxSize;  // bucket count for each growth level
    int m_nBox;             // current number of buckets
    int m_nLow;             // lower bound associated with the current level
    int m_nHight;           // element count at which Insert asks the table to grow
    int m_nLevel;           // index of the current growth level
    BoxVec* m_pBoxs;        // bucket array, m_nBox entries
    int m_nSize;            // number of distinct values stored
public:
    // Records one occurrence of nVale.
    void Insert(int nVale);
    // Returns the value recorded as most frequent (the value, not its count).
    int FindMaxCount();
private:
    // Maps a hash key to a bucket index for the current bucket count.
    inline DWORD GetBoxIndex(DWORD key) const
    {
        return key & (m_nBox - 1);
    }
    // Maps a hash key to a bucket index for a table of nNewBox buckets.
    inline DWORD GetBoxIndex(DWORD key, int nNewBox) const
    {
        return key & (nNewBox - 1);
    }
private:
    tableNode* Find(tableNode node);
    void IncreaseLevel();
    void SwitchLevel(int nNewLevel);
    void UpBoxMaxCount(int nIndex);
};
.cpp文件
// Creates an empty table that starts at growth level nLevel of the size table
// pBoxSize (nBoxCount entries). Only the pointer to the size table is kept.
CMyHasTable::CMyHasTable(const int* pBoxSize, int nBoxCount, int nLevel)
    : m_nBoxCount(nBoxCount)
    , m_pBoxSize(pBoxSize)
    , m_nBox(pBoxSize[nLevel])
    , m_nLow(nLevel > 0 ? pBoxSize[nLevel - 1] / 2 : 0)
    , m_nHight(pBoxSize[nLevel])
    , m_nLevel(nLevel)
    , m_pBoxs(NULL)
    , m_nSize(0)
{
    // Allocate the bucket array and zero every bucket (counts and chain heads).
    const int bucketCount = m_nBox;
    m_pBoxs = new BoxVec[bucketCount];
    memset(m_pBoxs, 0, sizeof(BoxVec) * bucketCount);
}
// Frees every chain node of every bucket, then the bucket array itself.
CMyHasTable::~CMyHasTable()
{
    for (int bucket = 0; bucket < m_nBox; ++bucket)
    {
        for (tableNode* node = m_pBoxs[bucket].pHead; node != NULL; )
        {
            // Read the successor before the node is destroyed.
            tableNode* const next = node->pNext;
            delete node;
            node = next;
        }
    }
    delete[] m_pBoxs;
}
void CMyHasTable::Insert(int nVale)
{
tableNode valNode;
valNode.nValue = nVale;
valNode.nKey = hashFun()(nVale);
valNode.pNext = NULL;
tableNode* pNode = Find(valNode);
if (pNode == NULL)
{
m_nSize ++;
tableNode* pNew = new tableNode;
*pNew = valNode;
pNew->nCount = 1;
DWORD dwBoxIndex = GetBoxIndex(valNode.nKey);
pNew->pNext = m_pBoxs[dwBoxIndex].pHead;
m_pBoxs[dwBoxIndex].pHead = pNew;
m_pBoxs[dwBoxIndex].nCount++;
m_pBoxs[dwBoxIndex].nMaxNum = 1;
m_pBoxs[dwBoxIndex].nVal = nVale;
if (m_nSize >= m_nHight)
IncreaseLevel();
}
else
{
pNode->nCount++;
DWORD dwBoxIndex = GetBoxIndex(pNode->nKey);
UpBoxMaxCount(dwBoxIndex);
}
}
int CMyHasTable::FindMaxCount()
{
int nMaxCount = m_pBoxs[0].nMaxNum;
int nIndex = 0;
for (int i = 1; i < m_nBox; i++)
{
if (nMaxCount < m_pBoxs[i].nMaxNum)
{
nMaxCount = m_pBoxs[i].nMaxNum;
nIndex = i;
}
}
return m_pBoxs[nIndex].nVal;
}
void CMyHasTable::UpBoxMaxCount(int nIndex)
{
tableNode* p = m_pBoxs[nIndex].pHead;
tableNode* q = p;
int nMax = p->nCount;
while(p->pNext)
{
if (nMax < p->nCount)
{
nMax = p->nCount;
q = p;
}
p = p->pNext;
}
m_pBoxs[nIndex].nMaxNum = nMax;
m_pBoxs[nIndex].nVal = q->nValue;
}
// Looks up the stored node whose hash key and value both match `node`.
// Returns a pointer to that node, or NULL when the value is not stored.
tableNode* CMyHasTable::Find(tableNode node)
{
    tableNode* cur = m_pBoxs[GetBoxIndex(node.nKey)].pHead;
    for (; cur != NULL; cur = cur->pNext)
    {
        // Compare the hash first, then the value itself.
        if (cur->nKey != node.nKey)
            continue;
        if (cur->nValue == node.nValue)
            return cur;
    }
    return NULL;
}
// Moves the table to the next growth level if one exists. At the last level
// the growth threshold is raised to 0x20000000 instead.
void CMyHasTable::IncreaseLevel()
{
    const bool atLastLevel = !(m_nLevel < m_nBoxCount - 1);
    if (atLastLevel)
    {
        m_nHight = 0x20000000;
        return;
    }
    SwitchLevel(m_nLevel + 1);
}
void CMyHasTable::SwitchLevel(int nNewLevel)
{
m_nLevel = nNewLevel;
int nNewBox = m_pBoxSize[m_nLevel];
BoxVec* pNewBoxs = new BoxVec[nNewBox];
memset(pNewBoxs, 0, sizeof(BoxVec) * nNewBox);
for (int i = 0; i < m_nBox; ++i)
{
tableNode* p = m_pBoxs[i].pHead;
tableNode* q = NULL;;
int nCout = m_pBoxs[i].nCount;
int nMax = m_pBoxs[i].nMaxNum;
while (p != NULL)
{
q = p->pNext;
DWORD sht_key = GetBoxIndex(p->nKey, nNewBox);
p->pNext = pNewBoxs[sht_key].pHead;
pNewBoxs[sht_key].pHead = p;
p = q;
pNewBoxs[sht_key].nCount = nCout;
pNewBoxs[sht_key].nMaxNum = nMax;
}
}
m_nHight = m_nBox = m_pBoxSize[m_nLevel];
m_nLow = m_nLevel > 0 ? m_pBoxSize[m_nLevel - 1]/2 : 0;
delete m_pBoxs;
m_pBoxs = pNewBoxs;
}
- 快速查询大量数据的hash_table
- sqlit大量数据快速查询(1)
- mysql嵌套查询,快速插入大量数据
- 数据库SQLite 数据大量存储 和快速读取 链表查询的优化 使用gosn
- redis快速的插入大量的数据
- GzipUtilities快速传输大量数据的例子!
- Oracle如何快速、大量的插入数据
- 大量数据查询输出的处理方法
- sql 大量数据查询的 优化措施
- SQL大量数据查询的优化
- PHP查询MySQL大量数据的内存
- PHP查询MySQL大量数据的内存
- SQL大量数据查询的优化
- SQL大量数据查询的优化
- SQL大量数据查询的优化
- PHP查询MySQL大量数据的内存
- MySQL查询大量数据的方法
- SQL大量数据查询的优化
- 一步一步学Linq to sql(一):预备知识
- 关于Oracle数据库热备份原理深入分析
- ASP 数据库链接类
- CLOB转换为STRING
- What is managed code?
- 快速查询大量数据的hash_table
- 招网站前端技术人才(专长css,xhtml,js)
- Ant实践-java 构建工具
- IIS+PHP+MySQL+Zend Optimizer+GD库+phpMyAdmin安装
- sax和Dom解析xml文档 文档驱动和事件驱动
- 更认真、更持久、更韧性
- select语句执行顺序
- [转帖]FPGA/IC设计入门-FPGA入门(一)
- Linux必学的系统管理命令