数据结构-散列

来源：互联网发布：中山大学图书馆知乎编辑：程序博客网时间：2024/05/01 01:32

散列（hashing）：是一种以常数平均时间执行插入、删除和查找的技术。

散列表（hash table）：是包含有关键字的具有固定大小的数组。表的大小记作table-size。每个关键字被映射到从0到table-size-1这个范围中的某个数，并且被放到适当的单元中。这个映射叫作散列函数（hash function）。

散列函数：一个好的方法为保证表的大小为素数。

一个好的散列函数：

/*
 * Map an integer key to a bucket index in the range [0, tableSize - 1].
 *
 * x          key to hash; may be negative
 * tableSize  number of buckets; must be positive
 *
 * The % operator takes the sign of the dividend, so a negative key would
 * otherwise yield a negative, out-of-range index.  Such remainders are
 * shifted back into range.
 */
int Hash(int x, int tableSize)
{
    int index = x % tableSize;

    if (index < 0) {
        index += tableSize;
    }
    return index;
}

冲突：当一个元素被插入时，如果它散列到的单元中已经存有另一个元素，就产生一个冲突。

冲突消除的方法：最简单的两种方法为：分离链表法和开放定址法。

分离链表法：将散列到同一个值的所有元素保存到一个表中。表的类型声明如下：

/* Separate chaining: type declarations and function prototypes. */

struct listNode;
struct hashTbl;

typedef struct listNode *position;  /* pointer to one node of a chain */
typedef position list;              /* a chain, referred to by its header node */
typedef struct hashTbl *hashTable;  /* handle to a table */

/* One node of a singly linked chain. */
struct listNode {
    int element;    /* stored key */
    position next;  /* following node in the chain */
};

/* The table itself: an array of chains. */
struct hashTbl {
    int tableSize;   /* number of entries in theLists */
    list *theLists;  /* array of chains, one per bucket */
};

int Hash(int x, int tableSize);
hashTable initializeTable(int tableSize);
position find(int key, hashTable h);
void insert(int key, hashTable h);
int isPrime(int x);
int nextPrime(int x);

表的初始化：

/*
 * Create an empty separate-chaining hash table.
 *
 * tableSize  requested number of buckets; the actual bucket count is
 *            nextPrime(tableSize).
 *
 * Returns the new table, or NULL if any allocation fails (after printing
 * "Out of space!").  Every bucket receives a header node whose next pointer
 * is NULL; the header's element field is left unset.
 *
 * Previously the failure message was printed and execution continued, which
 * dereferenced a NULL pointer.  On failure everything allocated so far is
 * now released before returning NULL.
 */
hashTable initializeTable(int tableSize)
{
    hashTable h;
    int i;

    h = (struct hashTbl *)malloc(sizeof(struct hashTbl));
    if (h == NULL) {
        cout << "Out of space!" << endl;
        return NULL;
    }

    h->tableSize = nextPrime(tableSize);
    h->theLists = (list *)malloc(sizeof(list) * h->tableSize);
    if (h->theLists == NULL) {
        cout << "Out of space!" << endl;
        free(h);
        return NULL;
    }

    for (i = 0; i < h->tableSize; i++) {
        h->theLists[i] = (struct listNode *)malloc(sizeof(struct listNode));
        if (h->theLists[i] == NULL) {
            cout << "Out of space!" << endl;
            /* Release the headers allocated so far, then the table itself. */
            while (i > 0) {
                free(h->theLists[--i]);
            }
            free(h->theLists);
            free(h);
            return NULL;
        }
        h->theLists[i]->next = NULL;
    }
    return h;
}

Find例程：

/*
 * Look up key in a separate-chaining table.
 *
 * Returns the node that holds key, or NULL when the chain for key's bucket
 * has no such node.  The first node of each chain is a header and is skipped.
 */
position find(int key, hashTable h)
{
    list bucket = h->theLists[Hash(key, h->tableSize)];
    position node;

    for (node = bucket->next; node != NULL; node = node->next) {
        if (node->element == key) {
            break;
        }
    }
    return node;
}

Insert例程：

void insert(int key, hashTable h){position pos, newCell;list l;pos = find(key, h);if (pos==NULL){newCell = (listNode*)malloc(sizeof(struct listNode));if (newCell==NULL){cout << "Out of space!" << endl;}else{l = h->theLists[Hash(key, h->tableSize)];newCell->next = l->next;newCell->element = key;l->next = newCell;}}}

nextPrime函数的实现：求大于等于当前值的最小素数。

/*
 * Return 1 if x is prime, 0 otherwise.
 *
 * Values below 2 (0, 1 and every negative number) are not prime.  The
 * trial-division loop never ran for them, so they used to be reported as
 * prime and nextPrime(0) returned 0.
 */
int isPrime(int x)
{
    int i;

    if (x < 2) {
        return 0;
    }
    /* i <= x / i means i * i <= x, without overflow or floating-point sqrt. */
    for (i = 2; i <= x / i; i++) {
        if (x % i == 0) {
            return 0;
        }
    }
    return 1;
}

/*
 * Return the smallest prime greater than or equal to x.
 * For x below 2 the result is 2.
 */
int nextPrime(int x)
{
    if (x < 2) {
        return 2;
    }
    while (!isPrime(x)) {
        x = x + 1;
    }
    return x;
}

开放定址法：冲突发生后，依次尝试单元 h_i(x) = (Hash(x) + F(i)) mod TableSize（i = 0, 1, 2, …，且 F(0) = 0）。线性探测法：F(i) = i；平方探测法：F(i) = i²；双散列：F(i) = i·hash₂(x)，即选择第二个散列函数。下面以平方探测法为例。散列结构声明：
```
/* Open addressing: type declarations and function prototypes. */

struct hashTbl;

typedef int position;               /* index into the cell array */
typedef struct hashTbl *hashTable;  /* handle to a table */

/*
 * State of one cell.  NOTE: "legitiamte" is a misspelling of "legitimate";
 * the name is kept because other routines refer to it.
 */
enum kindOfEntry {
    legitiamte,
    empty,
    deleted
};

/* One slot of the table. */
struct hashEntry {
    int element;            /* stored key */
    enum kindOfEntry info;  /* state of this slot */
};
typedef struct hashEntry cell;

/* The table itself: an array of cells. */
struct hashTbl {
    int tableSize;   /* number of entries in theCells */
    cell *theCells;  /* array of slots */
};

int Hash(int key, int tableSize);
hashTable initializeTable(int tableSize);
position find(int key, hashTable h);
void insert(int key, hashTable h);
int nextPrime(int x);
int isPrime(int x);
```
初始化例程：
```
/*
 * Create an empty open-addressing hash table.
 *
 * tableSize  requested number of cells; the actual cell count is
 *            nextPrime(tableSize).
 *
 * Returns the new table with every cell marked empty, or NULL if an
 * allocation fails (after printing "Out of space!").
 *
 * Two defects are fixed here: the second allocation check tested
 * h->tableSize instead of h->theCells, and a failed allocation was reported
 * but then dereferenced anyway.
 */
hashTable initializeTable(int tableSize)
{
    hashTable h;
    int i;

    h = (struct hashTbl *)malloc(sizeof(struct hashTbl));
    if (h == NULL) {
        cout << "Out of space!" << endl;
        return NULL;
    }

    h->tableSize = nextPrime(tableSize);
    h->theCells = (cell *)malloc(sizeof(cell) * h->tableSize);
    if (h->theCells == NULL) {
        cout << "Out of space!" << endl;
        free(h);
        return NULL;
    }

    for (i = 0; i < h->tableSize; i++) {
        h->theCells[i].info = empty;
    }
    return h;
}
```
Find例程：
```
/*
 * Locate key in an open-addressing table using quadratic probing.
 *
 * Returns the index of the first probed cell that is either marked empty or
 * whose element equals key.  Cells marked deleted are not treated as empty,
 * so probing continues past them unless their element equals key.
 *
 * The loop only stops on an empty or matching cell.  If the probe sequence
 * reaches neither (for example, a table with no empty cell and key absent),
 * it does not terminate.
 *
 * The wrap-around now uses %, because a single subtraction of tableSize
 * stops being enough once the step 2*i - 1 exceeds tableSize, which let the
 * index run past the end of the array.  While the old code stayed in range,
 * both forms produce the same index.
 */
position find(int key, hashTable h)
{
    position currentPos;
    int collisionNum;

    collisionNum = 0;
    currentPos = Hash(key, h->tableSize);
    while (h->theCells[currentPos].info != empty &&
           h->theCells[currentPos].element != key) {
        /* i*i - (i-1)*(i-1) == 2*i - 1: the step from the previous probe. */
        collisionNum++;
        currentPos = (currentPos + 2 * collisionNum - 1) % h->tableSize;
    }
    return currentPos;
}
```
Insert例程：
```
void insert(int key, hashTable h){position pos;pos = find(key, h);if (h->theCells[pos].info!=legitiamte){h->theCells[pos].info = legitiamte;h->theCells[pos].element = key;}}
```
nextPrime函数的实现：
```
/*
 * Return 1 if x is prime, 0 otherwise.
 *
 * The previous bound, i < sqrt(x), stopped one short of the square root, so
 * perfect squares such as 4, 9 and 25 were reported as prime.  Values below
 * 2 were also reported as prime because the loop never ran for them.
 */
int isPrime(int x)
{
    int i;

    if (x < 2) {
        return 0;
    }
    /* i <= x / i means i * i <= x, without overflow or floating-point sqrt. */
    for (i = 2; i <= x / i; i++) {
        if (x % i == 0) {
            return 0;
        }
    }
    return 1;
}

/*
 * Return the smallest prime greater than or equal to x.
 * For x below 2 the result is 2.
 */
int nextPrime(int x)
{
    if (x < 2) {
        return 2;
    }
    while (!isPrime(x)) {
        x = x + 1;
    }
    return x;
}
```
最后，对于使用平方探测法的开放定址散列法，若表中元素太满，那么操作的运行时间可能过长，且Insert操作可能失败。此时的解决办法为再散列（rehashing）：建立另外一个大约两倍大的表，扫描原始散列表，计算每一个元素的新散列值并将其插入新表中。

0 0