算法学习之哈希表实现

来源：互联网发布：邮箱域名大全编辑：程序博客网时间：2024/06/07 03:54

哈希表是一种存储键值对的数据结构，经常用于数据库索引、map、缓存等地方。可以表示成value = f(key)，查找效率很高。哈希表实现最关键的地方是哈希函数的选择，好的哈希函数分布均匀、冲突少。现在工业界最常用的哈希函数是murmur，memcached和nginx使用的就是murmur。简单常用的哈希函数构造法有：1.直接定址法，利用key设计一个线性函数 f=a*key+b；2.数字分析法，主要抽取部分数字进行循环左移、右移、相加减等各种操作；3.平方取中法；4.折叠法；5.除留余数法等。

哈希表冲突处理：哈希函数不可避免会发生冲突，即不同的key计算出了相同的hashcode。处理的方法有闭散列法和开散列法。1.闭散列法就是所有的操作都在原来的存储空间内完成，不开辟新的存储空间。线性探测法：发生冲突时依次探测下一个位置，$f_i(key) = (f(key) + i) \bmod n$，其中 $i = 1, 2, \dots, n-1$，n 为表长。

双重散列法：hash函数产生冲突时，调用另一个散列函数（rehash函数）重新计算hash值。2.开散列法也称为拉链法：哈希到同一位置的元素用链表串在同一个桶下，整个哈希表由这些链表组成。拉链法是用得最多的一种方法。

实现一个c语言版的存储字符串类型的hashmap。

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * A small separate-chaining hash map from C strings to C strings.
 *
 * The table capacity is always a power of two, so a bucket index is
 * computed as hash & (capacity - 1). The map stores the key and value
 * POINTERS it is given; it never copies or frees the strings, so they
 * must outlive the map.
 */

/* The shift expressions are parenthesised so the macros expand safely
 * inside larger expressions (comparisons, multiplications, ...). */
#define DEFAULT_INITIAL_CAPACITY (1 << 4)   /* initial number of buckets */
#define MAXIMUM_CAPACITY         (1 << 30)  /* largest number of buckets */
#define DEFAULT_LOAD_FACTOR      0.75F      /* resize when size > capacity * factor */

/* Intrusive singly linked list link embedded in every entry. */
struct hlist_node
{
    struct hlist_node **prev;   /* reserved for a doubly linked variant; unused here */
    struct hlist_node *next;    /* next entry in the same bucket, or NULL */
};

/* Head of one bucket chain. */
struct hlist_head
{
    struct hlist_node *first;
};

/* One key/value entry. hlist_node must stay the FIRST member: the code
 * converts between struct hlist_node * and struct node * by casting. */
struct node
{
    struct hlist_node hlist_node;
    char *key;
    char *value;
};

struct hash_map
{
    struct hlist_head *table;   /* bucket array */
    int length;                 /* number of buckets (power of two) */
    int size;                   /* number of key/value pairs stored */
    int threshold;              /* resize once size exceeds this */
};

/*
 * Multiplicative string hash (seed 131, often called the BKDR hash).
 * Bytes are read as unsigned char so the result does not depend on
 * whether plain char is signed on the target platform.
 * Returns a non-negative int (the top bit is masked off).
 */
static int hash(char *key)
{
    const unsigned char *p = (const unsigned char *)key;
    unsigned int seed = 131;    /* 31 131 1313 13131 131313 etc. */
    unsigned int h = 0;

    while (*p != '\0') {
        h = h * seed + *p++;
    }
    return (int)(h & 0x7FFFFFFF);
}

/*
 * Double the number of buckets and redistribute every entry.
 *
 * Because the capacity doubles and the index is hash & (capacity - 1),
 * an entry in old bucket i either stays in bucket i (bit old_cap of its
 * hash is 0) or moves to bucket i + old_cap (bit is 1).
 *
 * If the table is already at MAXIMUM_CAPACITY, or the new bucket array
 * cannot be allocated, the map is left unchanged and stays usable.
 */
static void resize(struct hash_map *map)
{
    int old_cap = map->length;

    if (old_cap >= MAXIMUM_CAPACITY) {
        /* Cannot grow any further: stop triggering resizes. */
        map->threshold = INT_MAX;
        return;
    }

    int new_cap = old_cap << 1;
    struct hlist_head *new_table = calloc((size_t)new_cap, sizeof *new_table);
    if (new_table == NULL) {
        return;     /* keep the old table; chains just get longer */
    }

    struct hlist_head *old_table = map->table;
    for (int i = 0; i < old_cap; i++) {
        struct hlist_node *low_head = NULL, *low_tail = NULL;
        struct hlist_node *high_head = NULL, *high_tail = NULL;
        struct hlist_node *p = old_table[i].first;

        while (p != NULL) {
            struct hlist_node *next = p->next;

            /* Parentheses are required: == binds tighter than &. */
            if ((hash(((struct node *)p)->key) & old_cap) == 0) {
                if (low_tail == NULL)
                    low_head = p;
                else
                    low_tail->next = p;
                low_tail = p;
            } else {
                if (high_tail == NULL)
                    high_head = p;
                else
                    high_tail->next = p;
                high_tail = p;
            }
            p = next;
        }

        /* Terminate both chains; relative order of entries is preserved. */
        if (low_tail != NULL)
            low_tail->next = NULL;
        if (high_tail != NULL)
            high_tail->next = NULL;

        new_table[i].first = low_head;
        new_table[i + old_cap].first = high_head;
    }

    map->table = new_table;
    map->length = new_cap;
    map->threshold = (int)(DEFAULT_LOAD_FACTOR * new_cap);
    free(old_table);
}

/*
 * Allocate an empty map with DEFAULT_INITIAL_CAPACITY buckets.
 * Returns NULL if memory cannot be allocated.
 */
static struct hash_map *new_hash_map(void)
{
    struct hash_map *map = malloc(sizeof *map);
    if (map == NULL) {
        return NULL;
    }

    map->length = DEFAULT_INITIAL_CAPACITY;
    map->threshold = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
    map->size = 0;
    map->table = calloc((size_t)map->length, sizeof *map->table);
    if (map->table == NULL) {
        free(map);
        return NULL;
    }
    return map;
}

/*
 * Release the map, its bucket array and all entries.
 * The key and value strings are not owned by the map and are not freed.
 */
static void free_hash_map(struct hash_map *map)
{
    if (map == NULL) {
        return;
    }
    if (map->table != NULL) {
        for (int i = 0; i < map->length; i++) {
            struct hlist_node *p = map->table[i].first;
            while (p != NULL) {
                struct hlist_node *next = p->next;
                free((struct node *)p);
                p = next;
            }
        }
    }
    free(map->table);
    free(map);
}

/*
 * Allocate a detached entry holding the given key/value pointers.
 * Returns the embedded list link, or NULL if allocation fails.
 */
static struct hlist_node *new_node(char *key, char *value)
{
    struct node *node = malloc(sizeof *node);
    if (node == NULL) {
        return NULL;
    }
    node->key = key;
    node->value = value;
    node->hlist_node.prev = NULL;
    node->hlist_node.next = NULL;
    return &node->hlist_node;
}

/*
 * Insert key -> value, or replace the value if the key already exists.
 *
 * hash must be hash(key).
 * Returns 0 when a new entry was added, 1 when an existing entry was
 * updated, and -1 when a new entry could not be allocated.
 */
static int put_val(struct hash_map *map, int hash, char *key, char *value)
{
    /* link always points at the pointer that leads to the current entry,
     * so after the loop it is exactly where a new tail must be stored. */
    struct hlist_node **link = &map->table[(map->length - 1) & hash].first;

    /* Every entry of the chain is compared, including the last one. */
    while (*link != NULL) {
        struct node *entry = (struct node *)*link;
        if (strcmp(entry->key, key) == 0) {
            entry->value = value;
            return 1;
        }
        link = &(*link)->next;
    }

    struct hlist_node *fresh = new_node(key, value);
    if (fresh == NULL) {
        return -1;
    }
    *link = fresh;

    if (++map->size > map->threshold)
        resize(map);
    return 0;
}

/*
 * Public insert/update entry point; see put_val for the return values.
 * Returns -1 if map or key is NULL.
 */
int put(struct hash_map *map, char *key, char *value)
{
    if (map == NULL || key == NULL) {
        return -1;
    }
    return put_val(map, hash(key), key, value);
}

/*
 * Find the entry for key. hash must be hash(key).
 * Returns the entry's list link, or NULL if the key is absent.
 */
struct hlist_node *get_node(struct hash_map *map, int hash, char *key)
{
    if (map == NULL || key == NULL || map->table == NULL || map->length <= 0) {
        return NULL;
    }

    struct hlist_node *p = map->table[(map->length - 1) & hash].first;
    while (p != NULL) {
        if (strcmp(((struct node *)p)->key, key) == 0) {
            return p;
        }
        p = p->next;
    }
    return NULL;
}

/*
 * Look up the value stored for key.
 * Returns the stored value pointer, or NULL if the key is absent.
 */
char *get(struct hash_map *map, char *key)
{
    if (key == NULL) {
        return NULL;
    }
    struct node *e = (struct node *)get_node(map, hash(key), key);
    return e == NULL ? NULL : e->value;
}

int main(void)
{
    struct hash_map *map = new_hash_map();
    if (map == NULL) {
        return 1;
    }

    put(map, "中国", "北京");
    put(map, "美国", "华盛顿");
    put(map, "俄罗斯", "莫斯科");
    put(map, "日本", "东京");
    printf("%s\n", get(map, "中国"));
    printf("%s\n", get(map, "俄罗斯"));

    free_hash_map(map);
    return 0;
}

阅读全文

0 0