Python源码学习笔记 5 字典对象
来源:互联网 发布:js插件如何编写 闭包 编辑:程序博客网 时间:2024/06/06 12:31
Python中对于字典的实现是根据key进行hash生成散列表,算法为“开放定址法”
1.PyDictEntry(K, V对)
字典中每一个kv对,实际上就是一个entry对象
entry存在3种状态:Active、Unused、Dummy
其含义显而易见,值得注意的是Dummy状态:由于散列表采用“开放定址法”,当某个entry恰巧处于某条探测(碰撞)链中时,删除它不能直接将其置为Unused,否则探测链会在此处被截断,位于其后的key将无法被找到而直接返回查询失败;因此删除时只把该entry标记为Dummy。
[dictobject.h]typedef struct { Py_ssize_t me_hash; /* cached hash code of me_key */ PyObject *me_key; PyObject *me_value;} PyDictEntry;
2. PyDictObject对象结构(关联容器dict)
由源码可知,该对象是一个定长对象,初始时会分配一个8个entry的数组ma_smalltable
#define PyDict_MINSIZE 8typedef struct _dictobject PyDictObject;struct _dictobject { PyObject_HEAD Py_ssize_t ma_fill; //entry个数: Active + Dummy Py_ssize_t ma_used; //entry个数: Active Py_ssize_t ma_mask; //ma_table能容纳元素数,搜索时用以对hash值做与操作 PyDictEntry *ma_table; //entry超过8个时会分配较大数组,指针指向该数组 PyDictEntry *(*ma_lookup)(PyDictObject *mp, PyObject *key, long hash);//查询函数 PyDictEntry ma_smalltable[PyDict_MINSIZE];//默认存在的小entry数组避免频繁分配内存};
3.PyDictObject对象的创建
typedef PyDictEntry dictentry;typedef PyDictObject dictobject;/* 将ma_table指向ma_smalltable 并初始化ma_mask */#define INIT_NONZERO_DICT_SLOTS(mp) do { \ (mp)->ma_table = (mp)->ma_smalltable; \ (mp)->ma_mask = PyDict_MINSIZE - 1; \ } while(0)/* 将ma_smalltable清零,重置ma_used和ma_fill并调用INIT_NONZERO_DICT_SLOTS */#define EMPTY_TO_MINSIZE(mp) do { \ memset((mp)->ma_smalltable, 0, sizeof((mp)->ma_smalltable)); \ (mp)->ma_used = (mp)->ma_fill = 0; \ INIT_NONZERO_DICT_SLOTS(mp); \ } while(0)PyObject *PyDict_New(void){ register PyDictObject *mp; if (dummy == NULL) { /* Auto-initialize dummy */ dummy = PyString_FromString("<dummy key>"); if (dummy == NULL) return NULL;#ifdef SHOW_CONVERSION_COUNTS Py_AtExit(show_counts);#endif#ifdef SHOW_ALLOC_COUNT Py_AtExit(show_alloc);#endif#ifdef SHOW_TRACK_COUNT Py_AtExit(show_track);#endif } if (numfree) { /* 判断dict缓冲池是否可用 */ mp = free_list[--numfree]; assert (mp != NULL); assert (Py_TYPE(mp) == &PyDict_Type); _Py_NewReference((PyObject *)mp); if (mp->ma_fill) { /* 检查ma_fill判断是否需要EMPTY_TO_MINSIZE */ EMPTY_TO_MINSIZE(mp); } else { /* 否则至少需要进行INIT_NONZERO_DICT_SLOTS操作 */ INIT_NONZERO_DICT_SLOTS(mp); } assert (mp->ma_used == 0); assert (mp->ma_table == mp->ma_smalltable); assert (mp->ma_mask == PyDict_MINSIZE - 1);#ifdef SHOW_ALLOC_COUNT count_reuse++;#endif /* 缓冲池不可用时,进行内存分配操作 */ } else { mp = PyObject_GC_New(PyDictObject, &PyDict_Type); if (mp == NULL) return NULL; EMPTY_TO_MINSIZE(mp);#ifdef SHOW_ALLOC_COUNT count_alloc++;#endif } mp->ma_lookup = lookdict_string;#ifdef SHOW_TRACK_COUNT count_untracked++;#endif#ifdef SHOW_CONVERSION_COUNTS ++created;#endif return (PyObject *)mp;}
4.entry的搜索
大致流程如下
首先寻找第一个entry:
- 通过hash & mask获取索引i,在ma_table[i]处取出该entry对象
- 根据该entry的key产生2种可能:
- ep->me_key == NULL 搜索失败,返回该entry
- ep->me_key == key 搜索成功,返回该entry
- 如果me_key == dummy,令freeslot = ep
- 检查active态的entry,判断hash是否相同,若相同则继续比较key值是否相同
- 失败的话继续寻找下一个散列位置,这样迭代下去
/*
 * General-purpose slot lookup.
 *
 * Probes the table starting at (hash & mask) and returns:
 *   - the entry whose key is `key` (same pointer, or equal by Py_EQ
 *     comparison with a matching cached hash);
 *   - otherwise, on reaching an unused slot, the first dummy slot seen on
 *     the way if there was one, else that unused slot;
 *   - NULL if a key comparison reported an error (cmp < 0).
 *
 * If the table or the compared entry changed during a comparison, the
 * search is restarted from scratch.
 */
static PyDictEntry *
lookdict(PyDictObject *mp, PyObject *key, register long hash)
{
    register size_t i;
    register size_t perturb;
    register PyDictEntry *freeslot;
    register size_t mask = (size_t)mp->ma_mask;
    PyDictEntry *ep0 = mp->ma_table;
    register PyDictEntry *ep;
    register int cmp;
    PyObject *startkey;

    /* Mask the hash down to a valid table index. */
    i = (size_t)hash & mask;
    ep = &ep0[i];

    /* First probe: unused slot (miss) or identical key object (hit). */
    if (ep->me_key == NULL || ep->me_key == key)
        return ep;

    if (ep->me_key == dummy) {
        /* Remember the dummy slot so it can be handed back for reuse. */
        freeslot = ep;
    }
    else {
        /* Active entry: compare cached hashes first, then the keys. */
        if (ep->me_hash == hash) {
            startkey = ep->me_key;
            Py_INCREF(startkey);
            cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
            Py_DECREF(startkey);
            if (cmp < 0)
                return NULL;
            if (ep0 == mp->ma_table && ep->me_key == startkey) {
                if (cmp > 0)
                    return ep;
            }
            else {
                /* The compare did major nasty stuff to the
                 * dict:  start over.
                 * XXX A clever adversary could prevent this
                 * XXX from terminating.
                 */
                return lookdict(mp, key, hash);
            }
        }
        freeslot = NULL;
    }

    /* In the loop, me_key == dummy is by far (factor of 100s) the
       least likely outcome, so test for that last. */
    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
        /* Compute the next slot in the probe sequence. */
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
        if (ep->me_key == NULL)
            return freeslot == NULL ? ep : freeslot;
        if (ep->me_key == key)
            return ep;
        if (ep->me_hash == hash && ep->me_key != dummy) {
            startkey = ep->me_key;
            Py_INCREF(startkey);
            cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
            Py_DECREF(startkey);
            if (cmp < 0)
                return NULL;
            if (ep0 == mp->ma_table && ep->me_key == startkey) {
                if (cmp > 0)
                    return ep;
            }
            else {
                /* The compare did major nasty stuff to the
                 * dict:  start over.
                 * XXX A clever adversary could prevent this
                 * XXX from terminating.
                 */
                return lookdict(mp, key, hash);
            }
        }
        else if (ep->me_key == dummy && freeslot == NULL)
            freeslot = ep;
    }
    assert(0);          /* NOT REACHED */
    return 0;
}
lookdict_string默认搜索方法
[dictobject.c]static dictentry* lookdict_string(dictobject *mp, PyObject *key, register long hash){ register int i; register unsigned int perturb; register dictentry *freeslot; register unsigned int mask = mp->ma_mask; dictentry *ep0 = mp->ma_table; register dictentry *ep; if (!PyString_CheckExact(key)) { //判断key是否为string类型,若非则返回传统搜索方式 mp->ma_lookup = lookdict; return lookdict(mp, key, hash); } i = hash & mask; ep = &ep0[i]; if (ep->me_key == NULL || ep->me_key == key) return ep; if (ep->me_key == dummy) freeslot = ep; else { //string默认策略主要不同在此处,判断函数较为轻量 if (ep->me_hash == hash && _PyString_Eq(ep->me_key, key)) { return ep; } freeslot = NULL; } //搜索第二阶段:遍历冲突链,检查每一个entry for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; ep = &ep0[i & mask]; if (ep->me_key == NULL) return freeslot == NULL ? ep : freeslot; if (ep->me_key == key || (ep->me_hash == hash && ep->me_key != dummy && _PyString_Eq(ep->me_key, key))) return ep; if (ep->me_key == dummy && freeslot == NULL) freeslot = ep; }}
5.元素插入
insertdict函数:
该函数根据ma_lookup返回的entry所处的状态(Active、Unused或Dummy)决定插入的策略
[dictobject.c]static void insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value){ PyObject *old_value; register dictentry *ep; ep = mp->ma_lookup(mp, key, hash); /*搜索成功*/ if (ep->me_value != NULL) { old_value = ep->me_value; ep->me_value = value; Py_DECREF(old_value); Py_DECREF(key); } /* 搜索失败,返回的值可能是unused或dummy*/ else { if (ep->me_key == NULL) //为unused时ma_fill++ mp->ma_fill++; else //否则为dummy Py_DECREF(ep->me_key); ep->me_key = key; ep->me_hash = hash; ep->me_value = value; mp->ma_used++; }}
PyDict_SetItem函数:
insertdict函数被该函数调用,该函数主要关心取得hash值
[dictobject.c]int PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value){ register dictobject *mp; register long hash; register Py_ssize_t n_used; mp = (dictobject *)op; //[1]:计算hash值 if (PyString_CheckExact(key)) { hash = ((PyStringObject *)key)->ob_shash; if (hash == -1) hash = PyObject_Hash(key); } else { hash = PyObject_Hash(key); if (hash == -1) return -1; } //[2]:插入(key, value)元素对 n_used = mp->ma_used; insertdict(mp, key, hash, value); //[3]:必要时调整dict的内存空间,实际上为判断装填率是否大于2/3 if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2)) return 0; return dictresize(mp, mp->ma_used * (mp->ma_used>50000 ? 2 : 4));}
dictresize函数
该函数负责调整字典散列表的大小:根据需求使用ma_smalltable或重新分配新的空间,旧空间中的Active entry依次插入新空间,Dummy entry则趁机释放掉
[dictobject.c]static int dictresize(dictobject *mp, int minused){ Py_ssize_t newsize; dictentry *oldtable, *newtable, *ep; Py_ssize_t i; int is_oldtable_malloced; dictentry small_copy[PyDict_MINSIZE]; //[1]:确定新的table的大小 for(newsize = PyDict_MINSIZE; newsize <= minused && newsize > 0; newsize <<= 1) ; oldtable = mp->ma_table; is_oldtable_malloced = (oldtable != mp->ma_smalltable); //[2]: 新的table可以使用mp->ma_smalltable if (newsize == PyDict_MINSIZE) { newtable = mp->ma_smalltable; if (newtable == oldtable) { if (mp->ma_fill == mp->ma_used) { //没有任何Dummy态entry,直接返回 return 0; } //将旧table拷贝,进行备份 memcpy(small_copy, oldtable, sizeof(small_copy)); oldtable = small_copy; } } //[3]: 新的table不能使用mp->ma_smalltable,需要在系统堆上申请 else { newtable = PyMem_NEW(dictentry, newsize); } //[4]:设置新table mp->ma_table = newtable; mp->ma_mask = newsize - 1; memset(newtable, 0, sizeof(dictentry) * newsize); mp->ma_used = 0; i = mp->ma_fill; mp->ma_fill = 0; //[5]:处理旧table中的entry: // 1、Active态entry,搬移到新table中 // 2、Dummy态entry,调整key的引用计数,丢弃该entry for (ep = oldtable; i > 0; ep++) { if (ep->me_value != NULL) { /* active entry */ --i; insertdict(mp, ep->me_key, ep->me_hash, ep->me_value); } else if (ep->me_key != NULL) { /* dummy entry */ --i; assert(ep->me_key == dummy); Py_DECREF(ep->me_key); } } //[6]:必要时释放旧table所维护的内存空间 if (is_oldtable_malloced) PyMem_DEL(oldtable); return 0;}
6.删除元素
[dictobject.c]int PyDict_DelItem(PyObject *op, PyObject *key){ register dictobject *mp; register long hash; register dictentry *ep; PyObject *old_value, *old_key; //[1]:获得hash值 if (!PyString_CheckExact(key) || (hash = ((PyStringObject *) key)->ob_shash) == -1) { hash = PyObject_Hash(key); if (hash == -1) return -1; } //[2]:搜索entry mp = (dictobject *)op; ep = (mp->ma_lookup)(mp, key, hash); if (ep->me_value == NULL) { //搜索失败,entry不存在 return -1; } //[3]:删除entry所维护的元素,将entry的状态转为dummy态 old_key = ep->me_key; ep->me_key = dummy; old_value = ep->me_value; ep->me_value = NULL; mp->ma_used--; Py_DECREF(old_value); Py_DECREF(old_key); return 0;}
7.字典缓冲池
与列表对象相似,也是待删除该字典对象时尝试加入到缓冲区。加入前进行一系列的清理动作。
[dictobject.c]static void dict_dealloc(register dictobject *mp){ register dictentry *ep; Py_ssize_t fill = mp->ma_fill; //[1]:调整dict中对象的引用计数 for (ep = mp->ma_table; fill > 0; ep++) { if (ep->me_key) { --fill; Py_DECREF(ep->me_key); Py_XDECREF(ep->me_value); } } //[2] :释放从系统堆中申请的内存空间 if (mp->ma_table != mp->ma_smalltable) PyMem_DEL(mp->ma_table); //[3] :将被销毁的PyDictObject对象放入缓冲池 if (num_free_dicts < MAXFREEDICTS && mp->ob_type == &PyDict_Type) free_dicts[num_free_dicts++] = mp; else mp->ob_type->tp_free((PyObject *)mp);}
0 0
- Python源码学习笔记 5 字典对象
- python学习笔记 字典
- python-字典学习笔记
- python字典学习笔记
- Python 学习笔记-字典
- Python学习笔记-字典
- Python源码学习笔记 2 整数对象
- Python源码学习笔记 3 字符串对象
- Python源码学习笔记 4 列表对象
- python 学习笔记之字典
- Python学习笔记之字典
- python 学习笔记6------字典
- python 学习笔记之字典
- Python学习笔记3(字典)
- python学习笔记4-字典
- Python学习笔记(2)字典
- Python源码学习笔记 1 简述python对象
- 《Redis源码学习笔记》数据结构-字典
- JS设计模式七:装饰者模式
- 剑指offer数学题
- [leetCode刷题笔记]475. Heaters
- JS设计模式八:观察者模式
- ida动态调试之so层反调试入门篇
- Python源码学习笔记 5 字典对象
- linux的一些常用命令
- Android SharedPreferences,Sqlite,ContentProvider,File
- jquery操作连接式书写
- Windows 64位下Theano新Gpu Backend的安装方法
- Oracle分页查询
- 复数相加减(对象的拷贝构造函数)
- 深入理解C++中的异常处理机制
- 回文数(难度系数:半颗星)