Python中的 type() 和 __class__

来源:互联网 发布:js div style 编辑:程序博客网 时间:2024/06/04 20:24

来源:http://yunyuan.github.io/python/2014/08/19/python-type-class/


最近在公司内部的问答系统上有同事问了一个问题:Python中的type()和__class__有什么差别?

  1. >>> class Foo(object):
  2. pass
  3. >>> class Bar(object):
  4. pass
  5. >>> class Brion(object):
  6. pass
  7. >>> class ASML(object):
  8. __class__ = Foo
  9. >>> b = Bar()
  10. >>> a = ASML()

Case 1

  1. >>> b.__class__, type(b)
  2. (<class '__main__.Bar'>, <class '__main__.Bar'>)
  3. >>>
  4. >>> b.__class__ = Foo
  5. >>>
  6. >>> b.__class__, type(b)
  7. (<class '__main__.Foo'>, <class '__main__.Foo'>)

Case 2

  1. >>> a.__class__, type(a)
  2. (<class '__main__.Foo'>, <class '__main__.ASML'>)
  3. >>>
  4. >>> a.__class__ = Brion
  5. >>>
  6. >>> a.__class__, type(a)
  7. (<class '__main__.Brion'>, <class '__main__.ASML'>)

大家看出Case 1和Case 2的差别了吧。问题来了:
1. type(obj)到底做了些什么事情?
2. 为什么a在改变__class__后,type(a)还是ASML呢?

为了解决这些问题,我们需要深入Python源代码。以下源代码来自Python 2.7.8

Python Object

Python中一切皆对象。所有对象的数据结构都以一个PyObject_HEAD开头。

object.h

  1. typedef struct _object {
  2. PyObject_HEAD
  3. } PyObject;
  4. /* PyObject_HEAD defines the initial segment of every PyObject. */
  5. #define PyObject_HEAD \
  6. _PyObject_HEAD_EXTRA \
  7. Py_ssize_t ob_refcnt; \
  8. struct _typeobject *ob_type;

这里的ob_refcnt是用来做引用计数的,而ob_type则是对象所对应的type对象。

type对象包含了很多关于对象的元信息:类型名字(tp_name),创建该类型对象时分配内存空间大小的信息(tp_basicsize和tp_itemsize),一些操作信息(tp_call,tp_new等),还有其他如__mro__(tp_mro), __bases__(tp_bases)等。

object.h

  1. typedef struct _typeobject {
  2. PyObject_VAR_HEAD
  3. const char *tp_name; /* For printing, in format "<module>.<name>" */
  4. Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */
  5. ...
  6. /* More standard operations (here for binary compatibility) */
  7. ternaryfunc tp_call;
  8. ...
  9. /* Attribute descriptor and subclassing stuff */
  10. PyObject *tp_dict;
  11. newfunc tp_new;
  12. PyObject *tp_bases;
  13. PyObject *tp_mro; /* method resolution order */
  14. PyObject *tp_subclasses;
  15. ...
  16. ...
  17. } PyTypeObject;

对于对象f = Foo()来说,它的ob_type就是Foo, 而Foo的ob_type则是type。

type(obj)到底做了些什么事情

上面提到的每个对象都有的ob_type其实就是type(obj)返回的对象。

我们看下运行type(obj)时调用到的一系列函数。

abstract.c

  1. PyObject *
  2. PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw)
  3. {
  4. ternaryfunc call;
  5. if ((call = func->ob_type->tp_call) != NULL) {
  6. PyObject *result;
  7. if (Py_EnterRecursiveCall(" while calling a Python object"))
  8. return NULL;
  9. result = (*call)(func, arg, kw);
  10. ...
  11. return result;
  12. }
  13. ...
  14. return NULL;
  15. }

这里的func是obj的ob_type,以f = Foo()为例的话就是Foo。那func->ob_type当然就是type了。

typeobject.c

  1. PyTypeObject PyType_Type = {
  2. PyVarObject_HEAD_INIT(&PyType_Type, 0)
  3. "type", /* tp_name */
  4. ...
  5. (ternaryfunc)type_call, /* tp_call */
  6. ...
  7. type_new, /* tp_new */
  8. ...
  9. }

从上面PyType_Type的定义可以看到type的tp_call就是type_call函数:

typeobject.c

  1. static PyObject *
  2. type_call(PyTypeObject *type, PyObject *args, PyObject *kwds)
  3. {
  4. PyObject *obj;
  5. if (type->tp_new == NULL) {
  6. PyErr_Format(PyExc_TypeError,
  7. "cannot create '%.100s' instances",
  8. type->tp_name);
  9. return NULL;
  10. }
  11. obj = type->tp_new(type, args, kwds);
  12. if (obj != NULL) {
  13. /* Ugly exception: when the call was type(something),
  14. don't call tp_init on the result. */
  15. if (type == &PyType_Type &&
  16. PyTuple_Check(args) && PyTuple_GET_SIZE(args) == 1 &&
  17. (kwds == NULL ||
  18. (PyDict_Check(kwds) && PyDict_Size(kwds) == 0)))
  19. return obj; // Yun: type(obj) returns from here
  20. /* If the returned object is not an instance of type,
  21. it won't be initialized. */
  22. if (!PyType_IsSubtype(obj->ob_type, type))
  23. return obj;
  24. type = obj->ob_type;
  25. if (PyType_HasFeature(type, Py_TPFLAGS_HAVE_CLASS) &&
  26. type->tp_init != NULL &&
  27. type->tp_init(obj, args, kwds) < 0) {
  28. Py_DECREF(obj);
  29. obj = NULL;
  30. }
  31. }
  32. return obj; // Yun: type(cls, bases, dict) returns from here

type_call中先调用tp_new指向的函数(type_new),然后再做分支。对type(obj)调用来说就是直接返回tp_new得到的对象。而对type(cls, bases, dict)来说还会调用tp_init指向的函数,这在自定义metaclass时会用到。

typeobject.c

  1. static PyObject *
  2. type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
  3. {
  4. ...
  5. /* Special case: type(x) should return x->ob_type */
  6. {
  7. const Py_ssize_t nargs = PyTuple_GET_SIZE(args);
  8. const Py_ssize_t nkwds = kwds == NULL ? 0 : PyDict_Size(kwds);
  9. if (PyType_CheckExact(metatype) && nargs == 1 && nkwds == 0) {
  10. PyObject *x = PyTuple_GET_ITEM(args, 0);
  11. Py_INCREF(Py_TYPE(x));
  12. return (PyObject *) Py_TYPE(x);
  13. }
  14. ...
  15. }
  16. ...
  17. }

object.h

  1. #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)

可见type(obj)其实就是返回对象的ob_type。

为什么a在改变__class__后,type(a)还是ASML

要回答这个问题,我们要先回顾下通过obj.xxx查找对象的 attribute 时的搜索顺序:
1. type对象及其基类的__dict__。如果是 data descriptor,返回这个 data descriptor的 __get__ 结果
2. obj的__dict__
3. 第一步中找到的如果是 non-data descriptor, 返回这个 non-data descriptor的 __get__ 结果
4. type对象中的__dict__,也就是直接返回第一步中找到的对象

obj.__class__就是一个 attribute 查找。

typeobject.c

  1. static PyGetSetDef object_getsets[] = {
  2. {"__class__", object_get_class, object_set_class,
  3. PyDoc_STR("the object's class")},
  4. {0}
  5. };
  6. PyTypeObject PyBaseObject_Type = {
  7. PyVarObject_HEAD_INIT(&PyType_Type, 0)
  8. "object", /* tp_name */
  9. ...
  10. PyObject_GenericGetAttr, /* tp_getattro */
  11. PyObject_GenericSetAttr, /* tp_setattro */
  12. ...
  13. object_getsets, /* tp_getset */
  14. ...
  15. };

来看下f = Foo(); f.__class__;中的函数调用链:

object.c

  1. PyObject *
  2. PyObject_GetAttr(PyObject *v, PyObject *name)
  3. {
  4. PyTypeObject *tp = Py_TYPE(v);
  5. ...
  6. if (tp->tp_getattro != NULL)
  7. return (*tp->tp_getattro)(v, name);
  8. ...
  9. return NULL;
  10. }

这里的v就是f, 而tp就是Foo, tp->tp_getattro就是PyObject_GenericGetAttr函数。

object.c

  1. PyObject *
  2. PyObject_GenericGetAttr(PyObject *obj, PyObject *name)
  3. {
  4. return _PyObject_GenericGetAttrWithDict(obj, name, NULL);
  5. }
  6. PyObject *
  7. _PyObject_GenericGetAttrWithDict(PyObject *obj, PyObject *name, PyObject *dict)
  8. {
  9. PyTypeObject *tp = Py_TYPE(obj);
  10. PyObject *descr = NULL;
  11. PyObject *res = NULL;
  12. ...
  13. descr = _PyType_Lookup(tp, name);
  14. Py_XINCREF(descr);
  15. f = NULL;
  16. if (descr != NULL &&
  17. PyType_HasFeature(descr->ob_type, Py_TPFLAGS_HAVE_CLASS)) {
  18. f = descr->ob_type->tp_descr_get;
  19. if (f != NULL && PyDescr_IsData(descr)) {
  20. res = f(descr, obj, (PyObject *)obj->ob_type);
  21. Py_DECREF(descr);
  22. goto done;
  23. }
  24. }
  25. ...
  26. return res;
  27. }
  28. PyObject *
  29. _PyType_Lookup(PyTypeObject *type, PyObject *name)
  30. {
  31. Py_ssize_t i, n;
  32. PyObject *mro, *res, *base, *dict;
  33. unsigned int h;
  34. ...
  35. /* Look in tp_dict of types in MRO */
  36. mro = type->tp_mro;
  37. ...
  38. res = NULL;
  39. assert(PyTuple_Check(mro));
  40. n = PyTuple_GET_SIZE(mro);
  41. for (i = 0; i < n; i++) {
  42. base = PyTuple_GET_ITEM(mro, i);
  43. if (PyClass_Check(base))
  44. dict = ((PyClassObject *)base)->cl_dict;
  45. else {
  46. assert(PyType_Check(base));
  47. dict = ((PyTypeObject *)base)->tp_dict;
  48. }
  49. assert(dict && PyDict_Check(dict));
  50. res = PyDict_GetItem(dict, name);
  51. if (res != NULL)
  52. break;
  53. }
  54. ...
  55. return res;
  56. }

_PyObject_GenericGetAttrWithDict中先调用_PyType_Lookup在Foo的tp_mro中查到__class__属性(来自Foo的基类object),该属性是一个data descriptor,最终调用了object_get_class。

typeobject.c

  1. static PyObject *
  2. object_get_class(PyObject *self, void *closure)
  3. {
  4. Py_INCREF(Py_TYPE(self));
  5. return (PyObject *)(Py_TYPE(self));
  6. }

从object_get_class函数可以看出,对于f = Foo(); f.__class__;来说返回的也是对象f的ob_type(即Foo)。这就解释了Case 1中为什么type(b)和b.__class__是相等的。

那为什么Case 2中的type(a)和a.__class__不相等呢?

因为Case 1中没有自定义__class__,所以查找__class__时在Bar中没找到,接着就去Bar的基类object中找,正好object中定义了一个__class__的 data descriptor, 就返回这个。

而在Case 2中我们自定义了__class__,所以在ASML.__dict__中找到有这个 attribute 后就返回了,不会再去找mro中的下一个(object)。但是这里找到的这个 attribute 不是 data descriptor,根据前面提到的 attribute 搜索顺序,我们接着在a.__dict__中找,也没有,那就直接返回ASML中的找到的那个了。

为什么设置__class__后,Case 1和Case 2有差别

obj.xxx = yyy 设置attribute时的顺序
1. 先从type对象及其基类的__dict__中查找该 attribute,找到就返回。如果找到的是 data descriptor,则用该 data descriptor的__set__来设置
2. 否则添加到obj.__dict__

Case 1中得到的__class__是一个 data descriptor,给它赋值实际上调用的是object_set_class函数。

typeobject.c

  1. static int
  2. object_set_class(PyObject *self, PyObject *value, void *closure)
  3. {
  4. PyTypeObject *oldto = Py_TYPE(self);
  5. PyTypeObject *newto;
  6. ...
  7. newto = (PyTypeObject *)value;
  8. ...
  9. if (compatible_for_assignment(newto, oldto, "__class__")) {
  10. Py_INCREF(newto);
  11. Py_TYPE(self) = newto;
  12. Py_DECREF(oldto);
  13. return 0;
  14. }
  15. else {
  16. return -1;
  17. }
  18. }

从该函数的实现可以看到b.__class__ = Foo实际上会把b的ob_type设为Foo。所以赋值后type(b)和b.__class__都跟着变了。

  1. >>> b.__dict__
  2. {}
  3. >>> Bar.__dict__
  4. dict_proxy({'__dict__': <attribute '__dict__' of 'Bar' objects>, '__module__': '__main__', '__weakref__': <attribute '__weakref__' of 'Bar' objects>, '__doc__': None})
  5. >>>
  6. >>> b.__class__ = Foo
  7. >>>
  8. >>> b.__class__, type(b)
  9. (<class '__main__.Foo'>, <class '__main__.Foo'>)
  10. >>> b.__dict__
  11. {}
  12. >>> Bar.__dict__
  13. dict_proxy({'__dict__': <attribute '__dict__' of 'Bar' objects>, '__module__': '__main__', '__weakref__': <attribute '__weakref__' of 'Bar' objects>, '__doc__': None})

Case 2中得到的__class__不是 data descriptor。所以a.__class__ = Brion会在a.__dict__中添加一条记录,而a的ob_type不会变,ASML.__dict__也不会变。

  1. >>> a.__dict__
  2. {}
  3. >>> ASML.__dict__
  4. dict_proxy({'__dict__': <attribute '__dict__' of 'ASML' objects>, '__module__': '__main__', '__weakref__': <attribute '__weakref__' of 'ASML' objects>, '__class__': <class '__main__.Foo'>, '__doc__': None})
  5. >>>
  6. >>> a.__class__ = Brion
  7. >>>
  8. >>> a.__dict__
  9. {'__class__': <class '__main__.Brion'>}
  10. >>> ASML.__dict__
  11. dict_proxy({'__dict__': <attribute '__dict__' of 'ASML' objects>, '__module__': '__main__', '__weakref__': <attribute '__weakref__' of 'ASML' objects>, '__class__': <class '__main__.Foo'>, '__doc__': None})

以上的讨论都是基于new style class。对于old style class来说,type()不等于__class__:

  1. >>> class A():
  2. pass
  3. >>> a = A()
  4. >>> a.__class__, type(a)
  5. (<class __main__.A at 0x0270CFB8>, <type 'instance'>)

另外一个关于 isinstance(obj, cls) 的问题

  1. >>> class Foo(object):
  2. pass
  3. >>> class ASML(object):
  4. __class__ = Foo
  5. >>> a = ASML()
  6. >>> isinstance(a, Foo)
  7. True
  8. >>> isinstance(a, ASML)
  9. True

为什么这里的两个isinstance都返回True呢?

object.h

  1. #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
  2. #bltinmodule.c
  3. static PyMethodDef builtin_methods[] = {
  4. ...
  5. {"isinstance", builtin_isinstance, METH_VARARGS, isinstance_doc},
  6. ...
  7. }
  8. static PyObject *
  9. builtin_isinstance(PyObject *self, PyObject *args)
  10. {
  11. PyObject *inst;
  12. PyObject *cls;
  13. int retval;
  14. if (!PyArg_UnpackTuple(args, "isinstance", 2, 2, &inst, &cls))
  15. return NULL;
  16. retval = PyObject_IsInstance(inst, cls);
  17. if (retval < 0)
  18. return NULL;
  19. return PyBool_FromLong(retval);
  20. }

最终isinstance(obj, cls)会调用PyObject_IsInstance

abstract.c

  1. int
  2. PyObject_IsInstance(PyObject *inst, PyObject *cls) // Yun: "isinstance(obj, cls)" will call it
  3. {
  4. static PyObject *name = NULL;
  5. /* Quick test for an exact match */
  6. if (Py_TYPE(inst) == (PyTypeObject *)cls) // Yun: "isinstacne(b, ASML)" returns True
  7. return 1;
  8. ...
  9. if (!(PyClass_Check(cls) || PyInstance_Check(cls))) {
  10. PyObject *checker;
  11. checker = _PyObject_LookupSpecial(cls, "__instancecheck__", &name);
  12. ...
  13. res = PyObject_CallFunctionObjArgs(checker, inst, NULL); // Yun: "isinstance(b, Foo)" call recursive_isinstance() and returns True
  14. if (res != NULL) {
  15. ok = PyObject_IsTrue(res);
  16. ...
  17. }
  18. return ok;
  19. }
  20. return recursive_isinstance(inst, cls);
  21. }
  22. static int
  23. recursive_isinstance(PyObject *inst, PyObject *cls)
  24. {
  25. ...
  26. static PyObject *__class__ = NULL;
  27. int retval = 0;
  28. if (__class__ == NULL) {
  29. __class__ = PyString_InternFromString("__class__");
  30. if (__class__ == NULL)
  31. return -1;
  32. }
  33. ...
  34. if (PyClass_Check(cls) && PyInstance_Check(inst)) {
  35. ...
  36. }
  37. else if (PyType_Check(cls)) {
  38. retval = PyObject_TypeCheck(inst, (PyTypeObject *)cls);
  39. if (retval == 0) {
  40. PyObject *c = PyObject_GetAttr(inst, __class__);
  41. ...
  42. retval = PyType_IsSubtype(
  43. (PyTypeObject *)c,
  44. (PyTypeObject *)cls); // Yun: Both "c" and "cls" are "Foo" here
  45. }
  46. ...
  47. return retval;
  48. }

__instancecheck__最终会调用recursive_isinstance:

typeobject.c

  1. static PyMethodDef type_methods[] = {
  2. {"mro", (PyCFunction)mro_external, METH_NOARGS,
  3. PyDoc_STR("mro() -> list\nreturn a type's method resolution order")},
  4. {"__subclasses__", (PyCFunction)type_subclasses, METH_NOARGS,
  5. PyDoc_STR("__subclasses__() -> list of immediate subclasses")},
  6. {"__instancecheck__", type___instancecheck__, METH_O,
  7. PyDoc_STR("__instancecheck__() -> bool\ncheck if an object is an instance")},
  8. {"__subclasscheck__", type___subclasscheck__, METH_O,
  9. PyDoc_STR("__subclasscheck__() -> bool\ncheck if a class is a subclass")},
  10. {0}
  11. };
  12. static PyObject *
  13. type___instancecheck__(PyObject *type, PyObject *inst)
  14. {
  15. switch (_PyObject_RealIsInstance(inst, type)) {
  16. case -1:
  17. return NULL;
  18. case 0:
  19. Py_RETURN_FALSE;
  20. default:
  21. Py_RETURN_TRUE;
  22. }
  23. }
  24. int
  25. _PyObject_RealIsInstance(PyObject *inst, PyObject *cls)
  26. {
  27. return recursive_isinstance(inst, cls);
  28. }

简单来说就是isinstance(obj, cls)会先看ob_type,然后再看__class__。

0 0