vector源码剖析

来源：互联网发布：win制作mac安装u盘编辑：程序博客网时间：2024/06/05 11:12

vector

前导准备

源码位置
* C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include

源码版本

Copyright (c) 1992-2009 by P.J. Plauger. ALL RIGHTS RESERVED.
Consult your license regarding permissions and restrictions.
V5.20:0009

类定义

前导定义

template<class _Ty,    class _Ax = allocator<_Ty> >    class vector        : public _Vector_val<_Ty, _Ax>    {   // varying size array of valuespublic:    typedef vector<_Ty, _Ax> _Myt;    typedef _Vector_val<_Ty, _Ax> _Mybase;    typedef typename _Mybase::_Alty _Alloc;    typedef _Alloc allocator_type;    typedef typename _Alloc::size_type size_type;    typedef typename _Alloc::difference_type difference_type;    typedef typename _Alloc::pointer pointer;    typedef typename _Alloc::const_pointer const_pointer;    typedef typename _Alloc::reference reference;    typedef typename _Alloc::const_reference const_reference;    typedef typename _Alloc::value_type value_type;    typedef _Vector_iterator<_Mybase> iterator;    typedef _Vector_const_iterator<_Mybase> const_iterator;    typedef _STD reverse_iterator<iterator> reverse_iterator;    typedef _STD reverse_iterator<const_iterator> const_reverse_iterator;    .......

上面是vector 类开始处的代码片段。重点分析如下：
* allocator是专门的内存分配器，所有内存分配的相关内容都经由allocator来处理。
* 针对基类的分析，_Vector_val，这里面存储着vector的操作数据
* 形如typedef typename 的定义
* 迭代器iterator的定义

allocator

我们可以在vc\crt\src看到如下这个宏定义，微软的编译器中默认这个_ALLOCATOR类是它的默认内存分配器，allocator是标准上的一个要求，当然我们可以实现不一样的内存分配器。

定义

 #define _ALLOCATOR allocator

接着我们看看这个类的定义

        // TEMPLATE CLASS _ALLOCATORtemplate<class _Ty>    class _ALLOCATOR        : public _Allocator_base<_Ty>    {   // generic allocator for objects of class _Typublic:    typedef _Allocator_base<_Ty> _Mybase;    typedef typename _Mybase::value_type value_type;    typedef value_type _FARQ *pointer;    typedef value_type _FARQ& reference;    typedef const value_type _FARQ *const_pointer;    typedef const value_type _FARQ& const_reference;    typedef _SIZT size_type;    typedef _PDFT difference_type;    };

依旧是使用typedef来定义出必要的类型，以备后续使用。
内部重要的函数有四个：构造函数、析构函数、申请函数和释放函数。我们先看申请和释放函数。

申请

pointer allocate(size_type _Count)    {   // allocate array of _Count elements        return (_Allocate(_Count, (pointer)0));    }template<class _Ty> inline    _Ty _FARQ *_Allocate(_SIZT _Count, _Ty _FARQ *)    {   // allocate storage for _Count elements of type _Ty    void *_Ptr = 0;    if (_Count <= 0)        _Count = 0;    else if (((_SIZT)(-1) / sizeof (_Ty) < _Count)        || (_Ptr = ::operator new(_Count * sizeof (_Ty))) == 0)        _THROW_NCEE(bad_alloc, 0);    return ((_Ty _FARQ *)_Ptr);    }

我们来详细分析这个new的操作过程。

        || (_Ptr = ::operator new(_Count * sizeof (_Ty))) == 0)69BF0DC4  cmp         dword ptr [_Count],1FFFFFFFh  69BF0DCB  ja          std::_Allocate<std::pair<CString,CString> >+55h (69BF0DE5h)  69BF0DCD  mov         eax,dword ptr [_Count]  69BF0DD0  shl         eax,3  69BF0DD3  push        eax  69BF0DD4  call        operator new (69BDCC07h)  69BF0DD9  add         esp,4  69BF0DDC  mov         dword ptr [_Ptr],eax  69BF0DDF  cmp         dword ptr [_Ptr],0  69BF0DE3  jne         std::_Allocate<std::pair<CString,CString> >+73h (69BF0E03h)

在69BF0DD4进入的跳转

operator new:69BDCC07  jmp         operator new (69D5FD52h) 69D5FD52  jmp         dword ptr [__imp_operator new (69DDA6F0h)]void *__CRTDECL operator new(size_t size) _THROW1(_STD bad_alloc)        {       // try to allocate size bytes6EA57DA0  mov         edi,edi  6EA57DA2  push        ebp  6EA57DA3  mov         ebp,esp  6EA57DA5  sub         esp,10h          void *p;        while ((p = malloc(size)) == 0)6EA57DA8  mov         eax,dword ptr [size]  6EA57DAB  push        eax  6EA57DAC  call        malloc (6EA69C40h)  6EA57DB1  add         esp,4  6EA57DB4  mov         dword ptr [p],eax  6EA57DB7  cmp         dword ptr [p],0  6EA57DBB  jne         operator new+79h (6EA57E19h)

这是一个内存申请的new。

释放

    void deallocate(pointer _Ptr, size_type)        {   // deallocate object at _Ptr, ignore size        ::operator delete(_Ptr);        }

申请函数在进行一定的参数判断（数量为0、乘法溢出）后，直接使用operator new来申请内存；释放函数则直接调用operator delete来释放内存。

构造

然后我们来研究一下构造和析构函数。如下所示

void construct(pointer _Ptr, const _Ty& _Val)    {   // construct object at _Ptr with value _Val        _Construct(_Ptr, _Val);    }        // TEMPLATE FUNCTION _Constructtemplate<class _Ty1,    class _Ty2> inline    void _Construct(_Ty1 _FARQ *_Ptr, _Ty2&& _Val)    {   // construct object at _Ptr with value _Val    void _FARQ *_Vptr = _Ptr;    ::new (_Vptr) _Ty1(_STD forward<_Ty2>(_Val));    }template<class _Ty1> inline    void _Construct(_Ty1 _FARQ *_Ptr)    {   // construct object at _Ptr with default value    void _FARQ *_Vptr = _Ptr;    ::new (_Vptr) _Ty1();    }

构造函数主要是通过placement new（定位new）来进行操作，在指定地址处构造数据
我们来详细分析一下这个::new (_Vptr) _Ty1(_STD forward<_Ty2>(_Val))的过程

我们来分析一下::new的汇编指令，来一探究竟。

    ::new (_Vptr) _Ty1(_STD forward<_Ty2>(_Val));69BF16D3  mov         eax,dword ptr [_Vptr]  69BF16D6  push        eax  69BF16D7  push        8  69BF16D9  call        operator new (69BE2AFDh)  69BF16DE  add         esp,8  69BF16E1  mov         dword ptr [ebp-0E0h],eax  69BF16E7  mov         dword ptr [ebp-4],0  69BF16EE  cmp         dword ptr [ebp-0E0h],0  69BF16F5  je          std::_Construct<std::pair<CString,CString>,std::pair<CString,CString> const &>+87h (69BF1717h)  69BF16F7  mov         ecx,dword ptr [_Val]  69BF16FA  push        ecx  69BF16FB  call        std::forward<std::pair<CString,CString> const &> (69BE1ECDh)  69BF1700  add         esp,4  69BF1703  push        eax  69BF1704  mov         ecx,dword ptr [ebp-0E0h]  69BF170A  call        std::pair<CString,CString>::pair<CString,CString> (69BDFB8Ch)  69BF170F  mov         dword ptr [ebp-0F4h],eax  69BF1715  jmp         std::_Construct<std::pair<CString,CString>,std::pair<CString,CString> const &>+91h (69BF1721h)  69BF1717  mov         dword ptr [ebp-0F4h],0  69BF1721  mov         edx,dword ptr [ebp-0F4h]  69BF1727  mov         dword ptr [ebp-0ECh],edx  69BF172D  mov         dword ptr [ebp-4],0FFFFFFFFh

我们在69BF16D9处进入call，看看它执行到了哪里

operator new:69BE2AFD  jmp         operator new (69BF2810h) inline void *__CRTDECL operator new(size_t, void *_Where) _THROW0()    {   // construct array with placement at _Where69BF2810  push        ebp  69BF2811  mov         ebp,esp  69BF2813  sub         esp,0C0h  69BF2819  push        ebx  69BF281A  push        esi  69BF281B  push        edi  69BF281C  lea         edi,[ebp-0C0h]  69BF2822  mov         ecx,30h  69BF2827  mov         eax,0CCCCCCCCh  69BF282C  rep stos    dword ptr es:[edi]      return (_Where);69BF282E  mov         eax,dword ptr [_Where]      }69BF2831  pop         edi  69BF2832  pop         esi  69BF2833  pop         ebx  69BF2834  mov         esp,ebp  69BF2836  pop         ebp  69BF2837  ret

如上，这是placement形式的operator new，函数体中只是直接返回参数_Where所代表的地址，并不申请任何内存。
然后返回上一个函数中，此时调用一下forward，接着调用某种类型的构造函数，我们的是pair的构造函数。

析构

void destroy(pointer _Ptr)    {   // destroy object at _Ptr        _Destroy(_Ptr);    }    // TEMPLATE FUNCTION _Destroytemplate<class _Ty> inline    void _Destroy(_Ty _FARQ *_Ptr)    {   // destroy object at _Ptr    _Ptr->~_Ty();    }template<> inline    void _Destroy(char _FARQ *)    {   // destroy a char (do nothing)    }template<> inline    void _Destroy(wchar_t _FARQ *)    {   // destroy a wchar_t (do nothing)    }

析构函数则是调用对象的析构函数，另外针对不同的数据类型进行不同的析构处理。此处使用了函数模板的全特化（显式特化，template<>）：对char和wchar_t什么都不做。注意函数模板不支持偏特化。

_Vector_val基类

        // TEMPLATE CLASS _Vector_valtemplate<class _Ty,    class _Alloc>    class _Vector_val        : public _Container_base    {   // base class for vector to hold datapublic:    typedef typename _Alloc::template rebind<_Ty>::other _Alty; #if _ITERATOR_DEBUG_LEVEL == 0    _Vector_val(_Alloc _Al = _Alloc())        : _Alval(_Al)        {   // construct allocator from _Al        _Myfirst = 0;        _Mylast = 0;        _Myend = 0;        }    ~_Vector_val()        {   // destroy proxy        } #else /* _ITERATOR_DEBUG_LEVEL == 0 */    _Vector_val(_Alloc _Al = _Alloc())        : _Alval(_Al)        {   // construct allocator from _Al        typename _Alloc::template rebind<_Container_proxy>::other            _Alproxy(_Alval);        this->_Myproxy = _Alproxy.allocate(1);        _Cons_val(_Alproxy, this->_Myproxy, _Container_proxy());        this->_Myproxy->_Mycont = this;        _Myfirst = 0;        _Mylast = 0;        _Myend = 0;        }    ~_Vector_val()        {   // destroy proxy        typename _Alloc::template rebind<_Container_proxy>::other            _Alproxy(_Alval);        this->_Orphan_all();        _Dest_val(_Alproxy, this->_Myproxy);        _Alproxy.deallocate(this->_Myproxy, 1);        this->_Myproxy = 0;        } #endif /* _ITERATOR_DEBUG_LEVEL == 0 */    typedef typename _Alty::size_type size_type;    typedef typename _Alty::difference_type difference_type;    typedef typename _Alty::pointer pointer;    typedef typename _Alty::const_pointer const_pointer;    typedef typename _Alty::reference reference;    typedef typename _Alty::const_reference const_reference;    typedef typename _Alty::value_type value_type;    pointer _Myfirst;   // pointer to beginning of array    pointer _Mylast;    // pointer to current end of sequence    pointer _Myend; // pointer to end of array    _Alty _Alval;   // allocator object for values    };

这个基类中主要存储的是vector的数据操作指针，重要的是下面这三个指针

pointer _Myfirst;   // pointer to beginning of arraypointer _Mylast;    // pointer to current end of sequencepointer _Myend; // pointer to end of array

其他函数对其进行初始化和销毁的处理，根据debug级别进行不同的处理，也使用了typedef来定义出具体的类型。

iterator

vector的迭代器分两种四类

typedef _Vector_iterator<_Mybase> iterator;typedef _Vector_const_iterator<_Mybase> const_iterator;typedef _STD reverse_iterator<iterator> reverse_iterator;typedef _STD reverse_iterator<const_iterator> const_reverse_iterator;

分是否const和正向或反向迭代器

vector 迭代器

_Vector_iterator定义

template<class _Myvec>    class _Vector_iterator        : public _Vector_const_iterator<_Myvec>    {   // iterator for mutable vectorpublic:    typedef _Vector_iterator<_Myvec> _Myiter;    typedef _Vector_const_iterator<_Myvec> _Mybase;    typedef random_access_iterator_tag iterator_category;    typedef typename _Myvec::value_type value_type;    typedef typename _Myvec::difference_type difference_type;    typedef typename _Myvec::pointer pointer;    typedef typename _Myvec::reference reference;    _Vector_iterator()        {   // construct with null vector pointer        }    _Vector_iterator(pointer _Parg, const _Container_base *_Pvector)        : _Mybase(_Parg, _Pvector)        {   // construct with pointer _Parg        }

定义中，有声明出此迭代器的类型random_access_iterator_tag，决定了当前迭代器能做的操作。数据类型value_type等等。以及定义出构造函数。
我们可以看看vector中的迭代器的构造，就可以知道vector向迭代器传递了哪些数据。

typedef _Vector_iterator<_Mybase> iterator;iterator begin(){       // return iterator for beginning of mutable sequence    return (iterator(this->_Myfirst, this));}

vector向迭代器传递了数据操作首地址和类操作this指针，将数据操作权递交给迭代器。_Vector_iterator将数据指针传递给基类存储。

_Vector_iterator对数据的操作

    ....    _Myiter operator-(difference_type _Off) const        {   // return this - integer        _Myiter _Tmp = *this;        return (_Tmp -= _Off);        }    difference_type operator-(const _Mybase& _Right) const        {   // return difference of iterators        return (*(_Mybase *)this - _Right);        }    reference operator[](difference_type _Off) const        {   // subscript        return (*(*this + _Off));        }    ....

那么它是如何对数据的操作的呢，如上所示，返回的类型就是类开始处已经定义好的类型之一。通过迭代器来操作具体容器的数据。如何操作以及类型都由迭代器做具体的处理。类型由typedef迭代器的时候指定要操作数据的类型，而如何操作则由具体的迭代器定义来定义出何种的访问方式。

vector迭代器基类

_Vector_iterator 继承自 _Vector_const_iterator 继承自 _Iterator012

        // TEMPLATE CLASS iteratortemplate<class _Category,    class _Ty,    class _Diff = ptrdiff_t,    class _Pointer = _Ty *,    class _Reference = _Ty&>    struct iterator    {   // base type for all iterator classes    typedef _Category iterator_category;    typedef _Ty value_type;    typedef _Diff difference_type;    typedef _Diff distance_type;    // retained    typedef _Pointer pointer;    typedef _Reference reference;    };template<class _Category,    class _Ty,    class _Diff,    class _Pointer,    class _Reference,    class _Base>    struct _Iterator012        : public _Base    {    typedef _Category iterator_category;    typedef _Ty value_type;    typedef _Diff difference_type;    typedef _Diff distance_type;    // retained    typedef _Pointer pointer;    typedef _Reference reference;    };

这里是迭代器的最基本的定义。

template<class _Myvec>    class _Vector_const_iterator        : public _Iterator012<random_access_iterator_tag,            typename _Myvec::value_type,            typename _Myvec::difference_type,            typename _Myvec::const_pointer,            typename _Myvec::const_reference,            _Iterator_base>    {   // iterator for nonmutable vectorpublic:.........

我们的vector迭代器如上继承，指定我们是一个随机访问迭代器，可以随机访问数据，并指定其他四项数据类型，供索引数据使用。

iterator_traits

我们继续看vector的定义，接着我们看到了构造函数，其中有这么一幕。

    ......    template<class _Iter>        vector(_Iter _First, _Iter _Last)        : _Mybase()        {   // construct from [_First, _Last)        _Construct(_First, _Last, _Iter_cat(_First));        }    template<class _Iter>        void _Construct(_Iter _Count, _Iter _Val, _Int_iterator_tag)        {   // initialize with _Count * _Val        size_type _Size = (size_type)_Count;        _Ty _Newval = (_Ty)_Val;        _Construct_n(_Size, _STD addressof(_Newval));        }    template<class _Iter>        void _Construct(_Iter _First,            _Iter _Last, input_iterator_tag)        {   // initialize with [_First, _Last), input iterators        _TRY_BEGIN        insert(begin(), _First, _Last);        _CATCH_ALL        _Tidy();        _RERAISE;        _CATCH_END        }    ......

vector 支持各种构造函数，值得一说的是上面的这种构造方式，使用_Iter_cat函数取出_First对应的迭代器类型，根据不同的迭代器类型，执行不同的构造算法。下面我们来看看是如何通过迭代器获取到迭代器类型的，这是一个类型识别的过程。

        // TEMPLATE FUNCTION _Iter_cattemplate<class _Iter> inline    typename iterator_traits<_Iter>::iterator_category        _Iter_cat(const _Iter&)    {   // return category from iterator argument    typename iterator_traits<_Iter>::iterator_category _Cat;    return (_Cat);    }

内部主要使用iterator_traits来做核心任务，这个是迭代器类型识别的萃取类。其如下定义

        // TEMPLATE CLASS iterator_traitstemplate<class _Iter>    struct iterator_traits    {   // get traits from iterator _Iter    typedef typename _Iter::iterator_category iterator_category;    typedef typename _Iter::value_type value_type;    typedef typename _Iter::difference_type difference_type;    typedef difference_type distance_type;  // retained    typedef typename _Iter::pointer pointer;    typedef typename _Iter::reference reference;    };template<class _Ty>    struct iterator_traits<_Ty *>    {   // get traits from pointer    typedef random_access_iterator_tag iterator_category;    typedef _Ty value_type;    typedef ptrdiff_t difference_type;    typedef ptrdiff_t distance_type;    // retained    typedef _Ty *pointer;    typedef _Ty& reference;    };template<> struct iterator_traits<_Bool>    {   // get traits from integer type    typedef _Int_iterator_tag iterator_category;    };template<> struct iterator_traits<char>    {   // get traits from integer type    typedef _Int_iterator_tag iterator_category;    };    .....

iterator_traits 提供了多种特化的版本，如上，支持原始指针和迭代器的类型。还支持bool， char，等等数据类型的类型识别。

vector容器自身对数据的操作

push_back

......    void push_back(const _Ty& _Val)        {   // insert element at end        if (_Inside(_STD addressof(_Val)))            {   // push back an element            size_type _Idx = _STD addressof(_Val) - this->_Myfirst;            if (this->_Mylast == this->_Myend)                _Reserve(1);            _Orphan_range(this->_Mylast, this->_Mylast);            _Cons_val(this->_Alval,                this->_Mylast,                this->_Myfirst[_Idx]);            ++this->_Mylast;            }        else            {   // push back a non-element            if (this->_Mylast == this->_Myend)                _Reserve(1);            _Orphan_range(this->_Mylast, this->_Mylast);            _Cons_val(this->_Alval,                this->_Mylast,                _Val);            ++this->_Mylast;            }        }

push_back 的逻辑较为复杂：
* 首先，判断要插入的值的地址是否位于vector所已有数据的地址范围内。
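* 如果是，那么先计算出该值在数组中的下标_Idx（因为后面的_Reserve可能重新分配内存，使_Val这个引用失效），之后用this->_Myfirst[_Idx]来初始化新元素。
* 如果不是，说明这是一个来自外部的新值，之后直接用_Val来初始化新元素。
* 两种情况下都要先判断是否还有可用空间（_Mylast == _Myend），如果没有，那么使用_Reserve来申请空间。
* 空间就绪后，使用_Cons_val在_Mylast处初始化数据。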
* 最后递增vector队尾偏移。
接着我们分别介绍其中几个核心的函数

_Reserve

那么，vector是如何预留空间的呢

.....    void _Reserve(size_type _Count)        {   // ensure room for _Count new elements, grow exponentially        size_type _Size = size();        if (max_size() - _Count < _Size)            _Xlen();        else if ((_Size += _Count) <= capacity())            ;        else            reserve(_Grow_to(_Size));        }        _SIZT max_size() const _THROW0()        {   // estimate maximum array size        _SIZT _Count = (_SIZT)(-1) / sizeof (_Ty);        return (0 < _Count ? _Count : 1);        }

此函数做参数合法判断，确定不能超过最大大小，并且如果已经有容量符合要求了，那么什么都不做，如果容量确实不够，那么增长容量。
那么具体容量是怎么增长的，有个什么规则呢

.....    size_type _Grow_to(size_type _Count) const        {   // grow by 50% or at least to _Count        size_type _Capacity = capacity();        _Capacity = max_size() - _Capacity / 2 < _Capacity            ? 0 : _Capacity + _Capacity / 2;    // try to grow by 50%        if (_Capacity < _Count)            _Capacity = _Count;        return (_Capacity);        }

从上面的代码我们可以看出，容量每次尝试增长50%，如果增长后仍小于所需的大小_Count，则直接取_Count；这是vs2010 stl这个版本的实现，其他的实现可能不同。
我们知道了容量的增量了，那么具体它怎么实现内存的操作的呢。

......    void reserve(size_type _Count)        {   // determine new minimum length of allocated storage        if (max_size() < _Count)            _Xlen();    // result too long        else if (capacity() < _Count)            {   // not enough room, reallocate            pointer _Ptr = this->_Alval.allocate(_Count);            _TRY_BEGIN            _Umove(this->_Myfirst, this->_Mylast, _Ptr);            _CATCH_ALL            this->_Alval.deallocate(_Ptr, _Count);            _RERAISE;            _CATCH_END            size_type _Size = size();            if (this->_Myfirst != 0)                {   // destroy and deallocate old array                _Destroy(this->_Myfirst, this->_Mylast);                this->_Alval.deallocate(this->_Myfirst,                    this->_Myend - this->_Myfirst);                }            this->_Orphan_all();            this->_Myend = _Ptr + _Count;            this->_Mylast = _Ptr + _Size;            this->_Myfirst = _Ptr;            }        }

这里申请一个增长后容量大小的空间，用_Umove把原有元素转移到新空间（失败则释放新空间并重新抛出异常），然后将原始空间中的元素析构并释放原始空间，最后更新_Myfirst、_Mylast、_Myend三个指针。

_Cons_val

回到push_back函数中，当申请空间等操作都完成后，开始在这块空间上构造数据。

template<class _Alloc,    class _Ty1,    class _Ty2>    void _Cons_val(_Alloc& _Alval, _Ty1 *_Pdest, _Ty2&& _Src)    {   // construct using allocator    _Alval.construct(_Pdest, _STD forward<_Ty2>(_Src));    }

实际上是调用vector的内存分配器去做实际的操作。这个在开始allocator中我们就接触到了如何构造析构和申请释放内存的操作。

0 0