第十六篇--算法导论排序篇总结-开发实用quicksort算法

来源：互联网发布：java方法的特征签名编辑：程序博客网时间：2024/06/10 02:33

行百里者半九十，此言得之。

当看完快速排序之后，我了解了它的思想，我也会实现它的代码，可是，这就是我所需要的quicksort算法吗？显然不是。

习题7-1-2问道：如果数组中元素都相同，该怎么办？

如果采用伪代码直接实现的程序，时间复杂度为n的平方；作者提供了思路，当A[p..r]中的元素相同时，返回的q为(p+r)/2，这样就能保持最好的性能。

习题7-4-5问道：难道一路quicksort递归到底，性能最好吗？能不能改进？

作者又提供了思路：当输入数组已经几乎有序时，插入排序的速度非常快；在实际应用中，我们对一个长度为k的子数组调用快速排序时，让它直接返回；最后对整个数组运行插入排序；

习题7-2问道：我们可不可以实现对于A[p..r]调用划分时，获得A[p..q-1]小于主元；A[q...t]等于主元；A[t+1..r]大于主元？

习题7-4说：其实消除尾递归是能提高程序性能的。。。

习题7-5问：怎么选择主元？如果随机选择实现的太慢，且效果不一定很好；而直接选最后一个元素又太容易出现最坏情况。

作者说：最常用的改进RANDOM-QUICKSORT的方法是：从子数组中更细致的选择主元而非随机选择。常用做法是三数取中法：

从子数组中随机选择三个元素，取其中位数作为主元；

习题7-6问：其实区间模糊排序也可以采用向quicksort一样的排序手法，如何写扩展性好的quicksort算法？

一个算法背后隐藏着这么多的细节或者说是技巧，虽然我们做题目的时候写的都是简单的，不考虑这么多问题的，但是实际应用中自然是要考虑这些细节的，所以本篇的目的是-------如何实现一个实用的排序算法？

程序说明：取材于STL。命名的规则是：只在函数中调用而非面向用户的接口的函数均为 _xxxx（除sort外都是。。）

//----------------------------------------------------------------------------------------------------//这里的插入排序跟《算法导论》的伪代码一致 //对迭代器[first,last)范围内的元素按递增排序template<typename RandomIterator>void _insertion_sort(RandomIterator first,RandomIterator last){typedef  typename iterator_traits<RandomIterator>::value_type  value_type;if(first==last) return;for(RandomIterator i=first+1;i!=last;++i){RandomIterator j=i-1;value_type  key=*i;while(j>=first && key<*j){*(j+1)=*j;--j;}*(j+1)=key;}}//对迭代器[first,last)范围内的元素按函数对象进行排序template<typename RandomIterator,typename Compare>void _insertion_sort(RandomIterator first,RandomIterator last,Compare comp){typedef  typename iterator_traits<RandomIterator>::value_type  value_type;if(first==last) return;for(RandomIterator i=first+1;i!=last;++i){RandomIterator j=i-1;value_type  key=*i;while(j>=first && comp(key,*j)){*(j+1)=*j;--j;}*(j+1)=key;}}//-------------------------------------------------------------------------------------------------

quicksort在平均情况下复杂度为O(nlgn)，不过我们这儿采用的改进版的时间复杂度最坏情况下为O(nlgn)

//策略一：三点中值做主元//三点取中值template<typename T>inline const T& _median(const T& a,const T& b,const T& c){if(a<b){if(b<c)//a<b<creturn b;else if(a<c) //a<b,a<c,b>=creturn c;else //a<b,b>=c,a>=creturn a;}else if(a<c)//a>b,a<creturn a;else if(b<c)//a>b,a>c,b<creturn c;else         //a>b,a>c,b>creturn b;}//------------------------------------------------------------------------

然后是quicksort的划分代码，这里的划分实现选择了《算法导论》书中的伪代码，而非习题7-1的实现方式（STL中采用的是习题7-1的实现方式）

//------------------------------------------------------------------------//两个辅助性工具函数，用于交换两迭代器所指元素的值template<typename RandomIterator>inline void iter_swap(RandomIterator& i,RandomIterator& j){  swap(*i,*j);}template<typename T>inline void  swap(T&  L,T& R){T tmp=R;R=L;L=tmp;}//--------------------------------------------------------------------------

//--------------------------------------------------------------------------//partition函数，划分成两段[first,i)小于等于pivot[i+1,last)大于等于pivottemplate<typename RandomIterator,typename T>RandomIterator _partition(RandomIterator first,RandomIterator last,T pivot){for(RandomIterator i=first;i!=last;++i){if(*i==pivot){RandomIterator tmp=last-1;iter_swap(i,tmp);break;}}RandomIterator i=first-1;RandomIterator r=last-1;for(RandomIterator j=first;j!=r;++j){if(*j<=pivot){++i;iter_swap(i,j);}}++i;iter_swap(i,r);return i;}//这里按照传入的函数对象进行划分，//如果传入的是大于号的话，那么左边子数组大于pivot，右边小于等于pivottemplate<typename RandomIterator,typename T,typename Compare>RandomIterator _partition(RandomIterator first,RandomIterator last,T pivot,Compare comp){for(RandomIterator i=first;i!=last;++i){if(*i==pivot){RandomIterator tmp=last-1;iter_swap(i,tmp);break;}}RandomIterator i=first-1;RandomIterator r=last-1;for(RandomIterator j=first;j!=r;++j){if(comp(*j,pivot)){++i;iter_swap(i,j);}}++i;iter_swap(i,r);return i;}

总的程序来了：

//-----------------------------------------------------------------------------------------//策略二：与插入排序结合使用，如习题7-4-5//策略三：消除尾递归//策略四：混合式排序：内省式排序；当分割行为有恶化为N^2次行为的倾向时，能够自我侦测，转而//使用堆排序，使效率维持在NlgNconst int threshold=16;//当子数组元素数小于此值时，调用快速排序则直接返回//默认情况下按升序排序template<typename RandomIterator>inline void sort(RandomIterator first,RandomIterator last){if(first!=last){        //控制递归调用的层数最多为__lg(last-first)*2层                _intro_sort(first,last,value_type(first),__lg(last-first)*2);_insertion_sort(first,last);}}//按照传入的函数对象进行排序template<typename RandomIterator,typename Compare>inline void sort(RandomIterator first,RandomIterator last,Compare comp){if(first!=last){_intro_sort(first,last,value_type(first),__lg(last-first)*2,comp);_insertion_sort(first,last,comp);}}//---------------------------------------------------------------------------------------

//---------------------------------------------------------------------------------------//__lg是为了控制分割恶化//找到2^k<=n的最大k值//在sort程序中我们传入__lg(last-first)*2，意为如果元素为40的话，那么k=5；层数最多为10层template<typename Size>inline Size __lg(Size n){Size k;for(k=0;n>1;n>>=1)++k;return k;}//--------------------------------------------------------------------------------------

//--------------------------------------------------------------------------------------//按升序排序template<typename RandomIterator,typename T,typename Size>void _intro_sort(RandomIterator first,RandomIterator last,T*,Size depth_limit){while(last-first>threshold){if(depth_limit==0){//分割恶化_heap_sort(first,last);return;}--depth_limit;//主元采用三点中值选择RandomIterator cut=_partition(first,last,T(_median(*first,                                                             *(first+(last-first)/2),*(last-1))));//消除尾递归_intro_sort(cut+1,last,value_type(first),depth_limit);last=cut;}}//按函数对象排序template<typename RandomIterator,typename T,typename Size,typename Compare>void _intro_sort(RandomIterator first,RandomIterator last,T*,Size depth_limit,Compare comp){while(last-first>threshold){if(depth_limit==0){//分割恶化_heap_sort(first,last,comp);return;}--depth_limit;//主元采用三点中值选择RandomIterator cut=_partition(first,last,T(_median(*first,                                                             *(first+(last-first)/2),*(last-1))),comp);//消除尾递归_intro_sort(cut+1,last,value_type(first),depth_limit,comp);last=cut;}}//--------------------------------------------------------------------------------------------

现在的主要程序大纲都基本实现了，但是堆排序又是一个大头，这里仍然是采用《算法导论》伪码：

//------------------------------------------------------------------------------------//辅助程序template<typename Size>inline Size _parent(Size i){return (i-1)>>1;}template<typename Size>inline Size _left(Size i){return (i<<1)+1;}template<typename Size>inline Size _right(Size i){return (i<<1)+2;}

以下函数都在《算法导论》第六章实现过，可以看看我以前写的代码；

//----------------------------------------------------------------------------------------//保持堆性质，默认最大堆，template<typename RandomIterator,typename Distance>void _heapify(RandomIterator first,RandomIterator i,Distance len){Distance i_index=i-first;Distance largest=i_index;Distance current_index=i_index;while(current_index<len){Distance i_left_child=_left(current_index);    Distance i_right_child=_right(current_index);if(i_left_child<len&&(*(first+i_left_child)>*(first+current_index))){largest=i_left_child;}if(i_right_child<len &&(*(first+i_right_child)>*(first+largest))){largest=i_right_child;}if(largest!=current_index){RandomIterator tmp1=first+current_index,tmp2=first+largest;iter_swap(tmp1,tmp2);current_index=largest;}else{break;}}}//保持堆性质，按函数对象，如：传入小于号，那么就是建立最小堆template<typename RandomIterator,typename Distance,typename Compare>void _heapify(RandomIterator first,RandomIterator i,Distance len,Compare comp){Distance i_index=i-first;Distance compest=i_index;Distance current_index=i_index;while(current_index<len){Distance i_left_child=_left(current_index);    Distance i_right_child=_right(current_index);if(i_left_child<len&&comp(*(first+i_left_child),*(first+current_index))){compest=i_left_child;}if(i_right_child<len &&comp(*(first+i_right_child),*(first+compest))){compest=i_right_child;}if(compest!=current_index){RandomIterator tmp1=first+current_index,tmp2=first+compest;iter_swap(tmp1,tmp2);current_index=compest;}else{break;}}}//------------------------------------------------------------------------------------------------

//------------------------------------------------------------------------------------//对[first,last)建堆，默认为最大堆template<typename RandomIterator>inline void build_heap(RandomIterator first,RandomIterator last){typedef iterator_traits<RandomIterator>::difference_type  difference_type;typedef iterator_traits<RandomIterator>::value_type     value_type;if(last-first<2) return;//如果长度为0或1，不必重新排列difference_type len=last-first;for(RandomIterator it=first+(len/2)-1;it!=first-1;--it){_heapify(first,it,len);}}//建堆，按函数对象来建template<typename RandomIterator,typename Compare>inline void build_heap(RandomIterator first,RandomIterator last,Compare comp){typedef iterator_traits<RandomIterator>::difference_type  difference_type;typedef iterator_traits<RandomIterator>::value_type     value_type;if(last-first<2) return;//如果长度为0或1，不必重新排列difference_type len=last-first;for(RandomIterator it=first+(len/2)-1;it!=first-1;--it){_heapify(first,it,len,comp);}}//----------------------------------------------------------------------------------------

//------------------------------------------------------------------------------------------------//堆排序，默认建立最大堆，按升序排序template<typename RandomIterator>void _heap_sort(RandomIterator first,RandomIterator last){typedef iterator_traits<RandomIterator>::difference_type difference_type;difference_type len=last-first;build_heap(first,last);for(difference_type i=len-1;i>=1;--i){RandomIterator tmp=(first+len-1);iter_swap(first,tmp);--len;_heapify(first,first,len);}}

下面是按函数对象版本堆排序的，但是这儿有一些细节，首先对于升序排序，我们传入小于号，但是我们需要建立最大堆，所以在建堆过程中我们却需要大于号，这里需要一个函数对象的取反功能，要对一个函数对象取反（配接器），我们建立配接器需要建立在大家都得遵守的约定上，即定义二元函数对象时，应该继承基本的binary_function：

//辅助程序template<typename Arg1,typename Arg2,typename Result>struct binary_function{typedef Arg1 first_argument_type;typedef Arg2 second_argument_type;typedef Result result_type;};template<typename Predicate>class binary_negate   :public  binary_function<typename Predicate::first_argument_type,                                                      typename Predicate::second_argument_type,bool>{protected:Predicate _pred;public:explicit binary_negate(const Predicate& x):_pred(x){ }bool operator()(const typename Predicate::first_argument_type& x,                    const typename Predicate::second_argument_type& y) const{return !_pred(x,y);}};

这样我们使用时，就可以binary_negate<Compare>(comp)对函数对象取反啦

//按函数对象进行堆排序template<typename RandomIterator,typename Compare>void _heap_sort(RandomIterator first,RandomIterator last,Compare comp){typedef iterator_traits<RandomIterator>::difference_type difference_type;difference_type len=last-first;build_heap(first,last,binary_negate<Compare>(comp));for(difference_type i=len-1;i>=1;--i){RandomIterator tmp=(first+len-1);iter_swap(first,tmp);--len;_heapify(first,first,len,binary_negate<Compare>(comp));}}

好，如此我们的程序就完成了，这就是使用的快速排序算法！

下面是一些测试：

需要包含的头文件：

#include<iostream>#include<cstdlib>

//升序排列int main(){int  A[10];for(int i=0;i<10;++i){A[i]=rand()%100;}    sort(A,A+10);   for(int* it=A;it!=A+10;++it){   std::cout<<*it<<" ";}char c=getchar();}

结果：

首先定义一个大于号的函数对象：

template<typename T>struct greater :public binary_function<T,T,bool>{bool operator()(const T& x,const T& y)const{return x>y;}};

然后降序排列：

//降序排列int main(){int  A[10];for(int i=0;i<10;++i){A[i]=rand()%69;}sort(A,A+10,greater<int>());   for(int* it=A;it!=A+10;++it){   std::cout<<*it<<" ";}char c=getchar();}

由于我写的代码不是很好，所以对于一个可以排序的类至少要定义四种关系操作符：

struct student{int id;char name;student(int a=0,char b=0):id(a),name(b){ }bool operator<(const student& r)const{return id<r.id;}bool operator<=(const student& r)const {return id<=r.id;}bool operator==(const student& r)const {return id==r.id;}bool operator>(const student& r)const {return id>r.id;}};

这里只是为了测试，所以student类地设计非常粗糙，只是为了证明正确性，测试程序也写得很糟糕。。。。请谅解

：

int main(){student A[10];for(int i=0;i<10;++i){int x=rand()%120;A[i].id=x;A[i].name=x+'a';}sort(A,A+10); for(student* it=A;it!=A+10;++it){   std::cout<<(*it).id<<" ";} char c=getchar();}

最后一个，就是我在第六章的一个问题，对于非常大的卫星数据，对于优先队列来说，我们更多的是使用句柄（指针）而非直接用数据，所以对于指向某一类型数据的句柄或指针，我们也要让他能够在优先队列中工作，借此契机，我用heap_sort来说明，我对于这个问题的解法。。我觉得应该把这个问题转移到句柄的设计而非是优先队列的设计。

如果student是非常大的数据类型，那么我在优先队列中处理的是指向student的句柄，如何让这个句柄工作，这个问题的解答和如何让student句柄在堆排序算法中工作是一样的。

我定义了指向student的句柄---迭代器，再次声明，迭代器设计的非常糟糕，只是为了测试正确性。。。。

struct student_iter{student* p;student_iter():p(0){}student_iter(student* x):p(x){ }student_iter(const student_iter& x):p(x.p){ }student_iter& operator=(student_iter& x){p=(x.p);return *this;}bool operator<(const student_iter& r)const{return p->id<(r.p)->id;}bool operator<=(const student_iter& r)const {return p->id<=(r.p)->id;}bool operator==(const student_iter& r)const {return p->id==(r.p)->id;}bool operator>(const student_iter& r)const {return p->id>(r.p)->id;}int& operator*() const {return (*p).id;}};

定义好这些，就可以工作了：

int main(){student A[10];for(int i=0;i<10;++i){int x=rand()%124;A[i].id=x;A[i].name=x+'a';}student_iter B[10];for(int i=0;i<10;++i){B[i]=student_iter(&A[i]);} for(student* it=A;it!=A+10;++it){   std::cout<<(*it).id<<" ";} std::cout<<'\n';    _heap_sort(B,B+10);   for(student_iter* it=B;it!=B+10;++it){   std::cout<<*(*it)<<" ";}char c=getchar();}

对于这个程序的扩展，例如如果排序有大量重复元素的序列，这样我们就可以按《算法导论》7-2设计算法了：

//当后面多一个bool型参数时，可以选择分三段式template<typename RandomIterator,typename T>iter_pair<RandomIterator> _partition(RandomIterator first,RandomIterator last,T pivot,bool){typedef iterator_traits<RandomIterator>::difference_type size_type;for(RandomIterator i=first;i!=last;++i){if(*i==pivot){RandomIterator tmp=last-1;iter_swap(i,tmp);break;}}RandomIterator i=first-1;RandomIterator r=last-1;size_type count=1;//等于pivot元素的个数,初始时为1  RandomIterator tmp=r;//[tmp,last)中的元素的值等于pivotfor(RandomIterator j=first;j!=tmp;++j){if(*j<pivot){++i;iter_swap(i,j);}else if(*j==pivot){//如果两者相等，交换(r-count)和j，此时j的值未知，需要重新判断//是故--j，主元数目增加一    tmp=r-count;iter_swap(j,tmp);++count;--j;}}size_type count_=count;//此时[i+1,i+count+1)中的元素的值大于pivot，[i+count+1,last)中元素的值等于pivotif(ptrdiff_t(r-(i-count))<count){//此时等于pivot的元素多于大于pivot的元素count_=ptrdiff_t(r-(i-count));}//交换for(int j=0;j<count_;++j){ RandomIterator L=i+1+j,R=r-j;iter_swap(L,R);}//此时[first,i]小于pivot，[i+1,i+count]等于pivot，[i+count+1,r]大于pivotreturn iter_pair<RandomIterator>(i+1,i+count);}

然后再设计多一个bool类型参数的对应的sort函数进行宽展，这里就不写了。。。