二分查找（Binary Search）

来源：互联网发布：防排烟设计软件编辑：程序博客网时间：2024/04/27 01:12

1.递归实现

// Recursive binary search over the closed index range [low, high] of the
// ascending array `a`.
// Returns the index of an element equal to `key`; when no such element exists,
// returns -(p + 1), where p is the position at which `key` could be inserted
// while keeping the array sorted (so the result is always negative on a miss).
int binarySearchRecursive(int a[], int low, int high, int key) {
    // Empty range: `low` is exactly the insertion position.
    if (high < low) {
        return -(low + 1);
    }

    // Written as low + (high - low) / 2 to avoid overflowing low + high.
    const int middle = low + (high - low) / 2;
    const int probe = a[middle];

    if (probe == key) {
        return middle;
    }
    return (probe < key)
        ? binarySearchRecursive(a, middle + 1, high, key)
        : binarySearchRecursive(a, low, middle - 1, key);
}

// Convenience overload: searches all `n` elements of `a`.
int binarySearchRecursive(int a[], int n, int key) {
    return binarySearchRecursive(a, 0, n - 1, key);
}

2.非递归实现

// Iterative binary search over the `n` elements of the ascending array `a`.
// Returns the index of an element equal to `key`; when no such element exists,
// returns -(p + 1), where p is the position at which `key` could be inserted
// while keeping the array sorted.
int binarySearch(int a[], int n, int key) {
    int first = 0;
    int last = n - 1;  // closed range [first, last]

    while (first <= last) {
        // Written this way to avoid overflowing first + last.
        const int middle = first + (last - first) / 2;
        const int probe = a[middle];

        if (probe == key) {
            return middle;
        }
        if (probe < key) {
            first = middle + 1;
        } else {
            last = middle - 1;
        }
    }

    // Range is empty: `first` is the insertion position.
    return -(first + 1);
}

3.二分查找的一种版本，现实需求中可能需要查找“在不破坏有序的状态的原则下，可插入key的第一个位置”。

/*
 * Finds the first position in the ascending array `a` (length n, n >= 0)
 * whose element is not less than `key`, i.e. the first position where `key`
 * could be inserted without breaking the ordering.
 *
 * Returns that index when the element found there equals `key`; otherwise
 * returns -(p + 1), where p is that insertion position (p == n when every
 * element is smaller than `key`).
 *
 * Loop invariant: a[below] < key && a[atLeast] >= key, using the imaginary
 * sentinels a[-1] < key and a[n] >= key. The sentinels are never read.
 */
int lowerBound(int a[], int n, int key) {
    int below = -1;
    int atLeast = n;

    // Stop once the two markers are adjacent; `atLeast` is then the answer.
    while (below + 1 < atLeast) {
        const int middle = below + (atLeast - below) / 2;
        if (a[middle] < key) {
            below = middle;    // keeps a[below] < key
        } else {
            atLeast = middle;  // keeps a[atLeast] >= key
        }
    }

    // `key` is present only if the slot is inside the array and matches.
    const bool present = (atLeast < n) && (a[atLeast] == key);
    return present ? atLeast : -(atLeast + 1);
}

4.二分查找的一种版本，现实需求中可能需要查找“在不破坏有序的状态的原则下，可插入key的最后一个位置”。

/*
 * Finds the last position in the ascending array `a` (length n, n >= 0) at
 * which `key` could be inserted without breaking the ordering, i.e. the index
 * of the first element greater than `key` (n if there is none).
 *
 * Returns that index when the element just before it equals `key`; otherwise
 * returns -(p + 1), where p is that insertion position.
 *
 * Loop invariant: a[atMost] <= key && a[above] > key, using the imaginary
 * sentinels a[-1] <= key and a[n] > key. The sentinels are never read.
 * On exit `atMost` is the last index whose element is <= key (or -1).
 */
int upperBound(int a[], int n, int key) {
    int atMost = -1;
    int above = n;

    // Stop once the two markers are adjacent.
    while (atMost + 1 < above) {
        const int middle = atMost + (above - atMost) / 2;
        if (key < a[middle]) {
            above = middle;   // keeps a[above] > key
        } else {
            atMost = middle;  // keeps a[atMost] <= key
        }
    }

    const int insertAt = atMost + 1;  // index of the first element > key
    // `key` is present only if some element is <= key and the last such
    // element is equal to it.
    if (atMost >= 0 && a[atMost] == key) {
        return insertAt;
    }
    return -(insertAt + 1);
}

幸运的是，STL在<algorithm>中实现了这些算法的泛型版本。对应的函数分别为：binary_search, lower_bound, upper_bound

其内部实现非常精妙，详见侯捷的《STL 源码剖析》。当然思想与上文实现大同小异，但是速度方面有待验证。

此外，C库函数也提供了void* bsearch(const void *key, const void *base, size_t n, size_t size, int (*com) (const void *first, const void *second) )。

下面给出以上函数的例子：

#include<cstdlib>#include<iostream>#include<algorithm>#include<ctime>using namespace std;//print a array with macro#define printArray(arr,n) for(int i=0;i<n;i++){\        cout<<a[i]<<' ';\    }\    cout<<endl<<endl;\//compare function greaterint greater(const void *first,const void *second){    int _first=*static_cast<const int*>(first);    int _second=*static_cast<const int*>(second);    if(_first>_second)        return 1;    else if(_first<_second)        return -1;    else        return 0;}int main(){    srand(time(0));    const int n=10;    int a[n];    for(int i=0;i<n;i++){        a[i]=rand()%20;    }    printArray(a,n);    sort(a,a+n);    printArray(a,n);    int b;    b=rand()%20;     //b=a[0];//test 1    // b=a[n-1];//2    printf("searching %d ...\n",b);    bool found=binary_search(a,a+n,b);    if(found){//found        cout<<"found"<<endl;    }    else{        cout<<"no found"<<endl;    }    cout<<"\nbsearch"<<endl;    void* p=bsearch(&b,a,n,sizeof(int),::greater);    if(p!=NULL){//found        cout<<"found "<<*static_cast<int*>(p)<<endl;    }    else{        cout<<"no found"<<endl;    }    cout<<"\nbinarySearchRecursive"<<endl;    int index=binarySearchRecursive(a,n,b);    if(index>=0){        cout<<"found! And index="<<index<<endl;    }    else{        cout<<"no found! But "<<b<<" should be insert at "<<-index-1<<endl;    }    cout<<"\nbinarySearch"<<endl;    index=binarySearch(a,n,b);    if(index>=0){        cout<<"found! index="<<index<<endl;    }    else{        cout<<"no found! But "<<b<<" should be insert at "<<-index-1<<endl;    }    int* bound=NULL;    bound=lower_bound(a,a+n,b);    cout<<"lower bound: ";    if(bound!=NULL){//exist        cout<<*bound;    }    else{        cout<<"no exist";    }    cout<<endl;    bound=upper_bound(a,a+n,b);    cout<<"upper bound: ";    if(bound!=NULL&&bound<a+n){//exist.         
/*这里需要检查bound是否在数组中，因为最大值的upper_bound返回其下一个位置，即a+n。        而lower_bound一定指向数组外面。*/        cout<<*bound;    }    else{        cout<<"no exist";    }    cout<<endl;    cout<<"\nlowerBound"<<endl;    int lIndex=lowerBound(a,n,b);    if(lIndex>=0){        cout<<"found! The index of lower bound is "<<lIndex<<endl;    }    else{        cout<<"no found! The index of lower bound is "<<-lIndex-1<<endl;    }    cout<<"\nupperBound"<<endl;    int uIndex=upperBound(a,n,b);    if(uIndex>=0){        cout<<"found! The index of upper bound is "<<uIndex<<endl;    }    else{        cout<<"no found! The index of upper bound is "<<-uIndex-1<<endl;    }}

那么这些算法的执行时间又是咋样的呢？

我分别针对数组长度和选中概率（即，数组中数据被选中的概率）做了如下实验：

//生成超过RAND_MAX的大随机数long bigRand(){    return rand()*RAND_MAX+rand();}int main(){    srand(time(0));    //生成测试数组    const int n=10000000;    long* a=new long[n];    for(int i=0;i<n;i++){        a[i]=bigRand()%(n*4);//设定选中概率    }        //生成查找的数值    const int times=10000;//测试次数    long b[times];    for(int i=0;i<times;i++){        b[i]=bigRand()%(n*4);    }    clock_t start,end;    //start=clock();    sort(a,a+n);    //end=clock();    //printf("sort eclipse time: %.2f ms\n",double(end-start)*1000/CLOCKS_PER_SEC);        start=clock();    for(int i=0;i<times;i++){        binarySearchRecursive(a,n,b[i]);    }    end=clock();    printf("%-30s: %.2f ms\n","binarySearchRecursive",double(end-start)*1000/CLOCKS_PER_SEC);    start=clock();    for(int i=0;i<times;i++){        binarySearch(a,n,b[i]);    }    end=clock();    printf("%-30s: %.2f ms\n","binarySearch",double(end-start)*1000/CLOCKS_PER_SEC);    //vector<int> vec(a,a+n);    start=clock();    for(int i=0;i<times;i++){        //binary_search(vec.begin(),vec.end(),b[i]);        lower_bound(a,a+n,b[i]);    }    end=clock();    printf("%-30s: %.2f ms\n","binary_search",double(end-start)*1000/CLOCKS_PER_SEC);    start=clock();    for(int i=0;i<times;i++){        bsearch(&b[i],a,n,sizeof(int),::greater);    }    end=clock();    printf("%-30s: %.2f ms\n","bsearch",double(end-start)*1000/CLOCKS_PER_SEC);    delete []a;}

Debug模式

选中概率为0.5

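选中概率0.5，执行1000次的总时间（ms）：

| 数组长度 | binarySearchRecursive | binarySearch | lower_bound | bsearch |
| --- | --- | --- | --- | --- |
| 10000000 | 13 | 5 | 28 | 12 |
| 1000000 | 10 | 3 | 22 | 9 |
| 100000 | 6 | 2 | 18 | 6 |
| 10000 | 6 | 2 | 15 | 5 |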

选中概率为0.25

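选中概率0.25，执行1000次的总时间（ms）：

| 数组长度 | binarySearchRecursive | binarySearch | lower_bound | bsearch |
| --- | --- | --- | --- | --- |
| 10000000 | 15 | 5 | 28 | 13 |
| 1000000 | 10 | 3 | 21 | 8 |
| 100000 | 7 | 3 | 18 | 7 |
| 10000 | 5 | 2 | 15 | 5 |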

选中概率为0.75的执行时间相似，这里就省略了。

对比以上实验结果可知，Debug模式下binarySearch的执行效率最高，而以lower_bound为代表的STL三个算法效率最低，可能是因为Debug模式下对STL算法的调用没有被优化。

由于实验的时候忘记调成release模式，差点得出了相反的结果。

Release模式下binarySearchRecursive、binarySearch、lower_bound的耗时全为0，bsearch花费的时间最多。可见STL三算法得到了优化调用；而bsearch较慢，正如《编程珠玑》中所述，是因为C库函数的通用接口开销很大。需要注意的是，如果测试代码没有使用这些函数的返回值，编译器在Release模式下可能会把调用整体优化掉，此时0 ms并不代表真实的查找耗时。

综上，没有特殊需求，二分搜索可以直接使用STL中lower_bound,upper_bound,binary_search函数。

0 0