【数据结构练习】求区间第K大数的几种方法

来源：互联网发布：淘宝美工工资待遇编辑：程序博客网时间：2024/05/16 11:17

这类求数列上区间第K大数的题目非常非常多(当然题目要求通常是求区间第K小)。

比如HDOJ 2665，POJ 2104，SOJ 3147，SOJ 3010，SOJ 3102(只计算一次)，POJ 2761(区间不包含)。

求解这个问题的方法也非常多，在这里对几种我认为比较常见的方法做一下总结，今后也会不断补充。

当然，几乎所有的高级数据结构都可以用来求区间第K大数，我也认为这是初学一个数据结构时的一个很好的练习。

高级数据结构是我的软肋之一，如果代码写的有什么不优越的地方，欢迎神犇指教 > < 。

1、快速划分

最简单的方法，当然就是用类似快排的方法做快速划分，

每次随机选取一个数作为“主元”，以“主元”为分界线把当前区间的数划分成两部分，并找出“主元”的精确位置，然后不断递归。

关于这个算法的讲解可以看MIT的《算法导论》公开课。

无奈我太蒟蒻，怎么都是TLE，大家姑且一看：

#include<cstdio>#include<iostream>#include<sstream>#include<cstdlib>#include<cstring>#include<string>#include<climits>#include<ctime>#include<cmath>#include<algorithm>#include<queue>#include<vector>#include<stack>#include<set>#include<map>#define INF 0x3f3f3f3f#define eps 1e-8using namespace std;const int MAXN=110000;int a[MAXN],b[MAXN];char s[2000000];int quicksort(int p, int q){    int k=p+((int)rand()%(q-p+1));    swap(b[k],b[p]);    int i=p;    for(int j=p+1; j<=q; j++)    {        if(b[j]<b[p])        {            i++;            swap(b[i],b[j]);        }    }    swap(b[i],b[p]);    return i-p+1;}int solve(int p, int q, int k){    int tmp;    while((tmp=quicksort(p,q))!=k)    {        if(tmp<k)        {            p+=tmp;            k-=tmp;        }        else        {            q=p+tmp-1;        }    }    return b[p+tmp-1];}int main(){    int n,m;    while(scanf("%d%d",&n,&m)==2)    {        for(int i=0; i<n; i++)        {            scanf("%d",&a[i]);        }        while(m--)        {            int l,r,k;            scanf("%d%d%d",&l,&r,&k);            memcpy(b,a,n*sizeof(int));            printf("%d\n",solve(l-1,r-1,k));        }    }    return 0;}

2、二叉堆

用一个容量为K的二叉堆，把当前区间的元素全部入堆，但是每次只保留这个堆中前K小的数，最后这个堆中的堆顶元素即为所求

这个算法肯定更加是TLE的，我相信即使手写堆也多半时过不了的，给大家写一段代码示意一下：

#include<cstdio>#include<iostream>#include<sstream>#include<cstdlib>#include<cstring>#include<string>#include<climits>#include<ctime>#include<cmath>#include<algorithm>#include<queue>#include<vector>#include<stack>#include<set>#include<map>#define INF 0x3f3f3f3f#define eps 1e-8using namespace std;const int MAXN=110000;int a[MAXN];priority_queue<int> q;int main(){    int cas;    scanf("%d",&cas);    while(cas--)    {        int n,m;        scanf("%d%d",&n,&m);        for(int i=0; i<n; i++)        {            scanf("%d",&a[i]);        }        while(m--)        {            int l,r,k;            scanf("%d%d%d",&l,&r,&k);            l--;            r--;            while(!q.empty())            {                q.pop();            }            int cot=0;            for(int i=l; i<=r; i++)            {                q.push(a[i]);                cot++;                if(cot>k)                {                    q.pop();                    cot--;                }            }            printf("%d\n",q.top());        }    }    return 0;}

3、树状数组

小伙伴们喜闻乐见的树状数组也可以用来求区间第K大数哦！

树状数组中的+-lowbit，实际上就是在“树”上实现了跳到父节点/子节点的操作。

由此可以二分地求出第K大数。

参考了clj的代码和一篇解题报告，这个是只能在固定区间求的。

当然想要在任意子区间求显然也不难，只要每次重新建树就可以了。

和上面一样，这个代码也是无论如何都过不了的 > < 。

#include<cstdio>#include<iostream>#include<sstream>#include<cstdlib>#include<cstring>#include<string>#include<climits>#include<cmath>#include<algorithm>#include<queue>#include<vector>#include<stack>#include<set>#include<map>#define INF 0x3f3f3f3f#define eps 1e-8using namespace std;const int MAXP = 22;const int MAXN = 2100000;vector<int> hash;int a[MAXN], tr[MAXN];int m;void add(int x){    while(x <= m)    {        tr[x]++;        x+=x&(-x);    }}int solve(int k){    int cnt = 0, ans = 0;    for (int i = MAXP-1; i >= 0; i--)    {        ans += 1 << i;        if (ans >= m || cnt + tr[ans] >= k)        {            ans -= 1 << i;        }        else        {            cnt += tr[ans];        }    }    return ans + 1;}int main(){    int n,k;    while(scanf("%d%d", &n, &k)==2)    {        hash.clear();        memset(tr, 0, sizeof(tr));        for(int i = 1; i <= n; i ++)        {            scanf("%d", &a[i]);            hash.push_back(a[i]);        }        sort(hash.begin(), hash.end());        hash.erase(unique(hash.begin(), hash.end()), hash.end());        m = hash.size();        for(int i = 1; i <= n; i ++)        {            a[i] = lower_bound(hash.begin(), hash.end(), a[i]) - hash.begin();            add(a[i]+1);        }        printf("%d\n", hash[solve(k)-1]);    }    return 0;}

-------------------------------------------------------------------------下面介绍几种不TLE的----------------------------------------------------------------------------

4、划分树

划分树是一种线段树。

可以用来求区间第K大。

学划分树墙裂建议做一下HDOJ 4417，可以通过对代码稍加改动，由求区间第K大转化为求区间内小于某个值的数有多少个(不需二分哟)。

#include<cstdio>#include<iostream>#include<sstream>#include<cstdlib>#include<cstring>#include<string>#include<climits>#include<cmath>#include<algorithm>#include<queue>#include<vector>#include<stack>#include<set>#include<map>#define INF 0x3f3f3f3f#define eps 1e-8using namespace std;const int MAXN=110000;int tr[MAXN<<2];int sorted[MAXN],toleft[20][MAXN],val[20][MAXN];void build(int l, int r, int dep, int rt){    if(l==r)    {        return;    }    int mid=(l+r)>>1;    int lnum=mid-l+1;    for(int i=l; i<=r; i++)    {        if(val[dep][i]<sorted[mid])        {            lnum--;        }    }    int lp=l,rp=mid+1;    int cur_lnum=0;    for(int i=l; i<=r; i++)    {        if(i==l)        {            toleft[dep][i]=0;        }        else        {            toleft[dep][i]=toleft[dep][i-1];        }        if(val[dep][i]<sorted[mid])        {            toleft[dep][i]++;            val[dep+1][lp++]=val[dep][i];        }        else if(val[dep][i]>sorted[mid])        {            val[dep+1][rp++]=val[dep][i];        }        else        {            if(cur_lnum<lnum)            {                cur_lnum++;                toleft[dep][i]++;                val[dep+1][lp++]=val[dep][i];            }            else            {                val[dep+1][rp++]=val[dep][i];            }        }    }    build(l,mid,dep+1,rt<<1);    build(mid+1,r,dep+1,rt<<1|1);}int query(int l, int r, int L, int R, int k, int dep, int rt){    if(l==r)    {        return val[dep][l];    }    int lnum,cur_lnum,rnum,cur_rnum;    int mid=(l+r)>>1;    if(l==L)    {        lnum=toleft[dep][R];        cur_lnum=0;    }    else    {        lnum=toleft[dep][R]-toleft[dep][L-1];        cur_lnum=toleft[dep][L-1];    }    if(lnum>=k)    {        int newL=l+cur_lnum;        int newR=l+lnum+cur_lnum-1;        return query(l,mid,newL,newR,k,dep+1,rt<<1);    }    else    {        int rnum=R-L+1-lnum;        int cur_rnum=L-l-cur_lnum;        int newL=mid+cur_rnum+1;        int newR=mid+cur_rnum+rnum;        return query(mid+1,r,newL,newR,k-lnum,dep+1,rt<<1|1);    }}int main(){    int cas;    scanf("%d",&cas);    while(cas--)    {        int n,m;        scanf("%d%d",&n,&m);        for(int i=0; i<n; i++)        {            scanf("%d",&val[0][i]);            sorted[i]=val[0][i];        }        sort(sorted,sorted+n);        build(0,n-1,0,1);        while(m--)        {            int l,r,k;            scanf("%d%d%d",&l,&r,&k);            l--;            r--;            printf("%d\n",query(0,n-1,l,r,k,0,1));        }    }    return 0;}

5、归并树

归并树跟划分树是一对好基友。

不过我短时间内不打算学。

先开个坑，丢个别人的解题报告：

6、主席树(可持久化线段树/函数式线段树)

主席树的核心要义，就是每次更新都只新建被更新了的节点(O(lgn)个)，而保存历史版本

感谢sd6001和ftiasch的模板

数组版：

#include<cstdio>#include<iostream>#include<sstream>#include<cstdlib>#include<cstring>#include<string>#include<climits>#include<cmath>#include<algorithm>#include<queue>#include<vector>#include<stack>#include<set>#include<map>#define INF 0x3f3f3f3f#define eps 1e-8using namespace std;const int MAXN = 100005;const int MAXM = 2000005;vector<int> hash;int a[MAXN];int tot;struct NODE{    int count;    int left, right;};int root[MAXN];NODE node[MAXM];int null;int newnode(int count, int left, int right){    node[tot].count=count;    node[tot].left=left;    node[tot].right=right;    return tot++;}int insert(int rt, int l, int r, int k){    if (l <= k && k <= r)    {        if (l == r)        {            return newnode(node[rt].count + 1, 0, 0);        }        int m = (l + r) >> 1;        return newnode(node[rt].count + 1,                       insert(node[rt].left, l, m, k),                       insert(node[rt].right, m + 1, r, k));    }    return rt;}int query(int p, int q, int l, int r, int k){    if (l == r)    {        return hash[l];    }    int m = (l + r) >> 1;    int cot = node[node[q].left].count - node[node[p].left].count;    if (cot >= k)    {        return query(node[p].left, node[q].left, l, m, k);    }    return query(node[p].right, node[q].right, m + 1, r, k - cot);}int main(){    int cas;    scanf("%d", &cas);    while(cas --)    {        int n, q;        scanf("%d%d", &n, &q);        hash.clear();        tot = 0;        for(int i = 1; i <= n; i ++)        {            scanf("%d", &a[i]);            hash.push_back(a[i]);        }        sort(hash.begin(), hash.end());        hash.erase(unique(hash.begin(), hash.end()), hash.end());        int m = hash.size();        null = newnode(0, 0, 0);        root[0] = newnode(0, 0, 0);        for(int i = 1; i <= n; i ++)        {            a[i] = lower_bound(hash.begin(), hash.end(), a[i]) - hash.begin();            root[i] = insert(root[i - 1], 0, m - 1, a[i]);        }        while(q--)        {            int l, r, k;            scanf("%d%d%d", &l, &r, &k);            printf("%d\n", query(root[l - 1], root[r], 0, m - 1, k));        }    }    return 0;}

动态分配内存版：

注意：由于这个模板只有new没有delete，所以可想而知所有case的申请内存都叠加在了一起，分分钟MLE无鸭梨。

目前这个模板只能过SOJ 3147。不过在单case评测的OJ，这样写还是很优雅的。

#include<cstdio>#include<iostream>#include<sstream>#include<cstdlib>#include<cstring>#include<string>#include<climits>#include<cmath>#include<algorithm>#include<queue>#include<vector>#include<stack>#include<set>#include<map>#define INF 0x3f3f3f3f#define eps 1e-8using namespace std;const int MAXN = 100005;vector<int> hash;int a[MAXN];struct node{    int count;    node *left, *right;    node(int count, node* left, node* right):        count(count), left(left), right(right) {}    node* insert(int k);    node* insert(int l, int r, int k);};node* root[MAXN];node* null = new node(0, NULL, NULL);node* node::insert(int l, int r, int k){    if (l <= k && k <= r)    {        if (l==r)        {            return new node(this->count + 1, null, null);        }        int m = (l + r) >> 1;        return new node(this->count + 1,                        this->left->insert(l, m, k),                        this->right->insert(m + 1, r, k));    }    return this;}int query(node *p, node *q, int l, int r, int k){    if (l == r)    {        return hash[l];    }    int m = (l + r) >> 1;    int cot = q->left->count - p->left->count;    if (cot >= k)    {        return query(p->left, q->left, l, m, k);    }    return query(p->right, q->right, m+1, r, k - cot);}int main(){    int cas;    null->left = null->right = null;    scanf("%d", &cas);    while(cas --)    {        int n,q;        scanf("%d%d", &n, &q);        hash.clear();        for(int i = 1; i <= n; i ++)        {            scanf("%d", &a[i]);            hash.push_back(a[i]);        }        sort(hash.begin(), hash.end());        hash.erase(unique(hash.begin(), hash.end()), hash.end());        int m = hash.size();        root[0] = null;        for(int i = 1; i <= n; i ++)        {            a[i] = lower_bound(hash.begin(), hash.end(), a[i]) - hash.begin();            root[i] = root[i-1]->insert(0,m - 1,a[i]);        }        while(q --)        {            int l, r, k;            scanf("%d%d%d", &l, &r, &k);            printf("%d\n", query(root[l - 1], root[r], 0, m - 1, k));        }    }    return 0;}

7、Treap

Treap做区间第K大数最好做POJ 2761，因为这道题有个条件是区间不包含，否则复杂度会上升很多。

这道题实际上就是维护一个FIFO队列，使得处理某个询问时，treap中只包含正在询问的这个区间。

感谢 ftiasch 的模板。

#include <cstdio>#include <iostream>#include <algorithm>#include <sstream>#include <cstdlib>#include <cstring>#include <string>#include <climits>#include <cmath>#include <queue>#include <vector>#include <stack>#include <set>#include <map>#define INF 0x3f3f3f3f#define eps 1e-8#define fi first#define nd secondusing namespace std;typedef pair<int, int> PII;const int MAXN = 110000;const int MAXM = 51000;int treap_size;int key[MAXN], weight[MAXN], cot[MAXN], size[MAXN], cld[MAXN][2];pair<PII, PII> query[MAXM];int ans[MAXM];int a[MAXN];bool cmp(const pair<PII, PII> &a, const pair<PII, PII> &b){    return a.fi < b.fi;}void update(int &x){    size[x] = size[cld[x][0]] + size[cld[x][1]] + cot[x];}void rotate(int &x, int flg){    int y = cld[x][flg];    cld[x][flg] = cld[y][flg ^ 1];    cld[y][flg ^ 1] = x;    update(x);    update(y);    x = y;}void insert(int &x, int k){    if(x)    {        if(key[x] == k)        {            cot[x] ++;        }        else        {            int flg = key[x] < k;            insert(cld[x][flg], k);            if (weight[cld[x][flg]] < weight[x])            {                rotate(x, flg);            }        }    }    else    {        x = treap_size ++;        key[x] = k;        weight[x] = rand();        cot[x] = 1;        cld[x][0] = cld[x][1] = 0;    }    update(x);}void erase (int &x, int k){    if(key[x] == k)    {        if(cot[x] > 1)        {            cot[x] --;        }        else        {            if(!cld[x][0] && !cld[x][1])            {                x = 0;                return;            }            rotate(x, ! (weight[cld[x][0]] < weight[cld[x][1]]));            erase(x, k);        }    }    else    {        erase(cld[x][key[x] < k], k);    }    update(x);}int calc(int &x, int k){    if(size[cld[x][0]] >= k)    {        return calc(cld[x][0], k);    }    if(k <= size[cld[x][0]] + cot[x])    {        return key[x];    }    return calc(cld[x][1], k - (size[cld[x][0]] + cot[x]));}void treap_init(){    treap_size = 1;    size[0] = 0;    weight[0] = INT_MAX;}int main(){    int n, m;    while(scanf("%d %d", &n, &m) == 2)    {        treap_init();        for(int i = 0; i < n; i ++)        {            scanf("%d", &a[i]);        }        for(int i = 0; i < m; i ++)        {            scanf("%d %d %d", &query[i].fi.fi, &query[i].fi.nd, &query[i].nd.fi);            query[i].fi.fi --;            query[i].fi.nd --;            query[i].nd.nd = i;        }        sort(query, query + m, cmp);        int front = 0, rear = 0;        int root = 0;        for(int i = 0; i < m; i ++)        {            while(rear <= query[i].fi.nd)            {                insert(root, a[rear ++]);            }            while(front < query[i].fi.fi)            {                erase(root, a[front ++]);            }            ans[query[i].nd.nd] = calc(root, query[i].nd.fi);        }        for(int i = 0; i < m; i ++)        {            printf("%d\n", ans[i]);        }    }    return 0;}

8、红黑树

这个必须可以搞。。

不过正常人不会想要手写红黑树吧。。

所以也无限期搁置。。

9、SPLAY

待学。

稍安勿躁 >_<

【数据结构练习】 求区间第K大数的几种方法

【数据结构练习】求区间第K大数的几种方法