莫队算法——解决序列上询问的利器

来源：互联网发布：大数据相关论文结语编辑：程序博客网时间：2024/05/19 02:03

问题：
有一个长为N序列，有M个询问：在区间[L,R]内，出现了多少个不同的数字。（序列中所有数字均小于K）。题目会给出K。

莫队算法就是滋磁解决这类问题的离线算法。（其实很简单）

首先来看看暴力：
由于暴力还是比较水的，所以直接上：

#include <iostream>
using namespace std ;

const int maxn = 50010 ;

int n, a[maxn] ;        // the sequence, stored 1-indexed
bool vis[maxn] ;        // vis[v]: value v has been seen in the current query range

// Brute force: for every query [L,R] scan the range and count the values that
// appear for the first time. Cost per query is proportional to R-L+1.
int main() {
    int k, query_time ;
    // k (upper bound of the values) belongs to the input format; the counting
    // below does not need it.
    cin >> n >> query_time >> k ;
    for ( int i = 1 ; i <= n ; i ++ )
        cin >> a[i] ;
    while ( query_time -- ) {
        int L, R ;
        cin >> L >> R ;
        int ans = 0 ;
        for ( int i = L ; i <= R ; i ++ )
            if ( !vis[a[i]] ) {
                vis[a[i]] = true ;
                ++ ans ;
            }
        // Undo only the marks set by this query instead of clearing all of vis.
        for ( int i = L ; i <= R ; i ++ )
            vis[a[i]] = false ;
        cout << ans << '\n' ;
    }
    return 0 ;
}

这个复杂度显然是 $O(N^2)$ 的（N、M 同阶时）。有些题目的范围可能到100000或者更大，那么显然就不能了。

这里还有一种稍作改进的方法，比上述做法大多数情况要快些。

#include <iostream>
#include <utility>
#include <vector>
using namespace std ;

const int maxn = 50010 ;

int a[maxn] ;                       // the sequence, stored 1-indexed
int cnt[maxn] ;                     // cnt[v]: occurrences of value v inside the current window
int answer ;                        // number of distinct values inside the current window
vector< pair<int,int> > queries ;   // the queries [L,R], answered in the stored order

// Extend the window by position pos; a value seen for the first time adds one.
void add ( int pos ) {
    ++ cnt[a[pos]] ;
    if ( cnt[a[pos]] == 1 )
        ++ answer ;
}

// Drop position pos from the window; a value whose last copy leaves removes one.
void remove ( int pos ) {
    -- cnt[a[pos]] ;
    if ( cnt[a[pos]] == 0 )
        -- answer ;
}

// Answers every query by sliding the window [curL,curR] from the previous
// query's range to the next one, and prints one answer per line.
void solve() {
    int curL = 1, curR = 0 ;        // current L R; [1,0] is the empty window
    for ( size_t q = 0 ; q < queries.size() ; q ++ ) {
        int L = queries[q].first, R = queries[q].second ;
        // Grow first, shrink afterwards: the window then never becomes
        // "inverted", so no count temporarily drops below zero.
        while ( curL > L )
            add ( --curL ) ;
        while ( curR < R )
            add ( ++curR ) ;
        while ( curL < L )
            remove ( curL++ ) ;
        while ( curR > R )
            remove ( curR-- ) ;
        cout << answer << '\n' ;
        // Warning : please notice the order of "--", "++" and "cur" :
        // add() moves the pointer first, remove() uses the position first.
    }
}

其实还算好理解的，如果不明白，随便搞组数据手玩一下就明白了

手玩数据：
6 4 3
1 3 2 1 1 3
1 4
2 6
3 5
5 6
输出答案：
3
3
2
2

不幸的是，虽然这个东西比我们的暴力要快，但是它的时间复杂度仍然是 $O(N^2)$ 的。
但好消息是，这个东西可以算是莫队算法的核心了。（你在逗我笑？？？？）
其实是真的 :）

莫队算法是怎么做的呢？
考虑到上述改进的办法的效率低下主要是因为curL和curR两个指针前前后后跑来跑去太多次。于是，我们的做法就是不要让他们跑太多没用的距离。
我们可以通过离线下所有的询问，然后通过某种排序，让两个指针跑动的距离尽量变少。具体的做法是把N划分成 $\sqrt{N}$ 段，每段长度都是 $\sqrt{N}$，然后再把所有询问按照L端点排序，看各个询问被划分到哪一块里。接着，对于各个划分出的段，在各自的段里，将它包含的所有区间再按照R端点排序。
举个例子：假设我们有3个长度为3的段（0-2,3-5,6-8）：
{0, 3} {1, 7} {2, 8} {7, 8} {4, 8} {4, 4} {1, 2}
先根据所在段落的编号重排
{0, 3} {1, 7} {2, 8} {1, 2} | {4, 8} {4, 4} | {7, 8}
现在按R的值重排
{1, 2} {0, 3} {1, 7} {2, 8} | {4, 4} {4, 8} | {7, 8}

然后？还要然后吗？就用刚刚那个算法来玩就好了。毕竟我们只是交换了一下询问的顺序而已，并没有对算法做什么改动。

对于这个的复杂度嘛，其实是比较鬼畜的，就这么改了下顺序，然后就变成了 $O(N\sqrt{N})$ 的

上面代码所有查询的复杂性是由4个while循环决定的。前2个while循环是“curL的移动总量”，后2个while循环是“curR的移动总量”。这两者的和将是总复杂度。
有趣的是。先考虑右指针：对于每个块，查询按R递增的顺序排序，所以右指针curR按照递增的顺序移动。在下一个块的开始时，指针可能停在很靠右的位置，需要移回到下一个块中最小的R处。这意味着对于一个给定的块，右指针移动的量是 $O(N)$。我们有 $O(\sqrt{N})$ 个块。所以总共是 $O(N\sqrt{N})$。
关于左指针：同一块内所有查询的左端点都落在同一段中，当我们完成一个查询到下一个查询时，左指针会移动，但由于两次询问的L在同一块中，此移动是 $O(\sqrt{N})$ 的。所以，左指针的移动总量是 $O(M\sqrt{N})$。
所以，总复杂度就是 $O((N+M)\sqrt{N})$，就当做是 $O(N\sqrt{N})$ 吧。
是不是很简单的呐2333

接下来给几个例题：

例题1：BZOJ3781 小B的询问

Description

小B有一个序列，包含N个1~K之间的整数。他一共有M个询问，每个询问给定一个区间[L..R]，求Sigma(c(i)^2)的值,其中i的值从1到K，其中c(i)表示数字i在[L..R]中的重复次数。小B请你帮助他回答询问。

Input

第一行，三个整数N、M、K。
第二行，N个整数，表示小B的序列。
接下来的M行，每行两个整数L、R。

Output

M行，每行一个整数，其中第i行的整数表示第i个询问的答案。

Sample Input

6 4 3
1 3 2 1 1 3
1 4
2 6
3 5
5 6

Sample Output

6
9
5
2

HINT

对于全部的数据，1<=N、M、K<=50000

很裸的吧~

/**************************************************************
    Source Code : GoAway
    Date : 2017-02-06
****************************************************************/
#include <iostream>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cctype>
#include <vector>
#include <map>
#include <stack>
#include <queue>
#include <set>
#include <cmath>
#include <algorithm>
#include <ctime>
using namespace std ;

const int zhf = 1<<30 ;
const int maxn = 50010, tim = 250 ;     // tim: block length used to bucket left endpoints

// Reads one (optionally negative) decimal integer from stdin into x.
// Returns false when end of input is reached before any digit is found.
bool Read ( int &x ) {
    bool f = 0 ; x = 0 ;
    // getchar() returns int; keeping it in an int makes the EOF comparison
    // reliable even on platforms where plain char is unsigned.
    int c = getchar() ;
    while ( !isdigit(c) ) {
        if ( c == '-' ) f = 1 ;
        if ( c == EOF ) return false ;
        c = getchar() ;
    }
    while ( isdigit(c) ) {
        x = 10 * x + ( c - '0' ) ;
        c = getchar() ;
    }
    if ( f ) x = -x ;
    return true ;
}

// One offline query [L,R]; id remembers its position in the input.
struct query {
    int L, R, id ;
    // Order by block of L first, then by R inside the same block.
    friend bool operator < ( const query &a, const query &b ) {
        return (a.L/tim) == (b.L/tim) ? a.R < b.R : a.L < b.L ;
    }
} e[maxn] ;

int n, m, a[maxn], cnt[maxn] ;
// The sum of c(i)^2 can reach 50000^2 = 2.5e9, which does not fit in a 32-bit int.
long long ans[maxn], answer ;

// Position pos enters the window: c -> c+1 changes c^2 by 2c+1.
void add ( int pos ) {
    answer += 2LL * cnt[a[pos]] + 1 ;
    ++ cnt[a[pos]] ;
}

// Position pos leaves the window: c -> c-1 changes c^2 by -(2(c-1)+1).
void remove ( int pos ) {
    -- cnt[a[pos]] ;
    answer -= 2LL * cnt[a[pos]] + 1 ;
}

int main() {
    int k ;                             // K is part of the input format; not needed afterwards
    int curL = 1, curR = 0 ;            // current window [curL,curR]; [1,0] is empty
    Read(n) ; Read(m) ; Read(k) ;
    for ( int i = 1 ; i <= n ; i ++ )
        Read(a[i]) ;
    for ( int i = 1 ; i <= m ; i ++ ) {
        Read(e[i].L) ;
        Read(e[i].R) ;
        e[i].id = i ;
    }
    sort ( e+1, e+m+1 ) ;
    for ( int i = 1 ; i <= m ; i ++ ) {
        int L = e[i].L, R = e[i].R ;
        // Grow the window before shrinking it so that no count ever goes negative.
        while ( curL > L )
            add ( --curL ) ;
        while ( curR < R )
            add ( ++curR ) ;
        while ( curL < L )
            remove ( curL++ ) ;
        while ( curR > R )
            remove ( curR-- ) ;
        ans[e[i].id] = answer ;
    }
    for ( int i = 1 ; i <= m ; i ++ )
        printf ( "%lld\n", ans[i] ) ;
    return 0 ;
}

例题2：SDOI2009 HH的项链洛谷1972

Description

HH有一串由各种漂亮的贝壳组成的项链。HH相信不同的贝壳会带来好运，所以每次散步完后，他都会随意取出一段贝壳，思考它们所表达的含义。HH不断地收集新的贝壳，因此，他的项链变得越来越长。有一天，他突然提出了一个问题：某一段贝壳中，包含了多少种不同的贝壳？这个问题很难回答。。。因为项链实在是太长了。于是，他只好求助睿智的你，来解决这个问题。

Input

第一行：一个整数N，表示项链的长度。第二行：N个整数，表示依次表示项链中贝壳的编号（编号为0到1000000之间的整数）。第三行：一个整数M，表示HH询问的个数。接下来M行：每行两个整数，L和R（1 ≤ L ≤ R ≤ N），表示询问的区间。

Output

M行，每行一个整数，依次表示询问对应的答案。

Sample Input

6
1 2 3 4 3 5
3
1 2
3 5
2 6

Sample Output

2
2
4

HINT

对于20%的数据，N ≤ 100，M ≤ 1000；
对于40%的数据，N ≤ 3000，M ≤ 200000；
对于100%的数据，N ≤ 50000，M ≤ 200000。

还是很裸的2333

/**************************************************************
    Source Code : GoAway
    Date : 2017-02-06
****************************************************************/
#include <iostream>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cctype>
#include <vector>
#include <map>
#include <stack>
#include <queue>
#include <set>
#include <cmath>
#include <algorithm>
#include <ctime>
using namespace std ;

const int zhf = 1<<30 ;
const int maxn = 1000010 ;

// Reads one (optionally negative) decimal integer from stdin into x.
// Returns false when end of input is reached before any digit is found.
bool Read ( int &x ) {
    bool f = 0 ; x = 0 ;
    // getchar() returns int; keeping it in an int makes the EOF comparison
    // reliable even on platforms where plain char is unsigned.
    int c = getchar() ;
    while ( !isdigit(c) ) {
        if ( c == '-' ) f = 1 ;
        if ( c == EOF ) return false ;
        c = getchar() ;
    }
    while ( isdigit(c) ) {
        x = 10 * x + ( c - '0' ) ;
        c = getchar() ;
    }
    if ( f ) x = -x ;
    return true ;
}

int n, m, cnt[maxn], a[maxn], answer, ans[maxn], tim ;   // tim: block length, set in main

// One offline query [l,r]; id remembers its position in the input.
struct query {
    int l, r, id ;
    // Order by block of l first, then by r inside the same block.
    friend bool operator < ( const query &a, const query &b ) {
        return (a.l/tim) == (b.l/tim) ? a.r < b.r : a.l < b.l ;
    }
} e[maxn] ;

// Position pos enters the window; a value seen for the first time adds one.
void add ( int pos ) {
    if ( (++cnt[a[pos]]) == 1 ) ++ answer ;
}

// Position pos leaves the window; a value whose last copy leaves removes one.
void remove ( int pos ) {
    if ( (--cnt[a[pos]]) == 0 ) -- answer ;
}

int main() {
    int curL = 1, curR = 0 ;            // current window [curL,curR]; [1,0] is empty
    Read(n) ;
    for ( int i = 1 ; i <= n ; i ++ )
        Read(a[i]) ;
    Read(m) ;
    // Block length ~ n/sqrt(m) balances left-pointer work (m * block) against
    // right-pointer work (n * n/block). Clamped to at least 1 because the
    // comparator divides by it.
    tim = max ( 1, static_cast<int>( n / max ( 1.0, sqrt ( static_cast<double>(m) ) ) ) ) ;
    for ( int i = 1 ; i <= m ; i ++ ) {
        Read(e[i].l) ;
        Read(e[i].r) ;
        e[i].id = i ;
    }
    sort ( e+1, e+m+1 ) ;
    for ( int i = 1 ; i <= m ; i ++ ) {
        int L = e[i].l, R = e[i].r ;
        // Grow the window before shrinking it so that no count ever goes negative.
        while ( curL > L )
            add ( --curL ) ;
        while ( curR < R )
            add ( ++curR ) ;
        while ( curL < L )
            remove ( curL++ ) ;
        while ( curR > R )
            remove ( curR-- ) ;
        ans[e[i].id] = answer ;
    }
    for ( int i = 1 ; i <= m ; i ++ )
        printf ( "%d\n", ans[i] ) ;
    return 0 ;
}

例题3：2009国家集训队小Z的袜子清橙OJ1206

问题描述
　　作为一个生活散漫的人，小Z每天早上都要耗费很久从一堆五颜六色的袜子中找出一双来穿。终于有一天，小Z再也无法忍受这恼人的找袜子过程，于是他决定听天由命……
　　具体来说，小Z把这N只袜子从1到N编号，然后从编号L到R（L < R）的袜子中随机选出两只来穿。尽管小Z并不在意两只袜子是不是完整的一双，甚至不在意两只袜子是否一左一右，他却很在意袜子的颜色，毕竟穿两只不同色的袜子会很尴尬。
　　你的任务便是告诉小Z，他有多大的概率抽到两只颜色相同的袜子。当然，小Z希望这个概率尽量高，所以他可能会询问多个(L,R)以方便自己选择。
输入格式
　　输入文件第一行包含两个正整数N和M。N为袜子的数量，M为小Z所提的询问的数量。
　　接下来一行包含N个正整数Ci，其中Ci表示第i只袜子的颜色，相同的颜色用相同的数字表示。
　　再接下来M行，每行两个正整数L，R表示一个询问。
输出格式
　　输出文件包含M行，对于每个询问在一行中输出分数A/B表示从该询问的区间[L,R]中随机抽出两只袜子颜色相同的概率。若该概率为0则输出0/1，否则输出的A/B必须为最简分数。（详见样例）
样例输入
6 4
1 2 3 3 3 2
2 6
1 3
3 5
1 6
样例输出
2/5
0/1
1/1
4/15
样例说明
　　询问1：共C(5,2)=10种可能，其中抽出两个2有1种可能，抽出两个3有3种可能，概率为(1+3)/10=4/10=2/5。
　　询问2：共C(3,2)=3种可能，无法抽到颜色相同的袜子，概率为0/3=0/1。
　　询问3：共C(3,2)=3种可能，均为抽出两个3，概率为3/3=1/1。
　　注：上述C(a, b)表示组合数，组合数C(a, b)等价于在a个不同的物品中选取b个的选取方案数。
数据规模和约定
　　30%的数据中 N,M ≤ 5000；
　　60%的数据中 N,M ≤ 25000；
　　100%的数据中 N,M ≤ 50000，1 ≤ L < R ≤ N，Ci ≤ N。

这道题倒是有些需要想想。
但是有一个神奇的事情：$C(N,M)=\dfrac{N!}{M!\,(N-M)!}$
那么C(N,2)呢？
这不就是一个 $\sum_{i=1}^{N-1} i$ 嘛？
（有没有感觉自己~~被续了一秒~~）

/**************************************************************
    Source Code : GoAway
    Date : 2017-02-06
****************************************************************/
#include <iostream>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cctype>
#include <vector>
#include <map>
#include <stack>
#include <queue>
#include <set>
#include <cmath>
#include <algorithm>
#include <ctime>
using namespace std ;

typedef long long ll ;

const ll zhf = 1<<30 ;
const ll maxn = 50010 ;

// Reads one (optionally negative) decimal integer from stdin into x.
// Returns false when end of input is reached before any digit is found.
bool Read ( ll &x ) {
    bool f = 0 ; x = 0 ;
    // getchar() returns int; keeping it in an int makes the EOF comparison
    // reliable even on platforms where plain char is unsigned.
    int c = getchar() ;
    while ( !isdigit(c) ) {
        if ( c == '-' ) f = 1 ;
        if ( c == EOF ) return false ;
        c = getchar() ;
    }
    while ( isdigit(c) ) {
        x = 10 * x + ( c - '0' ) ;
        c = getchar() ;
    }
    if ( f ) x = -x ;
    return true ;
}

ll tim ;    // block length used to bucket left endpoints, set in main

// One offline query [L,R]; id remembers its position in the input.
struct query {
    ll L, R, id ;
    // Order by block of L first, then by R inside the same block.
    friend bool operator < ( const query &a, const query &b ) {
        return (a.L/tim) == (b.L/tim) ? a.R < b.R : a.L < b.L ;
    }
} e[maxn] ;

ll gcd ( ll x, ll y ) {
    return y ? gcd ( y, x%y ) : x ;
}

// A probability x/y: x = same-coloured pairs, y = all pairs in the range.
struct Answer {
    ll x, y ;
    // Prints the fraction in lowest terms, or "0/1" when the numerator is zero.
    // The stored values are left untouched.
    void out() const {
        if ( !x ) puts("0/1") ;
        else {
            ll d = gcd(x, y) ;
            printf ( "%lld/%lld\n", x / d, y / d ) ;
        }
    }
} ans[maxn] ;

ll n, m, a[maxn], cnt[maxn], answer ;   // answer: same-coloured pairs inside the window

// Position pos enters the window: the new sock pairs with every sock of its
// colour that is already inside.
void add ( ll pos ) {
    ++ cnt[a[pos]] ;
    if ( cnt[a[pos]] > 1 )
        answer += cnt[a[pos]] - 1 ;
}

// Position pos leaves the window: the pairs it formed with the remaining socks
// of its colour disappear.
void remove ( ll pos ) {
    -- cnt[a[pos]] ;
    if ( cnt[a[pos]] > 0 ) answer -= cnt[a[pos]] ;
}

// 1 + 2 + ... + x. Called with x = R-L, which equals C(R-L+1, 2).
ll sum ( ll x ) { return x*(x+1)/2 ; }

int main() {
    ll curL = 1, curR = 0 ;             // current window [curL,curR]; [1,0] is empty
    Read(n) ; Read(m) ;
    // Block length ~ n/sqrt(m) balances left-pointer work (m * block) against
    // right-pointer work (n * n/block). Clamped to at least 1 because the
    // comparator divides by it.
    tim = max<ll> ( 1, static_cast<ll>( n / max ( 1.0, sqrt ( static_cast<double>(m) ) ) ) ) ;
    for ( ll i = 1 ; i <= n ; i ++ )
        Read(a[i]) ;
    for ( ll i = 1 ; i <= m ; i ++ ) {
        Read(e[i].L) ; Read(e[i].R) ;
        e[i].id = i ;
    }
    sort ( e+1, e+m+1 ) ;
    for ( ll i = 1 ; i <= m ; i ++ ) {
        ll L = e[i].L, R = e[i].R ;
        // Grow the window before shrinking it so that no count ever goes negative.
        while ( curL > L )
            add ( --curL ) ;
        while ( curR < R )
            add ( ++curR ) ;
        while ( curL < L )
            remove ( curL++ ) ;
        while ( curR > R )
            remove ( curR-- ) ;
        // Plain member assignment instead of the GNU-only compound literal.
        ans[e[i].id].x = answer ;
        ans[e[i].id].y = sum ( R - L ) ;
    }
    for ( ll i = 1 ; i <= m ; i ++ )
        ans[i].out() ;
    return 0 ;
}

其实莫队还可以套上树状数组或者在一棵树上搞的，但是这篇文章就简单先介绍下啦~
要是俺有空就出莫队的续集，嘿嘿~

可修改的莫队戳这里

0 0

莫队算法——解决序列上询问的利器

例题1：BZOJ3781 小B的询问

Description

Input

Output

Sample Input

Sample Output

HINT

例题2：SDOI2009 HH的项链 洛谷1972

Description

Input

Output

Sample Input

Sample Output

HINT

例题3：2009国家集训队 小Z的袜子 清橙OJ1206

例题2：SDOI2009 HH的项链洛谷1972

例题3：2009国家集训队小Z的袜子清橙OJ1206