编译原理(二) NFA的确定化及DFA的最小化的算法及C++实现

来源:互联网 发布:光环大数据怎么样 编辑:程序博客网 时间:2024/05/29 04:37

1. NFA的确定化

1.1. 明确NFA的定义

一个非确定的有穷自动机(NFA)M是一个五元式:

  • M=(S,,δ,S0,F) 
    • S
    • δS×Sδ:S×2S
    • S0S,
    • F S , 是一个终态集(可空)

1.2. 定义运算

定义对状态集合I的几个有关运算:

  • 状态集合I的ε-闭包,表示为ε-closure(I),定义为一状态集,是状态集I中的任何状态s经任意条ε弧而能到达的状态的集合。状态集合I的任何状态s都属于ε-closure(I)。
  • 状态集合I的a弧转换,表示为move(I,a)定义为状态集合J,其中J是所有那些可从I的某一状态经过一条a弧而到达的状态的全体。 
    定义Ia = ε-closure(move(I,a))

1.3. 算法描述

  • 每次从队头取出一个集合,(开始队列内只有初态集合I的ε-闭包(I) ),然后得到它对于任意一个字符a的Ia=εclosure(move(I,a))
  • 然后如果当前状态之前没有出现过,那么当前状态作为一个新的状态I,放入队列。
  • 一直做如上操作,直到队列为空

1.4. 代码如下:

#include <iostream>#include <cstdio>#include <algorithm>#include <fstream>#include <queue>#include <vector>#include <string>#include <cstring>#include <set>#define MAX 500#define M 1000007using namespace std;struct Set{    set<int> elements;    int state;}I[MAX];vector<int> e[MAX];vector<int> edge[MAX];vector<int> val1[MAX];vector<int> val2[MAX];vector<int> hash[M];vector<int> types;int vis[MAX][MAX];int cntp,cnts,start,finish,used[MAX];void _clear( );void clear( );void _add( int u , int v , int x );void add( int u , int v , int x );void dfs ( set<int>& elements , int u , int d , int flag );void ensure();void minimize();int get_hash( set<int>& item );//#define DEBUGint main ( ){    int m;    puts("给出NFA的边的条数:");    while ( ~scanf ( "%d" , &m ) )    {        _clear();        clear();        puts("给出NFA的始态和终态:");        scanf ( "%d%d" , &start , &finish );        puts("给出所有的边,每行一条:");        for ( int i = 0 ; i < m ; i++ )        {            int u,v,x;            scanf ( "%d%d%d" , &u , &v , &x );            _add ( u , v , x );         }#ifdef DEBUG        set<int> temp;        int num[]={2,3,6,7,8};        //for ( int i = 2 ; i < 6 ; i++ )        //    dfs ( temp , i , 0 , 2 );        for ( int i = 0 ; i < 5 ; i++ )            dfs ( temp , num[i] , 0 , 1 );        set<int>::iterator it = temp.begin();        for ( ; it != temp.end() ; it++ )            printf ( "%d " , *it);        puts("");#else         ensure();        puts("计算结果如下:");        printf ( "%d\n" , cnts );        for ( int i = 0 ; i < cnts ; i++ )            for ( int j = 0 ; j < edge[i].size() ; j++ )                printf ( "edges : %d %d %d\n" , i , edge[i][j] , val2[i][j] );        puts("\n给出DFA的边的条数:");#endif    }}void _clear(){    for ( int i = 0 ; i < MAX ; i++ )    {        e[i].clear();        val1[i].clear();    }    types.clear();    memset ( used , 0 , sizeof ( used ) );}void clear(){    for ( int i = 0 ; i < MAX ; i++ )    {        edge[i].clear();        val2[i].clear();    }}void _add ( int u , int v , int x ){    e[u].push_back ( v );    val1[u].push_back ( x );    if ( !used[x] )    {        types.push_back ( x );        used[x] = types.size();    }}int get_hash ( set<int>& item ){    int sum = 0;    set<int>::iterator it = item.begin(),it1,it2;    for ( ; it!= item.end() ; it++ )    {        sum += (*it)*(*it)%M;        sum %= M;    }    for ( int i = 0 ; i < hash[sum].size() ; i++ )    {        int x = hash[sum][i];        set<int>& temp = I[x].elements;        it = temp.begin();        bool flag = true;        if ( temp.size() != item.size() ) continue;        for ( it1 = temp.begin() , it2 = item.begin(); it2!= item.end() ; it1++,it2++ )            if ( *it1 != *it2 )             {                flag = false;                break;            }        if ( flag ) return x;    }    return -1;}int  make_hash ( set<int>& item ){    int sum = 0;    set<int>::iterator it = item.begin();    for ( ; it!= item.end() ; it++ )    {        sum += (*it)*(*it)%M;        sum %= M;    }    hash[sum].push_back ( cnts );    return cnts++;}void add ( int u , int v , int x ){    edge[u].push_back ( v );    val2[u].push_back ( x );}void dfs ( set<int>& elements , int u , int d , int flag ){    if ( vis[u][d] ) return;    vis[u][d] = 1;    if ( d == 1 ) elements.insert ( u );    if ( d == 2 ) return;    int len = e[u].size();    for ( int i = 0 ; i < len ; i++ )    {        int v = e[u][i],dd;        int x = val1[u][i];        if ( x == flag ) dd = d+1;        else if ( x == 0 ) dd = d;        else continue;        dfs ( elements , v , dd , flag );    }}void ensure ( ){    I[0].elements.insert(start);    cnts = 1;    for ( int i = 0 ; i < cnts ; i++ )    {        set<int>& cur = I[i].elements;        for ( int j = 0 ; j < types.size() ; j++ )        {            if ( types[j] == 0 ) continue;            memset ( vis , 0 , sizeof ( vis ) );            set<int>& temp = I[cnts].elements;            temp.clear();            set<int>::iterator it = cur.begin();            for ( ; it != cur.end() ; it++ )                dfs ( temp , *it , 0 , types[j] );            if ( temp.empty() ) continue;            int x = get_hash ( temp );            if ( x == -1 )                 x = make_hash ( temp );            add ( i , x , types[j] );        }        set<int>::iterator it = cur.begin();        printf ( "I%d :" , i );        for ( ; it!= cur.end() ; it++ )            printf ( "%d " , *it );        puts ("");           }}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155
  • 156
  • 157
  • 158
  • 159
  • 160
  • 161
  • 162
  • 163
  • 164
  • 165
  • 166
  • 167
  • 168
  • 169
  • 170
  • 171
  • 172
  • 173
  • 174
  • 175
  • 176
  • 177
  • 178
  • 179
  • 180
  • 181
  • 182
  • 183
  • 184
  • 185
  • 186
  • 187
  • 188
  • 189
  • 190
  • 191
  • 192
  • 193
  • 194
  • 195
  • 196
  • 197
  • 198
  • 199
  • 200
  • 201
  • 202
  • 203
  • 204
  • 205
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155
  • 156
  • 157
  • 158
  • 159
  • 160
  • 161
  • 162
  • 163
  • 164
  • 165
  • 166
  • 167
  • 168
  • 169
  • 170
  • 171
  • 172
  • 173
  • 174
  • 175
  • 176
  • 177
  • 178
  • 179
  • 180
  • 181
  • 182
  • 183
  • 184
  • 185
  • 186
  • 187
  • 188
  • 189
  • 190
  • 191
  • 192
  • 193
  • 194
  • 195
  • 196
  • 197
  • 198
  • 199
  • 200
  • 201
  • 202
  • 203
  • 204
  • 205

2. DFA的最小化

2.1. 明确DFA的定义

一个确定的有穷自动机(DFA)M是一个五元式:

  • M=(S, ∑, δ, s0, F)其中 
    • S是一个有限集,它的每个元素称为一个状态。
    • ∑是一个有穷字母表,它的每个元素称为一个输入字符 
      -δ是一个从S×∑至S的单值映射。δ(s,a)=s’意味着:当现行状态-为s、输入字符为a时,将转换到下一个状态s’。我们称s’为s的一个后继状态。
    • s0∈S,是唯一的初态。
    •  S,是一个终态集(可空)

2.2 算法描述

  • DFA M =(K,∑,f, k0,, kt),最小状态DFA M’ 
    • 1.构造状态的初始划分0:终态kt 和非终态K- kt两组
    • 2.对∏施用传播性原则 构造新划分new
    • 3.如new=,则令new=并继续步骤4,否则:=new重复2
    • 4.为final中的每一组选一代表,这些代表构成M’的状态。若k是一代表且f(k,a)=t,令r是t组的代表,则M’中有一转换f’(k,a)=r M’ 的开始状态是含有K0的那组的代表 M’的终态是含有Kt的那组的代表
    • 5.去掉M’中的死状态.

2.3 代码实现

#include <iostream>#include <algorithm>#include <cstring>#include <cstdio>#include <cstdlib>#include <string>#include <vector>#include <map>#include <queue>#include <fstream>#define MAX 507using namespace std;struct Node{    int x;    int type;    Node(){}    Node( int a , int b )        :x(a),type(b){}    bool operator < ( const Node& a ) const    {        return type < a.type;    }};int mp[MAX][MAX];vector<int> _edge[MAX];vector<int> _val[MAX];vector<int> edge[MAX];vector<int> val[MAX];vector<int> point;vector<pair<int,int> > ans1;vector<int> ans2;vector<Node> col;int fa[MAX];int type[MAX];int state[MAX];int used[MAX];int kinds;int groups_num;int n;int m;void init ();int _find( int x );void _union ( int x , int y );void _add ( int u , int v , int x );void add ( int u , int v , int x );void minimize ( );void make_csv();int main ( ){    init ( );    scanf ( "%d%d" , &n , &m );    for ( int i = 1 ; i <= n ; i++ )        scanf ( "%d" , &type[i] );    for ( int i = 0 ; i < m ; i++ )    {        int u,v,x;        scanf ( "%d%d%d" , &u , &v , &x );        _add ( u ,v , x );    }    minimize();    //最小化后的点的保留结果    puts ("points : " );    printf ( "%d\n" , (int)point.size() );    for ( int i = 0 ; i < point.size(); i++ )        printf ( "%d " , point[i] );    puts("");    //最小化后的边的保留结果    puts ( "edge: ");    m = ans1.size();    for ( int i = 0 ; i < ans1.size() ; i++ )        printf ( "%d %d %d\n" , ans1[i].first , ans1[i].second , ans2[i] );    puts("");    make_csv();}void init ( ){    for ( int i = 0 ; i < MAX ; i++ )    {        edge[i].clear();        _edge[i].clear();    }    point.clear();    memset ( type , 255 , sizeof ( type ) );    memset ( used , 0 , sizeof ( used ) );    kinds = 0;    groups_num = 2;    col.clear();    for ( int i = 0 ; i < MAX ; i++ )        fa[i] = i;}void _add ( int u , int v , int x ){    _edge[u].push_back ( v );    _val[u].push_back ( x );    if ( used[x] ) return ;    used[x] = x;    kinds++;}void add ( int u , int v , int x ){    edge[u].push_back ( v );    val[u].push_back ( x );}int _find ( int x ){    return x == fa[x]? x: fa[x] = _find ( fa[x] );}void _union ( int x , int y ){    x = _find ( x );    y = _find ( y );    fa[x] = y;}void minimize ( ){    queue<vector<Node> > q[2];    col.clear();    for ( int i = 1 ; i <= n ; i++ )    {        if ( type[i] == 0 )             col.push_back ( Node( i , 0 ) );    }    q[1].push ( col );    col.clear();    for ( int i = 1 ; i <= n ;i++ )    {        if ( type[i] == 1 )            col.push_back ( Node ( i , 1 ) );    }    q[1].push( col );    col.clear();    for ( int i = 1 ; i <= kinds ; i++ )    {        int cur = i%2;        int next = (i+1)%2;        while ( !q[next].empty() ) q[next].pop();        while ( !q[cur].empty() )        {            vector<Node> front = q[cur].front();            q[cur].pop();            for ( int j = 0 ; j < front.size() ; j++ )            {                Node& temp = front[j];                int u = temp.x;                for ( int k = 0 ; k < _edge[u].size() ; k++ )                {                    int v = _edge[u][k];                    int x = _val[u][k];                    if ( x != i ) continue;                    temp.type = type[v];                }            }            sort ( front.begin() , front.end() );            if ( front[0].type == front[front.size()-1].type )                q[next].push ( front );            else             {                col.clear();                col.push_back ( front[0] );                for ( int j = 1 ; j < front.size() ; j++ )                {                    if ( front[j].type != front[j-1].type )                    {                        q[cur].push ( col );                        col.clear();                        groups_num++;                    }                    type[front[j].x] = groups_num;                    col.push_back ( front[j] );                }                q[cur].push( col );            }        }    }//#define DEBUG#ifdef DEBUG    int id = (kinds+1)%2;    int num = 1;    while ( !q[id].empty() )    {        vector<Node> temp = q[id].front();        q[id].pop();        printf ( "%d : ",  num++ );        for ( int i = 0 ; i < temp.size() ; i++ )            printf ( "%d " , temp[i].x );        puts("");    }#else    int id = (kinds+1)%2;    while ( !q[id].empty() )    {        vector<Node> temp = q[id].front();        q[id].pop();        for ( int i = 1 ; i < temp.size() ; i++ )            _union ( temp[i].x , temp[i-1].x);    }    memset ( used , 0 , sizeof ( used ) );    memset ( mp , 0 , sizeof ( mp ) );    for ( int i = 1 ; i <= n ;i++ )    {        int x = _find(i);        if ( used[x] ) continue;        used[x] = 1;        point.push_back ( x );    }    ans1.clear();    for ( int i = 1 ; i <= n ; i++ )        for ( int j = 0 ; j < _edge[i].size() ; j++ )        {            int v = _edge[i][j];            int x = _val[i][j];            int a = _find(i);            int b = _find(v);            if ( mp[a][b] ) continue;            mp[a][b] = 1;            add ( a , b , x );            ans1.push_back ( make_pair ( a , b ) );            ans2.push_back ( x );        }#endif}void make_csv ( ){    freopen ( "g1.csv" , "w" , stdout );    printf ( "%d,%d, \n" , n , m  );     for ( int i = 0 ; i < point.size() ; i++ )        if ( i == 0 ) printf ( "%d" , point[i] );        else printf ( ",%d" , point[i] );    puts("");    for ( int i = 0 ; i < m ; i++ )        printf ( "%d,%d,%d\n" , ans1[i].first , ans1[i].second , ans2[i] );    fclose(stdout);}