不确定有穷自动机正则表达式求值

来源:互联网 发布:录制脚本的软件 编辑:程序博客网 时间:2024/06/05 02:45

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstring>
#include <deque>
#include <iostream>
#include <queue>
#include <stack>
#include <utility>
#include <vector>
 
template <size_t _MaxStat>
class _state_extractor {
public:
    typedef unsigned state_type;
 
    class state_overflow {};
 
    _state_extractor():_next_m(0) {}
 
    state_type get() {
    if(!_reclaimer_m.empty()) {
        state_type _x=_reclaimer_m.front();
        _reclaimer_m.pop();
        return _x;
    }
    if(_next_m==_MaxStat) throw state_overflow();
    return _next_m++;
    }
 
    void get_back(state_type _x) {
    _reclaimer_m.push(_x);
    }
 
private:
    std::queue<state_type> _reclaimer_m;
    unsigned _next_m;
 
};
 
template <size_t _MaxStat>
class regular_expression {
public:
    typedef unsigned state_type;
    typedef int trans_type;
    typedef std::vector<state_type> nfa[_MaxStat][256];
    typedef _state_extractor<_MaxStat> state_extractor;
 
    enum {empty};
 
    virtual ~regular_expression() {}
 
    state_type get_begstat() const {
    return _begin_m;
    }
 
    state_type get_endstat() const {
    return _end_m;
    }
 
    std::vector<std::vector<state_type> *> &get_endptr_set() {
    return _endptr_set_m;
    }
 
    void set_begstat(nfa _nfa, state_type _stat) {
    for(int _x=0; _x<256; ++_x)
        if(!_nfa[_begin_m][_x].empty()) {
        _nfa[_stat][_x]=_nfa[_begin_m][_x];
        _nfa[_begin_m][_x].clear();
        }
    _begin_m=_stat;
    }
 
    void set_endstat(nfa _nfa, state_type _stat) {
    for(std::vector<std::vector<state_type> *>::const_iterator _i=_endptr_set_m.begin(); _i!=_endptr_set_m.end(); ++_i) {
        (*_i)->erase(std::find((*_i)->begin(), (*_i)->end(), _end_m));
        (*_i)->push_back(_stat);
    }
    _end_m=_stat;
    }
 
protected:
    regular_expression(state_type _begstat, state_type _endstat)
    :_begin_m(_begstat)
    ,_end_m(_endstat) {}
 
    state_type _begin_m, _end_m;
    std::vector<std::vector<state_type> *> _endptr_set_m;
 
private:
    regular_expression(const regular_expression &);
 
};
 
template <size_t _MaxStat>
class alpha:public regular_expression<_MaxStat> {
    typedef regular_expression<_MaxStat> _base_type;
 
public:
    alpha(typename _base_type::nfa _nfa, typename _base_type::state_extractor &_se, typename _base_type::trans_type _trans)
    :_base_type(_se.get(), _se.get()) {
    _nfa[this->_begin_m][_trans].push_back(this->_end_m);
    this->_endptr_set_m.push_back(&_nfa[this->_begin_m][_trans]);
    }
 
};
 
template <size_t _MaxStat>
class _and:public regular_expression<_MaxStat> {
    typedef regular_expression<_MaxStat> _base_type;
 
public:
    _and(typename _base_type::nfa _nfa, typename _base_type::state_extractor &_se, _base_type *_left, _base_type *_right)
    :_base_type(_left->get_begstat(), _right->get_endstat()) {
    _se.get_back(_left->get_endstat());
    _left->set_endstat(_nfa, _right->get_begstat());
    this->_endptr_set_m=_right->get_endptr_set();
    }
 
    ~_and() {
    delete _left_m;
    delete _right_m;
    }
 
private:
    _base_type *_left_m, *_right_m;
 
};
 
template <size_t _MaxStat>
class _or:public regular_expression<_MaxStat> {
    typedef regular_expression<_MaxStat> _base_type;
 
public:
    _or(typename _base_type::nfa _nfa, typename _base_type::state_extractor &_se, _base_type *_left, _base_type *_right)
    :_base_type(_se.get(), _se.get()), _left_m(_left), _right_m(_right) {
    _nfa[this->_begin_m][_base_type::empty].push_back(_left_m->get_begstat());
    _nfa[this->_begin_m][_base_type::empty].push_back(_right_m->get_begstat());
    _nfa[_left_m->get_endstat()][_base_type::empty].push_back(this->_end_m);
    _nfa[_right_m->get_endstat()][_base_type::empty].push_back(this->_end_m);
    this->_endptr_set_m.push_back(&_nfa[_left_m->get_endstat()][_base_type::empty]);
    this->_endptr_set_m.push_back(&_nfa[_right_m->get_endstat()][_base_type::empty]);
    }
 
    ~_or() {
    delete _left_m;
    delete _right_m;
    }
 
private:
    _base_type *_left_m, *_right_m;
 
};
 
template <size_t _MaxStat>
class closure:public regular_expression<_MaxStat> {
    typedef regular_expression<_MaxStat> _base_type;
 
public:
    closure(typename _base_type::nfa _nfa, typename _base_type::state_extractor &_se, _base_type *_original)
    :_base_type(_se.get(), _se.get()), _original_m(_original) {
    _nfa[this->_begin_m][_base_type::empty].push_back(_original_m->get_begstat());
    _nfa[this->_begin_m][_base_type::empty].push_back(this->_end_m);
    _nfa[_original_m->get_endstat()][_base_type::empty].push_back(_original_m->get_begstat());
    _nfa[_original_m->get_endstat()][_base_type::empty].push_back(this->_end_m);
    this->_endptr_set_m.push_back(&_nfa[_original_m->get_endstat()][_base_type::empty]);
    }
 
    ~closure() {
    delete _original_m;
    }
 
private:
    _base_type *_original_m;
 
};
 
class lexer {
public:
    class bad_token {};
 
    lexer(const std::string &_src):_src_m(_src), _iter_m(_src_m.begin()) {}
 
    bool is_already_over() {
    leave_out_space();
    return _iter_m==_src_m.end();
    }
 
    lexer &operator>>(char &_c) {
    leave_out_space();
    if(!isalnum(*_iter_m)&&*_iter_m!='|'&&*_iter_m!='('&&*_iter_m!=')'&&*_iter_m!='*') throw bad_token();
    _c=*_iter_m++;
    return *this;
    }
 
private:
    void leave_out_space() {
    std::string::const_iterator _enditer;
    while(_iter_m!=_enditer&&isspace(*_iter_m)) ++_iter_m;
    }
 
    std::string _src_m;
    std::string::const_iterator _iter_m;
 
};
 
class post_regular_expression_factor {
public:
    std::vector<char> generic(const std::string &_src) const {
    std::vector<char> _result;
    std::stack<char> _operator_stack;
    lexer _lxr(_src);
    char _c;
    while(!_lxr.is_already_over()) {
        _lxr>>_c;
        switch(_c) {
        case '(':
        _operator_stack.push('(');
        break;
        case ')':
        while(_operator_stack.top()!='(') {
            _result.push_back(_operator_stack.top());
            _operator_stack.pop();
        }
        _operator_stack.pop();
        break;
        case '*':
        _result.push_back('*');
        break;
        case '|':
        while(!_operator_stack.empty()&&_operator_stack.top()!='(') {
            _result.push_back(_operator_stack.top());
            _operator_stack.pop();
        }
        _operator_stack.push('|');
        break;
        default:
        _result.push_back(_c);
        }
    }
    while(!_operator_stack.empty()) {
        _result.push_back(_operator_stack.top());
        _operator_stack.pop();
    }
    return _result;
    }
 
};
 
class regular_expression_nfa_factor {
public:
    template <size_t _MaxStat>
    std::pair<
        typename regular_expression<_MaxStat>::state_type,
        typename regular_expression<_MaxStat>::state_type> generic(typename regular_expression<_MaxStat>::nfa _nfa, const std::vector<char> &_vec) const {
    typename regular_expression<_MaxStat>::state_extractor _se;
    std::deque<regular_expression<_MaxStat> *> _stack;
    for(std::vector<char>::const_iterator _i=_vec.begin(); _i!=_vec.end(); ++_i) {
        if(isalnum(*_i))
        _stack.push_back(new alpha<_MaxStat>(_nfa, _se, *_i));
        else if(*_i=='*')
        _stack.back()=new closure<_MaxStat>(_nfa, _se, _stack.back());
        else if(*_i=='|') {
        regular_expression<_MaxStat> *_second=_stack.back();
        _stack.pop_back();
        regular_expression<_MaxStat> *_first=_stack.back();
        _stack.pop_back();
        _stack.push_back(new _or<_MaxStat>(_nfa, _se, _first, _second));
        }
    }
    while(_stack.size()>=2) {
        regular_expression<_MaxStat> *_first=_stack.front();
        _stack.pop_front();
        regular_expression<_MaxStat> *_second=_stack.front();
        _stack.pop_front();
        _stack.push_front(new _and<_MaxStat>(_nfa, _se, _first, _second));
    }
    std::pair<typename regular_expression<_MaxStat>::state_type,
          typename regular_expression<_MaxStat>::state_type> _result(_stack.back()->get_begstat(),
                                         _stack.back()->get_endstat());
    delete _stack.back();
    return _result;
    }
 
};
 
 
class regular_expression_matcher {
public:
    template <size_t _MaxStat>
    bool match(const char *_srcstr, const char *_patstr) {
    post_regular_expression_factor _pref;
    std::vector<char> _vec=_pref.generic(_srcstr);
    typename regular_expression<_MaxStat>::nfa _nfa;
    regular_expression_nfa_factor _renf;
    std::pair<typename regular_expression<_MaxStat>::state_type,
          typename regular_expression<_MaxStat>::state_type> _r=_renf.generic<_MaxStat>(_nfa, _vec);
    return match_impl<_MaxStat>(_nfa, _r.first, _r.second, _patstr);
    }
 
private:
    template <size_t _MaxStat>
    bool match_impl(typename regular_expression<_MaxStat>::nfa _nfa,
            typename regular_expression<_MaxStat>::state_type _beg,
            typename regular_expression<_MaxStat>::state_type _end,
            const char *_patstr) {
    static bool _state_map[_MaxStat];
    memset(_state_map, 0, sizeof(bool)*_MaxStat);
    std::vector<typename regular_expression<_MaxStat>::state_type> _old_states;
    std::vector<typename regular_expression<_MaxStat>::state_type> _new_states;
    match_impl_aux<_MaxStat>(_nfa, _beg, _old_states, _state_map);
    for(typename std::vector<typename regular_expression<_MaxStat>::state_type>::const_iterator _i=_old_states.begin(); _i!=_old_states.end(); ++_i)
        _state_map[*_i]=false;
    for(; *_patstr; ++_patstr) {
        for(typename std::vector<typename regular_expression<_MaxStat>::state_type>::iterator _i=_old_states.begin(); _i!=_old_states.end(); ) {
        for(typename std::vector<typename regular_expression<_MaxStat>::state_type>::const_iterator _j=_nfa[*_i][*_patstr].begin(); _j!=_nfa[*_i][*_patstr].end(); ++_j)
            match_impl_aux<_MaxStat>(_nfa, *_j, _new_states, _state_map);
        _i=_old_states.erase(_i);
        }
        for(typename std::vector<typename regular_expression<_MaxStat>::state_type>::iterator _i=_new_states.begin(); _i!=_new_states.end(); ) {
        _old_states.push_back(*_i);
        _state_map[*_i]=false;
        _i=_new_states.erase(_i);
        }
    }
    return std::find(_old_states.begin(), _old_states.end(), _end)!=_old_states.end();
    }
 
    template <size_t _MaxStat>
    void match_impl_aux(typename regular_expression<_MaxStat>::nfa _nfa,
            typename regular_expression<_MaxStat>::state_type _cur,
            std::vector<typename regular_expression<_MaxStat>::state_type> &_states,
            bool _state_map[_MaxStat]) {
    _states.push_back(_cur);
    _state_map[_cur]=true;
    for(typename std::vector<typename regular_expression<_MaxStat>::state_type>::const_iterator _i=_nfa[_cur][regular_expression<_MaxStat>::empty].begin(); _i!=_nfa[_cur][regular_expression<_MaxStat>::empty].end(); ++_i)
        if(!_state_map[*_i])
        match_impl_aux<_MaxStat>(_nfa, *_i, _states, _state_map);
    }
 
};

原创粉丝点击