php实现简单的基于DFA算法的敏感词过滤

来源:互联网 发布:qq邮箱软件 编辑:程序博客网 时间:2024/06/10 23:07

最近一位学Go语言的朋友给我发了一篇文章(http://blog.csdn.net/chenssy/article/details/26961957),讲的是使用DFA(Deterministic Finite Automaton,即确定有穷自动机)算法实现敏感词过滤,并问我能不能用PHP来实现。感谢chenssy的精彩文章,下面是我仿照该文思路写的PHP版本。

<?php

// Only send the header while that is still possible; once output has started,
// header() would just raise a "headers already sent" warning.
if (!headers_sent()) {
    header("Content-type:text/html; charset=utf-8");
}

/**
 * Minimal key/value map used as a single node of the word trie.
 *
 * Entries live in a private array rather than in dynamic object properties:
 * dynamic properties are deprecated since PHP 8.2, and PHP rejects property
 * names that are empty or start with a NUL byte, whereas any string is a
 * valid array key.
 */
class MyMap
{
    /** @var array Stored entries, indexed by key. */
    private $entries = [];

    /**
     * Look up a key.
     *
     * @param string $key
     * @return mixed|null The stored value, or null when the key is absent
     *                    (or was stored as null).
     */
    public function get($key)
    {
        return isset($this->entries[$key]) ? $this->entries[$key] : null;
    }

    /**
     * Store a value under a key, overwriting any previous value.
     *
     * @param string $key
     * @param mixed  $value
     * @return void
     */
    public function put($key, $value)
    {
        $this->entries[$key] = $value;
    }
}

/**
 * Sensitive-word finder backed by a character trie (the "DFA" of the article).
 *
 * Every node is a MyMap whose keys are single characters pointing at child
 * nodes, plus the reserved key 'isEnd' (1 when a dictionary word ends at that
 * node, 0 otherwise).
 */
class MyFilter
{
    /** @var MyMap|null Root node of the trie; null until the first word is added. */
    public $map = null;

    /**
     * Insert one word into the trie.
     *
     * An empty word only makes sure the root node exists.
     *
     * @param string $word
     * @return void
     */
    public function addWordToMap($word)
    {
        if (is_null($this->map)) {
            $this->map = new MyMap();
            $this->map->put('isEnd', 0);
        }

        $chars = $this->splitChars($word);
        if (count($chars) === 0) {
            return;
        }

        $node = $this->map;
        foreach ($chars as $char) {
            $next = $node->get($char);
            if (is_null($next)) {
                $next = new MyMap();
                $next->put('isEnd', 0);
                $node->put($char, $next);
            }
            $node = $next;
        }

        // The node reached by the last character terminates a word.
        $node->put('isEnd', 1);
    }

    /**
     * Find dictionary words in $string, scanning left to right.
     *
     * Matches never overlap: after a hit, scanning resumes right behind it.
     * When a partial match fails, scanning restarts one character after the
     * position where that attempt began, so a word that starts inside the
     * abandoned prefix is still found.
     *
     * @param string $string       Text to scan.
     * @param bool   $longestMatch false (default): stop at the first word end
     *                             reached from a start position (shortest
     *                             match). true: keep going and report the
     *                             longest word starting at that position.
     * @return array List of matched words in order of appearance; empty when
     *               nothing matches or no word has been added yet.
     */
    public function searchFromMap($string, $longestMatch = false)
    {
        $result = [];
        if (is_null($this->map)) {
            return $result;
        }

        // Split once up front instead of calling mb_substr() per step.
        $chars = $this->splitChars($string);
        $len = count($chars);

        $start = 0;
        while ($start < $len) {
            $node = $this->map;
            $matchedLen = 0;

            for ($i = $start; $i < $len; $i++) {
                $node = $node->get($chars[$i]);
                if (is_null($node)) {
                    break;
                }
                if ($node->get('isEnd')) {
                    $matchedLen = $i - $start + 1;
                    if (!$longestMatch) {
                        break;
                    }
                }
            }

            if ($matchedLen > 0) {
                $result[] = implode('', array_slice($chars, $start, $matchedLen));
                $start += $matchedLen;
            } else {
                $start++;
            }
        }

        return $result;
    }

    /**
     * Split text into a list of single characters using the mbstring
     * internal encoding.
     *
     * @param string $text
     * @return array
     */
    private function splitChars($text)
    {
        $text = (string) $text;
        $len = mb_strlen($text);
        $chars = [];
        for ($i = 0; $i < $len; $i++) {
            $chars[] = mb_substr($text, $i, 1);
        }

        return $chars;
    }
}

// Demo: build a small dictionary and scan a sample sentence.
$example = new MyFilter();
$example->addWordToMap('中国人');
$example->addWordToMap('中国男人');
$example->addWordToMap('女人');
$result = $example->searchFromMap('我是中国人,我爱中国,中国男人是最优秀的,中国女人是最漂亮的');
// Uncomment to inspect the trie: var_dump($example->map);
var_dump($result);

个人博客地址
github源码地址

阅读全文
0 0
原创粉丝点击