Lua 字符串实践 -- 过滤词库算法

来源:互联网 发布:软件需求变更申请表 编辑:程序博客网 时间:2024/06/07 18:05

note目录

  • 过滤词字典结构
  • 构造过滤词树结构
  • 测试example

1: 过滤词字典结构

WordMap.lua文件

--- Single node of the filter-word tree.
-- Each node records whether a filter word ends on it and holds its child
-- nodes in a table keyed by one character.
WordMap = luaclass("WordMap")

--- Reset this node to an empty, non-terminal state.
-- @return self, so creation can be chained as `WordMap():Init()`
function WordMap:Init()
    -- true once some filter word ends exactly at this node
    self.isLast = false
    -- child nodes: single character -> WordMap
    self.map = {}
    return self
end

return WordMap

过滤词的单个结构包括2个元素:
1:isLast —— 标记该节点是否为某个过滤词的最后一个字符(即词尾节点)
2:map —— 子节点表,是一张 table{},以单个字符作为 key,value 为该字符对应的 WordMap 子节点


2: 构造过滤词树结构

假设过滤词库的里面有如下过滤词:

-- Filter-word configuration table; entry 1 holds the comma-separated word list.
local configFilter = {
    {
        filterWord = "李泽东,李泽西,李泽北,李泽南,李克勤,周树人,周就,周佳佳",
    },
}

构造的过滤词库的树结构:

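(原文此处为图片 lua_filterworld,图片已缺失;按上面的词库,树结构示意如下,带 * 的节点 isLast = true)

root
├─ 李
│   ├─ 泽
│   │   ├─ 东*
│   │   ├─ 西*
│   │   ├─ 北*
│   │   └─ 南*
│   └─ 克
│       └─ 勤*
└─ 周
    ├─ 树
    │   └─ 人*
    ├─ 就*
    └─ 佳
        └─ 佳*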

完整的过滤词库代码:
WordFilterManager.lua

--- Filter-word manager.
-- Builds a character tree (trie) from the configured filter words and uses it
-- to detect or mask those words inside a string.
-- NOTE(review): relies on `StringUtil` (Split / utf8len / utf8sub) and
-- `configFilter` being in scope; neither is defined in this snippet.
-- `StringUtil.utf8sub(s, start, count)` is assumed to return `count`
-- characters starting at character index `start` -- confirm against StringUtil.
WordFilterManager = {}

local WordMap = require("WordMap")

--- Build the filter-word tree from `configFilter[1].filterWord`.
-- The word list is split on "," and empty entries are skipped.
function WordFilterManager.InitWordFilter()
    -- Start from an empty root node.
    WordFilterManager.wordMap = WordMap():Init()

    local str = configFilter[1].filterWord
    local strArr = StringUtil.Split(str, ",")
    for i = 1, #strArr do
        local wordItem = strArr[i]
        if wordItem ~= "" then
            WordFilterManager.AddWordsChar(WordFilterManager.wordMap, wordItem, 1)
        end
    end
end

--- Insert `word` into the tree, one character per level.
-- @param wordMap     node under which the current character is inserted
-- @param word        the filter word being added
-- @param charIdIndex 1-based character index of `word` handled by this call
function WordFilterManager.AddWordsChar(wordMap, word, charIdIndex)
    local wordLen = StringUtil.utf8len(word)
    if charIdIndex > wordLen then
        -- Nothing left to insert (also guards against an empty word).
        return
    end

    local map = wordMap.map
    local singleChar = StringUtil.utf8sub(word, charIdIndex, 1)
    local subWordMap = map[singleChar]
    if subWordMap == nil then
        subWordMap = WordMap():Init()
        map[singleChar] = subWordMap
    end

    if charIdIndex == wordLen then
        -- Last character of the word: mark the node as a word end.
        subWordMap.isLast = true
    else
        WordFilterManager.AddWordsChar(subWordMap, word, charIdIndex + 1)
    end
end

--- Check whether a filter word starts at character `beginIndex` of `word`.
-- Matching stops at the first word-end node reached, so the shortest filter
-- word starting at `beginIndex` wins.
-- @param wordMap     node to start matching from (normally the root)
-- @param word        the string being inspected
-- @param beginIndex  1-based character index where matching starts
-- @param filterCount number of characters already matched (callers pass 0)
-- @return true and the matched character count, or false and 0
function WordFilterManager.CheckWord(wordMap, word, beginIndex, filterCount)
    filterCount = filterCount or 0

    local len = StringUtil.utf8len(word)
    local node = wordMap
    local index = beginIndex
    -- Walk down the tree one character at a time until a word end is hit,
    -- the path breaks, or the input runs out.
    while index <= len do
        local child = node.map[StringUtil.utf8sub(word, index, 1)]
        if child == nil then
            return false, 0
        end
        filterCount = filterCount + 1
        if child.isLast then
            return true, filterCount
        end
        node = child
        index = index + 1
    end
    return false, 0
end

--- Tell whether `str` contains at least one filter word.
-- @param str the string to inspect
-- @return true if any filter word occurs in `str`, false otherwise
function WordFilterManager.IsContains(str)
    local len = StringUtil.utf8len(str)
    for i = 1, len do
        -- Only the first return value (the boolean) is tested here.
        if WordFilterManager.CheckWord(WordFilterManager.wordMap, str, i, 0) then
            return true
        end
    end
    return false
end

--- Return a copy of `str` in which every filter word is masked.
-- Each character of a matched filter word is replaced by one "*".
-- @param str the string to mask
-- @return the masked string
function WordFilterManager.Filter(str)
    local len = StringUtil.utf8len(str)
    local pieces = {}
    -- A while loop is used because the index must jump past a matched word;
    -- assigning to the control variable of a numeric `for` has no such effect.
    local i = 1
    while i <= len do
        local isFilter, filterCount =
            WordFilterManager.CheckWord(WordFilterManager.wordMap, str, i, 0)
        if isFilter then
            pieces[#pieces + 1] = string.rep("*", filterCount)
            i = i + filterCount
        else
            pieces[#pieces + 1] = StringUtil.utf8sub(str, i, 1)
            i = i + 1
        end
    end
    return table.concat(pieces)
end

3: 测试example

-- Usage example: detect and mask a filter word inside a mixed string.
-- The tree must be built first; IsContains and Filter both read
-- WordFilterManager.wordMap.
WordFilterManager.InitWordFilter()

local str = "aaa我是李泽西啊aaa"
local isContainsFilterWord = WordFilterManager.IsContains(str)
local afterFilterWord = WordFilterManager.Filter(str)

print(tostring(isContainsFilterWord))  ---> output: true
print(afterFilterWord)                 ---> output: aaa我是***啊aaa