lua之屏蔽字替换为 '*'

来源:互联网 发布:网络投资那个好 编辑:程序博客网 时间:2024/05/30 23:21
local socket = require "socket"

--- Byte length of a UTF-8 sequence, judged from its lead byte alone.
-- The legacy 5- and 6-byte forms are accepted in addition to 1-4 bytes.
-- @param ch lead byte as a number, or nil
-- @return sequence length (1..6), or -1 when ch is nil, is a continuation
--   byte (0x80-0xBF), or is 0xFE/0xFF
local function utf8len(ch)
  if not ch then
    return -1
  end
  if ch < 0x80 then
    return 1
  elseif ch < 0xC0 then
    return -1
  elseif ch < 0xE0 then
    return 2
  elseif ch < 0xF0 then
    return 3
  elseif ch < 0xF8 then
    return 4
  elseif ch < 0xFC then
    return 5
  elseif ch < 0xFE then
    return 6
  else
    return -1
  end
end

--- Split a string into its UTF-8 characters.
-- Only the lead byte and the remaining length are checked; continuation
-- bytes are not validated.
-- @param input string to split (may be nil)
-- @return array of character strings and a parallel array of their byte
--   lengths, or nil, nil when input is nil, contains an invalid lead byte,
--   or ends in the middle of a sequence
local function getutf8tbl(input)
  if not input then
    return nil, nil
  end
  local tbl = {}
  local tbllen = {}
  local len = #input
  local i = 1
  while i <= len do
    -- Read the lead byte directly instead of building a 1-char substring.
    local j = utf8len(string.byte(input, i))
    if j <= 0 or i + j - 1 > len then
      return nil, nil
    end
    tbl[#tbl + 1] = string.sub(input, i, i + j - 1)
    tbllen[#tbllen + 1] = j
    i = i + j
  end
  return tbl, tbllen
end

-- Load the forbidden words.
--   data[bytelen][word] = true   words bucketed by their byte length
--   maxlen                       byte length of the longest word
--   firstword[char] = true       first UTF-8 character of every word
local f0 = socket.gettime()
local data = {}
local maxlen = 0
local firstword = {}
for line in io.lines("forbidden_words.txt") do
  local len = #line
  -- A blank line can never match anything, so do not create a bucket for it.
  if len > 0 then
    local bucket = data[len]
    if bucket == nil then
      bucket = {}
      data[len] = bucket
    end
    bucket[line] = true
    if len > maxlen then
      maxlen = len
    end
    local wordlen = utf8len(string.byte(line, 1))
    if wordlen > 0 then
      firstword[string.sub(line, 1, wordlen)] = true
    end
  end
end
local f1 = socket.gettime()
print(f1 - f0)

-- Censor every line of test.txt and write the result to out.txt.
-- Fail with the system error message if the output file cannot be opened.
local fout = assert(io.open("out.txt", "w"))
for str in io.lines("test.txt") do
  local t0 = socket.gettime()
  local tbl, tbllen = getutf8tbl(str)
  local count = 0 -- number of dictionary lookups performed for this line
  local result
  if not tbl then
    -- The line could not be split into characters: report it and pass it
    -- through unchanged instead of crashing on a nil table.
    print(str .. " input is invalid")
    result = str
  else
    local len = #tbl
    for i = 1, len do
      local wordlen = 0
      -- Only start a scan at characters that begin some forbidden word and
      -- have not already been masked.
      if tbl[i] ~= '*' and firstword[tbl[i]] then
        for j = 1, len - i + 1 do
          wordlen = wordlen + tbllen[i + j - 1]
          if wordlen > maxlen then --optimization
            break
          end
          local t = data[wordlen]
          if t then
            local word = table.concat(tbl, "", i, i + j - 1)
            count = count + 1
            if t[word] then
              -- Mask the shortest forbidden word starting at position i.
              for k = i, i + j - 1 do
                tbl[k] = '*'
              end
              break
            end
          end
        end
      end
    end
    result = table.concat(tbl)
  end
  local t1 = socket.gettime()
  -- The elapsed time is written immediately after the text, with no separator.
  fout:write(result, t1 - t0, '\n')
  print(count, t1 - t0)
end
fout:close()

原创粉丝点击