ASP.NET 敏感词过滤(一)

来源:互联网 发布:俄罗斯海军现状知乎 编辑:程序博客网 时间:2024/05/21 19:27

 

 /// <summary>
    /// 敏感词过滤(查找速度很快,支持模糊查找-如:操*妈;*号代表5个字符)
    /// </summary>

    public class clsFilter
    {
        private string _words = string.Empty;
        private string _minganci = string.Empty;
        private Dictionary<string, Dictionary<string, int[]>> _badWords;
        private Dictionary<int, List<string>> _result;
        const string type1 = "_";               //含1个字符
        const string type2 = "*";               //含0-5个字符

        /// <summary>
        /// 词库
        /// </summary>
        public Dictionary<string, Dictionary<string, int[]>> badWords
        {
            get { return _badWords; }
            set { _badWords = value; }
        }

        /// <summary>
        /// 敏感词字符串
        /// </summary>
        public string minganci
        {
            get { return _minganci; }
            set { _minganci = value; }
        }

        /// <summary>
        /// 敏感词级别,列表
        /// </summary>
        public Dictionary<int, List<string>> result
        {
            get { return _result; }
        }

        /// <summary>
        /// 要查找的字符串
        /// </summary>
        public string words
        {
            get { return _words; }
            set { _words = value; }
        }

        /// <summary>
        /// 构造过滤对象
        /// </summary>
        /// <param name="words">要过滤的词</param>
        public clsFilter(string words)
        {
            this._words = words;
            this._badWords = new Dictionary<string, Dictionary<string, int[]>>();
        }

        /// <summary>
        /// 构造过滤对象
        /// </summary>
        /// <param name="words">要过滤的词</param>
        /// <param name="minganci">敏感词字符串</param>
        public clsFilter(string words, string minganci)
            : this(words)
        {
            this.minganci = minganci;
            LoadDic();//加载词库
        }

        /// <summary>
        /// 构造过滤对象
        /// </summary>
        /// <param name="words">要过滤的词</param>
        /// <param name="dic">敏感词生成后的Dictionary</param>
        public clsFilter(string words, Dictionary<string, Dictionary<string, int[]>> dic)
            : this(words)
        {
            this.badWords = dic;
        }

        //加载词库
        private void LoadDic()
        {
            if (this.minganci == "" || this.minganci == null) return;

            Dictionary<string, int[]> wordlist = null;
            string[] sp = { "\r\n" };
            string[] keys = this.minganci.Split(sp, StringSplitOptions.RemoveEmptyEntries);
            string[] split = null;
            bool con = false;//记录是否含有通配符,0,无 1,有

            int count = 0;//记录敏感字包含通配符时的字数

            foreach (string s in keys)
            {
                split = s.Split('|');
                if (badWords.ContainsKey(s[0].ToString()))
                {
                    if (!badWords[s[0].ToString()].ContainsKey(split[0]))
                    {
                        badWords[s[0].ToString()].Add(ReplaceWord(split[0], out con, out count), new int[3] { clsCommon.GetObjToInt(split[1]), con ? 1 : 0, count });
                    }
                }
                else
                {
                    wordlist = new Dictionary<string, int[]>();
                    wordlist.Add(ReplaceWord(split[0], out con, out count), new int[3] { clsCommon.GetObjToInt(split[1]), con ? 1 : 0, count });
                    badWords.Add(s[0].ToString(), wordlist);
                }
            }
        }

        //替换为正则

        private string ReplaceWord(string str, out bool contain, out int count)
        {
            string s = "";
            int c = 0;
            bool b = false;
            if (str.Contains(type1))
            {
                c += str.Length;
                s = str.Replace("_", "(.){1}");
                b = true;
            }
            if (str.Contains(type2))
            {
                c += str.Length + 4;
                s = str.Replace("*", "(.){0,5}");
                b = true;
            }
            contain = b;
            count = c;
            return s == "" ? str : s;
        }

        /// <summary>
        /// 过滤(这里只是把敏感词找出来,不支持对敏感词进行处理)
        /// </summary>
        public void Filter()
        {
            if (this.badWords == null) return;

            this._result = new Dictionary<int, List<string>>();

            for (int i = 0; i < this.words.Length; i++)
            {
                if (badWords.ContainsKey(words[i].ToString()))
                {
                    Dictionary<string, int[]> dic = badWords[words[i].ToString()];
                    int pos = 0;
                    foreach (string str in dic.Keys)
                    {
                        if (dic[str][1] == 1)//有通配符

                        {
                            pos = (this.words.Length - i) >= dic[str][2] ? dic[str][2] : (this.words.Length - i);
                            string s = words.Substring(i, pos);
                            if (new Regex(str).Match(s).Success)
                            {
                                AddToList(s, dic[str][0]);
                            }
                        }
                        else
                        {
                            pos = (this.words.Length - i) >= str.Length ? str.Length : (this.words.Length - i);
                            if (str == words.Substring(i, pos))
                            {
                                AddToList(str, dic[str][0]);
                            }
                        }
                    }
                }
            }
        }

        /// <summary>
        /// 存入结果
        /// </summary>
        /// <param name="str"></param>
        /// <param name="level"></param>
        private void AddToList(string str, int level)
        {
            if (this._result.ContainsKey(level))
            {
                _result[level].Add(str);
            }
            else
            {
                List<string> list = new List<string>();
                list.Add(str);
                _result.Add(level, list);
            }
        }
    }

 

原创粉丝点击