asp.net敏感词过滤(一)
来源:互联网 发布:俄罗斯海军现状知乎 编辑:程序博客网 时间:2024/05/21 19:27
/// <summary>
/// 敏感词过滤(查找速度很快,支持模糊查找-如:操*妈;*号代表5个字符)
/// </summary>
{
private string _words = string.Empty;
private string _minganci = string.Empty;
private Dictionary<string, Dictionary<string, int[]>> _badWords;
private Dictionary<int, List<string>> _result;
const string type1 = "_"; //含1个字符
const string type2 = "*"; //含0-5个字符
/// <summary>
/// 词库
/// </summary>
public Dictionary<string, Dictionary<string, int[]>> badWords
{
get { return _badWords; }
set { _badWords = value; }
}
/// <summary>
/// 敏感词字符串
/// </summary>
public string minganci
{
get { return _minganci; }
set { _minganci = value; }
}
/// <summary>
/// 敏感词级别,列表
/// </summary>
public Dictionary<int, List<string>> result
{
get { return _result; }
}
/// <summary>
/// 要查找的字符串
/// </summary>
public string words
{
get { return _words; }
set { _words = value; }
}
/// <summary>
/// 构造过滤对象
/// </summary>
/// <param name="words">要过滤的词</param>
public clsFilter(string words)
{
this._words = words;
this._badWords = new Dictionary<string, Dictionary<string, int[]>>();
}
/// <summary>
/// 构造过滤对象
/// </summary>
/// <param name="words">要过滤的词</param>
/// <param name="minganci">敏感词字符串</param>
public clsFilter(string words, string minganci)
: this(words)
{
this.minganci = minganci;
LoadDic();//加载词库
}
/// <summary>
/// 构造过滤对象
/// </summary>
/// <param name="words">要过滤的词</param>
/// <param name="dic">敏感词生成后的Dictionary</param>
public clsFilter(string words, Dictionary<string, Dictionary<string, int[]>> dic)
: this(words)
{
this.badWords = dic;
}
//加载词库
private void LoadDic()
{
if (this.minganci == "" || this.minganci == null) return;
Dictionary<string, int[]> wordlist = null;
string[] sp = { "\r\n" };
string[] keys = this.minganci.Split(sp, StringSplitOptions.RemoveEmptyEntries);
string[] split = null;
bool con = false;//记录是否含有通配符,0,无 1,有
int count = 0;//记录敏感字包含通配符时的字数
foreach (string s in keys)
{
split = s.Split('|');
if (badWords.ContainsKey(s[0].ToString()))
{
if (!badWords[s[0].ToString()].ContainsKey(split[0]))
{
badWords[s[0].ToString()].Add(ReplaceWord(split[0], out con, out count), new int[3] { clsCommon.GetObjToInt(split[1]), con ? 1 : 0, count });
}
}
else
{
wordlist = new Dictionary<string, int[]>();
wordlist.Add(ReplaceWord(split[0], out con, out count), new int[3] { clsCommon.GetObjToInt(split[1]), con ? 1 : 0, count });
badWords.Add(s[0].ToString(), wordlist);
}
}
}
//替换为正则
private string ReplaceWord(string str, out bool contain, out int count)
{
string s = "";
int c = 0;
bool b = false;
if (str.Contains(type1))
{
c += str.Length;
s = str.Replace("_", "(.){1}");
b = true;
}
if (str.Contains(type2))
{
c += str.Length + 4;
s = str.Replace("*", "(.){0,5}");
b = true;
}
contain = b;
count = c;
return s == "" ? str : s;
}
/// <summary>
/// 过滤(这里只是把敏感词找出来,不支持对敏感词进行处理)
/// </summary>
public void Filter()
{
if (this.badWords == null) return;
this._result = new Dictionary<int, List<string>>();
for (int i = 0; i < this.words.Length; i++)
{
if (badWords.ContainsKey(words[i].ToString()))
{
Dictionary<string, int[]> dic = badWords[words[i].ToString()];
int pos = 0;
foreach (string str in dic.Keys)
{
if (dic[str][1] == 1)//有通配符
{
pos = (this.words.Length - i) >= dic[str][2] ? dic[str][2] : (this.words.Length - i);
string s = words.Substring(i, pos);
if (new Regex(str).Match(s).Success)
{
AddToList(s, dic[str][0]);
}
}
else
{
pos = (this.words.Length - i) >= str.Length ? str.Length : (this.words.Length - i);
if (str == words.Substring(i, pos))
{
AddToList(str, dic[str][0]);
}
}
}
}
}
}
/// <summary>
/// 存入结果
/// </summary>
/// <param name="str"></param>
/// <param name="level"></param>
private void AddToList(string str, int level)
{
if (this._result.ContainsKey(level))
{
_result[level].Add(str);
}
else
{
List<string> list = new List<string>();
list.Add(str);
_result.Add(level, list);
}
}
}
- asp.net敏感词过滤(一)
- asp.net敏感词过滤(二)
- asp.net敏感词过滤
- XSS过滤(敏感词过滤)
- 网站敏感词过滤的实现(附敏感词库)
- javaEE之---------过滤敏感词(filter)
- 过滤敏感词
- JAVA过滤敏感词
- 敏感词过滤算法
- web过滤敏感词
- 敏感词过滤
- php敏感词过滤
- 过滤敏感词
- 过滤敏感词
- 敏感词过滤
- 过滤敏感词,脏词
- c++敏感词过滤
- 敏感词过滤
- S3C6410启动模式介绍
- 在xp的vmware下ubuntu中搭建SSH服务器
- MongoDB 速查文档
- MOSS表单的复制和转移
- XMemcached使用
- asp.net敏感词过滤(一)
- 进一步了解 微软认证的好处和需求
- 用Visio画UML 类结构图
- 对面试题赛马问题分析思路的质疑_欢迎拍砖
- 一步一个脚印学习WCF之二构建一个简单的WCF应用程序前必须掌握的WCF基础术语---契约(Contract)
- 简单的,基于长连接的小例子
- c runtime(CRT)
- Joomla单元管理
- 关于Myclipse和Jboss调试过程中内存溢出问题的解决