(敏感词匹配)将数据库的敏感词逗号隔开来进行内容匹配
来源:互联网 发布:linux怎么重启服务器 编辑:程序博客网 时间:2024/06/11 09:28
参考文献:http://cmsblogs.com/?p=1031
参考文献:http://blog.csdn.net/chenssy/article/details/26961957
参考文献:http://blog.csdn.net/chenssy/article/details/26961957
参考文献:http://www.iteye.com/topic/336577
package com.system.util.SensitiveWords;import java.util.HashSet;import java.util.Iterator;import java.util.Map;import java.util.Set;/** * @Description: 敏感词过滤 * Created by chenghongchao on 2017/7/3. * @version 1.0 */public class SensitivewordFilter {@SuppressWarnings("rawtypes")private Map sensitiveWordMap = null;public static int minMatchTYpe = 1; //最小匹配规则public static int maxMatchType = 2; //最大匹配规则/** * 构造函数,初始化敏感词库 */public SensitivewordFilter(String SensitiveWords){sensitiveWordMap = new SensitiveWordInit().initKeyWord(SensitiveWords);}/** * 判断文字是否包含敏感字符 * Created by chenghongchao on 2017/7/3. * @param txt 文字 * @param matchType 匹配规则 1:最小匹配规则,2:最大匹配规则 * @return 若包含返回true,否则返回false * @version 1.0 */public boolean isContaintSensitiveWord(String txt,int matchType){boolean flag = false;for(int i = 0 ; i < txt.length() ; i++){int matchFlag = this.CheckSensitiveWord(txt, i, matchType); //判断是否包含敏感字符if(matchFlag > 0){ //大于0存在,返回trueflag = true;}}return flag;}/** * 获取文字中的敏感词 * Created by chenghongchao on 2017/7/3. * @param txt 文字 * @param matchType 匹配规则 1:最小匹配规则,2:最大匹配规则 * @return * @version 1.0 */public SetgetSensitiveWord(String txt , int matchType){Set sensitiveWordList = new HashSet ();for(int i = 0 ; i < txt.length() ; i++){int length = CheckSensitiveWord(txt, i, matchType); //判断是否包含敏感字符if(length > 0){ //存在,加入list中sensitiveWordList.add(txt.substring(i, i+length));i = i + length - 1; //减1的原因,是因为for会自增}}return sensitiveWordList;}/** * 替换敏感字字符 * Created by chenghongchao on 2017/7/3. 
* @param txt * @param matchType * @param replaceChar 替换字符,默认* * @version 1.0 */public String replaceSensitiveWord(String txt,int matchType,String replaceChar){String resultTxt = txt;Set set = getSensitiveWord(txt, matchType); //获取所有的敏感词Iterator iterator = set.iterator();String word = null;String replaceString = null;while (iterator.hasNext()) {word = iterator.next();replaceString = getReplaceChars(replaceChar, word.length());resultTxt = resultTxt.replaceAll(word, replaceString);}return resultTxt;}/** * 获取替换字符串 * Created by chenghongchao on 2017/7/3. * @param replaceChar * @param length * @return * @version 1.0 */private String getReplaceChars(String replaceChar,int length){String resultReplace = replaceChar;for(int i = 1 ; i < length ; i++){resultReplace += replaceChar;}return resultReplace;}/** * 检查文字中是否包含敏感字符,检查规则如下:
* Created by chenghongchao on 2017/7/3. * @param txt * @param beginIndex * @param matchType * @return,如果存在,则返回敏感词字符的长度,不存在返回0 * @version 1.0 */@SuppressWarnings({ "rawtypes"})public int CheckSensitiveWord(String txt,int beginIndex,int matchType){boolean flag = false; //敏感词结束标识位:用于敏感词只有1位的情况int matchFlag = 0; //匹配标识数默认为0char word = 0;Map nowMap = sensitiveWordMap;for(int i = beginIndex; i < txt.length() ; i++){word = txt.charAt(i);nowMap = (Map) nowMap.get(word); //获取指定keyif(nowMap != null){ //存在,则判断是否为最后一个matchFlag++; //找到相应key,匹配标识+1if("1".equals(nowMap.get("isEnd"))){ //如果为最后一个匹配规则,结束循环,返回匹配标识数flag = true; //结束标志位为trueif(SensitivewordFilter.minMatchTYpe == matchType){ //最小规则,直接返回,最大规则还需继续查找break;}}}else{ //不存在,直接返回break;}}if(matchFlag < 2 || !flag){ //长度必须大于等于1,为词matchFlag = 0;}return matchFlag;}public static void main(String[] args) {//敏感词逗号隔开(敏感词存储到了数据库,可以直接凭借成逗号隔开的字符串)String SensitiveWords ="阿賓,挨了一炮,爱液横流,安街逆,";//赋值敏感词SensitivewordFilter filter = new SensitivewordFilter(SensitiveWords);System.out.println("敏感词数量:" + filter.sensitiveWordMap.size());String string = "这是一条评论的内容包含了安街逆";//设置匹配规则Setset = filter.getSensitiveWord(string, 1);System.out.println("语句中包含敏感词的个数为:" + set.size() + "包含了:" + set);}}
这里的方法是从数据库读取敏感词后再进行匹配(需要先把数据库中所有的敏感词查出来,并拼接成以逗号分隔的字符串)。
package com.system.util.SensitiveWords;import java.util.HashSet;import java.util.Iterator;import java.util.Map;import java.util.Set;/** * @Description: 敏感词过滤 * Created by chenghongchao on 2017/7/3. * @version 1.0 */public class SensitivewordFilter {@SuppressWarnings("rawtypes")private Map sensitiveWordMap = null;public static int minMatchTYpe = 1; //最小匹配规则public static int maxMatchType = 2; //最大匹配规则/** * 构造函数,初始化敏感词库 */public SensitivewordFilter(String SensitiveWords){sensitiveWordMap = new SensitiveWordInit().initKeyWord(SensitiveWords);}/** * 判断文字是否包含敏感字符 * Created by chenghongchao on 2017/7/3. * @param txt 文字 * @param matchType 匹配规则 1:最小匹配规则,2:最大匹配规则 * @return 若包含返回true,否则返回false * @version 1.0 */public boolean isContaintSensitiveWord(String txt,int matchType){boolean flag = false;for(int i = 0 ; i < txt.length() ; i++){int matchFlag = this.CheckSensitiveWord(txt, i, matchType); //判断是否包含敏感字符if(matchFlag > 0){ //大于0存在,返回trueflag = true;}}return flag;}/** * 获取文字中的敏感词 * Created by chenghongchao on 2017/7/3. * @param txt 文字 * @param matchType 匹配规则 1:最小匹配规则,2:最大匹配规则 * @return * @version 1.0 */public SetgetSensitiveWord(String txt , int matchType){Set sensitiveWordList = new HashSet ();for(int i = 0 ; i < txt.length() ; i++){int length = CheckSensitiveWord(txt, i, matchType); //判断是否包含敏感字符if(length > 0){ //存在,加入list中sensitiveWordList.add(txt.substring(i, i+length));i = i + length - 1; //减1的原因,是因为for会自增}}return sensitiveWordList;}/** * 替换敏感字字符 * Created by chenghongchao on 2017/7/3. 
* @param txt * @param matchType * @param replaceChar 替换字符,默认* * @version 1.0 */public String replaceSensitiveWord(String txt,int matchType,String replaceChar){String resultTxt = txt;Set set = getSensitiveWord(txt, matchType); //获取所有的敏感词Iterator iterator = set.iterator();String word = null;String replaceString = null;while (iterator.hasNext()) {word = iterator.next();replaceString = getReplaceChars(replaceChar, word.length());resultTxt = resultTxt.replaceAll(word, replaceString);}return resultTxt;}/** * 获取替换字符串 * Created by chenghongchao on 2017/7/3. * @param replaceChar * @param length * @return * @version 1.0 */private String getReplaceChars(String replaceChar,int length){String resultReplace = replaceChar;for(int i = 1 ; i < length ; i++){resultReplace += replaceChar;}return resultReplace;}/** * 检查文字中是否包含敏感字符,检查规则如下:
* Created by chenghongchao on 2017/7/3. * @param txt * @param beginIndex * @param matchType * @return,如果存在,则返回敏感词字符的长度,不存在返回0 * @version 1.0 */@SuppressWarnings({ "rawtypes"})public int CheckSensitiveWord(String txt,int beginIndex,int matchType){boolean flag = false; //敏感词结束标识位:用于敏感词只有1位的情况int matchFlag = 0; //匹配标识数默认为0char word = 0;Map nowMap = sensitiveWordMap;for(int i = beginIndex; i < txt.length() ; i++){word = txt.charAt(i);nowMap = (Map) nowMap.get(word); //获取指定keyif(nowMap != null){ //存在,则判断是否为最后一个matchFlag++; //找到相应key,匹配标识+1if("1".equals(nowMap.get("isEnd"))){ //如果为最后一个匹配规则,结束循环,返回匹配标识数flag = true; //结束标志位为trueif(SensitivewordFilter.minMatchTYpe == matchType){ //最小规则,直接返回,最大规则还需继续查找break;}}}else{ //不存在,直接返回break;}}if(matchFlag < 2 || !flag){ //长度必须大于等于1,为词matchFlag = 0;}return matchFlag;}public static void main(String[] args) {//敏感词逗号隔开(敏感词存储到了数据库,可以直接凭借成逗号隔开的字符串)String SensitiveWords ="阿賓,挨了一炮,爱液横流,安街逆,";//赋值敏感词SensitivewordFilter filter = new SensitivewordFilter(SensitiveWords);System.out.println("敏感词数量:" + filter.sensitiveWordMap.size());String string = "这是一条评论的内容包含了安街逆";//设置匹配规则Setset = filter.getSensitiveWord(string, 1);System.out.println("语句中包含敏感词的个数为:" + set.size() + "包含了:" + set);}}
阅读全文
0 0
- (敏感词匹配)将数据库的敏感词逗号隔开来进行内容匹配
- 敏感词的字典树匹配(lua版)
- 正则匹配问题(power8竞赛敏感词过滤)
- AC多模匹配算法过滤敏感词实例
- AC多模匹配算法过滤敏感词实例
- AC多模匹配算法过滤敏感词实例
- 匹配逗号分隔的内容
- 【hdu 5880】 【AC自动机+差分法】Family View 把匹配到的敏感词改成*
- 网站敏感词过滤的实现(附敏感词库)
- filter对页面内容敏感词进行替换处理
- filter对页面内容敏感词进行替换处理
- js/JQuery匹配逗号隔开的多个手机号写法
- 敏感词,敏感词,敏感词
- 让人蛋疼的敏感词
- 敏感词
- Oracle在英文匹配时大小写敏感,如何忽略大小写进行匹配
- Oracle在英文匹配时大小写敏感,如何忽略大小写进行匹配 .
- Oracle在英文匹配时大小写敏感,如何忽略大小写进行匹配
- Xcode8 最快最方便的安装插件方案
- Android Service使用方法--简单音乐播放实例
- 乐视秒杀:每秒十万笔交易的数据架构解读
- AsynTask
- 最小生成树-kruskal算法(克鲁斯卡尔算法)
- (敏感词匹配)将数据库的敏感词逗号隔开来进行内容匹配
- muduo源码学习(12)-日志类封装2
- LeetCode字符串(二)
- LeetCode 551. Student Attendance Record I (C++版)
- 穷举法:max-points-on-a-line
- 第一次写解压gz压缩文件,什么不对的地方或者待优化的地方劳烦指点指点,谢谢
- 2017世界人工智能企业排名
- C union使用
- MyBatis 延迟加载,一级缓存,二级缓存设置