(敏感词匹配)将数据库中的敏感词用逗号隔开后进行内容匹配

来源:互联网 发布:linux怎么重启服务器 编辑:程序博客网 时间:2024/06/11 09:28
参考文献:http://cmsblogs.com/?p=1031
参考文献:http://blog.csdn.net/chenssy/article/details/26961957
参考文献:http://www.iteye.com/topic/336577

package com.system.util.SensitiveWords;import java.util.HashSet;import java.util.Iterator;import java.util.Map;import java.util.Set;/** * @Description: 敏感词过滤 * Created by chenghongchao on 2017/7/3. * @version 1.0 */public class SensitivewordFilter {@SuppressWarnings("rawtypes")private Map sensitiveWordMap = null;public static int minMatchTYpe = 1;      //最小匹配规则public static int maxMatchType = 2;      //最大匹配规则/** * 构造函数,初始化敏感词库 */public SensitivewordFilter(String SensitiveWords){sensitiveWordMap = new SensitiveWordInit().initKeyWord(SensitiveWords);}/** * 判断文字是否包含敏感字符 * Created by chenghongchao on 2017/7/3. * @param txt  文字 * @param matchType  匹配规则 1:最小匹配规则,2:最大匹配规则 * @return 若包含返回true,否则返回false * @version 1.0 */public boolean isContaintSensitiveWord(String txt,int matchType){boolean flag = false;for(int i = 0 ; i < txt.length() ; i++){int matchFlag = this.CheckSensitiveWord(txt, i, matchType); //判断是否包含敏感字符if(matchFlag > 0){    //大于0存在,返回trueflag = true;}}return flag;}/** * 获取文字中的敏感词 * Created by chenghongchao on 2017/7/3. * @param txt 文字 * @param matchType 匹配规则 1:最小匹配规则,2:最大匹配规则 * @return * @version 1.0 */public Set getSensitiveWord(String txt , int matchType){Set sensitiveWordList = new HashSet();for(int i = 0 ; i < txt.length() ; i++){int length = CheckSensitiveWord(txt, i, matchType);    //判断是否包含敏感字符if(length > 0){    //存在,加入list中sensitiveWordList.add(txt.substring(i, i+length));i = i + length - 1;    //减1的原因,是因为for会自增}}return sensitiveWordList;}/** * 替换敏感字字符 * Created by chenghongchao on 2017/7/3. 
* @param txt * @param matchType * @param replaceChar 替换字符,默认* * @version 1.0 */public String replaceSensitiveWord(String txt,int matchType,String replaceChar){String resultTxt = txt;Set set = getSensitiveWord(txt, matchType);     //获取所有的敏感词Iterator iterator = set.iterator();String word = null;String replaceString = null;while (iterator.hasNext()) {word = iterator.next();replaceString = getReplaceChars(replaceChar, word.length());resultTxt = resultTxt.replaceAll(word, replaceString);}return resultTxt;}/** * 获取替换字符串 * Created by chenghongchao on 2017/7/3. * @param replaceChar * @param length * @return * @version 1.0 */private String getReplaceChars(String replaceChar,int length){String resultReplace = replaceChar;for(int i = 1 ; i < length ; i++){resultReplace += replaceChar;}return resultReplace;}/** * 检查文字中是否包含敏感字符,检查规则如下:
* Created by chenghongchao on 2017/7/3. * @param txt * @param beginIndex * @param matchType * @return,如果存在,则返回敏感词字符的长度,不存在返回0 * @version 1.0 */@SuppressWarnings({ "rawtypes"})public int CheckSensitiveWord(String txt,int beginIndex,int matchType){boolean flag = false; //敏感词结束标识位:用于敏感词只有1位的情况int matchFlag = 0; //匹配标识数默认为0char word = 0;Map nowMap = sensitiveWordMap;for(int i = beginIndex; i < txt.length() ; i++){word = txt.charAt(i);nowMap = (Map) nowMap.get(word); //获取指定keyif(nowMap != null){ //存在,则判断是否为最后一个matchFlag++; //找到相应key,匹配标识+1if("1".equals(nowMap.get("isEnd"))){ //如果为最后一个匹配规则,结束循环,返回匹配标识数flag = true; //结束标志位为trueif(SensitivewordFilter.minMatchTYpe == matchType){ //最小规则,直接返回,最大规则还需继续查找break;}}}else{ //不存在,直接返回break;}}if(matchFlag < 2 || !flag){ //长度必须大于等于1,为词matchFlag = 0;}return matchFlag;}public static void main(String[] args) {//敏感词逗号隔开(敏感词存储到了数据库,可以直接凭借成逗号隔开的字符串)String SensitiveWords ="阿賓,挨了一炮,爱液横流,安街逆,";//赋值敏感词SensitivewordFilter filter = new SensitivewordFilter(SensitiveWords);System.out.println("敏感词数量:" + filter.sensitiveWordMap.size());String string = "这是一条评论的内容包含了安街逆";//设置匹配规则Set set = filter.getSensitiveWord(string, 1);System.out.println("语句中包含敏感词的个数为:" + set.size() + "包含了:" + set);}}
这里的方法是通过数据库来读取敏感词并匹配(需要先把数据库所有的敏感词查出来并用逗号隔开)
package com.system.util.SensitiveWords;import java.util.HashSet;import java.util.Iterator;import java.util.Map;import java.util.Set;/** * @Description: 敏感词过滤 * Created by chenghongchao on 2017/7/3. * @version 1.0 */public class SensitivewordFilter {@SuppressWarnings("rawtypes")private Map sensitiveWordMap = null;public static int minMatchTYpe = 1;      //最小匹配规则public static int maxMatchType = 2;      //最大匹配规则/** * 构造函数,初始化敏感词库 */public SensitivewordFilter(String SensitiveWords){sensitiveWordMap = new SensitiveWordInit().initKeyWord(SensitiveWords);}/** * 判断文字是否包含敏感字符 * Created by chenghongchao on 2017/7/3. * @param txt  文字 * @param matchType  匹配规则 1:最小匹配规则,2:最大匹配规则 * @return 若包含返回true,否则返回false * @version 1.0 */public boolean isContaintSensitiveWord(String txt,int matchType){boolean flag = false;for(int i = 0 ; i < txt.length() ; i++){int matchFlag = this.CheckSensitiveWord(txt, i, matchType); //判断是否包含敏感字符if(matchFlag > 0){    //大于0存在,返回trueflag = true;}}return flag;}/** * 获取文字中的敏感词 * Created by chenghongchao on 2017/7/3. * @param txt 文字 * @param matchType 匹配规则 1:最小匹配规则,2:最大匹配规则 * @return * @version 1.0 */public Set getSensitiveWord(String txt , int matchType){Set sensitiveWordList = new HashSet();for(int i = 0 ; i < txt.length() ; i++){int length = CheckSensitiveWord(txt, i, matchType);    //判断是否包含敏感字符if(length > 0){    //存在,加入list中sensitiveWordList.add(txt.substring(i, i+length));i = i + length - 1;    //减1的原因,是因为for会自增}}return sensitiveWordList;}/** * 替换敏感字字符 * Created by chenghongchao on 2017/7/3. 
* @param txt * @param matchType * @param replaceChar 替换字符,默认* * @version 1.0 */public String replaceSensitiveWord(String txt,int matchType,String replaceChar){String resultTxt = txt;Set set = getSensitiveWord(txt, matchType);     //获取所有的敏感词Iterator iterator = set.iterator();String word = null;String replaceString = null;while (iterator.hasNext()) {word = iterator.next();replaceString = getReplaceChars(replaceChar, word.length());resultTxt = resultTxt.replaceAll(word, replaceString);}return resultTxt;}/** * 获取替换字符串 * Created by chenghongchao on 2017/7/3. * @param replaceChar * @param length * @return * @version 1.0 */private String getReplaceChars(String replaceChar,int length){String resultReplace = replaceChar;for(int i = 1 ; i < length ; i++){resultReplace += replaceChar;}return resultReplace;}/** * 检查文字中是否包含敏感字符,检查规则如下:
* Created by chenghongchao on 2017/7/3. * @param txt * @param beginIndex * @param matchType * @return,如果存在,则返回敏感词字符的长度,不存在返回0 * @version 1.0 */@SuppressWarnings({ "rawtypes"})public int CheckSensitiveWord(String txt,int beginIndex,int matchType){boolean flag = false; //敏感词结束标识位:用于敏感词只有1位的情况int matchFlag = 0; //匹配标识数默认为0char word = 0;Map nowMap = sensitiveWordMap;for(int i = beginIndex; i < txt.length() ; i++){word = txt.charAt(i);nowMap = (Map) nowMap.get(word); //获取指定keyif(nowMap != null){ //存在,则判断是否为最后一个matchFlag++; //找到相应key,匹配标识+1if("1".equals(nowMap.get("isEnd"))){ //如果为最后一个匹配规则,结束循环,返回匹配标识数flag = true; //结束标志位为trueif(SensitivewordFilter.minMatchTYpe == matchType){ //最小规则,直接返回,最大规则还需继续查找break;}}}else{ //不存在,直接返回break;}}if(matchFlag < 2 || !flag){ //长度必须大于等于1,为词matchFlag = 0;}return matchFlag;}public static void main(String[] args) {//敏感词逗号隔开(敏感词存储到了数据库,可以直接凭借成逗号隔开的字符串)String SensitiveWords ="阿賓,挨了一炮,爱液横流,安街逆,";//赋值敏感词SensitivewordFilter filter = new SensitivewordFilter(SensitiveWords);System.out.println("敏感词数量:" + filter.sensitiveWordMap.size());String string = "这是一条评论的内容包含了安街逆";//设置匹配规则Set set = filter.getSensitiveWord(string, 1);System.out.println("语句中包含敏感词的个数为:" + set.size() + "包含了:" + set);}}


阅读全文
0 0
原创粉丝点击