1、初始化敏感词库,将敏感词加入到HashMap中,构建DFA算法模型
package com.datago.common.utils.sensitive;import java.util.*;public class SensitiveWordInit { @SuppressWarnings("rawtypes") public static HashMap sensitiveWordMap; public SensitiveWordInit() { super(); } public static HashMap init(String datas) { addSensitiveWord(datas); return sensitiveWordMap; } private static void addSensitiveWord(String word) { sensitiveWordMap = new HashMap(word.length()); Map now = null; Map now2 = null; now2 = sensitiveWordMap; for (int i = 0; i < word.length(); i++) { char key_word = word.charAt(i); Object obj = now2.get(key_word); if (obj != null) { //存在 now2 = (Map) obj; } else { //不存在 now = new HashMap<>(); now.put("isEnd", "0"); now2.put(key_word, now); now2 = now; } if (i == word.length() - 1) { now2.put("isEnd", "1"); } } } public static List getSensitiveWord(String text, int matchType) { List words = new ArrayList(); Map now = sensitiveWordMap; int count = 0; //初始化敏感词长度 int start = 0; //标志敏感词开始的下标 for (int i = 0; i < text.length(); i++) { char key = text.charAt(i); now = (Map) now.get(key); if (now != null) { //存在 count++; if (count == 1) { start = i; } if ("1".equals(now.get("isEnd"))) { //敏感词结束 now = sensitiveWordMap; //重新获取敏感词库 words.add(text.substring(start, start + count)); //取出敏感词,添加到集合 count = 0; //初始化敏感词长度 } } else { //不存在 now = sensitiveWordMap;//重新获取敏感词库 if (count == 1 && matchType == 1) { //不最佳匹配 count = 0; } else if (count == 1 && matchType == 2) { //最佳匹配 words.add(text.substring(start, start + count)); count = 0; } } } return words; }}
2、敏感词过滤
package com.datago.common.utils.sensitive;import com.datago.common.core.redis.RedisCache;import com.datago.common.utils.StringUtils;import org.springframework.beans.factory.annotation.Autowired;import org.springframework.stereotype.Component;import java.util.*;@Componentpublic class SensitivewordFilter { private static RedisCache redisCache; @Autowired public void setRedisCache(RedisCache redisCache) { SensitivewordFilter.redisCache = redisCache; } @SuppressWarnings("rawtypes") private static Map sensitiveWordMap = null; public static void initSensitiveWord(String datas) { sensitiveWordMap = SensitiveWordInit.init(datas); } public static String replaceSensitiveWord(String datas, String txt, int matchType, String replaceChar) { if (sensitiveWordMap == null) { initSensitiveWord(datas); } String resultTxt = txt; //matchType = 1; //最小匹配规则 //matchType= 2; //最大匹配规则 List set = SensitiveWordInit.getSensitiveWord(txt, matchType); //获取所有的敏感词 Iterator iterator = set.iterator(); String word = null; String replaceString = null; while (iterator.hasNext()) { word = iterator.next(); replaceString = getReplaceChars(replaceChar, word.length()); resultTxt = resultTxt.replaceAll(word, replaceString); } return resultTxt; } private static String getReplaceChars(String replaceChar, int length) { String resultReplace = replaceChar; if (length > 6) { length = 6; } for (int i = 1; i < length; i++) { resultReplace += replaceChar; } return resultReplace; } public static String filterSensitive(String sensitiveTxt) { //从缓存中提取数据敏感词汇 Map datas = redisCache.getCacheObject("treeSensitive"); //替换敏感词汇 String updateTxt = null; for (Map.Entry entry : datas.entrySet()) { SensitivewordFilter.initSensitiveWord(entry.getKey()); if (StringUtils.isNotEmpty(updateTxt)) { updateTxt = replaceSensitiveWord(entry.getKey(), updateTxt, 1, entry.getValue()); } else { updateTxt = replaceSensitiveWord(entry.getKey(), sensitiveTxt, 1, entry.getValue()); } } return updateTxt; }}
3、应用
/**
 * Runs the text taken from the request path through the sensitive-word filter.
 *
 * @param sensitiveTxt the value of the {@code sensitiveTxt} path segment
 * @return the result of {@code AjaxResult.success} applied to the filtered text
 */
@Log(title = "过滤敏感词汇")
@GetMapping("/filterSensitive/{sensitiveTxt}")
public AjaxResult filterSensitive(@PathVariable(value = "sensitiveTxt") String sensitiveTxt) {
    return AjaxResult.success(SensitivewordFilter.filterSensitive(sensitiveTxt));
}
4、参考文献
https://www.hutool.cn/docs/#/dfa/DFA%E6%9F%A5%E6%89%BE