欢迎您访问365答案网,请分享给你的朋友!
生活常识 学习资料

Java使用DFA算法处理敏感词汇

时间:2023-06-11
1、初始化敏感词库,将敏感词加入到HashMap中,构建DFA算法模型

package com.datago.common.utils.sensitive;import java.util.*;public class SensitiveWordInit { @SuppressWarnings("rawtypes") public static HashMap sensitiveWordMap; public SensitiveWordInit() { super(); } public static HashMap init(String datas) { addSensitiveWord(datas); return sensitiveWordMap; } private static void addSensitiveWord(String word) { sensitiveWordMap = new HashMap(word.length()); Map now = null; Map now2 = null; now2 = sensitiveWordMap; for (int i = 0; i < word.length(); i++) { char key_word = word.charAt(i); Object obj = now2.get(key_word); if (obj != null) { //存在 now2 = (Map) obj; } else { //不存在 now = new HashMap<>(); now.put("isEnd", "0"); now2.put(key_word, now); now2 = now; } if (i == word.length() - 1) { now2.put("isEnd", "1"); } } } public static List getSensitiveWord(String text, int matchType) { List words = new ArrayList(); Map now = sensitiveWordMap; int count = 0; //初始化敏感词长度 int start = 0; //标志敏感词开始的下标 for (int i = 0; i < text.length(); i++) { char key = text.charAt(i); now = (Map) now.get(key); if (now != null) { //存在 count++; if (count == 1) { start = i; } if ("1".equals(now.get("isEnd"))) { //敏感词结束 now = sensitiveWordMap; //重新获取敏感词库 words.add(text.substring(start, start + count)); //取出敏感词,添加到集合 count = 0; //初始化敏感词长度 } } else { //不存在 now = sensitiveWordMap;//重新获取敏感词库 if (count == 1 && matchType == 1) { //不最佳匹配 count = 0; } else if (count == 1 && matchType == 2) { //最佳匹配 words.add(text.substring(start, start + count)); count = 0; } } } return words; }}

2、敏感词过滤

package com.datago.common.utils.sensitive;import com.datago.common.core.redis.RedisCache;import com.datago.common.utils.StringUtils;import org.springframework.beans.factory.annotation.Autowired;import org.springframework.stereotype.Component;import java.util.*;@Componentpublic class SensitivewordFilter { private static RedisCache redisCache; @Autowired public void setRedisCache(RedisCache redisCache) { SensitivewordFilter.redisCache = redisCache; } @SuppressWarnings("rawtypes") private static Map sensitiveWordMap = null; public static void initSensitiveWord(String datas) { sensitiveWordMap = SensitiveWordInit.init(datas); } public static String replaceSensitiveWord(String datas, String txt, int matchType, String replaceChar) { if (sensitiveWordMap == null) { initSensitiveWord(datas); } String resultTxt = txt; //matchType = 1; //最小匹配规则 //matchType= 2; //最大匹配规则 List set = SensitiveWordInit.getSensitiveWord(txt, matchType); //获取所有的敏感词 Iterator iterator = set.iterator(); String word = null; String replaceString = null; while (iterator.hasNext()) { word = iterator.next(); replaceString = getReplaceChars(replaceChar, word.length()); resultTxt = resultTxt.replaceAll(word, replaceString); } return resultTxt; } private static String getReplaceChars(String replaceChar, int length) { String resultReplace = replaceChar; if (length > 6) { length = 6; } for (int i = 1; i < length; i++) { resultReplace += replaceChar; } return resultReplace; } public static String filterSensitive(String sensitiveTxt) { //从缓存中提取数据敏感词汇 Map datas = redisCache.getCacheObject("treeSensitive"); //替换敏感词汇 String updateTxt = null; for (Map.Entry entry : datas.entrySet()) { SensitivewordFilter.initSensitiveWord(entry.getKey()); if (StringUtils.isNotEmpty(updateTxt)) { updateTxt = replaceSensitiveWord(entry.getKey(), updateTxt, 1, entry.getValue()); } else { updateTxt = replaceSensitiveWord(entry.getKey(), sensitiveTxt, 1, entry.getValue()); } } return updateTxt; }}

3、应用

/**
 * GET endpoint that filters the text supplied in the URL path.
 *
 * <p>The path segment is handed to {@code SensitivewordFilter.filterSensitive} and the
 * filtered string is wrapped with {@code AjaxResult.success}.
 *
 * @param sensitiveTxt the text to filter, bound from the {@code sensitiveTxt} path variable
 * @return the result object produced by {@code AjaxResult.success} for the filtered text
 */
@Log(title = "过滤敏感词汇")
@GetMapping("/filterSensitive/{sensitiveTxt}")
public AjaxResult filterSensitive(@PathVariable("sensitiveTxt") String sensitiveTxt) {
    return AjaxResult.success(SensitivewordFilter.filterSensitive(sensitiveTxt));
}

4、参考文献

https://www.hutool.cn/docs/#/dfa/DFA%E6%9F%A5%E6%89%BE

Copyright © 2016-2020 www.365daan.com All Rights Reserved. 365答案网 版权所有 备案号:

部分内容来自互联网,版权归原作者所有,如有冒犯请联系我们,我们将在三个工作日内妥善处理。