using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace SensitiveWordFilter
{
    /// <summary>
    /// Sensitive-word detection and masking backed by a character trie (the "DFA" model).
    /// Every trie node is a <c>Dictionary&lt;char, object&gt;</c> whose entries are either
    /// child nodes keyed by the next character, or the reserved <see cref="IsEndChar"/> key
    /// holding "1" (a word ends on this node) or "0" (it does not).
    /// </summary>
    public class SensitiveWord
    {
        /// <summary>
        /// Reserved key that stores the end-of-word flag inside each trie node.
        /// Because it is a key of the node itself, it can never be part of a word:
        /// it is dropped from dictionary words and ignored inside scanned text.
        /// </summary>
        private static readonly char IsEndChar = '$';

        /// <summary>
        /// Loads the word list from disk and builds the trie used by the filter.
        /// </summary>
        public class SensitiveWordInit
        {
            // Character encoding of the word-list file.
            private static readonly String ENCODING = "UTF-8";

            // Word-list file, one word per line, resolved against the current directory.
            private static readonly String DictionaryPath = "dic.txt";

            /// <summary>
            /// Reads the word list and converts it into a trie.
            /// </summary>
            /// <returns>The root node of the trie.</returns>
            public Dictionary<char, object> initKeyWord()
            {
                HashSet<string> wordSet = readSensitiveWordFile();
                return addSensitiveWordToHashMap(wordSet);
            }

            /// <summary>
            /// Builds the trie. For the words "中国", "中国人民" and "中国男人" the result is:
            /// <code>
            /// 中 = { $ = "0",
            ///        国 = { $ = "1",
            ///               人 = { $ = "0", 民 = { $ = "1" } },
            ///               男 = { $ = "0", 人 = { $ = "1" } } } }
            /// </code>
            /// </summary>
            /// <param name="wordSet">Words to insert.</param>
            /// <returns>The root node of the trie.</returns>
            private Dictionary<char, object> addSensitiveWordToHashMap(HashSet<string> wordSet)
            {
                // Pre-size the root to limit rehashing while the first level fills up.
                Dictionary<char, object> wordMap = new Dictionary<char, object>(wordSet.Count);

                foreach (String word in wordSet)
                {
                    Dictionary<char, object> nowMap = wordMap;
                    bool consumedAny = false;

                    foreach (char keyChar in word)
                    {
                        // The marker is reserved for the end flag and cannot be a child key.
                        if (keyChar == IsEndChar)
                        {
                            continue;
                        }

                        object tempMap;
                        if (nowMap.TryGetValue(keyChar, out tempMap))
                        {
                            nowMap = (Dictionary<char, object>)tempMap;
                        }
                        else
                        {
                            // New node: not a word end until some word finishes here.
                            Dictionary<char, object> newMap = new Dictionary<char, object>();
                            newMap.Add(IsEndChar, "0");
                            nowMap.Add(keyChar, newMap);
                            nowMap = newMap;
                        }

                        consumedAny = true;
                    }

                    // Flag the node of the last real character. Doing this after the loop
                    // (instead of testing for the last index inside it) also covers words
                    // whose final character is the marker, and never touches the root.
                    if (consumedAny)
                    {
                        nowMap[IsEndChar] = "1";
                    }
                }

                return wordMap;
            }

            /// <summary>
            /// Reads the word-list file into a set, one entry per non-empty line.
            /// </summary>
            /// <returns>The distinct words found in the file.</returns>
            private HashSet<string> readSensitiveWordFile()
            {
                HashSet<string> wordSet = new HashSet<string>();

                foreach (string line in File.ReadLines(DictionaryPath, Encoding.GetEncoding(ENCODING)))
                {
                    if (line.Length > 0)
                    {
                        wordSet.Add(line);
                    }
                }

                return wordSet;
            }
        }

        /// <summary>
        /// Finds and masks sensitive words in text. Obtain the shared instance through
        /// <see cref="getInstance"/>.
        /// </summary>
        public class SensitivewordFilter
        {
            private Dictionary<char, object> sensitiveWordMap = null;

            // Minimum match rule.
            public static int minMatchTYpe = 1;

            // Maximum match rule.
            public static int maxMatchType = 2;

            // Shared instance, created on first use.
            private static SensitivewordFilter inst = null;

            /// <summary>
            /// Builds the trie from the word-list file.
            /// </summary>
            private SensitivewordFilter()
            {
                sensitiveWordMap = new SensitiveWordInit().initKeyWord();
            }

            /// <summary>
            /// Returns the shared filter, creating it (and loading the word list) on the
            /// first call.
            /// </summary>
            /// <returns>The shared filter instance.</returns>
            public static SensitivewordFilter getInstance()
            {
                if (null == inst)
                {
                    inst = new SensitivewordFilter();
                }

                return inst;
            }

            /// <summary>
            /// Tells whether the text contains at least one sensitive word.
            /// </summary>
            /// <param name="txt">Text to scan; null or empty yields false.</param>
            /// <param name="matchType"><see cref="minMatchTYpe"/> or <see cref="maxMatchType"/>.</param>
            /// <returns>true when a sensitive word starts at any position of the text.</returns>
            public bool isContaintSensitiveWord(String txt, int matchType = 1)
            {
                if (string.IsNullOrEmpty(txt))
                {
                    return false;
                }

                for (int i = 0; i < txt.Length; i++)
                {
                    // One hit answers the question; no need to scan the rest.
                    if (CheckSensitiveWord(txt, i, matchType) > 0)
                    {
                        return true;
                    }
                }

                return false;
            }

            /// <summary>
            /// Collects the distinct sensitive words found in the text, scanning left to
            /// right without overlaps.
            /// </summary>
            /// <param name="txt">Text to scan; null or empty yields an empty set.</param>
            /// <param name="matchType"><see cref="minMatchTYpe"/> or <see cref="maxMatchType"/>.</param>
            /// <returns>The matched substrings exactly as they appear in the text.</returns>
            public HashSet<string> getSensitiveWord(String txt, int matchType = 1)
            {
                HashSet<string> sensitiveWordList = new HashSet<string>();
                if (string.IsNullOrEmpty(txt))
                {
                    return sensitiveWordList;
                }

                for (int i = 0; i < txt.Length; i++)
                {
                    int length = CheckSensitiveWord(txt, i, matchType);
                    if (length > 0)
                    {
                        sensitiveWordList.Add(txt.Substring(i, length));

                        // Minus one because the for statement increments i again.
                        i = i + length - 1;
                    }
                }

                return sensitiveWordList;
            }

            /// <summary>
            /// Returns the text with every sensitive word masked: each character of a match
            /// is replaced by one copy of <paramref name="replaceChar"/>.
            /// </summary>
            /// <param name="txt">Text to mask; null or empty is returned unchanged.</param>
            /// <param name="replaceChar">Mask token; may be longer than one character.</param>
            /// <param name="matchType"><see cref="minMatchTYpe"/> or <see cref="maxMatchType"/>.</param>
            /// <returns>The masked text.</returns>
            public String replaceSensitiveWord(String txt, String replaceChar, int matchType = 1)
            {
                if (string.IsNullOrEmpty(txt))
                {
                    return txt;
                }

                // The output is appended piece by piece rather than patched in place:
                // positions come from txt, and an in-place edit would shift them as soon
                // as the mask token is not exactly one character long.
                StringBuilder sb = new StringBuilder(txt.Length);
                int i = 0;
                while (i < txt.Length)
                {
                    int length = CheckSensitiveWord(txt, i, matchType);
                    if (length > 0)
                    {
                        sb.Append(getReplaceChars(replaceChar, length));
                        i += length;
                    }
                    else
                    {
                        sb.Append(txt[i]);
                        i++;
                    }
                }

                return sb.ToString();
            }

            /// <summary>
            /// Repeats the mask token.
            /// </summary>
            /// <param name="replaceChar">Token to repeat.</param>
            /// <param name="length">Number of repetitions.</param>
            /// <returns>The token concatenated <paramref name="length"/> times.</returns>
            private String getReplaceChars(String replaceChar, int length)
            {
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i < length; i++)
                {
                    sb.Append(replaceChar);
                }

                return sb.ToString();
            }

            /// <summary>
            /// Checks whether a sensitive word starts at <paramref name="beginIndex"/>.
            /// <para>
            /// With <see cref="maxMatchType"/> the longest word starting there is taken.
            /// With <see cref="minMatchTYpe"/> the scan stops at the first word that cannot
            /// be extended; when the first word is a prefix of longer ones, the scan goes
            /// on to the next word end and falls back to the first if none is reached.
            /// </para>
            /// <para>
            /// Marker characters inside a match are ignored for matching but counted in the
            /// returned length, so the result is always a valid span of the text. A match
            /// never starts on a marker, and words of fewer than two characters are never
            /// reported.
            /// </para>
            /// </summary>
            /// <param name="txt">Text to scan; null or empty yields 0.</param>
            /// <param name="beginIndex">Position in the text where the word must start.</param>
            /// <param name="matchType"><see cref="minMatchTYpe"/> or <see cref="maxMatchType"/>.</param>
            /// <returns>
            /// The number of text characters covered by the match, or 0 when there is none.
            /// </returns>
            public int CheckSensitiveWord(String txt, int beginIndex, int matchType)
            {
                if (string.IsNullOrEmpty(txt))
                {
                    return 0;
                }

                Dictionary<char, object> nowMap = sensitiveWordMap;
                int matched = 0;      // word characters consumed so far (markers excluded)
                int bestSpan = 0;     // text length of the most recent complete word
                int bestMatched = 0;  // word characters in that complete word
                int wordEnds = 0;     // complete words passed on this walk

                for (int i = beginIndex; i < txt.Length; i++)
                {
                    char word = txt[i];
                    if (word == IsEndChar)
                    {
                        if (matched == 0)
                        {
                            // A match must start on a real character.
                            break;
                        }

                        continue;
                    }

                    object next;
                    if (!nowMap.TryGetValue(word, out next))
                    {
                        // Dead end: whatever was recorded last is the answer.
                        break;
                    }

                    nowMap = (Dictionary<char, object>)next;
                    matched++;

                    if (!IsWordEnd(nowMap))
                    {
                        continue;
                    }

                    // Remember this word end so that a failed extension falls back to it
                    // without re-walking the text.
                    bestSpan = i - beginIndex + 1;
                    bestMatched = matched;
                    wordEnds++;

                    // A node holding only the marker key has no children.
                    bool isLeaf = nowMap.Count == 1;
                    if (SensitivewordFilter.minMatchTYpe == matchType && (isLeaf || wordEnds > 1))
                    {
                        break;
                    }
                }

                // A word needs at least two characters.
                return bestMatched >= 2 ? bestSpan : 0;
            }

            /// <summary>
            /// Tells whether a word ends on the given trie node.
            /// </summary>
            private static bool IsWordEnd(Dictionary<char, object> node)
            {
                object marker;
                return node.TryGetValue(IsEndChar, out marker) && "1".Equals(marker as string);
            }
        }
    }

    /// <summary>
    /// Console demo: masks a sample sentence and prints it before and after.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            SensitiveWord.SensitivewordFilter filter = SensitiveWord.SensitivewordFilter.getInstance();
            String txt = "$fuckfuck you你麻痹e菜太菜了fuckyou从飞啊 fuck you";
            String hou = filter.replaceSensitiveWord(txt, "*");
            Console.WriteLine("替换前的文字为:" + txt);
            Console.WriteLine("替换后的文字为:" + hou);
            Console.ReadKey();
        }
    }
}