大家好,又见面了,我是你们的朋友全栈君。
package com.yangkaile.generator;
import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.Test;
import java.util.*;
/**
 * Example of keyword matching with a DFA, implemented as a trie of nested maps.
 *
 * <p>Every trie node below the root is a {@code Map} that holds:
 * <ul>
 *   <li>{@code Character} keys, each mapping to the child node for that character;</li>
 *   <li>the {@code String} key {@code "isEnd"}, whose value is {@code "1"} when a keyword
 *       ends at this node and {@code "0"} otherwise.</li>
 * </ul>
 * The root map only contains {@code Character} keys.
 *
 * <p>Created: 2021/7/26 15:56
 *
 * @author wangdong
 */
@Slf4j
public class ApplicationTest {

    /** Key under which a trie node stores its end-of-keyword marker. */
    private static final String IS_END_KEY = "isEnd";

    /** Marker value: a keyword ends at this node. */
    private static final String END = "1";

    /** Marker value: no keyword ends at this node (yet). */
    private static final String NOT_END = "0";

    /** Match type: stop at the first (shortest) keyword found. */
    private static final int MIN_MATCH_TYPE = 1;

    /** Match type: keep scanning and report the longest keyword found. */
    private static final int MAX_MATCH_TYPE = 2;

    /**
     * Builds a trie from a few sample keywords, scans a sample sentence, prints the
     * keywords that were found and checks them against the expected result.
     */
    @Test
    public void test1() {
        Set<String> keyWordSet = new HashSet<>();
        keyWordSet.add("五连鞭");
        keyWordSet.add("接化发");
        keyWordSet.add("一鞭");
        keyWordSet.add("二鞭");
        keyWordSet.add("三鞭");
        keyWordSet.add("四鞭");
        keyWordSet.add("五鞭");
        keyWordSet.add("混元形意太极掌门人");

        Map<Object, Object> dfaMap = addSensitiveWordToHashMap(keyWordSet);
        Set<String> result = getTriggerOverWord("一鞭后直接五鞭,", dfaMap);
        System.out.println(result);

        Set<String> expected = new HashSet<>(Arrays.asList("一鞭", "五鞭"));
        if (!expected.equals(result)) {
            throw new AssertionError("expected " + expected + " but got " + result);
        }
    }

    /**
     * Builds the DFA (trie) model from a set of keywords.
     *
     * @param keyWordSet keywords to index; {@code null} is treated as an empty set, and
     *                   {@code null} or empty keywords are skipped
     * @return the root node of the trie (see the class description for the node layout)
     */
    public Map<Object, Object> addSensitiveWordToHashMap(Set<String> keyWordSet) {
        if (keyWordSet == null) {
            return new HashMap<>();
        }
        // Pre-size the root with the keyword count to reduce rehashing.
        Map<Object, Object> root = new HashMap<>(keyWordSet.size());
        for (String keyWord : keyWordSet) {
            if (keyWord == null || keyWord.isEmpty()) {
                continue;
            }
            Map<Object, Object> node = root;
            for (int i = 0; i < keyWord.length(); i++) {
                node = childNode(node, keyWord.charAt(i));
            }
            // Only the node reached by the last character marks the end of a keyword.
            // Nodes that already carry "1" from a shorter keyword are never reset.
            node.put(IS_END_KEY, END);
        }
        return root;
    }

    /**
     * Returns the child of {@code parent} for {@code ch}, creating it (marked as
     * "not an end") when it does not exist yet.
     */
    @SuppressWarnings("unchecked")
    private static Map<Object, Object> childNode(Map<Object, Object> parent, char ch) {
        // Safe cast: the only values stored under Character keys are nodes created here.
        return (Map<Object, Object>) parent.computeIfAbsent(Character.valueOf(ch), key -> {
            Map<Object, Object> created = new HashMap<>();
            created.put(IS_END_KEY, NOT_END);
            return created;
        });
    }

    /**
     * Determines whether a keyword starts at position {@code start} of {@code content},
     * based on the DFA model.
     *
     * @param keyWordSet root node of the trie built by {@link #addSensitiveWordToHashMap(Set)}
     * @param content    text to scan
     * @param start      index in {@code content} at which the keyword must begin
     * @param matchType  {@code 1} stops at the first (shortest) keyword; any other value
     *                   keeps scanning and reports the longest keyword
     * @return the length of the keyword found at {@code start}, or {@code 0} when no
     *         keyword starts there (also when {@code keyWordSet} or {@code content} is
     *         {@code null})
     * @throws StringIndexOutOfBoundsException if {@code start} is negative
     */
    public int getOverWordLength(Map<?, ?> keyWordSet, String content, int start, int matchType) {
        if (keyWordSet == null || content == null) {
            return 0;
        }
        // Length at the most recent end-of-keyword node. Characters walked after that
        // node only count if they lead to another end node; otherwise a partial prefix
        // of a longer keyword would be reported as part of the match.
        int matchedLength = 0;
        Map<?, ?> node = keyWordSet;
        for (int i = start; i < content.length(); i++) {
            Object child = node.get(Character.valueOf(content.charAt(i)));
            if (!(child instanceof Map)) {
                break;
            }
            node = (Map<?, ?>) child;
            if (END.equals(node.get(IS_END_KEY))) {
                matchedLength = i - start + 1;
                if (matchType == MIN_MATCH_TYPE) {
                    break;
                }
            }
        }
        return matchedLength;
    }

    /**
     * Finds the keywords contained in {@code content}, based on the DFA model. At every
     * position the longest keyword is taken and scanning resumes right after it.
     *
     * @param content    text to scan; {@code null} yields an empty result
     * @param keyWordSet root node of the trie built by {@link #addSensitiveWordToHashMap(Set)}
     * @return the distinct keywords found in {@code content}
     */
    public Set<String> getTriggerOverWord(String content, Map<?, ?> keyWordSet) {
        Set<String> words = new HashSet<>();
        if (content == null || keyWordSet == null) {
            return words;
        }
        int index = 0;
        while (index < content.length()) {
            int length = getOverWordLength(keyWordSet, content, index, MAX_MATCH_TYPE);
            if (length > 0) {
                words.add(content.substring(index, index + length));
                index += length;
            } else {
                index++;
            }
        }
        return words;
    }
}
发布者:全栈程序员-用户IM,转载请注明出处:https://javaforall.cn/128928.html 原文链接:https://javaforall.cn
【正版授权,激活自己账号】: Jetbrains全家桶Ide使用,1年售后保障,每天仅需1毛
【官方授权 正版激活】: 官方授权 正版激活 支持Jetbrains家族下所有IDE 使用个人JB账号...