|
@@ -12,6 +12,7 @@ import com.bowintek.practice.vo.Analyze.AnalyzeModel;
|
|
|
import com.bowintek.practice.vo.Analyze.ComparisonResult;
|
|
|
import com.bowintek.practice.vo.EsIndexVo;
|
|
|
import com.bowintek.practice.vo.EsIndexfieldVo;
|
|
|
+import org.apache.commons.beanutils.BeanUtils;
|
|
|
import org.elasticsearch.client.RequestOptions;
|
|
|
import org.elasticsearch.client.RestHighLevelClient;
|
|
|
import org.elasticsearch.client.indices.AnalyzeRequest;
|
|
@@ -21,11 +22,13 @@ import org.springframework.stereotype.Component;
|
|
|
|
|
|
import java.io.IOException;
|
|
|
import java.io.StringReader;
|
|
|
+import java.lang.reflect.InvocationTargetException;
|
|
|
import java.time.LocalDate;
|
|
|
import java.time.LocalDateTime;
|
|
|
import java.util.*;
|
|
|
import java.sql.Timestamp;
|
|
|
|
|
|
+import org.springframework.util.Assert;
|
|
|
import org.wltea.analyzer.cfg.DefaultConfig;
|
|
|
import org.wltea.analyzer.core.IKSegmenter;
|
|
|
import org.wltea.analyzer.core.Lexeme;
|
|
@@ -53,6 +56,8 @@ public class AnalyzeServiceImpl implements AnalyzeService {
|
|
|
private static HashMap<String, AnalyzeModel> mapUseless = new HashMap<>();
|
|
|
//对象分词
|
|
|
private static HashMap<String, AnalyzeModel> mapObjects = new HashMap<>();
|
|
|
+ private static List<String> indexCache = null;
|
|
|
+ private static List<EsIndexfieldVo> indexFields = null;
|
|
|
|
|
|
private void initStaticWords() {
|
|
|
if (isInitStaticWords) return;
|
|
@@ -116,9 +121,7 @@ public class AnalyzeServiceImpl implements AnalyzeService {
|
|
|
if (refTime == null || refTime.getTime() < keyword.getCreateTime().getTime())
|
|
|
refTime = keyword.getCreateTime();
|
|
|
}
|
|
|
- EsIndexVo indexModel = esIndexService.getAllList().get(0);
|
|
|
-
|
|
|
- sysMainDict(indexModel.getFieldList());
|
|
|
+ sysMainDict(indexFields);
|
|
|
}
|
|
|
|
|
|
private void sysMainDict(List<EsIndexfieldVo> fields) {
|
|
@@ -153,17 +156,14 @@ public class AnalyzeServiceImpl implements AnalyzeService {
|
|
|
if (colls.size() > 0) dictionary.addWords(colls);
|
|
|
}
|
|
|
|
|
|
- @Override
|
|
|
- public AnalyzeResponse analyze(String text) {
|
|
|
+ public List<AnalyzeModel> analyze(String text) {
|
|
|
try {
|
|
|
- AnalyzeRequest request =AnalyzeRequest.withGlobalAnalyzer("ik_smart",text) ;
|
|
|
- AnalyzeResponse response = highLevelClient.indices().analyze(request, RequestOptions.DEFAULT);
|
|
|
- System.out.println(response.toString());
|
|
|
- return response;
|
|
|
+ AnalyzeRequest request = AnalyzeRequest.withGlobalAnalyzer("ik_max_word", text);
|
|
|
+ return getAnalyzeModels(request);
|
|
|
} catch (Exception ex) {
|
|
|
ex.printStackTrace();
|
|
|
return null;
|
|
|
- }finally {
|
|
|
+ } finally {
|
|
|
/*try {
|
|
|
highLevelClient.close();
|
|
|
} catch (IOException e) {
|
|
@@ -172,17 +172,51 @@ public class AnalyzeServiceImpl implements AnalyzeService {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ public List<AnalyzeModel> analyzeSynonym(String text) {
|
|
|
+ try {
|
|
|
+ AnalyzeRequest request = AnalyzeRequest.withIndexAnalyzer(indexCache.get(0), "ik_search_analyzer", text);
|
|
|
+ return getAnalyzeModels(request);
|
|
|
+ } catch (Exception ex) {
|
|
|
+ ex.printStackTrace();
|
|
|
+ return null;
|
|
|
+ } finally {
|
|
|
+ /*try {
|
|
|
+ highLevelClient.close();
|
|
|
+ } catch (IOException e) {
|
|
|
+ throw new RuntimeException(e);
|
|
|
+ }*/
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private List<AnalyzeModel> getAnalyzeModels(AnalyzeRequest request) throws IOException {
|
|
|
+ AnalyzeResponse response = highLevelClient.indices().analyze(request, RequestOptions.DEFAULT);
|
|
|
+ System.out.println(response.toString());
|
|
|
+ List<AnalyzeModel> modelList = new ArrayList<>();
|
|
|
+ response.getTokens().forEach(lexeme -> {
|
|
|
+ AnalyzeModel model = AnalyzeModel.GenModel(lexeme.getType(),
|
|
|
+ lexeme.getTerm(),
|
|
|
+ lexeme.getStartOffset(), lexeme.getEndOffset());
|
|
|
+ modelList.add(model);
|
|
|
+ });
|
|
|
+ return modelList;
|
|
|
+ }
|
|
|
+
|
|
|
@Override
|
|
|
public List<AnalyzeResponse> analyzes(String text) {
|
|
|
String[] arys = text.split(" ");
|
|
|
List<AnalyzeResponse> responses = new ArrayList<>();
|
|
|
- for (int i = 0; i < arys.length; i++) {
|
|
|
+ /* for (int i = 0; i < arys.length; i++) {
|
|
|
AnalyzeResponse response = analyze(arys[i]);
|
|
|
if (response != null) responses.add(response);
|
|
|
- }
|
|
|
+ }*/
|
|
|
return responses;
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * 解析语义
|
|
|
+ * @param text
|
|
|
+ * @return
|
|
|
+ */
|
|
|
@Override
|
|
|
public ComparisonResult analyzeJava(String text) {
|
|
|
try {
|
|
@@ -190,9 +224,9 @@ public class AnalyzeServiceImpl implements AnalyzeService {
|
|
|
IKSegmenter segmenter = new IKSegmenter(reader, true);
|
|
|
|
|
|
System.out.println(text);
|
|
|
- //ik分词逻辑
|
|
|
- List<AnalyzeModel> modelList = new ArrayList<>();
|
|
|
- Lexeme lexeme;
|
|
|
+            //ik分词逻辑--通过elasticsearch的分词功能进行分词,可以配置无用词,自定义分词等等
|
|
|
+ List<AnalyzeModel> modelList = analyze(text);
|
|
|
+ /*Lexeme lexeme;
|
|
|
while ((lexeme = segmenter.next()) != null) {
|
|
|
//System.out.println(" "+lexeme.getLexemeTypeString() + " " + lexeme.getLexemeText()
|
|
|
// +" from:"+lexeme.getBeginPosition()
|
|
@@ -202,9 +236,9 @@ public class AnalyzeServiceImpl implements AnalyzeService {
|
|
|
lexeme.getLexemeText(),
|
|
|
lexeme.getBeginPosition(), lexeme.getEndPosition());
|
|
|
modelList.add(model);
|
|
|
- }
|
|
|
+ }*/
|
|
|
|
|
|
- //分析表达式
|
|
|
+            //分析表达式---对分词后的词进行二次分析,比如同义词转换:"为"转"等于","大鱼"转"大于"等等
|
|
|
AnalyzeComparisonResult analyze = findAnalyzeMap(modelList, mapAnalyze);
|
|
|
AnalyzeComparisonResult symbols = findAnalyzeMap(modelList, mapSymbols);
|
|
|
if (analyze.getIndex() == -1 || symbols.getIndex() == -1) {
|
|
@@ -285,25 +319,40 @@ public class AnalyzeServiceImpl implements AnalyzeService {
|
|
|
AnalyzeComparisonResult result = new AnalyzeComparisonResult();
|
|
|
result.setIndex(-1);
|
|
|
|
|
|
- for (int i = 0; i < modelList.size(); i++) {
|
|
|
- if (map.containsKey(modelList.get(i).getLexemeText())) {
|
|
|
- AnalyzeModel model = map.get(modelList.get(i).getLexemeText());
|
|
|
- model = JSON.parseObject(JSON.toJSONString(model), AnalyzeModel.class);
|
|
|
- model.setLexemeType(modelList.get(i).getLexemeType());
|
|
|
- model.setLexemeText(modelList.get(i).getLexemeText());
|
|
|
- model.setBegin(modelList.get(i).getBegin());
|
|
|
- model.setEnd(modelList.get(i).getEnd());
|
|
|
-
|
|
|
- result.setIndex(i);
|
|
|
- result.setModel(model);
|
|
|
- break;
|
|
|
+ try {
|
|
|
+ for (int i = 0; i < modelList.size(); i++) {
|
|
|
+ //使用elasticsearch的同义词功能
|
|
|
+ String lexemeText = modelList.get(i).getLexemeText();
|
|
|
+ //获取该词的同义词
|
|
|
+ List<AnalyzeModel> synoymList = analyzeSynonym(lexemeText);
|
|
|
+ for (int j = 0; j < synoymList.size(); j++) {
|
|
|
+ String term = synoymList.get(j).getLexemeText();
|
|
|
+ if (map.containsKey(term)) {
|
|
|
+ AnalyzeModel model = new AnalyzeModel();
|
|
|
+ BeanUtils.copyProperties(model, map.get(synoymList.get(j).getLexemeText()));
|
|
|
+ model.setLexemeType(synoymList.get(j).getLexemeType());
|
|
|
+ model.setLexemeText(synoymList.get(j).getLexemeText());
|
|
|
+ model.setBegin( modelList.get(i).getBegin());
|
|
|
+ model.setEnd( modelList.get(i).getEnd());
|
|
|
+
|
|
|
+ result.setIndex(i);
|
|
|
+ result.setModel(model);
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ ;
|
|
|
}
|
|
|
+ } catch (Exception ex) {
|
|
|
+ ex.printStackTrace();
|
|
|
}
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
- public List<ComparisonResult> analyzeJavas(String text) {
|
|
|
+ public List<ComparisonResult> analyzeJavas(List<String> indexCache, List<EsIndexfieldVo> indexFields,String text) {
|
|
|
+ this.indexFields =indexFields;
|
|
|
+ this.indexCache=indexCache;
|
|
|
+
|
|
|
Dictionary.initial(DefaultConfig.getInstance());
|
|
|
initKeyWords();
|
|
|
|