|
@@ -0,0 +1,300 @@
|
|
|
+package com.bowintek.practice.services.impl;
|
|
|
+
|
|
|
+import co.elastic.clients.elasticsearch.ElasticsearchClient;
|
|
|
+import co.elastic.clients.elasticsearch.indices.AnalyzeRequest;
|
|
|
+import co.elastic.clients.elasticsearch.indices.AnalyzeResponse;
|
|
|
+import com.alibaba.fastjson.JSON;
|
|
|
+import com.bowintek.practice.mapper.cquery.EsQueryKeywordCQuery;
|
|
|
+import com.bowintek.practice.model.EsQueryKeyword;
|
|
|
+import com.bowintek.practice.services.service.AnalyzeService;
|
|
|
+import com.bowintek.practice.util.StringUtils;
|
|
|
+import com.bowintek.practice.vo.Analyze.AnalyzeComparisonResult;
|
|
|
+import com.bowintek.practice.vo.Analyze.AnalyzeModel;
|
|
|
+import com.bowintek.practice.vo.Analyze.ComparisonResult;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.stereotype.Component;
|
|
|
+
|
|
|
+import java.io.StringReader;
|
|
|
+import java.time.LocalDate;
|
|
|
+import java.time.LocalDateTime;
|
|
|
+import java.util.*;
|
|
|
+import java.sql.Timestamp;
|
|
|
+
|
|
|
+import org.wltea.analyzer.cfg.DefaultConfig;
|
|
|
+import org.wltea.analyzer.core.IKSegmenter;
|
|
|
+import org.wltea.analyzer.core.Lexeme;
|
|
|
+import org.wltea.analyzer.dic.Dictionary;
|
|
|
+import org.wltea.analyzer.dic.Hit;
|
|
|
+
|
|
|
@Component
public class AnalyzeServiceImpl implements AnalyzeService {
    // Elasticsearch client used by analyze(String) for server-side ik_smart analysis.
    @Autowired
    private ElasticsearchClient esClient;
    // Mapper that loads query-keyword rows (incrementally) from the database.
    @Autowired
    private EsQueryKeywordCQuery esQueryKeywordCQuery;

    // Watermark: newest createTime seen among loaded keywords; passed (as epoch
    // seconds) to the next getList call so only newer rows are fetched.
    private static Date refTime = null;
    // Guard so the built-in static dictionary entries are registered only once.
    private static boolean isInitStaticWords = false;
    // Regular analysis words (metric/dimension names mapped to ES index fields).
    private static HashMap<String, AnalyzeModel> mapAnalyze = new HashMap<>();
    // Comparison operator words (e.g. "greater than", "less than").
    private static HashMap<String, AnalyzeModel> mapSymbols = new HashMap<>();
    // Unit words (e.g. "ton"), stripped off numeric comparison values.
    private static HashMap<String, AnalyzeModel> mapUnits = new HashMap<>();
    // Useless/stop words (e.g. the particle "的").
    private static HashMap<String, AnalyzeModel> mapUseless = new HashMap<>();
    // Object words (e.g. "well") that terminate a comparison-value span.
    private static HashMap<String, AnalyzeModel> mapObjects = new HashMap<>();
|
|
|
+ private void initStaticWords(){
|
|
|
+ if(isInitStaticWords) return;
|
|
|
+ isInitStaticWords = true;
|
|
|
+
|
|
|
+ //对比符号
|
|
|
+ addToMainDict(mapSymbols, "大于", "对比符号", "");
|
|
|
+ addToMainDict(mapSymbols, "小于", "对比符号", "");
|
|
|
+ addToMainDict(mapSymbols, "等于", "对比符号", "");
|
|
|
+ addToMainDict(mapSymbols, "大于等于", "对比符号", "");
|
|
|
+ addToMainDict(mapSymbols, "小于等于", "对比符号", "");
|
|
|
+ addToMainDict(mapSymbols, "包含", "对比符号", "");
|
|
|
+
|
|
|
+ //单位分词
|
|
|
+ addToMainDict(mapUnits, "吨", "单位", "");
|
|
|
+
|
|
|
+ //对象分词
|
|
|
+ addToMainDict(mapObjects, "井", "对象", "");
|
|
|
+
|
|
|
+ //无用分词
|
|
|
+ addToMainDict(mapUseless, "的", "无用", "");
|
|
|
+ }
|
|
|
+ private void initKeyWords(){
|
|
|
+ initStaticWords();
|
|
|
+
|
|
|
+ //查找是否有记录需要更新
|
|
|
+ String timeString = null;
|
|
|
+ if(refTime!=null) {
|
|
|
+ timeString = (refTime.getTime()/1000)+"";
|
|
|
+ System.out.println("timeString:"+timeString);
|
|
|
+ }
|
|
|
+ List<EsQueryKeyword> dbList = esQueryKeywordCQuery.getList(null, timeString);
|
|
|
+ if(dbList.size()==0) return;
|
|
|
+
|
|
|
+ for(int i=0;i<dbList.size();i++){
|
|
|
+ EsQueryKeyword keyword = dbList.get(i);
|
|
|
+ //同义词
|
|
|
+ List<String> synonymList = new ArrayList<>();
|
|
|
+ if(!StringUtils.IsNullEmpty(keyword.getSynonymText())) {
|
|
|
+ synonymList.addAll(List.of(keyword.getSynonymText().split(",")));
|
|
|
+ }
|
|
|
+
|
|
|
+ if(keyword.getKeywordType().equals("无用")){
|
|
|
+ addToMainDict(mapUseless, keyword.getKeywordName(), keyword.getKeywordType(), keyword.getEsIndexField());
|
|
|
+ addToMainDict(mapUseless, synonymList, keyword.getKeywordType(), keyword.getEsIndexField());
|
|
|
+ }
|
|
|
+ else if(keyword.getKeywordType().equals("对象")){
|
|
|
+ addToMainDict(mapObjects, keyword.getKeywordName(), keyword.getKeywordType(), keyword.getEsIndexField());
|
|
|
+ addToMainDict(mapObjects, synonymList, keyword.getKeywordType(), keyword.getEsIndexField());
|
|
|
+ }
|
|
|
+ else if(keyword.getKeywordType().equals("单位")){
|
|
|
+ addToMainDict(mapUnits, keyword.getKeywordName(), keyword.getKeywordType(), keyword.getEsIndexField());
|
|
|
+ addToMainDict(mapUnits, synonymList, keyword.getKeywordType(), keyword.getEsIndexField());
|
|
|
+ }
|
|
|
+ else if(keyword.getKeywordType().equals("对比符号")){
|
|
|
+ addToMainDict(mapSymbols, keyword.getKeywordName(), keyword.getKeywordType(), keyword.getEsIndexField());
|
|
|
+ addToMainDict(mapSymbols, synonymList, keyword.getKeywordType(), keyword.getEsIndexField());
|
|
|
+ }
|
|
|
+ else{
|
|
|
+ addToMainDict(mapAnalyze, keyword.getKeywordName(), keyword.getKeywordType(), keyword.getEsIndexField());
|
|
|
+ addToMainDict(mapAnalyze, synonymList, keyword.getKeywordType(), keyword.getEsIndexField());
|
|
|
+ }
|
|
|
+
|
|
|
+ //最大更新时间
|
|
|
+ if(refTime==null || refTime.getTime() < keyword.getCreateTime().getTime())
|
|
|
+ refTime = keyword.getCreateTime();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private void addToMainDict(HashMap<String, AnalyzeModel> map,String keyword, String keywordType, String esIndexField){
|
|
|
+ List<String> extStringList = new ArrayList<>(Arrays.stream(new String[]{keyword}).toList());
|
|
|
+ addToMainDict(map, extStringList, keywordType, esIndexField);
|
|
|
+ }
|
|
|
+ private void addToMainDict(HashMap<String, AnalyzeModel> map,List<String> extStringList, String keywordType, String esIndexField){
|
|
|
+ extStringList.forEach(keyword->{
|
|
|
+ map.put(keyword, AnalyzeModel.GenModel(keywordType, esIndexField));
|
|
|
+ });
|
|
|
+
|
|
|
+ //动态添加分词配置
|
|
|
+ Collection<String> colls = new Stack<>();
|
|
|
+ org.wltea.analyzer.dic.Dictionary dictionary = org.wltea.analyzer.dic.Dictionary.getSingleton();
|
|
|
+ for(int i=0;i<extStringList.size();i++){
|
|
|
+ Hit hit = dictionary.matchInMainDict(extStringList.get(i).trim().toLowerCase().toCharArray());
|
|
|
+ if(!hit.isMatch()){
|
|
|
+ System.out.println("initKeyWords:添加["+extStringList.get(i)+"]到MainDict");
|
|
|
+ colls.add(extStringList.get(i));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if(colls.size()>0) dictionary.addWords(colls);
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public AnalyzeResponse analyze(String text) {
|
|
|
+ try {
|
|
|
+ AnalyzeRequest.Builder builder = new AnalyzeRequest.Builder();
|
|
|
+ builder.analyzer("ik_smart");
|
|
|
+ builder.text(text);
|
|
|
+ AnalyzeResponse response = esClient.indices().analyze(builder.build());
|
|
|
+ System.out.println(response.toString());
|
|
|
+ return response;
|
|
|
+ }
|
|
|
+ catch (Exception ex){
|
|
|
+ ex.printStackTrace();
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public List<AnalyzeResponse> analyzes(String text) {
|
|
|
+ String[] arys = text.split(" ");
|
|
|
+ List<AnalyzeResponse> responses = new ArrayList<>();
|
|
|
+ for (int i=0;i<arys.length;i++){
|
|
|
+ AnalyzeResponse response = analyze(arys[i]);
|
|
|
+ if(response!=null) responses.add(response);
|
|
|
+ }
|
|
|
+ return responses;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public ComparisonResult analyzeJava(String text) {
|
|
|
+ try{
|
|
|
+ StringReader reader = new StringReader(text);
|
|
|
+ IKSegmenter segmenter = new IKSegmenter(reader, true);
|
|
|
+
|
|
|
+ System.out.println(text);
|
|
|
+ //ik分词逻辑
|
|
|
+ List<AnalyzeModel> modelList = new ArrayList<>();
|
|
|
+ Lexeme lexeme;
|
|
|
+ while ((lexeme = segmenter.next()) != null) {
|
|
|
+ //System.out.println(" "+lexeme.getLexemeTypeString() + " " + lexeme.getLexemeText()
|
|
|
+ // +" from:"+lexeme.getBeginPosition()
|
|
|
+ // +" to:"+lexeme.getEndPosition());
|
|
|
+
|
|
|
+ AnalyzeModel model = AnalyzeModel.GenModel(lexeme.getLexemeTypeString(),
|
|
|
+ lexeme.getLexemeText(),
|
|
|
+ lexeme.getBeginPosition(), lexeme.getEndPosition());
|
|
|
+ modelList.add(model);
|
|
|
+ }
|
|
|
+
|
|
|
+ //分析表达式
|
|
|
+ AnalyzeComparisonResult analyze = findAnalyzeMap(modelList, mapAnalyze);
|
|
|
+ AnalyzeComparisonResult symbols = findAnalyzeMap(modelList, mapSymbols);
|
|
|
+ if(analyze.getIndex()==-1 || symbols.getIndex()==-1){
|
|
|
+ //不存在对比操作,返回整串字符串进行全文搜索
|
|
|
+ return ComparisonResult.GenModel("query", text);
|
|
|
+ }
|
|
|
+
|
|
|
+ ComparisonResult result = ComparisonResult.GenModel("comparison", text);
|
|
|
+ //操作符号
|
|
|
+ result.setOpreation(symbols.getModel().getLexemeText());
|
|
|
+ //度量、维度关联的字段
|
|
|
+ result.setFieldName(analyze.getModel().getLexemeText());
|
|
|
+ if(!StringUtils.IsNullEmpty(analyze.getModel().getEsIndexField()))
|
|
|
+ result.setFields(analyze.getModel().getEsIndexField().split(","));
|
|
|
+
|
|
|
+ //有 TYPE_CQUAN 说明读取到了 数值+单位
|
|
|
+ AnalyzeComparisonResult cquan = findByLexemeType(modelList, "TYPE_CQUAN");
|
|
|
+ if(cquan.getIndex()>=0){
|
|
|
+ String[] splits = splitUnit(cquan.getModel().getLexemeText(), mapUnits);
|
|
|
+
|
|
|
+ //对比的值或单位
|
|
|
+ result.setValue(splits[0]);
|
|
|
+ if(splits.length>1) result.setUint(splits[1]);
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ //取对比符号 到 对象或者无用词中间的对比值
|
|
|
+ AnalyzeComparisonResult useless = findAnalyzeMap(modelList, mapUseless);
|
|
|
+ AnalyzeComparisonResult object = findAnalyzeMap(modelList, mapObjects);
|
|
|
+ int end = text.length(), start = symbols.getModel().getEnd();
|
|
|
+ if(useless.getIndex()>0 && end > useless.getModel().getBegin())
|
|
|
+ end = useless.getModel().getBegin();
|
|
|
+ if(object.getIndex()>0 && end > object.getModel().getBegin())
|
|
|
+ end = object.getModel().getBegin();
|
|
|
+ //System.out.println("start:"+start+" end:"+end);
|
|
|
+
|
|
|
+ if(start<end){
|
|
|
+ String valString = text.substring(start, end);
|
|
|
+ String[] splits = splitUnit(valString, mapUnits);
|
|
|
+
|
|
|
+ //对比的值或单位
|
|
|
+ result.setValue(splits[0]);
|
|
|
+ if(splits.length>1) result.setUint(splits[1]);
|
|
|
+ }
|
|
|
+
|
|
|
+ return result;
|
|
|
+ } catch (Exception ex) {
|
|
|
+ ex.printStackTrace();
|
|
|
+ }
|
|
|
+
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ private String[] splitUnit(String lexmeText, HashMap<String, AnalyzeModel> map) {
|
|
|
+ for (String key : map.keySet()) {
|
|
|
+ if (lexmeText.endsWith(key)) {
|
|
|
+ return new String[]{lexmeText.replaceAll(key, ""), key};
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return new String[]{lexmeText};
|
|
|
+ }
|
|
|
+ private AnalyzeComparisonResult findByLexemeType(List<AnalyzeModel> modelList, String lexmeType){
|
|
|
+ AnalyzeComparisonResult result = new AnalyzeComparisonResult();
|
|
|
+ result.setIndex(-1);
|
|
|
+
|
|
|
+ for(int i=0;i<modelList.size();i++){
|
|
|
+ if(modelList.get(i).getLexemeType().equals(lexmeType)){
|
|
|
+ result.setIndex(i);
|
|
|
+ result.setModel(modelList.get(i));
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ private AnalyzeComparisonResult findAnalyzeMap(List<AnalyzeModel> modelList, HashMap<String, AnalyzeModel> map){
|
|
|
+ AnalyzeComparisonResult result = new AnalyzeComparisonResult();
|
|
|
+ result.setIndex(-1);
|
|
|
+
|
|
|
+ for(int i=0;i<modelList.size();i++){
|
|
|
+ if(map.containsKey(modelList.get(i).getLexemeText())){
|
|
|
+ AnalyzeModel model = map.get(modelList.get(i).getLexemeText());
|
|
|
+ model = JSON.parseObject(JSON.toJSONString(model), AnalyzeModel.class);
|
|
|
+ model.setLexemeType(modelList.get(i).getLexemeType());
|
|
|
+ model.setLexemeText(modelList.get(i).getLexemeText());
|
|
|
+ model.setBegin(modelList.get(i).getBegin());
|
|
|
+ model.setEnd(modelList.get(i).getEnd());
|
|
|
+
|
|
|
+ result.setIndex(i);
|
|
|
+ result.setModel(model);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public List<ComparisonResult> analyzeJavas(String text) {
|
|
|
+ Dictionary.initial(DefaultConfig.getInstance());
|
|
|
+ initKeyWords();
|
|
|
+
|
|
|
+ List<ComparisonResult> results = new ArrayList<>();
|
|
|
+ String[] arys = text.split(" ");
|
|
|
+ for (int i=0;i<arys.length;i++){
|
|
|
+ ComparisonResult result = analyzeJava(arys[i]);
|
|
|
+ if(result!=null) {
|
|
|
+ results.add(result);
|
|
|
+ System.out.println(JSON.toJSON(result));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return results;
|
|
|
+ }
|
|
|
+}
|