Browse Source

分词器拆分条件查询

周壕 1 year ago
parent
commit
d5fac5864e

+ 6 - 0
pom.xml

@@ -171,6 +171,12 @@
             <artifactId>elasticsearch-java</artifactId>
             <version>8.11.1</version>
         </dependency>
+        <!-- IK Analyzer: word segmenter (tokenizer) library -->
+        <dependency>
+            <groupId>com.janeluo</groupId>
+            <artifactId>ikanalyzer</artifactId>
+            <version>2012_u6</version>
+        </dependency>
     </dependencies>
 
     <build>

+ 15 - 0
src/main/java/com/bowintek/practice/controller/TempController.java

@@ -1,5 +1,9 @@
 package com.bowintek.practice.controller;
 
+import co.elastic.clients.elasticsearch.ElasticsearchClient;
+import co.elastic.clients.elasticsearch.indices.AnalyzeRequest;
+import co.elastic.clients.elasticsearch.indices.AnalyzeResponse;
+import co.elastic.clients.elasticsearch.indices.analyze.AnalyzeToken;
 import com.bowintek.practice.AppConfig;
 import com.bowintek.practice.filter.exception.BaseErrorEnum;
 import com.bowintek.practice.filter.exception.BaseException;
@@ -11,9 +15,11 @@ import com.bowintek.practice.model.SrSaerchtemp;
 import com.bowintek.practice.model.SrSubject;
 import com.bowintek.practice.model.SrTempData;
 import com.bowintek.practice.services.service.AccountService;
+import com.bowintek.practice.services.service.AnalyzeService;
 import com.bowintek.practice.services.service.GenSqlStringService;
 import com.bowintek.practice.services.service.TempService;
 import com.bowintek.practice.services.service.system.RoleService;
+import com.bowintek.practice.vo.Analyze.ComparisonResult;
 import com.bowintek.practice.vo.SaerchtempVo;
 import com.bowintek.practice.vo.TagVo;
 import com.bowintek.practice.vo.system.FunctionCodeModel;
@@ -51,6 +57,8 @@ public class TempController {
     private SrSaerchtempMapper srSaerchtempMapper;
     @Autowired
     private AppConfig appConfig;
+    @Autowired
+    private AnalyzeService analyzeService;
 
     @ResponseBody
     @GetMapping("/getList")
@@ -165,4 +173,11 @@ public class TempController {
     public BaseResponse<List<HashMap<String, Object>>> getSubjectFieldList(String subId, String tagId, Integer fixedType) {
         return RespGenerstor.success(tempService.getSubjectFieldList(subId, tagId, fixedType));
     }
+
+    /**
+     * Demo endpoint for the tokenizer-based condition splitting: hands the raw
+     * search text to {@code analyzeService.analyzeJavas} and returns the
+     * per-term results wrapped in the standard response envelope.
+     *
+     * @param text raw search text from the request
+     * @return success response carrying one {@link ComparisonResult} per analyzed term
+     */
+    @ResponseBody
+    @GetMapping("/getAnalyze")
+    public BaseResponse<List<ComparisonResult>> getAnalyze(String text) {
+        // Typed return instead of the raw BaseResponse; @ResponseBody matches the
+        // other endpoints of this controller so the result is written as the body.
+        List<ComparisonResult> results = analyzeService.analyzeJavas(text);
+        return RespGenerstor.success(results);
+    }
 }

+ 15 - 0
src/main/java/com/bowintek/practice/mapper/cquery/EsQueryKeywordCQuery.java

@@ -0,0 +1,15 @@
+package com.bowintek.practice.mapper.cquery;
+
+import com.bowintek.practice.model.EsQueryKeyword;
+import com.bowintek.practice.model.SrTempdimension;
+import com.bowintek.practice.model.SrTempfield;
+import com.bowintek.practice.model.SrTempmeasure;
+import com.bowintek.practice.vo.SaerchtempVo;
+import org.apache.ibatis.annotations.Param;
+
+import java.util.HashMap;
+import java.util.List;
+
+public interface EsQueryKeywordCQuery {
+    List<EsQueryKeyword> getList(String createTime, String createTimestamp);
+}

+ 300 - 0
src/main/java/com/bowintek/practice/services/impl/AnalyzeServiceImpl.java

@@ -0,0 +1,300 @@
+package com.bowintek.practice.services.impl;
+
+import co.elastic.clients.elasticsearch.ElasticsearchClient;
+import co.elastic.clients.elasticsearch.indices.AnalyzeRequest;
+import co.elastic.clients.elasticsearch.indices.AnalyzeResponse;
+import com.alibaba.fastjson.JSON;
+import com.bowintek.practice.mapper.cquery.EsQueryKeywordCQuery;
+import com.bowintek.practice.model.EsQueryKeyword;
+import com.bowintek.practice.services.service.AnalyzeService;
+import com.bowintek.practice.util.StringUtils;
+import com.bowintek.practice.vo.Analyze.AnalyzeComparisonResult;
+import com.bowintek.practice.vo.Analyze.AnalyzeModel;
+import com.bowintek.practice.vo.Analyze.ComparisonResult;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+
+import java.io.StringReader;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.util.*;
+import java.sql.Timestamp;
+
+import org.wltea.analyzer.cfg.DefaultConfig;
+import org.wltea.analyzer.core.IKSegmenter;
+import org.wltea.analyzer.core.Lexeme;
+import org.wltea.analyzer.dic.Dictionary;
+import org.wltea.analyzer.dic.Hit;
+
+@Component
+public class AnalyzeServiceImpl implements AnalyzeService {
+    @Autowired
+    private ElasticsearchClient esClient; // used by analyze() to call the Elasticsearch analyze API
+    @Autowired
+    private EsQueryKeywordCQuery esQueryKeywordCQuery; // mapper that loads keyword rows in initKeyWords()
+
+    private static Date refTime = null; // newest createTime among keywords loaded so far; initKeyWords() passes it (as epoch seconds) to the next query
+    private static boolean isInitStaticWords = false; // guards the one-time registration done by initStaticWords()
+    // Regular keywords (every keyword type that is not one of the four special types below)
+    private static HashMap<String, AnalyzeModel> mapAnalyze = new HashMap<>();
+    // Comparison operators
+    private static HashMap<String, AnalyzeModel> mapSymbols = new HashMap<>();
+    // Unit words
+    private static HashMap<String, AnalyzeModel> mapUnits = new HashMap<>();
+    // Useless (filler) words
+    private static HashMap<String, AnalyzeModel> mapUseless = new HashMap<>();
+    // Object words
+    private static HashMap<String, AnalyzeModel> mapObjects = new HashMap<>();
+
+    /**
+     * Registers the built-in keywords (comparison operators, one unit, one object
+     * word and one filler word) in the category maps and in the IK main dictionary.
+     * Does nothing once a previous call has completed.
+     */
+    private void initStaticWords(){
+        if(isInitStaticWords) return;
+
+        // Comparison operators.
+        for (String symbol : new String[]{"大于", "小于", "等于", "大于等于", "小于等于", "包含"}) {
+            addToMainDict(mapSymbols, symbol, "对比符号", "");
+        }
+
+        // Unit words.
+        addToMainDict(mapUnits, "吨", "单位", "");
+
+        // Object words.
+        addToMainDict(mapObjects, "井", "对象", "");
+
+        // Useless (filler) words.
+        addToMainDict(mapUseless, "的", "无用", "");
+
+        // Raise the flag only after everything was registered: if the dictionary
+        // call fails, the next request retries instead of silently running without
+        // the built-in words forever.
+        // NOTE(review): the static maps are shared by all requests and are not
+        // synchronized - confirm whether concurrent first calls need guarding.
+        isInitStaticWords = true;
+    }
+    /**
+     * Loads keywords from the database and registers each one, together with its
+     * comma-separated synonyms, in the map that matches its keyword type.
+     * After the first load only rows newer than {@code refTime} are requested.
+     */
+    private void initKeyWords(){
+        initStaticWords();
+
+        // Ask only for rows created after the newest one already loaded.
+        String timeString = null;
+        if(refTime!=null) {
+            timeString = String.valueOf(refTime.getTime()/1000);
+            System.out.println("timeString:"+timeString);
+        }
+        List<EsQueryKeyword> dbList = esQueryKeywordCQuery.getList(null, timeString);
+        if(dbList==null || dbList.isEmpty()) return;
+
+        for(EsQueryKeyword keyword : dbList){
+            // Keyword name first, then its synonyms; blank entries are dropped and
+            // surrounding whitespace is removed so that "a, b" registers "b", not " b".
+            List<String> words = new ArrayList<>();
+            String name = keyword.getKeywordName();
+            if(!StringUtils.IsNullEmpty(name) && !name.trim().isEmpty()) {
+                words.add(name.trim());
+            }
+            if(!StringUtils.IsNullEmpty(keyword.getSynonymText())) {
+                for(String synonym : keyword.getSynonymText().split(",")){
+                    String trimmed = synonym.trim();
+                    if(!trimmed.isEmpty()) words.add(trimmed);
+                }
+            }
+
+            // Constant-first equals keeps a row with a null type from throwing;
+            // such rows fall through to the regular keyword map.
+            String type = keyword.getKeywordType();
+            HashMap<String, AnalyzeModel> target;
+            if("无用".equals(type)) target = mapUseless;
+            else if("对象".equals(type)) target = mapObjects;
+            else if("单位".equals(type)) target = mapUnits;
+            else if("对比符号".equals(type)) target = mapSymbols;
+            else target = mapAnalyze;
+
+            if(!words.isEmpty()) {
+                addToMainDict(target, words, type, keyword.getEsIndexField());
+            }
+
+            // Track the newest creation time seen; rows without one are ignored here.
+            if(keyword.getCreateTime()!=null
+                    && (refTime==null || refTime.getTime() < keyword.getCreateTime().getTime())) {
+                refTime = keyword.getCreateTime();
+            }
+        }
+    }
+
+    /**
+     * Single-keyword convenience overload: wraps the keyword in a one-element
+     * list and delegates to the list-based overload.
+     */
+    private void addToMainDict(HashMap<String, AnalyzeModel> map,String keyword, String keywordType, String esIndexField){
+        List<String> singleWord = new ArrayList<>(1);
+        singleWord.add(keyword);
+        addToMainDict(map, singleWord, keywordType, esIndexField);
+    }
+    /**
+     * Stores every usable word in {@code map} (tagged with the keyword type and
+     * ES index field) and adds the words that the IK main dictionary does not
+     * know yet to that dictionary.
+     *
+     * @param map           category map that receives the words as keys
+     * @param extStringList words to register; null or blank entries are skipped
+     * @param keywordType   keyword type stored with each word
+     * @param esIndexField  ES index field(s) stored with each word
+     */
+    private void addToMainDict(HashMap<String, AnalyzeModel> map,List<String> extStringList, String keywordType, String esIndexField){
+        if(extStringList==null || extStringList.isEmpty()) return;
+
+        org.wltea.analyzer.dic.Dictionary dictionary = org.wltea.analyzer.dic.Dictionary.getSingleton();
+        List<String> missingWords = new ArrayList<>();
+        for(String raw : extStringList){
+            // Blank words must never become keys: an empty unit key would make
+            // every endsWith() check in splitUnit succeed, and an empty char array
+            // is not a meaningful dictionary lookup.
+            if(raw==null) continue;
+            String word = raw.trim();
+            if(word.isEmpty()) continue;
+
+            // NOTE(review): keys keep their original letter case - confirm that IK
+            // lexeme text (which findAnalyzeMap compares against) is not lower-cased.
+            map.put(word, AnalyzeModel.GenModel(keywordType, esIndexField));
+
+            // Extend the tokenizer dictionary at runtime for words it does not know.
+            Hit hit = dictionary.matchInMainDict(word.toLowerCase().toCharArray());
+            if(!hit.isMatch()){
+                System.out.println("initKeyWords:添加["+word+"]到MainDict");
+                missingWords.add(word);
+            }
+        }
+        if(!missingWords.isEmpty()) dictionary.addWords(missingWords);
+    }
+
+    /**
+     * Sends the text to Elasticsearch for analysis with the {@code ik_smart}
+     * analyzer and prints the response.
+     *
+     * @return the analyze response, or {@code null} when anything goes wrong
+     */
+    @Override
+    public AnalyzeResponse analyze(String text) {
+        AnalyzeResponse analyzed = null;
+        try {
+            AnalyzeRequest request = new AnalyzeRequest.Builder()
+                    .analyzer("ik_smart")
+                    .text(text)
+                    .build();
+            AnalyzeResponse fromEs = esClient.indices().analyze(request);
+            System.out.println(fromEs.toString());
+            analyzed = fromEs;
+        }
+        catch (Exception failure){
+            failure.printStackTrace();
+        }
+        return analyzed;
+    }
+
+    /**
+     * Splits the text on single spaces, runs {@link #analyze(String)} on every
+     * piece and collects the responses that are not {@code null}.
+     */
+    @Override
+    public List<AnalyzeResponse> analyzes(String text) {
+        List<AnalyzeResponse> collected = new ArrayList<>();
+        for (String piece : text.split(" ")) {
+            AnalyzeResponse single = analyze(piece);
+            if (single == null) {
+                continue;
+            }
+            collected.add(single);
+        }
+        return collected;
+    }
+
+    /**
+     * Tokenizes one search term with the local IK segmenter (smart mode) and
+     * tries to read it as a comparison: field keyword + comparison operator + value
+     * (+ optional unit).
+     *
+     * @param text one search term
+     * @return a {@code "comparison"} result when both a regular keyword and a
+     *         comparison operator are found, otherwise a {@code "query"} result that
+     *         carries the whole text for full-text search; {@code null} when
+     *         tokenizing or analysis throws
+     */
+    @Override
+    public ComparisonResult analyzeJava(String text) {
+        try{
+            IKSegmenter segmenter = new IKSegmenter(new StringReader(text), true);
+
+            System.out.println(text);
+            // Collect every lexeme with its type, text and position.
+            List<AnalyzeModel> modelList = new ArrayList<>();
+            Lexeme lexeme;
+            while ((lexeme = segmenter.next()) != null) {
+                modelList.add(AnalyzeModel.GenModel(lexeme.getLexemeTypeString(),
+                        lexeme.getLexemeText(),
+                        lexeme.getBeginPosition(), lexeme.getEndPosition()));
+            }
+
+            // A comparison needs both a field keyword and an operator.
+            AnalyzeComparisonResult analyze = findAnalyzeMap(modelList, mapAnalyze);
+            AnalyzeComparisonResult symbols = findAnalyzeMap(modelList, mapSymbols);
+            if(analyze.getIndex()==-1 || symbols.getIndex()==-1){
+                // No comparison present: search the whole string as full text.
+                return ComparisonResult.GenModel("query", text);
+            }
+
+            ComparisonResult result = ComparisonResult.GenModel("comparison", text);
+            // Operator.
+            result.setOpreation(symbols.getModel().getLexemeText());
+            // Field keyword and the ES fields configured for it.
+            result.setFieldName(analyze.getModel().getLexemeText());
+            if(!StringUtils.IsNullEmpty(analyze.getModel().getEsIndexField()))
+                result.setFields(analyze.getModel().getEsIndexField().split(","));
+
+            // A TYPE_CQUAN lexeme means the segmenter already read "number + unit".
+            AnalyzeComparisonResult cquan = findByLexemeType(modelList, "TYPE_CQUAN");
+            if(cquan.getIndex()>=0){
+                applyValueAndUnit(result, cquan.getModel().getLexemeText());
+                return result;
+            }
+
+            // Otherwise the value is the text between the operator and the next
+            // filler/object word. Only tokens AFTER the operator may end the value:
+            // a stop word in front of the operator (or at index 0) must not cut it.
+            int start = symbols.getModel().getEnd();
+            int end = text.length();
+            for(int i=symbols.getIndex()+1;i<modelList.size();i++){
+                AnalyzeModel token = modelList.get(i);
+                boolean terminator = mapUseless.containsKey(token.getLexemeText())
+                        || mapObjects.containsKey(token.getLexemeText());
+                if(terminator && token.getBegin()>=start){
+                    end = token.getBegin();
+                    break;
+                }
+            }
+
+            if(start<end){
+                applyValueAndUnit(result, text.substring(start, end));
+            }
+
+            return result;
+        } catch (Exception ex) {
+            // Best effort by contract: callers treat null as "skip this term".
+            ex.printStackTrace();
+        }
+
+        return null;
+    }
+
+    /** Splits {@code valueText} into value and optional unit and stores both on {@code result}. */
+    private void applyValueAndUnit(ComparisonResult result, String valueText) {
+        String[] splits = splitUnit(valueText, mapUnits);
+        result.setValue(splits[0]);
+        if(splits.length>1) result.setUint(splits[1]);
+    }
+
+    /**
+     * Separates a trailing unit from a value string.
+     *
+     * @param lexmeText value text that may end with a unit, e.g. a number followed by a unit word
+     * @param map       unit dictionary; its keys are the known unit words
+     * @return {@code {value, unit}} when the text ends with a known unit,
+     *         otherwise a one-element array holding the unchanged text
+     */
+    private String[] splitUnit(String lexmeText, HashMap<String, AnalyzeModel> map) {
+        // Pick the longest matching suffix so the result does not depend on
+        // HashMap iteration order when one unit is the tail of another.
+        String matchedUnit = null;
+        for (String key : map.keySet()) {
+            // An empty key would "match" every text.
+            if (key == null || key.isEmpty() || !lexmeText.endsWith(key)) {
+                continue;
+            }
+            if (matchedUnit == null || key.length() > matchedUnit.length()) {
+                matchedUnit = key;
+            }
+        }
+        if (matchedUnit == null) {
+            return new String[]{lexmeText};
+        }
+        // Cut off the suffix only. replaceAll() would treat the unit as a regex
+        // (breaking on characters like '/', '%', '(') and remove every occurrence.
+        String value = lexmeText.substring(0, lexmeText.length() - matchedUnit.length());
+        return new String[]{value, matchedUnit};
+    }
+    /**
+     * Looks for the first token whose lexeme type equals {@code lexmeType}.
+     *
+     * @return a result holding the token and its list index, or index {@code -1}
+     *         (and no model) when no token has that type
+     */
+    private AnalyzeComparisonResult findByLexemeType(List<AnalyzeModel> modelList, String lexmeType){
+        AnalyzeComparisonResult found = new AnalyzeComparisonResult();
+        found.setIndex(-1);
+
+        int position = 0;
+        for(AnalyzeModel candidate : modelList){
+            if(candidate.getLexemeType().equals(lexmeType)){
+                found.setIndex(position);
+                found.setModel(candidate);
+                return found;
+            }
+            position++;
+        }
+        return found;
+    }
+
+    /**
+     * Looks for the first token whose text is a key of {@code map}.
+     *
+     * @param modelList tokens produced by the segmenter, in text order
+     * @param map       keyword dictionary to match the token text against
+     * @return a result holding the token's list index and a fresh model that
+     *         combines the dictionary entry (keyword type, ES index field) with
+     *         the token (lexeme type, text, begin, end); index {@code -1} and no
+     *         model when nothing matches
+     */
+    private AnalyzeComparisonResult findAnalyzeMap(List<AnalyzeModel> modelList, HashMap<String, AnalyzeModel> map){
+        AnalyzeComparisonResult result = new AnalyzeComparisonResult();
+        result.setIndex(-1);
+
+        for(int i=0;i<modelList.size();i++){
+            AnalyzeModel token = modelList.get(i);
+            // Single lookup; dictionary values are never null, so null means "not a key".
+            AnalyzeModel entry = map.get(token.getLexemeText());
+            if(entry==null) continue;
+
+            // Build a new model instead of mutating the shared dictionary entry.
+            // A direct field copy replaces the former JSON serialize/parse round-trip.
+            AnalyzeModel model = AnalyzeModel.GenModel(entry.getKeywordType(), entry.getEsIndexField());
+            model.setLexemeType(token.getLexemeType());
+            model.setLexemeText(token.getLexemeText());
+            model.setBegin(token.getBegin());
+            model.setEnd(token.getEnd());
+
+            result.setIndex(i);
+            result.setModel(model);
+            break;
+        }
+        return result;
+    }
+
+    /**
+     * Analyzes a space-separated search string term by term with
+     * {@link #analyzeJava(String)}.
+     *
+     * @param text raw search text; may be {@code null} or blank
+     * @return one result per non-empty term that could be analyzed; an empty list
+     *         for {@code null} or blank input
+     */
+    @Override
+    public List<ComparisonResult> analyzeJavas(String text) {
+        List<ComparisonResult> results = new ArrayList<>();
+        // The text comes straight from a request parameter and may be missing.
+        if(text==null || text.trim().isEmpty()) return results;
+
+        Dictionary.initial(DefaultConfig.getInstance());
+        initKeyWords();
+
+        for (String term : text.split(" ")){
+            // Consecutive spaces produce empty pieces; they are not search terms.
+            if(term.isEmpty()) continue;
+
+            ComparisonResult result = analyzeJava(term);
+            if(result!=null) {
+                results.add(result);
+                System.out.println(JSON.toJSON(result));
+            }
+        }
+        return results;
+    }
+}

+ 19 - 0
src/main/java/com/bowintek/practice/services/service/AnalyzeService.java

@@ -0,0 +1,19 @@
+package com.bowintek.practice.services.service;
+
+import co.elastic.clients.elasticsearch.indices.AnalyzeResponse;
+import com.bowintek.practice.model.SrTempData;
+import com.bowintek.practice.vo.Analyze.ComparisonResult;
+import com.bowintek.practice.vo.SaerchtempVo;
+import com.bowintek.practice.vo.temp.TempObjectModel;
+import com.bowintek.practice.vo.temp.TempSaveResult;
+import com.github.pagehelper.PageInfo;
+
+import java.util.HashMap;
+import java.util.List;
+
+public interface AnalyzeService { // text analysis helpers used to split search text into query conditions
+    AnalyzeResponse analyze(String text); // analyzes the whole text; returns the Elasticsearch analyze response
+    List<AnalyzeResponse> analyzes(String text); // analyzes the text piece by piece; one response per piece
+    ComparisonResult analyzeJava(String text); // analyzes a single term into a ComparisonResult
+    List<ComparisonResult> analyzeJavas(String text); // analyzes the text term by term; one ComparisonResult per term
+}

+ 9 - 0
src/main/java/com/bowintek/practice/vo/Analyze/AnalyzeComparisonResult.java

@@ -0,0 +1,9 @@
+package com.bowintek.practice.vo.Analyze;
+
+import lombok.Data;
+
+@Data
+public class AnalyzeComparisonResult { // outcome of searching a token list for one kind of token
+    private int index; // position of the matched token in the token list; searches set -1 when nothing matched
+    private AnalyzeModel model; // the matched token; left unset when nothing matched
+}

+ 30 - 0
src/main/java/com/bowintek/practice/vo/Analyze/AnalyzeModel.java

@@ -0,0 +1,30 @@
+package com.bowintek.practice.vo.Analyze;
+
+import lombok.Data;
+
+/**
+ * One analysis entry. Depending on the factory used it describes either a
+ * configured keyword (type + ES index field) or a token produced by the
+ * segmenter (lexeme type, text and position).
+ */
+@Data
+public class AnalyzeModel {
+    /** Keyword category of a configured keyword. */
+    private String keywordType;
+    /** ES index field(s) associated with a configured keyword. */
+    private String esIndexField;
+
+    /** Lexeme type reported for a token. */
+    private String lexemeType;
+    /** Text of a token. */
+    private String lexemeText;
+    /** Begin position of a token. */
+    private int begin;
+    /** End position of a token. */
+    private int end;
+
+    /** Creates an entry that carries only the keyword configuration. */
+    public static AnalyzeModel GenModel(String keywordType, String esIndexField){
+        AnalyzeModel configured = new AnalyzeModel();
+        configured.setEsIndexField(esIndexField);
+        configured.setKeywordType(keywordType);
+        return configured;
+    }
+
+    /** Creates an entry that carries only the token information. */
+    public static AnalyzeModel GenModel(String lexemeType, String lexemeText, int begin, int end){
+        AnalyzeModel token = new AnalyzeModel();
+        token.setBegin(begin);
+        token.setEnd(end);
+        token.setLexemeText(lexemeText);
+        token.setLexemeType(lexemeType);
+        return token;
+    }
+}

+ 22 - 0
src/main/java/com/bowintek/practice/vo/Analyze/ComparisonResult.java

@@ -0,0 +1,22 @@
+package com.bowintek.practice.vo.Analyze;
+
+import lombok.Data;
+
+/**
+ * Result of analyzing one search term: the search type, the original term and,
+ * where they were recognized, the pieces of a comparison (field, operator,
+ * value, unit).
+ */
+@Data
+public class ComparisonResult {
+    /** Kind of search this term results in. */
+    private String searchType;
+    /** The original term text. */
+    private String keyString;
+
+    /** Field keyword found in the term. */
+    private String fieldName;
+    /** ES fields associated with the field keyword. */
+    private String[] fields;
+    /** Comparison operator (property name keeps its historical spelling). */
+    private String opreation;
+    /** Value to compare against. */
+    private String value;
+    /** Unit of the value (property name keeps its historical spelling). */
+    private String uint;
+
+    /** Creates a result that carries only the search type and the original term. */
+    public static ComparisonResult GenModel(String searchType, String keyString){
+        ComparisonResult created = new ComparisonResult();
+        created.setKeyString(keyString);
+        created.setSearchType(searchType);
+        return created;
+    }
+}

+ 14 - 0
src/main/resources/mapping/cquery/EsQueryKeywordCQuery.xml

@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
+<mapper namespace="com.bowintek.practice.mapper.cquery.EsQueryKeywordCQuery">
+    <select id="getList" resultType="com.bowintek.practice.model.EsQueryKeyword"> <!-- Lists es_query_keyword rows ordered by createTime; both filters below are optional -->
+        select kw.* from es_query_keyword kw where 1=1
+        <if test="createTime!='' and createTime!=null"> <!-- createTime: date string in %Y-%m-%d form, inclusive lower bound -->
+            and kw.createTime <![CDATA[ >= ]]> STR_TO_DATE(#{createTime},'%Y-%m-%d')
+        </if>
+        <if test="createTimestamp!='' and createTimestamp!=null"> <!-- createTimestamp: Unix timestamp, exclusive lower bound -->
+            and kw.createTime <![CDATA[ > ]]> FROM_UNIXTIME(#{createTimestamp})
+        </if>
+        order by kw.createTime
+    </select>
+</mapper>