|
|
@@ -1,7 +1,12 @@
|
|
|
package com.fs.fastGpt.service.impl;
|
|
|
|
|
|
+import com.fasterxml.jackson.core.JsonProcessingException;
|
|
|
+import com.fasterxml.jackson.databind.JsonNode;
|
|
|
+import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
import com.fs.common.BeanCopyUtils;
|
|
|
+import com.fs.common.core.domain.R;
|
|
|
import com.fs.common.utils.DateUtils;
|
|
|
+import com.fs.crm.utils.CrmCustomerAiTagUtil;
|
|
|
import com.fs.fastGpt.domain.FastgptChatQuestion;
|
|
|
import com.fs.fastGpt.domain.FastgptChatQuestionStatistics;
|
|
|
import com.fs.fastGpt.mapper.FastgptChatQuestionMapper;
|
|
|
@@ -9,6 +14,7 @@ import com.fs.fastGpt.mapper.FastgptChatQuestionStatisticsMapper;
|
|
|
import com.fs.fastGpt.param.FastgptKnowledgeMissCollectParam;
|
|
|
import com.fs.fastGpt.service.IFastgptChatQuestionService;
|
|
|
import com.fs.fastGpt.util.FastgptQuestionNormalizeUtil;
|
|
|
+import cn.hutool.json.JSONUtil;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
@@ -17,9 +23,13 @@ import org.springframework.stereotype.Service;
|
|
|
import org.springframework.transaction.annotation.Transactional;
|
|
|
|
|
|
import java.util.Date;
|
|
|
+import java.util.HashMap;
|
|
|
+import java.util.Map;
|
|
|
|
|
|
/**
|
|
|
- * 知识库未命中:异步写入明细,按 content_summary 归并统计并回填 question_statistics_id
|
|
|
+ * 知识库未命中:异步写入明细。
|
|
|
+ * 顺序:① SimHash+Jaccard 本地匹配已有统计;
|
|
|
+ * ② 未命中再调 AI 按 contentType(question_category)归并;③ 仍失败则新建统计行(含 SimHash 兜底插入)。
|
|
|
*/
|
|
|
@Slf4j
|
|
|
@Service
|
|
|
@@ -28,6 +38,9 @@ public class FastgptChatQuestionCollectServiceImpl {
|
|
|
private static final int SIMHASH_THRESHOLD = 14;
|
|
|
private static final double JACCARD_THRESHOLD = 0.55d;
|
|
|
|
|
|
+ private static final String MODEL_TYPE_HIGH_FREQ = "高频问题类别";
|
|
|
+
|
|
|
+ private static final ObjectMapper objectMapper = new ObjectMapper();
|
|
|
|
|
|
@Autowired
|
|
|
private IFastgptChatQuestionService fastgptChatQuestionService;
|
|
|
@@ -63,37 +76,39 @@ public class FastgptChatQuestionCollectServiceImpl {
|
|
|
|
|
|
Date now = DateUtils.getNowDate();
|
|
|
long sh = FastgptQuestionNormalizeUtil.simhash64(display);
|
|
|
- FastgptChatQuestionStatistics best = fastgptChatQuestionStatisticsMapper.selectBestMatchBySimhash(sh, SIMHASH_THRESHOLD);
|
|
|
- Long statId;
|
|
|
- if (best != null && best.getId() != null) {
|
|
|
- double jac = FastgptQuestionNormalizeUtil.jaccard(
|
|
|
- FastgptQuestionNormalizeUtil.ngramTokens(display),
|
|
|
- FastgptQuestionNormalizeUtil.ngramTokens(best.getContentSummary())
|
|
|
- );
|
|
|
- if (jac < JACCARD_THRESHOLD) {
|
|
|
- best = null;
|
|
|
+
|
|
|
+ // 本地匹配已有统计,获取数据id
|
|
|
+ Long statId = tryMergeByLocalTextMatch(display, sh, now);
|
|
|
+
|
|
|
+ if (statId == null) {
|
|
|
+ // 如果没有匹配,就调用 AI
|
|
|
+ Integer aiCategory = getHighFreqCategoryByAi(param);
|
|
|
+ if (aiCategory != null) {
|
|
|
+ FastgptChatQuestionStatistics questionStatistics =
|
|
|
+ fastgptChatQuestionStatisticsMapper.selectFirstByQuestionCategory(aiCategory);
|
|
|
+ if (questionStatistics != null && questionStatistics.getId() != null) {
|
|
|
+ statId = questionStatistics.getId();
|
|
|
+ fastgptChatQuestionStatisticsMapper.incrementFrequencyById(statId, now);
|
|
|
+ } else {
|
|
|
+ FastgptChatQuestionStatistics row = new FastgptChatQuestionStatistics();
|
|
|
+ row.setQuestionCategory(aiCategory);
|
|
|
+ row.setContentSummary(display.length() > 200 ? display.substring(0, 200) : display);
|
|
|
+ row.setSimhash(sh);
|
|
|
+ row.setIsResolve(0);
|
|
|
+ row.setQuestionId(detailId);
|
|
|
+ row.setFrequency(1);
|
|
|
+ row.setCreateTime(now);
|
|
|
+ row.setUpdateTime(now);
|
|
|
+ fastgptChatQuestionStatisticsMapper.insertFastgptChatQuestionStatistics(row);
|
|
|
+ statId = row.getId();
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
- if (best != null && best.getId() != null) {
|
|
|
- statId = best.getId();
|
|
|
- fastgptChatQuestionStatisticsMapper.incrementFrequencyById(statId, now);
|
|
|
- } else {
|
|
|
- FastgptChatQuestionStatistics row = new FastgptChatQuestionStatistics();
|
|
|
- row.setQuestionCategory(0);
|
|
|
- row.setContentSummary(display.length() > 200 ? display.substring(0, 200) : display);
|
|
|
- row.setSimhash(sh);
|
|
|
- row.setIsResolve(0);
|
|
|
- row.setQuestionId(detailId);
|
|
|
- row.setFrequency(1);
|
|
|
- row.setCreateTime(now);
|
|
|
- row.setUpdateTime(now);
|
|
|
- fastgptChatQuestionStatisticsMapper.insertFastgptChatQuestionStatistics(row);
|
|
|
- statId = row.getId();
|
|
|
- if (statId == null) {
|
|
|
- FastgptChatQuestionStatistics insertedBest = fastgptChatQuestionStatisticsMapper.selectBestMatchBySimhash(sh, SIMHASH_THRESHOLD);
|
|
|
- statId = insertedBest != null ? insertedBest.getId() : null;
|
|
|
- }
|
|
|
+
|
|
|
+ if (statId == null) {
|
|
|
+ statId = mergeBySimhashFallback(display, sh, detailId, now);
|
|
|
}
|
|
|
+
|
|
|
if (statId != null) {
|
|
|
fastgptChatQuestionMapper.updateQuestionStatisticsIdById(detailId, statId);
|
|
|
}
|
|
|
@@ -102,6 +117,138 @@ public class FastgptChatQuestionCollectServiceImpl {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ private Integer getHighFreqCategoryByAi(FastgptKnowledgeMissCollectParam param) {
|
|
|
+ if (param.getSessionId() == null || StringUtils.isBlank(param.getAppKey())) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ Map<String, Object> requestParam = new HashMap<>();
|
|
|
+
|
|
|
+ requestParam.put("history", StringUtils.defaultString(param.getAiUserContent(), ""));
|
|
|
+ requestParam.put("aiContent", "");
|
|
|
+ requestParam.put("userContent", "");
|
|
|
+ requestParam.put("isRepository", "");
|
|
|
+ requestParam.put("contentType", "");
|
|
|
+ requestParam.put("modelType", MODEL_TYPE_HIGH_FREQ);
|
|
|
+
|
|
|
+ R aiResponse = CrmCustomerAiTagUtil.callAiService(requestParam, param.getSessionId(), param.getAppKey());
|
|
|
+
|
|
|
+ // 处理返回结果
|
|
|
+ JsonNode userInfo = parseUserInfoFromAiResponse(aiResponse);
|
|
|
+ if (userInfo == null || userInfo.isMissingNode() || !userInfo.isObject()) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ if (userInfo.has("contentType") && !userInfo.get("contentType").isNull()) {
|
|
|
+ return parseIntNode(userInfo.get("contentType"));
|
|
|
+ }
|
|
|
+ if (userInfo.has(MODEL_TYPE_HIGH_FREQ) && !userInfo.get(MODEL_TYPE_HIGH_FREQ).isNull()) {
|
|
|
+ return parseIntNode(userInfo.get(MODEL_TYPE_HIGH_FREQ));
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.warn("高频问题类别 AI 解析失败 sessionId={}", param.getSessionId(), e);
|
|
|
+ }
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static Integer parseIntNode(JsonNode n) {
|
|
|
+ if (n == null || n.isNull() || n.isMissingNode()) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ if (n.isIntegralNumber() || n.isNumber()) {
|
|
|
+ return n.intValue();
|
|
|
+ }
|
|
|
+ return parseIntLoose(n.asText());
|
|
|
+ }
|
|
|
+
|
|
|
+ // 文本转int
|
|
|
+ private static Integer parseIntLoose(String s) {
|
|
|
+ if (StringUtils.isBlank(s)) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ return Integer.parseInt(s.trim());
|
|
|
+ } catch (NumberFormatException e) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private JsonNode parseUserInfoFromAiResponse(R aiResponse) throws JsonProcessingException {
|
|
|
+ if (aiResponse == null || !Integer.valueOf(200).equals(aiResponse.get("code"))) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ JsonNode rootS = objectMapper.readTree(JSONUtil.toJsonStr(aiResponse));
|
|
|
+ JsonNode choices = rootS.path("data").path("choices");
|
|
|
+ if (!choices.isArray() || choices.size() <= 0) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ JsonNode contentNode = choices.get(0).path("message").path("content");
|
|
|
+ if (!contentNode.isTextual()) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ String contentStr = contentNode.asText();
|
|
|
+ JsonNode contentArray = objectMapper.readTree(contentStr);
|
|
|
+ if (!contentArray.isArray() || contentArray.size() <= 1) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ JsonNode secondElement = contentArray.get(1);
|
|
|
+ JsonNode textNode = secondElement.path("text");
|
|
|
+ if (textNode.isMissingNode()) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ JsonNode contentInnerNode = textNode.path("content");
|
|
|
+ if (!contentInnerNode.isTextual()) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ String innerJsonStr = contentInnerNode.asText();
|
|
|
+ JsonNode innerJson = objectMapper.readTree(innerJsonStr);
|
|
|
+ return innerJson.path("userInfo");
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 本地 SimHash + Jaccard 与已有统计行比对,命中则频次+1 并返回该统计 id;不插入新行。
|
|
|
+ */
|
|
|
+ private Long tryMergeByLocalTextMatch(String display, long sh, Date now) {
|
|
|
+ FastgptChatQuestionStatistics best = fastgptChatQuestionStatisticsMapper.selectBestMatchBySimhash(sh, SIMHASH_THRESHOLD);
|
|
|
+ if (best != null && best.getId() != null) {
|
|
|
+ double jac = FastgptQuestionNormalizeUtil.jaccard(
|
|
|
+ FastgptQuestionNormalizeUtil.ngramTokens(display),
|
|
|
+ FastgptQuestionNormalizeUtil.ngramTokens(best.getContentSummary())
|
|
|
+ );
|
|
|
+ if (jac < JACCARD_THRESHOLD) {
|
|
|
+ best = null;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (best != null && best.getId() != null) {
|
|
|
+ fastgptChatQuestionStatisticsMapper.incrementFrequencyById(best.getId(), now);
|
|
|
+ return best.getId();
|
|
|
+ }
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ /** AI 也未归类时:再按 SimHash 找一遍(与本地逻辑一致),仍无则插入 question_category=0 的新统计行 */
|
|
|
+ private Long mergeBySimhashFallback(String display, long sh, Long detailId, Date now) {
|
|
|
+ Long localId = tryMergeByLocalTextMatch(display, sh, now);
|
|
|
+ if (localId != null) {
|
|
|
+ return localId;
|
|
|
+ }
|
|
|
+ FastgptChatQuestionStatistics row = new FastgptChatQuestionStatistics();
|
|
|
+ row.setQuestionCategory(0);
|
|
|
+ row.setContentSummary(display.length() > 200 ? display.substring(0, 200) : display);
|
|
|
+ row.setSimhash(sh);
|
|
|
+ row.setIsResolve(0);
|
|
|
+ row.setQuestionId(detailId);
|
|
|
+ row.setFrequency(1);
|
|
|
+ row.setCreateTime(now);
|
|
|
+ row.setUpdateTime(now);
|
|
|
+ fastgptChatQuestionStatisticsMapper.insertFastgptChatQuestionStatistics(row);
|
|
|
+ Long statId = row.getId();
|
|
|
+ if (statId == null) {
|
|
|
+ FastgptChatQuestionStatistics insertedBest = fastgptChatQuestionStatisticsMapper.selectBestMatchBySimhash(sh, SIMHASH_THRESHOLD);
|
|
|
+ statId = insertedBest != null ? insertedBest.getId() : null;
|
|
|
+ }
|
|
|
+ return statId;
|
|
|
+ }
|
|
|
+
|
|
|
private static FastgptChatQuestion buildQuestion(FastgptKnowledgeMissCollectParam param, String userContent) {
|
|
|
FastgptChatQuestion q = new FastgptChatQuestion();
|
|
|
BeanCopyUtils.copy(param, q);
|