Prechádzať zdrojové kódy

feat:ai回复高频问题内容调整

caoliqin 2 dní pred
rodič
commit
b038bbff53

+ 7 - 0
fs-service/src/main/java/com/fs/fastGpt/mapper/FastgptChatQuestionStatisticsMapper.java

@@ -67,4 +67,11 @@ public interface FastgptChatQuestionStatisticsMapper extends BaseMapper<FastgptC
     int incrementFrequencyById(@Param("id") Long id, @Param("updateTime") Date updateTime);
 
     FastgptChatQuestionStatistics selectFirstByQuestionCategory(@Param("questionCategory") Integer questionCategory);
+
+    /**
+     * Lists up to {@code limit} statistics rows whose SimHash is within {@code threshold} bits (Hamming distance) of {@code simhash}; the SQL also computes a dist column, and the result is meant for a second filtering pass in Java.
+     */
+    List<FastgptChatQuestionStatistics> selectCandidatesBySimhash(@Param("simhash") Long simhash,
+                                                                  @Param("threshold") Integer threshold,
+                                                                  @Param("limit") Integer limit);
 }

+ 66 - 40
fs-service/src/main/java/com/fs/fastGpt/service/impl/FastgptChatQuestionCollectServiceImpl.java

@@ -35,8 +35,17 @@ import java.util.Map;
 @Service
 public class FastgptChatQuestionCollectServiceImpl {
 
-    private static final int SIMHASH_THRESHOLD = 14;
-    private static final double JACCARD_THRESHOLD = 0.55d;
+    /**
+     * Local-match threshold: the largest SimHash Hamming distance a row may have and still be a candidate (larger = looser).
+     */
+    private static final int SIMHASH_THRESHOLD = 30;
+
+    /**
+     * Local Jaccard threshold: candidates scoring below this value are rejected (smaller = looser).
+     */
+    private static final double JACCARD_THRESHOLD = 0.45d;
+
+    private static final int LOCAL_CANDIDATE_LIMIT = 30; // row limit passed to the SimHash candidate query
 
     private static final String MODEL_TYPE_HIGH_FREQ = "高频问题类别";
 
@@ -77,31 +86,22 @@ public class FastgptChatQuestionCollectServiceImpl {
             Date now = DateUtils.getNowDate();
             long sh = FastgptQuestionNormalizeUtil.simhash64(display);
 
-            // 本地匹配已有统计,获取数据id
-            Long statId = tryMergeByLocalTextMatch(display, sh, now);
+            // ① 先本地匹配:命中则频次+1,并且才会继续走 AI;未命中则不走 AI
+            LocalMatch localMatch = findBestLocalMatch(display, sh);
+            Long statId = null;
+            if (localMatch != null) {
+                statId = localMatch.statId;
+                fastgptChatQuestionStatisticsMapper.incrementFrequencyById(statId, now);
 
-            if (statId == null) {
-                // 如果没有匹配,就调用 AI
+                // ② 只有本地命中时,才调用 AI 做类别归类(用于给统计行补齐/修正 question_category)
                 Integer aiCategory = getHighFreqCategoryByAi(param);
                 if (aiCategory != null) {
-                    FastgptChatQuestionStatistics questionStatistics =
-                            fastgptChatQuestionStatisticsMapper.selectFirstByQuestionCategory(aiCategory);
-                    if (questionStatistics != null && questionStatistics.getId() != null) {
-                        statId = questionStatistics.getId();
-                        fastgptChatQuestionStatisticsMapper.incrementFrequencyById(statId, now);
-                    } else {
-                        FastgptChatQuestionStatistics row = new FastgptChatQuestionStatistics();
-                        row.setQuestionCategory(aiCategory);
-                        row.setContentSummary(display.length() > 200 ? display.substring(0, 200) : display);
-                        row.setSimhash(sh);
-                        row.setIsResolve(0);
-                        row.setQuestionId(detailId);
-                        row.setFrequency(1);
-                        row.setCreateTime(now);
-                        row.setUpdateTime(now);
-                        fastgptChatQuestionStatisticsMapper.insertFastgptChatQuestionStatistics(row);
-                        statId = row.getId();
-                    }
+                    FastgptChatQuestionStatistics upd = new FastgptChatQuestionStatistics();
+                    upd.setId(statId);
+                    upd.setQuestionCategory(aiCategory);
+                    upd.setUpdateTime(now);
+                    // 只更新类别(不覆盖其他字段)
+                    fastgptChatQuestionStatisticsMapper.updateFastgptChatQuestionStatistics(upd);
                 }
             }
 
@@ -207,29 +207,45 @@ public class FastgptChatQuestionCollectServiceImpl {
     /**
      * Compares {@code display} with existing statistics rows (SimHash candidates, then Jaccard) and returns the best match, or {@code null} when none qualifies; this method itself neither increments frequency nor inserts a row.
      */
-    private Long tryMergeByLocalTextMatch(String display, long sh, Date now) {
-        FastgptChatQuestionStatistics best = fastgptChatQuestionStatisticsMapper.selectBestMatchBySimhash(sh, SIMHASH_THRESHOLD);
-        if (best != null && best.getId() != null) {
-            double jac = FastgptQuestionNormalizeUtil.jaccard(
-                    FastgptQuestionNormalizeUtil.ngramTokens(display),
-                    FastgptQuestionNormalizeUtil.ngramTokens(best.getContentSummary())
-            );
+    private LocalMatch findBestLocalMatch(String display, long sh) {
+        // Fetch a batch of candidates, score each with Jaccard on the Java side, then keep the best one.
+        java.util.List<FastgptChatQuestionStatistics> candidates =
+                fastgptChatQuestionStatisticsMapper.selectCandidatesBySimhash(sh, SIMHASH_THRESHOLD, LOCAL_CANDIDATE_LIMIT);
+        if (candidates == null || candidates.isEmpty()) {
+            return null;
+        }
+        java.util.Set<String> a = FastgptQuestionNormalizeUtil.ngramTokens(display);
+        LocalMatch best = null;
+        for (FastgptChatQuestionStatistics c : candidates) {
+            if (c == null || c.getId() == null || StringUtils.isBlank(c.getContentSummary())) {
+                continue;
+            }
+            double jac = FastgptQuestionNormalizeUtil.jaccard(a, FastgptQuestionNormalizeUtil.ngramTokens(c.getContentSummary()));
             if (jac < JACCARD_THRESHOLD) {
-                best = null;
+                continue;
+            }
+            // The dist column is not mapped onto the entity, so it is unavailable here; selection therefore uses only jac + id.
+            if (best == null) {
+                best = new LocalMatch(c.getId(), jac);
+                continue;
+            }
+            // Prefer the candidate with the higher match score (Jaccard serves as the score here).
+            if (jac > best.jaccard) {
+                best = new LocalMatch(c.getId(), jac);
+            } else if (Double.compare(jac, best.jaccard) == 0 && c.getId() > best.statId) {
+                // Equal scores: take the larger id (treated as the newest row).
+                best = new LocalMatch(c.getId(), jac);
             }
         }
-        if (best != null && best.getId() != null) {
-            fastgptChatQuestionStatisticsMapper.incrementFrequencyById(best.getId(), now);
-            return best.getId();
-        }
-        return null;
+        return best;
     }
 
     /** AI 也未归类时:再按 SimHash 找一遍(与本地逻辑一致),仍无则插入 question_category=0 的新统计行 */
     private Long mergeBySimhashFallback(String display, long sh, Long detailId, Date now) {
-        Long localId = tryMergeByLocalTextMatch(display, sh, now);
-        if (localId != null) {
-            return localId;
+        LocalMatch localMatch = findBestLocalMatch(display, sh);
+        if (localMatch != null) {
+            fastgptChatQuestionStatisticsMapper.incrementFrequencyById(localMatch.statId, now);
+            return localMatch.statId;
         }
         FastgptChatQuestionStatistics row = new FastgptChatQuestionStatistics();
         row.setQuestionCategory(0);
@@ -249,6 +265,16 @@ public class FastgptChatQuestionCollectServiceImpl {
         return statId;
     }
 
+    private static class LocalMatch { // Immutable pair: a matched statistics row id and the Jaccard score it was matched with.
+        private final Long statId; // id of the matched statistics row
+        private final double jaccard; // Jaccard score computed for that row
+
+        private LocalMatch(Long statId, double jaccard) {
+            this.statId = statId;
+            this.jaccard = jaccard;
+        }
+    }
+
     private static FastgptChatQuestion buildQuestion(FastgptKnowledgeMissCollectParam param, String userContent) {
         FastgptChatQuestion q = new FastgptChatQuestion();
         BeanCopyUtils.copy(param, q);

+ 10 - 0
fs-service/src/main/resources/mapper/fastGpt/FastgptChatQuestionStatisticsMapper.xml

@@ -101,6 +101,16 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
         LIMIT 1
     </select>
 
+    <select id="selectCandidatesBySimhash" resultMap="FastgptChatQuestionStatisticsResult"> <!-- Candidate lookup: rows with a non-null simhash whose bit difference from the given simhash (dist) is at most the threshold; nearest first, then higher frequency, then larger id; capped by the limit parameter. -->
+        SELECT s.*,
+               BIT_COUNT(s.simhash ^ #{simhash}) AS dist
+        FROM fastgpt_chat_question_statistics s
+        WHERE s.simhash IS NOT NULL
+        HAVING dist &lt;= #{threshold}
+        ORDER BY dist ASC, s.frequency DESC, s.id DESC
+        LIMIT #{limit}
+    </select>
+
     <update id="incrementFrequencyById">
         update fastgpt_chat_question_statistics
         set frequency = frequency + 1,