Преглед изворни кода

上传csv文件录入知识库

yh пре 2 дана
родитељ
комит
b8efa25dfc

+ 22 - 0
fs-company/src/main/java/com/fs/company/controller/knowledge/CompanyKnowledgeBaseController.java

@@ -10,6 +10,7 @@ import com.fs.framework.security.LoginUser;
 import com.fs.framework.service.TokenService;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.web.bind.annotation.*;
+import org.springframework.web.multipart.MultipartFile;
 
 import java.util.List;
 import java.util.Map;
@@ -119,4 +120,25 @@ public class CompanyKnowledgeBaseController extends BaseController {
         Map<String, Object> result = knowledgeBaseService.dualValidation(loginUser.getCompany().getCompanyId(), query, fastgptResult);
         return AjaxResult.success(result);
     }
+
+    /**
+     * Uploads a CSV file and bulk-imports the question/answer pairs it contains.
+     * <p>
+     * The company id and operator name are taken from the logged-in user resolved from the
+     * current request; everything else is passed through to the service unchanged.
+     *
+     * @param file           uploaded CSV file (multipart field {@code file})
+     * @param baseId         id of the knowledge base the entries belong to
+     * @param collectionName optional collection name forwarded to the service
+     * @param collectionId   optional collection id forwarded to the service
+     * @param tenantcode     value of the {@code tenant-code} request header
+     * @return the result produced by the knowledge base service
+     */
+    @PostMapping("/upload")
+    public AjaxResult uploadFile(@RequestParam("file") MultipartFile file,
+                                  @RequestParam("baseId") Long baseId,
+                                  @RequestParam(value = "collectionName", required = false) String collectionName,
+                                  @RequestParam(value = "collectionId", required = false) String collectionId,
+                                  @RequestHeader("tenant-code") String tenantcode) {
+        LoginUser currentUser = tokenService.getLoginUser(ServletUtils.getRequest());
+        Long companyId = currentUser.getCompany().getCompanyId();
+        String operator = currentUser.getUsername();
+        return knowledgeBaseService.batchUploadKnowledge(
+            companyId, operator, baseId, collectionName, collectionId, file, tenantcode);
+    }
 }

+ 2 - 0
fs-service/src/main/java/com/fs/company/mapper/CompanyKnowledgeBaseMapper.java

@@ -16,6 +16,8 @@ public interface CompanyKnowledgeBaseMapper extends BaseMapper<CompanyKnowledgeB
                                                     @Param("baseId") Long baseId);
     
     int insertKnowledge(CompanyKnowledgeBase entity);
+
+    int batchInsertKnowledge(List<CompanyKnowledgeBase> list); // Inserts all given entities with one multi-row statement; the XML mapping sets useGeneratedKeys with keyProperty="id" so generated keys are written back to each entity.
     
     CompanyKnowledgeBase selectKnowledgeByIdAndCompanyId(@Param("id") Long id, @Param("companyId") Long companyId);
     

+ 6 - 0
fs-service/src/main/java/com/fs/company/service/ICompanyKnowledgeBaseService.java

@@ -3,6 +3,7 @@ package com.fs.company.service;
 import com.fs.common.core.domain.AjaxResult;
 import com.fs.company.domain.CompanyKnowledgeBase;
 import com.fs.company.dto.CompanyKnowledgeBaseDto;
+import org.springframework.web.multipart.MultipartFile;
 
 import java.util.List;
 import java.util.Map;
@@ -53,4 +54,9 @@ public interface ICompanyKnowledgeBaseService {
      * 双知识库校验
      */
     Map<String, Object> dualValidation(Long companyId, String query, String fastgptResult);
+
+    /**
+     * Bulk-imports knowledge entries (question/answer pairs) from an uploaded CSV file.
+     *
+     * @param companyId      company id stored on every imported entry
+     * @param userName       stored as creator and last updater of every imported entry
+     * @param baseId         knowledge base id stored on every imported entry
+     * @param collectionName vector collection to write to; may be null
+     * @param collectionId   collection id supplied by the caller
+     * @param file           uploaded CSV file
+     * @param tenantcode     tenant code supplied by the caller
+     * @return result of the import (success with counters, or an error message)
+     */
+    AjaxResult batchUploadKnowledge(Long companyId, String userName, Long baseId, String collectionName, String collectionId, MultipartFile file, String tenantcode);
 }

+ 236 - 0
fs-service/src/main/java/com/fs/company/service/impl/CompanyKnowledgeBaseServiceImpl.java

@@ -7,6 +7,7 @@ import com.alibaba.fastjson.JSONObject;
 import com.alibaba.fastjson.TypeReference;
 import com.fs.common.core.domain.AjaxResult;
 import com.fs.common.utils.DateUtils;
+import com.fs.common.utils.StringUtils;
 import com.fs.company.domain.CompanyKnowledgeBase;
 import com.fs.company.dto.CompanyKnowledgeBaseDto;
 import com.fs.company.mapper.CompanyKnowledgeBaseMapper;
@@ -15,14 +16,19 @@ import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Transactional;
 import org.springframework.util.CollectionUtils;
+import org.springframework.web.multipart.MultipartFile;
 
+import java.nio.charset.StandardCharsets;
 import java.time.LocalDateTime;
 import java.time.format.DateTimeFormatter;
 import java.util.ArrayList;
+import java.util.Date;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /**
  * 知识库基础Service实现
@@ -206,6 +212,191 @@ public class CompanyKnowledgeBaseServiceImpl implements ICompanyKnowledgeBaseSer
         return result > 0 ? AjaxResult.success("提取成功", knowledge) : AjaxResult.error("提取失败");
     }
 
+    /**
+     * Imports question/answer pairs from an uploaded CSV file.
+     * <p>
+     * The file is parsed, every non-empty pair is inserted with one batch statement and, when a
+     * collection name is given, each entry is embedded and upserted into Qdrant.
+     *
+     * @param companyId      company id stored on every imported entry
+     * @param userName       stored as creator and last updater of every imported entry
+     * @param baseId         knowledge base id stored on every imported entry
+     * @param collectionName Qdrant collection to write to; vector sync is skipped when null or empty
+     * @param collectionId   currently not used by this method
+     * @param file           uploaded CSV file
+     * @param tenantcode     currently not used by this method
+     * @return a success result carrying {@code total}, {@code success} and {@code errors}, or an
+     *         error result when the file is missing/empty, yields no pairs, or the insert affects no rows
+     */
+    @Override
+    @Transactional(rollbackFor = Exception.class)
+    public AjaxResult batchUploadKnowledge(Long companyId, String userName, Long baseId, String collectionName, String collectionId, MultipartFile file, String tenantcode) {
+        if (file == null || file.isEmpty()) {
+            return AjaxResult.error("上传文件为空");
+        }
+
+        List<String[]> qaPairs = parseCsvFile(file);
+        if (qaPairs.isEmpty()) {
+            return AjaxResult.error("未从文件中解析出有效的问答对");
+        }
+
+        // 1. Build one entity per valid question/answer pair; all rows share the same timestamp.
+        Date now = DateUtils.getNowDate();
+        List<CompanyKnowledgeBase> knowledgeList = new ArrayList<>(qaPairs.size());
+        for (String[] qa : qaPairs) {
+            String question = qa[0].trim();
+            String answer = qa[1].trim();
+            if (StringUtils.isEmpty(question) || StringUtils.isEmpty(answer)) {
+                continue;
+            }
+
+            CompanyKnowledgeBase knowledge = new CompanyKnowledgeBase();
+            knowledge.setCompanyId(companyId);
+            knowledge.setBaseId(baseId);
+            // The title is the question, cut to at most 100 characters.
+            knowledge.setTitle(question.length() > 100 ? question.substring(0, 100) : question);
+            knowledge.setQuestion(question);
+            knowledge.setAnswer(answer);
+            knowledge.setSource("manual");
+            knowledge.setAuditStatus(0);
+            knowledge.setUseCount(0);
+            knowledge.setSyncStatus(0);
+            knowledge.setDelFlag(0);
+            knowledge.setCreateBy(userName);
+            knowledge.setCreateTime(now);
+            knowledge.setUpdateBy(userName);
+            knowledge.setUpdateTime(now);
+            knowledgeList.add(knowledge);
+        }
+
+        if (knowledgeList.isEmpty()) {
+            return AjaxResult.error("没有有效的问答数据");
+        }
+
+        // 2. Batch insert; the mapper statement uses useGeneratedKeys so ids are filled in on the entities.
+        int insertCount = knowledgeBaseMapper.batchInsertKnowledge(knowledgeList);
+        if (insertCount <= 0) {
+            return AjaxResult.error("数据库批量插入失败");
+        }
+
+        // 3. Only when a target collection is given: embed each entry and write all vectors to
+        //    Qdrant in one request. Embeddings are not created otherwise, because they would
+        //    be discarded and each one costs a remote call inside the transaction.
+        if (StringUtils.isNotEmpty(collectionName)) {
+            List<String> texts = new ArrayList<>(knowledgeList.size());
+            List<List<Float>> allEmbeddings = new ArrayList<>(knowledgeList.size());
+            for (CompanyKnowledgeBase k : knowledgeList) {
+                String text = buildKnowledgeText(k);
+                texts.add(text);
+                allEmbeddings.add(createEmbedding(text));
+            }
+            batchUpsertKnowledgeToQdrant(knowledgeList, texts, allEmbeddings, collectionName);
+        }
+
+        Map<String, Object> resultMap = new HashMap<>();
+        resultMap.put("total", knowledgeList.size());
+        resultMap.put("success", knowledgeList.size());
+        resultMap.put("errors", new ArrayList<>());
+
+        return AjaxResult.success("导入完成,共导入 " + knowledgeList.size() + " 条数据", resultMap);
+    }
+
+    /**
+     * Reads the uploaded file as UTF-8 text (dropping a leading byte-order mark) and hands it
+     * to {@code parseByLines} to extract question/answer pairs.
+     * <p>
+     * Layouts the parser is meant to accept:
+     * 1. Q:xxx,A:xxx  (full-width Chinese colon)
+     * 2. Q: xxx, A: xxx  (ASCII colon)
+     * 3. "Q1: xxx\n",A1: xxx  (quoted, numbered, spanning lines)
+     * 4. 1. 问:xxx,答:xxx  (numbered, Chinese question/answer markers)
+     * 5. Q1: xxx,A1: xxx  (numbered)
+     *
+     * @param file uploaded CSV file
+     * @return list of {@code [question, answer]} arrays; empty when nothing could be extracted
+     * @throws java.io.UncheckedIOException if the uploaded bytes cannot be read
+     */
+    private List<String[]> parseCsvFile(MultipartFile file) {
+        String content;
+        try {
+            content = new String(file.getBytes(), StandardCharsets.UTF_8);
+        } catch (java.io.IOException e) {
+            // Surface read failures instead of reporting them as "no valid pairs found".
+            throw new java.io.UncheckedIOException("Failed to read uploaded CSV file", e);
+        }
+        if (content.startsWith("\uFEFF")) {
+            content = content.substring(1);
+        }
+        return parseByLines(content);
+    }
+    
+    /**
+     * Extracts question/answer pairs by flattening the text to a single line and matching it
+     * with one regular expression, so entries whose quoted cells span several lines still match.
+     * When the expression finds nothing, the original (unflattened) text is handed to the
+     * line-by-line fallback parser.
+     *
+     * @param content full CSV text
+     * @return list of {@code [question, answer]} arrays, possibly empty
+     */
+    private List<String[]> parseByLines(String content) {
+        List<String[]> qaPairs = new ArrayList<>();
+
+        // Collapse every whitespace run, line breaks included, into a single space.
+        String flat = content.replaceAll("\\s+", " ").trim();
+
+        // Pair layout: question prefix + question + "," + answer prefix + answer.
+        // Examples: Q:xxx,A:xxx / Q1: xxx,A1: xxx / 问:xxx,答:xxx
+        // The colon class accepts the ASCII colon and the full-width colon (U+FF1A).
+        Pattern qaPattern = Pattern.compile(
+                "(?:Q|问)\\s*\\d*\\s*[:\\uFF1A]" +               // question prefix
+                "\\s*\"?([^\"]*?)\"?\\s*" +                       // question text (optionally quoted)
+                ",\\s*\"?(?:A|答)\\s*\\d*\\s*[:\\uFF1A]" +        // separator + answer prefix (optionally quoted)
+                "\\s*\"?(.*?)\"?\\s*" +                           // answer text (optionally quoted)
+                "(?=\\s*(?:Q|问)\\s*\\d*\\s*[:\\uFF1A]|\\s*\\d+\\.\\s*问[:\\uFF1A]|$)"  // lookahead: next question or end
+        );
+
+        Matcher matcher = qaPattern.matcher(flat);
+        while (matcher.find()) {
+            String question = matcher.group(1).trim();
+            String answer = matcher.group(2).trim();
+            if (StringUtils.isNotEmpty(question) && StringUtils.isNotEmpty(answer)) {
+                qaPairs.add(new String[]{question, answer});
+            }
+        }
+
+        // Nothing matched: fall back to the per-line parser. It must receive the original text,
+        // because the flattened string no longer contains the line breaks it splits on.
+        if (qaPairs.isEmpty()) {
+            qaPairs = parseFallback(content);
+        }
+
+        return qaPairs;
+    }
+    
+    /**
+     * Minimal fallback parser: splits the text into lines and extracts at most one
+     * question/answer pair per line.
+     * <p>
+     * A line yields a pair when it contains a question prefix ({@code Q}/{@code 问}, optional
+     * number, colon) followed later by {@code ,} and an answer prefix ({@code A}/{@code 答},
+     * optional number, colon). Surrounding double quotes are stripped from both parts.
+     *
+     * @param content text to parse
+     * @return list of {@code [question, answer]} arrays, possibly empty
+     */
+    private List<String[]> parseFallback(String content) {
+        List<String[]> qaPairs = new ArrayList<>();
+
+        // Compiled once per call instead of once per line. The colon class accepts the ASCII
+        // colon and the full-width colon (U+FF1A).
+        Pattern questionPrefix = Pattern.compile("(?:Q|问)\\s*\\d*\\s*[:\\uFF1A]");
+        Pattern answerSeparator = Pattern.compile(",\\s*\"?(?:A|答)\\s*\\d*\\s*[:\\uFF1A]");
+
+        for (String rawLine : content.split("\\r?\\n")) {
+            String line = rawLine.trim();
+            if (line.isEmpty()) {
+                continue;
+            }
+
+            Matcher qm = questionPrefix.matcher(line);
+            if (!qm.find()) {
+                continue;
+            }
+            int questionStart = qm.end();
+
+            // Look for the answer separator only after the question prefix, so an earlier
+            // ",A:"-like fragment on the same line cannot make the line be skipped.
+            Matcher am = answerSeparator.matcher(line);
+            if (!am.find(questionStart)) {
+                continue;
+            }
+
+            String question = line.substring(questionStart, am.start()).trim();
+            question = question.replaceAll("^\"+|\"+$", "").trim();
+
+            // The separator match already ends right after the answer prefix's colon.
+            String answer = line.substring(am.end()).trim();
+            answer = answer.replaceAll("^\"+|\"+$", "").trim();
+
+            if (StringUtils.isNotEmpty(question) && StringUtils.isNotEmpty(answer)) {
+                qaPairs.add(new String[]{question, answer});
+            }
+        }
+
+        return qaPairs;
+    }
+
     @Override
     public Map<String, Object> dualValidation(Long companyId, String query, String fastgptResult) {
         Map<String, Object> result = new HashMap<>();
@@ -269,6 +460,51 @@ public class CompanyKnowledgeBaseServiceImpl implements ICompanyKnowledgeBaseSer
                 .body();
     }
 
+    /**
+     * Sends a batch of knowledge entries to the Qdrant upsert endpoint in a single request.
+     * <p>
+     * Entries without an id or without an embedding are left out; when nothing remains, no
+     * request is sent. The three lists are read by position.
+     *
+     * @param knowledgeList  entries to write
+     * @param texts          document text per entry; rebuilt from the entry when the list is shorter
+     * @param allEmbeddings  embedding per entry; entries beyond the list's end are skipped
+     * @param collectionName target collection name placed in the request
+     */
+    private void batchUpsertKnowledgeToQdrant(List<CompanyKnowledgeBase> knowledgeList, List<String> texts,
+                                               List<List<Float>> allEmbeddings, String collectionName) {
+        List<Long> pointIds = new ArrayList<>();
+        List<List<Float>> pointVectors = new ArrayList<>();
+        List<String> pointDocuments = new ArrayList<>();
+        List<Map<String, Object>> pointPayloads = new ArrayList<>();
+
+        for (int idx = 0; idx < knowledgeList.size(); idx++) {
+            CompanyKnowledgeBase entry = knowledgeList.get(idx);
+            List<Float> vector = null;
+            if (idx < allEmbeddings.size()) {
+                vector = allEmbeddings.get(idx);
+            }
+            String document;
+            if (idx < texts.size()) {
+                document = texts.get(idx);
+            } else {
+                document = buildKnowledgeText(entry);
+            }
+
+            // Only entries that have both an id and a non-empty vector are sent.
+            if (entry.getId() != null && !CollectionUtils.isEmpty(vector)) {
+                pointIds.add(entry.getId());
+                pointVectors.add(vector);
+                pointDocuments.add(document);
+                Map<String, Object> rawPayload =
+                        JSON.parseObject(JSON.toJSONString(entry), new TypeReference<Map<String, Object>>() {});
+                pointPayloads.add(convertToSafePayload(rawPayload));
+            }
+        }
+
+        if (pointIds.isEmpty()) {
+            return;
+        }
+
+        // LinkedHashMap keeps the key order of the serialized request body.
+        Map<String, Object> request = new LinkedHashMap<>();
+        request.put("collectionName", collectionName);
+        request.put("ids", pointIds);
+        request.put("vectors", pointVectors);
+        request.put("documents", pointDocuments);
+        request.put("payloads", pointPayloads);
+
+        String requestJson = JSON.toJSONString(request);
+        HttpRequest.post(AI_API_BASE_URL + "/qdrant/point/upsert")
+                .header("Content-Type", "application/json;charset=UTF-8")
+                .body(requestJson)
+                .execute()
+                .body();
+    }
+
     private void deleteKnowledgeFromQdrant(Long knowledgeId, String collectionName) {
         Map<String, Object> req = new LinkedHashMap<>();
         req.put("collectionName", collectionName);

+ 11 - 0
fs-service/src/main/resources/mapper/company/CompanyKnowledgeBaseMapper.xml

@@ -57,6 +57,17 @@
          #{syncStatus}, #{delFlag}, #{createBy}, #{createTime}, #{updateBy}, #{updateTime}, #{baseId})
     </insert>
 
+    <insert id="batchInsertKnowledge" parameterType="java.util.List" useGeneratedKeys="true" keyProperty="id"> <!-- Multi-row insert: the foreach below emits one VALUES tuple per list element, comma-separated; useGeneratedKeys/keyProperty request that generated keys be written to each element's id. -->
+        insert into company_knowledge_base
+        (company_id, title, question, answer, industry_type, source, audit_status, use_count,
+         sync_status, del_flag, create_by, create_time, update_by, update_time, base_id)
+        values
+        <foreach collection="list" item="item" separator=",">
+        (#{item.companyId}, #{item.title}, #{item.question}, #{item.answer}, #{item.industryType}, #{item.source}, #{item.auditStatus}, #{item.useCount},
+         #{item.syncStatus}, #{item.delFlag}, #{item.createBy}, #{item.createTime}, #{item.updateBy}, #{item.updateTime}, #{item.baseId})
+        </foreach>
+    </insert>
+
     <select id="selectKnowledgeByIdAndCompanyId" resultMap="CompanyKnowledgeBaseResult">
         select id, company_id, base_id, title, question, answer, industry_type, source, audit_status,
                audit_comment, auditor, audit_time, use_count, fastgpt_id, sync_status, sync_time,