|
|
@@ -1,732 +0,0 @@
|
|
|
-package com.fs.ai.rag.service.impl;
|
|
|
-
|
|
|
-import com.fs.ai.rag.AiRagProperties;
|
|
|
-import com.fs.ai.rag.KnowledgeVectorService;
|
|
|
-import com.fs.ai.rag.dto.CollectionCreateReq;
|
|
|
-import com.fs.ai.rag.dto.CollectionDeleteReq;
|
|
|
-import com.fs.ai.rag.dto.CollectionListReq;
|
|
|
-import com.fs.ai.rag.dto.CreateDatabaseReq;
|
|
|
-import com.fs.ai.rag.dto.CreateTenantReq;
|
|
|
-import com.fs.ai.rag.dto.DeleteReq;
|
|
|
-import com.fs.ai.rag.dto.IndexReq;
|
|
|
-import com.fs.ai.rag.dto.QueryReq;
|
|
|
-import com.fs.ai.rag.dto.RecordDeleteReq;
|
|
|
-import com.fs.ai.rag.dto.RecordQueryReq;
|
|
|
-import com.fs.ai.rag.dto.RecordUpsertReq;
|
|
|
-import lombok.RequiredArgsConstructor;
|
|
|
-import lombok.extern.slf4j.Slf4j;
|
|
|
-import org.apache.commons.lang3.StringUtils;
|
|
|
-import org.springframework.core.ParameterizedTypeReference;
|
|
|
-import org.springframework.http.HttpEntity;
|
|
|
-import org.springframework.http.HttpHeaders;
|
|
|
-import org.springframework.http.HttpMethod;
|
|
|
-import org.springframework.http.MediaType;
|
|
|
-import org.springframework.stereotype.Service;
|
|
|
-import org.springframework.web.client.RestTemplate;
|
|
|
-
|
|
|
-import java.util.*;
|
|
|
-
|
|
|
-@Service
|
|
|
-@Slf4j
|
|
|
-@RequiredArgsConstructor
|
|
|
-public class KnowledgeVectorServiceImpl implements KnowledgeVectorService {
|
|
|
- private static final String WORKFLOW_DATABASE = "ai_workflow";
|
|
|
- private static final String WORKFLOW_COLLECTION = "workflow_knowledge_base";
|
|
|
-
|
|
|
- private final AiRagProperties props;
|
|
|
- private final RestTemplate restTemplate;
|
|
|
-
|
|
|
- private volatile ChromaScope resolvedScope;
|
|
|
- private static final class ChromaScope {
|
|
|
- final String tenantId;
|
|
|
- final String databaseName;
|
|
|
-
|
|
|
- ChromaScope(String tenantId, String databaseName) {
|
|
|
- this.tenantId = tenantId;
|
|
|
- this.databaseName = databaseName;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- private static final class EmbeddingProfile {
|
|
|
- final String url;
|
|
|
- final String apiKey;
|
|
|
- final String model;
|
|
|
-
|
|
|
- EmbeddingProfile(String url, String apiKey, String model) {
|
|
|
- this.url = url;
|
|
|
- this.apiKey = apiKey;
|
|
|
- this.model = model;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public void index(IndexReq req) {
|
|
|
- if (!props.isEnabled()) {
|
|
|
- return;
|
|
|
- }
|
|
|
- validateIndex(req);
|
|
|
- String tenantCode = StringUtils.trimToNull(req.getTenantCode());
|
|
|
- String targetCollection = WORKFLOW_COLLECTION;
|
|
|
- ChromaScope scope = StringUtils.isNotBlank(tenantCode)
|
|
|
- ? new ChromaScope(tenantCode, WORKFLOW_DATABASE)
|
|
|
- : resolveScope();
|
|
|
- if (StringUtils.isBlank(tenantCode) && StringUtils.isNotBlank(req.getCollectionName())) {
|
|
|
- targetCollection = req.getCollectionName();
|
|
|
- }
|
|
|
-
|
|
|
- EmbeddingProfile embeddingProfile = resolveEmbeddingProfile(targetCollection);
|
|
|
- String collId = StringUtils.isNotBlank(tenantCode)
|
|
|
- ? getOrCreateWorkflowCollectionId(scope)
|
|
|
- : getOrCreateCollectionId(scope, targetCollection);
|
|
|
-
|
|
|
- deleteByDocIdInternal(scope, collId, req.getDocId());
|
|
|
-
|
|
|
- List<String> chunks = chunk(req.getText());
|
|
|
- if (chunks.isEmpty()) {
|
|
|
- return;
|
|
|
- }
|
|
|
- List<List<Double>> embeddings = embedBatch(chunks, embeddingProfile);
|
|
|
- if (embeddings.size() != chunks.size()) {
|
|
|
- throw new IllegalStateException("Embedding 条数与分片不一致");
|
|
|
- }
|
|
|
-
|
|
|
- List<String> ids = new ArrayList<>();
|
|
|
- List<Map<String, Object>> metadatas = new ArrayList<>();
|
|
|
- int n = chunks.size();
|
|
|
- for (int i = 0; i < n; i++) {
|
|
|
- ids.add(req.getDocId() + "_" + i);
|
|
|
- Map<String, Object> meta = new LinkedHashMap<>();
|
|
|
- meta.put("doc_id", req.getDocId());
|
|
|
- meta.put("tenant_id", StringUtils.defaultIfBlank(req.getTenantCode(), req.getTenantId()));
|
|
|
- meta.put("chunk_index", i);
|
|
|
- meta.put("total_chunks", n);
|
|
|
- metadatas.add(meta);
|
|
|
- }
|
|
|
-
|
|
|
- String url = collectionsPath(scope, collId) + "/upsert";
|
|
|
- Map<String, Object> body = new LinkedHashMap<>();
|
|
|
- body.put("ids", ids);
|
|
|
- body.put("embeddings", embeddings);
|
|
|
- body.put("documents", chunks);
|
|
|
- body.put("metadatas", metadatas);
|
|
|
- postJson(url, body);
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public List<String> search(QueryReq req) {
|
|
|
- if (!props.isEnabled()) {
|
|
|
- return Collections.emptyList();
|
|
|
- }
|
|
|
- if (StringUtils.isAnyBlank(req.getTenantId(), req.getCollectionName(), req.getQuestion())) {
|
|
|
- throw new IllegalArgumentException("tenantId/collectionName/question 不能为空");
|
|
|
- }
|
|
|
- EmbeddingProfile embeddingProfile = resolveEmbeddingProfile(req.getCollectionName());
|
|
|
- ChromaScope scope = resolveScope();
|
|
|
- String collId = findCollectionIdByName(scope, req.getCollectionName());
|
|
|
- if (collId == null) {
|
|
|
- return Collections.emptyList();
|
|
|
- }
|
|
|
-
|
|
|
- List<Double> vec = embedSingle(req.getQuestion(), embeddingProfile);
|
|
|
- String url = collectionsPath(scope, collId) + "/query";
|
|
|
- Map<String, Object> body = new LinkedHashMap<>();
|
|
|
- body.put("query_embeddings", Collections.singletonList(vec));
|
|
|
- body.put("n_results", Optional.ofNullable(req.getTopK()).orElse(5));
|
|
|
- Map<String, String> where = new LinkedHashMap<>();
|
|
|
- where.put("tenant_id", req.getTenantId());
|
|
|
- body.put("where", where);
|
|
|
- body.put("include", Arrays.asList("documents", "metadatas", "distances"));
|
|
|
-
|
|
|
- Map<String, Object> resp = postJsonReturnMap(url, body);
|
|
|
- return extractDocuments(resp);
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public void deleteByDocId(DeleteReq req) {
|
|
|
- if (!props.isEnabled()) {
|
|
|
- return;
|
|
|
- }
|
|
|
- if (StringUtils.isAnyBlank(req.getTenantId(), req.getDocId(), req.getCollectionName())) {
|
|
|
- throw new IllegalArgumentException("tenantId/docId/collectionName 不能为空");
|
|
|
- }
|
|
|
- ChromaScope scope = resolveScope();
|
|
|
- String collId = findCollectionIdByName(scope, req.getCollectionName());
|
|
|
- if (collId == null) {
|
|
|
- return;
|
|
|
- }
|
|
|
- deleteByDocIdInternal(scope, collId, req.getDocId());
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public Map<String, Object> createTenant(CreateTenantReq req) {
|
|
|
- if (req == null || StringUtils.isBlank(req.getName())) {
|
|
|
- throw new IllegalArgumentException("name 不能为空");
|
|
|
- }
|
|
|
- String url = baseNormalized() + "/tenants";
|
|
|
- Map<String, Object> body = new LinkedHashMap<>();
|
|
|
- body.put("name", req.getName().trim());
|
|
|
- return postJsonReturnMap(url, body);
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public Map<String, Object> createDatabase(CreateDatabaseReq req) {
|
|
|
- if (req == null || StringUtils.isAnyBlank(req.getTenantId(), req.getName())) {
|
|
|
- throw new IllegalArgumentException("tenantId/name 不能为空");
|
|
|
- }
|
|
|
- String url = baseNormalized() + "/tenants/" + req.getTenantId().trim() + "/databases";
|
|
|
- Map<String, Object> body = new LinkedHashMap<>();
|
|
|
- body.put("name", req.getName().trim());
|
|
|
- return postJsonReturnMap(url, body);
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public Map<String, Object> createCollection(CollectionCreateReq req) {
|
|
|
- ChromaScope scope = resolveScopeOverride(req);
|
|
|
- String name = trim(req.getName());
|
|
|
- if (StringUtils.isBlank(name)) {
|
|
|
- throw new IllegalArgumentException("name 不能为空");
|
|
|
- }
|
|
|
- String url = tenantsDatabasesPath(scope) + "/collections";
|
|
|
- Map<String, Object> body = new LinkedHashMap<>();
|
|
|
- body.put("name", name);
|
|
|
- body.put("get_or_create", req.getGetOrCreate() == null ? Boolean.TRUE : req.getGetOrCreate());
|
|
|
- if (req.getMetadata() != null) {
|
|
|
- body.put("metadata", req.getMetadata());
|
|
|
- }
|
|
|
- return postJsonReturnMap(url, body);
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public Object listCollections(CollectionListReq req) {
|
|
|
- ChromaScope scope = resolveScopeOverride(req);
|
|
|
- int limit = req.getLimit() == null ? 100 : req.getLimit();
|
|
|
- int offset = req.getOffset() == null ? 0 : req.getOffset();
|
|
|
- String url = tenantsDatabasesPath(scope) + "/collections?limit=" + limit + "&offset=" + offset;
|
|
|
- return getJsonObject(url);
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public void deleteCollection(CollectionDeleteReq req) {
|
|
|
- ChromaScope scope = resolveScopeOverride(req);
|
|
|
- String collectionId = trim(req.getCollectionId());
|
|
|
- if (StringUtils.isBlank(collectionId)) {
|
|
|
- String collectionName = trim(req.getCollectionName());
|
|
|
- if (StringUtils.isBlank(collectionName)) {
|
|
|
- throw new IllegalArgumentException("collection_id 或 collection_name 必填其一");
|
|
|
- }
|
|
|
- collectionId = findCollectionIdByName(scope, collectionName);
|
|
|
- if (StringUtils.isBlank(collectionId)) {
|
|
|
- throw new IllegalArgumentException("未找到集合: " + collectionName);
|
|
|
- }
|
|
|
- }
|
|
|
- String url = collectionsPath(scope, collectionId);
|
|
|
- HttpHeaders h = chromaHeaders();
|
|
|
- restTemplate.exchange(url, HttpMethod.DELETE, new HttpEntity<>(h), String.class);
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public Map<String, Object> upsertRecords(RecordUpsertReq req) {
|
|
|
- ChromaScope scope = resolveScopeOverride(req);
|
|
|
- String collectionId = resolveCollectionId(scope, req);
|
|
|
- String url = collectionsPath(scope, collectionId) + "/upsert";
|
|
|
- return postJsonReturnMap(url, stripControlFields(req));
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public Map<String, Object> queryRecords(RecordQueryReq req) {
|
|
|
- ChromaScope scope = resolveScopeOverride(req);
|
|
|
- String collectionId = resolveCollectionId(scope, req);
|
|
|
- String url = collectionsPath(scope, collectionId) + "/query";
|
|
|
- return postJsonReturnMap(url, stripControlFields(req));
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public Map<String, Object> deleteRecords(RecordDeleteReq req) {
|
|
|
- ChromaScope scope = resolveScopeOverride(req);
|
|
|
- String collectionId = resolveCollectionId(scope, req);
|
|
|
- String url = collectionsPath(scope, collectionId) + "/delete";
|
|
|
- return postJsonReturnMap(url, stripControlFields(req));
|
|
|
- }
|
|
|
-
|
|
|
- private void deleteByDocIdInternal(ChromaScope scope, String collectionId, String docId) {
|
|
|
- String url = collectionsPath(scope, collectionId) + "/delete";
|
|
|
- Map<String, Object> where = new LinkedHashMap<>();
|
|
|
- where.put("doc_id", docId);
|
|
|
- Map<String, Object> body = new LinkedHashMap<>();
|
|
|
- body.put("where", where);
|
|
|
- postJson(url, body);
|
|
|
- }
|
|
|
-
|
|
|
- private String baseNormalized() {
|
|
|
- String b = props.getChromaBaseUrl().trim();
|
|
|
- while (b.endsWith("/")) {
|
|
|
- b = b.substring(0, b.length() - 1);
|
|
|
- }
|
|
|
- return b;
|
|
|
- }
|
|
|
-
|
|
|
- private String tenantsDatabasesPath(ChromaScope s) {
|
|
|
- return baseNormalized() + "/tenants/" + s.tenantId + "/databases/" + s.databaseName;
|
|
|
- }
|
|
|
-
|
|
|
- private String collectionsPath(ChromaScope s, String collectionId) {
|
|
|
- return tenantsDatabasesPath(s) + "/collections/" + collectionId;
|
|
|
- }
|
|
|
-
|
|
|
- private ChromaScope resolveScope() {
|
|
|
- ChromaScope c = resolvedScope;
|
|
|
- if (c != null) {
|
|
|
- return c;
|
|
|
- }
|
|
|
- synchronized (this) {
|
|
|
- if (resolvedScope != null) {
|
|
|
- return resolvedScope;
|
|
|
- }
|
|
|
- String tid = StringUtils.trimToEmpty(props.getChromaTenantId());
|
|
|
- String db = StringUtils.trimToEmpty(props.getChromaDatabase());
|
|
|
- if (StringUtils.isNotBlank(tid) && StringUtils.isNotBlank(db)) {
|
|
|
- resolvedScope = new ChromaScope(tid, db);
|
|
|
- return resolvedScope;
|
|
|
- }
|
|
|
- Map<String, Object> idMap = getJsonMap(baseNormalized() + "/auth/identity");
|
|
|
- String tenant = Objects.toString(idMap.get("tenant"), "");
|
|
|
- if (StringUtils.isBlank(tenant)) {
|
|
|
- throw new IllegalStateException("Chroma /auth/identity 未返回 tenant,请配置 ai.rag.chroma-tenant-id 与 ai.rag.chroma-database");
|
|
|
- }
|
|
|
- if (StringUtils.isBlank(db)) {
|
|
|
- @SuppressWarnings("unchecked")
|
|
|
- List<Map<String, Object>> databases = (List<Map<String, Object>>) idMap.get("databases");
|
|
|
- if (databases != null && !databases.isEmpty()) {
|
|
|
- db = Objects.toString(databases.get(0).get("name"), "default_database");
|
|
|
- } else {
|
|
|
- db = "default_database";
|
|
|
- }
|
|
|
- }
|
|
|
- resolvedScope = new ChromaScope(tenant, db);
|
|
|
- log.info("Chroma 作用域:tenant={}, database={}", tenant, db);
|
|
|
- return resolvedScope;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- private ChromaScope resolveScopeOverride(CollectionCreateReq req) {
|
|
|
- String tid = req == null ? null : trim(req.getTenantId());
|
|
|
- String db = req == null ? null : trim(req.getDatabase());
|
|
|
- if (StringUtils.isNotBlank(tid) && StringUtils.isNotBlank(db)) {
|
|
|
- return new ChromaScope(tid, db);
|
|
|
- }
|
|
|
- ChromaScope fallback = resolveScope();
|
|
|
- return new ChromaScope(StringUtils.defaultIfBlank(tid, fallback.tenantId),
|
|
|
- StringUtils.defaultIfBlank(db, fallback.databaseName));
|
|
|
- }
|
|
|
-
|
|
|
- private ChromaScope resolveScopeOverride(CollectionListReq req) {
|
|
|
- String tid = req == null ? null : trim(req.getTenantId());
|
|
|
- String db = req == null ? null : trim(req.getDatabase());
|
|
|
- if (StringUtils.isNotBlank(tid) && StringUtils.isNotBlank(db)) {
|
|
|
- return new ChromaScope(tid, db);
|
|
|
- }
|
|
|
- ChromaScope fallback = resolveScope();
|
|
|
- return new ChromaScope(StringUtils.defaultIfBlank(tid, fallback.tenantId),
|
|
|
- StringUtils.defaultIfBlank(db, fallback.databaseName));
|
|
|
- }
|
|
|
-
|
|
|
- private ChromaScope resolveScopeOverride(CollectionDeleteReq req) {
|
|
|
- String tid = req == null ? null : trim(req.getTenantId());
|
|
|
- String db = req == null ? null : trim(req.getDatabase());
|
|
|
- if (StringUtils.isNotBlank(tid) && StringUtils.isNotBlank(db)) {
|
|
|
- return new ChromaScope(tid, db);
|
|
|
- }
|
|
|
- ChromaScope fallback = resolveScope();
|
|
|
- return new ChromaScope(StringUtils.defaultIfBlank(tid, fallback.tenantId),
|
|
|
- StringUtils.defaultIfBlank(db, fallback.databaseName));
|
|
|
- }
|
|
|
-
|
|
|
- private ChromaScope resolveScopeOverride(RecordUpsertReq req) {
|
|
|
- String tid = req == null ? null : trim(req.getTenantId());
|
|
|
- String db = req == null ? null : trim(req.getDatabase());
|
|
|
- if (StringUtils.isNotBlank(tid) && StringUtils.isNotBlank(db)) {
|
|
|
- return new ChromaScope(tid, db);
|
|
|
- }
|
|
|
- ChromaScope fallback = resolveScope();
|
|
|
- return new ChromaScope(StringUtils.defaultIfBlank(tid, fallback.tenantId),
|
|
|
- StringUtils.defaultIfBlank(db, fallback.databaseName));
|
|
|
- }
|
|
|
-
|
|
|
- private ChromaScope resolveScopeOverride(RecordQueryReq req) {
|
|
|
- String tid = req == null ? null : trim(req.getTenantId());
|
|
|
- String db = req == null ? null : trim(req.getDatabase());
|
|
|
- if (StringUtils.isNotBlank(tid) && StringUtils.isNotBlank(db)) {
|
|
|
- return new ChromaScope(tid, db);
|
|
|
- }
|
|
|
- ChromaScope fallback = resolveScope();
|
|
|
- return new ChromaScope(StringUtils.defaultIfBlank(tid, fallback.tenantId),
|
|
|
- StringUtils.defaultIfBlank(db, fallback.databaseName));
|
|
|
- }
|
|
|
-
|
|
|
- private ChromaScope resolveScopeOverride(RecordDeleteReq req) {
|
|
|
- String tid = req == null ? null : trim(req.getTenantId());
|
|
|
- String db = req == null ? null : trim(req.getDatabase());
|
|
|
- if (StringUtils.isNotBlank(tid) && StringUtils.isNotBlank(db)) {
|
|
|
- return new ChromaScope(tid, db);
|
|
|
- }
|
|
|
- ChromaScope fallback = resolveScope();
|
|
|
- return new ChromaScope(StringUtils.defaultIfBlank(tid, fallback.tenantId),
|
|
|
- StringUtils.defaultIfBlank(db, fallback.databaseName));
|
|
|
- }
|
|
|
-
|
|
|
- private String resolveCollectionId(ChromaScope scope, RecordUpsertReq req) {
|
|
|
- String collectionId = trim(req.getCollectionId());
|
|
|
- if (StringUtils.isNotBlank(collectionId)) {
|
|
|
- return collectionId;
|
|
|
- }
|
|
|
- String collectionName = trim(req.getCollectionName());
|
|
|
- if (StringUtils.isBlank(collectionName)) {
|
|
|
- throw new IllegalArgumentException("collection_id 或 collection_name 必填其一");
|
|
|
- }
|
|
|
- String resolved = findCollectionIdByName(scope, collectionName);
|
|
|
- if (StringUtils.isBlank(resolved)) {
|
|
|
- throw new IllegalArgumentException("未找到集合: " + collectionName);
|
|
|
- }
|
|
|
- return resolved;
|
|
|
- }
|
|
|
-
|
|
|
- private String resolveCollectionId(ChromaScope scope, RecordQueryReq req) {
|
|
|
- String collectionId = trim(req.getCollectionId());
|
|
|
- if (StringUtils.isNotBlank(collectionId)) {
|
|
|
- return collectionId;
|
|
|
- }
|
|
|
- String collectionName = trim(req.getCollectionName());
|
|
|
- if (StringUtils.isBlank(collectionName)) {
|
|
|
- throw new IllegalArgumentException("collection_id 或 collection_name 必填其一");
|
|
|
- }
|
|
|
- String resolved = findCollectionIdByName(scope, collectionName);
|
|
|
- if (StringUtils.isBlank(resolved)) {
|
|
|
- throw new IllegalArgumentException("未找到集合: " + collectionName);
|
|
|
- }
|
|
|
- return resolved;
|
|
|
- }
|
|
|
-
|
|
|
- private String resolveCollectionId(ChromaScope scope, RecordDeleteReq req) {
|
|
|
- String collectionId = trim(req.getCollectionId());
|
|
|
- if (StringUtils.isNotBlank(collectionId)) {
|
|
|
- return collectionId;
|
|
|
- }
|
|
|
- String collectionName = trim(req.getCollectionName());
|
|
|
- if (StringUtils.isBlank(collectionName)) {
|
|
|
- throw new IllegalArgumentException("collection_id 或 collection_name 必填其一");
|
|
|
- }
|
|
|
- String resolved = findCollectionIdByName(scope, collectionName);
|
|
|
- if (StringUtils.isBlank(resolved)) {
|
|
|
- throw new IllegalArgumentException("未找到集合: " + collectionName);
|
|
|
- }
|
|
|
- return resolved;
|
|
|
- }
|
|
|
-
|
|
|
- private String getOrCreateCollectionId(ChromaScope scope, String name) {
|
|
|
- String existing = findCollectionIdByName(scope, name);
|
|
|
- if (existing != null) {
|
|
|
- return existing;
|
|
|
- }
|
|
|
- String url = tenantsDatabasesPath(scope) + "/collections";
|
|
|
- Map<String, Object> body = new LinkedHashMap<>();
|
|
|
- body.put("name", name);
|
|
|
- body.put("get_or_create", Boolean.TRUE);
|
|
|
- Map<String, Object> resp = postJsonReturnMap(url, body);
|
|
|
- Object id = resp.get("id");
|
|
|
- if (id != null) {
|
|
|
- return id.toString();
|
|
|
- }
|
|
|
- return Objects.requireNonNull(findCollectionIdByName(scope, name), "创建集合失败: " + name);
|
|
|
- }
|
|
|
-
|
|
|
- private String getOrCreateWorkflowCollectionId(ChromaScope scope) {
|
|
|
- String existing = findCollectionIdByName(scope, WORKFLOW_COLLECTION);
|
|
|
- if (existing != null) {
|
|
|
- return existing;
|
|
|
- }
|
|
|
- String url = tenantsDatabasesPath(scope) + "/collections";
|
|
|
- Map<String, Object> body = new LinkedHashMap<>();
|
|
|
- Map<String, Object> configuration = new LinkedHashMap<>();
|
|
|
- Map<String, Object> hnsw = new LinkedHashMap<>();
|
|
|
- hnsw.put("space", "cosine");
|
|
|
- configuration.put("hnsw", hnsw);
|
|
|
- body.put("configuration", configuration);
|
|
|
- body.put("get_or_create", Boolean.TRUE);
|
|
|
- Map<String, Object> metadata = new LinkedHashMap<>();
|
|
|
- metadata.put("description", "用户创建的工作流知识库");
|
|
|
- metadata.put("owner", "team-ai");
|
|
|
- body.put("metadata", metadata);
|
|
|
- body.put("name", WORKFLOW_COLLECTION);
|
|
|
- body.put("schema", null);
|
|
|
- Map<String, Object> resp = postJsonReturnMap(url, body);
|
|
|
- Object id = resp.get("id");
|
|
|
- if (id != null) {
|
|
|
- return id.toString();
|
|
|
- }
|
|
|
- return Objects.requireNonNull(findCollectionIdByName(scope, WORKFLOW_COLLECTION),
|
|
|
- "创建默认工作流集合失败: " + WORKFLOW_COLLECTION);
|
|
|
- }
|
|
|
-
|
|
|
- @SuppressWarnings("unchecked")
|
|
|
- private String findCollectionIdByName(ChromaScope scope, String name) {
|
|
|
- String url = tenantsDatabasesPath(scope) + "/collections?limit=500&offset=0";
|
|
|
- Object resp = getJsonObject(url);
|
|
|
- List<?> collections;
|
|
|
- // 兼容两类响应:
|
|
|
- // 1) [](直接数组)
|
|
|
- // 2) {"collections":[...]}(对象包裹数组)
|
|
|
- if (resp instanceof List) {
|
|
|
- collections = (List<?>) resp;
|
|
|
- } else if (resp instanceof Map) {
|
|
|
- Object cols = ((Map<String, Object>) resp).get("collections");
|
|
|
- if (!(cols instanceof List)) {
|
|
|
- return null;
|
|
|
- }
|
|
|
- collections = (List<?>) cols;
|
|
|
- } else {
|
|
|
- return null;
|
|
|
- }
|
|
|
- for (Object o : collections) {
|
|
|
- if (!(o instanceof Map)) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- Map<String, Object> m = (Map<String, Object>) o;
|
|
|
- if (name.equals(Objects.toString(m.get("name"), null))) {
|
|
|
- return Objects.toString(m.get("id"), null);
|
|
|
- }
|
|
|
- }
|
|
|
- return null;
|
|
|
- }
|
|
|
-
|
|
|
- private void validateIndex(IndexReq req) {
|
|
|
- if (StringUtils.isBlank(req.getTenantCode()) && StringUtils.isBlank(req.getTenantId())) {
|
|
|
- throw new IllegalArgumentException("tenantCode 或 tenantId 至少填一个");
|
|
|
- }
|
|
|
- if (StringUtils.isBlank(req.getDocId())) {
|
|
|
- throw new IllegalArgumentException("docId 不能为空");
|
|
|
- }
|
|
|
- if (StringUtils.isBlank(req.getText())) {
|
|
|
- throw new IllegalArgumentException("text 不能为空");
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- private List<String> chunk(String text) {
|
|
|
- int size = Math.max(100, props.getChunkSize());
|
|
|
- int overlap = Math.max(0, Math.min(props.getChunkOverlap(), size - 1));
|
|
|
- int step = Math.max(1, size - overlap);
|
|
|
- String t = text.trim();
|
|
|
- if (t.isEmpty()) {
|
|
|
- return Collections.emptyList();
|
|
|
- }
|
|
|
- List<String> out = new ArrayList<>();
|
|
|
- for (int start = 0; start < t.length(); start += step) {
|
|
|
- int end = Math.min(start + size, t.length());
|
|
|
- out.add(t.substring(start, end));
|
|
|
- if (end >= t.length()) {
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- return out;
|
|
|
- }
|
|
|
-
|
|
|
- private List<Double> embedSingle(String input, EmbeddingProfile profile) {
|
|
|
- List<List<Double>> batch = embedBatch(Collections.singletonList(input), profile);
|
|
|
- return batch.isEmpty() ? Collections.emptyList() : batch.get(0);
|
|
|
- }
|
|
|
-
|
|
|
- @SuppressWarnings("unchecked")
|
|
|
- private List<List<Double>> embedBatch(List<String> inputs, EmbeddingProfile profile) {
|
|
|
- HttpHeaders headers = new HttpHeaders();
|
|
|
- headers.setContentType(MediaType.APPLICATION_JSON);
|
|
|
- headers.setBearerAuth(profile.apiKey);
|
|
|
- Map<String, Object> body = new LinkedHashMap<>();
|
|
|
- body.put("model", profile.model);
|
|
|
- body.put("input", inputs);
|
|
|
- HttpEntity<Map<String, Object>> entity = new HttpEntity<>(body, headers);
|
|
|
- Map<String, Object> resp = restTemplate.postForObject(profile.url, entity, Map.class);
|
|
|
- if (resp == null) {
|
|
|
- throw new IllegalStateException("Embedding 接口无响应");
|
|
|
- }
|
|
|
- Object dataObj = resp.get("data");
|
|
|
- if (!(dataObj instanceof List)) {
|
|
|
- throw new IllegalStateException("Embedding 响应格式异常");
|
|
|
- }
|
|
|
- List<List<Double>> vecs = new ArrayList<>();
|
|
|
- for (Object row : (List<?>) dataObj) {
|
|
|
- if (!(row instanceof Map)) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- Object emb = ((Map<?, ?>) row).get("embedding");
|
|
|
- if (!(emb instanceof List)) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- List<Double> v = new ArrayList<>();
|
|
|
- for (Object x : (List<?>) emb) {
|
|
|
- if (x instanceof Number) {
|
|
|
- v.add(((Number) x).doubleValue());
|
|
|
- }
|
|
|
- }
|
|
|
- vecs.add(v);
|
|
|
- }
|
|
|
- return vecs;
|
|
|
- }
|
|
|
-
|
|
|
- private EmbeddingProfile resolveEmbeddingProfile(String scene) {
|
|
|
- // 多模型配置优先:routing.scene -> routing.embeddingDefault -> 旧字段回退
|
|
|
- String alias = null;
|
|
|
- if (props.getRouting() != null && props.getRouting().getScenes() != null) {
|
|
|
- alias = props.getRouting().getScenes().get(scene);
|
|
|
- }
|
|
|
- if (StringUtils.isBlank(alias) && props.getRouting() != null) {
|
|
|
- alias = props.getRouting().getEmbeddingDefault();
|
|
|
- }
|
|
|
- if (StringUtils.isNotBlank(alias)) {
|
|
|
- EmbeddingProfile p = buildProfileByAlias(alias);
|
|
|
- if (p != null) {
|
|
|
- return p;
|
|
|
- }
|
|
|
- log.warn("ai.rag.routing 指定模型别名不存在 alias={},将回退旧配置", alias);
|
|
|
- }
|
|
|
- return legacyEmbeddingProfile();
|
|
|
- }
|
|
|
-
|
|
|
- private EmbeddingProfile buildProfileByAlias(String alias) {
|
|
|
- if (props.getModels() == null || props.getProviders() == null) {
|
|
|
- return null;
|
|
|
- }
|
|
|
- AiRagProperties.ModelConfig modelCfg = props.getModels().get(alias);
|
|
|
- if (modelCfg == null) {
|
|
|
- return null;
|
|
|
- }
|
|
|
- AiRagProperties.ProviderConfig providerCfg = props.getProviders().get(modelCfg.getProvider());
|
|
|
- if (providerCfg == null) {
|
|
|
- throw new IllegalStateException("未找到 provider 配置: " + modelCfg.getProvider());
|
|
|
- }
|
|
|
- if (!"embedding".equalsIgnoreCase(StringUtils.defaultString(modelCfg.getType(), "embedding"))) {
|
|
|
- throw new IllegalStateException("模型别名不是 embedding 类型: " + alias);
|
|
|
- }
|
|
|
- String baseUrl = StringUtils.trimToEmpty(providerCfg.getBaseUrl());
|
|
|
- String url = normalizeEmbeddingUrl(baseUrl);
|
|
|
- String apiKey = StringUtils.trimToEmpty(providerCfg.getApiKey());
|
|
|
- String model = StringUtils.trimToEmpty(modelCfg.getModel());
|
|
|
- if (StringUtils.isAnyBlank(url, apiKey, model)) {
|
|
|
- throw new IllegalStateException("模型别名配置不完整 alias=" + alias + ",请检查 providers/models");
|
|
|
- }
|
|
|
- return new EmbeddingProfile(url, apiKey, model);
|
|
|
- }
|
|
|
-
|
|
|
- private EmbeddingProfile legacyEmbeddingProfile() {
|
|
|
- String url = StringUtils.trimToEmpty(props.getEmbeddingUrl());
|
|
|
- String apiKey = StringUtils.trimToEmpty(props.getEmbeddingApiKey());
|
|
|
- String model = StringUtils.trimToEmpty(props.getEmbeddingModel());
|
|
|
- if (StringUtils.isBlank(apiKey)) {
|
|
|
- throw new IllegalStateException("未配置 ai.rag.embedding-api-key,或未配置可用的 ai.rag.routing/models/providers");
|
|
|
- }
|
|
|
- if (StringUtils.isAnyBlank(url, model)) {
|
|
|
- throw new IllegalStateException("未配置 ai.rag.embedding-url / ai.rag.embedding-model");
|
|
|
- }
|
|
|
- return new EmbeddingProfile(normalizeEmbeddingUrl(url), apiKey, model);
|
|
|
- }
|
|
|
-
|
|
|
- private String normalizeEmbeddingUrl(String baseOrFull) {
|
|
|
- String url = StringUtils.trimToEmpty(baseOrFull);
|
|
|
- if (url.endsWith("/")) {
|
|
|
- url = url.substring(0, url.length() - 1);
|
|
|
- }
|
|
|
- if (url.endsWith("/embeddings")) {
|
|
|
- return url;
|
|
|
- }
|
|
|
- if (url.endsWith("/v1")) {
|
|
|
- return url + "/embeddings";
|
|
|
- }
|
|
|
- return url + "/v1/embeddings";
|
|
|
- }
|
|
|
-
|
|
|
- private String trim(Object val) {
|
|
|
- return val == null ? null : StringUtils.trimToNull(String.valueOf(val));
|
|
|
- }
|
|
|
-
|
|
|
- private Map<String, Object> stripControlFields(RecordUpsertReq req) {
|
|
|
- Map<String, Object> body = new LinkedHashMap<>();
|
|
|
- body.put("ids", req.getIds());
|
|
|
- body.put("documents", req.getDocuments());
|
|
|
- body.put("embeddings", req.getEmbeddings());
|
|
|
- body.put("metadatas", req.getMetadatas());
|
|
|
- return body;
|
|
|
- }
|
|
|
-
|
|
|
- private Map<String, Object> stripControlFields(RecordQueryReq req) {
|
|
|
- Map<String, Object> body = new LinkedHashMap<>();
|
|
|
- body.put("query_embeddings", req.getQueryEmbeddings());
|
|
|
- body.put("n_results", req.getNResults());
|
|
|
- if (req.getWhere() != null) {
|
|
|
- body.put("where", req.getWhere());
|
|
|
- }
|
|
|
- if (req.getInclude() != null) {
|
|
|
- body.put("include", req.getInclude());
|
|
|
- }
|
|
|
- return body;
|
|
|
- }
|
|
|
-
|
|
|
- private Map<String, Object> stripControlFields(RecordDeleteReq req) {
|
|
|
- Map<String, Object> body = new LinkedHashMap<>();
|
|
|
- if (req.getIds() != null) {
|
|
|
- body.put("ids", req.getIds());
|
|
|
- }
|
|
|
- if (req.getWhere() != null) {
|
|
|
- body.put("where", req.getWhere());
|
|
|
- }
|
|
|
- return body;
|
|
|
- }
|
|
|
-
|
|
|
- private void postJson(String url, Map<String, Object> body) {
|
|
|
- HttpHeaders h = chromaHeaders();
|
|
|
- h.setContentType(MediaType.APPLICATION_JSON);
|
|
|
- restTemplate.exchange(url, HttpMethod.POST, new HttpEntity<>(body, h), String.class);
|
|
|
- }
|
|
|
-
|
|
|
- private Map<String, Object> postJsonReturnMap(String url, Map<String, Object> body) {
|
|
|
- HttpHeaders h = chromaHeaders();
|
|
|
- h.setContentType(MediaType.APPLICATION_JSON);
|
|
|
- return restTemplate.exchange(url, HttpMethod.POST, new HttpEntity<>(body, h),
|
|
|
- new ParameterizedTypeReference<Map<String, Object>>() {}).getBody();
|
|
|
- }
|
|
|
-
|
|
|
- private Map<String, Object> getJsonMap(String url) {
|
|
|
- HttpHeaders h = chromaHeaders();
|
|
|
- return restTemplate.exchange(url, HttpMethod.GET, new HttpEntity<>(h),
|
|
|
- new ParameterizedTypeReference<Map<String, Object>>() {}).getBody();
|
|
|
- }
|
|
|
-
|
|
|
- private Object getJsonObject(String url) {
|
|
|
- HttpHeaders h = chromaHeaders();
|
|
|
- return restTemplate.exchange(url, HttpMethod.GET, new HttpEntity<>(h), Object.class).getBody();
|
|
|
- }
|
|
|
-
|
|
|
- private HttpHeaders chromaHeaders() {
|
|
|
- HttpHeaders h = new HttpHeaders();
|
|
|
- if (StringUtils.isNotBlank(props.getChromaToken())) {
|
|
|
- h.add("x-chroma-token", props.getChromaToken());
|
|
|
- }
|
|
|
- return h;
|
|
|
- }
|
|
|
-
|
|
|
- @SuppressWarnings("unchecked")
|
|
|
- private List<String> extractDocuments(Map<String, Object> queryResp) {
|
|
|
- Object docs = queryResp.get("documents");
|
|
|
- if (!(docs instanceof List) || ((List<?>) docs).isEmpty()) {
|
|
|
- return Collections.emptyList();
|
|
|
- }
|
|
|
- Object first = ((List<?>) docs).get(0);
|
|
|
- if (!(first instanceof List)) {
|
|
|
- return Collections.emptyList();
|
|
|
- }
|
|
|
- List<String> out = new ArrayList<>();
|
|
|
- for (Object x : (List<?>) first) {
|
|
|
- if (x != null) {
|
|
|
- out.add(x.toString());
|
|
|
- }
|
|
|
- }
|
|
|
- return out;
|
|
|
- }
|
|
|
-}
|