Selaa lähdekoodia

feat:接豆包录音识别转文字功能

caoliqin 2 viikkoa sitten
vanhempi
commit
9278e8a637

+ 56 - 0
fs-service/src/main/java/com/fs/aiSoundReplication/config/AsrConfig.java

@@ -0,0 +1,56 @@
+package com.fs.aiSoundReplication.config;
+
+import lombok.Data;
+import org.springframework.boot.context.properties.ConfigurationProperties;
+import org.springframework.context.annotation.Configuration;
+
+/**
+ * HTTP configuration for Doubao "big-model audio file recognition" (ASR),
+ * bound from properties under the {@code doubao.asr} prefix.
+ * <p>
+ * Two credential styles are supported: the legacy pair {@link #appKey} + {@link #accessToken},
+ * or the single new-console {@link #apiKey}. When {@code apiKey} is non-blank it takes precedence.
+ * All three credential fields are excluded from the Lombok-generated {@code toString()} so that
+ * logging this bean cannot leak secrets.
+ *
+ * @author caoliqin
+ */
+@Data
+@Configuration
+@ConfigurationProperties(prefix = "doubao.asr")
+public class AsrConfig {
+
+    /** Endpoint that accepts a new recognition task. */
+    private String submitUrl = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit";
+    /** Endpoint that is polled for the result of a submitted task. */
+    private String queryUrl = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/query";
+
+    /** Legacy-console credential (APP ID). Kept out of toString() because it is a secret. */
+    @lombok.ToString.Exclude
+    private String appKey;
+
+    /** Legacy-console credential (Access Token). Kept out of toString() because it is a secret. */
+    @lombok.ToString.Exclude
+    private String accessToken;
+
+    /**
+     * New-console credential. When non-blank, {@link #appKey} and {@link #accessToken} are no longer used.
+     * Kept out of toString() because it is a secret.
+     */
+    @lombok.ToString.Exclude
+    private String apiKey;
+
+    /** Model resource id: {@code volc.bigasr.auc} for model 1.0, {@code volc.seedasr.auc} for model 2.0. */
+    private String resourceId = "volc.seedasr.auc";
+
+    /** User id attached to requests; marks them as coming from the WeCom conversation archive. */
+    private String uid = "fs-msg-audit";
+
+    /**
+     * Audio container format assumed when the caller does not pass one. Must match the real format
+     * behind the audio URL (raw/wav/mp3/ogg); WeCom amr recordings have to be transcoded first.
+     */
+    private String defaultAudioFormat = "mp3";
+
+    /** Language used by the single-argument recognition call. */
+    private String defaultLanguage = "zh-CN";
+
+    /** Value sent as the {@code enable_itn} request flag. */
+    private boolean enableItn = true;
+    /** Value sent as the {@code enable_punc} request flag. */
+    private boolean enablePunc = true;
+
+    /** Pause between two result polls, in milliseconds. */
+    private long pollIntervalMs = 2000L;
+    /** Maximum number of result polls before a task is treated as timed out. */
+    private int maxQueryAttempts = 90;
+
+    /** Master switch for the ASR integration. */
+    private boolean enabled = true;
+
+    /**
+     * @return {@code true} when a non-blank new-console {@code apiKey} is configured
+     */
+    public boolean useNewConsoleApiKey() {
+        return hasText(apiKey);
+    }
+
+    /**
+     * @return {@code true} when either the new-console {@code apiKey}, or both legacy values
+     *         ({@code appKey} and {@code accessToken}), are non-blank
+     */
+    public boolean hasCredentials() {
+        return useNewConsoleApiKey() || (hasText(appKey) && hasText(accessToken));
+    }
+
+    /** Null-safe check that a value contains at least one non-whitespace character. */
+    private static boolean hasText(String value) {
+        return value != null && !value.trim().isEmpty();
+    }
+}

+ 15 - 0
fs-service/src/main/java/com/fs/aiSoundReplication/service/AsrService.java

@@ -0,0 +1,15 @@
+package com.fs.aiSoundReplication.service;
+
+/**
+ * Doubao ASR: transcribes audio that is reachable at a public URL into text.
+ * The implementation reads its settings from the {@code doubao.asr} configuration.
+ * Usage: inject {@code AsrService} and call {@link #recognizeFromUrl(String)}.
+ * Credentials and endpoint URLs are defined in {@code AsrConfig}.
+ */
+public interface AsrService {
+
+    /**
+     * Transcribes the audio using the default format and language from the configuration.
+     *
+     * @param audioUrl publicly reachable URL of the audio file
+     * @return the recognized text; the {@code AsrServiceImpl} implementation returns {@code null}
+     *         when recognition is skipped or fails
+     */
+    String recognizeFromUrl(String audioUrl);
+
+    /**
+     * Transcribes the audio with an explicit container format and language, e.g. "mp3", "zh-CN".
+     *
+     * @param audioUrl publicly reachable URL of the audio file
+     * @param format   audio container format
+     * @param language language of the recording
+     * @return the recognized text; the {@code AsrServiceImpl} implementation returns {@code null}
+     *         when recognition is skipped or fails
+     */
+    String recognizeFromUrl(String audioUrl, String format, String language);
+}

+ 192 - 0
fs-service/src/main/java/com/fs/aiSoundReplication/service/impl/AsrServiceImpl.java

@@ -0,0 +1,192 @@
+package com.fs.aiSoundReplication.service.impl;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fs.aiSoundReplication.config.AsrConfig;
+import com.fs.aiSoundReplication.service.AsrService;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import okhttp3.MediaType;
+import okhttp3.OkHttpClient;
+import okhttp3.Request;
+import okhttp3.RequestBody;
+import okhttp3.Response;
+import org.springframework.stereotype.Service;
+import org.springframework.util.StringUtils;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.UUID;
+
+/**
+ * 豆包大模型录音文件识别
+ * 状态码见官方文档
+ */
+@Service
+@Slf4j
+@RequiredArgsConstructor
+public class AsrServiceImpl implements AsrService {
+
+    private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
+    private static final String STATUS_OK = "20000000";
+    private static final String STATUS_PROCESSING = "20000001";
+    private static final String STATUS_QUEUED = "20000002";
+
+    private final AsrConfig config;
+    private final OkHttpClient okHttpClient;
+    private final ObjectMapper objectMapper;
+
+    @Override
+    public String recognizeFromUrl(String audioUrl) {
+        return recognizeFromUrl(audioUrl, config.getDefaultAudioFormat(), config.getDefaultLanguage());
+    }
+
+    @Override
+    public String recognizeFromUrl(String audioUrl, String format, String language) {
+        if (!config.isEnabled() || !config.hasCredentials()) {
+            log.warn("ASR 未启用或未配置 doubao.asr,跳过");
+            return null;
+        }
+        if (!StringUtils.hasText(audioUrl)) {
+            return null;
+        }
+        String fmt = StringUtils.hasText(format) ? format : config.getDefaultAudioFormat();
+        String taskId = UUID.randomUUID().toString();
+        try {
+            if (!submitTask(taskId, audioUrl, fmt, language)) {
+                return null;
+            }
+            return pollQuery(taskId);
+        } catch (Exception e) {
+            log.error("ASR 识别异常, url={}", audioUrl, e);
+            return null;
+        }
+    }
+
+    private boolean submitTask(String taskId, String audioUrl, String format, String language) throws Exception {
+        Map<String, Object> user = new HashMap<>();
+        user.put("uid", config.getUid());
+
+        Map<String, Object> audio = new HashMap<>();
+        audio.put("url", audioUrl);
+        audio.put("format", format);
+        if (StringUtils.hasText(language)) {
+            audio.put("language", language);
+        }
+
+        Map<String, Object> req = new HashMap<>();
+        req.put("model_name", "bigmodel");
+        req.put("enable_itn", config.isEnableItn());
+        req.put("enable_punc", config.isEnablePunc());
+
+        Map<String, Object> body = new HashMap<>();
+        body.put("user", user);
+        body.put("audio", audio);
+        body.put("request", req);
+
+        String json = objectMapper.writeValueAsString(body);
+        Request.Builder builder = new Request.Builder()
+                .url(config.getSubmitUrl())
+                .post(RequestBody.create(json, JSON))
+                .addHeader("Content-Type", "application/json");
+
+        applySubmitHeaders(builder, taskId);
+
+        try (Response response = okHttpClient.newCall(builder.build()).execute()) {
+            String code = headerStatus(response, "X-Api-Status-Code");
+            if (STATUS_OK.equals(code)) {
+                log.info("ASR submit 成功, taskId={}, logId={}", taskId, response.header("X-Tt-Logid"));
+                return true;
+            }
+            log.error("ASR submit 失败, taskId={}, status={}, message={}, body={}",
+                    taskId, code, response.header("X-Api-Message"), bodyString(response));
+            return false;
+        }
+    }
+
+    private String pollQuery(String taskId) throws Exception {
+        Request.Builder builder = new Request.Builder()
+                .url(config.getQueryUrl())
+                .post(RequestBody.create("{}", JSON))
+                .addHeader("Content-Type", "application/json");
+
+        applyQueryHeaders(builder, taskId);
+
+        Request request = builder.build();
+        for (int i = 0; i < config.getMaxQueryAttempts(); i++) {
+            try (Response response = okHttpClient.newCall(request).execute()) {
+                String code = headerStatus(response, "X-Api-Status-Code");
+                String bodyStr = bodyString(response);
+
+                if (STATUS_OK.equals(code)) {
+                    String text = extractText(bodyStr);
+                    if (StringUtils.hasText(text)) {
+                        return text.trim();
+                    }
+                    log.warn("ASR query 成功但 result.text 为空, taskId={}", taskId);
+                    return null;
+                }
+                if (STATUS_PROCESSING.equals(code) || STATUS_QUEUED.equals(code)) {
+                    Thread.sleep(config.getPollIntervalMs());
+                    continue;
+                }
+                log.error("ASR query 失败, taskId={}, status={}, message={}, body={}",
+                        taskId, code, response.header("X-Api-Message"), bodyStr);
+                return null;
+            }
+        }
+        log.error("ASR query 超时, taskId={}, attempts={}", taskId, config.getMaxQueryAttempts());
+        return null;
+    }
+
+    private void applySubmitHeaders(Request.Builder builder, String taskId) {
+        builder.addHeader("X-Api-Resource-Id", config.getResourceId());
+        builder.addHeader("X-Api-Request-Id", taskId);
+        if (config.useNewConsoleApiKey()) {
+            builder.addHeader("X-Api-Key", config.getApiKey());
+            builder.addHeader("X-Api-Sequence", "-1");
+        } else {
+            builder.addHeader("X-Api-App-Key", config.getAppKey());
+            builder.addHeader("X-Api-Access-Key", config.getAccessToken());
+            builder.addHeader("X-Api-Sequence", "-1");
+        }
+    }
+
+    private void applyQueryHeaders(Request.Builder builder, String taskId) {
+        builder.addHeader("X-Api-Resource-Id", config.getResourceId());
+        builder.addHeader("X-Api-Request-Id", taskId);
+        if (config.useNewConsoleApiKey()) {
+            builder.addHeader("X-Api-Key", config.getApiKey());
+        } else {
+            builder.addHeader("X-Api-App-Key", config.getAppKey());
+            builder.addHeader("X-Api-Access-Key", config.getAccessToken());
+        }
+    }
+
+    private static String headerStatus(Response response, String name) {
+        String v = response.header(name);
+        if (v == null) {
+            v = response.header(name.toLowerCase());
+        }
+        return v != null ? v.trim() : "";
+    }
+
+    private static String bodyString(Response response) throws java.io.IOException {
+        if (response.body() == null) {
+            return "";
+        }
+        return response.body().string();
+    }
+
+    private String extractText(String json) throws Exception {
+        if (!StringUtils.hasText(json)) {
+            return null;
+        }
+        JsonNode root = objectMapper.readTree(json);
+        JsonNode text = root.path("result").path("text");
+        if (text.isMissingNode() || text.isNull()) {
+            return null;
+        }
+        return text.asText();
+    }
+}

+ 82 - 0
fs-service/src/main/java/com/fs/utils/AmrToMp3Util.java

@@ -0,0 +1,82 @@
+package com.fs.utils;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+/**
+ * 企微 Finance 下载的语音为 amr,豆包 ASR 要求 mp3/wav/ogg 等,用本机已安装的 ffmpeg 做转换。
+ * 需要本地有 ffmpeg
+ */
+@Slf4j
+public final class AmrToMp3Util {
+
+    private AmrToMp3Util() {
+    }
+
+    /**
+     * amr 字节转 mp3 字节。服务器需已安装 ffmpeg 并在 PATH 中,或通过参数传入绝对路径。
+     *
+     * @param amrBytes         原始 amr
+     * @param ffmpegExecutable 如 ffmpeg 或 D:/tools/ffmpeg.exe
+     */
+    public static byte[] amrBytesToMp3Bytes(byte[] amrBytes, String ffmpegExecutable) throws Exception {
+        if (amrBytes == null || amrBytes.length == 0) {
+            throw new IllegalArgumentException("amr empty");
+        }
+        Path in = Files.createTempFile("qw_amr_", ".amr");
+        Path out = Files.createTempFile("qw_mp3_", ".mp3");
+        try {
+            Files.write(in, amrBytes);
+            ProcessBuilder pb = new ProcessBuilder(
+                    ffmpegExecutable,
+                    "-y",
+                    "-i", in.toAbsolutePath().toString(),
+                    "-vn", // 表示不处理视频
+                    "-acodec", "libmp3lame", //指定 MP3 编码器
+                    "-q:a", "4", //控制音质,数值越小质量越高
+                    out.toAbsolutePath().toString()
+            );
+            pb.redirectErrorStream(true);
+            Process p = pb.start();
+            try (InputStream err = p.getInputStream()) {
+                drainToLog(err);
+            }
+            int code = p.waitFor();
+            if (code != 0) {
+                throw new IllegalStateException("ffmpeg exit=" + code);
+            }
+            return Files.readAllBytes(out);
+        } finally {
+            try {
+                Files.deleteIfExists(in);
+            } catch (Exception ignored) {
+            }
+            try {
+                Files.deleteIfExists(out);
+            } catch (Exception ignored) {
+            }
+        }
+    }
+
+    private static void drainToLog(InputStream in) throws java.io.IOException {
+        byte[] buf = new byte[8192];
+        int n;
+        ByteArrayOutputStream acc = new ByteArrayOutputStream();
+        while ((n = in.read(buf)) != -1) {
+            acc.write(buf, 0, n);
+        }
+        byte[] all = acc.toByteArray();
+        if (all.length > 0 && log.isDebugEnabled()) {
+            String s = new String(all, StandardCharsets.UTF_8);
+            if (s.length() > 2000) {
+                s = s.substring(0, 2000) + "...";
+            }
+            log.debug("ffmpeg: {}", s);
+        }
+    }
+}

+ 15 - 0
fs-service/src/main/resources/application-config-dev.yml

@@ -100,6 +100,21 @@ cloud_host:
   projectCode: DEV
   spaceName:
   volcengineUrl:
+
+# Doubao speech - big-model audio file recognition (ASR)
+# NOTE(review): app-key and access-token below are committed in plain text; consider supplying
+# them through environment variables or a secret store and rotating these values - confirm with the owner.
+doubao:
+  asr:
+    enabled: true
+    # Legacy console: APP ID -> request header X-Api-App-Key
+    app-key: 9901314779
+    # Legacy console: Access Token -> request header X-Api-Access-Key
+    access-token: ahvjLYU_CX86otk0ffpkQTGjQPz_iOvw
+    # New console: set this when only an API Key is used, and leave app-key / access-token empty
+    # api-key: ''
+    resource-id: volc.seedasr.auc
+    default-audio-format: mp3
+    default-language: zh-CN
+
 headerImg:
   imgUrl: https://jz-cos-1356808054.cos.ap-chengdu.myqcloud.com/fs/20250515/0877754b59814ea8a428fa3697b20e68.png
 ipad:

+ 25 - 0
fs-service/src/main/resources/application-config-druid-ddgy.yml

@@ -89,6 +89,21 @@ cloud_host:
   projectCode: DDGY
   spaceName: ddgy-2114522511
   volcengineUrl: https://ddgyvolcengine.ylrztop.com
+
+# Doubao speech - big-model audio file recognition (ASR)
+# NOTE(review): app-key and access-token below are committed in plain text; consider supplying
+# them through environment variables or a secret store and rotating these values - confirm with the owner.
+doubao:
+  asr:
+    enabled: true
+    # Legacy console: APP ID -> request header X-Api-App-Key
+    app-key: 9901314779
+    # Legacy console: Access Token -> request header X-Api-Access-Key
+    access-token: ahvjLYU_CX86otk0ffpkQTGjQPz_iOvw
+    # New console: set this when only an API Key is used, and leave app-key / access-token empty
+    # api-key: ''
+    resource-id: volc.seedasr.auc
+    default-audio-format: mp3
+    default-language: zh-CN
+
 #看课授权时显示的头像
 headerImg:
   imgUrl: https://ddgy-1323137866.cos.ap-chongqing.myqcloud.com/fs/20251010/ddgy.jpg
@@ -102,3 +117,13 @@ wx_miniapp_temp:
   pay_order_temp_id:
   inquiry_temp_id:
 
+sysconfig:
+  # Keywords: configuration parameters that contain any of them are masked (hidden)
+  hidden-key-list: api/app/token/key/secret/access
+  # Whether hiding of sensitive parameters is enabled
+  hide-secret: true
+  # System version number
+  sysVersion: v20260217
+  # Whether to offer business-group selection at login
+  show-dynamic-groupid: true
+