|
@@ -0,0 +1,587 @@
|
|
|
|
|
+package com.ruoyi.aicall.controller;
|
|
|
|
|
+
|
|
|
|
|
+import com.alibaba.fastjson.JSON;
|
|
|
|
|
+import com.alibaba.fastjson.JSONArray;
|
|
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
|
|
+import com.ruoyi.cc.service.IFsConfService;
|
|
|
|
|
+import com.ruoyi.common.annotation.Log;
|
|
|
|
|
+import com.ruoyi.common.core.controller.BaseController;
|
|
|
|
|
+import com.ruoyi.common.core.domain.AjaxResult;
|
|
|
|
|
+import com.ruoyi.common.enums.BusinessType;
|
|
|
|
|
+import com.ruoyi.common.utils.StringUtils;
|
|
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
|
|
+import okhttp3.MediaType;
|
|
|
|
|
+import okhttp3.MultipartBody;
|
|
|
|
|
+import okhttp3.OkHttpClient;
|
|
|
|
|
+import okhttp3.Request;
|
|
|
|
|
+import okhttp3.RequestBody;
|
|
|
|
|
+import okhttp3.Response;
|
|
|
|
|
+import okhttp3.WebSocket;
|
|
|
|
|
+import okhttp3.WebSocketListener;
|
|
|
|
|
+import okio.Buffer;
|
|
|
|
|
+import okio.ByteString;
|
|
|
|
|
+import org.apache.shiro.authz.annotation.RequiresPermissions;
|
|
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
|
|
+import org.springframework.stereotype.Controller;
|
|
|
|
|
+import org.springframework.web.bind.annotation.GetMapping;
|
|
|
|
|
+import org.springframework.web.bind.annotation.PostMapping;
|
|
|
|
|
+import org.springframework.web.bind.annotation.RequestMapping;
|
|
|
|
|
+import org.springframework.web.bind.annotation.RequestParam;
|
|
|
|
|
+import org.springframework.web.bind.annotation.ResponseBody;
|
|
|
|
|
+import org.springframework.web.multipart.MultipartFile;
|
|
|
|
|
+
|
|
|
|
|
+import javax.crypto.Mac;
|
|
|
|
|
+import javax.crypto.spec.SecretKeySpec;
|
|
|
|
|
+import javax.servlet.http.HttpServletRequest;
|
|
|
|
|
+import java.io.ByteArrayOutputStream;
|
|
|
|
|
+import java.io.IOException;
|
|
|
|
|
+import java.net.URLEncoder;
|
|
|
|
|
+import java.nio.charset.StandardCharsets;
|
|
|
|
|
+import java.security.MessageDigest;
|
|
|
|
|
+import java.time.ZoneId;
|
|
|
|
|
+import java.time.ZonedDateTime;
|
|
|
|
|
+import java.time.format.DateTimeFormatter;
|
|
|
|
|
+import java.util.Base64;
|
|
|
|
|
+import java.util.LinkedHashMap;
|
|
|
|
|
+import java.util.Map;
|
|
|
|
|
+import java.util.concurrent.CountDownLatch;
|
|
|
|
|
+import java.util.concurrent.TimeUnit;
|
|
|
|
|
+import java.util.concurrent.atomic.AtomicReference;
|
|
|
|
|
+
|
|
|
|
|
+/**
|
|
|
|
|
+ * 科大讯飞声音克隆工具
|
|
|
|
|
+ *
|
|
|
|
|
+ * @author ruoyi
|
|
|
|
|
+ * @date 2026-06-01
|
|
|
|
|
+ */
|
|
|
|
|
+@Controller
|
|
|
|
|
+@RequestMapping("/aicall/xfvoiceclone")
|
|
|
|
|
+@Slf4j
|
|
|
|
|
+public class XfVoiceCloneController extends BaseController {
|
|
|
|
|
+ private static final MediaType JSON_MEDIA_TYPE = MediaType.parse("application/json; charset=utf-8");
|
|
|
|
|
+ private static final MediaType OCTET_STREAM = MediaType.parse("application/octet-stream");
|
|
|
|
|
+ private static final String TOKEN_URL = "http://avatar-hci.xfyousheng.com/aiauth/v1/token";
|
|
|
|
|
+ private static final String TRAIN_TEXT_URL = "http://opentrain.xfyousheng.com/voice_train/task/traintext";
|
|
|
|
|
+ private static final String TASK_ADD_URL = "http://opentrain.xfyousheng.com/voice_train/task/add";
|
|
|
|
|
+ private static final String TASK_SUBMIT_WITH_AUDIO_URL = "http://opentrain.xfyousheng.com/voice_train/task/submitWithAudio";
|
|
|
|
|
+ private static final String TASK_RESULT_URL = "http://opentrain.xfyousheng.com/voice_train/task/result";
|
|
|
|
|
+ private static final String CLONE_TTS_WS_URL = "wss://cn-huabei-1.xf-yun.com/v1/private/voice_clone";
|
|
|
|
|
+ private static final String CLONE_TTS_HOST = "cn-huabei-1.xf-yun.com";
|
|
|
|
|
+ private static final String CLONE_TTS_PATH = "/v1/private/voice_clone";
|
|
|
|
|
+ private static final Long DEFAULT_TEXT_ID = 5001L;
|
|
|
|
|
+ private static final OkHttpClient HTTP_CLIENT = new OkHttpClient.Builder()
|
|
|
|
|
+ .connectTimeout(30, TimeUnit.SECONDS)
|
|
|
|
|
+ .readTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .writeTimeout(60, TimeUnit.SECONDS)
|
|
|
|
|
+ .build();
|
|
|
|
|
+ private final String prefix = "aicall/xfvoiceclone";
|
|
|
|
|
+
|
|
|
|
|
+ @Autowired
|
|
|
|
|
+ private IFsConfService fsConfService;
|
|
|
|
|
+
|
|
|
|
|
+ @RequiresPermissions("aicall:xfvoiceclone:view")
|
|
|
|
|
+ @GetMapping("voiceclone")
|
|
|
|
|
+ public String voiceClone() {
|
|
|
|
|
+ return prefix + "/voiceclone";
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ @GetMapping("/getTrainTexts")
|
|
|
|
|
+ @ResponseBody
|
|
|
|
|
+ public AjaxResult getTrainTexts() {
|
|
|
|
|
+ try {
|
|
|
|
|
+ JSONObject account = loadCloneAccount();
|
|
|
|
|
+ String token = fetchAccessToken(account);
|
|
|
|
|
+ JSONArray textSegs = loadTrainTextsInternal(account, token);
|
|
|
|
|
+ return AjaxResult.success("success", textSegs);
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.error("getTrainTexts error", e);
|
|
|
|
|
+ return AjaxResult.error("获取训练文本失败:\n" + e.getMessage());
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ @RequiresPermissions("aicall:xfvoiceclone:uploadAndTrain")
|
|
|
|
|
+ @Log(title = "讯飞声音克隆-上传训练", businessType = BusinessType.IMPORT)
|
|
|
|
|
+ @PostMapping("/uploadAndTrain")
|
|
|
|
|
+ @ResponseBody
|
|
|
|
|
+ public AjaxResult uploadAndTrain(HttpServletRequest request, @RequestParam("file") MultipartFile file) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ if (file == null || file.isEmpty()) {
|
|
|
|
|
+ return AjaxResult.error("请先选择录音文件");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject account = loadCloneAccount();
|
|
|
|
|
+ String token = fetchAccessToken(account);
|
|
|
|
|
+
|
|
|
|
|
+ String voiceName = request.getParameter("voice_name");
|
|
|
|
|
+ String taskName = defaultIfBlank(request.getParameter("task_name"), voiceName);
|
|
|
|
|
+ String resourceName = defaultIfBlank(request.getParameter("resource_name"), voiceName);
|
|
|
|
|
+ String language = normalizeLanguage(request.getParameter("language"));
|
|
|
|
|
+ String engineVersion = trimToEmpty(request.getParameter("engine_version"));
|
|
|
|
|
+ int sex = parseIntOrDefault(request.getParameter("sex"), 1);
|
|
|
|
|
+ int ageGroup = parseIntOrDefault(request.getParameter("age_group"), 2);
|
|
|
|
|
+ float mosRatio = parseFloatOrDefault(request.getParameter("mos_ratio"), 0.0F);
|
|
|
|
|
+ int denoise = parseIntOrDefault(request.getParameter("denoise"), 0);
|
|
|
|
|
+ String textSegId = request.getParameter("text_seg_id");
|
|
|
|
|
+
|
|
|
|
|
+ if (StringUtils.isBlank(voiceName)) {
|
|
|
|
|
+ return AjaxResult.error("请填写音色名称");
|
|
|
|
|
+ }
|
|
|
|
|
+ if (StringUtils.isBlank(textSegId)) {
|
|
|
|
|
+ return AjaxResult.error("请选择训练文本");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ String taskId = createTrainTask(account, token, taskName, resourceName, language, sex, ageGroup, mosRatio, denoise, engineVersion);
|
|
|
|
|
+ submitWithAudio(account, token, taskId, textSegId, mosRatio, file);
|
|
|
|
|
+ JSONObject taskResult = queryTaskResultInternal(account, token, taskId);
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject data = new JSONObject(true);
|
|
|
|
|
+ data.put("taskId", taskId);
|
|
|
|
|
+ data.put("voiceName", voiceName);
|
|
|
|
|
+ data.put("language", language);
|
|
|
|
|
+ data.put("engineVersion", engineVersion);
|
|
|
|
|
+ if (taskResult != null) {
|
|
|
|
|
+ data.put("assetId", taskResult.getString("assetId"));
|
|
|
|
|
+ data.put("trainVid", taskResult.getString("trainVid"));
|
|
|
|
|
+ data.put("trainStatus", taskResult.getInteger("trainStatus"));
|
|
|
|
|
+ data.put("failedDesc", taskResult.getString("failedDesc"));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ String tips = buildTrainStatusTips(taskResult);
|
|
|
|
|
+ return AjaxResult.success("训练请求已提交。\n" + tips, data);
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.error("uploadAndTrain error", e);
|
|
|
|
|
+ return AjaxResult.error("训练失败:\n" + e.getMessage());
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ @PostMapping("/queryTrainResult")
|
|
|
|
|
+ @ResponseBody
|
|
|
|
|
+ public AjaxResult queryTrainResult(@RequestParam("taskId") String taskId) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ if (StringUtils.isBlank(taskId)) {
|
|
|
|
|
+ return AjaxResult.error("taskId不能为空");
|
|
|
|
|
+ }
|
|
|
|
|
+ JSONObject account = loadCloneAccount();
|
|
|
|
|
+ String token = fetchAccessToken(account);
|
|
|
|
|
+ JSONObject taskResult = queryTaskResultInternal(account, token, taskId);
|
|
|
|
|
+ return AjaxResult.success(buildTrainStatusTips(taskResult), taskResult);
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.error("queryTrainResult error", e);
|
|
|
|
|
+ return AjaxResult.error("查询训练状态失败:\n" + e.getMessage());
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ @RequiresPermissions("aicall:xfvoiceclone:ttsTest")
|
|
|
|
|
+ @PostMapping("/ttsTest")
|
|
|
|
|
+ @ResponseBody
|
|
|
|
|
+ public AjaxResult ttsTest(HttpServletRequest request) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ String assetId = request.getParameter("assetId");
|
|
|
|
|
+ String text = request.getParameter("text");
|
|
|
|
|
+ String engineVersion = trimToEmpty(request.getParameter("engine_version"));
|
|
|
|
|
+ if (StringUtils.isBlank(assetId)) {
|
|
|
|
|
+ return AjaxResult.error("请先完成训练并获取音色ID");
|
|
|
|
|
+ }
|
|
|
|
|
+ if (StringUtils.isBlank(text)) {
|
|
|
|
|
+ return AjaxResult.error("请输入测试文本");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject account = loadCloneAccount();
|
|
|
|
|
+ String audioBase64 = synthesizeCloneAudio(account, assetId, text, engineVersion);
|
|
|
|
|
+ JSONObject data = new JSONObject(true);
|
|
|
|
|
+ data.put("audioBase64", audioBase64);
|
|
|
|
|
+ data.put("audioMime", "audio/mp3");
|
|
|
|
|
+ return AjaxResult.success("试听合成成功", data);
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.error("ttsTest error", e);
|
|
|
|
|
+ return AjaxResult.error("试听合成失败:\n" + e.getMessage());
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private JSONObject loadCloneAccount() {
|
|
|
|
|
+ JSONObject conf = fsConfService.getAsrConf("/autoload_configs/xf_tts.conf.xml");
|
|
|
|
|
+ String appId = trimToEmpty(conf.getString("app-id"));
|
|
|
|
|
+ String apiKey = trimToEmpty(conf.getString("api-key"));
|
|
|
|
|
+ String apiSecret = trimToEmpty(conf.getString("api-secret"));
|
|
|
|
|
+ if (StringUtils.isBlank(appId) || StringUtils.isBlank(apiKey) || StringUtils.isBlank(apiSecret)) {
|
|
|
|
|
+ throw new IllegalStateException("请先在“科大讯飞TTS配置”中填写 app-id、api-key、api-secret。注意这里必须使用已开通“讯飞一句话复刻”服务的应用凭证。");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject account = new JSONObject(true);
|
|
|
|
|
+ account.put("appId", appId);
|
|
|
|
|
+ account.put("apiKey", apiKey);
|
|
|
|
|
+ account.put("apiSecret", apiSecret);
|
|
|
|
|
+ return account;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private String fetchAccessToken(JSONObject account) throws IOException {
|
|
|
|
|
+ long timestamp = System.currentTimeMillis();
|
|
|
|
|
+ JSONObject base = new JSONObject(true);
|
|
|
|
|
+ base.put("appid", account.getString("appId"));
|
|
|
|
|
+ base.put("version", "v1");
|
|
|
|
|
+ base.put("timestamp", String.valueOf(timestamp));
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject body = new JSONObject(true);
|
|
|
|
|
+ body.put("base", base);
|
|
|
|
|
+ body.put("model", "remote");
|
|
|
|
|
+ String bodyText = body.toJSONString();
|
|
|
|
|
+
|
|
|
|
|
+ String keySign = md5Hex(account.getString("apiKey") + timestamp);
|
|
|
|
|
+ String sign = md5Hex(keySign + bodyText);
|
|
|
|
|
+ Map<String, String> headers = new LinkedHashMap<>();
|
|
|
|
|
+ headers.put("Content-Type", "application/json");
|
|
|
|
|
+ headers.put("Authorization", sign);
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject responseJson = postJson(TOKEN_URL, body, headers);
|
|
|
|
|
+ if (!"000000".equals(responseJson.getString("retcode"))) {
|
|
|
|
|
+ throw new IOException("讯飞鉴权失败: " + responseJson.toJSONString());
|
|
|
|
|
+ }
|
|
|
|
|
+ return responseJson.getString("accesstoken");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private JSONArray loadTrainTextsInternal(JSONObject account, String token) throws IOException {
|
|
|
|
|
+ JSONObject body = new JSONObject(true);
|
|
|
|
|
+ body.put("textId", DEFAULT_TEXT_ID);
|
|
|
|
|
+ JSONObject responseJson = postJson(TRAIN_TEXT_URL, body,
|
|
|
|
|
+ buildVoiceTrainHeaders(account.getString("apiKey"), account.getString("appId"), token, body.toJSONString()));
|
|
|
|
|
+ if (responseJson.getIntValue("code") != 0) {
|
|
|
|
|
+ throw new IOException("获取训练文本失败: " + responseJson.toJSONString());
|
|
|
|
|
+ }
|
|
|
|
|
+ JSONObject data = responseJson.getJSONObject("data");
|
|
|
|
|
+ return data == null ? new JSONArray() : data.getJSONArray("textSegs");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private String createTrainTask(JSONObject account, String token, String taskName, String resourceName,
|
|
|
|
|
+ String language, int sex, int ageGroup, float mosRatio,
|
|
|
|
|
+ int denoise, String engineVersion) throws IOException {
|
|
|
|
|
+ JSONObject body = new JSONObject(true);
|
|
|
|
|
+ body.put("taskName", taskName);
|
|
|
|
|
+ body.put("sex", sex);
|
|
|
|
|
+ body.put("ageGroup", ageGroup);
|
|
|
|
|
+ body.put("resourceType", 12);
|
|
|
|
|
+ body.put("thirdUser", "easycallcenter365");
|
|
|
|
|
+ body.put("denoise", denoise);
|
|
|
|
|
+ body.put("mosRatio", mosRatio);
|
|
|
|
|
+ body.put("resourceName", resourceName);
|
|
|
|
|
+ if (StringUtils.isNotBlank(language)) {
|
|
|
|
|
+ body.put("language", language);
|
|
|
|
|
+ }
|
|
|
|
|
+ if (StringUtils.isNotBlank(engineVersion)) {
|
|
|
|
|
+ body.put("engineVersion", engineVersion);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject responseJson = postJson(TASK_ADD_URL, body,
|
|
|
|
|
+ buildVoiceTrainHeaders(account.getString("apiKey"), account.getString("appId"), token, body.toJSONString()));
|
|
|
|
|
+ if (responseJson.getIntValue("code") != 0) {
|
|
|
|
|
+ throw new IOException("创建训练任务失败: " + responseJson.toJSONString());
|
|
|
|
|
+ }
|
|
|
|
|
+ return responseJson.getString("data");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private void submitWithAudio(JSONObject account, String token, String taskId, String textSegId,
|
|
|
|
|
+ float mosRatio, MultipartFile file) throws IOException {
|
|
|
|
|
+ RequestBody fileBody = RequestBody.create(OCTET_STREAM, file.getBytes());
|
|
|
|
|
+ MultipartBody.Builder builder = new MultipartBody.Builder().setType(MultipartBody.FORM)
|
|
|
|
|
+ .addFormDataPart("file", file.getOriginalFilename(), fileBody)
|
|
|
|
|
+ .addFormDataPart("taskId", taskId)
|
|
|
|
|
+ .addFormDataPart("textId", String.valueOf(DEFAULT_TEXT_ID))
|
|
|
|
|
+ .addFormDataPart("textSegId", textSegId);
|
|
|
|
|
+ if (mosRatio > 0) {
|
|
|
|
|
+ builder.addFormDataPart("mosRatio", String.valueOf(mosRatio));
|
|
|
|
|
+ }
|
|
|
|
|
+ MultipartBody body = builder.build();
|
|
|
|
|
+ Buffer buffer = new Buffer();
|
|
|
|
|
+ body.writeTo(buffer);
|
|
|
|
|
+ String bodyMd5 = md5Hex(buffer.readByteArray());
|
|
|
|
|
+
|
|
|
|
|
+ Map<String, String> headers = new LinkedHashMap<>();
|
|
|
|
|
+ String requestTime = String.valueOf(System.currentTimeMillis());
|
|
|
|
|
+ headers.put("X-Time", requestTime);
|
|
|
|
|
+ headers.put("X-AppId", account.getString("appId"));
|
|
|
|
|
+ headers.put("X-Token", token);
|
|
|
|
|
+ headers.put("X-Sign", md5Hex(account.getString("apiKey") + requestTime + bodyMd5));
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject responseJson = postMultipart(TASK_SUBMIT_WITH_AUDIO_URL, body, headers);
|
|
|
|
|
+ if (responseJson.getIntValue("code") != 0) {
|
|
|
|
|
+ throw new IOException("上传训练音频失败: " + responseJson.toJSONString());
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private JSONObject queryTaskResultInternal(JSONObject account, String token, String taskId) throws IOException {
|
|
|
|
|
+ JSONObject body = new JSONObject(true);
|
|
|
|
|
+ body.put("taskId", taskId);
|
|
|
|
|
+ JSONObject responseJson = postJson(TASK_RESULT_URL, body,
|
|
|
|
|
+ buildVoiceTrainHeaders(account.getString("apiKey"), account.getString("appId"), token, body.toJSONString()));
|
|
|
|
|
+ if (responseJson.getIntValue("code") != 0) {
|
|
|
|
|
+ throw new IOException("查询训练任务失败: " + responseJson.toJSONString());
|
|
|
|
|
+ }
|
|
|
|
|
+ return responseJson.getJSONObject("data");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private String synthesizeCloneAudio(JSONObject account, String assetId, String text, String engineVersion) throws Exception {
|
|
|
|
|
+ String authUrl = buildCloneWebsocketUrl(account.getString("apiKey"), account.getString("apiSecret"));
|
|
|
|
|
+ CountDownLatch latch = new CountDownLatch(1);
|
|
|
|
|
+ AtomicReference<String> errRef = new AtomicReference<>();
|
|
|
|
|
+ ByteArrayOutputStream audioOutput = new ByteArrayOutputStream();
|
|
|
|
|
+
|
|
|
|
|
+ Request request = new Request.Builder().url(authUrl).build();
|
|
|
|
|
+ HTTP_CLIENT.newWebSocket(request, new WebSocketListener() {
|
|
|
|
|
+ @Override
|
|
|
|
|
+ public void onOpen(WebSocket webSocket, Response response) {
|
|
|
|
|
+ JSONObject payload = buildCloneTtsPayload(account.getString("appId"), assetId, text, engineVersion);
|
|
|
|
|
+ webSocket.send(payload.toJSONString());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ @Override
|
|
|
|
|
+ public void onMessage(WebSocket webSocket, String textMessage) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ JSONObject json = JSON.parseObject(textMessage);
|
|
|
|
|
+ int code = json.getIntValue("code");
|
|
|
|
|
+ if (code != 0) {
|
|
|
|
|
+ errRef.set("讯飞试音接口返回错误: " + json.toJSONString());
|
|
|
|
|
+ webSocket.close(1000, "error");
|
|
|
|
|
+ latch.countDown();
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject payload = json.getJSONObject("payload");
|
|
|
|
|
+ if (payload == null) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ JSONObject audio = payload.getJSONObject("audio");
|
|
|
|
|
+ if (audio == null) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ String audioChunk = audio.getString("audio");
|
|
|
|
|
+ if (StringUtils.isNotBlank(audioChunk)) {
|
|
|
|
|
+ audioOutput.write(Base64.getDecoder().decode(audioChunk));
|
|
|
|
|
+ }
|
|
|
|
|
+ if (audio.getIntValue("status") == 2) {
|
|
|
|
|
+ webSocket.close(1000, "done");
|
|
|
|
|
+ latch.countDown();
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ errRef.set("解析试听音频失败: " + e.getMessage());
|
|
|
|
|
+ webSocket.close(1000, "parse-error");
|
|
|
|
|
+ latch.countDown();
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ @Override
|
|
|
|
|
+ public void onMessage(WebSocket webSocket, ByteString bytes) {
|
|
|
|
|
+ onMessage(webSocket, bytes.utf8());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ @Override
|
|
|
|
|
+ public void onFailure(WebSocket webSocket, Throwable t, Response response) {
|
|
|
|
|
+ errRef.set(buildWebsocketFailureMessage(t, response));
|
|
|
|
|
+ latch.countDown();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ @Override
|
|
|
|
|
+ public void onClosed(WebSocket webSocket, int code, String reason) {
|
|
|
|
|
+ latch.countDown();
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ if (!latch.await(45, TimeUnit.SECONDS)) {
|
|
|
|
|
+ throw new IOException("试听超时,请稍后再试");
|
|
|
|
|
+ }
|
|
|
|
|
+ if (StringUtils.isNotBlank(errRef.get())) {
|
|
|
|
|
+ throw new IOException(errRef.get());
|
|
|
|
|
+ }
|
|
|
|
|
+ if (audioOutput.size() == 0) {
|
|
|
|
|
+ throw new IOException("未收到试听音频数据");
|
|
|
|
|
+ }
|
|
|
|
|
+ return Base64.getEncoder().encodeToString(audioOutput.toByteArray());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private JSONObject buildCloneTtsPayload(String appId, String assetId, String text, String engineVersion) {
|
|
|
|
|
+ JSONObject root = new JSONObject(true);
|
|
|
|
|
+ JSONObject header = new JSONObject(true);
|
|
|
|
|
+ header.put("app_id", appId);
|
|
|
|
|
+ header.put("status", 2);
|
|
|
|
|
+ header.put("res_id", assetId);
|
|
|
|
|
+ root.put("header", header);
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject audio = new JSONObject(true);
|
|
|
|
|
+ audio.put("encoding", "lame");
|
|
|
|
|
+ audio.put("sample_rate", 24000);
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject tts = new JSONObject(true);
|
|
|
|
|
+ tts.put("vcn", "omni_v1".equalsIgnoreCase(engineVersion) ? "x6_clone" : "x5_clone");
|
|
|
|
|
+ tts.put("volume", 50);
|
|
|
|
|
+ tts.put("rhy", 0);
|
|
|
|
|
+ tts.put("pybuffer", 1);
|
|
|
|
|
+ tts.put("speed", 50);
|
|
|
|
|
+ tts.put("pitch", 50);
|
|
|
|
|
+ tts.put("bgs", 0);
|
|
|
|
|
+ tts.put("reg", 0);
|
|
|
|
|
+ tts.put("rdn", 0);
|
|
|
|
|
+ tts.put("audio", audio);
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject parameter = new JSONObject(true);
|
|
|
|
|
+ parameter.put("tts", tts);
|
|
|
|
|
+ root.put("parameter", parameter);
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject textNode = new JSONObject(true);
|
|
|
|
|
+ textNode.put("encoding", "utf8");
|
|
|
|
|
+ textNode.put("compress", "raw");
|
|
|
|
|
+ textNode.put("format", "plain");
|
|
|
|
|
+ textNode.put("status", 2);
|
|
|
|
|
+ textNode.put("seq", 0);
|
|
|
|
|
+ textNode.put("text", text);
|
|
|
|
|
+
|
|
|
|
|
+ JSONObject payload = new JSONObject(true);
|
|
|
|
|
+ payload.put("text", textNode);
|
|
|
|
|
+ root.put("payload", payload);
|
|
|
|
|
+ return root;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private String buildCloneWebsocketUrl(String apiKey, String apiSecret) throws Exception {
|
|
|
|
|
+ String date = DateTimeFormatter.RFC_1123_DATE_TIME.format(ZonedDateTime.now(ZoneId.of("GMT")));
|
|
|
|
|
+ String signatureOrigin = "host: " + CLONE_TTS_HOST + "\n" +
|
|
|
|
|
+ "date: " + date + "\n" +
|
|
|
|
|
+ "GET " + CLONE_TTS_PATH + " HTTP/1.1";
|
|
|
|
|
+ Mac mac = Mac.getInstance("HmacSHA256");
|
|
|
|
|
+ mac.init(new SecretKeySpec(apiSecret.getBytes(StandardCharsets.UTF_8), "HmacSHA256"));
|
|
|
|
|
+ String signature = Base64.getEncoder().encodeToString(mac.doFinal(signatureOrigin.getBytes(StandardCharsets.UTF_8)));
|
|
|
|
|
+ String authorizationOrigin = String.format("api_key=\"%s\", algorithm=\"hmac-sha256\", headers=\"host date request-line\", signature=\"%s\"",
|
|
|
|
|
+ apiKey, signature);
|
|
|
|
|
+ String authorization = Base64.getEncoder().encodeToString(authorizationOrigin.getBytes(StandardCharsets.UTF_8));
|
|
|
|
|
+ return CLONE_TTS_WS_URL + "?authorization=" + urlEncode(authorization) +
|
|
|
|
|
+ "&date=" + urlEncode(date) +
|
|
|
|
|
+ "&host=" + urlEncode(CLONE_TTS_HOST);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private String buildWebsocketFailureMessage(Throwable t, Response response) {
|
|
|
|
|
+ StringBuilder sb = new StringBuilder("讯飞试听WebSocket失败");
|
|
|
|
|
+ if (response != null) {
|
|
|
|
|
+ sb.append(": HTTP ").append(response.code());
|
|
|
|
|
+ String bodyText = "";
|
|
|
|
|
+ try {
|
|
|
|
|
+ bodyText = response.body() == null ? "" : response.body().string();
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.warn("read xfvoiceclone websocket error body failed", e);
|
|
|
|
|
+ }
|
|
|
|
|
+ if (StringUtils.isNotBlank(bodyText)) {
|
|
|
|
|
+ sb.append(", body=").append(bodyText);
|
|
|
|
|
+ }
|
|
|
|
|
+ if (response.code() == 403) {
|
|
|
|
|
+ sb.append("。请确认当前 app-id/api-key/api-secret 是否属于“讯飞一句话复刻”服务应用,且该应用已开通 voice_clone/一句话复刻权限;普通在线TTS应用凭证无法调用该接口。");
|
|
|
|
|
+ }
|
|
|
|
|
+ } else if (t != null && StringUtils.isNotBlank(t.getMessage())) {
|
|
|
|
|
+ sb.append(": ").append(t.getMessage());
|
|
|
|
|
+ }
|
|
|
|
|
+ return sb.toString();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private Map<String, String> buildVoiceTrainHeaders(String apiKey, String appId, String token, String bodyText) {
|
|
|
|
|
+ Map<String, String> headers = new LinkedHashMap<>();
|
|
|
|
|
+ String requestTime = String.valueOf(System.currentTimeMillis());
|
|
|
|
|
+ headers.put("X-Time", requestTime);
|
|
|
|
|
+ headers.put("X-AppId", appId);
|
|
|
|
|
+ headers.put("X-Token", token);
|
|
|
|
|
+ headers.put("X-Sign", md5Hex(apiKey + requestTime + md5Hex(bodyText)));
|
|
|
|
|
+ return headers;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private JSONObject postJson(String url, JSONObject body, Map<String, String> headers) throws IOException {
|
|
|
|
|
+ RequestBody requestBody = RequestBody.create(JSON_MEDIA_TYPE, body.toJSONString());
|
|
|
|
|
+ Request.Builder requestBuilder = new Request.Builder().url(url).post(requestBody);
|
|
|
|
|
+ for (Map.Entry<String, String> header : headers.entrySet()) {
|
|
|
|
|
+ requestBuilder.addHeader(header.getKey(), header.getValue());
|
|
|
|
|
+ }
|
|
|
|
|
+ try (Response response = HTTP_CLIENT.newCall(requestBuilder.build()).execute()) {
|
|
|
|
|
+ String bodyText = response.body() == null ? "" : response.body().string();
|
|
|
|
|
+ log.info("xfvoiceclone http {} status={} body={}", url, response.code(), bodyText);
|
|
|
|
|
+ if (!response.isSuccessful()) {
|
|
|
|
|
+ throw new IOException("HTTP " + response.code() + ": " + bodyText);
|
|
|
|
|
+ }
|
|
|
|
|
+ return JSON.parseObject(bodyText);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private JSONObject postMultipart(String url, MultipartBody body, Map<String, String> headers) throws IOException {
|
|
|
|
|
+ Request.Builder requestBuilder = new Request.Builder().url(url).post(body);
|
|
|
|
|
+ for (Map.Entry<String, String> header : headers.entrySet()) {
|
|
|
|
|
+ requestBuilder.addHeader(header.getKey(), header.getValue());
|
|
|
|
|
+ }
|
|
|
|
|
+ try (Response response = HTTP_CLIENT.newCall(requestBuilder.build()).execute()) {
|
|
|
|
|
+ String bodyText = response.body() == null ? "" : response.body().string();
|
|
|
|
|
+ log.info("xfvoiceclone multipart {} status={} body={}", url, response.code(), bodyText);
|
|
|
|
|
+ if (!response.isSuccessful()) {
|
|
|
|
|
+ throw new IOException("HTTP " + response.code() + ": " + bodyText);
|
|
|
|
|
+ }
|
|
|
|
|
+ return JSON.parseObject(bodyText);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private String buildTrainStatusTips(JSONObject taskResult) {
|
|
|
|
|
+ if (taskResult == null) {
|
|
|
|
|
+ return "训练任务已提交,请稍后查询结果。";
|
|
|
|
|
+ }
|
|
|
|
|
+ int trainStatus = taskResult.getIntValue("trainStatus");
|
|
|
|
|
+ if (trainStatus == 1) {
|
|
|
|
|
+ return "训练成功,音色ID: " + taskResult.getString("assetId");
|
|
|
|
|
+ }
|
|
|
|
|
+ if (trainStatus == -1) {
|
|
|
|
|
+ return "训练中,请等待30-60秒后再次查询。";
|
|
|
|
|
+ }
|
|
|
|
|
+ if (trainStatus == 2) {
|
|
|
|
|
+ return "任务已创建,等待训练中。";
|
|
|
|
|
+ }
|
|
|
|
|
+ return "训练失败: " + defaultIfBlank(taskResult.getString("failedDesc"), "未知原因");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private String trimToEmpty(String value) {
|
|
|
|
|
+ return value == null ? "" : value.trim();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private String defaultIfBlank(String value, String defaultValue) {
|
|
|
|
|
+ return StringUtils.isBlank(value) ? defaultValue : value.trim();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private int parseIntOrDefault(String value, int defaultValue) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ return Integer.parseInt(trimToEmpty(value));
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ return defaultValue;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private float parseFloatOrDefault(String value, float defaultValue) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ return Float.parseFloat(trimToEmpty(value));
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ return defaultValue;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private String normalizeLanguage(String language) {
|
|
|
|
|
+ String value = trimToEmpty(language);
|
|
|
|
|
+ return "zh".equalsIgnoreCase(value) ? "" : value;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private String urlEncode(String value) throws Exception {
|
|
|
|
|
+ return URLEncoder.encode(value, "UTF-8");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private String md5Hex(String value) {
|
|
|
|
|
+ return md5Hex(value.getBytes(StandardCharsets.UTF_8));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private String md5Hex(byte[] bytes) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ MessageDigest md = MessageDigest.getInstance("MD5");
|
|
|
|
|
+ byte[] digest = md.digest(bytes);
|
|
|
|
|
+ StringBuilder sb = new StringBuilder();
|
|
|
|
|
+ for (byte b : digest) {
|
|
|
|
|
+ sb.append(String.format("%02x", b & 0xff));
|
|
|
|
|
+ }
|
|
|
|
|
+ return sb.toString();
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ throw new IllegalStateException("MD5计算失败", e);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+}
|