|
|
@@ -0,0 +1,181 @@
|
|
|
+package com.fs.aiSoundReplication;
|
|
|
+
|
|
|
+import com.fs.aiSoundReplication.param.StatusResponse;
|
|
|
+import com.fs.aiSoundReplication.param.TtsRequest;
|
|
|
+import com.fs.aiSoundReplication.param.TtsResponse;
|
|
|
+import com.fs.aiSoundReplication.param.UploadResponse;
|
|
|
+import com.fs.aiSoundReplication.service.TtsService;
|
|
|
+import com.fs.aiSoundReplication.service.VoiceCloneService;
|
|
|
+import com.fs.common.core.domain.R;
|
|
|
+import com.fs.fastgptApi.vo.AudioVO;
|
|
|
+import io.swagger.annotations.Api;
|
|
|
+import io.swagger.annotations.ApiOperation;
|
|
|
+import io.swagger.annotations.ApiParam;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.http.HttpHeaders;
|
|
|
+import org.springframework.http.MediaType;
|
|
|
+import org.springframework.http.ResponseEntity;
|
|
|
+import org.springframework.web.bind.annotation.*;
|
|
|
+import org.springframework.web.multipart.MultipartFile;
|
|
|
+
|
|
|
+import javax.annotation.Resource;
|
|
|
+import javax.servlet.http.HttpServletRequest;
|
|
|
+import java.io.File;
|
|
|
+import java.util.List;
|
|
|
+import java.util.UUID;
|
|
|
+
|
|
|
+@RestController
|
|
|
+@RequestMapping("/api/voice-clone")
|
|
|
+@Api(tags = "声音复刻API")
|
|
|
+public class VoiceCloneController {
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ private VoiceCloneService voiceCloneService;
|
|
|
+ @Autowired
|
|
|
+ private TtsService ttsService;
|
|
|
+
|
|
|
+ @PostMapping("/synthesize")
|
|
|
+ @ApiOperation("文本转语音")
|
|
|
+ public AudioVO synthesize(
|
|
|
+ @ApiParam(value = "TTS请求参数", required = true)
|
|
|
+ @RequestBody TtsRequest request) {
|
|
|
+ return ttsService.textToSpeech(request);
|
|
|
+ }
|
|
|
+
|
|
|
+ @PostMapping("/synthesize-simple")
|
|
|
+ @ApiOperation("简化版文本转语音")
|
|
|
+ public AudioVO synthesizeSimple(
|
|
|
+ @ApiParam(value = "要合成的文本", required = true)
|
|
|
+ @RequestParam String text,
|
|
|
+ @ApiParam(value = "音色ID", required = true)
|
|
|
+ @RequestParam String voiceType,
|
|
|
+ @ApiParam(value = "音频格式")
|
|
|
+ @RequestParam(required = false, defaultValue = "mp3") String format,
|
|
|
+ @ApiParam(value = "语速 (0-15)")
|
|
|
+ @RequestParam(required = false, defaultValue = "1") Integer speed
|
|
|
+ ) {
|
|
|
+
|
|
|
+ TtsRequest request = new TtsRequest(
|
|
|
+ "", "", voiceType, text); // AppID和Token会在Service中设置
|
|
|
+ request.setReqId(UUID.randomUUID().toString());
|
|
|
+ request.setFormat(format);
|
|
|
+ request.setSpeed(speed);
|
|
|
+ return ttsService.textToSpeech(request);
|
|
|
+ }
|
|
|
+
|
|
|
+// @PostMapping("/synthesize-and-download")
|
|
|
+// @ApiOperation("文本转语音并下载")
|
|
|
+// public R synthesizeAndDownload(
|
|
|
+// @ApiParam(value = "要合成的文本", required = true)
|
|
|
+// @RequestParam String text,
|
|
|
+// @ApiParam(value = "音色ID", required = true)
|
|
|
+// @RequestParam String voiceType,
|
|
|
+// HttpServletRequest httpRequest) {
|
|
|
+//
|
|
|
+// TtsRequest ttsRequest = new TtsRequest("", "", voiceType, text);
|
|
|
+// ttsRequest.setReqId(UUID.randomUUID().toString());
|
|
|
+//
|
|
|
+// String url = ttsService.textToSpeechStream(ttsRequest);
|
|
|
+//
|
|
|
+// return R.ok();
|
|
|
+// }
|
|
|
+
|
|
|
+// @PostMapping("/batch-synthesize")
|
|
|
+// @ApiOperation("批量文本转语音")
|
|
|
+// public ResponseEntity<List<File>> batchSynthesize(
|
|
|
+// @ApiParam(value = "文本列表", required = true)
|
|
|
+// @RequestBody List<String> texts,
|
|
|
+// @ApiParam(value = "音色ID", required = true)
|
|
|
+// @RequestParam String voiceType,
|
|
|
+// @ApiParam(value = "是否打包下载")
|
|
|
+// @RequestParam(required = false, defaultValue = "false") Boolean zip) {
|
|
|
+//
|
|
|
+// List<File> audioFiles = ttsService.batchTextToSpeech(texts, voiceType);
|
|
|
+//
|
|
|
+// if (zip && !audioFiles.isEmpty()) {
|
|
|
+// // 这里可以添加ZIP打包逻辑
|
|
|
+// // 返回ZIP文件的ResponseEntity
|
|
|
+// }
|
|
|
+//
|
|
|
+// return ResponseEntity.ok(audioFiles);
|
|
|
+// }
|
|
|
+
|
|
|
+// @PostMapping("/synthesize-with-params")
|
|
|
+// @ApiOperation("带参数的文本转语音")
|
|
|
+// public TtsResponse synthesizeWithParams(
|
|
|
+// @ApiParam(value = "音色ID", required = true) @RequestParam String voiceType,
|
|
|
+// @ApiParam(value = "文本内容", required = true) @RequestParam String text,
|
|
|
+// @ApiParam(value = "语速 (0-15)") @RequestParam(required = false) Integer speed,
|
|
|
+// @ApiParam(value = "音量 (0-15)") @RequestParam(required = false) Integer volume,
|
|
|
+// @ApiParam(value = "音高 (0-15)") @RequestParam(required = false) Integer pitch,
|
|
|
+// @ApiParam(value = "情感参数") @RequestParam(required = false) String emotion,
|
|
|
+// @ApiParam(value = "说话风格") @RequestParam(required = false) String speakingStyle) {
|
|
|
+//
|
|
|
+// TtsRequest request = new TtsRequest("", "", voiceType, text);
|
|
|
+// request.setReqId(UUID.randomUUID().toString());
|
|
|
+//
|
|
|
+// if (speed != null) request.setSpeed(speed);
|
|
|
+// if (volume != null) request.setVolume(volume);
|
|
|
+// if (pitch != null) request.setPitch(pitch);
|
|
|
+// if (emotion != null) request.setEmotion(emotion);
|
|
|
+// if (speakingStyle != null) request.setSpeakingStyle(speakingStyle);
|
|
|
+//
|
|
|
+// return ttsService.textToSpeech(request);
|
|
|
+// }
|
|
|
+
|
|
|
+ private String getContentType(String format) {
|
|
|
+ switch (format.toLowerCase()) {
|
|
|
+ case "mp3":
|
|
|
+ return "audio/mpeg";
|
|
|
+ case "wav":
|
|
|
+ return "audio/wav";
|
|
|
+ case "pcm":
|
|
|
+ return "audio/L16";
|
|
|
+ default:
|
|
|
+ return "application/octet-stream";
|
|
|
+ }
|
|
|
+ }
|
|
|
+ @PostMapping("/upload")
|
|
|
+ @ApiOperation("上传音频训练音色")
|
|
|
+ public R uploadVoice(
|
|
|
+ @ApiParam(value = "音色ID", required = true) @RequestParam String speakerId,
|
|
|
+ @ApiParam(value = "音频文件", required = true) @RequestParam MultipartFile audioFile,
|
|
|
+ @ApiParam(value = "模型类型(1-ICL1.0, 4-ICL2.0)", defaultValue = "4")
|
|
|
+ @RequestParam(required = false) Integer modelType,
|
|
|
+ @ApiParam(value = "语种(0-中文, 1-英文)", defaultValue = "0")
|
|
|
+ @RequestParam(required = false) Integer language) {
|
|
|
+ return voiceCloneService.uploadVoice(speakerId, audioFile, modelType, language);
|
|
|
+ }
|
|
|
+
|
|
|
+ @GetMapping("/status/{speakerId}")
|
|
|
+ @ApiOperation("查询音色训练状态")
|
|
|
+ public StatusResponse getTrainingStatus(
|
|
|
+ @ApiParam(value = "音色ID", required = true)
|
|
|
+ @PathVariable String speakerId) {
|
|
|
+ return voiceCloneService.queryTrainingStatus(speakerId);
|
|
|
+ }
|
|
|
+
|
|
|
+// @PostMapping("/upload-and-wait")
|
|
|
+// @ApiOperation("上传并等待训练完成")
|
|
|
+// public StatusResponse uploadAndWait(
|
|
|
+// @ApiParam(value = "音色ID", required = true) @RequestParam String speakerId,
|
|
|
+// @ApiParam(value = "音频文件", required = true) @RequestParam MultipartFile audioFile,
|
|
|
+// @ApiParam(value = "模型类型", defaultValue = "4")
|
|
|
+// @RequestParam(required = false) Integer modelType,
|
|
|
+// @ApiParam(value = "语种", defaultValue = "0")
|
|
|
+// @RequestParam(required = false) Integer language,
|
|
|
+// @ApiParam(value = "最大等待时间(秒)", defaultValue = "600")
|
|
|
+// @RequestParam(required = false) Integer maxWaitSeconds) {
|
|
|
+//
|
|
|
+// // 1. 上传音频
|
|
|
+// UploadResponse uploadResponse = voiceCloneService.uploadVoice(
|
|
|
+// speakerId, audioFile, modelType, language);
|
|
|
+//
|
|
|
+// // 2. 计算轮询参数
|
|
|
+// int maxPollingTimes = maxWaitSeconds != null ? maxWaitSeconds * 1000 / 10000 : 60;
|
|
|
+//
|
|
|
+// // 3. 轮询训练状态
|
|
|
+// return voiceCloneService.pollTrainingStatus(
|
|
|
+// uploadResponse.getSpeakerId(), maxPollingTimes, 10000L);
|
|
|
+// }
|
|
|
+}
|