Просмотр исходного кода

添加语音转文字功能,合并隐患提示(图片可以点击对应物体)

zkn 1 месяц назад
Родитель
Сommit
66615c772d

+ 1 - 0
shudao-chat-py/requirements.txt

@@ -15,3 +15,4 @@ python-jose[cryptography]==3.3.0
 cryptography==46.0.6
 oss2==2.18.4
 openai==2.30.0
+tencentcloud-sdk-python==3.0.1413

+ 2 - 1
shudao-chat-py/routers/__init__.py

@@ -4,7 +4,7 @@ from fastapi import APIRouter
 api_router = APIRouter(prefix="/apiv1")
 
 # 导入各个路由模块
-from . import chat, total, scene, tracking, file, knowledge, exam, auth, points, hazard, new_apis, report_compat
+from . import chat, total, scene, tracking, file, knowledge, exam, auth, points, hazard, new_apis, report_compat, speech
 
 # 注册路由
 api_router.include_router(auth.router, prefix="/auth", tags=["认证"])
@@ -18,6 +18,7 @@ api_router.include_router(knowledge.router, tags=["知识库"])
 api_router.include_router(exam.router, tags=["考试"])
 api_router.include_router(hazard.router, tags=["隐患识别"])
 api_router.include_router(new_apis.router, tags=["新增补充接口"])
+api_router.include_router(speech.router, tags=["语音"])
 
 # 注册报告兼容路由(不带前缀,因为 report_compat.router 已经有 /apiv1 前缀)
 from fastapi import FastAPI

+ 98 - 0
shudao-chat-py/routers/speech.py

@@ -0,0 +1,98 @@
+import asyncio
+
+from fastapi import APIRouter, File, Form, Request, UploadFile
+from fastapi.responses import JSONResponse, Response
+from pydantic import BaseModel
+
+from services.tencent_speech_service import tencent_speech_service
+from utils.logger import logger
+
+
+router = APIRouter(prefix="/speech")
+
+
+class SpeechSynthesizeRequest(BaseModel):
+    """JSON request body accepted by the ``/synthesize`` endpoint."""
+
+    # Text to be spoken; the endpoint strips it and answers 400 when nothing is left.
+    text: str = ""
+    # Optional per-request overrides handed to the speech service unchanged;
+    # None lets the service fall back to its configured value.
+    voice_type: int | None = None
+    speed: float | None = None
+    volume: float | None = None
+
+
+def _speech_error_response(message: str, exc: Exception) -> JSONResponse:
+    detail = str(exc)
+    status_code = 503 if "PkgExhausted" in detail else 500
+    return JSONResponse(
+        status_code=status_code,
+        content={
+            "statusCode": status_code,
+            "msg": f"{message}: {detail}",
+        },
+    )
+
+
+@router.post("/transcribe")
+async def transcribe_audio(
+    request: Request,
+    file: UploadFile = File(...),
+    user_id: str = Form(""),
+):
+    user = request.state.user
+    if not user:
+        return JSONResponse(status_code=401, content={"statusCode": 401, "msg": "未授权"})
+
+    if not file or not file.filename:
+        return JSONResponse(status_code=400, content={"statusCode": 400, "msg": "缺少音频文件"})
+
+    try:
+        result = await asyncio.to_thread(
+            tencent_speech_service.transcribe_file,
+            file,
+            user_id or str(getattr(user, "user_id", "")),
+        )
+        return {
+            "statusCode": 200,
+            "msg": "success",
+            "data": {
+                "text": result["text"],
+                "request_id": result.get("request_id", ""),
+            },
+        }
+    except ValueError as exc:
+        return JSONResponse(status_code=400, content={"statusCode": 400, "msg": str(exc)})
+    except Exception as exc:
+        logger.error("[speech] transcribe failed: %s", exc)
+        return _speech_error_response("语音转文字失败", exc)
+
+
+@router.post("/synthesize")
+async def synthesize_speech(request: Request, data: SpeechSynthesizeRequest):
+    user = request.state.user
+    if not user:
+        return JSONResponse(status_code=401, content={"statusCode": 401, "msg": "未授权"})
+
+    normalized_text = (data.text or "").strip()
+    if not normalized_text:
+        return JSONResponse(status_code=400, content={"statusCode": 400, "msg": "播报文本不能为空"})
+
+    try:
+        result = await asyncio.to_thread(
+            tencent_speech_service.synthesize_text,
+            normalized_text,
+            voice_type=data.voice_type,
+            speed=data.speed,
+            volume=data.volume,
+        )
+        headers = {
+            "X-Speech-Request-Id": result.get("request_id", ""),
+            "Cache-Control": "no-store",
+        }
+        return Response(
+            content=result["audio_bytes"],
+            media_type=result["content_type"],
+            headers=headers,
+        )
+    except ValueError as exc:
+        return JSONResponse(status_code=400, content={"statusCode": 400, "msg": str(exc)})
+    except Exception as exc:
+        logger.error("[speech] synthesize failed: %s", exc)
+        return _speech_error_response("语音播报失败", exc)

+ 204 - 0
shudao-chat-py/services/tencent_speech_service.py

@@ -0,0 +1,204 @@
+import base64
+import json
+import time
+from typing import Any
+
+from utils.config import settings
+from utils.logger import logger
+
+
+# Maps a synthesis codec name to the Content-Type reported with the audio.
+# Codecs missing from this table are reported as application/octet-stream
+# by synthesize_text.
+CONTENT_TYPE_BY_CODEC = {
+    "mp3": "audio/mpeg",
+    "wav": "audio/wav",
+    "pcm": "audio/pcm",
+}
+
+
+class TencentSpeechService:
+    def __init__(self):
+        self.config = settings.speech
+
+    def _ensure_available(self):
+        if not self.config.enabled:
+            raise ValueError("语音服务未启用")
+        if self.config.provider != "tencent_cloud":
+            raise ValueError(f"暂不支持的语音服务商: {self.config.provider}")
+
+        credentials = self.config.tencent
+        if not credentials.app_id or not credentials.secret_id or not credentials.secret_key:
+            raise ValueError("腾讯云语音服务凭证未配置")
+
+    def _load_tencent_sdk(self):
+        try:
+            from tencentcloud.common import credential
+            from tencentcloud.common.exception.tencent_cloud_sdk_exception import (
+                TencentCloudSDKException,
+            )
+            from tencentcloud.asr.v20190614 import asr_client, models as asr_models
+            from tencentcloud.tts.v20190823 import tts_client, models as tts_models
+        except ImportError as exc:
+            raise RuntimeError(
+                "未安装腾讯云 SDK,请先安装 tencentcloud-sdk-python"
+            ) from exc
+
+        return {
+            "credential": credential,
+            "TencentCloudSDKException": TencentCloudSDKException,
+            "asr_client": asr_client,
+            "asr_models": asr_models,
+            "tts_client": tts_client,
+            "tts_models": tts_models,
+        }
+
+    def _build_credentials(self, credential_module):
+        cfg = self.config.tencent
+        return credential_module.Credential(cfg.secret_id, cfg.secret_key)
+
+    @staticmethod
+    def _read_upload_bytes(upload_file) -> bytes:
+        source = getattr(upload_file, "file", None)
+        if source is None:
+            raise ValueError("未读取到音频文件")
+
+        if hasattr(source, "seek"):
+            source.seek(0)
+
+        data = source.read()
+        if hasattr(source, "seek"):
+            source.seek(0)
+
+        if not data:
+            raise ValueError("音频文件为空")
+
+        return data
+
+    def transcribe_file(self, upload_file, user_id: str = "", request_id: str | None = None) -> dict[str, Any]:
+        self._ensure_available()
+
+        if not getattr(self.config.transcribe, "enabled", True):
+            raise ValueError("语音转文字未启用")
+
+        audio_bytes = self._read_upload_bytes(upload_file)
+        max_size_bytes = self.config.transcribe.max_audio_size_mb * 1024 * 1024
+        if len(audio_bytes) > max_size_bytes:
+            raise ValueError(
+                f"音频文件超过 {self.config.transcribe.max_audio_size_mb}MB,当前版本请缩短录音后重试"
+            )
+
+        sdk = self._load_tencent_sdk()
+        credentials = self._build_credentials(sdk["credential"])
+        client = sdk["asr_client"].AsrClient(credentials, self.config.tencent.region)
+        models = sdk["asr_models"]
+
+        usr_audio_key = request_id or f"{user_id or 'user'}-{int(time.time() * 1000)}"
+        params = {
+            "ProjectId": 0,
+            "SubServiceType": 2,
+            "EngSerViceType": self.config.transcribe.engine_model_type,
+            "SourceType": self.config.transcribe.source_type,
+            "VoiceFormat": self.config.transcribe.voice_format,
+            "UsrAudioKey": usr_audio_key,
+            "Data": base64.b64encode(audio_bytes).decode("utf-8"),
+            "DataLen": len(audio_bytes),
+            "WordInfo": self.config.transcribe.word_info,
+            "FilterDirty": self.config.transcribe.filter_dirty,
+            "FilterModal": self.config.transcribe.filter_modal,
+            "FilterPunc": self.config.transcribe.filter_punc,
+            "ConvertNumMode": self.config.transcribe.convert_num_mode,
+        }
+
+        if self.config.transcribe.hotword_id:
+            params["HotwordId"] = self.config.transcribe.hotword_id
+        elif self.config.transcribe.hotword_list:
+            params["HotwordList"] = self.config.transcribe.hotword_list
+
+        if self.config.transcribe.customization_id:
+            params["CustomizationId"] = self.config.transcribe.customization_id
+
+        if self.config.transcribe.replace_text_id:
+            params["ReplaceTextId"] = self.config.transcribe.replace_text_id
+
+        req = models.SentenceRecognitionRequest()
+        req.from_json_string(json.dumps(params))
+
+        try:
+            resp = client.SentenceRecognition(req)
+            text = getattr(resp, "Result", "") or ""
+            request_id = getattr(resp, "RequestId", "")
+            if not text.strip():
+                raise RuntimeError("腾讯云语音识别未返回文本内容")
+
+            return {
+                "text": text.strip(),
+                "request_id": request_id,
+                "raw": resp.to_json_string(),
+            }
+        except sdk["TencentCloudSDKException"] as exc:
+            logger.error("[speech] Tencent ASR failed: %s", exc)
+            raise RuntimeError(f"腾讯云语音识别失败: {exc}") from exc
+
+    def synthesize_text(
+        self,
+        text: str,
+        *,
+        session_id: str | None = None,
+        voice_type: int | None = None,
+        speed: float | None = None,
+        volume: float | None = None,
+    ) -> dict[str, Any]:
+        self._ensure_available()
+
+        if not getattr(self.config.synthesize, "enabled", True):
+            raise ValueError("语音播报未启用")
+
+        normalized_text = (text or "").strip()
+        if not normalized_text:
+            raise ValueError("播报文本不能为空")
+
+        if len(normalized_text) > self.config.synthesize.basic_text_limit_chars:
+            raise ValueError(
+                f"播报文本超过 {self.config.synthesize.basic_text_limit_chars} 字,请先分段后重试"
+            )
+
+        sdk = self._load_tencent_sdk()
+        credentials = self._build_credentials(sdk["credential"])
+        client = sdk["tts_client"].TtsClient(credentials, self.config.tencent.region)
+        models = sdk["tts_models"]
+
+        codec = self.config.synthesize.codec.lower()
+        params = {
+            "Text": normalized_text,
+            "SessionId": session_id or f"speech-{int(time.time() * 1000)}",
+            "ProjectId": 0,
+            "ModelType": 1,
+            "Volume": volume if volume is not None else self.config.synthesize.volume,
+            "Speed": speed if speed is not None else self.config.synthesize.speed,
+            "VoiceType": voice_type if voice_type is not None else self.config.synthesize.voice_type,
+            "SampleRate": self.config.synthesize.sample_rate,
+            "Codec": codec,
+            "PrimaryLanguage": self.config.synthesize.primary_language,
+            "EnableSubtitle": self.config.synthesize.enable_subtitle,
+        }
+
+        req = models.TextToVoiceRequest()
+        req.from_json_string(json.dumps(params))
+
+        try:
+            resp = client.TextToVoice(req)
+            audio_base64 = getattr(resp, "Audio", "") or ""
+            request_id = getattr(resp, "RequestId", "")
+            if not audio_base64:
+                raise RuntimeError("腾讯云语音合成未返回音频内容")
+
+            return {
+                "audio_bytes": base64.b64decode(audio_base64),
+                "content_type": CONTENT_TYPE_BY_CODEC.get(codec, "application/octet-stream"),
+                "request_id": request_id,
+                "raw": resp.to_json_string(),
+            }
+        except sdk["TencentCloudSDKException"] as exc:
+            logger.error("[speech] Tencent TTS failed: %s", exc)
+            raise RuntimeError(f"腾讯云语音合成失败: {exc}") from exc
+
+
+# Module-level instance shared by importers such as routers/speech.py.
+tencent_speech_service = TencentSpeechService()

+ 84 - 0
shudao-chat-py/tests/test_speech_router.py

@@ -0,0 +1,84 @@
+import importlib.util
+import unittest
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import Mock, patch
+
+from fastapi import UploadFile
+from starlette.responses import JSONResponse
+
+
+SPEECH_PATH = Path(__file__).resolve().parents[1] / "routers" / "speech.py"
+spec = importlib.util.spec_from_file_location("speech_under_test", SPEECH_PATH)
+speech = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(speech)
+
+
+class SpeechRouterTests(unittest.IsolatedAsyncioTestCase):
+    def _request(self, user_id=70430):
+        return SimpleNamespace(state=SimpleNamespace(user=SimpleNamespace(user_id=user_id)))
+
+    async def test_transcribe_audio_returns_text_payload(self):
+        upload = UploadFile(filename="sample.wav", file=None)
+
+        with patch.object(
+            speech.tencent_speech_service,
+            "transcribe_file",
+            Mock(return_value={"text": "桥梁施工注意事项", "request_id": "req-1"}),
+        ):
+            response = await speech.transcribe_audio(
+                self._request(),
+                file=upload,
+                user_id="web-user",
+            )
+
+        self.assertEqual(response["statusCode"], 200)
+        self.assertEqual(response["data"]["text"], "桥梁施工注意事项")
+        self.assertEqual(response["data"]["request_id"], "req-1")
+
+    async def test_synthesize_speech_returns_audio_response(self):
+        with patch.object(
+            speech.tencent_speech_service,
+            "synthesize_text",
+            Mock(
+                return_value={
+                    "audio_bytes": b"fake-mp3",
+                    "content_type": "audio/mpeg",
+                    "request_id": "req-2",
+                }
+            ),
+        ):
+            response = await speech.synthesize_speech(
+                self._request(),
+                speech.SpeechSynthesizeRequest(text="测试播报"),
+            )
+
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.media_type, "audio/mpeg")
+        self.assertEqual(response.headers["X-Speech-Request-Id"], "req-2")
+
+    async def test_synthesize_speech_returns_503_when_tencent_quota_is_exhausted(self):
+        with patch.object(
+            speech.tencent_speech_service,
+            "synthesize_text",
+            Mock(
+                side_effect=RuntimeError(
+                    "腾讯云语音合成失败: "
+                    "[TencentCloudSDKException] code:UnsupportedOperation.PkgExhausted "
+                    "message:The resource pack allowance has been exhausted"
+                )
+            ),
+        ):
+            response = await speech.synthesize_speech(
+                self._request(),
+                speech.SpeechSynthesizeRequest(text="测试播报"),
+            )
+
+        self.assertIsInstance(response, JSONResponse)
+        self.assertEqual(response.status_code, 503)
+        self.assertIn('"statusCode":503', response.body.decode("utf-8"))
+        self.assertIn("PkgExhausted", response.body.decode("utf-8"))
+
+
+if __name__ == "__main__":
+    unittest.main()

+ 87 - 0
shudao-chat-py/utils/config.py

@@ -68,6 +68,92 @@ class AIChatConfig(BaseSettings):
     timeout: int = 600
 
 
+class SpeechProcurementConfig(BaseSettings):
+    """Lists of cloud products, credentials and notes for the speech feature.
+
+    NOTE(review): the speech service code visible in this commit does not
+    read any of these values; presumably informational only. Confirm.
+    """
+
+    required_cloud_products: list[str] = [
+        "asr_sentence_recognition",
+        "asr_flash_file_recognition",
+        "tts_text_to_voice",
+        "tts_long_text",
+    ]
+    required_credentials: list[str] = [
+        "app_id",
+        "secret_id",
+        "secret_key",
+    ]
+    optional_cloud_products: list[str] = [
+        "cos",
+    ]
+    notes: list[str] = [
+        "sentence_recognition_for_audio_within_60s_and_3mb",
+        "flash_file_recognition_for_longer_audio_fallback",
+        "text_to_voice_for_default_tts",
+        "long_text_tts_for_content_over_150_chinese_chars",
+    ]
+
+
+class SpeechTencentConfig(BaseSettings):
+    """Tencent Cloud account settings used by the speech service."""
+
+    # The speech service refuses to run unless all three values are non-empty;
+    # secret_id and secret_key are used to build the SDK credential.
+    app_id: str = ""
+    secret_id: str = ""
+    secret_key: str = ""
+    # Region handed to both the ASR and the TTS SDK client.
+    region: str = "ap-guangzhou"
+    # NOTE(review): the two endpoints are not read by the speech service code
+    # visible in this commit; confirm whether they are used elsewhere.
+    asr_endpoint: str = "asr.tencentcloudapi.com"
+    tts_endpoint: str = "tts.tencentcloudapi.com"
+
+
+class SpeechTranscribeConfig(BaseSettings):
+    """Settings for speech-to-text (transcription) requests."""
+
+    # When False the service rejects transcription requests.
+    enabled: bool = True
+    # NOTE(review): default_api, fallback_api, input_sample_rate and
+    # max_audio_seconds are not read by the speech service code visible in
+    # this commit; confirm whether they are used elsewhere.
+    default_api: str = "SentenceRecognition"
+    fallback_api: str = "CreateRecTask"
+    # Sent as the request's EngSerViceType, SourceType and VoiceFormat.
+    engine_model_type: str = "16k_zh"
+    source_type: int = 1
+    voice_format: str = "wav"
+    input_sample_rate: int = 16000
+    max_audio_seconds: int = 60
+    # Uploads larger than this many MiB are rejected before calling Tencent.
+    max_audio_size_mb: int = 3
+    # Sent as WordInfo, FilterDirty, FilterModal, FilterPunc and ConvertNumMode.
+    word_info: int = 0
+    filter_dirty: int = 0
+    filter_modal: int = 0
+    filter_punc: int = 0
+    convert_num_mode: int = 1
+    # Optional request fields, sent only when non-empty; hotword_id takes
+    # precedence over hotword_list.
+    hotword_list: str = ""
+    hotword_id: str = ""
+    customization_id: str = ""
+    replace_text_id: str = ""
+
+
+class SpeechSynthesizeConfig(BaseSettings):
+    """Settings for text-to-speech (synthesis) requests."""
+
+    # When False the service rejects synthesis requests.
+    enabled: bool = True
+    # NOTE(review): default_api and fallback_api are not read by the speech
+    # service code visible in this commit; confirm whether they are used elsewhere.
+    default_api: str = "TextToVoice"
+    fallback_api: str = "CreateTtsTask"
+    # Sent as PrimaryLanguage, VoiceType, SampleRate and Codec; the codec also
+    # selects the Content-Type of the returned audio.
+    primary_language: int = 1
+    voice_type: int = 1001
+    sample_rate: int = 16000
+    codec: str = "mp3"
+    # Defaults for Speed and Volume, used when a request does not override them.
+    speed: float = 0.0
+    volume: float = 0.0
+    # Sent as EnableSubtitle.
+    enable_subtitle: bool = False
+    # NOTE(review): segment_rate and the emotion_* fields are not read by the
+    # speech service code visible in this commit.
+    segment_rate: int = 0
+    emotion_category: str = ""
+    emotion_intensity: int = 100
+    # Texts longer than this many characters are rejected by synthesize_text.
+    basic_text_limit_chars: int = 150
+    # NOTE(review): the long_text_* fields and callback_url are not read by
+    # the speech service code visible in this commit.
+    long_text_trigger_chars: int = 120
+    long_text_max_chars: int = 100000
+    long_text_codec: str = "mp3"
+    callback_url: str = ""
+
+
+class SpeechConfig(BaseSettings):
+    """Top-level speech settings, built from the ``speech`` section of the config data."""
+
+    # Master switch; when False the speech service rejects every request.
+    enabled: bool = True
+    # The speech service only accepts "tencent_cloud".
+    provider: str = "tencent_cloud"
+    # NOTE(review): integration_mode, backend_service and
+    # request_timeout_seconds are not read by the speech service code visible
+    # in this commit; confirm whether they are used elsewhere.
+    integration_mode: str = "backend_direct"
+    backend_service: str = "shudao-chat-py"
+    request_timeout_seconds: int = 60
+    # Nested sections; each falls back to its own defaults when omitted.
+    procurement: SpeechProcurementConfig = SpeechProcurementConfig()
+    tencent: SpeechTencentConfig = SpeechTencentConfig()
+    transcribe: SpeechTranscribeConfig = SpeechTranscribeConfig()
+    synthesize: SpeechSynthesizeConfig = SpeechSynthesizeConfig()
+
+
 class ThinkingSummaryConfig(BaseSettings):
     """思考过程二次总结(方案三)配置"""
     enabled: bool = True
@@ -104,6 +190,7 @@ class Settings:
         self.auth = AuthConfig(**config_data.get('auth', {}))
         self.oss = OSSConfig(**config_data.get('oss', {}))
         self.aichat = AIChatConfig(**config_data.get('aichat', {}))
+        self.speech = SpeechConfig(**config_data.get('speech', {}))
         self.thinking_summary = ThinkingSummaryConfig(**config_data.get('thinking_summary', {}))
         self.base_url = config_data.get(
             'base_url', 'https://aqai.shudaodsj.com:22001')

+ 176 - 116
shudao-vue-frontend/src/composables/useSpeechRecognition.js

@@ -1,9 +1,16 @@
 import { ref } from 'vue'
+
 import { transcribeAudio } from '@/services/audioTranscription'
+import {
+  cleanTextForSpeech,
+  splitTextIntoSpeechChunks,
+  synthesizeSpeechToObjectUrl
+} from '@/services/speechService'
+
 
 const hasWindow = typeof window !== 'undefined'
 const AudioContextClass = hasWindow ? (window.AudioContext || window.webkitAudioContext) : null
-const speechSynthesisInstance = hasWindow ? window.speechSynthesis : null
+
 
 export function useSpeechRecognition() {
   const isSupported = ref(false)
@@ -11,13 +18,18 @@ export function useSpeechRecognition() {
   const isProcessing = ref(false)
   const transcript = ref('')
   const error = ref('')
-
   const isSpeaking = ref(false)
 
   let mediaRecorder = null
   let mediaStream = null
   let audioChunks = []
 
+  let currentAudio = null
+  let currentAudioUrl = ''
+  let settleCurrentPlayback = null
+  let playbackSessionId = 0
+  const selectedVoiceName = ref('Tencent Cloud Default')
+
   const checkSupport = () => {
     const support =
       hasWindow &&
@@ -71,7 +83,7 @@ export function useSpeechRecognition() {
         message = '麦克风被占用或不可用,请检查是否有其他应用正在使用'
         break
       case 'OverconstrainedError':
-        message = '无法满足当前音频采集的约束条件'
+        message = '当前录音参数不受支持,请更换设备后重试'
         break
       default:
         message = mediaError.message || message
@@ -96,7 +108,6 @@ export function useSpeechRecognition() {
       const finalBlob = wavBlob || blob
       const wavFile = new File([finalBlob], `audio_${Date.now()}.wav`, { type: 'audio/wav' })
       const userId = getStoredUserId()
-
       const { text } = await transcribeAudio({ file: wavFile, userId })
       transcript.value = text || ''
       error.value = ''
@@ -133,7 +144,7 @@ export function useSpeechRecognition() {
         try {
           mediaRecorder = options ? new MediaRecorder(stream, options) : new MediaRecorder(stream)
         } catch (recorderError) {
-          console.error('MediaRecorder初始化失败:', recorderError)
+          console.error('MediaRecorder 初始化失败:', recorderError)
           error.value = '无法启动录音,请检查浏览器是否支持录音功能'
           stopMediaTracks()
           return
@@ -150,7 +161,7 @@ export function useSpeechRecognition() {
         }
 
         mediaRecorder.onerror = (event) => {
-          console.error('MediaRecorder错误:', event.error || event)
+          console.error('MediaRecorder 错误:', event.error || event)
           error.value = '录音过程中出现问题,请重新尝试'
           resetRecorder(true)
         }
@@ -186,120 +197,169 @@ export function useSpeechRecognition() {
     }
   }
 
-  const speechSynthesis = speechSynthesisInstance
+  const clearCurrentAudio = () => {
+    if (currentAudio) {
+      currentAudio.onended = null
+      currentAudio.onerror = null
+      currentAudio.onpause = null
+      currentAudio = null
+    }
 
-  const speakText = (text, options = {}) => {
-    if (!speechSynthesis) {
-      error.value = '浏览器不支持语音播放功能'
-      return false
+    if (currentAudioUrl) {
+      URL.revokeObjectURL(currentAudioUrl)
+      currentAudioUrl = ''
     }
+  }
 
-    stopSpeaking()
+  const playAudioUrl = (audioUrl, sessionId) =>
+    new Promise((resolve, reject) => {
+      if (!hasWindow || typeof Audio === 'undefined') {
+        reject(new Error('当前环境不支持音频播放'))
+        return
+      }
 
-    const utterance = new SpeechSynthesisUtterance(text)
-    
-    utterance.lang = options.lang || 'zh-CN'
-    utterance.rate = options.rate || 1.0
-    utterance.pitch = options.pitch || 1.0
-    utterance.volume = options.volume || 1.0
+      clearCurrentAudio()
+      const audio = new Audio(audioUrl)
+      currentAudio = audio
+      currentAudioUrl = audioUrl
 
-    utterance.onstart = () => {
-      isSpeaking.value = true
-      error.value = ''
-    }
+      const finish = (handler) => {
+        if (settleCurrentPlayback) {
+          settleCurrentPlayback = null
+        }
+        handler()
+      }
 
-    utterance.onend = () => {
-      isSpeaking.value = false
-    }
+      settleCurrentPlayback = () => finish(() => resolve(false))
 
-    utterance.onerror = (event) => {
-      let errorMessage = ''
-      switch (event.error) {
-        case 'canceled':
-          errorMessage = '语音播放已取消'
-          break
-        case 'interrupted':
-          errorMessage = '语音播放被中断'
-          break
-        case 'audio-busy':
-          errorMessage = '音频设备忙,请稍后重试'
-          break
-        case 'audio-hardware':
-          errorMessage = '音频硬件错误'
-          break
-        case 'network':
-          errorMessage = '网络错误,请检查网络连接'
-          break
-        case 'synthesis-unavailable':
-          errorMessage = '语音播放服务不可用'
-          break
-        case 'synthesis-failed':
-          errorMessage = '语音播放失败'
-          break
-        case 'language-unavailable':
-          errorMessage = '不支持当前语言'
-          break
-        case 'voice-unavailable':
-          errorMessage = '当前语音不可用'
-          break
-        case 'text-too-long':
-          errorMessage = '文本过长,无法播放'
-          break
-        case 'invalid-argument':
-          errorMessage = '语音播放参数无效'
-          break
-        case 'not-allowed':
-          errorMessage = '语音播放权限被拒绝'
-          break
-        default:
-          errorMessage = `语音播放错误: ${event.error}`
+      audio.onended = () => {
+        if (sessionId !== playbackSessionId) {
+          finish(() => resolve(false))
+          return
+        }
+        clearCurrentAudio()
+        finish(() => resolve(true))
+      }
+
+      audio.onerror = () => {
+        const playbackError = new Error('语音播放失败')
+        clearCurrentAudio()
+        finish(() => reject(playbackError))
+      }
+
+      audio.play().catch((playbackError) => {
+        clearCurrentAudio()
+        finish(() => reject(playbackError))
+      })
+    })
+
+  const stopSpeaking = () => {
+    playbackSessionId += 1
+
+    if (currentAudio) {
+      try {
+        currentAudio.pause()
+        currentAudio.currentTime = 0
+      } catch (err) {
+        console.warn('停止音频播放失败:', err)
       }
-      error.value = errorMessage
-      isSpeaking.value = false
     }
 
-    speechSynthesis.speak(utterance)
-    return true
+    if (settleCurrentPlayback) {
+      const settle = settleCurrentPlayback
+      settleCurrentPlayback = null
+      settle()
+    }
+
+    clearCurrentAudio()
+    isSpeaking.value = false
   }
 
-  const stopSpeaking = () => {
-    if (speechSynthesis && isSpeaking.value) {
-      speechSynthesis.cancel()
-      isSpeaking.value = false
+  const speakText = async (text, options = {}) => {
+    if (!hasWindow || typeof Audio === 'undefined') {
+      error.value = '浏览器不支持语音播放功能'
+      return false
+    }
+
+    const normalizedText = cleanTextForSpeech(text)
+    if (!normalizedText) {
+      error.value = '文本内容为空,无法播放'
+      return false
+    }
+
+    stopSpeaking()
+    const sessionId = playbackSessionId
+    const chunks = splitTextIntoSpeechChunks(
+      normalizedText,
+      options.firstChunkLimit ?? 60,
+      options.remainingChunkLimit ?? 120
+    )
+
+    isSpeaking.value = true
+    error.value = ''
+
+    try {
+      for (const chunk of chunks) {
+        if (sessionId !== playbackSessionId) {
+          return false
+        }
+
+        const audioUrl = await synthesizeSpeechToObjectUrl({
+          text: chunk,
+          voiceType: options.voiceType,
+          speed: options.rate ?? options.speed,
+          volume: options.volume
+        })
+
+        if (sessionId !== playbackSessionId) {
+          URL.revokeObjectURL(audioUrl)
+          return false
+        }
+
+        await playAudioUrl(audioUrl, sessionId)
+      }
+
+      return true
+    } catch (playbackError) {
+      console.error('语音播放失败:', playbackError)
+      if (sessionId === playbackSessionId) {
+        error.value = playbackError?.message || '语音播放失败,请稍后重试'
+      }
+      return false
+    } finally {
+      if (sessionId === playbackSessionId) {
+        isSpeaking.value = false
+        clearCurrentAudio()
+      }
     }
   }
 
   const pauseSpeaking = () => {
-    if (speechSynthesis && isSpeaking.value) {
-      speechSynthesis.pause()
+    if (currentAudio && isSpeaking.value) {
+      currentAudio.pause()
     }
   }
 
   const resumeSpeaking = () => {
-    if (speechSynthesis) {
-      speechSynthesis.resume()
+    if (currentAudio) {
+      currentAudio.play().catch((playbackError) => {
+        console.error('恢复播放失败:', playbackError)
+        error.value = playbackError?.message || '恢复播放失败'
+      })
     }
   }
 
-  const getAvailableVoices = () => {
-    if (!speechSynthesis) return []
-    
-    return speechSynthesis.getVoices().filter(voice =>
-      voice.lang.startsWith('zh') || voice.lang.startsWith('en')
-    )
-  }
+  const getAvailableVoices = () => [
+    {
+      name: selectedVoiceName.value,
+      lang: 'zh-CN'
+    }
+  ]
 
   const setVoice = (voiceName) => {
-    if (!speechSynthesis) return false
-    
-    const voices = speechSynthesis.getVoices()
-    const voice = voices.find(v => v.name === voiceName)
-    
-    if (voice) {
-      return true
-    }
-    
-    return false
+    if (!voiceName) return false
+    selectedVoiceName.value = voiceName
+    return true
   }
 
   return {
@@ -321,6 +381,7 @@ export function useSpeechRecognition() {
   }
 }
 
+
 function getRecorderOptions() {
   if (!hasWindow || typeof window.MediaRecorder === 'undefined') {
     return undefined
@@ -347,23 +408,23 @@ function getStoredUserId() {
   if (!hasWindow || !window.localStorage) {
     return 'web-user'
   }
+
   try {
     return window.localStorage.getItem('shudao_user_id') || 'web-user'
   } catch (err) {
-    console.warn('读取本地用户ID失败:', err)
+    console.warn('读取本地用户 ID 失败:', err)
     return 'web-user'
   }
 }
 
 async function convertBlobToWav(blob) {
   if (!blob || !blob.size) return blob
-
   if (blob.type === 'audio/wav' || /wav/i.test(blob.type)) {
     return blob
   }
 
   if (!AudioContextClass) {
-    console.warn('当前环境不支持AudioContext,无法转换为WAV,将直接上传原始音频')
+    console.warn('当前环境不支持 AudioContext,将直接上传原始音频')
     return blob
   }
 
@@ -376,14 +437,14 @@ async function convertBlobToWav(blob) {
     const wavBuffer = encodeWAV(monoData, audioBuffer.sampleRate)
     return new Blob([wavBuffer], { type: 'audio/wav' })
   } catch (err) {
-    console.error('音频转换为WAV失败,将上传原始格式:', err)
+    console.error('音频转换为 WAV 失败,将上传原始格式:', err)
     return blob
   } finally {
     if (audioContext && audioContext.state !== 'closed') {
       try {
         await audioContext.close()
       } catch (closeError) {
-        console.warn('关闭AudioContext失败:', closeError)
+        console.warn('关闭 AudioContext 失败:', closeError)
       }
     }
   }
@@ -397,15 +458,15 @@ function mergeToMono(audioBuffer) {
   const length = audioBuffer.length
   const result = new Float32Array(length)
 
-  for (let channel = 0; channel < audioBuffer.numberOfChannels; channel++) {
+  for (let channel = 0; channel < audioBuffer.numberOfChannels; channel += 1) {
     const channelData = audioBuffer.getChannelData(channel)
-    for (let i = 0; i < length; i++) {
-      result[i] += channelData[i]
+    for (let index = 0; index < length; index += 1) {
+      result[index] += channelData[index]
     }
   }
 
-  for (let i = 0; i < length; i++) {
-    result[i] /= audioBuffer.numberOfChannels
+  for (let index = 0; index < length; index += 1) {
+    result[index] /= audioBuffer.numberOfChannels
   }
 
   return result
@@ -420,30 +481,29 @@ function encodeWAV(samples, sampleRate) {
   writeString(view, 8, 'WAVE')
   writeString(view, 12, 'fmt ')
   view.setUint32(16, 16, true)
-  view.setUint16(20, 1, true) // PCM
-  view.setUint16(22, 1, true) // mono
+  view.setUint16(20, 1, true)
+  view.setUint16(22, 1, true)
   view.setUint32(24, sampleRate, true)
   view.setUint32(28, sampleRate * 2, true)
-  view.setUint16(32, 2, true) // block align
-  view.setUint16(34, 16, true) // bits per sample
+  view.setUint16(32, 2, true)
+  view.setUint16(34, 16, true)
   writeString(view, 36, 'data')
   view.setUint32(40, samples.length * 2, true)
 
   floatTo16BitPCM(view, 44, samples)
-
   return buffer
 }
 
 function floatTo16BitPCM(output, offset, input) {
-  for (let i = 0; i < input.length; i++, offset += 2) {
-    let s = Math.max(-1, Math.min(1, input[i]))
-    s = s < 0 ? s * 0x8000 : s * 0x7fff
-    output.setInt16(offset, s, true)
+  for (let index = 0; index < input.length; index += 1, offset += 2) {
+    let sample = Math.max(-1, Math.min(1, input[index]))
+    sample = sample < 0 ? sample * 0x8000 : sample * 0x7fff
+    output.setInt16(offset, sample, true)
   }
 }
 
 function writeString(view, offset, string) {
-  for (let i = 0; i < string.length; i++) {
-    view.setUint8(offset + i, string.charCodeAt(i))
+  for (let index = 0; index < string.length; index += 1) {
+    view.setUint8(offset + index, string.charCodeAt(index))
   }
 }

+ 1 - 64
shudao-vue-frontend/src/services/audioTranscription.js

@@ -1,64 +1 @@
-import axios from 'axios'
-import { getToken, getTokenType } from '@/utils/auth'
-import { getAudioTranscriptionBase } from '@/utils/apiConfig'
-
-const audioClient = axios.create({
-  baseURL: import.meta.env?.VITE_AUDIO_API_BASE || getAudioTranscriptionBase(),
-  timeout: 120000
-})
-
-audioClient.interceptors.request.use((config) => {
-  const token = getToken()
-  const tokenType = getTokenType()
-
-  if (token && tokenType) {
-    const formattedType = tokenType.charAt(0).toUpperCase() + tokenType.slice(1).toLowerCase()
-    config.headers = config.headers || {}
-    config.headers['Authorization'] = `${formattedType} ${token}`
-  }
-
-  return config
-})
-
-function extractTranscriptionText(payload) {
-  if (!payload) return ''
-  if (typeof payload === 'string') return payload
-
-  return (
-    payload?.workflow_result?.data?.outputs?.text ||
-    payload?.workflow_result?.data?.outputs?.content ||
-    payload?.data?.outputs?.text ||
-    payload?.outputs?.text ||
-    payload?.text ||
-    ''
-  )
-}
-
-export async function transcribeAudio({ file, userId }) {
-  const formData = new FormData()
-  const filename = file?.name || `audio_${Date.now()}.wav`
-  formData.append('file', file, filename)
-
-  if (userId) {
-    formData.append('user_id', userId)
-  }
-
-  const response = await audioClient.post('', formData, {
-    headers: {
-      'Content-Type': 'multipart/form-data'
-    }
-  })
-
-  const payload = response?.data ?? response
-  const text = extractTranscriptionText(payload)
-
-  if (!text) {
-    throw new Error('语音转写成功,但未返回文本内容')
-  }
-
-  return {
-    text,
-    raw: payload
-  }
-}
-
+export { transcribeAudio } from './speechService'

+ 236 - 0
shudao-vue-frontend/src/services/speechService.js

@@ -0,0 +1,236 @@
+import axios from 'axios'
+
+import { getToken, getTokenType } from '@/utils/auth'
+import { getSpeechApiBase } from '@/utils/apiConfig'
+
+
+// Dedicated axios instance for the speech endpoints. The base URL can be
+// overridden at build time through VITE_SPEECH_API_BASE.
+const speechClient = axios.create({
+  baseURL: import.meta.env?.VITE_SPEECH_API_BASE || getSpeechApiBase(),
+  timeout: 120000
+})
+
+// Attach the stored credentials to every outgoing speech request.
+speechClient.interceptors.request.use((requestConfig) => {
+  const token = getToken()
+  const tokenType = getTokenType()
+
+  // Without both pieces there is nothing to attach; send the request as-is.
+  if (!token || !tokenType) {
+    return requestConfig
+  }
+
+  // Normalise the scheme's casing, e.g. "bearer" -> "Bearer".
+  const scheme = `${tokenType.charAt(0).toUpperCase()}${tokenType.slice(1).toLowerCase()}`
+  requestConfig.headers = requestConfig.headers || {}
+  requestConfig.headers.Authorization = `${scheme} ${token}`
+  return requestConfig
+})
+
+/**
+ * Pull the recognised text out of a transcription response.
+ *
+ * A string payload is returned unchanged. For an object payload the known
+ * response shapes are probed in priority order and the first truthy value
+ * wins. Returns '' when the payload is falsy or no shape matches.
+ */
+export function extractTranscriptionText(payload) {
+  if (!payload) {
+    return ''
+  }
+  if (typeof payload === 'string') {
+    return payload
+  }
+
+  // Ordered by priority: the first truthy candidate is returned.
+  const candidates = [
+    payload?.data?.text,
+    payload?.text,
+    payload?.workflow_result?.data?.outputs?.text,
+    payload?.workflow_result?.data?.outputs?.content,
+    payload?.data?.outputs?.text,
+    payload?.outputs?.text
+  ]
+  return candidates.find(Boolean) || ''
+}
+
+/**
+ * Parse `text` as JSON without throwing.
+ *
+ * Returns the parsed value, or null when `text` is falsy or not valid JSON.
+ */
+function tryParseJson(text) {
+  let parsed = null
+
+  if (text) {
+    try {
+      parsed = JSON.parse(text)
+    } catch {
+      parsed = null
+    }
+  }
+
+  return parsed
+}
+
+/**
+ * Read a Blob-like payload as text.
+ *
+ * Prefers the payload's own `text()` method, falls back to decoding
+ * `arrayBuffer()` with a default TextDecoder, and resolves to '' when the
+ * payload offers neither.
+ */
+async function readBlobText(payload) {
+  const canReadText = typeof payload?.text === 'function'
+  if (canReadText) {
+    return payload.text()
+  }
+
+  const canReadBuffer = typeof payload?.arrayBuffer === 'function'
+  if (!canReadBuffer) {
+    return ''
+  }
+
+  const bytes = await payload.arrayBuffer()
+  return new TextDecoder().decode(bytes)
+}
+
+/**
+ * Derive a human-readable error message from an error response body.
+ *
+ * The body may be a Blob-like object, a string (possibly JSON), or an
+ * already-parsed object. In each case the `msg`, `message` and
+ * `error.message` fields are tried in that order; Blob and string bodies
+ * then fall back to their raw text. `fallbackMessage` is returned when
+ * nothing usable is found.
+ */
+async function extractSpeechErrorMessage(payload, fallbackMessage) {
+  if (!payload) return fallbackMessage
+
+  // Shared field lookup used by every payload shape below.
+  const pickMessage = (source) => source?.msg || source?.message || source?.error?.message
+
+  const isBlobLike =
+    payload instanceof Blob ||
+    typeof payload?.text === 'function' ||
+    typeof payload?.arrayBuffer === 'function'
+
+  if (isBlobLike) {
+    const rawText = await readBlobText(payload)
+    return pickMessage(tryParseJson(rawText)) || rawText || fallbackMessage
+  }
+
+  switch (typeof payload) {
+    case 'string':
+      return pickMessage(tryParseJson(payload)) || payload || fallbackMessage
+    case 'object':
+      return pickMessage(payload) || fallbackMessage
+    default:
+      return fallbackMessage
+  }
+}
+
+/**
+ * Build the Error to throw for a failed speech request.
+ *
+ * Message priority: the message found in the response body, then the
+ * original error's own message, then `fallbackMessage`.
+ */
+async function createSpeechRequestError(error, fallbackMessage) {
+  const backendMessage = await extractSpeechErrorMessage(error?.response?.data, '')
+  const message = backendMessage || error?.message || fallbackMessage
+  return new Error(message)
+}
+
+/**
+ * Reduce arbitrary (possibly HTML) text to plain text suitable for speech.
+ *
+ * Steps: drop HTML tags, drop every character outside the allowed set
+ * (CJK ideographs, ASCII letters and digits, whitespace and a small set of
+ * punctuation), then collapse whitespace runs to a single space and trim.
+ *
+ * Whitespace is collapsed last so that removing an unsupported character
+ * that sat between two spaces does not leave a double space behind.
+ *
+ * @param {*} text Input text; falsy values yield ''.
+ * @returns {string} The cleaned text, possibly ''.
+ */
+export function cleanTextForSpeech(text) {
+  if (!text) return ''
+
+  return String(text)
+    .replace(/<[^>]*>/g, '')
+    .replace(/[^\u4e00-\u9fa5a-zA-Z0-9\s,。!?、;:'"()\-.\[\]]/g, '')
+    .replace(/\s+/g, ' ')
+    .trim()
+}
+
+/**
+ * Split text into chunks sized for sequential speech synthesis.
+ *
+ * The text is first cleaned with cleanTextForSpeech, then cut after
+ * sentence-ending punctuation. Sentences are packed greedily into chunks:
+ * the first chunk holds at most `firstChunkLimit` characters and every later
+ * chunk at most `remainingChunkLimit`. A single sentence longer than the
+ * limit in force is hard-split into limit-sized slices.
+ *
+ * Every sentence is checked against the limit in force, including one that
+ * follows a flushed chunk, so no chunk exceeds its limit. Limits are
+ * clamped to at least 1 so the hard-split loop always advances.
+ *
+ * @param {*} text Raw text; falsy or fully-stripped input yields [].
+ * @param {number} [firstChunkLimit=60] Max length of the first chunk.
+ * @param {number} [remainingChunkLimit=120] Max length of later chunks.
+ * @returns {string[]} Chunks in reading order.
+ */
+export function splitTextIntoSpeechChunks(text, firstChunkLimit = 60, remainingChunkLimit = 120) {
+  // Guard against zero, negative or non-numeric limits.
+  const toLimit = (value) => Math.max(1, Math.floor(Number(value)) || 1)
+  const firstLimit = toLimit(firstChunkLimit)
+  const laterLimit = toLimit(remainingChunkLimit)
+
+  const normalized = cleanTextForSpeech(text)
+  if (!normalized) return []
+  if (normalized.length <= firstLimit) return [normalized]
+
+  const chunks = []
+  let currentChunk = ''
+
+  // The first-chunk limit applies until something has been emitted.
+  const limitForNextChunk = () => (chunks.length === 0 ? firstLimit : laterLimit)
+
+  const flushCurrentChunk = () => {
+    const pending = currentChunk.trim()
+    if (pending) {
+      chunks.push(pending)
+    }
+    currentChunk = ''
+  }
+
+  // The lookbehind keeps each terminator attached to its sentence.
+  for (const rawSentence of normalized.split(/(?<=[。!?])/)) {
+    const sentence = rawSentence.trim()
+    if (!sentence) continue
+
+    if (currentChunk.length + sentence.length <= limitForNextChunk()) {
+      currentChunk += sentence
+      continue
+    }
+
+    // The sentence does not fit: emit what has been gathered, then place it.
+    flushCurrentChunk()
+
+    if (sentence.length <= limitForNextChunk()) {
+      currentChunk = sentence
+      continue
+    }
+
+    // Too long for any single chunk: hard-split into limit-sized slices.
+    let start = 0
+    while (start < sentence.length) {
+      const sliceLimit = limitForNextChunk()
+      chunks.push(sentence.slice(start, start + sliceLimit))
+      start += sliceLimit
+    }
+  }
+
+  flushCurrentChunk()
+  return chunks
+}
+
+/**
+ * Upload an audio file to the speech backend and return its transcription.
+ *
+ * Posts multipart form data to `/transcribe` with the file under `file` and,
+ * when given, the caller's id under `user_id`.
+ *
+ * Resolves to `{ text, raw }`, where `raw` is the response payload.
+ * Rejects with an Error carrying the backend's message when the request
+ * fails, or with a fixed message when no text is found in the response.
+ */
+export async function transcribeAudio({ file, userId }) {
+  const body = new FormData()
+  const uploadName = file?.name || `audio_${Date.now()}.wav`
+  body.append('file', file, uploadName)
+  if (userId) {
+    body.append('user_id', userId)
+  }
+
+  const requestOptions = {
+    headers: { 'Content-Type': 'multipart/form-data' }
+  }
+
+  let response
+  try {
+    response = await speechClient.post('/transcribe', body, requestOptions)
+  } catch (error) {
+    throw await createSpeechRequestError(error, '语音转写失败')
+  }
+
+  const raw = response?.data ?? response
+  const text = extractTranscriptionText(raw)
+  if (!text) {
+    throw new Error('语音转写成功,但未返回文本内容')
+  }
+
+  return { text, raw }
+}
+
+/**
+ * Ask the speech backend to synthesize `text` and return the audio Blob.
+ *
+ * The text is cleaned with cleanTextForSpeech before sending. `voiceType`,
+ * `speed` and `volume` are forwarded as `voice_type`, `speed` and `volume`.
+ *
+ * Rejects when the cleaned text is empty, when the request fails, when the
+ * response body is not a non-empty Blob, or when the response declares a
+ * content type that does not start with `audio/`.
+ */
+export async function synthesizeSpeech({ text, voiceType, speed, volume }) {
+  const speechText = cleanTextForSpeech(text)
+  if (!speechText) {
+    throw new Error('播报文本不能为空')
+  }
+
+  const requestBody = {
+    text: speechText,
+    voice_type: voiceType,
+    speed,
+    volume
+  }
+
+  let response
+  try {
+    response = await speechClient.post('/synthesize', requestBody, { responseType: 'blob' })
+  } catch (error) {
+    throw await createSpeechRequestError(error, '语音播报失败')
+  }
+
+  const audioBlob = response?.data
+  const declaredType = response?.headers?.['content-type'] || audioBlob?.type || ''
+  const contentType = String(declaredType).toLowerCase()
+
+  const hasAudioPayload = audioBlob instanceof Blob && audioBlob.size !== 0
+  if (!hasAudioPayload) {
+    throw new Error('语音播报未返回有效音频')
+  }
+
+  // A non-audio body here is treated as an error document; surface its message.
+  const looksLikeErrorBody = Boolean(contentType) && !contentType.startsWith('audio/')
+  if (looksLikeErrorBody) {
+    throw new Error(await extractSpeechErrorMessage(audioBlob, '语音播报失败'))
+  }
+
+  return audioBlob
+}
+
+/**
+ * Synthesize speech and wrap the resulting audio Blob in an object URL.
+ *
+ * `options` is passed straight to synthesizeSpeech. The URL is created with
+ * URL.createObjectURL and is not revoked here.
+ */
+export async function synthesizeSpeechToObjectUrl(options) {
+  return URL.createObjectURL(await synthesizeSpeech(options))
+}

+ 109 - 0
shudao-vue-frontend/src/services/speechService.test.js

@@ -0,0 +1,109 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+
+
+// Stand-in for speechClient.post; each test programs its result.
+const postMock = vi.fn()
+// Receives the request interceptor that speechService registers.
+const requestUseMock = vi.fn()
+// Replacement for axios.create: returns a minimal client exposing only the
+// members speechService touches.
+const createMock = vi.fn(() => ({
+  interceptors: {
+    request: {
+      use: requestUseMock
+    }
+  },
+  post: postMock
+}))
+
+// The module under test is imported dynamically inside each test, after
+// these mocks and the constants above have been set up.
+vi.mock('axios', () => ({
+  default: {
+    create: createMock
+  }
+}))
+
+// Fixed credentials for the auth helpers.
+vi.mock('@/utils/auth', () => ({
+  getToken: () => 'token-1',
+  getTokenType: () => 'bearer'
+}))
+
+// Fixed base path for the speech API.
+vi.mock('@/utils/apiConfig', () => ({
+  getSpeechApiBase: () => '/apiv1/speech'
+}))
+
+// Covers the speech service against a mocked axios client: transcription
+// parsing, audio synthesis to an object URL, and backend error surfacing.
+describe('speechService', () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+    postMock.mockReset()
+    // Stubbed so the tests can assert on a predictable URL.
+    global.URL.createObjectURL = vi.fn(() => 'blob:test-audio')
+  })
+
+  it('transcribes audio from the new backend speech route payload', async () => {
+    postMock.mockResolvedValue({
+      data: {
+        statusCode: 200,
+        data: {
+          text: '这是新的语音转写结果'
+        }
+      }
+    })
+
+    const { transcribeAudio } = await import('./speechService')
+    const response = await transcribeAudio({
+      file: new File(['abc'], 'sample.wav', { type: 'audio/wav' }),
+      userId: 'web-user'
+    })
+
+    expect(response.text).toBe('这是新的语音转写结果')
+    expect(postMock).toHaveBeenCalledWith(
+      '/transcribe',
+      expect.any(FormData),
+      expect.objectContaining({
+        headers: {
+          'Content-Type': 'multipart/form-data'
+        }
+      })
+    )
+  })
+
+  it('creates an object URL from backend synthesized audio', async () => {
+    postMock.mockResolvedValue({
+      headers: {
+        'content-type': 'audio/mpeg'
+      },
+      data: new Blob(['fake-audio'], { type: 'audio/mpeg' })
+    })
+
+    const { synthesizeSpeechToObjectUrl } = await import('./speechService')
+    const audioUrl = await synthesizeSpeechToObjectUrl({ text: '测试播报' })
+
+    expect(audioUrl).toBe('blob:test-audio')
+    expect(postMock).toHaveBeenCalledWith(
+      '/synthesize',
+      { text: '测试播报' },
+      expect.objectContaining({
+        responseType: 'blob'
+      })
+    )
+  })
+
+  it('surfaces backend synthesize errors instead of a generic axios message', async () => {
+    // The string literals in this test were mojibake (UTF-8 Chinese text
+    // mis-decoded as GBK) and have been restored to the intended wording.
+    // The error body is Blob-like: it exposes text() resolving to JSON.
+    const errorBlob = {
+      type: 'application/json',
+      text: vi.fn(async () => JSON.stringify({
+        statusCode: 503,
+        msg: '语音播报失败: 腾讯云语音合成资源包已耗尽'
+      }))
+    }
+
+    postMock.mockRejectedValue({
+      response: {
+        status: 503,
+        data: errorBlob
+      },
+      message: 'Request failed with status code 503'
+    })
+
+    const { synthesizeSpeechToObjectUrl } = await import('./speechService')
+
+    await expect(synthesizeSpeechToObjectUrl({ text: '测试播报' })).rejects.toThrow(
+      '语音播报失败: 腾讯云语音合成资源包已耗尽'
+    )
+  })
+})

+ 11 - 5
shudao-vue-frontend/src/utils/apiConfig.js

@@ -8,8 +8,7 @@
  * - /apiv1 → 系统后端 (shudao-go-backend:22001)
  * - /chatwithai/api/v1 → AI对话服务 (ReportGenerator:28002)
  * - /auth/api → 认证网关服务 (auth-server:28004)
- * - /tts → TTS语音合成服务
- * - /audio_to_text → 语音转文字服务
+ * - /apiv1/speech → 新语音服务(转文字/播报)
  */
 
 // ==================== 环境检测 ====================
@@ -45,10 +44,13 @@ export const SSE_API_PREFIX = REPORT_API_PREFIX
 /** 认证网关服务 (auth-server) */
 export const AUTH_GATEWAY_URL = '/auth/api'
 
-/** TTS 语音合成服务 */
+/** 新语音服务 */
+export const SPEECH_API_PREFIX = '/apiv1/speech'
+
+/** TTS 语音合成服务(旧链路,保留兼容) */
 export const TTS_API_PREFIX = '/tts'
 
-/** 音频转录服务 (语音转文字) */
+/** 音频转录服务(旧链路,保留兼容) */
 export const AUDIO_TRANSCRIPTION_BASE = '/audio_to_text'
 
 // ==================== 便捷函数 ====================
@@ -77,10 +79,14 @@ export function getAudioTranscriptionBase() {
   return AUDIO_TRANSCRIPTION_BASE
 }
 
+export function getSpeechApiBase() {
+  return SPEECH_API_PREFIX
+}
+
 /**
  * 构建完整的 API URL
  */
 export function buildApiUrl(path, prefix = BACKEND_API_PREFIX) {
   const normalizedPath = path.startsWith('/') ? path : `/${path}`
   return `${prefix}${normalizedPath}`
-}
+}

+ 11 - 23
shudao-vue-frontend/src/views/AIWriting.vue

@@ -1366,7 +1366,7 @@ const createNewChat = async () => {
   
   // 停止当前朗读
   if (speakingMessageId.value) {
-    try { window.speechSynthesis && window.speechSynthesis.cancel(); } catch (e) {}
+    stopSpeaking();
     speakingMessageId.value = null;
   }
   
@@ -1433,7 +1433,7 @@ const handleHistoryItem = async (historyItem) => {
   try {
     // 停止当前朗读
     if (speakingMessageId.value) {
-      try { window.speechSynthesis && window.speechSynthesis.cancel(); } catch (e) {}
+      stopSpeaking();
       speakingMessageId.value = null;
     }
     
@@ -4329,12 +4329,12 @@ const regenerateResponse = async (messageIndex) => {
 const handleVoiceRead = (message) => {
   if (speakingMessageId.value === message.id) {
     // 如果正在朗读这条消息,则停止
-    try { window.speechSynthesis && window.speechSynthesis.cancel(); } catch (e) {}
+    stopSpeaking();
     speakingMessageId.value = null;
   } else {
     // 如果朗读其他消息,先停止当前朗读
     if (speakingMessageId.value) {
-      try { window.speechSynthesis && window.speechSynthesis.cancel(); } catch (e) {}
+      stopSpeaking();
     }
     
     // 开始朗读新消息 - 使用过滤后的内容,移除内部标签但保留文档内容
@@ -4372,25 +4372,13 @@ const handleVoiceRead = (message) => {
     textToRead = cleanMarkdownForSpeech(textToRead);
     
     if (textToRead) {
-      // 创建自定义的语音合成实例,以便监听完成事件
-      const utterance = new SpeechSynthesisUtterance(textToRead);
-      utterance.lang = 'zh-CN';
-      utterance.rate = 0.9;
-      utterance.pitch = 1;
-      utterance.volume = 1;
-      
-      // 监听语音合成完成事件
-      utterance.onend = () => {
-        speakingMessageId.value = null;
-      };
-      
-      utterance.onerror = () => {
-        speakingMessageId.value = null;
-      };
-      
-      // 开始朗读
-      window.speechSynthesis.speak(utterance);
       speakingMessageId.value = message.id;
+      Promise.resolve(speakText(textToRead, { rate: 0.9 }))
+        .finally(() => {
+          if (speakingMessageId.value === message.id) {
+            speakingMessageId.value = null;
+          }
+        });
     }
   }
 };
@@ -4538,7 +4526,7 @@ onMounted(async () => {
 onBeforeUnmount(() => {
   // 停止当前朗读
   if (speakingMessageId.value) {
-    try { window.speechSynthesis && window.speechSynthesis.cancel(); } catch (e) {}
+    stopSpeaking();
     speakingMessageId.value = null;
   }
 });

+ 13 - 80
shudao-vue-frontend/src/views/Chat.vue

@@ -690,7 +690,7 @@ import {
   shouldClearSummaryForOnlineAnswer,
   splitHtmlIntoTypewriterChunks
 } from '@/utils/chatHistoryPersistence.js'
-import { getToken, getTokenType } from '@/utils/auth.js'
+import { getToken } from '@/utils/auth.js'
 import { renderMarkdown } from '@/utils/markdown'
 import 'katex/dist/katex.min.css'
 
@@ -707,6 +707,7 @@ import WebSearchSummary from '@/components/WebSearchSummary.vue'
 import StatusAvatar from '@/components/StatusAvatar.vue'
 import { createSSEConnection, closeSSEConnection } from '@/utils/sse'
 import { getApiPrefix } from '@/utils/apiConfig'
+import { synthesizeSpeechToObjectUrl } from '@/services/speechService'
 import { Document } from '@element-plus/icons-vue'
 
 // 导入发送按钮图标
@@ -3616,55 +3617,20 @@ const currentAudio = ref(null)
 const audioQueue = ref([])
 const isPlayingQueue = ref(false)
 
-// 获取TTS服务地址(自动判断是否使用代理)
-const getTTSUrl = () => {
-  // 在开发环境中使用代理,生产环境中使用直接地址
-  const isDevelopment = import.meta.env.DEV
-  if (isDevelopment) {
-    return '/api/tts/voice'  // 使用Vite代理
-  } else {
-    return window.location.origin + '/tts/voice'  // 生产环境直接地址
-  }
-}
+// 获取TTS服务地址(仅用于日志展示)
+const getTTSUrl = () => '/apiv1/speech/synthesize'
 
 // 测试TTS服务连接
 const testTTSConnection = async () => {
-  const ttsUrl = getTTSUrl()
-  
   try {
-    const controller = new AbortController()
-    const timeoutId = setTimeout(() => controller.abort(), 8000)
-    
-    // 准备请求头,添加认证 Token
-    const headers = { 'Content-Type': 'application/json' }
-    const token = getToken()
-    const tokenType = getTokenType()
-    if (token && tokenType) {
-      // 确保 Bearer 首字母大写
-      const bearerType = tokenType.charAt(0).toUpperCase() + tokenType.slice(1).toLowerCase()
-      headers['Authorization'] = `${bearerType} ${token}`
-    }
-    
-    const response = await fetch(ttsUrl, {
-      method: 'POST',
-      headers,
-      body: JSON.stringify({ text: '测试' }),
-      signal: controller.signal
-    })
-    
-    clearTimeout(timeoutId)
-    
-    if (response.ok) {
-      await response.blob()
-      return { success: true, message: 'TTS服务连接正常' }
-    } else {
-      return { success: false, message: `TTS服务响应错误: ${response.status}` }
-    }
+    const audioUrl = await synthesizeSpeechToObjectUrl({ text: '测试' })
+    URL.revokeObjectURL(audioUrl)
+    return { success: true, message: 'TTS服务连接正常' }
   } catch (error) {
     let message = 'TTS服务连接失败'
     if (error.name === 'AbortError') {
       message = 'TTS服务连接超时'
-    } else if (error.message.includes('Failed to fetch')) {
+    } else if ((error.message || '').includes('Failed to fetch')) {
       message = '无法连接到TTS服务'
     }
     return { success: false, message }
@@ -3673,48 +3639,16 @@ const testTTSConnection = async () => {
 
 // 调用TTS接口进行语音合成(带重试机制)
 const callTTSAPI = async (text, retryCount = 0) => {
-  const ttsUrl = getTTSUrl()
   const maxRetries = 2
   
   try {
-    const controller = new AbortController()
-    const timeoutId = setTimeout(() => controller.abort(), 15000)
-    
-    // 准备请求头,添加认证 Token
-    const headers = { 'Content-Type': 'application/json' }
-    const token = getToken()
-    const tokenType = getTokenType()
-    if (token && tokenType) {
-      // 确保 Bearer 首字母大写
-      const bearerType = tokenType.charAt(0).toUpperCase() + tokenType.slice(1).toLowerCase()
-      headers['Authorization'] = `${bearerType} ${token}`
-    }
-    
-    const response = await fetch(ttsUrl, {
-      method: 'POST',
-      headers,
-      body: JSON.stringify({ text }),
-      signal: controller.signal
-    })
-    
-    clearTimeout(timeoutId)
-    
-    if (!response.ok) {
-      throw new Error(`TTS接口调用失败: ${response.status}`)
-    }
-    
-    const audioBlob = await response.blob()
-    if (audioBlob.size === 0) {
-      throw new Error('TTS接口返回的音频数据为空')
-    }
-    
-    return URL.createObjectURL(audioBlob)
+    return await synthesizeSpeechToObjectUrl({ text })
   } catch (error) {
     // 重试逻辑
     if (retryCount < maxRetries && (
       error.name === 'AbortError' || 
-      error.message.includes('Failed to fetch') ||
-      error.message.includes('NetworkError')
+      (error.message || '').includes('Failed to fetch') ||
+      (error.message || '').includes('NetworkError')
     )) {
       await new Promise(resolve => setTimeout(resolve, (retryCount + 1) * 1000))
       return callTTSAPI(text, retryCount + 1)
@@ -3724,9 +3658,9 @@ const callTTSAPI = async (text, retryCount = 0) => {
     let errorMessage = '语音合成失败'
     if (error.name === 'AbortError') {
       errorMessage = '语音合成请求超时'
-    } else if (error.message.includes('Failed to fetch')) {
+    } else if ((error.message || '').includes('Failed to fetch')) {
       errorMessage = '无法连接到语音合成服务'
-    } else if (error.message.includes('TTS接口调用失败')) {
+    } else if ((error.message || '').includes('TTS接口调用失败')) {
       errorMessage = error.message
     }
     
@@ -3831,7 +3765,6 @@ const stopAllAudio = () => {
     
     audioQueue.value = []
     isPlayingQueue.value = false
-    window.speechSynthesis && window.speechSynthesis.cancel()
   } catch (e) {
     console.warn('停止音频播放失败:', e)
   }

+ 16 - 115
shudao-vue-frontend/src/views/mobile/m-Chat.vue

@@ -479,6 +479,7 @@ import {
 } from '@/utils/chatHistoryPersistence.js'
 import { getToken, getTokenType, getUserName, getAccountId } from '@/utils/auth.js'
 import { initNativeNavForSubPage } from '@/utils/nativeBridge.js'
+import { synthesizeSpeechToObjectUrl } from '@/services/speechService'
 import Vditor from 'vditor'
 import 'vditor/dist/index.css'
 import 'katex/dist/katex.min.css'
@@ -1601,67 +1602,24 @@ const autoSendMessage = async (message) => {
 
 // ========== 语音合成相关函数 ==========
 
-// 获取TTS服务地址(统一使用代理路径
-const getTTSUrl = () => '/tts/voice'
+// 获取TTS服务地址(仅用于日志展示)
+const getTTSUrl = () => '/apiv1/speech/synthesize'
 
 // 测试TTS服务连接
 const testTTSConnection = async () => {
-  // 自动获取TTS服务地址
-  const ttsUrl = getTTSUrl()
-  
   try {
     console.log('开始测试TTS服务连接...')
-    console.log('使用代理地址:', ttsUrl)
-    
-    // 使用简单的测试文本
-    const testText = '测试'
-    
-    const controller = new AbortController()
-    const timeoutId = setTimeout(() => controller.abort(), 8000) // 8秒超时
-    
-    // 准备请求头,添加认证 Token
-    const headers = { 'Content-Type': 'application/json' }
-    const token = getToken()
-    const tokenType = getTokenType()
-    if (token && tokenType) {
-      // 确保 Bearer 首字母大写
-      const bearerType = tokenType.charAt(0).toUpperCase() + tokenType.slice(1).toLowerCase()
-      headers['Authorization'] = `${bearerType} ${token}`
-    }
-    
-    const response = await fetch(ttsUrl, {
-      method: 'POST',
-      headers,
-      body: JSON.stringify({
-        text: testText
-      }),
-      signal: controller.signal
-    })
-    
-    clearTimeout(timeoutId)
-    
-    console.log('TTS连接测试结果:', {
-      status: response.status,
-      statusText: response.statusText,
-      headers: Object.fromEntries(response.headers.entries()),
-      url: ttsUrl
-    })
-    
-    if (response.ok) {
-      const blob = await response.blob()
-      console.log('TTS服务连接正常,测试音频大小:', blob.size, 'bytes')
-      return { success: true, message: 'TTS服务连接正常' }
-    } else {
-      return { success: false, message: `TTS服务响应错误: ${response.status} ${response.statusText}` }
-    }
-    
+    console.log('当前语音接口:', getTTSUrl())
+    const audioUrl = await synthesizeSpeechToObjectUrl({ text: '测试' })
+    URL.revokeObjectURL(audioUrl)
+    return { success: true, message: 'TTS服务连接正常' }
   } catch (error) {
     console.error('TTS连接测试失败:', error)
     
     let message = 'TTS服务连接失败'
     if (error.name === 'AbortError') {
       message = 'TTS服务连接超时'
-    } else if (error.message.includes('Failed to fetch')) {
+    } else if ((error.message || '').includes('Failed to fetch')) {
       message = '无法连接到TTS服务,请检查网络或服务状态'
     } else {
       message = `TTS服务连接失败: ${error.message}`
@@ -1673,66 +1631,12 @@ const testTTSConnection = async () => {
 
 // 调用TTS接口进行语音合成(带重试机制)
 const callTTSAPI = async (text, retryCount = 0) => {
-  // 自动获取TTS服务地址
-  const ttsUrl = getTTSUrl()
   const maxRetries = 2 // 最大重试次数
   
   try {
     console.log(`开始调用TTS接口,文本长度: ${text.length}, 重试次数: ${retryCount}`)
-    console.log('TTS接口地址:', ttsUrl)
-    
-    // 添加超时控制 - 减少超时时间,提高响应速度
-    const controller = new AbortController()
-    const timeoutId = setTimeout(() => controller.abort(), 15000) // 15秒超时
-    
-    // 准备请求头,添加认证 Token
-    const headers = { 'Content-Type': 'application/json' }
-    const token = getToken()
-    const tokenType = getTokenType()
-    if (token && tokenType) {
-      // 确保 Bearer 首字母大写
-      const bearerType = tokenType.charAt(0).toUpperCase() + tokenType.slice(1).toLowerCase()
-      headers['Authorization'] = `${bearerType} ${token}`
-    }
-    
-    const response = await fetch(ttsUrl, {
-      method: 'POST',
-      headers,
-      body: JSON.stringify({
-        text: text
-      }),
-      signal: controller.signal
-    })
-    
-    clearTimeout(timeoutId)
-    
-    console.log('TTS接口响应状态:', response.status, response.statusText)
-    
-    if (!response.ok) {
-      const errorText = await response.text().catch(() => '无法读取错误信息')
-      throw new Error(`TTS接口调用失败: ${response.status} ${response.statusText} - ${errorText}`)
-    }
-    
-    // 检查响应类型
-    const contentType = response.headers.get('content-type')
-    console.log('响应Content-Type:', contentType)
-    
-    if (!contentType || !contentType.includes('audio')) {
-      console.warn('响应可能不是音频格式:', contentType)
-    }
-    
-    // 获取音频数据
-    const audioBlob = await response.blob()
-    console.log('TTS接口调用成功,音频大小:', audioBlob.size, 'bytes')
-    console.log('音频类型:', audioBlob.type)
-    
-    if (audioBlob.size === 0) {
-      throw new Error('TTS接口返回的音频数据为空')
-    }
-    
-    // 创建音频URL
-    const audioUrl = URL.createObjectURL(audioBlob)
-    return audioUrl
+    console.log('TTS接口地址:', getTTSUrl())
+    return await synthesizeSpeechToObjectUrl({ text })
     
   } catch (error) {
     console.error(`TTS接口调用失败 (重试${retryCount}/${maxRetries}):`, error)
@@ -1740,8 +1644,8 @@ const callTTSAPI = async (text, retryCount = 0) => {
     // 如果是网络错误或超时,且还有重试次数,则重试
     if (retryCount < maxRetries && (
       error.name === 'AbortError' || 
-      error.message.includes('Failed to fetch') ||
-      error.message.includes('NetworkError')
+      (error.message || '').includes('Failed to fetch') ||
+      (error.message || '').includes('NetworkError')
     )) {
       console.log(`准备重试TTS请求,等待${(retryCount + 1) * 1000}ms...`)
       await new Promise(resolve => setTimeout(resolve, (retryCount + 1) * 1000))
@@ -1753,13 +1657,13 @@ const callTTSAPI = async (text, retryCount = 0) => {
     
     if (error.name === 'AbortError') {
       errorMessage = '语音合成请求超时,请检查网络连接或稍后重试'
-    } else if (error.message.includes('Failed to fetch')) {
+    } else if ((error.message || '').includes('Failed to fetch')) {
       errorMessage = '无法连接到语音合成服务,请检查网络连接或联系管理员'
-    } else if (error.message.includes('CORS')) {
+    } else if ((error.message || '').includes('CORS')) {
       errorMessage = '跨域请求被阻止,请联系管理员配置服务器'
-    } else if (error.message.includes('NetworkError')) {
+    } else if ((error.message || '').includes('NetworkError')) {
       errorMessage = '网络错误,请检查网络连接'
-    } else if (error.message.includes('TTS接口调用失败')) {
+    } else if ((error.message || '').includes('TTS接口调用失败')) {
       errorMessage = error.message
     } else {
       errorMessage = `语音合成失败: ${error.message}`
@@ -1882,9 +1786,6 @@ const stopAllAudio = () => {
     audioQueue.value = []
     isPlayingQueue.value = false
     
-    // 停止浏览器原生语音合成(备用)
-    window.speechSynthesis && window.speechSynthesis.cancel()
-    
     console.log('所有音频播放已停止')
   } catch (e) {
     console.warn('停止音频播放时出错:', e)

+ 12 - 13
shudao-vue-frontend/src/views/mobile/m-SafetyHazard.vue

@@ -487,7 +487,9 @@ const {
   transcript,
   error: speechError,
   startListening,
-  stopListening
+  stopListening,
+  speakText,
+  stopSpeaking
 } = useSpeechRecognition()
 
 // Toast状态
@@ -1496,26 +1498,23 @@ const stopVoiceInput = () => {
 // 语音朗读相关方法
 const handleVoiceRead = (message) => {
   if (speakingMessageId.value === message.id) {
-    try { window.speechSynthesis && window.speechSynthesis.cancel() } catch (e) {}
+    stopSpeaking()
     speakingMessageId.value = null
   } else {
     if (speakingMessageId.value) {
-      try { window.speechSynthesis && window.speechSynthesis.cancel() } catch (e) {}
+      stopSpeaking()
     }
     
     const textToRead = message.displayContent || message.content
     if (textToRead && textToRead.trim()) {
       const cleanText = textToRead.replace(/<[^>]*>/g, '')
-      
-      const utterance = new SpeechSynthesisUtterance(cleanText)
-      utterance.lang = 'zh-CN'
-      utterance.rate = 0.9
-      utterance.pitch = 1
-      utterance.volume = 1
-      utterance.onend = () => { speakingMessageId.value = null }
-      utterance.onerror = () => { speakingMessageId.value = null }
-      try { window.speechSynthesis && window.speechSynthesis.speak(utterance) } catch (e) {}
       speakingMessageId.value = message.id
+      Promise.resolve(speakText(cleanText, { rate: 0.9 }))
+        .finally(() => {
+          if (speakingMessageId.value === message.id) {
+            speakingMessageId.value = null
+          }
+        })
     }
   }
 }
@@ -2607,7 +2606,7 @@ onMounted(async () => {
 // 组件销毁前,强制停止任何朗读
 onBeforeUnmount(() => {
   if (speakingMessageId.value) {
-    try { window.speechSynthesis && window.speechSynthesis.cancel() } catch (e) {}
+    stopSpeaking()
     speakingMessageId.value = null
   }
 })