|
|
@@ -1,9 +1,16 @@
|
|
|
import { ref } from 'vue'
|
|
|
+
|
|
|
import { transcribeAudio } from '@/services/audioTranscription'
|
|
|
+import {
|
|
|
+ cleanTextForSpeech,
|
|
|
+ splitTextIntoSpeechChunks,
|
|
|
+ synthesizeSpeechToObjectUrl
|
|
|
+} from '@/services/speechService'
|
|
|
+
|
|
|
|
|
|
const hasWindow = typeof window !== 'undefined'
|
|
|
const AudioContextClass = hasWindow ? (window.AudioContext || window.webkitAudioContext) : null
|
|
|
-const speechSynthesisInstance = hasWindow ? window.speechSynthesis : null
|
|
|
+
|
|
|
|
|
|
export function useSpeechRecognition() {
|
|
|
const isSupported = ref(false)
|
|
|
@@ -11,13 +18,18 @@ export function useSpeechRecognition() {
|
|
|
const isProcessing = ref(false)
|
|
|
const transcript = ref('')
|
|
|
const error = ref('')
|
|
|
-
|
|
|
const isSpeaking = ref(false)
|
|
|
|
|
|
let mediaRecorder = null
|
|
|
let mediaStream = null
|
|
|
let audioChunks = []
|
|
|
|
|
|
+ let currentAudio = null
|
|
|
+ let currentAudioUrl = ''
|
|
|
+ let settleCurrentPlayback = null
|
|
|
+ let playbackSessionId = 0
|
|
|
+ const selectedVoiceName = ref('Tencent Cloud Default')
|
|
|
+
|
|
|
const checkSupport = () => {
|
|
|
const support =
|
|
|
hasWindow &&
|
|
|
@@ -71,7 +83,7 @@ export function useSpeechRecognition() {
|
|
|
message = '麦克风被占用或不可用,请检查是否有其他应用正在使用'
|
|
|
break
|
|
|
case 'OverconstrainedError':
|
|
|
- message = '无法满足当前音频采集的约束条件'
|
|
|
+ message = '当前录音参数不受支持,请更换设备后重试'
|
|
|
break
|
|
|
default:
|
|
|
message = mediaError.message || message
|
|
|
@@ -96,7 +108,6 @@ export function useSpeechRecognition() {
|
|
|
const finalBlob = wavBlob || blob
|
|
|
const wavFile = new File([finalBlob], `audio_${Date.now()}.wav`, { type: 'audio/wav' })
|
|
|
const userId = getStoredUserId()
|
|
|
-
|
|
|
const { text } = await transcribeAudio({ file: wavFile, userId })
|
|
|
transcript.value = text || ''
|
|
|
error.value = ''
|
|
|
@@ -133,7 +144,7 @@ export function useSpeechRecognition() {
|
|
|
try {
|
|
|
mediaRecorder = options ? new MediaRecorder(stream, options) : new MediaRecorder(stream)
|
|
|
} catch (recorderError) {
|
|
|
- console.error('MediaRecorder初始化失败:', recorderError)
|
|
|
+ console.error('MediaRecorder 初始化失败:', recorderError)
|
|
|
error.value = '无法启动录音,请检查浏览器是否支持录音功能'
|
|
|
stopMediaTracks()
|
|
|
return
|
|
|
@@ -150,7 +161,7 @@ export function useSpeechRecognition() {
|
|
|
}
|
|
|
|
|
|
mediaRecorder.onerror = (event) => {
|
|
|
- console.error('MediaRecorder错误:', event.error || event)
|
|
|
+ console.error('MediaRecorder 错误:', event.error || event)
|
|
|
error.value = '录音过程中出现问题,请重新尝试'
|
|
|
resetRecorder(true)
|
|
|
}
|
|
|
@@ -186,120 +197,169 @@ export function useSpeechRecognition() {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- const speechSynthesis = speechSynthesisInstance
|
|
|
+ const clearCurrentAudio = () => {
|
|
|
+ if (currentAudio) {
|
|
|
+ currentAudio.onended = null
|
|
|
+ currentAudio.onerror = null
|
|
|
+ currentAudio.onpause = null
|
|
|
+ currentAudio = null
|
|
|
+ }
|
|
|
|
|
|
- const speakText = (text, options = {}) => {
|
|
|
- if (!speechSynthesis) {
|
|
|
- error.value = '浏览器不支持语音播放功能'
|
|
|
- return false
|
|
|
+ if (currentAudioUrl) {
|
|
|
+ URL.revokeObjectURL(currentAudioUrl)
|
|
|
+ currentAudioUrl = ''
|
|
|
}
|
|
|
+ }
|
|
|
|
|
|
- stopSpeaking()
|
|
|
+ const playAudioUrl = (audioUrl, sessionId) =>
|
|
|
+ new Promise((resolve, reject) => {
|
|
|
+ if (!hasWindow || typeof Audio === 'undefined') {
|
|
|
+ reject(new Error('当前环境不支持音频播放'))
|
|
|
+ return
|
|
|
+ }
|
|
|
|
|
|
- const utterance = new SpeechSynthesisUtterance(text)
|
|
|
-
|
|
|
- utterance.lang = options.lang || 'zh-CN'
|
|
|
- utterance.rate = options.rate || 1.0
|
|
|
- utterance.pitch = options.pitch || 1.0
|
|
|
- utterance.volume = options.volume || 1.0
|
|
|
+ clearCurrentAudio()
|
|
|
+ const audio = new Audio(audioUrl)
|
|
|
+ currentAudio = audio
|
|
|
+ currentAudioUrl = audioUrl
|
|
|
|
|
|
- utterance.onstart = () => {
|
|
|
- isSpeaking.value = true
|
|
|
- error.value = ''
|
|
|
- }
|
|
|
+ const finish = (handler) => {
|
|
|
+ if (settleCurrentPlayback) {
|
|
|
+ settleCurrentPlayback = null
|
|
|
+ }
|
|
|
+ handler()
|
|
|
+ }
|
|
|
|
|
|
- utterance.onend = () => {
|
|
|
- isSpeaking.value = false
|
|
|
- }
|
|
|
+ settleCurrentPlayback = () => finish(() => resolve(false))
|
|
|
|
|
|
- utterance.onerror = (event) => {
|
|
|
- let errorMessage = ''
|
|
|
- switch (event.error) {
|
|
|
- case 'canceled':
|
|
|
- errorMessage = '语音播放已取消'
|
|
|
- break
|
|
|
- case 'interrupted':
|
|
|
- errorMessage = '语音播放被中断'
|
|
|
- break
|
|
|
- case 'audio-busy':
|
|
|
- errorMessage = '音频设备忙,请稍后重试'
|
|
|
- break
|
|
|
- case 'audio-hardware':
|
|
|
- errorMessage = '音频硬件错误'
|
|
|
- break
|
|
|
- case 'network':
|
|
|
- errorMessage = '网络错误,请检查网络连接'
|
|
|
- break
|
|
|
- case 'synthesis-unavailable':
|
|
|
- errorMessage = '语音播放服务不可用'
|
|
|
- break
|
|
|
- case 'synthesis-failed':
|
|
|
- errorMessage = '语音播放失败'
|
|
|
- break
|
|
|
- case 'language-unavailable':
|
|
|
- errorMessage = '不支持当前语言'
|
|
|
- break
|
|
|
- case 'voice-unavailable':
|
|
|
- errorMessage = '当前语音不可用'
|
|
|
- break
|
|
|
- case 'text-too-long':
|
|
|
- errorMessage = '文本过长,无法播放'
|
|
|
- break
|
|
|
- case 'invalid-argument':
|
|
|
- errorMessage = '语音播放参数无效'
|
|
|
- break
|
|
|
- case 'not-allowed':
|
|
|
- errorMessage = '语音播放权限被拒绝'
|
|
|
- break
|
|
|
- default:
|
|
|
- errorMessage = `语音播放错误: ${event.error}`
|
|
|
+ audio.onended = () => {
|
|
|
+ if (sessionId !== playbackSessionId) {
|
|
|
+ finish(() => resolve(false))
|
|
|
+ return
|
|
|
+ }
|
|
|
+ clearCurrentAudio()
|
|
|
+ finish(() => resolve(true))
|
|
|
+ }
|
|
|
+
|
|
|
+ audio.onerror = () => {
|
|
|
+ const playbackError = new Error('语音播放失败')
|
|
|
+ clearCurrentAudio()
|
|
|
+ finish(() => reject(playbackError))
|
|
|
+ }
|
|
|
+
|
|
|
+ audio.play().catch((playbackError) => {
|
|
|
+ clearCurrentAudio()
|
|
|
+ finish(() => reject(playbackError))
|
|
|
+ })
|
|
|
+ })
|
|
|
+
|
|
|
+ const stopSpeaking = () => {
|
|
|
+ playbackSessionId += 1
|
|
|
+
|
|
|
+ if (currentAudio) {
|
|
|
+ try {
|
|
|
+ currentAudio.pause()
|
|
|
+ currentAudio.currentTime = 0
|
|
|
+ } catch (err) {
|
|
|
+ console.warn('停止音频播放失败:', err)
|
|
|
}
|
|
|
- error.value = errorMessage
|
|
|
- isSpeaking.value = false
|
|
|
}
|
|
|
|
|
|
- speechSynthesis.speak(utterance)
|
|
|
- return true
|
|
|
+ if (settleCurrentPlayback) {
|
|
|
+ const settle = settleCurrentPlayback
|
|
|
+ settleCurrentPlayback = null
|
|
|
+ settle()
|
|
|
+ }
|
|
|
+
|
|
|
+ clearCurrentAudio()
|
|
|
+ isSpeaking.value = false
|
|
|
}
|
|
|
|
|
|
- const stopSpeaking = () => {
|
|
|
- if (speechSynthesis && isSpeaking.value) {
|
|
|
- speechSynthesis.cancel()
|
|
|
- isSpeaking.value = false
|
|
|
+ const speakText = async (text, options = {}) => {
|
|
|
+ if (!hasWindow || typeof Audio === 'undefined') {
|
|
|
+ error.value = '浏览器不支持语音播放功能'
|
|
|
+ return false
|
|
|
+ }
|
|
|
+
|
|
|
+ const normalizedText = cleanTextForSpeech(text)
|
|
|
+ if (!normalizedText) {
|
|
|
+ error.value = '文本内容为空,无法播放'
|
|
|
+ return false
|
|
|
+ }
|
|
|
+
|
|
|
+ stopSpeaking()
|
|
|
+ const sessionId = playbackSessionId
|
|
|
+ const chunks = splitTextIntoSpeechChunks(
|
|
|
+ normalizedText,
|
|
|
+ options.firstChunkLimit ?? 60,
|
|
|
+ options.remainingChunkLimit ?? 120
|
|
|
+ )
|
|
|
+
|
|
|
+ isSpeaking.value = true
|
|
|
+ error.value = ''
|
|
|
+
|
|
|
+ try {
|
|
|
+ for (const chunk of chunks) {
|
|
|
+ if (sessionId !== playbackSessionId) {
|
|
|
+ return false
|
|
|
+ }
|
|
|
+
|
|
|
+ const audioUrl = await synthesizeSpeechToObjectUrl({
|
|
|
+ text: chunk,
|
|
|
+ voiceType: options.voiceType,
|
|
|
+ speed: options.rate ?? options.speed,
|
|
|
+ volume: options.volume
|
|
|
+ })
|
|
|
+
|
|
|
+ if (sessionId !== playbackSessionId) {
|
|
|
+ URL.revokeObjectURL(audioUrl)
|
|
|
+ return false
|
|
|
+ }
|
|
|
+
|
|
|
+ await playAudioUrl(audioUrl, sessionId)
|
|
|
+ }
|
|
|
+
|
|
|
+ return true
|
|
|
+ } catch (playbackError) {
|
|
|
+ console.error('语音播放失败:', playbackError)
|
|
|
+ if (sessionId === playbackSessionId) {
|
|
|
+ error.value = playbackError?.message || '语音播放失败,请稍后重试'
|
|
|
+ }
|
|
|
+ return false
|
|
|
+ } finally {
|
|
|
+ if (sessionId === playbackSessionId) {
|
|
|
+ isSpeaking.value = false
|
|
|
+ clearCurrentAudio()
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
|
|
|
const pauseSpeaking = () => {
|
|
|
- if (speechSynthesis && isSpeaking.value) {
|
|
|
- speechSynthesis.pause()
|
|
|
+ if (currentAudio && isSpeaking.value) {
|
|
|
+ currentAudio.pause()
|
|
|
}
|
|
|
}
|
|
|
|
|
|
const resumeSpeaking = () => {
|
|
|
- if (speechSynthesis) {
|
|
|
- speechSynthesis.resume()
|
|
|
+ if (currentAudio) {
|
|
|
+ currentAudio.play().catch((playbackError) => {
|
|
|
+ console.error('恢复播放失败:', playbackError)
|
|
|
+ error.value = playbackError?.message || '恢复播放失败'
|
|
|
+ })
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- const getAvailableVoices = () => {
|
|
|
- if (!speechSynthesis) return []
|
|
|
-
|
|
|
- return speechSynthesis.getVoices().filter(voice =>
|
|
|
- voice.lang.startsWith('zh') || voice.lang.startsWith('en')
|
|
|
- )
|
|
|
- }
|
|
|
+ const getAvailableVoices = () => [
|
|
|
+ {
|
|
|
+ name: selectedVoiceName.value,
|
|
|
+ lang: 'zh-CN'
|
|
|
+ }
|
|
|
+ ]
|
|
|
|
|
|
const setVoice = (voiceName) => {
|
|
|
- if (!speechSynthesis) return false
|
|
|
-
|
|
|
- const voices = speechSynthesis.getVoices()
|
|
|
- const voice = voices.find(v => v.name === voiceName)
|
|
|
-
|
|
|
- if (voice) {
|
|
|
- return true
|
|
|
- }
|
|
|
-
|
|
|
- return false
|
|
|
+ if (!voiceName) return false
|
|
|
+ selectedVoiceName.value = voiceName
|
|
|
+ return true
|
|
|
}
|
|
|
|
|
|
return {
|
|
|
@@ -321,6 +381,7 @@ export function useSpeechRecognition() {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+
|
|
|
function getRecorderOptions() {
|
|
|
if (!hasWindow || typeof window.MediaRecorder === 'undefined') {
|
|
|
return undefined
|
|
|
@@ -347,23 +408,23 @@ function getStoredUserId() {
|
|
|
if (!hasWindow || !window.localStorage) {
|
|
|
return 'web-user'
|
|
|
}
|
|
|
+
|
|
|
try {
|
|
|
return window.localStorage.getItem('shudao_user_id') || 'web-user'
|
|
|
} catch (err) {
|
|
|
- console.warn('读取本地用户ID失败:', err)
|
|
|
+ console.warn('读取本地用户 ID 失败:', err)
|
|
|
return 'web-user'
|
|
|
}
|
|
|
}
|
|
|
|
|
|
async function convertBlobToWav(blob) {
|
|
|
if (!blob || !blob.size) return blob
|
|
|
-
|
|
|
if (blob.type === 'audio/wav' || /wav/i.test(blob.type)) {
|
|
|
return blob
|
|
|
}
|
|
|
|
|
|
if (!AudioContextClass) {
|
|
|
- console.warn('当前环境不支持AudioContext,无法转换为WAV,将直接上传原始音频')
|
|
|
+ console.warn('当前环境不支持 AudioContext,将直接上传原始音频')
|
|
|
return blob
|
|
|
}
|
|
|
|
|
|
@@ -376,14 +437,14 @@ async function convertBlobToWav(blob) {
|
|
|
const wavBuffer = encodeWAV(monoData, audioBuffer.sampleRate)
|
|
|
return new Blob([wavBuffer], { type: 'audio/wav' })
|
|
|
} catch (err) {
|
|
|
- console.error('音频转换为WAV失败,将上传原始格式:', err)
|
|
|
+ console.error('音频转换为 WAV 失败,将上传原始格式:', err)
|
|
|
return blob
|
|
|
} finally {
|
|
|
if (audioContext && audioContext.state !== 'closed') {
|
|
|
try {
|
|
|
await audioContext.close()
|
|
|
} catch (closeError) {
|
|
|
- console.warn('关闭AudioContext失败:', closeError)
|
|
|
+ console.warn('关闭 AudioContext 失败:', closeError)
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
@@ -397,15 +458,15 @@ function mergeToMono(audioBuffer) {
|
|
|
const length = audioBuffer.length
|
|
|
const result = new Float32Array(length)
|
|
|
|
|
|
- for (let channel = 0; channel < audioBuffer.numberOfChannels; channel++) {
|
|
|
+ for (let channel = 0; channel < audioBuffer.numberOfChannels; channel += 1) {
|
|
|
const channelData = audioBuffer.getChannelData(channel)
|
|
|
- for (let i = 0; i < length; i++) {
|
|
|
- result[i] += channelData[i]
|
|
|
+ for (let index = 0; index < length; index += 1) {
|
|
|
+ result[index] += channelData[index]
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- for (let i = 0; i < length; i++) {
|
|
|
- result[i] /= audioBuffer.numberOfChannels
|
|
|
+ for (let index = 0; index < length; index += 1) {
|
|
|
+ result[index] /= audioBuffer.numberOfChannels
|
|
|
}
|
|
|
|
|
|
return result
|
|
|
@@ -420,30 +481,29 @@ function encodeWAV(samples, sampleRate) {
|
|
|
writeString(view, 8, 'WAVE')
|
|
|
writeString(view, 12, 'fmt ')
|
|
|
view.setUint32(16, 16, true)
|
|
|
- view.setUint16(20, 1, true) // PCM
|
|
|
- view.setUint16(22, 1, true) // mono
|
|
|
+ view.setUint16(20, 1, true)
|
|
|
+ view.setUint16(22, 1, true)
|
|
|
view.setUint32(24, sampleRate, true)
|
|
|
view.setUint32(28, sampleRate * 2, true)
|
|
|
- view.setUint16(32, 2, true) // block align
|
|
|
- view.setUint16(34, 16, true) // bits per sample
|
|
|
+ view.setUint16(32, 2, true)
|
|
|
+ view.setUint16(34, 16, true)
|
|
|
writeString(view, 36, 'data')
|
|
|
view.setUint32(40, samples.length * 2, true)
|
|
|
|
|
|
floatTo16BitPCM(view, 44, samples)
|
|
|
-
|
|
|
return buffer
|
|
|
}
|
|
|
|
|
|
function floatTo16BitPCM(output, offset, input) {
|
|
|
- for (let i = 0; i < input.length; i++, offset += 2) {
|
|
|
- let s = Math.max(-1, Math.min(1, input[i]))
|
|
|
- s = s < 0 ? s * 0x8000 : s * 0x7fff
|
|
|
- output.setInt16(offset, s, true)
|
|
|
+ for (let index = 0; index < input.length; index += 1, offset += 2) {
|
|
|
+ let sample = Math.max(-1, Math.min(1, input[index]))
|
|
|
+ sample = sample < 0 ? sample * 0x8000 : sample * 0x7fff
|
|
|
+ output.setInt16(offset, sample, true)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
function writeString(view, offset, string) {
|
|
|
- for (let i = 0; i < string.length; i++) {
|
|
|
- view.setUint8(offset + i, string.charCodeAt(i))
|
|
|
+ for (let index = 0; index < string.length; index += 1) {
|
|
|
+ view.setUint8(offset + index, string.charCodeAt(index))
|
|
|
}
|
|
|
}
|