stt.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. import base64
  2. import json
  3. import os
  4. import traceback
  5. from typing import Dict
  6. from tencentcloud.asr.v20190614 import asr_client, models
  7. from tencentcloud.common import credential
  8. from tencentcloud.common.exception import TencentCloudSDKException
  9. from tencentcloud.common.profile.client_profile import ClientProfile
  10. from tencentcloud.common.profile.http_profile import HttpProfile
  11. from common.utils.logger import maxkb_logger
  12. from models_provider.base_model_provider import MaxKBBaseModel
  13. from models_provider.impl.base_stt import BaseSpeechToText
  14. class TencentSpeechToText(MaxKBBaseModel, BaseSpeechToText):
  15. hunyuan_secret_id: str
  16. hunyuan_secret_key: str
  17. model: str
  18. params: dict
  19. def __init__(self, **kwargs):
  20. super().__init__(**kwargs)
  21. self.hunyuan_secret_id = kwargs.get('hunyuan_secret_id')
  22. self.hunyuan_secret_key = kwargs.get('hunyuan_secret_key')
  23. self.model = kwargs.get('model')
  24. self.params = kwargs.get('params')
  25. @staticmethod
  26. def is_cache_model():
  27. return False
  28. @staticmethod
  29. def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
  30. return TencentSpeechToText(
  31. hunyuan_secret_id=model_credential.get('SecretId'),
  32. hunyuan_secret_key=model_credential.get('SecretKey'),
  33. model=model_name,
  34. params=model_kwargs,
  35. **model_kwargs
  36. )
  37. def check_auth(self):
  38. cwd = os.path.dirname(os.path.abspath(__file__))
  39. with open(f'{cwd}/iat_mp3_16k.mp3', 'rb') as f:
  40. self.speech_to_text(f)
  41. def speech_to_text(self, audio_file):
  42. try:
  43. cred = credential.Credential(self.hunyuan_secret_id, self.hunyuan_secret_key)
  44. # 实例化一个http选项,可选的,没有特殊需求可以跳过
  45. httpProfile = HttpProfile()
  46. httpProfile.endpoint = "asr.tencentcloudapi.com"
  47. # 实例化一个client选项,可选的,没有特殊需求可以跳过
  48. clientProfile = ClientProfile()
  49. clientProfile.httpProfile = httpProfile
  50. # 实例化要请求产品的client对象,clientProfile是可选的
  51. client = asr_client.AsrClient(cred, "", clientProfile)
  52. buf = audio_file.read()
  53. _v = base64.b64encode(buf)
  54. # 实例化一个请求对象,每个接口都会对应一个request对象
  55. req = models.SentenceRecognitionRequest()
  56. params = {
  57. "EngSerViceType": self.params.get('EngSerViceType'),
  58. "SourceType": 1,
  59. "VoiceFormat": "mp3",
  60. "Data": _v.decode(),
  61. **self.params
  62. }
  63. req.from_json_string(json.dumps(params))
  64. # 返回的resp是一个SentenceRecognitionResponse的实例,与请求对象对应
  65. resp = client.SentenceRecognition(req)
  66. # 输出json格式的字符串回包
  67. return resp.Result
  68. except TencentCloudSDKException as err:
  69. maxkb_logger.error(f":Error: {str(err)}: {traceback.format_exc()}")
  70. raise err