tts.py 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. from typing import Dict
  2. from openai import OpenAI
  3. from common.config.tokenizer_manage_config import TokenizerManage
  4. from common.utils.common import _remove_empty_lines
  5. from models_provider.base_model_provider import MaxKBBaseModel
  6. from models_provider.impl.base_tts import BaseTextToSpeech
  7. from django.utils.translation import gettext as _
  8. def custom_get_token_ids(text: str):
  9. tokenizer = TokenizerManage.get_tokenizer()
  10. return tokenizer.encode(text)
  11. class XInferenceTextToSpeech(MaxKBBaseModel, BaseTextToSpeech):
  12. api_base: str
  13. api_key: str
  14. model: str
  15. params: dict
  16. def __init__(self, **kwargs):
  17. super().__init__(**kwargs)
  18. self.api_key = kwargs.get('api_key')
  19. self.api_base = kwargs.get('api_base')
  20. self.model = kwargs.get('model')
  21. self.params = kwargs.get('params')
  22. @staticmethod
  23. def is_cache_model():
  24. return False
  25. @staticmethod
  26. def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
  27. optional_params = {'params': {'voice': '中文女'}}
  28. for key, value in model_kwargs.items():
  29. if key not in ['model_id', 'use_local', 'streaming']:
  30. optional_params['params'][key] = value
  31. return XInferenceTextToSpeech(
  32. model=model_name,
  33. api_base=model_credential.get('api_base'),
  34. api_key=model_credential.get('api_key'),
  35. **optional_params,
  36. )
  37. def check_auth(self):
  38. self.text_to_speech(_('Hello'))
  39. def text_to_speech(self, text):
  40. client = OpenAI(
  41. base_url=self.api_base,
  42. api_key=self.api_key
  43. )
  44. # ['中文女', '中文男', '日语男', '粤语女', '英文女', '英文男', '韩语女']
  45. text = _remove_empty_lines(text)
  46. with client.audio.speech.with_streaming_response.create(
  47. model=self.model,
  48. input=text,
  49. **self.params
  50. ) as response:
  51. return response.read()