embedding.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. # coding=utf-8
  2. """
  3. @project: MaxKB
  4. @Author:虎
  5. @file: embedding.py
  6. @date:2024/7/12 17:44
  7. @desc:
  8. """
  9. from typing import Dict, List
  10. import openai
  11. from models_provider.base_model_provider import MaxKBBaseModel
  12. class OpenAIEmbeddingModel(MaxKBBaseModel):
  13. model_name: str
  14. optional_params: dict
  15. def __init__(self, api_key, base_url, model_name: str, optional_params: dict):
  16. self.client = openai.OpenAI(api_key=api_key, base_url=base_url).embeddings
  17. self.model_name = model_name
  18. self.optional_params = optional_params
  19. def is_cache_model(self):
  20. return False
  21. @staticmethod
  22. def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
  23. optional_params = MaxKBBaseModel.filter_optional_params(model_kwargs)
  24. return OpenAIEmbeddingModel(
  25. api_key=model_credential.get('api_key'),
  26. model_name=model_name,
  27. base_url=model_credential.get('api_base'),
  28. optional_params=optional_params
  29. )
  30. def embed_query(self, text: str):
  31. res = self.embed_documents([text])
  32. return res[0]
  33. def embed_documents(
  34. self, texts: List[str], chunk_size: int | None = None
  35. ) -> List[List[float]]:
  36. if len(self.optional_params) > 0:
  37. res = self.client.create(
  38. input=texts, model=self.model_name, encoding_format="float",
  39. **self.optional_params
  40. )
  41. else:
  42. res = self.client.create(input=texts, model=self.model_name, encoding_format="float")
  43. return [e.embedding for e in res.data]