llm.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. # coding=utf-8
  2. """
  3. @project: maxkb
  4. @Author:虎
  5. @file: llm.py
  6. @date:2024/4/18 15:28
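  7. @desc: Anthropic chat model wrapper (ChatAnthropic + MaxKBBaseModel) with a local-tokenizer fallback for token counting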
  8. """
  9. from typing import List, Dict
  10. from langchain_anthropic import ChatAnthropic
  11. from langchain_core.messages import BaseMessage, get_buffer_string
  12. from common.config.tokenizer_manage_config import TokenizerManage
  13. from models_provider.base_model_provider import MaxKBBaseModel
  14. def custom_get_token_ids(text: str):
  15. tokenizer = TokenizerManage.get_tokenizer()
  16. return tokenizer.encode(text)
  17. class AnthropicChatModel(MaxKBBaseModel, ChatAnthropic):
  18. @staticmethod
  19. def is_cache_model():
  20. return False
  21. @staticmethod
  22. def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
  23. optional_params = MaxKBBaseModel.filter_optional_params(model_kwargs)
  24. azure_chat_open_ai = AnthropicChatModel(
  25. model=model_name,
  26. anthropic_api_url=model_credential.get('api_base'),
  27. anthropic_api_key=model_credential.get('api_key'),
  28. **optional_params,
  29. custom_get_token_ids=custom_get_token_ids
  30. )
  31. return azure_chat_open_ai
  32. def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
  33. try:
  34. return super().get_num_tokens_from_messages(messages)
  35. except Exception as e:
  36. tokenizer = TokenizerManage.get_tokenizer()
  37. return sum([len(tokenizer.encode(get_buffer_string([m]))) for m in messages])
  38. def get_num_tokens(self, text: str) -> int:
  39. try:
  40. return super().get_num_tokens(text)
  41. except Exception as e:
  42. tokenizer = TokenizerManage.get_tokenizer()
  43. return len(tokenizer.encode(text))