ollama_model_provider.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. # coding=utf-8
  2. """
  3. @project: maxkb
  4. @Author:虎
  5. @file: ollama_model_provider.py
  6. @date:2024/3/5 17:23
  7. @desc:
  8. """
  9. import json
  10. import os
  11. from typing import Dict, Iterator
  12. from urllib.parse import urlparse, ParseResult
  13. import requests
  14. from common.utils.common import get_file_content
  15. from models_provider.base_model_provider import IModelProvider, ModelProvideInfo, ModelInfo, ModelTypeConst, \
  16. BaseModelCredential, DownModelChunk, DownModelChunkStatus, ValidCode, ModelInfoManage
  17. from models_provider.impl.ollama_model_provider.credential.embedding import OllamaEmbeddingModelCredential
  18. from models_provider.impl.ollama_model_provider.credential.image import OllamaImageModelCredential
  19. from models_provider.impl.ollama_model_provider.credential.llm import OllamaLLMModelCredential
  20. from models_provider.impl.ollama_model_provider.credential.reranker import OllamaReRankModelCredential
  21. from models_provider.impl.ollama_model_provider.model.embedding import OllamaEmbedding
  22. from models_provider.impl.ollama_model_provider.model.image import OllamaImage
  23. from models_provider.impl.ollama_model_provider.model.llm import OllamaChatModel
  24. from models_provider.impl.ollama_model_provider.model.reranker import OllamaReranker
  25. from maxkb.conf import PROJECT_DIR
  26. from django.utils.translation import gettext as _
  27. ""
  28. ollama_llm_model_credential = OllamaLLMModelCredential()
  29. model_info_list = [
  30. ModelInfo(
  31. 'deepseek-r1:1.5b',
  32. '',
  33. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  34. ModelInfo(
  35. 'deepseek-r1:7b',
  36. '',
  37. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  38. ModelInfo(
  39. 'deepseek-r1:8b',
  40. '',
  41. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  42. ModelInfo(
  43. 'deepseek-r1:14b',
  44. '',
  45. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  46. ModelInfo(
  47. 'deepseek-r1:32b',
  48. '',
  49. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  50. ModelInfo(
  51. 'llama2',
  52. _('Llama 2 is a set of pretrained and fine-tuned generative text models ranging in size from 7 billion to 70 billion. This is a repository of 7B pretrained models. Links to other models can be found in the index at the bottom.'),
  53. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  54. ModelInfo(
  55. 'llama2:13b',
  56. _('Llama 2 is a set of pretrained and fine-tuned generative text models ranging in size from 7 billion to 70 billion. This is a repository of 13B pretrained models. Links to other models can be found in the index at the bottom.'),
  57. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  58. ModelInfo(
  59. 'llama2:70b',
  60. _('Llama 2 is a set of pretrained and fine-tuned generative text models ranging in size from 7 billion to 70 billion. This is a repository of 70B pretrained models. Links to other models can be found in the index at the bottom.'),
  61. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  62. ModelInfo(
  63. 'llama2-chinese:13b',
  64. _('Since the Chinese alignment of Llama2 itself is weak, we use the Chinese instruction set to fine-tune meta-llama/Llama-2-13b-chat-hf with LoRA so that it has strong Chinese conversation capabilities.'),
  65. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  66. ModelInfo(
  67. 'llama3:8b',
  68. _('Meta Llama 3: The most capable public product LLM to date. 8 billion parameters.'),
  69. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  70. ModelInfo(
  71. 'llama3:70b',
  72. _('Meta Llama 3: The most capable public product LLM to date. 70 billion parameters.'),
  73. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  74. ModelInfo(
  75. 'qwen:0.5b',
  76. _("Compared with previous versions, qwen 1.5 0.5b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 500 million parameters."),
  77. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  78. ModelInfo(
  79. 'qwen:1.8b',
  80. _("Compared with previous versions, qwen 1.5 1.8b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 1.8 billion parameters."),
  81. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  82. ModelInfo(
  83. 'qwen:4b',
  84. _("Compared with previous versions, qwen 1.5 4b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 4 billion parameters."),
  85. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  86. ModelInfo(
  87. 'qwen:7b',
  88. _("Compared with previous versions, qwen 1.5 7b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 7 billion parameters."),
  89. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  90. ModelInfo(
  91. 'qwen:14b',
  92. _("Compared with previous versions, qwen 1.5 14b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 14 billion parameters."),
  93. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  94. ModelInfo(
  95. 'qwen:32b',
  96. _("Compared with previous versions, qwen 1.5 32b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 32 billion parameters."),
  97. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  98. ModelInfo(
  99. 'qwen:72b',
  100. _("Compared with previous versions, qwen 1.5 72b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 72 billion parameters."),
  101. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  102. ModelInfo(
  103. 'qwen:110b',
  104. _("Compared with previous versions, qwen 1.5 110b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 110 billion parameters."),
  105. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  106. ModelInfo(
  107. 'qwen2:72b-instruct',
  108. '',
  109. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  110. ModelInfo(
  111. 'qwen2:57b-a14b-instruct',
  112. '',
  113. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  114. ModelInfo(
  115. 'qwen2:7b-instruct',
  116. '',
  117. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  118. ModelInfo(
  119. 'qwen2.5:72b-instruct',
  120. '',
  121. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  122. ModelInfo(
  123. 'qwen2.5:32b-instruct',
  124. '',
  125. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  126. ModelInfo(
  127. 'qwen2.5:14b-instruct',
  128. '',
  129. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  130. ModelInfo(
  131. 'qwen2.5:7b-instruct',
  132. '',
  133. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  134. ModelInfo(
  135. 'qwen2.5:1.5b-instruct',
  136. '',
  137. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  138. ModelInfo(
  139. 'qwen2.5:0.5b-instruct',
  140. '',
  141. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  142. ModelInfo(
  143. 'qwen2.5:3b-instruct',
  144. '',
  145. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  146. ModelInfo(
  147. 'phi3',
  148. _("Phi-3 Mini is Microsoft's 3.8B parameter, lightweight, state-of-the-art open model."),
  149. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel),
  150. ]
  151. ollama_embedding_model_credential = OllamaEmbeddingModelCredential()
  152. ollama_image_model_credential = OllamaImageModelCredential()
  153. ollama_reranker_model_credential = OllamaReRankModelCredential()
  154. embedding_model_info = [
  155. ModelInfo(
  156. 'nomic-embed-text',
  157. _('A high-performance open embedding model with a large token context window.'),
  158. ModelTypeConst.EMBEDDING, ollama_embedding_model_credential, OllamaEmbedding),
  159. ]
  160. reranker_model_info = [
  161. ModelInfo(
  162. 'linux6200/bge-reranker-v2-m3',
  163. '',
  164. ModelTypeConst.RERANKER, ollama_reranker_model_credential, OllamaReranker),
  165. ]
  166. image_model_info = [
  167. ModelInfo(
  168. 'llava:7b',
  169. '',
  170. ModelTypeConst.IMAGE, ollama_image_model_credential, OllamaImage),
  171. ModelInfo(
  172. 'llava:13b',
  173. '',
  174. ModelTypeConst.IMAGE, ollama_image_model_credential, OllamaImage),
  175. ModelInfo(
  176. 'llava:34b',
  177. '',
  178. ModelTypeConst.IMAGE, ollama_image_model_credential, OllamaImage),
  179. ]
  180. model_info_manage = (
  181. ModelInfoManage.builder()
  182. .append_model_info_list(model_info_list)
  183. .append_model_info_list(embedding_model_info)
  184. .append_default_model_info(ModelInfo(
  185. 'phi3',
  186. _('Phi-3 Mini is Microsoft\'s 3.8B parameter, lightweight, state-of-the-art open model.'),
  187. ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel))
  188. .append_default_model_info(ModelInfo(
  189. 'nomic-embed-text',
  190. _('A high-performance open embedding model with a large token context window.'),
  191. ModelTypeConst.EMBEDDING, ollama_embedding_model_credential, OllamaEmbedding), )
  192. .append_model_info_list(image_model_info)
  193. .append_default_model_info(image_model_info[0])
  194. .append_model_info_list(reranker_model_info)
  195. .append_default_model_info(reranker_model_info[0])
  196. .build()
  197. )
  198. def get_base_url(url: str):
  199. parse = urlparse(url)
  200. result_url = ParseResult(scheme=parse.scheme, netloc=parse.netloc, path=parse.path, params='',
  201. query='',
  202. fragment='').geturl()
  203. return result_url[:-1] if result_url.endswith("/") else result_url
  204. def convert_to_down_model_chunk(row_str: str, chunk_index: int):
  205. row = json.loads(row_str)
  206. status = DownModelChunkStatus.unknown
  207. digest = ""
  208. progress = 100
  209. if 'status' in row:
  210. digest = row.get('status')
  211. if row.get('status') == 'success':
  212. status = DownModelChunkStatus.success
  213. if row.get('status').__contains__("pulling"):
  214. progress = 0
  215. status = DownModelChunkStatus.pulling
  216. if 'total' in row and 'completed' in row and row.get('total'):
  217. progress = (row.get('completed') / row.get('total') * 100)
  218. elif 'error' in row:
  219. status = DownModelChunkStatus.error
  220. digest = row.get('error')
  221. return DownModelChunk(status=status, digest=digest, progress=progress, details=row_str, index=chunk_index)
  222. def convert(response_stream) -> Iterator[DownModelChunk]:
  223. temp = ""
  224. index = 0
  225. for c in response_stream:
  226. index += 1
  227. row_content = c.decode()
  228. temp += row_content
  229. if row_content.endswith('}') or row_content.endswith('\n'):
  230. rows = [t for t in temp.split("\n") if len(t) > 0]
  231. for row in rows:
  232. yield convert_to_down_model_chunk(row, index)
  233. temp = ""
  234. if len(temp) > 0:
  235. rows = [t for t in temp.split("\n") if len(t) > 0]
  236. for row in rows:
  237. yield convert_to_down_model_chunk(row, index)
  238. class OllamaModelProvider(IModelProvider):
  239. def get_model_info_manage(self):
  240. return model_info_manage
  241. def get_model_provide_info(self):
  242. return ModelProvideInfo(provider='model_ollama_provider', name='Ollama', icon=get_file_content(
  243. os.path.join(PROJECT_DIR, "apps", 'models_provider', 'impl', 'ollama_model_provider', 'icon',
  244. 'ollama_icon_svg')))
  245. @staticmethod
  246. def get_base_model_list(api_base):
  247. base_url = get_base_url(api_base)
  248. r = requests.request(method="GET", url=f"{base_url}/api/tags", timeout=5)
  249. r.raise_for_status()
  250. return r.json()
  251. def down_model(self, model_type: str, model_name, model_credential: Dict[str, object]) -> Iterator[DownModelChunk]:
  252. api_base = model_credential.get('api_base', '')
  253. base_url = get_base_url(api_base)
  254. r = requests.request(
  255. method="POST",
  256. url=f"{base_url}/api/pull",
  257. data=json.dumps({"name": model_name}).encode(),
  258. stream=True,
  259. )
  260. return convert(r)