| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279 |
- # coding=utf-8
- """
- @project: maxkb
- @Author:虎
- @file: ollama_model_provider.py
- @date:2024/3/5 17:23
- @desc:
- """
- import json
- import os
- from typing import Dict, Iterator
- from urllib.parse import urlparse, ParseResult
- import requests
- from common.utils.common import get_file_content
- from models_provider.base_model_provider import IModelProvider, ModelProvideInfo, ModelInfo, ModelTypeConst, \
- BaseModelCredential, DownModelChunk, DownModelChunkStatus, ValidCode, ModelInfoManage
- from models_provider.impl.ollama_model_provider.credential.embedding import OllamaEmbeddingModelCredential
- from models_provider.impl.ollama_model_provider.credential.image import OllamaImageModelCredential
- from models_provider.impl.ollama_model_provider.credential.llm import OllamaLLMModelCredential
- from models_provider.impl.ollama_model_provider.credential.reranker import OllamaReRankModelCredential
- from models_provider.impl.ollama_model_provider.model.embedding import OllamaEmbedding
- from models_provider.impl.ollama_model_provider.model.image import OllamaImage
- from models_provider.impl.ollama_model_provider.model.llm import OllamaChatModel
- from models_provider.impl.ollama_model_provider.model.reranker import OllamaReranker
- from maxkb.conf import PROJECT_DIR
- from django.utils.translation import gettext as _
- ""
# Credential form shared by every chat/LLM catalogue entry declared below.
ollama_llm_model_credential = OllamaLLMModelCredential()

# Built-in LLM catalogue. Every entry is a ModelInfo of type LLM backed by
# OllamaChatModel; only the model name and the description differ, so the
# entries are declared as (name, description) pairs and expanded in one place.
# Descriptions stay as full literals inside _() on purpose: they are looked up
# in the translation catalogue by their exact text.
model_info_list = [
    ModelInfo(model_name, model_desc, ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel)
    for model_name, model_desc in (
        # DeepSeek R1
        ('deepseek-r1:1.5b', ''),
        ('deepseek-r1:7b', ''),
        ('deepseek-r1:8b', ''),
        ('deepseek-r1:14b', ''),
        ('deepseek-r1:32b', ''),
        # Llama 2 / Llama 3
        (
            'llama2',
            _('Llama 2 is a set of pretrained and fine-tuned generative text models ranging in size from 7 billion to 70 billion. This is a repository of 7B pretrained models. Links to other models can be found in the index at the bottom.'),
        ),
        (
            'llama2:13b',
            _('Llama 2 is a set of pretrained and fine-tuned generative text models ranging in size from 7 billion to 70 billion. This is a repository of 13B pretrained models. Links to other models can be found in the index at the bottom.'),
        ),
        (
            'llama2:70b',
            _('Llama 2 is a set of pretrained and fine-tuned generative text models ranging in size from 7 billion to 70 billion. This is a repository of 70B pretrained models. Links to other models can be found in the index at the bottom.'),
        ),
        (
            'llama2-chinese:13b',
            _('Since the Chinese alignment of Llama2 itself is weak, we use the Chinese instruction set to fine-tune meta-llama/Llama-2-13b-chat-hf with LoRA so that it has strong Chinese conversation capabilities.'),
        ),
        (
            'llama3:8b',
            _('Meta Llama 3: The most capable public product LLM to date. 8 billion parameters.'),
        ),
        (
            'llama3:70b',
            _('Meta Llama 3: The most capable public product LLM to date. 70 billion parameters.'),
        ),
        # Qwen 1.5
        (
            'qwen:0.5b',
            _("Compared with previous versions, qwen 1.5 0.5b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 500 million parameters."),
        ),
        (
            'qwen:1.8b',
            _("Compared with previous versions, qwen 1.5 1.8b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 1.8 billion parameters."),
        ),
        (
            'qwen:4b',
            _("Compared with previous versions, qwen 1.5 4b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 4 billion parameters."),
        ),
        (
            'qwen:7b',
            _("Compared with previous versions, qwen 1.5 7b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 7 billion parameters."),
        ),
        (
            'qwen:14b',
            _("Compared with previous versions, qwen 1.5 14b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 14 billion parameters."),
        ),
        (
            'qwen:32b',
            _("Compared with previous versions, qwen 1.5 32b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 32 billion parameters."),
        ),
        (
            'qwen:72b',
            _("Compared with previous versions, qwen 1.5 72b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 72 billion parameters."),
        ),
        (
            'qwen:110b',
            _("Compared with previous versions, qwen 1.5 110b has significantly enhanced the model's alignment with human preferences and its multi-language processing capabilities. Models of all sizes support a context length of 32768 tokens. 110 billion parameters."),
        ),
        # Qwen 2
        ('qwen2:72b-instruct', ''),
        ('qwen2:57b-a14b-instruct', ''),
        ('qwen2:7b-instruct', ''),
        # Qwen 2.5
        ('qwen2.5:72b-instruct', ''),
        ('qwen2.5:32b-instruct', ''),
        ('qwen2.5:14b-instruct', ''),
        ('qwen2.5:7b-instruct', ''),
        ('qwen2.5:1.5b-instruct', ''),
        ('qwen2.5:0.5b-instruct', ''),
        ('qwen2.5:3b-instruct', ''),
        # Phi-3
        (
            'phi3',
            _("Phi-3 Mini is Microsoft's 3.8B parameter, lightweight, state-of-the-art open model."),
        ),
    )
]
# One credential form instance per non-LLM model type; each is shared by all
# catalogue entries of that type.
ollama_embedding_model_credential = OllamaEmbeddingModelCredential()
ollama_image_model_credential = OllamaImageModelCredential()
ollama_reranker_model_credential = OllamaReRankModelCredential()

# Built-in embedding catalogue (single entry).
embedding_model_info = [
    ModelInfo(
        'nomic-embed-text',
        _('A high-performance open embedding model with a large token context window.'),
        ModelTypeConst.EMBEDDING,
        ollama_embedding_model_credential,
        OllamaEmbedding,
    ),
]

# Built-in reranker catalogue (single entry, no description).
reranker_model_info = [
    ModelInfo(
        'linux6200/bge-reranker-v2-m3',
        '',
        ModelTypeConst.RERANKER,
        ollama_reranker_model_credential,
        OllamaReranker,
    ),
]

# Built-in image (vision) catalogue: the entries differ only by model name.
image_model_info = [
    ModelInfo(image_model_name, '', ModelTypeConst.IMAGE, ollama_image_model_credential, OllamaImage)
    for image_model_name in ('llava:7b', 'llava:13b', 'llava:34b')
]
def _assemble_model_info_manage():
    """Build the provider's model registry from the catalogues declared above.

    The registration calls are issued in a fixed order: LLM list, embedding
    list, the two entries passed to ``append_default_model_info`` for LLM and
    embedding, then the image list with its first entry, and finally the
    reranker list with its first entry.
    """
    builder = ModelInfoManage.builder()

    builder = builder.append_model_info_list(model_info_list)
    builder = builder.append_model_info_list(embedding_model_info)

    # The LLM and embedding entries handed to append_default_model_info are
    # fresh ModelInfo objects rather than references into the lists above.
    default_llm = ModelInfo(
        'phi3',
        _('Phi-3 Mini is Microsoft\'s 3.8B parameter, lightweight, state-of-the-art open model.'),
        ModelTypeConst.LLM, ollama_llm_model_credential, OllamaChatModel)
    builder = builder.append_default_model_info(default_llm)

    default_embedding = ModelInfo(
        'nomic-embed-text',
        _('A high-performance open embedding model with a large token context window.'),
        ModelTypeConst.EMBEDDING, ollama_embedding_model_credential, OllamaEmbedding)
    builder = builder.append_default_model_info(default_embedding)

    # Image and reranker reuse the first entry of their own catalogue.
    builder = builder.append_model_info_list(image_model_info)
    builder = builder.append_default_model_info(image_model_info[0])
    builder = builder.append_model_info_list(reranker_model_info)
    builder = builder.append_default_model_info(reranker_model_info[0])

    return builder.build()


# Registry returned by OllamaModelProvider.get_model_info_manage().
model_info_manage = _assemble_model_info_manage()
def get_base_url(url: str) -> str:
    """Return *url* reduced to ``scheme://netloc/path`` with no trailing slash.

    Params, query string and fragment are discarded. All trailing ``/``
    characters are removed (not just one), so callers can safely append
    ``/api/...`` without producing ``//`` in the final URL.

    :param url: base address entered by the user, e.g. ``http://host:11434/``
    :return: the normalised base URL; an empty string stays empty
    """
    parsed = urlparse(url)
    # Keep scheme/netloc/path exactly as parsed; blank out everything else.
    stripped = parsed._replace(params='', query='', fragment='').geturl()
    return stripped.rstrip('/')
def convert_to_down_model_chunk(row_str: str, chunk_index: int):
    """Translate one JSON row of a pull response into a ``DownModelChunk``.

    Mapping applied to the decoded row:

    * ``status == 'success'``        -> ``DownModelChunkStatus.success``
    * ``status`` contains "pulling"  -> ``DownModelChunkStatus.pulling`` with
      ``progress = completed / total * 100`` when both numbers are usable,
      otherwise ``0``
    * no ``status`` but an ``error`` -> ``DownModelChunkStatus.error`` with the
      error text as digest
    * anything else                  -> ``DownModelChunkStatus.unknown``

    ``progress`` defaults to ``100`` for every non-pulling row.

    :param row_str: one JSON document as text; also stored verbatim in
        ``details``
    :param chunk_index: value stored as the chunk's ``index``
    :return: the populated ``DownModelChunk``
    :raises json.JSONDecodeError: if *row_str* is not valid JSON
    """
    row = json.loads(row_str)
    status = DownModelChunkStatus.unknown
    digest = ""
    progress = 100
    if 'status' in row:
        raw_status = row.get('status')
        digest = raw_status
        if raw_status == 'success':
            status = DownModelChunkStatus.success
        # A null/non-string status cannot contain "pulling"; guard instead of
        # crashing on it.
        elif isinstance(raw_status, str) and "pulling" in raw_status:
            progress = 0
            status = DownModelChunkStatus.pulling
            total = row.get('total')
            completed = row.get('completed')
            # A falsy total (missing, null or 0) would make the ratio
            # meaningless or divide by zero; a missing/null completed cannot
            # be divided at all.
            if total and completed is not None:
                progress = completed / total * 100
    elif 'error' in row:
        status = DownModelChunkStatus.error
        digest = row.get('error')
    return DownModelChunk(status=status, digest=digest, progress=progress, details=row_str, index=chunk_index)
def _is_complete_json(raw: bytes) -> bool:
    """Return True when *raw* decodes and parses as one whole JSON document."""
    try:
        json.loads(raw)
    except ValueError:  # covers JSONDecodeError and UnicodeDecodeError
        return False
    return True


def convert(response_stream) -> Iterator[DownModelChunk]:
    """Turn a stream of raw byte chunks into ``DownModelChunk`` objects.

    The stream is treated as JSON rows separated by ``\\n``. Chunk boundaries
    are arbitrary, so bytes are buffered and only decoded once a full row is
    available. This keeps multi-byte UTF-8 characters intact and never hands a
    half-received row to the JSON parser. A buffered tail without a newline is
    emitted early only if it already parses as a complete JSON document;
    whatever is left when the stream ends is emitted as a final row.

    :param response_stream: iterable yielding ``bytes`` (for example a
        streamed ``requests`` response)
    :return: iterator of chunks; ``index`` is the 1-based number of stream
        chunks consumed when the row was emitted
    """
    pending = b""
    index = 0
    for piece in response_stream:
        index += 1
        pending += piece
        # Everything before the last newline is complete; the tail may not be.
        *rows, pending = pending.split(b"\n")
        if pending.rstrip().endswith(b"}") and _is_complete_json(pending):
            # Row delivered without a terminating newline: emit it now.
            rows.append(pending)
            pending = b""
        for raw_row in rows:
            if raw_row.strip():
                yield convert_to_down_model_chunk(raw_row.decode(), index)
    if pending.strip():
        yield convert_to_down_model_chunk(pending.decode(), index)
class OllamaModelProvider(IModelProvider):
    """Model provider that talks to an Ollama server over HTTP."""

    def get_model_info_manage(self):
        """Return the module-level registry of built-in Ollama models."""
        return model_info_manage

    def get_model_provide_info(self):
        """Return the provider descriptor (id, display name and icon content)."""
        icon_path = os.path.join(
            PROJECT_DIR, "apps", 'models_provider', 'impl', 'ollama_model_provider', 'icon',
            'ollama_icon_svg')
        return ModelProvideInfo(provider='model_ollama_provider', name='Ollama',
                                icon=get_file_content(icon_path))

    @staticmethod
    def get_base_model_list(api_base):
        """Fetch ``<base>/api/tags`` and return the decoded JSON body.

        NOTE(review): presumably this lists the models installed on the
        server; confirm against the Ollama API reference.

        :param api_base: server address; normalised with ``get_base_url``
        :raises requests.HTTPError: on a non-2xx response
        """
        base_url = get_base_url(api_base)
        r = requests.request(method="GET", url=f"{base_url}/api/tags", timeout=5)
        r.raise_for_status()
        return r.json()

    def down_model(self, model_type: str, model_name, model_credential: Dict[str, object]) -> Iterator[DownModelChunk]:
        """Request ``<base>/api/pull`` for *model_name* and stream the progress.

        The HTTP request is sent immediately; the returned iterator then
        yields one ``DownModelChunk`` per row of the streamed body. The HTTP
        status is intentionally not checked here, so an error body still
        reaches ``convert`` and is reported as a chunk.

        :param model_type: not used by this implementation
        :param model_name: name sent to the server as ``{"name": ...}``
        :param model_credential: mapping read for ``api_base``
        :return: iterator of download progress chunks
        """
        api_base = model_credential.get('api_base', '')
        base_url = get_base_url(api_base)
        r = requests.request(
            method="POST",
            url=f"{base_url}/api/pull",
            data=json.dumps({"name": model_name}).encode(),
            stream=True,
            # Bound only the connect phase (same 5s as get_base_model_list);
            # the read side stays unlimited because a pull can take long.
            timeout=(5, None),
        )

        def _chunks() -> Iterator[DownModelChunk]:
            # Release the connection once the stream is exhausted, fails, or
            # the consumer stops iterating early.
            try:
                yield from convert(r)
            finally:
                r.close()

        return _chunks()
|