xinference_model_provider.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585
  1. # coding=utf-8
  2. import os
  3. from urllib.parse import urlparse, ParseResult
  4. import requests
  5. from common.utils.common import get_file_content
  6. from models_provider.base_model_provider import IModelProvider, ModelProvideInfo, ModelInfo, ModelTypeConst, \
  7. ModelInfoManage
  8. from models_provider.impl.xinference_model_provider.credential.embedding import \
  9. XinferenceEmbeddingModelCredential
  10. from models_provider.impl.xinference_model_provider.credential.image import XinferenceImageModelCredential
  11. from models_provider.impl.xinference_model_provider.credential.llm import XinferenceLLMModelCredential
  12. from models_provider.impl.xinference_model_provider.credential.reranker import XInferenceRerankerModelCredential
  13. from models_provider.impl.xinference_model_provider.credential.stt import XInferenceSTTModelCredential
  14. from models_provider.impl.xinference_model_provider.credential.tti import XinferenceTextToImageModelCredential
  15. from models_provider.impl.xinference_model_provider.credential.tts import XInferenceTTSModelCredential
  16. from models_provider.impl.xinference_model_provider.model.embedding import XinferenceEmbedding
  17. from models_provider.impl.xinference_model_provider.model.image import XinferenceImage
  18. from models_provider.impl.xinference_model_provider.model.llm import XinferenceChatModel
  19. from models_provider.impl.xinference_model_provider.model.reranker import XInferenceReranker
  20. from models_provider.impl.xinference_model_provider.model.stt import XInferenceSpeechToText
  21. from models_provider.impl.xinference_model_provider.model.tti import XinferenceTextToImage
  22. from models_provider.impl.xinference_model_provider.model.tts import XInferenceTextToSpeech
  23. from maxkb.conf import PROJECT_DIR
  24. from django.utils.translation import gettext as _
  25. xinference_llm_model_credential = XinferenceLLMModelCredential()
  26. xinference_stt_model_credential = XInferenceSTTModelCredential()
  27. xinference_tts_model_credential = XInferenceTTSModelCredential()
  28. xinference_image_model_credential = XinferenceImageModelCredential()
  29. xinference_tti_model_credential = XinferenceTextToImageModelCredential()
  30. model_info_list = [
  31. ModelInfo(
  32. 'code-llama',
  33. _('Code Llama is a language model specifically designed for code generation.'),
  34. ModelTypeConst.LLM,
  35. xinference_llm_model_credential,
  36. XinferenceChatModel
  37. ),
  38. ModelInfo(
  39. 'code-llama-instruct',
  40. _('''
  41. Code Llama Instruct is a fine-tuned version of Code Llama's instructions, designed to perform specific tasks.
  42. '''),
  43. ModelTypeConst.LLM,
  44. xinference_llm_model_credential,
  45. XinferenceChatModel
  46. ),
  47. ModelInfo(
  48. 'code-llama-python',
  49. _('Code Llama Python is a language model specifically designed for Python code generation.'),
  50. ModelTypeConst.LLM,
  51. xinference_llm_model_credential,
  52. XinferenceChatModel
  53. ),
  54. ModelInfo(
  55. 'codeqwen1.5',
  56. _('CodeQwen 1.5 is a language model for code generation with high performance.'),
  57. ModelTypeConst.LLM,
  58. xinference_llm_model_credential,
  59. XinferenceChatModel
  60. ),
  61. ModelInfo(
  62. 'codeqwen1.5-chat',
  63. _('CodeQwen 1.5 Chat is a chat model version of CodeQwen 1.5.'),
  64. ModelTypeConst.LLM,
  65. xinference_llm_model_credential,
  66. XinferenceChatModel
  67. ),
  68. ModelInfo(
  69. 'deepseek',
  70. _('Deepseek is a large-scale language model with 13 billion parameters.'),
  71. ModelTypeConst.LLM,
  72. xinference_llm_model_credential,
  73. XinferenceChatModel
  74. ),
  75. ModelInfo(
  76. 'deepseek-chat',
  77. '',
  78. ModelTypeConst.LLM,
  79. xinference_llm_model_credential,
  80. XinferenceChatModel
  81. ),
  82. ModelInfo(
  83. 'deepseek-coder',
  84. '',
  85. ModelTypeConst.LLM,
  86. xinference_llm_model_credential,
  87. XinferenceChatModel
  88. ),
  89. ModelInfo(
  90. 'deepseek-coder-instruct',
  91. '',
  92. ModelTypeConst.LLM,
  93. xinference_llm_model_credential,
  94. XinferenceChatModel
  95. ),
  96. ModelInfo(
  97. 'deepseek-vl-chat',
  98. '',
  99. ModelTypeConst.LLM,
  100. xinference_llm_model_credential,
  101. XinferenceChatModel
  102. ),
  103. ModelInfo(
  104. 'gpt-3.5-turbo',
  105. '',
  106. ModelTypeConst.LLM,
  107. xinference_llm_model_credential,
  108. XinferenceChatModel
  109. ),
  110. ModelInfo(
  111. 'gpt-4',
  112. '',
  113. ModelTypeConst.LLM,
  114. xinference_llm_model_credential,
  115. XinferenceChatModel
  116. ),
  117. ModelInfo(
  118. 'gpt-4-vision-preview',
  119. '',
  120. ModelTypeConst.LLM,
  121. xinference_llm_model_credential,
  122. XinferenceChatModel
  123. ),
  124. ModelInfo(
  125. 'gpt4all',
  126. '',
  127. ModelTypeConst.LLM,
  128. xinference_llm_model_credential,
  129. XinferenceChatModel
  130. ),
  131. ModelInfo(
  132. 'llama2',
  133. '',
  134. ModelTypeConst.LLM,
  135. xinference_llm_model_credential,
  136. XinferenceChatModel
  137. ),
  138. ModelInfo(
  139. 'llama2-chat',
  140. '',
  141. ModelTypeConst.LLM,
  142. xinference_llm_model_credential,
  143. XinferenceChatModel
  144. ),
  145. ModelInfo(
  146. 'llama2-chat-32k',
  147. '',
  148. ModelTypeConst.LLM,
  149. xinference_llm_model_credential,
  150. XinferenceChatModel
  151. ),
  152. ModelInfo(
  153. 'qwen',
  154. '',
  155. ModelTypeConst.LLM,
  156. xinference_llm_model_credential,
  157. XinferenceChatModel
  158. ),
  159. ModelInfo(
  160. 'qwen-chat',
  161. '',
  162. ModelTypeConst.LLM,
  163. xinference_llm_model_credential,
  164. XinferenceChatModel
  165. ),
  166. ModelInfo(
  167. 'qwen-chat-32k',
  168. '',
  169. ModelTypeConst.LLM,
  170. xinference_llm_model_credential,
  171. XinferenceChatModel
  172. ),
  173. ModelInfo(
  174. 'qwen-code',
  175. '',
  176. ModelTypeConst.LLM,
  177. xinference_llm_model_credential,
  178. XinferenceChatModel
  179. ),
  180. ModelInfo(
  181. 'qwen-code-chat',
  182. '',
  183. ModelTypeConst.LLM,
  184. xinference_llm_model_credential,
  185. XinferenceChatModel
  186. ),
  187. ModelInfo(
  188. 'qwen-vl',
  189. '',
  190. ModelTypeConst.LLM,
  191. xinference_llm_model_credential,
  192. XinferenceChatModel
  193. ),
  194. ModelInfo(
  195. 'qwen-vl-chat',
  196. '',
  197. ModelTypeConst.LLM,
  198. xinference_llm_model_credential,
  199. XinferenceChatModel
  200. ),
  201. ModelInfo(
  202. 'qwen2-instruct',
  203. '',
  204. ModelTypeConst.LLM,
  205. xinference_llm_model_credential,
  206. XinferenceChatModel
  207. ),
  208. ModelInfo(
  209. 'qwen2-72b-instruct',
  210. '',
  211. ModelTypeConst.LLM,
  212. xinference_llm_model_credential,
  213. XinferenceChatModel
  214. ),
  215. ModelInfo(
  216. 'qwen2-57b-a14b-instruct',
  217. '',
  218. ModelTypeConst.LLM,
  219. xinference_llm_model_credential,
  220. XinferenceChatModel
  221. ),
  222. ModelInfo(
  223. 'qwen2-7b-instruct',
  224. '',
  225. ModelTypeConst.LLM,
  226. xinference_llm_model_credential,
  227. XinferenceChatModel
  228. ),
  229. ModelInfo(
  230. 'qwen2.5-72b-instruct',
  231. '',
  232. ModelTypeConst.LLM,
  233. xinference_llm_model_credential,
  234. XinferenceChatModel
  235. ),
  236. ModelInfo(
  237. 'qwen2.5-32b-instruct',
  238. '',
  239. ModelTypeConst.LLM,
  240. xinference_llm_model_credential,
  241. XinferenceChatModel
  242. ),
  243. ModelInfo(
  244. 'qwen2.5-14b-instruct',
  245. '',
  246. ModelTypeConst.LLM,
  247. xinference_llm_model_credential,
  248. XinferenceChatModel
  249. ),
  250. ModelInfo(
  251. 'qwen2.5-7b-instruct',
  252. '',
  253. ModelTypeConst.LLM,
  254. xinference_llm_model_credential,
  255. XinferenceChatModel
  256. ),
  257. ModelInfo(
  258. 'qwen2.5-1.5b-instruct',
  259. '',
  260. ModelTypeConst.LLM,
  261. xinference_llm_model_credential,
  262. XinferenceChatModel
  263. ),
  264. ModelInfo(
  265. 'qwen2.5-0.5b-instruct',
  266. '',
  267. ModelTypeConst.LLM,
  268. xinference_llm_model_credential,
  269. XinferenceChatModel
  270. ),
  271. ModelInfo(
  272. 'qwen2.5-3b-instruct',
  273. '',
  274. ModelTypeConst.LLM,
  275. xinference_llm_model_credential,
  276. XinferenceChatModel
  277. ),
  278. ModelInfo(
  279. 'minicpm-llama3-v-2_5',
  280. '',
  281. ModelTypeConst.LLM,
  282. xinference_llm_model_credential,
  283. XinferenceChatModel
  284. ),
  285. ]
  286. voice_model_info = [
  287. ModelInfo(
  288. 'CosyVoice-300M-SFT',
  289. '',
  290. ModelTypeConst.TTS,
  291. xinference_tts_model_credential,
  292. XInferenceTextToSpeech
  293. ),
  294. ModelInfo(
  295. 'Belle-whisper-large-v3-zh',
  296. '',
  297. ModelTypeConst.STT,
  298. xinference_stt_model_credential,
  299. XInferenceSpeechToText
  300. ),
  301. ]
  302. image_model_info = [
  303. ModelInfo(
  304. 'qwen-vl-chat',
  305. '',
  306. ModelTypeConst.IMAGE,
  307. xinference_image_model_credential,
  308. XinferenceImage
  309. ),
  310. ModelInfo(
  311. 'deepseek-vl-chat',
  312. '',
  313. ModelTypeConst.IMAGE,
  314. xinference_image_model_credential,
  315. XinferenceImage
  316. ),
  317. ModelInfo(
  318. 'yi-vl-chat',
  319. '',
  320. ModelTypeConst.IMAGE,
  321. xinference_image_model_credential,
  322. XinferenceImage
  323. ),
  324. ModelInfo(
  325. 'omnilmm',
  326. '',
  327. ModelTypeConst.IMAGE,
  328. xinference_image_model_credential,
  329. XinferenceImage
  330. ),
  331. ModelInfo(
  332. 'internvl-chat',
  333. '',
  334. ModelTypeConst.IMAGE,
  335. xinference_image_model_credential,
  336. XinferenceImage
  337. ),
  338. ModelInfo(
  339. 'cogvlm2',
  340. '',
  341. ModelTypeConst.IMAGE,
  342. xinference_image_model_credential,
  343. XinferenceImage
  344. ),
  345. ModelInfo(
  346. 'MiniCPM-Llama3-V-2_5',
  347. '',
  348. ModelTypeConst.IMAGE,
  349. xinference_image_model_credential,
  350. XinferenceImage
  351. ),
  352. ModelInfo(
  353. 'GLM-4V',
  354. '',
  355. ModelTypeConst.IMAGE,
  356. xinference_image_model_credential,
  357. XinferenceImage
  358. ),
  359. ModelInfo(
  360. 'MiniCPM-V-2.6',
  361. '',
  362. ModelTypeConst.IMAGE,
  363. xinference_image_model_credential,
  364. XinferenceImage
  365. ),
  366. ModelInfo(
  367. 'internvl2',
  368. '',
  369. ModelTypeConst.IMAGE,
  370. xinference_image_model_credential,
  371. XinferenceImage
  372. ),
  373. ModelInfo(
  374. 'qwen2-vl-instruct',
  375. '',
  376. ModelTypeConst.IMAGE,
  377. xinference_image_model_credential,
  378. XinferenceImage
  379. ),
  380. ModelInfo(
  381. 'llama-3.2-vision',
  382. '',
  383. ModelTypeConst.IMAGE,
  384. xinference_image_model_credential,
  385. XinferenceImage
  386. ),
  387. ModelInfo(
  388. 'llama-3.2-vision-instruct',
  389. '',
  390. ModelTypeConst.IMAGE,
  391. xinference_image_model_credential,
  392. XinferenceImage
  393. ),
  394. ModelInfo(
  395. 'glm-edge-v',
  396. '',
  397. ModelTypeConst.IMAGE,
  398. xinference_image_model_credential,
  399. XinferenceImage
  400. ),
  401. ]
  402. tti_model_info = [
  403. ModelInfo(
  404. 'sd-turbo',
  405. '',
  406. ModelTypeConst.TTI,
  407. xinference_tti_model_credential,
  408. XinferenceTextToImage
  409. ),
  410. ModelInfo(
  411. 'sdxl-turbo',
  412. '',
  413. ModelTypeConst.TTI,
  414. xinference_tti_model_credential,
  415. XinferenceTextToImage
  416. ),
  417. ModelInfo(
  418. 'stable-diffusion-v1.5',
  419. '',
  420. ModelTypeConst.TTI,
  421. xinference_tti_model_credential,
  422. XinferenceTextToImage
  423. ),
  424. ModelInfo(
  425. 'stable-diffusion-xl-base-1.0',
  426. '',
  427. ModelTypeConst.TTI,
  428. xinference_tti_model_credential,
  429. XinferenceTextToImage
  430. ),
  431. ModelInfo(
  432. 'sd3-medium',
  433. '',
  434. ModelTypeConst.TTI,
  435. xinference_tti_model_credential,
  436. XinferenceTextToImage
  437. ),
  438. ModelInfo(
  439. 'FLUX.1-schnell',
  440. '',
  441. ModelTypeConst.TTI,
  442. xinference_tti_model_credential,
  443. XinferenceTextToImage
  444. ),
  445. ModelInfo(
  446. 'FLUX.1-dev',
  447. '',
  448. ModelTypeConst.TTI,
  449. xinference_tti_model_credential,
  450. XinferenceTextToImage
  451. ),
  452. ]
  453. xinference_embedding_model_credential = XinferenceEmbeddingModelCredential()
  454. # 生成embedding_model_info列表
  455. embedding_model_info = [
  456. ModelInfo('bce-embedding-base_v1', '', ModelTypeConst.EMBEDDING,
  457. xinference_embedding_model_credential, XinferenceEmbedding),
  458. ModelInfo('bge-base-en', '', ModelTypeConst.EMBEDDING,
  459. xinference_embedding_model_credential, XinferenceEmbedding),
  460. ModelInfo('bge-base-en-v1.5', '', ModelTypeConst.EMBEDDING,
  461. xinference_embedding_model_credential, XinferenceEmbedding),
  462. ModelInfo('bge-base-zh', '', ModelTypeConst.EMBEDDING,
  463. xinference_embedding_model_credential, XinferenceEmbedding),
  464. ModelInfo('bge-base-zh-v1.5', '', ModelTypeConst.EMBEDDING,
  465. xinference_embedding_model_credential, XinferenceEmbedding),
  466. ModelInfo('bge-large-en', '', ModelTypeConst.EMBEDDING,
  467. xinference_embedding_model_credential, XinferenceEmbedding),
  468. ModelInfo('bge-large-en-v1.5', '', ModelTypeConst.EMBEDDING,
  469. xinference_embedding_model_credential, XinferenceEmbedding),
  470. ModelInfo('bge-large-zh', '', ModelTypeConst.EMBEDDING,
  471. xinference_embedding_model_credential, XinferenceEmbedding),
  472. ModelInfo('bge-large-zh-noinstruct', '', ModelTypeConst.EMBEDDING,
  473. xinference_embedding_model_credential, XinferenceEmbedding),
  474. ModelInfo('bge-large-zh-v1.5', '', ModelTypeConst.EMBEDDING,
  475. xinference_embedding_model_credential, XinferenceEmbedding),
  476. ModelInfo('bge-m3', '', ModelTypeConst.EMBEDDING, xinference_embedding_model_credential,
  477. XinferenceEmbedding),
  478. ModelInfo('bge-small-en-v1.5', '', ModelTypeConst.EMBEDDING,
  479. xinference_embedding_model_credential, XinferenceEmbedding),
  480. ModelInfo('bge-small-zh', '', ModelTypeConst.EMBEDDING,
  481. xinference_embedding_model_credential, XinferenceEmbedding),
  482. ModelInfo('bge-small-zh-v1.5', '', ModelTypeConst.EMBEDDING,
  483. xinference_embedding_model_credential, XinferenceEmbedding),
  484. ModelInfo('e5-large-v2', '', ModelTypeConst.EMBEDDING,
  485. xinference_embedding_model_credential, XinferenceEmbedding),
  486. ModelInfo('gte-base', '', ModelTypeConst.EMBEDDING, xinference_embedding_model_credential,
  487. XinferenceEmbedding),
  488. ModelInfo('gte-large', '', ModelTypeConst.EMBEDDING, xinference_embedding_model_credential,
  489. XinferenceEmbedding),
  490. ModelInfo('jina-embeddings-v2-base-en', '', ModelTypeConst.EMBEDDING,
  491. xinference_embedding_model_credential, XinferenceEmbedding),
  492. ModelInfo('jina-embeddings-v2-base-zh', '', ModelTypeConst.EMBEDDING,
  493. xinference_embedding_model_credential, XinferenceEmbedding),
  494. ModelInfo('jina-embeddings-v2-small-en', '', ModelTypeConst.EMBEDDING,
  495. xinference_embedding_model_credential, XinferenceEmbedding),
  496. ModelInfo('m3e-base', '', ModelTypeConst.EMBEDDING, xinference_embedding_model_credential,
  497. XinferenceEmbedding),
  498. ModelInfo('m3e-large', '', ModelTypeConst.EMBEDDING, xinference_embedding_model_credential,
  499. XinferenceEmbedding),
  500. ModelInfo('m3e-small', '', ModelTypeConst.EMBEDDING, xinference_embedding_model_credential,
  501. XinferenceEmbedding),
  502. ModelInfo('multilingual-e5-large', '', ModelTypeConst.EMBEDDING,
  503. xinference_embedding_model_credential, XinferenceEmbedding),
  504. ModelInfo('text2vec-base-chinese', '', ModelTypeConst.EMBEDDING,
  505. xinference_embedding_model_credential, XinferenceEmbedding),
  506. ModelInfo('text2vec-base-chinese-paraphrase', '', ModelTypeConst.EMBEDDING,
  507. xinference_embedding_model_credential, XinferenceEmbedding),
  508. ModelInfo('text2vec-base-chinese-sentence', '', ModelTypeConst.EMBEDDING,
  509. xinference_embedding_model_credential, XinferenceEmbedding),
  510. ModelInfo('text2vec-base-multilingual', '', ModelTypeConst.EMBEDDING,
  511. xinference_embedding_model_credential, XinferenceEmbedding),
  512. ModelInfo('text2vec-large-chinese', '', ModelTypeConst.EMBEDDING,
  513. xinference_embedding_model_credential, XinferenceEmbedding),
  514. ]
  515. rerank_list = [ModelInfo('bce-reranker-base_v1',
  516. '',
  517. ModelTypeConst.RERANKER, XInferenceRerankerModelCredential(), XInferenceReranker)]
# Provider-wide model registry, assembled with the ModelInfoManage builder.
# The calls are chained in a fixed order: each catalogue list is appended, and
# selected entries are additionally passed to append_default_model_info
# (presumably "the default model for that type" — confirm against
# ModelInfoManage, whose implementation is not visible here).
model_info_manage = (
    ModelInfoManage.builder()
    .append_model_info_list(model_info_list)
    .append_model_info_list(voice_model_info)
    # voice_model_info[0] is the TTS entry, voice_model_info[1] the STT entry.
    .append_default_model_info(voice_model_info[0])
    .append_default_model_info(voice_model_info[1])
    # 'phi3' is only registered as a default; it does not appear in
    # model_info_list.
    .append_default_model_info(ModelInfo('phi3',
                                         '',
                                         ModelTypeConst.LLM, xinference_llm_model_credential,
                                         XinferenceChatModel))
    .append_model_info_list(embedding_model_info)
    # NOTE(review): the default EMBEDDING entry has an empty model name —
    # confirm this is intentional.
    .append_default_model_info(ModelInfo('',
                                         '',
                                         ModelTypeConst.EMBEDDING,
                                         xinference_embedding_model_credential, XinferenceEmbedding))
    .append_model_info_list(rerank_list)
    .append_model_info_list(image_model_info)
    # image_model_info[0] is 'qwen-vl-chat'.
    .append_default_model_info(image_model_info[0])
    .append_model_info_list(tti_model_info)
    # tti_model_info[0] is 'sd-turbo'.
    .append_default_model_info(tti_model_info[0])
    # rerank_list[0] is the only reranker entry.
    .append_default_model_info(rerank_list[0])
    .build()
)
  541. def get_base_url(url: str):
  542. parse = urlparse(url)
  543. result_url = ParseResult(scheme=parse.scheme, netloc=parse.netloc, path=parse.path, params='',
  544. query='',
  545. fragment='').geturl()
  546. return result_url[:-1] if result_url.endswith("/") else result_url
  547. class XinferenceModelProvider(IModelProvider):
  548. def get_model_info_manage(self):
  549. return model_info_manage
  550. def get_model_provide_info(self):
  551. return ModelProvideInfo(provider='model_xinference_provider', name='Xorbits Inference', icon=get_file_content(
  552. os.path.join(PROJECT_DIR, "apps", 'models_provider', 'impl', 'xinference_model_provider', 'icon',
  553. 'xinference_icon_svg')))
  554. @staticmethod
  555. def get_base_model_list(api_base, api_key, model_type):
  556. base_url = get_base_url(api_base)
  557. base_url = base_url if base_url.endswith('/v1') else (base_url + '/v1')
  558. headers = {}
  559. if api_key:
  560. headers['Authorization'] = f"Bearer {api_key}"
  561. r = requests.request(method="GET", url=f"{base_url}/models", headers=headers, timeout=5)
  562. r.raise_for_status()
  563. model_list = r.json().get('data')
  564. return [model for model in model_list if model.get('model_type') == model_type]
  565. @staticmethod
  566. def get_model_info_by_name(model_list, model_name):
  567. if model_list is None:
  568. return []
  569. return [model for model in model_list if model.get('model_name') == model_name or model.get('id') == model_name]