hub.py 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755
  1. import json
  2. import logging
  3. import gzip
  4. import os
  5. import tempfile
  6. from typing import Dict, List, Optional, Any
  7. from pathlib import Path
  8. import fnmatch
  9. from threading import Lock
  10. from functools import cache
  11. from huggingface_hub import HfFileSystem
  12. from huggingface_hub.utils import validate_repo_id
  13. from modelscope.hub.api import HubApi
  14. from modelscope.hub.snapshot_download import (
  15. snapshot_download as modelscope_snapshot_download,
  16. )
  17. from transformers import PretrainedConfig
  18. from huggingface_hub import HfApi
  19. from huggingface_hub.utils import GatedRepoError, HfHubHTTPError
  20. from requests.exceptions import HTTPError
  21. from gpustack.config.config import get_global_config
  22. from gpustack.schemas import ModelFile
  23. from gpustack.schemas.models import (
  24. CategoryEnum,
  25. Model,
  26. SourceEnum,
  27. get_mmproj_filename,
  28. )
  29. from gpustack.utils.cache import is_cached, load_cache, save_cache
  logger = logging.getLogger(__name__)

  # Cache directory name handed to the cache helpers for repository listings.
  LIST_REPO_CACHE_DIR = "repo-skeleton"

  # File patterns fetched from ModelScope when only the model config is needed:
  # the root config.json plus Python modules (custom configs / trust_remote_code).
  # '*.json' is deliberately not used so that large files such as tokenizer.json
  # are not pulled during evaluation.
  MODELSCOPE_CONFIG_ALLOW_FILE_PATTERN = [
      "config.json",
      "*.py",
  ]
  @cache
  def get_model_lock(model_id: str) -> Lock:
      """Return the lock associated with ``model_id``.

      The function is memoized with ``functools.cache`` keyed on ``model_id``,
      so every call with the same id receives the same ``Lock`` object, and a
      new one is created the first time an id is seen.
      """
      lock_for_model = Lock()
      return lock_for_model
  class FileEntry:
      """A repository file described by its relative name and optional size."""

      def __init__(self, rfilename: str, size: Optional[int] = None):
          # rfilename: file name as listed for the repository.
          # size: file size, or None when it is not known.
          self.rfilename, self.size = rfilename, size
  def get_model_path_and_name(model: ModelFile) -> tuple[str, str]:
      """Return the ``(path, filename)`` pair that locates ``model`` at its source.

      - Hugging Face: ``(huggingface_repo_id, huggingface_filename)``
      - ModelScope: ``(model_scope_model_id, model_scope_file_path)``
      - Local path: ``(local_path, "")``
      - Any other source: ``("", "")``
      """
      if model.source == SourceEnum.HUGGING_FACE:
          return model.huggingface_repo_id, model.huggingface_filename
      if model.source == SourceEnum.MODEL_SCOPE:
          return model.model_scope_model_id, model.model_scope_file_path
      if model.source == SourceEnum.LOCAL_PATH:
          return model.local_path, ""
      return "", ""
  def match_file_and_calculate_size(
      files: List[FileEntry],
      model: ModelFile,
      cache_dir: str,
  ) -> tuple[int, List[str]]:
      """Match repository files against the model's patterns and total their size.

      Args:
          files: Repository file entries (``rfilename`` and ``size``).
          model: Model file whose source, filename pattern and mmproj pattern
              drive the selection.
          cache_dir: Base cache directory used to build the returned paths when
              ``model.local_dir`` is not set.

      Returns:
          ``(total_size, selected_paths)``. When the model has a path but no
          filename pattern, every file with a known size is counted and the
          only returned path is the model's base directory.
      """
      file_path, filename = get_model_path_and_name(model)
      extra_filename = get_mmproj_filename(model)

      if file_path and not filename:
          # No filename pattern: the whole repository/directory is the model.
          base_dir = model.local_dir or f"{cache_dir}/{model.source.value}/{file_path}"
          total = sum(f.size for f in files if getattr(f, 'size', None) is not None)
          return total, [base_dir]

      selected_files = []
      extra_files = []
      match_files = []
      for sibling in files:
          if sibling.size is None:
              continue
          rfilename = sibling.rfilename
          if filename and fnmatch.fnmatch(rfilename, filename):
              selected_files.append(rfilename)
              match_files.append(sibling)
          elif extra_filename and fnmatch.fnmatch(rfilename, extra_filename):
              extra_files.append(rfilename)
              match_files.append(sibling)

      # Only one of the matching extra (mmproj) files is kept.
      best_extra = select_most_suitable_extra_file(extra_files)
      if best_extra:
          selected_files.append(best_extra)

      # Every entry in match_files already has a size; a set keeps the
      # membership test constant-time.
      selected_names = set(selected_files)
      sum_size = sum(f.size for f in match_files if f.rfilename in selected_names)

      if selected_files and model.source in [
          SourceEnum.HUGGING_FACE,
          SourceEnum.MODEL_SCOPE,
      ]:
          base_dir = model.local_dir or f"{cache_dir}/{model.source.value}/{file_path}"
          selected_files = [os.path.join(base_dir, f) for f in selected_files]

      return sum_size, selected_files
  def select_most_suitable_extra_file(file_list: List[str]) -> str:
      """Pick the preferred extra file from ``file_list``.

      The lexicographically greatest name wins, so when both f16 and f32
      mmproj files match, the f32 one is chosen. Returns an empty string when
      there is nothing to choose from.
      """
      if not file_list:
          return ""
      return max(file_list)
  def match_hugging_face_files(
      repo_id: str,
      filename: str,
      extra_filename: Optional[str] = None,
      token: Optional[str] = None,
  ) -> List[str]:
      """List the files of a Hugging Face repo that match ``filename``.

      The repository is listed recursively and paths are compared relative to
      the repo root. Matches are returned sorted; when ``extra_filename`` is
      given, the single most suitable file matching it is appended at the end.
      """
      validate_repo_id(repo_id)
      fs = HfFileSystem(token=token)

      relative_paths: List[str] = []
      for entry in fs.ls(repo_id, recursive=True):
          # Entries may be detail dicts or plain path strings.
          full_name = entry["name"] if isinstance(entry, dict) else entry
          relative_paths.append(Path(full_name).relative_to(repo_id).as_posix())

      matched = sorted(p for p in relative_paths if fnmatch.fnmatch(p, filename))
      if extra_filename is None:
          return matched

      extra_candidates = [
          p for p in relative_paths if fnmatch.fnmatch(p, extra_filename)
      ]
      best_extra = select_most_suitable_extra_file(extra_candidates)
      if best_extra:
          matched.append(best_extra)
      return matched
  def is_repo_cached(repo_id: str, source: str, root_dir_only: bool = False) -> bool:
      """Tell whether a file listing for the repository is present in the cache.

      Args:
          repo_id: Repository identifier.
          source: Model source the repository belongs to.
          root_dir_only: Which listing variant to look for; must match the
              value later passed to ``list_repo``.

      Returns:
          False when ``repo_id`` or ``source`` is empty, otherwise whether a
          cached listing exists.
      """
      if not repo_id or not source:
          return False
      # list_repo stores listings under "{source}:{repo_id}:{root_dir_only}",
      # so the lookup has to use the same key shape to ever find them.
      listing_key = f"{source}:{repo_id}:{root_dir_only}"
      if is_cached(LIST_REPO_CACHE_DIR, listing_key):
          return True
      # Fall back to the historical key without the root_dir_only suffix so
      # any entry stored under it is still honoured.
      return is_cached(LIST_REPO_CACHE_DIR, f"{source}:{repo_id}")
  def list_repo(
      repo_id: str,
      source: str,
      token: Optional[str] = None,
      cache_expiration: Optional[int] = None,
      root_dir_only: bool = False,
  ) -> List[Dict[str, Any]]:
      """List the files of a Hugging Face or ModelScope repository.

      Results are cached under a key derived from ``source``, ``repo_id`` and
      ``root_dir_only``; a usable cached listing is returned without contacting
      the hub.

      Args:
          repo_id: Repository identifier.
          source: ``SourceEnum.HUGGING_FACE`` or ``SourceEnum.MODEL_SCOPE``.
          token: Optional Hugging Face token.
          cache_expiration: Passed through to the cache loader.
          root_dir_only: When True, only files in the repository root are
              listed; otherwise the listing is recursive.

      Returns:
          A list of ``{"name": <relative path>, "size": <size>}`` dicts.

      Raises:
          ValueError: If ``source`` is neither Hugging Face nor ModelScope.
      """
      cache_key = f"{source}:{repo_id}:{root_dir_only}"
      cached_result, is_succ = load_cache(
          LIST_REPO_CACHE_DIR, cache_key, cache_expiration
      )
      if is_succ:
          try:
              result = json.loads(cached_result)
          except (TypeError, ValueError):
              # An unreadable cache entry is treated as a miss and refetched.
              result = None
          if isinstance(result, list):
              return result

      if source == SourceEnum.HUGGING_FACE:
          validate_repo_id(repo_id)
          hffs = HfFileSystem(token=token)
          file_info = []
          for file in hffs.ls(repo_id, recursive=not root_dir_only):
              if not isinstance(file, dict):
                  continue
              relative_path = Path(file["name"]).relative_to(repo_id).as_posix()
              # If root_dir_only is True, skip files in subdirectories
              if root_dir_only and "/" in relative_path:
                  continue
              file_info.append(
                  {
                      "name": relative_path,
                      "size": file["size"],
                  }
              )
      elif source == SourceEnum.MODEL_SCOPE:
          msapi = HubApi()
          files = msapi.get_model_files(repo_id, recursive=not root_dir_only)
          file_info = []
          for file in files:
              file_path = file["Path"]
              # If root_dir_only is True, skip files in subdirectories
              if root_dir_only and "/" in file_path:
                  continue
              file_info.append(
                  {
                      "name": file_path,
                      "size": file["Size"],
                  }
              )
      else:
          raise ValueError(f"Invalid source: {source}")

      # A failed cache write is only logged; the fresh listing is still returned.
      if not save_cache(LIST_REPO_CACHE_DIR, cache_key, json.dumps(file_info)):
          logger.info(f"Saved cache {LIST_REPO_CACHE_DIR} {cache_key} fail")
      return file_info
  def filter_filename(file_path: str, file_paths: List[str]) -> List[str]:
      """Return the entries of ``file_paths`` matching the glob ``file_path``, sorted.

      Args:
          file_path: An ``fnmatch``-style pattern.
          file_paths: Candidate paths to filter.
      """
      return sorted(fnmatch.filter(file_paths, file_path))
  def match_model_scope_file_paths(
      model_id: str, file_path: str, extra_file_path: Optional[str] = None
  ) -> List[str]:
      """List the files of a ModelScope model that match ``file_path``.

      When ``file_path`` contains a directory part, the listing is rooted at
      that directory. Matches are returned sorted; when ``extra_file_path`` is
      given, the single most suitable file matching it is appended at the end.
      """
      # Everything before the last "/" is used as the listing root.
      root = file_path.rsplit('/', 1)[0] if '/' in file_path else None

      api = HubApi()
      files = api.get_model_files(model_id, root=root, recursive=True)
      file_paths = [file["Path"] for file in files]

      matching_paths = sorted(p for p in file_paths if fnmatch.fnmatch(p, file_path))
      if extra_file_path is None:
          return matching_paths

      extra_matching_paths = [
          p for p in file_paths if fnmatch.fnmatch(p, extra_file_path)
      ]
      # Same preference rule as for Hugging Face repos, e.g. when both f16 and
      # f32 mmproj files match, f32 is preferred over f16.
      best_extra = select_most_suitable_extra_file(extra_matching_paths)
      if best_extra:
          matching_paths.append(best_extra)
      return matching_paths
  def read_repo_file_content(
      model: Model,
      file_path: str,
      token: Optional[str] = None,
  ) -> Optional[Dict[str, Any]]:
      """
      Read a JSON config file from the model's source.

      - Hugging Face: uses HfFileSystem to open `{repo_id}/{file_path}`.
      - ModelScope: downloads a snapshot matching `file_path` into a temporary
        directory that is removed automatically after the file has been read.
      - Local Path: reads from the local directory only (no worker broadcast).

      Any error is logged and swallowed.

      Returns None if the file cannot be found or read.
      """
      try:
          if model.source == SourceEnum.HUGGING_FACE:
              return _read_hugging_face_json(model, file_path, token)
          if model.source == SourceEnum.MODEL_SCOPE:
              return _read_model_scope_json(model, file_path)
          if model.source == SourceEnum.LOCAL_PATH:
              return _read_local_path_json(model, file_path)
          return None
      except Exception as e:
          source_key = (
              model.huggingface_repo_id
              or model.model_scope_model_id
              or model.local_path
              or "<unknown>"
          )
          logger.error(f"Failed to read '{file_path}' for source '{source_key}': {e}")
          return None


  def _read_hugging_face_json(model: Model, file_path: str, token: Optional[str]):
      """Fetch and parse ``file_path`` from the model's Hugging Face repo."""
      hffs = HfFileSystem(token=token)
      repo_path = f"{model.huggingface_repo_id}/{file_path}"
      with hffs.open(repo_path, "rb") as f:
          content = f.read()
      # Content starting with the gzip magic bytes, for a file not named *.gz,
      # is decompressed before parsing.
      if (
          content
          and content.startswith(b"\x1f\x8b")
          and not file_path.endswith(".gz")
      ):
          try:
              content = gzip.decompress(content)
          except Exception as e:
              logger.warning(
                  f"Failed to decompress gzip content for {file_path}: {e}"
              )
      return json.loads(content)


  def _read_model_scope_json(model: Model, file_path: str):
      """Download ``file_path`` from ModelScope into a temp dir and parse it."""
      _cfg = get_global_config()
      base_tmp = os.path.join(
          (_cfg.cache_dir if _cfg and _cfg.cache_dir else "/tmp"),
          "modelscope",
          "tempfile",
      )
      os.makedirs(base_tmp, exist_ok=True)
      safe_id = (model.model_scope_model_id or "").replace("/", "__")
      with tempfile.TemporaryDirectory(dir=base_tmp, prefix=f"{safe_id}__") as tmp_dir:
          model_dir = modelscope_snapshot_download(
              model_id=model.model_scope_model_id,
              local_dir=tmp_dir,
              allow_patterns=[file_path],
          )
          candidate = os.path.join(model_dir, file_path)
          fp = candidate if os.path.exists(candidate) else None
          if not fp:
              # Search recursively by base filename for robustness
              base_name = os.path.basename(file_path)
              for root, _dirs, files in os.walk(model_dir):
                  if base_name in files:
                      fp = os.path.join(root, base_name)
                      break
          if not fp:
              return None
          # Must be read before the temporary directory is cleaned up.
          with open(fp, "r", encoding="utf-8") as f:
              return json.load(f)


  def _read_local_path_json(model: Model, file_path: str):
      """Parse ``file_path`` from the model's local directory, if present."""
      local_path = model.local_path or ""
      if not local_path or not os.path.isdir(local_path):
          return None
      fp = os.path.join(local_path, file_path)
      if not os.path.exists(fp):
          return None
      with open(fp, "r", encoding="utf-8") as f:
          return json.load(f)
  def get_model_weight_size(model: Model, token: Optional[str] = None) -> int:
      """
      Sum the sizes of the weight files found in the root directory of the
      model's repository. Weight files are those ending in .safetensors, .bin,
      .pt or .pth.

      Args:
          model: Model to get the weight size for
          token: Optional Hugging Face API token
      Returns:
          int: The size of the model weights
      Raises:
          ValueError: If the model source is neither Hugging Face nor ModelScope
      """
      if model.source == SourceEnum.HUGGING_FACE:
          repo_id = model.huggingface_repo_id
      elif model.source == SourceEnum.MODEL_SCOPE:
          repo_id = model.model_scope_model_id
      else:
          raise ValueError(f"Unknown source {model.source}")

      weight_suffixes = (".safetensors", ".bin", ".pt", ".pth")
      # consolidated.safetensors is usually a duplicate of other weight files. Exclude by default.
      # Example: https://huggingface.co/mistralai/Voxtral-Small-24B-2507
      skipped_names = ["consolidated.safetensors"]

      total_size = 0
      for info in list_repo(repo_id, model.source, token=token, root_dir_only=True):
          name = info.get("name", "")
          if name.endswith(weight_suffixes) and name not in skipped_names:
              total_size += info.get("size", 0)
      return total_size
  def get_diffusion_model_weight_size(model: Model, token: Optional[str] = None) -> int:
      """
      Get the size of the diffusion model weights.

      Only weight files (.safetensors, .bin, .pt, .pth) that live under a
      top-level directory named by a non-null key of model_index.json are
      counted.

      Args:
          model: Model to get the weight size for
          token: Optional Hugging Face API token
      Returns:
          int: The size of the model weights
      Raises:
          ValueError: If the source is unsupported, the model is not an image
              model, or model_index.json is missing or not a JSON object.
      """
      weight_file_extensions = (".safetensors", ".bin", ".pt", ".pth")

      if model.source == SourceEnum.HUGGING_FACE:
          repo_id = model.huggingface_repo_id
      elif model.source == SourceEnum.MODEL_SCOPE:
          repo_id = model.model_scope_model_id
      else:
          raise ValueError(f"Unknown source {model.source}")

      if not model.categories or CategoryEnum.IMAGE not in model.categories:
          raise ValueError("Model is not an image model")

      # In different repositories, model files may be stored in different dir.
      # However, during runtime, the diffusers loads components from corresponding dir
      # according to the pipeline defined in model_index.json.
      # We can follow the definition in model_index.json to determine which file
      # weights should be included in the calculation.
      pipeline_data = read_repo_file_content(model, "model_index.json", token=token)
      if pipeline_data is None:
          raise ValueError(f"No model_index.json in repo {repo_id}")
      if isinstance(pipeline_data, list):
          # A list-shaped index is represented by its first element.
          pipeline_data = pipeline_data[0] if pipeline_data else None
      if not isinstance(pipeline_data, dict):
          # Without a mapping there are no component names to look up.
          raise ValueError(f"Invalid model_index.json in repo {repo_id}")

      sum_size = 0
      repo_file_infos = list_repo(repo_id, model.source, token=token, root_dir_only=False)
      for file_info in repo_file_infos:
          component, separator, remainder = file_info.get("name", "").partition("/")
          if (
              not separator
              or pipeline_data.get(component, None) is None
              or not remainder.endswith(weight_file_extensions)
          ):
              continue
          sum_size += file_info.get("size", 0)
      return sum_size
  def get_pretrained_config(model: Model, **kwargs):
      """
      Load the pretrained config of the model from Hugging Face, ModelScope or
      a local path.

      Remote code is trusted when "--trust-remote-code" is among the model's
      backend parameters or ``trust_remote_code`` is passed truthy in kwargs.

      Args:
          model: Model to get the pretrained config for.
      Returns:
          The loaded config, or an empty dict when a local path does not exist
          on this node.
      Raises:
          ValueError: If the model source is not supported.
      """
      flag_in_params = bool(
          model.backend_parameters
          and "--trust-remote-code" in model.backend_parameters
      )
      trust_remote_code = bool(flag_in_params or kwargs.get("trust_remote_code"))

      global_config = get_global_config()

      if model.source == SourceEnum.HUGGING_FACE:
          from transformers import AutoConfig

          return AutoConfig.from_pretrained(
              model.huggingface_repo_id,
              token=global_config.huggingface_token,
              trust_remote_code=trust_remote_code,
              cache_dir=os.path.join(global_config.cache_dir, "huggingface"),
          )

      if model.source == SourceEnum.MODEL_SCOPE:
          from modelscope import AutoConfig

          model_id = model.model_scope_model_id
          repo_cache_dir = os.path.join(
              os.path.join(global_config.cache_dir, "model_scope"),
              *model_id.split('/'),
          )
          ms_config_json = os.path.join(repo_cache_dir, "config.json")

          # ModelScope's wrapped AutoConfig passes kwargs to HF from_pretrained after
          # snapshot_download; cache_dir/local_dir are not forwarded to snapshot_download,
          # so downloads must use modelscope_snapshot_download with local_dir here.
          # Require config.json (not merely an existing dir) so empty directories still fetch.
          if os.path.isfile(ms_config_json):
              logger.info(f"Repo cache dir: {repo_cache_dir}")
          else:
              logger.info(
                  f"Downloading ModelScope files required for config to {repo_cache_dir} "
                  f"(model_id={model.model_scope_model_id})"
              )
              with get_model_lock(model_id):
                  # Checked again under the lock: another caller may have
                  # finished the download while this one was waiting.
                  if not os.path.isfile(ms_config_json):
                      modelscope_snapshot_download(
                          model_id=model_id,
                          local_dir=repo_cache_dir,
                          allow_file_pattern=MODELSCOPE_CONFIG_ALLOW_FILE_PATTERN,
                      )

          logger.info(
              f"Loading pretrained config for ModelScope model {model.model_scope_model_id} "
              f"from {repo_cache_dir}"
          )
          loaded_config = AutoConfig.from_pretrained(
              repo_cache_dir,
              trust_remote_code=trust_remote_code,
              allow_file_pattern=MODELSCOPE_CONFIG_ALLOW_FILE_PATTERN,
              local_files_only=True,
          )
          logger.info(
              f"Successfully loaded pretrained config for ModelScope model {model.model_scope_model_id}"
          )
          return loaded_config

      if model.source == SourceEnum.LOCAL_PATH:
          if not os.path.exists(model.local_path):
              logger.warning(
                  f"Local Path: {model.readable_source} is not local to the server node and may reside on a worker node."
              )
              # Return an empty dict here to facilitate special handling by upstream methods.
              return {}

          from transformers import AutoConfig

          return AutoConfig.from_pretrained(
              model.local_path,
              trust_remote_code=trust_remote_code,
              local_files_only=True,
          )

      raise ValueError(f"Unsupported model source: {model.source}")
  def safe_pretrained_config_from_dict(config_dict: Dict) -> PretrainedConfig:
      """
      Build a PretrainedConfig from a raw config.json dict without passing the
      legacy `rope_scaling` key through construction.

      This works around a transformers v5 bug where __post_init__'s RoPE
      standardization reads self.max_position_embeddings before kwargs are
      applied as attributes, raising AttributeError on configs that still use
      `rope_scaling`. The key is held back and set as an attribute afterwards.
      The input dict is not modified.
      """
      without_rope = {
          key: value for key, value in config_dict.items() if key != "rope_scaling"
      }
      legacy_rope_scaling = config_dict.get("rope_scaling", None)

      config = PretrainedConfig.from_dict(without_rope)
      if legacy_rope_scaling is not None:
          config.rope_scaling = legacy_rope_scaling
      return config
  459. # Simplified from vllm.config._get_and_verify_max_len
  460. # Keep in our codebase to avoid dependency on vllm's internal
  461. # APIs which may change unexpectedly.
  462. # https://github.com/vllm-project/vllm/blob/v0.7.3/vllm/config.py#L2453
  def get_max_model_len(pretrained_config) -> int:  # noqa: C901
      """Get the model's maximum length.

      The smallest value among the known "max length" attributes of
      ``pretrained_config`` is used, or 2048 (with a warning) when none of them
      is set. If the config has ``rope_scaling`` with a type other than "su",
      "longrope" or "llama3", the length is multiplied by its ``factor``
      (default 1); for "yarn" the base is ``original_max_position_embeddings``.

      Raises:
          ValueError: If ``rope_scaling`` has neither a 'type' nor a
              'rope_type' key.
      """
      possible_keys = [
          # OPT
          "max_position_embeddings",
          # GPT-2
          "n_positions",
          # MPT
          "max_seq_len",
          # ChatGLM2
          "seq_length",
          # Command-R
          "model_max_length",
          # Whisper
          "max_target_positions",
          # Others
          "max_sequence_length",
          "max_seq_length",
          "seq_len",
      ]
      # Choose the smallest "max_length" from the possible keys.
      found_lengths = [
          value
          for value in (getattr(pretrained_config, key, None) for key in possible_keys)
          if value is not None
      ]
      if found_lengths:
          derived_max_model_len = min(found_lengths)
      else:
          # If none of the keys were found in the config, use a default and
          # log a warning.
          default_max_len = 2048
          logger.warning(
              "The model's config.json does not contain any of the following "
              "keys to determine the original maximum length of the model: "
              "%s. Assuming the model's maximum length is %d.",
              possible_keys,
              default_max_len,
          )
          derived_max_model_len = default_max_len

      rope_scaling = getattr(pretrained_config, "rope_scaling", None)
      if rope_scaling is not None:
          if "type" in rope_scaling:
              rope_type = rope_scaling["type"]
          elif "rope_type" in rope_scaling:
              rope_type = rope_scaling["rope_type"]
          else:
              raise ValueError("rope_scaling must have a 'type' or 'rope_type' key.")

          # The correct one should be "longrope", kept "su" here
          # to be backward compatible
          if rope_type not in ("su", "longrope", "llama3"):
              scaling_factor = rope_scaling.get("factor", 1) if isinstance(
                  rope_scaling, dict
              ) else (rope_scaling["factor"] if "factor" in rope_scaling else 1)
              if rope_type == "yarn":
                  derived_max_model_len = rope_scaling["original_max_position_embeddings"]
              derived_max_model_len *= scaling_factor

      logger.debug(f"Derived max model length: {derived_max_model_len}")
      return int(derived_max_model_len)
  522. # Similar to https://github.com/vllm-project/vllm/blob/89a77b10846fd96273cce78d86d2556ea582d26e/vllm/transformers_utils/config.py#L978,
  523. # But we don't assert and fail if num_attention_heads is missing.
  def get_hf_text_config(config: PretrainedConfig):
      """Return the text ("LLM") sub-config of a multimodal config.

      The sub-config is returned only when the config has a ``text_config``
      attribute, ``get_text_config()`` yields something, and the result (a dict
      is first converted to a PretrainedConfig) has ``num_attention_heads``.
      In every other case, including pure text models, ``config`` itself is
      returned.
      """
      if not hasattr(config, "text_config"):
          return config

      text_config = config.get_text_config()
      if text_config is None:
          return config

      if isinstance(text_config, dict):
          text_config = safe_pretrained_config_from_dict(text_config)

      return text_config if hasattr(text_config, "num_attention_heads") else config
  # Quantization markers looked up in GGUF file names, in lookup order:
  # the IQ1..IQ4 family first, then Q2..Q8.
  quantization_list = [f"-IQ{bits}_" for bits in (1, 2, 3, 4)] + [
      f"-Q{bits}_" for bits in (2, 3, 4, 5, 6, 8)
  ]
  def get_hugging_face_model_min_gguf_path(
      model_id: str,
      token: Optional[str] = None,
  ) -> Optional[str]:
      """Pick a GGUF file from a Hugging Face repo by quantization preference.

      Quantization markers are tried in ``quantization_list`` order; the first
      (alphabetically) GGUF file whose upper-cased name contains the marker is
      returned. If no marker matches, the first GGUF file is returned, and None
      when the repo has no GGUF files.
      """
      repo_files = HfApi(token=token).list_repo_files(model_id)
      gguf_files = [name for name in repo_files if name.endswith(".gguf")]
      gguf_files.sort()
      if not gguf_files:
          return None

      for marker in quantization_list:
          hit = next((name for name in gguf_files if marker in name.upper()), None)
          if hit is not None:
              return hit
      return gguf_files[0]
  def auth_check(
      model: Model,
      huggingface_token: Optional[str] = None,
  ):
      """Verify that the model's repository is accessible.

      - Hugging Face: calls ``HfApi.auth_check``; a gated repo or an HTTP
        401/403 is reported as restricted access.
      - ModelScope: lists the model files; an HTTP 401/403/404 is reported as
        restricted access.

      Other HTTP errors are ignored, and other sources are not checked.

      Raises:
          Exception: When access to the model is restricted. The original
              error is attached as the cause.
      """
      hf_restricted_message = (
          "Access to the model is restricted. Please set a valid Huggingface token with proper permissions in the GPUStack server configuration."
      )

      if model.source == SourceEnum.HUGGING_FACE:
          api = HfApi(token=huggingface_token)
          try:
              api.auth_check(model.huggingface_repo_id)
          except GatedRepoError as e:
              raise Exception(hf_restricted_message) from e
          except HfHubHTTPError as e:
              # The error may carry no response object; treat that as "unknown status".
              if getattr(e.response, "status_code", None) in (401, 403):
                  raise Exception(hf_restricted_message) from e
      elif model.source == SourceEnum.MODEL_SCOPE:
          api = HubApi()
          try:
              api.get_model_files(model.model_scope_model_id)
          except HTTPError as e:
              if getattr(e.response, "status_code", None) in (401, 403, 404):
                  raise Exception("Access to the model is restricted.") from e
  def get_model_scope_model_min_gguf_path(
      model_id: str,
  ) -> Optional[str]:
      """Pick a GGUF file from a ModelScope model by quantization preference.

      The model files are listed recursively. Quantization markers are tried in
      ``quantization_list`` order; the first (alphabetically) GGUF file whose
      upper-cased path contains the marker is returned. If no marker matches,
      the first GGUF file is returned, and None when there are no GGUF files.
      """
      listing = HubApi().get_model_files(model_id, recursive=True)
      file_paths: List[str] = [entry["Path"] for entry in listing]
      gguf_files = [path for path in file_paths if path.endswith(".gguf")]
      gguf_files.sort()
      if not gguf_files:
          return None

      for marker in quantization_list:
          hit = next((path for path in gguf_files if marker in path.upper()), None)
          if hit is not None:
              return hit
      return gguf_files[0]
  def has_diffusers_model_index(
      model: Model,
      token: Optional[str] = None,
  ) -> bool:
      """Report whether the model's model_index.json has a "_diffusers_version" key.

      The file is obtained through read_repo_file_content, so only direct file
      access is covered (Hub sources and local files). For LOCAL_PATH models
      that require worker queries, use
      check_diffusers_model_index_from_workers() in calculator.py instead.

      Detection is best-effort: any error is logged and reported as False.

      Args:
          model: Model to check
          token: Optional Hugging Face API token
      Returns:
          True if model_index.json is a dict containing _diffusers_version, or
          a list with at least one such dict; False otherwise
      """
      marker = "_diffusers_version"
      try:
          data = read_repo_file_content(model, "model_index.json", token=token)
          # The typical structure is a dict containing _diffusers_version
          if isinstance(data, dict):
              return marker in data
          # Some repos might have a list structure; check items for the key
          if isinstance(data, list):
              return any(isinstance(item, dict) and marker in item for item in data)
          # Missing file (None) or any other JSON shape.
          return False
      except Exception as e:
          # Best-effort detection; do not raise on error
          try:
              source_key = (
                  model.huggingface_repo_id
                  or model.model_scope_model_id
                  or model.local_path
                  or "<unknown>"
              )
              logger.error(f"Failed to check model_index.json for {source_key}: {e}")
          except Exception:
              pass
          return False