llama3_8b_partial_offload.json 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764
  1. {
  2. "estimate": {
  3. "items": [
  4. {
  5. "offloadLayers": 0,
  6. "fullOffloaded": false,
  7. "ram": {
  8. "handleLayers": 32,
  9. "handleLastLayer": 31,
  10. "handleOutputLayer": true,
  11. "remote": false,
  12. "uma": 1245834200,
  13. "nonuma": 1403120600
  14. },
  15. "vrams": [
  16. {
  17. "handleLayers": 0,
  18. "handleLastLayer": -1,
  19. "handleOutputLayer": false,
  20. "remote": false,
  21. "uma": 0,
  22. "nonuma": 964157440
  23. }
  24. ]
  25. },
  26. {
  27. "offloadLayers": 1,
  28. "fullOffloaded": false,
  29. "ram": {
  30. "handleLayers": 31,
  31. "handleLastLayer": 30,
  32. "handleOutputLayer": true,
  33. "remote": false,
  34. "uma": 1212279768,
  35. "nonuma": 1369566168
  36. },
  37. "vrams": [
  38. {
  39. "handleLayers": 1,
  40. "handleLastLayer": 31,
  41. "handleOutputLayer": false,
  42. "remote": false,
  43. "uma": 33554432,
  44. "nonuma": 1145597952
  45. }
  46. ]
  47. },
  48. {
  49. "offloadLayers": 2,
  50. "fullOffloaded": false,
  51. "ram": {
  52. "handleLayers": 30,
  53. "handleLastLayer": 29,
  54. "handleOutputLayer": true,
  55. "remote": false,
  56. "uma": 1178725336,
  57. "nonuma": 1336011736
  58. },
  59. "vrams": [
  60. {
  61. "handleLayers": 2,
  62. "handleLastLayer": 31,
  63. "handleOutputLayer": false,
  64. "remote": false,
  65. "uma": 67108864,
  66. "nonuma": 1301868544
  67. }
  68. ]
  69. },
  70. {
  71. "offloadLayers": 3,
  72. "fullOffloaded": false,
  73. "ram": {
  74. "handleLayers": 29,
  75. "handleLastLayer": 28,
  76. "handleOutputLayer": true,
  77. "remote": false,
  78. "uma": 1145170904,
  79. "nonuma": 1302457304
  80. },
  81. "vrams": [
  82. {
  83. "handleLayers": 3,
  84. "handleLastLayer": 31,
  85. "handleOutputLayer": false,
  86. "remote": false,
  87. "uma": 100663296,
  88. "nonuma": 1458139136
  89. }
  90. ]
  91. },
  92. {
  93. "offloadLayers": 4,
  94. "fullOffloaded": false,
  95. "ram": {
  96. "handleLayers": 28,
  97. "handleLastLayer": 27,
  98. "handleOutputLayer": true,
  99. "remote": false,
  100. "uma": 1111616472,
  101. "nonuma": 1268902872
  102. },
  103. "vrams": [
  104. {
  105. "handleLayers": 4,
  106. "handleLastLayer": 31,
  107. "handleOutputLayer": false,
  108. "remote": false,
  109. "uma": 134217728,
  110. "nonuma": 1614409728
  111. }
  112. ]
  113. },
  114. {
  115. "offloadLayers": 5,
  116. "fullOffloaded": false,
  117. "ram": {
  118. "handleLayers": 27,
  119. "handleLastLayer": 26,
  120. "handleOutputLayer": true,
  121. "remote": false,
  122. "uma": 1078062040,
  123. "nonuma": 1235348440
  124. },
  125. "vrams": [
  126. {
  127. "handleLayers": 5,
  128. "handleLastLayer": 31,
  129. "handleOutputLayer": false,
  130. "remote": false,
  131. "uma": 167772160,
  132. "nonuma": 1770680320
  133. }
  134. ]
  135. },
  136. {
  137. "offloadLayers": 6,
  138. "fullOffloaded": false,
  139. "ram": {
  140. "handleLayers": 26,
  141. "handleLastLayer": 25,
  142. "handleOutputLayer": true,
  143. "remote": false,
  144. "uma": 1044507608,
  145. "nonuma": 1201794008
  146. },
  147. "vrams": [
  148. {
  149. "handleLayers": 6,
  150. "handleLastLayer": 31,
  151. "handleOutputLayer": false,
  152. "remote": false,
  153. "uma": 201326592,
  154. "nonuma": 1926950912
  155. }
  156. ]
  157. },
  158. {
  159. "offloadLayers": 7,
  160. "fullOffloaded": false,
  161. "ram": {
  162. "handleLayers": 25,
  163. "handleLastLayer": 24,
  164. "handleOutputLayer": true,
  165. "remote": false,
  166. "uma": 1010953176,
  167. "nonuma": 1168239576
  168. },
  169. "vrams": [
  170. {
  171. "handleLayers": 7,
  172. "handleLastLayer": 31,
  173. "handleOutputLayer": false,
  174. "remote": false,
  175. "uma": 234881024,
  176. "nonuma": 2083221504
  177. }
  178. ]
  179. },
  180. {
  181. "offloadLayers": 8,
  182. "fullOffloaded": false,
  183. "ram": {
  184. "handleLayers": 24,
  185. "handleLastLayer": 23,
  186. "handleOutputLayer": true,
  187. "remote": false,
  188. "uma": 977398744,
  189. "nonuma": 1134685144
  190. },
  191. "vrams": [
  192. {
  193. "handleLayers": 8,
  194. "handleLastLayer": 31,
  195. "handleOutputLayer": false,
  196. "remote": false,
  197. "uma": 268435456,
  198. "nonuma": 2239492096
  199. }
  200. ]
  201. },
  202. {
  203. "offloadLayers": 9,
  204. "fullOffloaded": false,
  205. "ram": {
  206. "handleLayers": 23,
  207. "handleLastLayer": 22,
  208. "handleOutputLayer": true,
  209. "remote": false,
  210. "uma": 943844312,
  211. "nonuma": 1101130712
  212. },
  213. "vrams": [
  214. {
  215. "handleLayers": 9,
  216. "handleLastLayer": 31,
  217. "handleOutputLayer": false,
  218. "remote": false,
  219. "uma": 301989888,
  220. "nonuma": 2395762688
  221. }
  222. ]
  223. },
  224. {
  225. "offloadLayers": 10,
  226. "fullOffloaded": false,
  227. "ram": {
  228. "handleLayers": 22,
  229. "handleLastLayer": 21,
  230. "handleOutputLayer": true,
  231. "remote": false,
  232. "uma": 910289880,
  233. "nonuma": 1067576280
  234. },
  235. "vrams": [
  236. {
  237. "handleLayers": 10,
  238. "handleLastLayer": 31,
  239. "handleOutputLayer": false,
  240. "remote": false,
  241. "uma": 335544320,
  242. "nonuma": 2552033280
  243. }
  244. ]
  245. },
  246. {
  247. "offloadLayers": 11,
  248. "fullOffloaded": false,
  249. "ram": {
  250. "handleLayers": 21,
  251. "handleLastLayer": 20,
  252. "handleOutputLayer": true,
  253. "remote": false,
  254. "uma": 876735448,
  255. "nonuma": 1034021848
  256. },
  257. "vrams": [
  258. {
  259. "handleLayers": 11,
  260. "handleLastLayer": 31,
  261. "handleOutputLayer": false,
  262. "remote": false,
  263. "uma": 369098752,
  264. "nonuma": 2708303872
  265. }
  266. ]
  267. },
  268. {
  269. "offloadLayers": 12,
  270. "fullOffloaded": false,
  271. "ram": {
  272. "handleLayers": 20,
  273. "handleLastLayer": 19,
  274. "handleOutputLayer": true,
  275. "remote": false,
  276. "uma": 843181016,
  277. "nonuma": 1000467416
  278. },
  279. "vrams": [
  280. {
  281. "handleLayers": 12,
  282. "handleLastLayer": 31,
  283. "handleOutputLayer": false,
  284. "remote": false,
  285. "uma": 402653184,
  286. "nonuma": 2864574464
  287. }
  288. ]
  289. },
  290. {
  291. "offloadLayers": 13,
  292. "fullOffloaded": false,
  293. "ram": {
  294. "handleLayers": 19,
  295. "handleLastLayer": 18,
  296. "handleOutputLayer": true,
  297. "remote": false,
  298. "uma": 809626584,
  299. "nonuma": 966912984
  300. },
  301. "vrams": [
  302. {
  303. "handleLayers": 13,
  304. "handleLastLayer": 31,
  305. "handleOutputLayer": false,
  306. "remote": false,
  307. "uma": 436207616,
  308. "nonuma": 3020845056
  309. }
  310. ]
  311. },
  312. {
  313. "offloadLayers": 14,
  314. "fullOffloaded": false,
  315. "ram": {
  316. "handleLayers": 18,
  317. "handleLastLayer": 17,
  318. "handleOutputLayer": true,
  319. "remote": false,
  320. "uma": 776072152,
  321. "nonuma": 933358552
  322. },
  323. "vrams": [
  324. {
  325. "handleLayers": 14,
  326. "handleLastLayer": 31,
  327. "handleOutputLayer": false,
  328. "remote": false,
  329. "uma": 469762048,
  330. "nonuma": 3177115648
  331. }
  332. ]
  333. },
  334. {
  335. "offloadLayers": 15,
  336. "fullOffloaded": false,
  337. "ram": {
  338. "handleLayers": 17,
  339. "handleLastLayer": 16,
  340. "handleOutputLayer": true,
  341. "remote": false,
  342. "uma": 742517720,
  343. "nonuma": 899804120
  344. },
  345. "vrams": [
  346. {
  347. "handleLayers": 15,
  348. "handleLastLayer": 31,
  349. "handleOutputLayer": false,
  350. "remote": false,
  351. "uma": 503316480,
  352. "nonuma": 3333386240
  353. }
  354. ]
  355. },
  356. {
  357. "offloadLayers": 16,
  358. "fullOffloaded": false,
  359. "ram": {
  360. "handleLayers": 16,
  361. "handleLastLayer": 15,
  362. "handleOutputLayer": true,
  363. "remote": false,
  364. "uma": 708963288,
  365. "nonuma": 866249688
  366. },
  367. "vrams": [
  368. {
  369. "handleLayers": 16,
  370. "handleLastLayer": 31,
  371. "handleOutputLayer": false,
  372. "remote": false,
  373. "uma": 536870912,
  374. "nonuma": 3489656832
  375. }
  376. ]
  377. },
  378. {
  379. "offloadLayers": 17,
  380. "fullOffloaded": false,
  381. "ram": {
  382. "handleLayers": 15,
  383. "handleLastLayer": 14,
  384. "handleOutputLayer": true,
  385. "remote": false,
  386. "uma": 675408856,
  387. "nonuma": 832695256
  388. },
  389. "vrams": [
  390. {
  391. "handleLayers": 17,
  392. "handleLastLayer": 31,
  393. "handleOutputLayer": false,
  394. "remote": false,
  395. "uma": 570425344,
  396. "nonuma": 3645927424
  397. }
  398. ]
  399. },
  400. {
  401. "offloadLayers": 18,
  402. "fullOffloaded": false,
  403. "ram": {
  404. "handleLayers": 14,
  405. "handleLastLayer": 13,
  406. "handleOutputLayer": true,
  407. "remote": false,
  408. "uma": 641854424,
  409. "nonuma": 799140824
  410. },
  411. "vrams": [
  412. {
  413. "handleLayers": 18,
  414. "handleLastLayer": 31,
  415. "handleOutputLayer": false,
  416. "remote": false,
  417. "uma": 603979776,
  418. "nonuma": 3802198016
  419. }
  420. ]
  421. },
  422. {
  423. "offloadLayers": 19,
  424. "fullOffloaded": false,
  425. "ram": {
  426. "handleLayers": 13,
  427. "handleLastLayer": 12,
  428. "handleOutputLayer": true,
  429. "remote": false,
  430. "uma": 608299992,
  431. "nonuma": 765586392
  432. },
  433. "vrams": [
  434. {
  435. "handleLayers": 19,
  436. "handleLastLayer": 31,
  437. "handleOutputLayer": false,
  438. "remote": false,
  439. "uma": 637534208,
  440. "nonuma": 3958468608
  441. }
  442. ]
  443. },
  444. {
  445. "offloadLayers": 20,
  446. "fullOffloaded": false,
  447. "ram": {
  448. "handleLayers": 12,
  449. "handleLastLayer": 11,
  450. "handleOutputLayer": true,
  451. "remote": false,
  452. "uma": 574745560,
  453. "nonuma": 732031960
  454. },
  455. "vrams": [
  456. {
  457. "handleLayers": 20,
  458. "handleLastLayer": 31,
  459. "handleOutputLayer": false,
  460. "remote": false,
  461. "uma": 671088640,
  462. "nonuma": 4114739200
  463. }
  464. ]
  465. },
  466. {
  467. "offloadLayers": 21,
  468. "fullOffloaded": false,
  469. "ram": {
  470. "handleLayers": 11,
  471. "handleLastLayer": 10,
  472. "handleOutputLayer": true,
  473. "remote": false,
  474. "uma": 541191128,
  475. "nonuma": 698477528
  476. },
  477. "vrams": [
  478. {
  479. "handleLayers": 21,
  480. "handleLastLayer": 31,
  481. "handleOutputLayer": false,
  482. "remote": false,
  483. "uma": 704643072,
  484. "nonuma": 4271009792
  485. }
  486. ]
  487. },
  488. {
  489. "offloadLayers": 22,
  490. "fullOffloaded": false,
  491. "ram": {
  492. "handleLayers": 10,
  493. "handleLastLayer": 9,
  494. "handleOutputLayer": true,
  495. "remote": false,
  496. "uma": 507636696,
  497. "nonuma": 664923096
  498. },
  499. "vrams": [
  500. {
  501. "handleLayers": 22,
  502. "handleLastLayer": 31,
  503. "handleOutputLayer": false,
  504. "remote": false,
  505. "uma": 738197504,
  506. "nonuma": 4427280384
  507. }
  508. ]
  509. },
  510. {
  511. "offloadLayers": 23,
  512. "fullOffloaded": false,
  513. "ram": {
  514. "handleLayers": 9,
  515. "handleLastLayer": 8,
  516. "handleOutputLayer": true,
  517. "remote": false,
  518. "uma": 474082264,
  519. "nonuma": 631368664
  520. },
  521. "vrams": [
  522. {
  523. "handleLayers": 23,
  524. "handleLastLayer": 31,
  525. "handleOutputLayer": false,
  526. "remote": false,
  527. "uma": 771751936,
  528. "nonuma": 4583550976
  529. }
  530. ]
  531. },
  532. {
  533. "offloadLayers": 24,
  534. "fullOffloaded": false,
  535. "ram": {
  536. "handleLayers": 8,
  537. "handleLastLayer": 7,
  538. "handleOutputLayer": true,
  539. "remote": false,
  540. "uma": 440527832,
  541. "nonuma": 597814232
  542. },
  543. "vrams": [
  544. {
  545. "handleLayers": 24,
  546. "handleLastLayer": 31,
  547. "handleOutputLayer": false,
  548. "remote": false,
  549. "uma": 805306368,
  550. "nonuma": 4739821568
  551. }
  552. ]
  553. },
  554. {
  555. "offloadLayers": 25,
  556. "fullOffloaded": false,
  557. "ram": {
  558. "handleLayers": 7,
  559. "handleLastLayer": 6,
  560. "handleOutputLayer": true,
  561. "remote": false,
  562. "uma": 406973400,
  563. "nonuma": 564259800
  564. },
  565. "vrams": [
  566. {
  567. "handleLayers": 25,
  568. "handleLastLayer": 31,
  569. "handleOutputLayer": false,
  570. "remote": false,
  571. "uma": 838860800,
  572. "nonuma": 4896092160
  573. }
  574. ]
  575. },
  576. {
  577. "offloadLayers": 26,
  578. "fullOffloaded": false,
  579. "ram": {
  580. "handleLayers": 6,
  581. "handleLastLayer": 5,
  582. "handleOutputLayer": true,
  583. "remote": false,
  584. "uma": 373418968,
  585. "nonuma": 530705368
  586. },
  587. "vrams": [
  588. {
  589. "handleLayers": 26,
  590. "handleLastLayer": 31,
  591. "handleOutputLayer": false,
  592. "remote": false,
  593. "uma": 872415232,
  594. "nonuma": 5052362752
  595. }
  596. ]
  597. },
  598. {
  599. "offloadLayers": 27,
  600. "fullOffloaded": false,
  601. "ram": {
  602. "handleLayers": 5,
  603. "handleLastLayer": 4,
  604. "handleOutputLayer": true,
  605. "remote": false,
  606. "uma": 339864536,
  607. "nonuma": 497150936
  608. },
  609. "vrams": [
  610. {
  611. "handleLayers": 27,
  612. "handleLastLayer": 31,
  613. "handleOutputLayer": false,
  614. "remote": false,
  615. "uma": 905969664,
  616. "nonuma": 5208633344
  617. }
  618. ]
  619. },
  620. {
  621. "offloadLayers": 28,
  622. "fullOffloaded": false,
  623. "ram": {
  624. "handleLayers": 4,
  625. "handleLastLayer": 3,
  626. "handleOutputLayer": true,
  627. "remote": false,
  628. "uma": 306310104,
  629. "nonuma": 463596504
  630. },
  631. "vrams": [
  632. {
  633. "handleLayers": 28,
  634. "handleLastLayer": 31,
  635. "handleOutputLayer": false,
  636. "remote": false,
  637. "uma": 939524096,
  638. "nonuma": 5364903936
  639. }
  640. ]
  641. },
  642. {
  643. "offloadLayers": 29,
  644. "fullOffloaded": false,
  645. "ram": {
  646. "handleLayers": 3,
  647. "handleLastLayer": 2,
  648. "handleOutputLayer": true,
  649. "remote": false,
  650. "uma": 272755672,
  651. "nonuma": 430042072
  652. },
  653. "vrams": [
  654. {
  655. "handleLayers": 29,
  656. "handleLastLayer": 31,
  657. "handleOutputLayer": false,
  658. "remote": false,
  659. "uma": 973078528,
  660. "nonuma": 5521174528
  661. }
  662. ]
  663. },
  664. {
  665. "offloadLayers": 30,
  666. "fullOffloaded": false,
  667. "ram": {
  668. "handleLayers": 2,
  669. "handleLastLayer": 1,
  670. "handleOutputLayer": true,
  671. "remote": false,
  672. "uma": 239201240,
  673. "nonuma": 396487640
  674. },
  675. "vrams": [
  676. {
  677. "handleLayers": 30,
  678. "handleLastLayer": 31,
  679. "handleOutputLayer": false,
  680. "remote": false,
  681. "uma": 1006632960,
  682. "nonuma": 5677445120
  683. }
  684. ]
  685. },
  686. {
  687. "offloadLayers": 31,
  688. "fullOffloaded": false,
  689. "ram": {
  690. "handleLayers": 1,
  691. "handleLastLayer": 0,
  692. "handleOutputLayer": true,
  693. "remote": false,
  694. "uma": 205646808,
  695. "nonuma": 362933208
  696. },
  697. "vrams": [
  698. {
  699. "handleLayers": 31,
  700. "handleLastLayer": 31,
  701. "handleOutputLayer": false,
  702. "remote": false,
  703. "uma": 1040187392,
  704. "nonuma": 5833715712
  705. }
  706. ]
  707. },
  708. {
  709. "offloadLayers": 32,
  710. "fullOffloaded": false,
  711. "ram": {
  712. "handleLayers": 0,
  713. "handleLastLayer": -1,
  714. "handleOutputLayer": true,
  715. "remote": false,
  716. "uma": 172092376,
  717. "nonuma": 329378776
  718. },
  719. "vrams": [
  720. {
  721. "handleLayers": 32,
  722. "handleLastLayer": 31,
  723. "handleOutputLayer": false,
  724. "remote": false,
  725. "uma": 1073741824,
  726. "nonuma": 5989986304
  727. }
  728. ]
  729. },
  730. {
  731. "offloadLayers": 33,
  732. "fullOffloaded": true,
  733. "ram": {
  734. "handleLayers": 0,
  735. "handleLastLayer": -1,
  736. "handleOutputLayer": false,
  737. "remote": false,
  738. "uma": 179951576,
  739. "nonuma": 337237976
  740. },
  741. "vrams": [
  742. {
  743. "handleLayers": 32,
  744. "handleLastLayer": 31,
  745. "handleOutputLayer": true,
  746. "remote": false,
  747. "uma": 1074271232,
  748. "nonuma": 6315049984
  749. }
  750. ]
  751. }
  752. ],
  753. "type": "model",
  754. "architecture": "llama",
  755. "contextSize": 8192,
  756. "flashAttention": false,
  757. "noMMap": false,
  758. "embeddingOnly": false,
  759. "reranking": false,
  760. "distributable": true,
  761. "logicalBatchSize": 2048,
  762. "physicalBatchSize": 512
  763. }
  764. }