llama3_70b_full_offload_split_2_4080.json 1.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546
  1. {
  2. "estimate": {
  3. "items": [
  4. {
  5. "offloadLayers": 81,
  6. "fullOffloaded": true,
  7. "ram": {
  8. "handleLayers": 0,
  9. "handleLastLayer": -1,
  10. "handleOutputLayer": false,
  11. "remote": false,
  12. "uma": 297878712,
  13. "nonuma": 455165112
  14. },
  15. "vrams": [
  16. {
  17. "handleLayers": 41,
  18. "handleLastLayer": 40,
  19. "handleOutputLayer": false,
  20. "remote": false,
  21. "uma": 1342723072,
  22. "nonuma": 22912443392
  23. },
  24. {
  25. "handleLayers": 39,
  26. "handleLastLayer": 79,
  27. "handleOutputLayer": true,
  28. "remote": false,
  29. "uma": 22649753600,
  30. "nonuma": 22911897600
  31. }
  32. ]
  33. }
  34. ],
  35. "type": "model",
  36. "architecture": "llama",
  37. "contextSize": 8192,
  38. "flashAttention": false,
  39. "noMMap": false,
  40. "embeddingOnly": false,
  41. "reranking": false,
  42. "distributable": true,
  43. "logicalBatchSize": 2048,
  44. "physicalBatchSize": 512
  45. }
  46. }