llama3_8b_disable_offload.json 842 B

1234567891011121314151617181920212223242526272829303132333435363738
  1. {
  2. "estimate": {
  3. "items": [
  4. {
  5. "offloadLayers": 0,
  6. "fullOffloaded": false,
  7. "ram": {
  8. "handleLayers": 32,
  9. "handleLastLayer": 31,
  10. "handleOutputLayer": true,
  11. "remote": false,
  12. "uma": 1245834200,
  13. "nonuma": 1403120600
  14. },
  15. "vrams": [
  16. {
  17. "handleLayers": 0,
  18. "handleLastLayer": -1,
  19. "handleOutputLayer": false,
  20. "remote": false,
  21. "uma": 0,
  22. "nonuma": 964157440
  23. }
  24. ]
  25. }
  26. ],
  27. "type": "model",
  28. "architecture": "llama",
  29. "contextSize": 8192,
  30. "flashAttention": false,
  31. "noMMap": false,
  32. "embeddingOnly": false,
  33. "reranking": false,
  34. "distributable": true,
  35. "logicalBatchSize": 2048,
  36. "physicalBatchSize": 512
  37. }
  38. }