gpustack-model.json 105 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752
  1. {
  2. "annotations": {
  3. "list": [
  4. {
  5. "builtIn": 1,
  6. "datasource": {
  7. "type": "grafana",
  8. "uid": "-- Grafana --"
  9. },
  10. "enable": true,
  11. "hide": true,
  12. "iconColor": "rgba(0, 211, 255, 1)",
  13. "name": "Annotations & Alerts",
  14. "target": {
  15. "limit": 100,
  16. "matchAny": false,
  17. "tags": [],
  18. "type": "dashboard"
  19. },
  20. "type": "dashboard"
  21. }
  22. ]
  23. },
  24. "description": "Monitoring Inference Server",
  25. "editable": true,
  26. "fiscalYearStartMonth": 0,
  27. "graphTooltip": 0,
  28. "id": 0,
  29. "links": [],
  30. "panels": [
  31. {
  32. "collapsed": false,
  33. "gridPos": {
  34. "h": 1,
  35. "w": 24,
  36. "x": 0,
  37. "y": 0
  38. },
  39. "id": 24,
  40. "panels": [],
  41. "title": "Summary",
  42. "type": "row"
  43. },
  44. {
  45. "datasource": {
  46. "type": "prometheus",
  47. "uid": "prometheus"
  48. },
  49. "fieldConfig": {
  50. "defaults": {
  51. "color": {
  52. "mode": "thresholds"
  53. },
  54. "mappings": [],
  55. "max": 90,
  56. "min": 0,
  57. "thresholds": {
  58. "mode": "absolute",
  59. "steps": [
  60. {
  61. "color": "green",
  62. "value": 0
  63. },
  64. {
  65. "color": "#EAB839",
  66. "value": 60
  67. },
  68. {
  69. "color": "red",
  70. "value": 80
  71. }
  72. ]
  73. },
  74. "unit": "none"
  75. },
  76. "overrides": []
  77. },
  78. "gridPos": {
  79. "h": 3,
  80. "w": 4,
  81. "x": 0,
  82. "y": 1
  83. },
  84. "id": 32,
  85. "options": {
  86. "colorMode": "none",
  87. "graphMode": "area",
  88. "justifyMode": "auto",
  89. "orientation": "horizontal",
  90. "percentChangeColorMode": "standard",
  91. "reduceOptions": {
  92. "calcs": [
  93. "lastNotNull"
  94. ],
  95. "fields": "",
  96. "values": false
  97. },
  98. "showPercentChange": false,
  99. "textMode": "auto",
  100. "wideLayout": true
  101. },
  102. "pluginVersion": "12.2.0",
  103. "targets": [
  104. {
  105. "datasource": {
  106. "type": "prometheus",
  107. "uid": "P1809F7CD0C75ACF3"
  108. },
  109. "editorMode": "code",
  110. "expr": "sum(gpustack:model_desired_instances{cluster_name=\"$cluster_name\", model_name=\"$model_name\"})",
  111. "instant": false,
  112. "legendFormat": "__auto",
  113. "range": true,
  114. "refId": "A"
  115. }
  116. ],
  117. "title": "Desired Instances",
  118. "type": "stat"
  119. },
  120. {
  121. "datasource": {
  122. "type": "prometheus",
  123. "uid": "prometheus"
  124. },
  125. "fieldConfig": {
  126. "defaults": {
  127. "color": {
  128. "mode": "thresholds"
  129. },
  130. "mappings": [],
  131. "max": 90,
  132. "min": 0,
  133. "thresholds": {
  134. "mode": "absolute",
  135. "steps": [
  136. {
  137. "color": "green",
  138. "value": 0
  139. },
  140. {
  141. "color": "#EAB839",
  142. "value": 60
  143. },
  144. {
  145. "color": "red",
  146. "value": 80
  147. }
  148. ]
  149. },
  150. "unit": "none"
  151. },
  152. "overrides": []
  153. },
  154. "gridPos": {
  155. "h": 3,
  156. "w": 4,
  157. "x": 4,
  158. "y": 1
  159. },
  160. "id": 33,
  161. "options": {
  162. "colorMode": "value",
  163. "graphMode": "area",
  164. "justifyMode": "auto",
  165. "orientation": "horizontal",
  166. "percentChangeColorMode": "standard",
  167. "reduceOptions": {
  168. "calcs": [
  169. "lastNotNull"
  170. ],
  171. "fields": "",
  172. "values": false
  173. },
  174. "showPercentChange": false,
  175. "textMode": "auto",
  176. "wideLayout": true
  177. },
  178. "pluginVersion": "12.2.0",
  179. "targets": [
  180. {
  181. "datasource": {
  182. "type": "prometheus",
  183. "uid": "P1809F7CD0C75ACF3"
  184. },
  185. "editorMode": "code",
  186. "expr": "sum(gpustack:model_running_instances{cluster_name=\"$cluster_name\",model_name=\"$model_name\"})",
  187. "instant": false,
  188. "legendFormat": "__auto",
  189. "range": true,
  190. "refId": "A"
  191. }
  192. ],
  193. "title": "Running Instances",
  194. "type": "stat"
  195. },
  196. {
  197. "datasource": {
  198. "type": "prometheus",
  199. "uid": "prometheus"
  200. },
  201. "fieldConfig": {
  202. "defaults": {
  203. "color": {
  204. "mode": "thresholds"
  205. },
  206. "mappings": [],
  207. "max": 90,
  208. "min": 0,
  209. "thresholds": {
  210. "mode": "absolute",
  211. "steps": [
  212. {
  213. "color": "green",
  214. "value": 0
  215. },
  216. {
  217. "color": "#EAB839",
  218. "value": 60
  219. },
  220. {
  221. "color": "red",
  222. "value": 80
  223. }
  224. ]
  225. },
  226. "unit": "none"
  227. },
  228. "overrides": [
  229. {
  230. "matcher": {
  231. "id": "byName",
  232. "options": "Value"
  233. },
  234. "properties": [
  235. {
  236. "id": "color",
  237. "value": {
  238. "mode": "continuous-RdYlGr"
  239. }
  240. }
  241. ]
  242. }
  243. ]
  244. },
  245. "gridPos": {
  246. "h": 3,
  247. "w": 4,
  248. "x": 8,
  249. "y": 1
  250. },
  251. "id": 35,
  252. "options": {
  253. "colorMode": "value",
  254. "graphMode": "area",
  255. "justifyMode": "auto",
  256. "orientation": "horizontal",
  257. "percentChangeColorMode": "standard",
  258. "reduceOptions": {
  259. "calcs": [
  260. "lastNotNull"
  261. ],
  262. "fields": "",
  263. "values": false
  264. },
  265. "showPercentChange": false,
  266. "textMode": "auto",
  267. "wideLayout": true
  268. },
  269. "pluginVersion": "12.2.0",
  270. "targets": [
  271. {
  272. "datasource": {
  273. "type": "prometheus",
  274. "uid": "P1809F7CD0C75ACF3"
  275. },
  276. "editorMode": "code",
  277. "expr": "sum(gpustack:model_desired_instances{cluster_name=\"$cluster_name\",model_name=\"$model_name\"}) - sum(gpustack:model_running_instances{cluster_name=\"$cluster_name\",model_name=\"$model_name\"})",
  278. "instant": false,
  279. "legendFormat": "__auto",
  280. "range": true,
  281. "refId": "A"
  282. }
  283. ],
  284. "title": "Error Instances",
  285. "type": "stat"
  286. },
  287. {
  288. "datasource": {
  289. "type": "prometheus",
  290. "uid": "prometheus"
  291. },
  292. "fieldConfig": {
  293. "defaults": {
  294. "color": {
  295. "mode": "thresholds"
  296. },
  297. "mappings": [],
  298. "max": 90,
  299. "min": 0,
  300. "thresholds": {
  301. "mode": "absolute",
  302. "steps": [
  303. {
  304. "color": "green",
  305. "value": 0
  306. },
  307. {
  308. "color": "#EAB839",
  309. "value": 60
  310. },
  311. {
  312. "color": "red",
  313. "value": 80
  314. }
  315. ]
  316. },
  317. "unit": "none"
  318. },
  319. "overrides": []
  320. },
  321. "gridPos": {
  322. "h": 3,
  323. "w": 4,
  324. "x": 12,
  325. "y": 1
  326. },
  327. "id": 37,
  328. "options": {
  329. "colorMode": "value",
  330. "graphMode": "area",
  331. "justifyMode": "auto",
  332. "orientation": "horizontal",
  333. "percentChangeColorMode": "standard",
  334. "reduceOptions": {
  335. "calcs": [
  336. "lastNotNull"
  337. ],
  338. "fields": "",
  339. "values": false
  340. },
  341. "showPercentChange": false,
  342. "textMode": "auto",
  343. "wideLayout": true
  344. },
  345. "pluginVersion": "12.2.0",
  346. "targets": [
  347. {
  348. "datasource": {
  349. "type": "prometheus",
  350. "uid": "P1809F7CD0C75ACF3"
  351. },
  352. "editorMode": "code",
  353. "expr": "sum(gpustack:num_requests_running{cluster_name=\"$cluster_name\",model_name=\"$model_name\"})",
  354. "instant": false,
  355. "legendFormat": "__auto",
  356. "range": true,
  357. "refId": "A"
  358. }
  359. ],
  360. "title": "Running Requests",
  361. "type": "stat"
  362. },
  363. {
  364. "datasource": {
  365. "type": "prometheus",
  366. "uid": "prometheus"
  367. },
  368. "fieldConfig": {
  369. "defaults": {
  370. "color": {
  371. "mode": "thresholds"
  372. },
  373. "mappings": [],
  374. "max": 90,
  375. "min": 0,
  376. "thresholds": {
  377. "mode": "absolute",
  378. "steps": [
  379. {
  380. "color": "green",
  381. "value": 0
  382. },
  383. {
  384. "color": "#EAB839",
  385. "value": 60
  386. },
  387. {
  388. "color": "red",
  389. "value": 80
  390. }
  391. ]
  392. },
  393. "unit": "none"
  394. },
  395. "overrides": [
  396. {
  397. "matcher": {
  398. "id": "byName",
  399. "options": "Value"
  400. },
  401. "properties": [
  402. {
  403. "id": "color",
  404. "value": {
  405. "mode": "continuous-RdYlGr"
  406. }
  407. }
  408. ]
  409. }
  410. ]
  411. },
  412. "gridPos": {
  413. "h": 3,
  414. "w": 4,
  415. "x": 16,
  416. "y": 1
  417. },
  418. "id": 36,
  419. "options": {
  420. "colorMode": "value",
  421. "graphMode": "area",
  422. "justifyMode": "auto",
  423. "orientation": "horizontal",
  424. "percentChangeColorMode": "standard",
  425. "reduceOptions": {
  426. "calcs": [
  427. "lastNotNull"
  428. ],
  429. "fields": "",
  430. "values": false
  431. },
  432. "showPercentChange": false,
  433. "textMode": "auto",
  434. "wideLayout": true
  435. },
  436. "pluginVersion": "12.2.0",
  437. "targets": [
  438. {
  439. "datasource": {
  440. "type": "prometheus",
  441. "uid": "P1809F7CD0C75ACF3"
  442. },
  443. "editorMode": "code",
  444. "expr": "sum(gpustack:num_requests_waiting{cluster_name=\"$cluster_name\",model_name=\"$model_name\"})",
  445. "instant": false,
  446. "legendFormat": "__auto",
  447. "range": true,
  448. "refId": "A"
  449. }
  450. ],
  451. "title": "Waiting Requests",
  452. "type": "stat"
  453. },
  454. {
  455. "datasource": {
  456. "type": "prometheus",
  457. "uid": "prometheus"
  458. },
  459. "fieldConfig": {
  460. "defaults": {
  461. "color": {
  462. "mode": "palette-classic-by-name"
  463. },
  464. "mappings": [],
  465. "thresholds": {
  466. "mode": "absolute",
  467. "steps": [
  468. {
  469. "color": "green",
  470. "value": 0
  471. },
  472. {
  473. "color": "red",
  474. "value": 80
  475. }
  476. ]
  477. }
  478. },
  479. "overrides": []
  480. },
  481. "gridPos": {
  482. "h": 3,
  483. "w": 4,
  484. "x": 20,
  485. "y": 1
  486. },
  487. "id": 28,
  488. "options": {
  489. "colorMode": "value",
  490. "graphMode": "area",
  491. "justifyMode": "center",
  492. "orientation": "auto",
  493. "percentChangeColorMode": "standard",
  494. "reduceOptions": {
  495. "calcs": [
  496. "lastNotNull"
  497. ],
  498. "fields": "/^runtime$/",
  499. "values": false
  500. },
  501. "showPercentChange": false,
  502. "text": {},
  503. "textMode": "value",
  504. "wideLayout": true
  505. },
  506. "pluginVersion": "12.2.0",
  507. "targets": [
  508. {
  509. "editorMode": "code",
  510. "exemplar": false,
  511. "expr": "gpustack:model_info{cluster_name=\"$cluster_name\",model_name=\"$model_name\"}",
  512. "format": "table",
  513. "legendFormat": "__auto",
  514. "range": true,
  515. "refId": "A"
  516. }
  517. ],
  518. "title": "Runtime",
  519. "type": "stat"
  520. },
  521. {
  522. "datasource": {
  523. "type": "prometheus",
  524. "uid": "prometheus"
  525. },
  526. "fieldConfig": {
  527. "defaults": {
  528. "color": {
  529. "mode": "thresholds"
  530. },
  531. "mappings": [],
  532. "thresholds": {
  533. "mode": "absolute",
  534. "steps": [
  535. {
  536. "color": "green",
  537. "value": 0
  538. },
  539. {
  540. "color": "red",
  541. "value": 80
  542. }
  543. ]
  544. }
  545. },
  546. "overrides": []
  547. },
  548. "gridPos": {
  549. "h": 4,
  550. "w": 12,
  551. "x": 0,
  552. "y": 4
  553. },
  554. "id": 34,
  555. "options": {
  556. "colorMode": "background",
  557. "graphMode": "none",
  558. "justifyMode": "auto",
  559. "orientation": "auto",
  560. "percentChangeColorMode": "standard",
  561. "reduceOptions": {
  562. "calcs": [
  563. "lastNotNull"
  564. ],
  565. "fields": "/^model_name$/",
  566. "values": false
  567. },
  568. "showPercentChange": false,
  569. "text": {
  570. "valueSize": 55
  571. },
  572. "textMode": "auto",
  573. "wideLayout": true
  574. },
  575. "pluginVersion": "12.2.0",
  576. "targets": [
  577. {
  578. "editorMode": "code",
  579. "exemplar": false,
  580. "expr": "gpustack:model_info{cluster_name=\"$cluster_name\",model_name=\"$model_name\"}",
  581. "format": "table",
  582. "legendFormat": "__auto",
  583. "range": true,
  584. "refId": "A"
  585. }
  586. ],
  587. "title": "Model",
  588. "type": "stat"
  589. },
  590. {
  591. "datasource": {
  592. "type": "prometheus",
  593. "uid": "prometheus"
  594. },
  595. "fieldConfig": {
  596. "defaults": {
  597. "color": {
  598. "mode": "thresholds"
  599. },
  600. "mappings": [],
  601. "thresholds": {
  602. "mode": "absolute",
  603. "steps": [
  604. {
  605. "color": "green",
  606. "value": 0
  607. },
  608. {
  609. "color": "red",
  610. "value": 80
  611. }
  612. ]
  613. }
  614. },
  615. "overrides": [
  616. {
  617. "matcher": {
  618. "id": "byName",
  619. "options": "source_key"
  620. },
  621. "properties": [
  622. {
  623. "id": "color",
  624. "value": {
  625. "mode": "palette-classic-by-name"
  626. }
  627. }
  628. ]
  629. }
  630. ]
  631. },
  632. "gridPos": {
  633. "h": 4,
  634. "w": 8,
  635. "x": 12,
  636. "y": 4
  637. },
  638. "id": 30,
  639. "options": {
  640. "colorMode": "background_solid",
  641. "graphMode": "area",
  642. "justifyMode": "auto",
  643. "orientation": "auto",
  644. "percentChangeColorMode": "standard",
  645. "reduceOptions": {
  646. "calcs": [
  647. "lastNotNull"
  648. ],
  649. "fields": "/^source_key$/",
  650. "values": false
  651. },
  652. "showPercentChange": false,
  653. "text": {
  654. "valueSize": 30
  655. },
  656. "textMode": "auto",
  657. "wideLayout": true
  658. },
  659. "pluginVersion": "12.2.0",
  660. "targets": [
  661. {
  662. "editorMode": "code",
  663. "exemplar": false,
  664. "expr": "gpustack:model_info{cluster_name=\"$cluster_name\",model_name=\"$model_name\"}",
  665. "format": "table",
  666. "legendFormat": "__auto",
  667. "range": true,
  668. "refId": "A"
  669. }
  670. ],
  671. "title": "Model Source Name",
  672. "type": "stat"
  673. },
  674. {
  675. "datasource": {
  676. "type": "prometheus",
  677. "uid": "prometheus"
  678. },
  679. "fieldConfig": {
  680. "defaults": {
  681. "color": {
  682. "mode": "thresholds"
  683. },
  684. "mappings": [
  685. {
  686. "options": {
  687. "huggingface": {
  688. "index": 0,
  689. "text": "Hugging Face"
  690. },
  691. "local_path": {
  692. "index": 2,
  693. "text": "Local Path"
  694. },
  695. "model_scope": {
  696. "index": 1,
  697. "text": "ModelScope"
  698. }
  699. },
  700. "type": "value"
  701. }
  702. ],
  703. "thresholds": {
  704. "mode": "absolute",
  705. "steps": [
  706. {
  707. "color": "green",
  708. "value": 0
  709. },
  710. {
  711. "color": "red",
  712. "value": 80
  713. }
  714. ]
  715. }
  716. },
  717. "overrides": [
  718. {
  719. "matcher": {
  720. "id": "byName",
  721. "options": "source"
  722. },
  723. "properties": [
  724. {
  725. "id": "color",
  726. "value": {
  727. "mode": "palette-classic-by-name"
  728. }
  729. }
  730. ]
  731. }
  732. ]
  733. },
  734. "gridPos": {
  735. "h": 4,
  736. "w": 4,
  737. "x": 20,
  738. "y": 4
  739. },
  740. "id": 29,
  741. "options": {
  742. "colorMode": "background",
  743. "graphMode": "area",
  744. "justifyMode": "auto",
  745. "orientation": "auto",
  746. "percentChangeColorMode": "standard",
  747. "reduceOptions": {
  748. "calcs": [
  749. "lastNotNull"
  750. ],
  751. "fields": "/^source$/",
  752. "values": false
  753. },
  754. "showPercentChange": false,
  755. "text": {
  756. "valueSize": 30
  757. },
  758. "textMode": "auto",
  759. "wideLayout": true
  760. },
  761. "pluginVersion": "12.2.0",
  762. "targets": [
  763. {
  764. "editorMode": "code",
  765. "exemplar": false,
  766. "expr": "gpustack:model_info{cluster_name=\"$cluster_name\",model_name=\"$model_name\"}",
  767. "format": "table",
  768. "legendFormat": "__auto",
  769. "range": true,
  770. "refId": "A"
  771. }
  772. ],
  773. "title": "Model Source",
  774. "type": "stat"
  775. },
  776. {
  777. "collapsed": false,
  778. "gridPos": {
  779. "h": 1,
  780. "w": 24,
  781. "x": 0,
  782. "y": 8
  783. },
  784. "id": 23,
  785. "panels": [],
  786. "title": "Service API",
  787. "type": "row"
  788. },
  789. {
  790. "datasource": {
  791. "type": "prometheus",
  792. "uid": "prometheus"
  793. },
  794. "description": "Total requests per minute.",
  795. "fieldConfig": {
  796. "defaults": {
  797. "color": {
  798. "mode": "palette-classic"
  799. },
  800. "custom": {
  801. "axisBorderShow": false,
  802. "axisCenteredZero": false,
  803. "axisColorMode": "text",
  804. "axisLabel": "",
  805. "axisPlacement": "auto",
  806. "barAlignment": 0,
  807. "barWidthFactor": 0.6,
  808. "drawStyle": "line",
  809. "fillOpacity": 0,
  810. "gradientMode": "none",
  811. "hideFrom": {
  812. "legend": false,
  813. "tooltip": false,
  814. "viz": false
  815. },
  816. "insertNulls": false,
  817. "lineInterpolation": "linear",
  818. "lineWidth": 1,
  819. "pointSize": 5,
  820. "scaleDistribution": {
  821. "type": "linear"
  822. },
  823. "showPoints": "never",
  824. "showValues": false,
  825. "spanNulls": false,
  826. "stacking": {
  827. "group": "A",
  828. "mode": "none"
  829. },
  830. "thresholdsStyle": {
  831. "mode": "off"
  832. }
  833. },
  834. "mappings": [],
  835. "thresholds": {
  836. "mode": "absolute",
  837. "steps": [
  838. {
  839. "color": "green",
  840. "value": 0
  841. },
  842. {
  843. "color": "red",
  844. "value": 80
  845. }
  846. ]
  847. },
  848. "unit": "req/min"
  849. },
  850. "overrides": []
  851. },
  852. "gridPos": {
  853. "h": 8,
  854. "w": 12,
  855. "x": 0,
  856. "y": 9
  857. },
  858. "id": 22,
  859. "options": {
  860. "legend": {
  861. "calcs": [
  862. "last",
  863. "max"
  864. ],
  865. "displayMode": "table",
  866. "placement": "bottom",
  867. "showLegend": true
  868. },
  869. "tooltip": {
  870. "hideZeros": false,
  871. "mode": "single",
  872. "sort": "none"
  873. }
  874. },
  875. "pluginVersion": "12.2.0",
  876. "targets": [
  877. {
  878. "editorMode": "code",
  879. "expr": "increase(gpustack:e2e_request_latency_seconds_count{cluster_name=\"$cluster_name\",model_name=~\"$model_name\", model_instance_name=~\"$model_instance_name\"}[1m])",
  880. "legendFormat": "Requests/min({{model_instance_name}})",
  881. "range": true,
  882. "refId": "A"
  883. }
  884. ],
  885. "title": "Total Requests per Minute",
  886. "type": "timeseries"
  887. },
  888. {
  889. "datasource": {
  890. "type": "prometheus",
  891. "uid": "prometheus"
  892. },
  893. "description": "Number of tokens generated per second for the current model.",
  894. "fieldConfig": {
  895. "defaults": {
  896. "color": {
  897. "mode": "palette-classic"
  898. },
  899. "custom": {
  900. "axisBorderShow": false,
  901. "axisCenteredZero": false,
  902. "axisColorMode": "text",
  903. "axisLabel": "",
  904. "axisPlacement": "auto",
  905. "barAlignment": 0,
  906. "barWidthFactor": 0.6,
  907. "drawStyle": "line",
  908. "fillOpacity": 0,
  909. "gradientMode": "none",
  910. "hideFrom": {
  911. "legend": false,
  912. "tooltip": false,
  913. "viz": false
  914. },
  915. "insertNulls": false,
  916. "lineInterpolation": "linear",
  917. "lineWidth": 1,
  918. "pointSize": 5,
  919. "scaleDistribution": {
  920. "type": "linear"
  921. },
  922. "showPoints": "never",
  923. "showValues": false,
  924. "spanNulls": false,
  925. "stacking": {
  926. "group": "A",
  927. "mode": "none"
  928. },
  929. "thresholdsStyle": {
  930. "mode": "off"
  931. }
  932. },
  933. "mappings": [],
  934. "thresholds": {
  935. "mode": "absolute",
  936. "steps": [
  937. {
  938. "color": "green",
  939. "value": 0
  940. },
  941. {
  942. "color": "red",
  943. "value": 80
  944. }
  945. ]
  946. }
  947. },
  948. "overrides": []
  949. },
  950. "gridPos": {
  951. "h": 8,
  952. "w": 12,
  953. "x": 12,
  954. "y": 9
  955. },
  956. "id": 41,
  957. "options": {
  958. "legend": {
  959. "calcs": [
  960. "last",
  961. "max"
  962. ],
  963. "displayMode": "table",
  964. "placement": "bottom",
  965. "showLegend": true
  966. },
  967. "tooltip": {
  968. "hideZeros": false,
  969. "mode": "single",
  970. "sort": "none"
  971. }
  972. },
  973. "pluginVersion": "12.2.0",
  974. "targets": [
  975. {
  976. "datasource": {
  977. "type": "prometheus",
  978. "uid": "Prometheus"
  979. },
  980. "disableTextWrap": false,
  981. "editorMode": "builder",
  982. "expr": "sum(rate(gpustack:generation_tokens_total{cluster_name=\"$cluster_name\",model_name=\"$model_name\"}[$__rate_interval]))",
  983. "fullMetaSearch": false,
  984. "hide": false,
  985. "includeNullMetadata": false,
  986. "instant": false,
  987. "legendFormat": "Generation Tokens/Sec (Total)",
  988. "range": true,
  989. "refId": "D",
  990. "useBackend": false
  991. }
  992. ],
  993. "title": "Model Token Generation Throughput",
  994. "type": "timeseries"
  995. },
  996. {
  997. "datasource": {
  998. "type": "prometheus",
  999. "uid": "prometheus"
  1000. },
  1001. "description": "End to end request latency measured in seconds.",
  1002. "fieldConfig": {
  1003. "defaults": {
  1004. "color": {
  1005. "mode": "palette-classic"
  1006. },
  1007. "custom": {
  1008. "axisBorderShow": false,
  1009. "axisCenteredZero": false,
  1010. "axisColorMode": "text",
  1011. "axisLabel": "",
  1012. "axisPlacement": "auto",
  1013. "barAlignment": 0,
  1014. "barWidthFactor": 0.6,
  1015. "drawStyle": "line",
  1016. "fillOpacity": 0,
  1017. "gradientMode": "none",
  1018. "hideFrom": {
  1019. "legend": false,
  1020. "tooltip": false,
  1021. "viz": false
  1022. },
  1023. "insertNulls": false,
  1024. "lineInterpolation": "linear",
  1025. "lineWidth": 1,
  1026. "pointSize": 5,
  1027. "scaleDistribution": {
  1028. "type": "linear"
  1029. },
  1030. "showPoints": "never",
  1031. "showValues": false,
  1032. "spanNulls": false,
  1033. "stacking": {
  1034. "group": "A",
  1035. "mode": "none"
  1036. },
  1037. "thresholdsStyle": {
  1038. "mode": "off"
  1039. }
  1040. },
  1041. "mappings": [],
  1042. "thresholds": {
  1043. "mode": "absolute",
  1044. "steps": [
  1045. {
  1046. "color": "green",
  1047. "value": 0
  1048. },
  1049. {
  1050. "color": "red",
  1051. "value": 80
  1052. }
  1053. ]
  1054. },
  1055. "unit": "s"
  1056. },
  1057. "overrides": []
  1058. },
  1059. "gridPos": {
  1060. "h": 8,
  1061. "w": 12,
  1062. "x": 0,
  1063. "y": 17
  1064. },
  1065. "id": 9,
  1066. "options": {
  1067. "legend": {
  1068. "calcs": [
  1069. "last",
  1070. "max"
  1071. ],
  1072. "displayMode": "table",
  1073. "placement": "bottom",
  1074. "showLegend": true
  1075. },
  1076. "tooltip": {
  1077. "hideZeros": false,
  1078. "mode": "single",
  1079. "sort": "none"
  1080. }
  1081. },
  1082. "pluginVersion": "12.2.0",
  1083. "targets": [
  1084. {
  1085. "datasource": {
  1086. "type": "prometheus",
  1087. "uid": "Prometheus"
  1088. },
  1089. "disableTextWrap": false,
  1090. "editorMode": "builder",
  1091. "expr": "histogram_quantile(0.99, sum by(le, model_instance_name) (rate(gpustack:e2e_request_latency_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  1092. "fullMetaSearch": false,
  1093. "includeNullMetadata": false,
  1094. "instant": false,
  1095. "legendFormat": "P99({{model_instance_name}})",
  1096. "range": true,
  1097. "refId": "A",
  1098. "useBackend": false
  1099. },
  1100. {
  1101. "datasource": {
  1102. "type": "prometheus",
  1103. "uid": "Prometheus"
  1104. },
  1105. "disableTextWrap": false,
  1106. "editorMode": "builder",
  1107. "expr": "histogram_quantile(0.95, sum by(le, model_instance_name) (rate(gpustack:e2e_request_latency_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  1108. "fullMetaSearch": false,
  1109. "hide": false,
  1110. "includeNullMetadata": false,
  1111. "instant": false,
  1112. "legendFormat": "P95({{model_instance_name}})",
  1113. "range": true,
  1114. "refId": "B",
  1115. "useBackend": false
  1116. },
  1117. {
  1118. "datasource": {
  1119. "type": "prometheus",
  1120. "uid": "Prometheus"
  1121. },
  1122. "disableTextWrap": false,
  1123. "editorMode": "builder",
  1124. "expr": "histogram_quantile(0.9, sum by(le, model_instance_name) (rate(gpustack:e2e_request_latency_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  1125. "fullMetaSearch": false,
  1126. "hide": false,
  1127. "includeNullMetadata": false,
  1128. "instant": false,
  1129. "legendFormat": "P90({{model_instance_name}})",
  1130. "range": true,
  1131. "refId": "C",
  1132. "useBackend": false
  1133. },
  1134. {
  1135. "datasource": {
  1136. "type": "prometheus",
  1137. "uid": "Prometheus"
  1138. },
  1139. "disableTextWrap": false,
  1140. "editorMode": "builder",
  1141. "expr": "histogram_quantile(0.5, sum by(le, model_instance_name) (rate(gpustack:e2e_request_latency_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  1142. "fullMetaSearch": false,
  1143. "hide": false,
  1144. "includeNullMetadata": false,
  1145. "instant": false,
  1146. "legendFormat": "P50({{model_instance_name}})",
  1147. "range": true,
  1148. "refId": "D",
  1149. "useBackend": false
  1150. },
  1151. {
  1152. "datasource": {
  1153. "type": "prometheus",
  1154. "uid": "Prometheus"
  1155. },
  1156. "editorMode": "code",
  1157. "expr": "rate(gpustack:e2e_request_latency_seconds_sum{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])\n/\nrate(gpustack:e2e_request_latency_seconds_count{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])",
  1158. "hide": false,
  1159. "instant": false,
  1160. "legendFormat": "Average({{model_instance_name}})",
  1161. "range": true,
  1162. "refId": "E"
  1163. }
  1164. ],
  1165. "title": "End-to-End Request Latency",
  1166. "type": "timeseries"
  1167. },
  1168. {
  1169. "datasource": {
  1170. "default": true,
  1171. "type": "prometheus"
  1172. },
  1173. "fieldConfig": {
  1174. "defaults": {
  1175. "custom": {
  1176. "hideFrom": {
  1177. "legend": false,
  1178. "tooltip": false,
  1179. "viz": false
  1180. },
  1181. "scaleDistribution": {
  1182. "type": "linear"
  1183. }
  1184. }
  1185. },
  1186. "overrides": []
  1187. },
  1188. "gridPos": {
  1189. "h": 8,
  1190. "w": 12,
  1191. "x": 12,
  1192. "y": 17
  1193. },
  1194. "id": 40,
  1195. "maxDataPoints": 30,
  1196. "options": {
  1197. "calculate": false,
  1198. "calculation": {
  1199. "yBuckets": {
  1200. "scale": {
  1201. "type": "linear"
  1202. }
  1203. }
  1204. },
  1205. "cellGap": 1,
  1206. "cellValues": {},
  1207. "color": {
  1208. "exponent": 0.5,
  1209. "fill": "dark-orange",
  1210. "mode": "scheme",
  1211. "reverse": false,
  1212. "scale": "exponential",
  1213. "scheme": "Spectral",
  1214. "steps": 64
  1215. },
  1216. "exemplars": {
  1217. "color": "rgba(255,0,255,0.7)"
  1218. },
  1219. "filterValues": {
  1220. "le": 1e-09
  1221. },
  1222. "legend": {
  1223. "show": true
  1224. },
  1225. "rowsFrame": {
  1226. "layout": "auto"
  1227. },
  1228. "tooltip": {
  1229. "mode": "single",
  1230. "showColorScale": true,
  1231. "yHistogram": false
  1232. },
  1233. "yAxis": {
  1234. "axisPlacement": "left",
  1235. "reverse": false,
  1236. "unit": "secs"
  1237. }
  1238. },
  1239. "pluginVersion": "12.2.0",
  1240. "targets": [
  1241. {
  1242. "datasource": {
  1243. "type": "prometheus",
  1244. "uid": "Prometheus"
  1245. },
  1246. "disableTextWrap": false,
  1247. "editorMode": "builder",
  1248. "expr": "sum by(le) (increase(gpustack:e2e_request_latency_seconds_bucket{cluster_name=\"$cluster_name\",model_name=~\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval]))",
  1249. "format": "heatmap",
  1250. "fullMetaSearch": false,
  1251. "includeNullMetadata": true,
  1252. "instant": false,
  1253. "legendFormat": "{{le}}",
  1254. "range": true,
  1255. "refId": "A",
  1256. "useBackend": false
  1257. }
  1258. ],
  1259. "title": "End-to-End Request Latency Heatmap",
  1260. "type": "heatmap"
  1261. },
  1262. {
  1263. "collapsed": false,
  1264. "gridPos": {
  1265. "h": 1,
  1266. "w": 24,
  1267. "x": 0,
  1268. "y": 25
  1269. },
  1270. "id": 25,
  1271. "panels": [],
  1272. "title": "Runtime Performance",
  1273. "type": "row"
  1274. },
  1275. {
  1276. "datasource": {
  1277. "type": "prometheus",
  1278. "uid": "prometheus"
  1279. },
  1280. "description": "Number of requests in the RUNNING, WAITING, and SWAPPED states.",
  1281. "fieldConfig": {
  1282. "defaults": {
  1283. "color": {
  1284. "mode": "palette-classic"
  1285. },
  1286. "custom": {
  1287. "axisBorderShow": false,
  1288. "axisCenteredZero": false,
  1289. "axisColorMode": "text",
  1290. "axisLabel": "",
  1291. "axisPlacement": "auto",
  1292. "barAlignment": 0,
  1293. "barWidthFactor": 0.6,
  1294. "drawStyle": "line",
  1295. "fillOpacity": 0,
  1296. "gradientMode": "none",
  1297. "hideFrom": {
  1298. "legend": false,
  1299. "tooltip": false,
  1300. "viz": false
  1301. },
  1302. "insertNulls": false,
  1303. "lineInterpolation": "linear",
  1304. "lineWidth": 1,
  1305. "pointSize": 5,
  1306. "scaleDistribution": {
  1307. "type": "linear"
  1308. },
  1309. "showPoints": "never",
  1310. "showValues": false,
  1311. "spanNulls": false,
  1312. "stacking": {
  1313. "group": "A",
  1314. "mode": "none"
  1315. },
  1316. "thresholdsStyle": {
  1317. "mode": "off"
  1318. }
  1319. },
  1320. "mappings": [],
  1321. "thresholds": {
  1322. "mode": "absolute",
  1323. "steps": [
  1324. {
  1325. "color": "green",
  1326. "value": 0
  1327. },
  1328. {
  1329. "color": "red",
  1330. "value": 80
  1331. }
  1332. ]
  1333. },
  1334. "unit": "none"
  1335. },
  1336. "overrides": []
  1337. },
  1338. "gridPos": {
  1339. "h": 8,
  1340. "w": 12,
  1341. "x": 0,
  1342. "y": 26
  1343. },
  1344. "id": 3,
  1345. "options": {
  1346. "legend": {
  1347. "calcs": [
  1348. "last",
  1349. "max"
  1350. ],
  1351. "displayMode": "table",
  1352. "placement": "bottom",
  1353. "showLegend": true
  1354. },
  1355. "tooltip": {
  1356. "hideZeros": false,
  1357. "mode": "single",
  1358. "sort": "none"
  1359. }
  1360. },
  1361. "pluginVersion": "12.2.0",
  1362. "targets": [
  1363. {
  1364. "datasource": {
  1365. "type": "prometheus",
  1366. "uid": "Prometheus"
  1367. },
  1368. "disableTextWrap": false,
  1369. "editorMode": "builder",
  1370. "expr": "gpustack:num_requests_running{cluster_name=\"$cluster_name\",model_instance_name=~\"$model_instance_name\", model_name=\"$model_name\"}",
  1371. "fullMetaSearch": false,
  1372. "includeNullMetadata": true,
  1373. "instant": false,
  1374. "legendFormat": "Request Running({{model_instance_name}})",
  1375. "range": true,
  1376. "refId": "A",
  1377. "useBackend": false
  1378. },
  1379. {
  1380. "datasource": {
  1381. "type": "prometheus",
  1382. "uid": "Prometheus"
  1383. },
  1384. "disableTextWrap": false,
  1385. "editorMode": "builder",
  1386. "expr": "gpustack:num_requests_swapped{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}",
  1387. "fullMetaSearch": false,
  1388. "hide": false,
  1389. "includeNullMetadata": true,
  1390. "instant": false,
  1391. "legendFormat": "Request Swapped({{model_instance_name}})",
  1392. "range": true,
  1393. "refId": "B",
  1394. "useBackend": false
  1395. },
  1396. {
  1397. "datasource": {
  1398. "type": "prometheus",
  1399. "uid": "Prometheus"
  1400. },
  1401. "disableTextWrap": false,
  1402. "editorMode": "builder",
  1403. "expr": "gpustack:num_requests_waiting{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}",
  1404. "fullMetaSearch": false,
  1405. "hide": false,
  1406. "includeNullMetadata": true,
  1407. "instant": false,
  1408. "legendFormat": "Request Waiting({{model_instance_name}})",
  1409. "range": true,
  1410. "refId": "C",
  1411. "useBackend": false
  1412. }
  1413. ],
  1414. "title": "Request Status",
  1415. "type": "timeseries"
  1416. },
  1417. {
  1418. "datasource": {
  1419. "type": "prometheus",
  1420. "uid": "prometheus"
  1421. },
  1422. "description": "Total number of tokens processed per second, combining prompt and generation tokens.",
  1423. "fieldConfig": {
  1424. "defaults": {
  1425. "color": {
  1426. "mode": "palette-classic"
  1427. },
  1428. "custom": {
  1429. "axisBorderShow": false,
  1430. "axisCenteredZero": false,
  1431. "axisColorMode": "text",
  1432. "axisLabel": "",
  1433. "axisPlacement": "auto",
  1434. "barAlignment": 0,
  1435. "barWidthFactor": 0.6,
  1436. "drawStyle": "line",
  1437. "fillOpacity": 0,
  1438. "gradientMode": "none",
  1439. "hideFrom": {
  1440. "legend": false,
  1441. "tooltip": false,
  1442. "viz": false
  1443. },
  1444. "insertNulls": false,
  1445. "lineInterpolation": "linear",
  1446. "lineWidth": 1,
  1447. "pointSize": 5,
  1448. "scaleDistribution": {
  1449. "type": "linear"
  1450. },
  1451. "showPoints": "never",
  1452. "showValues": false,
  1453. "spanNulls": false,
  1454. "stacking": {
  1455. "group": "A",
  1456. "mode": "none"
  1457. },
  1458. "thresholdsStyle": {
  1459. "mode": "off"
  1460. }
  1461. },
  1462. "mappings": [],
  1463. "thresholds": {
  1464. "mode": "absolute",
  1465. "steps": [
  1466. {
  1467. "color": "green",
  1468. "value": 0
  1469. },
  1470. {
  1471. "color": "red",
  1472. "value": 80
  1473. }
  1474. ]
  1475. }
  1476. },
  1477. "overrides": []
  1478. },
  1479. "gridPos": {
  1480. "h": 8,
  1481. "w": 12,
  1482. "x": 12,
  1483. "y": 26
  1484. },
  1485. "id": 8,
  1486. "options": {
  1487. "legend": {
  1488. "calcs": [
  1489. "last",
  1490. "max"
  1491. ],
  1492. "displayMode": "table",
  1493. "placement": "bottom",
  1494. "showLegend": true
  1495. },
  1496. "tooltip": {
  1497. "hideZeros": false,
  1498. "mode": "single",
  1499. "sort": "none"
  1500. }
  1501. },
  1502. "pluginVersion": "12.2.0",
  1503. "targets": [
  1504. {
  1505. "datasource": {
  1506. "type": "prometheus",
  1507. "uid": "Prometheus"
  1508. },
  1509. "disableTextWrap": false,
  1510. "editorMode": "code",
  1511. "expr": "rate(gpustack:prompt_tokens_total{cluster_name=\"$cluster_name\", model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval]) + rate(gpustack:generation_tokens_total{cluster_name=\"$cluster_name\", model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])",
  1512. "fullMetaSearch": false,
  1513. "includeNullMetadata": false,
  1514. "instant": false,
  1515. "legendFormat": "Total Tokens/Sec {{model_instance_name}}",
  1516. "range": true,
  1517. "refId": "A",
  1518. "useBackend": false
  1519. },
  1520. {
  1521. "datasource": {
  1522. "type": "prometheus",
  1523. "uid": "Prometheus"
  1524. },
  1525. "disableTextWrap": false,
  1526. "editorMode": "code",
  1527. "expr": "sum(rate(gpustack:prompt_tokens_total{cluster_name=\"$cluster_name\", model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval]) + rate(gpustack:generation_tokens_total{cluster_name=\"$cluster_name\", model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval]))",
  1528. "fullMetaSearch": false,
  1529. "hide": false,
  1530. "includeNullMetadata": false,
  1531. "instant": false,
  1532. "legendFormat": "Total Tokens/Sec (Total)",
  1533. "range": true,
  1534. "refId": "B",
  1535. "useBackend": false
  1536. }
  1537. ],
  1538. "title": "Token Throughput",
  1539. "type": "timeseries"
  1540. },
  1541. {
  1542. "datasource": {
  1543. "type": "prometheus",
  1544. "uid": "prometheus"
  1545. },
  1546. "description": "P50, P90, P95, and P99 TTFT latency in seconds.",
  1547. "fieldConfig": {
  1548. "defaults": {
  1549. "color": {
  1550. "mode": "palette-classic"
  1551. },
  1552. "custom": {
  1553. "axisBorderShow": false,
  1554. "axisCenteredZero": false,
  1555. "axisColorMode": "text",
  1556. "axisLabel": "",
  1557. "axisPlacement": "auto",
  1558. "barAlignment": 0,
  1559. "barWidthFactor": 0.6,
  1560. "drawStyle": "line",
  1561. "fillOpacity": 0,
  1562. "gradientMode": "none",
  1563. "hideFrom": {
  1564. "legend": false,
  1565. "tooltip": false,
  1566. "viz": false
  1567. },
  1568. "insertNulls": false,
  1569. "lineInterpolation": "linear",
  1570. "lineWidth": 1,
  1571. "pointSize": 5,
  1572. "scaleDistribution": {
  1573. "type": "linear"
  1574. },
  1575. "showPoints": "never",
  1576. "showValues": false,
  1577. "spanNulls": false,
  1578. "stacking": {
  1579. "group": "A",
  1580. "mode": "none"
  1581. },
  1582. "thresholdsStyle": {
  1583. "mode": "off"
  1584. }
  1585. },
  1586. "mappings": [],
  1587. "thresholds": {
  1588. "mode": "absolute",
  1589. "steps": [
  1590. {
  1591. "color": "green",
  1592. "value": 0
  1593. },
  1594. {
  1595. "color": "red",
  1596. "value": 80
  1597. }
  1598. ]
  1599. },
  1600. "unit": "s"
  1601. },
  1602. "overrides": []
  1603. },
  1604. "gridPos": {
  1605. "h": 8,
  1606. "w": 12,
  1607. "x": 0,
  1608. "y": 42
  1609. },
  1610. "id": 5,
  1611. "options": {
  1612. "legend": {
  1613. "calcs": [
  1614. "last",
  1615. "max"
  1616. ],
  1617. "displayMode": "table",
  1618. "placement": "bottom",
  1619. "showLegend": true
  1620. },
  1621. "tooltip": {
  1622. "hideZeros": false,
  1623. "mode": "single",
  1624. "sort": "none"
  1625. }
  1626. },
  1627. "pluginVersion": "12.2.0",
  1628. "targets": [
  1629. {
  1630. "datasource": {
  1631. "type": "prometheus",
  1632. "uid": "Prometheus"
  1633. },
  1634. "disableTextWrap": false,
  1635. "editorMode": "builder",
  1636. "expr": "histogram_quantile(0.99, sum by(le, model_instance_name) (rate(gpustack:time_to_first_token_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  1637. "fullMetaSearch": false,
  1638. "hide": false,
  1639. "includeNullMetadata": false,
  1640. "instant": false,
  1641. "legendFormat": "P99({{model_instance_name}})",
  1642. "range": true,
  1643. "refId": "A",
  1644. "useBackend": false
  1645. },
  1646. {
  1647. "datasource": {
  1648. "type": "prometheus",
  1649. "uid": "Prometheus"
  1650. },
  1651. "disableTextWrap": false,
  1652. "editorMode": "builder",
  1653. "expr": "histogram_quantile(0.95, sum by(le, model_instance_name) (rate(gpustack:time_to_first_token_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  1654. "fullMetaSearch": false,
  1655. "includeNullMetadata": false,
  1656. "instant": false,
  1657. "legendFormat": "P95({{model_instance_name}})",
  1658. "range": true,
  1659. "refId": "B",
  1660. "useBackend": false
  1661. },
  1662. {
  1663. "datasource": {
  1664. "type": "prometheus",
  1665. "uid": "Prometheus"
  1666. },
  1667. "disableTextWrap": false,
  1668. "editorMode": "builder",
  1669. "expr": "histogram_quantile(0.9, sum by(le, model_instance_name) (rate(gpustack:time_to_first_token_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  1670. "fullMetaSearch": false,
  1671. "hide": false,
  1672. "includeNullMetadata": false,
  1673. "instant": false,
  1674. "legendFormat": "P90({{model_instance_name}})",
  1675. "range": true,
  1676. "refId": "C",
  1677. "useBackend": false
  1678. },
  1679. {
  1680. "datasource": {
  1681. "type": "prometheus",
  1682. "uid": "Prometheus"
  1683. },
  1684. "disableTextWrap": false,
  1685. "editorMode": "builder",
  1686. "expr": "histogram_quantile(0.5, sum by(le, model_instance_name) (rate(gpustack:time_to_first_token_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  1687. "fullMetaSearch": false,
  1688. "hide": false,
  1689. "includeNullMetadata": false,
  1690. "instant": false,
  1691. "legendFormat": "P50({{model_instance_name}})",
  1692. "range": true,
  1693. "refId": "D",
  1694. "useBackend": false
  1695. },
  1696. {
  1697. "datasource": {
  1698. "type": "prometheus",
  1699. "uid": "Prometheus"
  1700. },
  1701. "editorMode": "code",
  1702. "expr": "rate(gpustack:time_to_first_token_seconds_sum{cluster_name=\"$cluster_name\",model_name=\"$model_name\",model_instance_name=~\"$model_instance_name\"}[$__rate_interval])\n/\nrate(gpustack:time_to_first_token_seconds_count{cluster_name=\"$cluster_name\",model_name=\"$model_name\",model_instance_name=~\"$model_instance_name\"}[$__rate_interval])",
  1703. "hide": false,
  1704. "instant": false,
  1705. "legendFormat": "Average({{model_instance_name}})",
  1706. "range": true,
  1707. "refId": "E"
  1708. }
  1709. ],
  1710. "title": "Time To First Token Latency",
  1711. "type": "timeseries"
  1712. },
  1713. {
  1714. "datasource": {
  1715. "type": "prometheus",
  1716. "uid": "prometheus"
  1717. },
  1718. "description": "Inter token latency in seconds.",
  1719. "fieldConfig": {
  1720. "defaults": {
  1721. "color": {
  1722. "mode": "palette-classic"
  1723. },
  1724. "custom": {
  1725. "axisBorderShow": false,
  1726. "axisCenteredZero": false,
  1727. "axisColorMode": "text",
  1728. "axisLabel": "",
  1729. "axisPlacement": "auto",
  1730. "barAlignment": 0,
  1731. "barWidthFactor": 0.6,
  1732. "drawStyle": "line",
  1733. "fillOpacity": 0,
  1734. "gradientMode": "none",
  1735. "hideFrom": {
  1736. "legend": false,
  1737. "tooltip": false,
  1738. "viz": false
  1739. },
  1740. "insertNulls": false,
  1741. "lineInterpolation": "linear",
  1742. "lineWidth": 1,
  1743. "pointSize": 5,
  1744. "scaleDistribution": {
  1745. "type": "linear"
  1746. },
  1747. "showPoints": "never",
  1748. "showValues": false,
  1749. "spanNulls": false,
  1750. "stacking": {
  1751. "group": "A",
  1752. "mode": "none"
  1753. },
  1754. "thresholdsStyle": {
  1755. "mode": "off"
  1756. }
  1757. },
  1758. "mappings": [],
  1759. "thresholds": {
  1760. "mode": "absolute",
  1761. "steps": [
  1762. {
  1763. "color": "green",
  1764. "value": 0
  1765. },
  1766. {
  1767. "color": "red",
  1768. "value": 80
  1769. }
  1770. ]
  1771. },
  1772. "unit": "s"
  1773. },
  1774. "overrides": []
  1775. },
  1776. "gridPos": {
  1777. "h": 8,
  1778. "w": 12,
  1779. "x": 12,
  1780. "y": 42
  1781. },
  1782. "id": 39,
  1783. "options": {
  1784. "legend": {
  1785. "calcs": [
  1786. "last",
  1787. "max"
  1788. ],
  1789. "displayMode": "table",
  1790. "placement": "bottom",
  1791. "showLegend": true
  1792. },
  1793. "tooltip": {
  1794. "hideZeros": false,
  1795. "mode": "single",
  1796. "sort": "none"
  1797. }
  1798. },
  1799. "pluginVersion": "12.2.0",
  1800. "targets": [
  1801. {
  1802. "datasource": {
  1803. "type": "prometheus",
  1804. "uid": "Prometheus"
  1805. },
  1806. "disableTextWrap": false,
  1807. "editorMode": "builder",
  1808. "expr": "histogram_quantile(0.99, sum by(le, model_instance_name) (rate(gpustack:inter_token_latency_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  1809. "fullMetaSearch": false,
  1810. "includeNullMetadata": false,
  1811. "instant": false,
  1812. "legendFormat": "P99({{model_instance_name}})",
  1813. "range": true,
  1814. "refId": "A",
  1815. "useBackend": false
  1816. },
  1817. {
  1818. "datasource": {
  1819. "type": "prometheus",
  1820. "uid": "Prometheus"
  1821. },
  1822. "disableTextWrap": false,
  1823. "editorMode": "builder",
  1824. "expr": "histogram_quantile(0.95, sum by(le, model_instance_name) (rate(gpustack:inter_token_latency_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  1825. "fullMetaSearch": false,
  1826. "hide": false,
  1827. "includeNullMetadata": false,
  1828. "instant": false,
  1829. "legendFormat": "P95({{model_instance_name}})",
  1830. "range": true,
  1831. "refId": "B",
  1832. "useBackend": false
  1833. },
  1834. {
  1835. "datasource": {
  1836. "type": "prometheus",
  1837. "uid": "Prometheus"
  1838. },
  1839. "disableTextWrap": false,
  1840. "editorMode": "builder",
  1841. "expr": "histogram_quantile(0.9, sum by(le, model_instance_name) (rate(gpustack:inter_token_latency_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  1842. "fullMetaSearch": false,
  1843. "hide": false,
  1844. "includeNullMetadata": false,
  1845. "instant": false,
  1846. "legendFormat": "P90({{model_instance_name}})",
  1847. "range": true,
  1848. "refId": "C",
  1849. "useBackend": false
  1850. },
  1851. {
  1852. "datasource": {
  1853. "type": "prometheus",
  1854. "uid": "Prometheus"
  1855. },
  1856. "disableTextWrap": false,
  1857. "editorMode": "builder",
  1858. "expr": "histogram_quantile(0.5, sum by(le, model_instance_name) (rate(gpustack:inter_token_latency_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  1859. "fullMetaSearch": false,
  1860. "hide": false,
  1861. "includeNullMetadata": false,
  1862. "instant": false,
  1863. "legendFormat": "P50({{model_instance_name}})",
  1864. "range": true,
  1865. "refId": "D",
  1866. "useBackend": false
  1867. },
  1868. {
  1869. "datasource": {
  1870. "type": "prometheus",
  1871. "uid": "Prometheus"
  1872. },
  1873. "editorMode": "code",
  1874. "expr": "rate(gpustack:inter_token_latency_seconds_sum{cluster_name=\"$cluster_name\",model_name=\"$model_name\",model_instance_name=~\"$model_instance_name\"}[$__rate_interval])\n/\nrate(gpustack:inter_token_latency_seconds_count{cluster_name=\"$cluster_name\",model_name=\"$model_name\",model_instance_name=~\"$model_instance_name\"}[$__rate_interval])",
  1875. "hide": false,
  1876. "instant": false,
  1877. "legendFormat": "Average({{model_instance_name}})",
  1878. "range": true,
  1879. "refId": "E"
  1880. }
  1881. ],
  1882. "title": "Inter Token Latency",
  1883. "type": "timeseries"
  1884. },
  1885. {
  1886. "datasource": {
  1887. "type": "prometheus",
  1888. "uid": "prometheus"
  1889. },
  1890. "description": "Percentage of cache blocks used by the LLM serving runtime.",
  1891. "fieldConfig": {
  1892. "defaults": {
  1893. "color": {
  1894. "mode": "palette-classic"
  1895. },
  1896. "custom": {
  1897. "axisBorderShow": false,
  1898. "axisCenteredZero": false,
  1899. "axisColorMode": "text",
  1900. "axisLabel": "",
  1901. "axisPlacement": "auto",
  1902. "barAlignment": 0,
  1903. "barWidthFactor": 0.6,
  1904. "drawStyle": "line",
  1905. "fillOpacity": 0,
  1906. "gradientMode": "none",
  1907. "hideFrom": {
  1908. "legend": false,
  1909. "tooltip": false,
  1910. "viz": false
  1911. },
  1912. "insertNulls": false,
  1913. "lineInterpolation": "linear",
  1914. "lineWidth": 1,
  1915. "pointSize": 5,
  1916. "scaleDistribution": {
  1917. "type": "linear"
  1918. },
  1919. "showPoints": "never",
  1920. "showValues": false,
  1921. "spanNulls": false,
  1922. "stacking": {
  1923. "group": "A",
  1924. "mode": "none"
  1925. },
  1926. "thresholdsStyle": {
  1927. "mode": "off"
  1928. }
  1929. },
  1930. "mappings": [],
  1931. "thresholds": {
  1932. "mode": "absolute",
  1933. "steps": [
  1934. {
  1935. "color": "green",
  1936. "value": 0
  1937. },
  1938. {
  1939. "color": "red",
  1940. "value": 80
  1941. }
  1942. ]
  1943. },
  1944. "unit": "percentunit"
  1945. },
  1946. "overrides": []
  1947. },
  1948. "gridPos": {
  1949. "h": 8,
  1950. "w": 12,
  1951. "x": 0,
  1952. "y": 50
  1953. },
  1954. "id": 4,
  1955. "options": {
  1956. "legend": {
  1957. "calcs": [
  1958. "last",
  1959. "max"
  1960. ],
  1961. "displayMode": "table",
  1962. "placement": "bottom",
  1963. "showLegend": true
  1964. },
  1965. "tooltip": {
  1966. "hideZeros": false,
  1967. "mode": "single",
  1968. "sort": "none"
  1969. }
  1970. },
  1971. "pluginVersion": "12.2.0",
  1972. "targets": [
  1973. {
  1974. "datasource": {
  1975. "type": "prometheus",
  1976. "uid": "Prometheus"
  1977. },
  1978. "editorMode": "code",
  1979. "expr": "gpustack:kv_cache_usage_ratio{cluster_name=\"$cluster_name\",model_name=~\"$model_name\",model_instance_name=~\"$model_instance_name\"}",
  1980. "instant": false,
  1981. "legendFormat": "{{model_instance_name}}",
  1982. "range": true,
  1983. "refId": "A"
  1984. }
  1985. ],
  1986. "title": "Cache Utilization",
  1987. "type": "timeseries"
  1988. },
  1989. {
  1990. "datasource": {
  1991. "default": true,
  1992. "type": "prometheus"
  1993. },
  1994. "fieldConfig": {
  1995. "defaults": {
  1996. "color": {
  1997. "mode": "palette-classic"
  1998. },
  1999. "custom": {
  2000. "axisBorderShow": false,
  2001. "axisCenteredZero": false,
  2002. "axisColorMode": "text",
  2003. "axisLabel": "",
  2004. "axisPlacement": "auto",
  2005. "barAlignment": 0,
  2006. "barWidthFactor": 0.6,
  2007. "drawStyle": "line",
  2008. "fillOpacity": 0,
  2009. "gradientMode": "none",
  2010. "hideFrom": {
  2011. "legend": false,
  2012. "tooltip": false,
  2013. "viz": false
  2014. },
  2015. "insertNulls": false,
  2016. "lineInterpolation": "linear",
  2017. "lineWidth": 1,
  2018. "pointSize": 5,
  2019. "scaleDistribution": {
  2020. "type": "linear"
  2021. },
  2022. "showPoints": "never",
  2023. "showValues": false,
  2024. "spanNulls": false,
  2025. "stacking": {
  2026. "group": "A",
  2027. "mode": "none"
  2028. },
  2029. "thresholdsStyle": {
  2030. "mode": "off"
  2031. }
  2032. },
  2033. "mappings": [],
  2034. "thresholds": {
  2035. "mode": "absolute",
  2036. "steps": [
  2037. {
  2038. "color": "green",
  2039. "value": 0
  2040. },
  2041. {
  2042. "color": "red",
  2043. "value": 80
  2044. }
  2045. ]
  2046. },
  2047. "unit": "percentunit"
  2048. },
  2049. "overrides": []
  2050. },
  2051. "gridPos": {
  2052. "h": 8,
  2053. "w": 12,
  2054. "x": 12,
  2055. "y": 50
  2056. },
  2057. "id": 38,
  2058. "options": {
  2059. "legend": {
  2060. "calcs": [
  2061. "last",
  2062. "max"
  2063. ],
  2064. "displayMode": "table",
  2065. "placement": "bottom",
  2066. "showLegend": true
  2067. },
  2068. "tooltip": {
  2069. "hideZeros": false,
  2070. "mode": "single",
  2071. "sort": "none"
  2072. }
  2073. },
  2074. "pluginVersion": "12.2.0",
  2075. "targets": [
  2076. {
  2077. "datasource": {
  2078. "type": "prometheus",
  2079. "uid": "Prometheus"
  2080. },
  2081. "disableTextWrap": false,
  2082. "editorMode": "code",
  2083. "expr": "gpustack:prefix_cache_hit_rate{cluster_name=\"$cluster_name\",model_name=~\"$model_name\",model_instance_name=~\"$model_instance_name\"}",
  2084. "fullMetaSearch": false,
  2085. "includeNullMetadata": true,
  2086. "instant": false,
  2087. "legendFormat": "{{model_instance_name}}",
  2088. "range": true,
  2089. "refId": "A",
  2090. "useBackend": false
  2091. },
  2092. {
  2093. "datasource": {
  2094. "type": "prometheus",
  2095. "uid": "prometheus"
  2096. },
  2097. "editorMode": "code",
  2098. "expr": "rate(gpustack:prefix_cache_hits_total{cluster_name=\"$cluster_name\",model_name=~\"$model_name\",model_instance_name=~\"$model_instance_name\"}[1m]) / rate(gpustack:prefix_cache_queries_total{cluster_name=\"$cluster_name\",model_name=~\"$model_name\",model_instance_name=~\"$model_instance_name\"}[1m])",
  2099. "hide": false,
  2100. "instant": false,
  2101. "legendFormat": "{{model_instance_name}}",
  2102. "range": true,
  2103. "refId": "B"
  2104. }
  2105. ],
  2106. "title": "Cache Hit Rate",
  2107. "type": "timeseries"
  2108. },
  2109. {
  2110. "datasource": {
  2111. "type": "prometheus",
  2112. "uid": "prometheus"
  2113. },
  2114. "description": "Heatmap of request prompt length",
  2115. "fieldConfig": {
  2116. "defaults": {
  2117. "custom": {
  2118. "hideFrom": {
  2119. "legend": false,
  2120. "tooltip": false,
  2121. "viz": false
  2122. },
  2123. "scaleDistribution": {
  2124. "type": "linear"
  2125. }
  2126. }
  2127. },
  2128. "overrides": []
  2129. },
  2130. "gridPos": {
  2131. "h": 8,
  2132. "w": 12,
  2133. "x": 0,
  2134. "y": 58
  2135. },
  2136. "id": 12,
  2137. "options": {
  2138. "calculate": false,
  2139. "cellGap": 1,
  2140. "cellValues": {
  2141. "unit": "none"
  2142. },
  2143. "color": {
  2144. "exponent": 0.5,
  2145. "fill": "dark-orange",
  2146. "min": 0,
  2147. "mode": "scheme",
  2148. "reverse": false,
  2149. "scale": "exponential",
  2150. "scheme": "Spectral",
  2151. "steps": 64
  2152. },
  2153. "exemplars": {
  2154. "color": "rgba(255,0,255,0.7)"
  2155. },
  2156. "filterValues": {
  2157. "le": 1e-09
  2158. },
  2159. "legend": {
  2160. "show": true
  2161. },
  2162. "rowsFrame": {
  2163. "layout": "auto",
  2164. "value": "Request count"
  2165. },
  2166. "tooltip": {
  2167. "mode": "single",
  2168. "showColorScale": false,
  2169. "yHistogram": true
  2170. },
  2171. "yAxis": {
  2172. "axisLabel": "Prompt Length",
  2173. "axisPlacement": "left",
  2174. "reverse": false,
  2175. "unit": "none"
  2176. }
  2177. },
  2178. "pluginVersion": "12.2.0",
  2179. "targets": [
  2180. {
  2181. "datasource": {
  2182. "type": "prometheus",
  2183. "uid": "Prometheus"
  2184. },
  2185. "disableTextWrap": false,
  2186. "editorMode": "builder",
  2187. "expr": "sum by(le) (increase(gpustack:request_prompt_tokens_bucket{model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\", cluster_name=\"$cluster_name\"}[$__rate_interval]))",
  2188. "format": "heatmap",
  2189. "fullMetaSearch": false,
  2190. "includeNullMetadata": true,
  2191. "instant": false,
  2192. "legendFormat": "{{le}}",
  2193. "range": true,
  2194. "refId": "A",
  2195. "useBackend": false
  2196. }
  2197. ],
  2198. "title": "Request Prompt Length",
  2199. "type": "heatmap"
  2200. },
  2201. {
  2202. "datasource": {
  2203. "type": "prometheus",
  2204. "uid": "prometheus"
  2205. },
  2206. "description": "Heatmap of request generation length",
  2207. "fieldConfig": {
  2208. "defaults": {
  2209. "custom": {
  2210. "hideFrom": {
  2211. "legend": false,
  2212. "tooltip": false,
  2213. "viz": false
  2214. },
  2215. "scaleDistribution": {
  2216. "type": "linear"
  2217. }
  2218. }
  2219. },
  2220. "overrides": []
  2221. },
  2222. "gridPos": {
  2223. "h": 8,
  2224. "w": 12,
  2225. "x": 12,
  2226. "y": 58
  2227. },
  2228. "id": 13,
  2229. "options": {
  2230. "calculate": false,
  2231. "cellGap": 1,
  2232. "cellValues": {
  2233. "unit": "none"
  2234. },
  2235. "color": {
  2236. "exponent": 0.5,
  2237. "fill": "dark-orange",
  2238. "min": 0,
  2239. "mode": "scheme",
  2240. "reverse": false,
  2241. "scale": "exponential",
  2242. "scheme": "Spectral",
  2243. "steps": 64
  2244. },
  2245. "exemplars": {
  2246. "color": "rgba(255,0,255,0.7)"
  2247. },
  2248. "filterValues": {
  2249. "le": 1e-09
  2250. },
  2251. "legend": {
  2252. "show": true
  2253. },
  2254. "rowsFrame": {
  2255. "layout": "auto",
  2256. "value": "Request count"
  2257. },
  2258. "tooltip": {
  2259. "mode": "single",
  2260. "showColorScale": false,
  2261. "yHistogram": true
  2262. },
  2263. "yAxis": {
  2264. "axisLabel": "Generation Length",
  2265. "axisPlacement": "left",
  2266. "reverse": false,
  2267. "unit": "none"
  2268. }
  2269. },
  2270. "pluginVersion": "12.2.0",
  2271. "targets": [
  2272. {
  2273. "datasource": {
  2274. "type": "prometheus",
  2275. "uid": "Prometheus"
  2276. },
  2277. "disableTextWrap": false,
  2278. "editorMode": "builder",
  2279. "expr": "sum by(le) (increase(gpustack:request_generation_tokens_bucket{model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\", cluster_name=\"$cluster_name\"}[$__rate_interval]))",
  2280. "format": "heatmap",
  2281. "fullMetaSearch": false,
  2282. "includeNullMetadata": true,
  2283. "instant": false,
  2284. "legendFormat": "{{le}}",
  2285. "range": true,
  2286. "refId": "A",
  2287. "useBackend": false
  2288. }
  2289. ],
  2290. "title": "Request Generation Length",
  2291. "type": "heatmap"
  2292. },
  2293. {
  2294. "datasource": {
  2295. "type": "prometheus",
  2296. "uid": "prometheus"
  2297. },
  2298. "description": "Per output token latency in seconds.",
  2299. "fieldConfig": {
  2300. "defaults": {
  2301. "color": {
  2302. "mode": "palette-classic"
  2303. },
  2304. "custom": {
  2305. "axisBorderShow": false,
  2306. "axisCenteredZero": false,
  2307. "axisColorMode": "text",
  2308. "axisLabel": "",
  2309. "axisPlacement": "auto",
  2310. "barAlignment": 0,
  2311. "barWidthFactor": 0.6,
  2312. "drawStyle": "line",
  2313. "fillOpacity": 0,
  2314. "gradientMode": "none",
  2315. "hideFrom": {
  2316. "legend": false,
  2317. "tooltip": false,
  2318. "viz": false
  2319. },
  2320. "insertNulls": false,
  2321. "lineInterpolation": "linear",
  2322. "lineWidth": 1,
  2323. "pointSize": 5,
  2324. "scaleDistribution": {
  2325. "type": "linear"
  2326. },
  2327. "showPoints": "never",
  2328. "showValues": false,
  2329. "spanNulls": false,
  2330. "stacking": {
  2331. "group": "A",
  2332. "mode": "none"
  2333. },
  2334. "thresholdsStyle": {
  2335. "mode": "off"
  2336. }
  2337. },
  2338. "mappings": [],
  2339. "thresholds": {
  2340. "mode": "absolute",
  2341. "steps": [
  2342. {
  2343. "color": "green",
  2344. "value": 0
  2345. },
  2346. {
  2347. "color": "red",
  2348. "value": 80
  2349. }
  2350. ]
  2351. },
  2352. "unit": "s"
  2353. },
  2354. "overrides": []
  2355. },
  2356. "gridPos": {
  2357. "h": 8,
  2358. "w": 12,
  2359. "x": 0,
  2360. "y": 66
  2361. },
  2362. "id": 10,
  2363. "options": {
  2364. "legend": {
  2365. "calcs": [
  2366. "last",
  2367. "max"
  2368. ],
  2369. "displayMode": "table",
  2370. "placement": "bottom",
  2371. "showLegend": true
  2372. },
  2373. "tooltip": {
  2374. "hideZeros": false,
  2375. "mode": "single",
  2376. "sort": "none"
  2377. }
  2378. },
  2379. "pluginVersion": "12.2.0",
  2380. "targets": [
  2381. {
  2382. "datasource": {
  2383. "type": "prometheus",
  2384. "uid": "Prometheus"
  2385. },
  2386. "disableTextWrap": false,
  2387. "editorMode": "builder",
  2388. "expr": "histogram_quantile(0.99, sum by(le, model_instance_name) (rate(gpustack:time_per_output_token_seconds_bucket{model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\", cluster_name=\"$cluster_name\"}[$__rate_interval])))",
  2389. "fullMetaSearch": false,
  2390. "includeNullMetadata": false,
  2391. "instant": false,
  2392. "legendFormat": "P99({{model_instance_name}})",
  2393. "range": true,
  2394. "refId": "A",
  2395. "useBackend": false
  2396. },
  2397. {
  2398. "datasource": {
  2399. "type": "prometheus",
  2400. "uid": "Prometheus"
  2401. },
  2402. "disableTextWrap": false,
  2403. "editorMode": "builder",
  2404. "expr": "histogram_quantile(0.95, sum by(le, model_instance_name) (rate(gpustack:time_per_output_token_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  2405. "fullMetaSearch": false,
  2406. "hide": false,
  2407. "includeNullMetadata": false,
  2408. "instant": false,
  2409. "legendFormat": "P95({{model_instance_name}})",
  2410. "range": true,
  2411. "refId": "B",
  2412. "useBackend": false
  2413. },
  2414. {
  2415. "datasource": {
  2416. "type": "prometheus",
  2417. "uid": "Prometheus"
  2418. },
  2419. "disableTextWrap": false,
  2420. "editorMode": "builder",
  2421. "expr": "histogram_quantile(0.9, sum by(le, model_instance_name) (rate(gpustack:time_per_output_token_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  2422. "fullMetaSearch": false,
  2423. "hide": false,
  2424. "includeNullMetadata": false,
  2425. "instant": false,
  2426. "legendFormat": "P90({{model_instance_name}})",
  2427. "range": true,
  2428. "refId": "C",
  2429. "useBackend": false
  2430. },
  2431. {
  2432. "datasource": {
  2433. "type": "prometheus",
  2434. "uid": "Prometheus"
  2435. },
  2436. "disableTextWrap": false,
  2437. "editorMode": "builder",
  2438. "expr": "histogram_quantile(0.5, sum by(le, model_instance_name) (rate(gpustack:time_per_output_token_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  2439. "fullMetaSearch": false,
  2440. "hide": false,
  2441. "includeNullMetadata": false,
  2442. "instant": false,
  2443. "legendFormat": "P50({{model_instance_name}})",
  2444. "range": true,
  2445. "refId": "D",
  2446. "useBackend": false
  2447. },
  2448. {
  2449. "datasource": {
  2450. "type": "prometheus",
  2451. "uid": "Prometheus"
  2452. },
  2453. "editorMode": "code",
  2454. "expr": "rate(gpustack:time_per_output_token_seconds_sum{cluster_name=\"$cluster_name\",model_name=\"$model_name\",model_instance_name=~\"$model_instance_name\"}[$__rate_interval])\n/\nrate(gpustack:time_per_output_token_seconds_count{cluster_name=\"$cluster_name\",model_name=\"$model_name\",model_instance_name=~\"$model_instance_name\"}[$__rate_interval])",
  2455. "hide": false,
  2456. "instant": false,
  2457. "legendFormat": "Mean({{model_instance_name}})",
  2458. "range": true,
  2459. "refId": "E"
  2460. }
  2461. ],
  2462. "title": "Time Per Output Token Latency",
  2463. "type": "timeseries"
  2464. },
  2465. {
  2466. "datasource": {
  2467. "type": "prometheus",
  2468. "uid": "prometheus"
  2469. },
  2470. "description": "Number of finished requests by their finish reason: either an EOS token was generated or the max sequence length was reached.",
  2471. "fieldConfig": {
  2472. "defaults": {
  2473. "color": {
  2474. "mode": "palette-classic"
  2475. },
  2476. "custom": {
  2477. "axisBorderShow": false,
  2478. "axisCenteredZero": false,
  2479. "axisColorMode": "text",
  2480. "axisLabel": "",
  2481. "axisPlacement": "auto",
  2482. "barAlignment": 0,
  2483. "barWidthFactor": 0.6,
  2484. "drawStyle": "line",
  2485. "fillOpacity": 0,
  2486. "gradientMode": "none",
  2487. "hideFrom": {
  2488. "legend": false,
  2489. "tooltip": false,
  2490. "viz": false
  2491. },
  2492. "insertNulls": false,
  2493. "lineInterpolation": "linear",
  2494. "lineWidth": 1,
  2495. "pointSize": 5,
  2496. "scaleDistribution": {
  2497. "type": "linear"
  2498. },
  2499. "showPoints": "never",
  2500. "showValues": false,
  2501. "spanNulls": false,
  2502. "stacking": {
  2503. "group": "A",
  2504. "mode": "none"
  2505. },
  2506. "thresholdsStyle": {
  2507. "mode": "off"
  2508. }
  2509. },
  2510. "mappings": [],
  2511. "thresholds": {
  2512. "mode": "absolute",
  2513. "steps": [
  2514. {
  2515. "color": "green",
  2516. "value": 0
  2517. },
  2518. {
  2519. "color": "red",
  2520. "value": 80
  2521. }
  2522. ]
  2523. }
  2524. },
  2525. "overrides": []
  2526. },
  2527. "gridPos": {
  2528. "h": 8,
  2529. "w": 12,
  2530. "x": 12,
  2531. "y": 66
  2532. },
  2533. "id": 11,
  2534. "options": {
  2535. "legend": {
  2536. "calcs": [
  2537. "last",
  2538. "max"
  2539. ],
  2540. "displayMode": "table",
  2541. "placement": "bottom",
  2542. "showLegend": true
  2543. },
  2544. "tooltip": {
  2545. "hideZeros": false,
  2546. "mode": "single",
  2547. "sort": "none"
  2548. }
  2549. },
  2550. "pluginVersion": "12.2.0",
  2551. "targets": [
  2552. {
  2553. "datasource": {
  2554. "type": "prometheus",
  2555. "uid": "Prometheus"
  2556. },
  2557. "disableTextWrap": false,
  2558. "editorMode": "builder",
  2559. "expr": "sum by(finished_reason, model_instance_name) (increase(gpustack:request_success_total{model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\", cluster_name=\"$cluster_name\"}[$__rate_interval]))",
  2560. "fullMetaSearch": false,
  2561. "includeNullMetadata": true,
  2562. "instant": false,
  2563. "interval": "",
  2564. "legendFormat": "{{finished_reason}}({{model_instance_name}})",
  2565. "range": true,
  2566. "refId": "A",
  2567. "useBackend": false
  2568. }
  2569. ],
  2570. "title": "Finish Reason",
  2571. "type": "timeseries"
  2572. },
  2573. {
  2574. "datasource": {
  2575. "type": "prometheus",
  2576. "uid": "prometheus"
  2577. },
  2578. "description": "Number of prompt tokens and cached prompt tokens served per second.",
  2579. "fieldConfig": {
  2580. "defaults": {
  2581. "color": {
  2582. "mode": "palette-classic"
  2583. },
  2584. "custom": {
  2585. "axisBorderShow": false,
  2586. "axisCenteredZero": false,
  2587. "axisColorMode": "text",
  2588. "axisLabel": "",
  2589. "axisPlacement": "auto",
  2590. "barAlignment": 0,
  2591. "barWidthFactor": 0.6,
  2592. "drawStyle": "line",
  2593. "fillOpacity": 0,
  2594. "gradientMode": "none",
  2595. "hideFrom": {
  2596. "legend": false,
  2597. "tooltip": false,
  2598. "viz": false
  2599. },
  2600. "insertNulls": false,
  2601. "lineInterpolation": "linear",
  2602. "lineWidth": 1,
  2603. "pointSize": 5,
  2604. "scaleDistribution": {
  2605. "type": "linear"
  2606. },
  2607. "showPoints": "never",
  2608. "showValues": false,
  2609. "spanNulls": false,
  2610. "stacking": {
  2611. "group": "A",
  2612. "mode": "none"
  2613. },
  2614. "thresholdsStyle": {
  2615. "mode": "off"
  2616. }
  2617. },
  2618. "mappings": [],
  2619. "thresholds": {
  2620. "mode": "absolute",
  2621. "steps": [
  2622. {
  2623. "color": "green",
  2624. "value": 0
  2625. },
  2626. {
  2627. "color": "red",
  2628. "value": 80
  2629. }
  2630. ]
  2631. }
  2632. },
  2633. "overrides": []
  2634. },
  2635. "gridPos": {
  2636. "h": 8,
  2637. "w": 12,
  2638. "x": 0,
  2639. "y": 34
  2640. },
  2641. "id": 42,
  2642. "options": {
  2643. "legend": {
  2644. "calcs": [
  2645. "last",
  2646. "max"
  2647. ],
  2648. "displayMode": "table",
  2649. "placement": "bottom",
  2650. "showLegend": true
  2651. },
  2652. "tooltip": {
  2653. "hideZeros": false,
  2654. "mode": "single",
  2655. "sort": "none"
  2656. }
  2657. },
  2658. "pluginVersion": "12.2.0",
  2659. "targets": [
  2660. {
  2661. "datasource": {
  2662. "type": "prometheus",
  2663. "uid": "Prometheus"
  2664. },
  2665. "disableTextWrap": false,
  2666. "editorMode": "code",
  2667. "expr": "rate(gpustack:prompt_tokens_total{cluster_name=\"$cluster_name\", model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])",
  2668. "fullMetaSearch": false,
  2669. "includeNullMetadata": false,
  2670. "instant": false,
  2671. "legendFormat": "Prompt Tokens/Sec {{model_instance_name}}",
  2672. "range": true,
  2673. "refId": "A",
  2674. "useBackend": false
  2675. },
  2676. {
  2677. "datasource": {
  2678. "type": "prometheus",
  2679. "uid": "Prometheus"
  2680. },
  2681. "disableTextWrap": false,
  2682. "editorMode": "code",
  2683. "expr": "rate(gpustack:prompt_tokens_cached_total{cluster_name=\"$cluster_name\", model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])",
  2684. "fullMetaSearch": false,
  2685. "includeNullMetadata": false,
  2686. "instant": false,
  2687. "legendFormat": "Cached Prompt Tokens/Sec {{model_instance_name}}",
  2688. "range": true,
  2689. "refId": "B",
  2690. "useBackend": false
  2691. },
  2692. {
  2693. "datasource": {
  2694. "type": "prometheus",
  2695. "uid": "Prometheus"
  2696. },
  2697. "disableTextWrap": false,
  2698. "editorMode": "code",
  2699. "expr": "sum(rate(gpustack:prompt_tokens_total{cluster_name=\"$cluster_name\", model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval]))",
  2700. "fullMetaSearch": false,
  2701. "hide": false,
  2702. "includeNullMetadata": false,
  2703. "instant": false,
  2704. "legendFormat": "Prompt Tokens/Sec (Total)",
  2705. "range": true,
  2706. "refId": "C",
  2707. "useBackend": false
  2708. },
  2709. {
  2710. "datasource": {
  2711. "type": "prometheus",
  2712. "uid": "Prometheus"
  2713. },
  2714. "disableTextWrap": false,
  2715. "editorMode": "code",
  2716. "expr": "sum(rate(gpustack:prompt_tokens_cached_total{cluster_name=\"$cluster_name\", model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval]))",
  2717. "fullMetaSearch": false,
  2718. "hide": false,
  2719. "includeNullMetadata": false,
  2720. "instant": false,
  2721. "legendFormat": "Cached Prompt Tokens/Sec (Total)",
  2722. "range": true,
  2723. "refId": "D",
  2724. "useBackend": false
  2725. }
  2726. ],
  2727. "title": "Prompt Tokens Throughput",
  2728. "type": "timeseries"
  2729. },
  2730. {
  2731. "datasource": {
  2732. "type": "prometheus",
  2733. "uid": "prometheus"
  2734. },
  2735. "description": "Time spent waiting in the request queue.",
  2736. "fieldConfig": {
  2737. "defaults": {
  2738. "color": {
  2739. "mode": "palette-classic"
  2740. },
  2741. "custom": {
  2742. "axisBorderShow": false,
  2743. "axisCenteredZero": false,
  2744. "axisColorMode": "text",
  2745. "axisLabel": "",
  2746. "axisPlacement": "auto",
  2747. "barAlignment": 0,
  2748. "barWidthFactor": 0.6,
  2749. "drawStyle": "line",
  2750. "fillOpacity": 0,
  2751. "gradientMode": "none",
  2752. "hideFrom": {
  2753. "legend": false,
  2754. "tooltip": false,
  2755. "viz": false
  2756. },
  2757. "insertNulls": false,
  2758. "lineInterpolation": "linear",
  2759. "lineWidth": 1,
  2760. "pointSize": 5,
  2761. "scaleDistribution": {
  2762. "type": "linear"
  2763. },
  2764. "showPoints": "never",
  2765. "showValues": false,
  2766. "spanNulls": false,
  2767. "stacking": {
  2768. "group": "A",
  2769. "mode": "none"
  2770. },
  2771. "thresholdsStyle": {
  2772. "mode": "off"
  2773. }
  2774. },
  2775. "mappings": [],
  2776. "thresholds": {
  2777. "mode": "absolute",
  2778. "steps": [
  2779. {
  2780. "color": "green",
  2781. "value": 0
  2782. },
  2783. {
  2784. "color": "red",
  2785. "value": 80
  2786. }
  2787. ]
  2788. },
  2789. "unit": "s"
  2790. },
  2791. "overrides": []
  2792. },
  2793. "gridPos": {
  2794. "h": 8,
  2795. "w": 12,
  2796. "x": 12,
  2797. "y": 74
  2798. },
  2799. "id": 43,
  2800. "options": {
  2801. "legend": {
  2802. "calcs": [
  2803. "last",
  2804. "max"
  2805. ],
  2806. "displayMode": "table",
  2807. "placement": "bottom",
  2808. "showLegend": true
  2809. },
  2810. "tooltip": {
  2811. "hideZeros": false,
  2812. "mode": "single",
  2813. "sort": "none"
  2814. }
  2815. },
  2816. "pluginVersion": "12.2.0",
  2817. "targets": [
  2818. {
  2819. "datasource": {
  2820. "type": "prometheus",
  2821. "uid": "Prometheus"
  2822. },
  2823. "disableTextWrap": false,
  2824. "editorMode": "builder",
  2825. "expr": "histogram_quantile(0.99, sum by(le, model_instance_name) (rate(gpustack:request_queue_time_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  2826. "fullMetaSearch": false,
  2827. "hide": false,
  2828. "includeNullMetadata": false,
  2829. "instant": false,
  2830. "legendFormat": "P99({{model_instance_name}})",
  2831. "range": true,
  2832. "refId": "A",
  2833. "useBackend": false
  2834. },
  2835. {
  2836. "datasource": {
  2837. "type": "prometheus",
  2838. "uid": "Prometheus"
  2839. },
  2840. "disableTextWrap": false,
  2841. "editorMode": "builder",
  2842. "expr": "histogram_quantile(0.95, sum by(le, model_instance_name) (rate(gpustack:request_queue_time_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  2843. "fullMetaSearch": false,
  2844. "includeNullMetadata": false,
  2845. "instant": false,
  2846. "legendFormat": "P95({{model_instance_name}})",
  2847. "range": true,
  2848. "refId": "B",
  2849. "useBackend": false
  2850. },
  2851. {
  2852. "datasource": {
  2853. "type": "prometheus",
  2854. "uid": "Prometheus"
  2855. },
  2856. "disableTextWrap": false,
  2857. "editorMode": "builder",
  2858. "expr": "histogram_quantile(0.9, sum by(le, model_instance_name) (rate(gpustack:request_queue_time_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  2859. "fullMetaSearch": false,
  2860. "hide": false,
  2861. "includeNullMetadata": false,
  2862. "instant": false,
  2863. "legendFormat": "P90({{model_instance_name}})",
  2864. "range": true,
  2865. "refId": "C",
  2866. "useBackend": false
  2867. },
  2868. {
  2869. "datasource": {
  2870. "type": "prometheus",
  2871. "uid": "Prometheus"
  2872. },
  2873. "disableTextWrap": false,
  2874. "editorMode": "builder",
  2875. "expr": "histogram_quantile(0.5, sum by(le, model_instance_name) (rate(gpustack:request_queue_time_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  2876. "fullMetaSearch": false,
  2877. "hide": false,
  2878. "includeNullMetadata": false,
  2879. "instant": false,
  2880. "legendFormat": "P50({{model_instance_name}})",
  2881. "range": true,
  2882. "refId": "D",
  2883. "useBackend": false
  2884. },
  2885. {
  2886. "datasource": {
  2887. "type": "prometheus",
  2888. "uid": "Prometheus"
  2889. },
  2890. "editorMode": "code",
  2891. "expr": "rate(gpustack:request_queue_time_seconds_sum{cluster_name=\"$cluster_name\",model_name=\"$model_name\",model_instance_name=~\"$model_instance_name\"}[$__rate_interval])\n/\nrate(gpustack:request_queue_time_seconds_count{cluster_name=\"$cluster_name\",model_name=\"$model_name\",model_instance_name=~\"$model_instance_name\"}[$__rate_interval])",
  2892. "hide": false,
  2893. "instant": false,
  2894. "legendFormat": "Mean({{model_instance_name}})",
  2895. "range": true,
  2896. "refId": "E"
  2897. }
  2898. ],
  2899. "title": "Request Queue Time",
  2900. "type": "timeseries"
  2901. },
  2902. {
  2903. "datasource": {
  2904. "type": "prometheus",
  2905. "uid": "prometheus"
  2906. },
  2907. "description": "Time spent in the prefill phase.",
  2908. "fieldConfig": {
  2909. "defaults": {
  2910. "color": {
  2911. "mode": "palette-classic"
  2912. },
  2913. "custom": {
  2914. "axisBorderShow": false,
  2915. "axisCenteredZero": false,
  2916. "axisColorMode": "text",
  2917. "axisLabel": "",
  2918. "axisPlacement": "auto",
  2919. "barAlignment": 0,
  2920. "barWidthFactor": 0.6,
  2921. "drawStyle": "line",
  2922. "fillOpacity": 0,
  2923. "gradientMode": "none",
  2924. "hideFrom": {
  2925. "legend": false,
  2926. "tooltip": false,
  2927. "viz": false
  2928. },
  2929. "insertNulls": false,
  2930. "lineInterpolation": "linear",
  2931. "lineWidth": 1,
  2932. "pointSize": 5,
  2933. "scaleDistribution": {
  2934. "type": "linear"
  2935. },
  2936. "showPoints": "never",
  2937. "showValues": false,
  2938. "spanNulls": false,
  2939. "stacking": {
  2940. "group": "A",
  2941. "mode": "none"
  2942. },
  2943. "thresholdsStyle": {
  2944. "mode": "off"
  2945. }
  2946. },
  2947. "mappings": [],
  2948. "thresholds": {
  2949. "mode": "absolute",
  2950. "steps": [
  2951. {
  2952. "color": "green",
  2953. "value": 0
  2954. },
  2955. {
  2956. "color": "red",
  2957. "value": 80
  2958. }
  2959. ]
  2960. },
  2961. "unit": "s"
  2962. },
  2963. "overrides": []
  2964. },
  2965. "gridPos": {
  2966. "h": 8,
  2967. "w": 12,
  2968. "x": 0,
  2969. "y": 74
  2970. },
  2971. "id": 44,
  2972. "options": {
  2973. "legend": {
  2974. "calcs": [
  2975. "last",
  2976. "max"
  2977. ],
  2978. "displayMode": "table",
  2979. "placement": "bottom",
  2980. "showLegend": true
  2981. },
  2982. "tooltip": {
  2983. "hideZeros": false,
  2984. "mode": "single",
  2985. "sort": "none"
  2986. }
  2987. },
  2988. "pluginVersion": "12.2.0",
  2989. "targets": [
  2990. {
  2991. "datasource": {
  2992. "type": "prometheus",
  2993. "uid": "Prometheus"
  2994. },
  2995. "disableTextWrap": false,
  2996. "editorMode": "builder",
  2997. "expr": "histogram_quantile(0.99, sum by(le, model_instance_name) (rate(gpustack:request_prefill_time_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  2998. "fullMetaSearch": false,
  2999. "hide": false,
  3000. "includeNullMetadata": false,
  3001. "instant": false,
  3002. "legendFormat": "P99({{model_instance_name}})",
  3003. "range": true,
  3004. "refId": "A",
  3005. "useBackend": false
  3006. },
  3007. {
  3008. "datasource": {
  3009. "type": "prometheus",
  3010. "uid": "Prometheus"
  3011. },
  3012. "disableTextWrap": false,
  3013. "editorMode": "builder",
  3014. "expr": "histogram_quantile(0.95, sum by(le, model_instance_name) (rate(gpustack:request_prefill_time_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  3015. "fullMetaSearch": false,
  3016. "includeNullMetadata": false,
  3017. "instant": false,
  3018. "legendFormat": "P95({{model_instance_name}})",
  3019. "range": true,
  3020. "refId": "B",
  3021. "useBackend": false
  3022. },
  3023. {
  3024. "datasource": {
  3025. "type": "prometheus",
  3026. "uid": "Prometheus"
  3027. },
  3028. "disableTextWrap": false,
  3029. "editorMode": "builder",
  3030. "expr": "histogram_quantile(0.9, sum by(le, model_instance_name) (rate(gpustack:request_prefill_time_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  3031. "fullMetaSearch": false,
  3032. "hide": false,
  3033. "includeNullMetadata": false,
  3034. "instant": false,
  3035. "legendFormat": "P90({{model_instance_name}})",
  3036. "range": true,
  3037. "refId": "C",
  3038. "useBackend": false
  3039. },
  3040. {
  3041. "datasource": {
  3042. "type": "prometheus",
  3043. "uid": "Prometheus"
  3044. },
  3045. "disableTextWrap": false,
  3046. "editorMode": "builder",
  3047. "expr": "histogram_quantile(0.5, sum by(le, model_instance_name) (rate(gpustack:request_prefill_time_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  3048. "fullMetaSearch": false,
  3049. "hide": false,
  3050. "includeNullMetadata": false,
  3051. "instant": false,
  3052. "legendFormat": "P50({{model_instance_name}})",
  3053. "range": true,
  3054. "refId": "D",
  3055. "useBackend": false
  3056. },
  3057. {
  3058. "datasource": {
  3059. "type": "prometheus",
  3060. "uid": "Prometheus"
  3061. },
  3062. "editorMode": "code",
  3063. "expr": "rate(gpustack:request_prefill_time_seconds_sum{cluster_name=\"$cluster_name\",model_name=\"$model_name\",model_instance_name=~\"$model_instance_name\"}[$__rate_interval])\n/\nrate(gpustack:request_prefill_time_seconds_count{cluster_name=\"$cluster_name\",model_name=\"$model_name\",model_instance_name=~\"$model_instance_name\"}[$__rate_interval])",
  3064. "hide": false,
  3065. "instant": false,
  3066. "legendFormat": "Mean({{model_instance_name}})",
  3067. "range": true,
  3068. "refId": "E"
  3069. }
  3070. ],
  3071. "title": "Request Prefill Time",
  3072. "type": "timeseries"
  3073. },
  3074. {
  3075. "datasource": {
  3076. "type": "prometheus",
  3077. "uid": "prometheus"
  3078. },
  3079. "description": "Time spent in the decode phase.",
  3080. "fieldConfig": {
  3081. "defaults": {
  3082. "color": {
  3083. "mode": "palette-classic"
  3084. },
  3085. "custom": {
  3086. "axisBorderShow": false,
  3087. "axisCenteredZero": false,
  3088. "axisColorMode": "text",
  3089. "axisLabel": "",
  3090. "axisPlacement": "auto",
  3091. "barAlignment": 0,
  3092. "barWidthFactor": 0.6,
  3093. "drawStyle": "line",
  3094. "fillOpacity": 0,
  3095. "gradientMode": "none",
  3096. "hideFrom": {
  3097. "legend": false,
  3098. "tooltip": false,
  3099. "viz": false
  3100. },
  3101. "insertNulls": false,
  3102. "lineInterpolation": "linear",
  3103. "lineWidth": 1,
  3104. "pointSize": 5,
  3105. "scaleDistribution": {
  3106. "type": "linear"
  3107. },
  3108. "showPoints": "never",
  3109. "showValues": false,
  3110. "spanNulls": false,
  3111. "stacking": {
  3112. "group": "A",
  3113. "mode": "none"
  3114. },
  3115. "thresholdsStyle": {
  3116. "mode": "off"
  3117. }
  3118. },
  3119. "mappings": [],
  3120. "thresholds": {
  3121. "mode": "absolute",
  3122. "steps": [
  3123. {
  3124. "color": "green",
  3125. "value": 0
  3126. },
  3127. {
  3128. "color": "red",
  3129. "value": 80
  3130. }
  3131. ]
  3132. },
  3133. "unit": "s"
  3134. },
  3135. "overrides": []
  3136. },
  3137. "gridPos": {
  3138. "h": 8,
  3139. "w": 12,
  3140. "x": 0,
  3141. "y": 82
  3142. },
  3143. "id": 45,
  3144. "options": {
  3145. "legend": {
  3146. "calcs": [
  3147. "last",
  3148. "max"
  3149. ],
  3150. "displayMode": "table",
  3151. "placement": "bottom",
  3152. "showLegend": true
  3153. },
  3154. "tooltip": {
  3155. "hideZeros": false,
  3156. "mode": "single",
  3157. "sort": "none"
  3158. }
  3159. },
  3160. "pluginVersion": "12.2.0",
  3161. "targets": [
  3162. {
  3163. "datasource": {
  3164. "type": "prometheus",
  3165. "uid": "Prometheus"
  3166. },
  3167. "disableTextWrap": false,
  3168. "editorMode": "builder",
  3169. "expr": "histogram_quantile(0.99, sum by(le, model_instance_name) (rate(gpustack:request_decode_time_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  3170. "fullMetaSearch": false,
  3171. "includeNullMetadata": false,
  3172. "instant": false,
  3173. "legendFormat": "P99({{model_instance_name}})",
  3174. "range": true,
  3175. "refId": "A",
  3176. "useBackend": false
  3177. },
  3178. {
  3179. "datasource": {
  3180. "type": "prometheus",
  3181. "uid": "Prometheus"
  3182. },
  3183. "disableTextWrap": false,
  3184. "editorMode": "builder",
  3185. "expr": "histogram_quantile(0.95, sum by(le, model_instance_name) (rate(gpustack:request_decode_time_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  3186. "fullMetaSearch": false,
  3187. "hide": false,
  3188. "includeNullMetadata": false,
  3189. "instant": false,
  3190. "legendFormat": "P95({{model_instance_name}})",
  3191. "range": true,
  3192. "refId": "B",
  3193. "useBackend": false
  3194. },
  3195. {
  3196. "datasource": {
  3197. "type": "prometheus",
  3198. "uid": "Prometheus"
  3199. },
  3200. "disableTextWrap": false,
  3201. "editorMode": "builder",
  3202. "expr": "histogram_quantile(0.9, sum by(le, model_instance_name) (rate(gpustack:request_decode_time_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  3203. "fullMetaSearch": false,
  3204. "hide": false,
  3205. "includeNullMetadata": false,
  3206. "instant": false,
  3207. "legendFormat": "P90({{model_instance_name}})",
  3208. "range": true,
  3209. "refId": "C",
  3210. "useBackend": false
  3211. },
  3212. {
  3213. "datasource": {
  3214. "type": "prometheus",
  3215. "uid": "Prometheus"
  3216. },
  3217. "disableTextWrap": false,
  3218. "editorMode": "builder",
  3219. "expr": "histogram_quantile(0.5, sum by(le, model_instance_name) (rate(gpustack:request_decode_time_seconds_bucket{cluster_name=\"$cluster_name\",model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])))",
  3220. "fullMetaSearch": false,
  3221. "hide": false,
  3222. "includeNullMetadata": false,
  3223. "instant": false,
  3224. "legendFormat": "P50({{model_instance_name}})",
  3225. "range": true,
  3226. "refId": "D",
  3227. "useBackend": false
  3228. },
  3229. {
  3230. "datasource": {
  3231. "type": "prometheus",
  3232. "uid": "Prometheus"
  3233. },
  3234. "editorMode": "code",
  3235. "expr": "rate(gpustack:request_decode_time_seconds_sum{cluster_name=\"$cluster_name\",model_name=\"$model_name\",model_instance_name=~\"$model_instance_name\"}[$__rate_interval])\n/\nrate(gpustack:request_decode_time_seconds_count{cluster_name=\"$cluster_name\",model_name=\"$model_name\",model_instance_name=~\"$model_instance_name\"}[$__rate_interval])",
  3236. "hide": false,
  3237. "instant": false,
  3238. "legendFormat": "Mean({{model_instance_name}})",
  3239. "range": true,
  3240. "refId": "E"
  3241. }
  3242. ],
  3243. "title": "Request Decode Time",
  3244. "type": "timeseries"
  3245. },
  3246. {
  3247. "datasource": {
  3248. "default": true,
  3249. "type": "prometheus"
  3250. },
  3251. "fieldConfig": {
  3252. "defaults": {
  3253. "color": {
  3254. "mode": "palette-classic"
  3255. },
  3256. "custom": {
  3257. "axisBorderShow": false,
  3258. "axisCenteredZero": false,
  3259. "axisColorMode": "text",
  3260. "axisLabel": "",
  3261. "axisPlacement": "auto",
  3262. "barAlignment": 0,
  3263. "barWidthFactor": 0.6,
  3264. "drawStyle": "line",
  3265. "fillOpacity": 0,
  3266. "gradientMode": "none",
  3267. "hideFrom": {
  3268. "legend": false,
  3269. "tooltip": false,
  3270. "viz": false
  3271. },
  3272. "insertNulls": false,
  3273. "lineInterpolation": "linear",
  3274. "lineWidth": 1,
  3275. "pointSize": 5,
  3276. "scaleDistribution": {
  3277. "type": "linear"
  3278. },
  3279. "showPoints": "never",
  3280. "showValues": false,
  3281. "spanNulls": false,
  3282. "stacking": {
  3283. "group": "A",
  3284. "mode": "none"
  3285. },
  3286. "thresholdsStyle": {
  3287. "mode": "off"
  3288. }
  3289. },
  3290. "mappings": [],
  3291. "thresholds": {
  3292. "mode": "absolute",
  3293. "steps": [
  3294. {
  3295. "color": "green",
  3296. "value": 0
  3297. },
  3298. {
  3299. "color": "red",
  3300. "value": 80
  3301. }
  3302. ]
  3303. },
  3304. "unit": "percentunit"
  3305. },
  3306. "overrides": []
  3307. },
  3308. "gridPos": {
  3309. "h": 8,
  3310. "w": 12,
  3311. "x": 12,
  3312. "y": 82
  3313. },
  3314. "id": 46,
  3315. "options": {
  3316. "legend": {
  3317. "calcs": [
  3318. "last",
  3319. "max"
  3320. ],
  3321. "displayMode": "table",
  3322. "placement": "bottom",
  3323. "showLegend": true
  3324. },
  3325. "tooltip": {
  3326. "hideZeros": false,
  3327. "mode": "single",
  3328. "sort": "none"
  3329. }
  3330. },
  3331. "pluginVersion": "12.2.0",
  3332. "targets": [
  3333. {
  3334. "datasource": {
  3335. "type": "prometheus",
  3336. "uid": "Prometheus"
  3337. },
  3338. "disableTextWrap": false,
  3339. "editorMode": "code",
  3340. "expr": "gpustack:spec_decode_accept_rate{cluster_name=\"$cluster_name\",model_name=\"$model_name\",model_instance_name=~\"$model_instance_name\"}",
  3341. "fullMetaSearch": false,
  3342. "includeNullMetadata": true,
  3343. "instant": false,
  3344. "legendFormat": "{{model_instance_name}} (SGLang)",
  3345. "range": true,
  3346. "refId": "A",
  3347. "useBackend": false
  3348. },
  3349. {
  3350. "datasource": {
  3351. "type": "prometheus",
  3352. "uid": "Prometheus"
  3353. },
  3354. "disableTextWrap": false,
  3355. "editorMode": "code",
  3356. "expr": "rate(gpustack:spec_decode_accepted_tokens_total{cluster_name=\"$cluster_name\",model_name=\"$model_name\",model_instance_name=~\"$model_instance_name\"}[$__rate_interval]) / rate(gpustack:spec_decode_draft_tokens_total{cluster_name=\"$cluster_name\",model_name=\"$model_name\",model_instance_name=~\"$model_instance_name\"}[$__rate_interval])",
  3357. "fullMetaSearch": false,
  3358. "includeNullMetadata": true,
  3359. "instant": false,
  3360. "legendFormat": "{{model_instance_name}} (vLLM)",
  3361. "range": true,
  3362. "refId": "B",
  3363. "useBackend": false
  3364. }
  3365. ],
  3366. "title": "Spec Decode Accept Rate",
  3367. "type": "timeseries",
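  3368. "description": "Speculative decoding acceptance rate per model instance: reported directly for SGLang, and computed as accepted tokens divided by draft tokens for vLLM."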
  3369. },
  3370. {
  3371. "datasource": {
  3372. "type": "prometheus",
  3373. "uid": "prometheus"
  3374. },
  3375. "description": "Heatmap of the number of parallel completions (n) requested per request.",
  3376. "fieldConfig": {
  3377. "defaults": {
  3378. "custom": {
  3379. "hideFrom": {
  3380. "legend": false,
  3381. "tooltip": false,
  3382. "viz": false
  3383. },
  3384. "scaleDistribution": {
  3385. "type": "linear"
  3386. }
  3387. }
  3388. },
  3389. "overrides": []
  3390. },
  3391. "gridPos": {
  3392. "h": 8,
  3393. "w": 12,
  3394. "x": 0,
  3395. "y": 90
  3396. },
  3397. "id": 48,
  3398. "options": {
  3399. "calculate": false,
  3400. "cellGap": 1,
  3401. "cellValues": {
  3402. "unit": "none"
  3403. },
  3404. "color": {
  3405. "exponent": 0.5,
  3406. "fill": "dark-orange",
  3407. "min": 0,
  3408. "mode": "scheme",
  3409. "reverse": false,
  3410. "scale": "exponential",
  3411. "scheme": "Spectral",
  3412. "steps": 64
  3413. },
  3414. "exemplars": {
  3415. "color": "rgba(255,0,255,0.7)"
  3416. },
  3417. "filterValues": {
  3418. "le": 1e-09
  3419. },
  3420. "legend": {
  3421. "show": true
  3422. },
  3423. "rowsFrame": {
  3424. "layout": "auto",
  3425. "value": "Request count"
  3426. },
  3427. "tooltip": {
  3428. "mode": "single",
  3429. "showColorScale": false,
  3430. "yHistogram": true
  3431. },
  3432. "yAxis": {
  3433. "axisLabel": "Requested N",
  3434. "axisPlacement": "left",
  3435. "reverse": false,
  3436. "unit": "none"
  3437. }
  3438. },
  3439. "pluginVersion": "12.2.0",
  3440. "targets": [
  3441. {
  3442. "datasource": {
  3443. "type": "prometheus",
  3444. "uid": "Prometheus"
  3445. },
  3446. "disableTextWrap": false,
  3447. "editorMode": "builder",
  3448. "expr": "sum by(le) (increase(gpustack:request_params_n_bucket{model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\", cluster_name=\"$cluster_name\"}[$__rate_interval]))",
  3449. "format": "heatmap",
  3450. "fullMetaSearch": false,
  3451. "includeNullMetadata": true,
  3452. "instant": false,
  3453. "legendFormat": "{{le}}",
  3454. "range": true,
  3455. "refId": "A",
  3456. "useBackend": false
  3457. }
  3458. ],
  3459. "title": "Request Params N",
  3460. "type": "heatmap"
  3461. },
  3462. {
  3463. "datasource": {
  3464. "type": "prometheus",
  3465. "uid": "prometheus"
  3466. },
  3467. "description": "Heatmap of the max_tokens value requested per request.",
  3468. "fieldConfig": {
  3469. "defaults": {
  3470. "custom": {
  3471. "hideFrom": {
  3472. "legend": false,
  3473. "tooltip": false,
  3474. "viz": false
  3475. },
  3476. "scaleDistribution": {
  3477. "type": "linear"
  3478. }
  3479. }
  3480. },
  3481. "overrides": []
  3482. },
  3483. "gridPos": {
  3484. "h": 8,
  3485. "w": 12,
  3486. "x": 12,
  3487. "y": 90
  3488. },
  3489. "id": 49,
  3490. "options": {
  3491. "calculate": false,
  3492. "cellGap": 1,
  3493. "cellValues": {
  3494. "unit": "none"
  3495. },
  3496. "color": {
  3497. "exponent": 0.5,
  3498. "fill": "dark-orange",
  3499. "min": 0,
  3500. "mode": "scheme",
  3501. "reverse": false,
  3502. "scale": "exponential",
  3503. "scheme": "Spectral",
  3504. "steps": 64
  3505. },
  3506. "exemplars": {
  3507. "color": "rgba(255,0,255,0.7)"
  3508. },
  3509. "filterValues": {
  3510. "le": 1e-09
  3511. },
  3512. "legend": {
  3513. "show": true
  3514. },
  3515. "rowsFrame": {
  3516. "layout": "auto",
  3517. "value": "Request count"
  3518. },
  3519. "tooltip": {
  3520. "mode": "single",
  3521. "showColorScale": false,
  3522. "yHistogram": true
  3523. },
  3524. "yAxis": {
  3525. "axisLabel": "Requested Max Tokens",
  3526. "axisPlacement": "left",
  3527. "reverse": false,
  3528. "unit": "none"
  3529. }
  3530. },
  3531. "pluginVersion": "12.2.0",
  3532. "targets": [
  3533. {
  3534. "datasource": {
  3535. "type": "prometheus",
  3536. "uid": "Prometheus"
  3537. },
  3538. "disableTextWrap": false,
  3539. "editorMode": "builder",
  3540. "expr": "sum by(le) (increase(gpustack:request_params_max_tokens_bucket{model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\", cluster_name=\"$cluster_name\"}[$__rate_interval]))",
  3541. "format": "heatmap",
  3542. "fullMetaSearch": false,
  3543. "includeNullMetadata": true,
  3544. "instant": false,
  3545. "legendFormat": "{{le}}",
  3546. "range": true,
  3547. "refId": "A",
  3548. "useBackend": false
  3549. }
  3550. ],
  3551. "title": "Request Max Tokens",
  3552. "type": "heatmap"
  3553. },
  3554. {
  3555. "datasource": {
  3556. "type": "prometheus",
  3557. "uid": "prometheus"
  3558. },
  3559. "description": "Number of generation tokens processed per second.",
  3560. "fieldConfig": {
  3561. "defaults": {
  3562. "color": {
  3563. "mode": "palette-classic"
  3564. },
  3565. "custom": {
  3566. "axisBorderShow": false,
  3567. "axisCenteredZero": false,
  3568. "axisColorMode": "text",
  3569. "axisLabel": "",
  3570. "axisPlacement": "auto",
  3571. "barAlignment": 0,
  3572. "barWidthFactor": 0.6,
  3573. "drawStyle": "line",
  3574. "fillOpacity": 0,
  3575. "gradientMode": "none",
  3576. "hideFrom": {
  3577. "legend": false,
  3578. "tooltip": false,
  3579. "viz": false
  3580. },
  3581. "insertNulls": false,
  3582. "lineInterpolation": "linear",
  3583. "lineWidth": 1,
  3584. "pointSize": 5,
  3585. "scaleDistribution": {
  3586. "type": "linear"
  3587. },
  3588. "showPoints": "never",
  3589. "showValues": false,
  3590. "spanNulls": false,
  3591. "stacking": {
  3592. "group": "A",
  3593. "mode": "none"
  3594. },
  3595. "thresholdsStyle": {
  3596. "mode": "off"
  3597. }
  3598. },
  3599. "mappings": [],
  3600. "thresholds": {
  3601. "mode": "absolute",
  3602. "steps": [
  3603. {
  3604. "color": "green",
  3605. "value": 0
  3606. },
  3607. {
  3608. "color": "red",
  3609. "value": 80
  3610. }
  3611. ]
  3612. }
  3613. },
  3614. "overrides": []
  3615. },
  3616. "gridPos": {
  3617. "h": 8,
  3618. "w": 12,
  3619. "x": 12,
  3620. "y": 34
  3621. },
  3622. "id": 47,
  3623. "options": {
  3624. "legend": {
  3625. "calcs": [
  3626. "last",
  3627. "max"
  3628. ],
  3629. "displayMode": "table",
  3630. "placement": "bottom",
  3631. "showLegend": true
  3632. },
  3633. "tooltip": {
  3634. "hideZeros": false,
  3635. "mode": "single",
  3636. "sort": "none"
  3637. }
  3638. },
  3639. "pluginVersion": "12.2.0",
  3640. "targets": [
  3641. {
  3642. "datasource": {
  3643. "type": "prometheus",
  3644. "uid": "Prometheus"
  3645. },
  3646. "disableTextWrap": false,
  3647. "editorMode": "code",
  3648. "expr": "rate(gpustack:generation_tokens_total{cluster_name=\"$cluster_name\", model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval])",
  3649. "fullMetaSearch": false,
  3650. "includeNullMetadata": false,
  3651. "instant": false,
  3652. "legendFormat": "Generation Tokens/Sec {{model_instance_name}}",
  3653. "range": true,
  3654. "refId": "A",
  3655. "useBackend": false
  3656. },
  3657. {
  3658. "datasource": {
  3659. "type": "prometheus",
  3660. "uid": "Prometheus"
  3661. },
  3662. "disableTextWrap": false,
  3663. "editorMode": "code",
  3664. "expr": "sum(rate(gpustack:generation_tokens_total{cluster_name=\"$cluster_name\", model_name=\"$model_name\", model_instance_name=~\"$model_instance_name\"}[$__rate_interval]))",
  3665. "fullMetaSearch": false,
  3666. "hide": false,
  3667. "includeNullMetadata": false,
  3668. "instant": false,
  3669. "legendFormat": "Generation Tokens/Sec (Total)",
  3670. "range": true,
  3671. "refId": "B",
  3672. "useBackend": false
  3673. }
  3674. ],
  3675. "title": "Generation Tokens Throughput",
  3676. "type": "timeseries"
  3677. }
  3678. ],
  3679. "preload": false,
  3680. "refresh": "",
  3681. "schemaVersion": 42,
  3682. "tags": [],
  3683. "templating": {
  3684. "list": [
  3685. {
  3686. "current": {
  3687. "text": "",
  3688. "value": ""
  3689. },
  3690. "definition": "query_result(gpustack:cluster_info)",
  3691. "label": "Cluster",
  3692. "name": "cluster_name",
  3693. "options": [],
  3694. "query": {
  3695. "qryType": 1,
  3696. "query": "query_result(gpustack:cluster_info)",
  3697. "refId": "PrometheusVariableQueryEditor-VariableQuery"
  3698. },
  3699. "refresh": 2,
  3700. "regex": "/cluster_name=\"([^\"]+)\"/",
  3701. "type": "query"
  3702. },
  3703. {
  3704. "current": {
  3705. "text": "",
  3706. "value": ""
  3707. },
  3708. "definition": "query_result(gpustack:model_info{cluster_name=\"$cluster_name\"})",
  3709. "label": "Model",
  3710. "name": "model_name",
  3711. "options": [],
  3712. "query": {
  3713. "qryType": 1,
  3714. "query": "query_result(gpustack:model_info{cluster_name=\"$cluster_name\"})",
  3715. "refId": "PrometheusVariableQueryEditor-VariableQuery"
  3716. },
  3717. "refresh": 2,
  3718. "regex": "/model_name=\"([^\"]+)\"/",
  3719. "type": "query"
  3720. },
  3721. {
  3722. "allValue": ".*",
  3723. "current": {
  3724. "text": "All",
  3725. "value": "$__all"
  3726. },
  3727. "definition": "query_result(gpustack:model_instance_status{model_name=\"$model_name\", cluster_name=\"$cluster_name\"})",
  3728. "includeAll": true,
  3729. "label": "Model Instance",
  3730. "name": "model_instance_name",
  3731. "options": [],
  3732. "query": {
  3733. "qryType": 1,
  3734. "query": "query_result(gpustack:model_instance_status{model_name=\"$model_name\", cluster_name=\"$cluster_name\"})",
  3735. "refId": "PrometheusVariableQueryEditor-VariableQuery"
  3736. },
  3737. "refresh": 2,
  3738. "regex": "/model_instance_name=\"([^\"]+)\"/",
  3739. "type": "query"
  3740. }
  3741. ]
  3742. },
  3743. "time": {
  3744. "from": "now-5m",
  3745. "to": "now"
  3746. },
  3747. "timepicker": {},
  3748. "timezone": "",
  3749. "title": "GPUStack Model",
  3750. "uid": "gpustack-model",
  3751. "version": 2
  3752. }