result.txt 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. lq@lq:~/Fine-tuning$ sudo docker logs -f finetune-backend
  2. => Syncing backend code to compute node 192.168.91.253 ...
  3. Warning: Permanently added '192.168.91.253' (ED25519) to the list of known hosts.
  4. sending incremental file list
  5. ./
  6. .dockerignore
  7. .env.docker
  8. .env.example
  9. .python-version
  10. Dockerfile
  11. entrypoint.sh
  12. main.py
  13. pyproject.toml
  14. requirements.txt
  15. app/
  16. app/__init__.py
  17. app/config.py
  18. app/__pycache__/__init__.cpython-310.pyc
  19. app/__pycache__/config.cpython-310.pyc
  20. app/api/
  21. app/api/__init__.py
  22. app/api/api_keys.py
  23. app/api/auth.py
  24. app/api/datasets.py
  25. app/api/deployment.py
  26. app/api/evaluation.py
  27. app/api/inference.py
  28. app/api/models.py
  29. app/api/sample_center.py
  30. app/api/training.py
  31. app/api/__pycache__/__init__.cpython-310.pyc
  32. app/api/__pycache__/api_keys.cpython-310.pyc
  33. app/api/__pycache__/auth.cpython-310.pyc
  34. app/api/__pycache__/datasets.cpython-310.pyc
  35. app/api/__pycache__/deployment.cpython-310.pyc
  36. app/api/__pycache__/evaluation.cpython-310.pyc
  37. app/api/__pycache__/inference.cpython-310.pyc
  38. app/api/__pycache__/models.cpython-310.pyc
  39. app/api/__pycache__/sample_center.cpython-310.pyc
  40. app/api/__pycache__/training.cpython-310.pyc
  41. app/core/
  42. app/core/__init__.py
  43. app/core/auth.py
  44. app/core/background_tasks.py
  45. app/core/db.py
  46. app/core/deploy_server_template.py
  47. app/core/inference_worker.py
  48. app/core/job_queue.py
  49. app/core/logging.py
  50. app/core/remote_deploy.py
  51. app/core/remote_eval.py
  52. app/core/remote_executor.py
  53. app/core/security.py
  54. app/core/sso_client.py
  55. app/core/websocket.py
  56. app/core/__pycache__/
  57. app/core/__pycache__/__init__.cpython-310.pyc
  58. app/core/__pycache__/auth.cpython-310.pyc
  59. app/core/__pycache__/background_tasks.cpython-310.pyc
  60. app/core/__pycache__/db.cpython-310.pyc
  61. app/core/__pycache__/job_queue.cpython-310.pyc
  62. app/core/__pycache__/logging.cpython-310.pyc
  63. app/core/__pycache__/remote_deploy.cpython-310.pyc
  64. app/core/__pycache__/remote_eval.cpython-310.pyc
  65. app/core/__pycache__/remote_executor.cpython-310.pyc
  66. app/core/__pycache__/security.cpython-310.pyc
  67. app/core/__pycache__/sso_client.cpython-310.pyc
  68. app/core/__pycache__/websocket.cpython-310.pyc
  69. app/engines/
  70. app/engines/__init__.py
  71. app/engines/__main__.py
  72. app/engines/base.py
  73. app/engines/multimodal_engine.py
  74. app/engines/remote_train.py
  75. app/engines/text_engine.py
  76. app/engines/vision_engine.py
  77. app/engines/__pycache__/__init__.cpython-310.pyc
  78. app/engines/__pycache__/base.cpython-310.pyc
  79. app/engines/__pycache__/remote_train.cpython-310.pyc
  80. app/engines/__pycache__/text_engine.cpython-310.pyc
  81. app/peft/
  82. app/peft/__init__.py
  83. app/peft/__pycache__/__init__.cpython-310.pyc
  84. app/preprocessors/
  85. app/preprocessors/__init__.py
  86. app/preprocessors/__pycache__/__init__.cpython-310.pyc
  87. app/schemas/
  88. app/schemas/__init__.py
  89. app/schemas/background_task.py
  90. app/schemas/common.py
  91. app/schemas/dataset.py
  92. app/schemas/deployment.py
  93. app/schemas/evaluation.py
  94. app/schemas/model.py
  95. app/schemas/model_test.py
  96. app/schemas/sample_center.py
  97. app/schemas/training.py
  98. app/schemas/__pycache__/__init__.cpython-310.pyc
  99. app/schemas/__pycache__/background_task.cpython-310.pyc
  100. app/schemas/__pycache__/common.cpython-310.pyc
  101. app/schemas/__pycache__/dataset.cpython-310.pyc
  102. app/schemas/__pycache__/deployment.cpython-310.pyc
  103. app/schemas/__pycache__/evaluation.cpython-310.pyc
  104. app/schemas/__pycache__/model.cpython-310.pyc
  105. app/schemas/__pycache__/model_test.cpython-310.pyc
  106. app/schemas/__pycache__/sample_center.cpython-310.pyc
  107. app/schemas/__pycache__/training.cpython-310.pyc
  108. app/services/
  109. app/services/api_key_service.py
  110. app/services/dataset_service.py
  111. app/services/deploy_service.py
  112. app/services/eval_service.py
  113. app/services/inference_service.py
  114. app/services/model_service.py
  115. app/services/model_test_service.py
  116. app/services/sample_center_service.py
  117. app/services/training_service.py
  118. app/services/__pycache__/api_key_service.cpython-310.pyc
  119. app/services/__pycache__/dataset_service.cpython-310.pyc
  120. app/services/__pycache__/deploy_service.cpython-310.pyc
  121. app/services/__pycache__/eval_service.cpython-310.pyc
  122. app/services/__pycache__/inference_service.cpython-310.pyc
  123. app/services/__pycache__/model_service.cpython-310.pyc
  124. app/services/__pycache__/model_test_service.cpython-310.pyc
  125. app/services/__pycache__/sample_center_service.cpython-310.pyc
  126. app/services/__pycache__/training_service.cpython-310.pyc
  127. sent 10,187 bytes received 6,962 bytes 926.97 bytes/sec
  128. total size is 518,960 speedup is 30.26
  129. => Sync done.
  130. INFO: Started server process [1]
  131. INFO: Waiting for application startup.
  132. 2026-05-26 01:48:14 | INFO | peft-platform | JobQueue started with 2 workers
  133. 2026-05-26 01:48:14 | INFO | peft-platform | Recovered 1 stale deploy tasks
  134. INFO: Application startup complete.
  135. INFO: Uvicorn running on http://0.0.0.0:8010 (Press CTRL+C to quit)
  136. INFO: 127.0.0.1:38956 - "GET /health HTTP/1.1" 200 OK
  137. INFO: 172.20.0.4:58486 - "GET /api/v1/models/ HTTP/1.0" 401 Unauthorized
  138. INFO: 172.20.0.4:58488 - "POST /api/v1/auth/refresh HTTP/1.0" 200 OK
  139. INFO: 172.20.0.4:58504 - "GET /api/v1/models/ HTTP/1.0" 200 OK
  140. INFO: 172.20.0.4:58512 - "GET /api/v1/models/ HTTP/1.0" 200 OK
  141. INFO: 172.20.0.4:58522 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  142. INFO: 172.20.0.4:58518 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
  143. INFO: 172.20.0.4:58524 - "GET /api/v1/models/ HTTP/1.0" 200 OK
  144. INFO: 172.20.0.4:58530 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
  145. INFO: 172.20.0.4:58534 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  146. INFO: 172.20.0.4:60598 - "GET /api/v1/models/ HTTP/1.0" 200 OK
  147. INFO: 172.20.0.4:60616 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  148. INFO: 172.20.0.4:60612 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
  149. INFO: 172.20.0.4:60624 - "GET /api/v1/models/ HTTP/1.0" 200 OK
  150. INFO: 172.20.0.4:60630 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
  151. INFO: 172.20.0.4:60632 - "GET /api/v1/models/ HTTP/1.0" 200 OK
  152. INFO: 172.20.0.4:60656 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
  153. INFO: 172.20.0.4:60640 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  154. INFO: 172.20.0.4:60658 - "GET /api/v1/inference/adapters HTTP/1.0" 200 OK
  155. INFO: 172.20.0.4:60682 - "GET /api/v1/api-keys/ HTTP/1.0" 200 OK
  156. INFO: 172.20.0.4:60674 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
  157. INFO: 172.20.0.4:60696 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  158. INFO: 172.20.0.4:60708 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  159. INFO: 172.20.0.4:48096 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  160. 2026-05-26 01:48:37 | INFO | peft-platform | Serve task started: job=3819e7af-6c9b-4fde-88d0-35784e6afeda port=8100 (task_id=589e0e7b-ff1f-4c15-aed9-9eb562718242)
  161. INFO: 172.20.0.4:48102 - "POST /api/v1/deployment/serve HTTP/1.0" 200 OK
  162. 2026-05-26 01:50:37 | INFO | peft-platform | Remote worker launched: task=589e0e7b-ff1f-4c15-aed9-9eb562718242 port=8100 pid=92043
  163. INFO: 127.0.0.1:34844 - "GET /health HTTP/1.1" 200 OK
  164. INFO: 127.0.0.1:51118 - "GET /health HTTP/1.1" 200 OK
  165. INFO: 127.0.0.1:58876 - "GET /health HTTP/1.1" 200 OK
  166. INFO: 172.20.0.4:48112 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  167. INFO: 172.20.0.4:44574 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  168. INFO: 172.20.0.4:38862 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  169. INFO: 172.20.0.4:35560 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  170. INFO: 172.20.0.4:35568 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  171. INFO: 172.20.0.4:35580 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  172. INFO: 172.20.0.4:40030 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  173. INFO: 172.20.0.4:40050 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  174. INFO: 172.20.0.4:40036 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  175. INFO: 172.20.0.4:40058 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  176. INFO: 172.20.0.4:40060 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  177. INFO: 172.20.0.4:40094 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  178. INFO: 172.20.0.4:40100 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  179. INFO: 172.20.0.4:40106 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  180. INFO: 172.20.0.4:40080 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  181. INFO: 172.20.0.4:40120 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  182. INFO: 172.20.0.4:40064 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  183. INFO: 172.20.0.4:40122 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  184. INFO: 172.20.0.4:40132 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  185. INFO: 172.20.0.4:40134 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  186. INFO: 172.20.0.4:40154 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  187. INFO: 172.20.0.4:40144 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  188. INFO: 172.20.0.4:40166 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  189. INFO: 172.20.0.4:40168 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  190. INFO: 172.20.0.4:40180 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  191. INFO: 172.20.0.4:40192 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  192. INFO: 172.20.0.4:40206 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  193. INFO: 172.20.0.4:40212 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  194. INFO: 172.20.0.4:40218 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  195. INFO: 172.20.0.4:40232 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  196. INFO: 172.20.0.4:40216 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  197. INFO: 172.20.0.4:40254 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  198. INFO: 172.20.0.4:40238 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  199. INFO: 172.20.0.4:40236 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  200. INFO: 172.20.0.4:40272 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  201. INFO: 172.20.0.4:40270 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  202. INFO: 172.20.0.4:40286 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  203. INFO: 172.20.0.4:40296 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  204. INFO: 172.20.0.4:40298 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  205. INFO: 172.20.0.4:40314 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  206. INFO: 172.20.0.4:40320 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  207. INFO: 172.20.0.4:40322 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  208. INFO: 172.20.0.4:40330 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  209. INFO: 172.20.0.4:40334 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  210. INFO: 172.20.0.4:40346 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  211. INFO: 172.20.0.4:40358 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  212. INFO: 172.20.0.4:40372 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  213. INFO: 172.20.0.4:40380 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  214. INFO: 172.20.0.4:40394 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  215. INFO: 172.20.0.4:40406 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  216. INFO: 172.20.0.4:40414 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  217. INFO: 172.20.0.4:40430 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  218. INFO: 172.20.0.4:40438 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  219. 2026-05-26 01:51:00 | INFO | peft-platform | Worker ready: task=589e0e7b-ff1f-4c15-aed9-9eb562718242 (after ~5s)
  220. INFO: 127.0.0.1:55970 - "GET /health HTTP/1.1" 200 OK
  221. INFO: 172.20.0.4:40448 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  222. INFO: 172.20.0.4:35594 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  223. INFO: 172.20.0.4:36428 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  224. INFO: 172.20.0.4:45976 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  225. INFO: 172.20.0.4:43664 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  226. INFO: 172.20.0.4:43670 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  227. INFO: 127.0.0.1:34970 - "GET /health HTTP/1.1" 200 OK
  228. INFO: 172.20.0.4:45990 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  229. INFO: 172.20.0.4:46010 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
  230. INFO: 172.20.0.4:46004 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  231. INFO: 172.20.0.4:33412 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  232. INFO: 172.20.0.4:54884 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  233. INFO: 172.20.0.4:54886 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  234. INFO: 172.20.0.4:54896 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  235. INFO: 172.20.0.4:54908 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  236. INFO: 172.20.0.4:54918 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  237. INFO: 172.20.0.4:54928 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  238. INFO: 172.20.0.4:54940 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  239. INFO: 172.20.0.4:34010 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  240. INFO: 172.20.0.4:58916 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  241. INFO: 127.0.0.1:38650 - "GET /health HTTP/1.1" 200 OK
  242. INFO: 172.20.0.4:37086 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
  243. INFO: 172.20.0.4:37088 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK