| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244 |
- lq@lq:~/Fine-tuning$ sudo docker logs -f finetune-backend
- => Syncing backend code to compute node 192.168.91.253 ...
- Warning: Permanently added '192.168.91.253' (ED25519) to the list of known hosts.
- sending incremental file list
- ./
- .dockerignore
- .env.docker
- .env.example
- .python-version
- Dockerfile
- entrypoint.sh
- main.py
- pyproject.toml
- requirements.txt
- app/
- app/__init__.py
- app/config.py
- app/__pycache__/__init__.cpython-310.pyc
- app/__pycache__/config.cpython-310.pyc
- app/api/
- app/api/__init__.py
- app/api/api_keys.py
- app/api/auth.py
- app/api/datasets.py
- app/api/deployment.py
- app/api/evaluation.py
- app/api/inference.py
- app/api/models.py
- app/api/sample_center.py
- app/api/training.py
- app/api/__pycache__/__init__.cpython-310.pyc
- app/api/__pycache__/api_keys.cpython-310.pyc
- app/api/__pycache__/auth.cpython-310.pyc
- app/api/__pycache__/datasets.cpython-310.pyc
- app/api/__pycache__/deployment.cpython-310.pyc
- app/api/__pycache__/evaluation.cpython-310.pyc
- app/api/__pycache__/inference.cpython-310.pyc
- app/api/__pycache__/models.cpython-310.pyc
- app/api/__pycache__/sample_center.cpython-310.pyc
- app/api/__pycache__/training.cpython-310.pyc
- app/core/
- app/core/__init__.py
- app/core/auth.py
- app/core/background_tasks.py
- app/core/db.py
- app/core/deploy_server_template.py
- app/core/inference_worker.py
- app/core/job_queue.py
- app/core/logging.py
- app/core/remote_deploy.py
- app/core/remote_eval.py
- app/core/remote_executor.py
- app/core/security.py
- app/core/sso_client.py
- app/core/websocket.py
- app/core/__pycache__/
- app/core/__pycache__/__init__.cpython-310.pyc
- app/core/__pycache__/auth.cpython-310.pyc
- app/core/__pycache__/background_tasks.cpython-310.pyc
- app/core/__pycache__/db.cpython-310.pyc
- app/core/__pycache__/job_queue.cpython-310.pyc
- app/core/__pycache__/logging.cpython-310.pyc
- app/core/__pycache__/remote_deploy.cpython-310.pyc
- app/core/__pycache__/remote_eval.cpython-310.pyc
- app/core/__pycache__/remote_executor.cpython-310.pyc
- app/core/__pycache__/security.cpython-310.pyc
- app/core/__pycache__/sso_client.cpython-310.pyc
- app/core/__pycache__/websocket.cpython-310.pyc
- app/engines/
- app/engines/__init__.py
- app/engines/__main__.py
- app/engines/base.py
- app/engines/multimodal_engine.py
- app/engines/remote_train.py
- app/engines/text_engine.py
- app/engines/vision_engine.py
- app/engines/__pycache__/__init__.cpython-310.pyc
- app/engines/__pycache__/base.cpython-310.pyc
- app/engines/__pycache__/remote_train.cpython-310.pyc
- app/engines/__pycache__/text_engine.cpython-310.pyc
- app/peft/
- app/peft/__init__.py
- app/peft/__pycache__/__init__.cpython-310.pyc
- app/preprocessors/
- app/preprocessors/__init__.py
- app/preprocessors/__pycache__/__init__.cpython-310.pyc
- app/schemas/
- app/schemas/__init__.py
- app/schemas/background_task.py
- app/schemas/common.py
- app/schemas/dataset.py
- app/schemas/deployment.py
- app/schemas/evaluation.py
- app/schemas/model.py
- app/schemas/model_test.py
- app/schemas/sample_center.py
- app/schemas/training.py
- app/schemas/__pycache__/__init__.cpython-310.pyc
- app/schemas/__pycache__/background_task.cpython-310.pyc
- app/schemas/__pycache__/common.cpython-310.pyc
- app/schemas/__pycache__/dataset.cpython-310.pyc
- app/schemas/__pycache__/deployment.cpython-310.pyc
- app/schemas/__pycache__/evaluation.cpython-310.pyc
- app/schemas/__pycache__/model.cpython-310.pyc
- app/schemas/__pycache__/model_test.cpython-310.pyc
- app/schemas/__pycache__/sample_center.cpython-310.pyc
- app/schemas/__pycache__/training.cpython-310.pyc
- app/services/
- app/services/api_key_service.py
- app/services/dataset_service.py
- app/services/deploy_service.py
- app/services/eval_service.py
- app/services/inference_service.py
- app/services/model_service.py
- app/services/model_test_service.py
- app/services/sample_center_service.py
- app/services/training_service.py
- app/services/__pycache__/api_key_service.cpython-310.pyc
- app/services/__pycache__/dataset_service.cpython-310.pyc
- app/services/__pycache__/deploy_service.cpython-310.pyc
- app/services/__pycache__/eval_service.cpython-310.pyc
- app/services/__pycache__/inference_service.cpython-310.pyc
- app/services/__pycache__/model_service.cpython-310.pyc
- app/services/__pycache__/model_test_service.cpython-310.pyc
- app/services/__pycache__/sample_center_service.cpython-310.pyc
- app/services/__pycache__/training_service.cpython-310.pyc
- sent 10,187 bytes received 6,962 bytes 926.97 bytes/sec
- total size is 518,960 speedup is 30.26
- => Sync done.
- INFO: Started server process [1]
- INFO: Waiting for application startup.
- 2026-05-26 01:48:14 | INFO | peft-platform | JobQueue started with 2 workers
- 2026-05-26 01:48:14 | INFO | peft-platform | Recovered 1 stale deploy tasks
- INFO: Application startup complete.
- INFO: Uvicorn running on http://0.0.0.0:8010 (Press CTRL+C to quit)
- INFO: 127.0.0.1:38956 - "GET /health HTTP/1.1" 200 OK
- INFO: 172.20.0.4:58486 - "GET /api/v1/models/ HTTP/1.0" 401 Unauthorized
- INFO: 172.20.0.4:58488 - "POST /api/v1/auth/refresh HTTP/1.0" 200 OK
- INFO: 172.20.0.4:58504 - "GET /api/v1/models/ HTTP/1.0" 200 OK
- INFO: 172.20.0.4:58512 - "GET /api/v1/models/ HTTP/1.0" 200 OK
- INFO: 172.20.0.4:58522 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
- INFO: 172.20.0.4:58518 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
- INFO: 172.20.0.4:58524 - "GET /api/v1/models/ HTTP/1.0" 200 OK
- INFO: 172.20.0.4:58530 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
- INFO: 172.20.0.4:58534 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
- INFO: 172.20.0.4:60598 - "GET /api/v1/models/ HTTP/1.0" 200 OK
- INFO: 172.20.0.4:60616 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
- INFO: 172.20.0.4:60612 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
- INFO: 172.20.0.4:60624 - "GET /api/v1/models/ HTTP/1.0" 200 OK
- INFO: 172.20.0.4:60630 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
- INFO: 172.20.0.4:60632 - "GET /api/v1/models/ HTTP/1.0" 200 OK
- INFO: 172.20.0.4:60656 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
- INFO: 172.20.0.4:60640 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
- INFO: 172.20.0.4:60658 - "GET /api/v1/inference/adapters HTTP/1.0" 200 OK
- INFO: 172.20.0.4:60682 - "GET /api/v1/api-keys/ HTTP/1.0" 200 OK
- INFO: 172.20.0.4:60674 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
- INFO: 172.20.0.4:60696 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:60708 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:48096 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- 2026-05-26 01:48:37 | INFO | peft-platform | Serve task started: job=3819e7af-6c9b-4fde-88d0-35784e6afeda port=8100 (task_id=589e0e7b-ff1f-4c15-aed9-9eb562718242)
- INFO: 172.20.0.4:48102 - "POST /api/v1/deployment/serve HTTP/1.0" 200 OK
- 2026-05-26 01:50:37 | INFO | peft-platform | Remote worker launched: task=589e0e7b-ff1f-4c15-aed9-9eb562718242 port=8100 pid=92043
- INFO: 127.0.0.1:34844 - "GET /health HTTP/1.1" 200 OK
- INFO: 127.0.0.1:51118 - "GET /health HTTP/1.1" 200 OK
- INFO: 127.0.0.1:58876 - "GET /health HTTP/1.1" 200 OK
- INFO: 172.20.0.4:48112 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:44574 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:38862 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:35560 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:35568 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:35580 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40030 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40050 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40036 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40058 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40060 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40094 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40100 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40106 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40080 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40120 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40064 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40122 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40132 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40134 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40154 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40144 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40166 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40168 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40180 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40192 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40206 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40212 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40218 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40232 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40216 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40254 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40238 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40236 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40272 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40270 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40286 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40296 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40298 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40314 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40320 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40322 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40330 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40334 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40346 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40358 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40372 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40380 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40394 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40406 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40414 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40430 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:40438 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- 2026-05-26 01:51:00 | INFO | peft-platform | Worker ready: task=589e0e7b-ff1f-4c15-aed9-9eb562718242 (after ~5s)
- INFO: 127.0.0.1:55970 - "GET /health HTTP/1.1" 200 OK
- INFO: 172.20.0.4:40448 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:35594 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:36428 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:45976 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:43664 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:43670 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 127.0.0.1:34970 - "GET /health HTTP/1.1" 200 OK
- INFO: 172.20.0.4:45990 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:46010 - "GET /api/v1/deployment/589e0e7b-ff1f-4c15-aed9-9eb562718242/status HTTP/1.0" 200 OK
- INFO: 172.20.0.4:46004 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:33412 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:54884 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:54886 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:54896 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:54908 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:54918 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:54928 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:54940 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:34010 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:58916 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 127.0.0.1:38650 - "GET /health HTTP/1.1" 200 OK
- INFO: 172.20.0.4:37086 - "GET /api/v1/deployment/services HTTP/1.0" 200 OK
- INFO: 172.20.0.4:37088 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
|