|
|
@@ -1,18 +1,66 @@
|
|
|
-2026-05-19 15:16:02 | INFO | peft-platform | Remote test result: code=1, stdout_len=0, stderr_len=408
|
|
|
-2026-05-19 15:16:02 | INFO | peft-platform | stderr (first 500): Traceback (most recent call last):
|
|
|
- File "<stdin>", line 2, in <module>
|
|
|
- File "/root/Fine-tuning/backend/app/services/model_service.py", line 7, in <module>
|
|
|
- from app.core.db import async_session, ModelCache
|
|
|
- File "/root/Fine-tuning/backend/app/core/db.py", line 3, in <module>
|
|
|
- from sqlalchemy import Column, DateTime, Float, Integer, String, Text
|
|
|
-ModuleNotFoundError: No module named 'sqlalchemy'
|
|
|
+lq@lq:~$ sudo docker logs -f finetune-backend
|
|
|
+INFO: Started server process [1]
|
|
|
+INFO: Waiting for application startup.
|
|
|
+2026-05-19 16:16:13 | INFO | peft-platform | JobQueue started with 2 workers
|
|
|
+INFO: Application startup complete.
|
|
|
+INFO: Uvicorn running on http://0.0.0.0:8010 (Press CTRL+C to quit)
|
|
|
+INFO: 127.0.0.1:41466 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 172.20.0.4:48258 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
|
|
|
+INFO: 172.20.0.4:48262 - "GET /api/v1/datasets/819f7803-ddc6-4805-bda5-d08daee9ec54/preview?rows=10 HTTP/1.0" 200 OK
|
|
|
+INFO: 172.20.0.4:55738 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
|
|
|
+2026-05-19 16:16:34 | INFO | peft-platform | Deleted dataset: yanalong/yanalong
|
|
|
+INFO: 172.20.0.4:55750 - "DELETE /api/v1/datasets/819f7803-ddc6-4805-bda5-d08daee9ec54 HTTP/1.0" 200 OK
|
|
|
+INFO: 172.20.0.4:55762 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
|
|
|
+2026-05-19 16:16:38 | WARNING | peft-platform | MsDataset.load failed: No module named 'oss2', falling back to CLI download
|
|
|
+2026-05-19 16:16:39 | ERROR | peft-platform | Dataset download failed: No training data found in downloaded dataset files
|
|
|
+INFO: 172.20.0.4:56616 - "POST /api/v1/datasets/download HTTP/1.0" 400 Bad Request
|
|
|
+INFO: 127.0.0.1:60844 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:57570 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:40418 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:57306 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:52054 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:42066 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 172.20.0.4:33186 - "GET /api/v1/models/ HTTP/1.0" 200 OK
|
|
|
+2026-05-19 16:20:15 | ERROR | peft-platform | Remote model test failed: /opt/conda/bin/python: can't open file '/tmp/remote_model_test.py': [Errno 2] No such file or directory
|
|
|
|
|
|
-2026-05-19 15:16:02 | ERROR | peft-platform | Remote model test failed: Traceback (most recent call last):
|
|
|
- File "<stdin>", line 2, in <module>
|
|
|
- File "/root/Fine-tuning/backend/app/services/model_service.py", line 7, in <module>
|
|
|
- from app.core.db import async_session, ModelCache
|
|
|
- File "/root/Fine-tuning/backend/app/core/db.py", line 3, in <module>
|
|
|
- from sqlalchemy import Column, DateTime, Float, Integer, String, Text
|
|
|
-ModuleNotFoundError: No module named 'sqlalchemy'
|
|
|
+2026-05-19 16:20:25 | ERROR | peft-platform | SSH command timeout after 10s: rm -f /tmp/remote_model_test.py
|
|
|
+INFO: 172.20.0.4:33192 - "POST /api/v1/models/test HTTP/1.0" 400 Bad Request
|
|
|
+INFO: 127.0.0.1:48394 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:48060 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:44484 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:50510 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:42126 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:37302 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:43660 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:40252 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:56026 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:37736 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:37836 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:35194 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 172.20.0.4:59118 - "GET /api/v1/models/ HTTP/1.0" 200 OK
|
|
|
+2026-05-19 16:26:35 | ERROR | peft-platform | Remote model test failed: /opt/conda/bin/python: can't open file '/tmp/remote_model_test.py': [Errno 2] No such file or directory
|
|
|
|
|
|
-INFO: 172.20.0.4:52338 - "POST /api/v1/models/test HTTP/1.0" 400 Bad Request
|
|
|
+2026-05-19 16:26:45 | ERROR | peft-platform | SSH command timeout after 10s: rm -f /tmp/remote_model_test.py
|
|
|
+INFO: 172.20.0.4:59120 - "POST /api/v1/models/test HTTP/1.0" 400 Bad Request
|
|
|
+INFO: 127.0.0.1:41352 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 127.0.0.1:40704 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: Shutting down
|
|
|
+INFO: Waiting for application shutdown.
|
|
|
+2026-05-19 16:26:49 | INFO | peft-platform | JobQueue stopped
|
|
|
+INFO: Application shutdown complete.
|
|
|
+INFO: Finished server process [1]
|
|
|
+lq@lq:~$ sudo docker logs -f finetune-backend
|
|
|
+INFO: Started server process [1]
|
|
|
+INFO: Waiting for application startup.
|
|
|
+2026-05-19 16:26:52 | INFO | peft-platform | JobQueue started with 2 workers
|
|
|
+INFO: Application startup complete.
|
|
|
+INFO: Uvicorn running on http://0.0.0.0:8010 (Press CTRL+C to quit)
|
|
|
+INFO: 127.0.0.1:56270 - "GET /health HTTP/1.1" 200 OK
|
|
|
+INFO: 172.20.0.4:51748 - "GET /api/v1/models/ HTTP/1.0" 200 OK
|
|
|
+2026-05-19 16:27:27 | ERROR | peft-platform | SSH command timeout after 10s: docker cp /tmp/_model_test_host.py finetune-trainer:/tmp/_model_test.py
|
|
|
+2026-05-19 16:27:27 | ERROR | peft-platform | docker cp failed: Command timed out after 10s
|
|
|
+2026-05-19 16:27:37 | ERROR | peft-platform | SSH command timeout after 10s: rm -f /tmp/_model_test_host.py
|
|
|
+2026-05-19 16:27:47 | ERROR | peft-platform | SSH command timeout after 10s: docker exec finetune-trainer rm -f /tmp/_model_test.py
|
|
|
+INFO: 172.20.0.4:51758 - "POST /api/v1/models/test HTTP/1.0" 400 Bad Request
|
|
|
+INFO: 127.0.0.1:44200 - "GET /health HTTP/1.1" 200 OK
|
|
|
+2026-05-19 16:28:00 | ERROR | peft-platform | Dataset download failed: No module named 'oss2'
|