Эх сурвалжийг харах

修复标注平台下载问题

lxylxy123321 2 цаг өмнө
parent
commit
04816eeb17

+ 19 - 3
backend/app/services/annotation_platform_service.py

@@ -265,20 +265,36 @@ async def import_project_dataset(
     # 3. 通过独立的下载接口获取文件(文档 4.6 节)
     await get_token()
     base_url = _get_base_url()
+    download_url = f"{base_url}/api/v1/open/datasets/downloads/{download_token}"
 
     async with httpx.AsyncClient(timeout=120) as client:
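+        # Disable automatic redirects on the initial request; redirects are followed manually below so the auth headers are sent on every hop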
         resp = await client.get(
-            f"{base_url}/api/v1/open/datasets/downloads/{download_token}",
+            download_url,
             headers=_auth_headers(),
-            follow_redirects=True,
+            follow_redirects=False,
         )
+        # Follow redirects manually (at most 5 hops), re-sending the auth headers on each request
+        redirect_count = 0
+        while resp.is_redirect and redirect_count < 5:
+            redirect_url = resp.next_request.url
+            logger.info(f"Download redirect to: {redirect_url}")
+            resp = await client.get(
+                str(redirect_url),
+                headers=_auth_headers(),
+                follow_redirects=False,
+            )
+            redirect_count += 1
         resp.raise_for_status()
         file_content = resp.content
 
     logger.info(
         f"Downloaded annotation file: {len(file_content)} bytes, "
-        f"content_type={resp.headers.get('content-type', 'unknown')}"
+        f"content_type={resp.headers.get('content-type', 'unknown')}, "
+        f"url={resp.url}, redirects={redirect_count}"
     )
+    if len(file_content) < 200:
+        logger.warning(f"Annotation file content suspiciously small: {file_content!r}")
 
     # 4. 保存到 uploads 目录
     upload_dir = settings.uploads_dir

+ 12 - 43
result.txt

@@ -1,43 +1,12 @@
-INFO:     Started server process [1]
-INFO:     Waiting for application startup.
-2026-05-28 07:52:40 | INFO     | peft-platform | JobQueue started with 2 workers
-INFO:     Application startup complete.
-INFO:     Uvicorn running on http://0.0.0.0:8010 (Press CTRL+C to quit)
-INFO:     172.20.0.4:35472 - "GET /api/v1/models/ HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35474 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35500 - "GET /api/v1/training/jobs HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35510 - "GET /api/v1/models/ HTTP/1.0" 200 OK
-INFO:     172.20.0.4:35514 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
-INFO:     127.0.0.1:46858 - "GET /health HTTP/1.1" 200 OK
-2026-05-28 07:52:46 | INFO     | peft-platform | Deleted dataset directory: /root/Fine-tuning/backend/data/processed/ms_tany0699_carBrands50
-2026-05-28 07:52:46 | INFO     | peft-platform | Deleted dataset: tany0699/carBrands50
-INFO:     172.20.0.4:58764 - "DELETE /api/v1/datasets/98dd637f-879b-4fc4-b7ea-f64238110c25 HTTP/1.0" 200 OK
-INFO:     172.20.0.4:58766 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
-2026-05-28 07:52:51 | INFO     | peft-platform | Dataset download task started: tany0699/carBrands50 (task_id=035d9cad-3c35-467b-9409-01de28520fb3)
-INFO:     172.20.0.4:58774 - "POST /api/v1/datasets/download HTTP/1.0" 200 OK
-INFO:     172.20.0.4:57858 - "GET /api/v1/datasets/download/035d9cad-3c35-467b-9409-01de28520fb3 HTTP/1.0" 200 OK
-INFO:     172.20.0.4:57866 - "GET /api/v1/datasets/download/035d9cad-3c35-467b-9409-01de28520fb3 HTTP/1.0" 200 OK
-2026-05-28 07:53:00,756 - modelscope - INFO - No subset_name specified, defaulting to the default
-INFO:     172.20.0.4:57876 - "GET /api/v1/datasets/download/035d9cad-3c35-467b-9409-01de28520fb3 HTTP/1.0" 200 OK
-2026-05-28 07:53:02,365 - modelscope - WARNING - Reusing dataset dataset_builder (/root/.cache/modelscope/hub/datasets/tany0699/carBrands50/master/data_files)
-2026-05-28 07:53:02,365 - modelscope - INFO - Generating dataset dataset_builder (/root/.cache/modelscope/hub/datasets/tany0699/carBrands50/master/data_files)
-2026-05-28 07:53:02,366 - modelscope - INFO - Loading meta-data file ...
-4398it [00:00, 40120.82it/s]
-INFO:     172.20.0.4:38498 - "GET /api/v1/datasets/download/035d9cad-3c35-467b-9409-01de28520fb3 HTTP/1.0" 200 OK
-9% INFO:     172.20.0.4:38514 - "GET /api/v1/datasets/download/035d9cad-3c35-467b-9409-01de28520fb3 HTTP/1.0" 200 OK
-40% INFO:     172.20.0.4:38520 - "GET /api/v1/datasets/download/035d9cad-3c35-467b-9409-01de28520fb3 HTTP/1.0" 200 OK
-61% INFO:     172.20.0.4:38522 - "GET /api/v1/datasets/download/035d9cad-3c35-467b-9409-01de28520fb3 HTTP/1.0" 200 OK
-73% INFO:     127.0.0.1:43340 - "GET /health HTTP/1.1" 200 OK
-84% INFO:     172.20.0.4:47586 - "GET /api/v1/datasets/download/035d9cad-3c35-467b-9409-01de28520fb3 HTTP/1.0" 200 OK
-100% INFO:     172.20.0.4:47596 - "GET /api/v1/datasets/download/035d9cad-3c35-467b-9409-01de28520fb3 HTTP/1.0" 200 OK
-INFO:     172.20.0.4:47600 - "GET /api/v1/datasets/download/035d9cad-3c35-467b-9409-01de28520fb3 HTTP/1.0" 200 OK
-2026-05-28 07:53:22 | INFO     | peft-platform | MsDataset.load() loaded split 'train': 4397 records
-2026-05-28 07:53:22 | WARNING  | peft-platform | MsDataset.load() failed for tany0699/carBrands50: name 'os' is not defined, falling back to CLI
-2026-05-28 07:53:22 | INFO     | peft-platform | Fallback CLI: modelscope download --dataset tany0699/carBrands50 --local_dir /root/Fine-tuning/backend/data/processed/ms_tany0699_carBrands50
-INFO:     172.20.0.4:43150 - "GET /api/v1/datasets/download/035d9cad-3c35-467b-9409-01de28520fb3 HTTP/1.0" 200 OK
-2026-05-28 07:53:27 | INFO     | peft-platform | CLI downloaded 9 files to /root/Fine-tuning/backend/data/processed/ms_tany0699_carBrands50
-2026-05-28 07:53:27 | INFO     | peft-platform | Selected data file: /root/Fine-tuning/backend/data/processed/ms_tany0699_carBrands50/train.csv (size=140505)
-2026-05-28 07:53:27 | INFO     | peft-platform | Dataset downloaded: tany0699/carBrands50 (4397 records)
-INFO:     172.20.0.4:43156 - "GET /api/v1/datasets/download/035d9cad-3c35-467b-9409-01de28520fb3 HTTP/1.0" 200 OK
-INFO:     172.20.0.4:43160 - "GET /api/v1/datasets/ HTTP/1.0" 200 OK
-INFO:     172.20.0.4:43172 - "GET /api/v1/datasets/4eb51319-31aa-4f8c-a770-0b94294d0b5a/preview?rows=10 HTTP/1.0" 200 OK
+2026-05-28 08:26:28 | INFO     | httpx | HTTP Request: POST http://192.168.92.61:8003/api/v1/open/auth/token "HTTP/1.1 200 OK"
+2026-05-28 08:26:28 | INFO     | httpx | HTTP Request: GET http://192.168.92.61:8003/api/v1/open/projects?page=1&page_size=20 "HTTP/1.1 200 OK"
+INFO:     172.20.0.4:53778 - "GET /api/v1/annotation-platform/projects?page=1&page_size=20 HTTP/1.0" 200 OK
+2026-05-28 08:26:32 | INFO     | httpx | HTTP Request: GET http://192.168.92.61:8003/api/v1/open/projects/proj_2e8e2373469c "HTTP/1.1 200 OK"
+INFO:     172.20.0.4:53786 - "GET /api/v1/annotation-platform/projects/proj_2e8e2373469c HTTP/1.0" 200 OK
+2026-05-28 08:26:35 | INFO     | httpx | HTTP Request: POST http://192.168.92.61:8003/api/v1/open/projects/proj_2e8e2373469c/datasets/download "HTTP/1.1 200 OK"
+2026-05-28 08:26:35 | INFO     | peft-platform | Annotation export (completed_only=True): total_exported=4, file_url=/api/v1/open/datasets/downloads/dl_12fa17c6d874, file_name=proj_2e8e2373469c_alpaca_20260528_082635.json
+2026-05-28 08:26:35 | INFO     | httpx | HTTP Request: GET http://192.168.92.61:8003/api/v1/open/datasets/downloads/dl_12fa17c6d874 "HTTP/1.1 200 OK"
+2026-05-28 08:26:35 | INFO     | peft-platform | Downloaded annotation file: 2 bytes, content_type=application/json
+2026-05-28 08:26:35 | INFO     | peft-platform | Annotation file converted: 计算机实体标注_.jsonl, record_count=0
+2026-05-28 08:26:35 | INFO     | peft-platform | Imported dataset from annotation platform: proj_2e8e2373469c -> 计算机实体标注_.jsonl (0 records)
+INFO:     172.20.0.4:49930 - "POST /api/v1/annotation-platform/projects/proj_2e8e2373469c/import?project_name=%E8%AE%A1%E7%AE%97%E6%9C%BA%E5%AE%9E%E4%BD%93%E6%A0%87%E6%B3%A8%20&format=alpaca HTTP/1.0" 200 OK