Selaa lähdekoodia

完善基本模块

lxylxy123321 1 viikko sitten
vanhempi
sitoutus
3e0d7e5fd3
44 muutettua tiedostoa jossa 3870 lisäystä ja 176 poistoa
  1. 0 2
      .gitignore
  2. BIN
      backend/GeoLite2-ASN.mmdb
  3. BIN
      backend/GeoLite2-City.mmdb
  4. 1 0
      backend/app/config.py
  5. 9 0
      backend/app/main.py
  6. 4 2
      backend/app/middleware/logging.py
  7. 74 0
      backend/app/routers/discounts.py
  8. 54 0
      backend/app/routers/models.py
  9. 131 9
      backend/app/routers/public.py
  10. 46 0
      backend/app/routers/schedule.py
  11. 13 2
      backend/app/routers/scrape.py
  12. 3 1
      backend/app/routers/stats.py
  13. 25 8
      backend/app/services/geo.py
  14. 81 0
      backend/app/services/scheduler.py
  15. 35 16
      backend/app/services/scraper.py
  16. 257 0
      backend/app/utils/price_parser.py
  17. 255 0
      backend/crawl/main.py
  18. 810 0
      backend/crawl/scrape_aliyun_models.py
  19. 341 0
      backend/crawl/scrape_model_info.py
  20. 182 0
      backend/crawl/scrape_rate_limits.py
  21. 369 0
      backend/crawl/scrape_tool_prices.py
  22. 9 0
      backend/migrations/002_models.sql
  23. 15 0
      backend/migrations/003_schedule.sql
  24. 7 0
      backend/migrations/004_scrape_results_extend.sql
  25. 5 0
      backend/migrations/005_raw_data.sql
  26. 4 0
      backend/migrations/006_access_logs_org.sql
  27. 15 0
      backend/migrations/007_price_api_logs.sql
  28. 13 0
      backend/migrations/008_discounts.sql
  29. 1 0
      backend/requirements.txt
  30. 0 7
      backend/scrape_aliyun_models.py
  31. 71 0
      backend/test_price_parser.py
  32. 4 2
      frontend/src/App.tsx
  33. 57 0
      frontend/src/api.ts
  34. 17 8
      frontend/src/components/BottomNav.css
  35. 5 4
      frontend/src/components/BottomNav.tsx
  36. 7 1
      frontend/src/pages/Dashboard.css
  37. 21 10
      frontend/src/pages/Dashboard.tsx
  38. 118 0
      frontend/src/pages/Discounts.css
  39. 117 0
      frontend/src/pages/Discounts.tsx
  40. 8 8
      frontend/src/pages/Logs.tsx
  41. 1 1
      frontend/src/pages/Map.css
  42. 373 8
      frontend/src/pages/Scraper.css
  43. 293 87
      frontend/src/pages/Scraper.tsx
  44. 19 0
      frontend/src/types.ts

+ 0 - 2
.gitignore

@@ -25,8 +25,6 @@ frontend/.env.*.local
 .pytest_cache/
 .hypothesis/
 
-# Database / GeoIP (large binary, should not be committed)
-backend/GeoLite2-City.mmdb
 
 # Node
 frontend/node_modules/

BIN
backend/GeoLite2-ASN.mmdb


BIN
backend/GeoLite2-City.mmdb


+ 1 - 0
backend/app/config.py

@@ -30,6 +30,7 @@ class Settings:
         origins_raw = os.getenv("ALLOWED_ORIGINS", "http://localhost:5173")
         self.allowed_origins = [o.strip() for o in origins_raw.split(",") if o.strip()]
         self.geoip_db_path = os.getenv("GEOIP_DB_PATH", "./GeoLite2-City.mmdb")
+        self.geoip_asn_db_path = os.getenv("GEOIP_ASN_DB_PATH", "./GeoLite2-ASN.mmdb")
         self.playwright_executable = os.getenv("PLAYWRIGHT_EXECUTABLE") or None
 
         self.host = os.getenv("HOST", "0.0.0.0")

+ 9 - 0
backend/app/main.py

@@ -10,12 +10,15 @@ from app.config import settings
 from app.db import close_pool, init_pool
 from app.middleware.logging import LoggingMiddleware
 from app.services.ws_hub import hub as ws_hub  # noqa: F401
+from app.services.scheduler import start_scheduler, stop_scheduler
 
 
 @asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
     await init_pool()
+    await start_scheduler()
     yield
+    await stop_scheduler()
     await close_pool()
 
 
@@ -36,10 +39,16 @@ from app.routers import logs  # noqa: E402
 from app.routers import scrape  # noqa: E402
 from app.routers import public  # noqa: E402
 from app.routers import ws  # noqa: E402
+from app.routers import models  # noqa: E402
+from app.routers import schedule  # noqa: E402
+from app.routers import discounts  # noqa: E402
 app.include_router(stats.router, prefix="/api")
 app.include_router(logs.router, prefix="/api")
 app.include_router(scrape.router, prefix="/api")
 app.include_router(public.router, prefix="/api/public")
+app.include_router(models.router, prefix="/api")
+app.include_router(schedule.router, prefix="/api")
+app.include_router(discounts.router, prefix="/api")
 app.include_router(ws.router)
 
 

+ 4 - 2
backend/app/middleware/logging.py

@@ -34,8 +34,8 @@ class LoggingMiddleware(BaseHTTPMiddleware):
                 """
                 INSERT INTO access_logs
                     (ip, method, path, status_code, latency_ms,
-                     country, city, latitude, longitude)
-                VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
+                     country, city, latitude, longitude, org)
+                VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
                 RETURNING id, created_at
                 """,
                 ip,
@@ -47,6 +47,7 @@ class LoggingMiddleware(BaseHTTPMiddleware):
                 geo.city,
                 geo.latitude,
                 geo.longitude,
+                geo.org,
             )
 
             log_dict = {
@@ -60,6 +61,7 @@ class LoggingMiddleware(BaseHTTPMiddleware):
                 "city": geo.city,
                 "latitude": geo.latitude,
                 "longitude": geo.longitude,
+                "org": geo.org,
                 "created_at": row["created_at"].isoformat(),
             }
 

+ 74 - 0
backend/app/routers/discounts.py

@@ -0,0 +1,74 @@
+from __future__ import annotations
+
+from datetime import datetime
+from typing import List, Optional
+
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel, Field
+
+from app.db import get_pool
+
+router = APIRouter(tags=["discounts"])
+
+
+class DiscountIn(BaseModel):
+    domain: str
+    discount: float = Field(..., gt=0, le=1, description="折扣系数,如 0.8 表示八折")
+    note: Optional[str] = None
+
+
+class DiscountOut(BaseModel):
+    id: int
+    domain: str
+    discount: float
+    note: Optional[str]
+    created_at: datetime
+    updated_at: datetime
+
+
+@router.get("/discounts", response_model=List[DiscountOut])
+async def list_discounts() -> List[DiscountOut]:
+    pool = get_pool()
+    rows = await pool.fetch("SELECT * FROM discounts ORDER BY updated_at DESC")
+    return [DiscountOut(**dict(r)) for r in rows]
+
+
+@router.post("/discounts", response_model=DiscountOut, status_code=201)
+async def create_discount(body: DiscountIn) -> DiscountOut:
+    pool = get_pool()
+    row = await pool.fetchrow(
+        """
+        INSERT INTO discounts (domain, discount, note)
+        VALUES ($1, $2, $3)
+        ON CONFLICT (domain) DO UPDATE
+            SET discount = EXCLUDED.discount,
+                note = EXCLUDED.note,
+                updated_at = NOW()
+        RETURNING *
+        """,
+        body.domain, body.discount, body.note,
+    )
+    return DiscountOut(**dict(row))
+
+
+@router.put("/discounts/{discount_id}", response_model=DiscountOut)
+async def update_discount(discount_id: int, body: DiscountIn) -> DiscountOut:
+    pool = get_pool()
+    row = await pool.fetchrow(
+        """
+        UPDATE discounts SET domain=$1, discount=$2, note=$3, updated_at=NOW()
+        WHERE id=$4 RETURNING *
+        """,
+        body.domain, body.discount, body.note, discount_id,
+    )
+    if not row:
+        raise HTTPException(status_code=404, detail="不存在")
+    return DiscountOut(**dict(row))
+
+
+@router.delete("/discounts/{discount_id}", status_code=204)
+async def delete_discount(discount_id: int) -> None:
+    pool = get_pool()
+    result = await pool.execute("DELETE FROM discounts WHERE id=$1", discount_id)
+    if result == "DELETE 0":
+        raise HTTPException(status_code=404, detail="不存在")

+ 54 - 0
backend/app/routers/models.py

@@ -0,0 +1,54 @@
+from __future__ import annotations
+
+from datetime import datetime
+from typing import List
+
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+
+from app.db import get_pool
+
+router = APIRouter(tags=["models"])
+
+
+class ModelIn(BaseModel):
+    name: str
+    url: str
+
+
+class ModelOut(BaseModel):
+    id: int
+    name: str
+    url: str
+    created_at: datetime
+
+
+@router.get("/models", response_model=List[ModelOut])
+async def list_models() -> List[ModelOut]:
+    pool = get_pool()
+    async with pool.acquire() as conn:
+        rows = await conn.fetch("SELECT id, name, url, created_at FROM models ORDER BY created_at DESC")
+    return [ModelOut(**dict(r)) for r in rows]
+
+
+@router.post("/models", response_model=ModelOut, status_code=201)
+async def create_model(body: ModelIn) -> ModelOut:
+    pool = get_pool()
+    async with pool.acquire() as conn:
+        try:
+            row = await conn.fetchrow(
+                "INSERT INTO models (name, url) VALUES ($1, $2) RETURNING id, name, url, created_at",
+                body.name, body.url,
+            )
+        except Exception:
+            raise HTTPException(status_code=409, detail="该 URL 已存在")
+    return ModelOut(**dict(row))
+
+
+@router.delete("/models/{model_id}", status_code=204)
+async def delete_model(model_id: int) -> None:
+    pool = get_pool()
+    async with pool.acquire() as conn:
+        result = await conn.execute("DELETE FROM models WHERE id = $1", model_id)
+    if result == "DELETE 0":
+        raise HTTPException(status_code=404, detail="模型不存在")

+ 131 - 9
backend/app/routers/public.py

@@ -2,13 +2,16 @@ from __future__ import annotations
 
 from datetime import datetime
 from typing import List, Optional
+from urllib.parse import urlparse
 
 import json
+from app.utils.price_parser import parse_prices
 
-from fastapi import APIRouter, HTTPException
+from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel
 
 from app.db import get_pool
+from app.services.geo import geo_resolver
 
 router = APIRouter()
 
@@ -17,29 +20,148 @@ class PublicPriceOut(BaseModel):
     url: str
     model_name: str
     prices: dict
+    model_info: Optional[dict] = None
+    rate_limits: Optional[dict] = None
+    tool_prices: Optional[list] = None
     scraped_at: datetime
 
 
-@router.get("/prices", response_model=List[PublicPriceOut])
-async def get_public_prices(url: Optional[str] = None) -> List[PublicPriceOut]:
+class ParsedPriceItem(BaseModel):
+    url: str
+    model_name: str
+    tier_min: Optional[float] = None
+    tier_max: Optional[float] = None
+    tier_unit: Optional[str] = None
+    input_price: Optional[float] = None
+    output_price: Optional[float] = None
+    currency: str = "CNY"
+    unit: Optional[str] = None
+    label: Optional[str] = None
+
+
+class DiscountedPriceItem(BaseModel):
+    url: str
+    model_name: str
+    tier_min: Optional[float] = None
+    tier_max: Optional[float] = None
+    tier_unit: Optional[str] = None
+    input_price: Optional[float] = None
+    output_price: Optional[float] = None
+    currency: str = "CNY"
+    unit: Optional[str] = None
+    label: Optional[str] = None
+    discount: Optional[float] = None  # None 表示无折扣(原价)
+
+
+class PricesResponse(BaseModel):
+    models: List[PublicPriceOut]
+    parsed_prices: List[ParsedPriceItem]
+    discounted_prices: List[DiscountedPriceItem]
+
+
+def _extract_domain(referer: Optional[str]) -> Optional[str]:
+    if not referer:
+        return None
+    try:
+        return urlparse(referer).netloc or None
+    except Exception:
+        return None
+
+
+@router.get("/prices", response_model=PricesResponse)
+async def get_public_prices(request: Request, url: Optional[str] = None) -> PricesResponse:
     pool = get_pool()
+
+    # 记录调用来源
+    ip = request.client.host if request.client else "unknown"
+    referer = request.headers.get("referer") or request.headers.get("origin")
+    geo = geo_resolver.resolve(ip)
+    try:
+        await pool.execute(
+            """
+            INSERT INTO price_api_logs (ip, referer, org, country, city)
+            VALUES ($1, $2, $3, $4, $5)
+            """,
+            ip, referer, geo.org, geo.country, geo.city,
+        )
+    except Exception:
+        pass
+
+    # 查调用方域名对应的折扣
+    caller_domain = _extract_domain(referer)
+    discount_rate: Optional[float] = None
+    if caller_domain:
+        row = await pool.fetchrow(
+            "SELECT discount FROM discounts WHERE domain = $1", caller_domain
+        )
+        if row:
+            discount_rate = float(row["discount"])
+
+    def _j(v):
+        if v is None:
+            return None
+        return v if isinstance(v, (dict, list)) else json.loads(v)
+
     if url is None:
         rows = await pool.fetch(
-            "SELECT DISTINCT ON (url) url, model_name, prices, scraped_at "
-            "FROM scrape_results ORDER BY url, scraped_at DESC"
+            """
+            WITH latest_job AS (
+                SELECT id FROM scrape_jobs
+                WHERE status = 'done'
+                ORDER BY created_at DESC LIMIT 1
+            )
+            SELECT DISTINCT ON (r.url) r.url, r.model_name, r.prices,
+                   r.model_info, r.rate_limits, r.tool_prices, r.scraped_at
+            FROM scrape_results r
+            JOIN latest_job j ON r.job_id = j.id
+            ORDER BY r.url, r.scraped_at DESC
+            """
         )
     else:
         rows = await pool.fetch(
-            "SELECT url, model_name, prices, scraped_at "
-            "FROM scrape_results WHERE url = $1 ORDER BY scraped_at DESC LIMIT 1",
+            """
+            SELECT url, model_name, prices, model_info, rate_limits, tool_prices, scraped_at
+            FROM scrape_results
+            WHERE url = $1
+            ORDER BY scraped_at DESC LIMIT 1
+            """,
             url,
         )
         if not rows:
             raise HTTPException(status_code=404, detail="No scrape results found for the given URL")
 
-    return [PublicPriceOut(
+    models = [PublicPriceOut(
         url=r["url"],
         model_name=r["model_name"],
-        prices=r["prices"] if isinstance(r["prices"], dict) else json.loads(r["prices"]),
+        prices=_j(r["prices"]) or {},
+        model_info=_j(r["model_info"]),
+        rate_limits=_j(r["rate_limits"]),
+        tool_prices=_j(r["tool_prices"]),
         scraped_at=r["scraped_at"],
     ) for r in rows]
+
+    parsed_prices: List[ParsedPriceItem] = []
+    discounted_prices: List[DiscountedPriceItem] = []
+
+    for r in rows:
+        for item in parse_prices(_j(r["prices"]) or {}):
+            parsed_prices.append(ParsedPriceItem(
+                url=r["url"],
+                model_name=r["model_name"],
+                **item,
+            ))
+            # 折扣价:有折扣就乘,没有就原价(discount=None)
+            d_item = dict(item)
+            if discount_rate is not None:
+                if d_item.get("input_price") is not None:
+                    d_item["input_price"] = round(d_item["input_price"] * discount_rate, 6)
+                if d_item.get("output_price") is not None:
+                    d_item["output_price"] = round(d_item["output_price"] * discount_rate, 6)
+            discounted_prices.append(DiscountedPriceItem(
+                url=r["url"],
+                model_name=r["model_name"],
+                discount=discount_rate,
+                **d_item,
+            ))
+
+    return PricesResponse(models=models, parsed_prices=parsed_prices, discounted_prices=discounted_prices)

+ 46 - 0
backend/app/routers/schedule.py

@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+from datetime import datetime
+
+from fastapi import APIRouter
+from pydantic import BaseModel, Field
+
+from app.db import get_pool
+from app.services.scheduler import reschedule
+
+router = APIRouter(tags=["schedule"])
+
+
+class ScheduleOut(BaseModel):
+    enabled: bool
+    interval_days: int
+    start_hour: int
+    updated_at: datetime
+
+
+class ScheduleIn(BaseModel):
+    enabled: bool
+    interval_days: int = Field(ge=1)
+    start_hour: int = Field(ge=0, le=23)
+
+
+@router.get("/schedule", response_model=ScheduleOut)
+async def get_schedule() -> ScheduleOut:
+    pool = get_pool()
+    async with pool.acquire() as conn:
+        row = await conn.fetchrow("SELECT id, enabled, interval_days, start_hour, updated_at FROM scrape_schedule WHERE id = 1")
+    return ScheduleOut(**dict(row))
+
+
+@router.put("/schedule", response_model=ScheduleOut)
+async def update_schedule(body: ScheduleIn) -> ScheduleOut:
+    pool = get_pool()
+    async with pool.acquire() as conn:
+        row = await conn.fetchrow(
+            """UPDATE scrape_schedule
+               SET enabled=$1, interval_days=$2, start_hour=$3, updated_at=NOW()
+               WHERE id=1 RETURNING id, enabled, interval_days, start_hour, updated_at""",
+            body.enabled, body.interval_days, body.start_hour,
+        )
+    reschedule(body.interval_days, body.start_hour)
+    return ScheduleOut(**dict(row))

+ 13 - 2
backend/app/routers/scrape.py

@@ -30,6 +30,9 @@ class ScrapeResultOut(BaseModel):
     url: str
     model_name: str
     prices: dict
+    model_info: Optional[dict] = None
+    rate_limits: Optional[dict] = None
+    tool_prices: Optional[list] = None
     scraped_at: datetime
 
 
@@ -92,14 +95,22 @@ async def get_scrape_job(job_id: str) -> ScrapeJobDetailOut:
         results: Optional[List[ScrapeResultOut]] = None
         if row["status"] == "done":
             result_rows = await conn.fetch(
-                "SELECT url, model_name, prices, scraped_at FROM scrape_results WHERE job_id = $1 ORDER BY scraped_at ASC",
+                "SELECT url, model_name, prices, model_info, rate_limits, tool_prices, scraped_at FROM scrape_results WHERE job_id = $1 ORDER BY scraped_at ASC",
                 job_id,
             )
+
+            def _j(v):
+                if v is None: return None
+                return v if isinstance(v, (dict, list)) else json.loads(v)
+
             results = [
                 ScrapeResultOut(
                     url=r["url"],
                     model_name=r["model_name"],
-                    prices=r["prices"] if isinstance(r["prices"], dict) else json.loads(r["prices"]),
+                    prices=_j(r["prices"]) or {},
+                    model_info=_j(r["model_info"]),
+                    rate_limits=_j(r["rate_limits"]),
+                    tool_prices=_j(r["tool_prices"]),
                     scraped_at=r["scraped_at"],
                 )
                 for r in result_rows

+ 3 - 1
backend/app/routers/stats.py

@@ -42,7 +42,9 @@ class GeoPoint(BaseModel):
 async def get_stats() -> StatsOut:
     pool = get_pool()
     async with pool.acquire() as conn:
-        total_hits: int = await conn.fetchval("SELECT COUNT(*) FROM access_logs") or 0
+        total_hits: int = await conn.fetchval(
+            "SELECT COUNT(*) FROM access_logs WHERE path LIKE '/api/public/prices%'"
+        ) or 0
         active_ips: int = (
             await conn.fetchval(
                 "SELECT COUNT(DISTINCT ip) FROM access_logs "

+ 25 - 8
backend/app/services/geo.py

@@ -12,27 +12,35 @@ class GeoInfo:
     city: str
     latitude: Optional[float]
     longitude: Optional[float]
+    org: Optional[str] = None
 
 
-_UNKNOWN = GeoInfo("Unknown", "Unknown", None, None)
+_UNKNOWN = GeoInfo("Unknown", "Unknown", None, None, None)
 
 
 class GeoResolver:
-    def __init__(self, db_path: str) -> None:
+    def __init__(self, db_path: str, asn_db_path: str) -> None:
         self._db_path = db_path
+        self._asn_db_path = asn_db_path
         self._reader = None
+        self._asn_reader = None
 
     def _get_reader(self):
         if self._reader is None:
-            import geoip2.database  # lazy import
-
+            import geoip2.database
             self._reader = geoip2.database.Reader(self._db_path)
         return self._reader
 
+    def _get_asn_reader(self):
+        if self._asn_reader is None:
+            import geoip2.database
+            self._asn_reader = geoip2.database.Reader(self._asn_db_path)
+        return self._asn_reader
+
     def resolve(self, ip: str) -> GeoInfo:
         # Private / loopback addresses have no GeoIP entry
         if ip in ("127.0.0.1", "::1", "localhost") or ip.startswith("192.168.") or ip.startswith("10.") or ip.startswith("172."):
-            return GeoInfo("Local", "Loopback", None, None)
+            return GeoInfo("Local", "Loopback", None, None, None)
         try:
             reader = self._get_reader()
             response = reader.city(ip)
@@ -40,9 +48,18 @@ class GeoResolver:
             city = response.city.name or "Unknown"
             lat = response.location.latitude
             lon = response.location.longitude
-            return GeoInfo(country, city, lat, lon)
         except Exception:
-            return GeoInfo("Unknown", "Unknown", None, None)
+            country, city, lat, lon = "Unknown", "Unknown", None, None
+
+        org = None
+        try:
+            asn_reader = self._get_asn_reader()
+            asn_response = asn_reader.asn(ip)
+            org = asn_response.autonomous_system_organization or None
+        except Exception:
+            pass
+
+        return GeoInfo(country, city, lat, lon, org)
 
 
-geo_resolver = GeoResolver(settings.geoip_db_path)
+geo_resolver = GeoResolver(settings.geoip_db_path, settings.geoip_asn_db_path)

+ 81 - 0
backend/app/services/scheduler.py

@@ -0,0 +1,81 @@
+from __future__ import annotations
+
+import asyncio
+import logging
+from datetime import datetime, timedelta, timezone
+
+from apscheduler.schedulers.asyncio import AsyncIOScheduler
+from apscheduler.triggers.interval import IntervalTrigger
+
+from app.db import get_pool
+from app.services.scraper import ScraperService
+
+logger = logging.getLogger(__name__)
+_scheduler = AsyncIOScheduler()
+_scraper = ScraperService()
+JOB_ID = "auto_scrape"
+
+
+async def _run_auto_scrape() -> None:
+    """爬取所有已注册模型。"""
+    pool = get_pool()
+    async with pool.acquire() as conn:
+        cfg = await conn.fetchrow("SELECT * FROM scrape_schedule WHERE id = 1")
+
+    if not cfg or not cfg["enabled"]:
+        return
+
+    async with pool.acquire() as conn:
+        rows = await conn.fetch("SELECT url FROM models ORDER BY created_at")
+
+    urls = [r["url"] for r in rows]
+    if not urls:
+        logger.info("[scheduler] 没有已注册的模型,跳过")
+        return
+
+    logger.info(f"[scheduler] 开始自动爬取 {len(urls)} 个模型")
+    job_id_row = await pool.fetchrow(
+        "INSERT INTO scrape_jobs (urls, status) VALUES ($1, 'pending') RETURNING id",
+        urls,
+    )
+    asyncio.create_task(_scraper.run_job(str(job_id_row["id"]), urls, pool))
+
+
+def _reschedule(interval_days: int, start_hour: int) -> None:
+    if _scheduler.get_job(JOB_ID):
+        _scheduler.remove_job(JOB_ID)
+    now = datetime.now(timezone.utc)
+    next_run = now.replace(hour=start_hour, minute=0, second=0, microsecond=0)
+    if next_run <= now:
+        next_run += timedelta(days=1)
+    _scheduler.add_job(
+        _run_auto_scrape,
+        trigger=IntervalTrigger(days=interval_days, start_date=next_run),
+        id=JOB_ID,
+        replace_existing=True,
+    )
+    logger.info(f"[scheduler] 已设置每 {interval_days} 天 {start_hour}:00 执行,下次: {next_run}")
+
+
+async def start_scheduler() -> None:
+    pool = get_pool()
+    async with pool.acquire() as conn:
+        cfg = await conn.fetchrow("SELECT * FROM scrape_schedule WHERE id = 1")
+    interval = cfg["interval_days"] if cfg else 1
+    start_hour = cfg["start_hour"] if cfg else 2
+    _reschedule(interval, start_hour)
+    _scheduler.start()
+    logger.info("[scheduler] 调度器已启动")
+
+
+async def stop_scheduler() -> None:
+    if _scheduler.running:
+        _scheduler.shutdown(wait=False)
+
+
+def get_scheduler() -> AsyncIOScheduler:
+    return _scheduler
+
+
+def reschedule(interval_days: int, start_hour: int) -> None:
+    _reschedule(interval_days, start_hour)

+ 35 - 16
backend/app/services/scraper.py

@@ -1,6 +1,7 @@
 """
 ScraperService: runs scrape jobs asynchronously using a thread pool executor.
-Delegates all scraping logic to scrape_aliyun_models.py (the working standalone script).
+Uses the new crawl/main.py scrape_all() which collects prices, model info,
+rate limits and tool call prices in a single browser session.
 """
 from __future__ import annotations
 
@@ -11,12 +12,14 @@ import sys
 import traceback
 from typing import Any
 
-# Add backend root to path so we can import scrape_aliyun_models.py directly
+# Add backend root and crawl dir to path
 _backend_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-if _backend_root not in sys.path:
-    sys.path.insert(0, _backend_root)
+_crawl_dir = os.path.join(_backend_root, "crawl")
+for _p in (_backend_root, _crawl_dir):
+    if _p not in sys.path:
+        sys.path.insert(0, _p)
 
-from scrape_aliyun_models import scrape_model_price  # noqa: E402
+from main import scrape_all  # noqa: E402  (backend/crawl/main.py)
 
 
 class ScraperService:
@@ -37,22 +40,42 @@ class ScraperService:
 
             for url in urls:
                 result: dict = await loop.run_in_executor(
-                    None, scrape_model_price, url, headless, 20000, exec_path
+                    None,
+                    lambda u=url: scrape_all(
+                        u,
+                        headless=headless,
+                        timeout=20000,
+                        executable_path=exec_path,
+                        modules=["info", "rate", "tool", "price"],
+                    ),
+                )
+
+                prices     = result.get("prices") or {}
+                model_info = result.get("info") or {}
+                rate_limits = result.get("rate_limits") or {}
+                tool_prices = result.get("tool_call_prices") or []
+
+                # model_name: 直接用 URL 中提取的 model_id,保持和用户输入一致
+                model_name = (
+                    result.get("model_id")
+                    or url.rstrip("/").split("/")[-1]
                 )
-                # scrape_model_price returns {"url":..., "error":..., "prices":{...}}
-                prices = result.get("prices") or {}
-                model_name = url.rstrip("/").split("/")[-1]
 
                 async with pool.acquire() as conn:
                     await conn.execute(
                         """
-                        INSERT INTO scrape_results (job_id, url, model_name, prices)
-                        VALUES ($1, $2, $3, $4::jsonb)
+                        INSERT INTO scrape_results
+                            (job_id, url, model_name, prices, model_info, rate_limits, tool_prices, raw_data)
+                        VALUES ($1, $2, $3, $4::jsonb, $5::jsonb, $6::jsonb, $7::jsonb, $8::jsonb)
                         """,
                         job_id,
                         url,
                         model_name,
                         json.dumps(prices),
+                        json.dumps(model_info),
+                        json.dumps(rate_limits),
+                        json.dumps(tool_prices),
+                        json.dumps(result),
                     )
 
             async with pool.acquire() as conn:
@@ -65,11 +88,7 @@ class ScraperService:
             error_msg = f"{type(exc).__name__}: {exc}\n{traceback.format_exc()}"
             async with pool.acquire() as conn:
                 await conn.execute(
-                    """
-                    UPDATE scrape_jobs
-                    SET status = 'failed', error = $2, updated_at = NOW()
-                    WHERE id = $1
-                    """,
+                    "UPDATE scrape_jobs SET status = 'failed', error = $2, updated_at = NOW() WHERE id = $1",
                     job_id,
                     error_msg,
                 )

+ 257 - 0
backend/app/utils/price_parser.py

@@ -0,0 +1,257 @@
+"""
+price_parser.py
+统一价格结构,所有模型类型输出相同字段:
+
+  tier_min     : 档位下限(token数 或 秒数),0 表示从0开始,None 表示无档位
+  tier_max     : 档位上限(token数 或 秒数),None 表示无上限
+  tier_unit    : 档位单位,"tokens" 或 "seconds",None 表示无档位
+  input_price  : 输入价格(元/百万tokens 或 0),视频/图像为 0
+  output_price : 输出价格(元/百万tokens)或视频/图像的生成价格
+  currency     : "CNY"
+  unit         : 计费单位原始字符串
+  label        : 原始 key
+
+视频规格 -> 秒数映射:
+  480P  -> 0 ~ 480
+  720P  -> 0 ~ 720  (或 481 ~ 720)
+  1080P -> 0 ~ 1080 (或 721 ~ 1080)
+  4K    -> 0 ~ 2160
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Any, Dict, List, Optional
+
+
+# ── 视频规格 -> 最大秒数 ────────────────────────────────────────────────────────
+_VIDEO_SPEC_MAX: Dict[str, int] = {
+    "480P":  480,
+    "480p":  480,
+    "720P":  720,
+    "720p":  720,
+    "1080P": 1080,
+    "1080p": 1080,
+    "2K":    1440,
+    "4K":    2160,
+}
+
+# 非 token 计费单位
+_NON_TOKEN_UNITS = re.compile(r"每秒|每张|每次|每帧|/秒|/张|/次", re.I)
+
+# token 阶梯 key 正则
+# 情况1:input<=128k / 32k<input<=128k(有上限)
+_TIER_RE = re.compile(
+    r"^(?:([\d.]+[KkMm]?)\s*<\s*)?(?:input|输入)\s*<=?\s*([\d.]+[KkMm]?)$",
+    re.I,
+)
+# 情况2:256k<input(只有下限,无上限)
+_TIER_RE_LO_ONLY = re.compile(
+    r"^([\d.]+[KkMm]?)\s*<\s*(?:input|输入)$",
+    re.I,
+)
+
+
+def _to_tokens(val: str) -> Optional[int]:
+    """把 '32k'/'128K'/'1M' 转成 token 整数。"""
+    s = str(val).strip().upper().replace(",", "")
+    m = re.match(r"^([\d.]+)\s*([KMG]?)$", s)
+    if not m:
+        return None
+    num = float(m.group(1))
+    suffix = m.group(2)
+    if suffix == "K":
+        return int(num * 1_000)
+    if suffix == "M":
+        return int(num * 1_000_000)
+    return int(num)
+
+
+def _parse_price(obj: Any) -> Optional[float]:
+    if isinstance(obj, (int, float)):
+        return float(obj)
+    if isinstance(obj, dict):
+        v = obj.get("price")
+        if v is not None:
+            try:
+                return float(v)
+            except (TypeError, ValueError):
+                pass
+    return None
+
+
+def _parse_unit(obj: Any) -> Optional[str]:
+    if isinstance(obj, dict):
+        return obj.get("unit")
+    return None
+
+
+def _parse_tier_key(key: str):
+    """解析 token 阶梯 key,返回 (min_tokens, max_tokens) 或 None。"""
+    k = key.strip().lower().replace(" ", "")
+    m = _TIER_RE.match(k)
+    if m:
+        lo_str, hi_str = m.group(1), m.group(2)
+        lo = _to_tokens(lo_str) if lo_str else 0
+        hi = _to_tokens(hi_str) if hi_str else None
+        return (lo, hi)
+    # 只有下限:256k<input
+    m2 = _TIER_RE_LO_ONLY.match(k)
+    if m2:
+        lo = _to_tokens(m2.group(1))
+        return (lo, None)
+    return None
+
+
+def _extract_video_spec(label: str) -> Optional[str]:
+    """从 label 中提取视频规格,如 '视频生成(720P)' -> '720P'。"""
+    m = re.search(r"[((]([^))]+)[))]", label)
+    if m:
+        spec = m.group(1).strip()
+        if spec.upper() in {k.upper() for k in _VIDEO_SPEC_MAX}:
+            return spec.upper()
+    # 直接在 label 里找
+    for spec in _VIDEO_SPEC_MAX:
+        if spec.upper() in label.upper():
+            return spec.upper()
+    return None
+
+
+def _build_video_tiers(items: List[Dict]) -> List[Dict]:
+    """
+    把多个视频规格条目转成连续区间:
+    720P(0.6) + 1080P(1.0) ->
+      [0, 720,  input=0, output=0.6]
+      [721, 1080, input=0, output=1.0]
+    """
+    # 按 tier_max 排序
+    sorted_items = sorted(items, key=lambda x: x["_spec_max"])
+    result = []
+    prev_max = 0
+    for item in sorted_items:
+        spec_max = item["_spec_max"]
+        result.append({
+            "label": item["label"],
+            "tier_min": prev_max + (1 if prev_max > 0 else 0),
+            "tier_max": spec_max,
+            "tier_unit": "seconds",
+            "input_price": 0.0,
+            "output_price": item["price"],
+            "currency": item["currency"],
+            "unit": item["unit"],
+        })
+        prev_max = spec_max
+    return result
+
+
+def parse_prices(prices: Dict[str, Any]) -> List[Dict]:
+    result: List[Dict] = []
+    video_items: List[Dict] = []
+    input_entry: Optional[Dict] = None
+
+    for key, val in prices.items():
+        # ── token 阶梯 ──
+        tier = _parse_tier_key(key)
+        if tier is not None and isinstance(val, dict):
+            entry: Dict = {
+                "label": key,
+                "tier_min": tier[0],
+                "tier_max": tier[1],
+                "tier_unit": "tokens",
+                "input_price": None,
+                "output_price": None,
+                "currency": "CNY",
+                "unit": None,
+            }
+            for sub_key, sub_val in val.items():
+                sk = sub_key.strip()
+                price = _parse_price(sub_val)
+                unit = _parse_unit(sub_val)
+                if unit:
+                    entry["unit"] = unit
+                if re.match(r"^输入$|^input$", sk, re.I):
+                    entry["input_price"] = price
+                elif re.match(r"^输出$|^output$", sk, re.I):
+                    entry["output_price"] = price
+            result.append(entry)
+            continue
+
+        if not isinstance(val, dict):
+            continue
+
+        price = _parse_price(val)
+        unit = _parse_unit(val)
+
+        # ── 视频/图像按单位计费 ──
+        if _NON_TOKEN_UNITS.search(unit or ""):
+            spec = _extract_video_spec(key)
+            if spec and spec in _VIDEO_SPEC_MAX:
+                video_items.append({
+                    "label": key,
+                    "_spec_max": _VIDEO_SPEC_MAX[spec],
+                    "price": price,
+                    "currency": "CNY",
+                    "unit": unit,
+                })
+            else:
+                # 未知规格,直接输出
+                result.append({
+                    "label": key,
+                    "tier_min": None,
+                    "tier_max": None,
+                    "tier_unit": None,
+                    "input_price": 0.0,
+                    "output_price": price,
+                    "currency": "CNY",
+                    "unit": unit,
+                })
+            continue
+
+        # ── 简单非阶梯(输入/输出) ──
+        if re.match(r"^输入$|^input$", key.strip(), re.I):
+            input_entry = {"price": price, "unit": unit}
+            continue
+        if re.match(r"^输出$|^output$", key.strip(), re.I):
+            result.append({
+                "label": "input/output",
+                "tier_min": None,
+                "tier_max": None,
+                "tier_unit": None,
+                "input_price": input_entry["price"] if input_entry else None,
+                "output_price": price,
+                "currency": "CNY",
+                "unit": unit or (input_entry["unit"] if input_entry else None),
+            })
+            input_entry = None
+            continue
+
+        # 其他普通标签
+        result.append({
+            "label": key,
+            "tier_min": None,
+            "tier_max": None,
+            "tier_unit": None,
+            "input_price": price,
+            "output_price": None,
+            "currency": "CNY",
+            "unit": unit,
+        })
+
+    # 处理只有输入没有输出的情况
+    if input_entry:
+        result.append({
+            "label": "input",
+            "tier_min": None,
+            "tier_max": None,
+            "tier_unit": None,
+            "input_price": input_entry["price"],
+            "output_price": None,
+            "currency": "CNY",
+            "unit": input_entry["unit"],
+        })
+
+    # 把视频条目转成连续区间
+    if video_items:
+        result.extend(_build_video_tiers(video_items))
+
+    return result

+ 255 - 0
backend/crawl/main.py

@@ -0,0 +1,255 @@
+#!/usr/bin/env python3
+"""
+main.py - 阿里云百炼模型完整信息抓取入口
+
+整合以下模块,对每个 URL 只打开一次浏览器,依次运行所有抓取逻辑:
+  - scrape_aliyun_models.py  → 模型价格(含阶梯计费)
+  - scrape_model_info.py     → 模型基本信息 + 能力
+  - scrape_rate_limits.py    → 限流与上下文
+  - scrape_tool_prices.py    → 工具调用价格
+
+用法:
+  python main.py --url "https://bailian.console.aliyun.com/...#/model-market/detail/qwen3-max"
+  python main.py --file urls.txt
+  python main.py --url "..." --browser-path "D:\\playwright-browsers\\...\\chrome.exe"
+  python main.py --url "..." --modules info,price,rate,tool   # 只运行指定模块
+  python main.py --url "..." --headful                        # 有头模式调试
+
+输出: JSON 到 stdout,同时保存到 output/<model_id>.json
+"""
+
+import argparse
+import json
+import os
+import re
+import time
+from typing import Dict, List, Optional
+
+from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
+
+# 导入各模块的核心解析函数(不启动独立浏览器)
+from scrape_model_info import (
+    _extract_model_id_from_url,
+    _find_model_in_json,
+    parse_model_info,
+    API_URL_RE as INFO_API_RE,
+)
+from scrape_rate_limits import (
+    parse_rate_limits_from_text,
+    _get_rate_limit_section_text,
+)
+from scrape_tool_prices import (
+    parse_tool_prices_from_text,
+    _get_tool_price_section_text,
+)
+from scrape_aliyun_models import (
+    scrape_model_price,
+)
+
+
def _navigate(page, url: str, timeout: int) -> bool:
    """Navigate *page* to *url*; return True on success, False on failure.

    Tries the strict ``networkidle`` wait first; on timeout it retries with
    the weaker ``load`` condition.  Any other navigation error (DNS failure,
    malformed URL, closed browser) is reported and converted to ``False``
    instead of propagating, so the function honors its boolean contract.
    """
    try:
        page.goto(url, wait_until="networkidle", timeout=timeout)
        return True
    except PlaywrightTimeoutError:
        pass  # retry below with a less strict wait condition
    except Exception as e:
        print(f"[ERROR] 导航失败: {e}")
        return False
    try:
        page.goto(url, wait_until="load", timeout=timeout)
        return True
    except Exception as e:
        print(f"[ERROR] 导航失败: {e}")
        return False
+
+
def _wait_for_content(page) -> None:
    """Block until one of the page's core sections renders, then nudge lazy loading."""
    # Wait for whichever key section shows up first.
    for section_selector in ("text=模型价格", "text=模型介绍", "text=模型能力"):
        try:
            page.wait_for_selector(section_selector, timeout=6000)
        except PlaywrightTimeoutError:
            continue
        break
    time.sleep(1.5)
    # Scroll to the bottom and back up so lazily-rendered widgets mount.
    try:
        page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(0.8)
        page.evaluate("window.scrollTo(0, 0)")
        time.sleep(0.3)
    except Exception:
        pass
+
+
def scrape_all(
    url: str,
    headless: bool = True,
    timeout: int = 20000,
    executable_path: Optional[str] = None,
    modules: Optional[List[str]] = None,
) -> Dict:
    """
    Run all (or the selected) scraping modules against one URL, sharing a
    single browser instance for everything except the "price" module.

    Args:
        url: Model-detail page URL on the Bailian console.
        headless: Launch Chromium headless when True.
        timeout: Navigation timeout in milliseconds.
        executable_path: Optional path to a browser executable.
        modules: Subset of ["info", "rate", "tool", "price"]; None runs all.

    Returns:
        Dict with "url", "model_id", "error", plus per-module keys:
        "info", "rate_limits", "tool_call_prices", "prices"/"price_error".
    """
    if modules is None:
        modules = ["info", "rate", "tool", "price"]

    target = _extract_model_id_from_url(url)
    result: Dict = {"url": url, "model_id": target, "error": None}

    # The "price" module reuses the original standalone script, which launches
    # its own browser (a structural limitation of that script).  All other
    # modules share the single browser instance opened below.
    shared_modules = [m for m in modules if m != "price"]

    # ── shared browser: info / rate / tool ──────────────────────────────────────
    if shared_modules:
        api_data: List[Dict] = []  # JSON bodies captured from matching responses

        with sync_playwright() as p:
            launch_kwargs: Dict = {"headless": headless}
            if executable_path:
                launch_kwargs["executable_path"] = executable_path
            browser = p.chromium.launch(**launch_kwargs)
            page = browser.new_context().new_page()

            # Intercept JSON responses whose URL matches the model-info API.
            def on_response(resp):
                try:
                    if "application/json" not in resp.headers.get("content-type", ""):
                        return
                    if not INFO_API_RE.search(resp.url):
                        return
                    try:
                        api_data.append(resp.json())
                    except Exception:
                        pass
                except Exception:
                    pass

            page.on("response", on_response)

            if not _navigate(page, url, timeout):
                result["error"] = "导航失败"
                browser.close()
            else:
                _wait_for_content(page)

                # Locate the model object among the captured API payloads.
                model_obj = None
                for body in api_data:
                    found = _find_model_in_json(body, target)
                    if found:
                        model_obj = found
                        print(f"[INFO] API 找到模型: {found.get('model', found.get('name', target))}")
                        break

                if not model_obj:
                    print(f"[WARN] 未从 API 找到模型 '{target}',部分字段将为空")

                # ── info module: basic model info + capabilities ──
                if "info" in shared_modules:
                    if model_obj:
                        result["info"] = parse_model_info(model_obj)
                    else:
                        result["info"] = {"error": f"未找到模型 '{target}'"}

                # ── rate module: rate limits and context window ──
                if "rate" in shared_modules:
                    rate_text = _get_rate_limit_section_text(page)
                    result["rate_limits"] = parse_rate_limits_from_text(rate_text) if rate_text else {}

                # ── tool module: tool-call pricing ──
                if "tool" in shared_modules:
                    html = page.content()
                    tool_text = _get_tool_price_section_text(html)
                    result["tool_call_prices"] = parse_tool_prices_from_text(tool_text) if tool_text else []

                browser.close()

    # ── price module (original script, separate browser) ──────────────────────
    if "price" in modules:
        print(f"[INFO] 运行价格模块...")
        price_result = scrape_model_price(
            url,
            headless=headless,
            timeout=timeout,
            executable_path=executable_path,
        )
        result["prices"] = price_result.get("prices", {})
        if price_result.get("error"):
            result["price_error"] = price_result["error"]

    return result
+
+
def main():
    """CLI entry point: parse args, scrape each URL, save and print JSON."""
    ap = argparse.ArgumentParser(
        description="阿里云百炼模型完整信息抓取(整合所有模块)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
模块说明:
  info   - 模型基本信息、能力、模态
  rate   - 限流与上下文(RPM、context window 等)
  tool   - 工具调用价格
  price  - 模型 token 价格(含阶梯计费)

示例:
  python main.py --url "https://..." --browser-path "D:\\chrome.exe"
  python main.py --file urls.txt --headful
  python main.py --url "https://..." --modules info,rate
        """,
    )
    group = ap.add_mutually_exclusive_group(required=True)
    group.add_argument("--url", help="单个模型页面 URL")
    group.add_argument("--file", help="URL 列表文件(每行一个)")
    ap.add_argument("--headful", action="store_true", help="有头模式(方便调试)")
    ap.add_argument("--timeout", type=int, default=20000, help="导航超时毫秒,默认 20000")
    ap.add_argument("--browser-path", help="浏览器可执行文件路径")
    ap.add_argument(
        "--modules",
        default="info,rate,tool,price",
        help="要运行的模块,逗号分隔,可选: info,rate,tool,price(默认全部)",
    )
    ap.add_argument("--output-dir", default="output", help="结果保存目录,默认 output/")
    args = ap.parse_args()

    # Collect target URLs: a single --url, or one URL per line from --file.
    urls: List[str] = []
    if args.url:
        urls = [args.url]
    else:
        with open(args.file, "r", encoding="utf-8") as f:
            urls = [ln.strip() for ln in f if ln.strip()]

    # CLI flags win; PLAYWRIGHT_EXECUTABLE / PLAYWRIGHT_HEADLESS env vars are fallbacks.
    exec_path = args.browser_path or os.environ.get("PLAYWRIGHT_EXECUTABLE")
    headless = not args.headful
    if os.environ.get("PLAYWRIGHT_HEADLESS", "").lower() == "false":
        headless = False

    modules = [m.strip() for m in args.modules.split(",") if m.strip()]
    print(f"[INFO] 运行模块: {modules}")

    os.makedirs(args.output_dir, exist_ok=True)

    all_results = []
    for u in urls:
        print(f"\n{'='*60}\n[INFO] 抓取: {u}", flush=True)
        res = scrape_all(u, headless=headless, timeout=args.timeout,
                         executable_path=exec_path, modules=modules)
        all_results.append(res)

        # Persist each result as <output-dir>/<model_id>.json, replacing
        # filesystem-unsafe characters in the model id.
        model_id = res.get("model_id", "unknown")
        safe_id = re.sub(r"[^\w\-.]", "_", model_id)
        out_path = os.path.join(args.output_dir, f"{safe_id}.json")
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(res, f, ensure_ascii=False, indent=2)
        print(f"[INFO] 已保存: {out_path}")

    # Echo everything to stdout as one JSON array.
    print(json.dumps(all_results, ensure_ascii=False, indent=2))
+
+
+if __name__ == "__main__":
+    main()

+ 810 - 0
backend/crawl/scrape_aliyun_models.py

@@ -0,0 +1,810 @@
+#!/usr/bin/env python3
+"""
+Aliyun Model Price Scraper - Final Improved Version
+- 使用 Playwright 渲染页面并抓取"模型价格"区域内的价格信息
+- 支持单个模型页面 URL,或从文件读取多个 URL
+
+改进要点:
+1. 能够生成阶梯计费结构:{input: {tier1: {...}, tier2: {...}}, output: {...}}
+2. 优惠标记正确处理:label只保留基础部分,优惠信息放入note字段
+3. 强化过滤:完全排除工具调用价格(包括"千次调用"单位)
+
+依赖:
+  pip install playwright beautifulsoup4 lxml
+  python -m playwright install
+
+用法示例:
+  python scrape_aliyun_models.py --url "https://bailian.console.aliyun.com/.../qwen3-max"
+  python scrape_aliyun_models.py --file urls.txt
+
+输出: JSON 到 stdout
+"""
+
+import argparse
+import json
+import re
+import time
+import os
+from typing import List, Dict, Optional
+
+from bs4 import BeautifulSoup, FeatureNotFound
+from bs4.element import Tag
+from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
+
+
# Heuristic keywords that mark a price row as tool/API-call billing rather
# than model token billing (e.g. "千次调用" per-1k-calls units, search/code
# interpreter/text-to-image tooling).  Used by _is_tool_call_item().
TOOL_CALL_RE = re.compile(
    r"调用|工具|接口|api调用|api|次调用|千次调用|/千次|每千次|搜索策略|代码解释|文生图|数据增强|模型推理",
    re.I,
)
+
+
def _is_tool_call_item(label: str, raw: str, unit: str) -> bool:
    """Return True when a parsed price row is tool/API-call billing, not token billing."""
    fields = (label.lower(), raw.lower(), unit.lower())
    # Keyword match anywhere in label / raw text / unit.
    if any(TOOL_CALL_RE.search(field) for field in fields):
        return True
    # Per-call units (per-1k-calls etc.) indicate tool pricing even without keywords.
    unit_l = fields[2]
    return "千次" in unit_l or "/千" in unit_l or "次调用" in unit_l
+
+
+def _find_nearest_tier_label(lines: List[str], idx: int) -> Optional[str]:
+    tier_re = re.compile(r"(输入|输出).*(<=|>=|<|>|\b\d+\s*k|\d+\s*万|\d+\s*千|\d+\s*tokens?)", re.I)
+    for step in range(1, 6):
+        for pos in (idx - step, idx + step):
+            if pos < 0 or pos >= len(lines):
+                continue
+            candidate = lines[pos]
+            if not candidate or re.search(r"([0-9]+(?:\.[0-9]+)?)\s*元", candidate, re.I):
+                continue
+            if tier_re.search(candidate):
+                return candidate.strip()
+    return None
+
+
def _open_tier_dropdown(page) -> bool:
        """Open the tier-selection dropdown (the '输入 <= 32k' style selector).

        Tries a Playwright locator click first; if that fails, falls back to
        an in-page DOM script that finds the ant-design select and dispatches
        mousedown/mouseup/click on it.  Returns True when a click was issued.
        """
        try:
                # Attempt 1: click the ant-select selector whose text looks like
                # a tier label, via Playwright's locator API.
                try:
                    selector = page.locator(".efm_ant-select-selector, .ant-select-selector").filter(has_text=re.compile(r"输入.*\d+\s*[kK]"))
                    if selector.count() > 0:
                        selector.first.click(timeout=3000)
                        time.sleep(0.5)
                        return True
                except Exception as e:
                    pass

                # Attempt 2: locate and click in page context, dispatching the
                # mouse events ant-design's select listens for.
                ok = page.evaluate(
                        """
                        () => {
                            const isVisible = (el) => {
                                if (!el) return false;
                                const rect = el.getBoundingClientRect();
                                const style = window.getComputedStyle(el);
                                return rect.width > 0 && rect.height > 0 && style.display !== 'none' && style.visibility !== 'hidden';
                            };

                            const norm = (s) => (s || '').replace(/\s+/g, ' ').trim();
                            const tierRe = /输入.*\d+\s*[kK]/i;

                            let clickEl = null;
                            const selectors = Array.from(document.querySelectorAll(
                                ".efm_ant-select-selector, .ant-select-selector"
                            ));
                            for (const el of selectors) {
                                const txt = norm(el.innerText || el.textContent);
                                if (tierRe.test(txt) && isVisible(el)) {
                                    clickEl = el;
                                    break;
                                }
                            }

                            if (!clickEl) {
                                const containers = Array.from(document.querySelectorAll(
                                    ".efm_ant-select, .ant-select"
                                ));
                                for (const el of containers) {
                                    const txt = norm(el.innerText || el.textContent);
                                    if (tierRe.test(txt) && isVisible(el)) {
                                        clickEl = el.querySelector(".efm_ant-select-selector, .ant-select-selector") || el;
                                        break;
                                    }
                                }
                            }

                            if (!isVisible(clickEl)) return false;
                            clickEl.dispatchEvent(new MouseEvent('mousedown', { bubbles: true }));
                            clickEl.dispatchEvent(new MouseEvent('mouseup', { bubbles: true }));
                            clickEl.click();
                            return true;
                        }
                        """
                )
                time.sleep(0.5)
                return bool(ok)
        except Exception:
                return False
+
+
+def _normalize_tier_option(opt: str) -> str:
+        if not opt:
+            return "unknown"
+        s = opt.replace('\u00a0', ' ')
+        m = re.search(r"(\d+\s*k\s*<\s*输入\s*<=\s*\d+\s*k)", s, re.I)
+        if not m:
+            m = re.search(r"(输入\s*<=\s*\d+\s*k)", s, re.I)
+        if not m:
+            m = re.search(r"(\d+\s*k\s*<\s*输入)", s, re.I)
+        if m:
+            key = m.group(1)
+            key = re.sub(r"\s+", "", key)
+            key = key.replace("输入", "input").replace("输出", "output")
+            return key
+
+        if "输入" in s or "输出" in s:
+            nums = re.findall(r"\d+\s*k", s, re.I)
+            if nums:
+                joined = "-".join([n.replace(' ', '') for n in nums])
+                if "输入" in s:
+                    return f"input_{joined}"
+                return f"output_{joined}"
+
+        short = re.sub(r"\s+", " ", s).strip()
+        return short[:60]
+
+
def _get_tier_options(page) -> List[str]:
    """Open the tier dropdown and return the visible tier option texts.

    Returns [] when the dropdown cannot be opened.  Options look like
    '输入 <= 32k'; duplicates are removed while preserving order, and the
    dropdown is closed (Escape) before returning.
    """
    if not _open_tier_dropdown(page):
        return []

    # Give the dropdown panel a moment to become visible.
    try:
        page.wait_for_selector(
            ".efm_ant-select-dropdown, .ant-select-dropdown",
            state="visible", timeout=3000
        )
    except Exception:
        pass

    # Primary strategy: collect leaf-node texts inside the visible dropdown.
    options = []
    try:
        options = page.evaluate(
            """
            () => {
                const isVisible = (el) => {
                    const r = el.getBoundingClientRect();
                    const s = window.getComputedStyle(el);
                    return r.width > 0 && r.height > 0 && s.display !== 'none' && s.visibility !== 'hidden';
                };
                const dropdown = Array.from(document.querySelectorAll(
                    '.efm_ant-select-dropdown, .ant-select-dropdown'
                )).find(el => isVisible(el));
                if (!dropdown) return [];
                const leaves = Array.from(dropdown.querySelectorAll('*'))
                    .filter(el => isVisible(el) && el.children.length === 0);
                const texts = leaves
                    .map(el => (el.innerText || el.textContent || '').replace(/\\s+/g, ' ').trim())
                    .filter(t => t.length > 0 && t.length < 60);
                return Array.from(new Set(texts));
            }
            """
        )
        # Keep only texts that look like tier labels (mention 输入 and a k-size).
        options = [t for t in options if re.search(r"输入", t) and re.search(r"\d+\s*[kK]", t)]
    except Exception:
        options = []

    # Fallback: scan the whole document for tier-shaped texts.
    if not options:
        try:
            options = page.evaluate(
                """
                () => {
                    const isVisible = (el) => {
                        const r = el.getBoundingClientRect();
                        const s = window.getComputedStyle(el);
                        return r.width > 0 && r.height > 0 && s.display !== 'none' && s.visibility !== 'hidden';
                    };
                    const texts = Array.from(document.querySelectorAll('*'))
                        .filter(el => isVisible(el) && el.children.length === 0)
                        .map(el => (el.innerText || el.textContent || '').replace(/\\s+/g, ' ').trim())
                        .filter(t => t.length < 60 && /输入/.test(t) && /\\d+\\s*[kK]/.test(t) && /<=|</.test(t));
                    return Array.from(new Set(texts));
                }
                """
            )
        except Exception:
            options = []

    # Close the dropdown so later interactions start from a clean state.
    try:
        page.keyboard.press("Escape")
    except Exception:
        pass

    # De-duplicate while preserving first-seen order.
    return list(dict.fromkeys(options))
+
+
def _select_tier_option(page, option_text: str) -> bool:
    """Open the tier dropdown and click the option whose text equals *option_text*.

    Returns True when the option was clicked, False otherwise (dropdown not
    openable, not visible in time, or option not found).
    """
    if not _open_tier_dropdown(page):
        return False

    try:
        page.wait_for_selector(
            ".efm_ant-select-dropdown, .ant-select-dropdown",
            state="visible", timeout=2000,
        )
    except Exception:
        return False

    try:
        # Attempt 1: Playwright's text locator with an exact match.
        try:
            option_loc = page.get_by_text(option_text, exact=True).first
            option_loc.click(timeout=3000, force=False)
            time.sleep(0.6)
            return True
        except Exception:
            pass

        # Attempt 2: find the option node in page context and click it,
        # dispatching mousedown/mouseup first (ant-design listens for them).
        clicked = page.evaluate(
            """
            (opt) => {
              const isVisible = (el) => {
                if (!el) return false;
                const rect = el.getBoundingClientRect();
                const style = window.getComputedStyle(el);
                return rect.width > 0 && rect.height > 0 && style.display !== 'none' && style.visibility !== 'hidden';
              };
              const norm = (s) => (s || '').replace(/\s+/g, ' ').trim();
              const nodes = Array.from(document.querySelectorAll(
                ".efm_ant-select-item-option-content, [role='option'], .efm_ant-select-item, .ant-select-item"
              ));
              const target = nodes.find((n) => norm(n.textContent) === opt && isVisible(n));
              if (!target) return false;
              const clickEl = target.closest(".efm_ant-select-item, [role='option']") || target;
              clickEl.dispatchEvent(new MouseEvent('mousedown', { bubbles: true }));
              clickEl.dispatchEvent(new MouseEvent('mouseup', { bubbles: true }));
              clickEl.click();
              return true;
            }
            """,
            option_text,
        )
        if clicked:
            time.sleep(0.6)
            return True
        return False
    except Exception:
        return False
+
+
def _ensure_tiered_pricing(page) -> None:
    """Best-effort: switch the price panel to the '阶梯计费' (tiered) view."""
    try:
        toggle = page.locator("text=阶梯计费").first
        has_toggle = toggle.count() > 0
    except Exception:
        return
    if not has_toggle:
        return
    try:
        toggle.click()
        time.sleep(0.3)
    except Exception:
        pass  # toggle may be non-interactive; tiered view is optional
+
+
def parse_prices_from_text(text: str) -> List[Dict]:
    """Parse '... 元' price rows out of the plain text of the price panel.

    Each price line yields a dict with keys: "label", "raw", "price"
    (or "price_current"/"price_original" when two amounts share a line),
    optional "unit"/"note", and "currency" (always "CNY").  A dedicated
    '原价' line annotates the previous entry instead of adding a new one.
    """
    price_re = re.compile(r"([0-9]+(?:\.[0-9]+)?)\s*元", re.I)
    lines = [stripped for stripped in (ln.strip() for ln in text.splitlines()) if stripped]

    def _label_for(idx: int, line: str, match) -> Optional[str]:
        # Text preceding the first amount on the same line wins ...
        prefix = line[: match.start()].strip()
        if prefix:
            return prefix
        # ... otherwise the closest earlier non-price line.
        for prev in range(idx - 1, -1, -1):
            if lines[prev] and not price_re.search(lines[prev]):
                return lines[prev]
        return None

    items: List[Dict] = []
    for idx, line in enumerate(lines):
        amounts = price_re.findall(line)
        if not amounts:
            continue

        first = price_re.search(line)
        label = _label_for(idx, line, first) if first else None
        if not label:
            label = f"price_{len(items) + 1}"

        if label == "原价":
            # "Original price" rows annotate the previous entry.
            if items and amounts:
                try:
                    items[-1]["price_original"] = float(amounts[0])
                except Exception:
                    items[-1]["price_original"] = amounts[0]
                items[-1].setdefault("note", "")
                items[-1]["note"] = (
                    items[-1]["note"] + "; 原价显示" if items[-1]["note"] else "原价显示"
                )
            continue

        raw = line
        # Bare 输入/输出 labels get upgraded to the nearby tier description.
        if re.fullmatch(r"输入|输出", label.strip()):
            tier_label = _find_nearest_tier_label(lines, idx)
            if tier_label:
                label = tier_label

        entry: Dict = {"label": label.strip(), "raw": raw}
        try:
            values = [float(a) for a in amounts]
            if len(values) == 1:
                entry["price"] = values[0]
            else:
                # Two amounts on one line: the lower is the current price,
                # the higher the original.
                ordered = sorted(values)
                entry["price_current"] = ordered[0]
                entry["price_original"] = ordered[-1]
        except Exception:
            try:
                entry["price"] = float(amounts[0])
            except Exception:
                entry["price"] = amounts[0]

        # Billing unit: generic token marker first, then the literal
        # '元/每...' phrase if present (which overrides).
        unit = None
        if re.search(r"每千|每 1k|/千|/每千|tokens", raw, re.I):
            unit = "元/每千tokens"
        unit_match = re.search(r"元\s*/?\s*每[^\n,,;]*", raw)
        if unit_match:
            unit = unit_match.group(0)
        if unit:
            entry["unit"] = unit

        notes = []
        if re.search(r"限时|折", raw):
            notes.append("限时优惠")
        if re.search(r"原价", raw):
            notes.append("原价显示")
        if notes:
            entry["note"] = "; ".join(notes)

        entry["currency"] = "CNY"
        items.append(entry)

    return items
+
+
def extract_price_block_html(html: str) -> str:
    """Return the text of the DOM region surrounding the '模型价格' heading.

    Falls back to the whole document text when the heading is absent.
    """
    try:
        soup = BeautifulSoup(html, "lxml")
    except FeatureNotFound:
        soup = BeautifulSoup(html, "html.parser")

    # Locate the heading text node, ignoring matches inside script/style tags.
    heading = next(
        (
            candidate
            for candidate in soup.find_all(string=re.compile(r"模型价格"))
            if not (candidate.parent and candidate.parent.name in ("script", "style"))
        ),
        None,
    )
    if heading is None:
        return soup.get_text(separator="\n")

    # Walk up until the enclosing element contains something price-like.
    ancestor = heading.parent
    for _ in range(6):
        block_text = ancestor.get_text(separator="\n")
        if "元" in block_text or re.search(r"\d", block_text) or "tokens" in block_text.lower():
            return block_text
        if not ancestor.parent:
            break
        ancestor = ancestor.parent
    return ancestor.get_text(separator="\n")
+
+
def extract_price_items_from_html(html: str) -> List[Dict]:
    """Extract structured price items from the rendered page HTML.

    Pipeline: find the container around the '模型价格' heading → parse its
    text into raw price rows → drop tool-call rows and fold discount /
    original-price annotations into notes → group 输入/输出 rows (and rows
    tagged with a dropdown tier) into {"label": ..., "tiers": {...}}
    structures.  If no rows were parsed from text, fall back to scanning
    elements whose CSS classes mention "price".  Returns [] when the
    heading cannot be found.
    """
    try:
        soup = BeautifulSoup(html, "lxml")
    except FeatureNotFound:
        # lxml not installed — fall back to the slower builtin parser.
        soup = BeautifulSoup(html, "html.parser")

    # Locate the '模型价格' heading, skipping matches inside script/style tags.
    node = None
    for n in soup.find_all(string=re.compile(r"模型价格")):
        if n.parent and n.parent.name in ("script", "style"):
            continue
        node = n
        break
    if not node:
        return []

    # Walk up from the heading until the subtree contains price-like text.
    ancestor = node.parent
    container = ancestor
    for _ in range(6):
        txt = ancestor.get_text(separator="\n")
        if "元" in txt or re.search(r"\d", txt) or "tokens" in txt.lower():
            container = ancestor
            break
        if ancestor.parent:
            ancestor = ancestor.parent
        else:
            container = ancestor
            break

    # NOTE(review): price_re is defined but never used in this function.
    price_re = re.compile(r"([0-9]+(?:\.[0-9]+)?)\s*元", re.I)
    items: List[Dict] = []

    container_text = container.get_text(separator="\n")
    items = parse_prices_from_text(container_text)

    def _postprocess_items(raw_items: List[Dict]) -> List[Dict]:
        # Drop tool-call rows, merge '原价' rows into the previous entry,
        # normalise notes/units and clean labels.
        filtered: List[Dict] = []
        for it in raw_items:
            raw = it.get("raw", "")
            label = it.get("label", "")
            unit = it.get("unit", "")

            # Skip tool/API-call pricing rows entirely.
            if _is_tool_call_item(label, raw, unit):
                continue

            # A dedicated original-price row annotates the previous kept entry.
            if "原价" in label and filtered:
                if "price" in it:
                    filtered[-1]["price_original"] = it["price"]
                elif "price_current" in it and "price_original" in it:
                    filtered[-1]["price_original"] = it["price_original"]
                filtered[-1].setdefault("note", "")
                if filtered[-1]["note"]:
                    filtered[-1]["note"] += "; 原价显示"
                else:
                    filtered[-1]["note"] = "原价显示"
                continue

            # Collect discount / promo notes ("限时5折", "限时免费", ...).
            notes = []
            discount_match = re.search(r"(限时)?([0-9.]+)\s*折", raw)
            if discount_match:
                discount = discount_match.group(2)
                notes.append(f"限时{discount}折")
            else:
                if re.search(r"限时|免费", raw) or re.search(r"限时|免费", label):
                    if re.search(r"免费", raw):
                        notes.append("限时免费")
                    else:
                        notes.append("限时优惠")

            if re.search(r"原价", raw):
                notes.append("原价显示")
            if notes:
                it["note"] = "; ".join(notes)

            # Infer the billing unit from the raw text when missing.
            if "unit" not in it:
                if re.search(r"每千|tokens|/千|/每千", raw, re.I):
                    it["unit"] = "元/每千tokens"
                else:
                    um = re.search(r"元\s*/?\s*每[^\n,,;]*", raw)
                    if um:
                        it["unit"] = um.group(0)

            # Strip promo markers and trailing price text from the label.
            cleaned_label = re.sub(r"限时[0-9.]*折|限时|免费|原价|\s*元.*", "", label).strip()
            cleaned_label = re.sub(r"\s+", " ", cleaned_label).strip()
            if not cleaned_label:
                cleaned_label = "price"
            it["label"] = cleaned_label

            it["currency"] = "CNY"
            filtered.append(it)
        return filtered

    filtered = _postprocess_items(items)

    # Group 输入/输出 rows into tiered structures:
    # {"label": "input", "tiers": {"input<=32k": {...}, ...}}.
    structured: List[Dict] = []
    grouped: Dict[str, Dict[str, Dict]] = {}

    for it in filtered:
        lbl = it.get("label", "")
        raw = it.get("raw", "")
        # NOTE(review): `combined` is computed but never used.
        combined = lbl + " " + raw

        should_group = False
        group = None

        if re.search(r"输入", lbl):
            should_group = True
            group = "input"
        elif re.search(r"输出", lbl):
            should_group = True
            group = "output"
        if "tier" in it:
            # Row captured while a specific dropdown tier was selected;
            # key it by the normalised tier text.
            tier_raw = it.get("tier") or ""
            tier_key = _normalize_tier_option(tier_raw)
            if not group:
                if "input" in tier_key.lower():
                    group = "input"
                elif "output" in tier_key.lower():
                    group = "output"
                else:
                    group = "input"

            tier_data = {k: v for k, v in it.items() if k not in ("label", "tier")}
            grouped.setdefault(group, {})[tier_key] = tier_data
        elif should_group and group:
            # Plain 输入/输出 row: key by the label with the prefix anglicised.
            key = lbl
            if group == "input":
                key = re.sub(r"^输入", "input", key)
            elif group == "output":
                key = re.sub(r"^输出", "output", key)
            tier_data = {k: v for k, v in it.items() if k not in ("label",)}
            grouped.setdefault(group, {})[key] = tier_data
        else:
            structured.append(it)

    for g, mapping in grouped.items():
        structured.append({"label": g, "tiers": mapping})

    items = structured

    # Fallback: nothing parsed from text — look for elements whose class
    # names mention "price" and reconstruct label/unit from nearby nodes.
    if not items:
        try:
            price_nodes = []
            for el in soup.find_all(class_=re.compile(r"price", re.I)):
                text = el.get_text(" ", strip=True)
                if not re.search(r"[0-9]+(\.[0-9]+)?", text):
                    continue
                price_nodes.append((el, text))

            seen = set()
            for el, text in price_nodes:
                if text in seen:
                    continue
                seen.add(text)
                unit_el = el.find_next(class_=re.compile(r"unit", re.I))
                unit_text = unit_el.get_text(" ", strip=True) if unit_el else None

                # Try a "label"-classed element in up to 4 ancestor levels.
                label = None
                p = el
                for _ in range(4):
                    sib_label = None
                    parent = p.parent
                    if parent:
                        sib_label = parent.find(class_=re.compile(r"label", re.I))
                    if sib_label and sib_label.get_text(strip=True):
                        label = sib_label.get_text(" ", strip=True)
                        break
                    if parent is None:
                        break
                    p = parent

                # Otherwise walk previous siblings for non-numeric text.
                if not label:
                    prev = el.previous_sibling
                    steps = 0
                    while prev and steps < 6:
                        candidate = None
                        if isinstance(prev, str) and prev.strip():
                            candidate = prev.strip()
                        else:
                            try:
                                candidate = prev.get_text(" ", strip=True)
                            except Exception:
                                candidate = None
                        if candidate and not re.search(r"[0-9]", candidate):
                            label = candidate
                            break
                        prev = prev.previous_sibling
                        steps += 1

                entry = {"label": label or "price", "raw": text, "currency": "CNY"}
                try:
                    entry["price"] = float(re.search(r"([0-9]+(?:\.[0-9]+)?)", text).group(1))
                except Exception:
                    entry["price"] = text
                if unit_text:
                    entry["unit"] = unit_text
                items.append(entry)
        except Exception:
            pass

    # Normalise fallback-derived items with the same rules.
    if items:
        items = _postprocess_items(items)

    return items
+
+
def extract_price_items_global(html: str) -> List[Dict]:
    """Fallback extractor: parse raw price rows from the text near '模型价格'.

    Unlike extract_price_items_from_html, this returns the unfiltered rows
    from parse_prices_from_text.  Returns [] when the heading is missing.
    """
    try:
        soup = BeautifulSoup(html, "lxml")
    except FeatureNotFound:
        soup = BeautifulSoup(html, "html.parser")

    # Find the heading text node, ignoring script/style content.
    heading = None
    for candidate in soup.find_all(string=re.compile(r"模型价格")):
        parent = candidate.parent
        if parent and parent.name in ("script", "style"):
            continue
        heading = candidate
        break
    if not heading:
        return []

    # Climb until the enclosing element contains price-like text, then parse.
    ancestor = heading.parent
    for _ in range(6):
        block_text = ancestor.get_text(separator="\n")
        if "元" in block_text or re.search(r"\d", block_text) or "tokens" in block_text.lower():
            return parse_prices_from_text(block_text)
        if not ancestor.parent:
            break
        ancestor = ancestor.parent
    return parse_prices_from_text(ancestor.get_text(separator="\n"))
+
+
def scrape_model_price(url: str, headless: bool = True, timeout: int = 20000, executable_path: Optional[str] = None) -> Dict:
    """Scrape the "模型价格" (model price) section of one model detail page.

    Launches its own Chromium via Playwright and tries several extraction
    strategies in order: parse rendered HTML, iterate tier options (if a
    tier selector exists), wait/scroll and retry, then fall back to
    plain-text parsing of the price block.

    Args:
        url: model detail page URL.
        headless: run the browser without a window.
        timeout: navigation timeout in milliseconds.
        executable_path: optional explicit browser binary path.

    Returns:
        ``{"url", "error", "prices"}`` on success (prices keyed by tier or
        label), or the initial ``{"url", "error", "items"}`` shape when
        navigation / extraction fails early.
    """
    result = {"url": url, "error": None, "items": []}

    with sync_playwright() as p:
        launch_kwargs = {"headless": headless}
        if executable_path:
            launch_kwargs["executable_path"] = executable_path
        browser = p.chromium.launch(**launch_kwargs)
        context = browser.new_context()
        page = context.new_page()

        # NOTE(review): network_hits and console_logs are collected below
        # but never folded into the returned result — debugging leftovers?
        network_hits = []
        console_logs = []

        def _on_console(msg):
            # Record console messages; never let a callback error escape.
            try:
                console_logs.append({"type": msg.type, "text": msg.text})
            except Exception:
                pass

        def _on_response(resp):
            # Keep snippets of JSON/text responses that look price-related.
            try:
                url_r = resp.url
                ct = resp.headers.get("content-type", "")
                if "application/json" in ct or ct.startswith("text") or "json" in url_r.lower() or "price" in url_r.lower():
                    try:
                        body = resp.text()
                    except Exception:
                        body = None
                    snippet = None
                    if body:
                        if "元" in body or "price" in body.lower() or "tokens" in body.lower() or "price" in url_r.lower():
                            snippet = body[:2000]
                    if snippet:
                        network_hits.append({"url": url_r, "content_type": ct, "snippet": snippet})
            except Exception:
                pass

        page.on("console", _on_console)
        page.on("response", _on_response)
        # Prefer a fully-settled page; fall back to the plain load event.
        try:
            page.goto(url, wait_until="networkidle", timeout=timeout)
        except PlaywrightTimeoutError:
            try:
                page.goto(url, wait_until="load", timeout=timeout)
            except Exception as e:
                result["error"] = f"导航失败: {e}"
                browser.close()
                return result

        # Best-effort wait for the price heading to render.
        try:
            page.wait_for_selector("text=模型价格", timeout=8000)
        except PlaywrightTimeoutError:
            pass

        time.sleep(1.2)  # let late client-side rendering finish
        html = page.content()
        items = []
        # Strategy 1: parse the rendered HTML directly.
        try:
            items = extract_price_items_from_html(html)
        except Exception:
            items = []

        # Strategy 2: if the page has a pricing-tier selector, iterate every
        # tier option and tag each parsed item with its tier.
        tiered_items: List[Dict] = []
        try:
            _ensure_tiered_pricing(page)
            tier_options = _get_tier_options(page)
            for opt in tier_options:
                if not _select_tier_option(page, opt):
                    continue
                html = page.content()
                try:
                    tier_items = extract_price_items_from_html(html)
                except Exception:
                    tier_items = []
                for it in tier_items:
                    it["tier"] = opt
                tiered_items.extend(tier_items)
        except Exception:
            tiered_items = []

        # Tiered results, when present, supersede the flat parse.
        if tiered_items:
            items = tiered_items

        # Strategy 3: wait for a visible "<number> 元" and scroll, then retry.
        if not items:
            try:
                page.wait_for_selector("text=/[0-9]+(\\.[0-9]+)?\\s*元/", timeout=8000)
            except PlaywrightTimeoutError:
                pass

            try:
                page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                time.sleep(1.0)
                html = page.content()
                items = extract_price_items_from_html(html)
            except Exception:
                items = []

        # Strategy 4 (last resort): plain-text parsing of the price block.
        if not items:
            text_block = extract_price_block_html(html)
            if not text_block:
                result["error"] = "未找到包含 '模型价格' 的区域,可能需要登录或页面结构不同。"
                browser.close()
                return result
            items = parse_prices_from_text(text_block)

        def _build_price_map(parsed_items: List[Dict]) -> Dict:
            """Group parsed items into {tier -> {label -> entry}} (tiered)
            or {label -> entry-or-list} (flat); duplicate flat labels are
            promoted to a list."""
            price_map: Dict = {}

            for it in parsed_items:
                # Items carrying an embedded "tiers" dict: one entry per tier.
                if isinstance(it, dict) and it.get("tiers") and isinstance(it.get("tiers"), dict):
                    for tier_key, tier_val in it["tiers"].items():
                        k = _normalize_tier_option(tier_key)
                        price_map.setdefault(k, {})
                        sub_label = tier_val.get("label") or tier_val.get("raw") or k
                        price_map[k][sub_label] = {k2: v for k2, v in tier_val.items() if k2 not in ("tier", "tiers", "label")}
                    continue

                # Items tagged with a single tier (strategy 2 above).
                if it.get("tier"):
                    tk = _normalize_tier_option(it.get("tier"))
                    price_map.setdefault(tk, {})
                    sub_label = it.get("label") or it.get("raw") or tk
                    price_map[tk][sub_label] = {k: v for k, v in it.items() if k not in ("tier", "label")}
                    continue

                # Flat items keyed by label; collisions become lists.
                lbl = it.get("label") or it.get("raw") or "price"
                if lbl in price_map and not isinstance(price_map[lbl], list):
                    price_map[lbl] = [price_map[lbl]]
                if isinstance(price_map.get(lbl), list):
                    price_map[lbl].append({k: v for k, v in it.items() if k != "label"})
                else:
                    price_map[lbl] = {k: v for k, v in it.items() if k != "label"}

            return price_map

        price_map = _build_price_map(items)
        # Rebuild the result in its success shape (keeps any earlier error).
        result = {"url": url, "error": result.get("error"), "prices": price_map}

        browser.close()
    return result
+
+
def main():
    """CLI entry point: scrape model prices for one URL or a file of URLs
    and print the aggregated results as JSON."""
    parser = argparse.ArgumentParser(description="爬取阿里云模型市场页面的模型价格(基于 Playwright)")
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--url", help="单个模型页面 URL")
    source.add_argument("--file", help="包含多个 URL(每行一个)的文件路径")
    parser.add_argument("--headful", action="store_true", help="以有头模式打开浏览器(方便调试)")
    parser.add_argument("--timeout", type=int, default=20000, help="导航超时(毫秒),默认20000")
    parser.add_argument("--browser-path", help="浏览器可执行文件完整路径")
    args = parser.parse_args()

    if args.url:
        targets: List[str] = [args.url]
    else:
        with open(args.file, "r", encoding="utf-8") as fh:
            targets = [line.strip() for line in fh if line.strip()]

    # Explicit --browser-path wins over the environment variable.
    chromium_path = args.browser_path or os.environ.get("PLAYWRIGHT_EXECUTABLE")

    # --headful forces a visible browser; PLAYWRIGHT_HEADLESS=false does too.
    run_headless = not args.headful
    if os.environ.get("PLAYWRIGHT_HEADLESS", "").lower() == "false":
        run_headless = False

    all_results = []
    for target in targets:
        print(f"抓取: {target}")
        all_results.append(
            scrape_model_price(
                target,
                headless=run_headless,
                timeout=args.timeout,
                executable_path=chromium_path,
            )
        )

    print(json.dumps(all_results, ensure_ascii=False, indent=2))

+ 341 - 0
backend/crawl/scrape_model_info.py

@@ -0,0 +1,341 @@
+#!/usr/bin/env python3
+"""
+scrape_model_info.py
+抓取阿里云百炼模型页面的基本信息和能力:
+  - 模型 Code(model_code)
+  - 模型名称(name)
+  - 模型描述(description)
+  - 能力标签(capabilities):如 文本生成、深度思考
+  - 功能特性(features):如 function calling、联网搜索
+  - 输入/输出模态(input_modalities / output_modalities)
+
+原理:拦截页面请求的后端 API(listFoundationModels 等),
+直接从 JSON 响应中提取,比解析 HTML 更准确。
+"""
+
+import re
+import time
+import json
+from typing import Any, Dict, List, Optional
+
+from playwright.sync_api import Page, TimeoutError as PlaywrightTimeoutError
+
+
# Capability code -> display label. Labels are the exact strings the page
# renders, so they stay in Chinese.
CAPABILITY_LABELS: Dict[str, str] = {
    "TG":        "文本生成",
    "QwQ":       "深度思考",
    "Reasoning": "深度思考",
    "VU":        "视觉理解",
    "AU":        "音频理解",
    "VID":       "视频理解",
    "VG":        "视频生成",
    "IMG":       "图像生成",
    "EMB":       "向量表示",
    "ASR":       "语音识别",
    "TTS":       "语音合成",
}

# Feature items shown in the fixed capability panel (screenshot order:
# left column top-to-bottom, then right column).
# key = feature string returned by the API, value = display name on the page
FEATURE_LABELS: Dict[str, str] = {
    "model-experience":   "模型体验",
    "function-calling":   "function calling",
    "structured-outputs": "结构化输出",
    "web-search":         "联网搜索",
    "prefix-completion":  "前缀续写",
    "cache":              "cache存储",
    "batch":              "批量推理",
    "model-optimization": "模型调优",
}

# Fixed display order of all feature items in the capability panel
# (matches the screenshot; parse_model_info emits flags in this order).
ALL_FEATURES_ORDERED: List[str] = [
    "model-experience",   # model experience
    "function-calling",   # function calling
    "structured-outputs", # structured outputs
    "web-search",         # web search
    "prefix-completion",  # prefix completion
    "cache",              # cache storage
    "batch",              # batch inference
    "model-optimization", # model tuning
]

# URL keywords of the backend APIs whose JSON responses carry model data.
API_URL_RE = re.compile(
    r"listFoundationModels|listRecommendedModels|listFeaturedModels|getModelDetail|modelCenter",
    re.I,
)
+
+
+def _extract_model_id_from_url(url: str) -> str:
+    """从页面 URL 的 hash 部分提取模型 ID,如 #/model-market/detail/qwen3-max -> qwen3-max。"""
+    # 优先从 hash 提取
+    hash_match = re.search(r"#.*?/detail/([^/?#&]+)", url)
+    if hash_match:
+        return hash_match.group(1).strip()
+    # 回退:取最后一段路径
+    clean = re.sub(r"[?#].*", "", url)
+    parts = [p for p in clean.rstrip("/").split("/") if p]
+    return parts[-1] if parts else ""
+
+
+def _merge_with_items(obj: Dict) -> Dict:
+    """
+    如果对象是"组"(group=true 或 model 以 group- 开头),
+    用其 items[0] 的数据补充缺失的 features / modelInfo / inferenceMetadata。
+    """
+    items = obj.get("items", [])
+    if not items or not isinstance(items, list):
+        return obj
+
+    # 合并:组对象字段优先,子模型补充缺失字段
+    merged = dict(obj)
+    child = items[0] if isinstance(items[0], dict) else {}
+
+    for key in ("features", "modelInfo", "inferenceMetadata", "capabilities"):
+        # 只在组对象该字段为空时才用子模型的值补充
+        group_val = merged.get(key)
+        child_val = child.get(key)
+        if not group_val and child_val:
+            merged[key] = child_val
+
+    # description 优先用组对象的,若为空则用子模型的
+    if not merged.get("description") and child.get("description"):
+        merged["description"] = child["description"]
+
+    return merged
+
+
def _find_model_in_json(data: Any, target: str) -> Optional[Dict]:
    """Recursively search a JSON payload for the model object matching *target*.

    A dict matches when its ``model`` or ``name`` field — lowercased, with any
    ``group-`` prefix removed — equals *target* normalized the same way.
    The first (depth-first) match wins, and a matched group object gets its
    missing fields backfilled from ``items[0]`` via :func:`_merge_with_items`.

    Fix: the previous version computed ``name_val`` but never used it, even
    though its docstring promised name matching; the name comparison is now
    actually performed, and the unimplemented prefix-match claim is dropped.

    Returns ``None`` when nothing matches.
    """
    clean_target = re.sub(r"^group-", "", target.lower())

    if isinstance(data, dict):
        model_val = re.sub(r"^group-", "", str(data.get("model", "")).lower())
        name_val = re.sub(r"^group-", "", str(data.get("name", "")).lower())
        # Match on either identifier field, as long as one is present.
        if ("model" in data or "name" in data) and clean_target in (model_val, name_val):
            return _merge_with_items(data)
        for value in data.values():
            found = _find_model_in_json(value, target)
            if found:
                return found
    elif isinstance(data, list):
        for element in data:
            found = _find_model_in_json(element, target)
            if found:
                return found
    return None
+
+
def parse_model_info(model_obj: Dict) -> Dict:
    """Convert a raw API model object into the structured form shown on the
    page, preserving field order: model_code -> display_tags -> description
    -> input_modalities -> output_modalities -> features (fixed 8 flags).
    """
    info: Dict = {}

    # Model code, with any "group-" prefix stripped.
    info["model_code"] = re.sub(r"^group-", "", model_obj.get("model", ""))

    # First header row on the page, e.g. "Qwen3 · 文本生成 · 深度思考":
    # collection tag first, then de-duplicated capability labels.
    tags: List[str] = []
    collection = model_obj.get("collectionTag", "")
    if collection:
        tags.append(re.sub(r"^qwen", "Qwen", collection, flags=re.I))
    for code in model_obj.get("capabilities", []):
        readable = CAPABILITY_LABELS.get(code, code)
        if readable not in tags:
            tags.append(readable)
    info["display_tags"] = tags

    # Long description, falling back to the short one.
    info["description"] = model_obj.get("description", "") or model_obj.get("shortDescription", "")

    # Input/output modalities from the inference metadata.
    inference_meta = model_obj.get("inferenceMetadata", {})
    info["input_modalities"] = inference_meta.get("request_modality", [])
    info["output_modalities"] = inference_meta.get("response_modality", [])

    # Fixed set of 8 feature flags, emitted in page order.
    enabled = set(model_obj.get("features", []))
    info["features"] = {FEATURE_LABELS[f]: (f in enabled) for f in ALL_FEATURES_ORDERED}

    return info
+
+
def scrape_model_info(page: Page, url: str) -> Dict:
    """Scrape basic model info from an already-open Playwright *page*.

    The data comes from the backend JSON APIs the page calls
    (listFoundationModels etc.), not from the rendered HTML.

    Fix: the previous version attached its response listener *after* the
    page had already finished loading (its own comment noted the listener
    must be registered before navigation), so ``api_data`` was almost
    always empty and the function effectively always reported an error.
    The listener is now registered first and the page is reloaded so the
    API calls fire again while we are listening; the listener is detached
    before returning since the page belongs to the caller.

    Returns a dict with at least ``model_code`` and ``error`` (``None`` on
    success) plus the fields produced by :func:`parse_model_info`.
    """
    target = _extract_model_id_from_url(url)
    result: Dict = {"model_code": target, "error": None}

    api_data: List[Dict] = []

    def on_response(resp):
        # Collect every JSON body from the model listing/detail endpoints.
        try:
            if "application/json" not in resp.headers.get("content-type", ""):
                return
            if not API_URL_RE.search(resp.url):
                return
            try:
                api_data.append(resp.json())
            except Exception:
                pass
        except Exception:
            pass

    page.on("response", on_response)
    try:
        # Re-issue the page's API calls now that the listener is attached.
        try:
            page.reload(wait_until="networkidle")
        except Exception:
            # Best-effort: fall through and use whatever arrives below.
            pass

        # Small grace period so late responses are still captured.
        time.sleep(0.5)

        model_obj = None
        for body in api_data:
            found = _find_model_in_json(body, target)
            if found:
                model_obj = found
                break

        if model_obj:
            result.update(parse_model_info(model_obj))
        else:
            result["error"] = f"未从 API 响应中找到模型 '{target}',可能需要登录或模型 ID 不匹配"
    finally:
        # Never leave our listener attached to a caller-owned page.
        try:
            page.remove_listener("response", on_response)
        except Exception:
            pass

    return result
+
+
+# ── 独立运行入口 ────────────────────────────────────────────────────────────────
+
def scrape_model_info_standalone(
    url: str,
    headless: bool = True,
    timeout: int = 20000,
    executable_path: Optional[str] = None,
) -> Dict:
    """Standalone variant: launch a browser, navigate, scrape the model
    info from intercepted API JSON, then close the browser.

    Unlike :func:`scrape_model_info`, the response listener here is
    registered *before* navigation, so the API responses are captured.

    Returns ``{"url", "model_code", "error", ...parse_model_info fields}``.
    """
    from playwright.sync_api import sync_playwright

    target = _extract_model_id_from_url(url)
    result: Dict = {"url": url, "model_code": target, "error": None}
    api_data: List[Dict] = []

    with sync_playwright() as p:
        launch_kwargs: Dict = {"headless": headless}
        if executable_path:
            launch_kwargs["executable_path"] = executable_path
        browser = p.chromium.launch(**launch_kwargs)
        page = browser.new_context().new_page()

        def on_response(resp):
            # Keep JSON bodies from the model listing/detail endpoints.
            try:
                if "application/json" not in resp.headers.get("content-type", ""):
                    return
                if not API_URL_RE.search(resp.url):
                    return
                try:
                    api_data.append(resp.json())
                except Exception:
                    pass
            except Exception:
                pass

        # Register BEFORE navigation so the initial API calls are captured.
        page.on("response", on_response)

        # Prefer a fully-settled page; fall back to the plain load event.
        try:
            page.goto(url, wait_until="networkidle", timeout=timeout)
        except PlaywrightTimeoutError:
            try:
                page.goto(url, wait_until="load", timeout=timeout)
            except Exception as e:
                result["error"] = f"导航失败: {e}"
                browser.close()
                return result

        # Wait for any of the page's main section headings to render.
        for sel in ["text=模型介绍", "text=模型能力", "text=模型价格"]:
            try:
                page.wait_for_selector(sel, timeout=6000)
                break
            except PlaywrightTimeoutError:
                pass
        time.sleep(1.0)  # grace period for late API responses

        # Depth-first search of the captured bodies for the target model.
        model_obj = None
        for body in api_data:
            found = _find_model_in_json(body, target)
            if found:
                model_obj = found
                break

        if model_obj:
            result.update(parse_model_info(model_obj))
        else:
            result["error"] = f"未从 API 响应中找到模型 '{target}'"

        browser.close()

    return result
+
+
if __name__ == "__main__":
    # CLI: scrape model info for one URL or a file of URLs, print JSON.
    import argparse, os

    ap = argparse.ArgumentParser(description="抓取阿里云模型基本信息与能力")
    group = ap.add_mutually_exclusive_group(required=True)
    group.add_argument("--url", help="模型页面 URL")
    group.add_argument("--file", help="URL 列表文件(每行一个)")
    ap.add_argument("--headful", action="store_true")
    ap.add_argument("--timeout", type=int, default=20000)
    ap.add_argument("--browser-path")
    args = ap.parse_args()

    # Fix: read the URL file inside a context manager so the handle is
    # closed deterministically (the old bare open() leaked it).
    if args.url:
        urls = [args.url]
    else:
        with open(args.file, encoding="utf-8") as f:
            urls = f.read().splitlines()
    urls = [u.strip() for u in urls if u.strip()]

    # Explicit --browser-path wins over the environment variable.
    exec_path = args.browser_path or os.environ.get("PLAYWRIGHT_EXECUTABLE")
    headless = not args.headful

    results = []
    for u in urls:
        print(f"抓取模型信息: {u}", flush=True)
        results.append(scrape_model_info_standalone(u, headless=headless, timeout=args.timeout, executable_path=exec_path))

    print(json.dumps(results, ensure_ascii=False, indent=2))

+ 182 - 0
backend/crawl/scrape_rate_limits.py

@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+"""
+scrape_rate_limits.py
+抓取阿里云百炼模型"模型限流与上下文"区块,字段与页面完全对应:
+  最大输入长度、RPM、最大输入长度(思考)、上下文长度
+  最大输出长度、TPM、最大输出长度(思考)、最大思维链长度
+
+原理:从页面文本直接提取,字段名和值与页面显示一致。
+"""
+
+import re
+import time
+import json
+from typing import Dict, List, Optional
+
+from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
+
+
+# 页面字段名 -> 输出 key 映射(按截图顺序)
# Page field name -> output key, in page (screenshot) order. The more
# specific "(思考)" variants must come before their generic prefixes so
# they win when both could start at the same position.
#
# Fix: the consumer appends a numeric capture to each pattern and runs it
# against the *compact* page text (e.g. "最大输入长度252KRPM30000"), where
# the acronyms sit directly between word characters. ``\bRPM\b`` can never
# match there ("KRPM3" has no word boundary on either side), so RPM / TPM /
# QPM were silently never extracted; the acronym patterns are therefore
# written without ``\b`` anchors.
FIELD_PATTERNS = [
    # (regex matched against the page text,  output key)
    (r"最大输入长度[((]思考[))]", "最大输入长度(思考)"),
    (r"最大输入长度",              "最大输入长度"),
    (r"最大输出长度[((]思考[))]", "最大输出长度(思考)"),
    (r"最大输出长度",              "最大输出长度"),
    (r"上下文长度",                "上下文长度"),
    (r"最大思维链长度",            "最大思维链长度"),
    (r"RPM",                      "RPM"),
    (r"TPM",                      "TPM"),
    (r"QPM",                      "QPM"),
]

# Value format: digits + optional unit (K/M/万); ASCII and fullwidth commas.
VALUE_RE = re.compile(r"(\d[\d,,]*(?:\.\d+)?\s*[KkMm万]?)")
+
+
+def _extract_model_id_from_url(url: str) -> str:
+    m = re.search(r"#.*?/detail/([^/?#&]+)", url)
+    if m:
+        return m.group(1).strip()
+    clean = re.sub(r"[?#].*", "", url)
+    parts = [p for p in clean.rstrip("/").split("/") if p]
+    return parts[-1] if parts else ""
+
+
def _get_rate_limit_section_text(page) -> str:
    """Extract the text of the "模型限流与上下文" (rate limit & context)
    section from the live page.

    Runs an in-page JS walker: find the heading text node, then climb up
    to 10 ancestors until one's innerText is long enough (>50 chars) and
    carries rate-limit markers (RPM/TPM/…K). Returns "" on any failure.
    """
    try:
        # NOTE: the evaluate() argument is a runtime string — left verbatim.
        return page.evaluate("""
        () => {
            // 找"模型限流与上下文"标题节点
            const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT);
            let node;
            while ((node = walker.nextNode())) {
                if (/模型限流|限流与上下文/.test(node.textContent)) {
                    let el = node.parentElement;
                    for (let i = 0; i < 10; i++) {
                        if (!el) break;
                        const txt = (el.innerText || '').trim();
                        // 找到包含数字和限流关键词的容器
                        if (txt.length > 50 && /RPM|TPM|\\d+K/.test(txt)) return txt;
                        el = el.parentElement;
                    }
                }
            }
            return '';
        }
        """)
    except Exception:
        # Page gone / evaluate failed: caller treats "" as "not found".
        return ""
+
+
def parse_rate_limits_from_text(text: str) -> Dict:
    """Extract rate-limit fields from the section text; output keys mirror
    the page exactly.

    The text arrives compact (labels fused with values), e.g.::

      模型限流与上下文最大输入长度252KRPM30000最大输入长度(思考)252K上下文长度256K
      最大输出长度64KTPM5000000最大输出长度(思考)32K最大思维链长度80K

    Returns a dict like ``{"RPM": "30000", "上下文长度": "256K", ...}``;
    fields not present in the text are simply absent.
    """
    result: Dict = {}

    # Normalize: collapse all whitespace runs to single spaces.
    text = re.sub(r"\s+", " ", text).strip()

    for pattern, key in FIELD_PATTERNS:
        # FIELD_PATTERNS lists specific variants first; keep first match only.
        if key in result:
            continue
        # Match the field label immediately followed by its numeric value.
        m = re.search(pattern + r"\s*([0-9][0-9,,]*(?:\.\d+)?\s*[KkMm万]?)", text, re.I)
        if m:
            # Normalize fullwidth commas to ASCII commas.
            val = m.group(1).strip().replace(",", ",")
            # Uppercase a trailing unit "k" for consistency.
            val = re.sub(r"k$", "K", val)
            result[key] = val

    return result
+
+
def scrape_rate_limits_standalone(
    url: str,
    headless: bool = True,
    timeout: int = 20000,
    executable_path: Optional[str] = None,
) -> Dict:
    """Standalone run: launch a browser, navigate, scrape the rate-limit
    section, then close the browser.

    Returns ``{"url", "model_code", "error", "rate_limits"}`` where
    ``rate_limits`` maps page field names to values (may be empty).
    """
    from playwright.sync_api import sync_playwright

    target = _extract_model_id_from_url(url)
    result: Dict = {"url": url, "model_code": target, "error": None}

    with sync_playwright() as p:
        launch_kwargs: Dict = {"headless": headless}
        if executable_path:
            launch_kwargs["executable_path"] = executable_path
        browser = p.chromium.launch(**launch_kwargs)
        page = browser.new_context().new_page()

        # Prefer a fully-settled page; fall back to the plain load event.
        try:
            page.goto(url, wait_until="networkidle", timeout=timeout)
        except PlaywrightTimeoutError:
            try:
                page.goto(url, wait_until="load", timeout=timeout)
            except Exception as e:
                result["error"] = f"导航失败: {e}"
                browser.close()
                return result

        # Wait for any rate-limit-related text to render.
        for sel in ["text=模型限流", "text=上下文长度", "text=RPM"]:
            try:
                page.wait_for_selector(sel, timeout=6000)
                break
            except PlaywrightTimeoutError:
                pass
        time.sleep(1.0)

        # Scroll to the bottom so a lazily-rendered section gets loaded.
        try:
            page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
            time.sleep(0.8)
        except Exception:
            pass

        text = _get_rate_limit_section_text(page)
        # NOTE(review): leftover debug print — consider switching to logging.
        print(f"[DEBUG] 限流区块文本: {text[:200]}")

        if text:
            result["rate_limits"] = parse_rate_limits_from_text(text)
        else:
            result["error"] = "未找到模型限流与上下文区块"
            result["rate_limits"] = {}

        browser.close()

    return result
+
+
if __name__ == "__main__":
    # CLI: scrape rate-limit info for one URL or a file of URLs, print JSON.
    import argparse, os

    ap = argparse.ArgumentParser(description="抓取阿里云模型限流与上下文信息")
    group = ap.add_mutually_exclusive_group(required=True)
    group.add_argument("--url")
    group.add_argument("--file")
    ap.add_argument("--headful", action="store_true")
    ap.add_argument("--timeout", type=int, default=20000)
    ap.add_argument("--browser-path")
    args = ap.parse_args()

    # Fix: read the URL file inside a context manager so the handle is
    # closed deterministically (the old bare open() leaked it).
    if args.url:
        urls = [args.url]
    else:
        with open(args.file, encoding="utf-8") as f:
            urls = f.read().splitlines()
    urls = [u.strip() for u in urls if u.strip()]

    # Explicit --browser-path wins over the environment variable.
    exec_path = args.browser_path or os.environ.get("PLAYWRIGHT_EXECUTABLE")
    headless = not args.headful

    results = []
    for u in urls:
        print(f"抓取限流信息: {u}", flush=True)
        results.append(scrape_rate_limits_standalone(
            u, headless=headless, timeout=args.timeout, executable_path=exec_path
        ))

    print(json.dumps(results, ensure_ascii=False, indent=2))

+ 369 - 0
backend/crawl/scrape_tool_prices.py

@@ -0,0 +1,369 @@
+#!/usr/bin/env python3
+"""
+scrape_tool_prices.py
+抓取阿里云百炼模型页面的工具调用价格:
+  - 搜索策略、代码解释器、文生图等工具的调用费用
+  - 单位通常为 元/千次调用
+
+原理:复用 scrape_aliyun_models.py 的页面渲染逻辑,
+但专门提取工具调用相关价格行(原脚本会过滤掉这些)。
+"""
+
+import re
+import time
+import json
+from typing import Dict, List, Optional
+
+from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
+
+
# Keywords identifying tool-call price rows (search strategy, code
# interpreter, text-to-image, per-thousand-call units, ...).
TOOL_CALL_RE = re.compile(
    r"搜索策略|代码解释|文生图|数据增强|模型推理|工具调用|千次调用|/千次|次调用",
    re.I,
)

# Unit markers for per-call pricing (per thousand calls / per call).
TOOL_UNIT_RE = re.compile(r"千次调用|/千次|次调用", re.I)
+
+
def _is_tool_call_item(label: str, raw: str) -> bool:
    """True when the label or raw row text looks like a tool-call price
    entry (i.e. matches TOOL_CALL_RE)."""
    for candidate in (label, raw):
        if TOOL_CALL_RE.search(candidate):
            return True
    return False
+
+
def parse_tool_prices_from_text(text: str) -> List[Dict]:
    """Extract tool-call price entries from the "工具调用价格" section text.

    The section arrives as a single compact line shaped like
    ``工具名Completions API价格信息工具名Responses API价格信息...``,
    so the text is split on the API-type markers: each tool name sits at
    the end of the segment *before* a marker, and its price info at the
    start of the segment *after* it (up to the next tool name).

    Each entry is ``{"label", "currency", "unit", "price"[, "note"]}``;
    free promotions get ``price=0`` plus a note, and discount notes
    ("限时优惠" / "N折") are attached when present. Duplicate labels are
    kept once (first occurrence wins); segments without a parsable tool
    name or price are skipped.

    Fix: this function previously carried ~140 lines of two abandoned
    earlier implementations AFTER its first ``return items`` — all
    unreachable dead code — which have been removed.
    """
    items: List[Dict] = []
    seen: set = set()

    price_re = re.compile(r"([0-9]+(?:\.[0-9]+)?)")
    free_re = re.compile(r"限时免费|免费")

    # Split on the API-type markers (kept in the result via the capture
    # group): ["…tool1", "Completions API", "price1…tool2", "Responses API", …]
    api_sep_re = re.compile(r"(Completions API|Responses API)")
    parts = api_sep_re.split(text)

    # A tool name is a trailing ASCII identifier (letters/digits/_/:).
    tool_re = re.compile(r"([a-zA-Z][a-zA-Z0-9_:]*)$")

    # Entry = parts[i] (tool name at its end) + parts[i+1] (API type,
    # discarded) + parts[i+2] (price info at its start).
    for i in range(0, len(parts) - 1, 2):
        before = parts[i]                                   # ends with the tool name
        after = parts[i + 2] if i + 2 < len(parts) else ""  # starts with its price

        name_match = tool_re.search(before)
        if not name_match:
            continue
        label = name_match.group(1)
        if label in seen:
            continue

        # Price info runs until the next tool name begins.
        next_tool = tool_re.search(after)
        price_info = after[: next_tool.start()].strip() if next_tool else after.strip()

        entry: Dict = {"label": label, "currency": "CNY", "unit": "元/千次调用"}

        if free_re.search(price_info):
            entry["price"] = 0
            entry["note"] = "限时免费"
        else:
            nums = price_re.findall(price_info)
            if not nums:
                continue
            try:
                entry["price"] = float(nums[0])
            except Exception:
                entry["price"] = nums[0]
            if re.search(r"限时优惠", price_info):
                entry["note"] = "限时优惠"
            dm = re.search(r"([0-9.]+)\s*折", price_info)
            if dm:
                entry["note"] = f"限时{dm.group(1)}折"

        seen.add(label)
        items.append(entry)

    return items
+
+
def _get_tool_price_section_text(html: str) -> str:
    """Locate the "工具调用价格" (tool-call price) section text in the
    rendered HTML, excluding script/style content.

    The section is a standalone block with its own heading (not nested in
    the "模型价格" block). From the heading node we climb up to 10
    ancestors until one's text both carries price markers ("元"/"免费")
    and is long enough (>50 chars) to be the real container.
    Returns "" when the heading is missing or parsing fails entirely.
    """
    try:
        from bs4 import BeautifulSoup, FeatureNotFound
        try:
            soup = BeautifulSoup(html, "lxml")
        except FeatureNotFound:
            soup = BeautifulSoup(html, "html.parser")

        # Find the heading text node, skipping script/style occurrences.
        target_node = None
        for node in soup.find_all(string=re.compile(r"工具调用价格")):
            if node.parent and node.parent.name in ("script", "style"):
                continue
            target_node = node
            break

        if not target_node:
            return ""

        # Walk up until the container text looks like it carries prices.
        ancestor = target_node.parent
        for _ in range(10):
            txt = ancestor.get_text(separator="\n")
            if ("元" in txt or "免费" in txt) and len(txt) > 50:
                return txt
            if ancestor.parent:
                ancestor = ancestor.parent
            else:
                break
        return ancestor.get_text(separator="\n")
    except Exception:
        # Includes ImportError when bs4 is unavailable: treat as not found.
        return ""
+
+
def scrape_tool_prices_standalone(
    url: str,
    headless: bool = True,
    timeout: int = 20000,
    executable_path: Optional[str] = None,
) -> Dict:
    """Standalone run: launch a browser, navigate, scrape tool-call prices,
    then close the browser.

    Returns::

        {
          "url": str,
          "error": str | None,
          "tool_call_prices": [
            {"label": "搜索策略", "price": 0.5, "unit": "元/千次调用", "currency": "CNY"},
            ...
          ]
        }
    """
    from playwright.sync_api import sync_playwright

    result: Dict = {"url": url, "error": None, "tool_call_prices": []}

    with sync_playwright() as p:
        launch_kwargs: Dict = {"headless": headless}
        if executable_path:
            launch_kwargs["executable_path"] = executable_path
        browser = p.chromium.launch(**launch_kwargs)
        page = browser.new_context().new_page()

        # Prefer a fully-settled page; fall back to the plain load event.
        try:
            page.goto(url, wait_until="networkidle", timeout=timeout)
        except PlaywrightTimeoutError:
            try:
                page.goto(url, wait_until="load", timeout=timeout)
            except Exception as e:
                result["error"] = f"导航失败: {e}"
                browser.close()
                return result

        # Best-effort wait for the pricing area to render.
        try:
            page.wait_for_selector("text=模型价格", timeout=8000)
        except PlaywrightTimeoutError:
            pass
        time.sleep(1.2)

        html = page.content()
        price_text = _get_tool_price_section_text(html)

        if not price_text:
            # Scroll to the bottom and retry once — the section may be lazy.
            try:
                page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                time.sleep(1.5)
                html = page.content()
                price_text = _get_tool_price_section_text(html)
            except Exception:
                pass

        if not price_text:
            result["error"] = "未找到工具调用价格区域"
            browser.close()
            return result

        # NOTE(review): leftover debug print — consider switching to logging.
        print(f"[DEBUG] 工具调用价格区域文本:\n{price_text[:300]}")
        result["tool_call_prices"] = parse_tool_prices_from_text(price_text)

        browser.close()

    return result
+
+
if __name__ == "__main__":
    # CLI: scrape tool-call prices for one URL or a file of URLs, print JSON.
    import argparse, os

    ap = argparse.ArgumentParser(description="抓取阿里云模型工具调用价格")
    group = ap.add_mutually_exclusive_group(required=True)
    group.add_argument("--url")
    group.add_argument("--file")
    ap.add_argument("--headful", action="store_true")
    ap.add_argument("--timeout", type=int, default=20000)
    ap.add_argument("--browser-path")
    args = ap.parse_args()

    # Fix: read the URL file inside a context manager so the handle is
    # closed deterministically (the old bare open() leaked it).
    if args.url:
        urls = [args.url]
    else:
        with open(args.file, encoding="utf-8") as f:
            urls = f.read().splitlines()
    urls = [u.strip() for u in urls if u.strip()]

    # Explicit --browser-path wins over the environment variable.
    exec_path = args.browser_path or os.environ.get("PLAYWRIGHT_EXECUTABLE")
    headless = not args.headful

    results = []
    for u in urls:
        print(f"抓取工具调用价格: {u}", flush=True)
        results.append(scrape_tool_prices_standalone(u, headless=headless, timeout=args.timeout, executable_path=exec_path))

    print(json.dumps(results, ensure_ascii=False, indent=2))

+ 9 - 0
backend/migrations/002_models.sql

@@ -0,0 +1,9 @@
+-- Migration 002: model registry
+SET search_path TO crawl;
+
+CREATE TABLE IF NOT EXISTS models (
+    id         BIGSERIAL    PRIMARY KEY,
+    name       VARCHAR(200) NOT NULL,
+    url        TEXT         NOT NULL UNIQUE,
+    created_at TIMESTAMPTZ  NOT NULL DEFAULT NOW()
+);

+ 15 - 0
backend/migrations/003_schedule.sql

@@ -0,0 +1,15 @@
+-- Migration 003: scrape schedule config
+SET search_path TO crawl;
+
+CREATE TABLE IF NOT EXISTS scrape_schedule (
+    id           SERIAL      PRIMARY KEY,
+    enabled      BOOLEAN     NOT NULL DEFAULT FALSE,
+    interval_days INT        NOT NULL DEFAULT 1,   -- run every N days
+    start_hour   SMALLINT    NOT NULL DEFAULT 2,   -- hour of day to start (0-23)
+    updated_at   TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+-- Keep exactly one config row (fixed id = 1); reruns are no-ops.
+INSERT INTO scrape_schedule (id, enabled, interval_days, start_hour)
+VALUES (1, FALSE, 1, 2)
+ON CONFLICT (id) DO NOTHING;

+ 7 - 0
backend/migrations/004_scrape_results_extend.sql

@@ -0,0 +1,7 @@
+-- Migration 004: extend scrape_results with model info, rate limits, tool prices
+SET search_path TO crawl;
+
+ALTER TABLE scrape_results
+    ADD COLUMN IF NOT EXISTS model_info  JSONB,
+    ADD COLUMN IF NOT EXISTS rate_limits JSONB,
+    ADD COLUMN IF NOT EXISTS tool_prices JSONB;

+ 5 - 0
backend/migrations/005_raw_data.sql

@@ -0,0 +1,5 @@
+-- Migration 005: store full raw scrape result
+SET search_path TO crawl;
+
+ALTER TABLE scrape_results
+    ADD COLUMN IF NOT EXISTS raw_data JSONB;

+ 4 - 0
backend/migrations/006_access_logs_org.sql

@@ -0,0 +1,4 @@
+-- Migration 006: add org column to access_logs for ASN organization info
+SET search_path TO crawl;
+
+ALTER TABLE access_logs ADD COLUMN IF NOT EXISTS org VARCHAR(200);

+ 15 - 0
backend/migrations/007_price_api_logs.sql

@@ -0,0 +1,15 @@
+-- Migration 007: track /prices API callers
+SET search_path TO crawl;
+
+CREATE TABLE IF NOT EXISTS price_api_logs (
+    id         BIGSERIAL    PRIMARY KEY,
+    ip         VARCHAR(45)  NOT NULL,
+    referer    TEXT,
+    org        VARCHAR(200),
+    country    VARCHAR(100),
+    city       VARCHAR(100),
+    created_at TIMESTAMPTZ  NOT NULL DEFAULT NOW()
+);
+
+CREATE INDEX IF NOT EXISTS idx_price_api_logs_created_at ON price_api_logs (created_at DESC);
+CREATE INDEX IF NOT EXISTS idx_price_api_logs_ip ON price_api_logs (ip);

+ 13 - 0
backend/migrations/008_discounts.sql

@@ -0,0 +1,13 @@
+-- Migration 008: per-domain discount table
+SET search_path TO crawl;
+
+CREATE TABLE IF NOT EXISTS discounts (
+    id         BIGSERIAL    PRIMARY KEY,
+    domain     VARCHAR(255) NOT NULL UNIQUE,
+    discount   NUMERIC(5,4) NOT NULL CHECK (discount > 0 AND discount <= 1),
+    note       TEXT,
+    created_at TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ  NOT NULL DEFAULT NOW()
+);
+
+CREATE INDEX IF NOT EXISTS idx_discounts_domain ON discounts (domain);

+ 1 - 0
backend/requirements.txt

@@ -10,3 +10,4 @@ pytest
 pytest-asyncio
 hypothesis
 httpx
+apscheduler

+ 0 - 7
backend/scrape_aliyun_models.py

@@ -172,13 +172,6 @@ def _get_tier_options(page) -> List[str]:
         return []
     print("[DEBUG] 已展开阶梯计费下拉")
 
-    # 截图:点击后立即看看页面状态
-    try:
-        page.screenshot(path="debug_after_click.png", full_page=False)
-        print("[DEBUG] 已保存点击后截图 debug_after_click.png")
-    except Exception:
-        pass
-
     # 打印点击后所有可见容器的 class,帮助定位下拉 portal
     try:
         containers = page.evaluate(

+ 71 - 0
backend/test_price_parser.py

@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+"""
+测试 price_parser.py 的解析逻辑
+"""
+
+import json
+from app.utils.price_parser import parse_prices
+
+# ── 测试数据1:qwen3-max 阶梯计费 ──
+prices_qwen3_max = {
+    "input<=32k": {
+        "输入": {"raw": "2.5", "unit": "元/每百万tokens", "price": 2.5, "currency": "CNY"},
+        "输出": {"raw": "10", "unit": "元/每百万tokens", "price": 10.0, "currency": "CNY"},
+    },
+    "32k<input<=128k": {
+        "输入": {"raw": "4", "unit": "元/每百万tokens", "price": 4.0, "currency": "CNY"},
+        "输出": {"raw": "16", "unit": "元/每百万tokens", "price": 16.0, "currency": "CNY"},
+    },
+    "128k<input<=256k": {
+        "输入": {"raw": "7", "unit": "元/每百万tokens", "price": 7.0, "currency": "CNY"},
+        "输出": {"raw": "28", "unit": "元/每百万tokens", "price": 28.0, "currency": "CNY"},
+    },
+}
+
+# ── 测试数据2:wan2.6-i2v 按单位计费 ──
+prices_wan26_i2v = {
+    "视频生成(720P)": {"raw": "0.6", "unit": "元/每秒", "price": 0.6, "currency": "CNY"},
+    "视频生成(1080P)": {"raw": "1", "unit": "元/每秒", "price": 1.0, "currency": "CNY"},
+}
+
+# ── 测试数据3:非阶梯(假设某个简单模型) ──
+prices_simple = {
+    "输入": {"raw": "0.5", "unit": "元/每百万tokens", "price": 0.5, "currency": "CNY"},
+    "输出": {"raw": "2.0", "unit": "元/每百万tokens", "price": 2.0, "currency": "CNY"},
+}
+
+
+def test_case(name: str, prices: dict) -> None:
+    """Run parse_prices() on one fixture and pretty-print both input and output."""
+    print(f"\n{'='*60}")
+    print(f"测试:{name}")
+    print(f"{'='*60}")
+    print("输入 prices:")
+    print(json.dumps(prices, ensure_ascii=False, indent=2))
+    print("\n解析结果:")
+    result = parse_prices(prices)
+    print(json.dumps(result, ensure_ascii=False, indent=2))
+
+
+if __name__ == "__main__":
+    test_case("qwen3-max 阶梯计费", prices_qwen3_max)
+    test_case("wan2.6-i2v 按单位计费", prices_wan26_i2v)
+    test_case("简单非阶梯", prices_simple)
+
+
+# ── 测试数据4:qwen-flash 含无上限阶梯 ──
+prices_qwen_flash = {
+    "input<=32k": {
+        "输入": {"raw": "0.15", "unit": "元/每百万tokens", "price": 0.15, "currency": "CNY"},
+        "输出": {"raw": "0.6", "unit": "元/每百万tokens", "price": 0.6, "currency": "CNY"},
+    },
+    "32k<input<=256k": {
+        "输入": {"raw": "0.6", "unit": "元/每百万tokens", "price": 0.6, "currency": "CNY"},
+        "输出": {"raw": "2.4", "unit": "元/每百万tokens", "price": 2.4, "currency": "CNY"},
+    },
+    "256k<input": {
+        "输入": {"raw": "1.2", "unit": "元/每百万tokens", "price": 1.2, "currency": "CNY"},
+        "输出": {"raw": "4.8", "unit": "元/每百万tokens", "price": 4.8, "currency": "CNY"},
+    },
+}
+
+test_case("qwen-flash 含无上限阶梯", prices_qwen_flash)

+ 4 - 2
frontend/src/App.tsx

@@ -1,6 +1,7 @@
 import { BrowserRouter, Route, Routes } from 'react-router-dom';
 import { BottomNav } from './components/BottomNav';
 import { Dashboard } from './pages/Dashboard';
+import { Discounts } from './pages/Discounts';
 import { Logs } from './pages/Logs';
 import { MapPage } from './pages/Map';
 import { Scraper } from './pages/Scraper';
@@ -9,15 +10,16 @@ import './index.css';
 export default function App() {
   return (
     <BrowserRouter>
-      <div style={{ paddingBottom: 64, minHeight: '100vh' }}>
+      <BottomNav />
+      <div style={{ marginLeft: 64, minHeight: '100vh' }}>
         <Routes>
           <Route path="/" element={<Dashboard />} />
           <Route path="/logs" element={<Logs />} />
           <Route path="/map" element={<MapPage />} />
           <Route path="/scraper" element={<Scraper />} />
+          <Route path="/discounts" element={<Discounts />} />
         </Routes>
       </div>
-      <BottomNav />
     </BrowserRouter>
   );
 }

+ 57 - 0
frontend/src/api.ts

@@ -29,3 +29,60 @@ export async function postScrape(urls: string[]): Promise<ScrapeJob> {
   if (!res.ok) throw new Error(`POST /api/scrape failed: ${res.status}`);
   return res.json() as Promise<ScrapeJob>;
 }
+
+// Shape of a model-registry row returned by the backend.
+export interface Model { id: number; name: string; url: string; created_at: string; }
+
+// List all registered models.
+export const fetchModels = () => get<Model[]>('/api/models');
+
+// Register a new model by display name and pricing-page URL.
+export async function createModel(name: string, url: string): Promise<Model> {
+  const res = await fetch(`${BASE}/api/models`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({ name, url }),
+  });
+  if (!res.ok) throw new Error(`添加失败: ${res.status}`);
+  return res.json() as Promise<Model>;
+}
+
+// Remove a model from the registry.
+export async function deleteModel(id: number): Promise<void> {
+  const res = await fetch(`${BASE}/api/models/${id}`, { method: 'DELETE' });
+  if (!res.ok) throw new Error(`删除失败: ${res.status}`);
+}
+
+// Scheduled-scrape configuration (the backend keeps a single row).
+export interface Schedule {
+  enabled: boolean;
+  interval_days: number;
+  start_hour: number;
+  updated_at: string;
+}
+
+// Read the current schedule config.
+export const fetchSchedule = () => get<Schedule>('/api/schedule');
+
+// Overwrite the schedule config; updated_at is set server-side.
+export async function updateSchedule(data: Omit<Schedule, 'updated_at'>): Promise<Schedule> {
+  const res = await fetch(`${BASE}/api/schedule`, {
+    method: 'PUT',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify(data),
+  });
+  if (!res.ok) throw new Error(`更新失败: ${res.status}`);
+  return res.json() as Promise<Schedule>;
+}
+
+// NOTE(review): a mid-file import is legal (ES imports are hoisted) but
+// unconventional — consider moving it up with the other imports.
+import type { Discount } from './types';
+
+// List all per-domain discounts.
+export const fetchDiscounts = () => get<Discount[]>('/api/discounts');
+
+// Create or update the discount entry for a domain.
+export async function upsertDiscount(domain: string, discount: number, note?: string): Promise<Discount> {
+  const res = await fetch(`${BASE}/api/discounts`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({ domain, discount, note }),
+  });
+  if (!res.ok) throw new Error(`保存失败: ${res.status}`);
+  return res.json() as Promise<Discount>;
+}
+
+// Delete a discount row by id.
+export async function deleteDiscount(id: number): Promise<void> {
+  const res = await fetch(`${BASE}/api/discounts/${id}`, { method: 'DELETE' });
+  if (!res.ok) throw new Error(`删除失败: ${res.status}`);
+}

+ 17 - 8
frontend/src/components/BottomNav.css

@@ -1,34 +1,43 @@
 .bottom-nav {
   position: fixed;
-  bottom: 0;
+  top: 0;
   left: 0;
-  right: 0;
+  bottom: 0;
+  width: 64px;
   display: flex;
+  flex-direction: column;
+  align-items: center;
+  padding: 24px 0;
   background: var(--bg-card);
-  border-top: 1px solid var(--bg-border);
+  border-right: 1px solid var(--bg-border);
   z-index: 100;
+  gap: 8px;
 }
 
 .nav-item {
-  flex: 1;
+  width: 100%;
   display: flex;
   flex-direction: column;
   align-items: center;
   justify-content: center;
-  padding: 10px 4px;
+  padding: 12px 4px;
   color: var(--text-muted);
-  font-size: 12px;
-  letter-spacing: 0.08em;
-  transition: color 0.2s;
+  font-size: 10px;
+  letter-spacing: 0.06em;
+  transition: color 0.2s, background 0.2s;
   text-decoration: none;
+  border-radius: 0;
 }
 
 .nav-item:hover {
   color: var(--neon-cyan);
+  background: rgba(0, 212, 255, 0.05);
 }
 
 .nav-item--active {
   color: var(--neon-cyan);
+  border-left: 2px solid var(--neon-cyan);
+  background: rgba(0, 212, 255, 0.08);
 }
 
 .nav-item--active .nav-icon {

+ 5 - 4
frontend/src/components/BottomNav.tsx

@@ -2,10 +2,11 @@ import { NavLink } from 'react-router-dom';
 import './BottomNav.css';
 
 const NAV_ITEMS = [
-  { to: '/', label: 'DASHBOARD', icon: '⊞' },
-  { to: '/logs', label: 'LOGS', icon: '≡' },
-  { to: '/map', label: 'MAP', icon: '◎' },
-  { to: '/scraper', label: 'SCRAPER', icon: '⟳' },
+  { to: '/', label: '仪表盘', icon: '⊞' },
+  { to: '/logs', label: '日志', icon: '≡' },
+  { to: '/map', label: '地图', icon: '◎' },
+  { to: '/scraper', label: '爬取', icon: '⟳' },
+  { to: '/discounts', label: '折扣', icon: '%' },
 ];
 
 export function BottomNav() {

+ 7 - 1
frontend/src/pages/Dashboard.css

@@ -110,7 +110,7 @@
 
 .geo-item {
   display: grid;
-  grid-template-columns: 140px 1fr 44px;
+  grid-template-columns: 140px 1fr 52px 52px;
   align-items: center;
   gap: 10px;
   font-size: 13px;
@@ -137,8 +137,17 @@
 }
 
 .geo-pct {
+  color: var(--neon-cyan);
+  text-align: right;
+}
+
+.geo-pct--dim {
   color: var(--text-muted);
   text-align: right;
+}
+
+/* shared sizing for both counter columns; keeping font-size inside a rule —
+   the previous hunk closed .geo-pct--dim early and left this declaration
+   stranded outside any selector, breaking the stylesheet */
+.geo-pct,
+.geo-pct--dim {
   font-size: 12px;
 }
 

+ 21 - 10
frontend/src/pages/Dashboard.tsx

@@ -1,3 +1,4 @@
+import { useEffect, useState } from 'react';
 import { fetchTopPriceIps, fetchStats } from '../api';
 import { usePolling } from '../hooks/usePolling';
 import './Dashboard.css';
@@ -12,35 +13,44 @@ function formatUptime(seconds: number): string {
 export function Dashboard() {
   const { data: stats } = usePolling(fetchStats, 5000);
   const { data: topIps } = usePolling(fetchTopPriceIps, 10000);
+  const [displayUptime, setDisplayUptime] = useState<number | null>(null);
+
+  // 每次从后端拿到 uptime 后,本地每秒递增
+  useEffect(() => {
+    if (stats == null) return;
+    setDisplayUptime(stats.uptime_seconds);
+    const timer = setInterval(() => setDisplayUptime(v => (v ?? 0) + 1), 1000);
+    return () => clearInterval(timer);
+  }, [stats?.uptime_seconds != null ? Math.floor(stats.uptime_seconds / 5) : null]);
 
   return (
     <div className="dashboard">
       <header className="dash-header">
-        <span className="dash-logo">◈ SENTINEL_LENS</span>
+        <span className="dash-logo">◈ 哨兵监控</span>
         <span className="dash-status">
-          <span className="dot dot--green" /> OPERATIONAL
+          <span className="dot dot--green" /> 运行中
         </span>
       </header>
 
       <div className="stat-grid">
         <div className="stat-card">
-          <div className="stat-label">SYSTEM_UPTIME</div>
-          <div className="stat-value">{stats ? formatUptime(stats.uptime_seconds) : '--:--:--'}</div>
+          <div className="stat-label">系统运行时间</div>
+          <div className="stat-value">{displayUptime != null ? formatUptime(displayUptime) : '--:--:--'}</div>
         </div>
         <div className="stat-card">
-          <div className="stat-label">TOTAL_HITS</div>
+          <div className="stat-label">价格接口请求数</div>
           <div className="stat-value neon-green">
             {stats ? stats.total_hits.toLocaleString() : '—'}
           </div>
         </div>
         <div className="stat-card">
-          <div className="stat-label">ACTIVE_IPS</div>
+          <div className="stat-label">活跃 IP 数</div>
           <div className="stat-value neon-cyan">
             <span className="blink">✦</span> {stats ? stats.active_ips : '—'}
           </div>
         </div>
         <div className="stat-card">
-          <div className="stat-label">AVG_LATENCY</div>
+          <div className="stat-label">平均延迟</div>
           <div className="stat-value neon-cyan">
             <span className="blink">◈</span> {stats ? `${stats.avg_latency_ms.toFixed(0)}ms` : '—'}
           </div>
@@ -49,7 +59,7 @@ export function Dashboard() {
 
       <section className="geo-section">
         <div className="section-title">
-          PRICE_API_CALLERS <span className="globe">📡</span>
+          价格接口调用来源 <span className="globe">📡</span>
         </div>
         {topIps && topIps.length > 0 ? (
           <ul className="geo-list">
@@ -59,12 +69,13 @@ export function Dashboard() {
                 <div className="geo-bar-wrap">
                   <div className="geo-bar" style={{ width: `${item.percentage}%` }} />
                 </div>
-                <span className="geo-pct">{item.hit_count}</span>
+                <span className="geo-pct">{item.hit_count} 次</span>
+                <span className="geo-pct geo-pct--dim">{item.percentage}%</span>
               </li>
             ))}
           </ul>
         ) : (
-          <div className="empty-msg">No price API calls yet</div>
+          <div className="empty-msg">暂无价格接口调用记录</div>
         )}
       </section>
     </div>

+ 118 - 0
frontend/src/pages/Discounts.css

@@ -0,0 +1,118 @@
+.discounts-page {
+  padding: 24px;
+  color: #e6edf3;
+}
+
+.discounts-header {
+  display: flex;
+  align-items: center;
+  margin-bottom: 20px;
+}
+
+.discounts-title {
+  font-size: 18px;
+  font-weight: 600;
+  letter-spacing: 0.05em;
+  color: #00d4ff;
+}
+
+.discount-form {
+  display: flex;
+  gap: 8px;
+  flex-wrap: wrap;
+  margin-bottom: 12px;
+}
+
+.discount-input {
+  background: #161b22;
+  border: 1px solid #30363d;
+  border-radius: 6px;
+  color: #e6edf3;
+  padding: 8px 12px;
+  font-size: 13px;
+  flex: 1;
+  min-width: 160px;
+}
+
+.discount-input--short {
+  flex: 0 0 120px;
+  min-width: 0;
+}
+
+.discount-input:focus {
+  outline: none;
+  border-color: #00d4ff;
+}
+
+.discount-btn {
+  background: #21262d;
+  border: 1px solid #30363d;
+  border-radius: 6px;
+  color: #e6edf3;
+  padding: 8px 16px;
+  font-size: 13px;
+  cursor: pointer;
+  white-space: nowrap;
+}
+
+.discount-btn--primary {
+  background: #00d4ff22;
+  border-color: #00d4ff;
+  color: #00d4ff;
+}
+
+.discount-btn--danger {
+  border-color: #f85149;
+  color: #f85149;
+}
+
+.discount-btn--sm {
+  padding: 4px 10px;
+  font-size: 12px;
+}
+
+.discount-btn:hover {
+  opacity: 0.8;
+}
+
+.discount-error {
+  color: #f85149;
+  font-size: 13px;
+  margin-bottom: 12px;
+}
+
+.discount-table-wrap {
+  overflow-x: auto;
+}
+
+.discount-table {
+  width: 100%;
+  border-collapse: collapse;
+  font-size: 13px;
+}
+
+.discount-table th {
+  text-align: left;
+  padding: 10px 12px;
+  border-bottom: 1px solid #30363d;
+  color: #8b949e;
+  font-weight: 500;
+}
+
+.discount-table td {
+  padding: 10px 12px;
+  border-bottom: 1px solid #21262d;
+}
+
+.td-domain { color: #00d4ff; font-family: monospace; }
+.td-discount { color: #f0c040; }
+.td-note { color: #8b949e; }
+.td-time { color: #8b949e; font-size: 12px; }
+.td-actions { display: flex; gap: 6px; }
+
+.empty-msg {
+  text-align: center;
+  color: #8b949e;
+  padding: 40px 0;
+  font-size: 13px;
+}

+ 117 - 0
frontend/src/pages/Discounts.tsx

@@ -0,0 +1,117 @@
+import { useEffect, useState } from 'react';
+import { deleteDiscount, fetchDiscounts, upsertDiscount } from '../api';
+import type { Discount } from '../types';
+import './Discounts.css';
+
+// Per-domain discount management page: list, add/update (upsert) and delete
+// discount factors in (0, 1], e.g. 0.8 means 20% off.
+export function Discounts() {
+  const [list, setList] = useState<Discount[]>([]);
+  const [domain, setDomain] = useState('');
+  const [discount, setDiscount] = useState('');
+  const [note, setNote] = useState('');
+  const [editing, setEditing] = useState<Discount | null>(null);
+  const [error, setError] = useState('');
+
+  // Refresh the table; load failures are intentionally silent (list keeps last state).
+  const load = () => fetchDiscounts().then(setList).catch(() => {});
+
+  useEffect(() => { load(); }, []);
+
+  // Validate the factor locally, then upsert and reset the form on success.
+  const handleSubmit = async (e: React.FormEvent) => {
+    e.preventDefault();
+    setError('');
+    const d = parseFloat(discount);
+    if (isNaN(d) || d <= 0 || d > 1) {
+      setError('折扣系数需在 0~1 之间,如 0.8 表示八折');
+      return;
+    }
+    try {
+      await upsertDiscount(domain.trim(), d, note.trim() || undefined);
+      setDomain(''); setDiscount(''); setNote(''); setEditing(null);
+      load();
+    } catch (err: any) {
+      setError(err.message);
+    }
+  };
+
+  // Prefill the form from an existing row.
+  const startEdit = (item: Discount) => {
+    setEditing(item);
+    setDomain(item.domain);
+    setDiscount(String(item.discount));
+    setNote(item.note ?? '');
+  };
+
+  // Surface API failures in the error banner instead of leaving the
+  // rejection unhandled (previously a failed delete gave no feedback).
+  const handleDelete = async (id: number) => {
+    try {
+      await deleteDiscount(id);
+      load();
+    } catch (err: any) {
+      setError(err.message);
+    }
+  };
+
+  return (
+    <div className="discounts-page">
+      <header className="discounts-header">
+        <span className="discounts-title">折扣管理</span>
+      </header>
+
+      <form className="discount-form" onSubmit={handleSubmit}>
+        <input
+          className="discount-input"
+          placeholder="域名,如 aigc.wangxunai.com"
+          value={domain}
+          onChange={e => setDomain(e.target.value)}
+          required
+        />
+        <input
+          className="discount-input discount-input--short"
+          placeholder="折扣系数,如 0.8"
+          value={discount}
+          onChange={e => setDiscount(e.target.value)}
+          required
+        />
+        <input
+          className="discount-input"
+          placeholder="备注(可选)"
+          value={note}
+          onChange={e => setNote(e.target.value)}
+        />
+        <button className="discount-btn discount-btn--primary" type="submit">
+          {editing ? '保存' : '添加'}
+        </button>
+        {editing && (
+          <button className="discount-btn" type="button" onClick={() => {
+            setEditing(null); setDomain(''); setDiscount(''); setNote('');
+          }}>取消</button>
+        )}
+      </form>
+      {error && <div className="discount-error">{error}</div>}
+
+      <div className="discount-table-wrap">
+        <table className="discount-table">
+          <thead>
+            <tr>
+              <th>域名</th>
+              <th>折扣系数</th>
+              <th>折扣</th>
+              <th>备注</th>
+              <th>更新时间</th>
+              <th>操作</th>
+            </tr>
+          </thead>
+          <tbody>
+            {list.map(item => (
+              <tr key={item.id}>
+                <td className="td-domain">{item.domain}</td>
+                <td className="td-discount">{item.discount}</td>
+                <td className="td-discount">{Math.round(item.discount * 10)}折</td>
+                <td className="td-note">{item.note ?? '—'}</td>
+                <td className="td-time">{new Date(item.updated_at).toLocaleString()}</td>
+                <td className="td-actions">
+                  <button className="discount-btn discount-btn--sm" onClick={() => startEdit(item)}>编辑</button>
+                  <button className="discount-btn discount-btn--sm discount-btn--danger" onClick={() => handleDelete(item.id)}>删除</button>
+                </td>
+              </tr>
+            ))}
+          </tbody>
+        </table>
+        {list.length === 0 && <div className="empty-msg">暂无折扣配置,所有域名按原价返回</div>}
+      </div>
+    </div>
+  );
+}

+ 8 - 8
frontend/src/pages/Logs.tsx

@@ -37,9 +37,9 @@ export function Logs() {
   return (
     <div className="logs-page">
       <header className="logs-header">
-        <span className="logs-title">LIVE_TELEMETRY</span>
+        <span className="logs-title">实时访问日志</span>
         <span className={`ws-badge ${connected ? 'ws-badge--on' : 'ws-badge--off'}`}>
-          <span className="dot dot--sm" /> {connected ? 'STREAMING' : 'RECONNECTING'}
+          <span className="dot dot--sm" /> {connected ? '实时推送' : '重连中'}
         </span>
       </header>
 
@@ -47,12 +47,12 @@ export function Logs() {
         <table className="logs-table">
           <thead>
             <tr>
-              <th>TIME</th>
+              <th>时间</th>
               <th>IP</th>
-              <th>LOCATION</th>
-              <th>METHOD</th>
-              <th>PATH</th>
-              <th>STATUS</th>
+              <th>地区</th>
+              <th>方法</th>
+              <th>路径</th>
+              <th>状态码</th>
             </tr>
           </thead>
           <tbody>
@@ -68,7 +68,7 @@ export function Logs() {
             ))}
           </tbody>
         </table>
-        {merged.length === 0 && <div className="empty-msg">Waiting for traffic…</div>}
+        {merged.length === 0 && <div className="empty-msg">等待请求流入…</div>}
       </div>
     </div>
   );

+ 1 - 1
frontend/src/pages/Map.css

@@ -1,7 +1,7 @@
 .map-page {
   display: flex;
   flex-direction: column;
-  height: calc(100vh - 64px);
+  height: 100vh;
   padding: 16px;
 }
 

+ 373 - 8
frontend/src/pages/Scraper.css

@@ -1,7 +1,194 @@
 .scraper-page {
   padding: 16px;
-  max-width: 700px;
-  margin: 0 auto;
+  height: calc(100vh - 64px);
+  box-sizing: border-box;
+  overflow: hidden;
+}
+
+.scraper-layout {
+  display: grid;
+  grid-template-columns: 280px 1fr;
+  gap: 16px;
+  height: 100%;
+  overflow: hidden;
+}
+
+/* 左侧模型列表 */
+.model-sidebar {
+  display: flex;
+  flex-direction: column;
+  gap: 10px;
+  background: var(--bg-card);
+  border: 1px solid var(--bg-border);
+  border-radius: 4px;
+  padding: 14px;
+  overflow-y: auto;
+  height: 100%;
+  min-height: 0;
+  box-sizing: border-box;
+}
+
+.sidebar-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+}
+
+.sidebar-title {
+  font-size: 12px;
+  color: var(--text-muted);
+  letter-spacing: 0.08em;
+}
+
+.icon-btn {
+  background: transparent;
+  border: 1px solid var(--neon-cyan);
+  color: var(--neon-cyan);
+  width: 24px;
+  height: 24px;
+  border-radius: 4px;
+  font-size: 16px;
+  line-height: 1;
+  cursor: pointer;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+}
+
+.icon-btn:hover { background: rgba(0, 212, 255, 0.1); }
+
+.add-form {
+  display: flex;
+  flex-direction: column;
+  gap: 6px;
+  padding: 10px;
+  background: var(--bg-panel, #0d1117);
+  border: 1px solid var(--bg-border);
+  border-radius: 4px;
+}
+
+.add-input {
+  background: var(--bg-card);
+  border: 1px solid var(--bg-border);
+  border-radius: 3px;
+  color: var(--text-primary);
+  font-family: var(--font-mono);
+  font-size: 12px;
+  padding: 6px 8px;
+  outline: none;
+}
+
+.add-input:focus { border-color: var(--neon-cyan); }
+
+.add-error { color: var(--neon-red); font-size: 11px; }
+
+.add-actions { display: flex; gap: 6px; }
+
+.add-confirm-btn {
+  flex: 1;
+  background: transparent;
+  border: 1px solid var(--neon-green);
+  color: var(--neon-green);
+  padding: 5px;
+  font-size: 11px;
+  border-radius: 3px;
+  cursor: pointer;
+}
+
+.add-confirm-btn:hover { background: rgba(0, 255, 136, 0.1); }
+
+.add-cancel-btn {
+  flex: 1;
+  background: transparent;
+  border: 1px solid var(--bg-border);
+  color: var(--text-muted);
+  padding: 5px;
+  font-size: 11px;
+  border-radius: 3px;
+  cursor: pointer;
+}
+
+.model-list-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  font-size: 11px;
+  color: var(--text-muted);
+  padding-bottom: 6px;
+  border-bottom: 1px solid var(--bg-border);
+}
+
+.check-label {
+  display: flex;
+  align-items: center;
+  gap: 6px;
+  cursor: pointer;
+  font-size: 12px;
+}
+
+.selected-count { color: var(--neon-cyan); font-size: 11px; }
+
+.model-list {
+  list-style: none;
+  flex: 1;
+  overflow-y: auto;
+  display: flex;
+  flex-direction: column;
+  gap: 4px;
+  min-height: 0;
+}
+
+.model-item {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  padding: 6px 8px;
+  border-radius: 3px;
+  border: 1px solid transparent;
+  transition: border-color 0.15s;
+  flex-shrink: 0;
+}
+
+.model-item:hover { border-color: var(--bg-border); }
+.model-item--selected { border-color: var(--neon-cyan) !important; background: rgba(0, 212, 255, 0.05); }
+
+.model-name {
+  font-size: 12px;
+  color: var(--text-primary);
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+  max-width: 170px;
+}
+
+.del-btn {
+  background: transparent;
+  border: none;
+  color: var(--text-muted);
+  font-size: 11px;
+  cursor: pointer;
+  padding: 2px 4px;
+  border-radius: 2px;
+  flex-shrink: 0;
+}
+
+.del-btn:hover { color: var(--neon-red); }
+
+.scrape-btn {
+  width: 100%;
+  text-align: center;
+  flex-shrink: 0;
+  align-self: auto;
+}
+
+/* 右侧主区域 */
+.scraper-main {
+  overflow-y: auto;
+  height: 100%;
+  display: flex;
+  flex-direction: column;
+  gap: 12px;
+  min-height: 0;
 }
 
 .scraper-header {
@@ -188,21 +375,40 @@
 }
 
 .history-item {
-  display: grid;
-  grid-template-columns: 100px 80px 1fr;
-  align-items: center;
-  gap: 10px;
-  padding: 8px 12px;
+  display: flex;
+  flex-direction: column;
   background: var(--bg-card);
   border: 1px solid var(--bg-border);
   border-radius: 4px;
-  cursor: pointer;
   font-size: 11px;
   transition: border-color 0.2s;
+  overflow: hidden;
 }
 
 .history-item:hover { border-color: var(--neon-cyan); }
 
+.history-row {
+  display: grid;
+  grid-template-columns: 100px 80px 1fr auto;
+  align-items: center;
+  gap: 10px;
+  padding: 8px 12px;
+  cursor: pointer;
+}
+
+.history-toggle {
+  color: var(--text-muted);
+  font-size: 10px;
+}
+
+.history-detail {
+  border-top: 1px solid var(--bg-border);
+  padding: 12px;
+  display: flex;
+  flex-direction: column;
+  gap: 10px;
+}
+
 .history-id { color: var(--neon-cyan); }
 .history-time { color: var(--text-muted); text-align: right; }
 
@@ -210,3 +416,162 @@
 .history-item--failed .history-status { color: var(--neon-red); }
 .history-item--running .history-status,
 .history-item--pending .history-status { color: #ffaa00; }
+
+/* 定时爬取配置 */
+.schedule-box {
+  border: 1px solid var(--bg-border);
+  border-radius: 4px;
+  padding: 10px;
+  display: flex;
+  flex-direction: column;
+  gap: 8px;
+  flex-shrink: 0;
+}
+
+.schedule-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+}
+
+.schedule-title {
+  font-size: 11px;
+  color: var(--text-muted);
+  letter-spacing: 0.08em;
+}
+
+.toggle-btn {
+  font-size: 11px;
+  padding: 3px 10px;
+  border-radius: 20px;
+  border: 1px solid var(--bg-border);
+  background: transparent;
+  color: var(--text-muted);
+  cursor: pointer;
+  transition: all 0.2s;
+}
+
+.toggle-btn--on {
+  border-color: var(--neon-green);
+  color: var(--neon-green);
+  background: rgba(0, 255, 136, 0.08);
+}
+
+.toggle-btn:disabled { opacity: 0.5; cursor: not-allowed; }
+
+.schedule-row {
+  display: flex;
+  align-items: center;
+  gap: 6px;
+}
+
+.schedule-label {
+  font-size: 11px;
+  color: var(--text-muted);
+  white-space: nowrap;
+  width: 36px;
+}
+
+.schedule-input {
+  background: var(--bg-card);
+  border: 1px solid var(--bg-border);
+  border-radius: 3px;
+  color: var(--text-primary);
+  font-family: var(--font-mono);
+  font-size: 12px;
+  padding: 4px 6px;
+  width: 52px;
+  outline: none;
+  text-align: center;
+}
+
+.schedule-input--sm { width: 52px; }
+.schedule-input:focus { border-color: var(--neon-cyan); }
+
+.schedule-unit {
+  font-size: 11px;
+  color: var(--text-muted);
+}
+
+/* 模型详情卡片各区块 */
+.info-section {
+  margin-top: 10px;
+  padding-top: 10px;
+  border-top: 1px solid var(--bg-border);
+}
+
+.info-section-title {
+  font-size: 10px;
+  color: var(--text-muted);
+  letter-spacing: 0.08em;
+  margin-bottom: 6px;
+}
+
+.info-desc {
+  font-size: 11px;
+  color: var(--text-primary);
+  line-height: 1.5;
+  margin-bottom: 6px;
+}
+
+.tag-row {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 4px;
+  margin-bottom: 6px;
+}
+
+.tag {
+  font-size: 10px;
+  padding: 2px 7px;
+  border-radius: 10px;
+  border: 1px solid var(--neon-cyan);
+  color: var(--neon-cyan);
+}
+
+.modality-row {
+  display: flex;
+  gap: 12px;
+  margin-bottom: 6px;
+}
+
+.modality {
+  font-size: 11px;
+  color: var(--text-muted);
+}
+
+.feature-grid {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 4px;
+  margin-top: 4px;
+}
+
+.feature-item {
+  font-size: 10px;
+  padding: 2px 7px;
+  border-radius: 3px;
+  border: 1px solid var(--bg-border);
+}
+
+.feature-item--on  { color: var(--neon-green); border-color: var(--neon-green); }
+.feature-item--off { color: var(--text-muted); opacity: 0.5; }
+
+.kv-grid {
+  display: grid;
+  grid-template-columns: repeat(2, 1fr);
+  gap: 4px 12px;
+}
+
+.kv-item {
+  display: flex;
+  justify-content: space-between;
+  font-size: 11px;
+  padding: 3px 0;
+  border-bottom: 1px solid var(--bg-border);
+}
+
+.kv-key { color: var(--text-muted); }
+.kv-val { color: var(--neon-cyan); }
+
+.price-note { color: var(--text-muted); font-size: 10px; }

+ 293 - 87
frontend/src/pages/Scraper.tsx

@@ -1,45 +1,129 @@
 import { useEffect, useRef, useState } from 'react';
-import { fetchScrapeJob, fetchScrapeJobs, postScrape } from '../api';
+import { createModel, deleteModel, fetchModels, fetchScrapeJob, fetchScrapeJobs, fetchSchedule, postScrape, updateSchedule } from '../api';
+import type { Model, Schedule } from '../api';
 import type { ScrapeJob, ScrapeJobDetail } from '../types';
 import './Scraper.css';
 
 function PriceCard({ result }: { result: NonNullable<ScrapeJobDetail['results']>[number] }) {
-  const entries = Object.entries(result.prices);
+  const { model_info, rate_limits, tool_prices, prices } = result;
+
   return (
     <div className="price-card">
       <div className="price-card-title">{result.model_name}</div>
       <div className="price-card-url">{result.url}</div>
-      <div className="price-entries">
-        {entries.length === 0 ? (
-          <span className="no-price">No price data</span>
-        ) : (
-          entries.map(([key, val]) => (
-            <div key={key} className="price-entry">
-              <span className="price-key">{key}</span>
+
+      {/* 模型信息 */}
+      {model_info && (
+        <div className="info-section">
+          <div className="info-section-title">模型信息</div>
+          {model_info.display_tags && model_info.display_tags.length > 0 && (
+            <div className="tag-row">
+              {model_info.display_tags.map(t => <span key={t} className="tag">{t}</span>)}
+            </div>
+          )}
+          {model_info.description && (
+            <div className="info-desc">{model_info.description}</div>
+          )}
+          {(model_info.input_modalities?.length || model_info.output_modalities?.length) ? (
+            <div className="modality-row">
+              {model_info.input_modalities?.length ? (
+                <span className="modality">输入:{model_info.input_modalities.join(' / ')}</span>
+              ) : null}
+              {model_info.output_modalities?.length ? (
+                <span className="modality">输出:{model_info.output_modalities.join(' / ')}</span>
+              ) : null}
+            </div>
+          ) : null}
+          {model_info.features && (
+            <div className="feature-grid">
+              {Object.entries(model_info.features).map(([k, v]) => (
+                <span key={k} className={`feature-item ${v ? 'feature-item--on' : 'feature-item--off'}`}>
+                  {v ? '✓' : '✗'} {k}
+                </span>
+              ))}
+            </div>
+          )}
+        </div>
+      )}
+
+      {/* 限流与上下文 */}
+      {rate_limits && Object.keys(rate_limits).length > 0 && (
+        <div className="info-section">
+          <div className="info-section-title">限流与上下文</div>
+          <div className="kv-grid">
+            {Object.entries(rate_limits).map(([k, v]) => (
+              <div key={k} className="kv-item">
+                <span className="kv-key">{k}</span>
+                <span className="kv-val">{v}</span>
+              </div>
+            ))}
+          </div>
+        </div>
+      )}
+
+      {/* 工具调用价格 */}
+      {tool_prices && tool_prices.length > 0 && (
+        <div className="info-section">
+          <div className="info-section-title">工具调用价格</div>
+          {tool_prices.map((t, i) => (
+            <div key={i} className="price-entry">
+              <span className="price-key">{t.label}</span>
+              <span className="price-val">
+                {t.price === 0 ? '免费' : `${t.price} ${t.unit ?? '元/千次'}`}
+                {t.note ? <span className="price-note"> ({t.note})</span> : null}
+              </span>
+            </div>
+          ))}
+        </div>
+      )}
+
+      {/* Token 价格 */}
+      {Object.keys(prices).length > 0 && (
+        <div className="info-section">
+          <div className="info-section-title">Token 价格</div>
+          {Object.entries(prices).map(([tier, val]) => (
+            <div key={tier} className="price-entry">
+              <span className="price-key">{tier}</span>
               <span className="price-val">{JSON.stringify(val)}</span>
             </div>
-          ))
-        )}
-      </div>
-      <div className="price-time">Scraped: {new Date(result.scraped_at).toLocaleString()}</div>
+          ))}
+        </div>
+      )}
+
+      <div className="price-time">爬取时间:{new Date(result.scraped_at).toLocaleString()}</div>
     </div>
   );
 }
 
 export function Scraper() {
-  const [urlInput, setUrlInput] = useState('');
+  const [models, setModels] = useState<Model[]>([]);
+  const [selected, setSelected] = useState<Set<number>>(new Set());
+  const [showAdd, setShowAdd] = useState(false);
+  const [newName, setNewName] = useState('');
+  const [newUrl, setNewUrl] = useState('');
+  const [addError, setAddError] = useState<string | null>(null);
+
+  const [schedule, setSchedule] = useState<Schedule | null>(null);
+  const [scheduleEdit, setScheduleEdit] = useState({ interval_days: 1, start_hour: 2 });
+  const [scheduleSaving, setScheduleSaving] = useState(false);
+
   const [submitting, setSubmitting] = useState(false);
-  const [activeJob, setActiveJob] = useState<ScrapeJobDetail | null>(null);
+  const [expandedJobs, setExpandedJobs] = useState<Record<string, ScrapeJobDetail>>({});
   const [history, setHistory] = useState<ScrapeJob[]>([]);
   const [error, setError] = useState<string | null>(null);
   const pollRef = useRef<ReturnType<typeof setInterval> | null>(null);
+  const resultRef = useRef<HTMLDivElement>(null);
 
-  const loadHistory = () => {
-    fetchScrapeJobs().then(setHistory).catch(() => {});
-  };
+  const loadModels = () => fetchModels().then(setModels).catch(() => {});
+  const loadHistory = () => fetchScrapeJobs().then(setHistory).catch(() => {});
 
   useEffect(() => {
+    loadModels();
     loadHistory();
+    fetchSchedule().then(s => {
+      setSchedule(s);
+      setScheduleEdit({ interval_days: s.interval_days, start_hour: s.start_hour });
+    }).catch(() => {});
   }, []);
 
   const stopPolling = () => {
@@ -51,29 +135,40 @@ export function Scraper() {
     pollRef.current = setInterval(async () => {
       try {
         const detail = await fetchScrapeJob(jobId);
-        setActiveJob(detail);
+        setExpandedJobs(prev => ({ ...prev, [jobId]: detail }));
         if (detail.status === 'done' || detail.status === 'failed') {
           stopPolling();
           loadHistory();
         }
-      } catch {
-        stopPolling();
-      }
+      } catch { stopPolling(); }
     }, 2000);
   };
 
   useEffect(() => () => stopPolling(), []);
 
-  const handleSubmit = async () => {
-    const urls = urlInput.split('\n').map((u) => u.trim()).filter(Boolean);
+  const toggleSelect = (id: number) => {
+    setSelected(prev => {
+      const next = new Set(prev);
+      next.has(id) ? next.delete(id) : next.add(id);
+      return next;
+    });
+  };
+
+  const toggleAll = () => {
+    if (selected.size === models.length) setSelected(new Set());
+    else setSelected(new Set(models.map(m => m.id)));
+  };
+
+  const handleScrape = async () => {
+    const urls = models.filter(m => selected.has(m.id)).map(m => m.url);
     if (urls.length === 0) return;
     setSubmitting(true);
     setError(null);
-    setActiveJob(null);
     try {
       const job = await postScrape(urls);
-      setActiveJob({ ...job, results: undefined });
+      setExpandedJobs(prev => ({ ...prev, [job.job_id]: { ...job, results: undefined } }));
       startPolling(job.job_id);
+      loadHistory();
     } catch (e) {
       setError(e instanceof Error ? e.message : String(e));
     } finally {
@@ -81,80 +176,191 @@ export function Scraper() {
     }
   };
 
-  const handleHistoryClick = async (jobId: string) => {
+  const handleAdd = async () => {
+    if (!newName.trim() || !newUrl.trim()) return;
+    setAddError(null);
     try {
-      const detail = await fetchScrapeJob(jobId);
-      setActiveJob(detail);
-      stopPolling();
-      if (detail.status === 'pending' || detail.status === 'running') {
-        startPolling(jobId);
-      }
-    } catch {
-      setError('Failed to load job details');
+      await createModel(newName.trim(), newUrl.trim());
+      setNewName(''); setNewUrl(''); setShowAdd(false);
+      loadModels();
+    } catch (e) {
+      setAddError(e instanceof Error ? e.message : String(e));
     }
   };
 
-  return (
-    <div className="scraper-page">
-      <header className="scraper-header">
-        <span className="scraper-title">PRICE_SCRAPER</span>
-      </header>
-
-      <div className="scraper-input-area">
-        <textarea
-          className="url-input"
-          placeholder="Enter URLs (one per line)&#10;https://bailian.console.aliyun.com/..."
-          value={urlInput}
-          onChange={(e) => setUrlInput(e.target.value)}
-          rows={4}
-        />
-        <button className="submit-btn" onClick={handleSubmit} disabled={submitting}>
-          {submitting ? 'SUBMITTING…' : '▶ SCRAPE'}
-        </button>
-      </div>
+  const handleToggleSchedule = async () => {
+    if (!schedule) return;
+    setScheduleSaving(true);
+    try {
+      const updated = await updateSchedule({ ...scheduleEdit, enabled: !schedule.enabled });
+      setSchedule(updated);
+    } finally { setScheduleSaving(false); }
+  };
 
-      {error && <div className="error-banner">ERROR: {error}</div>}
+  const handleSaveSchedule = async () => {
+    if (!schedule) return;
+    setScheduleSaving(true);
+    try {
+      const updated = await updateSchedule({ ...scheduleEdit, enabled: schedule.enabled });
+      setSchedule(updated);
+    } finally { setScheduleSaving(false); }
+  };
 
-      {activeJob && (
-        <div className={`job-status job-status--${activeJob.status}`}>
-          <span>JOB {activeJob.job_id.slice(0, 8)}…</span>
-          <span className="job-badge">{activeJob.status.toUpperCase()}</span>
+  const handleDelete = async (id: number) => {
+    await deleteModel(id);
+    setSelected(prev => { const n = new Set(prev); n.delete(id); return n; });
+    loadModels();
+  };
+
+  const handleHistoryClick = async (jobId: string) => {
+    // 已展开则收起
+    if (expandedJobs[jobId]) {
+      setExpandedJobs(prev => { const n = { ...prev }; delete n[jobId]; return n; });
+      return;
+    }
+    try {
+      const detail = await fetchScrapeJob(jobId);
+      setExpandedJobs(prev => ({ ...prev, [jobId]: detail }));
+      if (detail.status === 'pending' || detail.status === 'running') startPolling(jobId);
+    } catch { setError('加载任务详情失败'); }
+  };
+
+  return (
+    <div className="scraper-page scraper-layout">
+      {/* 左侧:模型列表 */}
+      <aside className="model-sidebar">
+        <div className="sidebar-header">
+          <span className="sidebar-title">模型列表</span>
+          <button className="icon-btn" onClick={() => setShowAdd(v => !v)} title="添加模型">+</button>
         </div>
-      )}
 
-      {activeJob?.status === 'failed' && (
-        <div className="error-card">
-          <div className="error-card-title">SCRAPE FAILED</div>
-          <pre className="error-detail">{activeJob.error}</pre>
+        {showAdd && (
+          <div className="add-form">
+            <input className="add-input" placeholder="模型名称" value={newName} onChange={e => setNewName(e.target.value)} />
+            <input className="add-input" placeholder="URL" value={newUrl} onChange={e => setNewUrl(e.target.value)} />
+            {addError && <div className="add-error">{addError}</div>}
+            <div className="add-actions">
+              <button className="add-confirm-btn" onClick={handleAdd}>确认添加</button>
+              <button className="add-cancel-btn" onClick={() => { setShowAdd(false); setAddError(null); }}>取消</button>
+            </div>
+          </div>
+        )}
+
+        <div className="model-list-header">
+          <label className="check-label">
+            <input type="checkbox" checked={models.length > 0 && selected.size === models.length} onChange={toggleAll} />
+            <span>全选</span>
+          </label>
+          <span className="selected-count">{selected.size} / {models.length}</span>
         </div>
-      )}
 
-      {activeJob?.status === 'done' && activeJob.results && (
-        <div className="results-section">
-          {activeJob.results.map((r) => (
-            <PriceCard key={r.url} result={r} />
+        <ul className="model-list">
+          {models.length === 0 && <li className="empty-msg">暂无模型,点击 + 添加</li>}
+          {models.map(m => (
+            <li key={m.id} className={`model-item ${selected.has(m.id) ? 'model-item--selected' : ''}`}>
+              <label className="check-label">
+                <input type="checkbox" checked={selected.has(m.id)} onChange={() => toggleSelect(m.id)} />
+                <span className="model-name">{m.name}</span>
+              </label>
+              <button className="del-btn" onClick={() => handleDelete(m.id)} title="删除">✕</button>
+            </li>
           ))}
-        </div>
-      )}
+        </ul>
 
-      {history.length > 0 && (
-        <section className="history-section">
-          <div className="section-title">HISTORY</div>
-          <ul className="history-list">
-            {history.map((job) => (
-              <li
-                key={job.job_id}
-                className={`history-item history-item--${job.status}`}
-                onClick={() => handleHistoryClick(job.job_id)}
+        <button className="submit-btn scrape-btn" onClick={handleScrape} disabled={submitting || selected.size === 0}>
+          {submitting ? '爬取中…' : `▶ 爬取已选 (${selected.size})`}
+        </button>
+
+        {/* 定时爬取配置 */}
+        {schedule && (
+          <div className="schedule-box">
+            <div className="schedule-header">
+              <span className="schedule-title">定时爬取</span>
+              <button
+                className={`toggle-btn ${schedule.enabled ? 'toggle-btn--on' : ''}`}
+                onClick={handleToggleSchedule}
+                disabled={scheduleSaving}
               >
-                <span className="history-id">{job.job_id.slice(0, 8)}…</span>
-                <span className="history-status">{job.status}</span>
-                <span className="history-time">{new Date(job.created_at).toLocaleString()}</span>
-              </li>
-            ))}
-          </ul>
-        </section>
-      )}
+                {schedule.enabled ? '已开启' : '已关闭'}
+              </button>
+            </div>
+            <div className="schedule-row">
+              <label className="schedule-label">每隔</label>
+              <input
+                type="number" min={1} max={365}
+                className="schedule-input"
+                value={scheduleEdit.interval_days}
+                onChange={e => setScheduleEdit(v => ({ ...v, interval_days: Number(e.target.value) }))}
+              />
+              <span className="schedule-unit">天</span>
+            </div>
+            <div className="schedule-row">
+              <label className="schedule-label">开始时间</label>
+              <input
+                type="number" min={0} max={23}
+                className="schedule-input schedule-input--sm"
+                value={scheduleEdit.start_hour}
+                onChange={e => setScheduleEdit(v => ({ ...v, start_hour: Number(e.target.value) }))}
+              />
+              <span className="schedule-unit">时整</span>
+            </div>
+            <button className="add-confirm-btn" onClick={handleSaveSchedule} disabled={scheduleSaving}>
+              保存配置
+            </button>
+          </div>
+        )}
+      </aside>
+
+      {/* 右侧:结果区 */}
+      <main className="scraper-main">
+        <header className="scraper-header" ref={resultRef}>
+          <span className="scraper-title">价格爬取</span>
+        </header>
+
+        {error && <div className="error-banner">错误:{error}</div>}
+
+        {history.length > 0 && (
+          <section className="history-section">
+            <div className="section-title">历史记录</div>
+            <ul className="history-list">
+              {history.map(job => {
+                const expanded = expandedJobs[job.job_id];
+                const isOpen = !!expanded;
+                return (
+                  <li key={job.job_id} className={`history-item history-item--${job.status}`}>
+                    <div className="history-row" onClick={() => handleHistoryClick(job.job_id)}>
+                      <span className="history-id">{job.job_id.slice(0, 8)}…</span>
+                      <span className="history-status">{
+                        job.status === 'done' ? '完成' :
+                        job.status === 'failed' ? '失败' :
+                        job.status === 'running' ? '运行中' : '等待中'
+                      }</span>
+                      <span className="history-time">{new Date(job.created_at).toLocaleString()}</span>
+                      <span className="history-toggle">{isOpen ? '▲' : '▼'}</span>
+                    </div>
+                    {isOpen && (
+                      <div className="history-detail">
+                        {expanded.status === 'failed' && (
+                          <div className="error-card">
+                            <div className="error-card-title">爬取失败</div>
+                            <pre className="error-detail">{expanded.error}</pre>
+                          </div>
+                        )}
+                        {(expanded.status === 'pending' || expanded.status === 'running') && (
+                          <div className="empty-msg">爬取中,请稍候…</div>
+                        )}
+                        {expanded.status === 'done' && expanded.results && expanded.results.map(r => (
+                          <PriceCard key={r.url} result={r} />
+                        ))}
+                      </div>
+                    )}
+                  </li>
+                );
+              })}
+            </ul>
+          </section>
+        )}
+      </main>
     </div>
   );
 }

+ 19 - 0
frontend/src/types.ts

@@ -44,9 +44,28 @@ export interface ScrapeResult {
   url: string;
   model_name: string;
   prices: Record<string, unknown>;
+  model_info?: {
+    model_code?: string;
+    display_tags?: string[];
+    description?: string;
+    input_modalities?: string[];
+    output_modalities?: string[];
+    features?: Record<string, boolean>;
+  };
+  rate_limits?: Record<string, string>;
+  tool_prices?: Array<{ label: string; price: number | string; unit?: string; note?: string }>;
   scraped_at: string;
 }
 
 export interface ScrapeJobDetail extends ScrapeJob {
   results?: ScrapeResult[];
 }
+
+export interface Discount {
+  id: number;
+  domain: string;
+  discount: number;
+  note: string | null;
+  created_at: string;
+  updated_at: string;
+}