Browse Source

修复爬取模型版本号更新问题

lxylxy123321 1 week ago
parent
commit
79327e4348
2 changed files with 14 additions and 3 deletions
  1. backend/.env (+0 −3)
  2. backend/app/services/scraper.py (+14 −0)

+ 0 - 3
backend/.env

@@ -16,6 +16,3 @@ PLAYWRIGHT_EXECUTABLE=D:\playwright-browsers\chromium-1208\chrome-win64\chrome.e
 # PLAYWRIGHT_EXECUTABLE=/www/wwwroot/playwright/playwright-browsers/chromium-1208/chrome-win64/chrome.exe
 PLAYWRIGHT_HEADLESS=true
 
-# Linux 生产环境 Chrome 额外启动参数(逗号分隔)
-# 解决 crashpad 崩溃问题:禁用 crash reporter,并指定 crash dumps 目录
-# PLAYWRIGHT_EXTRA_ARGS=--disable-crash-reporter,--crash-dumps-dir=/tmp

+ 14 - 0
backend/app/services/scraper.py

@@ -45,6 +45,13 @@ class ScraperService:
 
             any_changed = False
 
+            # 如果 snapshot 里已有的 URL 集合与本次爬取的不一致(多或少),触发变更
+            async with pool.acquire() as conn:
+                rows = await conn.fetch("SELECT url FROM price_snapshot")
+                existing_snapshot_urls = {row["url"] for row in rows}
+            if existing_snapshot_urls != set(urls):
+                any_changed = True
+
             for url in urls:
                 result: dict = await loop.run_in_executor(
                     None,
@@ -114,6 +121,13 @@ class ScraperService:
                             json.dumps(rate_limits), json.dumps(tool_prices),
                         )
 
+            # 删除 snapshot 里不在本次爬取列表中的行(模型被移除的情况)
+            async with pool.acquire() as conn:
+                await conn.execute(
+                    "DELETE FROM price_snapshot WHERE url != ALL($1::text[])",
+                    urls,
+                )
+
             # 本批次有任何数据变化,全局版本号 +1(从 1 开始)
             if any_changed:
                 async with pool.acquire() as conn: