@@ -45,6 +45,13 @@ class ScraperService:
         any_changed = False
 
+        # 如果 snapshot 里已有的 URL 集合与本次爬取的不一致(多或少),触发变更
+        async with pool.acquire() as conn:
+            rows = await conn.fetch("SELECT url FROM price_snapshot")
+        existing_snapshot_urls = {row["url"] for row in rows}
+        if existing_snapshot_urls != set(urls):
+            any_changed = True
+
         for url in urls:
             result: dict = await loop.run_in_executor(
                 None,
@@ -114,6 +121,13 @@ class ScraperService:
                     json.dumps(rate_limits), json.dumps(tool_prices),
                 )
 
+        # 删除 snapshot 里不在本次爬取列表中的行(模型被移除的情况)
+        async with pool.acquire() as conn:
+            await conn.execute(
+                "DELETE FROM price_snapshot WHERE url != ALL($1::text[])",
+                urls,
+            )
+
         # 本批次有任何数据变化,全局版本号 +1(从 1 开始)
         if any_changed:
             async with pool.acquire() as conn: