- from __future__ import annotations
- from typing import List, Optional
- from datetime import date
- from fastapi import APIRouter
- from pydantic import BaseModel
- from app.db import get_pool
# Shared router for scrape-statistics endpoints; mounted by the application elsewhere.
router = APIRouter(tags=["scrape-stats"])
class ScrapeOverview(BaseModel):
    """Aggregate counters across all scrape jobs."""

    total_jobs: int
    success_jobs: int   # jobs with status 'done'
    failed_jobs: int    # jobs with status 'failed'
    total_models_scraped: int  # distinct result URLs in scrape_results
    # ISO-8601 timestamp of the most recent job update; None when no jobs exist.
    last_scraped_at: Optional[str] = None
class DailyCount(BaseModel):
    """Number of scrape jobs created on one calendar day."""

    date: str  # day as "YYYY-MM-DD" (stringified SQL DATE)
    count: int
class ModelRankItem(BaseModel):
    """One entry of the most-scraped-models ranking."""

    model_name: str
    count: int  # number of scrape_results rows for this model
class RecentJob(BaseModel):
    """Summary of a single recent scrape job."""

    job_id: str
    status: str
    model_count: int   # number of result rows the job produced
    created_at: str    # ISO-8601 creation timestamp
class ScrapeStatsOut(BaseModel):
    """Response payload for GET /scrape-stats."""

    overview: ScrapeOverview
    daily_counts: List[DailyCount]
    model_ranks: List[ModelRankItem]
    recent_jobs: List[RecentJob]
async def _fetch_overview(pool) -> ScrapeOverview:
    """Overall job counters plus the distinct-model total."""
    row = await pool.fetchrow("""
        SELECT
            COUNT(*) AS total_jobs,
            COUNT(*) FILTER (WHERE status = 'done') AS success_jobs,
            COUNT(*) FILTER (WHERE status = 'failed') AS failed_jobs,
            MAX(updated_at) AS last_scraped_at
        FROM scrape_jobs
    """)
    # NOTE(review): "models scraped" is counted as distinct URLs here but the
    # ranking below groups by model_name — confirm the two are meant to differ.
    total_models = await pool.fetchval(
        "SELECT COUNT(DISTINCT url) FROM scrape_results"
    ) or 0
    last = row["last_scraped_at"]
    return ScrapeOverview(
        # COUNT(*) never returns NULL, but the `or 0` guards are kept as a
        # harmless defense against driver quirks.
        total_jobs=row["total_jobs"] or 0,
        success_jobs=row["success_jobs"] or 0,
        failed_jobs=row["failed_jobs"] or 0,
        total_models_scraped=total_models,
        last_scraped_at=last.isoformat() if last else None,
    )


async def _fetch_daily_counts(pool) -> List[DailyCount]:
    """Per-day job counts for the last 30 days (days with 0 jobs are omitted)."""
    rows = await pool.fetch("""
        SELECT DATE(created_at) AS day, COUNT(*) AS cnt
        FROM scrape_jobs
        WHERE created_at >= NOW() - INTERVAL '30 days'
        GROUP BY day
        ORDER BY day
    """)
    return [DailyCount(date=str(r["day"]), count=r["cnt"]) for r in rows]


async def _fetch_model_ranks(pool) -> List[ModelRankItem]:
    """Top 15 models by number of scrape results."""
    rows = await pool.fetch("""
        SELECT model_name, COUNT(*) AS cnt
        FROM scrape_results
        GROUP BY model_name
        ORDER BY cnt DESC
        LIMIT 15
    """)
    return [ModelRankItem(model_name=r["model_name"], count=r["cnt"]) for r in rows]


async def _fetch_recent_jobs(pool) -> List[RecentJob]:
    """The 10 most recent jobs with the number of result rows each produced."""
    rows = await pool.fetch("""
        SELECT j.id, j.status, j.created_at,
               COUNT(r.id) AS model_count
        FROM scrape_jobs j
        LEFT JOIN scrape_results r ON r.job_id = j.id
        GROUP BY j.id, j.status, j.created_at
        ORDER BY j.created_at DESC
        LIMIT 10
    """)
    return [
        RecentJob(
            job_id=str(r["id"]),
            status=r["status"],
            model_count=r["model_count"],
            created_at=r["created_at"].isoformat(),
        )
        for r in rows
    ]


@router.get("/scrape-stats", response_model=ScrapeStatsOut)
async def get_scrape_stats() -> ScrapeStatsOut:
    """Aggregate scraping statistics for the dashboard.

    Returns:
        ScrapeStatsOut with overall job counters, per-day job counts for
        the last 30 days, the 15 most frequently scraped models, and the
        10 most recent jobs.
    """
    pool = get_pool()
    return ScrapeStatsOut(
        overview=await _fetch_overview(pool),
        daily_counts=await _fetch_daily_counts(pool),
        model_ranks=await _fetch_model_ranks(pool),
        recent_jobs=await _fetch_recent_jobs(pool),
    )