"""Aggregated scraping statistics API.

Exposes ``GET /scrape-stats``, which summarises the ``scrape_jobs`` and
``scrape_results`` tables into:

* an overall overview (job counts, distinct scraped models, last run time),
* a daily job-count series for the last 30 days,
* a top-15 per-model scrape ranking,
* the 10 most recent jobs with their result counts.

NOTE(review): assumes ``get_pool()`` returns an asyncpg-style pool with
``fetchrow`` / ``fetchval`` / ``fetch`` — confirm against ``app.db``.
"""

from __future__ import annotations

from typing import List, Optional

from fastapi import APIRouter
from pydantic import BaseModel

from app.db import get_pool

router = APIRouter(tags=["scrape-stats"])


class ScrapeOverview(BaseModel):
    # Lifetime counters over scrape_jobs / scrape_results.
    total_jobs: int
    success_jobs: int
    failed_jobs: int
    total_models_scraped: int
    # ISO-8601 timestamp of the newest job update; None when no jobs exist.
    last_scraped_at: Optional[str] = None


class DailyCount(BaseModel):
    # One point of the 30-day series: calendar day (as string) -> job count.
    date: str
    count: int


class ModelRankItem(BaseModel):
    # One row of the per-model scrape ranking.
    model_name: str
    count: int


class RecentJob(BaseModel):
    # Summary of a single recent scrape job.
    job_id: str
    status: str
    model_count: int  # number of scrape_results rows attached to the job
    created_at: str   # ISO-8601


class ScrapeStatsOut(BaseModel):
    # Response envelope for GET /scrape-stats.
    overview: ScrapeOverview
    daily_counts: List[DailyCount]
    model_ranks: List[ModelRankItem]
    recent_jobs: List[RecentJob]


async def _fetch_overview(pool) -> ScrapeOverview:
    """Build the lifetime overview counters from scrape_jobs/scrape_results."""
    row = await pool.fetchrow("""
        SELECT COUNT(*) AS total_jobs,
               COUNT(*) FILTER (WHERE status = 'done') AS success_jobs,
               COUNT(*) FILTER (WHERE status = 'failed') AS failed_jobs,
               MAX(updated_at) AS last_scraped_at
        FROM scrape_jobs
    """)
    # COUNT(DISTINCT url) treats each distinct result URL as one scraped model.
    total_models = await pool.fetchval(
        "SELECT COUNT(DISTINCT url) FROM scrape_results"
    ) or 0
    last_scraped = row["last_scraped_at"]
    return ScrapeOverview(
        # `or 0` guards the NULLs an aggregate over an empty table can yield.
        total_jobs=row["total_jobs"] or 0,
        success_jobs=row["success_jobs"] or 0,
        failed_jobs=row["failed_jobs"] or 0,
        total_models_scraped=total_models,
        last_scraped_at=last_scraped.isoformat() if last_scraped else None,
    )


async def _fetch_daily_counts(pool) -> List[DailyCount]:
    """Daily job counts for the last 30 days, oldest first."""
    rows = await pool.fetch("""
        SELECT DATE(created_at) AS day, COUNT(*) AS cnt
        FROM scrape_jobs
        WHERE created_at >= NOW() - INTERVAL '30 days'
        GROUP BY day
        ORDER BY day
    """)
    return [DailyCount(date=str(r["day"]), count=r["cnt"]) for r in rows]


async def _fetch_model_ranks(pool) -> List[ModelRankItem]:
    """Top 15 models by number of scrape results."""
    rows = await pool.fetch("""
        SELECT model_name, COUNT(*) AS cnt
        FROM scrape_results
        GROUP BY model_name
        ORDER BY cnt DESC
        LIMIT 15
    """)
    return [ModelRankItem(model_name=r["model_name"], count=r["cnt"]) for r in rows]


async def _fetch_recent_jobs(pool) -> List[RecentJob]:
    """The 10 most recent jobs, each with its attached result count."""
    rows = await pool.fetch("""
        SELECT j.id, j.status, j.created_at, COUNT(r.id) AS model_count
        FROM scrape_jobs j
        LEFT JOIN scrape_results r ON r.job_id = j.id
        GROUP BY j.id, j.status, j.created_at
        ORDER BY j.created_at DESC
        LIMIT 10
    """)
    return [
        RecentJob(
            job_id=str(r["id"]),
            status=r["status"],
            model_count=r["model_count"],
            created_at=r["created_at"].isoformat(),
        )
        for r in rows
    ]


@router.get("/scrape-stats", response_model=ScrapeStatsOut)
async def get_scrape_stats() -> ScrapeStatsOut:
    """Return aggregated scraping statistics for the dashboard.

    Runs four read-only queries (overview, daily series, model ranking,
    recent jobs) and assembles them into a single ``ScrapeStatsOut``.
    """
    pool = get_pool()
    return ScrapeStatsOut(
        overview=await _fetch_overview(pool),
        daily_counts=await _fetch_daily_counts(pool),
        model_ranks=await _fetch_model_ranks(pool),
        recent_jobs=await _fetch_recent_jobs(pool),
    )