-- 001_init.sql
  1. -- Migration 001: initial schema
  2. -- Target: database=crawl, schema=crawl
  3. CREATE SCHEMA IF NOT EXISTS crawl;
  4. SET search_path TO crawl;
  5. -- access_logs: records every inbound HTTP request with geo info
  6. CREATE TABLE IF NOT EXISTS access_logs (
  7. id BIGSERIAL PRIMARY KEY,
  8. ip VARCHAR(45) NOT NULL,
  9. method VARCHAR(10) NOT NULL,
  10. path TEXT NOT NULL,
  11. status_code SMALLINT NOT NULL,
  12. latency_ms REAL NOT NULL,
  13. country VARCHAR(100) DEFAULT 'Unknown',
  14. city VARCHAR(100) DEFAULT 'Unknown',
  15. latitude DOUBLE PRECISION,
  16. longitude DOUBLE PRECISION,
  17. created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
  18. );
  19. CREATE INDEX IF NOT EXISTS idx_access_logs_created_at ON access_logs (created_at DESC);
  20. CREATE INDEX IF NOT EXISTS idx_access_logs_ip ON access_logs (ip);
  21. -- scrape_jobs: tracks scraping task lifecycle
  22. CREATE TABLE IF NOT EXISTS scrape_jobs (
  23. id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  24. urls TEXT[] NOT NULL,
  25. status VARCHAR(10) NOT NULL DEFAULT 'pending',
  26. error TEXT,
  27. created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
  28. updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
  29. );
  30. CREATE INDEX IF NOT EXISTS idx_scrape_jobs_created_at ON scrape_jobs (created_at DESC);
  31. -- scrape_results: stores per-URL scraping output linked to a job
  32. CREATE TABLE IF NOT EXISTS scrape_results (
  33. id BIGSERIAL PRIMARY KEY,
  34. job_id UUID NOT NULL REFERENCES scrape_jobs(id),
  35. url TEXT NOT NULL,
  36. model_name VARCHAR(200) NOT NULL,
  37. prices JSONB NOT NULL,
  38. scraped_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
  39. );
  40. CREATE INDEX IF NOT EXISTS idx_scrape_results_url_scraped ON scrape_results (url, scraped_at DESC);