-- Migration 001: initial schema
-- Target: database=crawl, schema=crawl

-- gen_random_uuid() is built in from PostgreSQL 13; on older versions it is
-- provided by pgcrypto, so create the extension for compatibility (no-op on 13+).
CREATE EXTENSION IF NOT EXISTS pgcrypto;

CREATE SCHEMA IF NOT EXISTS crawl;
SET search_path TO crawl;

-- access_logs: records every inbound HTTP request with geo info
CREATE TABLE IF NOT EXISTS access_logs (
    id          BIGSERIAL PRIMARY KEY,
    ip          VARCHAR(45) NOT NULL,   -- 45 chars fits the longest IPv6 form (incl. IPv4-mapped)
    method      VARCHAR(10) NOT NULL,
    path        TEXT NOT NULL,
    status_code SMALLINT NOT NULL,
    latency_ms  REAL NOT NULL,          -- request latency in milliseconds
    country     VARCHAR(100) DEFAULT 'Unknown',
    city        VARCHAR(100) DEFAULT 'Unknown',
    latitude    DOUBLE PRECISION,       -- nullable: geo lookup may not resolve
    longitude   DOUBLE PRECISION,       -- nullable: geo lookup may not resolve
    created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- DESC index matches the dominant "most recent first" access pattern.
CREATE INDEX IF NOT EXISTS idx_access_logs_created_at
    ON access_logs (created_at DESC);
CREATE INDEX IF NOT EXISTS idx_access_logs_ip
    ON access_logs (ip);

-- scrape_jobs: tracks scraping task lifecycle
CREATE TABLE IF NOT EXISTS scrape_jobs (
    id         UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    urls       TEXT[] NOT NULL,
    -- NOTE(review): status values are application-defined; add a CHECK constraint
    -- once the full state set (presumably 'pending', 'running', ...) is confirmed.
    status     VARCHAR(10) NOT NULL DEFAULT 'pending',
    error      TEXT,                    -- nullable: populated only on failure
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    -- NOTE(review): updated_at is not maintained automatically; the application
    -- (or a trigger, not defined in this migration) must set it on UPDATE.
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_scrape_jobs_created_at
    ON scrape_jobs (created_at DESC);

-- scrape_results: stores per-URL scraping output linked to a job
CREATE TABLE IF NOT EXISTS scrape_results (
    id         BIGSERIAL PRIMARY KEY,
    -- Results are child rows of a job: deleting a job removes its results.
    job_id     UUID NOT NULL
                   CONSTRAINT scrape_results_job_id_fk
                   REFERENCES scrape_jobs (id) ON DELETE CASCADE,
    url        TEXT NOT NULL,
    model_name VARCHAR(200) NOT NULL,
    prices     JSONB NOT NULL,          -- structured price payload; schema owned by the scraper
    scraped_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Postgres does not index FK columns automatically; without this, results-by-job
-- lookups and FK checks on job deletion require a full scan of scrape_results.
CREATE INDEX IF NOT EXISTS idx_scrape_results_job_id
    ON scrape_results (job_id);
CREATE INDEX IF NOT EXISTS idx_scrape_results_url_scraped
    ON scrape_results (url, scraped_at DESC);