| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748 |
- -- Migration 001: initial schema
- -- Target: database=crawl, schema=crawl
- CREATE SCHEMA IF NOT EXISTS crawl;
- SET search_path TO crawl;
- -- access_logs: records every inbound HTTP request with geo info
- CREATE TABLE IF NOT EXISTS access_logs (
- id BIGSERIAL PRIMARY KEY,
- ip VARCHAR(45) NOT NULL,
- method VARCHAR(10) NOT NULL,
- path TEXT NOT NULL,
- status_code SMALLINT NOT NULL,
- latency_ms REAL NOT NULL,
- country VARCHAR(100) DEFAULT 'Unknown',
- city VARCHAR(100) DEFAULT 'Unknown',
- latitude DOUBLE PRECISION,
- longitude DOUBLE PRECISION,
- created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
- );
- CREATE INDEX IF NOT EXISTS idx_access_logs_created_at ON access_logs (created_at DESC);
- CREATE INDEX IF NOT EXISTS idx_access_logs_ip ON access_logs (ip);
- -- scrape_jobs: tracks scraping task lifecycle
- CREATE TABLE IF NOT EXISTS scrape_jobs (
- id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
- urls TEXT[] NOT NULL,
- status VARCHAR(10) NOT NULL DEFAULT 'pending',
- error TEXT,
- created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
- updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
- );
- CREATE INDEX IF NOT EXISTS idx_scrape_jobs_created_at ON scrape_jobs (created_at DESC);
- -- scrape_results: stores per-URL scraping output linked to a job
- CREATE TABLE IF NOT EXISTS scrape_results (
- id BIGSERIAL PRIMARY KEY,
- job_id UUID NOT NULL REFERENCES scrape_jobs(id),
- url TEXT NOT NULL,
- model_name VARCHAR(200) NOT NULL,
- prices JSONB NOT NULL,
- scraped_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
- );
- CREATE INDEX IF NOT EXISTS idx_scrape_results_url_scraped ON scrape_results (url, scraped_at DESC);
|