-- Umbra — MariaDB Schema
-- Run: mysql -u root -p < schema.sql

-- Create the database on first run (skipped when it already exists) with
-- utf8mb4 text storage, then select it for every statement that follows.
CREATE DATABASE IF NOT EXISTS inumbra
    CHARACTER SET utf8mb4
    COLLATE utf8mb4_unicode_ci;

USE inumbra;

-- ═══════════════════════════════════════════════════════════════
-- Core directory: one row per .onion base address
-- ═══════════════════════════════════════════════════════════════
-- Directory of onion services: one row per base address, holding its
-- classification, the metadata and metrics captured during crawling, and the
-- counters used for status tracking.
CREATE TABLE IF NOT EXISTS onions (
    id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    -- Base address, e.g. abc123.onion; must be unique.
    address VARCHAR(70) NOT NULL,
    title VARCHAR(500) NOT NULL DEFAULT '',
    status ENUM('unknown','online','offline') NOT NULL DEFAULT 'unknown',
    -- Category label, e.g. ransomware-dls, market, forum.
    cti_type VARCHAR(60) NOT NULL DEFAULT '',
    cti_name VARCHAR(200) NOT NULL DEFAULT '',
    -- Where the address was first found.
    source VARCHAR(120) NOT NULL DEFAULT '',
    -- JSON array of labels, e.g. ["drugs","forum","wiki"].
    tags JSON DEFAULT NULL,
    notes TEXT DEFAULT NULL,

    -- Metadata captured during crawl/scrape.
    page_language VARCHAR(10) NOT NULL DEFAULT '',
    meta_description VARCHAR(1000) NOT NULL DEFAULT '',
    meta_keywords VARCHAR(500) NOT NULL DEFAULT '',
    meta_generator VARCHAR(200) NOT NULL DEFAULT '',
    server_software VARCHAR(200) NOT NULL DEFAULT '',
    powered_by VARCHAR(200) NOT NULL DEFAULT '',
    -- Comma-separated names, e.g. "nginx,PHP,WordPress,jQuery".
    tech_stack VARCHAR(500) NOT NULL DEFAULT '',
    favicon_url VARCHAR(500) NOT NULL DEFAULT '',
    og_title VARCHAR(500) NOT NULL DEFAULT '',
    og_description VARCHAR(1000) NOT NULL DEFAULT '',
    og_image VARCHAR(500) NOT NULL DEFAULT '',

    -- Page metrics.
    page_size INT UNSIGNED NOT NULL DEFAULT 0,
    response_time_ms INT UNSIGNED NOT NULL DEFAULT 0,
    link_count INT UNSIGNED NOT NULL DEFAULT 0,
    form_count INT UNSIGNED NOT NULL DEFAULT 0,
    image_count INT UNSIGNED NOT NULL DEFAULT 0,

    -- Status tracking.
    check_count INT UNSIGNED NOT NULL DEFAULT 0,
    offline_streak INT UNSIGNED NOT NULL DEFAULT 0,
    -- Search/access count.
    hits INT UNSIGNED NOT NULL DEFAULT 0,
    last_headers VARCHAR(2000) NOT NULL DEFAULT '',

    -- Path to the thumbnail; the HTML preview itself is stored on disk.
    thumbnail_path VARCHAR(500) NOT NULL DEFAULT '',

    -- Timestamps. updated_at is refreshed automatically whenever the row changes;
    -- last_checked stays NULL until it is explicitly set.
    first_seen DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    last_seen DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    last_checked DATETIME DEFAULT NULL,
    created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    UNIQUE KEY address (address),

    -- Full-text search index over the descriptive columns.
    -- NOTE(review): tags is declared JSON while the other members are
    -- VARCHAR/TEXT; confirm the target MariaDB version accepts this mix.
    FULLTEXT KEY ft_search (
        title,
        meta_description,
        meta_keywords,
        tech_stack,
        tags,
        notes,
        cti_name,
        cti_type,
        server_software
    ),

    -- Secondary indexes for filtering and ordering.
    KEY idx_status (status),
    KEY idx_cti (cti_type),
    KEY idx_last_seen (last_seen),
    KEY idx_offline (status, offline_streak),
    KEY idx_hits (hits DESC)
) ENGINE = InnoDB;

-- ═══════════════════════════════════════════════════════════════
-- Discovered sub-pages under each onion
-- ═══════════════════════════════════════════════════════════════
-- Sub-pages found under an onion. Every row points at one onions row and is
-- deleted together with it.
CREATE TABLE IF NOT EXISTS pages (
    id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    onion_id BIGINT UNSIGNED NOT NULL,
    page_url VARCHAR(2000) NOT NULL,
    title VARCHAR(500) NOT NULL DEFAULT '',
    source VARCHAR(120) NOT NULL DEFAULT '',
    scraped TINYINT(1) NOT NULL DEFAULT 0,
    first_seen DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    last_seen DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    -- Removing an onion removes its pages as well.
    FOREIGN KEY (onion_id) REFERENCES onions (id) ON DELETE CASCADE,
    -- Uniqueness is checked on onion_id plus the first 500 characters of page_url.
    UNIQUE KEY uq_page (onion_id, page_url(500)),
    KEY idx_onion (onion_id)
) ENGINE = InnoDB;

-- ═══════════════════════════════════════════════════════════════
-- CSAM blocklist (Ahmia MD5 hashes)
-- ═══════════════════════════════════════════════════════════════
-- Blocklist entries, keyed by their MD5 hash.
CREATE TABLE IF NOT EXISTS blocklist (
    hash_md5 CHAR(32) NOT NULL,
    -- Origin of the entry; defaults to 'ahmia'.
    source VARCHAR(50) NOT NULL DEFAULT 'ahmia',
    added_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (hash_md5)
) ENGINE = InnoDB;

-- ═══════════════════════════════════════════════════════════════
-- API keys for crawler authentication
-- ═══════════════════════════════════════════════════════════════
-- API keys; only a hash of each key is stored, and that hash must be unique.
CREATE TABLE IF NOT EXISTS api_keys (
    id INT UNSIGNED NOT NULL AUTO_INCREMENT,
    -- SHA-256 of the API key.
    key_hash CHAR(64) NOT NULL,
    label VARCHAR(100) NOT NULL DEFAULT '',
    -- Permission level, e.g. write or admin; new keys default to 'write'.
    permissions VARCHAR(200) NOT NULL DEFAULT 'write',
    -- New keys are active by default.
    active TINYINT(1) NOT NULL DEFAULT 1,
    created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    -- NULL until it is explicitly set.
    last_used DATETIME DEFAULT NULL,

    PRIMARY KEY (id),
    UNIQUE KEY key_hash (key_hash)
) ENGINE = InnoDB;

-- ═══════════════════════════════════════════════════════════════
-- Rate limiting (per-IP, sliding window)
-- ═══════════════════════════════════════════════════════════════
-- Request counters: one row per hashed IP, with a request count and the
-- start time of its counting window.
CREATE TABLE IF NOT EXISTS rate_limits (
    -- SHA-256 of the IP.
    ip_hash CHAR(64) NOT NULL,
    -- A newly inserted row starts at one request.
    requests INT UNSIGNED NOT NULL DEFAULT 1,
    window_start DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (ip_hash),
    KEY idx_window (window_start)
) ENGINE = InnoDB;

-- ═══════════════════════════════════════════════════════════════
-- Search analytics (optional, anonymized)
-- ═══════════════════════════════════════════════════════════════
-- Search analytics: one row per logged search, with a hash of the query
-- rather than the query text.
CREATE TABLE IF NOT EXISTS search_log (
    id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    -- SHA-256 of the query (anonymized).
    query_hash CHAR(64) NOT NULL,
    result_count INT UNSIGNED NOT NULL DEFAULT 0,
    searched_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    KEY idx_time (searched_at)
) ENGINE = InnoDB;

-- ═══════════════════════════════════════════════════════════════
-- User reports (abuse, miscategorization, CSAM, scam, etc.)
-- ═══════════════════════════════════════════════════════════════
-- Reports filed against an address, together with their review state.
CREATE TABLE IF NOT EXISTS reports (
    id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    -- The reported .onion address.
    address VARCHAR(70) NOT NULL,
    reason ENUM('csam','scam','malware','miscat','other') NOT NULL DEFAULT 'other',
    -- Optional note supplied by the reporter.
    detail VARCHAR(500) NOT NULL DEFAULT '',
    -- SHA-256 of the reporter's IP.
    ip_hash CHAR(64) NOT NULL DEFAULT '',
    -- Every report starts out as 'pending'.
    status ENUM('pending','dismissed','blocked') NOT NULL DEFAULT 'pending',
    admin_note VARCHAR(500) NOT NULL DEFAULT '',
    reported_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    -- NULL until it is explicitly set.
    resolved_at DATETIME DEFAULT NULL,

    PRIMARY KEY (id),
    KEY idx_status (status),
    KEY idx_address (address),
    KEY idx_time (reported_at)
) ENGINE = InnoDB;

-- ═══════════════════════════════════════════════════════════════
-- Uptime log — one row per status check per site (for sparklines)
-- ═══════════════════════════════════════════════════════════════
-- History of status checks: each row records one check of one address.
CREATE TABLE IF NOT EXISTS uptime_log (
    id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    address VARCHAR(70) NOT NULL,
    -- 1 = online, 0 = offline; no default, so it must always be supplied.
    alive TINYINT(1) NOT NULL,
    response_ms INT UNSIGNED NOT NULL DEFAULT 0,
    checked_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    -- Per-address history in time order.
    KEY idx_addr_time (address, checked_at),
    KEY idx_time (checked_at)
) ENGINE = InnoDB;

-- ═══════════════════════════════════════════════════════════════
-- Trending search terms (safe terms only, admin-curated)
-- Stores the raw term so it can be displayed. Terms containing
-- unsafe words are never inserted (enforced in PHP).
-- ═══════════════════════════════════════════════════════════════
-- Search terms with hit counters; the term text itself is the primary key.
CREATE TABLE IF NOT EXISTS trending_terms (
    term VARCHAR(100) NOT NULL,
    -- A newly inserted term starts at one hit.
    hits INT UNSIGNED NOT NULL DEFAULT 1,
    last_seen DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    -- Terms start unapproved; an admin must approve a term before it is shown.
    approved TINYINT(1) NOT NULL DEFAULT 0,

    PRIMARY KEY (term),
    KEY idx_hits (hits DESC),
    KEY idx_approved (approved, hits DESC)
) ENGINE = InnoDB;

-- ═══════════════════════════════════════════════════════════════
-- Change detection log
-- ═══════════════════════════════════════════════════════════════
-- Log of detected changes: each row stores the affected address, the kind of
-- change, and the value before and after.
CREATE TABLE IF NOT EXISTS site_changes (
    id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    address VARCHAR(70) NOT NULL,
    -- No default: the kind of change must always be supplied.
    change_type ENUM(
        'title_changed',
        'came_online',
        'went_offline',
        'new_pages',
        'content_changed',
        'canary_changed'
    ) NOT NULL,
    old_val VARCHAR(500) NOT NULL DEFAULT '',
    new_val VARCHAR(500) NOT NULL DEFAULT '',
    detected_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    KEY idx_addr (address),
    KEY idx_time (detected_at),
    KEY idx_type_time (change_type, detected_at)
) ENGINE = InnoDB;

-- ═══════════════════════════════════════════════════════════════
-- Mirror detection columns and additional indexes (added to onions)
-- ═══════════════════════════════════════════════════════════════
-- Upgrade the onions table in a single ALTER so the table is altered once
-- rather than four times and the change succeeds or fails as a whole.
-- Every clause carries IF NOT EXISTS, so re-running the script skips whatever
-- is already present.
ALTER TABLE onions
    -- Columns, placed directly after thumbnail_path.
    ADD COLUMN IF NOT EXISTS content_hash VARCHAR(64) NOT NULL DEFAULT '' AFTER thumbnail_path,
    ADD COLUMN IF NOT EXISTS mirror_group VARCHAR(64) NOT NULL DEFAULT '' AFTER content_hash,
    -- Lookup indexes on the content hash and on the page language.
    ADD INDEX IF NOT EXISTS idx_content_hash (content_hash),
    ADD INDEX IF NOT EXISTS idx_lang (page_language);

-- ═══════════════════════════════════════════════════════════════
-- Canary tracker
-- ═══════════════════════════════════════════════════════════════
-- Tracked canaries: one row per canary, with its hashes, a short preview and
-- its current state.
CREATE TABLE IF NOT EXISTS canaries (
    id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    address VARCHAR(70) NOT NULL,
    page_url VARCHAR(2000) NOT NULL DEFAULT '',
    canary_hash VARCHAR(64) NOT NULL DEFAULT '',
    preview VARCHAR(500) NOT NULL DEFAULT '',
    -- New rows start as 'active'.
    status ENUM('active','changed','missing') NOT NULL DEFAULT 'active',
    first_seen DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    last_seen DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    last_hash VARCHAR(64) NOT NULL DEFAULT '',

    PRIMARY KEY (id),
    KEY idx_addr (address),
    KEY idx_status (status)
) ENGINE = InnoDB;

-- ═══════════════════════════════════════════════════════════════
-- User submissions (self-service)
-- ═══════════════════════════════════════════════════════════════
-- Addresses submitted by users, together with their review state.
CREATE TABLE IF NOT EXISTS submissions (
    id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    address VARCHAR(70) NOT NULL,
    description VARCHAR(500) NOT NULL DEFAULT '',
    ip_hash CHAR(64) NOT NULL DEFAULT '',
    -- Every submission starts out as 'pending'.
    status ENUM('pending','accepted','rejected') NOT NULL DEFAULT 'pending',
    submitted_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    -- NULL until it is explicitly set.
    resolved_at DATETIME DEFAULT NULL,

    PRIMARY KEY (id),
    KEY idx_status (status),
    KEY idx_time (submitted_at)
) ENGINE = InnoDB;

-- ═══════════════════════════════════════════════════════════════
-- Snapshots (wayback machine metadata, HTML on crawler disk)
-- ═══════════════════════════════════════════════════════════════
-- Snapshot metadata: each row describes one capture of an address and stores
-- the path of its HTML file, not the HTML itself.
CREATE TABLE IF NOT EXISTS snapshots (
    id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    address VARCHAR(70) NOT NULL,
    title VARCHAR(500) NOT NULL DEFAULT '',
    content_hash VARCHAR(64) NOT NULL DEFAULT '',
    page_size INT UNSIGNED NOT NULL DEFAULT 0,
    html_path VARCHAR(500) NOT NULL DEFAULT '',
    captured_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    -- Per-address captures in time order.
    KEY idx_addr_time (address, captured_at),
    KEY idx_time (captured_at)
) ENGINE = InnoDB;

-- ── Removal Requests ─────────────────────────────────────────────────────────
-- Removal requests, together with their handling state.
CREATE TABLE IF NOT EXISTS removal_requests (
    id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    -- Kind of request; defaults to 'delist'.
    type ENUM('delist','personal','dmca','other') NOT NULL DEFAULT 'delist',
    address VARCHAR(70) NOT NULL DEFAULT '',
    email VARCHAR(255) NOT NULL DEFAULT '',
    -- Required: declared NOT NULL without a default.
    detail TEXT NOT NULL,
    ip_hash CHAR(64) NOT NULL DEFAULT '',
    -- Every request starts out as 'pending'.
    status ENUM('pending','approved','denied','completed') NOT NULL DEFAULT 'pending',
    admin_note TEXT DEFAULT NULL,
    created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    -- NULL until it is explicitly set.
    resolved_at DATETIME DEFAULT NULL,

    PRIMARY KEY (id),
    KEY idx_status (status),
    KEY idx_ip (ip_hash),
    KEY idx_created (created_at)
) ENGINE = InnoDB;
