#!/usr/bin/env python3
"""Automate copying RC11 assets into the admin portal static directory."""

from __future__ import annotations

import argparse
import csv
import html
import re
import shutil
from pathlib import Path
from typing import Dict


ROOT = Path(__file__).resolve().parents[1]
RC11_DIR = ROOT / "omega_document" / "vol1" / "dist" / "arxiv_rc11"
PORTAL_STATIC = ROOT / "omega_document" / "admin_portal" / "static"
HTML_DEST = PORTAL_STATIC / "html" / "volume1_rc11_body.html"
FIG_DEST = PORTAL_STATIC / "vol1_rc11_figures"
PORTAL_APP = ROOT / "omega_document" / "admin_portal" / "enhanced_app.py"
MANIFEST = RC11_DIR / "figures" / "manifest.csv"


def extract_body(html_text: str) -> str:
    match = re.search(r"<body[^>]*>(.*)</body>", html_text, re.S | re.I)
    if not match:
        raise ValueError("Unable to locate <body> in HTML input")
    return match.group(1).strip()


def load_manifest() -> Dict[str, Dict[str, str]]:
    mapping: Dict[str, Dict[str, str]] = {}
    if not MANIFEST.exists():
        return mapping
    with MANIFEST.open(encoding="utf-8") as handle:
        reader = csv.DictReader(handle)
        for row in reader:
            label = row["label"].strip()
            expected = Path(row["expected_filename"].strip()).with_suffix(".png").name
            caption = row["caption"].strip()
            mapping[label] = {"filename": expected, "caption": caption}
    return mapping


def rewrite_html(body_text: str, fig_map: Dict[str, Dict[str, str]]) -> str:
    for label, meta in fig_map.items():
        pattern = f'<figure id="{label}">'  # Pandoc default
        replacement = (
            f'<figure id="{label}" class="rft-figure">\n'
            f'<img src="/static/vol1_rc11_figures/{meta["filename"]}" '
            f'alt="{meta["caption"]}" loading="lazy" />'
        )
        body_text = body_text.replace(pattern, replacement)

    todo_regex = re.compile(r'<p><span style="color: red"><strong>TODO:</strong>\s*(.*?)</span></p>', re.S)
    body_text = todo_regex.sub(r'<div class="todo-note">TODO: \1</div>', body_text)
    return body_text


def copy_figures(src: Path) -> None:
    if src.resolve() == FIG_DEST.resolve():
        FIG_DEST.mkdir(parents=True, exist_ok=True)
        return
    if not src.exists():
        raise FileNotFoundError(f"Figure source directory missing: {src}")
    if FIG_DEST.exists():
        shutil.rmtree(FIG_DEST)
    shutil.copytree(src, FIG_DEST)


def ensure_portal_config() -> None:
    if not PORTAL_APP.exists():
        raise FileNotFoundError(f"Portal app not found: {PORTAL_APP}")
    text = PORTAL_APP.read_text(encoding="utf-8")
    block_pattern = re.compile(r'"rft_volume1_rc11"\s*:\s*\{[^}]*\}', re.S)
    match = block_pattern.search(text)
    default_block = (
        '    "rft_volume1_rc11": {\n'
        '        "title": "RFT Volume 1: Unified Field Dynamics (RC11 Preprint)",\n'
        '        "description": "v1.0-rc11 accessibility build with executive summary and preregistered gate overview.",\n'
        '        "pdf": "papers/RFT_Vol1_v1.0_RC11_PREPRINT.pdf",\n'
        '        "html": "html/volume1_rc11_body.html",\n'
        '        "downloads": [\n'
        '            {"label": "RC11 preprint package (ZIP)", "path": "papers/Volume1_rc11_arxiv_package.zip"}\n'
        '        ],\n'
        '        "tags": ["Volume1", "RC11", "Preprint"]\n'
        '    },\n'
    )

    if not match:
        marker = '    "rft_volume1_final"'
        idx = text.find(marker)
        if idx == -1:
            text = text.rstrip() + "\n" + default_block
        else:
            text = text[:idx] + default_block + text[idx:]
    else:
        block = match.group(0)
        if '"html"' not in block:
            new_block = re.sub(
                r'("pdf"[^\n]*\n)',
                r'\1        "html": "html/volume1_rc11_body.html",\n',
                block,
                count=1,
            )
            text = text[:match.start()] + new_block + text[match.end():]
        elif 'html/volume1_rc11_body.html' not in block:
            new_block = re.sub(
                r'"html"\s*:\s*"[^"]+"',
                '"html": "html/volume1_rc11_body.html"',
                block,
            )
            text = text[:match.start()] + new_block + text[match.end():]

    PORTAL_APP.write_text(text, encoding="utf-8")


def main() -> int:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--html-source",
        default=RC11_DIR / "rc11.html",
        help="Full HTML file to transform (defaults to pandoc output rc11.html)",
    )
    parser.add_argument(
        "--figure-source",
        default=RC11_DIR / "web_figures",
        help="Directory containing prepared web figures (default rc11/web_figures)",
    )
    args = parser.parse_args()

    html_source = Path(args.html_source)
    if not html_source.exists():
        raise FileNotFoundError(f"HTML source missing: {html_source}")

    fig_map = load_manifest()
    body = extract_body(html_source.read_text(encoding="utf-8"))
    transformed = rewrite_html(body, fig_map)
    HTML_DEST.parent.mkdir(parents=True, exist_ok=True)
    HTML_DEST.write_text(transformed + "\n", encoding="utf-8")

    copy_figures(Path(args.figure_source))
    ensure_portal_config()
    print(f"HTML deployed to {HTML_DEST}")
    print(f"Figures synced to {FIG_DEST}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
