#!/usr/bin/env python3
"""RC11 validation pipeline: LaTeX build, figure diffs, HTML link checks."""

from __future__ import annotations

import argparse
import csv
import json
import os
import shutil
import subprocess
import sys
from dataclasses import dataclass, asdict
from datetime import datetime, UTC
from html.parser import HTMLParser
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Tuple
from urllib.parse import unquote, urlsplit

# Pillow is optional: when the import fails, Image is set to None and
# diff_figures() reports the figure step as skipped instead of crashing.
try:
    from PIL import Image, ImageChops, ImageOps, ImageStat
    # Read LANCZOS from Image.Resampling when this Pillow exposes that
    # attribute, otherwise from the Image module itself.
    RESAMPLE = getattr(Image, "Resampling", Image).LANCZOS  # type: ignore[attr-defined]
except Exception:  # pragma: no cover - pillow may be absent in some envs
    # NOTE: RESAMPLE, ImageChops, ImageOps and ImageStat stay undefined on
    # this path; code must check `Image is None` before touching them.
    Image = None  # type: ignore


# Repository root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Directory in which build_latex() runs the LaTeX engine on main.tex; build.log
# and both validation reports are written here as well.
RC11_DIR = ROOT / "omega_document" / "vol1" / "dist" / "arxiv_rc11"
# HTML body scanned by link_checks(); relative links resolve against its parent.
RC11_HTML = ROOT / "omega_document" / "admin_portal" / "static" / "html" / "volume1_rc11_body.html"
# Directory holding the new figure PNGs that diff_figures() compares.
RC11_FIG_DIR = ROOT / "omega_document" / "admin_portal" / "static" / "vol1_rc11_figures"
# NOTE(review): not referenced anywhere in the visible code — presumably the
# expected preprint PDF; confirm whether a check for it is missing.
RC11_PDF = RC11_DIR / "RFT_Vol1_v1.0_RC11_PREPRINT.pdf"
# CSV manifest read by load_manifest(); diff_figures() uses its "label" and
# "expected_filename" columns.
RC11_MANIFEST = RC11_DIR / "figures" / "manifest.csv"
# Directory of baseline figures that diff_figures() compares against.
BASELINE_FIG_DIR = ROOT / "rft-vol1-arxiv" / "Volume1" / "figures"
# Output files written by generate_reports().
VALIDATION_JSON = RC11_DIR / "validation_report.json"
VALIDATION_MD = RC11_DIR / "validation_report.md"


@dataclass
class StepResult:
    """Outcome of one validation step; serialised into the reports via ``asdict``."""

    # Step identifier; the steps in this file use "latex", "figures" and "html".
    name: str
    # Outcome label; this file produces "passed", "failed", "partial",
    # "skipped" and "error". main() derives the exit code from these.
    status: str
    # Step-specific payload (messages, counters, per-item results) that
    # generate_reports() dumps to JSON and Markdown.
    details: Dict[str, object]


def run_subprocess(cmd: List[str], cwd: Path) -> Tuple[int, str, str]:
    """Run *cmd* in *cwd* and return ``(exit_code, stdout, stderr)``.

    Output is decoded as UTF-8 with undecodable bytes replaced by U+FFFD
    instead of raising. LaTeX engines routinely echo bytes in whatever
    encoding the document uses, and a ``UnicodeDecodeError`` here would abort
    the whole validation run rather than letting the step be reported.

    A non-zero exit status is returned to the caller, never raised.
    """
    proc = subprocess.run(
        cmd,
        cwd=str(cwd),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,
        # Passing an encoding enables text mode; errors="replace" keeps
        # decoding total for arbitrary tool output.
        encoding="utf-8",
        errors="replace",
    )
    return proc.returncode, proc.stdout, proc.stderr


def build_latex() -> StepResult:
    """Compile ``main.tex`` in the RC11 directory with the first available engine.

    Engines are probed on ``PATH`` in the order latexmk, pdflatex, tectonic,
    and only the first one found is run. Its combined stdout and stderr are
    saved to ``build.log`` inside the RC11 directory.

    Returns:
        A ``StepResult`` named ``"latex"`` whose status is ``"error"`` when
        the RC11 directory is missing, ``"skipped"`` when no engine is
        installed, and otherwise ``"passed"`` or ``"failed"`` according to the
        engine's exit code.
    """
    if not RC11_DIR.exists():
        return StepResult(
            name="latex",
            status="error",
            details={"message": f"RC11 LaTeX directory missing: {RC11_DIR}"},
        )

    candidates: Tuple[Tuple[str, List[str]], ...] = (
        ("latexmk", ["latexmk", "-pdf", "-interaction=nonstopmode", "main.tex"]),
        ("pdflatex", ["pdflatex", "-interaction=nonstopmode", "main.tex"]),
        ("tectonic", ["tectonic", "main.tex"]),
    )
    # Lazily probe PATH so lookup stops at the first engine that is installed.
    chosen = next(
        ((label, argv) for label, argv in candidates if shutil.which(argv[0])),
        None,
    )
    if chosen is None:
        return StepResult(
            name="latex",
            status="skipped",
            details={
                "message": "No LaTeX engine found (looked for latexmk, pdflatex, tectonic).",
            },
        )

    engine, command = chosen
    return_code, out_text, err_text = run_subprocess(command, RC11_DIR)

    build_log = RC11_DIR / "build.log"
    build_log.write_text(out_text + "\n" + err_text, encoding="utf-8")

    succeeded = return_code == 0
    details: Dict[str, object] = {
        "tool": engine,
        "exit_code": return_code,
        "log": str(build_log.relative_to(ROOT)),
    }
    if not succeeded:
        details["message"] = "LaTeX build exited with non-zero status."
    return StepResult(
        name="latex",
        status="passed" if succeeded else "failed",
        details=details,
    )


def load_manifest() -> List[Dict[str, str]]:
    """Read the figure manifest CSV into a list of per-row dictionaries.

    Returns:
        One dict per data row, keyed by the header line, or an empty list
        when the manifest file does not exist.
    """
    if not RC11_MANIFEST.exists():
        return []
    # newline="" hands line-ending handling to the csv module, which it needs
    # to parse quoted fields containing line breaks correctly. "utf-8-sig"
    # reads plain UTF-8 unchanged but drops a leading BOM, which would
    # otherwise become part of the first column name and hide that column.
    with RC11_MANIFEST.open(encoding="utf-8-sig", newline="") as handle:
        return list(csv.DictReader(handle))


def canonicalise(name: str) -> str:
    """Reduce a file name to a lowercase comparison key.

    The directory part and the final suffix are dropped, every ``_`` and
    ``-`` is removed, and the remainder is lowercased.
    """
    stem = Path(name).stem
    # One translation pass deletes both separator characters.
    without_separators = stem.translate(str.maketrans("", "", "_-"))
    return without_separators.lower()


# Baseline suffixes that can be opened for a pixel comparison.
_RASTER_SUFFIXES = frozenset({".png", ".jpg", ".jpeg"})


def _manifest_field(row: Dict[str, str], key: str) -> str:
    """Return the stripped value of *key*, treating a missing or ``None`` cell as empty."""
    # csv.DictReader fills cells missing from a short row with None, so a
    # plain row.get(key, "") is not enough to guarantee a string.
    return (row.get(key) or "").strip()


def _index_baseline_figures(directory: Path) -> Dict[str, Path]:
    """Map canonical figure names to baseline files, preferring raster formats."""
    index: Dict[str, Path] = {}
    if not directory.is_dir():
        return index
    # Raster files sort first and ties break on the file name, so the winner
    # among same-named candidates is deterministic and comparable if possible.
    ordered = sorted(
        directory.iterdir(),
        key=lambda entry: (entry.suffix.lower() not in _RASTER_SUFFIXES, entry.name),
    )
    for entry in ordered:
        index.setdefault(canonicalise(entry.name), entry)
    return index


def _comparison_entry(
    label: str,
    status: str,
    new_path: Path,
    baseline_path: Optional[Path] = None,
    **extra: object,
) -> Dict[str, object]:
    """Build one per-figure report record with a stable key order."""
    entry: Dict[str, object] = {
        "label": label,
        "status": status,
        "new_path": str(new_path.relative_to(ROOT)),
    }
    if baseline_path is not None:
        entry["baseline"] = str(baseline_path.relative_to(ROOT))
    entry.update(extra)
    return entry


def _measure_deltas(new_path: Path, baseline_path: Path) -> Tuple[float, float, int]:
    """Return ``(mean, rms, max)`` channel differences between two images.

    Both images are converted to RGB, and the baseline is resized to the new
    image's size when the sizes differ. Mean and RMS are averaged over the
    colour channels; max is the largest single-channel difference.
    """
    with Image.open(new_path) as new_img, Image.open(baseline_path) as base_img:
        new_rgb = new_img.convert("RGB")
        base_rgb = base_img.convert("RGB")
        if base_rgb.size != new_rgb.size:
            base_rgb = base_rgb.resize(new_rgb.size, RESAMPLE)
        diff = ImageChops.difference(new_rgb, base_rgb)
        stat = ImageStat.Stat(diff)
        mean_delta = sum(stat.mean) / len(stat.mean)
        rms_delta = sum(stat.rms) / len(stat.rms)
        max_delta = max(channel_max for (_min, channel_max) in diff.getextrema())
    return mean_delta, rms_delta, max_delta


def diff_figures() -> StepResult:
    """Compare each manifest figure against its baseline image.

    For every manifest row with a non-empty ``label`` and
    ``expected_filename``, the PNG of that name in the RC11 figure directory
    is matched to a baseline file by canonical name (see ``canonicalise``)
    and, when the baseline is a raster image, diffed pixel by pixel. Rows
    lacking either field are skipped without a record.

    Returns:
        A ``StepResult`` named ``"figures"``. Status is ``"skipped"`` when
        Pillow is unavailable or the manifest has no rows, ``"error"`` when
        the RC11 figure directory is missing, ``"failed"`` when any figure is
        missing or unreadable, ``"partial"`` when the only shortfall is
        baselines that are not raster images, and ``"passed"`` otherwise.
    """
    if Image is None:
        return StepResult(
            name="figures",
            status="skipped",
            details={"message": "Pillow not available; install pillow to enable image diffs."},
        )

    if not RC11_FIG_DIR.exists():
        return StepResult(
            name="figures",
            status="error",
            details={"message": f"RC11 figure directory missing: {RC11_FIG_DIR}"},
        )

    manifest_rows = load_manifest()
    if not manifest_rows:
        return StepResult(
            name="figures",
            status="skipped",
            details={"message": "No manifest rows found to compare."},
        )

    # Scan the baseline directory once instead of once per manifest row.
    baselines = _index_baseline_figures(BASELINE_FIG_DIR)

    comparisons: List[Dict[str, object]] = []
    counts = {
        "missing_new": 0,
        "missing_baseline": 0,
        "unreadable_baseline": 0,
        "vector_only": 0,
    }

    for row in manifest_rows:
        label = _manifest_field(row, "label")
        expected = _manifest_field(row, "expected_filename")
        if not label or not expected:
            continue

        # New figures are always looked up as PNG, whatever the manifest suffix.
        new_base = Path(expected).with_suffix(".png").name
        new_path = RC11_FIG_DIR / new_base
        if not new_path.exists():
            counts["missing_new"] += 1
            comparisons.append(_comparison_entry(label, "missing_new", new_path))
            continue

        baseline_path = baselines.get(canonicalise(new_base))
        if baseline_path is None:
            counts["missing_baseline"] += 1
            comparisons.append(_comparison_entry(label, "missing_baseline", new_path))
            continue

        if baseline_path.suffix.lower() not in _RASTER_SUFFIXES:
            counts["vector_only"] += 1
            comparisons.append(
                _comparison_entry(label, "vector_only", new_path, baseline_path)
            )
            continue

        try:
            mean_delta, rms_delta, max_delta = _measure_deltas(new_path, baseline_path)
        except Exception as exc:  # pragma: no cover - surfaces in data issues
            # Deliberately broad: any failure to open or diff is recorded in
            # the report rather than aborting the run. The status name is kept
            # for report compatibility even though a failure to read the *new*
            # image lands here too.
            counts["unreadable_baseline"] += 1
            comparisons.append(
                _comparison_entry(
                    label,
                    "unreadable_baseline",
                    new_path,
                    baseline_path,
                    error=str(exc),
                )
            )
            continue

        comparisons.append(
            _comparison_entry(
                label,
                "compared",
                new_path,
                baseline_path,
                mean_delta=round(mean_delta, 3),
                rms_delta=round(rms_delta, 3),
                max_delta=max_delta,
            )
        )

    if counts["missing_new"] or counts["missing_baseline"] or counts["unreadable_baseline"]:
        status = "failed"
    elif counts["vector_only"]:
        status = "partial"
    else:
        status = "passed"

    details = {
        "figures_checked": len(comparisons),
        "missing_new": counts["missing_new"],
        "missing_baseline": counts["missing_baseline"],
        "unreadable_baseline": counts["unreadable_baseline"],
        "vector_only": counts["vector_only"],
        "results": comparisons,
    }
    return StepResult(name="figures", status=status, details=details)


class BodyParser(HTMLParser):
    """Collect element ids, anchor targets and image sources from HTML.

    After markup has been fed in, ``ids`` holds every non-empty ``id``
    attribute value, ``links`` the non-empty ``href`` of each ``<a>`` tag and
    ``images`` the non-empty ``src`` of each ``<img>`` tag, both lists in
    document order.
    """

    def __init__(self) -> None:
        super().__init__()
        self.links: List[str] = []
        self.images: List[str] = []
        self.ids: set[str] = set()

    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
        """Record the id of any element and the URL attribute of ``a``/``img`` tags."""
        attributes = dict(attrs)

        identifier = attributes.get("id")
        if identifier:
            self.ids.add(identifier)

        # Pick the list and attribute that belong to this tag; other tags
        # carry nothing further to record.
        if tag == "a":
            sink, value = self.links, attributes.get("href")
        elif tag == "img":
            sink, value = self.images, attributes.get("src")
        else:
            return

        # Valueless or empty attributes (``<img src>``, ``href=""``) are ignored.
        if value:
            sink.append(value)


def _classify_reference(ref: str) -> Tuple[str, str]:
    """Classify an ``href``/``src`` value and extract its local path.

    Returns ``(kind, path)`` where *kind* is one of:

    * ``"external"`` - an http(s) or protocol-relative (``//host/...``) URL
    * ``"other"`` - any other scheme (``mailto:``, ``tel:``, ``data:`` ...)
    * ``"malformed"`` - the value could not be parsed as a URL
    * ``"local"`` - a scheme-less reference; *path* is then its
      percent-decoded path with query string and fragment removed, and may
      be empty when the reference only carries a query or fragment
    """
    try:
        parts = urlsplit(ref)
    except ValueError:
        return "malformed", ""
    if parts.scheme in ("http", "https") or (not parts.scheme and parts.netloc):
        return "external", ""
    if parts.scheme:
        return "other", ""
    return "local", unquote(parts.path)


def _local_target_exists(local_path: str, html_dir: Path, static_root: Path) -> bool:
    """Return whether a local reference path points at an existing file.

    Paths under ``/static/`` map into *static_root*; everything else is
    resolved relative to *html_dir*.
    """
    try:
        if local_path.startswith("/static/"):
            target = static_root / local_path[len("/static/") :]
        else:
            target = (html_dir / local_path).resolve()
        return target.exists()
    except (OSError, ValueError):
        # Over-long names or embedded NUL characters cannot name a real file.
        return False


def link_checks() -> StepResult:
    """Validate anchors, local links and image references in the RC11 HTML body.

    Internal ``#anchor`` links must match an element id in the document.
    Local links and images must exist on disk; query strings and fragments
    are ignored and percent-encoding is decoded before the lookup. External
    http(s) and protocol-relative URLs are listed as issues because they are
    not fetched. Links using any other scheme (``mailto:``, ``tel:`` ...) and
    images with a non-http scheme such as ``data:`` are skipped, since there
    is no file to look for.

    Returns:
        A ``StepResult`` named ``"html"``: ``"error"`` when the HTML file is
        missing, ``"passed"`` when no issues were recorded, ``"partial"``
        otherwise.
    """
    if not RC11_HTML.exists():
        return StepResult(
            name="html",
            status="error",
            details={"message": f"RC11 HTML body missing: {RC11_HTML}"},
        )

    parser = BodyParser()
    parser.feed(RC11_HTML.read_text(encoding="utf-8"))
    # Flush whatever the parser is still buffering at end of input.
    parser.close()

    html_dir = RC11_HTML.parent
    static_root = ROOT / "omega_document" / "admin_portal" / "static"
    issues: List[str] = []

    for href in parser.links:
        ref = href.strip()
        if ref.startswith("#"):
            anchor = ref[1:]
            # Accept the id either verbatim or after percent-decoding.
            if anchor and anchor not in parser.ids and unquote(anchor) not in parser.ids:
                issues.append(f"Broken internal anchor: {href}")
            continue

        kind, local_path = _classify_reference(ref)
        if kind == "external":
            issues.append(f"External link not checked: {href}")
        elif kind == "malformed":
            issues.append(f"Malformed link: {href}")
        elif kind == "local":
            # An empty path (only a query or fragment) points back at this
            # document, so there is no separate file to look for.
            if local_path and not _local_target_exists(local_path, html_dir, static_root):
                issues.append(f"Missing local link target: {href}")

    for src in parser.images:
        kind, local_path = _classify_reference(src.strip())
        if kind == "external":
            issues.append(f"External image reference: {src}")
        elif kind == "malformed":
            issues.append(f"Malformed image reference: {src}")
        elif kind == "local":
            # An image needs an actual file, so an empty path counts as missing.
            if not local_path or not _local_target_exists(local_path, html_dir, static_root):
                issues.append(f"Missing image asset: {src}")

    status = "passed" if not issues else "partial"
    details = {
        "links_found": len(parser.links),
        "images_found": len(parser.images),
        "unique_ids": len(parser.ids),
        "issues": issues,
    }
    return StepResult(name="html", status=status, details=details)


def generate_reports(results: Iterable[StepResult]) -> None:
    """Write the step results to the JSON and Markdown validation reports.

    Args:
        results: Step outcomes in the order they should appear. Any iterable
            is accepted, including a one-shot generator.

    Both files are overwritten. The JSON report holds a UTC timestamp plus
    every step as a dict; the Markdown report has one section per step with
    one bullet per ``details`` entry.
    """
    # Materialise once: the steps are walked twice below, and a one-shot
    # iterator would otherwise leave the Markdown report without sections.
    steps = list(results)
    generated_at = datetime.now(UTC).isoformat()

    payload = {
        "generated_at": generated_at,
        "steps": [asdict(step) for step in steps],
    }
    VALIDATION_JSON.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")

    lines = ["# RC11 Validation Report", "", f"Generated: {generated_at}", ""]
    for step in steps:
        lines.append(f"## {step.name.capitalize()} — {step.status}")
        for key, value in step.details.items():
            # Containers are rendered as indented JSON; scalars are used as-is.
            pretty = json.dumps(value, indent=2) if isinstance(value, (dict, list)) else value
            lines.append(f"- {key}: {pretty}")
        lines.append("")
    VALIDATION_MD.write_text("\n".join(lines), encoding="utf-8")


def main(argv: Optional[List[str]] = None) -> int:
    """Run the selected validation steps, write the reports, and return an exit code.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``2`` if any step ended as ``"failed"`` or ``"error"``, ``0`` if none
        did and at least one step ``"passed"``, and ``1`` otherwise (including
        when every step was skipped).
    """
    cli = argparse.ArgumentParser(description=__doc__)
    for flag, help_text in (
        ("--skip-latex", "Skip LaTeX compilation step"),
        ("--skip-figures", "Skip figure comparison step"),
        ("--skip-html", "Skip HTML link validation step"),
    ):
        cli.add_argument(flag, action="store_true", help=help_text)
    options = cli.parse_args(argv)

    # Steps run in this fixed order; each is paired with its opt-out flag.
    pipeline = (
        (options.skip_latex, build_latex),
        (options.skip_figures, diff_figures),
        (options.skip_html, link_checks),
    )
    results: List[StepResult] = [step() for skipped, step in pipeline if not skipped]

    generate_reports(results)

    statuses = {outcome.status for outcome in results}
    if statuses & {"failed", "error"}:
        return 2
    if "passed" in statuses:
        return 0
    return 1


# Run the pipeline only when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
