#!/usr/bin/env python3
"""Generate provenance lines for each notebook bundle mentioned in index.qmd."""
from __future__ import annotations

import json
import re
from pathlib import Path

# Directory two levels above this file (the parent of the script's own folder).
ROOT = Path(__file__).resolve().parents[1]

# All inputs and outputs live under this sub-tree of ROOT.
RFT_DIR = ROOT.joinpath("rft-vol2-arxiv")

# Document scanned for ``results/nb/<id>.zip`` references.
INDEX_FILE = RFT_DIR.joinpath("index.qmd")

# JSON manifest consulted for each notebook's ``sha256`` entry.
MANIFEST_PATH = RFT_DIR.joinpath("rft-rc12-mini", "notebook_manifest.json")

# ``key=value`` text file consulted for the ``bundle_sha`` entry.
STAMP_PATH = RFT_DIR.joinpath("rft-rc12-mini", "stamps", "determinism.txt")

# Destination directory for the generated ``<notebook id>.md`` footers.
OUTPUT_DIR = RFT_DIR.joinpath("results", "generated", "provenance")

# Placeholder used when no bundle SHA can be determined.
BUNDLE_FALLBACK = "unknown"


def load_stamp() -> str:
    if not STAMP_PATH.exists():
        return BUNDLE_FALLBACK
    stamp_data = {}
    for line in STAMP_PATH.read_text(encoding="utf-8").splitlines():
        if "=" in line:
            key, value = line.split("=", 1)
            stamp_data[key.strip()] = value.strip()
    return stamp_data.get("bundle_sha", BUNDLE_FALLBACK)


def load_manifest() -> dict[str, dict[str, str]]:
    if not MANIFEST_PATH.exists():
        return {}
    return json.loads(MANIFEST_PATH.read_text(encoding="utf-8"))


def _manifest_sha(manifest: dict[str, dict[str, str]], nb_id: str) -> str:
    """Return the ``sha256`` recorded for *nb_id*, or ``"missing"``.

    ``"missing"`` is returned when the notebook has no manifest entry, the
    entry is not a JSON object, or its ``sha256`` is absent, null or empty.
    """
    entry = manifest.get(nb_id)
    sha = entry.get("sha256") if isinstance(entry, dict) else None
    return str(sha) if sha else "missing"


def main() -> int:
    """Regenerate one provenance footer per notebook referenced by the index.

    Every existing ``*.md`` file in ``OUTPUT_DIR`` is removed first. Then,
    for each distinct ``results/nb/<id>.zip`` reference found in
    ``INDEX_FILE``, ``OUTPUT_DIR/<id>.md`` is written with the notebook's
    manifest SHA-256 and the first 12 characters of the bundle SHA.

    Returns:
        Always ``0``, including when ``INDEX_FILE`` does not exist (in which
        case the output directory is left empty of ``*.md`` files).
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    # Start from a clean slate; note this runs even if the index is missing.
    for footer in OUTPUT_DIR.glob("*.md"):
        footer.unlink()

    if not INDEX_FILE.exists():
        return 0

    pattern = re.compile(r"results/nb/([A-Za-z0-9_\-]+)\.zip")
    # With a single capture group, findall yields the captured ids directly;
    # the set removes repeated references to the same bundle.
    notebook_ids = set(pattern.findall(INDEX_FILE.read_text(encoding="utf-8")))

    manifest = load_manifest()
    bundle_sha = load_stamp()
    # Only abbreviate a real SHA; an empty or fallback value stays as the
    # fallback marker rather than being sliced.
    if bundle_sha and bundle_sha != BUNDLE_FALLBACK:
        bundle_short = bundle_sha[:12]
    else:
        bundle_short = BUNDLE_FALLBACK

    # Sorted so files are written in a stable order across runs.
    for nb_id in sorted(notebook_ids):
        sha = _manifest_sha(manifest, nb_id)
        line = f"_Provenance:_ SHA256 {sha} · build {bundle_short}\n"
        (OUTPUT_DIR / f"{nb_id}.md").write_text(line, encoding="utf-8")

    return 0


# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
