#!/usr/bin/env python3
"""
Staticize HTML - Remove all interactive elements and JavaScript
Creates a print-ready static version of the RFT Volume 2 document
"""
import sys
import re
from pathlib import Path

try:
    from bs4 import BeautifulSoup
except ImportError:
    print("Please install BeautifulSoup: pip install beautifulsoup4 lxml", file=sys.stderr)
    sys.exit(1)

# CSS selectors for interactive widgets that are deleted from the static build.
_INTERACTIVE_SELECTORS = (
    # Claude P4 additions
    "#skeptic-mode", "#skeptic-panel", "#skeptic-toggle",
    "#artifacts-dash", "#print-scoreboard",
    ".claim-badge", ".floating-top", "#back-to-top",

    # General interactive elements
    "#filter-panel", ".filter-panel", ".filters", ".section-filter",
    ".search", ".search-bar", ".page-search", "#toc-search",
    ".theme-toggle", ".dark-mode-toggle", ".dark-toggle",
    ".copy-button", ".code-copy-button", ".copy-btn",
    ".accordion", ".collapse", ".toggle", ".toc-toggle",
    ".floating-tools", ".nav-controls", ".btn",

    # Progress tracking
    ".reading-progress", ".progress-indicator", ".reading-stats",

    # Section footers with interactivity.
    # NOTE(review): decompose() drops the element's content as well, not just
    # its interactivity -- confirm that losing the content is intended.
    ".section-auditor",
)

# Attribute names that look like inline event handlers (onclick, onload, ...).
_EVENT_ATTR_PATTERN = re.compile(r"^on[a-z]+$", re.I)

# Behaviour-wiring data attributes that are stripped along with the handlers.
_INTERACTIVE_DATA_ATTRS = ("data-action", "data-toggle", "data-target", "data-nb")

# Rules deleted from pre-existing <style> blocks because their targets are gone.
# NOTE: this is plain regex matching, not a CSS parser.
_REMOVED_CSS_RULES = tuple(
    re.compile(pattern, re.DOTALL)
    for pattern in (
        r'\.floating-top[^}]*\{[^}]*\}',
        r'#skeptic-[^}]*\{[^}]*\}',
        r'\.claim-badge[^}]*\{[^}]*\}',
        r'\.dark-toggle[^}]*\{[^}]*\}',
        r'\.copy-btn[^}]*\{[^}]*\}',
        r'\.section-auditor[^}]*\{[^}]*\}',
        r'#filter-panel[^}]*\{[^}]*\}',
        r'\.btn[^}]*\{[^}]*\}',
    )
)

_MATHJAX_CONFIG_PATTERN = re.compile(r'window\.MathJax')
_INLINE_DISPLAY_NONE_PATTERN = re.compile(r'display:\s*none')

# Stylesheet appended to <head> by the static build.
_STATIC_CSS = """
/* Static build - Print optimizations */
@media all {
    /* Remove any interactive affordances */
    .btn, .button, button { display: none !important; }
    input[type="text"], input[type="search"] { display: none !important; }

    /* Clean up details/summary */
    details > summary::-webkit-details-marker { display: none; }
    details > summary::before { display: none; }
    details > summary {
        list-style: none;
        font-weight: 600;
        margin: 0.5rem 0 0.25rem;
        cursor: default;
    }

    /* Ensure all content is visible */
    [hidden] { display: block !important; }
    .collapsed, .hidden { display: block !important; }
}

@media print {
    /* Hide navigation and chrome */
    nav, header, footer, .sidebar, .breadcrumbs { display: none !important; }
    .document-ribbon, .document-actions { display: none !important; }
    #toc, .toc, .standalone-toc { page-break-after: always; }

    /* Layout for print */
    body {
        max-width: 7.5in !important;
        margin: 0.5in auto !important;
        font-size: 11pt !important;
    }
    main, .document-body {
        max-width: 100% !important;
        margin: 0 !important;
    }

    /* Code blocks */
    pre, code {
        white-space: pre-wrap !important;
        word-break: break-word !important;
        font-size: 9pt !important;
    }

    /* Show link URLs */
    a[href^="http"]:not([href*="localhost"]):not([href*="127.0.0.1"])::after {
        content: " (" attr(href) ")";
        font-size: 0.8em;
        color: #666;
    }

    /* Page breaks */
    h1 { page-break-before: always; }
    h2 { page-break-before: auto; page-break-after: avoid; }
    figure, table { page-break-inside: avoid; }

    /* Remove colors for better printing */
    * {
        background: white !important;
        color: black !important;
    }

    /* Keep math readable */
    .MathJax, .MathJax_Display {
        color: black !important;
        font-size: 10pt !important;
    }

    /* Clean tables */
    table {
        border-collapse: collapse !important;
        width: 100% !important;
    }
    th, td {
        border: 1px solid #000 !important;
        padding: 0.3em !important;
    }

    /* Hide provenance footers */
    .prov, .provenance { display: none !important; }

    /* Show that figures are referenced */
    figure::after {
        content: attr(id);
        display: block;
        font-size: 0.8em;
        text-align: right;
        color: #666;
    }
}

/* Additional static-mode styles */
.mini-lecture, .derivation, .faq {
    border-left: 3px solid #ccc;
    padding-left: 1rem;
    margin: 1rem 0;
}

.falsifiers {
    background: #f8f9fa !important;
    border: 1px solid #dee2e6 !important;
    padding: 0.5rem !important;
    margin: 0.5rem 0 !important;
}

/* Keep formulas clear */
.formula, .equation {
    display: block;
    margin: 1rem 0;
    padding: 0.5rem;
    background: #f8f9fa;
    border-left: 3px solid #264fda;
}
"""


def _mark_katex_links(soup):
    """Tag every <link> whose href mentions KaTeX with data-keep="1"."""
    for link in soup.find_all("link", href=True):
        if "katex" in link.get("href", "").lower():
            link["data-keep"] = "1"


def _remove_scripts(soup):
    """Delete every <script> except JSON-LD structured data; return the count."""
    removed = 0
    for script in soup.find_all("script"):
        if script.get("type", "").lower() == "application/ld+json":
            continue  # Keep structured data
        script.decompose()
        removed += 1
    return removed


def _remove_interactive_elements(soup):
    """Delete elements matching _INTERACTIVE_SELECTORS; return the count."""
    removed = 0
    for selector in _INTERACTIVE_SELECTORS:
        for element in soup.select(selector):
            # A nested match is already destroyed once its ancestor has been
            # decomposed; skip it so it is neither re-processed nor re-counted.
            if getattr(element, "decomposed", False):
                continue
            element.decompose()
            removed += 1
    return removed


def _open_details(soup):
    """Set the `open` attribute on every <details>; return the count."""
    opened = 0
    for details in soup.find_all("details"):
        details["open"] = ""
        opened += 1
    return opened


def _strip_interactive_attributes(soup):
    """Drop on* handlers and the listed data-* attributes; return the count."""
    removed = 0
    for element in soup.find_all(True):
        # Iterate over a snapshot: the attribute dict is mutated in the loop.
        for attr in list(element.attrs):
            if _EVENT_ATTR_PATTERN.match(attr) or attr in _INTERACTIVE_DATA_ATTRS:
                del element.attrs[attr]
                removed += 1
    return removed


def _replace_notebook_links(soup):
    """Swap each <a class="nb-dl"> for an italic text <span>; return the count."""
    converted = 0
    for link in soup.find_all("a", class_="nb-dl"):
        # Replace with span showing it's a notebook reference
        span = soup.new_tag("span")
        span.string = f"📦 {link.get('download', 'notebook.zip')}"
        span["style"] = "color: #666; font-style: italic;"
        link.replace_with(span)
        converted += 1
    return converted


def _inject_static_css(soup):
    """Append the static stylesheet to <head> and return the new <style> tag.

    A <head> is created when the parsed document has none, so the stylesheet
    is never silently dropped.
    """
    style = soup.new_tag("style")
    style.string = _STATIC_CSS

    head = soup.head
    if head is None:
        head = soup.new_tag("head")
        root = soup.html if soup.html is not None else soup
        root.insert(0, head)
    head.append(style)
    return style


def _clean_css_rules(soup, skip=None):
    """Delete rules for removed widgets from every <style> tag except *skip*."""
    for style_tag in soup.find_all("style"):
        # The injected stylesheet must be left alone: its
        # ".btn, .button, button" rule would otherwise match the ".btn"
        # pattern and be deleted, un-hiding every <button>.
        if style_tag is skip or not style_tag.string:
            continue
        css = style_tag.string
        for pattern in _REMOVED_CSS_RULES:
            css = pattern.sub('', css)
        style_tag.string = css


def staticize_html(src_path, dst_path):
    """Write a static, print-oriented copy of an HTML file.

    The source is parsed with BeautifulSoup (lxml parser). Scripts (except
    JSON-LD), the elements listed in ``_INTERACTIVE_SELECTORS``, inline event
    handlers and selected ``data-*`` attributes are removed; ``<details>``
    elements are forced open; ``a.nb-dl`` links become plain text; a static
    stylesheet is appended to ``<head>``; and inline ``style`` attributes
    containing ``display: none`` are dropped. Progress is printed to stdout.

    Args:
        src_path: Path of the HTML file to read (decoded as UTF-8, undecodable
            bytes ignored).
        dst_path: Path of the file to write; missing parent directories are
            created.

    Returns:
        The destination as a ``pathlib.Path``.

    Raises:
        SystemExit: With exit code 2 when ``src_path`` does not exist.
    """
    # Read source HTML
    src = Path(src_path)
    if not src.exists():
        print(f"Error: Source file not found: {src_path}", file=sys.stderr)
        sys.exit(2)

    html = src.read_text(encoding="utf-8", errors="ignore")
    soup = BeautifulSoup(html, "lxml")

    # Preserve KaTeX assets (CSS is required for server-side rendered math)
    _mark_katex_links(soup)

    # 1) Remove ALL <script> tags (except structured data)
    print("Removing scripts...")
    script_count = _remove_scripts(soup)
    print(f"  Removed {script_count} script tags")

    # 2) Remove interactive UI elements
    print("Removing interactive elements...")
    removed_count = _remove_interactive_elements(soup)
    print(f"  Removed {removed_count} interactive elements")

    # 3) Force all <details> elements open
    print("Opening all collapsible sections...")
    details_count = _open_details(soup)
    print(f"  Opened {details_count} detail elements")

    # 4) Remove event handlers and data attributes
    print("Removing event handlers...")
    handler_count = _strip_interactive_attributes(soup)
    print(f"  Removed {handler_count} event handlers")

    # 5) Convert notebook download links to simple text
    print("Converting notebook links...")
    nb_count = _replace_notebook_links(soup)
    print(f"  Converted {nb_count} notebook links")

    # 6) Add static CSS for clean printing
    print("Adding print-optimized CSS...")
    static_style = _inject_static_css(soup)

    # 7) Clean up any remaining MathJax configuration. Step 1 already removed
    #    ordinary scripts, so this only affects a kept script whose text
    #    mentions window.MathJax.
    for text_node in soup.find_all(string=_MATHJAX_CONFIG_PATTERN):
        if text_node.parent and text_node.parent.name == 'script':
            text_node.parent.decompose()

    # 7b) Clean up CSS rules for removed elements (never the stylesheet that
    #     was just injected).
    print("Cleaning CSS rules...")
    _clean_css_rules(soup, skip=static_style)

    # 8) Remove style attributes that might hide content
    for element in soup.find_all(style=_INLINE_DISPLAY_NONE_PATTERN):
        del element['style']

    # 9) Write output
    dst = Path(dst_path)
    dst.parent.mkdir(parents=True, exist_ok=True)
    dst.write_text(str(soup), encoding="utf-8")

    print(f"\n✅ Static HTML created: {dst}")
    print(f"   File size: {dst.stat().st_size / 1024 / 1024:.2f} MB")
    return dst

def main():
    """Command-line entry point: pick source/destination and staticize.

    Usage, based on the number of command-line arguments:
      * none  -- use the built-in default source and destination paths;
      * one   -- treat it as the source and write next to it, inserting
                 ``.static`` before the file extension;
      * two+  -- first is the source, second is the destination.
    """
    args = sys.argv[1:]

    if not args:
        # Default paths
        src = "/home/rftuser/omega_document/admin_portal/static/html/standalone/rft-volume2-rc12.html"
        dst = "/home/rftuser/omega_document/admin_portal/static/html/standalone/rft-volume2-rc12.static.html"
    elif len(args) == 1:
        src = args[0]
        src_file = Path(src)
        if src_file.name:
            # Derive the name from the final path component only. A plain
            # str.replace('.html', ...) would also rewrite directory names
            # and, for sources without a lowercase ".html" (e.g. ".htm",
            # ".HTML", no extension), would yield dst == src and overwrite
            # the input file.
            suffix = src_file.suffix or ".html"
            dst = str(src_file.with_name(f"{src_file.stem}.static{suffix}"))
        else:
            # No file name to work with (e.g. "" or "/"); still make sure the
            # destination differs from the source.
            dst = f"{src}.static.html"
    else:
        src = args[0]
        dst = args[1]

    print("Staticizing HTML document...")
    print(f"  Source: {src}")
    print(f"  Destination: {dst}")
    print()

    staticize_html(src, dst)

# Run the converter only when this file is executed as a script; importing
# the module has no side effects beyond the definitions above.
if __name__ == "__main__":
    main()
