#!/usr/bin/env node

const fs = require('fs');
const path = require('path');
const cheerio = require('cheerio');
const yaml = require('js-yaml');

/**
 * Builds a JSON search index from the site's HTML templates and the YAML
 * parameter file, then writes it to `outputPath`.
 *
 * Every entry pushed onto `this.index` has at least `url`, `title`, `type`,
 * `text` and `context`; equation entries add `latex`, parameter entries add
 * `value`.
 */
class SearchIndexBuilder {
    constructor() {
        // Accumulated index entries; each parse/load step appends to this array.
        this.index = [];
        this.siteUrl = 'http://localhost:5000';
        this.templatesDir = path.join(__dirname, '..', 'app', 'templates');
        this.outputPath = path.join(__dirname, '..', 'app', 'static', 'search', 'index.json');
    }

    /**
     * Reduce a LaTeX snippet to a plain-text approximation for searching.
     *
     * @param {{text: function(): string}} mathElement - Anything exposing a
     *     `text()` method that returns the raw LaTeX (a cheerio selection or
     *     a stand-in object).
     * @returns {{latex: string, plainText: string}} The trimmed LaTeX and its
     *     simplified plain-text form.
     */
    extractMathText(mathElement) {
        const latex = mathElement.text().trim();
        // Escaped symbols that stand for themselves and should stay searchable.
        const escapedLiterals = '%&#$_';

        const plainText = latex
            // Keep adjacent groups apart so \frac{1}{2} yields "1 2", not "12".
            .replace(/\}\s*\{/g, '} {')
            .replace(/\\\w+\{([^}]+)\}/g, '$1')  // \command{text} -> text
            .replace(/\^\{([^}]+)\}/g, '^$1')    // Superscripts
            .replace(/_\{([^}]+)\}/g, '_$1')     // Subscripts
            .replace(/\\[a-zA-Z]+/g, ' ')        // Remove remaining backslash commands
            // Control symbols: keep escaped literals (\% -> %), blank out the
            // rest (\, \; \\ \{ ...) so no stray backslashes are left behind.
            .replace(/\\([^a-zA-Z])/g, (match, symbol) => (
                escapedLiterals.includes(symbol) ? symbol : ' '
            ))
            .replace(/[{}]/g, '')                // Remove braces
            .replace(/\s+/g, ' ')                // Normalize whitespace
            .trim();

        return { latex, plainText };
    }

    /**
     * Build an entry title of the form "<label>: <text>", shortening the text
     * and appending "..." only when it exceeds `maxLength` characters.
     *
     * @param {string} label - Entry kind shown before the colon.
     * @param {string} text - Full text to preview.
     * @param {number} [maxLength=50] - Maximum number of characters kept.
     * @returns {string} The formatted title.
     */
    previewTitle(label, text, maxLength = 50) {
        const preview = text.length > maxLength
            ? `${text.slice(0, maxLength)}...`
            : text;
        return `${label}: ${preview}`;
    }

    /**
     * Parse one HTML template and append heading, equation and code entries
     * to `this.index`.
     *
     * @param {string} filePath - Path of the template file to read.
     * @param {string} route - Site route the entries' URLs are based on.
     * @throws {Error} Propagates any error raised by `fs.readFileSync`.
     */
    parseTemplate(filePath, route) {
        const content = fs.readFileSync(filePath, 'utf8');
        const $ = cheerio.load(content);

        // Headings: link to the heading's own id when it has one.
        $('h1, h2, h3, h4').each((i, elem) => {
            const $elem = $(elem);
            const text = $elem.text().trim();
            const id = $elem.attr('id') || '';

            if (text) {
                this.index.push({
                    url: id ? `${route}#${id}` : route,
                    title: text,
                    type: 'heading',
                    text: text,
                    // Only an immediately following <p> sibling is used as context.
                    context: $elem.next('p').text().slice(0, 120) || ''
                });
            }
        });

        // Equations rendered in dedicated elements; link to the nearest
        // element (self or ancestor) carrying an id.
        $('.math-equation, .MathJax, .MathJax_Display').each((i, elem) => {
            const $elem = $(elem);
            const math = this.extractMathText($elem);
            const id = $elem.closest('[id]').attr('id') || '';

            if (math.latex) {
                this.index.push({
                    url: id ? `${route}#${id}` : route,
                    title: this.previewTitle('Equation', math.plainText),
                    type: 'equation',
                    text: math.plainText,
                    latex: math.latex,
                    context: $elem.parent().text().slice(0, 120) || ''
                });
            }
        });

        // Equations written as $$...$$ anywhere in the body markup.
        // NOTE(review): this scans serialized HTML, so entities such as &lt;
        // may appear in the captured LaTeX, and equations inside the elements
        // matched above can be indexed a second time - confirm if that matters.
        const bodyHtml = $('body').html() || '';
        const displayMath = /\$\$([^$]+)\$\$/g;
        let match;
        while ((match = displayMath.exec(bodyHtml)) !== null) {
            const rawLatex = match[1];
            const math = this.extractMathText({ text: () => rawLatex });

            // Same guard as the element-based path: skip whitespace-only matches.
            if (math.latex) {
                this.index.push({
                    url: route,
                    title: this.previewTitle('Equation', math.plainText),
                    type: 'equation',
                    text: math.plainText,
                    latex: math.latex,
                    context: ''
                });
            }
        }

        // Code snippets
        $('code').each((i, elem) => {
            const $elem = $(elem);
            const code = $elem.text().trim();
            const id = $elem.closest('[id]').attr('id') || '';

            // Skip very short snippets
            if (code.length > 20) {
                this.index.push({
                    url: id ? `${route}#${id}` : route,
                    title: this.previewTitle('Code', code),
                    type: 'code',
                    text: code,
                    context: $elem.parent().text().slice(0, 120) || ''
                });
            }
        });
    }

    /**
     * Read `data/parameters.yaml` and append one 'parameter' entry per leaf
     * value, using dot-joined key paths as parameter names.
     *
     * Read and parse failures are logged with `console.error` and not
     * rethrown, so a missing or broken file does not abort the build.
     */
    loadParameters() {
        const yamlPath = path.join(__dirname, '..', 'data', 'parameters.yaml');

        try {
            const fileContents = fs.readFileSync(yamlPath, 'utf8');
            // NOTE(review): assumes js-yaml's `load` is acceptable for this
            // project-local file; confirm the installed js-yaml version's
            // default schema if the file could ever be untrusted.
            const params = yaml.load(fileContents);

            // An empty document or a bare scalar has no key/value pairs.
            if (params === null || typeof params !== 'object') {
                console.warn(`No parameters found in ${yamlPath}`);
                return;
            }

            // Walk nested mappings; arrays and scalars are treated as leaves.
            const processParams = (obj, prefix = '') => {
                for (const [key, value] of Object.entries(obj)) {
                    if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
                        processParams(value, `${prefix}${key}.`);
                    } else {
                        const paramName = prefix + key;
                        const displayName = paramName.replace(/_/g, ' ').replace(/\./g, ' → ');

                        this.index.push({
                            url: '/math-reference#parameter-ledger',
                            title: `Parameter: ${displayName}`,
                            type: 'parameter',
                            text: `param:${paramName} = ${value}`,
                            value: value,
                            context: `RFT canonical parameter: ${displayName}`
                        });
                    }
                }
            };

            processParams(params);
        } catch (error) {
            console.error('Error loading parameters:', error);
        }
    }

    /**
     * List the templates to index together with the route each one is served at.
     *
     * @returns {Array<{file: string, route: string}>} `file` is relative to
     *     `templatesDir`.
     */
    getRoutes() {
        return [
            { file: 'home.html', route: '/' },
            { file: 'theory.html', route: '/theory' },
            { file: 'evidence.html', route: '/evidence' },
            { file: 'papers.html', route: '/papers' },
            { file: 'papers_15x.html', route: '/papers/15x' },
            { file: 'quantum.html', route: '/quantum' },
            { file: 'math_reference.html', route: '/math-reference' },
            { file: 'testable_predictions.html', route: '/testable-predictions' },
            { file: 'simulators.html', route: '/simulators' },
            { file: 'about.html', route: '/about' },
            { file: 'faqs.html', route: '/faqs' },
            { file: 'glossary.html', route: '/glossary' },
            { file: 'introduction.html', route: '/introduction' },
            { file: 'quick_start.html', route: '/quick-start' },
            { file: 'advanced_quick_start.html', route: '/advanced-quick-start' },
            { file: 'cosmology/cmb_explorer.html', route: '/cosmology/cmb-explorer' },
            { file: 'vacuum_energy_paper.html', route: '/vacuum_energy' },
            { file: 'papers/sm_derivations.html', route: '/sm_derivations' },
            { file: 'arrow_of_time.html', route: '/arrow-of-time' },
            { file: 'twistor_bundle_demo.html', route: '/twistor-bundle-demo' },
            { file: 'scalaron_screening.html', route: '/scalaron-screening' },
            { file: 'structure_timeline.html', route: '/structure-timeline' },
            { file: 'predictions_dashboard.html', route: '/predictions-dashboard' },
            { file: 'equation_explorer.html', route: '/equation-explorer' },
            { file: 'data_watch.html', route: '/data-watch' },
            { file: 'blog.html', route: '/blog' }
        ];
    }

    /**
     * Build the complete index: parse every known template, load the
     * parameters, write the JSON file to `outputPath`, and log statistics.
     *
     * @throws {Error} Propagates file-system errors from reading templates or
     *     writing the output file.
     */
    build() {
        console.log('Building search index...');

        // Start from a clean slate so a repeated build() on the same instance
        // does not duplicate every entry.
        this.index = [];

        // Process each route
        const routes = this.getRoutes();
        let processedCount = 0;

        routes.forEach(({ file, route }) => {
            const filePath = path.join(this.templatesDir, file);
            if (fs.existsSync(filePath)) {
                this.parseTemplate(filePath, route);
                processedCount++;
            } else {
                console.warn(`Template not found: ${file}`);
            }
        });

        console.log(`Processed ${processedCount} templates`);

        // Load parameters
        this.loadParameters();

        // Ensure output directory exists
        const outputDir = path.dirname(this.outputPath);
        if (!fs.existsSync(outputDir)) {
            fs.mkdirSync(outputDir, { recursive: true });
        }

        // Save index
        fs.writeFileSync(this.outputPath, JSON.stringify(this.index, null, 2));
        console.log(`Search index saved: ${this.outputPath}`);
        console.log(`Total entries: ${this.index.length}`);

        // Stats
        const stats = {
            headings: this.index.filter(i => i.type === 'heading').length,
            equations: this.index.filter(i => i.type === 'equation').length,
            parameters: this.index.filter(i => i.type === 'parameter').length,
            code: this.index.filter(i => i.type === 'code').length
        };

        console.log('Index statistics:', stats);
    }
}

// Entry point: build the index only when this file is executed directly,
// not when another module loads it through require().
if (require.main === module) {
    // Probe the third-party packages in order and stop at the first failure.
    // NOTE(review): both modules are also required unconditionally at the top
    // of this file, so a missing package would likely throw there before this
    // check is reached - confirm whether this probe is still needed.
    const requiredPackages = ['cheerio', 'js-yaml'];
    const allAvailable = requiredPackages.every((packageName) => {
        try {
            require(packageName);
            return true;
        } catch (e) {
            return false;
        }
    });

    if (!allAvailable) {
        console.error('Missing dependencies. Please run:');
        console.error('npm install cheerio js-yaml');
        process.exit(1);
    }

    new SearchIndexBuilder().build();
}

// Expose the class so other scripts can drive the build themselves.
module.exports = SearchIndexBuilder;