#!/usr/bin/env python3
"""
Tone Softening Script
Replaces overly confident claims with more measured language in AI-generated papers
"""

import re
import os
import sys
import shutil
from datetime import datetime

# Replacement patterns: (regex pattern, replacement template).
# The patterns are matched case-insensitively by apply_replacements(), so they
# are written in lower case. Every phrase is anchored with \b so it can never
# fire in the middle of a longer word (e.g. "imprecisely equal").
SOFTENING_REPLACEMENTS = [
    # Overly confident verbs
    (r'\bproves that\b', 'suggests that'),
    (r'\bprove that\b', 'suggest that'),
    (r'\bproven\b', 'indicated'),
    # Adjective and adverb are handled separately so the replacement keeps
    # the part of speech of the word it replaces.
    (r'\bconclusively\b', 'tentatively'),
    (r'\bconclusive\b', 'suggestive'),
    (r'\bdefinitively\b', 'potentially'),
    (r'\bunequivocally\b', 'strongly'),
    (r'\bindisputably\b', 'arguably'),
    (r'\birrefutably\b', 'persuasively'),

    # Absolute claims
    (r'\bexact match\b', 'close agreement'),
    (r'\bexactly matches\b', 'closely matches'),
    (r'\bperfect(?:ly)? agreement\b', 'excellent agreement'),
    (r'\bcompletely solves\b', 'addresses'),
    (r'\bfully explains\b', 'helps explain'),
    # The captured suffix ("", "s" or "d") is re-attached to "reduce" so the
    # tense/number of the original verb survives: eliminates -> reduces.
    (r'\beliminate([sd]?) the need\b', r'reduce\1 the need'),

    # Certainty modifiers
    (r'\bcertainly\b', 'likely'),
    (r'\bundoubtedly\b', 'probably'),
    (r'\bobviously\b', 'apparently'),
    (r'\bclearly demonstrates\b', 'indicates'),

    # Universal claims
    (r'\ball anomalies\b', 'many anomalies'),
    (r'\bevery observation\b', 'most observations'),
    (r'\balways\b', 'typically'),
    (r'\bnever\b', 'rarely'),

    # Precision claims
    (r'\bto arbitrary precision\b', 'to high precision'),
    (r'\bwith infinite accuracy\b', 'with high accuracy'),
    (r'\bexactly zero\b', 'negligibly small'),
    (r'\bprecisely equal\b', 'approximately equal'),

    # Discovery claims
    (r'\bwe have discovered\b', 'we propose'),
    (r'\bwe discovered\b', 'we found'),
    (r'\bbreakthrough\b', 'advancement'),
    (r'\brevolutionary\b', 'novel'),
    (r'\bparadigm[- ]shifting\b', 'significant'),
]

# Patterns that insert a hedging phrase: (regex pattern, replacement template).
# Captured groups are echoed back through \N so the original capitalisation of
# the matched words is kept. Trailing \b stops a pattern from firing on an
# inflected or longer word ("confirmed", "showstopper"); leading \b stops it
# from firing inside one ("this 5%", "that exactly").
HEDGE_INSERTIONS = [
    # Before strong claims
    # The third-person "s" is matched outside the group so that the verb is
    # re-inserted in its base form: "This shows" -> "This appears to show".
    (r'(\bThis )(show|demonstrate|prove)s\b', r'\1appears to \2'),
    (r'(\bThe results )(confirm|validate|verify)\b', r'\1tend to \2'),
    (r'(\bWe )(?:show|demonstrate) that\b', r'\1argue that'),

    # Quantitative claims
    (r'(\bis )(\d+\.?\d*%)', r'\1approximately \2'),
    (r'\bequals (\d+\.?\d*)', r'is approximately \1'),
    (r'\bat exactly\b', 'at approximately'),
]

def apply_replacements(content, replacements):
    """Apply every replacement pattern to ``content`` in order.

    Patterns are matched case-insensitively. Because the replacement
    templates are written in lower case, a match that starts with an
    upper-case letter (typically the first word of a sentence) gets the
    first letter of its replacement upper-cased too, so
    ``"Certainly, ..."`` becomes ``"Likely, ..."`` rather than
    ``"likely, ..."``.

    Args:
        content: Text to rewrite.
        replacements: Iterable of ``(pattern, replacement)`` pairs, where
            ``replacement`` is a ``re.sub`` template string (backreferences
            such as ``\\1`` are supported) or a callable taking the match.

    Returns:
        A ``(new_content, changes_made)`` tuple; ``changes_made`` is the
        total number of substitutions across all patterns.
    """

    def _case_preserving(template):
        # Wrap a template string in a callable that expands it for the match
        # and then mirrors the capitalisation of the matched text.
        def _substitute(match):
            expanded = match.expand(template)
            if match.group(0)[:1].isupper() and expanded[:1].islower():
                expanded = expanded[0].upper() + expanded[1:]
            return expanded
        return _substitute

    modified_content = content
    changes_made = 0

    for pattern, replacement in replacements:
        # Callables are passed through untouched; they own their own casing.
        repl = replacement if callable(replacement) else _case_preserving(replacement)
        modified_content, count = re.subn(
            pattern, repl, modified_content, flags=re.IGNORECASE
        )
        changes_made += count

    return modified_content, changes_made

def soften_file(filepath, dry_run=False):
    """Soften overconfident claims in a single file.

    The file is read as UTF-8, rewritten with ``SOFTENING_REPLACEMENTS``
    followed by ``HEDGE_INSERTIONS``, and written back in place. Before
    writing, a copy named ``<filepath>.backup.<YYYYmmdd_HHMMSS>`` is created
    next to the file.

    Args:
        filepath: Path of the file to process.
        dry_run: When true, count the changes but leave the file untouched
            and create no backup.

    Returns:
        A ``(success, change_count, error_message)`` tuple. On success
        ``error_message`` is ``None``; on failure ``change_count`` is 0 and
        ``error_message`` describes what went wrong.
    """
    try:
        # newline='' turns off newline translation so that CRLF files are not
        # silently converted to LF (or vice versa) by the rewrite.
        with open(filepath, 'r', encoding='utf-8', newline='') as f:
            original_content = f.read()
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError for files that are not UTF-8.
        return False, 0, str(e)

    # Apply softening replacements, then hedge insertions on the result.
    content, changes = apply_replacements(original_content, SOFTENING_REPLACEMENTS)
    content, hedge_changes = apply_replacements(content, HEDGE_INSERTIONS)
    total_changes = changes + hedge_changes

    if total_changes == 0 or dry_run:
        return True, total_changes, None

    # Create the backup first; never touch the file if this fails.
    backup_path = filepath + f'.backup.{datetime.now().strftime("%Y%m%d_%H%M%S")}'
    try:
        shutil.copy2(filepath, backup_path)
    except OSError as e:
        return False, 0, str(e)

    try:
        with open(filepath, 'w', encoding='utf-8', newline='') as f:
            f.write(content)
    except (OSError, ValueError) as e:
        # The file may be truncated or half-written: put the original back.
        try:
            shutil.copy2(backup_path, filepath)
        except OSError as restore_error:
            # Keep the backup so the original content is not lost.
            return False, 0, (
                f'{e}; restoring from {backup_path} failed: {restore_error}'
            )
        try:
            os.remove(backup_path)
        except OSError:
            # A leftover backup is harmless; the file itself is restored.
            pass
        return False, 0, str(e)

    return True, total_changes, None

def check_for_remaining_strong_claims(directory):
    """Scan a directory tree for a short list of strong-claim phrases.

    Every ``.html``, ``.md`` and ``.txt`` file below ``directory`` is read as
    UTF-8 and searched case-insensitively. Files that cannot be read or
    decoded are skipped.

    Args:
        directory: Root of the tree to walk.

    Returns:
        A list with one dict per (file, pattern) pair that matched, each with
        the keys ``'file'`` (path), ``'pattern'`` (the regex source) and
        ``'count'`` (number of matches). Files are visited in sorted order so
        the result is deterministic.
    """
    strong_patterns = [
        r'\bproves that\b',
        r'\bexact match\b',
        r'\bconclusive\b',
        r'\bdefinitively\b',
        r'\bbreakthrough\b',
    ]
    # Compile once instead of once per file.
    compiled_patterns = [
        (pattern, re.compile(pattern, re.IGNORECASE)) for pattern in strong_patterns
    ]

    remaining_issues = []

    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in-place sort makes os.walk descend in a stable order
        for file in sorted(files):
            if not file.endswith(('.html', '.md', '.txt')) or file.endswith('.backup'):
                continue
            filepath = os.path.join(root, file)
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    content = f.read()
            except (OSError, ValueError):
                # Best effort: skip unreadable / non-UTF-8 files, but let
                # KeyboardInterrupt and programming errors propagate.
                continue

            for pattern, regex in compiled_patterns:
                count = sum(1 for _ in regex.finditer(content))
                if count:
                    remaining_issues.append({
                        'file': filepath,
                        'pattern': pattern,
                        'count': count,
                    })

    return remaining_issues

def _claim_check_roots(directory, paper_dirs):
    """Return the directories the strong-claim check has to walk.

    ``directory`` is always included. A paper directory is added only when it
    lies outside ``directory``, so no file is reported twice.
    """
    base = os.path.abspath(directory)
    roots = [directory]
    for paper_dir in paper_dirs:
        try:
            inside = os.path.commonpath([base, os.path.abspath(paper_dir)]) == base
        except ValueError:
            # Raised on Windows when the two paths are on different drives.
            inside = False
        if not inside:
            roots.append(paper_dir)
    return roots


def main():
    """Soften claims in all paper files.

    Command-line options:
        --dry-run      report the changes without modifying any file
        --check-only   only scan for strong claims, modify nothing
        --directory    application directory to process (default: app)

    Returns:
        The process exit status: 0 on success, 1 when strong claims were
        found / remain or when at least one file could not be processed.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Soften overly confident claims in papers')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be changed without modifying files')
    parser.add_argument('--check-only', action='store_true', help='Only check for strong claims without modifying')
    parser.add_argument('--directory', default='app', help='Directory to process (default: app)')
    args = parser.parse_args()

    paper_dirs = [
        os.path.join(args.directory, 'templates/papers/'),
        os.path.join(args.directory, 'static/papers/'),
        'docs/papers/'
    ]

    def find_remaining():
        # Walk args.directory plus any softened directory outside of it
        # (docs/papers/), so everything that gets rewritten is also verified.
        issues = []
        for check_root in _claim_check_roots(args.directory, paper_dirs):
            issues.extend(check_for_remaining_strong_claims(check_root))
        return issues

    if args.check_only:
        print("Checking for strong claims...")
        remaining = find_remaining()
        if remaining:
            # One issue is recorded per (file, pattern); count distinct files.
            flagged_files = len({issue['file'] for issue in remaining})
            print(f"\nFound {flagged_files} files with strong claims:")
            for issue in remaining:
                print(f"  {issue['file']}: {issue['pattern']} ({issue['count']} occurrences)")
            return 1
        print("✓ No strong claims found!")
        return 0

    print(f"Tone Softening {'(DRY RUN)' if args.dry_run else ''}")
    print("=" * 60)

    total_files = 0
    total_changes = 0
    errors = []

    for paper_dir in paper_dirs:
        if not os.path.exists(paper_dir):
            continue
        for root, dirs, files in os.walk(paper_dir):
            for file in files:
                if not file.endswith(('.html', '.md', '.txt')) or file.endswith('.backup'):
                    continue
                filepath = os.path.join(root, file)
                success, changes, error = soften_file(filepath, args.dry_run)

                if success:
                    total_files += 1
                    total_changes += changes
                    if changes > 0:
                        print(f"✓ {filepath}: {changes} changes")
                else:
                    errors.append((filepath, error))

    print("\nSummary:")
    print(f"  Files processed: {total_files}")
    print(f"  Total changes: {total_changes}")

    # A run in which some file could not be processed must not report success.
    exit_code = 0
    if errors:
        exit_code = 1
        print(f"\nErrors ({len(errors)}):")
        for filepath, error in errors:
            print(f"  {filepath}: {error}")

    if not args.dry_run and total_changes > 0:
        print("\n✓ Changes applied! Backup files created with .backup.* extension")

        # Check for remaining strong claims
        print("\nChecking for remaining strong claims...")
        remaining = find_remaining()
        if remaining:
            flagged_files = len({issue['file'] for issue in remaining})
            print(f"Warning: {flagged_files} files still contain strong claims")
            exit_code = 1
        else:
            print("✓ All strong claims have been softened!")

    return exit_code

# Run the CLI only when executed as a script, and hand main()'s return value
# to sys.exit() so it becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())