import { NextRequest, NextResponse } from 'next/server';
import axios from 'axios';
import TurndownService from 'turndown';

// Next.js route segment config: 'force-dynamic' opts this route out of static
// rendering/caching so the handler is evaluated on every request.
export const dynamic = 'force-dynamic';

/** Browser-like User-Agent sent with the fetch (presumably so sites that reject non-browser clients still respond). */
const SCRAPER_USER_AGENT =
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36';

/** Abort the upstream request after this many milliseconds. */
const FETCH_TIMEOUT_MS = 10000;

/** Upper bound on the downloaded body so a huge page cannot exhaust server memory. */
const MAX_RESPONSE_BYTES = 5 * 1024 * 1024;

/** Maximum number of Markdown characters returned (truncated for AI context efficiency). */
const MAX_MARKDOWN_CHARS = 30000;

/** Parses `raw` as an absolute URL and returns it only when its scheme is http or https; otherwise `null`. */
function parseHttpUrl(raw: string): URL | null {
    try {
        const parsed = new URL(raw);
        return parsed.protocol === 'http:' || parsed.protocol === 'https:' ? parsed : null;
    } catch {
        return null;
    }
}

/** Converts an HTML document to Markdown, dropping scripts, styles and page-chrome elements. */
function htmlToMarkdown(html: string): string {
    const turndownService = new TurndownService({
        headingStyle: 'atx',
        codeBlockStyle: 'fenced'
    });

    // Let Turndown discard these elements on the parsed DOM rather than with
    // regexes, which break on nested tags, unclosed tags and `</script >`.
    turndownService.remove(['script', 'style', 'nav', 'footer', 'header', 'aside']);

    return turndownService.turndown(html);
}

/**
 * Scrapes a web page and returns its content as Markdown.
 *
 * Expects a JSON body of the form `{ "url": "https://..." }`.
 *
 * @param req - Incoming request whose JSON body carries the `url` to scrape.
 * @returns
 *  - 200 `{ rawMarkdown }` — the page converted to Markdown, truncated to
 *    `MAX_MARKDOWN_CHARS` characters.
 *  - 400 `{ error }` — body is not valid JSON, `url` is missing/not a string,
 *    or `url` is not a valid http(s) URL.
 *  - 500 `{ error }` — fetching or converting the page failed.
 */
export async function POST(req: NextRequest): Promise<NextResponse> {
    // Parse the body separately so malformed JSON is reported as a client error (400), not a 500.
    let body: unknown;
    try {
        body = await req.json();
    } catch {
        return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 });
    }

    const url = typeof body === 'object' && body !== null ? (body as { url?: unknown }).url : undefined;
    if (typeof url !== 'string' || url.trim() === '') {
        return NextResponse.json({ error: 'URL is required' }, { status: 400 });
    }

    const target = parseHttpUrl(url.trim());
    if (target === null) {
        return NextResponse.json({ error: 'URL must be a valid http(s) URL' }, { status: 400 });
    }

    // NOTE(security): the URL is caller-controlled. The scheme check above does not
    // prevent requests to internal/private hosts (SSRF); if this route is reachable by
    // untrusted clients, add a host allow-list or resolve-and-check the target IP.

    try {
        // 1. Fetch HTML
        const response = await axios.get<unknown>(target.href, {
            headers: {
                'User-Agent': SCRAPER_USER_AGENT
            },
            timeout: FETCH_TIMEOUT_MS,
            // Ask for the raw text so a JSON content-type is not auto-parsed into an object.
            responseType: 'text',
            maxContentLength: MAX_RESPONSE_BYTES
        });

        // Guard anyway: some axios versions still parse JSON despite responseType 'text'.
        const html = response.data;
        if (typeof html !== 'string') {
            throw new Error('Fetched content is not text');
        }

        // 2. Convert to Markdown (scripts, styles and page chrome removed)
        const rawMarkdown = htmlToMarkdown(html);

        return NextResponse.json({
            rawMarkdown: rawMarkdown.slice(0, MAX_MARKDOWN_CHARS) // Truncate for AI context efficiency
        });
    } catch (error: unknown) {
        const message = error instanceof Error ? error.message : '';
        console.error('[WebsiteWiki Scraper] Error:', message || error);
        return NextResponse.json({ error: message || 'Failed to scrape content' }, { status: 500 });
    }
}
