const puppeteer = require('puppeteer');
const fs = require('fs').promises;
const PDFDocument = require('pdfkit');
/**
 * Scrapes the Striver SDE sheet on takeuforward.org: collects the question
 * links from the sheet page, visits each one to pull a short description,
 * the code snippets and any stated time/space complexity, then writes the
 * results to a JSON file and a PDF summary.
 */
class StriverSDEScraper {
    /**
     * @param {object} [options]
     * @param {boolean} [options.headless=false] Passed to `puppeteer.launch`;
     *     set to true for production / unattended runs.
     * @param {number} [options.requestDelayMs=2000] Pause after each question
     *     page is processed, in milliseconds.
     */
    constructor({ headless = false, requestDelayMs = 2000 } = {}) {
        this.baseUrl = 'https://takeuforward.org';
        this.sheetUrl = 'https://takeuforward.org/interviews/strivers-sde-sheet-top-coding-interview-problems';
        this.headless = headless;
        this.requestDelayMs = requestDelayMs;
        this.questionsData = [];
        this.browser = null;
        this.page = null;
    }

    /**
     * Launches the browser and opens the single page used for all navigation.
     * @returns {Promise<void>}
     */
    async init() {
        console.log('🚀 Initializing browser...');
        this.browser = await puppeteer.launch({
            headless: this.headless,
            args: ['--no-sandbox', '--disable-setuid-sandbox']
        });
        this.page = await this.browser.newPage();
        // Set user agent
        await this.page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');
    }

    /**
     * Loads the sheet page and collects the unique links that look like
     * question pages (URL contains one of a few keywords).
     * @returns {Promise<Array<{title: string, url: string}>>} The links in
     *     page order, or an empty array if the page could not be processed.
     */
    async getQuestionLinks() {
        console.log('🔍 Fetching SDE sheet page...');
        try {
            await this.page.goto(this.sheetUrl, { waitUntil: 'networkidle2' });
            // The callback is serialized and run inside the page, so it must
            // not reference anything from the surrounding Node scope.
            const questionLinks = await this.page.evaluate(() => {
                const keywords = ['data-structure', 'algorithm', 'problem', 'solution'];
                // Keyed by URL so only the first anchor for each URL is kept.
                const byUrl = new Map();
                document.querySelectorAll('a[href*="takeuforward.org"]').forEach((anchor) => {
                    const url = anchor.href;
                    const title = anchor.textContent.trim();
                    if (!url || !title || byUrl.has(url)) {
                        return;
                    }
                    if (keywords.some((keyword) => url.includes(keyword))) {
                        byUrl.set(url, { title, url });
                    }
                });
                return Array.from(byUrl.values());
            });
            console.log(`✅ Found ${questionLinks.length} question links`);
            return questionLinks;
        } catch (error) {
            console.error('❌ Error fetching question links:', error);
            return [];
        }
    }

    /**
     * Visits one question page and extracts its description, code snippets
     * and complexity statements.
     * @param {string} questionUrl Page to visit.
     * @param {string} title Link text from the sheet; stored as the title of
     *     the result in place of the page's own document title.
     * @returns {Promise<object|null>} `{ title, url, description, approaches,
     *     timeComplexity, spaceComplexity }`, or `null` if scraping failed.
     */
    async scrapeQuestionContent(questionUrl, title) {
        console.log(`📖 Scraping: ${title}`);
        try {
            await this.page.goto(questionUrl, { waitUntil: 'networkidle2' });
            // Everything used by this callback is defined inside it because
            // it is serialized and executed in the page context.
            const questionData = await this.page.evaluate(() => {
                const CODE_SELECTOR = 'pre, code, .highlight';
                // Big-O term allowing one level of nested parentheses, e.g.
                // O(N*log(N)); falls back to "up to the first ')'" otherwise.
                const BIG_O = 'O\\((?:[^()]|\\([^()]*\\))+\\)|O\\([^)]+\\)';

                // Returns the Big-O term labelled "<type> complexity" (or
                // just "<type>") in `text`, or 'Not specified'.
                const findComplexity = (text, type) => {
                    const patterns = [
                        new RegExp(`${type} complexity[:\\s]*(${BIG_O})`, 'i'),
                        new RegExp(`${type}[:\\s]*(${BIG_O})`, 'i')
                    ];
                    for (const pattern of patterns) {
                        const match = text.match(pattern);
                        if (match) {
                            return match[1];
                        }
                    }
                    return 'Not specified';
                };

                // Description: the first three paragraphs of the main content
                // container, minus very short ones.
                let description = '';
                const contentDiv = document.querySelector('.entry-content, .post-content, .content');
                if (contentDiv) {
                    description = Array.from(contentDiv.querySelectorAll('p'))
                        .slice(0, 3)
                        .map((paragraph) => paragraph.textContent.trim())
                        .filter((text) => text.length > 20)
                        .join('\n');
                }

                // Code snippets: the selector also matches elements nested in
                // an already-matched wrapper (e.g. <pre><code>), which would
                // report the same snippet twice, so keep only the outermost
                // match. Snippets of 50 characters or fewer are dropped.
                const snippets = Array.from(document.querySelectorAll(CODE_SELECTOR))
                    .filter((block) => !(block.parentElement && block.parentElement.closest(CODE_SELECTOR)))
                    .map((block) => block.textContent.trim())
                    .filter((code) => code.length > 50);

                // Labels are positional among the kept snippets: first is the
                // brute force, last (or anything mentioning "optimal") is the
                // optimal solution, the rest are numbered.
                const approaches = snippets.map((code, index) => {
                    let name;
                    if (index === 0) {
                        name = 'Brute Force';
                    } else if (index === snippets.length - 1 || code.toLowerCase().includes('optimal')) {
                        name = 'Optimal Solution';
                    } else {
                        name = `Approach ${index + 1}`;
                    }
                    return {
                        name,
                        code,
                        timeComplexity: findComplexity(code, 'time'),
                        spaceComplexity: findComplexity(code, 'space')
                    };
                });

                // Page-level complexity; matching is case-insensitive so the
                // text keeps its original casing in the result.
                const pageText = document.body.textContent;
                return {
                    title: document.title || 'Unknown',
                    description,
                    approaches,
                    timeComplexity: findComplexity(pageText, 'time'),
                    spaceComplexity: findComplexity(pageText, 'space')
                };
            });
            questionData.title = title;
            questionData.url = questionUrl;
            return questionData;
        } catch (error) {
            console.error(`❌ Error scraping ${title}:`, error);
            return null;
        } finally {
            // Wait between requests, after failures too, so a struggling
            // site is not hit again immediately.
            await new Promise((resolve) => setTimeout(resolve, this.requestDelayMs));
        }
    }

    /**
     * Runs the whole pipeline: start the browser, collect links, scrape each
     * question sequentially, then write the JSON and PDF outputs. The browser
     * is always closed, including when start-up or scraping throws.
     * @returns {Promise<void>}
     */
    async scrapeAllQuestions() {
        console.log('🚀 Starting Striver SDE Sheet scraping...');
        try {
            // Inside the try so a partially started browser is still closed.
            await this.init();
            // Get all question links
            const questionLinks = await this.getQuestionLinks();
            if (questionLinks.length === 0) {
                console.log('❌ No question links found!');
                return;
            }
            console.log(`📚 Found ${questionLinks.length} questions to scrape`);
            // Scrape each question
            for (let i = 0; i < questionLinks.length; i++) {
                const link = questionLinks[i];
                console.log(`\n[${i + 1}/${questionLinks.length}] Processing...`);
                const questionData = await this.scrapeQuestionContent(link.url, link.title);
                if (questionData) {
                    this.questionsData.push(questionData);
                    console.log(`✅ Scraped: ${questionData.title}`);
                } else {
                    console.log(`❌ Failed to scrape: ${link.title}`);
                }
                // Progress update
                if ((i + 1) % 10 === 0) {
                    console.log(`\n📊 Progress: ${i + 1}/${questionLinks.length} questions completed`);
                }
            }
            console.log(`\n🎉 Scraping completed! Total questions: ${this.questionsData.length}`);
            // Save to JSON
            await this.saveToJson();
            // Generate PDF
            await this.generatePDF();
        } finally {
            if (this.browser) {
                await this.browser.close();
                this.browser = null;
                this.page = null;
            }
        }
    }

    /**
     * Writes the collected questions to `striver_sde_questions.json`.
     * @returns {Promise<void>}
     */
    async saveToJson() {
        const filename = 'striver_sde_questions.json';
        await fs.writeFile(filename, JSON.stringify(this.questionsData, null, 2));
        console.log(`💾 Data saved to ${filename}`);
    }

    /**
     * Renders the collected questions to `Striver_SDE_Sheet_Complete.pdf`.
     * Resolves once the file has been fully written. Failures are logged and
     * not rethrown (best effort).
     * @returns {Promise<void>}
     */
    async generatePDF() {
        console.log('📄 Generating PDF...');
        const filename = 'Striver_SDE_Sheet_Complete.pdf';
        let stream = null;
        try {
            const doc = new PDFDocument();
            stream = require('fs').createWriteStream(filename);
            doc.pipe(stream);
            // Title page
            doc.fontSize(20).text('Striver SDE Sheet - Complete Solutions', 50, 50);
            doc.fontSize(12).text(`Total Questions: ${this.questionsData.length}`, 50, 100);
            doc.text(`Generated on: ${new Date().toLocaleDateString()}`, 50, 120);
            // Questions; yPosition is a manually tracked estimate of the
            // vertical cursor used to decide when to start a new page.
            let yPosition = 180;
            this.questionsData.forEach((question, index) => {
                // Check if new page needed
                if (yPosition > 700) {
                    doc.addPage();
                    yPosition = 50;
                }
                // Question title
                doc.fontSize(14).text(`${index + 1}. ${question.title}`, 50, yPosition);
                yPosition += 30;
                // Description
                if (question.description) {
                    const shownDescription = question.description.substring(0, 500);
                    doc.fontSize(10).text('Problem Description:', 50, yPosition);
                    yPosition += 15;
                    doc.text(shownDescription, 50, yPosition, { width: 500 });
                    // Estimate the height from the text actually drawn
                    // (about 80 characters per 12pt line).
                    yPosition += Math.ceil(shownDescription.length / 80) * 12 + 20;
                }
                // Approaches
                question.approaches.forEach((approach) => {
                    if (yPosition > 650) {
                        doc.addPage();
                        yPosition = 50;
                    }
                    doc.fontSize(12).text(approach.name, 60, yPosition);
                    yPosition += 20;
                    doc.fontSize(10).text(`Time: ${approach.timeComplexity} | Space: ${approach.spaceComplexity}`, 60, yPosition);
                    yPosition += 15;
                    // Code (truncated for PDF): first 10 lines, 80 chars each
                    approach.code.split('\n').slice(0, 10).forEach((line) => {
                        if (yPosition > 750) {
                            doc.addPage();
                            yPosition = 50;
                        }
                        doc.text(line.substring(0, 80), 60, yPosition);
                        yPosition += 12;
                    });
                    yPosition += 20;
                });
                yPosition += 30;
            });
            doc.end();
            // Wait for the file to be flushed; a stream error rejects here
            // and is reported by the catch below. Everything above is
            // synchronous, so no event can fire before these listeners exist.
            await new Promise((resolve, reject) => {
                stream.once('finish', resolve);
                stream.once('error', reject);
            });
            console.log(`✅ PDF generated: ${filename}`);
        } catch (error) {
            if (stream) {
                // Release the file handle if rendering aborted midway.
                stream.destroy();
            }
            console.error('❌ Error generating PDF:', error);
        }
    }
}
/**
 * Script entry point: builds a scraper with default settings and runs the
 * complete scrape-and-export pipeline.
 * @returns {Promise<void>}
 */
async function main() {
    await new StriverSDEScraper().scrapeAllQuestions();
}
// Run the scraper. An error escaping main() is logged and recorded as a
// non-zero exit status so shells and CI can tell the run failed.
main().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});