// Striver Scraper (Node.js script; the source document titles it "Striver Scraper.py")
const fs = require('fs').promises;
const PDFDocument = require('pdfkit');
const puppeteer = require('puppeteer');
/**
 * Scrapes the Striver SDE sheet on takeuforward.org: collects the links found
 * on the sheet page, visits each one to extract a short description and the
 * code blocks ("approaches"), then writes the result to a JSON file and a PDF.
 *
 * Typical use: `await new StriverSDEScraper().scrapeAllQuestions();`
 */
class StriverSDEScraper {
  /** Sets up URLs and empty state; the browser is only started by `init()`. */
  constructor() {
    this.baseUrl = 'https://takeuforward.org';
    this.sheetUrl =
      'https://takeuforward.org/interviews/strivers-sde-sheet-top-coding-interview-problems';
    this.questionsData = [];
    this.browser = null;
    this.page = null;
  }

  /**
   * Launches the browser and opens the single page reused for every request.
   * Relies on a module-level `puppeteer` binding.
   *
   * @returns {Promise<void>}
   */
  async init() {
    console.log('🚀 Initializing browser...');
    this.browser = await puppeteer.launch({
      headless: false, // Set to true for production
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
    this.page = await this.browser.newPage();
  }

  /**
   * Removes entries whose `url` was already seen, keeping the first occurrence
   * and the original order.
   *
   * @param {Array<{title: string, url: string}>} links
   * @returns {Array<{title: string, url: string}>}
   */
  static dedupeByUrl(links) {
    const seen = new Set();
    return links.filter((link) => {
      if (seen.has(link.url)) {
        return false;
      }
      seen.add(link.url);
      return true;
    });
  }

  /**
   * Finds a Big-O expression labelled with "time" or "space" in `text`
   * (e.g. "Time Complexity: O(n log(n))").
   *
   * @param {string} text - Text to search, typically a code block with comments.
   * @param {'time'|'space'} type - Which complexity label to look for.
   * @returns {string} The matched `O(...)` expression, or 'Not specified'.
   */
  static extractComplexity(text, type) {
    // NOTE(review): the fallback string is a reconstruction; the original
    // helper body was not recoverable from the source text.
    const notSpecified = 'Not specified';
    // The O(...) part tolerates one level of nested parentheses, so
    // "O(n log(n))" is captured whole. No /g flag: these are used once per call.
    const patterns = {
      time: /time(?:\s+complexity)?[\s:=-]*(O\((?:[^()]|\([^()]*\))+\))/i,
      space: /space(?:\s+complexity)?[\s:=-]*(O\((?:[^()]|\([^()]*\))+\))/i,
    };
    if (typeof text !== 'string' || !Object.prototype.hasOwnProperty.call(patterns, type)) {
      return notSpecified;
    }
    const match = patterns[type].exec(text);
    return match ? match[1] : notSpecified;
  }

  /**
   * Tells whether `url` has the same origin as `this.baseUrl`.
   *
   * @param {string} url
   * @returns {boolean}
   */
  isOnSite(url) {
    try {
      return new URL(url).origin === new URL(this.baseUrl).origin;
    } catch (_error) {
      // An unparsable URL cannot be navigated to, so it is simply "not on site".
      return false;
    }
  }

  /**
   * Loads the sheet page and returns the unique on-site links found on it.
   *
   * @returns {Promise<Array<{title: string, url: string}>>} Links in page
   *   order; an empty array if the page could not be loaded or parsed.
   */
  async getQuestionLinks() {
    console.log('🔍 Fetching SDE sheet page...');
    try {
      await this.page.goto(this.sheetUrl, { waitUntil: 'networkidle2' });

      // Runs in the browser context: must not reference anything from Node scope.
      const links = await this.page.evaluate(() => {
        const found = [];
        const anchors = document.querySelectorAll('a[href]');
        anchors.forEach((anchor) => {
          const href = anchor.href;
          const text = anchor.textContent.trim();
          if (href && text) {
            found.push({ title: text, url: href });
          }
        });
        return found;
      });

      // NOTE(review): the original link filter was not recoverable. Keeping
      // only same-origin links is an assumption; tighten it to the question
      // URL pattern once that is confirmed.
      const onSite = links.filter((link) => this.isOnSite(link.url));

      // Remove duplicates
      return StriverSDEScraper.dedupeByUrl(onSite);
    } catch (error) {
      console.error('❌ Error fetching question links:', error);
      return [];
    }
  }

  /**
   * Visits one question page and extracts its description and code blocks.
   *
   * @param {string} questionUrl - Absolute URL of the question page.
   * @param {string} title - Link text, used as the question title.
   * @returns {Promise<?{description: string, approaches: Array<{name: string,
   *   code: string, timeComplexity: string, spaceComplexity: string}>,
   *   title: string, url: string}>} The scraped record, or `null` on failure.
   */
  async scrapeQuestion(questionUrl, title) {
    try {
      await this.page.goto(questionUrl, { waitUntil: 'networkidle2' });

      // Runs in the browser context; only plain serialisable data comes back.
      const raw = await this.page.evaluate(() => {
        const data = { description: '', codeBlocks: [] };

        // Extract description
        const contentDiv = document.querySelector('.entry-content, .post-content, .content');
        if (contentDiv) {
          const paragraphs = contentDiv.querySelectorAll('p');
          const descParagraphs = Array.from(paragraphs)
            .slice(0, 3)
            .map((p) => p.textContent.trim())
            .filter((text) => text.length > 20);
          data.description = descParagraphs.join('\n');
        }

        // NOTE(review): the original code-block selector was not recoverable;
        // `pre` is an assumption about the page markup.
        const scope = contentDiv || document;
        scope.querySelectorAll('pre').forEach((block) => {
          data.codeBlocks.push(block.textContent);
        });
        return data;
      });

      // NOTE(review): approach names are positional placeholders; the original
      // naming rule was not recoverable.
      const approaches = raw.codeBlocks
        .map((codeText) => codeText.trim())
        .filter((codeText) => codeText.length > 0)
        .map((codeText, index) => ({
          name: `Approach ${index + 1}`,
          code: codeText,
          timeComplexity: StriverSDEScraper.extractComplexity(codeText, 'time'),
          spaceComplexity: StriverSDEScraper.extractComplexity(codeText, 'space'),
        }));

      return {
        description: raw.description,
        approaches,
        title,
        url: questionUrl,
      };
    } catch (error) {
      console.error(`❌ Error scraping ${title}:`, error);
      return null;
    }
  }

  /**
   * Runs the whole pipeline: start the browser, collect links, scrape each
   * question, then write the JSON and PDF outputs. The browser is always
   * closed, including when start-up or scraping throws.
   *
   * @returns {Promise<void>}
   */
  async scrapeAllQuestions() {
    console.log('🚀 Starting Striver SDE Sheet scraping...');
    try {
      // Inside the try so a half-initialised browser is still closed below.
      await this.init();

      // Get all question links
      const questionLinks = await this.getQuestionLinks();
      if (questionLinks.length === 0) {
        console.log('❌ No question links found!');
        return;
      }

      // Sequential on purpose: every request reuses the single `this.page`.
      for (let i = 0; i < questionLinks.length; i += 1) {
        const link = questionLinks[i];
        const questionData = await this.scrapeQuestion(link.url, link.title);
        if (questionData) {
          this.questionsData.push(questionData);
          console.log(`✅ Scraped: ${questionData.title}`);
        } else {
          console.log(`❌ Failed to scrape: ${link.title}`);
        }

        // Progress update
        if ((i + 1) % 10 === 0) {
          console.log(`\n📊 Progress: ${i + 1}/${questionLinks.length} questions completed`);
        }
      }

      // Save to JSON
      await this.saveToJson();
      // Generate PDF
      await this.generatePDF();
    } finally {
      if (this.browser) {
        await this.browser.close();
      }
    }
  }

  /**
   * Writes `this.questionsData` as pretty-printed JSON to
   * `striver_sde_questions.json` in the current working directory.
   *
   * @returns {Promise<void>}
   */
  async saveToJson() {
    const filename = 'striver_sde_questions.json';
    await fs.writeFile(filename, JSON.stringify(this.questionsData, null, 2));
    console.log(`💾 Data saved to ${filename}`);
  }

  /**
   * Renders `this.questionsData` into `Striver_SDE_Sheet_Complete.pdf` and
   * resolves only after the file has been fully written. Failures are logged,
   * not rethrown.
   *
   * @returns {Promise<void>}
   */
  async generatePDF() {
    console.log('📄 Generating PDF...');
    try {
      const filename = 'Striver_SDE_Sheet_Complete.pdf';
      const doc = new PDFDocument();
      // The module-level `fs` is the promises API, which has no createWriteStream.
      const stream = require('fs').createWriteStream(filename);
      const written = new Promise((resolve, reject) => {
        stream.on('finish', resolve);
        stream.on('error', reject);
      });
      doc.pipe(stream);

      // Title page
      doc.fontSize(20).text('Striver SDE Sheet - Complete Solutions', 50, 50);
      doc.fontSize(12).text(`Total Questions: ${this.questionsData.length}`, 50, 100);
      doc.text(`Generated on: ${new Date().toLocaleDateString()}`, 50, 120);

      // Questions
      let yPosition = 180;
      const startNewPageIfLow = () => {
        if (yPosition > 650) {
          doc.addPage();
          yPosition = 50;
        }
      };

      this.questionsData.forEach((question, index) => {
        startNewPageIfLow();

        // Question title
        doc.fontSize(14).text(`${index + 1}. ${question.title}`, 50, yPosition);
        yPosition += 30;

        // Description
        if (question.description) {
          doc.fontSize(10).text('Problem Description:', 50, yPosition);
          yPosition += 15;
          doc.text(question.description.substring(0, 500), 50, yPosition, { width: 500 });
          // Continue from where PDFKit actually stopped instead of estimating
          // the height from the untruncated description length.
          yPosition = doc.y + 20;
        }

        // Approaches
        (question.approaches || []).forEach((approach) => {
          startNewPageIfLow();
          doc.fontSize(11).text(approach.name, 60, yPosition);
          yPosition += 15;
          doc
            .fontSize(10)
            .text(
              `Time: ${approach.timeComplexity} | Space: ${approach.spaceComplexity}`,
              60,
              yPosition
            );
          yPosition += 15;
          // NOTE(review): printing the code is a reconstruction; the original
          // statements between the page-break check and the complexity line
          // were not recoverable.
          doc.font('Courier').fontSize(8).text(approach.code, 60, yPosition, { width: 480 });
          doc.font('Helvetica');
          yPosition = doc.y + 20;
        });
        yPosition += 30;
      });

      doc.end();
      await written;
      console.log(`✅ PDF generated: ${filename}`);
    } catch (error) {
      console.error('❌ Error generating PDF:', error);
    }
  }
}