const puppeteer = require('puppeteer');

/** Listing page that is scraped for CPU products. */
const PBTECH_CPU_URL = 'https://www.pbtech.co.nz/category/components/cpus';

/** Desktop Chrome user agent sent instead of the browser's default one. */
const PBTECH_USER_AGENT =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

/**
 * Collects product records from the DOM of the listing page.
 *
 * This function is handed to `page.evaluate`, so it runs inside the browser
 * page rather than in Node.js: it must stay self-contained and cannot
 * reference anything from the surrounding module scope.
 *
 * @returns {Array<{name: string, price: number, inStock: boolean, source: string}>}
 *   One record per distinct product id that has both a usable price and a name.
 */
function extractProductsFromDom() {
  // How many ancestors to inspect while looking for the product's own link.
  const MAX_ANCESTOR_DEPTH = 10;
  const products = [];
  const seenProductIds = new Set();

  for (const element of document.querySelectorAll('[data-product-id]')) {
    try {
      const productId = element.getAttribute('data-product-id');
      // An empty id would match every href below and pick up an unrelated
      // link; a repeated id would emit the same product twice.
      if (!productId || seenProductIds.has(productId)) {
        continue;
      }

      // A missing or non-numeric data-price parses to NaN and is rejected here.
      const price = Number.parseFloat(element.getAttribute('data-price'));
      if (!Number.isFinite(price) || price <= 0) {
        continue;
      }

      // Walk up the tree until an ancestor contains a link whose URL mentions
      // this product id; that link's text is used as the product name.
      let name = null;
      let ancestor = element.parentElement;
      for (let depth = 0; depth < MAX_ANCESTOR_DEPTH && ancestor; depth++) {
        const productLink = Array.from(ancestor.querySelectorAll('a')).find(
          (link) => link.href && link.href.includes(productId),
        );
        if (productLink) {
          name = productLink.textContent?.trim();
          break;
        }
        ancestor = ancestor.parentElement;
      }
      if (!name) {
        continue;
      }

      seenProductIds.add(productId);
      products.push({
        name,
        price,
        // Assumes these elements are only rendered for purchasable items
        // (i.e. they are "Add to cart" buttons) - verify against the page.
        inStock: true,
        source: 'pbtech',
      });
    } catch (err) {
      // Deliberate best effort: one malformed element must not abort the
      // whole extraction, so it is skipped.
    }
  }

  return products;
}

/**
 * Scrapes the PB Tech CPU listing page and returns the products found on it.
 *
 * @param {(message: string) => void} [progressCallback] Receives a
 *   human-readable status message at each stage; anything that is not a
 *   function is ignored.
 * @returns {Promise<Array<{name: string, price: number, inStock: boolean, source: string}>>}
 *   The extracted CPU records.
 * @throws Rethrows any error raised while launching the browser, loading the
 *   page or extracting data, after reporting it through `progressCallback`.
 */
async function scrapePBTechCPUs(progressCallback) {
  const report = typeof progressCallback === 'function' ? progressCallback : () => {};
  let browser;

  try {
    report('Launching browser...');
    browser = await puppeteer.launch({
      headless: 'new',
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });

    report('Opening PB Tech CPU page...');
    const page = await browser.newPage();
    await page.setUserAgent(PBTECH_USER_AGENT);

    report('Loading CPU listings...');
    await page.goto(PBTECH_CPU_URL, {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });

    report('Extracting CPU data...');
    const cpus = await page.evaluate(extractProductsFromDom);

    report(`Found ${cpus.length} CPUs from PB Tech`);
    return cpus;
  } catch (error) {
    report(`Error scraping PB Tech: ${error.message}`);
    throw error;
  } finally {
    // Always release the browser. A failure to close is reported but never
    // allowed to replace the scrape result or the original error.
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        report(`Warning: could not close browser: ${closeError.message}`);
      }
    }
  }
}
// Expose the scraper as this module's only export.
module.exports = { scrapePBTechCPUs };