config: Add 34 files
This commit is contained in:
88
scrapers/pbtech.js
Normal file
88
scrapers/pbtech.js
Normal file
@@ -0,0 +1,88 @@
|
||||
const puppeteer = require('puppeteer');
|
||||
|
||||
/**
 * Scrape the PB Tech CPU category page and return the products found on it.
 *
 * Launches a headless browser via the module-level `puppeteer` binding, loads
 * the CPU listing page, and reads product data from elements carrying a
 * `data-product-id` attribute. The browser is always closed before this
 * function settles, whether it resolves or rejects.
 *
 * @param {(message: string) => void} [progressCallback] - Receives
 *   human-readable progress messages. Optional; when omitted or not a
 *   function, progress messages are discarded.
 * @returns {Promise<Array<{name: string, price: number, inStock: boolean, source: string}>>}
 *   One entry per element for which both a name and a positive finite price
 *   could be determined.
 * @throws Rethrows the original error from launching, navigating, or
 *   evaluating the page, after reporting it through `progressCallback`.
 */
async function scrapePBTechCPUs(progressCallback) {
  // Tolerate callers that do not care about progress reporting.
  const report = typeof progressCallback === 'function' ? progressCallback : () => {};
  let browser;

  try {
    report('Launching browser...');
    browser = await puppeteer.launch({
      headless: 'new',
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });

    report('Opening PB Tech CPU page...');
    const page = await browser.newPage();

    // Set a realistic user agent
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    report('Loading CPU listings...');
    await page.goto('https://www.pbtech.co.nz/category/components/cpus', {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });

    report('Extracting CPU data...');

    // NOTE: this callback is serialized and executed inside the page, so it
    // must be self-contained and cannot reference anything from this module.
    const cpus = await page.evaluate(() => {
      // How many ancestors of the button to inspect when looking for the
      // product link.
      const MAX_ANCESTOR_DEPTH = 10;
      const products = [];

      // Elements carrying product data (expected to be "Add to cart" buttons).
      for (const btn of document.querySelectorAll('[data-product-id]')) {
        try {
          const productId = btn.getAttribute('data-product-id');
          const price = Number.parseFloat(btn.getAttribute('data-price'));

          // An empty id would match every link below, and a missing or
          // non-positive price is not a usable listing.
          if (!productId || !Number.isFinite(price) || price <= 0) {
            continue;
          }

          // Walk up the DOM until an ancestor contains a link to this product
          // that actually has text. Links without text (e.g. wrapping only an
          // image) are skipped so they do not hide the real title link.
          let name = null;
          let ancestor = btn.parentElement;
          for (let depth = 0; depth < MAX_ANCESTOR_DEPTH && ancestor && !name; depth += 1) {
            for (const link of ancestor.querySelectorAll('a')) {
              const text = link.textContent?.trim();
              if (text && link.href && link.href.includes(productId)) {
                name = text;
                break;
              }
            }
            ancestor = ancestor.parentElement;
          }

          if (name) {
            products.push({
              name,
              price,
              inStock: true, // If there's an "Add to cart" button, it's in stock
              source: 'pbtech',
            });
          }
        } catch (err) {
          // Deliberate best-effort: one malformed element must not abort the
          // extraction of the remaining products.
        }
      }

      return products;
    });

    report(`Found ${cpus.length} CPUs from PB Tech`);
    return cpus;
  } catch (error) {
    report(`Error scraping PB Tech: ${error.message}`);
    throw error;
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        // A failed shutdown must not replace the scrape result or mask the
        // original scrape error, so it is reported instead of rethrown.
        report(`Warning: failed to close browser: ${closeError.message}`);
      }
    }
  }
}
|
||||
|
||||
module.exports = { scrapePBTechCPUs };
|
Reference in New Issue
Block a user