config: Add 34 files

This commit is contained in:
2025-10-13 08:21:26 +13:00
parent ee7f11fce6
commit 9cc06ca37b
17 changed files with 1179 additions and 0 deletions

114
scrapers/cpubenchmark.js Normal file
View File

@@ -0,0 +1,114 @@
const puppeteer = require('puppeteer');
/**
 * Extract a CPU model identifier with enough context to avoid false matches.
 *
 * The patterns are tried in order and the first hit wins:
 *   1. Intel Core Ultra  -> "Core Ultra 7 265K"
 *   2. Intel Core iN     -> "Core i5-14400F" (always normalised to the dashed form)
 *   3. Intel Xeon        -> "Xeon Gold 6248R", "Xeon E5-2680 v4"
 *   4. AMD Ryzen         -> "Ryzen 7 7800X3D", "Ryzen 5 PRO 5650G"
 *   5. AMD Threadripper  -> "Threadripper 7970X", "Threadripper PRO 5995WX"
 *
 * @param {string} name - Free-form product or benchmark name.
 * @returns {string|null} The matched model identifier, or null when no
 *   pattern matches or `name` is not a string.
 */
function extractCPUModel(name) {
  // A missing or non-text name simply has no model.
  if (typeof name !== 'string') return null;

  // Intel Core Ultra patterns: Core Ultra 5/7/9 XXXX
  const ultraMatch = name.match(/\b(Core\s+Ultra\s+[579]\s+\w+)\b/i);
  if (ultraMatch) return ultraMatch[1];

  // Intel patterns: Core i3-XXXX, Core i5-XXXX, etc. (include "Core" for specificity).
  // Handles both "Core i5-14400F" and "Core i5 14400F" (with or without dash).
  const intelMatch = name.match(/\b(Core\s+i[3579])-?\s*(\w+)\b/i);
  if (intelMatch) return `${intelMatch[1]}-${intelMatch[2]}`;

  // Intel Xeon patterns: Xeon Silver/Gold/Platinum/Bronze XXXX, plus the
  // hyphenated families (E5-2680, W-2245, E-2388G) and an optional " vN"
  // revision. Stopping at the hyphen would leave just "Xeon E5", which
  // matches many unrelated parts.
  const xeonMatch = name.match(
    /\b(Xeon\s+(?:Silver|Gold|Platinum|Bronze)?\s*\w+(?:-\w+)?(?:\s+v\d+)?)\b/i
  );
  if (xeonMatch) return xeonMatch[1];

  // AMD Ryzen patterns: Ryzen X XXXXX. The optional "PRO" keeps the real model
  // number in the result instead of stopping at the marketing word.
  const ryzenMatch = name.match(/\b(Ryzen\s+[3579]\s+(?:PRO\s+)?\w+)/i);
  if (ryzenMatch) return ryzenMatch[1];

  // AMD Threadripper patterns: Threadripper [PRO] XXXX
  const threadripperMatch = name.match(/\b(Threadripper\s+(?:PRO\s+)?\w+)/i);
  if (threadripperMatch) return threadripperMatch[1];

  return null;
}
/**
 * Scrape the CPU list page on cpubenchmark.net with a headless browser and
 * build a name -> score lookup table.
 *
 * For every table row with at least two <td> cells, the first cell's text is
 * used as the CPU name and the second cell's text (thousands separators
 * removed) as the score. Rows whose score is not a positive integer are
 * skipped.
 *
 * @param {(message: string) => void} progressCallback - Receives
 *   human-readable status messages, including an error message on failure.
 * @returns {Promise<Object<string, number>>} Map of CPU name to score.
 * @throws Rethrows whatever error launching, navigating or parsing raised.
 */
async function fetchBenchmarkData(progressCallback) {
  let browser;
  try {
    progressCallback('Loading CPU benchmark database...');
    browser = await puppeteer.launch({
      headless: 'new',
      args: ['--no-sandbox', '--disable-setuid-sandbox']
    });

    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    progressCallback('Fetching benchmark scores...');
    await page.goto('https://www.cpubenchmark.net/cpu_list.php', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });

    progressCallback('Parsing benchmark data...');
    // Extract benchmark data from the table. This callback is handed to
    // page.evaluate, so it must be self-contained (no outer variables).
    const benchmarks = await page.evaluate(() => {
      const data = {};
      for (const row of document.querySelectorAll('table tr')) {
        const cells = row.querySelectorAll('td');
        if (cells.length < 2) continue;

        const cpuName = cells[0].textContent?.trim();
        const cpuMark = cells[1].textContent?.trim();
        if (!cpuName || !cpuMark) continue;

        // Scores are rendered with thousands separators, e.g. "25,123".
        const score = Number.parseInt(cpuMark.replace(/,/g, ''), 10);
        if (!Number.isNaN(score) && score > 0) {
          data[cpuName] = score;
        }
      }
      return data;
    });

    const count = Object.keys(benchmarks).length;
    progressCallback(`Loaded ${count} CPU benchmarks`);
    return benchmarks;
  } catch (error) {
    progressCallback(`Error fetching benchmarks: ${error.message}`);
    throw error;
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch {
        // Cleanup is best-effort: a failure to close must neither replace the
        // original error nor discard data that was already scraped.
      }
    }
  }
}
/**
 * Look up the benchmark score for a CPU name.
 *
 * An exact key match is tried first. Otherwise the model identifier is
 * extracted with extractCPUModel and the first benchmark entry whose name
 * contains that identifier as a whole word (case-insensitive) is used.
 *
 * @param {string} cpuName - CPU name to look up (e.g. a retailer listing title).
 * @param {Object<string, number>} benchmarkData - Map of benchmark name to score.
 * @returns {number|null} The score, or null when nothing matches.
 */
function findBenchmarkScore(cpuName, benchmarkData) {
  if (!benchmarkData || typeof cpuName !== 'string') return null;

  // Try exact match first. Only own keys count, so names such as "toString"
  // do not resolve to inherited Object.prototype members.
  if (
    Object.prototype.hasOwnProperty.call(benchmarkData, cpuName) &&
    benchmarkData[cpuName]
  ) {
    return benchmarkData[cpuName];
  }

  // Extract the CPU model identifier
  const model = extractCPUModel(cpuName);
  if (!model) return null;

  // Build the matcher once, not per entry. Regex metacharacters are escaped,
  // and any whitespace run in the model matches any whitespace run in the
  // benchmark name, so "Ryzen 5  5600" still finds "Ryzen 5 5600".
  // The word boundaries stop "5600" from matching "5600X".
  const escapedModel = model
    .replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
    .replace(/\s+/g, '\\s+');
  const modelRegex = new RegExp(`\\b${escapedModel}\\b`, 'i');

  for (const [benchName, score] of Object.entries(benchmarkData)) {
    if (modelRegex.test(benchName)) {
      return score;
    }
  }
  return null;
}
// Public API of this module: the benchmark scraper and the score lookup helper.
module.exports = { fetchBenchmarkData, findBenchmarkScore };

88
scrapers/pbtech.js Normal file
View File

@@ -0,0 +1,88 @@
const puppeteer = require('puppeteer');
/**
 * Scrape the PB Tech CPU category page with a headless browser.
 *
 * Every element carrying a `data-product-id` attribute is treated as a
 * product. Its price is read from `data-price`. Its name is taken from the
 * first link, found by walking up to 10 ancestors, whose href contains the
 * product id.
 *
 * @param {(message: string) => void} progressCallback - Receives
 *   human-readable status messages, including an error message on failure.
 * @returns {Promise<Array<{name: string, price: number, inStock: boolean, source: string}>>}
 *   One entry per product that has both a name and a non-zero numeric price.
 * @throws Rethrows whatever error launching, navigating or extracting raised.
 */
async function scrapePBTechCPUs(progressCallback) {
  let browser;
  try {
    progressCallback('Launching browser...');
    browser = await puppeteer.launch({
      headless: 'new',
      args: ['--no-sandbox', '--disable-setuid-sandbox']
    });

    progressCallback('Opening PB Tech CPU page...');
    const page = await browser.newPage();
    // Set a realistic user agent
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

    progressCallback('Loading CPU listings...');
    await page.goto('https://www.pbtech.co.nz/category/components/cpus', {
      waitUntil: 'networkidle2',
      timeout: 30000
    });

    progressCallback('Extracting CPU data...');
    // Extract CPU data from the page. This callback is handed to
    // page.evaluate, so it must be self-contained (no outer variables).
    const cpus = await page.evaluate(() => {
      const products = [];
      // Find all "Add to cart" buttons which have product data
      for (const btn of document.querySelectorAll('[data-product-id]')) {
        try {
          const productId = btn.getAttribute('data-product-id');
          // An empty id would make href.includes('') match any link and attach
          // an unrelated product name, so skip such elements.
          if (!productId) continue;

          const price = parseFloat(btn.getAttribute('data-price'));

          // Go up the DOM tree to find the card containing the product link
          let card = btn;
          let name = null;
          for (let i = 0; i < 10; i++) {
            card = card.parentElement;
            if (!card) break;
            const productLink = Array.from(card.querySelectorAll('a')).find(
              (a) => a.href && a.href.includes(productId)
            );
            if (productLink) {
              name = productLink.textContent?.trim();
              break;
            }
          }

          // NaN and 0 are both falsy, so this drops missing or unparsable prices.
          if (name && price) {
            products.push({
              name: name,
              price: price,
              inStock: true, // If there's an "Add to cart" button, it's in stock
              source: 'pbtech'
            });
          }
        } catch (err) {
          // Deliberately best-effort: one malformed card should not abort the
          // whole listing, so problematic elements are skipped.
        }
      }
      return products;
    });

    progressCallback(`Found ${cpus.length} CPUs from PB Tech`);
    return cpus;
  } catch (error) {
    progressCallback(`Error scraping PB Tech: ${error.message}`);
    throw error;
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch {
        // Cleanup is best-effort: a failure to close must neither replace the
        // original error nor discard data that was already scraped.
      }
    }
  }
}
// Public API of this module: the PB Tech CPU listing scraper.
module.exports = { scrapePBTechCPUs };