Add curaleaf/sol dutchie detection, update batch crawl script with all 57 store IDs
- Add curaleaf.com and livewithsol.com to dutchie detection patterns
- Update crawl-five-sequential.ts with all 57 dutchie store IDs for batch crawling

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,26 +1,50 @@
|
||||
import { runDispensaryOrchestrator } from '../services/dispensary-orchestrator';
|
||||
|
||||
// Run 5 crawlers sequentially to avoid OOM
|
||||
const dispensaryIds = [112, 81, 115, 140, 177];
|
||||
// All 57 dutchie stores with platform_dispensary_id (as of 2024-12)
|
||||
const ALL_DISPENSARY_IDS = [
|
||||
72, 74, 75, 76, 77, 78, 81, 82, 85, 87, 91, 92, 97, 101, 106, 108, 110, 112,
|
||||
115, 120, 123, 125, 128, 131, 135, 139, 140, 143, 144, 145, 152, 153, 161,
|
||||
168, 176, 177, 180, 181, 189, 195, 196, 199, 200, 201, 205, 206, 207, 213,
|
||||
214, 224, 225, 227, 232, 235, 248, 252, 281
|
||||
];
|
||||
|
||||
const BATCH_SIZE = 5;
|
||||
|
||||
async function run() {
|
||||
console.log('Starting 5 crawlers SEQUENTIALLY...');
|
||||
const totalBatches = Math.ceil(ALL_DISPENSARY_IDS.length / BATCH_SIZE);
|
||||
console.log(`Starting crawl of ${ALL_DISPENSARY_IDS.length} stores in ${totalBatches} batches of ${BATCH_SIZE}...`);
|
||||
|
||||
for (const id of dispensaryIds) {
|
||||
console.log(`\n=== Starting crawler for dispensary ${id} ===`);
|
||||
try {
|
||||
const result = await runDispensaryOrchestrator(id);
|
||||
console.log(` Status: ${result.status}`);
|
||||
console.log(` Summary: ${result.summary}`);
|
||||
if (result.productsFound) {
|
||||
console.log(` Products: ${result.productsFound} found, ${result.productsNew} new, ${result.productsUpdated} updated`);
|
||||
let successCount = 0;
|
||||
let errorCount = 0;
|
||||
|
||||
for (let i = 0; i < ALL_DISPENSARY_IDS.length; i += BATCH_SIZE) {
|
||||
const batch = ALL_DISPENSARY_IDS.slice(i, i + BATCH_SIZE);
|
||||
const batchNum = Math.floor(i / BATCH_SIZE) + 1;
|
||||
console.log(`\n========== BATCH ${batchNum}/${totalBatches} (IDs: ${batch.join(', ')}) ==========`);
|
||||
|
||||
for (const id of batch) {
|
||||
console.log(`\n--- Crawling dispensary ${id} ---`);
|
||||
try {
|
||||
const result = await runDispensaryOrchestrator(id);
|
||||
console.log(` Status: ${result.status}`);
|
||||
console.log(` Summary: ${result.summary}`);
|
||||
if (result.productsFound) {
|
||||
console.log(` Products: ${result.productsFound} found, ${result.productsNew} new, ${result.productsUpdated} updated`);
|
||||
}
|
||||
successCount++;
|
||||
} catch (e: any) {
|
||||
console.log(` ERROR: ${e.message}`);
|
||||
errorCount++;
|
||||
}
|
||||
} catch (e: any) {
|
||||
console.log(` ERROR: ${e.message}`);
|
||||
}
|
||||
|
||||
console.log(`\n--- Batch ${batchNum} complete. Progress: ${Math.min(i + BATCH_SIZE, ALL_DISPENSARY_IDS.length)}/${ALL_DISPENSARY_IDS.length} ---`);
|
||||
}
|
||||
|
||||
console.log('\n=== All 5 crawlers complete ===');
|
||||
console.log('\n========================================');
|
||||
console.log(`=== ALL CRAWLS COMPLETE ===`);
|
||||
console.log(`Success: ${successCount}, Errors: ${errorCount}`);
|
||||
console.log('========================================');
|
||||
}
|
||||
|
||||
run().catch(e => console.log('Fatal:', e.message));
|
||||
|
||||
Reference in New Issue
Block a user