"use strict"; /** * Scrape ALL active products via direct GraphQL pagination * This is more reliable than category navigation */ var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); const puppeteer_extra_1 = __importDefault(require("puppeteer-extra")); const puppeteer_extra_plugin_stealth_1 = __importDefault(require("puppeteer-extra-plugin-stealth")); const pg_1 = require("pg"); const dutchie_graphql_1 = require("../scrapers/dutchie-graphql"); puppeteer_extra_1.default.use((0, puppeteer_extra_plugin_stealth_1.default)()); const DATABASE_URL = process.env.DATABASE_URL || 'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus'; const GRAPHQL_HASH = 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0'; async function scrapeAllProducts(menuUrl, storeId) { const pool = new pg_1.Pool({ connectionString: DATABASE_URL }); const browser = await puppeteer_extra_1.default.launch({ headless: 'new', args: ['--no-sandbox', '--disable-setuid-sandbox'], }); try { const page = await browser.newPage(); await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36'); console.log('Loading menu to establish session...'); await page.goto(menuUrl, { waitUntil: 'networkidle2', timeout: 60000, }); await new Promise((r) => setTimeout(r, 3000)); const dispensaryId = await page.evaluate(() => window.reactEnv?.dispensaryId); console.log('Dispensary ID:', dispensaryId); // Paginate through all products const allProducts = []; let pageNum = 0; const perPage = 100; console.log('\nFetching all products via paginated GraphQL...'); while (true) { const result = await page.evaluate(async (dispId, hash, page, perPage) => { const variables = { includeEnterpriseSpecials: false, productsFilter: { dispensaryId: dispId, pricingType: 'rec', Status: 'Active', types: [], useCache: false, isDefaultSort: true, sortBy: 'popularSortIdx', sortDirection: 1, bypassOnlineThresholds: true, isKioskMenu: false, removeProductsBelowOptionThresholds: false, }, page, perPage, }; const qs = new URLSearchParams({ operationName: 'FilteredProducts', variables: JSON.stringify(variables), extensions: JSON.stringify({ persistedQuery: { version: 1, sha256Hash: hash } }), }); const resp = await fetch(`https://dutchie.com/graphql?${qs.toString()}`, { method: 'GET', headers: { 'content-type': 'application/json', 'apollographql-client-name': 'Marketplace (production)', }, credentials: 'include', }); const json = await resp.json(); return { products: json?.data?.filteredProducts?.products || [], totalCount: json?.data?.filteredProducts?.queryInfo?.totalCount, }; }, dispensaryId, GRAPHQL_HASH, pageNum, perPage); if (result.products.length === 0) { break; } allProducts.push(...result.products); console.log(`Page ${pageNum}: ${result.products.length} products (total so far: ${allProducts.length}/${result.totalCount})`); pageNum++; // Safety limit if (pageNum > 50) { console.log('Reached page limit'); break; } } console.log(`\nTotal products fetched: ${allProducts.length}`); // Normalize and upsert console.log('\nNormalizing and upserting to database...'); const normalized = allProducts.map(dutchie_graphql_1.normalizeDutchieProduct); const client = await pool.connect(); let inserted = 0; let updated = 0; try { await client.query('BEGIN'); for (const product of normalized) { const result = await client.query(` INSERT INTO products ( store_id, external_id, slug, name, enterprise_product_id, brand, brand_external_id, brand_logo_url, subcategory, strain_type, canonical_category, price, rec_price, med_price, rec_special_price, med_special_price, is_on_special, special_name, discount_percent, special_data, sku, inventory_quantity, inventory_available, is_below_threshold, status, thc_percentage, cbd_percentage, cannabinoids, weight_mg, net_weight_value, net_weight_unit, options, raw_options, image_url, additional_images, is_featured, medical_only, rec_only, source_created_at, source_updated_at, description, raw_data, dutchie_url, last_seen_at, updated_at ) VALUES ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36, $37, $38, $39, $40, $41, $42, '', NOW(), NOW() ) ON CONFLICT (store_id, slug) DO UPDATE SET name = EXCLUDED.name, enterprise_product_id = EXCLUDED.enterprise_product_id, brand = EXCLUDED.brand, brand_external_id = EXCLUDED.brand_external_id, brand_logo_url = EXCLUDED.brand_logo_url, subcategory = EXCLUDED.subcategory, strain_type = EXCLUDED.strain_type, canonical_category = EXCLUDED.canonical_category, price = EXCLUDED.price, rec_price = EXCLUDED.rec_price, med_price = EXCLUDED.med_price, rec_special_price = EXCLUDED.rec_special_price, med_special_price = EXCLUDED.med_special_price, is_on_special = EXCLUDED.is_on_special, special_name = EXCLUDED.special_name, discount_percent = EXCLUDED.discount_percent, special_data = EXCLUDED.special_data, sku = EXCLUDED.sku, inventory_quantity = EXCLUDED.inventory_quantity, inventory_available = EXCLUDED.inventory_available, is_below_threshold = EXCLUDED.is_below_threshold, status = EXCLUDED.status, thc_percentage = EXCLUDED.thc_percentage, cbd_percentage = EXCLUDED.cbd_percentage, cannabinoids = EXCLUDED.cannabinoids, weight_mg = EXCLUDED.weight_mg, net_weight_value = EXCLUDED.net_weight_value, net_weight_unit = EXCLUDED.net_weight_unit, options = EXCLUDED.options, raw_options = EXCLUDED.raw_options, image_url = EXCLUDED.image_url, additional_images = EXCLUDED.additional_images, is_featured = EXCLUDED.is_featured, medical_only = EXCLUDED.medical_only, rec_only = EXCLUDED.rec_only, source_created_at = EXCLUDED.source_created_at, source_updated_at = EXCLUDED.source_updated_at, description = EXCLUDED.description, raw_data = EXCLUDED.raw_data, last_seen_at = NOW(), updated_at = NOW() RETURNING (xmax = 0) AS was_inserted `, [ storeId, product.external_id, product.slug, product.name, product.enterprise_product_id, product.brand, product.brand_external_id, product.brand_logo_url, product.subcategory, product.strain_type, product.canonical_category, product.price, product.rec_price, product.med_price, product.rec_special_price, product.med_special_price, product.is_on_special, product.special_name, product.discount_percent, product.special_data ? JSON.stringify(product.special_data) : null, product.sku, product.inventory_quantity, product.inventory_available, product.is_below_threshold, product.status, product.thc_percentage, product.cbd_percentage, product.cannabinoids ? JSON.stringify(product.cannabinoids) : null, product.weight_mg, product.net_weight_value, product.net_weight_unit, product.options, product.raw_options, product.image_url, product.additional_images, product.is_featured, product.medical_only, product.rec_only, product.source_created_at, product.source_updated_at, product.description, product.raw_data ? JSON.stringify(product.raw_data) : null, ]); if (result.rows[0]?.was_inserted) { inserted++; } else { updated++; } } await client.query('COMMIT'); } catch (error) { await client.query('ROLLBACK'); throw error; } finally { client.release(); } console.log(`\nDatabase: ${inserted} inserted, ${updated} updated`); // Show summary stats const stats = await pool.query(` SELECT COUNT(*) as total, COUNT(*) FILTER (WHERE is_on_special) as specials, COUNT(DISTINCT brand) as brands, COUNT(DISTINCT subcategory) as categories FROM products WHERE store_id = $1 `, [storeId]); console.log('\nStore summary:'); console.log(` Total products: ${stats.rows[0].total}`); console.log(` On special: ${stats.rows[0].specials}`); console.log(` Unique brands: ${stats.rows[0].brands}`); console.log(` Categories: ${stats.rows[0].categories}`); return { success: true, totalProducts: allProducts.length, inserted, updated, }; } finally { await browser.close(); await pool.end(); } } // Run const menuUrl = process.argv[2] || 'https://dutchie.com/embedded-menu/AZ-Deeply-Rooted'; const storeId = parseInt(process.argv[3] || '1', 10); console.log('='.repeat(60)); console.log('DUTCHIE GRAPHQL FULL SCRAPE'); console.log('='.repeat(60)); console.log(`Menu URL: ${menuUrl}`); console.log(`Store ID: ${storeId}`); console.log(''); scrapeAllProducts(menuUrl, storeId) .then((result) => { console.log('\n' + '='.repeat(60)); console.log('COMPLETE'); console.log(JSON.stringify(result, null, 2)); }) .catch((error) => { console.error('Error:', error.message); process.exit(1); });