From d7da0b938d939a4007cc1e73c5d4e8d627304c05 Mon Sep 17 00:00:00 2001
From: Kelly
Date: Sat, 13 Dec 2025 16:05:50 -0700
Subject: [PATCH] feat(jane): Direct Algolia product fetch and multi-platform product-refresh
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add fetchProductsByStoreIdDirect() for reliable Algolia product fetching
- Update product-discovery-jane to use direct Algolia instead of network interception
- Fix product-refresh handler to support both Dutchie and Jane payloads
  - Handle both `products` (Dutchie) and `hits` (Jane) formats
  - Use platform-appropriate raw_json structure for normalizers
  - Fix consecutive_misses tracking to use correct provider
  - Extract product IDs correctly (Dutchie _id vs Jane product_id)
- Add store discovery deduplication (prefer REC over MED at same location)
- Add storeTypes field to DiscoveredStore interface
- Add scripts: run-jane-store-discovery.ts, run-jane-product-discovery.ts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 backend/scripts/run-jane-product-discovery.ts | 138 +++++++++
 backend/scripts/run-jane-store-discovery.ts | 137 +++++++++
 backend/scripts/test-jane-discovery-az.ts | 50 ++++
 backend/src/platforms/jane/index.ts | 1 +
 backend/src/platforms/jane/queries.ts | 268 ++++++++++++++----
 .../tasks/handlers/product-discovery-jane.ts | 14 +-
 backend/src/tasks/handlers/product-refresh.ts | 42 +--
 7 files changed, 570 insertions(+), 80 deletions(-)
 create mode 100644 backend/scripts/run-jane-product-discovery.ts
 create mode 100644 backend/scripts/run-jane-store-discovery.ts
 create mode 100644 backend/scripts/test-jane-discovery-az.ts

diff --git a/backend/scripts/run-jane-product-discovery.ts b/backend/scripts/run-jane-product-discovery.ts
new file mode 100644
index 00000000..3d7e12f8
--- /dev/null
+++ b/backend/scripts/run-jane-product-discovery.ts
@@ -0,0 +1,138 @@
+/**
+ * Run Jane product discovery for stores in database
+ * Usage: npx ts-node scripts/run-jane-product-discovery.ts [DISPENSARY_ID]
+ * Example: npx ts-node scripts/run-jane-product-discovery.ts 4220
+ * Or run for all Jane stores: npx ts-node scripts/run-jane-product-discovery.ts all
+ */
+
+import { Pool } from 'pg';
+import { fetchProductsByStoreIdDirect } from '../src/platforms/jane';
+import { saveRawPayload } from '../src/utils/payload-storage';
+
+async function main() {
+  const arg = process.argv[2];
+
+  console.log('='.repeat(60));
+  console.log('Jane Product Discovery');
+  console.log('='.repeat(60));
+
+  const pool = new Pool({
+    connectionString: process.env.DATABASE_URL,
+  });
+
+  try {
+    // Select dispensaries: 'all' = every Jane store with a menu URL, any other arg = that dispensary ID, no arg = first Jane store
+    let dispensaries: any[];
+
+    if (arg === 'all') {
+      const result = await pool.query(
+        `SELECT id, name, menu_url, platform_dispensary_id
+         FROM dispensaries
+         WHERE platform = 'jane' AND menu_url IS NOT NULL
+         ORDER BY id`
+      );
+      dispensaries = result.rows;
+    } else if (arg) {
+      const result = await pool.query(
+        `SELECT id, name, menu_url, platform_dispensary_id
+         FROM dispensaries
+         WHERE id = $1`,
+        [Number.parseInt(arg, 10)]
+      );
+      dispensaries = result.rows;
+    } else {
+      // Default: get first Jane store
+      const result = await pool.query(
+        `SELECT id, name, menu_url, platform_dispensary_id
+         FROM dispensaries
+         WHERE platform = 'jane' AND menu_url IS NOT NULL
+         ORDER BY id LIMIT 1`
+      );
+      dispensaries = result.rows;
+    }
+
+    if (dispensaries.length === 0) {
+      console.log('No Jane dispensaries found');
+      return;
+    }
+
+    console.log(`Processing ${dispensaries.length} dispensary(ies)...\n`);
+
+    let successCount = 0;
+    let failCount = 0;
+
+    for (const disp of dispensaries) {
+      console.log(`\n${'─'.repeat(60)}`);
+      console.log(`${disp.name} (ID: ${disp.id}, Jane ID: ${disp.platform_dispensary_id})`);
+      console.log('─'.repeat(60));
+
+      try {
+        const result = await fetchProductsByStoreIdDirect(disp.platform_dispensary_id);
+
+        if (result.products.length === 0) {
+          console.log(' ✗ No products captured');
+          failCount++;
+          continue;
+        }
+
+        console.log(` ✓ Captured ${result.products.length} products`);
+
+        // Build the raw payload: Jane hits go under `hits`, alongside store and capture metadata
+        const rawPayload = {
+          hits: result.products.map(p => p.raw),
+          store: result.store?.raw || null,
+          capturedAt: new Date().toISOString(),
+          platform: 'jane',
+          dispensaryId: disp.id,
+          storeId: disp.platform_dispensary_id,
+        };
+
+        // Save payload
+        const { id: payloadId, sizeBytes } = await saveRawPayload(
+          pool,
+          disp.id,
+          rawPayload,
+          null,
+          result.products.length,
+          'jane'
+        );
+
+        console.log(` ✓ Saved payload ${payloadId} (${Math.round(sizeBytes / 1024)}KB)`);
+
+        // Mark the dispensary as fetched: stage, product count and success/failure counters
+        await pool.query(
+          `UPDATE dispensaries
+           SET stage = 'hydrating',
+               last_fetch_at = NOW(),
+               product_count = $2,
+               consecutive_successes = consecutive_successes + 1,
+               consecutive_failures = 0,
+               updated_at = NOW()
+           WHERE id = $1`,
+          [disp.id, result.products.length]
+        );
+
+        console.log(` ✓ Updated dispensary (product_count: ${result.products.length})`);
+        successCount++;
+
+      } catch (error: any) {
+        console.log(` ✗ Error: ${error.message}`);
+        failCount++;
+      }
+    }
+
+    console.log('\n' + '='.repeat(60));
+    console.log('RESULTS');
+    console.log('='.repeat(60));
+    console.log(`Success: ${successCount}`);
+    console.log(`Failed: ${failCount}`);
+
+  } catch (error: any) {
+    console.error('Error:', error.message);
+    process.exitCode = 1; // not process.exit(1): that would terminate before `finally` closes the pool
+  } finally {
+    await pool.end();
+  }
+}
+
+main().catch((error) => { console.error('Fatal:', error); process.exitCode = 1; });
diff --git a/backend/scripts/run-jane-store-discovery.ts b/backend/scripts/run-jane-store-discovery.ts
new file mode 100644
index 00000000..25475907
--- /dev/null
+++ b/backend/scripts/run-jane-store-discovery.ts
@@ -0,0 +1,137 @@
+/**
+ * Run Jane store discovery and insert into database
+ * Usage: npx ts-node scripts/run-jane-store-discovery.ts [STATE_CODE]
+ * Example: npx ts-node scripts/run-jane-store-discovery.ts AZ
+ */
+
+import { Pool } from 'pg';
+import { discoverStoresByState } from '../src/platforms/jane';
+
+/**
+ * Generate slug from store name
+ * 
e.g., "Hana Meds - Phoenix (REC)" -> "hana-meds-phoenix-rec"
+ */
+function generateSlug(name: string): string {
+  return name
+    .toLowerCase()
+    .replace(/[()]/g, '') // Remove parentheses
+    .replace(/[^a-z0-9\s-]/g, '') // Remove special chars
+    .replace(/\s+/g, '-') // Spaces to hyphens
+    .replace(/-+/g, '-') // Collapse multiple hyphens
+    .replace(/^-|-$/g, ''); // Trim hyphens
+}
+
+async function main() {
+  const stateCode = (process.argv[2] || 'AZ').toUpperCase(); // normalise so "az" and "AZ" behave the same
+
+  console.log('='.repeat(60));
+  console.log(`Jane Store Discovery - ${stateCode}`);
+  console.log('='.repeat(60));
+
+  // Connect to database
+  const pool = new Pool({
+    connectionString: process.env.DATABASE_URL,
+  });
+
+  try {
+    // Sanity-check the connection and report how many Jane stores exist before this run
+    const testResult = await pool.query('SELECT COUNT(*) FROM dispensaries WHERE platform = $1', ['jane']);
+    console.log(`Current Jane stores in DB: ${testResult.rows[0].count}`);
+
+    // Discover stores
+    console.log(`\nDiscovering Jane stores in ${stateCode}...`);
+    const stores = await discoverStoresByState(stateCode);
+
+    if (stores.length === 0) {
+      console.log(`No stores found in ${stateCode}`);
+      return;
+    }
+
+    console.log(`\nFound ${stores.length} stores. Inserting into database...`);
+
+    // Upsert each store keyed on platform_dispensary_id; is_new (xmax = 0) is used below to tell inserts from updates
+    let inserted = 0;
+    let updated = 0;
+    const newIds: number[] = [];
+
+    for (const store of stores) {
+      const menuUrl = `https://www.iheartjane.com/stores/${store.storeId}/${store.urlSlug || 'menu'}`;
+      const slug = generateSlug(store.name);
+
+      try {
+        const result = await pool.query(
+          `INSERT INTO dispensaries (
+             name, slug, address1, city, state, zipcode,
+             latitude, longitude, menu_url, menu_type, platform,
+             platform_dispensary_id, is_medical, is_recreational,
+             stage, created_at, updated_at
+           ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, NOW(), NOW())
+           ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL
+           DO UPDATE SET
+             name = EXCLUDED.name,
+             slug = EXCLUDED.slug,
+             address1 = EXCLUDED.address1,
+             city = EXCLUDED.city,
+             latitude = EXCLUDED.latitude,
+             longitude = EXCLUDED.longitude,
+             menu_url = EXCLUDED.menu_url,
+             is_medical = EXCLUDED.is_medical,
+             is_recreational = EXCLUDED.is_recreational,
+             updated_at = NOW()
+           RETURNING id, (xmax = 0) AS is_new`,
+          [
+            store.name,
+            slug,
+            store.address,
+            store.city,
+            stateCode,
+            store.zip,
+            store.lat,
+            store.long,
+            menuUrl,
+            'embedded', // menu_type: how it's displayed
+            'jane', // platform: who provides the menu
+            store.storeId,
+            store.medical,
+            store.recreational,
+            'discovered',
+          ]
+        );
+
+        if (result.rows.length > 0) {
+          const { id, is_new } = result.rows[0];
+          if (is_new) {
+            inserted++;
+            newIds.push(id);
+            console.log(` + Inserted: ${store.name} (DB ID: ${id}, Jane ID: ${store.storeId})`);
+          } else {
+            updated++;
+            console.log(` ~ Updated: ${store.name} (DB ID: ${id})`);
+          }
+        }
+      } catch (error: any) {
+        console.error(` ! Error inserting ${store.name}: ${error.message}`);
+      }
+    }
+
+    console.log('\n' + '='.repeat(60));
+    console.log('RESULTS');
+    console.log('='.repeat(60));
+    console.log(`Stores discovered: ${stores.length}`);
+    console.log(`New stores inserted: ${inserted}`);
+    console.log(`Existing stores updated: ${updated}`);
+    console.log(`New dispensary IDs: ${newIds.join(', ') || '(none)'}`);
+
+    // Show final count
+    const finalResult = await pool.query('SELECT COUNT(*) FROM dispensaries WHERE platform = $1', ['jane']);
+    console.log(`\nTotal Jane stores in DB: ${finalResult.rows[0].count}`);
+
+  } catch (error: any) {
+    console.error('Error:', error.message);
+    process.exitCode = 1; // not process.exit(1): that would terminate before `finally` closes the pool
+  } finally {
+    await pool.end();
+  }
+}
+
+main().catch((error) => { console.error('Fatal:', error); process.exitCode = 1; });
diff --git a/backend/scripts/test-jane-discovery-az.ts b/backend/scripts/test-jane-discovery-az.ts
new file mode 100644
index 00000000..381bf14c
--- /dev/null
+++ b/backend/scripts/test-jane-discovery-az.ts
@@ -0,0 +1,50 @@
+/**
+ * Smoke test: Discover Jane stores in Arizona
+ * Usage: npx ts-node scripts/test-jane-discovery-az.ts
+ */
+
+import { discoverStoresByState } from '../src/platforms/jane';
+
+async function main() {
+  console.log('='.repeat(60));
+  console.log('Jane Store Discovery - Arizona Smoke Test');
+  console.log('='.repeat(60));
+  console.log('Using local IP (no proxy)\n');
+
+  try {
+    const stores = await discoverStoresByState('AZ');
+
+    console.log(`\n${'='.repeat(60)}`);
+    console.log(`RESULTS: Found ${stores.length} Jane stores in Arizona`);
+    console.log('='.repeat(60));
+
+    if (stores.length > 0) {
+      console.log('\nSample stores:');
+      for (const store of stores.slice(0, 10)) {
+        console.log(` - ${store.name}`);
+        console.log(` ID: ${store.storeId} | ${store.city}, AZ`);
+        console.log(` Types: ${store.storeTypes?.join(', ') || 'unknown'}`);
+        console.log(` Products: ${store.productCount ?? 'N/A'}`);
+        console.log('');
+      }
+
+      if (stores.length > 10) {
+        console.log(` ... 
and ${stores.length - 10} more stores`); + } + } + + console.log('\n' + '='.repeat(60)); + console.log('SMOKE TEST PASSED'); + console.log('='.repeat(60)); + + } catch (error: any) { + console.error('\n' + '='.repeat(60)); + console.error('SMOKE TEST FAILED'); + console.error('='.repeat(60)); + console.error(`Error: ${error.message}`); + console.error(error.stack); + process.exit(1); + } +} + +main(); diff --git a/backend/src/platforms/jane/index.ts b/backend/src/platforms/jane/index.ts index 787ddcc7..1b68eac7 100644 --- a/backend/src/platforms/jane/index.ts +++ b/backend/src/platforms/jane/index.ts @@ -36,6 +36,7 @@ export { getStoreById, fetchProductsFromUrl, fetchProductsByStoreId, + fetchProductsByStoreIdDirect, discoverStoresByState, // Types diff --git a/backend/src/platforms/jane/queries.ts b/backend/src/platforms/jane/queries.ts index 603151da..7105c5f2 100644 --- a/backend/src/platforms/jane/queries.ts +++ b/backend/src/platforms/jane/queries.ts @@ -158,6 +158,112 @@ export async function fetchProductsByStoreId( return fetchProductsFromUrl(menuUrl); } +/** + * Fetch ALL products for a store directly via Algolia API + * More reliable than network interception - calls Algolia directly from browser context + * + * @param storeId - Jane store ID + * @returns Products fetched from Algolia + */ +export async function fetchProductsByStoreIdDirect( + storeId: string | number +): Promise { + try { + await startSession(); + + const { page } = (await import('./client')).getCurrentSession()!; + + // Visit Jane to establish browser session (bypass Cloudflare) + console.log(`[Jane Queries] Establishing browser session...`); + await page.goto('https://www.iheartjane.com/stores', { + waitUntil: 'domcontentloaded', + timeout: 30000, + }); + await new Promise((r) => setTimeout(r, 2000)); + + // Fetch all products via Algolia from browser context + console.log(`[Jane Queries] Fetching products for store ${storeId} via Algolia...`); + + const algoliaResults = await 
page.evaluate(async (sid: string) => { + const results: any[] = []; + let pageNum = 0; + const hitsPerPage = 100; + + while (true) { + try { + const response = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + query: '', + hitsPerPage, + page: pageNum, + filters: `store_id=${sid}`, + }), + }); + + if (!response.ok) { + console.log(`Algolia request failed: ${response.status}`); + break; + } + + const data = await response.json(); + if (!data.hits || data.hits.length === 0) { + break; + } + + results.push(...data.hits); + console.log(`Fetched page ${pageNum}, got ${data.hits.length} products (total: ${results.length})`); + + if (pageNum >= data.nbPages - 1) { + break; + } + + pageNum++; + } catch (err) { + console.log(`Algolia error: ${err}`); + break; + } + } + + return results; + }, String(storeId)); + + console.log(`[Jane Queries] Algolia returned ${algoliaResults.length} products`); + + // Parse into JaneProductHit format + const products: JaneProductHit[] = algoliaResults.map((hit: any) => ({ + product_id: hit.product_id, + name: hit.name, + brand: hit.brand, + kind: hit.kind, + category: hit.category, + percent_thc: hit.percent_thc ?? null, + percent_cbd: hit.percent_cbd ?? null, + price_gram: hit.price_gram ?? null, + price_each: hit.price_each ?? null, + price_eighth_ounce: hit.price_eighth_ounce ?? null, + price_quarter_ounce: hit.price_quarter_ounce ?? null, + price_half_ounce: hit.price_half_ounce ?? null, + price_ounce: hit.price_ounce ?? null, + image_urls: hit.image_urls || [], + aggregate_rating: hit.aggregate_rating ?? null, + review_count: hit.review_count ?? null, + available_for_pickup: hit.available_for_pickup ?? false, + available_for_delivery: hit.available_for_delivery ?? 
false, + raw: hit, + })); + + return { + products, + totalCaptured: products.length, + responses: [], + }; + } finally { + await endSession(); + } +} + // ============================================================ // DISCOVERY OPERATIONS // ============================================================ @@ -175,91 +281,117 @@ export interface DiscoveredStore { recreational: boolean; productCount: number; urlSlug: string; + storeTypes: string[]; // e.g., ["recreational"] or ["medical"] } /** * Discover Jane stores in a state - * Navigates to Jane's store locator and extracts store data + * Uses Algolia stores-production index via browser context (bypasses Cloudflare) * * @param stateCode - Two-letter state code (e.g., 'AZ') * @returns Array of discovered stores */ export async function discoverStoresByState(stateCode: string): Promise { const stores: DiscoveredStore[] = []; + const stateName = getStateName(stateCode); try { await startSession(); const { page } = (await import('./client')).getCurrentSession()!; - // Jane has a store directory at /stores - // Try state-specific URL first - const storeListUrl = `https://www.iheartjane.com/stores?state=${stateCode}`; - - console.log(`[Jane Queries] Discovering stores in ${stateCode}: ${storeListUrl}`); - - await page.setRequestInterception(true); - - // Capture store list responses - const storeResponses: any[] = []; - - page.on('request', (req) => { - const type = req.resourceType(); - if (['image', 'font', 'media', 'stylesheet'].includes(type)) { - req.abort(); - } else { - req.continue(); - } + // First visit Jane's stores page to establish browser session (bypasses Cloudflare) + console.log(`[Jane Queries] Establishing browser session...`); + await page.goto('https://www.iheartjane.com/stores', { + waitUntil: 'domcontentloaded', + timeout: 30000, }); - page.on('response', async (response) => { - const url = response.url(); - const contentType = response.headers()['content-type'] || ''; + // Wait for Cloudflare to 
pass + await new Promise((r) => setTimeout(r, 2000)); - if (url.includes('iheartjane.com') && contentType.includes('json')) { + // Use Algolia stores-production index - it returns full store data with state + console.log(`[Jane Queries] Searching Algolia for stores in ${stateName}...`); + + // Fetch all stores from Algolia using facet filter on state + const algoliaResults = await page.evaluate(async (state: string) => { + const results: any[] = []; + let page = 0; + const hitsPerPage = 100; + + while (true) { try { - const json = await response.json(); - if (json.stores && Array.isArray(json.stores)) { - storeResponses.push(...json.stores); - console.log(`[Jane Queries] Captured ${json.stores.length} stores from API`); + const response = await fetch('https://search.iheartjane.com/1/indexes/stores-production/query', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + query: '', + hitsPerPage, + page, + facetFilters: [`state:${state}`], + }), + }); + + if (!response.ok) { + console.log(`Algolia request failed: ${response.status}`); + break; } - } catch { - // Not valid JSON + + const data = await response.json(); + if (!data.hits || data.hits.length === 0) { + break; + } + + results.push(...data.hits); + console.log(`Fetched page ${page}, got ${data.hits.length} stores (total: ${results.length})`); + + // Check if we've fetched all pages + if (page >= data.nbPages - 1) { + break; + } + + page++; + } catch (err) { + console.log(`Algolia error: ${err}`); + break; } } - }); - await page.goto(storeListUrl, { - waitUntil: 'networkidle2', - timeout: 60000, - }); + return results; + }, stateName); - // Wait for stores to load - await new Promise((r) => setTimeout(r, 3000)); + console.log(`[Jane Queries] Algolia returned ${algoliaResults.length} stores in ${stateName}`); - // Parse captured stores - for (const store of storeResponses) { - // Filter by state - if (store.state?.toLowerCase() === stateCode.toLowerCase() || - 
store.state?.toLowerCase() === getStateName(stateCode).toLowerCase()) { - stores.push({ - storeId: String(store.id), - name: store.name || '', - address: store.address || '', - city: store.city || '', - state: store.state || stateCode, - zip: store.zip || '', - lat: store.lat || 0, - long: store.long || 0, - medical: store.medical || false, - recreational: store.recreational || false, - productCount: store.product_count || 0, - urlSlug: store.url_slug || '', - }); + // Dedupe by location - prefer REC stores over MED + // Group by coordinates (rounded to ~100m precision) + const locationMap = new Map(); + + for (const hit of algoliaResults) { + const lat = hit._geoloc?.lat?.toFixed(3) || '0'; + const lng = hit._geoloc?.lng?.toFixed(3) || '0'; + const locationKey = `${lat},${lng}`; + + const existing = locationMap.get(locationKey); + if (!existing) { + // First store at this location + locationMap.set(locationKey, hit); + } else if (hit.recreational && !existing.recreational) { + // Prefer REC over MED at same location + locationMap.set(locationKey, hit); } + // Otherwise keep existing (which is either REC or first MED) } - console.log(`[Jane Queries] Found ${stores.length} stores in ${stateCode}`); + // Parse deduplicated stores + for (const hit of locationMap.values()) { + stores.push(parseAlgoliaStore(hit)); + } + + const recCount = stores.filter(s => s.recreational).length; + const medOnlyCount = stores.filter(s => s.medical && !s.recreational).length; + console.log(`[Jane Queries] Found ${stores.length} unique locations in ${stateCode} (${recCount} REC, ${medOnlyCount} MED-only)`); return stores; } finally { @@ -267,6 +399,30 @@ export async function discoverStoresByState(stateCode: string): Promise p._id || p.id) - .filter(Boolean); + .map((p: any) => p._id || p.product_id || p.id) + .filter(Boolean) + .map(String); // Ensure all IDs are strings // Reset consecutive_misses for products that ARE in the feed if (currentProductIds.length > 0) { @@ -263,9 +273,9 @@ 
export async function handleProductRefresh(ctx: TaskContext): Promise 0) { @@ -289,11 +299,11 @@ export async function handleProductRefresh(ctx: TaskContext): Promise= 3 AND stock_status != 'oos' RETURNING id - `, [dispensaryId]); + `, [dispensaryId, platform]); const markedOosCount = oosResult.rowCount || 0; if (markedOosCount > 0) {