feat(jane): Direct Algolia product fetch and multi-platform product-refresh
- Add fetchProductsByStoreIdDirect() for reliable Algolia product fetching
- Update product-discovery-jane to use direct Algolia instead of network interception
- Fix product-refresh handler to support both Dutchie and Jane payloads
- Handle both `products` (Dutchie) and `hits` (Jane) formats
- Use platform-appropriate raw_json structure for normalizers
- Fix consecutive_misses tracking to use correct provider
- Extract product IDs correctly (Dutchie _id vs Jane product_id)
- Add store discovery deduplication (prefer REC over MED at same location)
- Add storeTypes field to DiscoveredStore interface
- Add scripts: run-jane-store-discovery.ts, run-jane-product-discovery.ts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
138
backend/scripts/run-jane-product-discovery.ts
Normal file
138
backend/scripts/run-jane-product-discovery.ts
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
/**
|
||||||
|
* Run Jane product discovery for stores in database
|
||||||
|
* Usage: npx ts-node scripts/run-jane-product-discovery.ts [DISPENSARY_ID]
|
||||||
|
* Example: npx ts-node scripts/run-jane-product-discovery.ts 4220
|
||||||
|
* Or run for all Jane stores: npx ts-node scripts/run-jane-product-discovery.ts all
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Pool } from 'pg';
|
||||||
|
import { fetchProductsByStoreIdDirect } from '../src/platforms/jane';
|
||||||
|
import { saveRawPayload } from '../src/utils/payload-storage';
|
||||||
|
|
||||||
|
/**
 * Script entry point: fetch Jane products for one or more dispensaries,
 * store the raw payload, and update the dispensary row.
 *
 * The first CLI argument selects the dispensaries:
 *   - `all`      -> every row with platform = 'jane' and a non-null menu_url
 *   - `<number>` -> the single dispensary with that id
 *   - (omitted)  -> the first Jane dispensary by id
 *
 * Failures for an individual dispensary are logged and counted; they do not
 * abort the run. A fatal error sets a non-zero exit code.
 */
async function main(): Promise<void> {
  const arg = process.argv[2];
  const banner = '='.repeat(60);

  console.log(banner);
  console.log('Jane Product Discovery');
  console.log(banner);

  const pool = new Pool({
    connectionString: process.env.DATABASE_URL,
  });

  try {
    // Get dispensaries to process
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped pg rows
    let dispensaries: any[];

    if (arg === 'all') {
      const result = await pool.query(
        `SELECT id, name, menu_url, platform_dispensary_id
         FROM dispensaries
         WHERE platform = 'jane' AND menu_url IS NOT NULL
         ORDER BY id`
      );
      dispensaries = result.rows;
    } else if (arg) {
      // Reject anything that is not a plain non-negative integer up front, so
      // a typo produces a clear message instead of a Postgres "NaN" error.
      if (!/^\d+$/.test(arg)) {
        throw new Error(`Invalid DISPENSARY_ID "${arg}" (expected an integer or "all")`);
      }
      const result = await pool.query(
        `SELECT id, name, menu_url, platform_dispensary_id
         FROM dispensaries
         WHERE id = $1`,
        [Number.parseInt(arg, 10)]
      );
      dispensaries = result.rows;
    } else {
      // Default: get first Jane store
      const result = await pool.query(
        `SELECT id, name, menu_url, platform_dispensary_id
         FROM dispensaries
         WHERE platform = 'jane' AND menu_url IS NOT NULL
         ORDER BY id LIMIT 1`
      );
      dispensaries = result.rows;
    }

    if (dispensaries.length === 0) {
      console.log('No Jane dispensaries found');
      return;
    }

    console.log(`Processing ${dispensaries.length} dispensary(ies)...\n`);

    let successCount = 0;
    let failCount = 0;

    for (const disp of dispensaries) {
      console.log(`\n${'─'.repeat(60)}`);
      console.log(`${disp.name} (ID: ${disp.id}, Jane ID: ${disp.platform_dispensary_id})`);
      console.log('─'.repeat(60));

      // The by-id query does not filter on platform, so the row may have no
      // Jane store ID. Passing null on would be stringified into the filter.
      if (disp.platform_dispensary_id == null || disp.platform_dispensary_id === '') {
        console.log(' ✗ Skipped: no platform_dispensary_id (Jane store ID)');
        failCount++;
        continue;
      }

      try {
        const result = await fetchProductsByStoreIdDirect(disp.platform_dispensary_id);

        if (result.products.length === 0) {
          console.log(' ✗ No products captured');
          failCount++;
          continue;
        }

        console.log(` ✓ Captured ${result.products.length} products`);

        // Build payload. `hits` holds the raw Algolia records for each product.
        const rawPayload = {
          hits: result.products.map(p => p.raw),
          store: result.store?.raw || null,
          capturedAt: new Date().toISOString(),
          platform: 'jane',
          dispensaryId: disp.id,
          storeId: disp.platform_dispensary_id,
        };

        // Save payload
        const { id: payloadId, sizeBytes } = await saveRawPayload(
          pool,
          disp.id,
          rawPayload,
          null,
          result.products.length,
          'jane'
        );

        console.log(` ✓ Saved payload ${payloadId} (${Math.round(sizeBytes / 1024)}KB)`);

        // Update dispensary
        await pool.query(
          `UPDATE dispensaries
           SET stage = 'hydrating',
               last_fetch_at = NOW(),
               product_count = $2,
               consecutive_successes = consecutive_successes + 1,
               consecutive_failures = 0,
               updated_at = NOW()
           WHERE id = $1`,
          [disp.id, result.products.length]
        );

        console.log(` ✓ Updated dispensary (product_count: ${result.products.length})`);
        successCount++;
      } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        console.log(` ✗ Error: ${message}`);
        failCount++;
      }
    }

    console.log('\n' + banner);
    console.log('RESULTS');
    console.log(banner);
    console.log(`Success: ${successCount}`);
    console.log(`Failed: ${failCount}`);
  } catch (error: unknown) {
    console.error('Error:', error instanceof Error ? error.message : String(error));
    // Set the exit code instead of calling process.exit(1): exiting here
    // would terminate before the finally block closes the pool.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}

main().catch((error: unknown) => {
  // Only reachable if closing the pool itself rejects.
  console.error('Error:', error instanceof Error ? error.message : String(error));
  process.exitCode = 1;
});
|
||||||
137
backend/scripts/run-jane-store-discovery.ts
Normal file
137
backend/scripts/run-jane-store-discovery.ts
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
/**
|
||||||
|
* Run Jane store discovery and insert into database
|
||||||
|
* Usage: npx ts-node scripts/run-jane-store-discovery.ts [STATE_CODE]
|
||||||
|
* Example: npx ts-node scripts/run-jane-store-discovery.ts AZ
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Pool } from 'pg';
|
||||||
|
import { discoverStoresByState } from '../src/platforms/jane';
|
||||||
|
|
||||||
|
/**
 * Build a URL-friendly slug from a store name.
 *
 * The name is lower-cased, every character other than a-z, 0-9, whitespace
 * and hyphen is dropped (this covers parentheses too), whitespace runs become
 * a single hyphen, repeated hyphens are collapsed, and one leading/trailing
 * hyphen is trimmed.
 *
 * @example generateSlug("Hana Meds - Phoenix (REC)") // "hana-meds-phoenix-rec"
 * @param name - Display name of the store
 * @returns The slug; may be empty if the name has no usable characters
 */
function generateSlug(name: string): string {
  const lowered = name.toLowerCase();
  const allowedOnly = lowered.replace(/[^a-z0-9\s-]/g, '');
  const hyphenated = allowedOnly.replace(/\s+/g, '-');
  const collapsed = hyphenated.replace(/-{2,}/g, '-');
  // After collapsing there is at most one hyphen at either end.
  return collapsed.replace(/^-/, '').replace(/-$/, '');
}
|
||||||
|
|
||||||
|
/**
 * Script entry point: discover Jane stores for one state and upsert them into
 * the `dispensaries` table.
 *
 * The state code comes from the first CLI argument (default 'AZ'). It is
 * trimmed and upper-cased before being passed to discovery and stored.
 *
 * Each store is upserted on `platform_dispensary_id`. A failure for one store
 * is logged and counted without aborting the run. A fatal error sets a
 * non-zero exit code.
 */
async function main(): Promise<void> {
  const stateCode = (process.argv[2] ?? '').trim().toUpperCase() || 'AZ';
  const banner = '='.repeat(60);

  console.log(banner);
  console.log(`Jane Store Discovery - ${stateCode}`);
  console.log(banner);

  // Connect to database
  const pool = new Pool({
    connectionString: process.env.DATABASE_URL,
  });

  try {
    // Test connection (also reports the starting row count)
    const testResult = await pool.query('SELECT COUNT(*) FROM dispensaries WHERE platform = $1', ['jane']);
    console.log(`Current Jane stores in DB: ${testResult.rows[0].count}`);

    // Discover stores
    console.log(`\nDiscovering Jane stores in ${stateCode}...`);
    const stores = await discoverStoresByState(stateCode);

    if (stores.length === 0) {
      console.log(`No stores found in ${stateCode}`);
      return;
    }

    console.log(`\nFound ${stores.length} stores. Inserting into database...`);

    // Insert stores
    let inserted = 0;
    let updated = 0;
    let failed = 0;
    const newIds: number[] = [];

    for (const store of stores) {
      const menuUrl = `https://www.iheartjane.com/stores/${store.storeId}/${store.urlSlug || 'menu'}`;
      const slug = generateSlug(store.name);

      try {
        // `xmax = 0` is true only for a freshly inserted row, which separates
        // inserts from conflict-updates in a single round trip.
        // NOTE(review): the conflict branch refreshes address1/city but not
        // zipcode or state — confirm whether that is intentional.
        const result = await pool.query(
          `INSERT INTO dispensaries (
             name, slug, address1, city, state, zipcode,
             latitude, longitude, menu_url, menu_type, platform,
             platform_dispensary_id, is_medical, is_recreational,
             stage, created_at, updated_at
           ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, NOW(), NOW())
           ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL
           DO UPDATE SET
             name = EXCLUDED.name,
             slug = EXCLUDED.slug,
             address1 = EXCLUDED.address1,
             city = EXCLUDED.city,
             latitude = EXCLUDED.latitude,
             longitude = EXCLUDED.longitude,
             menu_url = EXCLUDED.menu_url,
             is_medical = EXCLUDED.is_medical,
             is_recreational = EXCLUDED.is_recreational,
             updated_at = NOW()
           RETURNING id, (xmax = 0) AS is_new`,
          [
            store.name,
            slug,
            store.address,
            store.city,
            stateCode,
            store.zip,
            store.lat,
            store.long,
            menuUrl,
            'embedded', // menu_type: how it's displayed
            'jane', // platform: who provides the menu
            store.storeId,
            store.medical,
            store.recreational,
            'discovered',
          ]
        );

        if (result.rows.length > 0) {
          const { id, is_new } = result.rows[0];
          if (is_new) {
            inserted++;
            newIds.push(id);
            console.log(` + Inserted: ${store.name} (DB ID: ${id}, Jane ID: ${store.storeId})`);
          } else {
            updated++;
            console.log(` ~ Updated: ${store.name} (DB ID: ${id})`);
          }
        }
      } catch (error: unknown) {
        failed++;
        const message = error instanceof Error ? error.message : String(error);
        console.error(` ! Error inserting ${store.name}: ${message}`);
      }
    }

    console.log('\n' + banner);
    console.log('RESULTS');
    console.log(banner);
    console.log(`Stores discovered: ${stores.length}`);
    console.log(`New stores inserted: ${inserted}`);
    console.log(`Existing stores updated: ${updated}`);
    console.log(`Stores failed: ${failed}`);
    console.log(`New dispensary IDs: ${newIds.join(', ') || '(none)'}`);

    // Show final count
    const finalResult = await pool.query('SELECT COUNT(*) FROM dispensaries WHERE platform = $1', ['jane']);
    console.log(`\nTotal Jane stores in DB: ${finalResult.rows[0].count}`);
  } catch (error: unknown) {
    console.error('Error:', error instanceof Error ? error.message : String(error));
    // Set the exit code instead of calling process.exit(1): exiting here
    // would terminate before the finally block closes the pool.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}

main().catch((error: unknown) => {
  // Only reachable if closing the pool itself rejects.
  console.error('Error:', error instanceof Error ? error.message : String(error));
  process.exitCode = 1;
});
|
||||||
50
backend/scripts/test-jane-discovery-az.ts
Normal file
50
backend/scripts/test-jane-discovery-az.ts
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
/**
|
||||||
|
* Smoke test: Discover Jane stores in Arizona
|
||||||
|
* Usage: npx ts-node scripts/test-jane-discovery-az.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { discoverStoresByState } from '../src/platforms/jane';
|
||||||
|
|
||||||
|
/**
 * Smoke-test entry point: run Jane store discovery for Arizona and print a
 * summary plus up to ten sample stores. Exits with code 1 if discovery throws.
 */
async function main() {
  const rule = '='.repeat(60);
  const sampleSize = 10;

  console.log(rule);
  console.log('Jane Store Discovery - Arizona Smoke Test');
  console.log(rule);
  console.log('Using local IP (no proxy)\n');

  try {
    const stores = await discoverStoresByState('AZ');
    const total = stores.length;

    console.log(`\n${rule}`);
    console.log(`RESULTS: Found ${total} Jane stores in Arizona`);
    console.log(rule);

    if (total > 0) {
      console.log('\nSample stores:');

      stores.slice(0, sampleSize).forEach((store) => {
        const types = store.storeTypes?.join(', ') || 'unknown';
        const products = store.productCount || 'N/A';

        console.log(` - ${store.name}`);
        console.log(` ID: ${store.storeId} | ${store.city}, AZ`);
        console.log(` Types: ${types}`);
        console.log(` Products: ${products}`);
        console.log('');
      });

      // Report how many stores were left out of the sample listing.
      if (total > sampleSize) {
        console.log(` ... and ${total - 10} more stores`);
      }
    }

    console.log(`\n${rule}`);
    console.log('SMOKE TEST PASSED');
    console.log(rule);
  } catch (error: any) {
    console.error(`\n${rule}`);
    console.error('SMOKE TEST FAILED');
    console.error(rule);
    console.error(`Error: ${error.message}`);
    console.error(error.stack);
    process.exit(1);
  }
}

main();
|
||||||
@@ -36,6 +36,7 @@ export {
|
|||||||
getStoreById,
|
getStoreById,
|
||||||
fetchProductsFromUrl,
|
fetchProductsFromUrl,
|
||||||
fetchProductsByStoreId,
|
fetchProductsByStoreId,
|
||||||
|
fetchProductsByStoreIdDirect,
|
||||||
discoverStoresByState,
|
discoverStoresByState,
|
||||||
|
|
||||||
// Types
|
// Types
|
||||||
|
|||||||
@@ -158,6 +158,112 @@ export async function fetchProductsByStoreId(
|
|||||||
return fetchProductsFromUrl(menuUrl);
|
return fetchProductsFromUrl(menuUrl);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetch ALL products for a store by querying the Algolia product index from
 * inside the browser page, instead of intercepting network traffic.
 *
 * Flow: start a session, load https://www.iheartjane.com/stores, wait two
 * seconds, then POST paginated queries (100 hits per page) to the
 * `menu-products-production` index until the last page or an empty page.
 * The session is always ended afterwards.
 *
 * If any page request fails, the function throws rather than returning the
 * pages collected so far. A partial list is indistinguishable from a complete
 * catalogue for callers that compare it against previously seen products.
 *
 * NOTE(review): Algolia indices can cap how deep pagination goes; confirm
 * stores with very large menus are not silently truncated.
 *
 * @param storeId - Jane store ID
 * @returns Products fetched from Algolia (`responses` is always empty here)
 * @throws Error if the store ID is empty, no browser session is available,
 *         or an Algolia request fails
 */
export async function fetchProductsByStoreIdDirect(
  storeId: string | number
): Promise<FetchProductsResult> {
  const storeIdText = String(storeId ?? '').trim();
  if (storeIdText === '') {
    throw new Error('[Jane Queries] fetchProductsByStoreIdDirect requires a store ID');
  }

  try {
    await startSession();

    const session = (await import('./client')).getCurrentSession();
    if (!session) {
      throw new Error('[Jane Queries] No active browser session after startSession()');
    }
    const { page } = session;

    // Visit Jane to establish browser session (intended to get past Cloudflare)
    console.log(`[Jane Queries] Establishing browser session...`);
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'domcontentloaded',
      timeout: 30000,
    });
    await new Promise((r) => setTimeout(r, 2000));

    // Fetch all products via Algolia from browser context
    console.log(`[Jane Queries] Fetching products for store ${storeId} via Algolia...`);

    // This callback is serialized and runs inside the page, so it must be
    // self-contained. console.log here writes to the page console, which is
    // why failures are also returned to the Node side.
    const { hits: algoliaResults, error: algoliaError } = await page.evaluate(async (sid: string) => {
      const results: any[] = [];
      let failure: string | null = null;
      let pageNum = 0;
      const hitsPerPage = 100;

      while (true) {
        try {
          const response = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
              query: '',
              hitsPerPage,
              page: pageNum,
              filters: `store_id=${sid}`,
            }),
          });

          if (!response.ok) {
            failure = `Algolia request failed: ${response.status}`;
            console.log(failure);
            break;
          }

          const data = await response.json();
          if (!data.hits || data.hits.length === 0) {
            break;
          }

          results.push(...data.hits);
          console.log(`Fetched page ${pageNum}, got ${data.hits.length} products (total: ${results.length})`);

          // Algolia pages are zero-based; stop after the last one.
          if (pageNum >= data.nbPages - 1) {
            break;
          }

          pageNum++;
        } catch (err) {
          failure = `Algolia error: ${err}`;
          console.log(failure);
          break;
        }
      }

      return { hits: results, error: failure };
    }, storeIdText);

    if (algoliaError) {
      throw new Error(
        `[Jane Queries] Product fetch for store ${storeId} failed after ${algoliaResults.length} products: ${algoliaError}`
      );
    }

    console.log(`[Jane Queries] Algolia returned ${algoliaResults.length} products`);

    // Parse into JaneProductHit format; the untouched hit is kept under `raw`.
    const products: JaneProductHit[] = algoliaResults.map((hit: any) => ({
      product_id: hit.product_id,
      name: hit.name,
      brand: hit.brand,
      kind: hit.kind,
      category: hit.category,
      percent_thc: hit.percent_thc ?? null,
      percent_cbd: hit.percent_cbd ?? null,
      price_gram: hit.price_gram ?? null,
      price_each: hit.price_each ?? null,
      price_eighth_ounce: hit.price_eighth_ounce ?? null,
      price_quarter_ounce: hit.price_quarter_ounce ?? null,
      price_half_ounce: hit.price_half_ounce ?? null,
      price_ounce: hit.price_ounce ?? null,
      image_urls: hit.image_urls || [],
      aggregate_rating: hit.aggregate_rating ?? null,
      review_count: hit.review_count ?? null,
      available_for_pickup: hit.available_for_pickup ?? false,
      available_for_delivery: hit.available_for_delivery ?? false,
      raw: hit,
    }));

    return {
      products,
      totalCaptured: products.length,
      responses: [],
    };
  } finally {
    await endSession();
  }
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// DISCOVERY OPERATIONS
|
// DISCOVERY OPERATIONS
|
||||||
// ============================================================
|
// ============================================================
|
||||||
@@ -175,91 +281,117 @@ export interface DiscoveredStore {
|
|||||||
recreational: boolean;
|
recreational: boolean;
|
||||||
productCount: number;
|
productCount: number;
|
||||||
urlSlug: string;
|
urlSlug: string;
|
||||||
|
storeTypes: string[]; // e.g., ["recreational"] or ["medical"]
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Discover Jane stores in a state
|
* Discover Jane stores in a state
|
||||||
* Navigates to Jane's store locator and extracts store data
|
* Uses Algolia stores-production index via browser context (bypasses Cloudflare)
|
||||||
*
|
*
|
||||||
* @param stateCode - Two-letter state code (e.g., 'AZ')
|
* @param stateCode - Two-letter state code (e.g., 'AZ')
|
||||||
* @returns Array of discovered stores
|
* @returns Array of discovered stores
|
||||||
*/
|
*/
|
||||||
export async function discoverStoresByState(stateCode: string): Promise<DiscoveredStore[]> {
|
export async function discoverStoresByState(stateCode: string): Promise<DiscoveredStore[]> {
|
||||||
const stores: DiscoveredStore[] = [];
|
const stores: DiscoveredStore[] = [];
|
||||||
|
const stateName = getStateName(stateCode);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await startSession();
|
await startSession();
|
||||||
|
|
||||||
const { page } = (await import('./client')).getCurrentSession()!;
|
const { page } = (await import('./client')).getCurrentSession()!;
|
||||||
|
|
||||||
// Jane has a store directory at /stores
|
// First visit Jane's stores page to establish browser session (bypasses Cloudflare)
|
||||||
// Try state-specific URL first
|
console.log(`[Jane Queries] Establishing browser session...`);
|
||||||
const storeListUrl = `https://www.iheartjane.com/stores?state=${stateCode}`;
|
await page.goto('https://www.iheartjane.com/stores', {
|
||||||
|
waitUntil: 'domcontentloaded',
|
||||||
console.log(`[Jane Queries] Discovering stores in ${stateCode}: ${storeListUrl}`);
|
timeout: 30000,
|
||||||
|
|
||||||
await page.setRequestInterception(true);
|
|
||||||
|
|
||||||
// Capture store list responses
|
|
||||||
const storeResponses: any[] = [];
|
|
||||||
|
|
||||||
page.on('request', (req) => {
|
|
||||||
const type = req.resourceType();
|
|
||||||
if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
|
|
||||||
req.abort();
|
|
||||||
} else {
|
|
||||||
req.continue();
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
page.on('response', async (response) => {
|
// Wait for Cloudflare to pass
|
||||||
const url = response.url();
|
await new Promise((r) => setTimeout(r, 2000));
|
||||||
const contentType = response.headers()['content-type'] || '';
|
|
||||||
|
|
||||||
if (url.includes('iheartjane.com') && contentType.includes('json')) {
|
// Use Algolia stores-production index - it returns full store data with state
|
||||||
|
console.log(`[Jane Queries] Searching Algolia for stores in ${stateName}...`);
|
||||||
|
|
||||||
|
// Fetch all stores from Algolia using facet filter on state
|
||||||
|
const algoliaResults = await page.evaluate(async (state: string) => {
|
||||||
|
const results: any[] = [];
|
||||||
|
let page = 0;
|
||||||
|
const hitsPerPage = 100;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
try {
|
try {
|
||||||
const json = await response.json();
|
const response = await fetch('https://search.iheartjane.com/1/indexes/stores-production/query', {
|
||||||
if (json.stores && Array.isArray(json.stores)) {
|
method: 'POST',
|
||||||
storeResponses.push(...json.stores);
|
headers: {
|
||||||
console.log(`[Jane Queries] Captured ${json.stores.length} stores from API`);
|
'Content-Type': 'application/json',
|
||||||
|
},
|
||||||
|
body: JSON.stringify({
|
||||||
|
query: '',
|
||||||
|
hitsPerPage,
|
||||||
|
page,
|
||||||
|
facetFilters: [`state:${state}`],
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
console.log(`Algolia request failed: ${response.status}`);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
} catch {
|
|
||||||
// Not valid JSON
|
const data = await response.json();
|
||||||
|
if (!data.hits || data.hits.length === 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
results.push(...data.hits);
|
||||||
|
console.log(`Fetched page ${page}, got ${data.hits.length} stores (total: ${results.length})`);
|
||||||
|
|
||||||
|
// Check if we've fetched all pages
|
||||||
|
if (page >= data.nbPages - 1) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
page++;
|
||||||
|
} catch (err) {
|
||||||
|
console.log(`Algolia error: ${err}`);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
|
||||||
|
|
||||||
await page.goto(storeListUrl, {
|
return results;
|
||||||
waitUntil: 'networkidle2',
|
}, stateName);
|
||||||
timeout: 60000,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Wait for stores to load
|
console.log(`[Jane Queries] Algolia returned ${algoliaResults.length} stores in ${stateName}`);
|
||||||
await new Promise((r) => setTimeout(r, 3000));
|
|
||||||
|
|
||||||
// Parse captured stores
|
// Dedupe by location - prefer REC stores over MED
|
||||||
for (const store of storeResponses) {
|
// Group by coordinates (rounded to ~100m precision)
|
||||||
// Filter by state
|
const locationMap = new Map<string, any>();
|
||||||
if (store.state?.toLowerCase() === stateCode.toLowerCase() ||
|
|
||||||
store.state?.toLowerCase() === getStateName(stateCode).toLowerCase()) {
|
for (const hit of algoliaResults) {
|
||||||
stores.push({
|
const lat = hit._geoloc?.lat?.toFixed(3) || '0';
|
||||||
storeId: String(store.id),
|
const lng = hit._geoloc?.lng?.toFixed(3) || '0';
|
||||||
name: store.name || '',
|
const locationKey = `${lat},${lng}`;
|
||||||
address: store.address || '',
|
|
||||||
city: store.city || '',
|
const existing = locationMap.get(locationKey);
|
||||||
state: store.state || stateCode,
|
if (!existing) {
|
||||||
zip: store.zip || '',
|
// First store at this location
|
||||||
lat: store.lat || 0,
|
locationMap.set(locationKey, hit);
|
||||||
long: store.long || 0,
|
} else if (hit.recreational && !existing.recreational) {
|
||||||
medical: store.medical || false,
|
// Prefer REC over MED at same location
|
||||||
recreational: store.recreational || false,
|
locationMap.set(locationKey, hit);
|
||||||
productCount: store.product_count || 0,
|
|
||||||
urlSlug: store.url_slug || '',
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
// Otherwise keep existing (which is either REC or first MED)
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`[Jane Queries] Found ${stores.length} stores in ${stateCode}`);
|
// Parse deduplicated stores
|
||||||
|
for (const hit of locationMap.values()) {
|
||||||
|
stores.push(parseAlgoliaStore(hit));
|
||||||
|
}
|
||||||
|
|
||||||
|
const recCount = stores.filter(s => s.recreational).length;
|
||||||
|
const medOnlyCount = stores.filter(s => s.medical && !s.recreational).length;
|
||||||
|
console.log(`[Jane Queries] Found ${stores.length} unique locations in ${stateCode} (${recCount} REC, ${medOnlyCount} MED-only)`);
|
||||||
|
|
||||||
return stores;
|
return stores;
|
||||||
} finally {
|
} finally {
|
||||||
@@ -267,6 +399,30 @@ export async function discoverStoresByState(stateCode: string): Promise<Discover
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Parse an Algolia store hit into DiscoveredStore format.
 *
 * - `storeId` is `objectID` (falling back to `id`), always as a string.
 * - `address` is the text before the first comma of `full_address`.
 * - `zip` is the LAST ZIP-shaped token (5 digits, optional -4) in
 *   `full_address`, so a five-digit street number at the start is not
 *   mistaken for the ZIP code.
 * - Missing coordinates and counts default to 0, missing strings to ''.
 * - `storeTypes` lists 'recreational' and/or 'medical' per the hit's flags.
 *
 * @param hit - Raw store record from the Algolia stores index
 * @returns The normalized store
 */
function parseAlgoliaStore(hit: any): DiscoveredStore {
  const fullAddress: string = typeof hit.full_address === 'string' ? hit.full_address : '';

  // Word boundaries stop a longer digit run from yielding a false 5-digit match.
  const zipCandidates = fullAddress.match(/\b\d{5}(?:-\d{4})?\b/g);
  const zip = zipCandidates ? zipCandidates[zipCandidates.length - 1] : '';

  return {
    storeId: String(hit.objectID || hit.id),
    name: hit.name || '',
    address: fullAddress.split(',')[0]?.trim() || '',
    city: hit.city || '',
    state: hit.state || '',
    zip,
    lat: hit._geoloc?.lat || 0,
    long: hit._geoloc?.lng || 0,
    medical: hit.medical || false,
    recreational: hit.recreational || false,
    productCount: hit.product_count || 0,
    urlSlug: hit.url_slug || '',
    storeTypes: [
      ...(hit.recreational ? ['recreational'] : []),
      ...(hit.medical ? ['medical'] : []),
    ],
  };
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// UTILITY
|
// UTILITY
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|||||||
@@ -12,10 +12,8 @@
|
|||||||
|
|
||||||
import { TaskContext, TaskResult } from '../task-worker';
|
import { TaskContext, TaskResult } from '../task-worker';
|
||||||
import {
|
import {
|
||||||
startSession,
|
|
||||||
endSession,
|
|
||||||
setCrawlRotator,
|
setCrawlRotator,
|
||||||
fetchProductsFromUrl,
|
fetchProductsByStoreIdDirect,
|
||||||
} from '../../platforms/jane';
|
} from '../../platforms/jane';
|
||||||
import { saveRawPayload } from '../../utils/payload-storage';
|
import { saveRawPayload } from '../../utils/payload-storage';
|
||||||
import { taskService } from '../task-service';
|
import { taskService } from '../task-service';
|
||||||
@@ -50,22 +48,22 @@ export async function handleProductDiscoveryJane(ctx: TaskContext): Promise<Task
|
|||||||
|
|
||||||
const dispensary = dispResult.rows[0];
|
const dispensary = dispResult.rows[0];
|
||||||
|
|
||||||
if (!dispensary.menu_url) {
|
if (!dispensary.platform_dispensary_id) {
|
||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
error: `Dispensary ${dispensaryId} has no menu_url`,
|
error: `Dispensary ${dispensaryId} has no platform_dispensary_id (Jane store ID)`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`[JaneProductDiscovery] Fetching products from: ${dispensary.menu_url}`);
|
console.log(`[JaneProductDiscovery] Fetching products for Jane store ${dispensary.platform_dispensary_id}`);
|
||||||
|
|
||||||
// Attach crawl rotator
|
// Attach crawl rotator
|
||||||
if (crawlRotator) {
|
if (crawlRotator) {
|
||||||
setCrawlRotator(crawlRotator);
|
setCrawlRotator(crawlRotator);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fetch products
|
// Fetch products directly via Algolia
|
||||||
const result = await fetchProductsFromUrl(dispensary.menu_url);
|
const result = await fetchProductsByStoreIdDirect(dispensary.platform_dispensary_id);
|
||||||
|
|
||||||
if (result.products.length === 0) {
|
if (result.products.length === 0) {
|
||||||
console.warn(`[JaneProductDiscovery] No products captured for dispensary ${dispensaryId}`);
|
console.warn(`[JaneProductDiscovery] No products captured for dispensary ${dispensaryId}`);
|
||||||
|
|||||||
@@ -149,18 +149,21 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
|||||||
console.log(`[ProductRefresh] Loaded latest payload #${payloadId} (${result.metadata.fetchedAt})`);
|
console.log(`[ProductRefresh] Loaded latest payload #${payloadId} (${result.metadata.fetchedAt})`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const allProducts = payloadData.products || [];
|
// Extract products - handle both Dutchie (products) and Jane (hits) formats
|
||||||
|
const allProducts = payloadData.products || payloadData.hits || [];
|
||||||
|
|
||||||
if (allProducts.length === 0) {
|
if (allProducts.length === 0) {
|
||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
error: 'Payload contains no products',
|
error: 'Payload contains no products (checked both products and hits)',
|
||||||
payloadId,
|
payloadId,
|
||||||
productsProcessed: 0,
|
productsProcessed: 0,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`[ProductRefresh] Processing ${allProducts.length} products from payload #${payloadId}`);
|
// Detect platform from payload
|
||||||
|
const detectedPlatform = payloadData.platform || dispensary.platform || 'dutchie';
|
||||||
|
console.log(`[ProductRefresh] Processing ${allProducts.length} products from payload #${payloadId} (platform: ${detectedPlatform})`);
|
||||||
|
|
||||||
await ctx.heartbeat();
|
await ctx.heartbeat();
|
||||||
|
|
||||||
@@ -170,14 +173,22 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
|||||||
updateStep('normalizing', `Normalizing ${allProducts.length} products`);
|
updateStep('normalizing', `Normalizing ${allProducts.length} products`);
|
||||||
console.log(`[ProductRefresh] Normalizing ${allProducts.length} products...`);
|
console.log(`[ProductRefresh] Normalizing ${allProducts.length} products...`);
|
||||||
|
|
||||||
|
// Select normalizer based on detected platform
|
||||||
|
const platform = detectedPlatform === 'jane' ? 'jane' : (dispensary.platform || 'dutchie');
|
||||||
|
|
||||||
// Build RawPayload for the normalizer
|
// Build RawPayload for the normalizer
|
||||||
|
// Use platform-appropriate structure
|
||||||
const rawPayload = {
|
const rawPayload = {
|
||||||
id: `refresh-${dispensaryId}-${Date.now()}`,
|
id: `refresh-${dispensaryId}-${Date.now()}`,
|
||||||
dispensary_id: dispensaryId,
|
dispensary_id: dispensaryId,
|
||||||
crawl_run_id: null,
|
crawl_run_id: null,
|
||||||
platform: 'dutchie',
|
platform,
|
||||||
payload_version: 1,
|
payload_version: 1,
|
||||||
raw_json: { data: { filteredProducts: { products: allProducts } } },
|
// For Dutchie: { data: { filteredProducts: { products: [...] } } }
|
||||||
|
// For Jane: { hits: [...] }
|
||||||
|
raw_json: platform === 'jane'
|
||||||
|
? { hits: allProducts }
|
||||||
|
: { data: { filteredProducts: { products: allProducts } } },
|
||||||
product_count: allProducts.length,
|
product_count: allProducts.length,
|
||||||
pricing_type: 'dual',
|
pricing_type: 'dual',
|
||||||
crawl_mode: 'dual_mode',
|
crawl_mode: 'dual_mode',
|
||||||
@@ -188,9 +199,6 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
|||||||
hydration_attempts: 0,
|
hydration_attempts: 0,
|
||||||
created_at: new Date(),
|
created_at: new Date(),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Select normalizer based on dispensary platform (menu_type)
|
|
||||||
const platform = dispensary.menu_type || 'dutchie';
|
|
||||||
const normalizer = getNormalizer(platform);
|
const normalizer = getNormalizer(platform);
|
||||||
console.log(`[ProductRefresh] Using ${platform} normalizer for ${dispensary.name}`);
|
console.log(`[ProductRefresh] Using ${platform} normalizer for ${dispensary.name}`);
|
||||||
|
|
||||||
@@ -253,9 +261,11 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
|||||||
// - Products not in feed: increment consecutive_misses
|
// - Products not in feed: increment consecutive_misses
|
||||||
// - At 3 consecutive misses: mark as OOS
|
// - At 3 consecutive misses: mark as OOS
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
// Extract product IDs - Dutchie uses _id, Jane uses product_id
|
||||||
const currentProductIds = allProducts
|
const currentProductIds = allProducts
|
||||||
.map((p: any) => p._id || p.id)
|
.map((p: any) => p._id || p.product_id || p.id)
|
||||||
.filter(Boolean);
|
.filter(Boolean)
|
||||||
|
.map(String); // Ensure all IDs are strings
|
||||||
|
|
||||||
// Reset consecutive_misses for products that ARE in the feed
|
// Reset consecutive_misses for products that ARE in the feed
|
||||||
if (currentProductIds.length > 0) {
|
if (currentProductIds.length > 0) {
|
||||||
@@ -263,9 +273,9 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
|||||||
UPDATE store_products
|
UPDATE store_products
|
||||||
SET consecutive_misses = 0, last_seen_at = NOW()
|
SET consecutive_misses = 0, last_seen_at = NOW()
|
||||||
WHERE dispensary_id = $1
|
WHERE dispensary_id = $1
|
||||||
AND provider = 'dutchie'
|
AND provider = $3
|
||||||
AND provider_product_id = ANY($2)
|
AND provider_product_id = ANY($2)
|
||||||
`, [dispensaryId, currentProductIds]);
|
`, [dispensaryId, currentProductIds, platform]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Increment consecutive_misses for products NOT in the feed
|
// Increment consecutive_misses for products NOT in the feed
|
||||||
@@ -273,11 +283,11 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
|||||||
UPDATE store_products
|
UPDATE store_products
|
||||||
SET consecutive_misses = consecutive_misses + 1
|
SET consecutive_misses = consecutive_misses + 1
|
||||||
WHERE dispensary_id = $1
|
WHERE dispensary_id = $1
|
||||||
AND provider = 'dutchie'
|
AND provider = $3
|
||||||
AND provider_product_id NOT IN (SELECT unnest($2::text[]))
|
AND provider_product_id NOT IN (SELECT unnest($2::text[]))
|
||||||
AND consecutive_misses < 3
|
AND consecutive_misses < 3
|
||||||
RETURNING id
|
RETURNING id
|
||||||
`, [dispensaryId, currentProductIds]);
|
`, [dispensaryId, currentProductIds, platform]);
|
||||||
|
|
||||||
const incrementedCount = incrementResult.rowCount || 0;
|
const incrementedCount = incrementResult.rowCount || 0;
|
||||||
if (incrementedCount > 0) {
|
if (incrementedCount > 0) {
|
||||||
@@ -289,11 +299,11 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
|||||||
UPDATE store_products
|
UPDATE store_products
|
||||||
SET stock_status = 'oos', is_in_stock = false
|
SET stock_status = 'oos', is_in_stock = false
|
||||||
WHERE dispensary_id = $1
|
WHERE dispensary_id = $1
|
||||||
AND provider = 'dutchie'
|
AND provider = $2
|
||||||
AND consecutive_misses >= 3
|
AND consecutive_misses >= 3
|
||||||
AND stock_status != 'oos'
|
AND stock_status != 'oos'
|
||||||
RETURNING id
|
RETURNING id
|
||||||
`, [dispensaryId]);
|
`, [dispensaryId, platform]);
|
||||||
|
|
||||||
const markedOosCount = oosResult.rowCount || 0;
|
const markedOosCount = oosResult.rowCount || 0;
|
||||||
if (markedOosCount > 0) {
|
if (markedOosCount > 0) {
|
||||||
|
|||||||
Reference in New Issue
Block a user