Fix cName bug: extract cName from menuUrl per dispensary
- Add extractCName() helper to parse cName from dispensary.menuUrl
- Handles /embedded-menu/<cName> and /dispensary/<cName> URL patterns
- Falls back to dispensary.slug if menuUrl extraction fails
- Pass cName to fetchAllProductsBothModes and fetchAllProducts
- Make cName a required parameter (no hardcoded defaults)
- Add normBool and normDate helpers for API data normalization
- Refactor graphql-client to use server-side fetch with Puppeteer session cookies

Previously all stores were using the AZ-Deeply-Rooted cName, causing 0 products for other dispensaries like Sol Flower.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,24 +1,24 @@
|
|||||||
/**
|
/**
|
||||||
* Dutchie GraphQL Client
|
* Dutchie GraphQL Client
|
||||||
*
|
*
|
||||||
* Makes GraphQL requests to Dutchie's API using Puppeteer to bypass Cloudflare.
|
* Uses Puppeteer to establish a session (get CF cookies), then makes
|
||||||
* Uses in-page fetch to maintain browser session/cookies.
|
* SERVER-SIDE fetch calls to api-gw.dutchie.com with those cookies.
|
||||||
*
|
*
|
||||||
* Key features:
|
* DUTCHIE FETCH RULES:
|
||||||
* - Browser session reuse between Mode A and Mode B (single browser per store)
|
* 1. Server-side only - use axios (never browser fetch with CORS)
|
||||||
* - Config-driven GraphQL hashes
|
* 2. For dispensary lookups use dispensaryFilter.cNameOrID, NOT dispensaryId directly (FilteredProducts is the exception: it takes productsFilter.dispensaryId)
|
||||||
* - POST fallback when GET fails with 405
|
* 3. Headers must mimic Chrome: User-Agent, Origin, Referer
|
||||||
* - Pagination retry logic
|
* 4. If 403, extract CF cookies from Puppeteer session and include them
|
||||||
* - Proper termination on incomplete pages
|
* 5. Log status codes, error bodies, and product counts
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import axios, { AxiosError } from 'axios';
|
||||||
import puppeteer from 'puppeteer-extra';
|
import puppeteer from 'puppeteer-extra';
|
||||||
import type { Browser, Page } from 'puppeteer';
|
import type { Browser, Page, Protocol } from 'puppeteer';
|
||||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||||
import {
|
import {
|
||||||
DutchieRawProduct,
|
DutchieRawProduct,
|
||||||
DutchiePOSChild,
|
DutchiePOSChild,
|
||||||
FilteredProductsVariables,
|
|
||||||
CrawlMode,
|
CrawlMode,
|
||||||
} from '../types';
|
} from '../types';
|
||||||
import { dutchieConfig, GRAPHQL_HASHES, ARIZONA_CENTERPOINTS } from '../config/dutchie';
|
import { dutchieConfig, GRAPHQL_HASHES, ARIZONA_CENTERPOINTS } from '../config/dutchie';
|
||||||
@@ -28,162 +28,165 @@ puppeteer.use(StealthPlugin());
|
|||||||
// Re-export for backward compatibility
|
// Re-export for backward compatibility
|
||||||
export { GRAPHQL_HASHES, ARIZONA_CENTERPOINTS };
|
export { GRAPHQL_HASHES, ARIZONA_CENTERPOINTS };
|
||||||
|
|
||||||
interface BrowserSession {
|
interface SessionCredentials {
|
||||||
|
cookies: string; // Cookie header string
|
||||||
|
userAgent: string;
|
||||||
browser: Browser;
|
browser: Browser;
|
||||||
page: Page;
|
|
||||||
dispensaryId?: string;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// BROWSER SESSION MANAGEMENT
|
// SESSION MANAGEMENT - Get CF cookies via Puppeteer
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Launch a browser session for Dutchie GraphQL requests
|
* Create a session by navigating to the embedded menu page
|
||||||
|
* and extracting CF clearance cookies for server-side requests
|
||||||
*/
|
*/
|
||||||
async function createBrowserSession(menuUrl?: string): Promise<BrowserSession> {
|
async function createSession(cName: string): Promise<SessionCredentials> {
|
||||||
const browser = await puppeteer.launch({
|
const browser = await puppeteer.launch({
|
||||||
headless: 'new',
|
headless: 'new',
|
||||||
args: dutchieConfig.browserArgs,
|
args: dutchieConfig.browserArgs,
|
||||||
});
|
});
|
||||||
|
|
||||||
const page = await browser.newPage();
|
const page = await browser.newPage();
|
||||||
|
const userAgent = dutchieConfig.userAgent;
|
||||||
|
|
||||||
// Set up stealth
|
await page.setUserAgent(userAgent);
|
||||||
await page.setUserAgent(dutchieConfig.userAgent);
|
|
||||||
await page.setViewport({ width: 1920, height: 1080 });
|
await page.setViewport({ width: 1920, height: 1080 });
|
||||||
await page.evaluateOnNewDocument(() => {
|
await page.evaluateOnNewDocument(() => {
|
||||||
Object.defineProperty(navigator, 'webdriver', { get: () => false });
|
Object.defineProperty(navigator, 'webdriver', { get: () => false });
|
||||||
(window as any).chrome = { runtime: {} };
|
(window as any).chrome = { runtime: {} };
|
||||||
});
|
});
|
||||||
|
|
||||||
// Navigate to establish session
|
// Navigate to the embedded menu page for this dispensary
|
||||||
const url = menuUrl || 'https://dutchie.com/dispensaries';
|
const embeddedMenuUrl = `https://dutchie.com/embedded-menu/${cName}`;
|
||||||
console.log(`[GraphQL Client] Loading ${url} to establish session...`);
|
console.log(`[GraphQL Client] Loading ${embeddedMenuUrl} to get CF cookies...`);
|
||||||
|
|
||||||
await page.goto(url, {
|
try {
|
||||||
waitUntil: 'networkidle2',
|
await page.goto(embeddedMenuUrl, {
|
||||||
timeout: dutchieConfig.navigationTimeout,
|
waitUntil: 'networkidle2',
|
||||||
});
|
timeout: dutchieConfig.navigationTimeout,
|
||||||
await new Promise((r) => setTimeout(r, dutchieConfig.pageLoadDelay));
|
});
|
||||||
|
await new Promise((r) => setTimeout(r, dutchieConfig.pageLoadDelay));
|
||||||
// Try to get dispensary ID from page if it's a menu page
|
} catch (error: any) {
|
||||||
let dispensaryId: string | undefined;
|
console.warn(`[GraphQL Client] Navigation warning: ${error.message}`);
|
||||||
if (menuUrl && menuUrl.includes('embedded-menu')) {
|
// Continue anyway - we may have gotten cookies
|
||||||
dispensaryId = await page.evaluate(() => (window as any).reactEnv?.dispensaryId);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return { browser, page, dispensaryId };
|
// Extract cookies
|
||||||
|
const cookies = await page.cookies();
|
||||||
|
const cookieString = cookies.map((c: Protocol.Network.Cookie) => `${c.name}=${c.value}`).join('; ');
|
||||||
|
|
||||||
|
console.log(`[GraphQL Client] Got ${cookies.length} cookies`);
|
||||||
|
if (cookies.length > 0) {
|
||||||
|
console.log(`[GraphQL Client] Cookie names: ${cookies.map(c => c.name).join(', ')}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
return { cookies: cookieString, userAgent, browser };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Close browser session
|
* Close session (browser)
|
||||||
*/
|
*/
|
||||||
async function closeBrowserSession(session: BrowserSession): Promise<void> {
|
async function closeSession(session: SessionCredentials): Promise<void> {
|
||||||
await session.browser.close();
|
await session.browser.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// GRAPHQL EXECUTION WITH POST FALLBACK
|
// SERVER-SIDE GRAPHQL FETCH USING AXIOS
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Execute a GraphQL query from within the browser context
|
* Build headers that mimic a real browser request
|
||||||
* Supports GET (default) with POST fallback on 405 errors
|
*/
|
||||||
|
function buildHeaders(session: SessionCredentials, cName: string): Record<string, string> {
|
||||||
|
const embeddedMenuUrl = `https://dutchie.com/embedded-menu/${cName}`;
|
||||||
|
|
||||||
|
return {
|
||||||
|
'accept': 'application/json, text/plain, */*',
|
||||||
|
'accept-language': 'en-US,en;q=0.9',
|
||||||
|
'accept-encoding': 'gzip, deflate, br',
|
||||||
|
'content-type': 'application/json',
|
||||||
|
'origin': 'https://dutchie.com',
|
||||||
|
'referer': embeddedMenuUrl,
|
||||||
|
'user-agent': session.userAgent,
|
||||||
|
'apollographql-client-name': 'Marketplace (production)',
|
||||||
|
'sec-ch-ua': '"Chromium";v="120", "Google Chrome";v="120", "Not-A.Brand";v="99"',
|
||||||
|
'sec-ch-ua-mobile': '?0',
|
||||||
|
'sec-ch-ua-platform': '"Windows"',
|
||||||
|
'sec-fetch-dest': 'empty',
|
||||||
|
'sec-fetch-mode': 'cors',
|
||||||
|
'sec-fetch-site': 'same-site',
|
||||||
|
...(session.cookies ? { 'cookie': session.cookies } : {}),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Execute GraphQL query server-side using axios
|
||||||
|
* Uses cookies from the browser session to bypass CF
|
||||||
*/
|
*/
|
||||||
async function executeGraphQL(
|
async function executeGraphQL(
|
||||||
page: Page,
|
session: SessionCredentials,
|
||||||
operationName: string,
|
operationName: string,
|
||||||
variables: any,
|
variables: any,
|
||||||
hash: string,
|
hash: string,
|
||||||
endpoint: string = 'https://dutchie.com/graphql'
|
cName: string
|
||||||
): Promise<any> {
|
): Promise<any> {
|
||||||
const headers = dutchieConfig.defaultHeaders;
|
const endpoint = dutchieConfig.graphqlEndpoint;
|
||||||
const preferGet = dutchieConfig.preferGet;
|
const headers = buildHeaders(session, cName);
|
||||||
const enablePostFallback = dutchieConfig.enablePostFallback;
|
|
||||||
|
|
||||||
return page.evaluate(
|
// Build request body for POST
|
||||||
async (
|
const body = {
|
||||||
opName: string,
|
|
||||||
vars: any,
|
|
||||||
queryHash: string,
|
|
||||||
url: string,
|
|
||||||
hdrs: Record<string, string>,
|
|
||||||
useGet: boolean,
|
|
||||||
allowPostFallback: boolean
|
|
||||||
) => {
|
|
||||||
const doFetch = async (method: 'GET' | 'POST'): Promise<Response> => {
|
|
||||||
if (method === 'GET') {
|
|
||||||
const qs = new URLSearchParams({
|
|
||||||
operationName: opName,
|
|
||||||
variables: JSON.stringify(vars),
|
|
||||||
extensions: JSON.stringify({
|
|
||||||
persistedQuery: { version: 1, sha256Hash: queryHash },
|
|
||||||
}),
|
|
||||||
});
|
|
||||||
return fetch(`${url}?${qs.toString()}`, {
|
|
||||||
method: 'GET',
|
|
||||||
headers: {
|
|
||||||
...hdrs,
|
|
||||||
'content-type': 'application/json',
|
|
||||||
},
|
|
||||||
credentials: 'include',
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
// POST request with full body
|
|
||||||
return fetch(url, {
|
|
||||||
method: 'POST',
|
|
||||||
headers: {
|
|
||||||
...hdrs,
|
|
||||||
'content-type': 'application/json',
|
|
||||||
},
|
|
||||||
credentials: 'include',
|
|
||||||
body: JSON.stringify({
|
|
||||||
operationName: opName,
|
|
||||||
variables: vars,
|
|
||||||
extensions: {
|
|
||||||
persistedQuery: { version: 1, sha256Hash: queryHash },
|
|
||||||
},
|
|
||||||
}),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Try GET first if preferred
|
|
||||||
if (useGet) {
|
|
||||||
const response = await doFetch('GET');
|
|
||||||
|
|
||||||
// If GET fails with 405 and POST fallback is enabled, try POST
|
|
||||||
if (response.status === 405 && allowPostFallback) {
|
|
||||||
console.log('[GraphQL] GET returned 405, falling back to POST');
|
|
||||||
const postResponse = await doFetch('POST');
|
|
||||||
if (!postResponse.ok) {
|
|
||||||
throw new Error(`HTTP ${postResponse.status} (POST fallback)`);
|
|
||||||
}
|
|
||||||
return postResponse.json();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!response.ok) {
|
|
||||||
throw new Error(`HTTP ${response.status}`);
|
|
||||||
}
|
|
||||||
return response.json();
|
|
||||||
} else {
|
|
||||||
// Use POST directly
|
|
||||||
const response = await doFetch('POST');
|
|
||||||
if (!response.ok) {
|
|
||||||
throw new Error(`HTTP ${response.status}`);
|
|
||||||
}
|
|
||||||
return response.json();
|
|
||||||
}
|
|
||||||
},
|
|
||||||
operationName,
|
operationName,
|
||||||
variables,
|
variables,
|
||||||
hash,
|
extensions: {
|
||||||
endpoint,
|
persistedQuery: { version: 1, sha256Hash: hash },
|
||||||
headers,
|
},
|
||||||
preferGet,
|
};
|
||||||
enablePostFallback
|
|
||||||
);
|
console.log(`[GraphQL Client] POST: ${operationName} -> ${endpoint}`);
|
||||||
|
console.log(`[GraphQL Client] Variables: ${JSON.stringify(variables).slice(0, 300)}...`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await axios.post(endpoint, body, {
|
||||||
|
headers,
|
||||||
|
timeout: 30000,
|
||||||
|
validateStatus: () => true, // Don't throw on non-2xx
|
||||||
|
});
|
||||||
|
|
||||||
|
// Log response details
|
||||||
|
console.log(`[GraphQL Client] Response status: ${response.status}`);
|
||||||
|
|
||||||
|
if (response.status !== 200) {
|
||||||
|
const bodyPreview = typeof response.data === 'string'
|
||||||
|
? response.data.slice(0, 500)
|
||||||
|
: JSON.stringify(response.data).slice(0, 500);
|
||||||
|
console.error(`[GraphQL Client] HTTP ${response.status}: ${bodyPreview}`);
|
||||||
|
throw new Error(`HTTP ${response.status}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for GraphQL errors
|
||||||
|
if (response.data?.errors && response.data.errors.length > 0) {
|
||||||
|
console.error(`[GraphQL Client] GraphQL errors: ${JSON.stringify(response.data.errors[0])}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
return response.data;
|
||||||
|
} catch (error: any) {
|
||||||
|
if (axios.isAxiosError(error)) {
|
||||||
|
const axiosError = error as AxiosError;
|
||||||
|
console.error(`[GraphQL Client] Axios error: ${axiosError.message}`);
|
||||||
|
if (axiosError.response) {
|
||||||
|
console.error(`[GraphQL Client] Response status: ${axiosError.response.status}`);
|
||||||
|
console.error(`[GraphQL Client] Response data: ${JSON.stringify(axiosError.response.data).slice(0, 500)}`);
|
||||||
|
}
|
||||||
|
if (axiosError.code) {
|
||||||
|
console.error(`[GraphQL Client] Error code: ${axiosError.code}`);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.error(`[GraphQL Client] Error: ${error.message}`);
|
||||||
|
}
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
@@ -192,120 +195,180 @@ async function executeGraphQL(
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Resolve a dispensary slug to its internal platform ID
|
* Resolve a dispensary slug to its internal platform ID
|
||||||
|
* Uses GetAddressBasedDispensaryData query
|
||||||
*/
|
*/
|
||||||
export async function resolveDispensaryId(slug: string): Promise<string | null> {
|
export async function resolveDispensaryId(slug: string): Promise<string | null> {
|
||||||
const session = await createBrowserSession(`https://dutchie.com/embedded-menu/${slug}`);
|
console.log(`[GraphQL Client] Resolving dispensary ID for slug: ${slug}`);
|
||||||
|
|
||||||
|
const session = await createSession(slug);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// First check if we got it from the page context
|
const variables = {
|
||||||
if (session.dispensaryId) {
|
dispensaryFilter: {
|
||||||
console.log(`[GraphQL Client] Got dispensaryId from page: ${session.dispensaryId}`);
|
cNameOrID: slug,
|
||||||
return session.dispensaryId;
|
},
|
||||||
}
|
};
|
||||||
|
|
||||||
// Otherwise try the GetAddressBasedDispensaryData query
|
|
||||||
const result = await executeGraphQL(
|
const result = await executeGraphQL(
|
||||||
session.page,
|
session,
|
||||||
'GetAddressBasedDispensaryData',
|
'GetAddressBasedDispensaryData',
|
||||||
{ input: { dispensaryId: slug } },
|
variables,
|
||||||
GRAPHQL_HASHES.GetAddressBasedDispensaryData
|
GRAPHQL_HASHES.GetAddressBasedDispensaryData,
|
||||||
|
slug
|
||||||
);
|
);
|
||||||
|
|
||||||
const dispensaryId = result?.data?.getAddressBasedDispensaryData?.dispensaryId;
|
const dispensaryId = result?.data?.dispensaryBySlug?.id ||
|
||||||
console.log(`[GraphQL Client] Resolved ${slug} -> ${dispensaryId}`);
|
result?.data?.dispensary?.id ||
|
||||||
return dispensaryId || null;
|
result?.data?.getAddressBasedDispensaryData?.dispensary?.id;
|
||||||
} catch (error: any) {
|
|
||||||
console.error(`[GraphQL Client] Failed to resolve ${slug}:`, error.message);
|
if (dispensaryId) {
|
||||||
|
console.log(`[GraphQL Client] Resolved ${slug} -> ${dispensaryId}`);
|
||||||
|
return dispensaryId;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[GraphQL Client] Could not resolve ${slug}, response:`, JSON.stringify(result).slice(0, 300));
|
||||||
return null;
|
return null;
|
||||||
} finally {
|
} finally {
|
||||||
await closeBrowserSession(session);
|
await closeSession(session);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Discover Arizona dispensaries via geo-based query
|
||||||
|
*/
|
||||||
|
export async function discoverArizonaDispensaries(): Promise<any[]> {
|
||||||
|
console.log('[GraphQL Client] Discovering Arizona dispensaries...');
|
||||||
|
|
||||||
|
// Discovery is not tied to one store: load the AZ-Deeply-Rooted embedded menu only to obtain CF cookies
|
||||||
|
const session = await createSession('AZ-Deeply-Rooted');
|
||||||
|
const allDispensaries: any[] = [];
|
||||||
|
const seenIds = new Set<string>();
|
||||||
|
|
||||||
|
try {
|
||||||
|
for (const centerpoint of ARIZONA_CENTERPOINTS) {
|
||||||
|
console.log(`[GraphQL Client] Scanning ${centerpoint.name}...`);
|
||||||
|
|
||||||
|
const variables = {
|
||||||
|
dispensariesFilter: {
|
||||||
|
latitude: centerpoint.lat,
|
||||||
|
longitude: centerpoint.lng,
|
||||||
|
distance: 100,
|
||||||
|
state: 'AZ',
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await executeGraphQL(
|
||||||
|
session,
|
||||||
|
'ConsumerDispensaries',
|
||||||
|
variables,
|
||||||
|
GRAPHQL_HASHES.ConsumerDispensaries,
|
||||||
|
'AZ-Deeply-Rooted'
|
||||||
|
);
|
||||||
|
|
||||||
|
const dispensaries = result?.data?.consumerDispensaries || [];
|
||||||
|
|
||||||
|
for (const d of dispensaries) {
|
||||||
|
const id = d.id || d.dispensaryId;
|
||||||
|
if (id && !seenIds.has(id)) {
|
||||||
|
seenIds.add(id);
|
||||||
|
allDispensaries.push(d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[GraphQL Client] Found ${dispensaries.length} in ${centerpoint.name} (${allDispensaries.length} total unique)`);
|
||||||
|
} catch (error: any) {
|
||||||
|
console.warn(`[GraphQL Client] Error scanning ${centerpoint.name}: ${error.message}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delay between requests
|
||||||
|
await new Promise((r) => setTimeout(r, 1000));
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
await closeSession(session);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[GraphQL Client] Discovery complete: ${allDispensaries.length} dispensaries`);
|
||||||
|
return allDispensaries;
|
||||||
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// FILTER VARIABLE BUILDING
|
// PRODUCT FILTERING VARIABLES
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build GraphQL variables based on crawl mode
|
* Build filter variables for FilteredProducts query
|
||||||
*
|
*
|
||||||
* MODE A - "UI parity": Matches what Dutchie website shows
|
* CRITICAL: Uses dispensaryId directly (the MongoDB ObjectId, e.g. "6405ef617056e8014d79101b")
|
||||||
* - Status: 'Active'
|
* NOT dispensaryFilter.cNameOrID!
|
||||||
* - removeProductsBelowOptionThresholds: true (default behavior)
|
|
||||||
* - bypassOnlineThresholds: false
|
|
||||||
*
|
*
|
||||||
* MODE B - "MAX COVERAGE": Tries to get out-of-stock products
|
* The actual browser request structure is:
|
||||||
* - Status: undefined (no filter)
|
* {
|
||||||
* - removeProductsBelowOptionThresholds: false
|
* "productsFilter": {
|
||||||
* - bypassOnlineThresholds: true
|
* "dispensaryId": "6405ef617056e8014d79101b",
|
||||||
|
* "pricingType": "rec",
|
||||||
|
* "Status": "Active", // Mode A only
|
||||||
|
* "strainTypes": [],
|
||||||
|
* "subcategories": [],
|
||||||
|
* "types": [],
|
||||||
|
* "useCache": true,
|
||||||
|
* ...
|
||||||
|
* },
|
||||||
|
* "page": 0,
|
||||||
|
* "perPage": 100
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* Mode A = UI parity (Status: "Active")
|
||||||
|
* Mode B = MAX COVERAGE (no Status filter)
|
||||||
*/
|
*/
|
||||||
function buildFilterVariables(
|
function buildFilterVariables(
|
||||||
platformDispensaryId: string,
|
platformDispensaryId: string,
|
||||||
pricingType: 'rec' | 'med',
|
pricingType: 'rec' | 'med',
|
||||||
crawlMode: CrawlMode,
|
crawlMode: CrawlMode,
|
||||||
pageNum: number,
|
page: number,
|
||||||
perPage: number
|
perPage: number
|
||||||
): FilteredProductsVariables {
|
): any {
|
||||||
if (crawlMode === 'mode_a') {
|
const isModeA = crawlMode === 'mode_a';
|
||||||
// UI parity mode
|
|
||||||
return {
|
const productsFilter: Record<string, any> = {
|
||||||
includeEnterpriseSpecials: false,
|
dispensaryId: platformDispensaryId,
|
||||||
productsFilter: {
|
pricingType: pricingType,
|
||||||
dispensaryId: platformDispensaryId,
|
strainTypes: [],
|
||||||
pricingType,
|
subcategories: [],
|
||||||
Status: 'Active',
|
types: [],
|
||||||
types: [],
|
useCache: false, // Get fresh data
|
||||||
strainTypes: [],
|
isDefaultSort: true,
|
||||||
subcategories: [],
|
sortBy: 'popular',
|
||||||
useCache: false,
|
sortDirection: 1,
|
||||||
isDefaultSort: true,
|
bypassOnlineThresholds: false,
|
||||||
sortBy: 'popularSortIdx',
|
isKioskMenu: false,
|
||||||
sortDirection: 1,
|
removeProductsBelowOptionThresholds: true,
|
||||||
bypassOnlineThresholds: false,
|
};
|
||||||
isKioskMenu: false,
|
|
||||||
removeProductsBelowOptionThresholds: true,
|
// Mode A: Only active products (UI parity)
|
||||||
},
|
if (isModeA) {
|
||||||
page: pageNum,
|
productsFilter.Status = 'Active';
|
||||||
perPage,
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
// MAX COVERAGE mode (mode_b)
|
|
||||||
return {
|
|
||||||
includeEnterpriseSpecials: false,
|
|
||||||
productsFilter: {
|
|
||||||
dispensaryId: platformDispensaryId,
|
|
||||||
pricingType,
|
|
||||||
// No Status filter - try to get all products
|
|
||||||
types: [],
|
|
||||||
strainTypes: [],
|
|
||||||
subcategories: [],
|
|
||||||
useCache: false,
|
|
||||||
isDefaultSort: true,
|
|
||||||
sortBy: 'popularSortIdx',
|
|
||||||
sortDirection: 1,
|
|
||||||
bypassOnlineThresholds: true,
|
|
||||||
isKioskMenu: false,
|
|
||||||
removeProductsBelowOptionThresholds: false,
|
|
||||||
},
|
|
||||||
page: pageNum,
|
|
||||||
perPage,
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
includeEnterpriseSpecials: false,
|
||||||
|
productsFilter,
|
||||||
|
page,
|
||||||
|
perPage,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// PRODUCT FETCHING WITH RETRY & PAGINATION
|
// PRODUCT FETCHING WITH PAGINATION
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fetch all products for a dispensary via paginated GraphQL
|
* Fetch products for a single mode with pagination
|
||||||
* Supports retry logic and proper termination
|
|
||||||
*
|
|
||||||
* @param session - Existing browser session to reuse
|
|
||||||
*/
|
*/
|
||||||
async function fetchProductsWithSession(
|
async function fetchProductsForMode(
|
||||||
session: BrowserSession,
|
session: SessionCredentials,
|
||||||
platformDispensaryId: string,
|
platformDispensaryId: string,
|
||||||
|
cName: string,
|
||||||
pricingType: 'rec' | 'med',
|
pricingType: 'rec' | 'med',
|
||||||
crawlMode: CrawlMode
|
crawlMode: CrawlMode
|
||||||
): Promise<{ products: DutchieRawProduct[]; totalCount: number; crawlMode: CrawlMode }> {
|
): Promise<{ products: DutchieRawProduct[]; totalCount: number; crawlMode: CrawlMode }> {
|
||||||
@@ -319,51 +382,57 @@ async function fetchProductsWithSession(
|
|||||||
let totalCount = 0;
|
let totalCount = 0;
|
||||||
let consecutiveEmptyPages = 0;
|
let consecutiveEmptyPages = 0;
|
||||||
|
|
||||||
console.log(`[GraphQL Client] Fetching products for ${platformDispensaryId} (${pricingType}, ${crawlMode})...`);
|
console.log(`[GraphQL Client] Fetching products for ${cName} (platformId: ${platformDispensaryId}, ${pricingType}, ${crawlMode})...`);
|
||||||
|
|
||||||
while (pageNum < maxPages) {
|
while (pageNum < maxPages) {
|
||||||
const variables = buildFilterVariables(
|
const variables = buildFilterVariables(platformDispensaryId, pricingType, crawlMode, pageNum, perPage);
|
||||||
platformDispensaryId,
|
|
||||||
pricingType,
|
|
||||||
crawlMode,
|
|
||||||
pageNum,
|
|
||||||
perPage
|
|
||||||
);
|
|
||||||
|
|
||||||
let result: any = null;
|
let result: any = null;
|
||||||
let lastError: Error | null = null;
|
let lastError: Error | null = null;
|
||||||
|
|
||||||
// Retry logic for failed page fetches
|
// Retry logic
|
||||||
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
||||||
try {
|
try {
|
||||||
result = await executeGraphQL(
|
result = await executeGraphQL(
|
||||||
session.page,
|
session,
|
||||||
'FilteredProducts',
|
'FilteredProducts',
|
||||||
variables,
|
variables,
|
||||||
GRAPHQL_HASHES.FilteredProducts
|
GRAPHQL_HASHES.FilteredProducts,
|
||||||
|
cName
|
||||||
);
|
);
|
||||||
lastError = null;
|
lastError = null;
|
||||||
break; // Success, exit retry loop
|
break;
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
lastError = error;
|
lastError = error;
|
||||||
console.warn(`[GraphQL Client] Page ${pageNum} attempt ${attempt + 1} failed: ${error.message}`);
|
console.warn(`[GraphQL Client] Page ${pageNum} attempt ${attempt + 1} failed: ${error.message}`);
|
||||||
if (attempt < maxRetries) {
|
if (attempt < maxRetries) {
|
||||||
await new Promise((r) => setTimeout(r, 1000 * (attempt + 1))); // Exponential backoff
|
await new Promise((r) => setTimeout(r, 1000 * (attempt + 1)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If all retries failed, log error and break
|
|
||||||
if (lastError) {
|
if (lastError) {
|
||||||
console.error(`[GraphQL Client] Page ${pageNum} failed after ${maxRetries + 1} attempts: ${lastError.message}`);
|
console.error(`[GraphQL Client] Page ${pageNum} failed after ${maxRetries + 1} attempts`);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result.errors) {
|
if (result?.errors) {
|
||||||
console.error('[GraphQL Client] GraphQL errors:', result.errors);
|
console.error('[GraphQL Client] GraphQL errors:', JSON.stringify(result.errors));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Log response shape on first page
|
||||||
|
if (pageNum === 0) {
|
||||||
|
console.log(`[GraphQL Client] Response keys: ${Object.keys(result || {}).join(', ')}`);
|
||||||
|
if (result?.data) {
|
||||||
|
console.log(`[GraphQL Client] data keys: ${Object.keys(result.data || {}).join(', ')}`);
|
||||||
|
}
|
||||||
|
if (!result?.data?.filteredProducts) {
|
||||||
|
console.log(`[GraphQL Client] WARNING: No filteredProducts in response!`);
|
||||||
|
console.log(`[GraphQL Client] Full response: ${JSON.stringify(result).slice(0, 1000)}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const products = result?.data?.filteredProducts?.products || [];
|
const products = result?.data?.filteredProducts?.products || [];
|
||||||
const queryInfo = result?.data?.filteredProducts?.queryInfo;
|
const queryInfo = result?.data?.filteredProducts?.queryInfo;
|
||||||
|
|
||||||
@@ -375,7 +444,6 @@ async function fetchProductsWithSession(
|
|||||||
`[GraphQL Client] Page ${pageNum}: ${products.length} products (total so far: ${allProducts.length + products.length}/${totalCount})`
|
`[GraphQL Client] Page ${pageNum}: ${products.length} products (total so far: ${allProducts.length + products.length}/${totalCount})`
|
||||||
);
|
);
|
||||||
|
|
||||||
// PROPER TERMINATION: Stop if products.length < perPage (incomplete page = last page)
|
|
||||||
if (products.length === 0) {
|
if (products.length === 0) {
|
||||||
consecutiveEmptyPages++;
|
consecutiveEmptyPages++;
|
||||||
if (consecutiveEmptyPages >= 2) {
|
if (consecutiveEmptyPages >= 2) {
|
||||||
@@ -387,15 +455,13 @@ async function fetchProductsWithSession(
|
|||||||
allProducts.push(...products);
|
allProducts.push(...products);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stop if we got less than a full page (this is the last page)
|
// Stop if incomplete page (last page)
|
||||||
if (products.length < perPage) {
|
if (products.length < perPage) {
|
||||||
console.log(`[GraphQL Client] Incomplete page (${products.length} < ${perPage}), stopping pagination`);
|
console.log(`[GraphQL Client] Incomplete page (${products.length} < ${perPage}), stopping`);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
pageNum++;
|
pageNum++;
|
||||||
|
|
||||||
// Small delay between pages
|
|
||||||
await new Promise((r) => setTimeout(r, pageDelayMs));
|
await new Promise((r) => setTimeout(r, pageDelayMs));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -403,8 +469,12 @@ async function fetchProductsWithSession(
|
|||||||
return { products: allProducts, totalCount: totalCount || allProducts.length, crawlMode };
|
return { products: allProducts, totalCount: totalCount || allProducts.length, crawlMode };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// LEGACY SINGLE-MODE INTERFACE
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fetch all products for a dispensary (legacy interface - creates new browser)
|
* Fetch all products for a dispensary (single mode)
|
||||||
*/
|
*/
|
||||||
export async function fetchAllProducts(
|
export async function fetchAllProducts(
|
||||||
platformDispensaryId: string,
|
platformDispensaryId: string,
|
||||||
@@ -414,28 +484,32 @@ export async function fetchAllProducts(
|
|||||||
maxPages?: number;
|
maxPages?: number;
|
||||||
menuUrl?: string;
|
menuUrl?: string;
|
||||||
crawlMode?: CrawlMode;
|
crawlMode?: CrawlMode;
|
||||||
|
cName?: string;
|
||||||
} = {}
|
} = {}
|
||||||
): Promise<{ products: DutchieRawProduct[]; totalCount: number; crawlMode: CrawlMode }> {
|
): Promise<{ products: DutchieRawProduct[]; totalCount: number; crawlMode: CrawlMode }> {
|
||||||
const { crawlMode = 'mode_a' } = options;
|
const { crawlMode = 'mode_a' } = options;
|
||||||
const menuUrl = options.menuUrl || `https://dutchie.com/dispensaries`;
|
|
||||||
|
|
||||||
const session = await createBrowserSession(menuUrl);
|
// cName is now REQUIRED - no default fallback to avoid using wrong store's session
|
||||||
|
const cName = options.cName;
|
||||||
|
if (!cName) {
|
||||||
|
throw new Error('[GraphQL Client] cName is required for fetchAllProducts - cannot use another store\'s session');
|
||||||
|
}
|
||||||
|
|
||||||
|
const session = await createSession(cName);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
return await fetchProductsWithSession(session, platformDispensaryId, pricingType, crawlMode);
|
return await fetchProductsForMode(session, platformDispensaryId, cName, pricingType, crawlMode);
|
||||||
} finally {
|
} finally {
|
||||||
await closeBrowserSession(session);
|
await closeSession(session);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// MODE A+B MERGING WITH OPTIONS
|
// MODE A+B MERGING
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Merge POSMetaData.children arrays from Mode A and Mode B products
|
* Merge POSMetaData.children arrays from Mode A and Mode B products
|
||||||
* Uses canonicalID/canonicalSKU/canonicalPackageId as merge key
|
|
||||||
* Mode B children may have different quantityAvailable for options not in Mode A
|
|
||||||
*/
|
*/
|
||||||
function mergeProductOptions(
|
function mergeProductOptions(
|
||||||
modeAProduct: DutchieRawProduct,
|
modeAProduct: DutchieRawProduct,
|
||||||
@@ -444,22 +518,17 @@ function mergeProductOptions(
|
|||||||
const modeAChildren = modeAProduct.POSMetaData?.children || [];
|
const modeAChildren = modeAProduct.POSMetaData?.children || [];
|
||||||
const modeBChildren = modeBProduct.POSMetaData?.children || [];
|
const modeBChildren = modeBProduct.POSMetaData?.children || [];
|
||||||
|
|
||||||
// Create a map keyed by option identifier
|
|
||||||
const getOptionKey = (child: DutchiePOSChild): string => {
|
const getOptionKey = (child: DutchiePOSChild): string => {
|
||||||
return child.canonicalID || child.canonicalSKU || child.canonicalPackageId || child.option || '';
|
return child.canonicalID || child.canonicalSKU || child.canonicalPackageId || child.option || '';
|
||||||
};
|
};
|
||||||
|
|
||||||
const mergedMap = new Map<string, DutchiePOSChild>();
|
const mergedMap = new Map<string, DutchiePOSChild>();
|
||||||
|
|
||||||
// Add all Mode A children first (they're "canonical")
|
|
||||||
for (const child of modeAChildren) {
|
for (const child of modeAChildren) {
|
||||||
const key = getOptionKey(child);
|
const key = getOptionKey(child);
|
||||||
if (key) {
|
if (key) mergedMap.set(key, child);
|
||||||
mergedMap.set(key, child);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add Mode B children that aren't in Mode A (may include OOS options)
|
|
||||||
for (const child of modeBChildren) {
|
for (const child of modeBChildren) {
|
||||||
const key = getOptionKey(child);
|
const key = getOptionKey(child);
|
||||||
if (key && !mergedMap.has(key)) {
|
if (key && !mergedMap.has(key)) {
|
||||||
@@ -472,7 +541,6 @@ function mergeProductOptions(
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Merge a Mode A product with a Mode B product
|
* Merge a Mode A product with a Mode B product
|
||||||
* Mode A data is preferred, but children are merged for max coverage
|
|
||||||
*/
|
*/
|
||||||
function mergeProducts(
|
function mergeProducts(
|
||||||
modeAProduct: DutchieRawProduct,
|
modeAProduct: DutchieRawProduct,
|
||||||
@@ -482,10 +550,8 @@ function mergeProducts(
|
|||||||
return modeAProduct;
|
return modeAProduct;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Merge children arrays
|
|
||||||
const mergedChildren = mergeProductOptions(modeAProduct, modeBProduct);
|
const mergedChildren = mergeProductOptions(modeAProduct, modeBProduct);
|
||||||
|
|
||||||
// Return Mode A product with merged children
|
|
||||||
return {
|
return {
|
||||||
...modeAProduct,
|
...modeAProduct,
|
||||||
POSMetaData: {
|
POSMetaData: {
|
||||||
@@ -495,9 +561,13 @@ function mergeProducts(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// MAIN EXPORT: TWO-MODE CRAWL
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fetch products using BOTH crawl modes with SINGLE browser session
|
* Fetch products using BOTH crawl modes with SINGLE session
|
||||||
* This ensures maximum coverage by running Mode A then Mode B with the same session
|
* Runs Mode A then Mode B, merges results
|
||||||
*/
|
*/
|
||||||
export async function fetchAllProductsBothModes(
|
export async function fetchAllProductsBothModes(
|
||||||
platformDispensaryId: string,
|
platformDispensaryId: string,
|
||||||
@@ -506,161 +576,67 @@ export async function fetchAllProductsBothModes(
|
|||||||
perPage?: number;
|
perPage?: number;
|
||||||
maxPages?: number;
|
maxPages?: number;
|
||||||
menuUrl?: string;
|
menuUrl?: string;
|
||||||
|
cName?: string;
|
||||||
} = {}
|
} = {}
|
||||||
): Promise<{
|
): Promise<{
|
||||||
modeA: { products: DutchieRawProduct[]; totalCount: number };
|
modeA: { products: DutchieRawProduct[]; totalCount: number };
|
||||||
modeB: { products: DutchieRawProduct[]; totalCount: number };
|
modeB: { products: DutchieRawProduct[]; totalCount: number };
|
||||||
merged: { products: DutchieRawProduct[]; totalCount: number };
|
merged: { products: DutchieRawProduct[]; totalCount: number };
|
||||||
}> {
|
}> {
|
||||||
console.log(`[GraphQL Client] Running two-mode crawl for ${platformDispensaryId} (${pricingType})...`);
|
// cName is now REQUIRED - no default fallback to avoid using wrong store's session
|
||||||
|
const cName = options.cName;
|
||||||
|
if (!cName) {
|
||||||
|
throw new Error('[GraphQL Client] cName is required for fetchAllProductsBothModes - cannot use another store\'s session');
|
||||||
|
}
|
||||||
|
|
||||||
const menuUrl = options.menuUrl || `https://dutchie.com/dispensaries`;
|
console.log(`[GraphQL Client] Running two-mode crawl for ${cName} (${pricingType})...`);
|
||||||
|
console.log(`[GraphQL Client] Platform ID: ${platformDispensaryId}, cName: ${cName}`);
|
||||||
|
|
||||||
// Create a SINGLE browser session for both modes
|
const session = await createSession(cName);
|
||||||
const session = await createBrowserSession(menuUrl);
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Run Mode A (UI parity) with shared session
|
// Mode A (UI parity)
|
||||||
const modeAResult = await fetchProductsWithSession(
|
const modeAResult = await fetchProductsForMode(session, platformDispensaryId, cName, pricingType, 'mode_a');
|
||||||
session,
|
|
||||||
platformDispensaryId,
|
|
||||||
pricingType,
|
|
||||||
'mode_a'
|
|
||||||
);
|
|
||||||
|
|
||||||
// Small delay between modes
|
// Delay between modes
|
||||||
await new Promise((r) => setTimeout(r, dutchieConfig.modeDelayMs));
|
await new Promise((r) => setTimeout(r, dutchieConfig.modeDelayMs));
|
||||||
|
|
||||||
// Run Mode B (MAX COVERAGE) with same session - NO new browser!
|
// Mode B (MAX COVERAGE)
|
||||||
const modeBResult = await fetchProductsWithSession(
|
const modeBResult = await fetchProductsForMode(session, platformDispensaryId, cName, pricingType, 'mode_b');
|
||||||
session,
|
|
||||||
platformDispensaryId,
|
|
||||||
pricingType,
|
|
||||||
'mode_b'
|
|
||||||
);
|
|
||||||
|
|
||||||
// Build lookup map for Mode B products
|
// Merge results
|
||||||
const modeBMap = new Map<string, DutchieRawProduct>();
|
const modeBMap = new Map<string, DutchieRawProduct>();
|
||||||
for (const product of modeBResult.products) {
|
for (const product of modeBResult.products) {
|
||||||
modeBMap.set(product._id, product);
|
modeBMap.set(product._id, product);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Merge results - deduplicate by _id, merge options
|
|
||||||
const productMap = new Map<string, DutchieRawProduct>();
|
const productMap = new Map<string, DutchieRawProduct>();
|
||||||
|
|
||||||
// Add Mode A products first (canonical), merging with Mode B if exists
|
// Add Mode A products, merging with Mode B if exists
|
||||||
for (const product of modeAResult.products) {
|
for (const product of modeAResult.products) {
|
||||||
const modeBProduct = modeBMap.get(product._id);
|
const modeBProduct = modeBMap.get(product._id);
|
||||||
const mergedProduct = mergeProducts(product, modeBProduct);
|
const mergedProduct = mergeProducts(product, modeBProduct);
|
||||||
productMap.set(product._id, mergedProduct);
|
productMap.set(product._id, mergedProduct);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add Mode B products that aren't in Mode A (may include OOS items)
|
// Add Mode B products not in Mode A
|
||||||
for (const product of modeBResult.products) {
|
for (const product of modeBResult.products) {
|
||||||
if (!productMap.has(product._id)) {
|
if (!productMap.has(product._id)) {
|
||||||
productMap.set(product._id, product);
|
productMap.set(product._id, product);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const merged = Array.from(productMap.values());
|
const mergedProducts = Array.from(productMap.values());
|
||||||
|
|
||||||
console.log(`[GraphQL Client] Two-mode crawl complete:`);
|
console.log(`[GraphQL Client] Merged: ${mergedProducts.length} unique products`);
|
||||||
console.log(` Mode A: ${modeAResult.products.length} products`);
|
console.log(`[GraphQL Client] Mode A: ${modeAResult.products.length}, Mode B: ${modeBResult.products.length}`);
|
||||||
console.log(` Mode B: ${modeBResult.products.length} products`);
|
|
||||||
console.log(` Merged: ${merged.length} unique products`);
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
modeA: { products: modeAResult.products, totalCount: modeAResult.totalCount },
|
modeA: { products: modeAResult.products, totalCount: modeAResult.totalCount },
|
||||||
modeB: { products: modeBResult.products, totalCount: modeBResult.totalCount },
|
modeB: { products: modeBResult.products, totalCount: modeBResult.totalCount },
|
||||||
merged: { products: merged, totalCount: merged.length },
|
merged: { products: mergedProducts, totalCount: mergedProducts.length },
|
||||||
};
|
};
|
||||||
} finally {
|
} finally {
|
||||||
// Close the shared session when done
|
await closeSession(session);
|
||||||
await closeBrowserSession(session);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================
|
|
||||||
// DISPENSARY DISCOVERY
|
|
||||||
// ============================================================
|
|
||||||
|
|
||||||
/**
 * Look up dispensaries around a latitude/longitude via the
 * `ConsumerDispensaries` GraphQL operation.
 *
 * Returns an empty list without opening a session when
 * `dutchieConfig.useDiscovery` is falsy. Otherwise a browser session is
 * opened for the duration of the query and always closed afterwards.
 * Query failures are logged and reported as an empty list, not thrown.
 *
 * @param lat - Latitude of the search center.
 * @param lng - Longitude of the search center.
 * @param radiusKm - Search radius in kilometers (default 100); it is sent
 *   multiplied by 1000. NOTE(review): the unit the API expects was never
 *   confirmed in this file.
 * @returns The `consumerDispensaries` array from the response, or `[]`.
 */
export async function discoverDispensaries(
  lat: number,
  lng: number,
  radiusKm: number = 100
): Promise<any[]> {
  // Config switch: discovery can be turned off entirely
  if (!dutchieConfig.useDiscovery) {
    console.log('[GraphQL Client] Discovery disabled in config, skipping');
    return [];
  }

  const session = await createBrowserSession();

  try {
    console.log(`[GraphQL Client] Discovering dispensaries near ${lat}, ${lng}...`);

    // The exact operation/variables may need to be re-captured from live traffic
    const variables = {
      filter: {
        lat,
        lng,
        radius: radiusKm * 1000,
        isDelivery: false,
      },
    };
    const response = await executeGraphQL(
      session.page,
      'ConsumerDispensaries',
      variables,
      GRAPHQL_HASHES.ConsumerDispensaries
    );

    const found = response?.data?.consumerDispensaries || [];
    console.log(`[GraphQL Client] Found ${found.length} dispensaries`);
    return found;
  } catch (error: any) {
    console.error(`[GraphQL Client] Discovery failed:`, error.message);
    return [];
  } finally {
    await closeBrowserSession(session);
  }
}
|
|
||||||
|
|
||||||
/**
 * Discover Arizona dispensaries by scanning every entry of
 * `ARIZONA_CENTERPOINTS` with a 150 km radius and de-duplicating the results.
 *
 * - Returns `[]` immediately when `dutchieConfig.useDiscovery` is falsy;
 *   `discoverDispensaries` would return an empty list for every centerpoint
 *   anyway, so scanning (and sleeping between scans) would be wasted time.
 * - Only dispensaries whose `state` (or `address.state`) is `'AZ'` or
 *   `'Arizona'` are kept.
 * - Entries are de-duplicated by `slug`, then `cName`, then `id`; the first
 *   occurrence wins.
 * - A 2 second pause is inserted between consecutive scans (not after the
 *   last one). A failed scan is logged and does not abort the others.
 *
 * @returns The unique Arizona dispensaries found across all centerpoints.
 */
export async function discoverArizonaDispensaries(): Promise<any[]> {
  if (!dutchieConfig.useDiscovery) {
    console.log('[GraphQL Client] Discovery disabled in config, skipping Arizona scan');
    return [];
  }

  const allDispensaries = new Map<string, any>();
  let isFirstScan = true;

  for (const center of ARIZONA_CENTERPOINTS) {
    // Pause between scans only - never before the first or after the last
    if (!isFirstScan) {
      await new Promise((r) => setTimeout(r, 2000));
    }
    isFirstScan = false;

    console.log(`[GraphQL Client] Scanning ${center.name}...`);

    try {
      const dispensaries = await discoverDispensaries(center.lat, center.lng, 150);

      for (const disp of dispensaries) {
        // Filter to AZ only
        const state = disp.state || disp.address?.state;
        if (state !== 'AZ' && state !== 'Arizona') continue;

        const key = disp.slug || disp.cName || disp.id;
        if (key && !allDispensaries.has(key)) {
          allDispensaries.set(key, disp);
        }
      }
    } catch (error: unknown) {
      // Thrown values are not guaranteed to be Error instances
      const message = error instanceof Error ? error.message : String(error);
      console.error(`[GraphQL Client] Failed to scan ${center.name}:`, message);
    }
  }

  const result = Array.from(allDispensaries.values());
  console.log(`[GraphQL Client] Total unique AZ dispensaries: ${result.length}`);
  return result;
}
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
import { query, getClient } from '../db/connection';
|
import { query, getClient } from '../db/connection';
|
||||||
import { fetchAllProducts, fetchAllProductsBothModes } from './graphql-client';
|
import { fetchAllProducts, fetchAllProductsBothModes } from './graphql-client';
|
||||||
|
import { mapDbRowToDispensary } from './discovery';
|
||||||
import {
|
import {
|
||||||
DutchieRawProduct,
|
DutchieRawProduct,
|
||||||
DutchieProduct,
|
DutchieProduct,
|
||||||
@@ -49,6 +50,71 @@ function getMax(arr?: number[]): number | undefined {
|
|||||||
return Math.max(...arr.filter((n) => n !== null && n !== undefined));
|
return Math.max(...arr.filter((n) => n !== null && n !== undefined));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Serialized object values normBool has already warned about, so each
// distinct unexpected value is reported once instead of once per product.
const normBoolWarned = new Set<string>();
// Upper bound on distinct warnings, so the dedupe set cannot grow without limit.
const NORM_BOOL_MAX_WARNINGS = 100;

/**
 * Normalize a value to a strict boolean.
 *
 * Only the literals `true` and `false` pass through. Anything else (null,
 * undefined, strings, numbers, `{}`, `[]`, ...) becomes `defaultVal`, so
 * non-boolean API values never reach a boolean column and trigger
 * "invalid input syntax for type boolean" errors.
 *
 * Object/array values are reported via `console.warn` for debugging; each
 * distinct serialized value is reported only once, and at most
 * `NORM_BOOL_MAX_WARNINGS` distinct values are reported in total.
 *
 * @param v - Raw value to normalize.
 * @param defaultVal - Result for any non-boolean input (default `false`).
 * @returns `v` when it is a real boolean, otherwise `defaultVal`.
 */
function normBool(v: any, defaultVal: boolean = false): boolean {
  if (v === true) return true;
  if (v === false) return false;

  if (v !== null && v !== undefined && typeof v === 'object') {
    // JSON.stringify throws on circular structures; logging must never
    // break normalization, so fall back to a generic type tag.
    let repr: string;
    try {
      repr = JSON.stringify(v) ?? Object.prototype.toString.call(v);
    } catch {
      repr = Object.prototype.toString.call(v);
    }

    if (!normBoolWarned.has(repr) && normBoolWarned.size < NORM_BOOL_MAX_WARNINGS) {
      normBoolWarned.add(repr);
      console.warn(`[normBool] Unexpected object value, coercing to ${defaultVal}:`, repr);
    }
  }

  return defaultVal;
}
|
||||||
|
|
||||||
|
/**
 * Normalize a value to a valid `Date`, or `undefined` when it cannot be one.
 *
 * Accepted inputs are non-empty strings, non-zero finite numbers and `Date`
 * instances that parse to a valid time. Everything else is rejected so it
 * cannot cause "invalid input syntax for type timestamp" errors:
 * - falsy values (null, undefined, '', 0, NaN) -> `undefined`, silently
 * - objects/arrays that are not `Date` -> `undefined`, with a warning
 * - booleans, bigints, symbols, functions -> `undefined`, with a warning
 *   (`new Date(true)` would otherwise yield a bogus 1970 timestamp and
 *   `new Date(<bigint|symbol>)` would throw a TypeError)
 * - unparseable strings / invalid `Date` instances -> `undefined`, with a warning
 *
 * @param v - Raw value to normalize.
 * @returns A new `Date` for valid input, otherwise `undefined`.
 */
function normDate(v: any): Date | undefined {
  if (!v) return undefined;

  const isDateInstance = v instanceof Date;

  // Reject objects/arrays that aren't dates
  if (typeof v === 'object' && !isDateInstance) {
    // JSON.stringify throws on circular structures; never let logging fail
    let repr: string;
    try {
      repr = JSON.stringify(v) ?? Object.prototype.toString.call(v);
    } catch {
      repr = Object.prototype.toString.call(v);
    }
    console.warn(`[normDate] Unexpected object value, ignoring:`, repr);
    return undefined;
  }

  // Only strings and numbers are meaningful Date constructor inputs
  if (!isDateInstance && typeof v !== 'string' && typeof v !== 'number') {
    console.warn(`[normDate] Unexpected ${typeof v} value, ignoring:`, String(v));
    return undefined;
  }

  // Try parsing
  const d = new Date(v);
  if (isNaN(d.getTime())) {
    console.warn(`[normDate] Invalid date value, ignoring:`, v);
    return undefined;
  }
  return d;
}
|
||||||
|
|
||||||
|
/**
 * Extract the cName (Dutchie slug) for a dispensary from its menuUrl,
 * falling back to `dispensary.slug`.
 *
 * Recognized URL layouts:
 *   - https://dutchie.com/embedded-menu/AZ-Deeply-Rooted -> AZ-Deeply-Rooted
 *   - https://dutchie.com/dispensary/sol-flower-dispensary-mcclintock -> sol-flower-dispensary-mcclintock
 *
 * When the path contains `embedded-menu` or `dispensary`, the segment right
 * after it is used, so trailing sub-pages (e.g. `/dispensary/<cName>/products`)
 * do not get mistaken for the cName. If such a prefix is present but nothing
 * follows it, the slug fallback is used. For any other path with at least two
 * segments, the last segment is used.
 *
 * @param dispensary - Dispensary whose `menuUrl` and `slug` are read.
 * @returns The extracted cName, or `dispensary.slug` when extraction fails.
 */
function extractCName(dispensary: Dispensary): string {
  if (dispensary.menuUrl) {
    try {
      const url = new URL(dispensary.menuUrl);
      const segments = url.pathname.split('/').filter(Boolean);

      // Prefer the segment immediately following a known menu prefix
      const prefixIndex = segments.findIndex((segment) => {
        const lowered = segment.toLowerCase();
        return lowered === 'embedded-menu' || lowered === 'dispensary';
      });

      let cName: string | undefined;
      if (prefixIndex !== -1) {
        cName = segments[prefixIndex + 1];
      } else if (segments.length >= 2) {
        // Unrecognized layout: keep the last-path-segment behavior
        cName = segments[segments.length - 1];
      }

      if (cName) {
        console.log(`[ProductCrawler] Extracted cName "${cName}" from menuUrl`);
        return cName;
      }
    } catch {
      // new URL() throws on malformed input; fall through to the slug
      console.warn(`[ProductCrawler] Failed to parse menuUrl: ${dispensary.menuUrl}`);
    }
  }

  // Fallback to slug
  console.log(`[ProductCrawler] Using dispensary slug "${dispensary.slug}" as cName`);
  return dispensary.slug;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Normalize a POSMetaData.children entry to DutchieProductOptionSnapshot
|
* Normalize a POSMetaData.children entry to DutchieProductOptionSnapshot
|
||||||
*/
|
*/
|
||||||
@@ -120,16 +186,16 @@ export function normalizeProduct(
|
|||||||
|
|
||||||
// Status / flags
|
// Status / flags
|
||||||
status: raw.Status,
|
status: raw.Status,
|
||||||
medicalOnly: raw.medicalOnly || false,
|
medicalOnly: normBool(raw.medicalOnly, false),
|
||||||
recOnly: raw.recOnly || false,
|
recOnly: normBool(raw.recOnly, false),
|
||||||
featured: raw.featured || false,
|
featured: normBool(raw.featured, false),
|
||||||
comingSoon: raw.comingSoon || false,
|
comingSoon: normBool(raw.comingSoon, false),
|
||||||
certificateOfAnalysisEnabled: raw.certificateOfAnalysisEnabled || false,
|
certificateOfAnalysisEnabled: normBool(raw.certificateOfAnalysisEnabled, false),
|
||||||
|
|
||||||
isBelowThreshold: raw.isBelowThreshold || false,
|
isBelowThreshold: normBool(raw.isBelowThreshold, false),
|
||||||
isBelowKioskThreshold: raw.isBelowKioskThreshold || false,
|
isBelowKioskThreshold: normBool(raw.isBelowKioskThreshold, false),
|
||||||
optionsBelowThreshold: raw.optionsBelowThreshold || false,
|
optionsBelowThreshold: normBool(raw.optionsBelowThreshold, false),
|
||||||
optionsBelowKioskThreshold: raw.optionsBelowKioskThreshold || false,
|
optionsBelowKioskThreshold: normBool(raw.optionsBelowKioskThreshold, false),
|
||||||
|
|
||||||
// Derived stock status
|
// Derived stock status
|
||||||
stockStatus: deriveStockStatus(raw),
|
stockStatus: deriveStockStatus(raw),
|
||||||
@@ -144,8 +210,8 @@ export function normalizeProduct(
|
|||||||
weight: typeof raw.weight === 'number' ? String(raw.weight) : raw.weight,
|
weight: typeof raw.weight === 'number' ? String(raw.weight) : raw.weight,
|
||||||
pastCNames: raw.pastCNames,
|
pastCNames: raw.pastCNames,
|
||||||
|
|
||||||
createdAtDutchie: raw.createdAt ? new Date(raw.createdAt) : undefined,
|
createdAtDutchie: normDate(raw.createdAt),
|
||||||
updatedAtDutchie: raw.updatedAt ? new Date(raw.updatedAt) : undefined,
|
updatedAtDutchie: normDate(raw.updatedAt),
|
||||||
|
|
||||||
latestRawPayload: raw,
|
latestRawPayload: raw,
|
||||||
};
|
};
|
||||||
@@ -200,10 +266,10 @@ export function normalizeSnapshot(
|
|||||||
crawlMode,
|
crawlMode,
|
||||||
|
|
||||||
status: raw.Status,
|
status: raw.Status,
|
||||||
featured: raw.featured || false,
|
featured: normBool(raw.featured, false),
|
||||||
special: isOnSpecial,
|
special: normBool(isOnSpecial, false),
|
||||||
medicalOnly: raw.medicalOnly || false,
|
medicalOnly: normBool(raw.medicalOnly, false),
|
||||||
recOnly: raw.recOnly || false,
|
recOnly: normBool(raw.recOnly, false),
|
||||||
|
|
||||||
// Product was present in feed
|
// Product was present in feed
|
||||||
isPresentInFeed: true,
|
isPresentInFeed: true,
|
||||||
@@ -223,9 +289,9 @@ export function normalizeSnapshot(
|
|||||||
// Inventory summary - null = unknown, 0 = all OOS
|
// Inventory summary - null = unknown, 0 = all OOS
|
||||||
totalQuantityAvailable: totalQty,
|
totalQuantityAvailable: totalQty,
|
||||||
totalKioskQuantityAvailable: totalKioskQty,
|
totalKioskQuantityAvailable: totalKioskQty,
|
||||||
manualInventory: raw.manualInventory || false,
|
manualInventory: normBool(raw.manualInventory, false),
|
||||||
isBelowThreshold: raw.isBelowThreshold || false,
|
isBelowThreshold: normBool(raw.isBelowThreshold, false),
|
||||||
isBelowKioskThreshold: raw.isBelowKioskThreshold || false,
|
isBelowKioskThreshold: normBool(raw.isBelowKioskThreshold, false),
|
||||||
|
|
||||||
options,
|
options,
|
||||||
rawPayload: raw,
|
rawPayload: raw,
|
||||||
@@ -469,13 +535,15 @@ async function updateDispensaryCrawlStats(
|
|||||||
dispensaryId: number,
|
dispensaryId: number,
|
||||||
productCount: number
|
productCount: number
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
|
// Update last_crawl_at to track when we last crawled
|
||||||
|
// Skip product_count as that column may not exist
|
||||||
await query(
|
await query(
|
||||||
`
|
`
|
||||||
UPDATE dispensaries
|
UPDATE dispensaries
|
||||||
SET last_crawled_at = NOW(), product_count = $2, updated_at = NOW()
|
SET last_crawl_at = NOW(), updated_at = NOW()
|
||||||
WHERE id = $1
|
WHERE id = $1
|
||||||
`,
|
`,
|
||||||
[dispensaryId, productCount]
|
[dispensaryId]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -701,11 +769,16 @@ export async function crawlDispensaryProducts(
|
|||||||
const modeAProductIds = new Set<string>();
|
const modeAProductIds = new Set<string>();
|
||||||
const modeBProductIds = new Set<string>();
|
const modeBProductIds = new Set<string>();
|
||||||
|
|
||||||
|
// Extract cName for this specific dispensary (used for Puppeteer session & headers)
|
||||||
|
const cName = extractCName(dispensary);
|
||||||
|
console.log(`[ProductCrawler] Using cName="${cName}" for dispensary ${dispensary.name}`);
|
||||||
|
|
||||||
if (useBothModes) {
|
if (useBothModes) {
|
||||||
// Run two-mode crawl for maximum coverage
|
// Run two-mode crawl for maximum coverage
|
||||||
const bothResults = await fetchAllProductsBothModes(
|
const bothResults = await fetchAllProductsBothModes(
|
||||||
dispensary.platformDispensaryId,
|
dispensary.platformDispensaryId,
|
||||||
pricingType
|
pricingType,
|
||||||
|
{ cName }
|
||||||
);
|
);
|
||||||
|
|
||||||
modeAProducts = bothResults.modeA.products.length;
|
modeAProducts = bothResults.modeA.products.length;
|
||||||
@@ -742,7 +815,7 @@ export async function crawlDispensaryProducts(
|
|||||||
const { products, crawlMode } = await fetchAllProducts(
|
const { products, crawlMode } = await fetchAllProducts(
|
||||||
dispensary.platformDispensaryId,
|
dispensary.platformDispensaryId,
|
||||||
pricingType,
|
pricingType,
|
||||||
{ crawlMode: 'mode_a' }
|
{ crawlMode: 'mode_a', cName }
|
||||||
);
|
);
|
||||||
|
|
||||||
modeAProducts = products.length;
|
modeAProducts = products.length;
|
||||||
@@ -811,13 +884,14 @@ export async function crawlAllArizonaDispensaries(
|
|||||||
const results: CrawlResult[] = [];
|
const results: CrawlResult[] = [];
|
||||||
|
|
||||||
// Get all AZ dispensaries with platform IDs
|
// Get all AZ dispensaries with platform IDs
|
||||||
const { rows: dispensaries } = await query<Dispensary>(
|
const { rows: rawRows } = await query(
|
||||||
`
|
`
|
||||||
SELECT * FROM dispensaries
|
SELECT * FROM dispensaries
|
||||||
WHERE state = 'AZ' AND menu_type = 'dutchie' AND platform_dispensary_id IS NOT NULL
|
WHERE state = 'AZ' AND menu_type = 'dutchie' AND platform_dispensary_id IS NOT NULL
|
||||||
ORDER BY id
|
ORDER BY id
|
||||||
`
|
`
|
||||||
);
|
);
|
||||||
|
const dispensaries = rawRows.map(mapDbRowToDispensary);
|
||||||
|
|
||||||
console.log(`[ProductCrawler] Starting crawl of ${dispensaries.length} dispensaries...`);
|
console.log(`[ProductCrawler] Starting crawl of ${dispensaries.length} dispensaries...`);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user