// Commit note: The job_run_logs table tracks scheduled job orchestration, not individual worker jobs.
// Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs.
// 🤖 Generated with [Claude Code](https://claude.com/claude-code)
// Co-Authored-By: Claude <noreply@anthropic.com>
// File view: 228 lines · 6.3 KiB · TypeScript
import fs from 'fs/promises';
import path from 'path';
import { chromium } from 'playwright-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';

// Register the stealth plugin on playwright-extra's chromium at module load,
// i.e. before fetchAllProducts() launches any browser.
// NOTE(review): presumably this is to reduce bot detection on the target site — confirm.
chromium.use(StealthPlugin());

/**
 * One purchasable variant of a product, as produced by `normalizeProducts`
 * from an entry of the raw product's `POSMetaData.children` array.
 * Every field is optional and may be `null` when the source entry lacks it.
 */
type Option = {
  /** Variant label, copied from the child's `option` field. */
  option?: string | null;
  /** First non-nullish of the child's `recPrice`, `price`, `medPrice`. */
  price?: number | null;
  /** First non-nullish of the child's `quantity`, `quantityAvailable`. */
  quantity?: number | null;
  /** Copied from the child's `kioskQuantityAvailable`. */
  kioskQuantity?: number | null;
};

/**
 * Normalized product record written to the output JSON file.
 * Built by `normalizeProducts` from a raw `FilteredProducts` GraphQL item.
 */
type Product = {
  /** Raw `id`, falling back to raw `_id`. */
  id: string;
  /** Raw `cName`. */
  slug?: string;
  /** Raw `Name`. */
  name: string;
  /** Raw `brandName`, falling back to `brand.name`. */
  brand?: string;
  /** Raw `type`. */
  type?: string;
  /** Raw `subcategory`. */
  category?: string;
  /** Raw `strainType`. */
  strainType?: string | null;
  /** Raw `Status`. */
  status?: string | null;
  /** Headline price chosen by `normalizeProducts` (see its price fallback chain). */
  price?: number | null;
  /** First recreational or medical special price, if any. */
  specialPrice?: number | null;
  /** Image URL, or `null` when none is available. */
  image?: string | null;
  /** Availability flag derived from option quantities and `isBelowThreshold`. */
  inStock: boolean;
  /** Per-variant price and quantity details. */
  options: Option[];
  /** Slot for the untouched source item; `normalizeProducts` currently leaves it `undefined`. */
  raw?: any;
};

/** Path segment of the Dutchie embedded-menu URL that is opened before querying. */
const DISPENSARY_SLUG = 'AZ-Deeply-Rooted';

/** Value sent as `productsFilter.dispensaryId` in the GraphQL variables. */
const DISPENSARY_ID = '6405ef617056e8014d79101b';

/** `sha256Hash` of the `FilteredProducts` persisted query sent in the `extensions` parameter. */
const HASH_FILTERED_PRODUCTS = 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0';

/** Output directory, resolved against the current working directory. */
const OUTPUT_DIR = path.join(process.cwd(), 'scrape-output', 'deeply-rooted');

/** JSON file that receives the normalized product list. */
const OUTPUT_FILE = path.join(OUTPUT_DIR, 'graphql-products.json');

/**
 * Creates `OUTPUT_DIR`, including any missing parent directories.
 * With `recursive: true`, an already-existing directory is not an error.
 */
async function ensureOutputDir(): Promise<void> {
  await fs.mkdir(OUTPUT_DIR, { recursive: true });
}

/**
 * Downloads the dispensary's product list through Dutchie's `FilteredProducts`
 * persisted GraphQL query and returns it in normalized form.
 *
 * A headless Chromium page is opened on the embedded menu first, and the
 * GraphQL requests are issued with `fetch` from inside that page so they carry
 * the page's cookies (`credentials: 'include'`) and the `dutchie-session`
 * value read from `localStorage`.
 *
 * Pagination stops at the first non-OK response, at the first page that
 * returns fewer than `perPage` items, or after 40 pages. In each case the
 * products collected so far are returned.
 *
 * @returns Normalized products (see `normalizeProducts`).
 * @throws If the browser cannot be launched, the page cannot be loaded, or the
 *   in-page script throws. The browser is closed in every case.
 */
async function fetchAllProducts(): Promise<Product[]> {
  const browser = await chromium.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally guarantees the Chromium process is shut down even when
  // navigation or the in-page script fails; otherwise it would be leaked.
  try {
    const context = await browser.newContext({
      viewport: { width: 1300, height: 900 },
      userAgent:
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.7390.37 Safari/537.36',
    });

    const page = await context.newPage();
    await page.goto(`https://dutchie.com/embedded-menu/${DISPENSARY_SLUG}`, {
      waitUntil: 'domcontentloaded',
      timeout: 90000,
    });
    // NOTE(review): fixed 3s pause after DOMContentLoaded — presumably gives the
    // page time to populate `dutchie-session` in localStorage; confirm.
    await page.waitForTimeout(3000);

    // Everything inside this callback runs in the browser page, not in Node.
    const products: any[] = await page.evaluate(
      async ({ dispensaryId, hash }) => {
        const sessionRaw = localStorage.getItem('dutchie-session');
        // Strip one leading and one trailing double quote from the stored value.
        const session = sessionRaw ? sessionRaw.replace(/^\"|\"$/g, '') : '';

        const all: any[] = [];
        const perPage = 100;

        // Hard cap of 40 pages (at most 4000 products); anything beyond is dropped.
        for (let pageIdx = 0; pageIdx < 40; pageIdx++) {
          const variables = {
            includeEnterpriseSpecials: false,
            productsFilter: {
              dispensaryId,
              pricingType: 'rec',
              Status: 'Active', // set to null to try to include inactive if exposed
              types: [],
              useCache: true,
              isDefaultSort: true,
              sortBy: 'popularSortIdx',
              sortDirection: 1,
              bypassOnlineThresholds: true,
              isKioskMenu: false,
              removeProductsBelowOptionThresholds: false,
            },
            page: pageIdx,
            perPage,
          };

          // Persisted-query GET: the query text is identified by its hash only.
          const qs = new URLSearchParams({
            operationName: 'FilteredProducts',
            variables: JSON.stringify(variables),
            extensions: JSON.stringify({
              persistedQuery: { version: 1, sha256Hash: hash },
            }),
          });

          const url = `https://dutchie.com/api-3/graphql?${qs.toString()}`;
          const res = await fetch(url, {
            headers: {
              'apollographql-client-name': 'Marketplace (production)',
              'x-dutchie-session': session,
              'content-type': 'application/json',
            },
            credentials: 'include',
          });

          if (!res.ok) {
            // This warning is written to the page's console, not to Node's output.
            console.warn(`Request failed ${res.status} on page ${pageIdx}`);
            break;
          }

          const json = await res.json();
          const chunk = json?.data?.filteredProducts?.products || [];
          all.push(...chunk);

          // A short (or empty) page means this was the last one.
          if (chunk.length < perPage) break;
        }

        return all;
      },
      { dispensaryId: DISPENSARY_ID, hash: HASH_FILTERED_PRODUCTS }
    );

    return normalizeProducts(products);
  } finally {
    await browser.close();
  }
}

/**
 * Converts raw `FilteredProducts` items into `Product` records.
 *
 * @param items Raw product objects as returned by the GraphQL endpoint.
 * @returns One `Product` per input item, in the same order.
 */
function normalizeProducts(items: any[]): Product[] {
  return items.map((p) => {
    // One Option per POS child entry; an absent children list yields [].
    const options: Option[] =
      p?.POSMetaData?.children?.map((child: any) => ({
        option: child.option ?? null,
        price: child.recPrice ?? child.price ?? child.medPrice ?? null,
        quantity: child.quantity ?? child.quantityAvailable ?? null,
        kioskQuantity: child.kioskQuantityAvailable ?? null,
      })) || [];

    // First available of: rec special price, rec price, generic price.
    // NOTE(review): a rec special price, when present, becomes `price` as well
    // as `specialPrice` — confirm this is intended.
    const basePrice =
      (p.recSpecialPrices && p.recSpecialPrices[0]) ??
      (p.recPrices && p.recPrices[0]) ??
      (p.Prices && p.Prices[0]) ??
      null;

    // Prefer the top-level Image; otherwise the URL of the first active entry
    // in `images`. `img?.active` tolerates null entries in that array.
    const image =
      p.Image ||
      (p.images && p.images.find((img: any) => img?.active)?.url) ||
      null;

    // In stock when any option has positive (kiosk) quantity, or the product
    // is not flagged as below threshold.
    // NOTE(review): a missing `isBelowThreshold` therefore counts as in stock.
    const hasQuantity = options.some(
      (o) => (o.quantity ?? 0) > 0 || (o.kioskQuantity ?? 0) > 0
    );
    const inStock = hasQuantity || !p.isBelowThreshold;

    return {
      id: p.id || p._id,
      slug: p.cName,
      name: p.Name,
      brand: p.brandName || p.brand?.name,
      type: p.type,
      category: p.subcategory,
      strainType: p.strainType,
      status: p.Status,
      price: basePrice,
      specialPrice:
        (p.recSpecialPrices && p.recSpecialPrices[0]) ||
        (p.medicalSpecialPrices && p.medicalSpecialPrices[0]) ||
        null,
      image,
      inStock,
      options,
      raw: undefined,
    };
  });
}

/**
 * Computes headline statistics for a product list.
 *
 * @param products Normalized products.
 * @returns Total count, in-stock and out-of-stock counts, and up to ten
 *   `[brand, count]` pairs sorted by descending count. Products whose brand is
 *   missing, empty, or whitespace-only are grouped under `'Unknown'`.
 */
function summarize(products: Product[]): {
  total: number;
  inStock: number;
  outOfStock: number;
  topBrands: [string, number][];
} {
  const total = products.length;
  const inStock = products.filter((p) => p.inStock).length;

  const byBrand = new Map<string, number>();
  for (const p of products) {
    // Trim before applying the fallback so a blank brand such as '  ' lands in
    // 'Unknown' instead of creating an empty-string bucket.
    const key = p.brand?.trim() || 'Unknown';
    byBrand.set(key, (byBrand.get(key) ?? 0) + 1);
  }

  const topBrands = Array.from(byBrand.entries())
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10);

  return { total, inStock, outOfStock: total - inStock, topBrands };
}

/**
 * Builds a compact preview of the first `n` products for console output.
 *
 * @param products Normalized products.
 * @param n Maximum number of products to include (default 5). Negative values
 *   are treated as 0 instead of being read as an offset from the end.
 * @returns Objects carrying only name, brand, price, specialPrice, inStock and
 *   options. `options` is the same array instance as on the product.
 */
function formatSample(products: Product[], n = 5) {
  const count = Math.max(0, n);
  return products.slice(0, count).map((p) => ({
    name: p.name,
    brand: p.brand,
    price: p.price,
    specialPrice: p.specialPrice,
    inStock: p.inStock,
    options: p.options,
  }));
}

/**
 * Script entry point: ensures the output directory exists, scrapes and
 * normalizes all products, writes them to `OUTPUT_FILE` as pretty-printed
 * JSON, then logs a summary and a five-product sample.
 */
async function main(): Promise<void> {
  await ensureOutputDir();

  const products = await fetchAllProducts();
  await fs.writeFile(OUTPUT_FILE, JSON.stringify(products, null, 2));

  const summary = summarize(products);
  console.log(`Saved ${products.length} products to ${OUTPUT_FILE}`);
  console.log(`In stock: ${summary.inStock} | Out of stock: ${summary.outOfStock}`);
  console.log('Top brands:', summary.topBrands);
  console.log('Sample:', JSON.stringify(formatSample(products, 5), null, 2));
}

// Run the scrape; on any failure log the error and exit with status 1.
main().catch((err) => {
  console.error('GraphQL scrape failed:', err);
  process.exit(1);
});