fix(monitor): remove non-existent worker columns from job_run_logs query
The job_run_logs table tracks scheduled job orchestration, not individual worker jobs. Worker info (worker_id, worker_hostname) belongs on dispensary_crawl_jobs, not job_run_logs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
279
backend/dist/scripts/scrape-all-active.js
vendored
Normal file
279
backend/dist/scripts/scrape-all-active.js
vendored
Normal file
@@ -0,0 +1,279 @@
|
||||
"use strict";
|
||||
/**
|
||||
* Scrape ALL active products via direct GraphQL pagination
|
||||
* This is more reliable than category navigation
|
||||
*/
|
||||
var __importDefault = (this && this.__importDefault) || function (mod) {
|
||||
return (mod && mod.__esModule) ? mod : { "default": mod };
|
||||
};
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
const puppeteer_extra_1 = __importDefault(require("puppeteer-extra"));
|
||||
const puppeteer_extra_plugin_stealth_1 = __importDefault(require("puppeteer-extra-plugin-stealth"));
|
||||
const pg_1 = require("pg");
|
||||
const dutchie_graphql_1 = require("../scrapers/dutchie-graphql");
|
||||
puppeteer_extra_1.default.use((0, puppeteer_extra_plugin_stealth_1.default)());
|
||||
const DATABASE_URL = process.env.DATABASE_URL || 'postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus';
|
||||
const GRAPHQL_HASH = 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0';
|
||||
async function scrapeAllProducts(menuUrl, storeId) {
|
||||
const pool = new pg_1.Pool({ connectionString: DATABASE_URL });
|
||||
const browser = await puppeteer_extra_1.default.launch({
|
||||
headless: 'new',
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
});
|
||||
try {
|
||||
const page = await browser.newPage();
|
||||
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36');
|
||||
console.log('Loading menu to establish session...');
|
||||
await page.goto(menuUrl, {
|
||||
waitUntil: 'networkidle2',
|
||||
timeout: 60000,
|
||||
});
|
||||
await new Promise((r) => setTimeout(r, 3000));
|
||||
const dispensaryId = await page.evaluate(() => window.reactEnv?.dispensaryId);
|
||||
console.log('Dispensary ID:', dispensaryId);
|
||||
// Paginate through all products
|
||||
const allProducts = [];
|
||||
let pageNum = 0;
|
||||
const perPage = 100;
|
||||
console.log('\nFetching all products via paginated GraphQL...');
|
||||
while (true) {
|
||||
const result = await page.evaluate(async (dispId, hash, page, perPage) => {
|
||||
const variables = {
|
||||
includeEnterpriseSpecials: false,
|
||||
productsFilter: {
|
||||
dispensaryId: dispId,
|
||||
pricingType: 'rec',
|
||||
Status: 'Active',
|
||||
types: [],
|
||||
useCache: false,
|
||||
isDefaultSort: true,
|
||||
sortBy: 'popularSortIdx',
|
||||
sortDirection: 1,
|
||||
bypassOnlineThresholds: true,
|
||||
isKioskMenu: false,
|
||||
removeProductsBelowOptionThresholds: false,
|
||||
},
|
||||
page,
|
||||
perPage,
|
||||
};
|
||||
const qs = new URLSearchParams({
|
||||
operationName: 'FilteredProducts',
|
||||
variables: JSON.stringify(variables),
|
||||
extensions: JSON.stringify({ persistedQuery: { version: 1, sha256Hash: hash } }),
|
||||
});
|
||||
const resp = await fetch(`https://dutchie.com/graphql?${qs.toString()}`, {
|
||||
method: 'GET',
|
||||
headers: {
|
||||
'content-type': 'application/json',
|
||||
'apollographql-client-name': 'Marketplace (production)',
|
||||
},
|
||||
credentials: 'include',
|
||||
});
|
||||
const json = await resp.json();
|
||||
return {
|
||||
products: json?.data?.filteredProducts?.products || [],
|
||||
totalCount: json?.data?.filteredProducts?.queryInfo?.totalCount,
|
||||
};
|
||||
}, dispensaryId, GRAPHQL_HASH, pageNum, perPage);
|
||||
if (result.products.length === 0) {
|
||||
break;
|
||||
}
|
||||
allProducts.push(...result.products);
|
||||
console.log(`Page ${pageNum}: ${result.products.length} products (total so far: ${allProducts.length}/${result.totalCount})`);
|
||||
pageNum++;
|
||||
// Safety limit
|
||||
if (pageNum > 50) {
|
||||
console.log('Reached page limit');
|
||||
break;
|
||||
}
|
||||
}
|
||||
console.log(`\nTotal products fetched: ${allProducts.length}`);
|
||||
// Normalize and upsert
|
||||
console.log('\nNormalizing and upserting to database...');
|
||||
const normalized = allProducts.map(dutchie_graphql_1.normalizeDutchieProduct);
|
||||
const client = await pool.connect();
|
||||
let inserted = 0;
|
||||
let updated = 0;
|
||||
try {
|
||||
await client.query('BEGIN');
|
||||
for (const product of normalized) {
|
||||
const result = await client.query(`
|
||||
INSERT INTO products (
|
||||
store_id, external_id, slug, name, enterprise_product_id,
|
||||
brand, brand_external_id, brand_logo_url,
|
||||
subcategory, strain_type, canonical_category,
|
||||
price, rec_price, med_price, rec_special_price, med_special_price,
|
||||
is_on_special, special_name, discount_percent, special_data,
|
||||
sku, inventory_quantity, inventory_available, is_below_threshold, status,
|
||||
thc_percentage, cbd_percentage, cannabinoids,
|
||||
weight_mg, net_weight_value, net_weight_unit, options, raw_options,
|
||||
image_url, additional_images,
|
||||
is_featured, medical_only, rec_only,
|
||||
source_created_at, source_updated_at,
|
||||
description, raw_data,
|
||||
dutchie_url, last_seen_at, updated_at
|
||||
)
|
||||
VALUES (
|
||||
$1, $2, $3, $4, $5,
|
||||
$6, $7, $8,
|
||||
$9, $10, $11,
|
||||
$12, $13, $14, $15, $16,
|
||||
$17, $18, $19, $20,
|
||||
$21, $22, $23, $24, $25,
|
||||
$26, $27, $28,
|
||||
$29, $30, $31, $32, $33,
|
||||
$34, $35,
|
||||
$36, $37, $38,
|
||||
$39, $40,
|
||||
$41, $42,
|
||||
'', NOW(), NOW()
|
||||
)
|
||||
ON CONFLICT (store_id, slug) DO UPDATE SET
|
||||
name = EXCLUDED.name,
|
||||
enterprise_product_id = EXCLUDED.enterprise_product_id,
|
||||
brand = EXCLUDED.brand,
|
||||
brand_external_id = EXCLUDED.brand_external_id,
|
||||
brand_logo_url = EXCLUDED.brand_logo_url,
|
||||
subcategory = EXCLUDED.subcategory,
|
||||
strain_type = EXCLUDED.strain_type,
|
||||
canonical_category = EXCLUDED.canonical_category,
|
||||
price = EXCLUDED.price,
|
||||
rec_price = EXCLUDED.rec_price,
|
||||
med_price = EXCLUDED.med_price,
|
||||
rec_special_price = EXCLUDED.rec_special_price,
|
||||
med_special_price = EXCLUDED.med_special_price,
|
||||
is_on_special = EXCLUDED.is_on_special,
|
||||
special_name = EXCLUDED.special_name,
|
||||
discount_percent = EXCLUDED.discount_percent,
|
||||
special_data = EXCLUDED.special_data,
|
||||
sku = EXCLUDED.sku,
|
||||
inventory_quantity = EXCLUDED.inventory_quantity,
|
||||
inventory_available = EXCLUDED.inventory_available,
|
||||
is_below_threshold = EXCLUDED.is_below_threshold,
|
||||
status = EXCLUDED.status,
|
||||
thc_percentage = EXCLUDED.thc_percentage,
|
||||
cbd_percentage = EXCLUDED.cbd_percentage,
|
||||
cannabinoids = EXCLUDED.cannabinoids,
|
||||
weight_mg = EXCLUDED.weight_mg,
|
||||
net_weight_value = EXCLUDED.net_weight_value,
|
||||
net_weight_unit = EXCLUDED.net_weight_unit,
|
||||
options = EXCLUDED.options,
|
||||
raw_options = EXCLUDED.raw_options,
|
||||
image_url = EXCLUDED.image_url,
|
||||
additional_images = EXCLUDED.additional_images,
|
||||
is_featured = EXCLUDED.is_featured,
|
||||
medical_only = EXCLUDED.medical_only,
|
||||
rec_only = EXCLUDED.rec_only,
|
||||
source_created_at = EXCLUDED.source_created_at,
|
||||
source_updated_at = EXCLUDED.source_updated_at,
|
||||
description = EXCLUDED.description,
|
||||
raw_data = EXCLUDED.raw_data,
|
||||
last_seen_at = NOW(),
|
||||
updated_at = NOW()
|
||||
RETURNING (xmax = 0) AS was_inserted
|
||||
`, [
|
||||
storeId,
|
||||
product.external_id,
|
||||
product.slug,
|
||||
product.name,
|
||||
product.enterprise_product_id,
|
||||
product.brand,
|
||||
product.brand_external_id,
|
||||
product.brand_logo_url,
|
||||
product.subcategory,
|
||||
product.strain_type,
|
||||
product.canonical_category,
|
||||
product.price,
|
||||
product.rec_price,
|
||||
product.med_price,
|
||||
product.rec_special_price,
|
||||
product.med_special_price,
|
||||
product.is_on_special,
|
||||
product.special_name,
|
||||
product.discount_percent,
|
||||
product.special_data ? JSON.stringify(product.special_data) : null,
|
||||
product.sku,
|
||||
product.inventory_quantity,
|
||||
product.inventory_available,
|
||||
product.is_below_threshold,
|
||||
product.status,
|
||||
product.thc_percentage,
|
||||
product.cbd_percentage,
|
||||
product.cannabinoids ? JSON.stringify(product.cannabinoids) : null,
|
||||
product.weight_mg,
|
||||
product.net_weight_value,
|
||||
product.net_weight_unit,
|
||||
product.options,
|
||||
product.raw_options,
|
||||
product.image_url,
|
||||
product.additional_images,
|
||||
product.is_featured,
|
||||
product.medical_only,
|
||||
product.rec_only,
|
||||
product.source_created_at,
|
||||
product.source_updated_at,
|
||||
product.description,
|
||||
product.raw_data ? JSON.stringify(product.raw_data) : null,
|
||||
]);
|
||||
if (result.rows[0]?.was_inserted) {
|
||||
inserted++;
|
||||
}
|
||||
else {
|
||||
updated++;
|
||||
}
|
||||
}
|
||||
await client.query('COMMIT');
|
||||
}
|
||||
catch (error) {
|
||||
await client.query('ROLLBACK');
|
||||
throw error;
|
||||
}
|
||||
finally {
|
||||
client.release();
|
||||
}
|
||||
console.log(`\nDatabase: ${inserted} inserted, ${updated} updated`);
|
||||
// Show summary stats
|
||||
const stats = await pool.query(`
|
||||
SELECT
|
||||
COUNT(*) as total,
|
||||
COUNT(*) FILTER (WHERE is_on_special) as specials,
|
||||
COUNT(DISTINCT brand) as brands,
|
||||
COUNT(DISTINCT subcategory) as categories
|
||||
FROM products WHERE store_id = $1
|
||||
`, [storeId]);
|
||||
console.log('\nStore summary:');
|
||||
console.log(` Total products: ${stats.rows[0].total}`);
|
||||
console.log(` On special: ${stats.rows[0].specials}`);
|
||||
console.log(` Unique brands: ${stats.rows[0].brands}`);
|
||||
console.log(` Categories: ${stats.rows[0].categories}`);
|
||||
return {
|
||||
success: true,
|
||||
totalProducts: allProducts.length,
|
||||
inserted,
|
||||
updated,
|
||||
};
|
||||
}
|
||||
finally {
|
||||
await browser.close();
|
||||
await pool.end();
|
||||
}
|
||||
}
|
||||
// Run
|
||||
const menuUrl = process.argv[2] || 'https://dutchie.com/embedded-menu/AZ-Deeply-Rooted';
|
||||
const storeId = parseInt(process.argv[3] || '1', 10);
|
||||
console.log('='.repeat(60));
|
||||
console.log('DUTCHIE GRAPHQL FULL SCRAPE');
|
||||
console.log('='.repeat(60));
|
||||
console.log(`Menu URL: ${menuUrl}`);
|
||||
console.log(`Store ID: ${storeId}`);
|
||||
console.log('');
|
||||
scrapeAllProducts(menuUrl, storeId)
|
||||
.then((result) => {
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('COMPLETE');
|
||||
console.log(JSON.stringify(result, null, 2));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Error:', error.message);
|
||||
process.exit(1);
|
||||
});
|
||||
Reference in New Issue
Block a user