Fix category-crawler-jobs store lookup query

- Fix column name from s.dutchie_plus_url to s.dutchie_url
- Add availability tracking and product freshness APIs
- Add crawl script for sequential dispensary processing

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-01 00:07:00 -07:00
parent 20a7b69537
commit 9d8972aa86
15 changed files with 11604 additions and 42 deletions

View File

@@ -0,0 +1,52 @@
-- Migration 024: Product Availability Tracking
-- Adds normalized availability status and transition tracking.
-- Every statement is idempotent (IF NOT EXISTS / guarded WHERE clauses), so the
-- migration can be re-run safely.

-- Add availability columns to products table
ALTER TABLE products ADD COLUMN IF NOT EXISTS availability_status VARCHAR(20) DEFAULT 'unknown';
ALTER TABLE products ADD COLUMN IF NOT EXISTS availability_raw JSONB;
ALTER TABLE products ADD COLUMN IF NOT EXISTS last_seen_in_stock_at TIMESTAMPTZ;
ALTER TABLE products ADD COLUMN IF NOT EXISTS last_seen_out_of_stock_at TIMESTAMPTZ;
-- The scraper's saveProducts() writes products.stock_quantity alongside the
-- columns above. Ensure the column exists; this is a no-op if an earlier
-- migration already created it.
ALTER TABLE products ADD COLUMN IF NOT EXISTS stock_quantity INTEGER;

-- Add comment for clarity
COMMENT ON COLUMN products.availability_status IS 'Normalized status: in_stock, out_of_stock, limited, unknown';
COMMENT ON COLUMN products.availability_raw IS 'Raw availability payload from provider for debugging';
COMMENT ON COLUMN products.last_seen_in_stock_at IS 'Last time product was seen in stock';
COMMENT ON COLUMN products.last_seen_out_of_stock_at IS 'Last time product was seen out of stock';

-- Create indexes for availability queries
CREATE INDEX IF NOT EXISTS idx_products_availability_status ON products(availability_status);
CREATE INDEX IF NOT EXISTS idx_products_availability_by_store ON products(store_id, availability_status) WHERE store_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_products_availability_by_dispensary ON products(dispensary_id, availability_status) WHERE dispensary_id IS NOT NULL;

-- Backfill availability_status from existing in_stock column.
-- Only rows still at the default ('unknown') or NULL are touched, so statuses
-- written by a crawler after this migration are never overwritten on re-run.
UPDATE products
SET availability_status = CASE
    WHEN in_stock = true THEN 'in_stock'
    WHEN in_stock = false THEN 'out_of_stock'
    ELSE 'unknown'
END
WHERE availability_status = 'unknown' OR availability_status IS NULL;

-- Set last_seen_in_stock_at for currently in-stock products
-- (best available timestamp: last_seen_at, then updated_at, then now)
UPDATE products
SET last_seen_in_stock_at = COALESCE(last_seen_at, updated_at, NOW())
WHERE in_stock = true AND last_seen_in_stock_at IS NULL;

-- Set last_seen_out_of_stock_at for currently out-of-stock products
UPDATE products
SET last_seen_out_of_stock_at = COALESCE(last_seen_at, updated_at, NOW())
WHERE in_stock = false AND last_seen_out_of_stock_at IS NULL;

-- Add availability tracking to dispensary_crawl_jobs
ALTER TABLE dispensary_crawl_jobs ADD COLUMN IF NOT EXISTS in_stock_count INTEGER;
ALTER TABLE dispensary_crawl_jobs ADD COLUMN IF NOT EXISTS out_of_stock_count INTEGER;
ALTER TABLE dispensary_crawl_jobs ADD COLUMN IF NOT EXISTS limited_count INTEGER;
ALTER TABLE dispensary_crawl_jobs ADD COLUMN IF NOT EXISTS unknown_count INTEGER;
ALTER TABLE dispensary_crawl_jobs ADD COLUMN IF NOT EXISTS availability_changed_count INTEGER;

-- Add availability tracking to crawl_jobs (store-based)
ALTER TABLE crawl_jobs ADD COLUMN IF NOT EXISTS in_stock_count INTEGER;
ALTER TABLE crawl_jobs ADD COLUMN IF NOT EXISTS out_of_stock_count INTEGER;
ALTER TABLE crawl_jobs ADD COLUMN IF NOT EXISTS limited_count INTEGER;
ALTER TABLE crawl_jobs ADD COLUMN IF NOT EXISTS unknown_count INTEGER;
ALTER TABLE crawl_jobs ADD COLUMN IF NOT EXISTS availability_changed_count INTEGER;

View File

@@ -6,6 +6,55 @@ import { getImageUrl } from '../utils/minio';
const router = Router(); const router = Router();
router.use(authMiddleware); router.use(authMiddleware);
// Freshness threshold: data older than this is considered stale
const STALE_THRESHOLD_HOURS = 4;

/** Freshness metadata attached to product listings for a single store. */
interface FreshnessInfo {
  last_crawl_at: string | null;
  is_stale: boolean;
  freshness: string;
  hours_since_crawl: number | null;
}

/**
 * Describe how fresh a store's data is, given the time of its last crawl.
 *
 * @param lastCrawlAt Time of the last crawl, or null if the store was never crawled.
 * @returns ISO timestamp, stale flag (older than STALE_THRESHOLD_HOURS), a
 *   human-readable age string, and the age in hours rounded to one decimal.
 */
function calculateFreshness(lastCrawlAt: Date | null): FreshnessInfo {
  if (!lastCrawlAt) {
    return {
      last_crawl_at: null,
      is_stale: true,
      freshness: 'Never crawled',
      hours_since_crawl: null
    };
  }

  // Clamp at zero: a timestamp slightly in the future (clock skew between the
  // app and the database) must not produce a negative age like "-3 minutes ago".
  const diffMs = Math.max(0, Date.now() - lastCrawlAt.getTime());
  const diffHours = diffMs / (1000 * 60 * 60);
  const isStale = diffHours > STALE_THRESHOLD_HOURS;

  // Pick the unit from the *rounded* value so the text never reads
  // "60 minutes ago" or "24 hours ago" just below a unit boundary.
  const mins = Math.round(diffMs / (1000 * 60));
  const hrs = Math.round(diffHours);

  let freshnessText: string;
  if (mins < 60) {
    freshnessText = `Last crawled ${mins} minute${mins !== 1 ? 's' : ''} ago`;
  } else if (hrs < 24) {
    freshnessText = `Last crawled ${hrs} hour${hrs !== 1 ? 's' : ''} ago`;
  } else {
    const days = Math.round(diffHours / 24);
    freshnessText = `Last crawled ${days} day${days !== 1 ? 's' : ''} ago`;
  }

  if (isStale) {
    freshnessText += ' (STALE)';
  }

  return {
    last_crawl_at: lastCrawlAt.toISOString(),
    is_stale: isStale,
    freshness: freshnessText,
    hours_since_crawl: Math.round(diffHours * 10) / 10
  };
}
// Helper function to filter fields from object // Helper function to filter fields from object
function selectFields(obj: any, fields: string[]): any { function selectFields(obj: any, fields: string[]): any {
if (!fields || fields.length === 0) return obj; if (!fields || fields.length === 0) return obj;
@@ -216,11 +265,35 @@ router.get('/', async (req, res) => {
const countResult = await pool.query(countQuery, countParams); const countResult = await pool.query(countQuery, countParams);
// Get freshness info if store_id is specified
let freshnessInfo: FreshnessInfo | null = null;
let storeInfo: { id: number; name: string } | null = null;
if (store_id) {
const storeResult = await pool.query(
'SELECT id, name, last_scraped_at FROM stores WHERE id = $1',
[store_id]
);
if (storeResult.rows.length > 0) {
const store = storeResult.rows[0];
storeInfo = { id: store.id, name: store.name };
freshnessInfo = calculateFreshness(store.last_scraped_at);
}
}
res.json({ res.json({
products, products,
total: parseInt(countResult.rows[0].count), total: parseInt(countResult.rows[0].count),
limit: parseInt(limit as string), limit: parseInt(limit as string),
offset: parseInt(offset as string), offset: parseInt(offset as string),
// Add freshness metadata when store_id is provided
...(freshnessInfo && {
store: storeInfo,
last_crawl_at: freshnessInfo.last_crawl_at,
is_stale: freshnessInfo.is_stale,
freshness: freshnessInfo.freshness,
hours_since_crawl: freshnessInfo.hours_since_crawl
}),
filters: { filters: {
store_id, store_id,
category_id, category_id,

View File

@@ -28,28 +28,150 @@ router.get('/', async (req, res) => {
} }
}); });
// Get single store // Freshness threshold in hours
const STALE_THRESHOLD_HOURS = 4;

/**
 * Describe how fresh a store's data is, given the time of its last scrape.
 *
 * @param lastScrapedAt Time of the last scrape, or null if the store was never scraped.
 * @returns ISO timestamp, stale flag (older than STALE_THRESHOLD_HOURS), a
 *   human-readable age string, and the age in hours rounded to one decimal.
 */
function calculateFreshness(lastScrapedAt: Date | null): {
  last_scraped_at: string | null;
  is_stale: boolean;
  freshness: string;
  hours_since_scrape: number | null;
} {
  if (!lastScrapedAt) {
    return {
      last_scraped_at: null,
      is_stale: true,
      freshness: 'Never scraped',
      hours_since_scrape: null
    };
  }

  // Clamp at zero: a timestamp slightly in the future (clock skew) must not
  // produce a negative age such as "-3 minutes ago".
  const diffMs = Math.max(0, Date.now() - lastScrapedAt.getTime());
  const diffHours = diffMs / (1000 * 60 * 60);
  const isStale = diffHours > STALE_THRESHOLD_HOURS;

  // Choose the unit from the *rounded* value so the text never reads
  // "60 minutes ago" or "24 hours ago" just below a unit boundary.
  const mins = Math.round(diffMs / (1000 * 60));
  const hrs = Math.round(diffHours);

  let freshnessText: string;
  if (mins < 60) {
    freshnessText = `${mins} minute${mins !== 1 ? 's' : ''} ago`;
  } else if (hrs < 24) {
    freshnessText = `${hrs} hour${hrs !== 1 ? 's' : ''} ago`;
  } else {
    const days = Math.round(diffHours / 24);
    freshnessText = `${days} day${days !== 1 ? 's' : ''} ago`;
  }

  return {
    last_scraped_at: lastScrapedAt.toISOString(),
    is_stale: isStale,
    freshness: freshnessText,
    hours_since_scrape: Math.round(diffHours * 10) / 10
  };
}
/**
 * Guess the menu provider from a store's menu URL.
 *
 * Matching is a case-insensitive substring test against known provider
 * domains (host names are case-insensitive, so "Dutchie.com" must match too).
 *
 * @param dutchieUrl The store's menu URL, or null when none is recorded.
 * @returns Provider display name, 'Custom' for an unrecognised URL, or
 *   'unknown' when no URL is given.
 */
function detectProvider(dutchieUrl: string | null): string {
  if (!dutchieUrl) return 'unknown';

  const url = dutchieUrl.toLowerCase();
  // Checked in order; the first provider with a matching domain wins.
  const providers: Array<[string, string[]]> = [
    ['Dutchie', ['dutchie.com']],
    ['Jane', ['iheartjane.com', 'jane.co']],
    ['Treez', ['treez.io']],
    ['Weedmaps', ['weedmaps.com']],
    ['Leafly', ['leafly.com']]
  ];

  for (const [name, domains] of providers) {
    if (domains.some(domain => url.includes(domain))) return name;
  }
  return 'Custom';
}
// Get single store with full details
router.get('/:id', async (req, res) => { router.get('/:id', async (req, res) => {
try { try {
const { id } = req.params; const { id } = req.params;
// Get store with counts and linked dispensary
const result = await pool.query(` const result = await pool.query(`
SELECT SELECT
s.*, s.*,
d.id as dispensary_id,
d.name as dispensary_name,
d.slug as dispensary_slug,
d.state as dispensary_state,
d.city as dispensary_city,
d.address as dispensary_address,
d.menu_provider as dispensary_menu_provider,
COUNT(DISTINCT p.id) as product_count, COUNT(DISTINCT p.id) as product_count,
COUNT(DISTINCT c.id) as category_count COUNT(DISTINCT c.id) as category_count,
COUNT(DISTINCT p.id) FILTER (WHERE p.in_stock = true) as in_stock_count,
COUNT(DISTINCT p.id) FILTER (WHERE p.in_stock = false) as out_of_stock_count
FROM stores s FROM stores s
LEFT JOIN dispensaries d ON s.dispensary_id = d.id
LEFT JOIN products p ON s.id = p.store_id LEFT JOIN products p ON s.id = p.store_id
LEFT JOIN categories c ON s.id = c.store_id LEFT JOIN categories c ON s.id = c.store_id
WHERE s.id = $1 WHERE s.id = $1
GROUP BY s.id GROUP BY s.id, d.id, d.name, d.slug, d.state, d.city, d.address, d.menu_provider
`, [id]); `, [id]);
if (result.rows.length === 0) { if (result.rows.length === 0) {
return res.status(404).json({ error: 'Store not found' }); return res.status(404).json({ error: 'Store not found' });
} }
res.json(result.rows[0]); const store = result.rows[0];
// Get recent crawl jobs for this store
const jobsResult = await pool.query(`
SELECT
id, status, job_type, trigger_type,
started_at, completed_at,
products_found, products_new, products_updated,
in_stock_count, out_of_stock_count,
error_message
FROM crawl_jobs
WHERE store_id = $1
ORDER BY created_at DESC
LIMIT 10
`, [id]);
// Get schedule info if exists
const scheduleResult = await pool.query(`
SELECT
enabled, interval_hours, next_run_at, last_run_at
FROM store_crawl_schedule
WHERE store_id = $1
`, [id]);
// Calculate freshness
const freshness = calculateFreshness(store.last_scraped_at);
// Detect provider from URL
const provider = detectProvider(store.dutchie_url);
// Build response
const response = {
...store,
provider,
freshness: freshness.freshness,
is_stale: freshness.is_stale,
hours_since_scrape: freshness.hours_since_scrape,
linked_dispensary: store.dispensary_id ? {
id: store.dispensary_id,
name: store.dispensary_name,
slug: store.dispensary_slug,
state: store.dispensary_state,
city: store.dispensary_city,
address: store.dispensary_address,
menu_provider: store.dispensary_menu_provider
} : null,
schedule: scheduleResult.rows[0] || null,
recent_jobs: jobsResult.rows
};
// Remove redundant dispensary fields from root
delete response.dispensary_name;
delete response.dispensary_slug;
delete response.dispensary_state;
delete response.dispensary_city;
delete response.dispensary_address;
delete response.dispensary_menu_provider;
res.json(response);
} catch (error) { } catch (error) {
console.error('Error fetching store:', error); console.error('Error fetching store:', error);
res.status(500).json({ error: 'Failed to fetch store' }); res.status(500).json({ error: 'Failed to fetch store' });

View File

@@ -0,0 +1,26 @@
import { runDispensaryOrchestrator } from '../services/dispensary-orchestrator';
// Run crawlers sequentially to avoid OOM
const dispensaryIds = [112, 81, 115, 140, 177];

/**
 * Crawl every dispensary in `dispensaryIds`, one at a time.
 *
 * A failure for one dispensary is logged and does not stop the remaining
 * crawls. Counts in the log output are derived from the list itself so they
 * stay correct when the list is edited.
 */
async function run() {
  const total = dispensaryIds.length;
  console.log(`Starting ${total} crawlers SEQUENTIALLY...`);

  for (const id of dispensaryIds) {
    console.log(`\n=== Starting crawler for dispensary ${id} ===`);
    try {
      const result = await runDispensaryOrchestrator(id);
      console.log(` Status: ${result.status}`);
      console.log(` Summary: ${result.summary}`);
      if (result.productsFound) {
        console.log(` Products: ${result.productsFound} found, ${result.productsNew} new, ${result.productsUpdated} updated`);
      }
    } catch (e: unknown) {
      // Anything can be thrown; only Error instances are guaranteed a .message.
      const message = e instanceof Error ? e.message : String(e);
      console.error(` ERROR: ${message}`);
    }
  }

  console.log(`\n=== All ${total} crawlers complete ===`);
}

run().catch((e: unknown) => {
  console.error('Fatal:', e instanceof Error ? e.message : e);
});

View File

@@ -0,0 +1,240 @@
/**
* Availability Service
*
* Normalizes product availability from various menu providers and tracks
* state transitions for inventory analytics.
*/
// Threshold for considering stock as "limited"
const LIMITED_THRESHOLD = 5;

export type AvailabilityStatus = 'in_stock' | 'out_of_stock' | 'limited' | 'unknown';

/** Result of normalizing a provider's availability signals. */
export interface NormalizedAvailability {
  status: AvailabilityStatus;
  quantity: number | null;
  raw: any;
}

/** Availability signals extracted from unstructured page content. */
export interface AvailabilityHints {
  hasOutOfStockBadge?: boolean;
  hasLimitedBadge?: boolean;
  hasInStockBadge?: boolean;
  stockText?: string;
  quantityText?: string;
}

/** Map a known stock quantity onto a status using LIMITED_THRESHOLD. */
function statusFromQuantity(quantity: number): AvailabilityStatus {
  if (quantity <= 0) return 'out_of_stock';
  return quantity <= LIMITED_THRESHOLD ? 'limited' : 'in_stock';
}

/**
 * Interpret a free-form status string ("OUT_OF_STOCK", "inStock", "Sold Out").
 *
 * Matches whole words rather than substrings, so values such as
 * "Discontinued" or "Inactive" (which merely contain "in") are not mistaken
 * for in-stock. Returns 'unknown' when the text carries no stock signal.
 */
function statusFromText(text: string): AvailabilityStatus {
  const normalized = text
    .replace(/([a-z])([A-Z])/g, '$1 $2') // split camelCase: "outOfStock" -> "out Of Stock"
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, ' ')          // "_", "-", punctuation -> word separators
    .trim();
  const words = new Set(normalized.split(' '));
  const hasAny = (...candidates: string[]): boolean => candidates.some(w => words.has(w));

  if (/\bnot (available|in stock)\b/.test(normalized) || hasAny('out', 'unavailable', 'outofstock', 'soldout')) {
    return 'out_of_stock';
  }
  if (hasAny('limited', 'low', 'lowstock')) return 'limited';
  if (hasAny('in', 'available', 'instock')) return 'in_stock';
  return 'unknown';
}

/**
 * Normalize availability from a Dutchie product
 *
 * Signals consulted, in order of precedence for the status:
 * 1. `isInStock` / `inStock` boolean flags
 * 2. `status` string, when it carries a recognisable stock signal
 * 3. quantity: `potencyAmount.quantity` if numeric, otherwise the sum of
 *    numeric `variants[].quantity` values
 *
 * @param dutchieProduct Raw product object; null/undefined is tolerated.
 * @returns Normalized status, the quantity (null when none was reported) and
 *   the raw signals that were found, for debugging.
 */
export function normalizeAvailability(dutchieProduct: any): NormalizedAvailability {
  const product = dutchieProduct ?? {};
  const potencyQuantity = product.potencyAmount?.quantity;
  const variants: any[] = Array.isArray(product.variants) ? product.variants : [];

  // Collect raw availability data for debugging
  const raw: any = {};
  if (potencyQuantity !== undefined) {
    raw.potencyQuantity = potencyQuantity;
  }
  if (product.status !== undefined) {
    raw.status = product.status;
  }
  if (product.isInStock !== undefined) {
    raw.isInStock = product.isInStock;
  }
  if (product.inStock !== undefined) {
    raw.inStock = product.inStock;
  }
  const variantQuantities = variants
    .filter((v: any) => v?.quantity !== undefined)
    .map((v: any) => ({ option: v.option, quantity: v.quantity }));
  if (variantQuantities.length) {
    raw.variantQuantities = variantQuantities;
  }

  // Try to extract quantity
  let quantity: number | null = null;
  if (Number.isFinite(potencyQuantity)) {
    quantity = potencyQuantity;
  } else {
    // Sum variant quantities. A reported total of 0 is kept (not dropped to
    // null) so that "every variant has 0 left" is recognised as out of stock.
    const numeric = variants
      .map((v: any) => v?.quantity)
      .filter((q: unknown): q is number => typeof q === 'number' && Number.isFinite(q));
    if (numeric.length > 0) {
      quantity = numeric.reduce((sum, q) => sum + q, 0);
    }
  }

  // Determine status
  let status: AvailabilityStatus = 'unknown';
  if (product.isInStock === false || product.inStock === false) {
    // Explicit boolean flags take precedence
    status = 'out_of_stock';
  } else if (product.isInStock === true || product.inStock === true) {
    status = quantity !== null && quantity <= LIMITED_THRESHOLD ? 'limited' : 'in_stock';
  } else {
    if (typeof product.status === 'string') {
      status = statusFromText(product.status);
    }
    // Fall back to quantity whenever the status string was absent OR carried
    // no stock signal (e.g. "Active"), instead of giving up with 'unknown'.
    if (status === 'unknown' && quantity !== null) {
      status = statusFromQuantity(quantity);
    }
  }

  return { status, quantity, raw };
}
/**
 * Extract availability hints from page content or product card HTML
 *
 * Used for sandbox provider scraping where we don't have structured data.
 * Matching is done on the lower-cased text of `productElement` when given,
 * otherwise on the whole `pageContent`.
 *
 * @param pageContent Full page HTML/text.
 * @param productElement Optional HTML/text of a single product card; takes
 *   precedence over `pageContent` when non-empty.
 * @returns Badge flags plus, when found, the matched quantity text
 *   (e.g. "3 left") and stock text (e.g. "in stock").
 */
export function extractAvailabilityHints(pageContent: string, productElement?: string): AvailabilityHints {
  const content = (productElement || pageContent || '').toLowerCase();

  const outOfStockPattern =
    /out of stock|out-of-stock|sold out|soldout|unavailable|not available|coming soon|notify me/;
  const limitedPattern =
    /limited stock|limited quantity|low stock|only \d+ left|few remaining|almost gone|selling fast/;
  // "available" must not be the tail of "unavailable" or follow "not ",
  // otherwise out-of-stock content would also be flagged as in stock.
  const inStockPattern =
    /in stock|in-stock|add to cart|add to bag|buy now|(?<!un|not\s+)available/;

  const hints: AvailabilityHints = {
    hasOutOfStockBadge: outOfStockPattern.test(content),
    hasLimitedBadge: limitedPattern.test(content),
    hasInStockBadge: inStockPattern.test(content)
  };

  // Try to extract quantity text
  const qtyMatch = content.match(/(\d+)\s*(left|remaining|in stock|available)/i);
  if (qtyMatch) {
    hints.quantityText = qtyMatch[0];
  }

  // Look for explicit stock text (runs up to the next tag opening)
  const stockTextMatch = content.match(/(out of stock|in stock|low stock|limited|sold out)[^<]*/i);
  if (stockTextMatch) {
    hints.stockText = stockTextMatch[0].trim();
  }

  return hints;
}
/**
 * Turn scraped availability hints into a normalized availability record.
 *
 * Badge precedence is out-of-stock, then limited, then in-stock; an in-stock
 * badge combined with a parsed quantity at or below LIMITED_THRESHOLD is
 * reported as 'limited'. With no badge at all the status is 'unknown'.
 * The hints object itself is returned as `raw`.
 */
export function hintsToAvailability(hints: AvailabilityHints): NormalizedAvailability {
  // First run of digits in the quantity text, if there is any text at all
  const digits = hints.quantityText ? hints.quantityText.match(/(\d+)/) : null;
  const quantity: number | null = digits ? parseInt(digits[1], 10) : null;

  const resolveStatus = (): AvailabilityStatus => {
    if (hints.hasOutOfStockBadge) return 'out_of_stock';
    if (hints.hasLimitedBadge) return 'limited';
    if (hints.hasInStockBadge) {
      const isLow = quantity !== null && quantity <= LIMITED_THRESHOLD;
      return isLow ? 'limited' : 'in_stock';
    }
    return 'unknown';
  };

  return { status: resolveStatus(), quantity, raw: hints };
}
/**
 * Per-status product tallies, plus the number of products whose status
 * differs from their previous one.
 */
export interface AvailabilityCounts {
  in_stock: number;
  out_of_stock: number;
  limited: number;
  unknown: number;
  changed: number;
}

/**
 * Tally products by availability status.
 *
 * A product without a status is counted as 'unknown'. A product counts as
 * changed when it has a previous status that differs from its current one.
 */
export function aggregateAvailability(
  products: Array<{ availability_status?: AvailabilityStatus; previous_status?: AvailabilityStatus }>
): AvailabilityCounts {
  const tally: AvailabilityCounts = { in_stock: 0, out_of_stock: 0, limited: 0, unknown: 0, changed: 0 };

  products.forEach(item => {
    const current = item.availability_status || 'unknown';
    tally[current] += 1;

    const previous = item.previous_status;
    if (previous && previous !== current) {
      tally.changed += 1;
    }
  });

  return tally;
}

View File

@@ -106,9 +106,10 @@ async function updateCategoryScanTime(
} }
async function getStoreIdForDispensary(dispensaryId: number): Promise<number | null> { async function getStoreIdForDispensary(dispensaryId: number): Promise<number | null> {
// First check if dispensary has menu_url - if so, try to match with stores.dutchie_url
const result = await pool.query( const result = await pool.query(
`SELECT s.id FROM stores s `SELECT s.id FROM stores s
JOIN dispensaries d ON d.menu_url = s.dutchie_plus_url OR d.name ILIKE '%' || s.name || '%' JOIN dispensaries d ON d.menu_url = s.dutchie_url OR d.name ILIKE '%' || s.name || '%'
WHERE d.id = $1 WHERE d.id = $1
LIMIT 1`, LIMIT 1`,
[dispensaryId] [dispensaryId]
@@ -118,6 +119,7 @@ async function getStoreIdForDispensary(dispensaryId: number): Promise<number | n
return result.rows[0].id; return result.rows[0].id;
} }
// Try matching by slug
const result2 = await pool.query( const result2 = await pool.query(
`SELECT s.id FROM stores s `SELECT s.id FROM stores s
JOIN dispensaries d ON d.website ILIKE '%' || s.slug || '%' JOIN dispensaries d ON d.website ILIKE '%' || s.slug || '%'

View File

@@ -14,6 +14,7 @@ import { scrapeStore } from '../scraper-v2';
import puppeteer, { Browser, Page } from 'puppeteer'; import puppeteer, { Browser, Page } from 'puppeteer';
import { promises as fs } from 'fs'; import { promises as fs } from 'fs';
import path from 'path'; import path from 'path';
import { extractAvailabilityHints } from './availability';
const WORKER_ID = `crawler-${process.pid}-${Date.now()}`; const WORKER_ID = `crawler-${process.pid}-${Date.now()}`;
@@ -500,9 +501,13 @@ export async function runSandboxCrawlJob(dispensaryId: number, sandboxId?: numbe
}; };
}); });
// Extract availability hints from page content
const availabilityHints = extractAvailabilityHints(html);
analysisData.page_structures.push({ analysisData.page_structures.push({
url, url,
...structure, ...structure,
availabilityHints,
}); });
} }

View File

@@ -8,6 +8,7 @@ import { logger } from './logger';
import { registerScraper, updateScraperStats, completeScraper } from '../routes/scraper-monitor'; import { registerScraper, updateScraperStats, completeScraper } from '../routes/scraper-monitor';
import { incrementProxyFailure, getActiveProxy, isBotDetectionError, putProxyInTimeout } from './proxy'; import { incrementProxyFailure, getActiveProxy, isBotDetectionError, putProxyInTimeout } from './proxy';
import { bypassAgeGate, detectStateFromUrl, setAgeGateCookies } from '../utils/age-gate'; import { bypassAgeGate, detectStateFromUrl, setAgeGateCookies } from '../utils/age-gate';
import { normalizeAvailability, AvailabilityStatus } from './availability';
// Apply stealth plugin for antidetect/anti-fingerprinting // Apply stealth plugin for antidetect/anti-fingerprinting
puppeteer.use(StealthPlugin()); puppeteer.use(StealthPlugin());
@@ -35,6 +36,10 @@ interface Product {
imageUrl?: string; imageUrl?: string;
dutchieUrl: string; dutchieUrl: string;
metadata?: any; metadata?: any;
// Availability tracking
availabilityStatus?: AvailabilityStatus;
availabilityRaw?: any;
stockQuantity?: number | null;
} }
export const USER_AGENTS = { export const USER_AGENTS = {
@@ -584,6 +589,8 @@ export async function scrapeCategory(storeId: number, categoryId: number, userAg
const formattedProducts: Product[] = products.map((p, index) => { const formattedProducts: Product[] = products.map((p, index) => {
const sanitized = sanitizeProductData(p); const sanitized = sanitizeProductData(p);
// Normalize availability from Dutchie product data
const availability = normalizeAvailability(p);
return { return {
dutchieProductId: `${category.store_slug}-${category.slug}-${Date.now()}-${index}`, dutchieProductId: `${category.store_slug}-${category.slug}-${Date.now()}-${index}`,
@@ -599,7 +606,10 @@ export async function scrapeCategory(storeId: number, categoryId: number, userAg
weight: sanitized.weight, weight: sanitized.weight,
imageUrl: p.imageUrl, imageUrl: p.imageUrl,
dutchieUrl: p.href, dutchieUrl: p.href,
metadata: p.metadata || {} metadata: p.metadata || {},
availabilityStatus: availability.status,
availabilityRaw: availability.raw,
stockQuantity: availability.quantity
}; };
}); });
@@ -660,47 +670,72 @@ async function autoScroll(page: Page) {
export async function saveProducts(storeId: number, categoryId: number, products: Product[]): Promise<void> { export async function saveProducts(storeId: number, categoryId: number, products: Product[]): Promise<void> {
const client = await pool.connect(); const client = await pool.connect();
try { try {
await client.query('BEGIN'); await client.query('BEGIN');
logger.info('scraper', `Saving ${products.length} products to database...`); logger.info('scraper', `Saving ${products.length} products to database...`);
// Mark all products as out-of-stock before processing (they'll be re-marked if found)
// Also update availability_status and last_seen_out_of_stock_at for state transition tracking
await client.query(` await client.query(`
UPDATE products UPDATE products
SET in_stock = false SET in_stock = false,
WHERE store_id = $1 AND category_id = $2 availability_status = 'out_of_stock',
last_seen_out_of_stock_at = CASE
WHEN availability_status != 'out_of_stock' THEN CURRENT_TIMESTAMP
ELSE last_seen_out_of_stock_at
END
WHERE store_id = $1 AND category_id = $2 AND in_stock = true
`, [storeId, categoryId]); `, [storeId, categoryId]);
for (const product of products) { for (const product of products) {
try { try {
// Get availability from product (defaults to in_stock if product exists in scraped data)
const availStatus = product.availabilityStatus || 'in_stock';
const availRaw = product.availabilityRaw ? JSON.stringify(product.availabilityRaw) : null;
const stockQty = product.stockQuantity ?? null;
const existingResult = await client.query(` const existingResult = await client.query(`
SELECT id, image_url, local_image_path SELECT id, image_url, local_image_path, availability_status
FROM products FROM products
WHERE store_id = $1 AND name = $2 AND category_id = $3 WHERE store_id = $1 AND name = $2 AND category_id = $3
AND (variant = $4 OR (variant IS NULL AND $4 IS NULL)) AND (variant = $4 OR (variant IS NULL AND $4 IS NULL))
`, [storeId, product.name, categoryId, product.variant || null]); `, [storeId, product.name, categoryId, product.variant || null]);
let localImagePath = null; let localImagePath = null;
let productId: number; let productId: number;
if (existingResult.rows.length > 0) { if (existingResult.rows.length > 0) {
productId = existingResult.rows[0].id; productId = existingResult.rows[0].id;
localImagePath = existingResult.rows[0].local_image_path; localImagePath = existingResult.rows[0].local_image_path;
const prevStatus = existingResult.rows[0].availability_status;
// Determine if we need to update last_seen_in_stock_at
const isNowInStock = availStatus === 'in_stock' || availStatus === 'limited';
const wasOutOfStock = prevStatus === 'out_of_stock' || prevStatus === 'unknown';
await client.query(` await client.query(`
UPDATE products UPDATE products
SET name = $1, variant = $2, description = $3, price = $4, SET name = $1, variant = $2, description = $3, price = $4,
strain_type = $5, thc_percentage = $6, cbd_percentage = $7, strain_type = $5, thc_percentage = $6, cbd_percentage = $7,
brand = $8, weight = $9, image_url = $10, dutchie_url = $11, brand = $8, weight = $9, image_url = $10, dutchie_url = $11,
in_stock = true, metadata = $12, last_seen_at = CURRENT_TIMESTAMP, in_stock = true, metadata = $12, last_seen_at = CURRENT_TIMESTAMP,
updated_at = CURRENT_TIMESTAMP updated_at = CURRENT_TIMESTAMP,
availability_status = $14,
availability_raw = $15,
stock_quantity = $16,
last_seen_in_stock_at = CASE
WHEN $17 THEN CURRENT_TIMESTAMP
ELSE last_seen_in_stock_at
END
WHERE id = $13 WHERE id = $13
`, [ `, [
product.name, product.variant, product.description, product.price, product.name, product.variant, product.description, product.price,
product.strainType, product.thcPercentage, product.cbdPercentage, product.strainType, product.thcPercentage, product.cbdPercentage,
product.brand, product.weight, product.imageUrl, product.dutchieUrl, product.brand, product.weight, product.imageUrl, product.dutchieUrl,
JSON.stringify(product.metadata), productId JSON.stringify(product.metadata), productId, availStatus, availRaw, stockQty,
isNowInStock && wasOutOfStock
]); ]);
} else { } else {
// Generate unique slug from product name + timestamp + random suffix // Generate unique slug from product name + timestamp + random suffix
@@ -716,14 +751,15 @@ export async function saveProducts(storeId: number, categoryId: number, products
INSERT INTO products ( INSERT INTO products (
store_id, category_id, dutchie_product_id, name, slug, variant, description, store_id, category_id, dutchie_product_id, name, slug, variant, description,
price, strain_type, thc_percentage, cbd_percentage, price, strain_type, thc_percentage, cbd_percentage,
brand, weight, image_url, dutchie_url, in_stock, metadata brand, weight, image_url, dutchie_url, in_stock, metadata,
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, true, $16) availability_status, availability_raw, stock_quantity, last_seen_in_stock_at
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, true, $16, $17, $18, $19, CURRENT_TIMESTAMP)
RETURNING id RETURNING id
`, [ `, [
storeId, categoryId, product.dutchieProductId, product.name, slug, product.variant, product.description, storeId, categoryId, product.dutchieProductId, product.name, slug, product.variant, product.description,
product.price, product.strainType, product.thcPercentage, product.cbdPercentage, product.price, product.strainType, product.thcPercentage, product.cbdPercentage,
product.brand, product.weight, product.imageUrl, product.dutchieUrl, product.brand, product.weight, product.imageUrl, product.dutchieUrl,
JSON.stringify(product.metadata) JSON.stringify(product.metadata), availStatus, availRaw, stockQty
]); ]);
productId = insertResult.rows[0].id; productId = insertResult.rows[0].id;

File diff suppressed because it is too large Load Diff

592
docs/CRAWL_OPERATIONS.md Normal file
View File

@@ -0,0 +1,592 @@
# Crawl Operations & Data Philosophy
This document defines the operational constraints, scheduling requirements, and data integrity philosophy for the dispensary scraper system.
---
## 1. Frozen Crawler Policy
> **CRITICAL CONSTRAINT**: The crawler code is FROZEN. Do NOT modify any crawler logic.
### What Is Frozen
The following components are read-only and must not be modified:
- **Selectors**: All CSS/XPath selectors for extracting data from Dutchie pages
- **Parsing Logic**: Functions that transform raw HTML into structured data
- **Request Patterns**: URL construction, pagination, API calls to Dutchie
- **Browser Configuration**: Puppeteer settings, user agents, viewport sizes
- **Rate Limiting**: Request delays, retry logic, concurrent request limits
### What CAN Be Modified
You may build around the crawler's output:
| Layer | Allowed Changes |
|-------|-----------------|
| **Scheduling** | CronJobs, run frequency, store queuing |
| **Ingestion** | Post-processing of crawler output before DB insert |
| **API Layer** | Query logic, computed fields, response transformations |
| **Intelligence** | Aggregation tables, metrics computation |
| **Infrastructure** | K8s resources, scaling, monitoring |
### Rationale
The crawler has been stabilized through extensive testing. Changes to selectors or parsing risk:
- Breaking data extraction that currently works against Dutchie's live UI
- Introducing regressions that are hard to detect
- Requiring re-validation across all store types
All improvements must happen in **downstream processing**, not in the crawler itself.
---
## 2. Crawl Scheduling
### Standard Schedule: Every 4 Hours
Run a full crawl for each store every 4 hours, 24/7.
```yaml
# K8s CronJob: Every 4 hours
apiVersion: batch/v1
kind: CronJob
metadata:
name: scraper-4h-cycle
namespace: dispensary-scraper
spec:
schedule: "0 */4 * * *" # 00:00, 04:00, 08:00, 12:00, 16:00, 20:00 UTC
concurrencyPolicy: Forbid
jobTemplate:
spec:
template:
spec:
containers:
- name: scraper
image: code.cannabrands.app/creationshop/dispensary-scraper:latest
command: ["node", "dist/scripts/run-all-stores.js"]
env:
- name: DATABASE_URL
valueFrom:
secretKeyRef:
name: scraper-secrets
key: database-url
restartPolicy: OnFailure
```
### Daily Specials Crawl: 12:01 AM Store Local Time
Dispensaries often update their daily specials at midnight. We ensure a crawl happens at 12:01 AM in each store's local timezone.
```yaml
# K8s CronJob: Daily specials at store midnight (example for MST/Arizona)
apiVersion: batch/v1
kind: CronJob
metadata:
name: scraper-daily-specials-mst
namespace: dispensary-scraper
spec:
schedule: "1 7 * * *" # 12:01 AM MST = 07:01 UTC
concurrencyPolicy: Forbid
jobTemplate:
spec:
template:
spec:
containers:
- name: scraper
image: code.cannabrands.app/creationshop/dispensary-scraper:latest
command: ["node", "dist/scripts/run-stores-by-timezone.js", "America/Phoenix"]
restartPolicy: OnFailure
```
### Timezone-Aware Scheduling
The `stores` table records each store's IANA timezone in a `timezone` column (default `America/Phoenix`), added by this migration:
```sql
ALTER TABLE stores ADD COLUMN IF NOT EXISTS timezone VARCHAR(50) DEFAULT 'America/Phoenix';
-- Lookup table for common dispensary timezones
-- America/Phoenix (Arizona, no DST)
-- America/Los_Angeles (California)
-- America/Denver (Colorado)
-- America/Chicago (Illinois)
```
### Scripts Required
```
/backend/src/scripts/
├── run-all-stores.ts # Run crawl for all enabled stores
├── run-stores-by-timezone.ts # Run crawl for stores in a specific timezone
└── scheduler.ts # Orchestrates CronJob dispatch
```
---
## 3. Specials Detection Logic
> **Problem**: The Specials tab in the frontend is EMPTY even though products have discounts.
### Root Cause Analysis
Database investigation reveals:
| Metric | Count |
|--------|-------|
| Total products | 1,414 |
| `is_special = true` | 0 |
| Has "Special Offer" in name | 325 |
| Has `sale_price < regular_price` | 4 |
The crawler captures "Special Offer" **embedded in the product name** but doesn't set `is_special = true`.
### Solution: API-Layer Specials Detection
Since the crawler is frozen, detect specials at query time:
```sql
-- Computed is_on_special in API queries.
-- All three CASE expressions apply the same rules in the same priority order
-- (name marker, sale < regular, price < original), so the flag, the type and
-- the percentage always agree for a given row.
SELECT
  p.*,
  CASE
    WHEN p.name ILIKE '%Special Offer%' THEN TRUE
    WHEN p.sale_price IS NOT NULL
     AND p.regular_price IS NOT NULL
     AND p.sale_price::numeric < p.regular_price::numeric THEN TRUE
    WHEN p.price IS NOT NULL
     AND p.original_price IS NOT NULL
     AND p.price::numeric < p.original_price::numeric THEN TRUE
    ELSE FALSE
  END AS is_on_special,
  -- Compute special type
  CASE
    WHEN p.name ILIKE '%Special Offer%' THEN 'special_offer'
    WHEN p.sale_price IS NOT NULL
     AND p.regular_price IS NOT NULL
     AND p.sale_price::numeric < p.regular_price::numeric THEN 'percent_off'
    -- Rule 3: without this branch a row flagged by price < original_price
    -- would be on special with a NULL type.
    WHEN p.price IS NOT NULL
     AND p.original_price IS NOT NULL
     AND p.price::numeric < p.original_price::numeric THEN 'percent_off'
    ELSE NULL
  END AS computed_special_type,
  -- Compute discount percentage (only for real price drops; the > 0 guards
  -- keep the divisions safe)
  CASE
    WHEN p.sale_price IS NOT NULL
     AND p.regular_price IS NOT NULL
     AND p.regular_price::numeric > 0
     AND p.sale_price::numeric < p.regular_price::numeric
    THEN ROUND((1 - p.sale_price::numeric / p.regular_price::numeric) * 100, 0)
    WHEN p.price IS NOT NULL
     AND p.original_price IS NOT NULL
     AND p.original_price::numeric > 0
     AND p.price::numeric < p.original_price::numeric
    THEN ROUND((1 - p.price::numeric / p.original_price::numeric) * 100, 0)
    ELSE NULL
  END AS computed_discount_percent
FROM products p
WHERE p.store_id = :store_id;
```
### Special Detection Rules (Priority Order)
1. **Name Contains "Special Offer"**: `name ILIKE '%Special Offer%'`
- Type: `special_offer`
- Badge: "Special"
2. **Price Discount (sale < regular)**: `sale_price < regular_price`
- Type: `percent_off`
- Badge: Computed as "X% OFF"
3. **Price Discount (current < original)**: `price < original_price`
- Type: `percent_off`
- Badge: Computed as "X% OFF"
4. **Metadata Offers** (future): `metadata->'offers' IS NOT NULL`
- Parse offer type from metadata JSON
### Clean Product Name
Strip "Special Offer" from display name:
```typescript
/**
 * Removes the trailing "Special Offer" marker from a scraped product name.
 *
 * Only a marker at the end of the name is stripped (case-insensitively);
 * the phrase elsewhere in the name is left alone.
 *
 * @param rawName - Product name exactly as stored by the crawler.
 * @returns The display name with the trailing marker and any surrounding
 *   whitespace removed.
 */
function cleanProductName(rawName: string): string {
  // Allow whitespace on both sides of the marker: a bare `$` anchor fails to
  // match when the raw name ends in spaces ("… Special Offer ").
  return rawName.replace(/\s*Special Offer\s*$/i, '').trim();
}
```
### API Specials Endpoint
```typescript
// GET /api/stores/:store_key/specials
/**
 * Returns the in-stock products of one store that are currently on special,
 * highest discount first.
 *
 * A product counts as a special when its name contains "Special Offer"
 * (case-insensitive) or when it carries a real price drop
 * (`sale_price < regular_price`, or `price < original_price`).
 *
 * @param storeKey - Value matched against `stores.store_key`.
 * @param options - Paging; `limit` and `offset` are bound to LIMIT/OFFSET.
 * @returns Whatever `db.query` resolves to for the statement.
 */
async function getStoreSpecials(storeKey: string, options: SpecialsOptions) {
  const query = `
    WITH specials AS (
      SELECT
        p.*,
        -- Detect special (name marker, then either price-drop rule)
        CASE
          WHEN p.name ILIKE '%Special Offer%' THEN TRUE
          WHEN p.sale_price::numeric < p.regular_price::numeric THEN TRUE
          WHEN p.price::numeric < p.original_price::numeric THEN TRUE
          ELSE FALSE
        END AS is_on_special,
        -- Compute discount. The > 0 guards matter: dividing by a zero price
        -- raises "division by zero" and fails the whole query.
        CASE
          WHEN p.regular_price::numeric > 0
           AND p.sale_price::numeric < p.regular_price::numeric
          THEN ROUND((1 - p.sale_price::numeric / p.regular_price::numeric) * 100)
          WHEN p.original_price::numeric > 0
           AND p.price::numeric < p.original_price::numeric
          THEN ROUND((1 - p.price::numeric / p.original_price::numeric) * 100)
          ELSE NULL
        END AS discount_percent
      FROM products p
      JOIN stores s ON p.store_id = s.id
      WHERE s.store_key = $1
        AND p.in_stock = TRUE
    )
    SELECT * FROM specials
    WHERE is_on_special = TRUE
    ORDER BY discount_percent DESC NULLS LAST
    LIMIT $2 OFFSET $3
  `;
  return db.query(query, [storeKey, options.limit, options.offset]);
}
```
---
## 4. Append-Only Data Philosophy
> **Principle**: Every crawl should ADD information, never LOSE it.
### What Append-Only Means
| Action | Allowed | Not Allowed |
|--------|---------|-------------|
| Insert new product | ✅ | - |
| Update product price | ✅ | - |
| Mark product out-of-stock | ✅ | - |
| DELETE product row | ❌ | Never delete |
| TRUNCATE table | ❌ | Never truncate |
| UPDATE to remove data | ❌ | Never null-out existing data |
### Product Lifecycle States
```sql
-- Products are never deleted, only state changes
ALTER TABLE products ADD COLUMN IF NOT EXISTS status VARCHAR(20) DEFAULT 'active';
-- Statuses:
-- 'active' - Currently in stock or recently seen
-- 'out_of_stock' - Seen but marked out of stock
-- 'stale' - Missing from the latest crawl and not seen for 3+ days (likely discontinued)
-- 'archived' - Manually marked as discontinued
-- IF NOT EXISTS keeps this migration re-runnable, like the ALTER TABLE above
CREATE INDEX IF NOT EXISTS idx_products_status ON products(status);
```
### Marking Products Stale (NOT Deleting)
```typescript
// After crawl completes, mark unseen products as stale
/**
 * Flags products of one store as `stale` instead of deleting them.
 *
 * A product is updated only when all of the following hold:
 *  - it belongs to `storeId`;
 *  - it has no row in `store_product_snapshots` for `crawlRunId`;
 *  - its current status is `active`;
 *  - its `last_seen_at` is more than 3 days in the past.
 * Matching rows get `status = 'stale'` and a fresh `updated_at`.
 *
 * NOTE(review): the cutoff is a fixed 3-day window, not a count of missed
 * crawls — confirm that this is the intended definition of "stale".
 *
 * @param storeId - Bound to `$1`; the store whose products are examined.
 * @param crawlRunId - Bound to `$2`; the crawl run whose snapshots count as "seen".
 */
async function markStaleProducts(storeId: number, crawlRunId: number) {
// The result of the UPDATE is awaited but not returned to the caller.
await db.query(`
UPDATE products
SET
status = 'stale',
updated_at = NOW()
WHERE store_id = $1
AND id NOT IN (
SELECT DISTINCT product_id
FROM store_product_snapshots
WHERE crawl_run_id = $2
)
AND status = 'active'
AND last_seen_at < NOW() - INTERVAL '3 days'
`, [storeId, crawlRunId]);
}
```
### Store Product Snapshots: True Append-Only
The `store_product_snapshots` table is strictly append-only:
```sql
CREATE TABLE store_product_snapshots (
id SERIAL PRIMARY KEY,
store_id INTEGER NOT NULL REFERENCES stores(id),
product_id INTEGER NOT NULL REFERENCES products(id),
crawl_run_id INTEGER NOT NULL REFERENCES crawl_runs(id),
-- Snapshot of data at crawl time
price_cents INTEGER,
regular_price_cents INTEGER,
sale_price_cents INTEGER,
in_stock BOOLEAN NOT NULL,
-- Computed at crawl time
is_on_special BOOLEAN NOT NULL DEFAULT FALSE,
special_type VARCHAR(50),
discount_percent INTEGER,
captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
-- Composite unique: one snapshot per product per crawl
CONSTRAINT uq_snapshot_product_crawl UNIQUE (product_id, crawl_run_id)
);
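-- This table is INSERT-only: rows are never UPDATEd or DELETEd
-- For data corrections, record the change in data_corrections (see "Data Correction Pattern");
-- uq_snapshot_product_crawl allows only one snapshot per product per crawl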
CREATE INDEX idx_snapshots_crawl ON store_product_snapshots(crawl_run_id);
CREATE INDEX idx_snapshots_product_time ON store_product_snapshots(product_id, captured_at DESC);
```
### Crawl Runs Table
Track every crawl execution. Create this table before `store_product_snapshots`, whose `crawl_run_id` column references it:
```sql
CREATE TABLE crawl_runs (
id SERIAL PRIMARY KEY,
store_id INTEGER NOT NULL REFERENCES stores(id),
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
completed_at TIMESTAMPTZ,
status VARCHAR(20) NOT NULL DEFAULT 'running',
products_found INTEGER,
products_new INTEGER,
products_updated INTEGER,
error_message TEXT,
-- Scheduling metadata
trigger_type VARCHAR(20) NOT NULL DEFAULT 'scheduled', -- 'scheduled', 'manual', 'daily_specials'
CONSTRAINT chk_crawl_status CHECK (status IN ('running', 'completed', 'failed'))
);
CREATE INDEX idx_crawl_runs_store_time ON crawl_runs(store_id, started_at DESC);
```
### Data Correction Pattern
If data needs correction, don't UPDATE - insert a correction record:
```sql
CREATE TABLE data_corrections (
id SERIAL PRIMARY KEY,
table_name VARCHAR(50) NOT NULL,
record_id INTEGER NOT NULL,
field_name VARCHAR(100) NOT NULL,
old_value JSONB,
new_value JSONB,
reason TEXT NOT NULL,
corrected_by VARCHAR(100) NOT NULL,
corrected_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
```
---
## 5. Safe Ingestion Patterns
### Upsert Products (Preserving History)
```typescript
/**
 * Inserts or refreshes one scraped product, then appends its snapshot for
 * the current crawl run.
 *
 * Existing rows are updated with COALESCE, so a field missing from the
 * scrape keeps its stored value. The snapshot insert uses
 * `ON CONFLICT ... DO NOTHING`, so processing the same product twice in one
 * crawl run keeps the first snapshot.
 *
 * NOTE(review): the SELECT-then-INSERT below is not atomic. If two crawls of
 * the same store can overlap, duplicates are possible unless
 * (store_id, dutchie_product_id) is unique — confirm.
 *
 * @param storeId - Store the product belongs to.
 * @param crawlRunId - Crawl run the snapshot is attributed to.
 * @param product - Product as produced by the crawler.
 * @returns The id of the inserted or updated `products` row.
 */
async function upsertProduct(storeId: number, crawlRunId: number, product: ScrapedProduct) {
  // 1. Find or create product (only the id is needed from the lookup)
  const existing = await db.query(
    `SELECT id FROM products
     WHERE store_id = $1 AND dutchie_product_id = $2`,
    [storeId, product.dutchieId]
  );
  let productId: number;
  if (existing.rows.length === 0) {
    // INSERT new product
    const result = await db.query(`
      INSERT INTO products (
        store_id, dutchie_product_id, name, slug, price, regular_price, sale_price,
        in_stock, first_seen_at, last_seen_at, status
      ) VALUES ($1, $2, $3, $4, $5, $6, $7, TRUE, NOW(), NOW(), 'active')
      RETURNING id
    `, [storeId, product.dutchieId, product.name, product.slug,
        product.price, product.regularPrice, product.salePrice]);
    productId = result.rows[0].id;
  } else {
    // UPDATE existing - COALESCE keeps the stored value whenever the scrape
    // has none, so existing data is never nulled out
    productId = existing.rows[0].id;
    await db.query(`
      UPDATE products SET
        name = COALESCE($2, name),
        price = COALESCE($3, price),
        regular_price = COALESCE($4, regular_price),
        sale_price = COALESCE($5, sale_price),
        in_stock = TRUE,
        last_seen_at = NOW(),
        status = 'active',
        updated_at = NOW()
      WHERE id = $1
    `, [productId, product.name, product.price, product.regularPrice, product.salePrice]);
  }
  // 2. Always create snapshot (append-only)
  const isOnSpecial = detectSpecial(product);
  // The name marker outranks a price discount, mirroring the detection rules:
  // a "Special Offer" product is 'special_offer', any other special is 'percent_off'.
  let specialType: 'special_offer' | 'percent_off' | null = null;
  if (isOnSpecial) {
    specialType = /special offer/i.test(product.name ?? '') ? 'special_offer' : 'percent_off';
  }
  const discountPercent = computeDiscount(product);
  await db.query(`
    INSERT INTO store_product_snapshots (
      store_id, product_id, crawl_run_id,
      price_cents, regular_price_cents, sale_price_cents,
      in_stock, is_on_special, special_type, discount_percent
    ) VALUES ($1, $2, $3, $4, $5, $6, TRUE, $7, $8, $9)
    ON CONFLICT (product_id, crawl_run_id) DO NOTHING
  `, [
    storeId, productId, crawlRunId,
    toCents(product.price), toCents(product.regularPrice), toCents(product.salePrice),
    isOnSpecial, specialType, discountPercent
  ]);
  return productId;
}
/**
 * Decides whether a scraped product is on special.
 *
 * Rules, in priority order:
 *  1. the name contains "Special Offer" (any letter case);
 *  2. both sale and regular price are present and sale is lower.
 *
 * @param product - Product as produced by the crawler.
 * @returns `true` when either rule matches, otherwise `false`.
 */
function detectSpecial(product: ScrapedProduct): boolean {
  // Case-insensitive on purpose: the SQL side detects the marker with
  // ILIKE '%Special Offer%', and both layers must agree on the same names.
  if (/special offer/i.test(product.name ?? '')) return true;
  // Check price discount; unparseable prices compare as NaN and yield false
  if (product.salePrice && product.regularPrice) {
    return parseFloat(product.salePrice) < parseFloat(product.regularPrice);
  }
  return false;
}
/**
 * Computes the discount of the sale price relative to the regular price,
 * as a whole-number percentage.
 *
 * @param product - Product as produced by the crawler.
 * @returns The rounded percentage, or `null` when either price is missing,
 *   cannot be parsed as a number, or the regular price is not positive.
 */
function computeDiscount(product: ScrapedProduct): number | null {
  if (!product.salePrice || !product.regularPrice) return null;
  const sale = parseFloat(product.salePrice);
  const regular = parseFloat(product.regularPrice);
  // parseFloat returns NaN for text such as "$10" or "N/A". NaN slips past
  // `regular <= 0`, so it must be rejected explicitly; otherwise NaN would be
  // returned for a value destined for an INTEGER column.
  if (!Number.isFinite(sale) || !Number.isFinite(regular) || regular <= 0) return null;
  return Math.round((1 - sale / regular) * 100);
}
```
---
## 6. K8s Deployment Configuration
### CronJobs Overview
```yaml
# All CronJobs for scheduling
apiVersion: v1
kind: List
items:
  # 1. Standard 4-hour crawl cycle
  - apiVersion: batch/v1
    kind: CronJob
    metadata:
      name: scraper-4h-00
      namespace: dispensary-scraper
    spec:
      schedule: "0 0,4,8,12,16,20 * * *"
      concurrencyPolicy: Forbid
      successfulJobsHistoryLimit: 3
      failedJobsHistoryLimit: 3
      jobTemplate:
        spec:
          activeDeadlineSeconds: 3600 # 1 hour timeout
          template:
            spec:
              containers:
                - name: scraper
                  image: code.cannabrands.app/creationshop/dispensary-scraper:latest
                  command: ["node", "dist/scripts/run-all-stores.js"]
                  resources:
                    requests:
                      memory: "512Mi"
                      cpu: "250m"
                    limits:
                      memory: "2Gi"
                      cpu: "1000m"
              restartPolicy: OnFailure
  # 2. Daily specials crawl - Arizona (MST, no DST)
  - apiVersion: batch/v1
    kind: CronJob
    metadata:
      name: scraper-daily-mst
      namespace: dispensary-scraper
    spec:
      schedule: "1 7 * * *" # 12:01 AM MST = 07:01 UTC
      concurrencyPolicy: Forbid
      jobTemplate:
        spec:
          template:
            spec:
              containers:
                - name: scraper
                  # A container needs an image; same build as the 4-hour job
                  image: code.cannabrands.app/creationshop/dispensary-scraper:latest
                  command: ["node", "dist/scripts/run-stores-by-timezone.js", "America/Phoenix"]
              # Job pods must use OnFailure or Never (the pod default, Always, is rejected)
              restartPolicy: OnFailure
  # 3. Daily specials crawl - California (PST/PDT)
  - apiVersion: batch/v1
    kind: CronJob
    metadata:
      name: scraper-daily-pst
      namespace: dispensary-scraper
    spec:
      # A fixed UTC schedule runs at 1:01 AM local time during PDT. On clusters
      # that support spec.timeZone, `timeZone: "America/Los_Angeles"` with
      # schedule "1 0 * * *" avoids the seasonal adjustment.
      schedule: "1 8 * * *" # 12:01 AM PST = 08:01 UTC (adjust for DST)
      concurrencyPolicy: Forbid
      jobTemplate:
        spec:
          template:
            spec:
              containers:
                - name: scraper
                  # A container needs an image; same build as the 4-hour job
                  image: code.cannabrands.app/creationshop/dispensary-scraper:latest
                  command: ["node", "dist/scripts/run-stores-by-timezone.js", "America/Los_Angeles"]
              # Job pods must use OnFailure or Never (the pod default, Always, is rejected)
              restartPolicy: OnFailure
```
### Monitoring and Alerts
```yaml
# PrometheusRule for scraper monitoring
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: scraper-alerts
  namespace: dispensary-scraper
spec:
  groups:
    - name: scraper.rules
      rules:
        - alert: ScraperJobFailed
          expr: kube_job_status_failed{namespace="dispensary-scraper"} > 0
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "Scraper job failed"
        # Scoped to the 4-hour CronJobs: the once-a-day specials CronJobs go
        # ~24h between successes, so an unscoped 5-hour threshold would keep
        # this critical alert firing for them permanently.
        - alert: ScraperMissedSchedule
          expr: time() - kube_cronjob_status_last_successful_time{namespace="dispensary-scraper", cronjob=~"scraper-4h-.*"} > 18000
          for: 10m
          labels:
            severity: critical
          annotations:
            summary: "Scraper hasn't run successfully in 5+ hours"
```
---
## 7. Summary
| Constraint | Implementation |
|------------|----------------|
| **Frozen Crawler** | No changes to selectors, parsing, or request logic |
| **4-Hour Schedule** | K8s CronJob at 0,4,8,12,16,20 UTC |
| **12:01 AM Specials** | Timezone-specific CronJobs for store local midnight |
| **Specials Detection** | API-layer detection via name pattern + price comparison |
| **Append-Only Data** | Never DELETE; use status flags; `store_product_snapshots` is INSERT-only |
| **Historical Preservation** | All crawls create snapshots; stale products marked, never deleted |
This design ensures we maximize the value of crawler data without risking breakage from crawler modifications.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,10 @@ import { useEffect, useState } from 'react';
import { useParams, useNavigate } from 'react-router-dom'; import { useParams, useNavigate } from 'react-router-dom';
import { Layout } from '../components/Layout'; import { Layout } from '../components/Layout';
import { api } from '../lib/api'; import { api } from '../lib/api';
import { Package, Tag, Zap, TrendingUp, Calendar, DollarSign } from 'lucide-react'; import {
Package, Tag, Zap, Clock, ExternalLink, CheckCircle, XCircle,
AlertCircle, Building, MapPin, RefreshCw, Calendar, Activity
} from 'lucide-react';
export function StoreDetail() { export function StoreDetail() {
const { slug } = useParams(); const { slug } = useParams();
@@ -14,7 +17,7 @@ export function StoreDetail() {
const [loading, setLoading] = useState(true); const [loading, setLoading] = useState(true);
const [selectedCategory, setSelectedCategory] = useState<number | null>(null); const [selectedCategory, setSelectedCategory] = useState<number | null>(null);
const [selectedBrand, setSelectedBrand] = useState<string>(''); const [selectedBrand, setSelectedBrand] = useState<string>('');
const [view, setView] = useState<'products' | 'brands' | 'specials'>('products'); const [view, setView] = useState<'products' | 'brands' | 'specials' | 'crawl-history'>('products');
const [sortBy, setSortBy] = useState('name'); const [sortBy, setSortBy] = useState('name');
useEffect(() => { useEffect(() => {
@@ -30,19 +33,22 @@ export function StoreDetail() {
const loadStoreData = async () => { const loadStoreData = async () => {
setLoading(true); setLoading(true);
try { try {
// First, find store by slug to get its ID
const allStores = await api.getStores(); const allStores = await api.getStores();
const storeData = allStores.stores.find((s: any) => s.slug === slug); const basicStore = allStores.stores.find((s: any) => s.slug === slug);
if (!storeData) { if (!basicStore) {
throw new Error('Store not found'); throw new Error('Store not found');
} }
const [categoriesData, brandsData] = await Promise.all([ // Fetch full store details using the enhanced endpoint
api.getCategories(storeData.id), const [fullStoreData, categoriesData, brandsData] = await Promise.all([
api.getStoreBrands(storeData.id) api.getStore(basicStore.id),
api.getCategories(basicStore.id),
api.getStoreBrands(basicStore.id)
]); ]);
setStore(storeData); setStore(fullStoreData);
setCategories(categoriesData.categories || []); setCategories(categoriesData.categories || []);
setBrands(brandsData.brands || []); setBrands(brandsData.brands || []);
} catch (error) { } catch (error) {
@@ -101,6 +107,43 @@ export function StoreDetail() {
return 'https://via.placeholder.com/300x300?text=No+Image'; return 'https://via.placeholder.com/300x300?text=No+Image';
}; };
// Formats a timestamp string for display in the en-US locale (short month,
// day, year, two-digit hour and minute). A null or empty value reads "Never".
const formatDate = (dateString: string | null) => {
  if (dateString) {
    const displayOptions: Intl.DateTimeFormatOptions = {
      month: 'short',
      day: 'numeric',
      year: 'numeric',
      hour: '2-digit',
      minute: '2-digit'
    };
    const parsed = new Date(dateString);
    return parsed.toLocaleString('en-US', displayOptions);
  }
  return 'Never';
};
// Maps a menu provider name (matched case-insensitively) to the Tailwind
// classes of its badge. Unknown or missing providers get the neutral gray badge.
const getProviderBadgeColor = (provider: string) => {
  const neutral = 'bg-gray-100 text-gray-700';
  const badgeClasses = new Map<string, string>([
    ['dutchie', 'bg-green-100 text-green-700'],
    ['jane', 'bg-purple-100 text-purple-700'],
    ['treez', 'bg-blue-100 text-blue-700'],
    ['weedmaps', 'bg-orange-100 text-orange-700'],
    ['leafly', 'bg-emerald-100 text-emerald-700']
  ]);
  // The value may be absent at runtime despite the declared type.
  const key = provider?.toLowerCase();
  if (key === undefined) {
    return neutral;
  }
  return badgeClasses.get(key) ?? neutral;
};
// Renders the status pill for a crawl job. The four known statuses share one
// layout and differ only in colour, icon and label; any other status is shown
// verbatim in a neutral pill without an icon.
const getJobStatusBadge = (status: string) => {
  const pill = (tone: string, Icon: typeof CheckCircle, label: string, iconClass = 'w-3 h-3') => (
    <span className={`px-2 py-1 text-xs font-medium ${tone} rounded-full flex items-center gap-1`}><Icon className={iconClass} />{` ${label}`}</span>
  );
  if (status === 'completed') {
    return pill('bg-green-100 text-green-700', CheckCircle, 'Completed');
  }
  if (status === 'running') {
    // Only the running state animates its icon.
    return pill('bg-blue-100 text-blue-700', RefreshCw, 'Running', 'w-3 h-3 animate-spin');
  }
  if (status === 'failed') {
    return pill('bg-red-100 text-red-700', XCircle, 'Failed');
  }
  if (status === 'pending') {
    return pill('bg-yellow-100 text-yellow-700', Clock, 'Pending');
  }
  return <span className="px-2 py-1 text-xs font-medium bg-gray-100 text-gray-700 rounded-full">{status}</span>;
};
if (loading) { if (loading) {
return ( return (
<Layout> <Layout>
@@ -127,33 +170,112 @@ export function StoreDetail() {
return ( return (
<Layout> <Layout>
<div className="space-y-6"> <div className="space-y-6">
{/* Header */} {/* Header with Store Info */}
<div className="bg-white rounded-xl border border-gray-200 p-6"> <div className="bg-white rounded-xl border border-gray-200 p-6">
<div className="flex items-center justify-between mb-4"> <div className="flex items-start justify-between mb-6">
<div className="flex items-center gap-4"> <div className="flex items-start gap-4">
<button <button
onClick={() => navigate('/stores')} onClick={() => navigate('/stores')}
className="text-gray-600 hover:text-gray-900" className="text-gray-600 hover:text-gray-900 mt-1"
> >
Back Back
</button> </button>
<div> <div>
<h1 className="text-2xl font-semibold text-gray-900">{store.name}</h1> <div className="flex items-center gap-3">
<p className="text-sm text-gray-500 mt-1"> <h1 className="text-2xl font-semibold text-gray-900">{store.name}</h1>
{products.length} products {categories.length} categories {brands.length} brands <span className={`px-2 py-1 text-xs font-medium rounded ${getProviderBadgeColor(store.provider)}`}>
</p> {store.provider || 'Unknown'}
</span>
</div>
<p className="text-sm text-gray-500 mt-1">Store ID: {store.id}</p>
</div> </div>
</div> </div>
<a <a
href={store.dutchie_url} href={store.dutchie_url}
target="_blank" target="_blank"
rel="noopener noreferrer" rel="noopener noreferrer"
className="text-sm text-blue-600 hover:text-blue-700" className="flex items-center gap-1 text-sm text-blue-600 hover:text-blue-700"
> >
View on Dutchie View Menu <ExternalLink className="w-4 h-4" />
</a> </a>
</div> </div>
{/* Stats Row */}
<div className="grid grid-cols-2 md:grid-cols-4 lg:grid-cols-6 gap-4 mb-6">
<div className="p-4 bg-gray-50 rounded-lg">
<div className="flex items-center gap-2 text-gray-500 text-xs mb-1">
<Package className="w-4 h-4" />
Products
</div>
<p className="text-xl font-semibold text-gray-900">{store.product_count || 0}</p>
</div>
<div className="p-4 bg-gray-50 rounded-lg">
<div className="flex items-center gap-2 text-gray-500 text-xs mb-1">
<Tag className="w-4 h-4" />
Categories
</div>
<p className="text-xl font-semibold text-gray-900">{store.category_count || 0}</p>
</div>
<div className="p-4 bg-green-50 rounded-lg">
<div className="flex items-center gap-2 text-green-600 text-xs mb-1">
<CheckCircle className="w-4 h-4" />
In Stock
</div>
<p className="text-xl font-semibold text-green-700">{store.in_stock_count || 0}</p>
</div>
<div className="p-4 bg-red-50 rounded-lg">
<div className="flex items-center gap-2 text-red-600 text-xs mb-1">
<XCircle className="w-4 h-4" />
Out of Stock
</div>
<p className="text-xl font-semibold text-red-700">{store.out_of_stock_count || 0}</p>
</div>
<div className={`p-4 rounded-lg ${store.is_stale ? 'bg-yellow-50' : 'bg-blue-50'}`}>
<div className={`flex items-center gap-2 text-xs mb-1 ${store.is_stale ? 'text-yellow-600' : 'text-blue-600'}`}>
<Clock className="w-4 h-4" />
Freshness
</div>
<p className={`text-sm font-semibold ${store.is_stale ? 'text-yellow-700' : 'text-blue-700'}`}>
{store.freshness || 'Never scraped'}
</p>
</div>
<div className="p-4 bg-gray-50 rounded-lg">
<div className="flex items-center gap-2 text-gray-500 text-xs mb-1">
<Calendar className="w-4 h-4" />
Next Crawl
</div>
<p className="text-sm font-semibold text-gray-700">
{store.schedule?.next_run_at ? formatDate(store.schedule.next_run_at) : 'Not scheduled'}
</p>
</div>
</div>
{/* Linked Dispensary */}
{store.linked_dispensary && (
<div className="p-4 bg-indigo-50 rounded-lg mb-6">
<div className="flex items-center gap-2 text-indigo-600 text-xs mb-2">
<Building className="w-4 h-4" />
Linked Dispensary
</div>
<div className="flex items-center justify-between">
<div>
<p className="font-semibold text-indigo-900">{store.linked_dispensary.name}</p>
<p className="text-sm text-indigo-700 flex items-center gap-1">
<MapPin className="w-3 h-3" />
{store.linked_dispensary.city}, {store.linked_dispensary.state}
{store.linked_dispensary.address && ` - ${store.linked_dispensary.address}`}
</p>
</div>
<button
onClick={() => navigate(`/dispensaries/${store.linked_dispensary.slug}`)}
className="text-sm text-indigo-600 hover:text-indigo-700 font-medium"
>
View Dispensary
</button>
</div>
</div>
)}
{/* View Tabs */} {/* View Tabs */}
<div className="flex gap-2 border-b border-gray-200"> <div className="flex gap-2 border-b border-gray-200">
<button <button
@@ -195,9 +317,75 @@ export function StoreDetail() {
Specials Specials
</div> </div>
</button> </button>
<button
onClick={() => setView('crawl-history')}
className={`px-4 py-2 border-b-2 transition-colors ${
view === 'crawl-history'
? 'border-blue-600 text-blue-600 font-medium'
: 'border-transparent text-gray-600 hover:text-gray-900'
}`}
>
<div className="flex items-center gap-2">
<Activity className="w-4 h-4" />
Crawl History ({store.recent_jobs?.length || 0})
</div>
</button>
</div> </div>
</div> </div>
{/* Crawl History View */}
{view === 'crawl-history' && (
<div className="bg-white rounded-xl border border-gray-200 overflow-hidden">
<div className="p-4 border-b border-gray-200">
<h2 className="text-lg font-semibold text-gray-900">Recent Crawl Jobs</h2>
<p className="text-sm text-gray-500">Last 10 crawl jobs for this store</p>
</div>
{store.recent_jobs && store.recent_jobs.length > 0 ? (
<div className="overflow-x-auto">
<table className="w-full">
<thead className="bg-gray-50">
<tr>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Status</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Type</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Started</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Completed</th>
<th className="px-4 py-3 text-center text-xs font-medium text-gray-500 uppercase">Found</th>
<th className="px-4 py-3 text-center text-xs font-medium text-gray-500 uppercase">New</th>
<th className="px-4 py-3 text-center text-xs font-medium text-gray-500 uppercase">Updated</th>
<th className="px-4 py-3 text-center text-xs font-medium text-gray-500 uppercase">In Stock</th>
<th className="px-4 py-3 text-center text-xs font-medium text-gray-500 uppercase">Out of Stock</th>
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Error</th>
</tr>
</thead>
<tbody className="divide-y divide-gray-100">
{store.recent_jobs.map((job: any) => (
<tr key={job.id} className="hover:bg-gray-50">
<td className="px-4 py-3">{getJobStatusBadge(job.status)}</td>
<td className="px-4 py-3 text-sm text-gray-700">{job.job_type || '-'}</td>
<td className="px-4 py-3 text-sm text-gray-700">{formatDate(job.started_at)}</td>
<td className="px-4 py-3 text-sm text-gray-700">{formatDate(job.completed_at)}</td>
<td className="px-4 py-3 text-center text-sm font-medium text-gray-900">{job.products_found ?? '-'}</td>
<td className="px-4 py-3 text-center text-sm font-medium text-green-600">{job.products_new ?? '-'}</td>
<td className="px-4 py-3 text-center text-sm font-medium text-blue-600">{job.products_updated ?? '-'}</td>
<td className="px-4 py-3 text-center text-sm font-medium text-green-600">{job.in_stock_count ?? '-'}</td>
<td className="px-4 py-3 text-center text-sm font-medium text-red-600">{job.out_of_stock_count ?? '-'}</td>
<td className="px-4 py-3 text-sm text-red-600 max-w-xs truncate" title={job.error_message || ''}>
{job.error_message || '-'}
</td>
</tr>
))}
</tbody>
</table>
</div>
) : (
<div className="text-center py-12">
<Activity className="w-16 h-16 text-gray-300 mx-auto mb-4" />
<p className="text-gray-500">No crawl history available</p>
</div>
)}
</div>
)}
{/* Products View */} {/* Products View */}
{view === 'products' && ( {view === 'products' && (
<> <>