feat: SEO template library, discovery pipeline, and orchestrator enhancements

## SEO Template Library
- Add complete template library with 7 page types (state, city, category, brand, product, search, regeneration)
- Add Template Library tab in SEO Orchestrator with accordion-based editors
- Add template preview, validation, and variable injection engine
- Add API endpoints: /api/seo/templates, preview, validate, generate, regenerate

## Discovery Pipeline
- Add promotion.ts for discovery location validation and promotion
- Add discover-all-states.ts script for multi-state discovery
- Add promotion log migration (067)
- Enhance discovery routes and types

## Orchestrator & Admin
- Add crawl_enabled filter to stores page
- Add API permissions page
- Add job queue management
- Add price analytics routes
- Add markets and intelligence routes
- Enhance dashboard and worker monitoring

## Infrastructure
- Add migrations for worker definitions, SEO settings, field alignment
- Add canonical pipeline for scraper v2
- Update hydration and sync orchestrator
- Enhance multi-state query service

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-09 00:05:34 -07:00
parent 9711d594db
commit 2f483b3084
83 changed files with 16700 additions and 1277 deletions

View File

@@ -3,14 +3,23 @@
*
* Main orchestrator for the Dutchie store discovery pipeline.
*
* Flow:
* 1. Discover cities from Dutchie (or use seeded cities)
* 2. For each city, discover store locations
* 3. Upsert all data to discovery tables
* 4. Admin verifies locations manually
* 5. Verified locations are promoted to canonical dispensaries
* AUTOMATED FLOW (as of 2025-01):
* 1. Fetch cities dynamically from Dutchie GraphQL (getAllCitiesByState)
* 2. For each city, discover store locations via ConsumerDispensaries query
* 3. Upsert locations to dutchie_discovery_locations (keyed by platform_location_id)
* 4. AUTO-VALIDATE: Check required fields (name, city, state, platform_menu_url, platform_location_id)
* 5. AUTO-PROMOTE: Valid locations are upserted to dispensaries table with crawl_enabled=true
* 6. All actions logged to dutchie_promotion_log for audit
*
* This module does NOT create canonical dispensaries automatically.
* Tables involved:
* - dutchie_discovery_cities: Known cities for each state
* - dutchie_discovery_locations: Raw discovered store data
* - dispensaries: Canonical store records (promoted from discovery)
* - dutchie_promotion_log: Audit trail for validation/promotion
*
* Usage:
* npx tsx src/scripts/run-discovery.ts discover:state AZ
* npx tsx src/scripts/run-discovery.ts discover:state CA
*/
import { Pool } from 'pg';
@@ -24,11 +33,12 @@ import {
getCitiesToCrawl,
getCityBySlug,
seedKnownCities,
ARIZONA_CITIES,
} from './city-discovery';
import {
discoverLocationsForCity,
getCitiesForState,
} from './location-discovery';
import { promoteDiscoveredLocations } from './promotion';
// ============================================================
// FULL DISCOVERY
@@ -162,6 +172,25 @@ export async function runFullDiscovery(
console.log(`Errors: ${totalErrors}`);
}
// Step 4: Auto-validate and promote discovered locations
if (!dryRun && totalLocationsUpserted > 0) {
console.log('\n[Discovery] Step 4: Auto-promoting discovered locations...');
const promotionResult = await promoteDiscoveredLocations(stateCode, false);
console.log(`[Discovery] Promotion complete:`);
console.log(` Created: ${promotionResult.created} new dispensaries`);
console.log(` Updated: ${promotionResult.updated} existing dispensaries`);
console.log(` Rejected: ${promotionResult.rejected} (validation failed)`);
if (promotionResult.rejectedRecords.length > 0) {
console.log(` Rejection reasons:`);
promotionResult.rejectedRecords.slice(0, 5).forEach(r => {
console.log(` - ${r.name}: ${r.errors.join(', ')}`);
});
if (promotionResult.rejectedRecords.length > 5) {
console.log(` ... and ${promotionResult.rejectedRecords.length - 5} more`);
}
}
}
return {
cities: cityResult,
locations: locationResults,
@@ -235,11 +264,19 @@ export async function discoverState(
console.log(`[Discovery] Discovering state: ${stateCode}`);
// Seed known cities for this state
if (stateCode === 'AZ') {
console.log('[Discovery] Seeding Arizona cities...');
const seeded = await seedKnownCities(pool, ARIZONA_CITIES);
console.log(`[Discovery] Seeded ${seeded.created} new cities, ${seeded.updated} updated`);
// Dynamically fetch and seed cities for this state
console.log(`[Discovery] Fetching cities for ${stateCode} from Dutchie...`);
const cityNames = await getCitiesForState(stateCode);
if (cityNames.length > 0) {
const cities = cityNames.map(name => ({
name,
slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
stateCode,
}));
const seeded = await seedKnownCities(pool, cities);
console.log(`[Discovery] Seeded ${seeded.created} new cities, ${seeded.updated} updated for ${stateCode}`);
} else {
console.log(`[Discovery] No cities found for ${stateCode}`);
}
// Run full discovery for this state

View File

@@ -13,7 +13,6 @@ export {
getCitiesToCrawl,
getCityBySlug,
seedKnownCities,
ARIZONA_CITIES,
} from './city-discovery';
// Location Discovery
@@ -33,5 +32,17 @@ export {
DiscoveryStats,
} from './discovery-crawler';
// Promotion
export {
validateForPromotion,
validateDiscoveredLocations,
promoteDiscoveredLocations,
promoteSingleLocation,
ValidationResult,
ValidationSummary,
PromotionResult,
PromotionSummary,
} from './promotion';
// Routes
export { createDiscoveryRoutes } from './routes';

View File

@@ -134,10 +134,10 @@ export interface StateWithCities {
}
/**
* Fetch all states with their cities from Dutchie's __NEXT_DATA__
* Fetch all states with their cities via direct GraphQL query
*
* This fetches a city page and extracts the statesWithDispensaries data
* which contains all states and their cities where Dutchie has dispensaries.
* Uses the getAllCitiesByState persisted query which returns all states
* and cities where Dutchie has dispensaries.
*/
export async function fetchStatesWithDispensaries(
options: { verbose?: boolean } = {}
@@ -147,84 +147,53 @@ export async function fetchStatesWithDispensaries(
// Initialize proxy if USE_PROXY=true
await initDiscoveryProxy();
console.log('[LocationDiscovery] Fetching statesWithDispensaries from Dutchie...');
console.log('[LocationDiscovery] Fetching statesWithDispensaries via GraphQL...');
// Fetch any city page to get the __NEXT_DATA__ with statesWithDispensaries
// Using a known city that's likely to exist
const result = await fetchPage('/dispensaries/az/phoenix', { maxRetries: 3 });
try {
// Use direct GraphQL query - much cleaner than scraping __NEXT_DATA__
const result = await executeGraphQL(
'getAllCitiesByState',
{}, // No variables needed
GRAPHQL_HASHES.GetAllCitiesByState,
{ maxRetries: 3, retryOn403: true }
);
if (!result || result.status !== 200) {
console.error('[LocationDiscovery] Failed to fetch city page');
return [];
}
const nextData = extractNextData(result.html);
if (!nextData) {
console.error('[LocationDiscovery] No __NEXT_DATA__ found');
return [];
}
// Extract statesWithDispensaries from Apollo state
const apolloState = nextData.props?.pageProps?.initialApolloState;
if (!apolloState) {
console.error('[LocationDiscovery] No initialApolloState found');
return [];
}
// Find ROOT_QUERY.statesWithDispensaries
const rootQuery = apolloState['ROOT_QUERY'];
if (!rootQuery) {
console.error('[LocationDiscovery] No ROOT_QUERY found');
return [];
}
// The statesWithDispensaries is at ROOT_QUERY.statesWithDispensaries
const statesRefs = rootQuery.statesWithDispensaries;
if (!Array.isArray(statesRefs)) {
console.error('[LocationDiscovery] statesWithDispensaries not found or not an array');
return [];
}
// Resolve the references to actual state data
const states: StateWithCities[] = [];
for (const ref of statesRefs) {
// ref might be { __ref: "StateWithDispensaries:0" } or direct object
let stateData: any;
if (ref && ref.__ref) {
stateData = apolloState[ref.__ref];
} else {
stateData = ref;
const statesData = result?.data?.statesWithDispensaries;
if (!Array.isArray(statesData)) {
console.error('[LocationDiscovery] statesWithDispensaries not found in response');
return [];
}
if (stateData && stateData.name) {
// Parse cities JSON array if it's a string
let cities = stateData.cities;
if (typeof cities === 'string') {
try {
cities = JSON.parse(cities);
} catch {
cities = [];
}
// Map to our StateWithCities format
const states: StateWithCities[] = [];
for (const state of statesData) {
if (state && state.name) {
// Filter out null cities
const cities = Array.isArray(state.cities)
? state.cities.filter((c: string | null) => c !== null)
: [];
states.push({
name: state.name,
country: state.country || 'US',
cities,
});
}
states.push({
name: stateData.name,
country: stateData.country || 'US',
cities: Array.isArray(cities) ? cities : [],
});
}
}
if (verbose) {
console.log(`[LocationDiscovery] Found ${states.length} states`);
for (const state of states) {
console.log(` ${state.name}: ${state.cities.length} cities`);
if (verbose) {
console.log(`[LocationDiscovery] Found ${states.length} states`);
for (const state of states) {
console.log(` ${state.name}: ${state.cities.length} cities`);
}
}
}
console.log(`[LocationDiscovery] Loaded ${states.length} states with cities`);
return states;
console.log(`[LocationDiscovery] Loaded ${states.length} states with cities`);
return states;
} catch (error: any) {
console.error(`[LocationDiscovery] Failed to fetch states: ${error.message}`);
return [];
}
}
/**
@@ -751,31 +720,57 @@ async function scrapeLocationCards(
/**
* Normalize a raw location response to a consistent format.
* Maps Dutchie camelCase fields to our snake_case equivalents.
*/
function normalizeLocationResponse(raw: any): DutchieLocationResponse {
const slug = raw.slug || raw.cName || raw.urlSlug || '';
const id = raw.id || raw._id || raw.dispensaryId || '';
// Extract location data - GraphQL response nests address info in .location
const loc = raw.location || {};
// Extract coordinates from geometry.coordinates [longitude, latitude]
const coords = loc.geometry?.coordinates || [];
const longitude = coords[0] || raw.longitude || raw.lng || loc.longitude || loc.lng;
const latitude = coords[1] || raw.latitude || raw.lat || loc.latitude || loc.lat;
return {
id,
name: raw.name || raw.dispensaryName || '',
slug,
address: raw.address || raw.fullAddress || '',
address1: raw.address1 || raw.addressLine1 || raw.streetAddress || '',
address2: raw.address2 || raw.addressLine2 || '',
city: raw.city || '',
state: raw.state || raw.stateCode || '',
zip: raw.zip || raw.zipCode || raw.postalCode || '',
country: raw.country || raw.countryCode || 'US',
latitude: raw.latitude || raw.lat || raw.location?.latitude,
longitude: raw.longitude || raw.lng || raw.location?.longitude,
cName: raw.cName || raw.slug || '',
address: raw.address || raw.fullAddress || loc.ln1 || '',
address1: raw.address1 || raw.addressLine1 || raw.streetAddress || loc.ln1 || '',
address2: raw.address2 || raw.addressLine2 || loc.ln2 || '',
city: raw.city || loc.city || '',
state: raw.state || raw.stateCode || loc.state || '',
zip: raw.zip || raw.zipCode || raw.postalCode || loc.zipcode || loc.zip || '',
country: raw.country || raw.countryCode || loc.country || 'United States',
latitude,
longitude,
timezone: raw.timezone || raw.tz || '',
menuUrl: raw.menuUrl || (slug ? `https://dutchie.com/dispensary/${slug}` : ''),
retailType: raw.retailType || raw.type || '',
// Service offerings
offerPickup: raw.offerPickup ?? raw.storeSettings?.offerPickup ?? true,
offerDelivery: raw.offerDelivery ?? raw.storeSettings?.offerDelivery ?? false,
isRecreational: raw.isRecreational ?? raw.retailType?.includes('Recreational') ?? true,
isMedical: raw.isMedical ?? raw.retailType?.includes('Medical') ?? true,
offerCurbsidePickup: raw.offerCurbsidePickup ?? false,
// License types
isRecreational: raw.isRecreational ?? raw.recDispensary ?? raw.retailType?.includes('Recreational') ?? true,
isMedical: raw.isMedical ?? raw.medicalDispensary ?? raw.retailType?.includes('Medical') ?? true,
// Contact info
phone: raw.phone || '',
email: raw.email || '',
website: raw.embedBackUrl || '',
// Branding
description: raw.description || '',
logoImage: raw.logoImage || '',
bannerImage: raw.bannerImage || '',
// Chain/enterprise info
chainSlug: raw.chain || '',
enterpriseId: raw.retailer?.enterpriseId || '',
// Status
status: raw.status || '',
// Preserve raw data
...raw,
};
@@ -826,15 +821,27 @@ export async function upsertLocation(
offers_pickup,
is_recreational,
is_medical,
phone,
website,
email,
description,
logo_image,
banner_image,
chain_slug,
enterprise_id,
c_name,
country,
store_status,
last_seen_at,
updated_at
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, NOW(), NOW())
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, NOW(), NOW())
ON CONFLICT (platform, platform_location_id)
DO UPDATE SET
name = EXCLUDED.name,
platform_menu_url = EXCLUDED.platform_menu_url,
raw_address = COALESCE(EXCLUDED.raw_address, dutchie_discovery_locations.raw_address),
address_line1 = COALESCE(EXCLUDED.address_line1, dutchie_discovery_locations.address_line1),
address_line2 = COALESCE(EXCLUDED.address_line2, dutchie_discovery_locations.address_line2),
city = COALESCE(EXCLUDED.city, dutchie_discovery_locations.city),
state_code = COALESCE(EXCLUDED.state_code, dutchie_discovery_locations.state_code),
postal_code = COALESCE(EXCLUDED.postal_code, dutchie_discovery_locations.postal_code),
@@ -846,6 +853,17 @@ export async function upsertLocation(
offers_pickup = COALESCE(EXCLUDED.offers_pickup, dutchie_discovery_locations.offers_pickup),
is_recreational = COALESCE(EXCLUDED.is_recreational, dutchie_discovery_locations.is_recreational),
is_medical = COALESCE(EXCLUDED.is_medical, dutchie_discovery_locations.is_medical),
phone = COALESCE(EXCLUDED.phone, dutchie_discovery_locations.phone),
website = COALESCE(EXCLUDED.website, dutchie_discovery_locations.website),
email = COALESCE(EXCLUDED.email, dutchie_discovery_locations.email),
description = COALESCE(EXCLUDED.description, dutchie_discovery_locations.description),
logo_image = COALESCE(EXCLUDED.logo_image, dutchie_discovery_locations.logo_image),
banner_image = COALESCE(EXCLUDED.banner_image, dutchie_discovery_locations.banner_image),
chain_slug = COALESCE(EXCLUDED.chain_slug, dutchie_discovery_locations.chain_slug),
enterprise_id = COALESCE(EXCLUDED.enterprise_id, dutchie_discovery_locations.enterprise_id),
c_name = COALESCE(EXCLUDED.c_name, dutchie_discovery_locations.c_name),
country = COALESCE(EXCLUDED.country, dutchie_discovery_locations.country),
store_status = COALESCE(EXCLUDED.store_status, dutchie_discovery_locations.store_status),
last_seen_at = NOW(),
updated_at = NOW()
RETURNING id, (xmax = 0) as is_new`,
@@ -861,7 +879,7 @@ export async function upsertLocation(
location.city || null,
location.state || null,
location.zip || null,
location.country || 'US',
location.country || 'United States',
location.latitude || null,
location.longitude || null,
location.timezone || null,
@@ -871,6 +889,17 @@ export async function upsertLocation(
location.offerPickup ?? null,
location.isRecreational ?? null,
location.isMedical ?? null,
location.phone || null,
location.website || null,
location.email || null,
location.description || null,
location.logoImage || null,
location.bannerImage || null,
location.chainSlug || null,
location.enterpriseId || null,
location.cName || null,
location.country || 'United States',
location.status || null,
]
);

View File

@@ -0,0 +1,579 @@
/**
* Discovery Promotion Service
*
* Handles the promotion of discovery locations to dispensaries:
* 1. Discovery → Raw data in dutchie_discovery_locations (status='discovered')
* 2. Validation → Check required fields, reject incomplete records
* 3. Promotion → Idempotent upsert to dispensaries, link back via dispensary_id
*/
import { pool } from '../db/pool';
import { DiscoveryLocationRow, DiscoveryStatus } from './types';
// ============================================================
// VALIDATION
// ============================================================
/** Outcome of checking one discovery location against the promotion requirements. */
export interface ValidationResult {
  /** True when no validation errors were recorded. */
  valid: boolean;
  /** One message per failed check; empty when `valid` is true. */
  errors: Array<string>;
}
/** Aggregate result of validating a batch of discovery locations. */
export interface ValidationSummary {
  /** Number of locations that were examined. */
  totalChecked: number;
  /** Number of locations that passed every check. */
  validCount: number;
  /** Number of locations that failed at least one check. */
  invalidCount: number;
  /** Identity and failure reasons of each location that did not pass. */
  invalidRecords: {
    id: number;
    name: string;
    errors: string[];
  }[];
}
/**
 * Validate that a single discovery location has every field required for promotion.
 *
 * Required fields, checked in this order: platform_location_id, name, city,
 * state_code, platform_menu_url. A field counts as missing when it is
 * null/undefined, empty, or consists only of whitespace.
 *
 * @param loc Discovery location row to check.
 * @returns `valid` is true only when `errors` is empty; `errors` holds one
 *          "Missing <field>" message per failed check.
 */
export function validateForPromotion(loc: DiscoveryLocationRow): ValidationResult {
  // Apply the same blank test to every required field so that a whitespace-only
  // id or menu URL is rejected just like a whitespace-only name, city or state.
  const isBlank = (value: unknown): boolean =>
    typeof value === 'string' ? value.trim() === '' : !value;

  const requiredFields: ReadonlyArray<readonly [string, unknown]> = [
    ['platform_location_id', loc.platform_location_id],
    ['name', loc.name],
    ['city', loc.city],
    ['state_code', loc.state_code],
    ['platform_menu_url', loc.platform_menu_url],
  ];

  const errors: string[] = [];
  for (const [field, value] of requiredFields) {
    if (isBlank(value)) {
      errors.push(`Missing ${field}`);
    }
  }

  return {
    valid: errors.length === 0,
    errors,
  };
}
/**
 * Run promotion validation over every discovery location whose status is
 * 'discovered' and summarise the outcome. Nothing is written to the database.
 *
 * @param stateCode Optional state filter; when omitted all states are checked.
 * @returns Counts of valid/invalid rows plus the failure reasons per invalid row.
 */
export async function validateDiscoveredLocations(
  stateCode?: string
): Promise<ValidationSummary> {
  const params: string[] = [];
  let query = `
SELECT * FROM dutchie_discovery_locations
WHERE status = 'discovered'
`;
  if (stateCode) {
    params.push(stateCode);
    query += ` AND state_code = $1`;
  }

  const { rows } = await pool.query(query, params);
  const locations = rows as DiscoveryLocationRow[];

  // Collect only the failures; the valid count falls out of the difference.
  const invalidRecords: ValidationSummary['invalidRecords'] = locations.flatMap(loc => {
    const { valid, errors } = validateForPromotion(loc);
    return valid ? [] : [{ id: loc.id, name: loc.name, errors }];
  });

  return {
    totalChecked: locations.length,
    validCount: locations.length - invalidRecords.length,
    invalidCount: invalidRecords.length,
    invalidRecords,
  };
}
// ============================================================
// PROMOTION
// ============================================================
/** Outcome of attempting to promote one discovery location. */
export interface PromotionResult {
  /** Id of the source row in dutchie_discovery_locations. */
  discoveryId: number;
  /** Id of the dispensaries row that was written (0 for dry-run skips). */
  dispensaryId: number;
  /** What happened to the dispensary record. */
  action:
    | 'created'
    | 'updated'
    | 'skipped';
  /** Store name taken from the discovery row. */
  name: string;
}
/** Aggregate result of a bulk promotion run. */
export interface PromotionSummary {
  /** Number of discovery rows that were examined. */
  totalProcessed: number;
  /** Rows that produced a new dispensary. */
  created: number;
  /** Rows that updated an existing dispensary. */
  updated: number;
  /** Valid rows left untouched because the run was a dry run. */
  skipped: number;
  /** Rows that failed validation or whose promotion threw. */
  rejected: number;
  /** Per-row outcome for every row that was promoted or skipped. */
  results: PromotionResult[];
  /** Identity and reasons for every rejected row. */
  rejectedRecords: {
    id: number;
    name: string;
    errors: string[];
  }[];
  /** Wall-clock duration of the run in milliseconds. */
  durationMs: number;
}
/**
 * Generate a URL-safe slug from a store's name, city and state.
 *
 * The three parts are joined with hyphens, lower-cased, and every run of
 * characters outside [a-z0-9] is collapsed to a single hyphen. The result is
 * capped at 100 characters and never starts or ends with a hyphen.
 *
 * @param name  Store name.
 * @param city  City name (may be empty).
 * @param state State code (may be empty).
 * @returns The slug; empty when the inputs contain no [a-z0-9] characters.
 */
function generateSlug(name: string, city: string, state: string): string {
  return `${name}-${city}-${state}`
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '')
    .substring(0, 100)
    // Truncation can cut right after a separator; strip it so the slug stays clean.
    .replace(/-$/, '');
}
/**
 * Append one audit entry to dutchie_promotion_log.
 *
 * @param action           Action label stored in the log row.
 * @param discoveryId      Source discovery row id, if any.
 * @param dispensaryId     Affected dispensary id, if any.
 * @param stateCode        State code of the store, if known.
 * @param storeName        Store name, if known.
 * @param validationErrors Validation messages to record (passed through as-is).
 * @param fieldChanges     Extra details; serialised with JSON.stringify when present.
 * @param triggeredBy      Originator label; defaults to 'auto'.
 */
async function logPromotionAction(
  action: string,
  discoveryId: number | null,
  dispensaryId: number | null,
  stateCode: string | null,
  storeName: string | null,
  validationErrors: string[] | null = null,
  fieldChanges: Record<string, any> | null = null,
  triggeredBy: string = 'auto'
): Promise<void> {
  const serializedChanges = fieldChanges ? JSON.stringify(fieldChanges) : null;
  const values = [
    discoveryId,
    dispensaryId,
    action,
    stateCode,
    storeName,
    validationErrors,
    serializedChanges,
    triggeredBy,
  ];

  await pool.query(`
INSERT INTO dutchie_promotion_log
(discovery_id, dispensary_id, action, state_code, store_name, validation_errors, field_changes, triggered_by)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
`, values);
}
/**
 * Insert a row into crawler_status_alerts (surfaced on the dashboard).
 *
 * @param dispensaryId   Dispensary the alert is about.
 * @param profileId      Related crawler profile id, or null.
 * @param alertType      Alert type label.
 * @param severity       Alert severity.
 * @param message        Human-readable alert text.
 * @param previousStatus Status before the change; empty/undefined is stored as NULL.
 * @param newStatus      Status after the change; empty/undefined is stored as NULL.
 * @param metadata       Extra details; serialised with JSON.stringify when present.
 * @returns The id of the inserted alert row.
 */
export async function createStatusAlert(
  dispensaryId: number,
  profileId: number | null,
  alertType: string,
  severity: 'info' | 'warning' | 'error' | 'critical',
  message: string,
  previousStatus?: string | null,
  newStatus?: string | null,
  metadata?: Record<string, any>
): Promise<number> {
  const serializedMetadata = metadata ? JSON.stringify(metadata) : null;
  const values = [
    dispensaryId,
    profileId,
    alertType,
    severity,
    message,
    previousStatus ? previousStatus : null,
    newStatus ? newStatus : null,
    serializedMetadata,
  ];

  const { rows } = await pool.query(`
INSERT INTO crawler_status_alerts
(dispensary_id, profile_id, alert_type, severity, message, previous_status, new_status, metadata)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
RETURNING id
`, values);

  return rows[0].id;
}
/**
 * Make sure a dispensary has an enabled crawler profile.
 *
 * If an enabled profile already exists its id is returned unchanged. Otherwise
 * a new 'dutchie' profile is inserted with status 'sandbox' and a 'promoted'
 * info alert is raised for it.
 *
 * @param dispensaryId         Dispensary that needs a profile.
 * @param dispensaryName       Used as the profile name and to derive the profile key.
 * @param platformDispensaryId Stored in the profile config and the alert metadata.
 * @returns The profile id and whether this call created it.
 */
async function ensureCrawlerProfile(
  dispensaryId: number,
  dispensaryName: string,
  platformDispensaryId: string
): Promise<{ profileId: number; created: boolean }> {
  // Reuse an enabled profile when there is one.
  const { rows: enabledProfiles } = await pool.query(`
SELECT id FROM dispensary_crawler_profiles
WHERE dispensary_id = $1 AND enabled = true
LIMIT 1
`, [dispensaryId]);

  if (enabledProfiles.length > 0) {
    return { profileId: enabledProfiles[0].id, created: false };
  }

  // Derive a key from the store name: lower-case, hyphen-separated, max 50 chars.
  const profileKey = dispensaryName
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '')
    .substring(0, 50);

  const profileConfig = JSON.stringify({
    platformDispensaryId,
    useBothModes: true,
    downloadImages: true,
    trackStock: true,
  });

  const { rows: insertedProfiles } = await pool.query(`
INSERT INTO dispensary_crawler_profiles (
dispensary_id,
profile_name,
profile_key,
crawler_type,
status,
status_reason,
status_changed_at,
config,
enabled,
consecutive_successes,
consecutive_failures,
created_at,
updated_at
) VALUES (
$1, $2, $3, 'dutchie', 'sandbox', 'Newly promoted from discovery', CURRENT_TIMESTAMP,
$4::jsonb, true, 0, 0, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
)
RETURNING id
`, [dispensaryId, dispensaryName, profileKey, profileConfig]);

  const profileId = insertedProfiles[0].id;

  // Announce the new sandbox store via a status alert.
  await createStatusAlert(
    dispensaryId,
    profileId,
    'promoted',
    'info',
    `${dispensaryName} promoted to sandbox - awaiting first successful crawl`,
    null,
    'sandbox',
    { source: 'discovery_promotion', platformDispensaryId }
  );

  return { profileId, created: true };
}
/**
* Promote a single discovery location to dispensaries table
* Idempotent: uses ON CONFLICT on platform_dispensary_id
*
* Steps, in order:
* 1. Upsert the dispensaries row from the discovery row's fields.
* 2. Link the discovery row to the dispensary and mark it 'verified'.
* 3. For newly inserted dispensaries only, ensure a crawler profile exists.
* 4. Write a 'promoted_create' / 'promoted_update' entry to the promotion log.
*
* When the dispensary already exists, the UPDATE list below does not touch
* platform, slug, platform_dispensary_id, menu_type, crawl_enabled,
* dutchie_verified, dutchie_verified_at or created_at, so those keep their
* stored values.
*
* NOTE(review): each step is a separate pool.query call with no explicit
* transaction visible here, so a failure part-way through can leave the
* dispensary written but the discovery row unlinked or the log entry missing.
*
* @param loc Discovery row to promote; callers in this file validate it first.
* @returns Discovery id, dispensary id, store name and whether the dispensary
*          was 'created' or 'updated'.
*/
async function promoteLocation(
loc: DiscoveryLocationRow
): Promise<PromotionResult> {
// Prefer the platform-provided slug; otherwise derive one from name/city/state.
const slug = loc.platform_slug || generateSlug(loc.name, loc.city || '', loc.state_code || '');
// Upsert into dispensaries
// ON CONFLICT by platform_dispensary_id ensures idempotency
// The positional parameters $1..$34 must stay aligned with the column list;
// the trailing comments on the values array give the mapping.
const upsertResult = await pool.query(`
INSERT INTO dispensaries (
platform,
name,
slug,
city,
state,
address1,
address2,
zipcode,
postal_code,
phone,
website,
email,
latitude,
longitude,
timezone,
platform_dispensary_id,
menu_url,
menu_type,
description,
logo_image,
banner_image,
offer_pickup,
offer_delivery,
is_medical,
is_recreational,
chain_slug,
enterprise_id,
c_name,
country,
status,
crawl_enabled,
dutchie_verified,
dutchie_verified_at,
dutchie_discovery_id,
created_at,
updated_at
) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
$11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
$21, $22, $23, $24, $25, $26, $27, $28, $29, $30,
$31, $32, $33, $34, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
)
ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL
DO UPDATE SET
name = EXCLUDED.name,
city = EXCLUDED.city,
state = EXCLUDED.state,
address1 = EXCLUDED.address1,
address2 = EXCLUDED.address2,
zipcode = EXCLUDED.zipcode,
postal_code = EXCLUDED.postal_code,
phone = EXCLUDED.phone,
website = EXCLUDED.website,
email = EXCLUDED.email,
latitude = EXCLUDED.latitude,
longitude = EXCLUDED.longitude,
timezone = EXCLUDED.timezone,
menu_url = EXCLUDED.menu_url,
description = EXCLUDED.description,
logo_image = EXCLUDED.logo_image,
banner_image = EXCLUDED.banner_image,
offer_pickup = EXCLUDED.offer_pickup,
offer_delivery = EXCLUDED.offer_delivery,
is_medical = EXCLUDED.is_medical,
is_recreational = EXCLUDED.is_recreational,
chain_slug = EXCLUDED.chain_slug,
enterprise_id = EXCLUDED.enterprise_id,
c_name = EXCLUDED.c_name,
country = EXCLUDED.country,
status = EXCLUDED.status,
dutchie_discovery_id = EXCLUDED.dutchie_discovery_id,
updated_at = CURRENT_TIMESTAMP
RETURNING id, (xmax = 0) AS inserted
`, [
loc.platform || 'dutchie', // $1 platform
loc.name, // $2 name
slug, // $3 slug
loc.city, // $4 city
loc.state_code, // $5 state
loc.address_line1, // $6 address1
loc.address_line2, // $7 address2
loc.postal_code, // $8 zipcode
loc.postal_code, // $9 postal_code
loc.phone, // $10 phone
loc.website, // $11 website
loc.email, // $12 email
loc.latitude, // $13 latitude
loc.longitude, // $14 longitude
loc.timezone, // $15 timezone
loc.platform_location_id, // $16 platform_dispensary_id
loc.platform_menu_url, // $17 menu_url
'dutchie', // $18 menu_type
loc.description, // $19 description
loc.logo_image, // $20 logo_image
loc.banner_image, // $21 banner_image
loc.offers_pickup ?? true, // $22 offer_pickup
loc.offers_delivery ?? false, // $23 offer_delivery
loc.is_medical ?? false, // $24 is_medical
loc.is_recreational ?? true, // $25 is_recreational
loc.chain_slug, // $26 chain_slug
loc.enterprise_id, // $27 enterprise_id
loc.c_name, // $28 c_name
loc.country || 'United States', // $29 country
loc.store_status || 'open', // $30 status
true, // $31 crawl_enabled
true, // $32 dutchie_verified
new Date(), // $33 dutchie_verified_at
loc.id, // $34 dutchie_discovery_id
]);
const dispensaryId = upsertResult.rows[0].id;
// `inserted` comes from the (xmax = 0) expression in RETURNING; the code treats
// it as "row was newly inserted" (relies on PostgreSQL xmax semantics).
const wasInserted = upsertResult.rows[0].inserted;
// Link discovery location back to dispensary and update status
await pool.query(`
UPDATE dutchie_discovery_locations
SET
dispensary_id = $1,
status = 'verified',
verified_at = CURRENT_TIMESTAMP,
verified_by = 'auto-promotion'
WHERE id = $2
`, [dispensaryId, loc.id]);
// Create crawler profile with sandbox status for new dispensaries
// (existing dispensaries are left with whatever profile they already have).
if (wasInserted && loc.platform_location_id) {
await ensureCrawlerProfile(dispensaryId, loc.name, loc.platform_location_id);
}
const action = wasInserted ? 'promoted_create' : 'promoted_update';
// Log the promotion
await logPromotionAction(
action,
loc.id,
dispensaryId,
loc.state_code,
loc.name,
null,
{ slug, city: loc.city, platform_location_id: loc.platform_location_id }
);
return {
discoveryId: loc.id,
dispensaryId,
action: wasInserted ? 'created' : 'updated',
name: loc.name,
};
}
/**
 * Validate and promote every discovery location whose status is 'discovered'.
 *
 * Rows are processed one at a time in id order. Rows that fail validation are
 * counted as rejected and, unless this is a dry run, marked 'rejected' in the
 * discovery table and logged. Valid rows are promoted; if promotion throws,
 * the error is printed and the row is counted as rejected, and processing
 * continues with the next row.
 *
 * @param stateCode Optional filter by state (e.g., 'CA', 'AZ')
 * @param dryRun    If true, only validate without making changes
 * @returns Counters, per-row results, rejection reasons and elapsed time.
 */
export async function promoteDiscoveredLocations(
  stateCode?: string,
  dryRun = false
): Promise<PromotionSummary> {
  const startTime = Date.now();

  const params: string[] = [];
  let query = `
SELECT * FROM dutchie_discovery_locations
WHERE status = 'discovered'
`;
  if (stateCode) {
    params.push(stateCode);
    query += ` AND state_code = $1`;
  }
  query += ` ORDER BY id`;

  const { rows } = await pool.query(query, params);
  const locations = rows as DiscoveryLocationRow[];

  const summary: PromotionSummary = {
    totalProcessed: locations.length,
    created: 0,
    updated: 0,
    skipped: 0,
    rejected: 0,
    results: [],
    rejectedRecords: [],
    durationMs: 0,
  };

  for (const loc of locations) {
    // Validation gate
    const { valid, errors } = validateForPromotion(loc);
    if (!valid) {
      summary.rejected += 1;
      summary.rejectedRecords.push({ id: loc.id, name: loc.name, errors });

      // Persist the rejection and audit it, but only for real runs.
      if (!dryRun) {
        await pool.query(`
UPDATE dutchie_discovery_locations
SET status = 'rejected', notes = $1
WHERE id = $2
`, [errors.join('; '), loc.id]);

        await logPromotionAction(
          'rejected',
          loc.id,
          null,
          loc.state_code,
          loc.name,
          errors
        );
      }
      continue;
    }

    // Dry runs record the valid row as skipped and touch nothing.
    if (dryRun) {
      summary.skipped += 1;
      summary.results.push({
        discoveryId: loc.id,
        dispensaryId: 0,
        action: 'skipped',
        name: loc.name,
      });
      continue;
    }

    try {
      const outcome = await promoteLocation(loc);
      summary.results.push(outcome);
      if (outcome.action === 'created') {
        summary.created += 1;
      } else {
        summary.updated += 1;
      }
    } catch (error: any) {
      console.error(`Failed to promote location ${loc.id} (${loc.name}):`, error.message);
      summary.rejected += 1;
      summary.rejectedRecords.push({
        id: loc.id,
        name: loc.name,
        errors: [`Promotion error: ${error.message}`],
      });
    }
  }

  summary.durationMs = Date.now() - startTime;
  return summary;
}
/**
 * Promote one discovery location, looked up by its id.
 *
 * @param discoveryId Id of the row in dutchie_discovery_locations.
 * @returns The promotion outcome for that row.
 * @throws Error when the row does not exist or fails promotion validation.
 */
export async function promoteSingleLocation(
  discoveryId: number
): Promise<PromotionResult> {
  const { rows } = await pool.query(
    `SELECT * FROM dutchie_discovery_locations WHERE id = $1`,
    [discoveryId]
  );
  if (rows.length === 0) {
    throw new Error(`Discovery location ${discoveryId} not found`);
  }

  const loc = rows[0] as DiscoveryLocationRow;

  // Refuse to promote rows that are missing required fields.
  const { valid, errors } = validateForPromotion(loc);
  if (!valid) {
    throw new Error(`Validation failed: ${errors.join(', ')}`);
  }

  return promoteLocation(loc);
}

View File

@@ -18,8 +18,8 @@ import {
getCitiesToCrawl,
getCityBySlug,
seedKnownCities,
ARIZONA_CITIES,
} from './city-discovery';
import { getCitiesForState } from './location-discovery';
import {
DiscoveryLocation,
DiscoveryCity,
@@ -27,6 +27,11 @@ import {
mapLocationRowToLocation,
mapCityRowToCity,
} from './types';
import {
validateDiscoveredLocations,
promoteDiscoveredLocations,
promoteSingleLocation,
} from './promotion';
export function createDiscoveryRoutes(pool: Pool): Router {
const router = Router();
@@ -53,44 +58,44 @@ export function createDiscoveryRoutes(pool: Pool): Router {
offset = '0',
} = req.query;
let whereClause = 'WHERE platform = $1 AND active = TRUE';
let whereClause = 'WHERE dl.platform = $1 AND dl.active = TRUE';
const params: any[] = [platform];
let paramIndex = 2;
if (status) {
whereClause += ` AND status = $${paramIndex}`;
whereClause += ` AND dl.status = $${paramIndex}`;
params.push(status);
paramIndex++;
}
if (stateCode) {
whereClause += ` AND state_code = $${paramIndex}`;
whereClause += ` AND dl.state_code = $${paramIndex}`;
params.push(stateCode);
paramIndex++;
}
if (countryCode) {
whereClause += ` AND country_code = $${paramIndex}`;
whereClause += ` AND dl.country_code = $${paramIndex}`;
params.push(countryCode);
paramIndex++;
}
if (city) {
whereClause += ` AND city ILIKE $${paramIndex}`;
whereClause += ` AND dl.city ILIKE $${paramIndex}`;
params.push(`%${city}%`);
paramIndex++;
}
if (search) {
whereClause += ` AND (name ILIKE $${paramIndex} OR platform_slug ILIKE $${paramIndex})`;
whereClause += ` AND (dl.name ILIKE $${paramIndex} OR dl.platform_slug ILIKE $${paramIndex})`;
params.push(`%${search}%`);
paramIndex++;
}
if (hasDispensary === 'true') {
whereClause += ' AND dispensary_id IS NOT NULL';
whereClause += ' AND dl.dispensary_id IS NOT NULL';
} else if (hasDispensary === 'false') {
whereClause += ' AND dispensary_id IS NULL';
whereClause += ' AND dl.dispensary_id IS NULL';
}
params.push(parseInt(limit as string, 10), parseInt(offset as string, 10));
@@ -705,15 +710,22 @@ export function createDiscoveryRoutes(pool: Pool): Router {
return res.status(400).json({ error: 'stateCode is required' });
}
let cities: any[] = [];
if (stateCode === 'AZ') {
cities = ARIZONA_CITIES;
} else {
// Dynamically fetch cities from Dutchie for any state
const cityNames = await getCitiesForState(stateCode as string);
if (cityNames.length === 0) {
return res.status(400).json({
error: `No predefined cities for state: ${stateCode}. Add cities to city-discovery.ts`,
error: `No cities found for state: ${stateCode}`,
});
}
// Convert to seed format
const cities = cityNames.map(name => ({
name,
slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
stateCode: stateCode as string,
}));
const result = await seedKnownCities(pool, cities);
res.json({
@@ -834,6 +846,136 @@ export function createDiscoveryRoutes(pool: Pool): Router {
}
});
// ============================================================
// PROMOTION ENDPOINTS
// ============================================================
/**
 * GET /api/discovery/admin/validate
 *
 * Calls `validateDiscoveredLocations` with the optional `stateCode` query
 * parameter and responds with its result spread into the JSON body next to
 * `success: true`. Any thrown error is reported as HTTP 500 with the error
 * message.
 */
router.get('/admin/validate', async (req: Request, res: Response) => {
  try {
    const stateFilter = req.query.stateCode as string | undefined;
    const validation = await validateDiscoveredLocations(stateFilter);
    // `success` comes first so the spread keeps the same key precedence as before.
    res.json({ success: true, ...validation });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
/**
 * POST /api/discovery/admin/promote
 * Promote all valid discovered locations to dispensaries (idempotent)
 *
 * Parameters (JSON body, or query string as a fallback; body wins on conflict):
 * - stateCode: Filter by state (e.g., 'CA', 'AZ'). Omit to process all states.
 * - dryRun: If true, only validate without making changes
 *
 * Responses:
 * - 200: `{ success: true, ...summary }` from promoteDiscoveredLocations
 * - 400: stateCode was supplied but is not a string
 * - 500: `{ error }` when promotion throws
 */
router.post('/admin/promote', async (req: Request, res: Response) => {
  try {
    // req.body can be undefined when no body was sent / no parser matched;
    // fall back to an empty object instead of throwing on destructure.
    const body = typeof req.body === 'object' && req.body !== null ? req.body : {};
    const input: Record<string, unknown> = { ...req.query, ...body };

    const rawStateCode = input.stateCode;
    if (rawStateCode != null && typeof rawStateCode !== 'string') {
      return res.status(400).json({ error: 'stateCode must be a string' });
    }
    const stateCode = typeof rawStateCode === 'string' ? rawStateCode : undefined;

    // Coerce to a real boolean. Only explicit "off" values disable dry-run, so
    // anything that was previously truthy still results in a dry run, while the
    // strings 'false' / '0' (e.g. from a query string) are no longer misread as true.
    const rawDryRun = input.dryRun;
    const dryRun = !(
      rawDryRun == null ||
      rawDryRun === false ||
      rawDryRun === 0 ||
      rawDryRun === '' ||
      (typeof rawDryRun === 'string' && ['false', '0'].includes(rawDryRun.toLowerCase()))
    );

    console.log(`[Discovery API] Starting promotion for ${stateCode || 'all states'} (dryRun=${dryRun})`);
    const summary = await promoteDiscoveredLocations(stateCode, dryRun);
    res.json({
      success: true,
      ...summary,
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
/**
 * POST /api/discovery/admin/promote/:id
 * Promote a single discovery location by ID
 *
 * Responses:
 * - 200: `{ success: true, ...result }` from promoteSingleLocation
 * - 400: `:id` does not parse to an integer
 * - 500: `{ error }` when promotion throws
 */
router.post('/admin/promote/:id', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const locationId = parseInt(id, 10);
    // parseInt yields NaN for non-numeric input; reject it here rather than
    // handing NaN to the promotion layer.
    if (!Number.isInteger(locationId)) {
      return res.status(400).json({ error: `Invalid location id: ${id}` });
    }
    console.log(`[Discovery API] Promoting single location ${id}`);
    const result = await promoteSingleLocation(locationId);
    res.json({
      success: true,
      ...result,
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
// ============================================================
// PROMOTION LOG
// ============================================================
/**
 * GET /api/discovery/promotion-log
 * Get promotion audit log
 *
 * Query params (all optional):
 * - state: exact match on the log entry's state_code
 * - dispensary_id: integer; exact match on the log entry's dispensary_id
 * - limit: non-negative integer, maximum rows returned (default 100)
 *
 * Rows are ordered newest first by created_at and joined with the discovery
 * location and dispensary names.
 *
 * Responses:
 * - 200: `{ logs: [...] }` with camelCase fields
 * - 400: a query parameter is malformed
 * - 500: `{ error }` when the query throws
 */
router.get('/promotion-log', async (req: Request, res: Response) => {
  try {
    const { state, dispensary_id, limit = '100' } = req.query;
    let whereClause = 'WHERE 1=1';
    const params: Array<string | number> = [];
    let paramIndex = 1;

    if (state) {
      // Repeated query keys arrive as arrays/objects; only a plain string is valid.
      if (typeof state !== 'string') {
        return res.status(400).json({ error: 'state must be a string' });
      }
      whereClause += ` AND pl.state_code = $${paramIndex}`;
      params.push(state);
      paramIndex++;
    }

    if (dispensary_id) {
      const dispensaryId = typeof dispensary_id === 'string' ? parseInt(dispensary_id, 10) : NaN;
      if (!Number.isInteger(dispensaryId)) {
        return res.status(400).json({ error: 'dispensary_id must be an integer' });
      }
      whereClause += ` AND pl.dispensary_id = $${paramIndex}`;
      params.push(dispensaryId);
      paramIndex++;
    }

    // A NaN or negative LIMIT would be rejected by the database and surface as
    // a 500; validate it up front and answer with a 400 instead.
    const rowLimit = typeof limit === 'string' ? parseInt(limit, 10) : NaN;
    if (!Number.isInteger(rowLimit) || rowLimit < 0) {
      return res.status(400).json({ error: 'limit must be a non-negative integer' });
    }
    params.push(rowLimit);

    const { rows } = await pool.query(`
      SELECT
        pl.*,
        dl.name as discovery_name,
        d.name as dispensary_name
      FROM dutchie_promotion_log pl
      LEFT JOIN dutchie_discovery_locations dl ON pl.discovery_id = dl.id
      LEFT JOIN dispensaries d ON pl.dispensary_id = d.id
      ${whereClause}
      ORDER BY pl.created_at DESC
      LIMIT $${paramIndex}
    `, params);

    res.json({
      logs: rows.map((r: any) => ({
        id: r.id,
        discoveryId: r.discovery_id,
        dispensaryId: r.dispensary_id,
        action: r.action,
        stateCode: r.state_code,
        storeName: r.store_name,
        validationErrors: r.validation_errors,
        fieldChanges: r.field_changes,
        triggeredBy: r.triggered_by,
        createdAt: r.created_at,
        discoveryName: r.discovery_name,
        dispensaryName: r.dispensary_name,
      })),
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
return router;
}

View File

@@ -60,6 +60,7 @@ export interface DiscoveryLocation {
stateCode: string | null;
postalCode: string | null;
countryCode: string | null;
country: string | null;
latitude: number | null;
longitude: number | null;
timezone: string | null;
@@ -72,6 +73,18 @@ export interface DiscoveryLocation {
offersPickup: boolean | null;
isRecreational: boolean | null;
isMedical: boolean | null;
// New Dutchie fields
phone: string | null;
website: string | null;
email: string | null;
description: string | null;
logoImage: string | null;
bannerImage: string | null;
chainSlug: string | null;
enterpriseId: string | null;
cName: string | null;
storeStatus: string | null;
// Timestamps
firstSeenAt: Date;
lastSeenAt: Date;
lastCheckedAt: Date | null;
@@ -96,6 +109,7 @@ export interface DiscoveryLocationRow {
state_code: string | null;
postal_code: string | null;
country_code: string | null;
country: string | null;
latitude: number | null;
longitude: number | null;
timezone: string | null;
@@ -108,6 +122,18 @@ export interface DiscoveryLocationRow {
offers_pickup: boolean | null;
is_recreational: boolean | null;
is_medical: boolean | null;
// New Dutchie fields (snake_case for DB row)
phone: string | null;
website: string | null;
email: string | null;
description: string | null;
logo_image: string | null;
banner_image: string | null;
chain_slug: string | null;
enterprise_id: string | null;
c_name: string | null;
store_status: string | null;
// Timestamps
first_seen_at: Date;
last_seen_at: Date;
last_checked_at: Date | null;
@@ -245,6 +271,7 @@ export function mapLocationRowToLocation(row: DiscoveryLocationRow): DiscoveryLo
stateCode: row.state_code,
postalCode: row.postal_code,
countryCode: row.country_code,
country: row.country,
latitude: row.latitude,
longitude: row.longitude,
timezone: row.timezone,
@@ -257,6 +284,18 @@ export function mapLocationRowToLocation(row: DiscoveryLocationRow): DiscoveryLo
offersPickup: row.offers_pickup,
isRecreational: row.is_recreational,
isMedical: row.is_medical,
// New Dutchie fields
phone: row.phone,
website: row.website,
email: row.email,
description: row.description,
logoImage: row.logo_image,
bannerImage: row.banner_image,
chainSlug: row.chain_slug,
enterpriseId: row.enterprise_id,
cName: row.c_name,
storeStatus: row.store_status,
// Timestamps
firstSeenAt: row.first_seen_at,
lastSeenAt: row.last_seen_at,
lastCheckedAt: row.last_checked_at,