feat: SEO template library, discovery pipeline, and orchestrator enhancements
## SEO Template Library
- Add complete template library with 7 page types (state, city, category, brand, product, search, regeneration)
- Add Template Library tab in SEO Orchestrator with accordion-based editors
- Add template preview, validation, and variable injection engine
- Add API endpoints: /api/seo/templates, preview, validate, generate, regenerate

## Discovery Pipeline
- Add promotion.ts for discovery location validation and promotion
- Add discover-all-states.ts script for multi-state discovery
- Add promotion log migration (067)
- Enhance discovery routes and types

## Orchestrator & Admin
- Add crawl_enabled filter to stores page
- Add API permissions page
- Add job queue management
- Add price analytics routes
- Add markets and intelligence routes
- Enhance dashboard and worker monitoring

## Infrastructure
- Add migrations for worker definitions, SEO settings, field alignment
- Add canonical pipeline for scraper v2
- Update hydration and sync orchestrator
- Enhance multi-state query service

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -134,10 +134,10 @@ export interface StateWithCities {
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch all states with their cities from Dutchie's __NEXT_DATA__
|
||||
* Fetch all states with their cities via direct GraphQL query
|
||||
*
|
||||
* This fetches a city page and extracts the statesWithDispensaries data
|
||||
* which contains all states and their cities where Dutchie has dispensaries.
|
||||
* Uses the getAllCitiesByState persisted query which returns all states
|
||||
* and cities where Dutchie has dispensaries.
|
||||
*/
|
||||
export async function fetchStatesWithDispensaries(
|
||||
options: { verbose?: boolean } = {}
|
||||
@@ -147,84 +147,53 @@ export async function fetchStatesWithDispensaries(
|
||||
// Initialize proxy if USE_PROXY=true
|
||||
await initDiscoveryProxy();
|
||||
|
||||
console.log('[LocationDiscovery] Fetching statesWithDispensaries from Dutchie...');
|
||||
console.log('[LocationDiscovery] Fetching statesWithDispensaries via GraphQL...');
|
||||
|
||||
// Fetch any city page to get the __NEXT_DATA__ with statesWithDispensaries
|
||||
// Using a known city that's likely to exist
|
||||
const result = await fetchPage('/dispensaries/az/phoenix', { maxRetries: 3 });
|
||||
try {
|
||||
// Use direct GraphQL query - much cleaner than scraping __NEXT_DATA__
|
||||
const result = await executeGraphQL(
|
||||
'getAllCitiesByState',
|
||||
{}, // No variables needed
|
||||
GRAPHQL_HASHES.GetAllCitiesByState,
|
||||
{ maxRetries: 3, retryOn403: true }
|
||||
);
|
||||
|
||||
if (!result || result.status !== 200) {
|
||||
console.error('[LocationDiscovery] Failed to fetch city page');
|
||||
return [];
|
||||
}
|
||||
|
||||
const nextData = extractNextData(result.html);
|
||||
if (!nextData) {
|
||||
console.error('[LocationDiscovery] No __NEXT_DATA__ found');
|
||||
return [];
|
||||
}
|
||||
|
||||
// Extract statesWithDispensaries from Apollo state
|
||||
const apolloState = nextData.props?.pageProps?.initialApolloState;
|
||||
if (!apolloState) {
|
||||
console.error('[LocationDiscovery] No initialApolloState found');
|
||||
return [];
|
||||
}
|
||||
|
||||
// Find ROOT_QUERY.statesWithDispensaries
|
||||
const rootQuery = apolloState['ROOT_QUERY'];
|
||||
if (!rootQuery) {
|
||||
console.error('[LocationDiscovery] No ROOT_QUERY found');
|
||||
return [];
|
||||
}
|
||||
|
||||
// The statesWithDispensaries is at ROOT_QUERY.statesWithDispensaries
|
||||
const statesRefs = rootQuery.statesWithDispensaries;
|
||||
if (!Array.isArray(statesRefs)) {
|
||||
console.error('[LocationDiscovery] statesWithDispensaries not found or not an array');
|
||||
return [];
|
||||
}
|
||||
|
||||
// Resolve the references to actual state data
|
||||
const states: StateWithCities[] = [];
|
||||
for (const ref of statesRefs) {
|
||||
// ref might be { __ref: "StateWithDispensaries:0" } or direct object
|
||||
let stateData: any;
|
||||
|
||||
if (ref && ref.__ref) {
|
||||
stateData = apolloState[ref.__ref];
|
||||
} else {
|
||||
stateData = ref;
|
||||
const statesData = result?.data?.statesWithDispensaries;
|
||||
if (!Array.isArray(statesData)) {
|
||||
console.error('[LocationDiscovery] statesWithDispensaries not found in response');
|
||||
return [];
|
||||
}
|
||||
|
||||
if (stateData && stateData.name) {
|
||||
// Parse cities JSON array if it's a string
|
||||
let cities = stateData.cities;
|
||||
if (typeof cities === 'string') {
|
||||
try {
|
||||
cities = JSON.parse(cities);
|
||||
} catch {
|
||||
cities = [];
|
||||
}
|
||||
// Map to our StateWithCities format
|
||||
const states: StateWithCities[] = [];
|
||||
for (const state of statesData) {
|
||||
if (state && state.name) {
|
||||
// Filter out null cities
|
||||
const cities = Array.isArray(state.cities)
|
||||
? state.cities.filter((c: string | null) => c !== null)
|
||||
: [];
|
||||
|
||||
states.push({
|
||||
name: state.name,
|
||||
country: state.country || 'US',
|
||||
cities,
|
||||
});
|
||||
}
|
||||
|
||||
states.push({
|
||||
name: stateData.name,
|
||||
country: stateData.country || 'US',
|
||||
cities: Array.isArray(cities) ? cities : [],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (verbose) {
|
||||
console.log(`[LocationDiscovery] Found ${states.length} states`);
|
||||
for (const state of states) {
|
||||
console.log(` ${state.name}: ${state.cities.length} cities`);
|
||||
if (verbose) {
|
||||
console.log(`[LocationDiscovery] Found ${states.length} states`);
|
||||
for (const state of states) {
|
||||
console.log(` ${state.name}: ${state.cities.length} cities`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`[LocationDiscovery] Loaded ${states.length} states with cities`);
|
||||
return states;
|
||||
console.log(`[LocationDiscovery] Loaded ${states.length} states with cities`);
|
||||
return states;
|
||||
} catch (error: any) {
|
||||
console.error(`[LocationDiscovery] Failed to fetch states: ${error.message}`);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -751,31 +720,57 @@ async function scrapeLocationCards(
|
||||
|
||||
/**
|
||||
* Normalize a raw location response to a consistent format.
|
||||
* Maps Dutchie camelCase fields to our snake_case equivalents.
|
||||
*/
|
||||
function normalizeLocationResponse(raw: any): DutchieLocationResponse {
|
||||
const slug = raw.slug || raw.cName || raw.urlSlug || '';
|
||||
const id = raw.id || raw._id || raw.dispensaryId || '';
|
||||
|
||||
// Extract location data - GraphQL response nests address info in .location
|
||||
const loc = raw.location || {};
|
||||
|
||||
// Extract coordinates from geometry.coordinates [longitude, latitude]
|
||||
const coords = loc.geometry?.coordinates || [];
|
||||
const longitude = coords[0] || raw.longitude || raw.lng || loc.longitude || loc.lng;
|
||||
const latitude = coords[1] || raw.latitude || raw.lat || loc.latitude || loc.lat;
|
||||
|
||||
return {
|
||||
id,
|
||||
name: raw.name || raw.dispensaryName || '',
|
||||
slug,
|
||||
address: raw.address || raw.fullAddress || '',
|
||||
address1: raw.address1 || raw.addressLine1 || raw.streetAddress || '',
|
||||
address2: raw.address2 || raw.addressLine2 || '',
|
||||
city: raw.city || '',
|
||||
state: raw.state || raw.stateCode || '',
|
||||
zip: raw.zip || raw.zipCode || raw.postalCode || '',
|
||||
country: raw.country || raw.countryCode || 'US',
|
||||
latitude: raw.latitude || raw.lat || raw.location?.latitude,
|
||||
longitude: raw.longitude || raw.lng || raw.location?.longitude,
|
||||
cName: raw.cName || raw.slug || '',
|
||||
address: raw.address || raw.fullAddress || loc.ln1 || '',
|
||||
address1: raw.address1 || raw.addressLine1 || raw.streetAddress || loc.ln1 || '',
|
||||
address2: raw.address2 || raw.addressLine2 || loc.ln2 || '',
|
||||
city: raw.city || loc.city || '',
|
||||
state: raw.state || raw.stateCode || loc.state || '',
|
||||
zip: raw.zip || raw.zipCode || raw.postalCode || loc.zipcode || loc.zip || '',
|
||||
country: raw.country || raw.countryCode || loc.country || 'United States',
|
||||
latitude,
|
||||
longitude,
|
||||
timezone: raw.timezone || raw.tz || '',
|
||||
menuUrl: raw.menuUrl || (slug ? `https://dutchie.com/dispensary/${slug}` : ''),
|
||||
retailType: raw.retailType || raw.type || '',
|
||||
// Service offerings
|
||||
offerPickup: raw.offerPickup ?? raw.storeSettings?.offerPickup ?? true,
|
||||
offerDelivery: raw.offerDelivery ?? raw.storeSettings?.offerDelivery ?? false,
|
||||
isRecreational: raw.isRecreational ?? raw.retailType?.includes('Recreational') ?? true,
|
||||
isMedical: raw.isMedical ?? raw.retailType?.includes('Medical') ?? true,
|
||||
offerCurbsidePickup: raw.offerCurbsidePickup ?? false,
|
||||
// License types
|
||||
isRecreational: raw.isRecreational ?? raw.recDispensary ?? raw.retailType?.includes('Recreational') ?? true,
|
||||
isMedical: raw.isMedical ?? raw.medicalDispensary ?? raw.retailType?.includes('Medical') ?? true,
|
||||
// Contact info
|
||||
phone: raw.phone || '',
|
||||
email: raw.email || '',
|
||||
website: raw.embedBackUrl || '',
|
||||
// Branding
|
||||
description: raw.description || '',
|
||||
logoImage: raw.logoImage || '',
|
||||
bannerImage: raw.bannerImage || '',
|
||||
// Chain/enterprise info
|
||||
chainSlug: raw.chain || '',
|
||||
enterpriseId: raw.retailer?.enterpriseId || '',
|
||||
// Status
|
||||
status: raw.status || '',
|
||||
// Preserve raw data
|
||||
...raw,
|
||||
};
|
||||
@@ -826,15 +821,27 @@ export async function upsertLocation(
|
||||
offers_pickup,
|
||||
is_recreational,
|
||||
is_medical,
|
||||
phone,
|
||||
website,
|
||||
email,
|
||||
description,
|
||||
logo_image,
|
||||
banner_image,
|
||||
chain_slug,
|
||||
enterprise_id,
|
||||
c_name,
|
||||
country,
|
||||
store_status,
|
||||
last_seen_at,
|
||||
updated_at
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, NOW(), NOW())
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, NOW(), NOW())
|
||||
ON CONFLICT (platform, platform_location_id)
|
||||
DO UPDATE SET
|
||||
name = EXCLUDED.name,
|
||||
platform_menu_url = EXCLUDED.platform_menu_url,
|
||||
raw_address = COALESCE(EXCLUDED.raw_address, dutchie_discovery_locations.raw_address),
|
||||
address_line1 = COALESCE(EXCLUDED.address_line1, dutchie_discovery_locations.address_line1),
|
||||
address_line2 = COALESCE(EXCLUDED.address_line2, dutchie_discovery_locations.address_line2),
|
||||
city = COALESCE(EXCLUDED.city, dutchie_discovery_locations.city),
|
||||
state_code = COALESCE(EXCLUDED.state_code, dutchie_discovery_locations.state_code),
|
||||
postal_code = COALESCE(EXCLUDED.postal_code, dutchie_discovery_locations.postal_code),
|
||||
@@ -846,6 +853,17 @@ export async function upsertLocation(
|
||||
offers_pickup = COALESCE(EXCLUDED.offers_pickup, dutchie_discovery_locations.offers_pickup),
|
||||
is_recreational = COALESCE(EXCLUDED.is_recreational, dutchie_discovery_locations.is_recreational),
|
||||
is_medical = COALESCE(EXCLUDED.is_medical, dutchie_discovery_locations.is_medical),
|
||||
phone = COALESCE(EXCLUDED.phone, dutchie_discovery_locations.phone),
|
||||
website = COALESCE(EXCLUDED.website, dutchie_discovery_locations.website),
|
||||
email = COALESCE(EXCLUDED.email, dutchie_discovery_locations.email),
|
||||
description = COALESCE(EXCLUDED.description, dutchie_discovery_locations.description),
|
||||
logo_image = COALESCE(EXCLUDED.logo_image, dutchie_discovery_locations.logo_image),
|
||||
banner_image = COALESCE(EXCLUDED.banner_image, dutchie_discovery_locations.banner_image),
|
||||
chain_slug = COALESCE(EXCLUDED.chain_slug, dutchie_discovery_locations.chain_slug),
|
||||
enterprise_id = COALESCE(EXCLUDED.enterprise_id, dutchie_discovery_locations.enterprise_id),
|
||||
c_name = COALESCE(EXCLUDED.c_name, dutchie_discovery_locations.c_name),
|
||||
country = COALESCE(EXCLUDED.country, dutchie_discovery_locations.country),
|
||||
store_status = COALESCE(EXCLUDED.store_status, dutchie_discovery_locations.store_status),
|
||||
last_seen_at = NOW(),
|
||||
updated_at = NOW()
|
||||
RETURNING id, (xmax = 0) as is_new`,
|
||||
@@ -861,7 +879,7 @@ export async function upsertLocation(
|
||||
location.city || null,
|
||||
location.state || null,
|
||||
location.zip || null,
|
||||
location.country || 'US',
|
||||
location.country || 'United States',
|
||||
location.latitude || null,
|
||||
location.longitude || null,
|
||||
location.timezone || null,
|
||||
@@ -871,6 +889,17 @@ export async function upsertLocation(
|
||||
location.offerPickup ?? null,
|
||||
location.isRecreational ?? null,
|
||||
location.isMedical ?? null,
|
||||
location.phone || null,
|
||||
location.website || null,
|
||||
location.email || null,
|
||||
location.description || null,
|
||||
location.logoImage || null,
|
||||
location.bannerImage || null,
|
||||
location.chainSlug || null,
|
||||
location.enterpriseId || null,
|
||||
location.cName || null,
|
||||
location.country || 'United States',
|
||||
location.status || null,
|
||||
]
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user