feat: SEO template library, discovery pipeline, and orchestrator enhancements

## SEO Template Library
- Add complete template library with 7 page types (state, city, category, brand, product, search, regeneration)
- Add Template Library tab in SEO Orchestrator with accordion-based editors
- Add template preview, validation, and variable injection engine
- Add SEO template API endpoints: /api/seo/templates, plus preview, validate, generate, and regenerate

## Discovery Pipeline
- Add promotion.ts for discovery location validation and promotion
- Add discover-all-states.ts script for multi-state discovery
- Add promotion log migration (067)
- Enhance discovery routes and types

## Orchestrator & Admin
- Add crawl_enabled filter to stores page
- Add API permissions page
- Add job queue management
- Add price analytics routes
- Add markets and intelligence routes
- Enhance dashboard and worker monitoring

## Infrastructure
- Add migrations for worker definitions, SEO settings, field alignment
- Add canonical pipeline for scraper v2
- Update hydration and sync orchestrator
- Enhance multi-state query service

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Kelly
2025-12-09 00:05:34 -07:00
parent 9711d594db
commit 2f483b3084
83 changed files with 16700 additions and 1277 deletions

View File

@@ -134,10 +134,10 @@ export interface StateWithCities {
}
/**
* Fetch all states with their cities from Dutchie's __NEXT_DATA__
* Fetch all states with their cities via direct GraphQL query
*
* This fetches a city page and extracts the statesWithDispensaries data
* which contains all states and their cities where Dutchie has dispensaries.
* Uses the getAllCitiesByState persisted query which returns all states
* and cities where Dutchie has dispensaries.
*/
export async function fetchStatesWithDispensaries(
options: { verbose?: boolean } = {}
@@ -147,84 +147,53 @@ export async function fetchStatesWithDispensaries(
// Initialize proxy if USE_PROXY=true
await initDiscoveryProxy();
console.log('[LocationDiscovery] Fetching statesWithDispensaries from Dutchie...');
console.log('[LocationDiscovery] Fetching statesWithDispensaries via GraphQL...');
// Fetch any city page to get the __NEXT_DATA__ with statesWithDispensaries
// Using a known city that's likely to exist
const result = await fetchPage('/dispensaries/az/phoenix', { maxRetries: 3 });
try {
// Use direct GraphQL query - much cleaner than scraping __NEXT_DATA__
const result = await executeGraphQL(
'getAllCitiesByState',
{}, // No variables needed
GRAPHQL_HASHES.GetAllCitiesByState,
{ maxRetries: 3, retryOn403: true }
);
if (!result || result.status !== 200) {
console.error('[LocationDiscovery] Failed to fetch city page');
return [];
}
const nextData = extractNextData(result.html);
if (!nextData) {
console.error('[LocationDiscovery] No __NEXT_DATA__ found');
return [];
}
// Extract statesWithDispensaries from Apollo state
const apolloState = nextData.props?.pageProps?.initialApolloState;
if (!apolloState) {
console.error('[LocationDiscovery] No initialApolloState found');
return [];
}
// Find ROOT_QUERY.statesWithDispensaries
const rootQuery = apolloState['ROOT_QUERY'];
if (!rootQuery) {
console.error('[LocationDiscovery] No ROOT_QUERY found');
return [];
}
// The statesWithDispensaries is at ROOT_QUERY.statesWithDispensaries
const statesRefs = rootQuery.statesWithDispensaries;
if (!Array.isArray(statesRefs)) {
console.error('[LocationDiscovery] statesWithDispensaries not found or not an array');
return [];
}
// Resolve the references to actual state data
const states: StateWithCities[] = [];
for (const ref of statesRefs) {
// ref might be { __ref: "StateWithDispensaries:0" } or direct object
let stateData: any;
if (ref && ref.__ref) {
stateData = apolloState[ref.__ref];
} else {
stateData = ref;
const statesData = result?.data?.statesWithDispensaries;
if (!Array.isArray(statesData)) {
console.error('[LocationDiscovery] statesWithDispensaries not found in response');
return [];
}
if (stateData && stateData.name) {
// Parse cities JSON array if it's a string
let cities = stateData.cities;
if (typeof cities === 'string') {
try {
cities = JSON.parse(cities);
} catch {
cities = [];
}
// Map to our StateWithCities format
const states: StateWithCities[] = [];
for (const state of statesData) {
if (state && state.name) {
// Filter out null cities
const cities = Array.isArray(state.cities)
? state.cities.filter((c: string | null) => c !== null)
: [];
states.push({
name: state.name,
country: state.country || 'US',
cities,
});
}
states.push({
name: stateData.name,
country: stateData.country || 'US',
cities: Array.isArray(cities) ? cities : [],
});
}
}
if (verbose) {
console.log(`[LocationDiscovery] Found ${states.length} states`);
for (const state of states) {
console.log(` ${state.name}: ${state.cities.length} cities`);
if (verbose) {
console.log(`[LocationDiscovery] Found ${states.length} states`);
for (const state of states) {
console.log(` ${state.name}: ${state.cities.length} cities`);
}
}
}
console.log(`[LocationDiscovery] Loaded ${states.length} states with cities`);
return states;
console.log(`[LocationDiscovery] Loaded ${states.length} states with cities`);
return states;
} catch (error: any) {
console.error(`[LocationDiscovery] Failed to fetch states: ${error.message}`);
return [];
}
}
/**
@@ -751,31 +720,57 @@ async function scrapeLocationCards(
/**
* Normalize a raw location response to a consistent format.
* Maps the field-name variants Dutchie returns onto the camelCase fields of DutchieLocationResponse.
*/
function normalizeLocationResponse(raw: any): DutchieLocationResponse {
const slug = raw.slug || raw.cName || raw.urlSlug || '';
const id = raw.id || raw._id || raw.dispensaryId || '';
// Extract location data - GraphQL response nests address info in .location
const loc = raw.location || {};
// Extract coordinates from geometry.coordinates [longitude, latitude]
const coords = loc.geometry?.coordinates || [];
const longitude = coords[0] || raw.longitude || raw.lng || loc.longitude || loc.lng;
const latitude = coords[1] || raw.latitude || raw.lat || loc.latitude || loc.lat;
return {
id,
name: raw.name || raw.dispensaryName || '',
slug,
address: raw.address || raw.fullAddress || '',
address1: raw.address1 || raw.addressLine1 || raw.streetAddress || '',
address2: raw.address2 || raw.addressLine2 || '',
city: raw.city || '',
state: raw.state || raw.stateCode || '',
zip: raw.zip || raw.zipCode || raw.postalCode || '',
country: raw.country || raw.countryCode || 'US',
latitude: raw.latitude || raw.lat || raw.location?.latitude,
longitude: raw.longitude || raw.lng || raw.location?.longitude,
cName: raw.cName || raw.slug || '',
address: raw.address || raw.fullAddress || loc.ln1 || '',
address1: raw.address1 || raw.addressLine1 || raw.streetAddress || loc.ln1 || '',
address2: raw.address2 || raw.addressLine2 || loc.ln2 || '',
city: raw.city || loc.city || '',
state: raw.state || raw.stateCode || loc.state || '',
zip: raw.zip || raw.zipCode || raw.postalCode || loc.zipcode || loc.zip || '',
country: raw.country || raw.countryCode || loc.country || 'United States',
latitude,
longitude,
timezone: raw.timezone || raw.tz || '',
menuUrl: raw.menuUrl || (slug ? `https://dutchie.com/dispensary/${slug}` : ''),
retailType: raw.retailType || raw.type || '',
// Service offerings
offerPickup: raw.offerPickup ?? raw.storeSettings?.offerPickup ?? true,
offerDelivery: raw.offerDelivery ?? raw.storeSettings?.offerDelivery ?? false,
isRecreational: raw.isRecreational ?? raw.retailType?.includes('Recreational') ?? true,
isMedical: raw.isMedical ?? raw.retailType?.includes('Medical') ?? true,
offerCurbsidePickup: raw.offerCurbsidePickup ?? false,
// License types
isRecreational: raw.isRecreational ?? raw.recDispensary ?? raw.retailType?.includes('Recreational') ?? true,
isMedical: raw.isMedical ?? raw.medicalDispensary ?? raw.retailType?.includes('Medical') ?? true,
// Contact info
phone: raw.phone || '',
email: raw.email || '',
website: raw.embedBackUrl || '',
// Branding
description: raw.description || '',
logoImage: raw.logoImage || '',
bannerImage: raw.bannerImage || '',
// Chain/enterprise info
chainSlug: raw.chain || '',
enterpriseId: raw.retailer?.enterpriseId || '',
// Status
status: raw.status || '',
// Preserve raw data
...raw,
};
@@ -826,15 +821,27 @@ export async function upsertLocation(
offers_pickup,
is_recreational,
is_medical,
phone,
website,
email,
description,
logo_image,
banner_image,
chain_slug,
enterprise_id,
c_name,
country,
store_status,
last_seen_at,
updated_at
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, NOW(), NOW())
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, NOW(), NOW())
ON CONFLICT (platform, platform_location_id)
DO UPDATE SET
name = EXCLUDED.name,
platform_menu_url = EXCLUDED.platform_menu_url,
raw_address = COALESCE(EXCLUDED.raw_address, dutchie_discovery_locations.raw_address),
address_line1 = COALESCE(EXCLUDED.address_line1, dutchie_discovery_locations.address_line1),
address_line2 = COALESCE(EXCLUDED.address_line2, dutchie_discovery_locations.address_line2),
city = COALESCE(EXCLUDED.city, dutchie_discovery_locations.city),
state_code = COALESCE(EXCLUDED.state_code, dutchie_discovery_locations.state_code),
postal_code = COALESCE(EXCLUDED.postal_code, dutchie_discovery_locations.postal_code),
@@ -846,6 +853,17 @@ export async function upsertLocation(
offers_pickup = COALESCE(EXCLUDED.offers_pickup, dutchie_discovery_locations.offers_pickup),
is_recreational = COALESCE(EXCLUDED.is_recreational, dutchie_discovery_locations.is_recreational),
is_medical = COALESCE(EXCLUDED.is_medical, dutchie_discovery_locations.is_medical),
phone = COALESCE(EXCLUDED.phone, dutchie_discovery_locations.phone),
website = COALESCE(EXCLUDED.website, dutchie_discovery_locations.website),
email = COALESCE(EXCLUDED.email, dutchie_discovery_locations.email),
description = COALESCE(EXCLUDED.description, dutchie_discovery_locations.description),
logo_image = COALESCE(EXCLUDED.logo_image, dutchie_discovery_locations.logo_image),
banner_image = COALESCE(EXCLUDED.banner_image, dutchie_discovery_locations.banner_image),
chain_slug = COALESCE(EXCLUDED.chain_slug, dutchie_discovery_locations.chain_slug),
enterprise_id = COALESCE(EXCLUDED.enterprise_id, dutchie_discovery_locations.enterprise_id),
c_name = COALESCE(EXCLUDED.c_name, dutchie_discovery_locations.c_name),
country = COALESCE(EXCLUDED.country, dutchie_discovery_locations.country),
store_status = COALESCE(EXCLUDED.store_status, dutchie_discovery_locations.store_status),
last_seen_at = NOW(),
updated_at = NOW()
RETURNING id, (xmax = 0) as is_new`,
@@ -861,7 +879,7 @@ export async function upsertLocation(
location.city || null,
location.state || null,
location.zip || null,
location.country || 'US',
location.country || 'United States',
location.latitude || null,
location.longitude || null,
location.timezone || null,
@@ -871,6 +889,17 @@ export async function upsertLocation(
location.offerPickup ?? null,
location.isRecreational ?? null,
location.isMedical ?? null,
location.phone || null,
location.website || null,
location.email || null,
location.description || null,
location.logoImage || null,
location.bannerImage || null,
location.chainSlug || null,
location.enterpriseId || null,
location.cName || null,
location.country || 'United States',
location.status || null,
]
);