feat: Add v2 architecture with multi-state support and orchestrator services
Major additions: - Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare - Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator - Discovery system: dutchie discovery service, geo validation, city seeding scripts - Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages - Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram) - Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata Frontend pages added: - Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores - StateHeatmap, CrossStateCompare, SyncInfoPanel Components added: - StateSelector, OrchestratorTraceModal, WorkflowStepper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
686
backend/src/discovery/location-discovery.ts
Normal file
686
backend/src/discovery/location-discovery.ts
Normal file
@@ -0,0 +1,686 @@
|
||||
/**
|
||||
* Dutchie Location Discovery Service
|
||||
*
|
||||
* Discovers store locations from Dutchie city pages.
|
||||
* Each city can contain multiple dispensary locations.
|
||||
*
|
||||
* This module:
|
||||
* 1. Fetches location listings for a given city
|
||||
* 2. Upserts locations into dutchie_discovery_locations
|
||||
* 3. Does NOT create any canonical dispensary records
|
||||
*
|
||||
* Locations remain in "discovered" status until manually verified.
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import axios from 'axios';
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import type { Browser, Page, Protocol } from 'puppeteer';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import {
|
||||
DiscoveryLocation,
|
||||
DiscoveryLocationRow,
|
||||
DutchieLocationResponse,
|
||||
LocationDiscoveryResult,
|
||||
DiscoveryStatus,
|
||||
mapLocationRowToLocation,
|
||||
} from './types';
|
||||
import { DiscoveryCity } from './types';
|
||||
|
||||
// Register the stealth plugin with puppeteer-extra once, at module load.
puppeteer.use(StealthPlugin());

/** Value written to the `platform` column of every row this module upserts. */
const PLATFORM = 'dutchie';
|
||||
|
||||
// ============================================================
|
||||
// GRAPHQL / API FETCHING
|
||||
// ============================================================
|
||||
|
||||
/**
 * Handle to an established browser session, plus the request credentials
 * captured from it.
 */
interface SessionCredentials {
  /** Browser instance that owns `page`. */
  browser: Browser;

  /** Page that was navigated to a city listing when the session was created. */
  page: Page;

  /** Cookies read from `page`, serialized as `name=value` pairs joined by `; `. */
  cookies: string;

  /** User-agent string that was applied to `page`. */
  userAgent: string;
}
|
||||
|
||||
/**
 * Create a browser session for fetching location data.
 *
 * Launches a headless browser, opens the Dutchie listing page for the given
 * city and captures the cookies set during that visit so they can be replayed
 * on direct API calls.
 *
 * @param citySlug - City slug used as the last path segment of the listing URL.
 * @param stateCode - State path segment of the listing URL (lower-cased before
 *   use). Defaults to `'az'`, which is the value that used to be hard-coded.
 * @returns The live browser and page together with the serialized cookies and
 *   the user agent. The caller owns the browser and must release it with
 *   `closeSession`.
 * @throws Any error raised while launching or configuring the browser. The
 *   browser is closed before the error propagates so no process is leaked.
 *   A failed navigation is only logged as a warning and does not throw.
 */
async function createSession(
  citySlug: string,
  stateCode = 'az'
): Promise<SessionCredentials> {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled',
    ],
  });

  try {
    const page = await browser.newPage();
    const userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

    await page.setUserAgent(userAgent);
    await page.setViewport({ width: 1920, height: 1080 });
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => false });
      (window as any).chrome = { runtime: {} };
    });

    // Navigate to the city's dispensaries page to get cookies.
    const url = `https://dutchie.com/dispensaries/${stateCode.toLowerCase()}/${citySlug}`;
    console.log(`[LocationDiscovery] Loading ${url} to establish session...`);

    try {
      await page.goto(url, {
        waitUntil: 'networkidle2',
        timeout: 60000,
      });
      // Short pause after the network goes idle before reading cookies.
      await new Promise((r) => setTimeout(r, 2000));
    } catch (error: unknown) {
      // Best effort: a slow or partially loaded page may still have set cookies.
      const message = error instanceof Error ? error.message : String(error);
      console.warn(`[LocationDiscovery] Navigation warning: ${message}`);
    }

    const cookies = await page.cookies();
    const cookieString = cookies
      .map((c: Protocol.Network.Cookie) => `${c.name}=${c.value}`)
      .join('; ');

    return { cookies: cookieString, userAgent, browser, page };
  } catch (error: unknown) {
    // Nothing was handed to the caller, so the browser must be released here.
    await browser.close().catch(() => undefined);
    throw error;
  }
}

/**
 * Release a session by closing its browser.
 *
 * @param session - Session previously returned by `createSession`.
 */
async function closeSession(session: SessionCredentials): Promise<void> {
  await session.browser.close();
}

/**
 * Fetch locations for a city, trying three strategies in order and returning
 * the first non-empty result:
 *
 * 1. data embedded in the loaded page,
 * 2. the geo-based GraphQL query,
 * 3. scraping the visible location cards.
 *
 * @param city - City to discover locations for.
 * @param options.session - Existing session to reuse. When omitted, a session
 *   is created for `city` (using its state code and slug) and closed before
 *   returning. When supplied, it is left open, and the page-based strategies
 *   read whatever page that session currently has loaded.
 * @param options.verbose - Emit extra diagnostic logging.
 * @returns The normalized locations, or an empty array when every strategy
 *   comes back empty.
 */
export async function fetchLocationsForCity(
  city: DiscoveryCity,
  options: {
    session?: SessionCredentials;
    verbose?: boolean;
  } = {}
): Promise<DutchieLocationResponse[]> {
  const { verbose = false } = options;
  const ownsSession = !options.session;
  // An empty or missing state code falls back to createSession's default.
  const session =
    options.session ?? (await createSession(city.citySlug, city.stateCode || undefined));

  try {
    console.log(`[LocationDiscovery] Fetching locations for ${city.cityName}, ${city.stateCode}...`);

    // Approach 1: Extract from page __NEXT_DATA__ or similar
    const locations = await extractLocationsFromPage(session.page, verbose);
    if (locations.length > 0) {
      console.log(`[LocationDiscovery] Found ${locations.length} locations from page data`);
      return locations;
    }

    // Approach 2: Try the geo-based GraphQL query
    const geoLocations = await fetchLocationsViaGraphQL(session, city, verbose);
    if (geoLocations.length > 0) {
      console.log(`[LocationDiscovery] Found ${geoLocations.length} locations from GraphQL`);
      return geoLocations;
    }

    // Approach 3: Scrape visible location cards
    const scrapedLocations = await scrapeLocationCards(session.page, verbose);
    if (scrapedLocations.length > 0) {
      console.log(`[LocationDiscovery] Found ${scrapedLocations.length} locations from scraping`);
      return scrapedLocations;
    }

    console.log(`[LocationDiscovery] No locations found for ${city.cityName}`);
    return [];
  } finally {
    // Only tear down sessions created here; caller-provided ones stay open.
    if (ownsSession) {
      await closeSession(session);
    }
  }
}
|
||||
|
||||
/**
 * Read dispensary records that the page ships inside its own payload.
 *
 * Two places are checked, in order: the `#__NEXT_DATA__` JSON element and the
 * `window.__APOLLO_STATE__` cache (entries whose key starts with
 * `Dispensary:`). The first one that yields a non-empty list wins.
 *
 * @param page - Page that is already showing a listing.
 * @param verbose - Log which source was used, or why extraction failed.
 * @returns Normalized locations; an empty array when nothing is embedded or
 *   the in-page evaluation throws.
 */
async function extractLocationsFromPage(
  page: Page,
  verbose: boolean
): Promise<DutchieLocationResponse[]> {
  try {
    const embedded = await page.evaluate((): { source: string; dispensaries: any[] } => {
      // Source 1: the Next.js bootstrap JSON.
      const nextDataText = document.querySelector('#__NEXT_DATA__')?.textContent;
      if (nextDataText) {
        try {
          const pageProps = JSON.parse(nextDataText)?.props?.pageProps;
          const candidates =
            pageProps?.dispensaries ||
            pageProps?.initialDispensaries ||
            pageProps?.data?.dispensaries ||
            [];
          if (Array.isArray(candidates) && candidates.length > 0) {
            return { source: '__NEXT_DATA__', dispensaries: candidates };
          }
        } catch {
          // Unparseable JSON: fall through to the next source.
        }
      }

      // Source 2: the Apollo cache exposed on window.
      const apolloState = (window as any).__APOLLO_STATE__;
      if (apolloState) {
        const cached = Object.keys(apolloState)
          .filter((key) => key.startsWith('Dispensary:'))
          .map((key) => apolloState[key]);
        if (cached.length > 0) {
          return { source: 'APOLLO_STATE', dispensaries: cached };
        }
      }

      return { source: 'none', dispensaries: [] };
    });

    if (verbose) {
      console.log(`[LocationDiscovery] Page data source: ${embedded.source}, count: ${embedded.dispensaries.length}`);
    }

    const normalized: DutchieLocationResponse[] = [];
    for (const entry of embedded.dispensaries) {
      normalized.push(normalizeLocationResponse(entry));
    }
    return normalized;
  } catch (error: any) {
    if (verbose) {
      console.log(`[LocationDiscovery] Could not extract from page data: ${error.message}`);
    }
    return [];
  }
}
|
||||
|
||||
/**
 * Fetch locations via the geo-based `ConsumerDispensaries` persisted GraphQL
 * query, replaying the session's cookies and user agent.
 *
 * @param session - Session whose cookies and user agent are sent with the request.
 * @param city - City being searched; its state code and name are sent as
 *   filters, and its slug selects the search centre point.
 * @param verbose - Log non-200 statuses, GraphQL-level errors, the coordinate
 *   fallback and request failures.
 * @returns Normalized locations. An empty array is returned for any non-200
 *   status, for a payload that carries no dispensary array, and for any
 *   request error — this function never throws.
 */
async function fetchLocationsViaGraphQL(
  session: SessionCredentials,
  city: DiscoveryCity,
  verbose: boolean
): Promise<DutchieLocationResponse[]> {
  // Known centre points, keyed by city slug.
  const CITY_COORDS: Record<string, { lat: number; lng: number }> = {
    'phoenix': { lat: 33.4484, lng: -112.074 },
    'tucson': { lat: 32.2226, lng: -110.9747 },
    'scottsdale': { lat: 33.4942, lng: -111.9261 },
    'mesa': { lat: 33.4152, lng: -111.8315 },
    'tempe': { lat: 33.4255, lng: -111.94 },
    'flagstaff': { lat: 35.1983, lng: -111.6513 },
    // Add more as needed
  };

  // Cities missing from the table fall back to the Phoenix centre point.
  const knownCoords = CITY_COORDS[city.citySlug];
  const coords = knownCoords ?? { lat: 33.4484, lng: -112.074 };
  if (!knownCoords && verbose) {
    console.log(`[LocationDiscovery] No coordinates for ${city.citySlug}; using default center point`);
  }

  const variables = {
    dispensariesFilter: {
      latitude: coords.lat,
      longitude: coords.lng,
      distance: 50, // miles
      state: city.stateCode,
      city: city.cityName,
    },
  };

  // sha256 of the persisted query registered for ConsumerDispensaries.
  const hash = '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b';

  try {
    const response = await axios.post(
      'https://dutchie.com/api-3/graphql',
      {
        operationName: 'ConsumerDispensaries',
        variables,
        extensions: {
          persistedQuery: { version: 1, sha256Hash: hash },
        },
      },
      {
        headers: {
          'content-type': 'application/json',
          'origin': 'https://dutchie.com',
          'referer': `https://dutchie.com/dispensaries/${city.stateCode?.toLowerCase()}/${city.citySlug}`,
          'user-agent': session.userAgent,
          'cookie': session.cookies,
        },
        timeout: 30000,
        // Never reject on status; the status is inspected below instead.
        validateStatus: () => true,
      }
    );

    if (response.status !== 200) {
      if (verbose) {
        console.log(`[LocationDiscovery] GraphQL returned ${response.status}`);
      }
      return [];
    }

    // A 200 response can still carry GraphQL-level errors (for example an
    // unknown persisted query); surface them instead of reporting "no results".
    const gqlErrors = response.data?.errors;
    if (verbose && Array.isArray(gqlErrors) && gqlErrors.length > 0) {
      const summary = gqlErrors
        .map((e: any) => e?.message ?? 'unknown error')
        .join('; ');
      console.log(`[LocationDiscovery] GraphQL response contained errors: ${summary}`);
    }

    const dispensaries = response.data?.data?.consumerDispensaries;
    if (!Array.isArray(dispensaries)) {
      return [];
    }
    return dispensaries.map((d: any) => normalizeLocationResponse(d));
  } catch (error: unknown) {
    if (verbose) {
      const message = error instanceof Error ? error.message : String(error);
      console.log(`[LocationDiscovery] GraphQL error: ${message}`);
    }
    return [];
  }
}
|
||||
|
||||
/**
 * Scrape location cards from the visible page.
 *
 * Candidate selectors are tried in order. The first selector that yields at
 * least one usable card (one with both a name and a slug parsed from a
 * `/dispensary/<slug>` link) provides the result; a selector that matches
 * elements but produces no usable card no longer stops the search. Cards are
 * de-duplicated by slug, keeping the first occurrence.
 *
 * @param page - Page that is already showing a listing.
 * @param verbose - Log the error message when scraping fails.
 * @returns One entry per distinct slug with `id` set to an empty string
 *   (cards do not expose a platform id); an empty array when nothing usable is
 *   found or the in-page evaluation throws.
 */
async function scrapeLocationCards(
  page: Page,
  verbose: boolean
): Promise<DutchieLocationResponse[]> {
  type ScrapedCard = { slug: string; name: string; address: string; menuUrl: string };

  try {
    const cards = await page.evaluate((): ScrapedCard[] => {
      // Look for common dispensary card patterns
      const selectors = [
        '[data-testid="dispensary-card"]',
        '.dispensary-card',
        'a[href*="/dispensary/"]',
        '[class*="DispensaryCard"]',
      ];

      for (const selector of selectors) {
        const bySlug = new Map<string, ScrapedCard>();

        document.querySelectorAll(selector).forEach((el) => {
          // The matched element may contain the link or be the link itself.
          const link = el.querySelector('a')?.href || (el as HTMLAnchorElement).href || '';
          const name = el.querySelector('h2, h3, [class*="name"]')?.textContent?.trim() || '';
          const address = el.querySelector('[class*="address"], address')?.textContent?.trim() || '';

          // Extract slug from URL
          const slug = link.match(/\/dispensary\/([^/?]+)/)?.[1] ?? '';

          if (slug && name && !bySlug.has(slug)) {
            bySlug.set(slug, { slug, name, address, menuUrl: link });
          }
        });

        // Stop only once a selector has actually produced cards.
        if (bySlug.size > 0) {
          return Array.from(bySlug.values());
        }
      }

      return [];
    });

    return cards.map((card) => ({
      id: '',
      name: card.name,
      slug: card.slug,
      address: card.address,
      menuUrl: card.menuUrl,
    }));
  } catch (error: unknown) {
    if (verbose) {
      const message = error instanceof Error ? error.message : String(error);
      console.log(`[LocationDiscovery] Scraping error: ${message}`);
    }
    return [];
  }
}
|
||||
|
||||
/**
 * Normalize a raw location record to a consistent format.
 *
 * Each field is resolved from the first usable alias on `raw` (for example
 * `slug` / `cName` / `urlSlug`), falling back to a default. All original
 * properties of `raw` are preserved on the result, but the normalized fields
 * take precedence: `raw` is spread first so that a null or empty value on
 * `raw` cannot overwrite a fallback that was just resolved from an alias.
 *
 * @param raw - Untyped record taken from page data or a GraphQL response.
 * @returns The record with the normalized fields filled in.
 */
function normalizeLocationResponse(raw: any): DutchieLocationResponse {
  const slug = raw.slug || raw.cName || raw.urlSlug || '';
  const id = raw.id || raw._id || raw.dispensaryId || '';

  return {
    // Preserve raw data; normalized fields below override same-named keys.
    ...raw,
    id,
    name: raw.name || raw.dispensaryName || '',
    slug,
    address: raw.address || raw.fullAddress || '',
    address1: raw.address1 || raw.addressLine1 || raw.streetAddress || '',
    address2: raw.address2 || raw.addressLine2 || '',
    city: raw.city || '',
    state: raw.state || raw.stateCode || '',
    zip: raw.zip || raw.zipCode || raw.postalCode || '',
    country: raw.country || raw.countryCode || 'US',
    // `??` rather than `||` so that a coordinate of 0 is kept.
    latitude: raw.latitude ?? raw.lat ?? raw.location?.latitude,
    longitude: raw.longitude ?? raw.lng ?? raw.location?.longitude,
    timezone: raw.timezone || raw.tz || '',
    menuUrl: raw.menuUrl || (slug ? `https://dutchie.com/dispensary/${slug}` : ''),
    retailType: raw.retailType || raw.type || '',
    offerPickup: raw.offerPickup ?? raw.storeSettings?.offerPickup ?? true,
    offerDelivery: raw.offerDelivery ?? raw.storeSettings?.offerDelivery ?? false,
    isRecreational: raw.isRecreational ?? raw.retailType?.includes('Recreational') ?? true,
    isMedical: raw.isMedical ?? raw.retailType?.includes('Medical') ?? true,
  };
}
|
||||
|
||||
// ============================================================
|
||||
// DATABASE OPERATIONS
|
||||
// ============================================================
|
||||
|
||||
/**
 * Upsert a location into dutchie_discovery_locations.
 *
 * Rows are keyed on `(platform, platform_location_id)`, where the location id
 * is the platform id when present and the slug otherwise. On conflict, name,
 * menu URL and metadata are overwritten, while address, coordinate, timezone
 * and offering columns keep their stored value when the incoming value is
 * null. `platform_slug`, `country_code` and `discovery_city_id` are only
 * written on insert.
 *
 * @param pool - Postgres connection pool.
 * @param location - Normalized location to store; also serialized in full
 *   into the `metadata` column.
 * @param cityId - Discovery city the location was found under, or null.
 * @returns The row id and whether the row was newly inserted.
 * @throws Error when the location has neither an id nor a slug, since an
 *   empty key would make unrelated locations collide on the same row.
 * @throws Any error raised by the database query.
 */
export async function upsertLocation(
  pool: Pool,
  location: DutchieLocationResponse,
  cityId: number | null
): Promise<{ id: number; isNew: boolean }> {
  const platformLocationId = location.id || location.slug;
  if (!platformLocationId) {
    throw new Error(
      `Cannot upsert location without an id or slug (name: "${location.name ?? ''}")`
    );
  }

  const menuUrl = location.menuUrl || `https://dutchie.com/dispensary/${location.slug}`;

  // Keep a legitimate 0 coordinate; map missing, empty or non-numeric values to NULL.
  const toCoordinate = (value: unknown): number | null => {
    if (value === null || value === undefined || value === '') {
      return null;
    }
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  };

  const result = await pool.query(
    `INSERT INTO dutchie_discovery_locations (
      platform,
      platform_location_id,
      platform_slug,
      platform_menu_url,
      name,
      raw_address,
      address_line1,
      address_line2,
      city,
      state_code,
      postal_code,
      country_code,
      latitude,
      longitude,
      timezone,
      discovery_city_id,
      metadata,
      offers_delivery,
      offers_pickup,
      is_recreational,
      is_medical,
      last_seen_at,
      updated_at
    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, NOW(), NOW())
    ON CONFLICT (platform, platform_location_id)
    DO UPDATE SET
      name = EXCLUDED.name,
      platform_menu_url = EXCLUDED.platform_menu_url,
      raw_address = COALESCE(EXCLUDED.raw_address, dutchie_discovery_locations.raw_address),
      address_line1 = COALESCE(EXCLUDED.address_line1, dutchie_discovery_locations.address_line1),
      address_line2 = COALESCE(EXCLUDED.address_line2, dutchie_discovery_locations.address_line2),
      city = COALESCE(EXCLUDED.city, dutchie_discovery_locations.city),
      state_code = COALESCE(EXCLUDED.state_code, dutchie_discovery_locations.state_code),
      postal_code = COALESCE(EXCLUDED.postal_code, dutchie_discovery_locations.postal_code),
      latitude = COALESCE(EXCLUDED.latitude, dutchie_discovery_locations.latitude),
      longitude = COALESCE(EXCLUDED.longitude, dutchie_discovery_locations.longitude),
      timezone = COALESCE(EXCLUDED.timezone, dutchie_discovery_locations.timezone),
      metadata = EXCLUDED.metadata,
      offers_delivery = COALESCE(EXCLUDED.offers_delivery, dutchie_discovery_locations.offers_delivery),
      offers_pickup = COALESCE(EXCLUDED.offers_pickup, dutchie_discovery_locations.offers_pickup),
      is_recreational = COALESCE(EXCLUDED.is_recreational, dutchie_discovery_locations.is_recreational),
      is_medical = COALESCE(EXCLUDED.is_medical, dutchie_discovery_locations.is_medical),
      last_seen_at = NOW(),
      updated_at = NOW()
    RETURNING id, (xmax = 0) as is_new`,
    [
      PLATFORM,
      platformLocationId,
      location.slug,
      menuUrl,
      location.name,
      location.address || null,
      location.address1 || null,
      location.address2 || null,
      location.city || null,
      location.state || null,
      location.zip || null,
      location.country || 'US',
      toCoordinate(location.latitude),
      toCoordinate(location.longitude),
      location.timezone || null,
      cityId,
      JSON.stringify(location),
      location.offerDelivery ?? null,
      location.offerPickup ?? null,
      location.isRecreational ?? null,
      location.isMedical ?? null,
    ]
  );

  // `xmax = 0` is how the query tells a fresh insert apart from a conflict update.
  return {
    id: result.rows[0].id,
    isNew: result.rows[0].is_new,
  };
}
|
||||
|
||||
/**
 * Get active locations with a given status, newest first.
 *
 * @param pool - Postgres connection pool.
 * @param status - Discovery status to match.
 * @param options.stateCode - Restrict to this state code.
 * @param options.countryCode - Restrict to this country code.
 * @param options.limit - Maximum number of rows (default 100).
 * @param options.offset - Number of rows to skip (default 0).
 * @returns The matching locations ordered by `first_seen_at` descending, with
 *   `id` descending as a tiebreaker so that paging with limit/offset is
 *   stable when several rows share a timestamp.
 */
export async function getLocationsByStatus(
  pool: Pool,
  status: DiscoveryStatus,
  options: {
    stateCode?: string;
    countryCode?: string;
    limit?: number;
    offset?: number;
  } = {}
): Promise<DiscoveryLocation[]> {
  const { stateCode, countryCode, limit = 100, offset = 0 } = options;

  let query = `
    SELECT * FROM dutchie_discovery_locations
    WHERE status = $1 AND active = TRUE
  `;
  const params: any[] = [status];
  let paramIdx = 2;

  if (stateCode) {
    query += ` AND state_code = $${paramIdx}`;
    params.push(stateCode);
    paramIdx++;
  }

  if (countryCode) {
    query += ` AND country_code = $${paramIdx}`;
    params.push(countryCode);
    paramIdx++;
  }

  // LIMIT/OFFSET paging needs a total order; first_seen_at alone is not unique.
  query += ` ORDER BY first_seen_at DESC, id DESC LIMIT $${paramIdx} OFFSET $${paramIdx + 1}`;
  params.push(limit, offset);

  const result = await pool.query<DiscoveryLocationRow>(query, params);
  return result.rows.map(mapLocationRowToLocation);
}
|
||||
|
||||
/**
 * Look up a single discovery location by its primary key.
 *
 * @param pool - Postgres connection pool.
 * @param id - Row id in dutchie_discovery_locations.
 * @returns The mapped location, or `null` when no row has that id.
 */
export async function getLocationById(
  pool: Pool,
  id: number
): Promise<DiscoveryLocation | null> {
  const { rows } = await pool.query<DiscoveryLocationRow>(
    `SELECT * FROM dutchie_discovery_locations WHERE id = $1`,
    [id]
  );

  const [match] = rows;
  return match ? mapLocationRowToLocation(match) : null;
}
|
||||
|
||||
/**
 * Update location status.
 *
 * Sets the status, stamps `verified_at` when the new status is `verified` or
 * `merged`, and overwrites `dispensary_id`, `verified_by` and `notes` only
 * when a non-empty value is supplied (omitted values keep what is stored).
 * Nothing is reported when `locationId` matches no row.
 *
 * @param pool - Postgres connection pool.
 * @param locationId - Row id in dutchie_discovery_locations.
 * @param status - New discovery status.
 * @param options.dispensaryId - Dispensary to link the location to.
 * @param options.verifiedBy - Identifier of whoever made the change.
 * @param options.notes - Free-form notes to store.
 */
export async function updateLocationStatus(
  pool: Pool,
  locationId: number,
  status: DiscoveryStatus,
  options: {
    dispensaryId?: number;
    verifiedBy?: string;
    notes?: string;
  } = {}
): Promise<void> {
  const { dispensaryId, verifiedBy, notes } = options;

  // $2 is used both as an assignment and in a comparison with literals.
  // Casting both uses gives the parameter a single declared type, so the
  // server cannot deduce two different types for it when `status` is a
  // varchar column. NOTE(review): assumes `status` is text/varchar — confirm.
  await pool.query(
    `UPDATE dutchie_discovery_locations
     SET status = $2::text,
         dispensary_id = COALESCE($3, dispensary_id),
         verified_at = CASE WHEN $2::text IN ('verified', 'merged') THEN NOW() ELSE verified_at END,
         verified_by = COALESCE($4, verified_by),
         notes = COALESCE($5, notes),
         updated_at = NOW()
     WHERE id = $1`,
    [locationId, status, dispensaryId || null, verifiedBy || null, notes || null]
  );
}
|
||||
|
||||
/**
 * Search active locations by name, city, raw address or platform slug.
 *
 * The search text is matched literally as a case-insensitive substring:
 * the LIKE metacharacters `%`, `_` and `\` in `query` are escaped so they are
 * not interpreted as wildcards.
 *
 * @param pool - Postgres connection pool.
 * @param query - Text to search for.
 * @param options.status - Restrict to this discovery status.
 * @param options.stateCode - Restrict to this state code.
 * @param options.limit - Maximum number of rows (default 50).
 * @returns Matching locations ordered by name.
 */
export async function searchLocations(
  pool: Pool,
  query: string,
  options: {
    status?: DiscoveryStatus;
    stateCode?: string;
    limit?: number;
  } = {}
): Promise<DiscoveryLocation[]> {
  const { status, stateCode, limit = 50 } = options;

  // Backslash is the default escape character for LIKE / ILIKE patterns.
  const escapedQuery = query.replace(/[\\%_]/g, '\\$&');
  const searchPattern = `%${escapedQuery}%`;

  let sql = `
    SELECT * FROM dutchie_discovery_locations
    WHERE active = TRUE
      AND (name ILIKE $1 OR city ILIKE $1 OR raw_address ILIKE $1 OR platform_slug ILIKE $1)
  `;
  const params: any[] = [searchPattern];
  let paramIdx = 2;

  if (status) {
    sql += ` AND status = $${paramIdx}`;
    params.push(status);
    paramIdx++;
  }

  if (stateCode) {
    sql += ` AND state_code = $${paramIdx}`;
    params.push(stateCode);
    paramIdx++;
  }

  sql += ` ORDER BY name LIMIT $${paramIdx}`;
  params.push(limit);

  const result = await pool.query<DiscoveryLocationRow>(sql, params);
  return result.rows.map(mapLocationRowToLocation);
}
|
||||
|
||||
// ============================================================
|
||||
// MAIN DISCOVERY FUNCTION
|
||||
// ============================================================
|
||||
|
||||
/**
 * Run discovery for one city: fetch its locations, upsert each of them and
 * record the crawl on the city row.
 *
 * In dry-run mode nothing is written; every fetched location is counted as
 * new. A failure on an individual location is collected in `errors` and does
 * not stop the run. When no locations are fetched the city row is left
 * untouched.
 *
 * @param pool - Postgres connection pool.
 * @param city - City to discover.
 * @param options.dryRun - Skip all database writes.
 * @param options.verbose - Log one line per location.
 * @returns Counts, collected error messages and elapsed time for the run.
 */
export async function discoverLocationsForCity(
  pool: Pool,
  city: DiscoveryCity,
  options: {
    dryRun?: boolean;
    verbose?: boolean;
  } = {}
): Promise<LocationDiscoveryResult> {
  const startedAt = Date.now();
  const { dryRun = false, verbose = false } = options;

  console.log(`[LocationDiscovery] Discovering locations for ${city.cityName}, ${city.stateCode}...`);
  console.log(`[LocationDiscovery] Mode: ${dryRun ? 'DRY RUN' : 'LIVE'}`);

  const found = await fetchLocationsForCity(city, { verbose });

  // Single place where the result object is assembled.
  const buildResult = (
    created: number,
    updated: number,
    failures: string[],
    elapsedMs: number
  ): LocationDiscoveryResult => ({
    cityId: city.id,
    citySlug: city.citySlug,
    locationsFound: found.length,
    locationsUpserted: created + updated,
    locationsNew: created,
    locationsUpdated: updated,
    errors: failures,
    durationMs: elapsedMs,
  });

  if (found.length === 0) {
    console.log(`[LocationDiscovery] No locations found for ${city.cityName}`);
    return buildResult(0, 0, [], Date.now() - startedAt);
  }

  const failures: string[] = [];
  let created = 0;
  let updated = 0;

  for (const location of found) {
    try {
      if (!dryRun) {
        const { id, isNew } = await upsertLocation(pool, location, city.id);
        if (isNew) {
          created += 1;
        } else {
          updated += 1;
        }
        if (verbose) {
          console.log(`[LocationDiscovery] ${isNew ? 'Created' : 'Updated'}: ${location.name} -> ID ${id}`);
        }
      } else {
        if (verbose) {
          console.log(`[LocationDiscovery][DryRun] Would upsert: ${location.name} (${location.slug})`);
        }
        // Dry runs cannot tell inserts from updates, so count everything as new.
        created += 1;
      }
    } catch (error: any) {
      failures.push(`Location ${location.slug}: ${error.message}`);
    }
  }

  // Update city crawl status
  if (!dryRun) {
    const markCrawledSql = [
      'UPDATE dutchie_discovery_cities',
      'SET last_crawled_at = NOW(),',
      'location_count = $2,',
      'updated_at = NOW()',
      'WHERE id = $1',
    ].join('\n');
    await pool.query(markCrawledSql, [city.id, found.length]);
  }

  const elapsedMs = Date.now() - startedAt;

  console.log(`[LocationDiscovery] Complete for ${city.cityName}: ${created} new, ${updated} updated, ${failures.length} errors in ${elapsedMs}ms`);

  return buildResult(created, updated, failures, elapsedMs);
}
|
||||
Reference in New Issue
Block a user