Major additions: - Multi-state expansion: states table, StateSelector, NationalDashboard, StateHeatmap, CrossStateCompare - Orchestrator services: trace service, error taxonomy, retry manager, proxy rotator - Discovery system: dutchie discovery service, geo validation, city seeding scripts - Analytics infrastructure: analytics v2 routes, brand/pricing/stores intelligence pages - Local development: setup-local.sh starts all 5 services (postgres, backend, cannaiq, findadispo, findagram) - Migrations 037-056: crawler profiles, states, analytics indexes, worker metadata Frontend pages added: - Discovery, ChainsDashboard, IntelligenceBrands, IntelligencePricing, IntelligenceStores - StateHeatmap, CrossStateCompare, SyncInfoPanel Components added: - StateSelector, OrchestratorTraceModal, WorkflowStepper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1250 lines
41 KiB
TypeScript
1250 lines
41 KiB
TypeScript
/**
|
|
* DtLocationDiscoveryService
|
|
*
|
|
* Core service for Dutchie location discovery.
|
|
* Contains shared logic used by multiple entrypoints.
|
|
*
|
|
* Responsibilities:
|
|
* - Fetch locations from city pages
|
|
* - Extract geo coordinates when available
|
|
* - Upsert locations to dutchie_discovery_locations
|
|
* - DO NOT overwrite protected statuses or existing lat/lng
|
|
*/
|
|
|
|
import { Pool } from 'pg';
|
|
import puppeteer from 'puppeteer-extra';
|
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
|
|
puppeteer.use(StealthPlugin());
|
|
|
|
// ============================================================
|
|
// TYPES
|
|
// ============================================================
|
|
|
|
/**
 * A city row from `dutchie_discovery_cities`, mapped to camelCase.
 */
export interface DiscoveryCity {
  /** Value of the `id` column. */
  id: number;
  /** Value of the `platform` column; queries in this file filter on `'dutchie'`. */
  platform: string;
  /** Human-readable city name (`city_name`). */
  cityName: string;
  /** Slug used to build `https://dutchie.com/us/dispensaries/{citySlug}`. */
  citySlug: string;
  /** State code (`state_code`), or null when not recorded. */
  stateCode: string | null;
  /** Country code (`country_code`). */
  countryCode: string;
  /** Mirrors the `crawl_enabled` column. */
  crawlEnabled: boolean;
}
|
|
|
|
/**
 * A single dispensary as discovered on a Dutchie city page, in the shape
 * that is written to `dutchie_discovery_locations`.
 *
 * Fields are `null` when the source (JSON payload or DOM scrape) did not
 * provide them.
 */
export interface DutchieLocation {
  /** Identifier used to look up existing rows (`platform_location_id`). */
  platformLocationId: string;
  /** Slug segment of the store's `/dispensary/{slug}` URL. */
  platformSlug: string;
  /** Menu URL; also the key used for de-duplication within one city fetch. */
  platformMenuUrl: string;
  name: string;
  /** Address as a single display string. */
  rawAddress: string | null;
  addressLine1: string | null;
  addressLine2: string | null;
  city: string | null;
  stateCode: string | null;
  postalCode: string | null;
  countryCode: string | null;
  latitude: number | null;
  longitude: number | null;
  timezone: string | null;
  offersDelivery: boolean | null;
  offersPickup: boolean | null;
  isRecreational: boolean | null;
  isMedical: boolean | null;
  /** Free-form extras; serialised with `JSON.stringify` before being stored. */
  metadata: Record<string, any>;
}
|
|
|
|
/**
 * Outcome of running discovery for one city.
 */
export interface LocationDiscoveryResult {
  cityId: number;
  citySlug: string;
  /** Number of locations left after filtering and de-duplication. */
  locationsFound: number;
  locationsInserted: number;
  locationsUpdated: number;
  /** Locations whose existing row had a protected status. */
  locationsSkipped: number;
  /** Store count parsed from the page text, or null when none was found. */
  reportedStoreCount: number | null;
  /** Human-readable messages for every failure encountered. */
  errors: string[];
  /** Wall-clock duration of the run in milliseconds. */
  durationMs: number;
}
|
|
|
|
/**
 * Result of scraping one city page: the filtered locations plus the store
 * count the page itself reported (null when no count could be parsed).
 */
interface FetchResult {
  locations: DutchieLocation[];
  reportedStoreCount: number | null;
}
|
|
|
|
/**
 * Totals accumulated across every city processed in a batch run.
 */
export interface BatchDiscoveryResult {
  /** Number of cities that were selected for the run. */
  totalCities: number;
  totalLocationsFound: number;
  totalInserted: number;
  totalUpdated: number;
  totalSkipped: number;
  /** Error messages collected from all cities. */
  errors: string[];
  /** Wall-clock duration of the whole batch in milliseconds. */
  durationMs: number;
}
|
|
|
|
// ============================================================
|
|
// COORDINATE EXTRACTION HELPERS
|
|
// ============================================================
|
|
|
|
/**
 * Extract a latitude from the various payload shapes a dispensary record
 * may use.
 *
 * Candidates are tried in this order and the first finite number wins:
 * `lat`/`latitude` on the record itself, then on `location`, then on
 * `coordinates` (object form), then element 1 of a `coordinates` array
 * (GeoJSON `[lng, lat]`), then element 1 of `geometry.coordinates`, then
 * `address`, then `geo`.
 *
 * @param data Raw record; may be any value, including null/undefined.
 * @returns The latitude, or null when no finite numeric candidate exists.
 */
function extractLatitude(data: any): number | null {
  // Property access on null/undefined or a primitive would throw or be meaningless.
  if (data === null || typeof data !== 'object') return null;

  // GeoJSON positions are [lng, lat]; latitude is the second element.
  const secondOfPair = (value: unknown): unknown =>
    Array.isArray(value) && value.length >= 2 ? value[1] : undefined;

  const candidates: unknown[] = [
    data.lat,
    data.latitude,
    data.location?.lat,
    data.location?.latitude,
    data.coordinates?.lat,
    data.coordinates?.latitude,
    secondOfPair(data.coordinates),
    secondOfPair(data.geometry?.coordinates),
    data.address?.lat,
    data.address?.latitude,
    data.geo?.lat,
    data.geo?.latitude,
  ];

  for (const candidate of candidates) {
    // Reject NaN/Infinity and non-numbers (strings, nested arrays) so the
    // declared `number | null` return type always holds.
    if (typeof candidate === 'number' && Number.isFinite(candidate)) {
      return candidate;
    }
  }

  return null;
}
|
|
|
|
/**
 * Extract a longitude from the various payload shapes a dispensary record
 * may use.
 *
 * Candidates are tried in this order and the first finite number wins:
 * `lng`/`lon`/`longitude` on the record itself, then on `location`, then on
 * `coordinates` (object form), then element 0 of a `coordinates` array
 * (GeoJSON `[lng, lat]`), then element 0 of `geometry.coordinates`, then
 * `address`, then `geo`.
 *
 * @param data Raw record; may be any value, including null/undefined.
 * @returns The longitude, or null when no finite numeric candidate exists.
 */
function extractLongitude(data: any): number | null {
  // Property access on null/undefined or a primitive would throw or be meaningless.
  if (data === null || typeof data !== 'object') return null;

  // Bare `coordinates` arrays are only trusted when they hold a full pair.
  const firstOfPair = (value: unknown): unknown =>
    Array.isArray(value) && value.length >= 2 ? value[0] : undefined;
  // `geometry.coordinates` is read at index 0 regardless of length.
  const firstOfArray = (value: unknown): unknown =>
    Array.isArray(value) ? value[0] : undefined;

  const candidates: unknown[] = [
    data.lng,
    data.lon,
    data.longitude,
    data.location?.lng,
    data.location?.lon,
    data.location?.longitude,
    data.coordinates?.lng,
    data.coordinates?.lon,
    data.coordinates?.longitude,
    firstOfPair(data.coordinates),
    firstOfArray(data.geometry?.coordinates),
    data.address?.lng,
    data.address?.lon,
    data.address?.longitude,
    data.geo?.lng,
    data.geo?.lon,
    data.geo?.longitude,
  ];

  for (const candidate of candidates) {
    // Reject NaN/Infinity and non-numbers (strings, nested arrays) so the
    // declared `number | null` return type always holds.
    if (typeof candidate === 'number' && Number.isFinite(candidate)) {
      return candidate;
    }
  }

  return null;
}
|
|
|
|
// ============================================================
|
|
// LOCATION FETCHING
|
|
// ============================================================
|
|
|
|
/**
 * Convert one raw dispensary record from the page's JSON payload into a
 * `DutchieLocation`, filling gaps from the city being crawled.
 *
 * @param d    Raw dispensary object; field names vary, so several aliases
 *             are tried for each value.
 * @param city City whose page produced the record; supplies fallback
 *             city/state/country values.
 * @returns The normalised location. `metadata.raw` keeps the original record.
 */
function parseDispensaryData(d: any, city: DiscoveryCity): DutchieLocation {
  const slug = d.slug || d.cName || d.name?.toLowerCase().replace(/\s+/g, '-') || '';

  // Fall back to the slug when the payload carries no id, matching what the
  // DOM-scrape path uses as platformLocationId; an empty id would make every
  // such store resolve to the same row on upsert. String() keeps the
  // declared `string` type when the payload id is numeric.
  const id = String(d.id || d._id || d.dispensaryId || slug);

  // Prefer an explicit menu URL from the payload over the conventional one.
  let menuUrl = `https://dutchie.com/dispensary/${slug}`;
  if (d.menuUrl) {
    menuUrl = d.menuUrl;
  } else if (d.embeddedMenuUrl) {
    menuUrl = d.embeddedMenuUrl;
  }

  // Address parts may live in a nested object or directly on the record.
  const address = d.address || d.location?.address || {};
  const line1 = address.line1 || address.street1 || d.address1;
  const line2 = address.line2 || address.street2 || d.address2;
  const cityName = address.city || d.city;
  const state = address.state || address.stateCode || d.state;
  const postal = address.zip || address.zipCode || address.postalCode || d.zip;

  // "line1, line2, City ST 12345" with missing pieces omitted.
  const localityPart = [cityName, state, postal].filter(Boolean).join(' ');
  const rawAddress = [line1, line2, localityPart].filter(Boolean).join(', ');

  const latitude = extractLatitude(d);
  const longitude = extractLongitude(d);

  if (latitude !== null && longitude !== null) {
    console.log(`[DtLocationDiscoveryService] Extracted coordinates for ${slug}: ${latitude}, ${longitude}`);
  }

  // Only derive the rec/med flags from retailType when it is actually
  // present; otherwise report "unknown" (null) rather than false, so a later
  // COALESCE-based update does not overwrite a known value with a guess.
  const retailType: unknown = d.retailType;
  const hasRetailType = typeof retailType === 'string' && retailType !== '';
  const recreationalFromType = hasRetailType
    ? retailType === 'recreational' || retailType === 'both'
    : null;
  const medicalFromType = hasRetailType
    ? retailType === 'medical' || retailType === 'both'
    : null;

  return {
    platformLocationId: id,
    platformSlug: slug,
    platformMenuUrl: menuUrl,
    name: d.name || d.dispensaryName || '',
    rawAddress: rawAddress || null,
    addressLine1: line1 || null,
    addressLine2: line2 || null,
    city: cityName || city.cityName,
    stateCode: state || city.stateCode,
    postalCode: postal || null,
    countryCode: address.country || address.countryCode || d.country || city.countryCode,
    latitude,
    longitude,
    timezone: d.timezone || d.timeZone || null,
    offersDelivery: d.offerDelivery ?? d.offersDelivery ?? d.delivery ?? null,
    offersPickup: d.offerPickup ?? d.offersPickup ?? d.pickup ?? null,
    isRecreational: d.isRecreational ?? d.recreational ?? recreationalFromType,
    isMedical: d.isMedical ?? d.medical ?? medicalFromType,
    metadata: {
      source: 'next_data',
      retailType: d.retailType,
      brand: d.brand,
      logo: d.logo || d.logoUrl,
      raw: d,
    },
  };
}
|
|
|
|
/**
 * Scrape one Dutchie city page with Puppeteer and return the stores on it.
 *
 * Steps:
 *  1. Load `https://dutchie.com/us/dispensaries/{citySlug}`.
 *  2. Parse the store count shown in the page text, if any.
 *  3. Read stores from the `__NEXT_DATA__` JSON when it holds a
 *     `dispensaries` array; otherwise fall back to scraping store cards
 *     from the DOM (no coordinates on that path).
 *  4. Drop ghost/marketing entries and de-duplicate by menu URL.
 *
 * The browser is always closed, including when navigation or parsing throws.
 *
 * @param city City to crawl.
 * @returns The filtered locations and the page-reported store count
 *          (null when no count could be parsed).
 */
async function fetchLocationsForCity(city: DiscoveryCity): Promise<FetchResult> {
  console.log(`[DtLocationDiscoveryService] Fetching locations for ${city.cityName}, ${city.stateCode}...`);

  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'],
  });

  try {
    const page = await browser.newPage();
    await page.setUserAgent(
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    );

    // Use the /us/dispensaries/{city_slug} pattern (NOT /city/{state}/{slug})
    const cityUrl = `https://dutchie.com/us/dispensaries/${city.citySlug}`;
    console.log(`[DtLocationDiscoveryService] Navigating to ${cityUrl}...`);

    await page.goto(cityUrl, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    // Fixed extra wait after network idle before reading the page.
    await new Promise((r) => setTimeout(r, 3000));

    // Extract reported store count from page header (e.g., "18 dispensaries").
    // The callback runs in the browser, so it must be self-contained.
    const reportedStoreCount = await page.evaluate(() => {
      const headerSelectors = [
        'h1', 'h2', '[data-testid="city-header"]', '[data-testid="results-count"]',
        '.results-header', '.city-header', '.page-header'
      ];

      for (const selector of headerSelectors) {
        const elements = Array.from(document.querySelectorAll(selector));
        for (const el of elements) {
          const text = el.textContent || '';
          // Match "18 dispensaries", "18 stores", "18 results", "18 locations"
          const match = text.match(/(\d+)\s*(?:dispensar(?:y|ies)|stores?|results?|locations?)/i);
          if (match) {
            return parseInt(match[1], 10);
          }
        }
      }

      // Last resort: first "<n> dispensary/dispensaries" anywhere in the body text
      const allText = document.body.innerText;
      const globalMatch = allText.match(/(\d+)\s+dispensar(?:y|ies)/i);
      if (globalMatch) {
        return parseInt(globalMatch[1], 10);
      }

      return null;
    });

    if (reportedStoreCount !== null) {
      console.log(`[DtLocationDiscoveryService] Dutchie reports ${reportedStoreCount} stores for ${city.citySlug}`);
    }

    // Try to extract __NEXT_DATA__ (null when absent or not valid JSON)
    const nextData = await page.evaluate(() => {
      const script = document.querySelector('script#__NEXT_DATA__');
      if (script) {
        try {
          return JSON.parse(script.textContent || '{}');
        } catch {
          return null;
        }
      }
      return null;
    });

    let locations: DutchieLocation[] = [];

    const dispensaries: unknown = nextData?.props?.pageProps?.dispensaries;
    // Require an actual array: a truthy non-array payload would otherwise
    // throw on .map() and fail the whole city instead of falling back.
    if (Array.isArray(dispensaries)) {
      console.log(`[DtLocationDiscoveryService] Found ${dispensaries.length} dispensaries in __NEXT_DATA__`);
      locations = dispensaries.map((d: any) => parseDispensaryData(d, city));
    } else {
      // Fall back to DOM scraping
      console.log('[DtLocationDiscoveryService] No __NEXT_DATA__, trying DOM scraping...');

      const scrapedData = await page.evaluate(() => {
        const stores: Array<{
          name: string;
          href: string;
          address: string | null;
        }> = [];

        // The selector matches card containers as well as bare store links,
        // so the same store can appear more than once; de-duplication
        // happens later on the Node side.
        const cards = document.querySelectorAll('[data-testid="dispensary-card"], .dispensary-card, a[href*="/dispensary/"]');
        cards.forEach((card) => {
          const link = card.querySelector('a[href*="/dispensary/"]') || (card as HTMLAnchorElement);
          const href = (link as HTMLAnchorElement).href || '';
          const name =
            card.querySelector('[data-testid="dispensary-name"]')?.textContent ||
            card.querySelector('h2, h3, .name')?.textContent ||
            link.textContent ||
            '';
          const address = card.querySelector('[data-testid="dispensary-address"], .address')?.textContent || null;

          if (href && name) {
            stores.push({
              name: name.trim(),
              href,
              address: address?.trim() || null,
            });
          }
        });

        return stores;
      });

      console.log(`[DtLocationDiscoveryService] DOM scraping found ${scrapedData.length} raw store cards`);

      locations = scrapedData.map((s): DutchieLocation => {
        const match = s.href.match(/\/dispensary\/([^/?]+)/);
        const slug = match ? match[1] : s.name.toLowerCase().replace(/\s+/g, '-');

        // NOTE(review): the menu URL is rebuilt from the slug, so the
        // "non-store URL" filter below never rejects DOM-scraped entries;
        // only the slug-based filters apply to them. Confirm this is intended.
        return {
          platformLocationId: slug,
          platformSlug: slug,
          platformMenuUrl: `https://dutchie.com/dispensary/${slug}`,
          name: s.name,
          rawAddress: s.address,
          addressLine1: null,
          addressLine2: null,
          city: city.cityName,
          stateCode: city.stateCode,
          postalCode: null,
          countryCode: city.countryCode,
          latitude: null, // Not available from DOM scraping
          longitude: null,
          timezone: null,
          offersDelivery: null,
          offersPickup: null,
          isRecreational: null,
          isMedical: null,
          metadata: { source: 'dom_scrape', originalUrl: s.href },
        };
      });
    }

    // =========================================================================
    // FILTERING AND DEDUPLICATION
    // =========================================================================

    const beforeFilterCount = locations.length;

    // Built once rather than on every filter iteration.
    const marketingSlugs = ['referral', 'refer-a-dispensary', 'sign-up', 'signup'];

    // 1. Filter out ghost entries and marketing links
    locations = locations.filter((loc) => {
      // Slug equal to the city slug (e.g., /dispensary/ak-anchorage) is not a store
      if (loc.platformSlug === city.citySlug) {
        console.log(`[DtLocationDiscoveryService] Filtering ghost entry: /dispensary/${loc.platformSlug} (matches city slug)`);
        return false;
      }

      // Marketing/referral links (e.g., try.dutchie.com/dispensary/referral/)
      if (!loc.platformMenuUrl.startsWith('https://dutchie.com/dispensary/')) {
        console.log(`[DtLocationDiscoveryService] Filtering non-store URL: ${loc.platformMenuUrl}`);
        return false;
      }

      // Generic marketing slugs
      if (marketingSlugs.includes(loc.platformSlug.toLowerCase())) {
        console.log(`[DtLocationDiscoveryService] Filtering marketing slug: ${loc.platformSlug}`);
        return false;
      }

      return true;
    });

    // 2. Deduplicate by platformMenuUrl (unique store URL); first occurrence wins
    const seenUrls = new Set<string>();
    locations = locations.filter((loc) => {
      if (seenUrls.has(loc.platformMenuUrl)) {
        return false;
      }
      seenUrls.add(loc.platformMenuUrl);
      return true;
    });

    const afterFilterCount = locations.length;
    if (beforeFilterCount !== afterFilterCount) {
      console.log(`[DtLocationDiscoveryService] Filtered: ${beforeFilterCount} -> ${afterFilterCount} (removed ${beforeFilterCount - afterFilterCount} ghost/duplicate entries)`);
    }

    // Log comparison for QA
    console.log(`[DtLocationDiscoveryService] [${city.citySlug}] reported_store_count=${reportedStoreCount ?? 'N/A'}, scraped_store_count=${afterFilterCount}`);
    if (reportedStoreCount !== null && reportedStoreCount !== afterFilterCount) {
      console.log(`[DtLocationDiscoveryService] [${city.citySlug}] MISMATCH: Dutchie reports ${reportedStoreCount}, we scraped ${afterFilterCount}`);
    }

    return { locations, reportedStoreCount };
  } finally {
    await browser.close();
  }
}
|
|
|
|
// ============================================================
|
|
// DATABASE OPERATIONS
|
|
// ============================================================
|
|
|
|
/**
 * Insert or update one location in `dutchie_discovery_locations`.
 *
 * Behaviour by case:
 *  - No row for (`platform = 'dutchie'`, `platform_location_id`): insert a
 *    new row with status `'discovered'`.
 *  - Row exists with status verified/merged/rejected: only refresh
 *    `last_seen_at`/`updated_at` and fill latitude/longitude if they are
 *    currently NULL. Reported as `skipped`.
 *  - Row exists with any other status: update descriptive fields. Slug,
 *    menu URL, name and city id are always overwritten; the remaining
 *    fields keep their stored value when the new value is NULL; latitude
 *    and longitude are only filled when currently NULL.
 *
 * `status` and `dispensary_id` are never written on the update paths.
 *
 * NOTE(review): the existence check and the insert are separate statements
 * with no conflict handling, so two concurrent runs could both insert.
 *
 * @param pool     pg connection pool.
 * @param location Location to persist.
 * @param cityId   Id of the discovery city the location was found under.
 * @returns Exactly one of `inserted`/`updated`/`skipped` set to true.
 */
async function upsertLocation(
  pool: Pool,
  location: DutchieLocation,
  cityId: number
): Promise<{ inserted: boolean; updated: boolean; skipped: boolean }> {
  // Parameters $2..$20, shared verbatim by the UPDATE and INSERT statements;
  // only $1 differs (row id for UPDATE, platform location id for INSERT).
  const sharedValues = [
    location.platformSlug,
    location.platformMenuUrl,
    location.name,
    location.rawAddress,
    location.addressLine1,
    location.addressLine2,
    location.city,
    location.stateCode,
    location.postalCode,
    location.countryCode,
    location.latitude,
    location.longitude,
    location.timezone,
    location.offersDelivery,
    location.offersPickup,
    location.isRecreational,
    location.isMedical,
    JSON.stringify(location.metadata),
    cityId,
  ];

  // First check if this location exists and has a protected status
  const existing = await pool.query(
    `
    SELECT id, status, dispensary_id, latitude, longitude
    FROM dutchie_discovery_locations
    WHERE platform = 'dutchie' AND platform_location_id = $1
    `,
    [location.platformLocationId]
  );

  if (existing.rows.length === 0) {
    // Insert new location
    await pool.query(
      `
      INSERT INTO dutchie_discovery_locations (
        platform,
        platform_location_id,
        platform_slug,
        platform_menu_url,
        name,
        raw_address,
        address_line1,
        address_line2,
        city,
        state_code,
        postal_code,
        country_code,
        latitude,
        longitude,
        timezone,
        status,
        offers_delivery,
        offers_pickup,
        is_recreational,
        is_medical,
        metadata,
        discovery_city_id,
        first_seen_at,
        last_seen_at,
        active,
        created_at,
        updated_at
      ) VALUES (
        'dutchie',
        $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14,
        'discovered',
        $15, $16, $17, $18, $19, $20,
        NOW(), NOW(), TRUE, NOW(), NOW()
      )
      `,
      [location.platformLocationId, ...sharedValues]
    );

    return { inserted: true, updated: false, skipped: false };
  }

  const row = existing.rows[0];
  const protectedStatuses = ['verified', 'merged', 'rejected'];

  if (protectedStatuses.includes(row.status)) {
    // Only update last_seen_at for protected statuses,
    // but still fill coordinates if they were null and we now have them
    await pool.query(
      `
      UPDATE dutchie_discovery_locations
      SET
        last_seen_at = NOW(),
        updated_at = NOW(),
        latitude = CASE WHEN latitude IS NULL THEN $2 ELSE latitude END,
        longitude = CASE WHEN longitude IS NULL THEN $3 ELSE longitude END
      WHERE id = $1
      `,
      [row.id, location.latitude, location.longitude]
    );
    return { inserted: false, updated: false, skipped: true };
  }

  // Update existing discovered location.
  // Preserve existing lat/lng if already set (only fill nulls).
  await pool.query(
    `
    UPDATE dutchie_discovery_locations
    SET
      platform_slug = $2,
      platform_menu_url = $3,
      name = $4,
      raw_address = COALESCE($5, raw_address),
      address_line1 = COALESCE($6, address_line1),
      address_line2 = COALESCE($7, address_line2),
      city = COALESCE($8, city),
      state_code = COALESCE($9, state_code),
      postal_code = COALESCE($10, postal_code),
      country_code = COALESCE($11, country_code),
      latitude = CASE WHEN latitude IS NULL THEN $12 ELSE latitude END,
      longitude = CASE WHEN longitude IS NULL THEN $13 ELSE longitude END,
      timezone = COALESCE($14, timezone),
      offers_delivery = COALESCE($15, offers_delivery),
      offers_pickup = COALESCE($16, offers_pickup),
      is_recreational = COALESCE($17, is_recreational),
      is_medical = COALESCE($18, is_medical),
      metadata = COALESCE($19, metadata),
      discovery_city_id = $20,
      last_seen_at = NOW(),
      updated_at = NOW()
    WHERE id = $1
    `,
    [row.id, ...sharedValues]
  );
  return { inserted: false, updated: true, skipped: false };
}
|
|
|
|
// ============================================================
|
|
// MAIN SERVICE CLASS
|
|
// ============================================================
|
|
|
|
export class DtLocationDiscoveryService {
|
|
constructor(private pool: Pool) {}
|
|
|
|
/**
 * Look up a single Dutchie discovery city by its slug.
 *
 * @param citySlug Value to match against `city_slug`.
 * @returns The city mapped to camelCase, or null when no row matches.
 */
async getCityBySlug(citySlug: string): Promise<DiscoveryCity | null> {
  const { rows } = await this.pool.query(
    `
    SELECT id, platform, city_name, city_slug, state_code, country_code, crawl_enabled
    FROM dutchie_discovery_cities
    WHERE platform = 'dutchie' AND city_slug = $1
    LIMIT 1
    `,
    [citySlug]
  );

  const [match] = rows;
  if (match === undefined) return null;

  return {
    id: match.id,
    platform: match.platform,
    cityName: match.city_name,
    citySlug: match.city_slug,
    stateCode: match.state_code,
    countryCode: match.country_code,
    crawlEnabled: match.crawl_enabled,
  };
}
|
|
|
|
/**
 * List crawl-enabled Dutchie cities, least-recently crawled first
 * (never-crawled cities come before all others, ties broken by name).
 *
 * @param limit Optional maximum number of cities. Values that are missing,
 *              zero, negative or not finite mean "no limit"; fractional
 *              values are rounded down.
 * @returns The matching cities mapped to camelCase.
 */
async getEnabledCities(limit?: number): Promise<DiscoveryCity[]> {
  // Bind the limit as a query parameter instead of splicing it into the SQL
  // text, so a non-numeric value arriving at runtime can never alter the
  // statement.
  const rowLimit =
    typeof limit === 'number' && Number.isFinite(limit) && limit >= 1
      ? Math.floor(limit)
      : null;
  const params = rowLimit === null ? [] : [rowLimit];

  const { rows } = await this.pool.query(
    `
    SELECT id, platform, city_name, city_slug, state_code, country_code, crawl_enabled
    FROM dutchie_discovery_cities
    WHERE platform = 'dutchie' AND crawl_enabled = TRUE
    ORDER BY last_crawled_at ASC NULLS FIRST, city_name ASC
    ${rowLimit === null ? '' : 'LIMIT $1'}
    `,
    params
  );

  return rows.map((r) => ({
    id: r.id,
    platform: r.platform,
    cityName: r.city_name,
    citySlug: r.city_slug,
    stateCode: r.state_code,
    countryCode: r.country_code,
    crawlEnabled: r.crawl_enabled,
  }));
}
|
|
|
|
/**
 * Run discovery for one city: scrape its page, upsert every location found,
 * then record the crawl on the city row.
 *
 * Failures never propagate. A failed upsert is recorded and the loop moves
 * on to the next location; a failure in the fetch or in the city-row update
 * is recorded and ends the run. Counters accumulated so far are still
 * returned.
 *
 * @param city City to crawl.
 * @returns Counters, the page-reported store count, collected error
 *          messages and the elapsed time.
 */
async discoverForCity(city: DiscoveryCity): Promise<LocationDiscoveryResult> {
  const startTime = Date.now();
  const errors: string[] = [];
  let locationsFound = 0;
  let locationsInserted = 0;
  let locationsUpdated = 0;
  let locationsSkipped = 0;
  let reportedStoreCount: number | null = null;

  // Thrown values are not guaranteed to be Error instances.
  const describe = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  console.log(`[DtLocationDiscoveryService] Discovering locations for ${city.cityName}, ${city.stateCode}...`);

  try {
    const fetchResult = await fetchLocationsForCity(city);
    const locations = fetchResult.locations;
    reportedStoreCount = fetchResult.reportedStoreCount;

    locationsFound = locations.length;
    console.log(`[DtLocationDiscoveryService] Found ${locationsFound} locations`);

    // Count how many have coordinates
    const withCoords = locations.filter((l) => l.latitude !== null && l.longitude !== null).length;
    if (withCoords > 0) {
      console.log(`[DtLocationDiscoveryService] ${withCoords}/${locationsFound} locations have coordinates`);
    }

    // Upserts run one at a time; each failure is isolated to its location.
    for (const location of locations) {
      try {
        const result = await upsertLocation(this.pool, location, city.id);
        if (result.inserted) locationsInserted++;
        else if (result.updated) locationsUpdated++;
        else if (result.skipped) locationsSkipped++;
      } catch (error: unknown) {
        const msg = `Failed to upsert location ${location.platformSlug}: ${describe(error)}`;
        console.error(`[DtLocationDiscoveryService] ${msg}`);
        errors.push(msg);
      }
    }

    // Update city's last_crawled_at, location_count, and merge the reported
    // and scraped counts into the city's metadata JSON.
    await this.pool.query(
      `
      UPDATE dutchie_discovery_cities
      SET last_crawled_at = NOW(),
          location_count = $1,
          metadata = COALESCE(metadata, '{}')::jsonb || jsonb_build_object(
            'reported_store_count', $3::int,
            'scraped_store_count', $1::int,
            'last_discovery_at', NOW()::text
          ),
          updated_at = NOW()
      WHERE id = $2
      `,
      [locationsFound, city.id, reportedStoreCount]
    );
  } catch (error: unknown) {
    const msg = `Location discovery failed for ${city.citySlug}: ${describe(error)}`;
    console.error(`[DtLocationDiscoveryService] ${msg}`);
    errors.push(msg);
  }

  const durationMs = Date.now() - startTime;

  console.log(`[DtLocationDiscoveryService] City ${city.citySlug} complete:`);
  console.log(` Reported count: ${reportedStoreCount ?? 'N/A'}`);
  console.log(` Locations found: ${locationsFound}`);
  console.log(` Inserted: ${locationsInserted}`);
  console.log(` Updated: ${locationsUpdated}`);
  console.log(` Skipped (protected): ${locationsSkipped}`);
  console.log(` Errors: ${errors.length}`);
  console.log(` Duration: ${(durationMs / 1000).toFixed(1)}s`);

  return {
    cityId: city.id,
    citySlug: city.citySlug,
    locationsFound,
    locationsInserted,
    locationsUpdated,
    locationsSkipped,
    reportedStoreCount,
    errors,
    durationMs,
  };
}
|
|
|
|
/**
 * Run discovery for every crawl-enabled city, one after another.
 *
 * @param options.limit   Maximum number of cities to process (passed to
 *                        `getEnabledCities`).
 * @param options.delayMs Pause between consecutive cities in milliseconds;
 *                        defaults to 2000. No pause follows the last city.
 * @returns Totals summed over all cities plus every collected error message.
 */
async discoverAllEnabled(options: {
  limit?: number;
  delayMs?: number;
} = {}): Promise<BatchDiscoveryResult> {
  const { limit, delayMs = 2000 } = options;
  const startTime = Date.now();
  let totalLocationsFound = 0;
  let totalInserted = 0;
  let totalUpdated = 0;
  let totalSkipped = 0;
  const allErrors: string[] = [];

  const cities = await this.getEnabledCities(limit);
  console.log(`[DtLocationDiscoveryService] Discovering locations for ${cities.length} cities...`);

  const lastIndex = cities.length - 1;
  for (const [i, city] of cities.entries()) {
    console.log(`\n[DtLocationDiscoveryService] City ${i + 1}/${cities.length}: ${city.cityName}, ${city.stateCode}`);

    try {
      const result = await this.discoverForCity(city);
      totalLocationsFound += result.locationsFound;
      totalInserted += result.locationsInserted;
      totalUpdated += result.locationsUpdated;
      totalSkipped += result.locationsSkipped;
      allErrors.push(...result.errors);
    } catch (error: unknown) {
      // Record the failure and carry on with the remaining cities.
      const reason = error instanceof Error ? error.message : String(error);
      allErrors.push(`City ${city.citySlug} failed: ${reason}`);
    }

    // Throttle between cities, but not after the final one.
    if (i < lastIndex && delayMs > 0) {
      await new Promise((r) => setTimeout(r, delayMs));
    }
  }

  const durationMs = Date.now() - startTime;

  return {
    totalCities: cities.length,
    totalLocationsFound,
    totalInserted,
    totalUpdated,
    totalSkipped,
    errors: allErrors,
    durationMs,
  };
}
|
|
|
|
/**
 * Summarise the active Dutchie rows in `dutchie_discovery_locations`.
 *
 * The four queries are issued concurrently.
 *
 * @returns
 *  - `total`: number of active rows;
 *  - `withCoordinates`: active rows with both latitude and longitude set;
 *  - `byStatus`: row count per status, largest first;
 *  - `byState`: row count per non-null state code, largest first, capped
 *    at 20 entries.
 */
async getStats(): Promise<{
  total: number;
  withCoordinates: number;
  byStatus: Array<{ status: string; count: number }>;
  byState: Array<{ stateCode: string; count: number }>;
}> {
  const totalQuery = this.pool.query(`
    SELECT COUNT(*) as cnt FROM dutchie_discovery_locations
    WHERE platform = 'dutchie' AND active = TRUE
  `);
  const coordsQuery = this.pool.query(`
    SELECT COUNT(*) as cnt FROM dutchie_discovery_locations
    WHERE platform = 'dutchie' AND active = TRUE
    AND latitude IS NOT NULL AND longitude IS NOT NULL
  `);
  const byStatusQuery = this.pool.query(`
    SELECT status, COUNT(*) as cnt
    FROM dutchie_discovery_locations
    WHERE platform = 'dutchie' AND active = TRUE
    GROUP BY status
    ORDER BY cnt DESC
  `);
  const byStateQuery = this.pool.query(`
    SELECT state_code, COUNT(*) as cnt
    FROM dutchie_discovery_locations
    WHERE platform = 'dutchie' AND active = TRUE AND state_code IS NOT NULL
    GROUP BY state_code
    ORDER BY cnt DESC
    LIMIT 20
  `);

  const [totalRes, coordsRes, byStatusRes, byStateRes] = await Promise.all([
    totalQuery,
    coordsQuery,
    byStatusQuery,
    byStateQuery,
  ]);

  // The counts are run through parseInt before being returned as numbers.
  return {
    total: parseInt(totalRes.rows[0]?.cnt || '0', 10),
    withCoordinates: parseInt(coordsRes.rows[0]?.cnt || '0', 10),
    byStatus: byStatusRes.rows.map((r) => ({
      status: r.status,
      count: parseInt(r.cnt, 10),
    })),
    byState: byStateRes.rows.map((r) => ({
      stateCode: r.state_code,
      count: parseInt(r.cnt, 10),
    })),
  };
}
|
|
|
|
// ============================================================
|
|
// ALICE - FULL DISCOVERY FROM /CITIES PAGE
|
|
// ============================================================
|
|
|
|
/**
 * Fetch the state -> city hierarchy from https://dutchie.com/cities.
 *
 * Structured data in `__NEXT_DATA__` (`pageProps.states` or
 * `pageProps.regions`) is used when present. Otherwise the page's headings
 * and the `/dispensaries/{slug}` links near them are scraped; on that path
 * the state code comes from, in order: the heading's `data-state-code`
 * attribute, a lookup of the heading text against known US state /
 * Canadian province names, and finally the first two letters of the
 * heading text (logged as a guess).
 *
 * The browser is always closed, including on failure.
 *
 * @returns The states that have at least one city, plus an `errors` list
 *          that is non-empty when the DOM fallback found nothing.
 */
async fetchCitiesFromMasterPage(): Promise<{
  states: Array<{
    stateCode: string;
    stateName: string;
    cities: Array<{ cityName: string; citySlug: string; storeCount?: number }>;
  }>;
  errors: string[];
}> {
  // Postal abbreviations keyed by lower-cased name. Taking the first two
  // letters of the name is wrong for most entries (e.g. Arizona -> "AR",
  // which is Arkansas), so names are resolved here first.
  const regionCodes: Record<string, string> = {
    alabama: 'AL', alaska: 'AK', arizona: 'AZ', arkansas: 'AR', california: 'CA',
    colorado: 'CO', connecticut: 'CT', delaware: 'DE', 'district of columbia': 'DC',
    florida: 'FL', georgia: 'GA', hawaii: 'HI', idaho: 'ID', illinois: 'IL',
    indiana: 'IN', iowa: 'IA', kansas: 'KS', kentucky: 'KY', louisiana: 'LA',
    maine: 'ME', maryland: 'MD', massachusetts: 'MA', michigan: 'MI', minnesota: 'MN',
    mississippi: 'MS', missouri: 'MO', montana: 'MT', nebraska: 'NE', nevada: 'NV',
    'new hampshire': 'NH', 'new jersey': 'NJ', 'new mexico': 'NM', 'new york': 'NY',
    'north carolina': 'NC', 'north dakota': 'ND', ohio: 'OH', oklahoma: 'OK',
    oregon: 'OR', pennsylvania: 'PA', 'puerto rico': 'PR', 'rhode island': 'RI',
    'south carolina': 'SC', 'south dakota': 'SD', tennessee: 'TN', texas: 'TX',
    utah: 'UT', vermont: 'VT', virginia: 'VA', washington: 'WA',
    'west virginia': 'WV', wisconsin: 'WI', wyoming: 'WY',
    alberta: 'AB', 'british columbia': 'BC', manitoba: 'MB', 'new brunswick': 'NB',
    'newfoundland and labrador': 'NL', 'nova scotia': 'NS',
    'northwest territories': 'NT', nunavut: 'NU', ontario: 'ON',
    'prince edward island': 'PE', quebec: 'QC', saskatchewan: 'SK', yukon: 'YT',
  };

  console.log('[Alice] Fetching master cities page from https://dutchie.com/cities...');

  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'],
  });

  try {
    const page = await browser.newPage();
    await page.setUserAgent(
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
    );

    await page.goto('https://dutchie.com/cities', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    // Fixed extra wait after network idle before reading the page.
    await new Promise((r) => setTimeout(r, 3000));

    // Try to extract from __NEXT_DATA__ (null when absent or not valid JSON).
    // The callback runs in the browser, so it must be self-contained.
    const citiesData = await page.evaluate(() => {
      const script = document.querySelector('script#__NEXT_DATA__');
      if (script) {
        try {
          const data = JSON.parse(script.textContent || '{}');
          return data?.props?.pageProps || null;
        } catch {
          return null;
        }
      }
      return null;
    });

    const states: Array<{
      stateCode: string;
      stateName: string;
      cities: Array<{ cityName: string; citySlug: string; storeCount?: number }>;
    }> = [];
    const errors: string[] = [];

    if (citiesData?.states || citiesData?.regions) {
      // Parse from structured data
      const statesList = citiesData.states || citiesData.regions || [];
      for (const state of statesList) {
        const stateCode = state.code || state.stateCode || state.abbreviation || '';
        const stateName = state.name || state.stateName || '';
        const cities = (state.cities || []).map((c: any) => ({
          cityName: c.name || c.cityName || '',
          citySlug: c.slug || c.citySlug || c.name?.toLowerCase().replace(/\s+/g, '-') || '',
          storeCount: c.dispensaryCount || c.storeCount || undefined,
        }));
        // States without a code or without cities are dropped.
        if (stateCode && cities.length > 0) {
          states.push({ stateCode, stateName, cities });
        }
      }
    } else {
      // Fallback: DOM scraping
      console.log('[Alice] No __NEXT_DATA__, attempting DOM scrape...');
      const scrapedStates = await page.evaluate(() => {
        const result: Array<{
          declaredCode: string;
          stateName: string;
          cities: Array<{ cityName: string; citySlug: string }>;
        }> = [];

        // Look for state sections
        const stateHeaders = document.querySelectorAll('h2, h3, [data-testid*="state"]');
        stateHeaders.forEach((header) => {
          const stateName = header.textContent?.trim() || '';
          // Code explicitly declared on the element, if any; name-based
          // resolution happens after the data leaves the browser.
          const declaredCode = (header as HTMLElement).dataset?.stateCode || '';

          // Find city links in the section (or parent) containing this header
          const container = header.closest('section') || header.parentElement;
          const cityLinks = container?.querySelectorAll('a[href*="/dispensaries/"]') || [];
          const cities: Array<{ cityName: string; citySlug: string }> = [];

          cityLinks.forEach((link) => {
            const href = (link as HTMLAnchorElement).href || '';
            const match = href.match(/\/dispensaries\/([^/?]+)/);
            if (match) {
              cities.push({
                cityName: link.textContent?.trim() || '',
                citySlug: match[1],
              });
            }
          });

          if (stateName && cities.length > 0) {
            result.push({ declaredCode, stateName, cities });
          }
        });

        return result;
      });

      for (const scraped of scrapedStates) {
        let stateCode = scraped.declaredCode || regionCodes[scraped.stateName.toLowerCase()] || '';
        if (!stateCode) {
          // Legacy behaviour kept as a last resort so no section is dropped.
          stateCode = scraped.stateName.substring(0, 2).toUpperCase();
          console.warn(`[Alice] Unrecognised state name "${scraped.stateName}", guessing code ${stateCode}`);
        }
        states.push({ stateCode, stateName: scraped.stateName, cities: scraped.cities });
      }

      if (states.length === 0) {
        errors.push('Could not parse cities from master page');
      }
    }

    console.log(`[Alice] Found ${states.length} states with cities from master page`);
    return { states, errors };
  } finally {
    await browser.close();
  }
}
|
|
|
|
/**
 * Upsert cities from master page discovery.
 *
 * For each city of each supplied state, looks up an existing
 * `dutchie_discovery_cities` row by platform + city slug. When none exists a
 * new crawl-enabled row is inserted (country code 'US'); otherwise the
 * existing row's names, state, reported store count and verification
 * timestamp are refreshed.
 *
 * Safeguards:
 * - Cities with a blank slug are skipped, since the slug is the lookup key.
 * - On update, blank names/codes are sent as NULL so that COALESCE keeps the
 *   value already stored instead of overwriting it with an empty string.
 *
 * @param states - States with their cities, as returned by the master page fetch.
 * @returns Number of rows inserted and number of rows updated.
 */
async upsertCitiesFromMaster(states: Array<{
  stateCode: string;
  stateName: string;
  cities: Array<{ cityName: string; citySlug: string; storeCount?: number }>;
}>): Promise<{ inserted: number; updated: number }> {
  let inserted = 0;
  let updated = 0;

  // COALESCE only falls back on NULL, so blank strings must be converted.
  const blankToNull = (value: string | null | undefined): string | null =>
    value && value.trim() !== '' ? value : null;

  for (const state of states) {
    for (const city of state.cities) {
      // A city without a slug cannot be looked up or crawled; skip it.
      if (!city.citySlug || city.citySlug.trim() === '') {
        continue;
      }

      // NOTE(review): the lookup matches on slug alone, so the same slug in two
      // states resolves to one row - confirm against the table's unique constraint.
      const existing = await this.pool.query(
        `SELECT id FROM dutchie_discovery_cities
         WHERE platform = 'dutchie' AND city_slug = $1`,
        [city.citySlug]
      );
      const [existingRow] = existing.rows;

      if (!existingRow) {
        // Insert new city
        await this.pool.query(
          `INSERT INTO dutchie_discovery_cities (
             platform, city_name, city_slug, state_code, state_name,
             country_code, crawl_enabled, discovered_at, last_verified_at,
             store_count_reported, created_at, updated_at
           ) VALUES ($1, $2, $3, $4, $5, $6, $7, NOW(), NOW(), $8, NOW(), NOW())`,
          [
            'dutchie',
            city.cityName,
            city.citySlug,
            state.stateCode,
            state.stateName,
            'US',
            true,
            city.storeCount ?? null,
          ]
        );
        inserted++;
      } else {
        // Update existing city; NULL parameters leave the stored value untouched.
        await this.pool.query(
          `UPDATE dutchie_discovery_cities SET
             city_name = COALESCE($2, city_name),
             state_code = COALESCE($3, state_code),
             state_name = COALESCE($4, state_name),
             last_verified_at = NOW(),
             store_count_reported = COALESCE($5, store_count_reported),
             updated_at = NOW()
           WHERE id = $1`,
          [
            existingRow.id,
            blankToNull(city.cityName),
            blankToNull(state.stateCode),
            blankToNull(state.stateName),
            city.storeCount ?? null,
          ]
        );
        updated++;
      }
    }
  }

  return { inserted, updated };
}
|
|
|
|
/**
 * Detect stores that have been removed from source.
 * Marks them as retired instead of deleting them.
 *
 * Loads every active, non-retired Dutchie location and retires those whose
 * platform location ID is absent from `currentLocationIds`. All retirements
 * are applied in a single UPDATE statement, so they either all succeed or
 * none do, and the database is hit once rather than once per store.
 *
 * Note: an empty `currentLocationIds` retires every active location; callers
 * are expected to guard against calling this after a failed discovery.
 *
 * @param currentLocationIds - Platform location IDs still present at the source.
 * @returns Count and platform location IDs of the stores that were retired.
 */
async detectAndMarkRemovedStores(
  currentLocationIds: Set<string>
): Promise<{ retiredCount: number; retiredIds: string[] }> {
  // Get all active locations we know about
  const { rows: existingLocations } = await this.pool.query<{
    id: number;
    platform_location_id: string;
    name: string;
  }>(`
    SELECT id, platform_location_id, name
    FROM dutchie_discovery_locations
    WHERE platform = 'dutchie'
      AND active = TRUE
      AND retired_at IS NULL
  `);

  // Stores we know about that no longer appear in the source
  const removedLocations = existingLocations.filter(
    (loc) => !currentLocationIds.has(loc.platform_location_id)
  );

  if (removedLocations.length === 0) {
    return { retiredCount: 0, retiredIds: [] };
  }

  // bigint[] accepts both integer and bigint primary keys.
  await this.pool.query(
    `UPDATE dutchie_discovery_locations SET
       active = FALSE,
       retired_at = NOW(),
       retirement_reason = 'removed_from_source',
       updated_at = NOW()
     WHERE id = ANY($1::bigint[])`,
    [removedLocations.map((loc) => loc.id)]
  );

  const retiredIds: string[] = [];
  for (const loc of removedLocations) {
    retiredIds.push(loc.platform_location_id);
    console.log(`[Alice] Marked store as retired: ${loc.name} (${loc.platform_location_id})`);
  }

  return { retiredCount: retiredIds.length, retiredIds };
}
|
|
|
|
/**
 * Detect and track slug changes.
 *
 * Reads the slug currently stored for the given Dutchie location. When a
 * non-empty stored slug differs from `newSlug`, the row is updated with the
 * new slug, the old one is kept in `previous_slug`, and the change time is
 * recorded.
 *
 * @param locationId - Platform location ID of the store to check.
 * @param newSlug - Slug most recently observed for that store.
 * @returns `changed: true` plus the previous slug when an update was written;
 *   `changed: false` when the location is unknown, has no stored slug, or the
 *   slug is unchanged.
 */
async detectSlugChanges(
  locationId: string,
  newSlug: string
): Promise<{ changed: boolean; previousSlug?: string }> {
  const lookup = await this.pool.query<{ platform_slug: string }>(
    `SELECT platform_slug FROM dutchie_discovery_locations
     WHERE platform = 'dutchie' AND platform_location_id = $1`,
    [locationId]
  );

  // Unknown location: nothing to compare against.
  const [record] = lookup.rows;
  if (!record) {
    return { changed: false };
  }

  // A missing stored slug or an identical slug is not treated as a change.
  const storedSlug = record.platform_slug;
  const isUnchanged = !storedSlug || storedSlug === newSlug;
  if (isUnchanged) {
    return { changed: false };
  }

  // Slug changed - persist the new value and remember the old one
  const trackingParams = [newSlug, storedSlug, locationId];
  await this.pool.query(
    `UPDATE dutchie_discovery_locations SET
       platform_slug = $1,
       previous_slug = $2,
       slug_changed_at = NOW(),
       updated_at = NOW()
     WHERE platform = 'dutchie' AND platform_location_id = $3`,
    trackingParams
  );
  console.log(`[Alice] Slug change detected: ${storedSlug} -> ${newSlug}`);

  return { changed: true, previousSlug: storedSlug };
}
|
|
|
|
/**
 * Full discovery run with change detection (Alice's main job).
 * Fetches from /cities, discovers all stores, detects changes.
 *
 * Steps:
 * 1. Fetch the master cities page and (optionally) filter it by `scope.states`.
 * 2. Upsert the resulting cities.
 * 3. Run location discovery for every enabled city (filtered by the same
 *    scope), pausing `delayMs` between cities.
 * 4. Collect the platform IDs of active locations seen within the last day.
 * 5. Retire stores missing from that set. This only happens on unscoped runs
 *    that found at least one location and in which no city crawl threw, so a
 *    failed crawl cannot cause stores that were simply not visited to be retired.
 *
 * `scope.storeIds` is accepted but not read by this method, and
 * `slugChangedCount` is always 0 because slug-change detection is not invoked
 * here.
 *
 * @param options.scope - Optional restriction; `states` limits the run to those state codes.
 * @param options.delayMs - Pause between city crawls in milliseconds (default 2000).
 * @returns Summary counters, collected error messages and the run duration.
 */
async runFullDiscoveryWithChangeDetection(options: {
  scope?: { states?: string[]; storeIds?: number[] };
  delayMs?: number;
} = {}): Promise<{
  statesDiscovered: number;
  citiesDiscovered: number;
  newStoreCount: number;
  removedStoreCount: number;
  updatedStoreCount: number;
  slugChangedCount: number;
  totalLocationsFound: number;
  errors: string[];
  durationMs: number;
}> {
  const startTime = Date.now();
  const { scope, delayMs = 2000 } = options;
  // Narrowed once so the filters below need no non-null assertions.
  const scopedStates = scope?.states;
  const errors: string[] = [];
  // Slug-change detection is not called from this run, so this stays 0.
  const slugChangedCount = 0;

  console.log('[Alice] Starting full discovery with change detection...');
  if (scopedStates) {
    console.log(`[Alice] Scope limited to states: ${scopedStates.join(', ')}`);
  }

  // Step 1: Fetch master cities page
  const { states: masterStates, errors: fetchErrors } = await this.fetchCitiesFromMasterPage();
  errors.push(...fetchErrors);

  // Filter by scope if provided
  const statesToProcess = scopedStates
    ? masterStates.filter((s) => scopedStates.includes(s.stateCode))
    : masterStates;

  // Step 2: Upsert cities
  const citiesResult = await this.upsertCitiesFromMaster(statesToProcess);
  console.log(`[Alice] Cities: ${citiesResult.inserted} new, ${citiesResult.updated} updated`);

  // Step 3: Discover locations for each city
  const allLocationIds = new Set<string>();
  let totalLocationsFound = 0;
  let totalInserted = 0;
  let totalUpdated = 0;
  let failedCityCount = 0;

  const cities = await this.getEnabledCities();
  const citiesToProcess = scopedStates
    ? cities.filter((c) => c.stateCode && scopedStates.includes(c.stateCode))
    : cities;

  for (let i = 0; i < citiesToProcess.length; i++) {
    const city = citiesToProcess[i];
    console.log(`[Alice] City ${i + 1}/${citiesToProcess.length}: ${city.cityName}, ${city.stateCode}`);

    try {
      const result = await this.discoverForCity(city);
      totalLocationsFound += result.locationsFound;
      totalInserted += result.locationsInserted;
      totalUpdated += result.locationsUpdated;
      errors.push(...result.errors);
    } catch (error: unknown) {
      // One failing city must not abort the run; record it and move on.
      failedCityCount++;
      const message = error instanceof Error ? error.message : String(error);
      errors.push(`City ${city.citySlug}: ${message}`);
    }

    if (i < citiesToProcess.length - 1 && delayMs > 0) {
      await new Promise((r) => setTimeout(r, delayMs));
    }
  }

  // Step 4: Get all current active location IDs for removal detection.
  // Discovery does not hand back IDs, so recently-seen rows are read from the DB.
  const { rows: currentLocations } = await this.pool.query<{ platform_location_id: string }>(
    `SELECT platform_location_id FROM dutchie_discovery_locations
     WHERE platform = 'dutchie' AND active = TRUE AND last_seen_at > NOW() - INTERVAL '1 day'`
  );
  currentLocations.forEach((loc) => allLocationIds.add(loc.platform_location_id));

  // Step 5: Detect removed stores (only if we had a successful discovery)
  let removedResult = { retiredCount: 0, retiredIds: [] as string[] };
  if (totalLocationsFound > 0 && !scope) {
    // Only detect removals on full (unscoped) runs
    if (failedCityCount === 0) {
      removedResult = await this.detectAndMarkRemovedStores(allLocationIds);
    } else {
      // Stores in cities that failed to crawl were not refreshed this run;
      // retiring on incomplete data could wrongly deactivate them.
      console.log(`[Alice] Skipping removal detection: ${failedCityCount} city crawl(s) failed`);
    }
  }

  const durationMs = Date.now() - startTime;

  console.log('[Alice] Full discovery complete:');
  console.log(` States: ${statesToProcess.length}`);
  console.log(` Cities: ${citiesToProcess.length}`);
  console.log(` Locations found: ${totalLocationsFound}`);
  console.log(` New: ${totalInserted}, Updated: ${totalUpdated}`);
  console.log(` Removed: ${removedResult.retiredCount}`);
  console.log(` Duration: ${(durationMs / 1000).toFixed(1)}s`);

  return {
    statesDiscovered: statesToProcess.length,
    citiesDiscovered: citiesToProcess.length,
    newStoreCount: totalInserted,
    removedStoreCount: removedResult.retiredCount,
    updatedStoreCount: totalUpdated,
    slugChangedCount,
    totalLocationsFound,
    errors,
    durationMs,
  };
}
|
|
}
|
|
|
|
export default DtLocationDiscoveryService;
|