Major changes:
- Add harmonize-az-dispensaries.ts script to sync dispensaries with the Dutchie API
- Add migration 057 for the crawl_enabled and dutchie_verified fields
- Remove the legacy dutchie-az module (replaced by platforms/dutchie)
- Clean up deprecated crawlers, scrapers, and orchestrator code
- Update location-discovery so it does not fall back to the slug when the ID is missing
- Add crawl-rotator service for proxy rotation
- Add types/index.ts for shared type definitions
- Add woodpecker-agent k8s manifest

Harmonization script:
- Queries the ConsumerDispensaries API for all 32 AZ cities
- Matches dispensaries by platform_dispensary_id (not slug)
- Updates existing records with full Dutchie data
- Creates new records for unmatched Dutchie dispensaries
- Disables dispensaries not found in Dutchie

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
89 lines
3.0 KiB
TypeScript
89 lines
3.0 KiB
TypeScript
/**
 * Discover all Arizona dispensaries from Dutchie.
 *
 * Uses the state/city HTML pages, which contain a __NEXT_DATA__ payload with
 * the full dispensary list. This script currently fetches only the Arizona
 * state page (/dispensaries/arizona).
 */
|
|
import { fetchPage, extractNextData } from '../platforms/dutchie/client';
|
|
|
|
/**
 * Shape of a single dispensary record as this script models it.
 *
 * Every field is a required string; no field is optional or nullable.
 */
interface DutchieDispensary {
  // --- identity ---

  /** Identifier of the dispensary on the platform side. */
  platform_dispensary_id: string;
  /** Display name of the dispensary. */
  name: string;
  /** URL slug, i.e. the path segment that follows `/dispensary/`. */
  slug: string;

  // --- location ---

  /** Street address. */
  address: string;
  /** City name. */
  city: string;
  /** State (presumably the two-letter code such as "AZ" — confirm against the data). */
  state: string;
  /** Postal code, kept as a string. */
  zip: string;
}
|
|
|
|
/**
 * Diagnostic probe of Dutchie's Arizona dispensary listing page.
 *
 * Steps, all reported through the console:
 *  1. Fetch `/dispensaries/arizona` via `fetchPage`.
 *  2. Extract the embedded `__NEXT_DATA__` payload via `extractNextData`.
 *     If that fails, fall back to listing the `/dispensary/<slug>` links
 *     found in the raw HTML and stop.
 *  3. Look for a dispensary list under several candidate `pageProps` keys
 *     and print a sample entry.
 *  4. Look for dispensary entries in `pageProps.__APOLLO_STATE__`.
 *  5. Dump the raw `pageProps` to `/tmp/az-pageprops.json` for offline analysis.
 *
 * @returns A promise that resolves once the probe finishes. Nothing is
 *   returned; a failed fetch or extraction is logged and ends the run early.
 */
async function discoverAZDispensaries(): Promise<void> {
  console.log('Discovering Arizona dispensaries from Dutchie...\n');

  // Fetch the Arizona state page
  console.log('Fetching /dispensaries/arizona...');
  const stateResult = await fetchPage('/dispensaries/arizona');

  if (!stateResult) {
    console.error('Failed to fetch Arizona page');
    return;
  }

  console.log(`Got ${stateResult.status} response, ${stateResult.html.length} bytes`);

  const nextData = extractNextData(stateResult.html);
  if (!nextData) {
    console.error('Failed to extract __NEXT_DATA__');

    // Try to find dispensary links in HTML
    const links: string[] = stateResult.html.match(/\/dispensary\/([a-z0-9-]+)/gi) || [];
    console.log(`Found ${links.length} dispensary links in HTML`);

    // The match above is case-insensitive, so the prefix must be stripped
    // case-insensitively too; otherwise mixed-case links keep their prefix
    // and are not de-duplicated against their lowercase twins' slugs.
    const uniqueSlugs = [
      ...new Set(links.map((link: string) => link.replace(/^\/dispensary\//i, ''))),
    ];
    console.log('Unique slugs:', uniqueSlugs.slice(0, 20));
    return;
  }

  console.log('Extracted __NEXT_DATA__');
  console.log('Keys:', Object.keys(nextData));

  // The dispensary data is usually in props.pageProps
  const pageProps = nextData?.props?.pageProps;
  if (pageProps) {
    console.log('pageProps keys:', Object.keys(pageProps));

    // Try various possible locations, in priority order. An empty array is
    // truthy, so a plain `||` chain would stop at the first key that exists
    // even when it holds no entries; pick the first NON-EMPTY array instead
    // (this also skips candidates that are not arrays at all).
    const candidates: unknown[] = [
      pageProps.dispensaries,
      pageProps.nearbyDispensaries,
      pageProps.filteredDispensaries,
      pageProps.allDispensaries,
    ];
    const dispensaries =
      candidates.find(
        (candidate): candidate is unknown[] => Array.isArray(candidate) && candidate.length > 0,
      ) ?? [];

    console.log(`Found ${dispensaries.length} dispensaries in pageProps`);

    if (dispensaries.length > 0) {
      console.log('Sample:', JSON.stringify(dispensaries[0], null, 2));
    }
  }

  // Also look for dehydratedState (Apollo cache)
  const dehydratedState = nextData?.props?.pageProps?.__APOLLO_STATE__;
  if (dehydratedState) {
    console.log('Found Apollo state');

    const dispensaryKeys = Object.keys(dehydratedState).filter(
      (k) => k.startsWith('Dispensary:') || k.includes('dispensary'),
    );
    console.log(`Found ${dispensaryKeys.length} dispensary entries`);

    if (dispensaryKeys.length > 0) {
      console.log('Sample key:', dispensaryKeys[0]);
      // Truncate the sample so one large cache entry does not flood the log.
      console.log(
        'Sample value:',
        JSON.stringify(dehydratedState[dispensaryKeys[0]], null, 2).slice(0, 500),
      );
    }
  }

  // Output the raw pageProps for analysis
  if (pageProps) {
    const fs = await import('fs');
    fs.writeFileSync('/tmp/az-pageprops.json', JSON.stringify(pageProps, null, 2));
    console.log('\nWrote pageProps to /tmp/az-pageprops.json');
  }
}
|
|
|
|
// Script entry point: run the discovery and report any rejection on stderr.
void discoverAZDispensaries().catch((error: unknown) => {
  console.error(error);
});
|