Files
cannaiq/backend/src/scripts/discover-az-dutchie.ts
Kelly b7cfec0770 feat: AZ dispensary harmonization with Dutchie source of truth
Major changes:
- Add harmonize-az-dispensaries.ts script to sync dispensaries with Dutchie API
- Add migration 057 for crawl_enabled and dutchie_verified fields
- Remove legacy dutchie-az module (replaced by platforms/dutchie)
- Clean up deprecated crawlers, scrapers, and orchestrator code
- Update location-discovery to not fall back to slug when ID is missing
- Add crawl-rotator service for proxy rotation
- Add types/index.ts for shared type definitions
- Add woodpecker-agent k8s manifest

Harmonization script:
- Queries ConsumerDispensaries API for all 32 AZ cities
- Matches dispensaries by platform_dispensary_id (not slug)
- Updates existing records with full Dutchie data
- Creates new records for unmatched Dutchie dispensaries
- Disables dispensaries not found in Dutchie

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-08 10:19:49 -07:00

89 lines
3.0 KiB
TypeScript

/**
 * Explore how Dutchie exposes its Arizona dispensary list.
 * Fetches the /dispensaries/arizona state page, inspects the embedded __NEXT_DATA__
 * payload for dispensary data, and dumps the raw pageProps to a JSON file for analysis.
 */
import { fetchPage, extractNextData } from '../platforms/dutchie/client';
/**
 * Flat description of one dispensary.
 *
 * Every member is a required string; nothing in this file validates or
 * normalizes the values.
 */
interface DutchieDispensary {
  // Identity
  platform_dispensary_id: string;
  name: string;
  slug: string;

  // Location
  city: string;
  state: string;
  address: string;
  zip: string;
}
/**
 * Exploratory probe of Dutchie's Arizona state page.
 *
 * Fetches `/dispensaries/arizona`, pulls the embedded `__NEXT_DATA__` payload
 * out of the HTML, logs where dispensary-like data appears inside it, and
 * writes the raw `pageProps` to `/tmp/az-pageprops.json` for offline analysis.
 *
 * Expected failures (falsy fetch result, no `__NEXT_DATA__`, no `pageProps`)
 * are logged and the function returns normally. Anything thrown by the helpers
 * or by the file write propagates as a rejection of the returned promise.
 *
 * @returns A promise that resolves once all diagnostics have been printed.
 */
async function discoverAZDispensaries(): Promise<void> {
  console.log('Discovering Arizona dispensaries from Dutchie...\n');

  // Fetch the Arizona state page
  console.log('Fetching /dispensaries/arizona...');
  const stateResult = await fetchPage('/dispensaries/arizona');
  if (!stateResult) {
    console.error('Failed to fetch Arizona page');
    return;
  }
  console.log(`Got ${stateResult.status} response, ${stateResult.html.length} bytes`);

  const nextData = extractNextData(stateResult.html);
  if (!nextData) {
    console.error('Failed to extract __NEXT_DATA__');
    // Fallback diagnostic: scan the raw HTML for dispensary links instead.
    const links = stateResult.html.match(/\/dispensary\/([a-z0-9-]+)/gi) || [];
    console.log(`Found ${links.length} dispensary links in HTML`);
    // The match above is case-insensitive, so the prefix must be stripped
    // case-insensitively too; a literal '/dispensary/' would leave
    // '/Dispensary/foo' untouched and defeat the de-duplication.
    const uniqueSlugs = [...new Set(links.map(l => l.replace(/^\/dispensary\//i, '')))];
    console.log('Unique slugs:', uniqueSlugs.slice(0, 20));
    return;
  }

  console.log('Extracted __NEXT_DATA__');
  console.log('Keys:', Object.keys(nextData));

  // The dispensary data is usually in props.pageProps
  const pageProps = nextData?.props?.pageProps;
  if (!pageProps) {
    // Nothing further can be inspected; say so rather than exiting silently.
    console.error('No pageProps found in __NEXT_DATA__');
    return;
  }
  console.log('pageProps keys:', Object.keys(pageProps));

  // Try the known candidate locations in priority order and take the first
  // one that is actually an array, so `.length` and `[0]` below are meaningful
  // even if a candidate key holds a non-array value.
  const dispensaries =
    [
      pageProps.dispensaries,
      pageProps.nearbyDispensaries,
      pageProps.filteredDispensaries,
      pageProps.allDispensaries,
    ].find(Array.isArray) ?? [];
  console.log(`Found ${dispensaries.length} dispensaries in pageProps`);
  if (dispensaries.length > 0) {
    console.log('Sample:', JSON.stringify(dispensaries[0], null, 2));
  }

  // Also look for an Apollo cache snapshot embedded in pageProps
  const apolloState = pageProps.__APOLLO_STATE__;
  if (apolloState) {
    console.log('Found Apollo state');
    const dispensaryKeys = Object.keys(apolloState).filter(
      k => k.startsWith('Dispensary:') || k.includes('dispensary'),
    );
    console.log(`Found ${dispensaryKeys.length} dispensary entries`);
    if (dispensaryKeys.length > 0) {
      console.log('Sample key:', dispensaryKeys[0]);
      // Truncate the sample to 500 characters to keep the log readable.
      console.log('Sample value:', JSON.stringify(apolloState[dispensaryKeys[0]], null, 2).slice(0, 500));
    }
  }

  // Output the raw pageProps for analysis
  const fs = await import('fs');
  fs.writeFileSync('/tmp/az-pageprops.json', JSON.stringify(pageProps, null, 2));
  console.log('\nWrote pageProps to /tmp/az-pageprops.json');
}
// Script entry point: run the probe and print any rejection to stderr
// instead of leaving it as an unhandled promise rejection.
discoverAZDispensaries().catch((err) => console.error(err));