Files
cannaiq/backend/src/scripts/debug-dutchie-page.ts
Kelly b7cfec0770 feat: AZ dispensary harmonization with Dutchie source of truth
Major changes:
- Add harmonize-az-dispensaries.ts script to sync dispensaries with Dutchie API
- Add migration 057 for crawl_enabled and dutchie_verified fields
- Remove legacy dutchie-az module (replaced by platforms/dutchie)
- Clean up deprecated crawlers, scrapers, and orchestrator code
- Update location-discovery to not fall back to slug when ID is missing
- Add crawl-rotator service for proxy rotation
- Add types/index.ts for shared type definitions
- Add woodpecker-agent k8s manifest

Harmonization script:
- Queries ConsumerDispensaries API for all 32 AZ cities
- Matches dispensaries by platform_dispensary_id (not slug)
- Updates existing records with full Dutchie data
- Creates new records for unmatched Dutchie dispensaries
- Disables dispensaries not found in Dutchie

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-08 10:19:49 -07:00

115 lines
3.6 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Debug Dutchie city page to see what data is available
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
// Register the stealth plugin on the puppeteer-extra instance. This runs at
// module load, i.e. before main() launches a browser below.
puppeteer.use(StealthPlugin());
/**
 * Opens a Dutchie city listing page in a headless browser and prints what
 * embedded data the page exposes.
 *
 * The URL is taken from the first CLI argument and defaults to the
 * Bellevue, WA listing. Once the page has loaded, the script logs the page
 * title, flags a probable Cloudflare interstitial, and then either:
 *  - summarises the `__NEXT_DATA__` payload (top-level keys, `pageProps`
 *    keys, dispensary count and a truncated dump of the first entry), or
 *  - when that payload is absent, lists the `<script>` tags that carry an id,
 *    probes a fixed set of window globals, and prints the first 500
 *    characters of the page text.
 *
 * The browser is closed even when one of the steps throws.
 */
async function main() {
  // Fixed delay helper; used to give client-side rendering time to settle.
  const pause = (ms: number) => new Promise((done) => setTimeout(done, ms));

  const targetUrl = process.argv[2] || 'https://dutchie.com/us/dispensaries/wa-bellevue';
  console.log(`Debugging page: ${targetUrl}`);

  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'],
  });

  try {
    const tab = await browser.newPage();
    const userAgent =
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
    await tab.setUserAgent(userAgent);

    console.log('Navigating...');
    await tab.goto(targetUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await pause(5000);

    // Page title
    const pageTitle = await tab.title();
    console.log(`\nPage title: ${pageTitle}`);

    // Cloudflare interstitial detection: matched on the title or the body text.
    const challenged = await tab.evaluate(() => {
      if (document.title.includes('Just a moment')) {
        return true;
      }
      return document.body.textContent?.includes('Enable JavaScript');
    });
    if (challenged) {
      console.log('\n⚠ CLOUDFLARE CHALLENGE DETECTED - waiting longer...');
      await pause(10000);
    }

    // Pull the Next.js bootstrap payload out of the page, if there is one.
    // NOTE: callbacks handed to evaluate() run inside the page, so they must
    // stay self-contained and cannot reference anything from this scope.
    const payload = await tab.evaluate(() => {
      const tag = document.querySelector('script#__NEXT_DATA__');
      if (!tag) {
        return null;
      }
      try {
        return JSON.parse(tag.textContent || '{}');
      } catch {
        return { error: 'Failed to parse __NEXT_DATA__' };
      }
    });

    if (!payload) {
      console.log('\n❌ No __NEXT_DATA__ found');

      // Which id-bearing scripts does the page ship instead?
      const idScripts = await tab.evaluate(() =>
        Array.from(document.querySelectorAll('script[id]'), (el) => ({
          id: el.id,
          src: (el as HTMLScriptElement).src?.slice(0, 100),
        }))
      );
      console.log('Scripts with IDs:', idScripts);

      // Probe a fixed list of globals that might hold the dispensary data.
      // Objects are reported by their keys, everything else by its typeof.
      const globals = await tab.evaluate(() => {
        const win = window as any;
        const summary: Record<string, any> = {};
        [
          '__NEXT_DATA__',
          '__PRELOADED_STATE__',
          '__INITIAL_STATE__',
          'dispensaries',
          '__data',
          'pageData',
          '__remixContext',
        ].forEach((name) => {
          const value = win[name];
          if (value) {
            summary[name] = typeof value === 'object' ? Object.keys(value) : typeof value;
          }
        });
        return summary;
      });
      console.log('Window data:', globals);

      // A short excerpt of the page text as a last resort.
      const preview = await tab.evaluate(() => document.body.innerText.slice(0, 500));
      console.log('\nPage text preview:', preview);
      return;
    }

    console.log('\n✅ __NEXT_DATA__ found!');
    console.log('Keys:', Object.keys(payload));

    const pageProps = payload.props?.pageProps;
    if (!pageProps) {
      return;
    }
    console.log('pageProps keys:', Object.keys(pageProps));

    const dispensaries = pageProps.dispensaries;
    if (!dispensaries) {
      return;
    }
    console.log('Dispensaries count:', dispensaries.length);

    // Show the structure of the first dispensary, truncated to 1000 characters.
    const sample = dispensaries[0];
    if (sample) {
      console.log('\nFirst dispensary keys:', Object.keys(sample));
      console.log('First dispensary sample:', JSON.stringify(sample, null, 2).slice(0, 1000));
    }
  } finally {
    await browser.close();
  }
}
// Entry point: run the script. On failure, log the error and set a non-zero
// exit code so that shells and CI can tell the run did not succeed.
// process.exitCode is used instead of process.exit() so pending output is
// still flushed before the process ends.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});