Files
cannaiq/backend/src/discovery/routes.ts
Kelly 2f483b3084 feat: SEO template library, discovery pipeline, and orchestrator enhancements
## SEO Template Library
- Add complete template library with 7 page types (state, city, category, brand, product, search, regeneration)
- Add Template Library tab in SEO Orchestrator with accordion-based editors
- Add template preview, validation, and variable injection engine
- Add API endpoints: /api/seo/templates, preview, validate, generate, regenerate

## Discovery Pipeline
- Add promotion.ts for discovery location validation and promotion
- Add discover-all-states.ts script for multi-state discovery
- Add promotion log migration (067)
- Enhance discovery routes and types

## Orchestrator & Admin
- Add crawl_enabled filter to stores page
- Add API permissions page
- Add job queue management
- Add price analytics routes
- Add markets and intelligence routes
- Enhance dashboard and worker monitoring

## Infrastructure
- Add migrations for worker definitions, SEO settings, field alignment
- Add canonical pipeline for scraper v2
- Update hydration and sync orchestrator
- Enhance multi-state query service

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 00:05:34 -07:00

983 lines
27 KiB
TypeScript

/**
* Dutchie Discovery API Routes
*
* Express routes for the Dutchie store discovery pipeline.
* Provides endpoints for discovering, listing, and verifying locations.
*/
import { Router, Request, Response } from 'express';
import { Pool } from 'pg';
import {
runFullDiscovery,
discoverCity,
discoverState,
getDiscoveryStats,
} from './discovery-crawler';
import {
discoverCities,
getCitiesToCrawl,
getCityBySlug,
seedKnownCities,
} from './city-discovery';
import { getCitiesForState } from './location-discovery';
import {
DiscoveryLocation,
DiscoveryCity,
DiscoveryStatus,
mapLocationRowToLocation,
mapCityRowToCity,
} from './types';
import {
validateDiscoveredLocations,
promoteDiscoveredLocations,
promoteSingleLocation,
} from './promotion';
/**
 * Build the Express router that serves the discovery API.
 *
 * Registers the location, city, statistics, verification, admin-discovery,
 * promotion and promotion-log endpoints on a fresh Router and returns it.
 *
 * @param pool - pg connection pool used by the handlers for their SQL queries
 *   and handed to the discovery helpers that take a pool argument.
 * @returns The configured router; the caller decides where to mount it.
 */
export function createDiscoveryRoutes(pool: Pool): Router {
const router = Router();
// ============================================================
// DISCOVERY LOCATIONS
// ============================================================
/**
 * GET /api/discovery/locations
 * List active discovered locations with optional filtering and pagination.
 *
 * Query params (all optional):
 * - status, stateCode, countryCode: exact-match filters
 * - city: case-insensitive substring match on the city column
 * - platform: exact match, defaults to 'dutchie'
 * - search: case-insensitive substring match on name or platform slug
 * - hasDispensary: 'true' / 'false' to require / exclude a linked dispensary
 * - limit (default 50), offset (default 0): pagination; values that are not
 *   non-negative integers fall back to the defaults
 *
 * Responds with { locations, total, limit, offset }, where total counts all
 * rows matching the filters regardless of pagination.
 */
router.get('/locations', async (req: Request, res: Response) => {
  try {
    const {
      status,
      stateCode,
      countryCode,
      city,
      platform = 'dutchie',
      search,
      hasDispensary,
      limit = '50',
      offset = '0',
    } = req.query;

    // A non-numeric or negative limit/offset used to be bound as NaN / a
    // negative number, which Postgres rejects and the client saw as a 500.
    const requestedLimit = Number.parseInt(String(limit), 10);
    const requestedOffset = Number.parseInt(String(offset), 10);
    const pageSize =
      Number.isSafeInteger(requestedLimit) && requestedLimit >= 0 ? requestedLimit : 50;
    const pageOffset =
      Number.isSafeInteger(requestedOffset) && requestedOffset >= 0 ? requestedOffset : 0;

    const conditions: string[] = ['dl.platform = $1', 'dl.active = TRUE'];
    const filterParams: any[] = [platform];
    // Binds a value and records its condition using the matching $n placeholder.
    const addFilter = (value: unknown, toSql: (placeholder: string) => string): void => {
      filterParams.push(value);
      conditions.push(toSql(`$${filterParams.length}`));
    };

    if (status) {
      addFilter(status, (p) => `dl.status = ${p}`);
    }
    if (stateCode) {
      addFilter(stateCode, (p) => `dl.state_code = ${p}`);
    }
    if (countryCode) {
      addFilter(countryCode, (p) => `dl.country_code = ${p}`);
    }
    if (city) {
      addFilter(`%${city}%`, (p) => `dl.city ILIKE ${p}`);
    }
    if (search) {
      addFilter(`%${search}%`, (p) => `(dl.name ILIKE ${p} OR dl.platform_slug ILIKE ${p})`);
    }
    if (hasDispensary === 'true') {
      conditions.push('dl.dispensary_id IS NOT NULL');
    } else if (hasDispensary === 'false') {
      conditions.push('dl.dispensary_id IS NULL');
    }

    const whereClause = `WHERE ${conditions.join(' AND ')}`;
    const limitPlaceholder = `$${filterParams.length + 1}`;
    const offsetPlaceholder = `$${filterParams.length + 2}`;

    // The page query and the count query are independent, so run them together.
    const [{ rows }, { rows: countRows }] = await Promise.all([
      pool.query(
        `
        SELECT
          dl.*,
          d.name as dispensary_name,
          dc.city_name as discovery_city_name
        FROM dutchie_discovery_locations dl
        LEFT JOIN dispensaries d ON dl.dispensary_id = d.id
        LEFT JOIN dutchie_discovery_cities dc ON dl.discovery_city_id = dc.id
        ${whereClause}
        ORDER BY dl.first_seen_at DESC
        LIMIT ${limitPlaceholder} OFFSET ${offsetPlaceholder}
        `,
        [...filterParams, pageSize, pageOffset]
      ),
      pool.query(
        `SELECT COUNT(*) as total FROM dutchie_discovery_locations dl ${whereClause}`,
        filterParams
      ),
    ]);

    const locations = rows.map((row: any) => ({
      ...mapLocationRowToLocation(row),
      dispensaryName: row.dispensary_name,
      discoveryCityName: row.discovery_city_name,
    }));

    res.json({
      locations,
      // COUNT(*) comes back from pg as a string.
      total: parseInt(countRows[0]?.total || '0', 10),
      limit: pageSize,
      offset: pageOffset,
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
/**
 * GET /api/discovery/locations/pending
 * Get active locations still in 'discovered' status (awaiting verification).
 *
 * Must be registered before /locations/:id: Express matches routes in
 * registration order, and ':id' would otherwise capture the literal "pending".
 *
 * Query params (all optional): stateCode, countryCode, limit (default 100;
 * values that are not non-negative integers fall back to the default).
 * Responds with { locations, total }, where total is the number of rows returned.
 */
router.get('/locations/pending', async (req: Request, res: Response) => {
  try {
    const { stateCode, countryCode, limit = '100' } = req.query;

    const requestedLimit = Number.parseInt(String(limit), 10);
    const pageSize =
      Number.isSafeInteger(requestedLimit) && requestedLimit >= 0 ? requestedLimit : 100;

    let whereClause = `WHERE status = 'discovered' AND active = TRUE`;
    const params: any[] = [];
    if (stateCode) {
      params.push(stateCode);
      whereClause += ` AND state_code = $${params.length}`;
    }
    if (countryCode) {
      params.push(countryCode);
      whereClause += ` AND country_code = $${params.length}`;
    }
    params.push(pageSize);

    const { rows } = await pool.query(
      `
      SELECT * FROM dutchie_discovery_locations
      ${whereClause}
      ORDER BY state_code, city, name
      LIMIT $${params.length}
      `,
      params
    );

    res.json({
      locations: rows.map((row: any) => mapLocationRowToLocation(row)),
      total: rows.length,
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
/**
 * GET /api/discovery/locations/:id
 * Get a single discovery location, together with the name and menu URL of its
 * linked dispensary and the name of its discovery city when those exist.
 *
 * Responds 400 when :id is not a plain non-negative integer, 404 when no row
 * has that id.
 */
router.get('/locations/:id', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    // Reject malformed ids up front instead of letting NaN reach Postgres.
    if (!/^\d+$/.test(id) || !Number.isSafeInteger(Number(id))) {
      return res.status(400).json({ error: 'Invalid location id' });
    }

    const { rows } = await pool.query(
      `
      SELECT
        dl.*,
        d.name as dispensary_name,
        d.menu_url as dispensary_menu_url,
        dc.city_name as discovery_city_name
      FROM dutchie_discovery_locations dl
      LEFT JOIN dispensaries d ON dl.dispensary_id = d.id
      LEFT JOIN dutchie_discovery_cities dc ON dl.discovery_city_id = dc.id
      WHERE dl.id = $1
      `,
      [Number(id)]
    );
    if (rows.length === 0) {
      return res.status(404).json({ error: 'Location not found' });
    }

    const row = rows[0];
    res.json({
      ...mapLocationRowToLocation(row),
      dispensaryName: row.dispensary_name,
      dispensaryMenuUrl: row.dispensary_menu_url,
      discoveryCityName: row.discovery_city_name,
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
// ============================================================
// DISCOVERY CITIES
// ============================================================
/**
 * GET /api/discovery/cities
 * List discovery cities with optional filtering and pagination.
 *
 * Query params (all optional):
 * - stateCode, countryCode: exact-match filters
 * - crawlEnabled: 'true' / 'false' to filter on the crawl_enabled flag
 * - platform: exact match, defaults to 'dutchie'
 * - limit (default 100), offset (default 0): pagination; values that are not
 *   non-negative integers fall back to the defaults
 *
 * Each city carries actualLocationCount, the number of discovery locations
 * currently pointing at it. Responds with { cities, total, limit, offset }.
 */
router.get('/cities', async (req: Request, res: Response) => {
  try {
    const {
      stateCode,
      countryCode,
      crawlEnabled,
      platform = 'dutchie',
      limit = '100',
      offset = '0',
    } = req.query;

    // A non-numeric or negative limit/offset used to be bound as NaN / a
    // negative number, which Postgres rejects and the client saw as a 500.
    const requestedLimit = Number.parseInt(String(limit), 10);
    const requestedOffset = Number.parseInt(String(offset), 10);
    const pageSize =
      Number.isSafeInteger(requestedLimit) && requestedLimit >= 0 ? requestedLimit : 100;
    const pageOffset =
      Number.isSafeInteger(requestedOffset) && requestedOffset >= 0 ? requestedOffset : 0;

    // Columns are qualified with the dc alias used by both queries below.
    const conditions: string[] = ['dc.platform = $1'];
    const filterParams: any[] = [platform];
    if (stateCode) {
      filterParams.push(stateCode);
      conditions.push(`dc.state_code = $${filterParams.length}`);
    }
    if (countryCode) {
      filterParams.push(countryCode);
      conditions.push(`dc.country_code = $${filterParams.length}`);
    }
    if (crawlEnabled === 'true') {
      conditions.push('dc.crawl_enabled = TRUE');
    } else if (crawlEnabled === 'false') {
      conditions.push('dc.crawl_enabled = FALSE');
    }

    const whereClause = `WHERE ${conditions.join(' AND ')}`;
    const limitPlaceholder = `$${filterParams.length + 1}`;
    const offsetPlaceholder = `$${filterParams.length + 2}`;

    const { rows } = await pool.query(
      `
      SELECT
        dc.*,
        (SELECT COUNT(*) FROM dutchie_discovery_locations dl WHERE dl.discovery_city_id = dc.id) as actual_location_count
      FROM dutchie_discovery_cities dc
      ${whereClause}
      ORDER BY dc.country_code, dc.state_code, dc.city_name
      LIMIT ${limitPlaceholder} OFFSET ${offsetPlaceholder}
      `,
      [...filterParams, pageSize, pageOffset]
    );
    const { rows: countRows } = await pool.query(
      `SELECT COUNT(*) as total FROM dutchie_discovery_cities dc ${whereClause}`,
      filterParams
    );

    const cities = rows.map((row: any) => ({
      ...mapCityRowToCity(row),
      // COUNT(*) comes back from pg as a string.
      actualLocationCount: parseInt(row.actual_location_count || '0', 10),
    }));

    res.json({
      cities,
      total: parseInt(countRows[0]?.total || '0', 10),
      limit: pageSize,
      offset: pageOffset,
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
// ============================================================
// STATISTICS
// ============================================================
/**
 * GET /api/discovery/stats
 * Responds with the value resolved by getDiscoveryStats(pool), as JSON.
 * Any failure is reported as a 500 carrying the error's message.
 */
router.get('/stats', async (_req: Request, res: Response) => {
  try {
    const snapshot = await getDiscoveryStats(pool);
    res.json(snapshot);
  } catch (failure: any) {
    const body = { error: failure.message };
    res.status(500).json(body);
  }
});
// ============================================================
// VERIFICATION ACTIONS
// ============================================================
/**
 * POST /api/discovery/locations/:id/verify
 * Verify a discovered location and create a new canonical dispensary from it.
 *
 * Body: { verifiedBy?: string } (defaults to 'admin').
 *
 * The dispensary INSERT and the discovery-location UPDATE run in a single
 * transaction with the location row locked (SELECT ... FOR UPDATE). Without
 * that, a failure between the two statements left an orphan dispensary, and
 * two concurrent requests could both pass the status check and create
 * duplicates.
 *
 * Responds 400 for a malformed id or a location whose status is not
 * 'discovered', 404 when the location does not exist, 500 on any other error.
 */
router.post('/locations/:id/verify', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const { verifiedBy = 'admin' } = req.body;

    // Strict check: parseInt would accept "12abc" for the SELECT while the raw
    // string later failed the UPDATE, after the dispensary was already created.
    if (!/^\d+$/.test(id) || !Number.isSafeInteger(Number(id))) {
      return res.status(400).json({ error: 'Invalid location id' });
    }
    const discoveryId = Number(id);

    const client = await pool.connect();
    try {
      await client.query('BEGIN');

      // Lock the row so concurrent verify/link/reject calls are serialized.
      const { rows: locRows } = await client.query(
        `SELECT * FROM dutchie_discovery_locations WHERE id = $1 FOR UPDATE`,
        [discoveryId]
      );
      if (locRows.length === 0) {
        await client.query('ROLLBACK');
        return res.status(404).json({ error: 'Location not found' });
      }
      const location = locRows[0];
      if (location.status !== 'discovered') {
        await client.query('ROLLBACK');
        return res.status(400).json({
          error: `Location already has status: ${location.status}`,
        });
      }

      // Create the canonical dispensary
      const { rows: dispRows } = await client.query(
        `
        INSERT INTO dispensaries (
          name,
          slug,
          address,
          city,
          state,
          zip,
          latitude,
          longitude,
          timezone,
          menu_type,
          menu_url,
          platform_dispensary_id,
          active,
          created_at,
          updated_at
        ) VALUES (
          $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, TRUE, NOW(), NOW()
        )
        RETURNING id
        `,
        [
          location.name,
          location.platform_slug,
          location.address_line1,
          location.city,
          location.state_code,
          location.postal_code,
          location.latitude,
          location.longitude,
          location.timezone,
          location.platform,
          location.platform_menu_url,
          location.platform_location_id,
        ]
      );
      const dispensaryId = dispRows[0].id;

      // Point the discovery location at the new dispensary
      await client.query(
        `
        UPDATE dutchie_discovery_locations
        SET status = 'verified',
            dispensary_id = $1,
            verified_at = NOW(),
            verified_by = $2,
            updated_at = NOW()
        WHERE id = $3
        `,
        [dispensaryId, verifiedBy, discoveryId]
      );

      await client.query('COMMIT');

      res.json({
        success: true,
        action: 'created',
        discoveryId,
        dispensaryId,
        message: `Created new dispensary (ID: ${dispensaryId})`,
      });
    } catch (txError) {
      // Best-effort rollback; the original error is the one worth reporting.
      await client.query('ROLLBACK').catch(() => undefined);
      throw txError;
    } finally {
      client.release();
    }
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
/**
 * POST /api/discovery/locations/:id/link
 * Link a discovered location to an existing dispensary.
 *
 * Body: { dispensaryId: number (required), verifiedBy?: string = 'admin' }.
 *
 * Fills the dispensary's platform_dispensary_id, menu_url and menu_type from
 * the discovery location only where they are currently NULL (COALESCE), then
 * marks the location as 'merged'. Both updates run in one transaction with
 * the location row locked, so a failure or a concurrent request cannot leave
 * the two tables out of step.
 *
 * Responds 400 for a malformed id, a missing dispensaryId, or a location whose
 * status is not 'discovered'; 404 when the dispensary or location is missing.
 */
router.post('/locations/:id/link', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const { dispensaryId, verifiedBy = 'admin' } = req.body;

    // Strict check: parseInt would accept "12abc" for the SELECT while the raw
    // string later failed the UPDATE, after the dispensary was already modified.
    if (!/^\d+$/.test(id) || !Number.isSafeInteger(Number(id))) {
      return res.status(400).json({ error: 'Invalid location id' });
    }
    const discoveryId = Number(id);

    if (!dispensaryId) {
      return res.status(400).json({ error: 'dispensaryId is required' });
    }

    const client = await pool.connect();
    try {
      await client.query('BEGIN');

      // Verify dispensary exists
      const { rows: dispRows } = await client.query(
        `SELECT id, name FROM dispensaries WHERE id = $1`,
        [dispensaryId]
      );
      if (dispRows.length === 0) {
        await client.query('ROLLBACK');
        return res.status(404).json({ error: 'Dispensary not found' });
      }

      // Lock the discovery location so concurrent actions are serialized.
      const { rows: locRows } = await client.query(
        `SELECT * FROM dutchie_discovery_locations WHERE id = $1 FOR UPDATE`,
        [discoveryId]
      );
      if (locRows.length === 0) {
        await client.query('ROLLBACK');
        return res.status(404).json({ error: 'Location not found' });
      }
      const location = locRows[0];
      if (location.status !== 'discovered') {
        await client.query('ROLLBACK');
        return res.status(400).json({
          error: `Location already has status: ${location.status}`,
        });
      }

      // Update dispensary with platform info if missing
      await client.query(
        `
        UPDATE dispensaries
        SET platform_dispensary_id = COALESCE(platform_dispensary_id, $1),
            menu_url = COALESCE(menu_url, $2),
            menu_type = COALESCE(menu_type, $3),
            updated_at = NOW()
        WHERE id = $4
        `,
        [
          location.platform_location_id,
          location.platform_menu_url,
          location.platform,
          dispensaryId,
        ]
      );

      // Update the discovery location
      await client.query(
        `
        UPDATE dutchie_discovery_locations
        SET status = 'merged',
            dispensary_id = $1,
            verified_at = NOW(),
            verified_by = $2,
            updated_at = NOW()
        WHERE id = $3
        `,
        [dispensaryId, verifiedBy, discoveryId]
      );

      await client.query('COMMIT');

      res.json({
        success: true,
        action: 'linked',
        discoveryId,
        dispensaryId,
        dispensaryName: dispRows[0].name,
        message: `Linked to existing dispensary: ${dispRows[0].name}`,
      });
    } catch (txError) {
      // Best-effort rollback; the original error is the one worth reporting.
      await client.query('ROLLBACK').catch(() => undefined);
      throw txError;
    } finally {
      client.release();
    }
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
/**
 * POST /api/discovery/locations/:id/reject
 * Reject a discovered location.
 *
 * Body: { reason?: string, verifiedBy?: string = 'admin' }. The reason is
 * stored in the notes column ('Rejected by admin' when none is given).
 *
 * The status check is part of the UPDATE itself, so two concurrent actions on
 * the same location cannot both succeed.
 *
 * Responds 400 for a malformed id or a location whose status is not
 * 'discovered', 404 when the location does not exist.
 */
router.post('/locations/:id/reject', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const { reason, verifiedBy = 'admin' } = req.body;

    // Strict check so the same integer is used for every query below.
    if (!/^\d+$/.test(id) || !Number.isSafeInteger(Number(id))) {
      return res.status(400).json({ error: 'Invalid location id' });
    }
    const discoveryId = Number(id);

    const { rows: updatedRows } = await pool.query(
      `
      UPDATE dutchie_discovery_locations
      SET status = 'rejected',
          verified_at = NOW(),
          verified_by = $1,
          notes = $2,
          updated_at = NOW()
      WHERE id = $3
        AND status = 'discovered'
      RETURNING id
      `,
      [verifiedBy, reason || 'Rejected by admin', discoveryId]
    );

    if (updatedRows.length === 0) {
      // Nothing was updated: find out whether the row is missing or simply
      // not in the 'discovered' state, to keep the original 404 / 400 split.
      const { rows } = await pool.query(
        `SELECT status FROM dutchie_discovery_locations WHERE id = $1`,
        [discoveryId]
      );
      if (rows.length === 0) {
        return res.status(404).json({ error: 'Location not found' });
      }
      return res.status(400).json({
        error: `Location already has status: ${rows[0].status}`,
      });
    }

    res.json({
      success: true,
      action: 'rejected',
      discoveryId,
      message: 'Location rejected',
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
/**
 * POST /api/discovery/locations/:id/unreject
 * Restore a rejected location back to 'discovered' status, clearing
 * verified_at and verified_by.
 *
 * The status check is part of the UPDATE itself, so the transition only
 * happens if the row is still 'rejected' at the moment of the write.
 *
 * Responds 400 for a malformed id or a location that is not rejected, 404
 * when the location does not exist.
 */
router.post('/locations/:id/unreject', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;

    // Strict check so the same integer is used for every query below.
    if (!/^\d+$/.test(id) || !Number.isSafeInteger(Number(id))) {
      return res.status(400).json({ error: 'Invalid location id' });
    }
    const discoveryId = Number(id);

    const { rows: updatedRows } = await pool.query(
      `
      UPDATE dutchie_discovery_locations
      SET status = 'discovered',
          verified_at = NULL,
          verified_by = NULL,
          updated_at = NOW()
      WHERE id = $1
        AND status = 'rejected'
      RETURNING id
      `,
      [discoveryId]
    );

    if (updatedRows.length === 0) {
      // Nothing was updated: distinguish a missing row from a wrong status.
      const { rows } = await pool.query(
        `SELECT status FROM dutchie_discovery_locations WHERE id = $1`,
        [discoveryId]
      );
      if (rows.length === 0) {
        return res.status(404).json({ error: 'Location not found' });
      }
      return res.status(400).json({
        error: `Location is not rejected. Current status: ${rows[0].status}`,
      });
    }

    res.json({
      success: true,
      action: 'unrejected',
      discoveryId,
      message: 'Location restored to discovered status',
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
// ============================================================
// DISCOVERY ADMIN ACTIONS
// ============================================================
/**
 * POST /api/discovery/admin/discover-state
 * Kick off discovery for one state and respond once discoverState() resolves.
 *
 * Body: { stateCode (required), dryRun = false, cityLimit = 100 }.
 * Responds 400 when stateCode is missing, otherwise { success, stateCode, result }.
 */
router.post('/admin/discover-state', async (req: Request, res: Response) => {
  try {
    const {
      stateCode: targetState,
      dryRun: isDryRun = false,
      cityLimit: maxCities = 100,
    } = req.body;

    if (!targetState) {
      res.status(400).json({ error: 'stateCode is required' });
      return;
    }

    console.log(`[Discovery API] Starting state discovery for ${targetState}`);

    const discoveryOptions = {
      dryRun: isDryRun,
      cityLimit: maxCities,
      verbose: true,
    };
    const outcome = await discoverState(pool, targetState, discoveryOptions);

    res.json({
      success: true,
      stateCode: targetState,
      result: outcome,
    });
  } catch (failure: any) {
    res.status(500).json({ error: failure.message });
  }
});
/**
 * POST /api/discovery/admin/discover-city
 * Kick off discovery for one city and respond once discoverCity() resolves.
 *
 * Body: { citySlug (required), stateCode, countryCode = 'US', dryRun = false }.
 * Responds 400 when citySlug is missing, 404 when discoverCity() yields a
 * falsy result, otherwise { success, citySlug, result }.
 */
router.post('/admin/discover-city', async (req: Request, res: Response) => {
  try {
    const {
      citySlug: slug,
      stateCode: state,
      countryCode: country = 'US',
      dryRun: isDryRun = false,
    } = req.body;

    if (!slug) {
      res.status(400).json({ error: 'citySlug is required' });
      return;
    }

    console.log(`[Discovery API] Starting city discovery for ${slug}`);

    const discoveryOptions = {
      stateCode: state,
      countryCode: country,
      dryRun: isDryRun,
      verbose: true,
    };
    const outcome = await discoverCity(pool, slug, discoveryOptions);

    if (outcome) {
      res.json({
        success: true,
        citySlug: slug,
        result: outcome,
      });
      return;
    }

    res.status(404).json({ error: `City not found: ${slug}` });
  } catch (failure: any) {
    res.status(500).json({ error: failure.message });
  }
});
/**
 * POST /api/discovery/admin/run-full
 * Run the full discovery pipeline and respond once runFullDiscovery() resolves.
 *
 * Body (all optional): stateCode, countryCode = 'US', cityLimit = 50,
 * skipCityDiscovery = false, onlyStale = true, staleDays = 7, dryRun = false.
 * Responds with { success, result }.
 */
router.post('/admin/run-full', async (req: Request, res: Response) => {
  try {
    const {
      stateCode,
      countryCode = 'US',
      cityLimit = 50,
      skipCityDiscovery = false,
      onlyStale = true,
      staleDays = 7,
      dryRun = false,
    } = req.body;

    console.log(`[Discovery API] Starting full discovery`);

    // Forward the request settings verbatim, always with verbose logging on.
    const pipelineOptions = {
      stateCode,
      countryCode,
      cityLimit,
      skipCityDiscovery,
      onlyStale,
      staleDays,
      dryRun,
      verbose: true,
    };
    const outcome = await runFullDiscovery(pool, pipelineOptions);

    res.json({ success: true, result: outcome });
  } catch (failure: any) {
    res.status(500).json({ error: failure.message });
  }
});
/**
 * POST /api/discovery/admin/seed-cities
 * Seed the cities of one state.
 *
 * Body: { stateCode (required) }.
 * City names come from getCitiesForState(); each gets a slug (lower-cased,
 * whitespace runs turned into '-', anything outside [a-z0-9-] dropped) and is
 * passed to seedKnownCities(). Responds 400 when stateCode is missing or no
 * cities were returned, otherwise { success, stateCode, ...seedResult }.
 */
router.post('/admin/seed-cities', async (req: Request, res: Response) => {
  try {
    const { stateCode: requestedState } = req.body;
    if (!requestedState) {
      res.status(400).json({ error: 'stateCode is required' });
      return;
    }

    // Dynamically fetch cities from Dutchie for any state
    const names = await getCitiesForState(requestedState as string);
    if (names.length === 0) {
      res.status(400).json({
        error: `No cities found for state: ${requestedState}`,
      });
      return;
    }

    // Convert to seed format
    const seeds = names.map((cityName) => {
      const hyphenated = cityName.toLowerCase().replace(/\s+/g, '-');
      return {
        name: cityName,
        slug: hyphenated.replace(/[^a-z0-9-]/g, ''),
        stateCode: requestedState as string,
      };
    });

    const seedResult = await seedKnownCities(pool, seeds);

    res.json({
      success: true,
      stateCode: requestedState,
      ...seedResult,
    });
  } catch (failure: any) {
    res.status(500).json({ error: failure.message });
  }
});
/**
 * GET /api/discovery/admin/match-candidates/:id
 * Find up to 10 existing dispensaries, in the same state as the discovery
 * location, that might be the same store.
 *
 * A dispensary is a candidate when its name matches the location name
 * (case-insensitive), its name contains the first word of the location name,
 * its city matches, or it lies within 5 miles (great-circle distance, Earth
 * radius 3959 mi) when both sides have coordinates. Name matches sort first,
 * then nearest first.
 *
 * Responds 400 for a malformed id, 404 when the location does not exist,
 * otherwise { location, candidates } with distanceMiles rounded to 0.1 mi
 * (null when a distance could not be computed).
 */
router.get('/admin/match-candidates/:id', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    if (!/^\d+$/.test(id) || !Number.isSafeInteger(Number(id))) {
      return res.status(400).json({ error: 'Invalid location id' });
    }

    // Get the discovery location
    const { rows: locRows } = await pool.query(
      `SELECT * FROM dutchie_discovery_locations WHERE id = $1`,
      [Number(id)]
    );
    if (locRows.length === 0) {
      return res.status(404).json({ error: 'Location not found' });
    }
    const location = locRows[0];

    // A NULL name used to crash on .split(); a name with a leading space
    // produced the pattern '%%', which matches every dispensary in the state.
    const locationName: string = typeof location.name === 'string' ? location.name : '';
    const firstWord = locationName.trim().split(/\s+/)[0];
    const partialNamePattern = firstWord ? `%${firstWord}%` : null;

    // Spherical law of cosines. Rounding can push the cosine a hair outside
    // [-1, 1] for (near-)identical points, and Postgres' acos() raises
    // "input is out of range" in that case, so clamp before calling it.
    // Constant SQL fragment: contains placeholders only, no request data.
    const distanceSql = `(3959 * acos(LEAST(1.0::float, GREATEST(-1.0::float,
          cos(radians($5::float)) * cos(radians(d.latitude)) *
          cos(radians(d.longitude) - radians($6::float)) +
          sin(radians($5::float)) * sin(radians(d.latitude))
        ))))`;

    // Find potential matches by name similarity and location
    const { rows: candidates } = await pool.query(
      `
      SELECT
        d.id,
        d.name,
        d.city,
        d.state,
        d.address,
        d.menu_type,
        d.platform_dispensary_id,
        d.menu_url,
        d.latitude,
        d.longitude,
        CASE
          WHEN d.name ILIKE $1 THEN 'exact_name'
          WHEN d.name ILIKE $2 THEN 'partial_name'
          WHEN d.city ILIKE $3 AND d.state = $4 THEN 'same_city'
          ELSE 'location_match'
        END as match_type,
        -- Distance in miles if coordinates available
        CASE
          WHEN d.latitude IS NOT NULL AND d.longitude IS NOT NULL
            AND $5::float IS NOT NULL AND $6::float IS NOT NULL
          THEN ${distanceSql}
          ELSE NULL
        END as distance_miles
      FROM dispensaries d
      WHERE d.state = $4
        AND (
          d.name ILIKE $1
          OR d.name ILIKE $2
          OR d.city ILIKE $3
          OR (
            d.latitude IS NOT NULL
            AND d.longitude IS NOT NULL
            AND $5::float IS NOT NULL
            AND $6::float IS NOT NULL
            AND ${distanceSql} < 5
          )
        )
      ORDER BY
        CASE
          WHEN d.name ILIKE $1 THEN 1
          WHEN d.name ILIKE $2 THEN 2
          ELSE 3
        END,
        distance_miles NULLS LAST
      LIMIT 10
      `,
      [
        location.name,
        partialNamePattern,
        location.city,
        location.state_code,
        location.latitude,
        location.longitude,
      ]
    );

    res.json({
      location: mapLocationRowToLocation(location),
      candidates: candidates.map((c: any) => ({
        id: c.id,
        name: c.name,
        city: c.city,
        state: c.state,
        address: c.address,
        menuType: c.menu_type,
        platformDispensaryId: c.platform_dispensary_id,
        menuUrl: c.menu_url,
        matchType: c.match_type,
        // Compare against null explicitly: a distance of exactly 0 miles is
        // the strongest possible match and must not be reported as null.
        distanceMiles:
          c.distance_miles != null ? Math.round(Number(c.distance_miles) * 10) / 10 : null,
      })),
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
// ============================================================
// PROMOTION ENDPOINTS
// ============================================================
/**
 * GET /api/discovery/admin/validate
 * Run validateDiscoveredLocations() ahead of a promotion.
 *
 * Query params: stateCode (optional), forwarded as-is.
 * Responds with { success: true, ...summary }.
 */
router.get('/admin/validate', async (req: Request, res: Response) => {
  try {
    const requestedState = req.query.stateCode as string | undefined;
    const validation = await validateDiscoveredLocations(requestedState);
    res.json({ success: true, ...validation });
  } catch (failure: any) {
    res.status(500).json({ error: failure.message });
  }
});
/**
 * POST /api/discovery/admin/promote
 * Promote discovered locations to dispensaries via promoteDiscoveredLocations().
 *
 * Request body (JSON):
 * - stateCode: optional state filter (e.g., 'CA', 'AZ'); all states when omitted
 * - dryRun: defaults to false; forwarded to promoteDiscoveredLocations()
 *
 * Responds with { success: true, ...summary }.
 */
router.post('/admin/promote', async (req: Request, res: Response) => {
  try {
    const { stateCode: targetState, dryRun: isDryRun = false } = req.body;

    console.log(`[Discovery API] Starting promotion for ${targetState || 'all states'} (dryRun=${isDryRun})`);

    const promotion = await promoteDiscoveredLocations(targetState, isDryRun);
    res.json({ success: true, ...promotion });
  } catch (failure: any) {
    res.status(500).json({ error: failure.message });
  }
});
/**
 * POST /api/discovery/admin/promote/:id
 * Promote a single discovery location by ID via promoteSingleLocation().
 *
 * Responds 400 when :id is not a plain non-negative integer (previously NaN
 * was handed to promoteSingleLocation), otherwise { success: true, ...result }.
 */
router.post('/admin/promote/:id', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    if (!/^\d+$/.test(id) || !Number.isSafeInteger(Number(id))) {
      return res.status(400).json({ error: 'Invalid location id' });
    }

    console.log(`[Discovery API] Promoting single location ${id}`);
    const result = await promoteSingleLocation(Number(id));
    res.json({
      success: true,
      ...result,
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
// ============================================================
// PROMOTION LOG
// ============================================================
/**
 * GET /api/discovery/promotion-log
 * Get promotion audit log entries, newest first.
 *
 * Query params (all optional):
 * - state: exact match on the log entry's state_code
 * - dispensary_id: integer; 400 when it cannot be parsed
 * - limit: default 100; values that are not non-negative integers fall back
 *   to the default
 *
 * Responds with { logs }, each entry enriched with the names of the related
 * discovery location and dispensary when those rows still exist.
 */
router.get('/promotion-log', async (req: Request, res: Response) => {
  try {
    const { state, dispensary_id, limit = '100' } = req.query;

    const requestedLimit = Number.parseInt(String(limit), 10);
    const pageSize =
      Number.isSafeInteger(requestedLimit) && requestedLimit >= 0 ? requestedLimit : 100;

    let whereClause = 'WHERE 1=1';
    const params: any[] = [];
    if (state) {
      params.push(state);
      whereClause += ` AND pl.state_code = $${params.length}`;
    }
    if (dispensary_id) {
      // A non-numeric value used to be bound as NaN and fail inside Postgres.
      const dispensaryId = Number.parseInt(String(dispensary_id), 10);
      if (!Number.isSafeInteger(dispensaryId)) {
        return res.status(400).json({ error: 'Invalid dispensary_id' });
      }
      params.push(dispensaryId);
      whereClause += ` AND pl.dispensary_id = $${params.length}`;
    }
    params.push(pageSize);

    const { rows } = await pool.query(
      `
      SELECT
        pl.*,
        dl.name as discovery_name,
        d.name as dispensary_name
      FROM dutchie_promotion_log pl
      LEFT JOIN dutchie_discovery_locations dl ON pl.discovery_id = dl.id
      LEFT JOIN dispensaries d ON pl.dispensary_id = d.id
      ${whereClause}
      ORDER BY pl.created_at DESC
      LIMIT $${params.length}
      `,
      params
    );

    res.json({
      logs: rows.map((r: any) => ({
        id: r.id,
        discoveryId: r.discovery_id,
        dispensaryId: r.dispensary_id,
        action: r.action,
        stateCode: r.state_code,
        storeName: r.store_name,
        validationErrors: r.validation_errors,
        fieldChanges: r.field_changes,
        triggeredBy: r.triggered_by,
        createdAt: r.created_at,
        discoveryName: r.discovery_name,
        dispensaryName: r.dispensary_name,
      })),
    });
  } catch (error: any) {
    res.status(500).json({ error: error.message });
  }
});
return router;
}
export default createDiscoveryRoutes;