Add CLAUDE guidelines for consolidated pipeline

This commit is contained in:
Kelly
2025-12-02 13:28:23 -07:00
parent 9219d8a77a
commit 04b5c3bd09
32 changed files with 4485 additions and 169 deletions

View File

@@ -456,8 +456,12 @@ class ApiClient {
}
// Dispensary Schedule (new dispensary-centric API)
async getDispensarySchedules() {
return this.request<{ dispensaries: any[] }>('/api/schedule/dispensaries');
// Fetches dispensary schedules, optionally narrowed by state and/or search text.
// A filter that is missing or empty is left out of the query string entirely,
// so calling with no filters hits the bare endpoint.
async getDispensarySchedules(filters?: { state?: string; search?: string }) {
  const query = new URLSearchParams();

  const state = filters?.state;
  if (state) {
    query.append('state', state);
  }

  const search = filters?.search;
  if (search) {
    query.append('search', search);
  }

  // Only attach "?" when at least one filter was actually added.
  const encoded = query.toString();
  let endpoint = '/api/schedule/dispensaries';
  if (encoded) {
    endpoint += `?${encoded}`;
  }

  return this.request<{ dispensaries: any[] }>(endpoint);
}
async getDispensarySchedule(dispensaryId: number) {
@@ -482,6 +486,63 @@ class ApiClient {
});
}
// Sends a POST to the resolve-platform-id endpoint of the given dispensary
// and returns whatever this.request yields for that call.
async resolvePlatformId(dispensaryId: number) {
  const endpoint = `/api/schedule/dispensaries/${dispensaryId}/resolve-platform-id`;

  return this.request<{
    success: boolean;
    platform_dispensary_id?: string;
    slug_resolved?: string;
    message: string;
    already_resolved?: boolean;
    error?: string;
  }>(endpoint, { method: 'POST' });
}
// Sends a POST to the detect-menu-type endpoint of the given dispensary
// and returns whatever this.request yields for that call.
async detectMenuType(dispensaryId: number) {
  const endpoint = `/api/schedule/dispensaries/${dispensaryId}/detect-menu-type`;

  return this.request<{
    success: boolean;
    menu_type: string;
    url_checked: string;
    message: string;
  }>(endpoint, { method: 'POST' });
}
// Sends a POST to the refresh-detection endpoint of the given dispensary.
// The typed response carries the menu type, the platform ID (or null) and
// a can_crawl flag alongside the success indicator.
async refreshDetection(dispensaryId: number) {
  const endpoint = `/api/schedule/dispensaries/${dispensaryId}/refresh-detection`;

  return this.request<{
    success: boolean;
    menu_type: string;
    platform_dispensary_id: string | null;
    url_checked: string;
    can_crawl: boolean;
  }>(endpoint, { method: 'POST' });
}
// Sends a PUT to the toggle-active endpoint of the given dispensary with
// the desired flag serialized as { is_active } in the JSON body.
async toggleDispensarySchedule(dispensaryId: number, isActive: boolean) {
  const endpoint = `/api/schedule/dispensaries/${dispensaryId}/toggle-active`;
  const payload = JSON.stringify({ is_active: isActive });

  return this.request<{
    success: boolean;
    schedule: any;
    message: string;
  }>(endpoint, { method: 'PUT', body: payload });
}
// Sends a DELETE to the schedule endpoint of the given dispensary
// and returns whatever this.request yields for that call.
async deleteDispensarySchedule(dispensaryId: number) {
  const endpoint = `/api/schedule/dispensaries/${dispensaryId}/schedule`;

  return this.request<{
    success: boolean;
    deleted: boolean;
    message: string;
  }>(endpoint, { method: 'DELETE' });
}
async getCrawlJobs(limit?: number) {
const params = limit ? `?limit=${limit}` : '';
return this.request<{ jobs: any[] }>(`/api/schedule/jobs${params}`);

View File

@@ -18,21 +18,27 @@ interface DispensarySchedule {
dispensary_name: string;
city: string | null;
state: string | null;
dispensary_slug: string | null;
slug: string | null;
website: string | null;
menu_url: string | null;
menu_type: string | null;
platform_dispensary_id: string | null;
product_provider: string | null;
provider_type: string | null;
product_confidence: number | null;
product_crawler_mode: string | null;
last_product_scan_at: string | null;
is_active: boolean;
schedule_active: boolean;
interval_minutes: number;
interval_minutes: number | null;
priority: number;
last_run_at: string | null;
next_run_at: string | null;
schedule_last_status: string | null;
last_status: string | null;
last_summary: string | null;
schedule_last_error: string | null;
last_error: string | null;
consecutive_failures: number | null;
total_runs: number | null;
@@ -42,6 +48,9 @@ interface DispensarySchedule {
latest_job_status: string | null;
latest_job_started: string | null;
latest_products_found: number | null;
// Computed from view
can_crawl: boolean;
schedule_status_reason: string | null;
}
interface CrawlJob {
@@ -69,6 +78,21 @@ export function ScraperSchedule() {
// Listed as a dependency of the data-loading effect below.
// NOTE(review): presumably gates the 5-second polling interval — the guarding condition is not visible in this hunk; confirm.
const [autoRefresh, setAutoRefresh] = useState(true);
// Which tab's content is rendered.
const [activeTab, setActiveTab] = useState<'dispensaries' | 'jobs' | 'global'>('dispensaries');
// The next four hold the dispensary_id whose action is currently in flight
// (null when idle); each one disables the matching button on that row.
const [triggeringDispensary, setTriggeringDispensary] = useState<number | null>(null);
const [resolvingId, setResolvingId] = useState<number | null>(null);
const [refreshingDetection, setRefreshingDetection] = useState<number | null>(null);
const [togglingSchedule, setTogglingSchedule] = useState<number | null>(null);
// Client-side filter: when true, only rows with menu_type === 'dutchie' are shown.
const [filterDutchieOnly, setFilterDutchieOnly] = useState(false);
// Server-side filter: 'AZ' is sent to the API as the state filter; 'all' sends none.
const [stateFilter, setStateFilter] = useState<'all' | 'AZ'>('all');
// Debounced search text; this is the value loadData sends to the API.
const [searchTerm, setSearchTerm] = useState('');
// Raw text-box value; copied into searchTerm after the debounce delay.
const [searchInput, setSearchInput] = useState(''); // For debouncing
// Debounce: copy the raw input into searchTerm only once 300 ms have passed
// without another keystroke. Each change to searchInput cancels the pending
// timer through the effect's cleanup.
useEffect(() => {
  const pending = setTimeout(() => setSearchTerm(searchInput), 300);

  return () => {
    clearTimeout(pending);
  };
}, [searchInput]);
useEffect(() => {
loadData();
@@ -77,13 +101,22 @@ export function ScraperSchedule() {
const interval = setInterval(loadData, 5000);
return () => clearInterval(interval);
}
}, [autoRefresh]);
}, [autoRefresh, stateFilter, searchTerm]);
const loadData = async () => {
try {
// Build filters for dispensary schedules
const filters: { state?: string; search?: string } = {};
if (stateFilter === 'AZ') {
filters.state = 'AZ';
}
if (searchTerm.trim()) {
filters.search = searchTerm.trim();
}
const [globalData, dispensaryData, jobsData] = await Promise.all([
api.getGlobalSchedule(),
api.getDispensarySchedules(),
api.getDispensarySchedules(Object.keys(filters).length > 0 ? filters : undefined),
api.getDispensaryCrawlJobs(100)
]);
@@ -129,6 +162,62 @@ export function ScraperSchedule() {
}
};
// Resolves the platform ID for one dispensary, reports the outcome in an
// alert, then reloads the schedule data. resolvingId is set for the duration
// of the call so the row's "Resolve ID" button is disabled while in flight.
const handleResolvePlatformId = async (dispensaryId: number) => {
  setResolvingId(dispensaryId);
  try {
    const result = await api.resolvePlatformId(dispensaryId);
    if (result.success) {
      alert(result.message);
    } else {
      // Prefer the specific error text; fall back to the general message.
      alert(`Failed: ${result.error || result.message}`);
    }
    await loadData();
  } catch (error: unknown) {
    console.error('Failed to resolve platform ID:', error);
    // A thrown value is not guaranteed to be an Error: reading `.message`
    // unchecked throws on null/undefined and yields "undefined" for
    // primitives, so narrow before building the user-facing text.
    const detail =
      typeof error === 'object' && error !== null && 'message' in error
        ? String((error as { message: unknown }).message)
        : String(error);
    alert(`Error: ${detail}`);
  } finally {
    // Always clear the busy marker, whether the call succeeded or not.
    setResolvingId(null);
  }
};
// Re-runs detection for one dispensary, shows the detected menu type (and
// platform ID when present) in an alert, then reloads the schedule data.
// refreshingDetection is set for the duration of the call so the row's
// "Refresh" button is disabled while in flight.
const handleRefreshDetection = async (dispensaryId: number) => {
  setRefreshingDetection(dispensaryId);
  try {
    const result = await api.refreshDetection(dispensaryId);
    // NOTE(review): result.success is not inspected here, unlike in
    // handleResolvePlatformId — confirm whether the endpoint can answer with
    // success: false and, if so, what should be shown to the user.
    const platformSuffix = result.platform_dispensary_id
      ? `, Platform ID: ${result.platform_dispensary_id}`
      : '';
    alert(`Detected: ${result.menu_type}${platformSuffix}`);
    await loadData();
  } catch (error: unknown) {
    console.error('Failed to refresh detection:', error);
    // A thrown value is not guaranteed to be an Error: reading `.message`
    // unchecked throws on null/undefined and yields "undefined" for
    // primitives, so narrow before building the user-facing text.
    const detail =
      typeof error === 'object' && error !== null && 'message' in error
        ? String((error as { message: unknown }).message)
        : String(error);
    alert(`Error: ${detail}`);
  } finally {
    // Always clear the busy marker, whether the call succeeded or not.
    setRefreshingDetection(null);
  }
};
// Flips the active flag of one dispensary's schedule (sends the negation of
// currentActive) and reloads the schedule data. togglingSchedule is set for
// the duration of the call so the row's Enable/Disable button is disabled
// while in flight.
const handleToggleSchedule = async (dispensaryId: number, currentActive: boolean) => {
  setTogglingSchedule(dispensaryId);
  try {
    await api.toggleDispensarySchedule(dispensaryId, !currentActive);
    await loadData();
  } catch (error: unknown) {
    console.error('Failed to toggle schedule:', error);
    // A thrown value is not guaranteed to be an Error: reading `.message`
    // unchecked throws on null/undefined and yields "undefined" for
    // primitives, so narrow before building the user-facing text.
    const detail =
      typeof error === 'object' && error !== null && 'message' in error
        ? String((error as { message: unknown }).message)
        : String(error);
    alert(`Error: ${detail}`);
  } finally {
    // Always clear the busy marker, whether the call succeeded or not.
    setTogglingSchedule(null);
  }
};
// Deletes one dispensary's schedule after the user confirms, then reloads
// the schedule data. Does nothing if the confirmation dialog is dismissed.
const handleDeleteSchedule = async (dispensaryId: number) => {
  if (!confirm('Are you sure you want to delete this schedule?')) return;
  try {
    await api.deleteDispensarySchedule(dispensaryId);
    await loadData();
  } catch (error: unknown) {
    console.error('Failed to delete schedule:', error);
    // A thrown value is not guaranteed to be an Error: reading `.message`
    // unchecked throws on null/undefined and yields "undefined" for
    // primitives, so narrow before building the user-facing text.
    const detail =
      typeof error === 'object' && error !== null && 'message' in error
        ? String((error as { message: unknown }).message)
        : String(error);
    alert(`Error: ${detail}`);
  }
};
const handleUpdateGlobalSchedule = async (type: string, data: any) => {
try {
await api.updateGlobalSchedule(type, data);
@@ -373,32 +462,127 @@ export function ScraperSchedule() {
)}
{activeTab === 'dispensaries' && (
<div style={{
background: 'white',
borderRadius: '8px',
boxShadow: '0 2px 8px rgba(0,0,0,0.1)',
overflow: 'hidden'
}}>
<table style={{ width: '100%', borderCollapse: 'collapse' }}>
<div>
{/* Filter Bar */}
<div style={{ marginBottom: '15px', display: 'flex', gap: '20px', alignItems: 'center', flexWrap: 'wrap' }}>
{/* State Filter Toggle */}
<div style={{ display: 'flex', alignItems: 'center', gap: '8px' }}>
<span style={{ fontWeight: '500', color: '#374151' }}>State:</span>
<div style={{ display: 'flex', borderRadius: '6px', overflow: 'hidden', border: '1px solid #d1d5db' }}>
<button
onClick={() => setStateFilter('all')}
style={{
padding: '6px 14px',
background: stateFilter === 'all' ? '#2563eb' : 'white',
color: stateFilter === 'all' ? 'white' : '#374151',
border: 'none',
cursor: 'pointer',
fontSize: '14px',
fontWeight: '500'
}}
>
All
</button>
<button
onClick={() => setStateFilter('AZ')}
style={{
padding: '6px 14px',
background: stateFilter === 'AZ' ? '#2563eb' : 'white',
color: stateFilter === 'AZ' ? 'white' : '#374151',
border: 'none',
borderLeft: '1px solid #d1d5db',
cursor: 'pointer',
fontSize: '14px',
fontWeight: '500'
}}
>
AZ Only
</button>
</div>
</div>
{/* Search Box */}
<div style={{ display: 'flex', alignItems: 'center', gap: '8px' }}>
<span style={{ fontWeight: '500', color: '#374151' }}>Search:</span>
<input
type="text"
placeholder="Store name or slug..."
value={searchInput}
onChange={(e) => setSearchInput(e.target.value)}
style={{
padding: '6px 12px',
borderRadius: '6px',
border: '1px solid #d1d5db',
fontSize: '14px',
width: '200px'
}}
/>
{searchInput && (
<button
onClick={() => { setSearchInput(''); setSearchTerm(''); }}
style={{
padding: '4px 8px',
background: '#f3f4f6',
border: '1px solid #d1d5db',
borderRadius: '4px',
cursor: 'pointer',
fontSize: '12px'
}}
>
Clear
</button>
)}
</div>
{/* Dutchie Only Checkbox */}
<label style={{ display: 'flex', alignItems: 'center', gap: '8px', cursor: 'pointer' }}>
<input
type="checkbox"
checked={filterDutchieOnly}
onChange={(e) => setFilterDutchieOnly(e.target.checked)}
style={{ width: '16px', height: '16px', cursor: 'pointer' }}
/>
<span>Dutchie only</span>
</label>
{/* Results Count */}
<span style={{ color: '#666', fontSize: '14px', marginLeft: 'auto' }}>
Showing {(filterDutchieOnly
? dispensarySchedules.filter(d => d.menu_type === 'dutchie')
: dispensarySchedules
).length} dispensaries
</span>
</div>
<div style={{
background: 'white',
borderRadius: '8px',
boxShadow: '0 2px 8px rgba(0,0,0,0.1)',
overflow: 'auto'
}}>
<table style={{ width: '100%', borderCollapse: 'collapse', minWidth: '1200px' }}>
<thead>
<tr style={{ background: '#f8f8f8', borderBottom: '2px solid #eee' }}>
<th style={{ padding: '15px', textAlign: 'left', fontWeight: '600' }}>Dispensary</th>
<th style={{ padding: '15px', textAlign: 'center', fontWeight: '600' }}>Provider</th>
<th style={{ padding: '15px', textAlign: 'center', fontWeight: '600' }}>Schedule</th>
<th style={{ padding: '15px', textAlign: 'left', fontWeight: '600' }}>Last Run</th>
<th style={{ padding: '15px', textAlign: 'left', fontWeight: '600' }}>Next Run</th>
<th style={{ padding: '15px', textAlign: 'left', fontWeight: '600' }}>Last Result</th>
<th style={{ padding: '15px', textAlign: 'center', fontWeight: '600' }}>Actions</th>
<th style={{ padding: '12px', textAlign: 'left', fontWeight: '600' }}>Dispensary</th>
<th style={{ padding: '12px', textAlign: 'center', fontWeight: '600' }}>Menu Type</th>
<th style={{ padding: '12px', textAlign: 'center', fontWeight: '600' }}>Platform ID</th>
<th style={{ padding: '12px', textAlign: 'center', fontWeight: '600' }}>Status</th>
<th style={{ padding: '12px', textAlign: 'left', fontWeight: '600' }}>Last Run</th>
<th style={{ padding: '12px', textAlign: 'left', fontWeight: '600' }}>Next Run</th>
<th style={{ padding: '12px', textAlign: 'left', fontWeight: '600' }}>Last Result</th>
<th style={{ padding: '12px', textAlign: 'center', fontWeight: '600', minWidth: '220px' }}>Actions</th>
</tr>
</thead>
<tbody>
{dispensarySchedules.map((disp) => (
{(filterDutchieOnly
? dispensarySchedules.filter(d => d.menu_type === 'dutchie')
: dispensarySchedules
).map((disp) => (
<tr key={disp.dispensary_id} style={{ borderBottom: '1px solid #eee' }}>
<td style={{ padding: '15px' }}>
<td style={{ padding: '12px' }}>
<div style={{ display: 'flex', alignItems: 'center', gap: '8px' }}>
{disp.state && disp.city && disp.slug ? (
{disp.state && disp.city && (disp.dispensary_slug || disp.slug) ? (
<Link
to={`/dispensaries/${disp.state}/${disp.city.toLowerCase().replace(/\s+/g, '-')}/${disp.slug}`}
to={`/dispensaries/${disp.state}/${disp.city.toLowerCase().replace(/\s+/g, '-')}/${disp.dispensary_slug || disp.slug}`}
style={{
fontWeight: '600',
color: '#2563eb',
@@ -411,66 +595,86 @@ export function ScraperSchedule() {
<span style={{ fontWeight: '600' }}>{disp.dispensary_name}</span>
)}
</div>
<div style={{ fontSize: '13px', color: '#666' }}>
<div style={{ fontSize: '12px', color: '#666' }}>
{disp.city ? `${disp.city}, ${disp.state}` : disp.state}
</div>
</td>
<td style={{ padding: '15px', textAlign: 'center' }}>
{(disp.product_provider || disp.provider_type) && disp.product_provider !== 'unknown' && disp.provider_type !== 'unknown' ? (
<div>
<span style={{
padding: '4px 10px',
borderRadius: '12px',
fontSize: '12px',
fontWeight: '600',
background: disp.product_crawler_mode === 'production' ? '#d1fae5' : '#fef3c7',
color: disp.product_crawler_mode === 'production' ? '#065f46' : '#92400e'
}}>
{disp.product_provider || disp.provider_type}
</span>
{disp.product_crawler_mode !== 'production' && (
<div style={{ fontSize: '10px', color: '#92400e', marginTop: '2px' }}>sandbox</div>
)}
</div>
) : disp.menu_url ? (
{/* Menu Type Column */}
<td style={{ padding: '12px', textAlign: 'center' }}>
{disp.menu_type ? (
<span style={{
padding: '4px 10px',
borderRadius: '12px',
fontSize: '12px',
fontSize: '11px',
fontWeight: '600',
background: '#dbeafe',
color: '#1e40af'
background: disp.menu_type === 'dutchie' ? '#d1fae5' : '#e0e7ff',
color: disp.menu_type === 'dutchie' ? '#065f46' : '#3730a3'
}}>
Pending
{disp.menu_type}
</span>
) : (
<span style={{
padding: '4px 10px',
borderRadius: '12px',
fontSize: '12px',
fontSize: '11px',
fontWeight: '600',
background: '#f3f4f6',
color: '#666'
}}>
-
unknown
</span>
)}
</td>
<td style={{ padding: '15px', textAlign: 'center' }}>
{/* Platform ID Column */}
<td style={{ padding: '12px', textAlign: 'center' }}>
{disp.platform_dispensary_id ? (
<span style={{
padding: '4px 8px',
borderRadius: '4px',
fontSize: '10px',
fontFamily: 'monospace',
background: '#d1fae5',
color: '#065f46'
}} title={disp.platform_dispensary_id}>
{disp.platform_dispensary_id.length > 12
? `${disp.platform_dispensary_id.slice(0, 6)}...${disp.platform_dispensary_id.slice(-4)}`
: disp.platform_dispensary_id}
</span>
) : (
<span style={{
padding: '4px 8px',
borderRadius: '4px',
fontSize: '10px',
background: '#fee2e2',
color: '#991b1b'
}}>
missing
</span>
)}
</td>
{/* Status Column - Shows can_crawl and reason */}
<td style={{ padding: '12px', textAlign: 'center' }}>
<div style={{ display: 'flex', flexDirection: 'column', alignItems: 'center', gap: '4px' }}>
<span style={{
padding: '4px 10px',
borderRadius: '12px',
fontSize: '12px',
fontSize: '11px',
fontWeight: '600',
background: disp.schedule_active ? '#d1fae5' : '#fee2e2',
color: disp.schedule_active ? '#065f46' : '#991b1b'
background: disp.can_crawl ? '#d1fae5' : (disp.is_active !== false ? '#fef3c7' : '#fee2e2'),
color: disp.can_crawl ? '#065f46' : (disp.is_active !== false ? '#92400e' : '#991b1b')
}}>
{disp.schedule_active ? 'Active' : 'Disabled'}
</span>
<span style={{ fontSize: '12px', color: '#666' }}>
Every {Math.round(disp.interval_minutes / 60)}h
{disp.can_crawl ? 'Ready' : (disp.is_active !== false ? 'Not Ready' : 'Disabled')}
</span>
{disp.schedule_status_reason && disp.schedule_status_reason !== 'ready' && (
<span style={{ fontSize: '10px', color: '#666', maxWidth: '100px', textAlign: 'center' }}>
{disp.schedule_status_reason}
</span>
)}
{disp.interval_minutes && (
<span style={{ fontSize: '10px', color: '#999' }}>
Every {Math.round(disp.interval_minutes / 60)}h
</span>
)}
</div>
</td>
<td style={{ padding: '15px' }}>
@@ -530,28 +734,91 @@ export function ScraperSchedule() {
<span style={{ color: '#999', fontSize: '13px' }}>No runs yet</span>
)}
</td>
<td style={{ padding: '15px', textAlign: 'center' }}>
<button
onClick={() => handleTriggerCrawl(disp.dispensary_id)}
disabled={triggeringDispensary === disp.dispensary_id}
style={{
padding: '6px 12px',
background: triggeringDispensary === disp.dispensary_id ? '#94a3b8' : '#2563eb',
color: 'white',
border: 'none',
borderRadius: '4px',
cursor: triggeringDispensary === disp.dispensary_id ? 'wait' : 'pointer',
fontSize: '13px'
}}
>
{triggeringDispensary === disp.dispensary_id ? 'Starting...' : 'Run Now'}
</button>
<td style={{ padding: '12px', textAlign: 'center' }}>
<div style={{ display: 'flex', gap: '6px', justifyContent: 'center', flexWrap: 'wrap' }}>
{/* Refresh Detection - always available */}
<button
onClick={() => handleRefreshDetection(disp.dispensary_id)}
disabled={refreshingDetection === disp.dispensary_id}
style={{
padding: '4px 8px',
background: refreshingDetection === disp.dispensary_id ? '#94a3b8' : '#f3f4f6',
color: '#374151',
border: '1px solid #d1d5db',
borderRadius: '4px',
cursor: refreshingDetection === disp.dispensary_id ? 'wait' : 'pointer',
fontSize: '11px'
}}
title="Re-detect menu type and resolve platform ID"
>
{refreshingDetection === disp.dispensary_id ? '...' : 'Refresh'}
</button>
{/* Resolve ID - only if dutchie and missing platform ID */}
{disp.menu_type === 'dutchie' && !disp.platform_dispensary_id && (
<button
onClick={() => handleResolvePlatformId(disp.dispensary_id)}
disabled={resolvingId === disp.dispensary_id}
style={{
padding: '4px 8px',
background: resolvingId === disp.dispensary_id ? '#94a3b8' : '#fef3c7',
color: '#92400e',
border: '1px solid #fcd34d',
borderRadius: '4px',
cursor: resolvingId === disp.dispensary_id ? 'wait' : 'pointer',
fontSize: '11px'
}}
title="Resolve platform dispensary ID via GraphQL"
>
{resolvingId === disp.dispensary_id ? '...' : 'Resolve ID'}
</button>
)}
{/* Run Now - only if can_crawl */}
<button
onClick={() => handleTriggerCrawl(disp.dispensary_id)}
disabled={triggeringDispensary === disp.dispensary_id || !disp.can_crawl}
style={{
padding: '4px 8px',
background: triggeringDispensary === disp.dispensary_id ? '#94a3b8' :
!disp.can_crawl ? '#e5e7eb' : '#2563eb',
color: !disp.can_crawl ? '#9ca3af' : 'white',
border: 'none',
borderRadius: '4px',
cursor: triggeringDispensary === disp.dispensary_id || !disp.can_crawl ? 'not-allowed' : 'pointer',
fontSize: '11px'
}}
title={disp.can_crawl ? 'Trigger immediate crawl' : `Cannot crawl: ${disp.schedule_status_reason}`}
>
{triggeringDispensary === disp.dispensary_id ? '...' : 'Run'}
</button>
{/* Enable/Disable Schedule Toggle */}
<button
onClick={() => handleToggleSchedule(disp.dispensary_id, disp.is_active)}
disabled={togglingSchedule === disp.dispensary_id}
style={{
padding: '4px 8px',
background: togglingSchedule === disp.dispensary_id ? '#94a3b8' :
disp.is_active ? '#fee2e2' : '#d1fae5',
color: disp.is_active ? '#991b1b' : '#065f46',
border: 'none',
borderRadius: '4px',
cursor: togglingSchedule === disp.dispensary_id ? 'wait' : 'pointer',
fontSize: '11px'
}}
title={disp.is_active ? 'Disable scheduled crawling' : 'Enable scheduled crawling'}
>
{togglingSchedule === disp.dispensary_id ? '...' : (disp.is_active ? 'Disable' : 'Enable')}
</button>
</div>
</td>
</tr>
))}
</tbody>
</table>
</div>
</div>
)}
{activeTab === 'jobs' && (