feat: Platform isolation, Evomi geo-targeting, proxy management
Platform isolation:
- Rename handlers to {task}-{platform}.ts convention
- Deprecate -curl variants (now _deprecated-*)
- Platform-based routing in task-worker.ts
- Add Jane platform handlers and client
Evomi geo-targeting:
- Add dynamic proxy URL builder with state/city targeting
- Session stickiness per worker per state (30 min)
- Fallback to static proxy table when API unavailable
- Add proxy tracking columns to worker_tasks
Proxy management:
- New /proxies admin page for visibility
- Track proxy_ip, proxy_geo, proxy_source per task
- Show active sessions and task history
Validation filtering:
- Filter by validated stores (platform_dispensary_id + menu_url)
- Mark incomplete stores as deprecated
- Update all dashboard/stats queries
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
23
backend/migrations/107_proxy_tracking.sql
Normal file
23
backend/migrations/107_proxy_tracking.sql
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
-- Migration: 107_proxy_tracking.sql
|
||||||
|
-- Description: Add proxy tracking columns to worker_tasks for geo-targeting visibility
|
||||||
|
-- Created: 2025-12-13
|
||||||
|
|
||||||
|
-- Add proxy tracking columns to worker_tasks
|
||||||
|
ALTER TABLE worker_tasks
|
||||||
|
ADD COLUMN IF NOT EXISTS proxy_ip VARCHAR(45);
|
||||||
|
|
||||||
|
ALTER TABLE worker_tasks
|
||||||
|
ADD COLUMN IF NOT EXISTS proxy_geo VARCHAR(100);
|
||||||
|
|
||||||
|
ALTER TABLE worker_tasks
|
||||||
|
ADD COLUMN IF NOT EXISTS proxy_source VARCHAR(10);
|
||||||
|
|
||||||
|
-- Comments
|
||||||
|
COMMENT ON COLUMN worker_tasks.proxy_ip IS 'IP address of proxy used for this task';
|
||||||
|
COMMENT ON COLUMN worker_tasks.proxy_geo IS 'Geo target used (e.g., "arizona", "phoenix, arizona")';
|
||||||
|
COMMENT ON COLUMN worker_tasks.proxy_source IS 'Source of proxy: "api" (Evomi dynamic) or "static" (fallback table)';
|
||||||
|
|
||||||
|
-- Index for proxy analysis
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_worker_tasks_proxy_ip
|
||||||
|
ON worker_tasks(proxy_ip)
|
||||||
|
WHERE proxy_ip IS NOT NULL;
|
||||||
@@ -388,6 +388,9 @@ async function promoteLocation(
|
|||||||
country = EXCLUDED.country,
|
country = EXCLUDED.country,
|
||||||
status = EXCLUDED.status,
|
status = EXCLUDED.status,
|
||||||
dutchie_discovery_id = EXCLUDED.dutchie_discovery_id,
|
dutchie_discovery_id = EXCLUDED.dutchie_discovery_id,
|
||||||
|
dutchie_verified = TRUE,
|
||||||
|
dutchie_verified_at = COALESCE(dispensaries.dutchie_verified_at, CURRENT_TIMESTAMP),
|
||||||
|
crawl_enabled = COALESCE(dispensaries.crawl_enabled, TRUE),
|
||||||
last_modified_at = EXCLUDED.last_modified_at,
|
last_modified_at = EXCLUDED.last_modified_at,
|
||||||
last_modified_by_task = EXCLUDED.last_modified_by_task,
|
last_modified_by_task = EXCLUDED.last_modified_by_task,
|
||||||
last_modified_task_id = EXCLUDED.last_modified_task_id,
|
last_modified_task_id = EXCLUDED.last_modified_task_id,
|
||||||
|
|||||||
250
backend/src/hydration/normalizers/jane.ts
Normal file
250
backend/src/hydration/normalizers/jane.ts
Normal file
@@ -0,0 +1,250 @@
|
|||||||
|
/**
|
||||||
|
* iHeartJane Platform Normalizer
|
||||||
|
*
|
||||||
|
* Normalizes raw Jane/Algolia product responses to canonical format.
|
||||||
|
*
|
||||||
|
* Jane uses Algolia for product search. Key differences from Dutchie:
|
||||||
|
* - Product ID is numeric (not MongoDB ObjectId)
|
||||||
|
* - Prices are per-weight (price_gram, price_eighth_ounce, etc.)
|
||||||
|
* - Category = strain type (hybrid, indica, sativa)
|
||||||
|
* - Kind = product type (vape, flower, edible, etc.)
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { BaseNormalizer } from './base';
|
||||||
|
import {
|
||||||
|
NormalizedProduct,
|
||||||
|
NormalizedPricing,
|
||||||
|
NormalizedAvailability,
|
||||||
|
NormalizedBrand,
|
||||||
|
NormalizedCategory,
|
||||||
|
} from '../types';
|
||||||
|
|
||||||
|
export class JaneNormalizer extends BaseNormalizer {
|
||||||
|
readonly platform = 'jane';
|
||||||
|
readonly supportedVersions = [1];
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// EXTRACTION
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
extractProducts(rawJson: any): any[] {
|
||||||
|
// Algolia response format: { hits: [...] }
|
||||||
|
if (rawJson?.hits && Array.isArray(rawJson.hits)) {
|
||||||
|
return rawJson.hits;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Direct array of products
|
||||||
|
if (Array.isArray(rawJson)) {
|
||||||
|
return rawJson;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Products array wrapper
|
||||||
|
if (rawJson?.products && Array.isArray(rawJson.products)) {
|
||||||
|
return rawJson.products;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try data.hits (nested response)
|
||||||
|
if (rawJson?.data?.hits && Array.isArray(rawJson.data.hits)) {
|
||||||
|
return rawJson.data.hits;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.warn('[JaneNormalizer] Could not extract products from payload');
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
validatePayload(rawJson: any): { valid: boolean; errors: string[] } {
|
||||||
|
const errors: string[] = [];
|
||||||
|
|
||||||
|
if (!rawJson) {
|
||||||
|
errors.push('Payload is null or undefined');
|
||||||
|
return { valid: false, errors };
|
||||||
|
}
|
||||||
|
|
||||||
|
const products = this.extractProducts(rawJson);
|
||||||
|
if (products.length === 0) {
|
||||||
|
errors.push('No products found in payload');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for Algolia errors
|
||||||
|
if (rawJson?.message) {
|
||||||
|
errors.push(`Algolia error: ${rawJson.message}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
return { valid: errors.length === 0, errors };
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// NORMALIZATION
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
protected normalizeProduct(rawProduct: any, dispensaryId: number): NormalizedProduct | null {
|
||||||
|
const externalId = rawProduct.product_id || rawProduct.objectID;
|
||||||
|
if (!externalId) {
|
||||||
|
console.warn('[JaneNormalizer] Product missing ID, skipping');
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
const name = rawProduct.name;
|
||||||
|
if (!name) {
|
||||||
|
console.warn(`[JaneNormalizer] Product ${externalId} missing name, skipping`);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
externalProductId: String(externalId),
|
||||||
|
dispensaryId,
|
||||||
|
platform: 'jane',
|
||||||
|
platformDispensaryId: '', // Will be set by handler
|
||||||
|
|
||||||
|
// Core fields
|
||||||
|
name,
|
||||||
|
brandName: rawProduct.brand || null,
|
||||||
|
brandId: rawProduct.product_brand_id ? String(rawProduct.product_brand_id) : null,
|
||||||
|
category: rawProduct.kind || null, // Jane's "kind" = product type (vape, flower, etc.)
|
||||||
|
subcategory: rawProduct.kind_subtype || rawProduct.root_subtype || null,
|
||||||
|
type: rawProduct.kind || null,
|
||||||
|
strainType: rawProduct.category || null, // Jane's "category" = strain (hybrid, indica, sativa)
|
||||||
|
|
||||||
|
// Potency
|
||||||
|
thcPercent: rawProduct.percent_thc ?? null,
|
||||||
|
cbdPercent: rawProduct.percent_cbd ?? null,
|
||||||
|
thcContent: rawProduct.percent_thc ?? null,
|
||||||
|
cbdContent: rawProduct.percent_cbd ?? null,
|
||||||
|
|
||||||
|
// Status - Jane products in search are always active
|
||||||
|
status: 'Active',
|
||||||
|
isActive: true,
|
||||||
|
medicalOnly: rawProduct.store_types?.includes('medical') && !rawProduct.store_types?.includes('recreational'),
|
||||||
|
recOnly: rawProduct.store_types?.includes('recreational') && !rawProduct.store_types?.includes('medical'),
|
||||||
|
|
||||||
|
// Images
|
||||||
|
primaryImageUrl: rawProduct.image_urls?.[0] || null,
|
||||||
|
images: (rawProduct.image_urls || []).map((url: string, i: number) => ({
|
||||||
|
url,
|
||||||
|
position: i,
|
||||||
|
})),
|
||||||
|
|
||||||
|
// Raw reference
|
||||||
|
rawProduct,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
protected normalizePricing(rawProduct: any): NormalizedPricing | null {
|
||||||
|
const externalId = rawProduct.product_id || rawProduct.objectID;
|
||||||
|
if (!externalId) return null;
|
||||||
|
|
||||||
|
// Jane has multiple price fields by weight
|
||||||
|
const prices: number[] = [];
|
||||||
|
const specialPrices: number[] = [];
|
||||||
|
|
||||||
|
// Collect all regular prices
|
||||||
|
if (rawProduct.price_gram) prices.push(rawProduct.price_gram);
|
||||||
|
if (rawProduct.price_each) prices.push(rawProduct.price_each);
|
||||||
|
if (rawProduct.price_half_gram) prices.push(rawProduct.price_half_gram);
|
||||||
|
if (rawProduct.price_eighth_ounce) prices.push(rawProduct.price_eighth_ounce);
|
||||||
|
if (rawProduct.price_quarter_ounce) prices.push(rawProduct.price_quarter_ounce);
|
||||||
|
if (rawProduct.price_half_ounce) prices.push(rawProduct.price_half_ounce);
|
||||||
|
if (rawProduct.price_ounce) prices.push(rawProduct.price_ounce);
|
||||||
|
if (rawProduct.price_two_gram) prices.push(rawProduct.price_two_gram);
|
||||||
|
|
||||||
|
// Collect special/discounted prices
|
||||||
|
if (rawProduct.special_price_gram) specialPrices.push(rawProduct.special_price_gram);
|
||||||
|
if (rawProduct.special_price_each) specialPrices.push(rawProduct.special_price_each);
|
||||||
|
if (rawProduct.discounted_price_gram) specialPrices.push(rawProduct.discounted_price_gram);
|
||||||
|
if (rawProduct.discounted_price_each) specialPrices.push(rawProduct.discounted_price_each);
|
||||||
|
|
||||||
|
// Also check bucket_price and sort_price
|
||||||
|
if (rawProduct.bucket_price && !prices.includes(rawProduct.bucket_price)) {
|
||||||
|
prices.push(rawProduct.bucket_price);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine if on special
|
||||||
|
const isOnSpecial = specialPrices.length > 0 || rawProduct.has_brand_discount === true;
|
||||||
|
|
||||||
|
// Calculate discount percent
|
||||||
|
let discountPercent: number | null = null;
|
||||||
|
if (isOnSpecial && prices.length > 0 && specialPrices.length > 0) {
|
||||||
|
const regularMin = Math.min(...prices);
|
||||||
|
const specialMin = Math.min(...specialPrices);
|
||||||
|
if (regularMin > 0 && specialMin < regularMin) {
|
||||||
|
discountPercent = Math.round(((regularMin - specialMin) / regularMin) * 100);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get special name from brand_special_prices
|
||||||
|
let specialName: string | null = null;
|
||||||
|
if (rawProduct.brand_special_prices) {
|
||||||
|
const firstSpecial = Object.values(rawProduct.brand_special_prices)[0] as any;
|
||||||
|
if (firstSpecial?.title) {
|
||||||
|
specialName = firstSpecial.title;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
externalProductId: String(externalId),
|
||||||
|
|
||||||
|
// Use minimum price for display
|
||||||
|
priceRec: this.toCents(this.getMin(prices)),
|
||||||
|
priceRecMin: this.toCents(this.getMin(prices)),
|
||||||
|
priceRecMax: this.toCents(this.getMax(prices)),
|
||||||
|
priceRecSpecial: this.toCents(this.getMin(specialPrices)),
|
||||||
|
|
||||||
|
// Jane doesn't distinguish med pricing in Algolia response
|
||||||
|
priceMed: null,
|
||||||
|
priceMedMin: null,
|
||||||
|
priceMedMax: null,
|
||||||
|
priceMedSpecial: null,
|
||||||
|
|
||||||
|
isOnSpecial,
|
||||||
|
specialName,
|
||||||
|
discountPercent,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
protected normalizeAvailability(rawProduct: any): NormalizedAvailability | null {
|
||||||
|
const externalId = rawProduct.product_id || rawProduct.objectID;
|
||||||
|
if (!externalId) return null;
|
||||||
|
|
||||||
|
// Jane products in Algolia are in-stock (OOS products aren't returned)
|
||||||
|
const availableForPickup = rawProduct.available_for_pickup ?? true;
|
||||||
|
const availableForDelivery = rawProduct.available_for_delivery ?? false;
|
||||||
|
const inStock = availableForPickup || availableForDelivery;
|
||||||
|
|
||||||
|
// Jane doesn't expose quantity in Algolia
|
||||||
|
const quantity = rawProduct.max_cart_quantity || null;
|
||||||
|
|
||||||
|
return {
|
||||||
|
externalProductId: String(externalId),
|
||||||
|
inStock,
|
||||||
|
stockStatus: inStock ? 'in_stock' : 'out_of_stock',
|
||||||
|
quantity,
|
||||||
|
quantityAvailable: quantity,
|
||||||
|
isBelowThreshold: false, // Jane doesn't expose this
|
||||||
|
optionsBelowThreshold: false,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
protected extractBrand(rawProduct: any): NormalizedBrand | null {
|
||||||
|
const brandName = rawProduct.brand;
|
||||||
|
if (!brandName) return null;
|
||||||
|
|
||||||
|
return {
|
||||||
|
externalBrandId: rawProduct.product_brand_id ? String(rawProduct.product_brand_id) : null,
|
||||||
|
name: brandName,
|
||||||
|
slug: this.slugify(brandName),
|
||||||
|
logoUrl: rawProduct.brand_logo_url || null,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
protected extractCategory(rawProduct: any): NormalizedCategory | null {
|
||||||
|
// Use "kind" as the primary category (vape, flower, edible, etc.)
|
||||||
|
const categoryName = rawProduct.kind;
|
||||||
|
if (!categoryName) return null;
|
||||||
|
|
||||||
|
return {
|
||||||
|
name: categoryName,
|
||||||
|
slug: this.slugify(categoryName),
|
||||||
|
parentCategory: null,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -90,6 +90,7 @@ import publicApiRoutes from './routes/public-api';
|
|||||||
import usersRoutes from './routes/users';
|
import usersRoutes from './routes/users';
|
||||||
import staleProcessesRoutes from './routes/stale-processes';
|
import staleProcessesRoutes from './routes/stale-processes';
|
||||||
import orchestratorAdminRoutes from './routes/orchestrator-admin';
|
import orchestratorAdminRoutes from './routes/orchestrator-admin';
|
||||||
|
import proxyAdminRoutes from './routes/proxy-admin';
|
||||||
import adminDebugRoutes from './routes/admin-debug';
|
import adminDebugRoutes from './routes/admin-debug';
|
||||||
import intelligenceRoutes from './routes/intelligence';
|
import intelligenceRoutes from './routes/intelligence';
|
||||||
import marketsRoutes from './routes/markets';
|
import marketsRoutes from './routes/markets';
|
||||||
@@ -172,6 +173,10 @@ app.use('/api/stale-processes', staleProcessesRoutes);
|
|||||||
// Admin routes - orchestrator actions
|
// Admin routes - orchestrator actions
|
||||||
app.use('/api/admin/orchestrator', orchestratorAdminRoutes);
|
app.use('/api/admin/orchestrator', orchestratorAdminRoutes);
|
||||||
|
|
||||||
|
// Admin routes - proxy management (geo-targeting, sessions, fallback)
|
||||||
|
app.use('/api/admin/proxies', proxyAdminRoutes);
|
||||||
|
console.log('[ProxyAdmin] Routes registered at /api/admin/proxies');
|
||||||
|
|
||||||
// Admin routes - debug endpoints (snapshot inspection)
|
// Admin routes - debug endpoints (snapshot inspection)
|
||||||
app.use('/api/admin/debug', adminDebugRoutes);
|
app.use('/api/admin/debug', adminDebugRoutes);
|
||||||
console.log('[AdminDebug] Routes registered at /api/admin/debug');
|
console.log('[AdminDebug] Routes registered at /api/admin/debug');
|
||||||
|
|||||||
545
backend/src/platforms/jane/client.ts
Normal file
545
backend/src/platforms/jane/client.ts
Normal file
@@ -0,0 +1,545 @@
|
|||||||
|
/**
|
||||||
|
* ============================================================
|
||||||
|
* iHEARTJANE PLATFORM CLIENT
|
||||||
|
* ============================================================
|
||||||
|
*
|
||||||
|
* Jane uses Cloudflare protection, so all requests must go through
|
||||||
|
* Puppeteer with stealth plugin. This client manages browser sessions
|
||||||
|
* and network interception to capture API responses.
|
||||||
|
*
|
||||||
|
* API Endpoints:
|
||||||
|
* - Store: GET https://api.iheartjane.com/v1/stores/{store_id}
|
||||||
|
* - Products: POST https://search.iheartjane.com/1/indexes/menu-products-production/query (Algolia)
|
||||||
|
*
|
||||||
|
* Store ID Format: Numeric (e.g., "2788")
|
||||||
|
*
|
||||||
|
* ============================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer, { Browser, Page } from 'puppeteer';
|
||||||
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||||
|
import puppeteerExtra from 'puppeteer-extra';
|
||||||
|
|
||||||
|
import type { CrawlRotator, BrowserFingerprint } from '../../services/crawl-rotator';
|
||||||
|
|
||||||
|
// Register stealth plugin
|
||||||
|
puppeteerExtra.use(StealthPlugin());
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// TYPES
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
export interface JaneStoreData {
|
||||||
|
id: number;
|
||||||
|
name: string;
|
||||||
|
address: string;
|
||||||
|
city: string;
|
||||||
|
state: string;
|
||||||
|
zip: string;
|
||||||
|
lat: number;
|
||||||
|
long: number;
|
||||||
|
phone: string;
|
||||||
|
medical: boolean;
|
||||||
|
recreational: boolean;
|
||||||
|
product_count: number;
|
||||||
|
rating: number;
|
||||||
|
reviews_count: number;
|
||||||
|
working_hours: Array<{ day: string; period: { from: string; to: string } }>;
|
||||||
|
url_slug: string;
|
||||||
|
photo?: string;
|
||||||
|
// Full raw data preserved
|
||||||
|
raw: any;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface JaneProductHit {
|
||||||
|
product_id: number;
|
||||||
|
name: string;
|
||||||
|
brand: string;
|
||||||
|
kind: string;
|
||||||
|
category: string;
|
||||||
|
percent_thc: number | null;
|
||||||
|
percent_cbd: number | null;
|
||||||
|
price_gram: number | null;
|
||||||
|
price_each: number | null;
|
||||||
|
price_eighth_ounce: number | null;
|
||||||
|
price_quarter_ounce: number | null;
|
||||||
|
price_half_ounce: number | null;
|
||||||
|
price_ounce: number | null;
|
||||||
|
image_urls: string[];
|
||||||
|
aggregate_rating: number | null;
|
||||||
|
review_count: number | null;
|
||||||
|
available_for_pickup: boolean;
|
||||||
|
available_for_delivery: boolean;
|
||||||
|
// Full raw data preserved
|
||||||
|
raw: any;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface JaneSession {
|
||||||
|
sessionId: string;
|
||||||
|
browser: Browser;
|
||||||
|
page: Page;
|
||||||
|
fingerprint: BrowserFingerprint;
|
||||||
|
proxyUrl: string | null;
|
||||||
|
startedAt: Date;
|
||||||
|
storeId?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface CapturedResponse {
|
||||||
|
type: 'store' | 'product' | 'algolia' | 'api';
|
||||||
|
url: string;
|
||||||
|
data: any;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// CONFIGURATION
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
export const JANE_CONFIG = {
|
||||||
|
storeApiBase: 'https://api.iheartjane.com/v1',
|
||||||
|
algoliaEndpoint: 'https://search.iheartjane.com/1/indexes/menu-products-production/query',
|
||||||
|
timeout: 60000,
|
||||||
|
maxRetries: 3,
|
||||||
|
navigationTimeout: 60000,
|
||||||
|
productFetchDelay: 1000,
|
||||||
|
};
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// SESSION MANAGEMENT
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
let currentSession: JaneSession | null = null;
|
||||||
|
let crawlRotator: CrawlRotator | null = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set CrawlRotator for proxy/fingerprint management
|
||||||
|
*/
|
||||||
|
export function setCrawlRotator(rotator: CrawlRotator | null): void {
|
||||||
|
crawlRotator = rotator;
|
||||||
|
if (rotator) {
|
||||||
|
console.log('[Jane Client] CrawlRotator attached');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get attached CrawlRotator
|
||||||
|
*/
|
||||||
|
export function getCrawlRotator(): CrawlRotator | null {
|
||||||
|
return crawlRotator;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start a new Jane browser session
|
||||||
|
* Uses Puppeteer + Stealth plugin to bypass Cloudflare
|
||||||
|
*/
|
||||||
|
export async function startSession(storeId?: string): Promise<JaneSession> {
|
||||||
|
if (currentSession) {
|
||||||
|
console.log('[Jane Client] Closing existing session before starting new one');
|
||||||
|
await endSession();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get fingerprint from rotator or use defaults
|
||||||
|
let fingerprint: BrowserFingerprint;
|
||||||
|
let proxyUrl: string | null = null;
|
||||||
|
|
||||||
|
if (crawlRotator) {
|
||||||
|
fingerprint = crawlRotator.userAgent.getCurrent();
|
||||||
|
const proxy = crawlRotator.proxy.getCurrent();
|
||||||
|
if (proxy) {
|
||||||
|
proxyUrl = crawlRotator.proxy.getProxyUrl(proxy);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Default fingerprint for local testing
|
||||||
|
fingerprint = {
|
||||||
|
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
||||||
|
browserName: 'Chrome',
|
||||||
|
deviceCategory: 'desktop',
|
||||||
|
platform: 'Windows',
|
||||||
|
screenWidth: 1920,
|
||||||
|
screenHeight: 1080,
|
||||||
|
viewportWidth: 1920,
|
||||||
|
viewportHeight: 1080,
|
||||||
|
acceptLanguage: 'en-US,en;q=0.9',
|
||||||
|
secChUa: '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
|
||||||
|
secChUaPlatform: '"Windows"',
|
||||||
|
secChUaMobile: '?0',
|
||||||
|
httpFingerprint: {
|
||||||
|
browserType: 'Chrome' as const,
|
||||||
|
headers: {},
|
||||||
|
headerOrder: [],
|
||||||
|
curlImpersonateBinary: 'curl_chrome131',
|
||||||
|
hasDNT: false,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build browser args
|
||||||
|
const browserArgs = [
|
||||||
|
'--no-sandbox',
|
||||||
|
'--disable-setuid-sandbox',
|
||||||
|
'--disable-dev-shm-usage',
|
||||||
|
'--disable-blink-features=AutomationControlled',
|
||||||
|
];
|
||||||
|
|
||||||
|
if (proxyUrl) {
|
||||||
|
// Extract host:port from proxy URL for puppeteer
|
||||||
|
const proxyMatch = proxyUrl.match(/:\/\/([^@]+@)?([^/]+)/);
|
||||||
|
if (proxyMatch) {
|
||||||
|
browserArgs.push(`--proxy-server=${proxyMatch[2]}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('[Jane Client] Launching browser...');
|
||||||
|
const browser = await puppeteerExtra.launch({
|
||||||
|
headless: true,
|
||||||
|
args: browserArgs,
|
||||||
|
});
|
||||||
|
|
||||||
|
const page = await browser.newPage();
|
||||||
|
|
||||||
|
// Set viewport
|
||||||
|
await page.setViewport({
|
||||||
|
width: fingerprint.viewportWidth || 1920,
|
||||||
|
height: fingerprint.viewportHeight || 1080,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Set user agent
|
||||||
|
await page.setUserAgent(fingerprint.userAgent);
|
||||||
|
|
||||||
|
// Handle proxy authentication if needed
|
||||||
|
if (proxyUrl) {
|
||||||
|
const authMatch = proxyUrl.match(/:\/\/([^:]+):([^@]+)@/);
|
||||||
|
if (authMatch) {
|
||||||
|
await page.authenticate({
|
||||||
|
username: authMatch[1],
|
||||||
|
password: authMatch[2],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const sessionId = `jane_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
|
||||||
|
|
||||||
|
currentSession = {
|
||||||
|
sessionId,
|
||||||
|
browser,
|
||||||
|
page,
|
||||||
|
fingerprint,
|
||||||
|
proxyUrl,
|
||||||
|
startedAt: new Date(),
|
||||||
|
storeId,
|
||||||
|
};
|
||||||
|
|
||||||
|
console.log(`[Jane Client] Started session ${sessionId}`);
|
||||||
|
console.log(`[Jane Client] Browser: ${fingerprint.browserName} (${fingerprint.deviceCategory})`);
|
||||||
|
if (proxyUrl) {
|
||||||
|
console.log(`[Jane Client] Proxy: ${proxyUrl.replace(/:[^:@]+@/, ':***@')}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
return currentSession;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* End the current browser session
|
||||||
|
*/
|
||||||
|
export async function endSession(): Promise<void> {
|
||||||
|
if (currentSession) {
|
||||||
|
const duration = Math.round((Date.now() - currentSession.startedAt.getTime()) / 1000);
|
||||||
|
console.log(`[Jane Client] Ending session ${currentSession.sessionId} (${duration}s)`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
await currentSession.browser.close();
|
||||||
|
} catch (e) {
|
||||||
|
console.warn('[Jane Client] Error closing browser:', e);
|
||||||
|
}
|
||||||
|
|
||||||
|
currentSession = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get current active session
|
||||||
|
*/
|
||||||
|
export function getCurrentSession(): JaneSession | null {
|
||||||
|
return currentSession;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// NETWORK INTERCEPTION
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Navigate to a Jane menu page and capture API responses
|
||||||
|
* Returns captured store and product data from network interception
|
||||||
|
*/
|
||||||
|
export async function navigateAndCapture(
|
||||||
|
menuUrl: string,
|
||||||
|
options: { waitForProducts?: boolean; captureStore?: boolean } = {}
|
||||||
|
): Promise<{ store?: JaneStoreData; products: JaneProductHit[]; responses: CapturedResponse[] }> {
|
||||||
|
const { waitForProducts = true, captureStore = true } = options;
|
||||||
|
|
||||||
|
if (!currentSession) {
|
||||||
|
throw new Error('[Jane Client] No active session - call startSession() first');
|
||||||
|
}
|
||||||
|
|
||||||
|
const { page } = currentSession;
|
||||||
|
const capturedResponses: CapturedResponse[] = [];
|
||||||
|
let storeData: JaneStoreData | undefined;
|
||||||
|
const products: JaneProductHit[] = [];
|
||||||
|
|
||||||
|
// Enable request interception
|
||||||
|
await page.setRequestInterception(true);
|
||||||
|
|
||||||
|
// Block heavy resources to speed up page load
|
||||||
|
page.on('request', (req) => {
|
||||||
|
const type = req.resourceType();
|
||||||
|
if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
|
||||||
|
req.abort();
|
||||||
|
} else {
|
||||||
|
req.continue();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Capture API responses
|
||||||
|
page.on('response', async (response) => {
|
||||||
|
const url = response.url();
|
||||||
|
const contentType = response.headers()['content-type'] || '';
|
||||||
|
|
||||||
|
// Only process JSON responses from Jane domains
|
||||||
|
if ((url.includes('iheartjane.com') || url.includes('algolia')) && contentType.includes('json')) {
|
||||||
|
try {
|
||||||
|
const json = await response.json();
|
||||||
|
|
||||||
|
// Categorize response
|
||||||
|
if (url.includes('/stores/')) {
|
||||||
|
capturedResponses.push({ type: 'store', url, data: json });
|
||||||
|
|
||||||
|
if (captureStore && json.store) {
|
||||||
|
storeData = parseStoreData(json.store);
|
||||||
|
console.log(`[Jane Client] Captured store: ${storeData.name} (ID: ${storeData.id})`);
|
||||||
|
}
|
||||||
|
} else if (url.includes('algolia') || url.includes('search.iheartjane')) {
|
||||||
|
capturedResponses.push({ type: 'algolia', url, data: json });
|
||||||
|
|
||||||
|
if (json.hits && Array.isArray(json.hits)) {
|
||||||
|
const newProducts = json.hits.map(parseProductHit);
|
||||||
|
products.push(...newProducts);
|
||||||
|
console.log(`[Jane Client] Captured ${json.hits.length} products from Algolia`);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
capturedResponses.push({ type: 'api', url, data: json });
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Not valid JSON, skip
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log(`[Jane Client] Navigating to ${menuUrl}`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
await page.goto(menuUrl, {
|
||||||
|
waitUntil: 'networkidle2',
|
||||||
|
timeout: JANE_CONFIG.navigationTimeout,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Wait for products to load
|
||||||
|
if (waitForProducts) {
|
||||||
|
console.log('[Jane Client] Waiting for product data...');
|
||||||
|
await sleep(3000);
|
||||||
|
|
||||||
|
// Try to wait for product selector
|
||||||
|
try {
|
||||||
|
await page.waitForSelector('[data-testid="product-card"], .product-card, [class*="ProductCard"]', {
|
||||||
|
timeout: 10000,
|
||||||
|
});
|
||||||
|
} catch {
|
||||||
|
console.log('[Jane Client] No product cards found via selector (may have loaded via API)');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error(`[Jane Client] Navigation error: ${error.message}`);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Record success if we got data
|
||||||
|
if (crawlRotator && (storeData || products.length > 0)) {
|
||||||
|
await crawlRotator.recordSuccess();
|
||||||
|
}
|
||||||
|
|
||||||
|
return { store: storeData, products, responses: capturedResponses };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fetch store info directly via browser context
|
||||||
|
* Uses page.evaluate to make fetch request from browser (bypasses Cloudflare)
|
||||||
|
*/
|
||||||
|
export async function fetchStoreInfo(storeId: string | number): Promise<JaneStoreData | null> {
|
||||||
|
if (!currentSession) {
|
||||||
|
throw new Error('[Jane Client] No active session - call startSession() first');
|
||||||
|
}
|
||||||
|
|
||||||
|
const { page } = currentSession;
|
||||||
|
const url = `${JANE_CONFIG.storeApiBase}/stores/${storeId}`;
|
||||||
|
|
||||||
|
console.log(`[Jane Client] Fetching store info: ${url}`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await page.evaluate(async (apiUrl: string) => {
|
||||||
|
try {
|
||||||
|
const resp = await fetch(apiUrl);
|
||||||
|
if (resp.ok) {
|
||||||
|
return await resp.json();
|
||||||
|
}
|
||||||
|
return { error: resp.status };
|
||||||
|
} catch (e: any) {
|
||||||
|
return { error: e.message };
|
||||||
|
}
|
||||||
|
}, url);
|
||||||
|
|
||||||
|
if (result.error) {
|
||||||
|
console.error(`[Jane Client] Store fetch failed: ${result.error}`);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result.store) {
|
||||||
|
const storeData = parseStoreData(result.store);
|
||||||
|
console.log(`[Jane Client] Got store: ${storeData.name}`);
|
||||||
|
return storeData;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error(`[Jane Client] Error fetching store: ${error.message}`);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract store ID from a Jane menu URL
|
||||||
|
* Jane URLs don't always contain the store ID directly - we may need to intercept it
|
||||||
|
*/
|
||||||
|
export async function extractStoreIdFromUrl(menuUrl: string): Promise<string | null> {
|
||||||
|
if (!currentSession) {
|
||||||
|
throw new Error('[Jane Client] No active session - call startSession() first');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to extract from URL pattern first (if embedded)
|
||||||
|
const urlMatch = menuUrl.match(/store[s]?[\/=](\d+)/i);
|
||||||
|
if (urlMatch) {
|
||||||
|
return urlMatch[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, navigate and intercept the store API call
|
||||||
|
console.log('[Jane Client] Store ID not in URL, intercepting from navigation...');
|
||||||
|
|
||||||
|
const { page } = currentSession;
|
||||||
|
let capturedStoreId: string | null = null;
|
||||||
|
|
||||||
|
// Listen for store API calls
|
||||||
|
const storeIdPromise = new Promise<string | null>((resolve) => {
|
||||||
|
const timeout = setTimeout(() => resolve(null), 30000);
|
||||||
|
|
||||||
|
page.on('response', async (response) => {
|
||||||
|
const url = response.url();
|
||||||
|
if (url.includes('/v1/stores/') && !capturedStoreId) {
|
||||||
|
const match = url.match(/\/stores\/(\d+)/);
|
||||||
|
if (match) {
|
||||||
|
capturedStoreId = match[1];
|
||||||
|
clearTimeout(timeout);
|
||||||
|
resolve(capturedStoreId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// Enable interception and navigate
|
||||||
|
await page.setRequestInterception(true);
|
||||||
|
page.on('request', (req) => {
|
||||||
|
const type = req.resourceType();
|
||||||
|
if (['image', 'font', 'media'].includes(type)) {
|
||||||
|
req.abort();
|
||||||
|
} else {
|
||||||
|
req.continue();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
await page.goto(menuUrl, {
|
||||||
|
waitUntil: 'domcontentloaded',
|
||||||
|
timeout: JANE_CONFIG.navigationTimeout,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Wait for store ID to be captured
|
||||||
|
const storeId = await storeIdPromise;
|
||||||
|
|
||||||
|
if (storeId) {
|
||||||
|
console.log(`[Jane Client] Extracted store ID: ${storeId}`);
|
||||||
|
} else {
|
||||||
|
console.warn('[Jane Client] Could not extract store ID from URL');
|
||||||
|
}
|
||||||
|
|
||||||
|
return storeId;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// DATA PARSING
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse raw store data into normalized structure
|
||||||
|
*/
|
||||||
|
function parseStoreData(raw: any): JaneStoreData {
|
||||||
|
return {
|
||||||
|
id: raw.id,
|
||||||
|
name: raw.name,
|
||||||
|
address: raw.address || '',
|
||||||
|
city: raw.city || '',
|
||||||
|
state: raw.state || '',
|
||||||
|
zip: raw.zip || '',
|
||||||
|
lat: raw.lat || 0,
|
||||||
|
long: raw.long || 0,
|
||||||
|
phone: raw.phone || '',
|
||||||
|
medical: raw.medical || false,
|
||||||
|
recreational: raw.recreational || false,
|
||||||
|
product_count: raw.product_count || 0,
|
||||||
|
rating: raw.rating || 0,
|
||||||
|
reviews_count: raw.reviews_count || 0,
|
||||||
|
working_hours: raw.working_hours || [],
|
||||||
|
url_slug: raw.url_slug || '',
|
||||||
|
photo: raw.photo,
|
||||||
|
raw,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse raw Algolia product hit into normalized structure
|
||||||
|
*/
|
||||||
|
function parseProductHit(hit: any): JaneProductHit {
|
||||||
|
return {
|
||||||
|
product_id: hit.product_id,
|
||||||
|
name: hit.name,
|
||||||
|
brand: hit.brand,
|
||||||
|
kind: hit.kind,
|
||||||
|
category: hit.category,
|
||||||
|
percent_thc: hit.percent_thc ?? null,
|
||||||
|
percent_cbd: hit.percent_cbd ?? null,
|
||||||
|
price_gram: hit.price_gram ?? null,
|
||||||
|
price_each: hit.price_each ?? null,
|
||||||
|
price_eighth_ounce: hit.price_eighth_ounce ?? null,
|
||||||
|
price_quarter_ounce: hit.price_quarter_ounce ?? null,
|
||||||
|
price_half_ounce: hit.price_half_ounce ?? null,
|
||||||
|
price_ounce: hit.price_ounce ?? null,
|
||||||
|
image_urls: hit.image_urls || [],
|
||||||
|
aggregate_rating: hit.aggregate_rating ?? null,
|
||||||
|
review_count: hit.review_count ?? null,
|
||||||
|
available_for_pickup: hit.available_for_pickup ?? false,
|
||||||
|
available_for_delivery: hit.available_for_delivery ?? false,
|
||||||
|
raw: hit,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// UTILITY
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
function sleep(ms: number): Promise<void> {
|
||||||
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||||
|
}
|
||||||
48
backend/src/platforms/jane/index.ts
Normal file
48
backend/src/platforms/jane/index.ts
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
/**
|
||||||
|
* iHeartJane Platform Module
|
||||||
|
*
|
||||||
|
* Single export point for all Jane communication.
|
||||||
|
* All Jane workers MUST import from this module.
|
||||||
|
*/
|
||||||
|
|
||||||
|
export {
|
||||||
|
// Session Management
|
||||||
|
startSession,
|
||||||
|
endSession,
|
||||||
|
getCurrentSession,
|
||||||
|
|
||||||
|
// Proxy/Rotation
|
||||||
|
setCrawlRotator,
|
||||||
|
getCrawlRotator,
|
||||||
|
|
||||||
|
// Core Operations
|
||||||
|
navigateAndCapture,
|
||||||
|
fetchStoreInfo,
|
||||||
|
extractStoreIdFromUrl,
|
||||||
|
|
||||||
|
// Configuration
|
||||||
|
JANE_CONFIG,
|
||||||
|
|
||||||
|
// Types
|
||||||
|
type JaneSession,
|
||||||
|
type JaneStoreData,
|
||||||
|
type JaneProductHit,
|
||||||
|
type CapturedResponse,
|
||||||
|
} from './client';
|
||||||
|
|
||||||
|
// High-level Query Functions
|
||||||
|
export {
|
||||||
|
resolveStoreFromUrl,
|
||||||
|
getStoreById,
|
||||||
|
fetchProductsFromUrl,
|
||||||
|
fetchProductsByStoreId,
|
||||||
|
discoverStoresByState,
|
||||||
|
|
||||||
|
// Types
|
||||||
|
type ResolveStoreResult,
|
||||||
|
type FetchProductsResult,
|
||||||
|
type DiscoveredStore,
|
||||||
|
} from './queries';
|
||||||
|
|
||||||
|
// Re-export CrawlRotator types from canonical location
|
||||||
|
export type { CrawlRotator, Proxy, ProxyStats } from '../../services/crawl-rotator';
|
||||||
292
backend/src/platforms/jane/queries.ts
Normal file
292
backend/src/platforms/jane/queries.ts
Normal file
@@ -0,0 +1,292 @@
|
|||||||
|
/**
|
||||||
|
* iHeartJane High-Level Query Functions
|
||||||
|
*
|
||||||
|
* Wraps the low-level client methods with business logic
|
||||||
|
* for common operations like store lookup and product fetching.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import {
|
||||||
|
startSession,
|
||||||
|
endSession,
|
||||||
|
navigateAndCapture,
|
||||||
|
fetchStoreInfo,
|
||||||
|
extractStoreIdFromUrl,
|
||||||
|
JaneStoreData,
|
||||||
|
JaneProductHit,
|
||||||
|
CapturedResponse,
|
||||||
|
} from './client';
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// STORE OPERATIONS
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
export interface ResolveStoreResult {
|
||||||
|
storeId: string;
|
||||||
|
store: JaneStoreData;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resolve a menu URL to a Jane store ID and fetch store details
|
||||||
|
*
|
||||||
|
* @param menuUrl - The dispensary's menu URL (e.g., https://theflowershopusa.com/mesa/menu/)
|
||||||
|
* @returns Store ID and store data, or null if not found
|
||||||
|
*/
|
||||||
|
export async function resolveStoreFromUrl(menuUrl: string): Promise<ResolveStoreResult | null> {
|
||||||
|
try {
|
||||||
|
await startSession();
|
||||||
|
|
||||||
|
// Navigate and capture store data
|
||||||
|
const { store, responses } = await navigateAndCapture(menuUrl, {
|
||||||
|
waitForProducts: false,
|
||||||
|
captureStore: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (store) {
|
||||||
|
return {
|
||||||
|
storeId: String(store.id),
|
||||||
|
store,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// If store wasn't captured from navigation, try to extract ID and fetch directly
|
||||||
|
const storeId = await extractStoreIdFromUrl(menuUrl);
|
||||||
|
if (storeId) {
|
||||||
|
const storeData = await fetchStoreInfo(storeId);
|
||||||
|
if (storeData) {
|
||||||
|
return {
|
||||||
|
storeId,
|
||||||
|
store: storeData,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we captured any store response
|
||||||
|
const storeResponse = responses.find((r) => r.type === 'store');
|
||||||
|
if (storeResponse?.data?.store) {
|
||||||
|
const storeData = storeResponse.data.store;
|
||||||
|
return {
|
||||||
|
storeId: String(storeData.id),
|
||||||
|
store: storeData,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
} finally {
|
||||||
|
await endSession();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get store information by store ID
|
||||||
|
*
|
||||||
|
* @param storeId - Jane store ID (numeric string)
|
||||||
|
* @returns Store data or null if not found
|
||||||
|
*/
|
||||||
|
export async function getStoreById(storeId: string | number): Promise<JaneStoreData | null> {
|
||||||
|
try {
|
||||||
|
await startSession(String(storeId));
|
||||||
|
|
||||||
|
// Need to navigate to a Jane page first to establish browser context
|
||||||
|
// Use the Jane stores page as a base
|
||||||
|
const { page } = (await import('./client')).getCurrentSession()!;
|
||||||
|
await page.goto('https://www.iheartjane.com/stores', {
|
||||||
|
waitUntil: 'domcontentloaded',
|
||||||
|
timeout: 30000,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Now fetch store info
|
||||||
|
return await fetchStoreInfo(storeId);
|
||||||
|
} finally {
|
||||||
|
await endSession();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// PRODUCT OPERATIONS
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
export interface FetchProductsResult {
|
||||||
|
store?: JaneStoreData;
|
||||||
|
products: JaneProductHit[];
|
||||||
|
totalCaptured: number;
|
||||||
|
responses: CapturedResponse[];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fetch all products from a Jane menu page
|
||||||
|
*
|
||||||
|
* @param menuUrl - The dispensary's menu URL
|
||||||
|
* @returns Products and store data captured from the page
|
||||||
|
*/
|
||||||
|
export async function fetchProductsFromUrl(menuUrl: string): Promise<FetchProductsResult> {
|
||||||
|
try {
|
||||||
|
await startSession();
|
||||||
|
|
||||||
|
const { store, products, responses } = await navigateAndCapture(menuUrl, {
|
||||||
|
waitForProducts: true,
|
||||||
|
captureStore: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
store,
|
||||||
|
products,
|
||||||
|
totalCaptured: products.length,
|
||||||
|
responses,
|
||||||
|
};
|
||||||
|
} finally {
|
||||||
|
await endSession();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fetch products for a known store ID
|
||||||
|
* Constructs a Jane menu URL and fetches products
|
||||||
|
*
|
||||||
|
* @param storeId - Jane store ID
|
||||||
|
* @param urlSlug - Optional URL slug for the store
|
||||||
|
* @returns Products and store data
|
||||||
|
*/
|
||||||
|
export async function fetchProductsByStoreId(
|
||||||
|
storeId: string | number,
|
||||||
|
urlSlug?: string
|
||||||
|
): Promise<FetchProductsResult> {
|
||||||
|
// Construct a Jane embedded menu URL
|
||||||
|
// Jane stores can be accessed via: https://www.iheartjane.com/stores/{store_id}/{url_slug}
|
||||||
|
const slug = urlSlug || 'menu';
|
||||||
|
const menuUrl = `https://www.iheartjane.com/stores/${storeId}/${slug}`;
|
||||||
|
|
||||||
|
return fetchProductsFromUrl(menuUrl);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// DISCOVERY OPERATIONS
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
export interface DiscoveredStore {
|
||||||
|
storeId: string;
|
||||||
|
name: string;
|
||||||
|
address: string;
|
||||||
|
city: string;
|
||||||
|
state: string;
|
||||||
|
zip: string;
|
||||||
|
lat: number;
|
||||||
|
long: number;
|
||||||
|
medical: boolean;
|
||||||
|
recreational: boolean;
|
||||||
|
productCount: number;
|
||||||
|
urlSlug: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Discover Jane stores in a state
|
||||||
|
* Navigates to Jane's store locator and extracts store data
|
||||||
|
*
|
||||||
|
* @param stateCode - Two-letter state code (e.g., 'AZ')
|
||||||
|
* @returns Array of discovered stores
|
||||||
|
*/
|
||||||
|
export async function discoverStoresByState(stateCode: string): Promise<DiscoveredStore[]> {
|
||||||
|
const stores: DiscoveredStore[] = [];
|
||||||
|
|
||||||
|
try {
|
||||||
|
await startSession();
|
||||||
|
|
||||||
|
const { page } = (await import('./client')).getCurrentSession()!;
|
||||||
|
|
||||||
|
// Jane has a store directory at /stores
|
||||||
|
// Try state-specific URL first
|
||||||
|
const storeListUrl = `https://www.iheartjane.com/stores?state=${stateCode}`;
|
||||||
|
|
||||||
|
console.log(`[Jane Queries] Discovering stores in ${stateCode}: ${storeListUrl}`);
|
||||||
|
|
||||||
|
await page.setRequestInterception(true);
|
||||||
|
|
||||||
|
// Capture store list responses
|
||||||
|
const storeResponses: any[] = [];
|
||||||
|
|
||||||
|
page.on('request', (req) => {
|
||||||
|
const type = req.resourceType();
|
||||||
|
if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
|
||||||
|
req.abort();
|
||||||
|
} else {
|
||||||
|
req.continue();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
page.on('response', async (response) => {
|
||||||
|
const url = response.url();
|
||||||
|
const contentType = response.headers()['content-type'] || '';
|
||||||
|
|
||||||
|
if (url.includes('iheartjane.com') && contentType.includes('json')) {
|
||||||
|
try {
|
||||||
|
const json = await response.json();
|
||||||
|
if (json.stores && Array.isArray(json.stores)) {
|
||||||
|
storeResponses.push(...json.stores);
|
||||||
|
console.log(`[Jane Queries] Captured ${json.stores.length} stores from API`);
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Not valid JSON
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
await page.goto(storeListUrl, {
|
||||||
|
waitUntil: 'networkidle2',
|
||||||
|
timeout: 60000,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Wait for stores to load
|
||||||
|
await new Promise((r) => setTimeout(r, 3000));
|
||||||
|
|
||||||
|
// Parse captured stores
|
||||||
|
for (const store of storeResponses) {
|
||||||
|
// Filter by state
|
||||||
|
if (store.state?.toLowerCase() === stateCode.toLowerCase() ||
|
||||||
|
store.state?.toLowerCase() === getStateName(stateCode).toLowerCase()) {
|
||||||
|
stores.push({
|
||||||
|
storeId: String(store.id),
|
||||||
|
name: store.name || '',
|
||||||
|
address: store.address || '',
|
||||||
|
city: store.city || '',
|
||||||
|
state: store.state || stateCode,
|
||||||
|
zip: store.zip || '',
|
||||||
|
lat: store.lat || 0,
|
||||||
|
long: store.long || 0,
|
||||||
|
medical: store.medical || false,
|
||||||
|
recreational: store.recreational || false,
|
||||||
|
productCount: store.product_count || 0,
|
||||||
|
urlSlug: store.url_slug || '',
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[Jane Queries] Found ${stores.length} stores in ${stateCode}`);
|
||||||
|
|
||||||
|
return stores;
|
||||||
|
} finally {
|
||||||
|
await endSession();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// UTILITY
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert state code to full state name
|
||||||
|
*/
|
||||||
|
function getStateName(code: string): string {
|
||||||
|
const states: Record<string, string> = {
|
||||||
|
AL: 'Alabama', AK: 'Alaska', AZ: 'Arizona', AR: 'Arkansas', CA: 'California',
|
||||||
|
CO: 'Colorado', CT: 'Connecticut', DE: 'Delaware', FL: 'Florida', GA: 'Georgia',
|
||||||
|
HI: 'Hawaii', ID: 'Idaho', IL: 'Illinois', IN: 'Indiana', IA: 'Iowa',
|
||||||
|
KS: 'Kansas', KY: 'Kentucky', LA: 'Louisiana', ME: 'Maine', MD: 'Maryland',
|
||||||
|
MA: 'Massachusetts', MI: 'Michigan', MN: 'Minnesota', MS: 'Mississippi', MO: 'Missouri',
|
||||||
|
MT: 'Montana', NE: 'Nebraska', NV: 'Nevada', NH: 'New Hampshire', NJ: 'New Jersey',
|
||||||
|
NM: 'New Mexico', NY: 'New York', NC: 'North Carolina', ND: 'North Dakota', OH: 'Ohio',
|
||||||
|
OK: 'Oklahoma', OR: 'Oregon', PA: 'Pennsylvania', RI: 'Rhode Island', SC: 'South Carolina',
|
||||||
|
SD: 'South Dakota', TN: 'Tennessee', TX: 'Texas', UT: 'Utah', VT: 'Vermont',
|
||||||
|
VA: 'Virginia', WA: 'Washington', WV: 'West Virginia', WI: 'Wisconsin', WY: 'Wyoming',
|
||||||
|
DC: 'District of Columbia',
|
||||||
|
};
|
||||||
|
return states[code.toUpperCase()] || code;
|
||||||
|
}
|
||||||
@@ -130,7 +130,7 @@ router.get('/national/summary', async (req, res) => {
|
|||||||
return res.json(cached);
|
return res.json(cached);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Single optimized query for all state metrics
|
// Single optimized query for all state metrics (only validated stores)
|
||||||
const { rows: stateMetrics } = await pool.query(`
|
const { rows: stateMetrics } = await pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
d.state,
|
d.state,
|
||||||
@@ -146,6 +146,9 @@ router.get('/national/summary', async (req, res) => {
|
|||||||
LEFT JOIN store_products sp ON d.id = sp.dispensary_id
|
LEFT JOIN store_products sp ON d.id = sp.dispensary_id
|
||||||
LEFT JOIN states s ON d.state = s.code
|
LEFT JOIN states s ON d.state = s.code
|
||||||
WHERE d.state IS NOT NULL
|
WHERE d.state IS NOT NULL
|
||||||
|
AND d.platform_dispensary_id IS NOT NULL
|
||||||
|
AND d.menu_url IS NOT NULL
|
||||||
|
AND (d.stage IS NULL OR d.stage != 'deprecated')
|
||||||
GROUP BY d.state, s.name
|
GROUP BY d.state, s.name
|
||||||
ORDER BY store_count DESC
|
ORDER BY store_count DESC
|
||||||
`);
|
`);
|
||||||
|
|||||||
@@ -53,6 +53,7 @@ router.get('/', async (req, res) => {
|
|||||||
last_crawl_at,
|
last_crawl_at,
|
||||||
crawl_enabled,
|
crawl_enabled,
|
||||||
dutchie_verified,
|
dutchie_verified,
|
||||||
|
stage,
|
||||||
created_at,
|
created_at,
|
||||||
updated_at
|
updated_at
|
||||||
FROM dispensaries
|
FROM dispensaries
|
||||||
@@ -79,15 +80,17 @@ router.get('/', async (req, res) => {
|
|||||||
params.push(state);
|
params.push(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filter by crawl_enabled - defaults to showing only enabled
|
// Filter by crawl_enabled
|
||||||
if (crawl_enabled === 'false' || crawl_enabled === '0') {
|
if (crawl_enabled === 'false' || crawl_enabled === '0') {
|
||||||
// Explicitly show disabled only
|
// Explicitly show disabled only
|
||||||
conditions.push(`(crawl_enabled = false OR crawl_enabled IS NULL)`);
|
conditions.push(`(crawl_enabled = false OR crawl_enabled IS NULL)`);
|
||||||
} else if (crawl_enabled === 'all') {
|
} else if (crawl_enabled === 'all') {
|
||||||
// Show all (no filter)
|
// Show all including deprecated (no validation filter)
|
||||||
} else {
|
} else {
|
||||||
// Default: show only enabled
|
// Default: show only validated stores (has location data, not deprecated)
|
||||||
conditions.push(`crawl_enabled = true`);
|
conditions.push(`platform_dispensary_id IS NOT NULL`);
|
||||||
|
conditions.push(`menu_url IS NOT NULL`);
|
||||||
|
conditions.push(`(stage IS NULL OR stage != 'deprecated')`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filter by dutchie_verified if provided
|
// Filter by dutchie_verified if provided
|
||||||
@@ -140,12 +143,15 @@ router.get('/', async (req, res) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// Get menu type stats
|
// Get menu type stats (only validated stores)
|
||||||
router.get('/stats/menu-types', async (req, res) => {
|
router.get('/stats/menu-types', async (req, res) => {
|
||||||
try {
|
try {
|
||||||
const result = await pool.query(`
|
const result = await pool.query(`
|
||||||
SELECT menu_type, COUNT(*) as count
|
SELECT menu_type, COUNT(*) as count
|
||||||
FROM dispensaries
|
FROM dispensaries
|
||||||
|
WHERE platform_dispensary_id IS NOT NULL
|
||||||
|
AND menu_url IS NOT NULL
|
||||||
|
AND (stage IS NULL OR stage != 'deprecated')
|
||||||
GROUP BY menu_type
|
GROUP BY menu_type
|
||||||
ORDER BY count DESC
|
ORDER BY count DESC
|
||||||
`);
|
`);
|
||||||
@@ -163,6 +169,8 @@ router.get('/stats/crawl-status', async (req, res) => {
|
|||||||
|
|
||||||
let query = `
|
let query = `
|
||||||
SELECT
|
SELECT
|
||||||
|
COUNT(*) FILTER (WHERE platform_dispensary_id IS NOT NULL AND menu_url IS NOT NULL AND (stage IS NULL OR stage != 'deprecated')) as validated_count,
|
||||||
|
COUNT(*) FILTER (WHERE stage = 'deprecated') as deprecated_count,
|
||||||
COUNT(*) FILTER (WHERE crawl_enabled = true) as enabled_count,
|
COUNT(*) FILTER (WHERE crawl_enabled = true) as enabled_count,
|
||||||
COUNT(*) FILTER (WHERE crawl_enabled = false OR crawl_enabled IS NULL) as disabled_count,
|
COUNT(*) FILTER (WHERE crawl_enabled = false OR crawl_enabled IS NULL) as disabled_count,
|
||||||
COUNT(*) FILTER (WHERE dutchie_verified = true) as verified_count,
|
COUNT(*) FILTER (WHERE dutchie_verified = true) as verified_count,
|
||||||
|
|||||||
@@ -26,10 +26,10 @@ router.get('/dashboard', async (req: Request, res: Response) => {
|
|||||||
return res.json(dashboardCache.data);
|
return res.json(dashboardCache.data);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Single optimized query for all counts
|
// Single optimized query for all counts (only validated stores)
|
||||||
const { rows } = await pool.query(`
|
const { rows } = await pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
(SELECT COUNT(*) FROM dispensaries) as dispensary_count,
|
(SELECT COUNT(*) FROM dispensaries WHERE platform_dispensary_id IS NOT NULL AND menu_url IS NOT NULL AND (stage IS NULL OR stage != 'deprecated')) as dispensary_count,
|
||||||
(SELECT n_live_tup FROM pg_stat_user_tables WHERE relname = 'store_products') as product_count,
|
(SELECT n_live_tup FROM pg_stat_user_tables WHERE relname = 'store_products') as product_count,
|
||||||
(SELECT COUNT(*) FROM (SELECT DISTINCT brand_name_raw FROM store_products WHERE brand_name_raw IS NOT NULL LIMIT 10000) b) as brand_count,
|
(SELECT COUNT(*) FROM (SELECT DISTINCT brand_name_raw FROM store_products WHERE brand_name_raw IS NOT NULL LIMIT 10000) b) as brand_count,
|
||||||
(SELECT COUNT(*) FROM (SELECT DISTINCT category_raw FROM store_products WHERE category_raw IS NOT NULL LIMIT 1000) c) as category_count,
|
(SELECT COUNT(*) FROM (SELECT DISTINCT category_raw FROM store_products WHERE category_raw IS NOT NULL LIMIT 1000) c) as category_count,
|
||||||
|
|||||||
@@ -78,14 +78,17 @@ router.get('/metrics', async (_req: Request, res: Response) => {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* GET /api/admin/orchestrator/states
|
* GET /api/admin/orchestrator/states
|
||||||
* Returns array of states with at least one crawl-enabled dispensary
|
* Returns array of states with at least one validated dispensary
|
||||||
*/
|
*/
|
||||||
router.get('/states', async (_req: Request, res: Response) => {
|
router.get('/states', async (_req: Request, res: Response) => {
|
||||||
try {
|
try {
|
||||||
const { rows } = await pool.query(`
|
const { rows } = await pool.query(`
|
||||||
SELECT DISTINCT state, COUNT(*) as store_count
|
SELECT DISTINCT state, COUNT(*) as store_count
|
||||||
FROM dispensaries
|
FROM dispensaries
|
||||||
WHERE state IS NOT NULL AND crawl_enabled = true
|
WHERE state IS NOT NULL
|
||||||
|
AND platform_dispensary_id IS NOT NULL
|
||||||
|
AND menu_url IS NOT NULL
|
||||||
|
AND (stage IS NULL OR stage != 'deprecated')
|
||||||
GROUP BY state
|
GROUP BY state
|
||||||
ORDER BY state
|
ORDER BY state
|
||||||
`);
|
`);
|
||||||
|
|||||||
@@ -982,19 +982,24 @@ router.get('/stats', async (_req: Request, res: Response) => {
|
|||||||
GROUP BY stage
|
GROUP BY stage
|
||||||
`);
|
`);
|
||||||
|
|
||||||
// Dispensaries by stage
|
// Dispensaries by stage (only validated stores)
|
||||||
const { rows: dispensaryStats } = await pool.query(`
|
const { rows: dispensaryStats } = await pool.query(`
|
||||||
SELECT stage, COUNT(*) as count
|
SELECT stage, COUNT(*) as count
|
||||||
FROM dispensaries
|
FROM dispensaries
|
||||||
WHERE crawl_enabled = true
|
WHERE platform_dispensary_id IS NOT NULL
|
||||||
|
AND menu_url IS NOT NULL
|
||||||
|
AND (stage IS NULL OR stage != 'deprecated')
|
||||||
GROUP BY stage
|
GROUP BY stage
|
||||||
`);
|
`);
|
||||||
|
|
||||||
// By state for dispensaries
|
// By state for dispensaries (only validated stores)
|
||||||
const { rows: byState } = await pool.query(`
|
const { rows: byState } = await pool.query(`
|
||||||
SELECT state, stage, COUNT(*) as count
|
SELECT state, stage, COUNT(*) as count
|
||||||
FROM dispensaries
|
FROM dispensaries
|
||||||
WHERE crawl_enabled = true AND state IS NOT NULL
|
WHERE state IS NOT NULL
|
||||||
|
AND platform_dispensary_id IS NOT NULL
|
||||||
|
AND menu_url IS NOT NULL
|
||||||
|
AND (stage IS NULL OR stage != 'deprecated')
|
||||||
GROUP BY state, stage
|
GROUP BY state, stage
|
||||||
ORDER BY state, stage
|
ORDER BY state, stage
|
||||||
`);
|
`);
|
||||||
|
|||||||
173
backend/src/routes/proxy-admin.ts
Normal file
173
backend/src/routes/proxy-admin.ts
Normal file
@@ -0,0 +1,173 @@
|
|||||||
|
import { Router, Request, Response } from 'express';
|
||||||
|
import { pool } from '../db/pool';
|
||||||
|
import { authMiddleware } from '../auth/middleware';
|
||||||
|
|
||||||
|
const router = Router();
|
||||||
|
router.use(authMiddleware);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/admin/proxies
|
||||||
|
* Get all static proxies (fallback when API unavailable)
|
||||||
|
*/
|
||||||
|
router.get('/', async (_req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const { rows } = await pool.query(`
|
||||||
|
SELECT
|
||||||
|
id, host, port, protocol, username,
|
||||||
|
city, state, country, country_code,
|
||||||
|
active, failure_count, consecutive_403_count,
|
||||||
|
response_time_ms, last_tested_at
|
||||||
|
FROM proxies
|
||||||
|
ORDER BY active DESC, failure_count ASC
|
||||||
|
`);
|
||||||
|
|
||||||
|
res.json({ proxies: rows });
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[ProxyAdmin] Error fetching proxies:', error.message);
|
||||||
|
res.status(500).json({ error: error.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/admin/proxies/sessions
|
||||||
|
* Get active proxy sessions (geo-locked workers)
|
||||||
|
*/
|
||||||
|
router.get('/sessions', async (_req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
// For now, return worker preflight data as "sessions"
|
||||||
|
// TODO: Implement proper session tracking table
|
||||||
|
const { rows } = await pool.query(`
|
||||||
|
SELECT
|
||||||
|
worker_id,
|
||||||
|
http_ip as proxy_ip,
|
||||||
|
'US' as proxy_geo,
|
||||||
|
'AZ' as state,
|
||||||
|
preflight_http_at as started_at,
|
||||||
|
preflight_http_at + interval '30 minutes' as expires_at,
|
||||||
|
tasks_completed as tasks_completed
|
||||||
|
FROM worker_registry
|
||||||
|
WHERE status = 'active'
|
||||||
|
AND http_ip IS NOT NULL
|
||||||
|
ORDER BY preflight_http_at DESC
|
||||||
|
`);
|
||||||
|
|
||||||
|
res.json({ sessions: rows });
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[ProxyAdmin] Error fetching sessions:', error.message);
|
||||||
|
res.status(500).json({ error: error.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/admin/proxies/tasks
|
||||||
|
* Get recent tasks with proxy usage info
|
||||||
|
*/
|
||||||
|
router.get('/tasks', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const limit = parseInt(req.query.limit as string) || 100;
|
||||||
|
|
||||||
|
const { rows } = await pool.query(`
|
||||||
|
SELECT
|
||||||
|
t.id,
|
||||||
|
t.worker_id,
|
||||||
|
t.role,
|
||||||
|
t.status,
|
||||||
|
t.proxy_ip,
|
||||||
|
t.proxy_geo,
|
||||||
|
t.proxy_source,
|
||||||
|
d.name as dispensary_name,
|
||||||
|
d.city as dispensary_city,
|
||||||
|
d.state as dispensary_state,
|
||||||
|
t.created_at
|
||||||
|
FROM worker_tasks t
|
||||||
|
LEFT JOIN dispensaries d ON t.dispensary_id = d.id
|
||||||
|
ORDER BY t.id DESC
|
||||||
|
LIMIT $1
|
||||||
|
`, [limit]);
|
||||||
|
|
||||||
|
res.json({ tasks: rows });
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[ProxyAdmin] Error fetching tasks:', error.message);
|
||||||
|
res.status(500).json({ error: error.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/admin/proxies/evomi-settings
|
||||||
|
* Get Evomi API configuration status
|
||||||
|
*/
|
||||||
|
router.get('/evomi-settings', async (_req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const apiKeySet = !!process.env.EVOMI_API_KEY;
|
||||||
|
|
||||||
|
// US state codes for region mapping
|
||||||
|
const usStates = [
|
||||||
|
'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
|
||||||
|
'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
|
||||||
|
'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
|
||||||
|
'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
|
||||||
|
'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY'
|
||||||
|
];
|
||||||
|
|
||||||
|
// Major US cities we know are available
|
||||||
|
const majorCities = [
|
||||||
|
'phoenix', 'tucson', 'los.angeles', 'san.francisco', 'denver',
|
||||||
|
'chicago', 'new.york', 'miami', 'seattle', 'portland',
|
||||||
|
'las.vegas', 'boston', 'detroit', 'atlanta', 'dallas'
|
||||||
|
];
|
||||||
|
|
||||||
|
res.json({
|
||||||
|
enabled: apiKeySet,
|
||||||
|
api_key_set: apiKeySet,
|
||||||
|
available_regions: usStates,
|
||||||
|
available_cities: majorCities,
|
||||||
|
});
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[ProxyAdmin] Error fetching Evomi settings:', error.message);
|
||||||
|
res.status(500).json({ error: error.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/admin/proxies
|
||||||
|
* Add a static proxy (fallback)
|
||||||
|
*/
|
||||||
|
router.post('/', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const { host, port, protocol, username, password, city, state, country } = req.body;
|
||||||
|
|
||||||
|
if (!host || !port) {
|
||||||
|
return res.status(400).json({ error: 'Host and port are required' });
|
||||||
|
}
|
||||||
|
|
||||||
|
const { rows } = await pool.query(`
|
||||||
|
INSERT INTO proxies (host, port, protocol, username, password, city, state, country, active)
|
||||||
|
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, true)
|
||||||
|
RETURNING *
|
||||||
|
`, [host, port, protocol || 'http', username, password, city, state, country]);
|
||||||
|
|
||||||
|
res.json({ proxy: rows[0] });
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[ProxyAdmin] Error adding proxy:', error.message);
|
||||||
|
res.status(500).json({ error: error.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DELETE /api/admin/proxies/:id
|
||||||
|
* Remove a static proxy
|
||||||
|
*/
|
||||||
|
router.delete('/:id', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const { id } = req.params;
|
||||||
|
|
||||||
|
await pool.query('DELETE FROM proxies WHERE id = $1', [id]);
|
||||||
|
|
||||||
|
res.json({ success: true });
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[ProxyAdmin] Error deleting proxy:', error.message);
|
||||||
|
res.status(500).json({ error: error.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
export default router;
|
||||||
@@ -900,6 +900,168 @@ export interface PreflightResult {
|
|||||||
responseTimeMs: number | null;
|
responseTimeMs: number | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// EVOMI GEO-TARGETING
|
||||||
|
// Dynamic proxy URL generation based on dispensary state
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* State code to Evomi region ID mapping
|
||||||
|
* Format: lowercase, spaces replaced with dots
|
||||||
|
*/
|
||||||
|
const STATE_TO_REGION: Record<string, string> = {
|
||||||
|
'AL': 'alabama', 'AK': 'alaska', 'AZ': 'arizona', 'AR': 'arkansas',
|
||||||
|
'CA': 'california', 'CO': 'colorado', 'CT': 'connecticut', 'DE': 'delaware',
|
||||||
|
'FL': 'florida', 'GA': 'georgia', 'HI': 'hawaii', 'ID': 'idaho',
|
||||||
|
'IL': 'illinois', 'IN': 'indiana', 'IA': 'iowa', 'KS': 'kansas',
|
||||||
|
'KY': 'kentucky', 'LA': 'louisiana', 'ME': 'maine', 'MD': 'maryland',
|
||||||
|
'MA': 'massachusetts', 'MI': 'michigan', 'MN': 'minnesota', 'MS': 'mississippi',
|
||||||
|
'MO': 'missouri', 'MT': 'montana', 'NE': 'nebraska', 'NV': 'nevada',
|
||||||
|
'NH': 'new.hampshire', 'NJ': 'new.jersey', 'NM': 'new.mexico', 'NY': 'new.york',
|
||||||
|
'NC': 'north.carolina', 'ND': 'north.dakota', 'OH': 'ohio', 'OK': 'oklahoma',
|
||||||
|
'OR': 'oregon', 'PA': 'pennsylvania', 'RI': 'rhode.island', 'SC': 'south.carolina',
|
||||||
|
'SD': 'south.dakota', 'TN': 'tennessee', 'TX': 'texas', 'UT': 'utah',
|
||||||
|
'VT': 'vermont', 'VA': 'virginia', 'WA': 'washington', 'WV': 'west.virginia',
|
||||||
|
'WI': 'wisconsin', 'WY': 'wyoming',
|
||||||
|
// Canadian provinces
|
||||||
|
'AB': 'alberta', 'BC': 'british.columbia', 'MB': 'manitoba', 'NB': 'new.brunswick',
|
||||||
|
'NL': 'newfoundland.and.labrador', 'NS': 'nova.scotia', 'ON': 'ontario',
|
||||||
|
'PE': 'prince.edward.island', 'QC': 'quebec', 'SK': 'saskatchewan',
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Evomi proxy configuration from environment
|
||||||
|
*/
|
||||||
|
export interface EvomiConfig {
|
||||||
|
enabled: boolean;
|
||||||
|
user: string;
|
||||||
|
pass: string;
|
||||||
|
host: string;
|
||||||
|
port: number;
|
||||||
|
sessionLifetimeMinutes: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get Evomi configuration from environment variables
|
||||||
|
*/
|
||||||
|
export function getEvomiConfig(): EvomiConfig {
|
||||||
|
const user = process.env.EVOMI_USER || '';
|
||||||
|
const pass = process.env.EVOMI_PASS || '';
|
||||||
|
|
||||||
|
return {
|
||||||
|
enabled: !!(user && pass),
|
||||||
|
user,
|
||||||
|
pass,
|
||||||
|
host: process.env.EVOMI_HOST || 'rpc.evomi.com',
|
||||||
|
port: parseInt(process.env.EVOMI_PORT || '1000', 10),
|
||||||
|
sessionLifetimeMinutes: parseInt(process.env.EVOMI_SESSION_LIFETIME || '30', 10),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build a geo-targeted Evomi proxy URL
|
||||||
|
*
|
||||||
|
* @param stateCode - US state code (e.g., 'AZ')
|
||||||
|
* @param workerId - Worker ID for session stickiness
|
||||||
|
* @param city - Optional city name (lowercase, dots for spaces)
|
||||||
|
* @returns Proxy URL or null if Evomi not configured
|
||||||
|
*/
|
||||||
|
export function buildEvomiProxyUrl(
|
||||||
|
stateCode: string,
|
||||||
|
workerId: string,
|
||||||
|
city?: string
|
||||||
|
): { url: string; geo: string; source: 'api' } | null {
|
||||||
|
const config = getEvomiConfig();
|
||||||
|
|
||||||
|
if (!config.enabled) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
const region = STATE_TO_REGION[stateCode.toUpperCase()];
|
||||||
|
if (!region) {
|
||||||
|
console.warn(`[Evomi] Unknown state code: ${stateCode}`);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate session ID: workerId + region (sticky per worker per state)
|
||||||
|
const sessionId = `${workerId.replace(/[^a-zA-Z0-9]/g, '').slice(0, 6)}${region.slice(0, 4)}`;
|
||||||
|
|
||||||
|
// Build geo target string
|
||||||
|
let geoParams = `_country-US_region-${region}`;
|
||||||
|
let geoDisplay = region;
|
||||||
|
|
||||||
|
if (city) {
|
||||||
|
geoParams += `_city-${city}`;
|
||||||
|
geoDisplay = `${city}, ${region}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build full proxy URL
|
||||||
|
// Format: http://user:pass_geo_session@host:port
|
||||||
|
const url = `http://${config.user}:${config.pass}${geoParams}_session-${sessionId}_lifetime-${config.sessionLifetimeMinutes}@${config.host}:${config.port}`;
|
||||||
|
|
||||||
|
return {
|
||||||
|
url,
|
||||||
|
geo: geoDisplay,
|
||||||
|
source: 'api' as const,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result from getProxyForTask
|
||||||
|
*/
|
||||||
|
export interface GeoProxyResult {
|
||||||
|
url: string;
|
||||||
|
ip: string | null; // Will be resolved after first request
|
||||||
|
geo: string;
|
||||||
|
source: 'api' | 'static';
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a geo-targeted proxy for a specific task
|
||||||
|
* Prefers Evomi dynamic proxies, falls back to static table
|
||||||
|
*
|
||||||
|
* @param stateCode - Dispensary state code (e.g., 'AZ')
|
||||||
|
* @param workerId - Worker ID for session stickiness
|
||||||
|
* @param city - Optional city name
|
||||||
|
* @param rotator - CrawlRotator instance for fallback to static proxies
|
||||||
|
* @returns GeoProxyResult with URL and metadata
|
||||||
|
*/
|
||||||
|
export function getProxyForTask(
|
||||||
|
stateCode: string,
|
||||||
|
workerId: string,
|
||||||
|
city?: string,
|
||||||
|
rotator?: CrawlRotator
|
||||||
|
): GeoProxyResult | null {
|
||||||
|
// Try Evomi first
|
||||||
|
const evomiProxy = buildEvomiProxyUrl(stateCode, workerId, city);
|
||||||
|
if (evomiProxy) {
|
||||||
|
console.log(`[GeoProxy] Using Evomi for ${stateCode}: ${evomiProxy.geo}`);
|
||||||
|
return {
|
||||||
|
url: evomiProxy.url,
|
||||||
|
ip: null, // Will be resolved after connection
|
||||||
|
geo: evomiProxy.geo,
|
||||||
|
source: 'api',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fall back to static proxy table
|
||||||
|
if (rotator) {
|
||||||
|
const staticProxy = rotator.proxy.getCurrent();
|
||||||
|
if (staticProxy) {
|
||||||
|
console.log(`[GeoProxy] Falling back to static proxy: ${staticProxy.host}`);
|
||||||
|
return {
|
||||||
|
url: rotator.proxy.getProxyUrl(staticProxy),
|
||||||
|
ip: staticProxy.host,
|
||||||
|
geo: staticProxy.state || staticProxy.country || 'unknown',
|
||||||
|
source: 'static',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.warn(`[GeoProxy] No proxy available for ${stateCode}`);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// SINGLETON INSTANCES
|
// SINGLETON INSTANCES
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|||||||
@@ -244,15 +244,22 @@ class TaskScheduler {
|
|||||||
* - Different crawl frequencies per state (e.g., AZ=4h, MI=6h)
|
* - Different crawl frequencies per state (e.g., AZ=4h, MI=6h)
|
||||||
* - Better rate limit management (one state at a time)
|
* - Better rate limit management (one state at a time)
|
||||||
* - Easier debugging and monitoring per state
|
* - Easier debugging and monitoring per state
|
||||||
|
*
|
||||||
|
* Platform-aware: Each task gets platform from dispensary.menu_type
|
||||||
|
* (not from schedule.platform) so Jane/Dutchie stores route correctly.
|
||||||
*/
|
*/
|
||||||
private async generateProductDiscoveryTasks(schedule: TaskSchedule): Promise<number> {
|
private async generateProductDiscoveryTasks(schedule: TaskSchedule): Promise<number> {
|
||||||
let dispensaryIds: number[] = [];
|
interface DispensaryRow {
|
||||||
|
id: number;
|
||||||
|
menu_type: string | null;
|
||||||
|
}
|
||||||
|
let dispensaries: DispensaryRow[] = [];
|
||||||
|
|
||||||
// Single-dispensary schedule (e.g., "Deeply Rooted Hourly")
|
// Single-dispensary schedule (e.g., "Deeply Rooted Hourly")
|
||||||
if (schedule.dispensary_id) {
|
if (schedule.dispensary_id) {
|
||||||
// Check if this specific store needs refresh
|
// Check if this specific store needs refresh
|
||||||
const result = await pool.query(`
|
const result = await pool.query(`
|
||||||
SELECT d.id
|
SELECT d.id, d.menu_type
|
||||||
FROM dispensaries d
|
FROM dispensaries d
|
||||||
WHERE d.id = $1
|
WHERE d.id = $1
|
||||||
AND d.crawl_enabled = true
|
AND d.crawl_enabled = true
|
||||||
@@ -266,9 +273,9 @@ class TaskScheduler {
|
|||||||
)
|
)
|
||||||
`, [schedule.dispensary_id]);
|
`, [schedule.dispensary_id]);
|
||||||
|
|
||||||
dispensaryIds = result.rows.map((r: { id: number }) => r.id);
|
dispensaries = result.rows;
|
||||||
|
|
||||||
if (dispensaryIds.length === 0) {
|
if (dispensaries.length === 0) {
|
||||||
console.log(`[TaskScheduler] Store ${schedule.dispensary_id} has pending task or is disabled`);
|
console.log(`[TaskScheduler] Store ${schedule.dispensary_id} has pending task or is disabled`);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -277,9 +284,9 @@ class TaskScheduler {
|
|||||||
}
|
}
|
||||||
// Per-state schedule (e.g., "AZ Product Refresh")
|
// Per-state schedule (e.g., "AZ Product Refresh")
|
||||||
else if (schedule.state_code) {
|
else if (schedule.state_code) {
|
||||||
// Find stores in this state needing refresh
|
// Find stores in this state needing refresh (include menu_type for platform routing)
|
||||||
const result = await pool.query(`
|
const result = await pool.query(`
|
||||||
SELECT d.id
|
SELECT d.id, d.menu_type
|
||||||
FROM dispensaries d
|
FROM dispensaries d
|
||||||
JOIN states s ON d.state_id = s.id
|
JOIN states s ON d.state_id = s.id
|
||||||
WHERE d.crawl_enabled = true
|
WHERE d.crawl_enabled = true
|
||||||
@@ -300,14 +307,14 @@ class TaskScheduler {
|
|||||||
ORDER BY d.last_fetch_at NULLS FIRST, d.id
|
ORDER BY d.last_fetch_at NULLS FIRST, d.id
|
||||||
`, [schedule.state_code, schedule.interval_hours]);
|
`, [schedule.state_code, schedule.interval_hours]);
|
||||||
|
|
||||||
dispensaryIds = result.rows.map((r: { id: number }) => r.id);
|
dispensaries = result.rows;
|
||||||
|
|
||||||
if (dispensaryIds.length === 0) {
|
if (dispensaries.length === 0) {
|
||||||
console.log(`[TaskScheduler] No stores in ${schedule.state_code} need refresh`);
|
console.log(`[TaskScheduler] No stores in ${schedule.state_code} need refresh`);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`[TaskScheduler] Creating ${dispensaryIds.length} product_discovery tasks for ${schedule.state_code}`);
|
console.log(`[TaskScheduler] Creating ${dispensaries.length} product_discovery tasks for ${schedule.state_code}`);
|
||||||
}
|
}
|
||||||
// No dispensary_id or state_code - invalid schedule
|
// No dispensary_id or state_code - invalid schedule
|
||||||
else {
|
else {
|
||||||
@@ -315,21 +322,37 @@ class TaskScheduler {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create product_discovery tasks with HTTP transport
|
// Group dispensaries by platform (menu_type)
|
||||||
// No stagger - worker controls pacing
|
const byPlatform: Record<string, number[]> = {};
|
||||||
const { created } = await taskService.createStaggeredTasks(
|
for (const d of dispensaries) {
|
||||||
dispensaryIds,
|
const platform = d.menu_type || 'dutchie';
|
||||||
'product_discovery',
|
if (!byPlatform[platform]) {
|
||||||
0, // No stagger - worker controls pacing
|
byPlatform[platform] = [];
|
||||||
schedule.platform || 'dutchie',
|
|
||||||
'http', // Force HTTP transport
|
|
||||||
{
|
|
||||||
source: 'schedule',
|
|
||||||
source_schedule_id: schedule.id,
|
|
||||||
}
|
}
|
||||||
);
|
byPlatform[platform].push(d.id);
|
||||||
|
}
|
||||||
|
|
||||||
return created;
|
// Create tasks per platform so each task routes to the correct handler
|
||||||
|
let totalCreated = 0;
|
||||||
|
for (const [platform, ids] of Object.entries(byPlatform)) {
|
||||||
|
if (ids.length > 0) {
|
||||||
|
console.log(`[TaskScheduler] Creating ${ids.length} product_discovery tasks for platform=${platform}`);
|
||||||
|
const { created } = await taskService.createStaggeredTasks(
|
||||||
|
ids,
|
||||||
|
'product_discovery',
|
||||||
|
0, // No stagger - worker controls pacing
|
||||||
|
platform,
|
||||||
|
'http', // Force HTTP transport
|
||||||
|
{
|
||||||
|
source: 'schedule',
|
||||||
|
source_schedule_id: schedule.id,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
totalCreated += created;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return totalCreated;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -1,4 +1,12 @@
|
|||||||
/**
|
/**
|
||||||
|
* @deprecated DO NOT USE - This file is deprecated.
|
||||||
|
* Use product-discovery-dutchie.ts instead (HTTP/Puppeteer transport).
|
||||||
|
*
|
||||||
|
* This was the old curl-based payload fetch handler.
|
||||||
|
* Kept for historical reference only.
|
||||||
|
*
|
||||||
|
* ---
|
||||||
|
* Original docs:
|
||||||
* Payload Fetch Handler
|
* Payload Fetch Handler
|
||||||
*
|
*
|
||||||
* Per TASK_WORKFLOW_2024-12-10.md: Separates API fetch from data processing.
|
* Per TASK_WORKFLOW_2024-12-10.md: Separates API fetch from data processing.
|
||||||
@@ -1,4 +1,12 @@
|
|||||||
/**
|
/**
|
||||||
|
* @deprecated DO NOT USE - This file is deprecated.
|
||||||
|
* Use product-discovery-dutchie.ts instead (HTTP/Puppeteer transport).
|
||||||
|
*
|
||||||
|
* This was the old curl-based product discovery handler.
|
||||||
|
* Kept for historical reference only.
|
||||||
|
*
|
||||||
|
* ---
|
||||||
|
* Original docs:
|
||||||
* Product Discovery Handler
|
* Product Discovery Handler
|
||||||
*
|
*
|
||||||
* Per TASK_WORKFLOW_2024-12-10.md: Initial product fetch for newly discovered stores.
|
* Per TASK_WORKFLOW_2024-12-10.md: Initial product fetch for newly discovered stores.
|
||||||
@@ -13,7 +21,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { TaskContext, TaskResult } from '../task-worker';
|
import { TaskContext, TaskResult } from '../task-worker';
|
||||||
import { handlePayloadFetch } from './payload-fetch-curl';
|
import { handlePayloadFetch } from './_deprecated-payload-fetch-curl';
|
||||||
|
|
||||||
export async function handleProductDiscovery(ctx: TaskContext): Promise<TaskResult> {
|
export async function handleProductDiscovery(ctx: TaskContext): Promise<TaskResult> {
|
||||||
const { task } = ctx;
|
const { task } = ctx;
|
||||||
@@ -1,4 +1,12 @@
|
|||||||
/**
|
/**
|
||||||
|
* @deprecated DO NOT USE - This file is deprecated.
|
||||||
|
* Use store-discovery-dutchie.ts instead (HTTP/Puppeteer transport).
|
||||||
|
*
|
||||||
|
* This was the old curl-based store discovery handler.
|
||||||
|
* Kept for historical reference only.
|
||||||
|
*
|
||||||
|
* ---
|
||||||
|
* Original docs:
|
||||||
* Store Discovery Handler
|
* Store Discovery Handler
|
||||||
*
|
*
|
||||||
* Per TASK_WORKFLOW_2024-12-10.md: Discovers new stores and returns their IDs for task chaining.
|
* Per TASK_WORKFLOW_2024-12-10.md: Discovers new stores and returns their IDs for task chaining.
|
||||||
162
backend/src/tasks/handlers/entry-point-discovery-jane.ts
Normal file
162
backend/src/tasks/handlers/entry-point-discovery-jane.ts
Normal file
@@ -0,0 +1,162 @@
|
|||||||
|
/**
|
||||||
|
* Jane Entry Point Discovery Handler
|
||||||
|
*
|
||||||
|
* Resolves a dispensary's menu_url to a Jane store ID (platform_dispensary_id).
|
||||||
|
*
|
||||||
|
* Flow:
|
||||||
|
* 1. Load dispensary with menu_url
|
||||||
|
* 2. Navigate to menu URL and capture store API call
|
||||||
|
* 3. Extract store ID from API response
|
||||||
|
* 4. Update dispensary.platform_dispensary_id
|
||||||
|
* 5. Queue product_discovery task
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { TaskContext, TaskResult } from '../task-worker';
|
||||||
|
import {
|
||||||
|
startSession,
|
||||||
|
endSession,
|
||||||
|
setCrawlRotator,
|
||||||
|
resolveStoreFromUrl,
|
||||||
|
} from '../../platforms/jane';
|
||||||
|
import { taskService } from '../task-service';
|
||||||
|
|
||||||
|
export async function handleEntryPointDiscoveryJane(ctx: TaskContext): Promise<TaskResult> {
|
||||||
|
const { pool, task, crawlRotator } = ctx;
|
||||||
|
const dispensaryId = task.dispensary_id;
|
||||||
|
|
||||||
|
if (!dispensaryId) {
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: 'Missing dispensary_id in task',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[JaneEntryPoint] Starting for dispensary ${dispensaryId}`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Load dispensary
|
||||||
|
const dispResult = await pool.query(
|
||||||
|
`SELECT id, name, menu_url, platform_dispensary_id, menu_type
|
||||||
|
FROM dispensaries WHERE id = $1`,
|
||||||
|
[dispensaryId]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (dispResult.rows.length === 0) {
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: `Dispensary ${dispensaryId} not found`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const dispensary = dispResult.rows[0];
|
||||||
|
|
||||||
|
// Skip if already resolved
|
||||||
|
if (dispensary.platform_dispensary_id) {
|
||||||
|
console.log(`[JaneEntryPoint] Already resolved: ${dispensary.platform_dispensary_id}`);
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
alreadyResolved: true,
|
||||||
|
storeId: dispensary.platform_dispensary_id,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!dispensary.menu_url) {
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: `Dispensary ${dispensaryId} has no menu_url`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[JaneEntryPoint] Resolving: ${dispensary.menu_url}`);
|
||||||
|
|
||||||
|
// Attach crawl rotator
|
||||||
|
if (crawlRotator) {
|
||||||
|
setCrawlRotator(crawlRotator);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve store from URL
|
||||||
|
const result = await resolveStoreFromUrl(dispensary.menu_url);
|
||||||
|
|
||||||
|
if (!result) {
|
||||||
|
// Update dispensary with resolution failure
|
||||||
|
await pool.query(
|
||||||
|
`UPDATE dispensaries
|
||||||
|
SET id_resolution_status = 'failed',
|
||||||
|
last_id_resolution_at = NOW(),
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE id = $1`,
|
||||||
|
[dispensaryId]
|
||||||
|
);
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: 'Could not resolve Jane store ID from URL',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const { storeId, store } = result;
|
||||||
|
console.log(`[JaneEntryPoint] Resolved store ID: ${storeId} (${store.name})`);
|
||||||
|
|
||||||
|
// Update dispensary with resolved store ID and store data
|
||||||
|
await pool.query(
|
||||||
|
`UPDATE dispensaries
|
||||||
|
SET platform_dispensary_id = $1,
|
||||||
|
menu_type = 'jane',
|
||||||
|
id_resolution_status = 'resolved',
|
||||||
|
last_id_resolution_at = NOW(),
|
||||||
|
stage = 'promoted',
|
||||||
|
latitude = COALESCE(latitude, $2),
|
||||||
|
longitude = COALESCE(longitude, $3),
|
||||||
|
phone = COALESCE(phone, $4),
|
||||||
|
is_medical = $5,
|
||||||
|
is_recreational = $6,
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE id = $7`,
|
||||||
|
[
|
||||||
|
storeId,
|
||||||
|
store.lat || null,
|
||||||
|
store.long || null,
|
||||||
|
store.phone || null,
|
||||||
|
store.medical,
|
||||||
|
store.recreational,
|
||||||
|
dispensaryId,
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
// Queue product_discovery task
|
||||||
|
console.log(`[JaneEntryPoint] Queuing product_discovery for dispensary ${dispensaryId}`);
|
||||||
|
await taskService.createTask({
|
||||||
|
role: 'product_discovery',
|
||||||
|
dispensary_id: dispensaryId,
|
||||||
|
platform: 'jane',
|
||||||
|
method: 'http',
|
||||||
|
priority: task.priority || 0,
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
storeId,
|
||||||
|
storeName: store.name,
|
||||||
|
productCount: store.product_count,
|
||||||
|
queuedProductDiscovery: true,
|
||||||
|
};
|
||||||
|
} catch (error: unknown) {
|
||||||
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||||||
|
console.error(`[JaneEntryPoint] Error:`, errorMessage);
|
||||||
|
|
||||||
|
// Update dispensary with failure
|
||||||
|
await pool.query(
|
||||||
|
`UPDATE dispensaries
|
||||||
|
SET id_resolution_status = 'failed',
|
||||||
|
consecutive_failures = consecutive_failures + 1,
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE id = $1`,
|
||||||
|
[dispensaryId]
|
||||||
|
).catch(() => {});
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: errorMessage,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -3,18 +3,27 @@
|
|||||||
*
|
*
|
||||||
* Exports all task handlers for the task worker.
|
* Exports all task handlers for the task worker.
|
||||||
*
|
*
|
||||||
* Product Discovery:
|
* Naming convention: {task}-{platform}.ts
|
||||||
* - handleProductDiscoveryCurl: curl/axios based (for curl transport)
|
* - product-discovery-dutchie.ts
|
||||||
* - handleProductDiscoveryHttp: Puppeteer browser-based (for http transport)
|
* - product-discovery-jane.ts
|
||||||
|
* - store-discovery-dutchie.ts
|
||||||
|
* - store-discovery-jane.ts
|
||||||
|
*
|
||||||
|
* Deprecated handlers (curl transport) are in _deprecated-*.ts files.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
export { handleProductDiscovery as handleProductDiscoveryCurl } from './product-discovery-curl';
|
// Shared handlers (platform-agnostic)
|
||||||
export { handleProductDiscoveryHttp } from './product-discovery-http';
|
|
||||||
export { handlePayloadFetch as handlePayloadFetchCurl } from './payload-fetch-curl';
|
|
||||||
export { handleProductRefresh } from './product-refresh';
|
export { handleProductRefresh } from './product-refresh';
|
||||||
export { handleStoreDiscovery } from './store-discovery';
|
|
||||||
export { handleStoreDiscoveryHttp } from './store-discovery-http';
|
|
||||||
export { handleStoreDiscoveryState } from './store-discovery-state';
|
export { handleStoreDiscoveryState } from './store-discovery-state';
|
||||||
export { handleEntryPointDiscovery } from './entry-point-discovery';
|
export { handleEntryPointDiscovery } from './entry-point-discovery';
|
||||||
export { handleAnalyticsRefresh } from './analytics-refresh';
|
export { handleAnalyticsRefresh } from './analytics-refresh';
|
||||||
export { handleWhoami } from './whoami';
|
export { handleWhoami } from './whoami';
|
||||||
|
|
||||||
|
// Dutchie Platform Handlers
|
||||||
|
export { handleProductDiscoveryDutchie } from './product-discovery-dutchie';
|
||||||
|
export { handleStoreDiscoveryDutchie } from './store-discovery-dutchie';
|
||||||
|
|
||||||
|
// Jane Platform Handlers
|
||||||
|
export { handleStoreDiscoveryJane } from './store-discovery-jane';
|
||||||
|
export { handleEntryPointDiscoveryJane } from './entry-point-discovery-jane';
|
||||||
|
export { handleProductDiscoveryJane } from './product-discovery-jane';
|
||||||
|
|||||||
@@ -1,15 +1,17 @@
|
|||||||
/**
|
/**
|
||||||
* Product Discovery HTTP Handler (Browser-based)
|
* Product Discovery Handler - Dutchie Platform
|
||||||
*
|
*
|
||||||
* Uses Puppeteer + StealthPlugin to fetch products via browser context.
|
* Uses Puppeteer + StealthPlugin to fetch products via browser context.
|
||||||
* Based on test-intercept.js pattern from ORGANIC_SCRAPING_GUIDE.md.
|
* Based on test-intercept.js pattern from ORGANIC_SCRAPING_GUIDE.md.
|
||||||
*
|
*
|
||||||
|
* Naming convention: {task}-{platform}.ts
|
||||||
|
*
|
||||||
* This handler:
|
* This handler:
|
||||||
* 1. Loads dispensary info
|
* 1. Loads dispensary info
|
||||||
* 2. Launches headless browser with proxy (if provided)
|
* 2. Launches headless browser with proxy (if provided)
|
||||||
* 3. Establishes session by visiting embedded menu
|
* 3. Establishes session by visiting embedded menu
|
||||||
* 4. Fetches ALL products via GraphQL from browser context
|
* 4. Fetches ALL products via GraphQL from browser context
|
||||||
* 5. Saves raw payload to filesystem (gzipped)
|
* 5. Saves raw payload to filesystem (gzipped) at payloads/dutchie/...
|
||||||
* 6. Records metadata in raw_crawl_payloads table
|
* 6. Records metadata in raw_crawl_payloads table
|
||||||
* 7. Queues product_refresh task to process the payload
|
* 7. Queues product_refresh task to process the payload
|
||||||
*
|
*
|
||||||
@@ -26,7 +28,7 @@ import { taskService } from '../task-service';
|
|||||||
// GraphQL hash for FilteredProducts query - MUST match CLAUDE.md
|
// GraphQL hash for FilteredProducts query - MUST match CLAUDE.md
|
||||||
const FILTERED_PRODUCTS_HASH = 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0';
|
const FILTERED_PRODUCTS_HASH = 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0';
|
||||||
|
|
||||||
export async function handleProductDiscoveryHttp(ctx: TaskContext): Promise<TaskResult> {
|
export async function handleProductDiscoveryDutchie(ctx: TaskContext): Promise<TaskResult> {
|
||||||
const { pool, task, crawlRotator, updateStep } = ctx;
|
const { pool, task, crawlRotator, updateStep } = ctx;
|
||||||
const dispensaryId = task.dispensary_id;
|
const dispensaryId = task.dispensary_id;
|
||||||
|
|
||||||
168
backend/src/tasks/handlers/product-discovery-jane.ts
Normal file
168
backend/src/tasks/handlers/product-discovery-jane.ts
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
/**
|
||||||
|
* Jane Product Discovery Handler
|
||||||
|
*
|
||||||
|
* Fetches all products from a Jane store via Puppeteer + network interception.
|
||||||
|
*
|
||||||
|
* Flow:
|
||||||
|
* 1. Load dispensary with platform_dispensary_id
|
||||||
|
* 2. Navigate to menu URL and capture Algolia product responses
|
||||||
|
* 3. Save raw payload to filesystem
|
||||||
|
* 4. Queue product_refresh task for normalization
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { TaskContext, TaskResult } from '../task-worker';
|
||||||
|
import {
|
||||||
|
startSession,
|
||||||
|
endSession,
|
||||||
|
setCrawlRotator,
|
||||||
|
fetchProductsFromUrl,
|
||||||
|
} from '../../platforms/jane';
|
||||||
|
import { saveRawPayload } from '../../utils/payload-storage';
|
||||||
|
import { taskService } from '../task-service';
|
||||||
|
|
||||||
|
export async function handleProductDiscoveryJane(ctx: TaskContext): Promise<TaskResult> {
|
||||||
|
const { pool, task, crawlRotator } = ctx;
|
||||||
|
const dispensaryId = task.dispensary_id;
|
||||||
|
|
||||||
|
if (!dispensaryId) {
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: 'Missing dispensary_id in task',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[JaneProductDiscovery] Starting for dispensary ${dispensaryId}`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Load dispensary
|
||||||
|
const dispResult = await pool.query(
|
||||||
|
`SELECT id, name, menu_url, platform_dispensary_id, menu_type
|
||||||
|
FROM dispensaries WHERE id = $1`,
|
||||||
|
[dispensaryId]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (dispResult.rows.length === 0) {
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: `Dispensary ${dispensaryId} not found`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const dispensary = dispResult.rows[0];
|
||||||
|
|
||||||
|
if (!dispensary.menu_url) {
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: `Dispensary ${dispensaryId} has no menu_url`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[JaneProductDiscovery] Fetching products from: ${dispensary.menu_url}`);
|
||||||
|
|
||||||
|
// Attach crawl rotator
|
||||||
|
if (crawlRotator) {
|
||||||
|
setCrawlRotator(crawlRotator);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch products
|
||||||
|
const result = await fetchProductsFromUrl(dispensary.menu_url);
|
||||||
|
|
||||||
|
if (result.products.length === 0) {
|
||||||
|
console.warn(`[JaneProductDiscovery] No products captured for dispensary ${dispensaryId}`);
|
||||||
|
|
||||||
|
// Update dispensary with failure
|
||||||
|
await pool.query(
|
||||||
|
`UPDATE dispensaries
|
||||||
|
SET consecutive_failures = consecutive_failures + 1,
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE id = $1`,
|
||||||
|
[dispensaryId]
|
||||||
|
);
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: 'No products captured from Jane menu page',
|
||||||
|
productCount: 0,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[JaneProductDiscovery] Captured ${result.products.length} products`);
|
||||||
|
|
||||||
|
// Build payload for storage
|
||||||
|
// Store the raw Algolia hits for the normalizer
|
||||||
|
const rawPayload = {
|
||||||
|
hits: result.products.map(p => p.raw), // Use raw product data
|
||||||
|
store: result.store?.raw || null,
|
||||||
|
capturedAt: new Date().toISOString(),
|
||||||
|
platform: 'jane',
|
||||||
|
dispensaryId,
|
||||||
|
storeId: dispensary.platform_dispensary_id,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Save raw payload to filesystem (platform = 'jane')
|
||||||
|
const { id: payloadId, sizeBytes } = await saveRawPayload(
|
||||||
|
pool,
|
||||||
|
dispensaryId,
|
||||||
|
rawPayload,
|
||||||
|
null, // crawl_run_id
|
||||||
|
result.products.length,
|
||||||
|
'jane' // platform
|
||||||
|
);
|
||||||
|
|
||||||
|
console.log(`[JaneProductDiscovery] Saved payload ${payloadId} (${Math.round(sizeBytes / 1024)}KB)`);
|
||||||
|
|
||||||
|
// Update dispensary stage and timestamps
|
||||||
|
await pool.query(
|
||||||
|
`UPDATE dispensaries
|
||||||
|
SET stage = 'hydrating',
|
||||||
|
last_fetch_at = NOW(),
|
||||||
|
consecutive_successes = consecutive_successes + 1,
|
||||||
|
consecutive_failures = 0,
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE id = $1`,
|
||||||
|
[dispensaryId]
|
||||||
|
);
|
||||||
|
|
||||||
|
// Queue product_refresh task for normalization
|
||||||
|
console.log(`[JaneProductDiscovery] Queuing product_refresh for payload ${payloadId}`);
|
||||||
|
await taskService.createTask({
|
||||||
|
role: 'product_refresh',
|
||||||
|
dispensary_id: dispensaryId,
|
||||||
|
platform: 'jane',
|
||||||
|
// method undefined = any worker can process (product_refresh is local)
|
||||||
|
priority: task.priority || 0,
|
||||||
|
payload: { payload_id: payloadId },
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
productCount: result.products.length,
|
||||||
|
payloadId,
|
||||||
|
payloadSizeKB: Math.round(sizeBytes / 1024),
|
||||||
|
storeInfo: result.store ? {
|
||||||
|
id: result.store.id,
|
||||||
|
name: result.store.name,
|
||||||
|
productCount: result.store.product_count,
|
||||||
|
} : null,
|
||||||
|
queuedProductRefresh: true,
|
||||||
|
};
|
||||||
|
} catch (error: unknown) {
|
||||||
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||||||
|
console.error(`[JaneProductDiscovery] Error:`, errorMessage);
|
||||||
|
|
||||||
|
// Update dispensary with failure
|
||||||
|
await pool.query(
|
||||||
|
`UPDATE dispensaries
|
||||||
|
SET consecutive_failures = consecutive_failures + 1,
|
||||||
|
stage = CASE WHEN consecutive_failures >= 2 THEN 'failing' ELSE stage END,
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE id = $1`,
|
||||||
|
[dispensaryId]
|
||||||
|
).catch(() => {});
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: errorMessage,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -8,19 +8,23 @@
|
|||||||
*
|
*
|
||||||
* Flow:
|
* Flow:
|
||||||
* 1. Load payload from filesystem (by payload_id or latest for dispensary)
|
* 1. Load payload from filesystem (by payload_id or latest for dispensary)
|
||||||
* 2. Normalize data via DutchieNormalizer
|
* 2. Select normalizer based on dispensary.menu_type (dutchie or jane)
|
||||||
* 3. Upsert to store_products and store_product_snapshots
|
* 3. Normalize data via platform-specific normalizer
|
||||||
* 4. Track missing products (increment consecutive_misses, mark OOS at 3)
|
* 4. Upsert to store_products and store_product_snapshots
|
||||||
* 5. Download new product images
|
* 5. Track missing products (increment consecutive_misses, mark OOS at 3)
|
||||||
|
* 6. Download new product images
|
||||||
*
|
*
|
||||||
* Benefits of separation:
|
* Benefits of separation:
|
||||||
* - Retry-friendly: If this fails, re-run without re-crawling
|
* - Retry-friendly: If this fails, re-run without re-crawling
|
||||||
* - Replay-able: Run against any historical payload
|
* - Replay-able: Run against any historical payload
|
||||||
* - Faster: Local file read vs network call
|
* - Faster: Local file read vs network call
|
||||||
|
* - Platform-agnostic: Same handler works for Dutchie and Jane
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { TaskContext, TaskResult } from '../task-worker';
|
import { TaskContext, TaskResult } from '../task-worker';
|
||||||
import { DutchieNormalizer } from '../../hydration/normalizers/dutchie';
|
import { DutchieNormalizer } from '../../hydration/normalizers/dutchie';
|
||||||
|
import { JaneNormalizer } from '../../hydration/normalizers/jane';
|
||||||
|
import { BaseNormalizer } from '../../hydration/normalizers/base';
|
||||||
import {
|
import {
|
||||||
upsertStoreProducts,
|
upsertStoreProducts,
|
||||||
createStoreProductSnapshots,
|
createStoreProductSnapshots,
|
||||||
@@ -29,7 +33,19 @@ import {
|
|||||||
import { loadRawPayloadById, getLatestPayload } from '../../utils/payload-storage';
|
import { loadRawPayloadById, getLatestPayload } from '../../utils/payload-storage';
|
||||||
import { taskService } from '../task-service';
|
import { taskService } from '../task-service';
|
||||||
|
|
||||||
const normalizer = new DutchieNormalizer();
|
// Platform-aware normalizer registry
|
||||||
|
const NORMALIZERS: Record<string, BaseNormalizer> = {
|
||||||
|
dutchie: new DutchieNormalizer(),
|
||||||
|
jane: new JaneNormalizer(),
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get normalizer for a platform, defaults to Dutchie
|
||||||
|
*/
|
||||||
|
function getNormalizer(platform: string | null): BaseNormalizer {
|
||||||
|
const key = platform || 'dutchie';
|
||||||
|
return NORMALIZERS[key] || NORMALIZERS.dutchie;
|
||||||
|
}
|
||||||
|
|
||||||
export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult> {
|
export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult> {
|
||||||
const { pool, task, updateStep } = ctx;
|
const { pool, task, updateStep } = ctx;
|
||||||
@@ -90,7 +106,9 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
|||||||
const result = await getLatestPayload(pool, dispensaryId);
|
const result = await getLatestPayload(pool, dispensaryId);
|
||||||
if (!result) {
|
if (!result) {
|
||||||
// No payload exists - queue upstream task to fetch products
|
// No payload exists - queue upstream task to fetch products
|
||||||
console.log(`[ProductRefresh] No payload found for dispensary ${dispensaryId} - queuing upstream task`);
|
// Use dispensary.menu_type for platform routing (jane vs dutchie)
|
||||||
|
const dispPlatform = dispensary.menu_type || 'dutchie';
|
||||||
|
console.log(`[ProductRefresh] No payload found for dispensary ${dispensaryId} - queuing upstream task (platform=${dispPlatform})`);
|
||||||
|
|
||||||
if (dispensary.platform_dispensary_id) {
|
if (dispensary.platform_dispensary_id) {
|
||||||
// Has platform ID - can go straight to product_discovery
|
// Has platform ID - can go straight to product_discovery
|
||||||
@@ -98,12 +116,14 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
|||||||
await taskService.createTask({
|
await taskService.createTask({
|
||||||
role: 'product_discovery',
|
role: 'product_discovery',
|
||||||
dispensary_id: dispensaryId,
|
dispensary_id: dispensaryId,
|
||||||
|
platform: dispPlatform,
|
||||||
priority: task.priority || 0,
|
priority: task.priority || 0,
|
||||||
method: 'http', // Use browser-based handler for session proxies
|
method: 'http', // Use browser-based handler for session proxies
|
||||||
});
|
});
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
queued: 'product_discovery',
|
queued: 'product_discovery',
|
||||||
|
platform: dispPlatform,
|
||||||
reason: 'No payload exists - queued product_discovery to fetch initial data',
|
reason: 'No payload exists - queued product_discovery to fetch initial data',
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
@@ -112,12 +132,14 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
|||||||
await taskService.createTask({
|
await taskService.createTask({
|
||||||
role: 'entry_point_discovery',
|
role: 'entry_point_discovery',
|
||||||
dispensary_id: dispensaryId,
|
dispensary_id: dispensaryId,
|
||||||
|
platform: dispPlatform,
|
||||||
priority: task.priority || 0,
|
priority: task.priority || 0,
|
||||||
method: 'http', // Browser-only transport
|
method: 'http', // Browser-only transport
|
||||||
});
|
});
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
queued: 'entry_point_discovery',
|
queued: 'entry_point_discovery',
|
||||||
|
platform: dispPlatform,
|
||||||
reason: 'No payload and no platform_dispensary_id - queued entry_point_discovery to resolve ID',
|
reason: 'No payload and no platform_dispensary_id - queued entry_point_discovery to resolve ID',
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -167,6 +189,11 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
|||||||
created_at: new Date(),
|
created_at: new Date(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Select normalizer based on dispensary platform (menu_type)
|
||||||
|
const platform = dispensary.menu_type || 'dutchie';
|
||||||
|
const normalizer = getNormalizer(platform);
|
||||||
|
console.log(`[ProductRefresh] Using ${platform} normalizer for ${dispensary.name}`);
|
||||||
|
|
||||||
const normalizationResult = normalizer.normalize(rawPayload);
|
const normalizationResult = normalizer.normalize(rawPayload);
|
||||||
|
|
||||||
if (normalizationResult.errors.length > 0) {
|
if (normalizationResult.errors.length > 0) {
|
||||||
@@ -179,10 +206,11 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
|||||||
error: 'Normalization produced no products',
|
error: 'Normalization produced no products',
|
||||||
payloadId,
|
payloadId,
|
||||||
productsProcessed: 0,
|
productsProcessed: 0,
|
||||||
|
platform,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`[ProductRefresh] Normalized ${normalizationResult.products.length} products`);
|
console.log(`[ProductRefresh] Normalized ${normalizationResult.products.length} products (platform=${platform})`);
|
||||||
|
|
||||||
await ctx.heartbeat();
|
await ctx.heartbeat();
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
/**
|
/**
|
||||||
* Store Discovery HTTP Handler (Browser-based)
|
* Store Discovery Handler - Dutchie Platform
|
||||||
*
|
*
|
||||||
* Uses Puppeteer + StealthPlugin to discover stores via browser context.
|
* Uses Puppeteer + StealthPlugin to discover stores via browser context.
|
||||||
* Based on product-discovery-http.ts pattern.
|
*
|
||||||
|
* Naming convention: {task}-{platform}.ts
|
||||||
*
|
*
|
||||||
* This handler:
|
* This handler:
|
||||||
* 1. Launches headless browser with proxy (if provided)
|
* 1. Launches headless browser with proxy (if provided)
|
||||||
@@ -70,7 +71,7 @@ interface DiscoveredLocation {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function handleStoreDiscoveryHttp(ctx: TaskContext): Promise<TaskResult> {
|
export async function handleStoreDiscoveryDutchie(ctx: TaskContext): Promise<TaskResult> {
|
||||||
const { pool, task, crawlRotator, updateStep } = ctx;
|
const { pool, task, crawlRotator, updateStep } = ctx;
|
||||||
const platform = task.platform || 'dutchie';
|
const platform = task.platform || 'dutchie';
|
||||||
|
|
||||||
166
backend/src/tasks/handlers/store-discovery-jane.ts
Normal file
166
backend/src/tasks/handlers/store-discovery-jane.ts
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
/**
|
||||||
|
* Jane Store Discovery Handler
|
||||||
|
*
|
||||||
|
* Discovers iHeartJane stores by state and inserts them into the dispensaries table.
|
||||||
|
*
|
||||||
|
* Flow:
|
||||||
|
* 1. Navigate to Jane's store directory
|
||||||
|
* 2. Capture store data from API responses
|
||||||
|
* 3. Insert new stores into dispensaries table with menu_type='jane'
|
||||||
|
* 4. Return newStoreIds[] for chaining to product_discovery
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Pool } from 'pg';
|
||||||
|
import { TaskContext, TaskResult } from '../task-worker';
|
||||||
|
import {
|
||||||
|
startSession,
|
||||||
|
endSession,
|
||||||
|
setCrawlRotator,
|
||||||
|
discoverStoresByState,
|
||||||
|
DiscoveredStore,
|
||||||
|
} from '../../platforms/jane';
|
||||||
|
|
||||||
|
export async function handleStoreDiscoveryJane(ctx: TaskContext): Promise<TaskResult> {
|
||||||
|
const { pool, task, crawlRotator } = ctx;
|
||||||
|
|
||||||
|
// Get state from task payload
|
||||||
|
const stateCode = task.payload?.state as string;
|
||||||
|
if (!stateCode) {
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: 'Missing state in task payload',
|
||||||
|
newStoreIds: [],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[JaneStoreDiscovery] Starting discovery for state: ${stateCode}`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Attach crawl rotator if available
|
||||||
|
if (crawlRotator) {
|
||||||
|
setCrawlRotator(crawlRotator);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Discover stores
|
||||||
|
const stores = await discoverStoresByState(stateCode);
|
||||||
|
|
||||||
|
if (stores.length === 0) {
|
||||||
|
console.log(`[JaneStoreDiscovery] No stores found in ${stateCode}`);
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
storesDiscovered: 0,
|
||||||
|
storesInserted: 0,
|
||||||
|
newStoreIds: [],
|
||||||
|
message: `No Jane stores found in ${stateCode}`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[JaneStoreDiscovery] Found ${stores.length} stores in ${stateCode}`);
|
||||||
|
|
||||||
|
// Insert stores into dispensaries table
|
||||||
|
const { inserted, newIds } = await insertJaneStores(pool, stores, stateCode);
|
||||||
|
|
||||||
|
console.log(`[JaneStoreDiscovery] Inserted ${inserted} new stores, ${newIds.length} IDs for chaining`);
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
storesDiscovered: stores.length,
|
||||||
|
storesInserted: inserted,
|
||||||
|
newStoreIds: newIds,
|
||||||
|
state: stateCode,
|
||||||
|
};
|
||||||
|
} catch (error: unknown) {
|
||||||
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||||||
|
console.error(`[JaneStoreDiscovery] Error:`, errorMessage);
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: errorMessage,
|
||||||
|
newStoreIds: [],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Insert discovered Jane stores into dispensaries table
|
||||||
|
*/
|
||||||
|
async function insertJaneStores(
|
||||||
|
pool: Pool,
|
||||||
|
stores: DiscoveredStore[],
|
||||||
|
stateCode: string
|
||||||
|
): Promise<{ inserted: number; newIds: number[] }> {
|
||||||
|
const newIds: number[] = [];
|
||||||
|
let inserted = 0;
|
||||||
|
|
||||||
|
for (const store of stores) {
|
||||||
|
try {
|
||||||
|
// Build menu URL from store data
|
||||||
|
const menuUrl = `https://www.iheartjane.com/stores/${store.storeId}/${store.urlSlug || 'menu'}`;
|
||||||
|
|
||||||
|
// Upsert into dispensaries
|
||||||
|
// Use platform_dispensary_id as conflict key to avoid duplicates
|
||||||
|
const result = await pool.query(
|
||||||
|
`INSERT INTO dispensaries (
|
||||||
|
name,
|
||||||
|
address,
|
||||||
|
city,
|
||||||
|
state,
|
||||||
|
zip,
|
||||||
|
latitude,
|
||||||
|
longitude,
|
||||||
|
menu_url,
|
||||||
|
menu_type,
|
||||||
|
platform_dispensary_id,
|
||||||
|
is_medical,
|
||||||
|
is_recreational,
|
||||||
|
stage,
|
||||||
|
created_at,
|
||||||
|
updated_at
|
||||||
|
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, NOW(), NOW())
|
||||||
|
ON CONFLICT (menu_type, platform_dispensary_id)
|
||||||
|
WHERE menu_type = 'jane' AND platform_dispensary_id IS NOT NULL
|
||||||
|
DO UPDATE SET
|
||||||
|
name = EXCLUDED.name,
|
||||||
|
address = EXCLUDED.address,
|
||||||
|
city = EXCLUDED.city,
|
||||||
|
latitude = EXCLUDED.latitude,
|
||||||
|
longitude = EXCLUDED.longitude,
|
||||||
|
menu_url = EXCLUDED.menu_url,
|
||||||
|
is_medical = EXCLUDED.is_medical,
|
||||||
|
is_recreational = EXCLUDED.is_recreational,
|
||||||
|
updated_at = NOW()
|
||||||
|
RETURNING id, (xmax = 0) AS is_new`,
|
||||||
|
[
|
||||||
|
store.name,
|
||||||
|
store.address,
|
||||||
|
store.city,
|
||||||
|
stateCode,
|
||||||
|
store.zip,
|
||||||
|
store.lat,
|
||||||
|
store.long,
|
||||||
|
menuUrl,
|
||||||
|
'jane',
|
||||||
|
store.storeId,
|
||||||
|
store.medical,
|
||||||
|
store.recreational,
|
||||||
|
'discovered',
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (result.rows.length > 0) {
|
||||||
|
const { id, is_new } = result.rows[0];
|
||||||
|
if (is_new) {
|
||||||
|
newIds.push(id);
|
||||||
|
inserted++;
|
||||||
|
console.log(`[JaneStoreDiscovery] Inserted: ${store.name} (ID: ${id}, Jane ID: ${store.storeId})`);
|
||||||
|
} else {
|
||||||
|
console.log(`[JaneStoreDiscovery] Updated: ${store.name} (ID: ${id})`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
// Log but continue with other stores
|
||||||
|
console.error(`[JaneStoreDiscovery] Error inserting ${store.name}:`, error.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { inserted, newIds };
|
||||||
|
}
|
||||||
@@ -17,10 +17,15 @@ export {
|
|||||||
export { TaskWorker, TaskContext, TaskResult } from './task-worker';
|
export { TaskWorker, TaskContext, TaskResult } from './task-worker';
|
||||||
|
|
||||||
export {
|
export {
|
||||||
handleProductDiscoveryCurl,
|
// Shared handlers
|
||||||
handleProductDiscoveryHttp,
|
|
||||||
handleProductRefresh,
|
handleProductRefresh,
|
||||||
handleStoreDiscovery,
|
|
||||||
handleEntryPointDiscovery,
|
handleEntryPointDiscovery,
|
||||||
handleAnalyticsRefresh,
|
handleAnalyticsRefresh,
|
||||||
|
// Dutchie handlers
|
||||||
|
handleProductDiscoveryDutchie,
|
||||||
|
handleStoreDiscoveryDutchie,
|
||||||
|
// Jane handlers
|
||||||
|
handleProductDiscoveryJane,
|
||||||
|
handleStoreDiscoveryJane,
|
||||||
|
handleEntryPointDiscoveryJane,
|
||||||
} from './handlers';
|
} from './handlers';
|
||||||
|
|||||||
@@ -68,19 +68,22 @@ import { runCurlPreflight, CurlPreflightResult } from '../services/curl-prefligh
|
|||||||
import { runPuppeteerPreflightWithRetry, PuppeteerPreflightResult } from '../services/puppeteer-preflight';
|
import { runPuppeteerPreflightWithRetry, PuppeteerPreflightResult } from '../services/puppeteer-preflight';
|
||||||
|
|
||||||
// Task handlers by role
|
// Task handlers by role
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: payload_fetch and product_refresh are now separate
|
// Platform-based handlers: {task}-{platform}.ts convention
|
||||||
// Dual-transport: curl vs http (browser-based) handlers
|
|
||||||
import { handlePayloadFetch } from './handlers/payload-fetch-curl';
|
|
||||||
import { handleProductRefresh } from './handlers/product-refresh';
|
import { handleProductRefresh } from './handlers/product-refresh';
|
||||||
import { handleProductDiscovery } from './handlers/product-discovery-curl';
|
|
||||||
import { handleProductDiscoveryHttp } from './handlers/product-discovery-http';
|
|
||||||
import { handleStoreDiscovery } from './handlers/store-discovery';
|
|
||||||
import { handleStoreDiscoveryHttp } from './handlers/store-discovery-http';
|
|
||||||
import { handleStoreDiscoveryState } from './handlers/store-discovery-state';
|
import { handleStoreDiscoveryState } from './handlers/store-discovery-state';
|
||||||
import { handleEntryPointDiscovery } from './handlers/entry-point-discovery';
|
import { handleEntryPointDiscovery } from './handlers/entry-point-discovery';
|
||||||
import { handleAnalyticsRefresh } from './handlers/analytics-refresh';
|
import { handleAnalyticsRefresh } from './handlers/analytics-refresh';
|
||||||
import { handleWhoami } from './handlers/whoami';
|
import { handleWhoami } from './handlers/whoami';
|
||||||
|
|
||||||
|
// Dutchie Platform Handlers
|
||||||
|
import { handleProductDiscoveryDutchie } from './handlers/product-discovery-dutchie';
|
||||||
|
import { handleStoreDiscoveryDutchie } from './handlers/store-discovery-dutchie';
|
||||||
|
|
||||||
|
// Jane Platform Handlers
|
||||||
|
import { handleStoreDiscoveryJane } from './handlers/store-discovery-jane';
|
||||||
|
import { handleEntryPointDiscoveryJane } from './handlers/entry-point-discovery-jane';
|
||||||
|
import { handleProductDiscoveryJane } from './handlers/product-discovery-jane';
|
||||||
|
|
||||||
const POLL_INTERVAL_MS = parseInt(process.env.POLL_INTERVAL_MS || '5000');
|
const POLL_INTERVAL_MS = parseInt(process.env.POLL_INTERVAL_MS || '5000');
|
||||||
const HEARTBEAT_INTERVAL_MS = parseInt(process.env.HEARTBEAT_INTERVAL_MS || '30000');
|
const HEARTBEAT_INTERVAL_MS = parseInt(process.env.HEARTBEAT_INTERVAL_MS || '30000');
|
||||||
const API_BASE_URL = process.env.API_BASE_URL || 'http://localhost:3010';
|
const API_BASE_URL = process.env.API_BASE_URL || 'http://localhost:3010';
|
||||||
@@ -152,48 +155,66 @@ export interface TaskResult {
|
|||||||
type TaskHandler = (ctx: TaskContext) => Promise<TaskResult>;
|
type TaskHandler = (ctx: TaskContext) => Promise<TaskResult>;
|
||||||
|
|
||||||
// Per TASK_WORKFLOW_2024-12-10.md: Handler registry
|
// Per TASK_WORKFLOW_2024-12-10.md: Handler registry
|
||||||
// payload_fetch: Fetches from Dutchie API, saves to disk
|
// Platform-agnostic handlers (shared across Dutchie and Jane)
|
||||||
// product_refresh: Reads local payload, normalizes, upserts to DB
|
// product_refresh: Reads local payload, uses platform-aware normalizer, upserts to DB
|
||||||
// product_discovery: Main handler for product crawling (has curl and http variants)
|
const SHARED_HANDLERS: Partial<Record<TaskRole, TaskHandler>> = {
|
||||||
const TASK_HANDLERS: Record<TaskRole, TaskHandler> = {
|
product_refresh: handleProductRefresh,
|
||||||
payload_fetch: handlePayloadFetch, // API fetch -> disk (curl)
|
store_discovery_state: handleStoreDiscoveryState,
|
||||||
product_refresh: handleProductRefresh, // disk -> DB
|
|
||||||
product_discovery: handleProductDiscovery, // Default: curl (see getHandlerForTask for http override)
|
|
||||||
store_discovery: handleStoreDiscovery,
|
|
||||||
store_discovery_state: handleStoreDiscoveryState, // Per-state parallelized discovery
|
|
||||||
entry_point_discovery: handleEntryPointDiscovery,
|
entry_point_discovery: handleEntryPointDiscovery,
|
||||||
analytics_refresh: handleAnalyticsRefresh,
|
analytics_refresh: handleAnalyticsRefresh,
|
||||||
whoami: handleWhoami, // Tests proxy + anti-detect
|
whoami: handleWhoami,
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the appropriate handler for a task, considering both role and method.
|
* Get the appropriate handler for a task based on platform.
|
||||||
*
|
*
|
||||||
* Dual-transport handlers:
|
* Naming convention: {task}-{platform}.ts
|
||||||
* - product_discovery: curl (axios) or http (Puppeteer)
|
* - product-discovery-dutchie.ts
|
||||||
* - store_discovery: curl (axios) or http (Puppeteer)
|
* - product-discovery-jane.ts
|
||||||
|
* - store-discovery-dutchie.ts
|
||||||
|
* - store-discovery-jane.ts
|
||||||
*
|
*
|
||||||
* Default method is 'http' since all GraphQL queries should use browser transport
|
* All handlers use HTTP/Puppeteer transport (curl transport is deprecated).
|
||||||
* for better TLS fingerprinting and session-based proxy compatibility.
|
|
||||||
*/
|
*/
|
||||||
function getHandlerForTask(task: WorkerTask): TaskHandler | undefined {
|
function getHandlerForTask(task: WorkerTask): TaskHandler | undefined {
|
||||||
const role = task.role as TaskRole;
|
const role = task.role as TaskRole;
|
||||||
const method = task.method || 'http'; // Default to HTTP for all GraphQL tasks
|
const platform = task.platform || 'dutchie';
|
||||||
|
|
||||||
// product_discovery: dual-transport support
|
// ==========================================================================
|
||||||
if (role === 'product_discovery' && method === 'http') {
|
// JANE PLATFORM ROUTING
|
||||||
console.log(`[TaskWorker] Using HTTP handler for product_discovery (method=${method})`);
|
// ==========================================================================
|
||||||
return handleProductDiscoveryHttp;
|
if (platform === 'jane') {
|
||||||
|
if (role === 'store_discovery' || role === 'store_discovery_state') {
|
||||||
|
console.log(`[TaskWorker] Using Jane handler for store_discovery`);
|
||||||
|
return handleStoreDiscoveryJane;
|
||||||
|
}
|
||||||
|
if (role === 'entry_point_discovery') {
|
||||||
|
console.log(`[TaskWorker] Using Jane handler for entry_point_discovery`);
|
||||||
|
return handleEntryPointDiscoveryJane;
|
||||||
|
}
|
||||||
|
if (role === 'product_discovery') {
|
||||||
|
console.log(`[TaskWorker] Using Jane handler for product_discovery`);
|
||||||
|
return handleProductDiscoveryJane;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// store_discovery: dual-transport support
|
// ==========================================================================
|
||||||
if (role === 'store_discovery' && method === 'http') {
|
// DUTCHIE PLATFORM ROUTING (default)
|
||||||
console.log(`[TaskWorker] Using HTTP handler for store_discovery (method=${method})`);
|
// ==========================================================================
|
||||||
return handleStoreDiscoveryHttp;
|
if (role === 'product_discovery') {
|
||||||
|
console.log(`[TaskWorker] Using Dutchie handler for product_discovery`);
|
||||||
|
return handleProductDiscoveryDutchie;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Default: use the static handler registry (curl-based)
|
if (role === 'store_discovery') {
|
||||||
return TASK_HANDLERS[role];
|
console.log(`[TaskWorker] Using Dutchie handler for store_discovery`);
|
||||||
|
return handleStoreDiscoveryDutchie;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ==========================================================================
|
||||||
|
// SHARED HANDLERS (platform-agnostic)
|
||||||
|
// ==========================================================================
|
||||||
|
return SHARED_HANDLERS[role];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -1,17 +1,23 @@
|
|||||||
/**
|
/**
|
||||||
* Payload Storage Utility
|
* Payload Storage Utility
|
||||||
*
|
*
|
||||||
* Per TASK_WORKFLOW_2024-12-10.md: Store raw GraphQL payloads for historical analysis.
|
* Per TASK_WORKFLOW_2024-12-10.md: Store raw API payloads for historical analysis.
|
||||||
*
|
*
|
||||||
* Design Pattern: Metadata/Payload Separation
|
* Design Pattern: Metadata/Payload Separation
|
||||||
* - Metadata in PostgreSQL (raw_crawl_payloads table): Small, indexed, queryable
|
* - Metadata in PostgreSQL (raw_crawl_payloads table): Small, indexed, queryable
|
||||||
* - Payload stored in MinIO/S3 (or local filesystem as fallback): Gzipped JSON
|
* - Payload stored in MinIO/S3 (or local filesystem as fallback): Gzipped JSON
|
||||||
*
|
*
|
||||||
* Storage structure (MinIO):
|
* Storage structure (MinIO):
|
||||||
* cannaiq/payloads/{year}/{month}/{day}/store_{dispensary_id}_{timestamp}.json.gz
|
* cannaiq/payloads/{platform}/{year}/{month}/{day}/store_{dispensary_id}_{timestamp}.json.gz
|
||||||
*
|
*
|
||||||
* Storage structure (Local fallback):
|
* Storage structure (Local fallback):
|
||||||
* ./storage/payloads/{year}/{month}/{day}/store_{dispensary_id}_{timestamp}.json.gz
|
* ./storage/payloads/{platform}/{year}/{month}/{day}/store_{dispensary_id}_{timestamp}.json.gz
|
||||||
|
*
|
||||||
|
* Platform values: 'dutchie', 'jane'
|
||||||
|
*
|
||||||
|
* Examples:
|
||||||
|
* payloads/dutchie/2024/12/13/store_456_1734105600000.json.gz
|
||||||
|
* payloads/jane/2024/12/13/store_2788_1734105600000.json.gz
|
||||||
*
|
*
|
||||||
* Benefits:
|
* Benefits:
|
||||||
* - Compare any two crawls to see what changed
|
* - Compare any two crawls to see what changed
|
||||||
@@ -20,6 +26,7 @@
|
|||||||
* - DB stays small, backups stay fast
|
* - DB stays small, backups stay fast
|
||||||
* - ~90% compression (1.5MB -> 150KB per crawl)
|
* - ~90% compression (1.5MB -> 150KB per crawl)
|
||||||
* - Shared storage accessible by all worker pods (MinIO)
|
* - Shared storage accessible by all worker pods (MinIO)
|
||||||
|
* - Platform separation for different retention/management policies
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
@@ -98,23 +105,25 @@ export interface LoadPayloadResult {
|
|||||||
/**
|
/**
|
||||||
* Generate storage path/key for a payload
|
* Generate storage path/key for a payload
|
||||||
*
|
*
|
||||||
* MinIO format: payloads/{year}/{month}/{day}/store_{dispensary_id}_{timestamp}.json.gz
|
* MinIO format: payloads/{platform}/{year}/{month}/{day}/store_{dispensary_id}_{timestamp}.json.gz
|
||||||
* Local format: ./storage/payloads/{year}/{month}/{day}/store_{dispensary_id}_{timestamp}.json.gz
|
* Local format: ./storage/payloads/{platform}/{year}/{month}/{day}/store_{dispensary_id}_{timestamp}.json.gz
|
||||||
|
*
|
||||||
|
* Platform defaults to 'dutchie' for backward compatibility
|
||||||
*/
|
*/
|
||||||
function generateStoragePath(dispensaryId: number, timestamp: Date): string {
|
function generateStoragePath(dispensaryId: number, timestamp: Date, platform: string = 'dutchie'): string {
|
||||||
const year = timestamp.getFullYear();
|
const year = timestamp.getFullYear();
|
||||||
const month = String(timestamp.getMonth() + 1).padStart(2, '0');
|
const month = String(timestamp.getMonth() + 1).padStart(2, '0');
|
||||||
const day = String(timestamp.getDate()).padStart(2, '0');
|
const day = String(timestamp.getDate()).padStart(2, '0');
|
||||||
const ts = timestamp.getTime();
|
const ts = timestamp.getTime();
|
||||||
|
|
||||||
const relativePath = `payloads/${year}/${month}/${day}/store_${dispensaryId}_${ts}.json.gz`;
|
const relativePath = `payloads/${platform}/${year}/${month}/${day}/store_${dispensaryId}_${ts}.json.gz`;
|
||||||
|
|
||||||
if (useMinIO) {
|
if (useMinIO) {
|
||||||
// MinIO uses forward slashes, no leading slash
|
// MinIO uses forward slashes, no leading slash
|
||||||
return relativePath;
|
return relativePath;
|
||||||
} else {
|
} else {
|
||||||
// Local filesystem uses OS-specific path
|
// Local filesystem uses OS-specific path
|
||||||
return path.join(PAYLOAD_BASE_PATH, String(year), month, day, `store_${dispensaryId}_${ts}.json.gz`);
|
return path.join(PAYLOAD_BASE_PATH, platform, String(year), month, day, `store_${dispensaryId}_${ts}.json.gz`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -138,9 +147,10 @@ function calculateChecksum(data: Buffer): string {
|
|||||||
*
|
*
|
||||||
* @param pool - Database connection pool
|
* @param pool - Database connection pool
|
||||||
* @param dispensaryId - ID of the dispensary
|
* @param dispensaryId - ID of the dispensary
|
||||||
* @param payload - Raw JSON payload from GraphQL
|
* @param payload - Raw JSON payload from GraphQL/API
|
||||||
* @param crawlRunId - Optional crawl_run ID for linking
|
* @param crawlRunId - Optional crawl_run ID for linking
|
||||||
* @param productCount - Number of products in payload
|
* @param productCount - Number of products in payload
|
||||||
|
* @param platform - Platform identifier ('dutchie' | 'jane'), defaults to 'dutchie'
|
||||||
* @returns SavePayloadResult with file info and DB record ID
|
* @returns SavePayloadResult with file info and DB record ID
|
||||||
*/
|
*/
|
||||||
export async function saveRawPayload(
|
export async function saveRawPayload(
|
||||||
@@ -148,10 +158,11 @@ export async function saveRawPayload(
|
|||||||
dispensaryId: number,
|
dispensaryId: number,
|
||||||
payload: any,
|
payload: any,
|
||||||
crawlRunId: number | null = null,
|
crawlRunId: number | null = null,
|
||||||
productCount: number = 0
|
productCount: number = 0,
|
||||||
|
platform: string = 'dutchie'
|
||||||
): Promise<SavePayloadResult> {
|
): Promise<SavePayloadResult> {
|
||||||
const timestamp = new Date();
|
const timestamp = new Date();
|
||||||
const storagePath = generateStoragePath(dispensaryId, timestamp);
|
const storagePath = generateStoragePath(dispensaryId, timestamp, platform);
|
||||||
|
|
||||||
// Serialize and compress
|
// Serialize and compress
|
||||||
const jsonStr = JSON.stringify(payload);
|
const jsonStr = JSON.stringify(payload);
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ import CrossStateCompare from './pages/CrossStateCompare';
|
|||||||
import StateDetail from './pages/StateDetail';
|
import StateDetail from './pages/StateDetail';
|
||||||
import { Discovery } from './pages/Discovery';
|
import { Discovery } from './pages/Discovery';
|
||||||
import { WorkersDashboard } from './pages/WorkersDashboard';
|
import { WorkersDashboard } from './pages/WorkersDashboard';
|
||||||
|
import { ProxyManagement } from './pages/ProxyManagement';
|
||||||
import TasksDashboard from './pages/TasksDashboard';
|
import TasksDashboard from './pages/TasksDashboard';
|
||||||
import { ScraperOverviewDashboard } from './pages/ScraperOverviewDashboard';
|
import { ScraperOverviewDashboard } from './pages/ScraperOverviewDashboard';
|
||||||
import { SeoOrchestrator } from './pages/admin/seo/SeoOrchestrator';
|
import { SeoOrchestrator } from './pages/admin/seo/SeoOrchestrator';
|
||||||
@@ -124,6 +125,8 @@ export default function App() {
|
|||||||
<Route path="/discovery" element={<PrivateRoute><Discovery /></PrivateRoute>} />
|
<Route path="/discovery" element={<PrivateRoute><Discovery /></PrivateRoute>} />
|
||||||
{/* Workers Dashboard */}
|
{/* Workers Dashboard */}
|
||||||
<Route path="/workers" element={<PrivateRoute><WorkersDashboard /></PrivateRoute>} />
|
<Route path="/workers" element={<PrivateRoute><WorkersDashboard /></PrivateRoute>} />
|
||||||
|
{/* Proxy Management */}
|
||||||
|
<Route path="/proxies" element={<PrivateRoute><ProxyManagement /></PrivateRoute>} />
|
||||||
{/* Task Queue Dashboard */}
|
{/* Task Queue Dashboard */}
|
||||||
<Route path="/tasks" element={<PrivateRoute><TasksDashboard /></PrivateRoute>} />
|
<Route path="/tasks" element={<PrivateRoute><TasksDashboard /></PrivateRoute>} />
|
||||||
{/* Scraper Overview Dashboard (new primary) */}
|
{/* Scraper Overview Dashboard (new primary) */}
|
||||||
|
|||||||
436
cannaiq/src/pages/ProxyManagement.tsx
Normal file
436
cannaiq/src/pages/ProxyManagement.tsx
Normal file
@@ -0,0 +1,436 @@
|
|||||||
|
import { useState, useEffect, useCallback } from 'react';
|
||||||
|
import { Layout } from '../components/Layout';
|
||||||
|
import { api } from '../lib/api';
|
||||||
|
import {
|
||||||
|
Globe,
|
||||||
|
RefreshCw,
|
||||||
|
Server,
|
||||||
|
MapPin,
|
||||||
|
Trash2,
|
||||||
|
Plus,
|
||||||
|
Shield,
|
||||||
|
ShieldCheck,
|
||||||
|
ShieldX,
|
||||||
|
Wifi,
|
||||||
|
WifiOff,
|
||||||
|
Clock,
|
||||||
|
Database,
|
||||||
|
Cloud,
|
||||||
|
Activity,
|
||||||
|
} from 'lucide-react';
|
||||||
|
|
||||||
|
interface Proxy {
|
||||||
|
id: number;
|
||||||
|
host: string;
|
||||||
|
port: number;
|
||||||
|
protocol: string;
|
||||||
|
username: string;
|
||||||
|
city: string | null;
|
||||||
|
state: string | null;
|
||||||
|
country: string | null;
|
||||||
|
country_code: string | null;
|
||||||
|
active: boolean;
|
||||||
|
failure_count: number;
|
||||||
|
consecutive_403_count: number;
|
||||||
|
response_time_ms: number | null;
|
||||||
|
last_tested_at: string | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface ProxySession {
|
||||||
|
worker_id: string;
|
||||||
|
proxy_ip: string;
|
||||||
|
proxy_geo: string;
|
||||||
|
state: string;
|
||||||
|
started_at: string;
|
||||||
|
expires_at: string;
|
||||||
|
tasks_completed: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface EvomiSettings {
|
||||||
|
enabled: boolean;
|
||||||
|
api_key_set: boolean;
|
||||||
|
available_regions: string[];
|
||||||
|
available_cities: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
interface TaskWithProxy {
|
||||||
|
id: number;
|
||||||
|
worker_id: string;
|
||||||
|
role: string;
|
||||||
|
status: string;
|
||||||
|
proxy_ip: string | null;
|
||||||
|
proxy_geo: string | null;
|
||||||
|
proxy_source: 'api' | 'static' | null;
|
||||||
|
dispensary_name: string;
|
||||||
|
dispensary_city: string;
|
||||||
|
dispensary_state: string;
|
||||||
|
created_at: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function ProxyManagement() {
|
||||||
|
const [proxies, setProxies] = useState<Proxy[]>([]);
|
||||||
|
const [sessions, setSessions] = useState<ProxySession[]>([]);
|
||||||
|
const [tasks, setTasks] = useState<TaskWithProxy[]>([]);
|
||||||
|
const [evomiSettings, setEvomiSettings] = useState<EvomiSettings | null>(null);
|
||||||
|
const [loading, setLoading] = useState(true);
|
||||||
|
const [activeTab, setActiveTab] = useState<'sessions' | 'tasks' | 'static'>('sessions');
|
||||||
|
const [proxySource, setProxySource] = useState<'api' | 'static'>('api');
|
||||||
|
|
||||||
|
const fetchData = useCallback(async () => {
|
||||||
|
try {
|
||||||
|
setLoading(true);
|
||||||
|
|
||||||
|
// Fetch all proxy-related data
|
||||||
|
const [proxiesRes, sessionsRes, tasksRes, settingsRes] = await Promise.all([
|
||||||
|
api.get('/api/admin/proxies'),
|
||||||
|
api.get('/api/admin/proxies/sessions'),
|
||||||
|
api.get('/api/admin/proxies/tasks?limit=100'),
|
||||||
|
api.get('/api/admin/proxies/evomi-settings'),
|
||||||
|
]);
|
||||||
|
|
||||||
|
setProxies(proxiesRes.data.proxies || []);
|
||||||
|
setSessions(sessionsRes.data.sessions || []);
|
||||||
|
setTasks(tasksRes.data.tasks || []);
|
||||||
|
setEvomiSettings(settingsRes.data || null);
|
||||||
|
setProxySource(settingsRes.data?.enabled ? 'api' : 'static');
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error fetching proxy data:', error);
|
||||||
|
} finally {
|
||||||
|
setLoading(false);
|
||||||
|
}
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
fetchData();
|
||||||
|
const interval = setInterval(fetchData, 15000); // Refresh every 15s
|
||||||
|
return () => clearInterval(interval);
|
||||||
|
}, [fetchData]);
|
||||||
|
|
||||||
|
const getSourceBadge = (source: 'api' | 'static' | null) => {
|
||||||
|
if (source === 'api') {
|
||||||
|
return (
|
||||||
|
<span className="inline-flex items-center gap-1 px-2 py-0.5 rounded-full text-xs font-medium bg-blue-500/20 text-blue-400 border border-blue-500/30">
|
||||||
|
<Cloud className="w-3 h-3" />
|
||||||
|
Evomi API
|
||||||
|
</span>
|
||||||
|
);
|
||||||
|
} else if (source === 'static') {
|
||||||
|
return (
|
||||||
|
<span className="inline-flex items-center gap-1 px-2 py-0.5 rounded-full text-xs font-medium bg-gray-500/20 text-gray-400 border border-gray-500/30">
|
||||||
|
<Database className="w-3 h-3" />
|
||||||
|
Static
|
||||||
|
</span>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return (
|
||||||
|
<span className="inline-flex items-center gap-1 px-2 py-0.5 rounded-full text-xs font-medium bg-gray-700 text-gray-500">
|
||||||
|
-
|
||||||
|
</span>
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
|
return (
|
||||||
|
<Layout>
|
||||||
|
<div className="p-6">
|
||||||
|
{/* Header */}
|
||||||
|
<div className="flex items-center justify-between mb-6">
|
||||||
|
<div className="flex items-center gap-3">
|
||||||
|
<Globe className="w-8 h-8 text-emerald-400" />
|
||||||
|
<div>
|
||||||
|
<h1 className="text-2xl font-bold text-white">Proxy Management</h1>
|
||||||
|
<p className="text-gray-400 text-sm">Geo-targeted proxy sessions and usage tracking</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div className="flex items-center gap-3">
|
||||||
|
{/* Source indicator */}
|
||||||
|
<div className="flex items-center gap-2 px-3 py-2 bg-gray-800 rounded-lg border border-gray-700">
|
||||||
|
<span className="text-gray-400 text-sm">Source:</span>
|
||||||
|
{proxySource === 'api' ? (
|
||||||
|
<span className="flex items-center gap-1.5 text-blue-400">
|
||||||
|
<Cloud className="w-4 h-4" />
|
||||||
|
Evomi API
|
||||||
|
</span>
|
||||||
|
) : (
|
||||||
|
<span className="flex items-center gap-1.5 text-gray-400">
|
||||||
|
<Database className="w-4 h-4" />
|
||||||
|
Static Table
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<button
|
||||||
|
onClick={fetchData}
|
||||||
|
className="flex items-center gap-2 px-4 py-2 bg-gray-700 hover:bg-gray-600 rounded-lg text-white transition-colors"
|
||||||
|
>
|
||||||
|
<RefreshCw className={`w-4 h-4 ${loading ? 'animate-spin' : ''}`} />
|
||||||
|
Refresh
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Evomi API Status Card */}
|
||||||
|
{evomiSettings && (
|
||||||
|
<div className="mb-6 p-4 bg-gray-800 rounded-lg border border-gray-700">
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<div className="flex items-center gap-3">
|
||||||
|
<div className={`p-2 rounded-lg ${evomiSettings.enabled ? 'bg-emerald-500/20' : 'bg-gray-700'}`}>
|
||||||
|
{evomiSettings.enabled ? (
|
||||||
|
<ShieldCheck className="w-5 h-5 text-emerald-400" />
|
||||||
|
) : (
|
||||||
|
<ShieldX className="w-5 h-5 text-gray-500" />
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h3 className="text-white font-medium">Evomi Residential Proxies</h3>
|
||||||
|
<p className="text-gray-400 text-sm">
|
||||||
|
{evomiSettings.enabled
|
||||||
|
? `Dynamic geo-targeting enabled - ${evomiSettings.available_regions.length} regions available`
|
||||||
|
: 'API not configured - using static proxy table'}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div className="flex items-center gap-4 text-sm">
|
||||||
|
<div className="text-gray-400">
|
||||||
|
<span className="text-white font-medium">{evomiSettings.available_regions.length}</span> US States
|
||||||
|
</div>
|
||||||
|
<div className="text-gray-400">
|
||||||
|
<span className="text-white font-medium">{evomiSettings.available_cities.length}</span> Cities
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Tabs */}
|
||||||
|
<div className="flex gap-2 mb-6">
|
||||||
|
<button
|
||||||
|
onClick={() => setActiveTab('sessions')}
|
||||||
|
className={`px-4 py-2 rounded-lg font-medium transition-colors ${
|
||||||
|
activeTab === 'sessions'
|
||||||
|
? 'bg-emerald-500/20 text-emerald-400 border border-emerald-500/30'
|
||||||
|
: 'bg-gray-800 text-gray-400 hover:bg-gray-700 border border-gray-700'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<Activity className="w-4 h-4" />
|
||||||
|
Active Sessions
|
||||||
|
</div>
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={() => setActiveTab('tasks')}
|
||||||
|
className={`px-4 py-2 rounded-lg font-medium transition-colors ${
|
||||||
|
activeTab === 'tasks'
|
||||||
|
? 'bg-emerald-500/20 text-emerald-400 border border-emerald-500/30'
|
||||||
|
: 'bg-gray-800 text-gray-400 hover:bg-gray-700 border border-gray-700'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<Server className="w-4 h-4" />
|
||||||
|
Task History
|
||||||
|
</div>
|
||||||
|
</button>
|
||||||
|
<button
|
||||||
|
onClick={() => setActiveTab('static')}
|
||||||
|
className={`px-4 py-2 rounded-lg font-medium transition-colors ${
|
||||||
|
activeTab === 'static'
|
||||||
|
? 'bg-emerald-500/20 text-emerald-400 border border-emerald-500/30'
|
||||||
|
: 'bg-gray-800 text-gray-400 hover:bg-gray-700 border border-gray-700'
|
||||||
|
}`}
|
||||||
|
>
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<Database className="w-4 h-4" />
|
||||||
|
Static Proxies ({proxies.length})
|
||||||
|
</div>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{/* Active Sessions Tab */}
|
||||||
|
{activeTab === 'sessions' && (
|
||||||
|
<div className="bg-gray-800 rounded-lg border border-gray-700 overflow-hidden">
|
||||||
|
<table className="w-full">
|
||||||
|
<thead className="bg-gray-900">
|
||||||
|
<tr>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Worker</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Proxy IP</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Geo Target</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">State</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Started</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Expires</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Tasks</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody className="divide-y divide-gray-700">
|
||||||
|
{sessions.length === 0 ? (
|
||||||
|
<tr>
|
||||||
|
<td colSpan={7} className="px-4 py-8 text-center text-gray-500">
|
||||||
|
No active proxy sessions
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
) : (
|
||||||
|
sessions.map((session, idx) => (
|
||||||
|
<tr key={idx} className="hover:bg-gray-750">
|
||||||
|
<td className="px-4 py-3 text-sm font-mono text-white">{session.worker_id}</td>
|
||||||
|
<td className="px-4 py-3 text-sm font-mono text-emerald-400">{session.proxy_ip}</td>
|
||||||
|
<td className="px-4 py-3 text-sm text-gray-300">{session.proxy_geo}</td>
|
||||||
|
<td className="px-4 py-3">
|
||||||
|
<span className="inline-flex items-center gap-1 px-2 py-0.5 rounded bg-blue-500/20 text-blue-400 text-xs font-medium">
|
||||||
|
<MapPin className="w-3 h-3" />
|
||||||
|
{session.state}
|
||||||
|
</span>
|
||||||
|
</td>
|
||||||
|
<td className="px-4 py-3 text-sm text-gray-400">
|
||||||
|
{new Date(session.started_at).toLocaleTimeString()}
|
||||||
|
</td>
|
||||||
|
<td className="px-4 py-3 text-sm text-gray-400">
|
||||||
|
{new Date(session.expires_at).toLocaleTimeString()}
|
||||||
|
</td>
|
||||||
|
<td className="px-4 py-3 text-sm text-white">{session.tasks_completed}</td>
|
||||||
|
</tr>
|
||||||
|
))
|
||||||
|
)}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Task History Tab */}
|
||||||
|
{activeTab === 'tasks' && (
|
||||||
|
<div className="bg-gray-800 rounded-lg border border-gray-700 overflow-hidden">
|
||||||
|
<table className="w-full">
|
||||||
|
<thead className="bg-gray-900">
|
||||||
|
<tr>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">ID</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Worker</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Proxy IP</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Geo</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Source</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Task</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Dispensary</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Location</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Status</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody className="divide-y divide-gray-700">
|
||||||
|
{tasks.length === 0 ? (
|
||||||
|
<tr>
|
||||||
|
<td colSpan={9} className="px-4 py-8 text-center text-gray-500">
|
||||||
|
No task history with proxy data
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
) : (
|
||||||
|
tasks.map((task) => (
|
||||||
|
<tr key={task.id} className="hover:bg-gray-750">
|
||||||
|
<td className="px-4 py-3 text-sm font-mono text-gray-400">{task.id}</td>
|
||||||
|
<td className="px-4 py-3 text-sm font-mono text-white">
|
||||||
|
{task.worker_id ? task.worker_id.substring(0, 20) : '-'}
|
||||||
|
</td>
|
||||||
|
<td className="px-4 py-3 text-sm font-mono text-emerald-400">
|
||||||
|
{task.proxy_ip || '-'}
|
||||||
|
</td>
|
||||||
|
<td className="px-4 py-3 text-sm text-gray-300">
|
||||||
|
{task.proxy_geo || '-'}
|
||||||
|
</td>
|
||||||
|
<td className="px-4 py-3">{getSourceBadge(task.proxy_source)}</td>
|
||||||
|
<td className="px-4 py-3 text-sm text-gray-300">{task.role}</td>
|
||||||
|
<td className="px-4 py-3 text-sm text-white max-w-[200px] truncate">
|
||||||
|
{task.dispensary_name}
|
||||||
|
</td>
|
||||||
|
<td className="px-4 py-3 text-sm text-gray-400">
|
||||||
|
{task.dispensary_city}, {task.dispensary_state}
|
||||||
|
</td>
|
||||||
|
<td className="px-4 py-3">
|
||||||
|
<span className={`inline-flex px-2 py-0.5 rounded text-xs font-medium ${
|
||||||
|
task.status === 'completed'
|
||||||
|
? 'bg-emerald-500/20 text-emerald-400'
|
||||||
|
: task.status === 'failed'
|
||||||
|
? 'bg-red-500/20 text-red-400'
|
||||||
|
: task.status === 'running'
|
||||||
|
? 'bg-blue-500/20 text-blue-400'
|
||||||
|
: 'bg-gray-500/20 text-gray-400'
|
||||||
|
}`}>
|
||||||
|
{task.status}
|
||||||
|
</span>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
))
|
||||||
|
)}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{/* Static Proxies Tab */}
|
||||||
|
{activeTab === 'static' && (
|
||||||
|
<div className="bg-gray-800 rounded-lg border border-gray-700 overflow-hidden">
|
||||||
|
<div className="p-4 border-b border-gray-700 flex items-center justify-between">
|
||||||
|
<p className="text-gray-400 text-sm">
|
||||||
|
Static proxy list for fallback when Evomi API is unavailable
|
||||||
|
</p>
|
||||||
|
<button className="flex items-center gap-2 px-3 py-1.5 bg-emerald-600 hover:bg-emerald-500 rounded text-white text-sm transition-colors">
|
||||||
|
<Plus className="w-4 h-4" />
|
||||||
|
Add Proxy
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<table className="w-full">
|
||||||
|
<thead className="bg-gray-900">
|
||||||
|
<tr>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Host:Port</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Location</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Status</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Response Time</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Failures</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">403s</th>
|
||||||
|
<th className="px-4 py-3 text-left text-xs font-medium text-gray-400 uppercase">Actions</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody className="divide-y divide-gray-700">
|
||||||
|
{proxies.length === 0 ? (
|
||||||
|
<tr>
|
||||||
|
<td colSpan={7} className="px-4 py-8 text-center text-gray-500">
|
||||||
|
No static proxies configured. Using Evomi API for geo-targeted proxies.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
) : (
|
||||||
|
proxies.map((proxy) => (
|
||||||
|
<tr key={proxy.id} className="hover:bg-gray-750">
|
||||||
|
<td className="px-4 py-3 text-sm font-mono text-white">
|
||||||
|
{proxy.host}:{proxy.port}
|
||||||
|
</td>
|
||||||
|
<td className="px-4 py-3 text-sm text-gray-300">
|
||||||
|
{proxy.city && proxy.state
|
||||||
|
? `${proxy.city}, ${proxy.state}`
|
||||||
|
: proxy.country || '-'}
|
||||||
|
</td>
|
||||||
|
<td className="px-4 py-3">
|
||||||
|
{proxy.active ? (
|
||||||
|
<span className="inline-flex items-center gap-1 text-emerald-400 text-sm">
|
||||||
|
<Wifi className="w-3 h-3" />
|
||||||
|
Active
|
||||||
|
</span>
|
||||||
|
) : (
|
||||||
|
<span className="inline-flex items-center gap-1 text-gray-500 text-sm">
|
||||||
|
<WifiOff className="w-3 h-3" />
|
||||||
|
Inactive
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</td>
|
||||||
|
<td className="px-4 py-3 text-sm text-gray-400">
|
||||||
|
{proxy.response_time_ms ? `${proxy.response_time_ms}ms` : '-'}
|
||||||
|
</td>
|
||||||
|
<td className="px-4 py-3 text-sm text-gray-400">{proxy.failure_count}</td>
|
||||||
|
<td className="px-4 py-3 text-sm text-gray-400">{proxy.consecutive_403_count}</td>
|
||||||
|
<td className="px-4 py-3">
|
||||||
|
<button className="p-1.5 text-gray-400 hover:text-red-400 transition-colors">
|
||||||
|
<Trash2 className="w-4 h-4" />
|
||||||
|
</button>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
))
|
||||||
|
)}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
</Layout>
|
||||||
|
);
|
||||||
|
}
|
||||||
@@ -350,33 +350,23 @@ function PreflightSummary({ worker }: { worker: Worker }) {
|
|||||||
}
|
}
|
||||||
if (httpError) tooltipLines.push(`Error: ${httpError}`);
|
if (httpError) tooltipLines.push(`Error: ${httpError}`);
|
||||||
|
|
||||||
// Qualification styling - GOLD for qualified workers
|
// Qualification styling - compact with icon badge and geo
|
||||||
if (isQualified) {
|
if (isQualified) {
|
||||||
return (
|
return (
|
||||||
<div className="flex flex-col gap-1" title={tooltipLines.join('\n')}>
|
<div className="flex flex-col gap-1" title={tooltipLines.join('\n')}>
|
||||||
{/* Qualified badge - GOLD */}
|
{/* Qualified icon + IP on same line */}
|
||||||
<div className="inline-flex items-center gap-1.5 px-2 py-1 rounded-lg bg-gradient-to-r from-amber-100 to-yellow-100 border border-amber-300">
|
<div className="inline-flex items-center gap-1.5">
|
||||||
<ShieldCheck className="w-4 h-4 text-amber-600" />
|
<div className="p-1 rounded bg-gradient-to-r from-amber-100 to-yellow-100 border border-amber-300">
|
||||||
<span className="text-xs font-bold text-amber-700">QUALIFIED</span>
|
<ShieldCheck className="w-3.5 h-3.5 text-amber-600" />
|
||||||
|
</div>
|
||||||
|
{httpIp && (
|
||||||
|
<span className="font-mono text-xs text-gray-600">{httpIp}</span>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
{/* IP address */}
|
{/* Antidetect status with response time */}
|
||||||
{httpIp && (
|
<div className="flex items-center gap-1 text-xs text-gray-500">
|
||||||
<div className="flex items-center gap-1 text-xs text-gray-600">
|
<span className="text-emerald-600">Antidetect</span>
|
||||||
<Globe className="w-3 h-3 text-blue-500" />
|
<span className="text-emerald-500">OK</span>
|
||||||
<span className="font-mono">{httpIp}</span>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
{/* Fingerprint summary */}
|
|
||||||
{fingerprint?.browser && (
|
|
||||||
<div className="flex items-center gap-1 text-xs text-gray-500">
|
|
||||||
<Fingerprint className="w-3 h-3 text-purple-500" />
|
|
||||||
<span className="truncate max-w-[100px]">{fingerprint.browser}</span>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
{/* Antidetect status */}
|
|
||||||
<div className="flex items-center gap-1 text-xs">
|
|
||||||
<Shield className="w-3 h-3 text-emerald-500" />
|
|
||||||
<span className="text-emerald-600">Antidetect OK</span>
|
|
||||||
{httpMs && <span className="text-gray-400">({httpMs}ms)</span>}
|
{httpMs && <span className="text-gray-400">({httpMs}ms)</span>}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
Reference in New Issue
Block a user