Initial commit - Dutchie dispensary scraper

This commit is contained in:
Kelly
2025-11-28 19:45:44 -07:00
commit 5757a8e9bd
23375 changed files with 3788799 additions and 0 deletions

340
backend/src/db/migrate.ts Executable file
View File

@@ -0,0 +1,340 @@
import { Pool } from 'pg';
const pool = new Pool({
connectionString: process.env.DATABASE_URL,
});
export async function runMigrations() {
const client = await pool.connect();
try {
await client.query('BEGIN');
// Users table
await client.query(`
CREATE TABLE IF NOT EXISTS users (
id SERIAL PRIMARY KEY,
email VARCHAR(255) UNIQUE NOT NULL,
password_hash VARCHAR(255) NOT NULL,
role VARCHAR(50) DEFAULT 'admin',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
`);
// Stores table
await client.query(`
CREATE TABLE IF NOT EXISTS stores (
id SERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL,
slug VARCHAR(255) UNIQUE NOT NULL,
dutchie_url TEXT NOT NULL,
active BOOLEAN DEFAULT true,
scrape_enabled BOOLEAN DEFAULT true,
last_scraped_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
`);
// Categories table (shop, brands, specials)
await client.query(`
CREATE TABLE IF NOT EXISTS categories (
id SERIAL PRIMARY KEY,
store_id INTEGER REFERENCES stores(id) ON DELETE CASCADE,
name VARCHAR(255) NOT NULL,
slug VARCHAR(255) NOT NULL,
dutchie_url TEXT NOT NULL,
scrape_enabled BOOLEAN DEFAULT true,
last_scraped_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(store_id, slug)
);
`);
// Products table
await client.query(`
CREATE TABLE IF NOT EXISTS products (
id SERIAL PRIMARY KEY,
store_id INTEGER REFERENCES stores(id) ON DELETE CASCADE,
category_id INTEGER REFERENCES categories(id) ON DELETE SET NULL,
dutchie_product_id VARCHAR(255),
name VARCHAR(500) NOT NULL,
slug VARCHAR(500),
description TEXT,
price DECIMAL(10, 2),
original_price DECIMAL(10, 2),
strain_type VARCHAR(100),
thc_percentage DECIMAL(5, 2),
cbd_percentage DECIMAL(5, 2),
brand VARCHAR(255),
weight VARCHAR(100),
image_url TEXT,
local_image_path TEXT,
dutchie_url TEXT NOT NULL,
in_stock BOOLEAN DEFAULT true,
is_special BOOLEAN DEFAULT false,
metadata JSONB,
first_seen_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
last_seen_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(store_id, dutchie_product_id)
);
`);
// Campaigns table
await client.query(`
CREATE TABLE IF NOT EXISTS campaigns (
id SERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL,
slug VARCHAR(255) UNIQUE NOT NULL,
description TEXT,
display_style VARCHAR(50) DEFAULT 'grid',
active BOOLEAN DEFAULT true,
start_date TIMESTAMP,
end_date TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
`);
// Add variant column to products table (for different sizes/options of same product)
await client.query(`
ALTER TABLE products ADD COLUMN IF NOT EXISTS variant VARCHAR(255);
`);
// Add special tracking columns (DEPRECATED - not used with new approach)
await client.query(`
ALTER TABLE products ADD COLUMN IF NOT EXISTS special_ends_at TIMESTAMP;
ALTER TABLE products ADD COLUMN IF NOT EXISTS special_text TEXT;
ALTER TABLE products ADD COLUMN IF NOT EXISTS special_type VARCHAR(100);
`);
// ====== NEW SCHEMA ADDITIONS ======
// Add array columns for product attributes
await client.query(`
ALTER TABLE products ADD COLUMN IF NOT EXISTS terpenes TEXT[];
ALTER TABLE products ADD COLUMN IF NOT EXISTS effects TEXT[];
ALTER TABLE products ADD COLUMN IF NOT EXISTS flavors TEXT[];
`);
// Add new price columns (regular_price = market price, sale_price = discount price)
await client.query(`
ALTER TABLE products ADD COLUMN IF NOT EXISTS regular_price DECIMAL(10, 2);
ALTER TABLE products ADD COLUMN IF NOT EXISTS sale_price DECIMAL(10, 2);
`);
// Migrate existing price data
await client.query(`
UPDATE products
SET regular_price = original_price
WHERE regular_price IS NULL AND original_price IS NOT NULL;
`);
await client.query(`
UPDATE products
SET sale_price = price
WHERE sale_price IS NULL AND price IS NOT NULL AND original_price IS NOT NULL AND price < original_price;
`);
// Make slug NOT NULL and add unique constraint
await client.query(`
UPDATE products SET slug = dutchie_product_id WHERE slug IS NULL;
ALTER TABLE products ALTER COLUMN slug SET NOT NULL;
`);
// Drop old unique constraint and add new one on slug
await client.query(`
ALTER TABLE products DROP CONSTRAINT IF EXISTS products_store_id_dutchie_product_id_key;
ALTER TABLE products ADD CONSTRAINT products_store_id_slug_unique UNIQUE (store_id, slug);
`);
// Product Categories (many-to-many) - products can appear in multiple categories
await client.query(`
CREATE TABLE IF NOT EXISTS product_categories (
id SERIAL PRIMARY KEY,
product_id INTEGER REFERENCES products(id) ON DELETE CASCADE,
category_slug VARCHAR(255) NOT NULL,
first_seen_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
last_seen_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(product_id, category_slug)
);
`);
await client.query(`
CREATE INDEX IF NOT EXISTS idx_product_categories_slug ON product_categories(category_slug, last_seen_at DESC);
CREATE INDEX IF NOT EXISTS idx_product_categories_product ON product_categories(product_id);
`);
// Price History - track regular and sale price changes over time
await client.query(`
CREATE TABLE IF NOT EXISTS price_history (
id SERIAL PRIMARY KEY,
product_id INTEGER REFERENCES products(id) ON DELETE CASCADE,
regular_price DECIMAL(10, 2),
sale_price DECIMAL(10, 2),
recorded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
`);
await client.query(`
CREATE INDEX IF NOT EXISTS idx_price_history_product ON price_history(product_id, recorded_at DESC);
CREATE INDEX IF NOT EXISTS idx_price_history_recorded ON price_history(recorded_at DESC);
`);
// Batch History - track cannabinoid/terpene changes (different batches)
await client.query(`
CREATE TABLE IF NOT EXISTS batch_history (
id SERIAL PRIMARY KEY,
product_id INTEGER REFERENCES products(id) ON DELETE CASCADE,
thc_percentage DECIMAL(5, 2),
cbd_percentage DECIMAL(5, 2),
terpenes TEXT[],
strain_type VARCHAR(100),
recorded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
`);
await client.query(`
CREATE INDEX IF NOT EXISTS idx_batch_history_product ON batch_history(product_id, recorded_at DESC);
CREATE INDEX IF NOT EXISTS idx_batch_history_recorded ON batch_history(recorded_at DESC);
`);
// Campaign products (many-to-many with ordering)
await client.query(`
CREATE TABLE IF NOT EXISTS campaign_products (
id SERIAL PRIMARY KEY,
campaign_id INTEGER REFERENCES campaigns(id) ON DELETE CASCADE,
product_id INTEGER REFERENCES products(id) ON DELETE CASCADE,
display_order INTEGER DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(campaign_id, product_id)
);
`);
// Click tracking
await client.query(`
CREATE TABLE IF NOT EXISTS clicks (
id SERIAL PRIMARY KEY,
product_id INTEGER REFERENCES products(id) ON DELETE CASCADE,
campaign_id INTEGER REFERENCES campaigns(id) ON DELETE SET NULL,
ip_address VARCHAR(45),
user_agent TEXT,
referrer TEXT,
clicked_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
`);
// Create index on clicked_at for analytics queries
await client.query(`
CREATE INDEX IF NOT EXISTS idx_clicks_clicked_at ON clicks(clicked_at);
CREATE INDEX IF NOT EXISTS idx_clicks_product_id ON clicks(product_id);
CREATE INDEX IF NOT EXISTS idx_clicks_campaign_id ON clicks(campaign_id);
`);
// Proxies table
await client.query(`
CREATE TABLE IF NOT EXISTS proxies (
id SERIAL PRIMARY KEY,
host VARCHAR(255) NOT NULL,
port INTEGER NOT NULL,
protocol VARCHAR(10) NOT NULL,
username VARCHAR(255),
password VARCHAR(255),
active BOOLEAN DEFAULT true,
is_anonymous BOOLEAN DEFAULT false,
last_tested_at TIMESTAMP,
test_result VARCHAR(50),
response_time_ms INTEGER,
failure_count INTEGER DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(host, port, protocol)
);
`);
// Add failure_count column if it doesn't exist
await client.query(`
ALTER TABLE proxies ADD COLUMN IF NOT EXISTS failure_count INTEGER DEFAULT 0;
`);
// Failed proxies table
await client.query(`
CREATE TABLE IF NOT EXISTS failed_proxies (
id SERIAL PRIMARY KEY,
host VARCHAR(255) NOT NULL,
port INTEGER NOT NULL,
protocol VARCHAR(10) NOT NULL,
username VARCHAR(255),
password VARCHAR(255),
failure_count INTEGER NOT NULL,
last_error TEXT,
failed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(host, port, protocol)
);
`);
// Proxy test jobs table
await client.query(`
CREATE TABLE IF NOT EXISTS proxy_test_jobs (
id SERIAL PRIMARY KEY,
status VARCHAR(20) NOT NULL DEFAULT 'pending',
total_proxies INTEGER NOT NULL DEFAULT 0,
tested_proxies INTEGER NOT NULL DEFAULT 0,
passed_proxies INTEGER NOT NULL DEFAULT 0,
failed_proxies INTEGER NOT NULL DEFAULT 0,
started_at TIMESTAMP,
completed_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
`);
await client.query(`
CREATE INDEX IF NOT EXISTS idx_proxy_test_jobs_status ON proxy_test_jobs(status);
CREATE INDEX IF NOT EXISTS idx_proxy_test_jobs_created_at ON proxy_test_jobs(created_at DESC);
`);
// Settings table
await client.query(`
CREATE TABLE IF NOT EXISTS settings (
key VARCHAR(255) PRIMARY KEY,
value TEXT NOT NULL,
description TEXT,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
`);
// Insert default settings
await client.query(`
INSERT INTO settings (key, value, description) VALUES
('scrape_interval_hours', '4', 'How often to scrape stores (in hours)'),
('scrape_specials_time', '00:01', 'Time to scrape specials daily (HH:MM in 24h format)'),
('analytics_retention_days', '365', 'How many days to keep analytics data'),
('proxy_timeout_ms', '3000', 'Proxy timeout in milliseconds'),
('proxy_test_url', 'https://httpbin.org/ip', 'URL to test proxies against')
ON CONFLICT (key) DO NOTHING;
`);
await client.query('COMMIT');
console.log('✅ Migrations completed successfully');
} catch (error) {
await client.query('ROLLBACK');
console.error('❌ Migration failed:', error);
throw error;
} finally {
client.release();
}
}
export { pool };
// Run migrations if this file is executed directly
if (require.main === module) {
runMigrations()
.then(() => process.exit(0))
.catch(() => process.exit(1));
}