umami/prisma/migrations/18_setup_timescaledb/migration.sql
iskandarsulaili 5f496fdb79 feat: integrate First8 Marketing hyper-personalization system
Enhanced Umami Analytics with First8 Marketing integration for
hyper-personalized recommendation engine.

Database Enhancements:
- PostgreSQL 17 with Apache AGE 1.6.0 (graph database)
- TimescaleDB 2.23.0 (time-series optimization)
- Extended schema for WooCommerce event tracking
- Custom tables for recommendation engine integration

Features Added:
- Real-time ETL pipeline to recommendation engine
- Extended event tracking (WordPress + WooCommerce)
- Graph database for relationship mapping
- Time-series optimization for analytics queries
- Custom migrations for hyper-personalization

Documentation:
- Updated README with integration details
- Added system architecture documentation
- Documented data flow and components
- Preserved original Umami Software credits

Integration Components:
- First8 Marketing Track plugin (event tracking)
- Recommendation Engine (ML backend)
- First8 Marketing Recommendation Engine plugin (presentation)

Status: Production-ready
Version: Based on Umami latest + First8 Marketing enhancements
2025-11-05 19:17:57 +08:00

214 lines
7.3 KiB
SQL

-- Migration: Setup TimescaleDB for Time-Series Analytics
-- Created: 2025-01-15
-- Description: Installs TimescaleDB extension and creates hypertables for time-series data
-- Requirements: PostgreSQL 17 + TimescaleDB 2.23.0
-- ============================================================================
-- Step 1: Install TimescaleDB Extension
-- ============================================================================
CREATE EXTENSION IF NOT EXISTS timescaledb;
-- ============================================================================
-- Step 2: Create Time-Series Events Table
-- ============================================================================
CREATE TABLE IF NOT EXISTS time_series_events (
time TIMESTAMPTZ NOT NULL,
website_id UUID NOT NULL,
session_id UUID NOT NULL,
user_id VARCHAR(255),
-- Event details
event_type VARCHAR(50) NOT NULL,
event_name VARCHAR(50),
event_value DECIMAL(19, 4),
properties JSONB,
-- Dimensions for fast filtering
page_url VARCHAR(500),
product_id VARCHAR(50),
category VARCHAR(100),
device VARCHAR(20),
country CHAR(2),
PRIMARY KEY (time, website_id, session_id)
);
-- Convert to hypertable (partitioned by time)
SELECT create_hypertable('time_series_events', 'time',
chunk_time_interval => INTERVAL '7 days',
if_not_exists => TRUE
);
-- ============================================================================
-- Step 3: Create Indexes for Time-Series Queries
-- ============================================================================
-- Index for website-specific queries
CREATE INDEX IF NOT EXISTS idx_ts_events_website
ON time_series_events (website_id, time DESC);
-- Index for session-based queries
CREATE INDEX IF NOT EXISTS idx_ts_events_session
ON time_series_events (session_id, time DESC);
-- Index for product-based queries (partial index for sparse data)
CREATE INDEX IF NOT EXISTS idx_ts_events_product
ON time_series_events (product_id, time DESC)
WHERE product_id IS NOT NULL;
-- Index for event type queries
CREATE INDEX IF NOT EXISTS idx_ts_events_event_type
ON time_series_events (event_type, time DESC);
-- ============================================================================
-- Step 4: Create Aggregated Metrics Tables
-- ============================================================================
-- Website metrics aggregated hourly
CREATE TABLE IF NOT EXISTS website_metrics_hourly (
time TIMESTAMPTZ NOT NULL,
website_id UUID NOT NULL,
-- Traffic metrics
pageviews INTEGER DEFAULT 0,
unique_sessions INTEGER DEFAULT 0,
unique_users INTEGER DEFAULT 0,
avg_time_on_page INTEGER,
avg_scroll_depth INTEGER,
bounce_rate DECIMAL(5, 4),
-- Conversion metrics
add_to_cart_count INTEGER DEFAULT 0,
checkout_start_count INTEGER DEFAULT 0,
purchase_count INTEGER DEFAULT 0,
conversion_rate DECIMAL(5, 4),
-- Revenue metrics
total_revenue DECIMAL(19, 4) DEFAULT 0,
avg_order_value DECIMAL(19, 4),
PRIMARY KEY (time, website_id)
);
-- Convert to hypertable
SELECT create_hypertable('website_metrics_hourly', 'time',
chunk_time_interval => INTERVAL '30 days',
if_not_exists => TRUE
);
-- Product metrics aggregated daily
CREATE TABLE IF NOT EXISTS product_metrics_daily (
time DATE NOT NULL,
website_id UUID NOT NULL,
product_id VARCHAR(50) NOT NULL,
-- View metrics
views INTEGER DEFAULT 0,
unique_viewers INTEGER DEFAULT 0,
avg_time_viewed INTEGER,
-- Conversion metrics
add_to_cart_count INTEGER DEFAULT 0,
purchase_count INTEGER DEFAULT 0,
conversion_rate DECIMAL(5, 4),
-- Revenue metrics
revenue DECIMAL(19, 4) DEFAULT 0,
units_sold INTEGER DEFAULT 0,
PRIMARY KEY (time, website_id, product_id)
);
-- Convert to hypertable
SELECT create_hypertable('product_metrics_daily', 'time',
chunk_time_interval => INTERVAL '30 days',
if_not_exists => TRUE
);
-- ============================================================================
-- Step 5: Create Continuous Aggregates (Materialized Views)
-- ============================================================================
-- Hourly website metrics continuous aggregate
CREATE MATERIALIZED VIEW IF NOT EXISTS website_metrics_hourly_agg
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 hour', time) AS time,
website_id,
COUNT(*) FILTER (WHERE event_type = 'pageview') as pageviews,
COUNT(DISTINCT session_id) as unique_sessions,
COUNT(DISTINCT user_id) FILTER (WHERE user_id IS NOT NULL) as unique_users,
COUNT(*) FILTER (WHERE event_name = 'add_to_cart') as add_to_cart_count,
COUNT(*) FILTER (WHERE event_name = 'checkout_start') as checkout_start_count,
COUNT(*) FILTER (WHERE event_name = 'purchase') as purchase_count,
SUM(event_value) FILTER (WHERE event_name = 'purchase') as total_revenue
FROM time_series_events
GROUP BY time_bucket('1 hour', time), website_id;
-- Add refresh policy for continuous aggregate
SELECT add_continuous_aggregate_policy('website_metrics_hourly_agg',
start_offset => INTERVAL '3 hours',
end_offset => INTERVAL '1 hour',
schedule_interval => INTERVAL '1 hour',
if_not_exists => TRUE
);
-- Daily product metrics continuous aggregate
CREATE MATERIALIZED VIEW IF NOT EXISTS product_metrics_daily_agg
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 day', time) AS time,
website_id,
product_id,
COUNT(*) FILTER (WHERE event_name = 'product_view') as views,
COUNT(DISTINCT session_id) FILTER (WHERE event_name = 'product_view') as unique_viewers,
COUNT(*) FILTER (WHERE event_name = 'add_to_cart') as add_to_cart_count,
COUNT(*) FILTER (WHERE event_name = 'purchase') as purchase_count,
SUM(event_value) FILTER (WHERE event_name = 'purchase') as revenue
FROM time_series_events
WHERE product_id IS NOT NULL
GROUP BY time_bucket('1 day', time), website_id, product_id;
-- Add refresh policy for product metrics
SELECT add_continuous_aggregate_policy('product_metrics_daily_agg',
start_offset => INTERVAL '7 days',
end_offset => INTERVAL '1 day',
schedule_interval => INTERVAL '1 day',
if_not_exists => TRUE
);
-- ============================================================================
-- Step 6: Create Data Retention Policies
-- ============================================================================
-- Retain raw time-series events for 90 days
SELECT add_retention_policy('time_series_events',
INTERVAL '90 days',
if_not_exists => TRUE
);
-- Retain hourly aggregates for 1 year
SELECT add_retention_policy('website_metrics_hourly',
INTERVAL '1 year',
if_not_exists => TRUE
);
-- Retain daily product metrics for 2 years
SELECT add_retention_policy('product_metrics_daily',
INTERVAL '2 years',
if_not_exists => TRUE
);
-- ============================================================================
-- Migration Complete
-- ============================================================================
DO $$
BEGIN
RAISE NOTICE '=================================================================';
RAISE NOTICE 'TimescaleDB Migration Complete';
RAISE NOTICE 'Hypertables: time_series_events, website_metrics_hourly, product_metrics_daily';
RAISE NOTICE 'Continuous Aggregates: website_metrics_hourly_agg, product_metrics_daily_agg';
RAISE NOTICE 'Retention Policies: 90 days (raw), 1 year (hourly), 2 years (daily)';
RAISE NOTICE '=================================================================';
END $$;