mirror of
https://github.com/umami-software/umami.git
synced 2026-02-08 14:47:14 +01:00
Enhanced Umami Analytics with First8 Marketing integration for hyper-personalized recommendation engine. Database Enhancements: - PostgreSQL 17 with Apache AGE 1.6.0 (graph database) - TimescaleDB 2.23.0 (time-series optimization) - Extended schema for WooCommerce event tracking - Custom tables for recommendation engine integration Features Added: - Real-time ETL pipeline to recommendation engine - Extended event tracking (WordPress + WooCommerce) - Graph database for relationship mapping - Time-series optimization for analytics queries - Custom migrations for hyper-personalization Documentation: - Updated README with integration details - Added system architecture documentation - Documented data flow and components - Preserved original Umami Software credits Integration Components: - First8 Marketing Track plugin (event tracking) - Recommendation Engine (ML backend) - First8 Marketing Recommendation Engine plugin (presentation) Status: Production-ready Version: Based on Umami latest + First8 Marketing enhancements
214 lines
7.3 KiB
SQL
214 lines
7.3 KiB
SQL
-- Migration: Setup TimescaleDB for Time-Series Analytics
|
|
-- Created: 2025-01-15
|
|
-- Description: Installs TimescaleDB extension and creates hypertables for time-series data
|
|
-- Requirements: PostgreSQL 17 + TimescaleDB 2.23.0
|
|
|
|
-- ============================================================================
|
|
-- Step 1: Install TimescaleDB Extension
|
|
-- ============================================================================
|
|
CREATE EXTENSION IF NOT EXISTS timescaledb;
|
|
|
|
-- ============================================================================
|
|
-- Step 2: Create Time-Series Events Table
|
|
-- ============================================================================
|
|
CREATE TABLE IF NOT EXISTS time_series_events (
|
|
time TIMESTAMPTZ NOT NULL,
|
|
website_id UUID NOT NULL,
|
|
session_id UUID NOT NULL,
|
|
user_id VARCHAR(255),
|
|
|
|
-- Event details
|
|
event_type VARCHAR(50) NOT NULL,
|
|
event_name VARCHAR(50),
|
|
event_value DECIMAL(19, 4),
|
|
properties JSONB,
|
|
|
|
-- Dimensions for fast filtering
|
|
page_url VARCHAR(500),
|
|
product_id VARCHAR(50),
|
|
category VARCHAR(100),
|
|
device VARCHAR(20),
|
|
country CHAR(2),
|
|
|
|
PRIMARY KEY (time, website_id, session_id)
|
|
);
|
|
|
|
-- Convert to hypertable (partitioned by time)
|
|
SELECT create_hypertable('time_series_events', 'time',
|
|
chunk_time_interval => INTERVAL '7 days',
|
|
if_not_exists => TRUE
|
|
);
|
|
|
|
-- ============================================================================
|
|
-- Step 3: Create Indexes for Time-Series Queries
|
|
-- ============================================================================
|
|
|
|
-- Index for website-specific queries
|
|
CREATE INDEX IF NOT EXISTS idx_ts_events_website
|
|
ON time_series_events (website_id, time DESC);
|
|
|
|
-- Index for session-based queries
|
|
CREATE INDEX IF NOT EXISTS idx_ts_events_session
|
|
ON time_series_events (session_id, time DESC);
|
|
|
|
-- Index for product-based queries (partial index for sparse data)
|
|
CREATE INDEX IF NOT EXISTS idx_ts_events_product
|
|
ON time_series_events (product_id, time DESC)
|
|
WHERE product_id IS NOT NULL;
|
|
|
|
-- Index for event type queries
|
|
CREATE INDEX IF NOT EXISTS idx_ts_events_event_type
|
|
ON time_series_events (event_type, time DESC);
|
|
|
|
-- ============================================================================
|
|
-- Step 4: Create Aggregated Metrics Tables
|
|
-- ============================================================================
|
|
|
|
-- Website metrics aggregated hourly
|
|
CREATE TABLE IF NOT EXISTS website_metrics_hourly (
|
|
time TIMESTAMPTZ NOT NULL,
|
|
website_id UUID NOT NULL,
|
|
|
|
-- Traffic metrics
|
|
pageviews INTEGER DEFAULT 0,
|
|
unique_sessions INTEGER DEFAULT 0,
|
|
unique_users INTEGER DEFAULT 0,
|
|
avg_time_on_page INTEGER,
|
|
avg_scroll_depth INTEGER,
|
|
bounce_rate DECIMAL(5, 4),
|
|
|
|
-- Conversion metrics
|
|
add_to_cart_count INTEGER DEFAULT 0,
|
|
checkout_start_count INTEGER DEFAULT 0,
|
|
purchase_count INTEGER DEFAULT 0,
|
|
conversion_rate DECIMAL(5, 4),
|
|
|
|
-- Revenue metrics
|
|
total_revenue DECIMAL(19, 4) DEFAULT 0,
|
|
avg_order_value DECIMAL(19, 4),
|
|
|
|
PRIMARY KEY (time, website_id)
|
|
);
|
|
|
|
-- Convert to hypertable
|
|
SELECT create_hypertable('website_metrics_hourly', 'time',
|
|
chunk_time_interval => INTERVAL '30 days',
|
|
if_not_exists => TRUE
|
|
);
|
|
|
|
-- Product metrics aggregated daily
|
|
CREATE TABLE IF NOT EXISTS product_metrics_daily (
|
|
time DATE NOT NULL,
|
|
website_id UUID NOT NULL,
|
|
product_id VARCHAR(50) NOT NULL,
|
|
|
|
-- View metrics
|
|
views INTEGER DEFAULT 0,
|
|
unique_viewers INTEGER DEFAULT 0,
|
|
avg_time_viewed INTEGER,
|
|
|
|
-- Conversion metrics
|
|
add_to_cart_count INTEGER DEFAULT 0,
|
|
purchase_count INTEGER DEFAULT 0,
|
|
conversion_rate DECIMAL(5, 4),
|
|
|
|
-- Revenue metrics
|
|
revenue DECIMAL(19, 4) DEFAULT 0,
|
|
units_sold INTEGER DEFAULT 0,
|
|
|
|
PRIMARY KEY (time, website_id, product_id)
|
|
);
|
|
|
|
-- Convert to hypertable
|
|
SELECT create_hypertable('product_metrics_daily', 'time',
|
|
chunk_time_interval => INTERVAL '30 days',
|
|
if_not_exists => TRUE
|
|
);
|
|
|
|
-- ============================================================================
|
|
-- Step 5: Create Continuous Aggregates (Materialized Views)
|
|
-- ============================================================================
|
|
|
|
-- Hourly website metrics continuous aggregate
|
|
CREATE MATERIALIZED VIEW IF NOT EXISTS website_metrics_hourly_agg
|
|
WITH (timescaledb.continuous) AS
|
|
SELECT
|
|
time_bucket('1 hour', time) AS time,
|
|
website_id,
|
|
COUNT(*) FILTER (WHERE event_type = 'pageview') as pageviews,
|
|
COUNT(DISTINCT session_id) as unique_sessions,
|
|
COUNT(DISTINCT user_id) FILTER (WHERE user_id IS NOT NULL) as unique_users,
|
|
COUNT(*) FILTER (WHERE event_name = 'add_to_cart') as add_to_cart_count,
|
|
COUNT(*) FILTER (WHERE event_name = 'checkout_start') as checkout_start_count,
|
|
COUNT(*) FILTER (WHERE event_name = 'purchase') as purchase_count,
|
|
SUM(event_value) FILTER (WHERE event_name = 'purchase') as total_revenue
|
|
FROM time_series_events
|
|
GROUP BY time_bucket('1 hour', time), website_id;
|
|
|
|
-- Add refresh policy for continuous aggregate
|
|
SELECT add_continuous_aggregate_policy('website_metrics_hourly_agg',
|
|
start_offset => INTERVAL '3 hours',
|
|
end_offset => INTERVAL '1 hour',
|
|
schedule_interval => INTERVAL '1 hour',
|
|
if_not_exists => TRUE
|
|
);
|
|
|
|
-- Daily product metrics continuous aggregate
|
|
CREATE MATERIALIZED VIEW IF NOT EXISTS product_metrics_daily_agg
|
|
WITH (timescaledb.continuous) AS
|
|
SELECT
|
|
time_bucket('1 day', time) AS time,
|
|
website_id,
|
|
product_id,
|
|
COUNT(*) FILTER (WHERE event_name = 'product_view') as views,
|
|
COUNT(DISTINCT session_id) FILTER (WHERE event_name = 'product_view') as unique_viewers,
|
|
COUNT(*) FILTER (WHERE event_name = 'add_to_cart') as add_to_cart_count,
|
|
COUNT(*) FILTER (WHERE event_name = 'purchase') as purchase_count,
|
|
SUM(event_value) FILTER (WHERE event_name = 'purchase') as revenue
|
|
FROM time_series_events
|
|
WHERE product_id IS NOT NULL
|
|
GROUP BY time_bucket('1 day', time), website_id, product_id;
|
|
|
|
-- Add refresh policy for product metrics
|
|
SELECT add_continuous_aggregate_policy('product_metrics_daily_agg',
|
|
start_offset => INTERVAL '7 days',
|
|
end_offset => INTERVAL '1 day',
|
|
schedule_interval => INTERVAL '1 day',
|
|
if_not_exists => TRUE
|
|
);
|
|
|
|
-- ============================================================================
|
|
-- Step 6: Create Data Retention Policies
|
|
-- ============================================================================
|
|
|
|
-- Retain raw time-series events for 90 days
|
|
SELECT add_retention_policy('time_series_events',
|
|
INTERVAL '90 days',
|
|
if_not_exists => TRUE
|
|
);
|
|
|
|
-- Retain hourly aggregates for 1 year
|
|
SELECT add_retention_policy('website_metrics_hourly',
|
|
INTERVAL '1 year',
|
|
if_not_exists => TRUE
|
|
);
|
|
|
|
-- Retain daily product metrics for 2 years
|
|
SELECT add_retention_policy('product_metrics_daily',
|
|
INTERVAL '2 years',
|
|
if_not_exists => TRUE
|
|
);
|
|
|
|
-- ============================================================================
|
|
-- Migration Complete
|
|
-- ============================================================================
|
|
DO $$
|
|
BEGIN
|
|
RAISE NOTICE '=================================================================';
|
|
RAISE NOTICE 'TimescaleDB Migration Complete';
|
|
RAISE NOTICE 'Hypertables: time_series_events, website_metrics_hourly, product_metrics_daily';
|
|
RAISE NOTICE 'Continuous Aggregates: website_metrics_hourly_agg, product_metrics_daily_agg';
|
|
RAISE NOTICE 'Retention Policies: 90 days (raw), 1 year (hourly), 2 years (daily)';
|
|
RAISE NOTICE '=================================================================';
|
|
END $$;
|
|
|