From 23cbff30f0b5206b25399213805f41a78a4856a1 Mon Sep 17 00:00:00 2001 From: Andreas Hocevar Date: Tue, 20 Jan 2026 10:50:55 +0100 Subject: [PATCH] Covering index for website_event and query optimization --- .../migration.sql | 2 + prisma/schema.prisma | 1 + .../sql/pageviews/getPageviewMetrics.ts | 33 ++++++++++-- src/queries/sql/sessions/getSessionMetrics.ts | 37 +++++++++++-- .../sql/sessions/getWebsiteSessions.ts | 52 ++++++++++--------- 5 files changed, 93 insertions(+), 32 deletions(-) create mode 100644 prisma/migrations/18_add_website_event_covering_index/migration.sql diff --git a/prisma/migrations/18_add_website_event_covering_index/migration.sql b/prisma/migrations/18_add_website_event_covering_index/migration.sql new file mode 100644 index 00000000..7061aea5 --- /dev/null +++ b/prisma/migrations/18_add_website_event_covering_index/migration.sql @@ -0,0 +1,2 @@ +-- CreateIndex +CREATE INDEX "website_event_website_id_created_at_event_type_session_id_idx" ON "website_event"("website_id", "created_at", "event_type", "session_id"); diff --git a/prisma/schema.prisma b/prisma/schema.prisma index aeb11648..dc3a447a 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -136,6 +136,7 @@ model WebsiteEvent { @@index([websiteId, createdAt, eventName]) @@index([websiteId, createdAt, tag]) @@index([websiteId, sessionId, createdAt]) + @@index([websiteId, createdAt, eventType, sessionId]) @@index([websiteId, visitId, createdAt]) @@index([websiteId, createdAt, hostname]) @@map("website_event") diff --git a/src/queries/sql/pageviews/getPageviewMetrics.ts b/src/queries/sql/pageviews/getPageviewMetrics.ts index 9d4f6278..6a57bd8b 100644 --- a/src/queries/sql/pageviews/getPageviewMetrics.ts +++ b/src/queries/sql/pageviews/getPageviewMetrics.ts @@ -1,5 +1,5 @@ import clickhouse from '@/lib/clickhouse'; -import { EVENT_COLUMNS, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants'; +import { EVENT_COLUMNS, EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants'; import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; import prisma from '@/lib/prisma'; import type { QueryFilters } from '@/lib/types'; @@ -52,6 +52,31 @@ async function relationalQuery( if (type === 'entry' || type === 'exit') { const order = type === 'entry' ? 'asc' : 'desc'; + + if (!filterQuery && !cohortQuery) { + return rawQuery( + ` + select x.url_path as x, + count(*) as y + from ( + select distinct on (visit_id) + url_path + from website_event + where website_id = {{websiteId::uuid}} + and created_at between {{startDate}} and {{endDate}} + and event_type = {{eventType}} + order by visit_id, created_at ${order} + ) as x + group by 1 + order by 2 desc + limit ${limit} + offset ${offset} + `, + { ...queryParams, ...parameters, eventType: EVENT_TYPE.pageView }, + FUNCTION_NAME, + ); + } + column = `x.${column}`; entryExitQuery = ` @@ -62,7 +87,7 @@ async function relationalQuery( from website_event where website_event.website_id = {{websiteId::uuid}} and website_event.created_at between {{startDate}} and {{endDate}} - and website_event.event_type != 2 + and website_event.event_type = {{eventType}} order by visit_id, created_at ${order} ) x on x.visit_id = website_event.visit_id @@ -79,7 +104,7 @@ async function relationalQuery( ${entryExitQuery} where website_event.website_id = {{websiteId::uuid}} and website_event.created_at between {{startDate}} and {{endDate}} - and website_event.event_type != 2 + and website_event.event_type = {{eventType}} ${excludeDomain} ${filterQuery} group by 1 @@ -87,7 +112,7 @@ async function relationalQuery( limit ${limit} offset ${offset} `, - { ...queryParams, ...parameters }, + { ...queryParams, ...parameters, eventType: EVENT_TYPE.pageView }, FUNCTION_NAME, ); } diff --git a/src/queries/sql/sessions/getSessionMetrics.ts b/src/queries/sql/sessions/getSessionMetrics.ts index c519bdd0..73a4b07c 100644 --- a/src/queries/sql/sessions/getSessionMetrics.ts +++ b/src/queries/sql/sessions/getSessionMetrics.ts @@ -1,5 +1,5 @@ import clickhouse from '@/lib/clickhouse'; -import { EVENT_COLUMNS, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants'; +import { EVENT_COLUMNS, EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants'; import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db'; import prisma from '@/lib/prisma'; import type { QueryFilters } from '@/lib/types'; @@ -44,6 +44,37 @@ async function relationalQuery( column = `lower(left(${type}, 2))`; } + // Optimization: use subquery to get distinct sessions before joining + if (!Object.keys(filters).some(key => EVENT_COLUMNS.includes(key))) { + return rawQuery( + ` + with unique_sessions as ( + select distinct session_id, website_id + from website_event + where website_id = {{websiteId::uuid}} + and created_at between {{startDate}} and {{endDate}} + and event_type = {{eventType}} + ) + select + ${column} x, + count(*) y + ${includeCountry ? ', country' : ''} + from unique_sessions + join session on session.session_id = unique_sessions.session_id + and session.website_id = unique_sessions.website_id + where 1 = 1 + ${filterQuery} + group by 1 + ${includeCountry ? ', 3' : ''} + order by 2 desc + limit ${limit} + offset ${offset} + `, + { ...queryParams, ...parameters, eventType: EVENT_TYPE.pageView }, + FUNCTION_NAME, + ); + } + return rawQuery( ` select @@ -55,7 +86,7 @@ async function relationalQuery( ${joinSessionQuery} where website_event.website_id = {{websiteId::uuid}} and website_event.created_at between {{startDate}} and {{endDate}} - and website_event.event_type != 2 + and website_event.event_type = {{eventType}} ${filterQuery} group by 1 ${includeCountry ? ', 3' : ''} @@ -63,7 +94,7 @@ async function relationalQuery( limit ${limit} offset ${offset} `, - { ...queryParams, ...parameters }, + { ...queryParams, ...parameters, eventType: EVENT_TYPE.pageView }, FUNCTION_NAME, ); } diff --git a/src/queries/sql/sessions/getWebsiteSessions.ts b/src/queries/sql/sessions/getWebsiteSessions.ts index df640d6d..c93f654c 100644 --- a/src/queries/sql/sessions/getWebsiteSessions.ts +++ b/src/queries/sql/sessions/getWebsiteSessions.ts @@ -32,10 +32,26 @@ async function relationalQuery(websiteId: string, filters: QueryFilters) { return pagedRawQuery( ` + with session_stats as ( + select + session_id, + website_id, + hostname, + min(created_at) as "firstAt", + max(created_at) as "lastAt", + count(distinct visit_id) as "visits", + sum(case when event_type = 1 then 1 else 0 end) as "views" + from website_event + ${cohortQuery} + where website_id = {{websiteId::uuid}} + ${dateQuery} + ${filterQuery} + group by session_id, website_id, hostname + ) select session.session_id as "id", session.website_id as "websiteId", - website_event.hostname, + session_stats.hostname, session.browser, session.os, session.device, @@ -44,31 +60,17 @@ async function relationalQuery(websiteId: string, filters: QueryFilters) { session.country, session.region, session.city, - min(website_event.created_at) as "firstAt", - max(website_event.created_at) as "lastAt", - count(distinct website_event.visit_id) as "visits", - sum(case when website_event.event_type = 1 then 1 else 0 end) as "views", - max(website_event.created_at) as "createdAt" - from website_event - ${cohortQuery} - join session on session.session_id = website_event.session_id - and session.website_id = website_event.website_id - where website_event.website_id = {{websiteId::uuid}} - ${dateQuery} - ${filterQuery} + session_stats."firstAt", + session_stats."lastAt", + session_stats."visits", + session_stats."views", + session_stats."lastAt" as "createdAt" + from session_stats + join session on session.session_id = session_stats.session_id + and session.website_id = session_stats.website_id + where 1 = 1 ${searchQuery} - group by session.session_id, - session.website_id, - website_event.hostname, - session.browser, - session.os, - session.device, - session.screen, - session.language, - session.country, - session.region, - session.city - order by max(website_event.created_at) desc + order by session_stats."lastAt" desc `, queryParams, filters,