Covering index for website_event and query optimization

This commit is contained in:
Andreas Hocevar 2026-01-20 10:50:55 +01:00
parent 860e6390f1
commit 23cbff30f0
No known key found for this signature in database
GPG key ID: F7FA44133EB59BCD
5 changed files with 93 additions and 32 deletions

View file

@ -0,0 +1,2 @@
-- CreateIndex
CREATE INDEX "website_event_website_id_created_at_event_type_session_id_idx" ON "website_event"("website_id", "created_at", "event_type", "session_id");

View file

@ -136,6 +136,7 @@ model WebsiteEvent {
@@index([websiteId, createdAt, eventName])
@@index([websiteId, createdAt, tag])
@@index([websiteId, sessionId, createdAt])
@@index([websiteId, createdAt, eventType, sessionId])
@@index([websiteId, visitId, createdAt])
@@index([websiteId, createdAt, hostname])
@@map("website_event")

View file

@ -1,5 +1,5 @@
import clickhouse from '@/lib/clickhouse';
import { EVENT_COLUMNS, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants';
import { EVENT_COLUMNS, EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants';
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';
import type { QueryFilters } from '@/lib/types';
@ -52,6 +52,31 @@ async function relationalQuery(
if (type === 'entry' || type === 'exit') {
const order = type === 'entry' ? 'asc' : 'desc';
if (!filterQuery && !cohortQuery) {
return rawQuery(
`
select x.url_path as x,
count(*) as y
from (
select distinct on (visit_id)
url_path
from website_event
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
and event_type = {{eventType}}
order by visit_id, created_at ${order}
) as x
group by 1
order by 2 desc
limit ${limit}
offset ${offset}
`,
{ ...queryParams, ...parameters, eventType: EVENT_TYPE.pageView },
FUNCTION_NAME,
);
}
column = `x.${column}`;
entryExitQuery = `
@ -62,7 +87,7 @@ async function relationalQuery(
from website_event
where website_event.website_id = {{websiteId::uuid}}
and website_event.created_at between {{startDate}} and {{endDate}}
and website_event.event_type != 2
and website_event.event_type = {{eventType}}
order by visit_id, created_at ${order}
) x
on x.visit_id = website_event.visit_id
@ -79,7 +104,7 @@ async function relationalQuery(
${entryExitQuery}
where website_event.website_id = {{websiteId::uuid}}
and website_event.created_at between {{startDate}} and {{endDate}}
and website_event.event_type != 2
and website_event.event_type = {{eventType}}
${excludeDomain}
${filterQuery}
group by 1
@ -87,7 +112,7 @@ async function relationalQuery(
limit ${limit}
offset ${offset}
`,
{ ...queryParams, ...parameters },
{ ...queryParams, ...parameters, eventType: EVENT_TYPE.pageView },
FUNCTION_NAME,
);
}

View file

@ -1,5 +1,5 @@
import clickhouse from '@/lib/clickhouse';
import { EVENT_COLUMNS, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants';
import { EVENT_COLUMNS, EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants';
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';
import type { QueryFilters } from '@/lib/types';
@ -44,6 +44,37 @@ async function relationalQuery(
column = `lower(left(${type}, 2))`;
}
// Optimization: use subquery to get distinct sessions before joining
if (!Object.keys(filters).some(key => EVENT_COLUMNS.includes(key))) {
return rawQuery(
`
with unique_sessions as (
select distinct session_id, website_id
from website_event
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
and event_type = {{eventType}}
)
select
${column} x,
count(*) y
${includeCountry ? ', country' : ''}
from unique_sessions
join session on session.session_id = unique_sessions.session_id
and session.website_id = unique_sessions.website_id
where 1 = 1
${filterQuery}
group by 1
${includeCountry ? ', 3' : ''}
order by 2 desc
limit ${limit}
offset ${offset}
`,
{ ...queryParams, ...parameters, eventType: EVENT_TYPE.pageView },
FUNCTION_NAME,
);
}
return rawQuery(
`
select
@ -55,7 +86,7 @@ async function relationalQuery(
${joinSessionQuery}
where website_event.website_id = {{websiteId::uuid}}
and website_event.created_at between {{startDate}} and {{endDate}}
and website_event.event_type != 2
and website_event.event_type = {{eventType}}
${filterQuery}
group by 1
${includeCountry ? ', 3' : ''}
@ -63,7 +94,7 @@ async function relationalQuery(
limit ${limit}
offset ${offset}
`,
{ ...queryParams, ...parameters },
{ ...queryParams, ...parameters, eventType: EVENT_TYPE.pageView },
FUNCTION_NAME,
);
}

View file

@ -32,10 +32,26 @@ async function relationalQuery(websiteId: string, filters: QueryFilters) {
return pagedRawQuery(
`
with session_stats as (
select
session_id,
website_id,
hostname,
min(created_at) as "firstAt",
max(created_at) as "lastAt",
count(distinct visit_id) as "visits",
sum(case when event_type = 1 then 1 else 0 end) as "views"
from website_event
${cohortQuery}
where website_id = {{websiteId::uuid}}
${dateQuery}
${filterQuery}
group by session_id, website_id, hostname
)
select
session.session_id as "id",
session.website_id as "websiteId",
website_event.hostname,
session_stats.hostname,
session.browser,
session.os,
session.device,
@ -44,31 +60,17 @@ async function relationalQuery(websiteId: string, filters: QueryFilters) {
session.country,
session.region,
session.city,
min(website_event.created_at) as "firstAt",
max(website_event.created_at) as "lastAt",
count(distinct website_event.visit_id) as "visits",
sum(case when website_event.event_type = 1 then 1 else 0 end) as "views",
max(website_event.created_at) as "createdAt"
from website_event
${cohortQuery}
join session on session.session_id = website_event.session_id
and session.website_id = website_event.website_id
where website_event.website_id = {{websiteId::uuid}}
${dateQuery}
${filterQuery}
session_stats."firstAt",
session_stats."lastAt",
session_stats."visits",
session_stats."views",
session_stats."lastAt" as "createdAt"
from session_stats
join session on session.session_id = session_stats.session_id
and session.website_id = session_stats.website_id
where 1 = 1
${searchQuery}
group by session.session_id,
session.website_id,
website_event.hostname,
session.browser,
session.os,
session.device,
session.screen,
session.language,
session.country,
session.region,
session.city
order by max(website_event.created_at) desc
order by session_stats."lastAt" desc
`,
queryParams,
filters,