From 5a0908964c5897733121c758fbe62b41846735fe Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Tue, 25 Jul 2023 13:23:44 -0700 Subject: [PATCH] align CH / postgres funnel queries --- lib/clickhouse.ts | 41 +++++++++++++---- lib/prisma.ts | 4 +- .../analytics/pageview/getPageviewFunnel.ts | 46 ++++++++----------- 3 files changed, 54 insertions(+), 37 deletions(-) diff --git a/lib/clickhouse.ts b/lib/clickhouse.ts index eb73d83c..d7cc572b 100644 --- a/lib/clickhouse.ts +++ b/lib/clickhouse.ts @@ -121,24 +121,47 @@ function getFilterQuery(filters = {}, params = {}) { return query.join('\n'); } -function getFunnelQuery(urls: string[]): { - columnsQuery: string; - conditionQuery: string; +function getFunnelQuery( + urls: string[], + windowMinutes: number, +): { + levelQuery: string; + sumQuery: string; + urlFilterQuery: string; urlParams: { [key: string]: string }; } { return urls.reduce( (pv, cv, i) => { - pv.columnsQuery += `\n,url_path = {url${i}:String}${ - i > 0 && urls[i - 1] ? ` AND referrer_path = {url${i - 1}:String}` : '' - }`; - pv.conditionQuery += `${i > 0 ? ',' : ''} {url${i}:String}`; + const levelNumber = i + 1; + const startSum = i > 0 ? 'union all ' : ''; + const startFilter = i > 0 ? ', ' : ''; + + if (levelNumber >= 2) { + pv.levelQuery += `\n + , level${levelNumber} AS ( + select distinct y.session_id as session_id, + y.url_path as url_path, + y.referrer_path as referrer_path, + y.created_at as created_at + from level${i} x + join level0 y + on x.session_id = y.session_id + where y.created_at between x.created_at and x.created_at + interval ${windowMinutes} minute + and y.referrer_path = {url${i - 1}:String} + and y.url_path = {url${i}:String} + )`; + } + + pv.sumQuery += `\n${startSum}select ${levelNumber} as level, count(distinct(session_id)) as count from level${levelNumber}`; + pv.urlFilterQuery += `${startFilter}{url${i}:String} `; pv.urlParams[`url${i}`] = cv; return pv; }, { - columnsQuery: '', - conditionQuery: '', + levelQuery: '', + sumQuery: '', + urlFilterQuery: '', urlParams: {}, }, ); diff --git a/lib/prisma.ts b/lib/prisma.ts index 85dc25b8..e250987e 100644 --- a/lib/prisma.ts +++ b/lib/prisma.ts @@ -153,7 +153,7 @@ function getFunnelQuery( if (levelNumber >= 2) { pv.levelQuery += `\n , level${levelNumber} AS ( - select distinct l.session_id, we.created_at + select distinct we.session_id, we.created_at from level${i} l join website_event we on l.session_id = we.session_id @@ -161,7 +161,7 @@ function getFunnelQuery( and ${getAddMinutesQuery(`l.created_at `, windowMinutes)} and we.referrer_path = $${i + initParamLength} and we.url_path = $${levelNumber + initParamLength} - and we.created_at between $2 and $3 + and we.created_at <= $3 and we.website_id = $1${toUuid()} )`; } diff --git a/queries/analytics/pageview/getPageviewFunnel.ts b/queries/analytics/pageview/getPageviewFunnel.ts index 422338da..3af1545a 100644 --- a/queries/analytics/pageview/getPageviewFunnel.ts +++ b/queries/analytics/pageview/getPageviewFunnel.ts @@ -41,17 +41,14 @@ async function relationalQuery( const params: any = [websiteId, startDate, endDate, ...urls]; return rawQuery( - `WITH level0 AS ( - select distinct session_id, url_path, referrer_path, created_at + `WITH level1 AS ( + select distinct session_id, created_at from website_event where url_path in (${urlFilterQuery}) and website_id = $1${toUuid()} and created_at between $2 and $3 - ),level1 AS ( - select distinct session_id, created_at - from level0 - where url_path = $4 - )${levelQuery} + and url_path = $4) + ${levelQuery} ${sumQuery} ORDER BY level;`, params, @@ -76,38 +73,35 @@ async function clickhouseQuery( { x: string; y: number; + z: number; }[] > { const { windowMinutes, startDate, endDate, urls } = criteria; const { rawQuery, getBetweenDates, getFunnelQuery } = clickhouse; - const { columnsQuery, urlParams } = getFunnelQuery(urls); + const { levelQuery, sumQuery, urlFilterQuery, urlParams } = getFunnelQuery(urls, windowMinutes); const params = { websiteId, - window: windowMinutes * 60, ...urlParams, }; return rawQuery<{ level: number; count: number }[]>( ` - WITH funnel as (select level, - count(*) AS count + WITH level0 AS ( + select distinct session_id, url_path, referrer_path, created_at + from umami.website_event + where url_path in (${urlFilterQuery}) + and website_id = {websiteId:UUID} + and ${getBetweenDates('created_at', startDate, endDate)} + ), level1 AS ( + select * + from level0 + where url_path = {url0:String}) + ${levelQuery} + select * from ( - select session_id, - windowFunnel({window:UInt32}, 'strict_increase') - ( - created_at - ${columnsQuery} - ) AS level - from website_event - where website_id = {websiteId:UUID} - and ${getBetweenDates('created_at', startDate, endDate)} - group by 1 - ) - group by level - order by level asc) - select * from funnel where level > 0; - `, + ${sumQuery} + ) ORDER BY level;`, params, ).then(results => { return urls.map((a, i) => ({