Update PostgreSQL expanded metrics queries
Some checks failed
Node.js CI / build (postgresql, 18.18, 10) (push) Has been cancelled

This commit is contained in:
Francis Cao 2025-10-24 10:38:35 -07:00
parent e71a34d1fa
commit 61b667c587
5 changed files with 179 additions and 80 deletions

View file

@ -37,7 +37,7 @@ async function relationalQuery(
) { ) {
const { type, limit = 500, offset = 0 } = parameters; const { type, limit = 500, offset = 0 } = parameters;
const column = FILTER_COLUMNS[type] || type; const column = FILTER_COLUMNS[type] || type;
const { rawQuery, parseFilters } = prisma; const { rawQuery, parseFilters, getTimestampDiffSQL } = prisma;
const { filterQuery, cohortQuery, joinSessionQuery, queryParams } = parseFilters( const { filterQuery, cohortQuery, joinSessionQuery, queryParams } = parseFilters(
{ {
...filters, ...filters,
@ -49,16 +49,31 @@ async function relationalQuery(
return rawQuery( return rawQuery(
` `
select ${column} x, select
count(*) as y name,
sum(t.c) as "pageviews",
count(distinct t.session_id) as "visitors",
count(distinct t.visit_id) as "visits",
sum(case when t.c = 1 then 1 else 0 end) as "bounces",
sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}) as "totaltime"
from (
select
${column} name,
website_event.session_id,
website_event.visit_id,
count(*) as "c",
min(website_event.created_at) as "min_time",
max(website_event.created_at) as "max_time"
from website_event from website_event
${cohortQuery} ${cohortQuery}
${joinSessionQuery} ${joinSessionQuery}
where website_event.website_id = {{websiteId::uuid}} where website_event.website_id = {{websiteId::uuid}}
and website_event.created_at between {{startDate}} and {{endDate}} and website_event.created_at between {{startDate}} and {{endDate}}
${filterQuery} ${filterQuery}
group by 1 group by name, website_event.session_id, website_event.visit_id
order by 2 desc ) as t
group by name
order by visitors desc, visits desc
limit ${limit} limit ${limit}
offset ${offset} offset ${offset}
`, `,

View file

@ -40,7 +40,7 @@ async function relationalQuery(
websiteId: string, websiteId: string,
filters: QueryFilters, filters: QueryFilters,
): Promise<ChannelExpandedMetricsData[]> { ): Promise<ChannelExpandedMetricsData[]> {
const { rawQuery, parseFilters } = prisma; const { rawQuery, parseFilters, getTimestampDiffSQL } = prisma;
const { queryParams, filterQuery, joinSessionQuery, cohortQuery, dateQuery } = parseFilters({ const { queryParams, filterQuery, joinSessionQuery, cohortQuery, dateQuery } = parseFilters({
...filters, ...filters,
websiteId, websiteId,
@ -48,21 +48,20 @@ async function relationalQuery(
return rawQuery( return rawQuery(
` `
WITH channels as ( WITH prefix AS (
select case when ${toPostgresPositionClause('utm_medium', ['cp', 'ppc', 'retargeting', 'paid'])} then 'paid' else 'organic' end prefix, select case when utm_medium LIKE 'p%' OR
case utm_medium LIKE '%ppc%' OR
when referrer_domain = '' and url_query = '' then 'direct' utm_medium LIKE '%retargeting%' OR
when ${toPostgresPositionClause('url_query', PAID_AD_PARAMS)} then 'paidAds' utm_medium LIKE '%paid%' then 'paid' else 'organic' end prefix,
when ${toPostgresPositionClause('utm_medium', ['referral', 'app', 'link'])} then 'referral' referrer_domain,
when position(utm_medium, 'affiliate') > 0 then 'affiliate' url_query,
when position(utm_medium, 'sms') > 0 or position(utm_source, 'sms') > 0 then 'sms' utm_medium,
when ${toPostgresPositionClause('referrer_domain', SEARCH_DOMAINS)} or position(utm_medium, 'organic') > 0 then concat(prefix, 'Search') utm_source,
when ${toPostgresPositionClause('referrer_domain', SOCIAL_DOMAINS)} then concat(prefix, 'Social') session_id,
when ${toPostgresPositionClause('referrer_domain', EMAIL_DOMAINS)} or position(utm_medium, 'mail') > 0 then 'email' visit_id,
when ${toPostgresPositionClause('referrer_domain', SHOPPING_DOMAINS)} or position(utm_medium, 'shop') > 0 then concat(prefix, 'Shopping') count(*) c,
when ${toPostgresPositionClause('referrer_domain', VIDEO_DOMAINS)} or position(utm_medium, 'video') > 0 then concat(prefix, 'Video') min(created_at) min_time,
else '' end AS x, max(created_at) max_time
count(distinct session_id) y
from website_event from website_event
${cohortQuery} ${cohortQuery}
${joinSessionQuery} ${joinSessionQuery}
@ -70,18 +69,49 @@ async function relationalQuery(
and website_event.event_type != 2 and website_event.event_type != 2
${dateQuery} ${dateQuery}
${filterQuery} ${filterQuery}
group by 1, 2 group by prefix,
order by y desc) referrer_domain,
url_query,
utm_medium,
utm_source,
session_id,
visit_id),
select x, sum(y) y channels as (
select case
when referrer_domain = '' and url_query = '' then 'direct'
when ${toPostgresPositionClause('url_query', PAID_AD_PARAMS)} then 'paidAds'
when ${toPostgresPositionClause('utm_medium', ['referral', 'app', 'link'])} then 'referral'
when utm_medium ilike '%affiliate%' then 'affiliate'
when utm_medium ilike '%sms%' or utm_source ilike '%sms%' then 'sms'
when ${toPostgresPositionClause('referrer_domain', SEARCH_DOMAINS)} or utm_medium ilike '%organic%' then concat(prefix, 'Search')
when ${toPostgresPositionClause('referrer_domain', SOCIAL_DOMAINS)} then concat(prefix, 'Social')
when ${toPostgresPositionClause('referrer_domain', EMAIL_DOMAINS)} or utm_medium ilike '%mail%' then 'email'
when ${toPostgresPositionClause('referrer_domain', SHOPPING_DOMAINS)} or utm_medium ilike '%shop%' then concat(prefix, 'Shopping')
when ${toPostgresPositionClause('referrer_domain', VIDEO_DOMAINS)} or utm_medium ilike '%video%' then concat(prefix, 'Video')
else '' end AS name,
session_id,
visit_id,
c,
min_time,
max_time
from prefix)
select
name,
sum(c) as "pageviews",
count(distinct session_id) as "visitors",
count(distinct visit_id) as "visits",
sum(case when c = 1 then 1 else 0 end) as "bounces",
sum(${getTimestampDiffSQL('min_time', 'max_time')}) as "totaltime"
from channels from channels
where x != '' where name != ''
group by x group by name
order by y desc; order by visitors desc, visits desc
`, `,
queryParams, queryParams,
FUNCTION_NAME, FUNCTION_NAME,
); ).then(results => results.map(item => ({ ...item, y: Number(item.y) })));
} }
async function clickhouseQuery( async function clickhouseQuery(
@ -156,5 +186,5 @@ function toClickHouseStringArray(arr: string[]): string {
} }
function toPostgresPositionClause(column: string, arr: string[]) { function toPostgresPositionClause(column: string, arr: string[]) {
return arr.map(val => `position(${column}, '${val.replace(/'/g, "''")}') > 0`).join(' OR\n '); return arr.map(val => `${column} ilike '%${val.replace(/'/g, "''")}%'`).join(' OR\n ');
} }

View file

@ -50,15 +50,15 @@ async function relationalQuery(websiteId: string, filters: QueryFilters) {
channels as ( channels as (
select case select case
when referrer_domain = '' and url_query = '' then 'direct' when referrer_domain = '' and url_query = '' then 'direct'
when ${toPostgresPositionClause('url_query', PAID_AD_PARAMS)} then 'paidAds' when ${toPostgresLikeClause('url_query', PAID_AD_PARAMS)} then 'paidAds'
when ${toPostgresPositionClause('utm_medium', ['referral', 'app', 'link'])} then 'referral' when ${toPostgresLikeClause('utm_medium', ['referral', 'app', 'link'])} then 'referral'
when utm_medium ilike '%affiliate%' then 'affiliate' when utm_medium ilike '%affiliate%' then 'affiliate'
when utm_medium ilike '%sms%' or utm_source ilike '%sms%' then 'sms' when utm_medium ilike '%sms%' or utm_source ilike '%sms%' then 'sms'
when ${toPostgresPositionClause('referrer_domain', SEARCH_DOMAINS)} or utm_medium ilike '%organic%' then concat(prefix, 'Search') when ${toPostgresLikeClause('referrer_domain', SEARCH_DOMAINS)} or utm_medium ilike '%organic%' then concat(prefix, 'Search')
when ${toPostgresPositionClause('referrer_domain', SOCIAL_DOMAINS)} then concat(prefix, 'Social') when ${toPostgresLikeClause('referrer_domain', SOCIAL_DOMAINS)} then concat(prefix, 'Social')
when ${toPostgresPositionClause('referrer_domain', EMAIL_DOMAINS)} or utm_medium ilike '%mail%' then 'email' when ${toPostgresLikeClause('referrer_domain', EMAIL_DOMAINS)} or utm_medium ilike '%mail%' then 'email'
when ${toPostgresPositionClause('referrer_domain', SHOPPING_DOMAINS)} or utm_medium ilike '%shop%' then concat(prefix, 'Shopping') when ${toPostgresLikeClause('referrer_domain', SHOPPING_DOMAINS)} or utm_medium ilike '%shop%' then concat(prefix, 'Shopping')
when ${toPostgresPositionClause('referrer_domain', VIDEO_DOMAINS)} or utm_medium ilike '%video%' then concat(prefix, 'Video') when ${toPostgresLikeClause('referrer_domain', VIDEO_DOMAINS)} or utm_medium ilike '%video%' then concat(prefix, 'Video')
else '' end AS x, else '' end AS x,
count(distinct session_id) y count(distinct session_id) y
from prefix from prefix
@ -137,6 +137,6 @@ function toClickHouseStringArray(arr: string[]): string {
return arr.map(p => `'${p.replace(/'/g, "\\'")}'`).join(', '); return arr.map(p => `'${p.replace(/'/g, "\\'")}'`).join(', ');
} }
function toPostgresPositionClause(column: string, arr: string[]) { function toPostgresLikeClause(column: string, arr: string[]) {
return arr.map(val => `${column} ilike '%${val.replace(/'/g, "''")}%'`).join(' OR\n '); return arr.map(val => `${column} ilike '%${val.replace(/'/g, "''")}%'`).join(' OR\n ');
} }

View file

@ -36,8 +36,8 @@ async function relationalQuery(
filters: QueryFilters, filters: QueryFilters,
): Promise<PageviewExpandedMetricsData[]> { ): Promise<PageviewExpandedMetricsData[]> {
const { type, limit = 500, offset = 0 } = parameters; const { type, limit = 500, offset = 0 } = parameters;
const column = FILTER_COLUMNS[type] || type; let column = FILTER_COLUMNS[type] || type;
const { rawQuery, parseFilters } = prisma; const { rawQuery, parseFilters, getTimestampDiffSQL } = prisma;
const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters( const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters(
{ {
...filters, ...filters,
@ -52,6 +52,9 @@ async function relationalQuery(
if (column === 'referrer_domain') { if (column === 'referrer_domain') {
excludeDomain = `and website_event.referrer_domain != website_event.hostname excludeDomain = `and website_event.referrer_domain != website_event.hostname
and website_event.referrer_domain != ''`; and website_event.referrer_domain != ''`;
if (type === 'domain') {
column = toPostgresGroupedReferrer(GROUPED_DOMAINS);
}
} }
if (type === 'entry' || type === 'exit') { if (type === 'entry' || type === 'exit') {
@ -74,8 +77,21 @@ async function relationalQuery(
return rawQuery( return rawQuery(
` `
select ${column} x, select
count(distinct website_event.session_id) as y name,
sum(t.c) as "pageviews",
count(distinct t.session_id) as "visitors",
count(distinct t.visit_id) as "visits",
sum(case when t.c = 1 then 1 else 0 end) as "bounces",
sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}) as "totaltime"
from (
select
${column} name,
website_event.session_id,
website_event.visit_id,
count(*) as "c",
min(website_event.created_at) as "min_time",
max(website_event.created_at) as "max_time"
from website_event from website_event
${cohortQuery} ${cohortQuery}
${joinSessionQuery} ${joinSessionQuery}
@ -85,8 +101,10 @@ async function relationalQuery(
and website_event.event_type != 2 and website_event.event_type != 2
${excludeDomain} ${excludeDomain}
${filterQuery} ${filterQuery}
group by 1 group by name, website_event.session_id, website_event.visit_id
order by 2 desc ) as t
group by name
order by visitors desc, visits desc
limit ${limit} limit ${limit}
offset ${offset} offset ${offset}
`, `,
@ -186,3 +204,23 @@ export function toClickHouseGroupedReferrer(
'END', 'END',
].join('\n'); ].join('\n');
} }
/**
 * Builds a SQL CASE expression that maps a referrer column onto group labels.
 *
 * One WHEN branch is emitted per entry of `domains`; rows matching no branch
 * fall through to the literal 'Other'.
 *
 * @param domains Group definitions. Each entry is read for `match` (a single
 *   pattern or an array of patterns) and `domain` (the label emitted for the group).
 * @param column Column expression tested against the patterns
 *   (defaults to 'referrer_domain').
 * @returns The CASE expression as newline-joined SQL text.
 */
export function toPostgresGroupedReferrer(
  domains: any[],
  column: string = 'referrer_domain',
): string {
  return [
    'CASE',
    ...domains.map(group => {
      // Accept either a single pattern or a list of patterns per group.
      const matches = Array.isArray(group.match) ? group.match : [group.match];
      // Double single quotes so a label containing ' remains a valid SQL
      // string literal, consistent with how the match values are escaped.
      const label = String(group.domain).replace(/'/g, "''");
      return `WHEN ${toPostgresLikeClause(column, matches)} THEN '${label}'`;
    }),
    " ELSE 'Other'",
    'END',
  ].join('\n');
}
/**
 * Builds an OR-joined list of case-insensitive "column contains value" conditions.
 *
 * Each value is embedded as a literal substring: LIKE metacharacters (\, %, _)
 * are backslash-escaped so they match themselves, and single quotes are doubled
 * so the value stays inside its SQL string literal.
 *
 * @param column Column expression placed on the left-hand side of each ilike.
 * @param arr Substrings to search for.
 * @returns The SQL condition text, or `false` when `arr` is empty so the result
 *   is still a valid boolean expression wherever it is embedded.
 */
function toPostgresLikeClause(column: string, arr: string[]) {
  if (arr.length === 0) {
    // An empty string here would produce "WHEN  THEN ..." in the caller's SQL.
    return 'false';
  }

  return arr
    .map(val => {
      const literal = val
        // Backslash is the default LIKE escape character in PostgreSQL.
        .replace(/[\\%_]/g, '\\$&')
        .replace(/'/g, "''");
      return `${column} ilike '%${literal}%'`;
    })
    .join(' OR\n ');
}

View file

@ -37,7 +37,7 @@ async function relationalQuery(
): Promise<SessionExpandedMetricsData[]> { ): Promise<SessionExpandedMetricsData[]> {
const { type, limit = 500, offset = 0 } = parameters; const { type, limit = 500, offset = 0 } = parameters;
let column = FILTER_COLUMNS[type] || type; let column = FILTER_COLUMNS[type] || type;
const { parseFilters, rawQuery } = prisma; const { parseFilters, rawQuery, getTimestampDiffSQL } = prisma;
const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters( const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters(
{ {
...filters, ...filters,
@ -56,9 +56,22 @@ async function relationalQuery(
return rawQuery( return rawQuery(
` `
select select
${column} x, name,
count(distinct website_event.session_id) y ${includeCountry ? 'country,' : ''}
${includeCountry ? ', country' : ''} sum(t.c) as "pageviews",
count(distinct t.session_id) as "visitors",
count(distinct t.visit_id) as "visits",
sum(case when t.c = 1 then 1 else 0 end) as "bounces",
sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}) as "totaltime"
from (
select
${column} name,
${includeCountry ? 'country,' : ''}
website_event.session_id,
website_event.visit_id,
count(*) as "c",
min(website_event.created_at) as "min_time",
max(website_event.created_at) as "max_time"
from website_event from website_event
${cohortQuery} ${cohortQuery}
${joinSessionQuery} ${joinSessionQuery}
@ -66,9 +79,12 @@ async function relationalQuery(
and website_event.created_at between {{startDate}} and {{endDate}} and website_event.created_at between {{startDate}} and {{endDate}}
and website_event.event_type != 2 and website_event.event_type != 2
${filterQuery} ${filterQuery}
group by 1 group by name, website_event.session_id, website_event.visit_id
${includeCountry ? ', 3' : ''} ${includeCountry ? ', country' : ''}
order by 2 desc ) as t
group by name
${includeCountry ? ', country' : ''}
order by visitors desc, visits desc
limit ${limit} limit ${limit}
offset ${offset} offset ${offset}
`, `,