diff --git a/src/queries/sql/events/getEventExpandedMetrics.ts b/src/queries/sql/events/getEventExpandedMetrics.ts index c36a829a..fb200c4a 100644 --- a/src/queries/sql/events/getEventExpandedMetrics.ts +++ b/src/queries/sql/events/getEventExpandedMetrics.ts @@ -37,7 +37,7 @@ async function relationalQuery( ) { const { type, limit = 500, offset = 0 } = parameters; const column = FILTER_COLUMNS[type] || type; - const { rawQuery, parseFilters } = prisma; + const { rawQuery, parseFilters, getTimestampDiffSQL } = prisma; const { filterQuery, cohortQuery, joinSessionQuery, queryParams } = parseFilters( { ...filters, @@ -49,16 +49,31 @@ async function relationalQuery( return rawQuery( ` - select ${column} x, - count(*) as y - from website_event - ${cohortQuery} - ${joinSessionQuery} - where website_event.website_id = {{websiteId::uuid}} - and website_event.created_at between {{startDate}} and {{endDate}} - ${filterQuery} - group by 1 - order by 2 desc + select + name, + sum(t.c) as "pageviews", + count(distinct t.session_id) as "visitors", + count(distinct t.visit_id) as "visits", + sum(case when t.c = 1 then 1 else 0 end) as "bounces", + sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}) as "totaltime" + from ( + select + ${column} name, + website_event.session_id, + website_event.visit_id, + count(*) as "c", + min(website_event.created_at) as "min_time", + max(website_event.created_at) as "max_time" + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + ${filterQuery} + group by name, website_event.session_id, website_event.visit_id + ) as t + group by name + order by visitors desc, visits desc limit ${limit} offset ${offset} `, diff --git a/src/queries/sql/getChannelExpandedMetrics.ts b/src/queries/sql/getChannelExpandedMetrics.ts index 571c9d9d..4515ddd6 100644 --- a/src/queries/sql/getChannelExpandedMetrics.ts +++ b/src/queries/sql/getChannelExpandedMetrics.ts @@ -40,7 +40,7 @@ async function relationalQuery( websiteId: string, filters: QueryFilters, ): Promise { - const { rawQuery, parseFilters } = prisma; + const { rawQuery, parseFilters, getTimestampDiffSQL } = prisma; const { queryParams, filterQuery, joinSessionQuery, cohortQuery, dateQuery } = parseFilters({ ...filters, websiteId, @@ -48,40 +48,70 @@ async function relationalQuery( return rawQuery( ` - WITH channels as ( - select case when ${toPostgresPositionClause('utm_medium', ['cp', 'ppc', 'retargeting', 'paid'])} then 'paid' else 'organic' end prefix, - case - when referrer_domain = '' and url_query = '' then 'direct' - when ${toPostgresPositionClause('url_query', PAID_AD_PARAMS)} then 'paidAds' - when ${toPostgresPositionClause('utm_medium', ['referral', 'app', 'link'])} then 'referral' - when position(utm_medium, 'affiliate') > 0 then 'affiliate' - when position(utm_medium, 'sms') > 0 or position(utm_source, 'sms') > 0 then 'sms' - when ${toPostgresPositionClause('referrer_domain', SEARCH_DOMAINS)} or position(utm_medium, 'organic') > 0 then concat(prefix, 'Search') - when ${toPostgresPositionClause('referrer_domain', SOCIAL_DOMAINS)} then concat(prefix, 'Social') - when ${toPostgresPositionClause('referrer_domain', EMAIL_DOMAINS)} or position(utm_medium, 'mail') > 0 then 'email' - when ${toPostgresPositionClause('referrer_domain', SHOPPING_DOMAINS)} or position(utm_medium, 'shop') > 0 then concat(prefix, 'Shopping') - when ${toPostgresPositionClause('referrer_domain', VIDEO_DOMAINS)} or position(utm_medium, 'video') > 0 then concat(prefix, 'Video') - else '' end AS x, - count(distinct session_id) y - from website_event - ${cohortQuery} - ${joinSessionQuery} - where website_event.website_id = {{websiteId::uuid}} - and website_event.event_type != 2 - ${dateQuery} - ${filterQuery} - group by 1, 2 - order by y desc) - - select x, sum(y) y - from channels - where x != '' - group by x - order by y desc; - `, + WITH prefix AS ( + select case when utm_medium LIKE 'p%' OR + utm_medium LIKE '%ppc%' OR + utm_medium LIKE '%retargeting%' OR + utm_medium LIKE '%paid%' then 'paid' else 'organic' end prefix, + referrer_domain, + url_query, + utm_medium, + utm_source, + session_id, + visit_id, + count(*) c, + min(created_at) min_time, + max(created_at) max_time + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.event_type != 2 + ${dateQuery} + ${filterQuery} + group by prefix, + referrer_domain, + url_query, + utm_medium, + utm_source, + session_id, + visit_id), + + channels as ( + select case + when referrer_domain = '' and url_query = '' then 'direct' + when ${toPostgresPositionClause('url_query', PAID_AD_PARAMS)} then 'paidAds' + when ${toPostgresPositionClause('utm_medium', ['referral', 'app', 'link'])} then 'referral' + when utm_medium ilike '%affiliate%' then 'affiliate' + when utm_medium ilike '%sms%' or utm_source ilike '%sms%' then 'sms' + when ${toPostgresPositionClause('referrer_domain', SEARCH_DOMAINS)} or utm_medium ilike '%organic%' then concat(prefix, 'Search') + when ${toPostgresPositionClause('referrer_domain', SOCIAL_DOMAINS)} then concat(prefix, 'Social') + when ${toPostgresPositionClause('referrer_domain', EMAIL_DOMAINS)} or utm_medium ilike '%mail%' then 'email' + when ${toPostgresPositionClause('referrer_domain', SHOPPING_DOMAINS)} or utm_medium ilike '%shop%' then concat(prefix, 'Shopping') + when ${toPostgresPositionClause('referrer_domain', VIDEO_DOMAINS)} or utm_medium ilike '%video%' then concat(prefix, 'Video') + else '' end AS name, + session_id, + visit_id, + c, + min_time, + max_time + from prefix) + + select + name, + sum(c) as "pageviews", + count(distinct session_id) as "visitors", + count(distinct visit_id) as "visits", + sum(case when c = 1 then 1 else 0 end) as "bounces", + sum(${getTimestampDiffSQL('min_time', 'max_time')}) as "totaltime" + from channels + where name != '' + group by name + order by visitors desc, visits desc + `, queryParams, FUNCTION_NAME, - ); + ).then(results => results.map(item => ({ ...item, y: Number(item.y) }))); } async function clickhouseQuery( @@ -156,5 +186,5 @@ function toClickHouseStringArray(arr: string[]): string { } function toPostgresPositionClause(column: string, arr: string[]) { - return arr.map(val => `position(${column}, '${val.replace(/'/g, "''")}') > 0`).join(' OR\n '); + return arr.map(val => `${column} ilike '%${val.replace(/'/g, "''")}%'`).join(' OR\n '); } diff --git a/src/queries/sql/getChannelMetrics.ts b/src/queries/sql/getChannelMetrics.ts index c91afc5c..592ed212 100644 --- a/src/queries/sql/getChannelMetrics.ts +++ b/src/queries/sql/getChannelMetrics.ts @@ -50,15 +50,15 @@ async function relationalQuery(websiteId: string, filters: QueryFilters) { channels as ( select case when referrer_domain = '' and url_query = '' then 'direct' - when ${toPostgresPositionClause('url_query', PAID_AD_PARAMS)} then 'paidAds' - when ${toPostgresPositionClause('utm_medium', ['referral', 'app', 'link'])} then 'referral' + when ${toPostgresLikeClause('url_query', PAID_AD_PARAMS)} then 'paidAds' + when ${toPostgresLikeClause('utm_medium', ['referral', 'app', 'link'])} then 'referral' when utm_medium ilike '%affiliate%' then 'affiliate' when utm_medium ilike '%sms%' or utm_source ilike '%sms%' then 'sms' - when ${toPostgresPositionClause('referrer_domain', SEARCH_DOMAINS)} or utm_medium ilike '%organic%' then concat(prefix, 'Search') - when ${toPostgresPositionClause('referrer_domain', SOCIAL_DOMAINS)} then concat(prefix, 'Social') - when ${toPostgresPositionClause('referrer_domain', EMAIL_DOMAINS)} or utm_medium ilike '%mail%' then 'email' - when ${toPostgresPositionClause('referrer_domain', SHOPPING_DOMAINS)} or utm_medium ilike '%shop%' then concat(prefix, 'Shopping') - when ${toPostgresPositionClause('referrer_domain', VIDEO_DOMAINS)} or utm_medium ilike '%video%' then concat(prefix, 'Video') + when ${toPostgresLikeClause('referrer_domain', SEARCH_DOMAINS)} or utm_medium ilike '%organic%' then concat(prefix, 'Search') + when ${toPostgresLikeClause('referrer_domain', SOCIAL_DOMAINS)} then concat(prefix, 'Social') + when ${toPostgresLikeClause('referrer_domain', EMAIL_DOMAINS)} or utm_medium ilike '%mail%' then 'email' + when ${toPostgresLikeClause('referrer_domain', SHOPPING_DOMAINS)} or utm_medium ilike '%shop%' then concat(prefix, 'Shopping') + when ${toPostgresLikeClause('referrer_domain', VIDEO_DOMAINS)} or utm_medium ilike '%video%' then concat(prefix, 'Video') else '' end AS x, count(distinct session_id) y from prefix @@ -137,6 +137,6 @@ function toClickHouseStringArray(arr: string[]): string { return arr.map(p => `'${p.replace(/'/g, "\\'")}'`).join(', '); } -function toPostgresPositionClause(column: string, arr: string[]) { +function toPostgresLikeClause(column: string, arr: string[]) { return arr.map(val => `${column} ilike '%${val.replace(/'/g, "''")}%'`).join(' OR\n '); } diff --git a/src/queries/sql/pageviews/getPageviewExpandedMetrics.ts b/src/queries/sql/pageviews/getPageviewExpandedMetrics.ts index a718c411..c5f467dc 100644 --- a/src/queries/sql/pageviews/getPageviewExpandedMetrics.ts +++ b/src/queries/sql/pageviews/getPageviewExpandedMetrics.ts @@ -36,8 +36,8 @@ async function relationalQuery( filters: QueryFilters, ): Promise { const { type, limit = 500, offset = 0 } = parameters; - const column = FILTER_COLUMNS[type] || type; - const { rawQuery, parseFilters } = prisma; + let column = FILTER_COLUMNS[type] || type; + const { rawQuery, parseFilters, getTimestampDiffSQL } = prisma; const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters( { ...filters, @@ -52,6 +52,9 @@ async function relationalQuery( if (column === 'referrer_domain') { excludeDomain = `and website_event.referrer_domain != website_event.hostname and website_event.referrer_domain != ''`; + if (type === 'domain') { + column = toPostgresGroupedReferrer(GROUPED_DOMAINS); + } } if (type === 'entry' || type === 'exit') { @@ -74,19 +77,34 @@ async function relationalQuery( return rawQuery( ` - select ${column} x, - count(distinct website_event.session_id) as y - from website_event - ${cohortQuery} - ${joinSessionQuery} - ${entryExitQuery} - where website_event.website_id = {{websiteId::uuid}} + select + name, + sum(t.c) as "pageviews", + count(distinct t.session_id) as "visitors", + count(distinct t.visit_id) as "visits", + sum(case when t.c = 1 then 1 else 0 end) as "bounces", + sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}) as "totaltime" + from ( + select + ${column} name, + website_event.session_id, + website_event.visit_id, + count(*) as "c", + min(website_event.created_at) as "min_time", + max(website_event.created_at) as "max_time" + from website_event + ${cohortQuery} + ${joinSessionQuery} + ${entryExitQuery} + where website_event.website_id = {{websiteId::uuid}} and website_event.created_at between {{startDate}} and {{endDate}} and website_event.event_type != 2 - ${excludeDomain} - ${filterQuery} - group by 1 - order by 2 desc + ${excludeDomain} + ${filterQuery} + group by name, website_event.session_id, website_event.visit_id + ) as t + group by name + order by visitors desc, visits desc limit ${limit} offset ${offset} `, @@ -186,3 +204,23 @@ export function toClickHouseGroupedReferrer( 'END', ].join('\n'); } + +export function toPostgresGroupedReferrer( + domains: any[], + column: string = 'referrer_domain', +): string { + return [ + 'CASE', + ...domains.map(group => { + const matches = Array.isArray(group.match) ? group.match : [group.match]; + + return `WHEN ${toPostgresLikeClause(column, matches)} THEN '${group.domain}'`; + }), + " ELSE 'Other'", + 'END', + ].join('\n'); +} + +function toPostgresLikeClause(column: string, arr: string[]) { + return arr.map(val => `${column} ilike '%${val.replace(/'/g, "''")}%'`).join(' OR\n '); +} diff --git a/src/queries/sql/sessions/getSessionExpandedMetrics.ts b/src/queries/sql/sessions/getSessionExpandedMetrics.ts index 3adfd346..1d211137 100644 --- a/src/queries/sql/sessions/getSessionExpandedMetrics.ts +++ b/src/queries/sql/sessions/getSessionExpandedMetrics.ts @@ -37,7 +37,7 @@ async function relationalQuery( ): Promise { const { type, limit = 500, offset = 0 } = parameters; let column = FILTER_COLUMNS[type] || type; - const { parseFilters, rawQuery } = prisma; + const { parseFilters, rawQuery, getTimestampDiffSQL } = prisma; const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters( { ...filters, @@ -55,20 +55,36 @@ async function relationalQuery( return rawQuery( ` - select - ${column} x, - count(distinct website_event.session_id) y + select + name, + ${includeCountry ? 'country,' : ''} + sum(t.c) as "pageviews", + count(distinct t.session_id) as "visitors", + count(distinct t.visit_id) as "visits", + sum(case when t.c = 1 then 1 else 0 end) as "bounces", + sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}) as "totaltime" + from ( + select + ${column} name, + ${includeCountry ? 'country,' : ''} + website_event.session_id, + website_event.visit_id, + count(*) as "c", + min(website_event.created_at) as "min_time", + max(website_event.created_at) as "max_time" + from website_event + ${cohortQuery} + ${joinSessionQuery} + where website_event.website_id = {{websiteId::uuid}} + and website_event.created_at between {{startDate}} and {{endDate}} + and website_event.event_type != 2 + ${filterQuery} + group by name, website_event.session_id, website_event.visit_id ${includeCountry ? ', country' : ''} - from website_event - ${cohortQuery} - ${joinSessionQuery} - where website_event.website_id = {{websiteId::uuid}} - and website_event.created_at between {{startDate}} and {{endDate}} - and website_event.event_type != 2 - ${filterQuery} - group by 1 - ${includeCountry ? ', 3' : ''} - order by 2 desc + ) as t + group by name + ${includeCountry ? ', country' : ''} + order by visitors desc, visits desc limit ${limit} offset ${offset} `,