implement pageviews, events, and channels queries

This commit is contained in:
Francis Cao 2025-08-05 00:37:44 -07:00
parent c1cad16cb9
commit 0a0c1f27c6
24 changed files with 521 additions and 86 deletions

View file

@ -0,0 +1,113 @@
import clickhouse from '@/lib/clickhouse';
import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants';
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';
import { QueryFilters } from '@/lib/types';
export interface EventExpandedMetricParameters {
type: string;
limit?: string;
offset?: string;
}
export interface EventExpandedMetricData {
name: string;
pageviews: number;
visitors: number;
visits: number;
bounces: number;
totaltime: number;
}
export async function getEventExpandedMetrics(
...args: [websiteId: string, parameters: EventExpandedMetricParameters, filters: QueryFilters]
): Promise<EventExpandedMetricData[]> {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
websiteId: string,
parameters: EventExpandedMetricParameters,
filters: QueryFilters,
) {
const { type, limit = 500, offset = 0 } = parameters;
const column = FILTER_COLUMNS[type] || type;
const { rawQuery, parseFilters } = prisma;
const { filterQuery, cohortQuery, joinSessionQuery, queryParams } = parseFilters(
{
...filters,
websiteId,
eventType: EVENT_TYPE.customEvent,
},
{ joinSession: SESSION_COLUMNS.includes(type) },
);
return rawQuery(
`
select ${column} x,
count(*) as y
from website_event
${cohortQuery}
${joinSessionQuery}
where website_event.website_id = {{websiteId::uuid}}
and website_event.created_at between {{startDate}} and {{endDate}}
and event_type = {{eventType}}
${filterQuery}
group by 1
order by 2 desc
limit ${limit}
offset ${offset}
`,
queryParams,
);
}
async function clickhouseQuery(
websiteId: string,
parameters: EventExpandedMetricParameters,
filters: QueryFilters,
): Promise<EventExpandedMetricData[]> {
const { type, limit = 500, offset = 0 } = parameters;
const column = FILTER_COLUMNS[type] || type;
const { rawQuery, parseFilters } = clickhouse;
const { filterQuery, cohortQuery, queryParams } = parseFilters({
...filters,
websiteId,
});
return rawQuery(
`
select
name,
sum(t.c) as "pageviews",
uniq(t.session_id) as "visitors",
uniq(t.visit_id) as "visits",
sum(if(t.c = 1, 1, 0)) as "bounces",
sum(max_time-min_time) as "totaltime"
from (
select
${column} name,
session_id,
visit_id,
count(*) c,
min(created_at) min_time,
max(created_at) max_time
from website_event
${cohortQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and name != ''
${filterQuery}
group by name, session_id, visit_id
) as t
group by name
order by visitors desc, visits desc
limit ${limit}
offset ${offset}
`,
{ ...queryParams, ...parameters },
);
}

View file

@ -83,7 +83,6 @@ async function clickhouseQuery(
${cohortQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by x
order by y desc

View file

@ -72,7 +72,6 @@ async function clickhouseQuery(
${cohortQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by x, t
order by t

View file

@ -0,0 +1,162 @@
import clickhouse from '@/lib/clickhouse';
import {
EMAIL_DOMAINS,
EVENT_TYPE,
PAID_AD_PARAMS,
SEARCH_DOMAINS,
SHOPPING_DOMAINS,
SOCIAL_DOMAINS,
VIDEO_DOMAINS,
} from '@/lib/constants';
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';
import { QueryFilters } from '@/lib/types';
export interface ChannelExpandedMetricsParameters {
limit?: number | string;
offset?: number | string;
}
export interface ChannelExpandedMetricsData {
name: string;
pageviews: number;
visitors: number;
visits: number;
bounces: number;
totaltime: number;
}
export async function getChannelExpandedMetrics(
...args: [websiteId: string, parameters: ChannelExpandedMetricsParameters, filters?: QueryFilters]
): Promise<ChannelExpandedMetricsData[]> {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
websiteId: string,
parameters: ChannelExpandedMetricsParameters,
filters: QueryFilters,
): Promise<ChannelExpandedMetricsData[]> {
const { rawQuery, parseFilters } = prisma;
const { queryParams, filterQuery, cohortQuery, dateQuery } = parseFilters({
...filters,
websiteId,
eventType: EVENT_TYPE.pageView,
});
return rawQuery(
`
WITH channels as (
select case when ${toPostgresPositionClause('utm_medium', ['cp', 'ppc', 'retargeting', 'paid'])} then 'paid' else 'organic' end prefix,
case
when referrer_domain = '' and url_query = '' then 'direct'
when ${toPostgresPositionClause('url_query', PAID_AD_PARAMS)} then 'paidAds'
when ${toPostgresPositionClause('utm_medium', ['referral', 'app', 'link'])} then 'referral'
when position(utm_medium, 'affiliate') > 0 then 'affiliate'
when position(utm_medium, 'sms') > 0 or position(utm_source, 'sms') > 0 then 'sms'
when ${toPostgresPositionClause('referrer_domain', SEARCH_DOMAINS)} or position(utm_medium, 'organic') > 0 then concat(prefix, 'Search')
when ${toPostgresPositionClause('referrer_domain', SOCIAL_DOMAINS)} then concat(prefix, 'Social')
when ${toPostgresPositionClause('referrer_domain', EMAIL_DOMAINS)} or position(utm_medium, 'mail') > 0 then 'email'
when ${toPostgresPositionClause('referrer_domain', SHOPPING_DOMAINS)} or position(utm_medium, 'shop') > 0 then concat(prefix, 'Shopping')
when ${toPostgresPositionClause('referrer_domain', VIDEO_DOMAINS)} or position(utm_medium, 'video') > 0 then concat(prefix, 'Video')
else '' end AS x,
count(distinct session_id) y
from website_event
${cohortQuery}
where website_id = {{websiteId::uuid}}
and event_type = {{eventType}}
${dateQuery}
${filterQuery}
group by 1, 2
order by y desc)
select x, sum(y) y
from channels
where x != ''
group by x
order by y desc;
`,
{ ...queryParams, ...parameters },
);
}
async function clickhouseQuery(
websiteId: string,
parameters: ChannelExpandedMetricsParameters,
filters: QueryFilters,
): Promise<ChannelExpandedMetricsData[]> {
const { limit = 500, offset = 0 } = parameters;
const { rawQuery, parseFilters } = clickhouse;
const { queryParams, filterQuery, cohortQuery } = parseFilters({
...filters,
websiteId,
eventType: EVENT_TYPE.pageView,
});
return rawQuery(
`
select
name,
sum(t.c) as "pageviews",
uniq(t.session_id) as "visitors",
uniq(t.visit_id) as "visits",
sum(if(t.c = 1, 1, 0)) as "bounces",
sum(max_time-min_time) as "totaltime"
from (
select case when multiSearchAny(utm_medium, ['cp', 'ppc', 'retargeting', 'paid']) != 0 then 'paid' else 'organic' end prefix,
case
when referrer_domain = '' and url_query = '' then 'direct'
when multiSearchAny(url_query, [${toClickHouseStringArray(
PAID_AD_PARAMS,
)}]) != 0 then 'paidAds'
when multiSearchAny(utm_medium, ['referral', 'app','link']) != 0 then 'referral'
when position(utm_medium, 'affiliate') > 0 then 'affiliate'
when position(utm_medium, 'sms') > 0 or position(utm_source, 'sms') > 0 then 'sms'
when multiSearchAny(referrer_domain, [${toClickHouseStringArray(
SEARCH_DOMAINS,
)}]) != 0 or position(utm_medium, 'organic') > 0 then concat(prefix, 'Search')
when multiSearchAny(referrer_domain, [${toClickHouseStringArray(
SOCIAL_DOMAINS,
)}]) != 0 then concat(prefix, 'Social')
when multiSearchAny(referrer_domain, [${toClickHouseStringArray(
EMAIL_DOMAINS,
)}]) != 0 or position(utm_medium, 'mail') > 0 then 'email'
when multiSearchAny(referrer_domain, [${toClickHouseStringArray(
SHOPPING_DOMAINS,
)}]) != 0 or position(utm_medium, 'shop') > 0 then concat(prefix, 'Shopping')
when multiSearchAny(referrer_domain, [${toClickHouseStringArray(
VIDEO_DOMAINS,
)}]) != 0 or position(utm_medium, 'video') > 0 then concat(prefix, 'Video')
else '' end AS name,
session_id,
visit_id,
count(*) c,
min(created_at) min_time,
max(created_at) max_time
from website_event
${cohortQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and name != ''
${filterQuery}
group by prefix, name, session_id, visit_id
) as t
group by name
order by visitors desc, visits desc
limit ${limit}
offset ${offset}
`,
{ ...queryParams, ...parameters },
);
}
function toClickHouseStringArray(arr: string[]): string {
return arr.map(p => `'${p.replace(/'/g, "\\'")}'`).join(', ');
}
function toPostgresPositionClause(column: string, arr: string[]) {
return arr.map(val => `position(${column}, '${val.replace(/'/g, "''")}') > 0`).join(' OR\n ');
}

View file

@ -105,7 +105,6 @@ async function clickhouseQuery(
from website_event
${cohortQuery}
where website_id = {websiteId:UUID}
and event_type = {eventType:UInt32}
${dateQuery}
${filterQuery}
group by 1, 2

View file

@ -94,7 +94,6 @@ async function clickhouseQuery(
${cohortQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by session_id, visit_id
) as t;
@ -117,7 +116,6 @@ async function clickhouseQuery(
${cohortQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by session_id, visit_id
) as t;

View file

@ -0,0 +1,166 @@
import clickhouse from '@/lib/clickhouse';
import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants';
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';
import { QueryFilters } from '@/lib/types';
export interface PageviewExpandedMetricsParameters {
type: string;
limit?: number | string;
offset?: number | string;
}
export interface PageviewExpandedMetricsData {
name: string;
pageviews: number;
visitors: number;
visits: number;
bounces: number;
totaltime: number;
}
export async function getPageviewExpandedMetrics(
...args: [websiteId: string, parameters: PageviewExpandedMetricsParameters, filters: QueryFilters]
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
websiteId: string,
parameters: PageviewExpandedMetricsParameters,
filters: QueryFilters,
): Promise<PageviewExpandedMetricsData[]> {
const { type, limit = 500, offset = 0 } = parameters;
const column = FILTER_COLUMNS[type] || type;
const { rawQuery, parseFilters } = prisma;
const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters(
{
...filters,
websiteId,
eventType: column === 'event_name' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView,
},
{ joinSession: SESSION_COLUMNS.includes(type) },
);
let entryExitQuery = '';
let excludeDomain = '';
if (column === 'referrer_domain') {
excludeDomain = `and website_event.referrer_domain != website_event.hostname
and website_event.referrer_domain != ''`;
}
if (type === 'entry' || type === 'exit') {
const aggregrate = type === 'entry' ? 'min' : 'max';
entryExitQuery = `
join (
select visit_id,
${aggregrate}(created_at) target_created_at
from website_event
where website_event.website_id = {{websiteId::uuid}}
and website_event.created_at between {{startDate}} and {{endDate}}
and event_type = {{eventType}}
group by visit_id
) x
on x.visit_id = website_event.visit_id
and x.target_created_at = website_event.created_at
`;
}
return rawQuery(
`
select ${column} x,
count(distinct website_event.session_id) as y
from website_event
${joinSessionQuery}
${cohortQuery}
${entryExitQuery}
where website_event.website_id = {{websiteId::uuid}}
and website_event.created_at between {{startDate}} and {{endDate}}
and event_type = {{eventType}}
${excludeDomain}
${filterQuery}
group by 1
order by 2 desc
limit ${limit}
offset ${offset}
`,
queryParams,
);
}
async function clickhouseQuery(
websiteId: string,
parameters: PageviewExpandedMetricsParameters,
filters: QueryFilters,
): Promise<{ x: string; y: number }[]> {
const { type, limit = 500, offset = 0 } = parameters;
const column = FILTER_COLUMNS[type] || type;
const { rawQuery, parseFilters } = clickhouse;
const { filterQuery, cohortQuery, queryParams } = parseFilters({
...filters,
websiteId,
eventType: column === 'event_name' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView,
});
let excludeDomain = '';
let entryExitQuery = '';
if (column === 'referrer_domain') {
excludeDomain = `and referrer_domain != hostname and referrer_domain != ''`;
}
if (type === 'entry' || type === 'exit') {
const aggregrate = type === 'entry' ? 'min' : 'max';
entryExitQuery = `
JOIN (select visit_id,
${aggregrate}(created_at) target_created_at
from website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
group by visit_id) x
ON x.visit_id = website_event.visit_id
and x.target_created_at = website_event.created_at`;
}
return rawQuery(
`
select
name,
sum(t.c) as "pageviews",
uniq(t.session_id) as "visitors",
uniq(t.visit_id) as "visits",
sum(if(t.c = 1, 1, 0)) as "bounces",
sum(max_time-min_time) as "totaltime"
from (
select
${column} name,
session_id,
visit_id,
count(*) c,
min(created_at) min_time,
max(created_at) max_time
from website_event
${cohortQuery}
${entryExitQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and name != ''
${excludeDomain}
${filterQuery}
group by name, session_id, visit_id
) as t
group by name
order by visitors desc, visits desc
limit ${limit}
offset ${offset}
`,
{ ...queryParams, ...parameters },
);
}

View file

@ -136,7 +136,6 @@ async function clickhouseQuery(
${entryExitQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${excludeDomain}
${filterQuery}
group by x
@ -174,7 +173,6 @@ async function clickhouseQuery(
${cohortQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${excludeDomain}
${filterQuery}
${groupByQuery}) as g

View file

@ -66,7 +66,6 @@ async function clickhouseQuery(
${cohortQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by t
) as g
@ -85,7 +84,6 @@ async function clickhouseQuery(
${cohortQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by t
) as g

View file

@ -477,7 +477,6 @@ async function clickhouseQuery(
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and ${column} = {step:String}
and event_type = {eventType:UInt32}
${filterQuery}
`,
queryParams,

View file

@ -115,7 +115,6 @@ async function clickhouseQuery(
${cohortQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by ${parseFieldsByName(fields)},
session_id, visit_id

View file

@ -11,7 +11,7 @@ export interface SessionExpandedMetricsParameters {
}
export interface SessionExpandedMetricsData {
label: string;
name: string;
pageviews: number;
visitors: number;
visits: number;
@ -34,7 +34,7 @@ async function relationalQuery(
filters: QueryFilters,
): Promise<SessionExpandedMetricsData[]> {
const { type, limit = 500, offset = 0 } = parameters;
const column = FILTER_COLUMNS[type] || type;
let column = FILTER_COLUMNS[type] || type;
const { parseFilters, rawQuery } = prisma;
const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters(
{
@ -48,6 +48,10 @@ async function relationalQuery(
);
const includeCountry = column === 'city' || column === 'region';
if (type === 'language') {
column = `lower(left(${type}, 2))`;
}
return rawQuery(
`
select
@ -77,7 +81,7 @@ async function clickhouseQuery(
filters: QueryFilters,
): Promise<SessionExpandedMetricsData[]> {
const { type, limit = 500, offset = 0 } = parameters;
const column = FILTER_COLUMNS[type] || type;
let column = FILTER_COLUMNS[type] || type;
const { parseFilters, rawQuery } = clickhouse;
const { filterQuery, cohortQuery, queryParams } = parseFilters({
...filters,
@ -86,10 +90,14 @@ async function clickhouseQuery(
});
const includeCountry = column === 'city' || column === 'region';
if (type === 'language') {
column = `lower(left(${type}, 2))`;
}
return rawQuery(
`
select
label,
name,
${includeCountry ? 'country,' : ''}
sum(t.c) as "pageviews",
uniq(t.session_id) as "visitors",
@ -98,7 +106,7 @@ async function clickhouseQuery(
sum(max_time-min_time) as "totaltime"
from (
select
${column} label,
${column} name,
${includeCountry ? 'country,' : ''}
session_id,
visit_id,
@ -109,13 +117,12 @@ async function clickhouseQuery(
${cohortQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
and label != ''
and name != ''
${filterQuery}
group by label, session_id, visit_id
group by name, session_id, visit_id
${includeCountry ? ', country' : ''}
) as t
group by label
group by name
${includeCountry ? ', country' : ''}
order by visitors desc, visits desc
limit ${limit}

View file

@ -25,7 +25,7 @@ async function relationalQuery(
filters: QueryFilters,
) {
const { type, limit = 500, offset = 0 } = parameters;
const column = FILTER_COLUMNS[type] || type;
let column = FILTER_COLUMNS[type] || type;
const { parseFilters, rawQuery } = prisma;
const { filterQuery, joinSessionQuery, cohortQuery, queryParams } = parseFilters(
{
@ -39,6 +39,10 @@ async function relationalQuery(
);
const includeCountry = column === 'city' || column === 'region';
if (type === 'language') {
column = `lower(left(${type}, 2))`;
}
return rawQuery(
`
select
@ -68,7 +72,7 @@ async function clickhouseQuery(
filters: QueryFilters,
): Promise<{ x: string; y: number }[]> {
const { type, limit = 500, offset = 0 } = parameters;
const column = FILTER_COLUMNS[type] || type;
let column = FILTER_COLUMNS[type] || type;
const { parseFilters, rawQuery } = clickhouse;
const { filterQuery, cohortQuery, queryParams } = parseFilters({
...filters,
@ -77,6 +81,10 @@ async function clickhouseQuery(
});
const includeCountry = column === 'city' || column === 'region';
if (type === 'language') {
column = `lower(left(${type}, 2))`;
}
let sql = '';
if (EVENT_COLUMNS.some(item => Object.keys(filters).includes(item))) {
@ -89,7 +97,6 @@ async function clickhouseQuery(
${cohortQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by x
${includeCountry ? ', country' : ''}
@ -107,7 +114,6 @@ async function clickhouseQuery(
${cohortQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by x
${includeCountry ? ', country' : ''}

View file

@ -66,7 +66,6 @@ async function clickhouseQuery(
${cohortQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by t
) as g
@ -84,7 +83,6 @@ async function clickhouseQuery(
from website_event_stats_hourly as website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by t
) as g