convert clickhouse getChannels to SQL query

This commit is contained in:
Francis Cao 2025-07-28 17:13:13 -07:00
parent 784677e180
commit 86e0fc2262
2 changed files with 68 additions and 101 deletions

View file

@ -1,27 +1,15 @@
import { z } from 'zod';
import thenby from 'thenby';
import { canViewWebsite } from '@/lib/auth';
import { EVENT_COLUMNS, FILTER_COLUMNS, OPERATORS, SESSION_COLUMNS } from '@/lib/constants';
import { getRequestDateRange, getRequestFilters, parseRequest } from '@/lib/request';
import { badRequest, json, unauthorized } from '@/lib/response';
import { filterParams } from '@/lib/schema';
import {
SESSION_COLUMNS,
EVENT_COLUMNS,
FILTER_COLUMNS,
OPERATORS,
SEARCH_DOMAINS,
SOCIAL_DOMAINS,
EMAIL_DOMAINS,
SHOPPING_DOMAINS,
VIDEO_DOMAINS,
PAID_AD_PARAMS,
} from '@/lib/constants';
import { getRequestFilters, getRequestDateRange, parseRequest } from '@/lib/request';
import { json, unauthorized, badRequest } from '@/lib/response';
import {
getChannelMetrics,
getEventMetrics,
getPageviewMetrics,
getSessionMetrics,
getEventMetrics,
getChannelMetrics,
} from '@/queries';
import { filterParams } from '@/lib/schema';
import { z } from 'zod';
export async function GET(
request: Request,
@ -104,75 +92,8 @@ export async function GET(
if (type === 'channel') {
const data = await getChannelMetrics(websiteId, filters);
const channels = getChannels(data);
return json(
Object.keys(channels)
.map(key => ({ x: key, y: channels[key] }))
.sort(thenby.firstBy('y', -1)),
);
return json(data);
}
return badRequest();
}
/**
 * Tallies visitors per acquisition channel.
 *
 * Each row is classified from its referrer domain and URL query string and its
 * visitor count is added to at most one non-direct channel (first matching rule
 * wins). A row with neither a domain nor a query is additionally counted as
 * `direct`. Rows that match no rule are not counted anywhere else.
 *
 * @param data Rows of referrer domain, URL query string and visitor count.
 * @returns An object mapping every channel name to its accumulated visitor total.
 */
function getChannels(data: { domain: string; query: string; visitors: number }[]) {
  const totals = {
    direct: 0,
    referral: 0,
    affiliate: 0,
    email: 0,
    sms: 0,
    organicSearch: 0,
    organicSocial: 0,
    organicShopping: 0,
    organicVideo: 0,
    paidAds: 0,
    paidSearch: 0,
    paidSocial: 0,
    paidShopping: 0,
    paidVideo: 0,
  };

  // Curried predicate: string patterns are substring checks, RegExp patterns are tested.
  const matcher = (subject: string) => (pattern: string | RegExp) =>
    typeof pattern === 'string' ? subject?.includes(pattern) : (pattern as RegExp).test(subject);

  for (const row of data) {
    const { domain, query } = row;
    const count = Number(row.visitors);
    const inDomain = matcher(domain);
    const inQuery = matcher(query);

    // Direct traffic is recorded independently of the rule chain below.
    if (!domain && !query) {
      totals.direct += count;
    }

    // Paid vs. organic only affects the search/social/shopping/video buckets.
    const prefix = /utm_medium=(.*cp.*|ppc|retargeting|paid.*)/.test(query) ? 'paid' : 'organic';

    // Rules are ordered by priority; the first hit claims the row.
    if (PAID_AD_PARAMS.some(inQuery)) {
      totals.paidAds += count;
      continue;
    }
    if (/utm_medium=(referral|app|link)/.test(query)) {
      totals.referral += count;
      continue;
    }
    if (/utm_medium=affiliate/.test(query)) {
      totals.affiliate += count;
      continue;
    }
    if (/utm_(source|medium)=sms/.test(query)) {
      totals.sms += count;
      continue;
    }
    if (SEARCH_DOMAINS.some(inDomain) || /utm_medium=organic/.test(query)) {
      totals[`${prefix}Search`] += count;
      continue;
    }
    if (
      SOCIAL_DOMAINS.some(inDomain) ||
      /utm_medium=(social|social-network|social-media|sm|social network|social media)/.test(query)
    ) {
      totals[`${prefix}Social`] += count;
      continue;
    }
    if (EMAIL_DOMAINS.some(inDomain) || /utm_medium=(.*e[-_ ]?mail.*)/.test(query)) {
      totals.email += count;
      continue;
    }
    if (
      SHOPPING_DOMAINS.some(inDomain) ||
      /utm_campaign=(.*(([^a-df-z]|^)shop|shopping).*)/.test(query)
    ) {
      totals[`${prefix}Shopping`] += count;
      continue;
    }
    if (VIDEO_DOMAINS.some(inDomain) || /utm_medium=(.*video.*)/.test(query)) {
      totals[`${prefix}Video`] += count;
    }
  }

  return totals;
}

View file

@ -1,6 +1,15 @@
import prisma from '@/lib/prisma';
import clickhouse from '@/lib/clickhouse';
import { runQuery, CLICKHOUSE, PRISMA } from '@/lib/db';
import {
EMAIL_DOMAINS,
EVENT_TYPE,
PAID_AD_PARAMS,
SEARCH_DOMAINS,
SHOPPING_DOMAINS,
SOCIAL_DOMAINS,
VIDEO_DOMAINS,
} from '@/lib/constants';
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';
import { QueryFilters } from '@/lib/types';
export async function getChannelMetrics(...args: [websiteId: string, filters?: QueryFilters]) {
@ -37,21 +46,58 @@ async function clickhouseQuery(
filters: QueryFilters,
): Promise<{ x: string; y: number }[]> {
const { rawQuery, parseFilters } = clickhouse;
const { params, filterQuery, cohortQuery, dateQuery } = await parseFilters(websiteId, filters);
const { params, filterQuery, cohortQuery } = await parseFilters(websiteId, {
...filters,
eventType: EVENT_TYPE.pageView,
});
const sql = `
select
referrer_domain as domain,
url_query as query,
uniq(session_id) as visitors
from website_event
${cohortQuery}
where website_id = {websiteId:UUID}
${filterQuery}
${dateQuery}
group by 1, 2
order by visitors desc
WITH channels as (
select case when multiSearchAny(utm_medium, ['cp', 'ppc', 'retargeting', 'paid']) != 0 then 'paid' else 'organic' end prefix,
case
when referrer_domain = '' and url_query = '' then 'direct'
when multiSearchAny(url_query, [${toClickHouseStringArray(
PAID_AD_PARAMS,
)}]) != 0 then 'paidAds'
when multiSearchAny(utm_medium, ['referral', 'app','link']) != 0 then 'referral'
when position(utm_medium, 'affiliate') > 0 then 'affiliate'
when position(utm_medium, 'sms') > 0 or position(utm_source, 'sms') > 0 then 'sms'
when multiSearchAny(referrer_domain, [${toClickHouseStringArray(
SEARCH_DOMAINS,
)}]) != 0 or position(utm_medium, 'organic') > 0 then concat(prefix, 'Search')
when multiSearchAny(referrer_domain, [${toClickHouseStringArray(
SOCIAL_DOMAINS,
)}]) != 0 then concat(prefix, 'Social')
when multiSearchAny(referrer_domain, [${toClickHouseStringArray(
EMAIL_DOMAINS,
)}]) != 0 or position(utm_medium, 'mail') > 0 then 'email'
when multiSearchAny(referrer_domain, [${toClickHouseStringArray(
SHOPPING_DOMAINS,
)}]) != 0 or position(utm_medium, 'shop') > 0 then concat(prefix, 'Shopping')
when multiSearchAny(referrer_domain, [${toClickHouseStringArray(
VIDEO_DOMAINS,
)}]) != 0 or position(utm_medium, 'video') > 0 then concat(prefix, 'Video')
else '' end AS x,
count(distinct session_id) y
from website_event
${cohortQuery}
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by 1, 2
order by y desc)
select x, sum(y) y
from channels
where x != ''
group by x
order by y desc;
`;
return rawQuery(sql, params);
}
/**
 * Renders a list of strings as the comma-separated contents of a ClickHouse
 * array literal, e.g. `'a', 'b'` (the caller supplies the surrounding brackets).
 *
 * Both backslashes and single quotes are backslash-escaped. Escaping the quote
 * alone is not enough: a value containing a backslash would otherwise form an
 * escape sequence with the next character (or swallow the closing quote) and
 * corrupt the generated SQL.
 *
 * @param arr Values to embed as single-quoted string literals.
 * @returns The quoted, escaped values joined by `, `; an empty string for an empty list.
 */
function toClickHouseStringArray(arr: readonly string[]): string {
  // Single pass so backslashes introduced by the escaping are never re-escaped.
  return arr.map(value => `'${value.replace(/[\\']/g, '\\$&')}'`).join(', ');
}