import clickhouse from '@/lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';

/** Input window for a retention query. */
export interface RetentionCriteria {
  /** Lower bound of the `created_at between ...` filter applied to sessions and events. */
  startDate: Date;
  /** Upper bound of the `created_at between ...` filter applied to sessions and events. */
  endDate: Date;
  /**
   * Timezone forwarded to the backend's `getDateSQL` helper when truncating
   * timestamps to days. When omitted, the helper is called with `undefined`.
   */
  timezone?: string;
}

/** One row of the retention report: a (cohort date, day offset) pair. */
export interface RetentionResult {
  /** Cohort date (`cohort_date` in the query). */
  date: string;
  /** Day offset between the activity date and the cohort date (`day_number`). */
  day: number;
  /** Size of the cohort, i.e. the row count of `cohort_items` for that cohort date. */
  visitors: number;
  /** Number of cohort sessions that have activity at this day offset. */
  returnVisitors: number;
  /** `returnVisitors * 100 / visitors`. */
  percentage: number;
}

/**
 * Runs the retention report for a website, dispatching through `runQuery`
 * to the Prisma (relational) or ClickHouse implementation.
 *
 * @param args - `[websiteId, criteria]`, forwarded unchanged to the backend query.
 * @returns Whatever `runQuery` resolves to for the selected backend; both
 *   backends produce `RetentionResult` rows.
 */
export async function getRetention(...args: [websiteId: string, criteria: RetentionCriteria]) {
  return runQuery({
    [PRISMA]: () => relationalQuery(...args),
    [CLICKHOUSE]: () => clickhouseQuery(...args),
  });
}

/**
 * Relational (Prisma raw SQL) implementation of the retention report.
 *
 * Query outline:
 *  - `cohort_items`: each session in the window with its day-truncated creation date.
 *  - `user_activities`: distinct (session, day offset from cohort date) pairs
 *    taken from `website_event`.
 *  - `cohort_size`: number of sessions per cohort date.
 *  - `cohort_date`: number of sessions per (cohort date, day offset).
 * The final select keeps day offsets up to 31 and orders by cohort date, then offset.
 *
 * NOTE(review): `timezone` is spliced into the SQL text by `getDateSQL` rather
 * than bound as a parameter; presumably it is validated upstream. Confirm.
 *
 * @param websiteId - Website to report on (bound as a uuid parameter).
 * @param criteria - Date window and optional timezone.
 * @returns Retention rows ordered by cohort date and day offset.
 */
async function relationalQuery(
  websiteId: string,
  criteria: RetentionCriteria,
): Promise<RetentionResult[]> {
  const { startDate, endDate, timezone } = criteria;
  const { getDateSQL, getDayDiffQuery, getCastColumnQuery, rawQuery } = prisma;
  const unit = 'day';

  // The cohort date and the activity date must be truncated in the SAME
  // timezone; otherwise the day difference between them can be off by one.
  const cohortDateSql = getDateSQL('created_at', unit, timezone);
  const dayNumberSql = getDayDiffQuery(
    getDateSQL('created_at', unit, timezone),
    'c.cohort_date',
  );
  // Cast before dividing so the percentage is not computed with integer division.
  const returnVisitorsFloatSql = getCastColumnQuery('c.visitors', 'float');

  return rawQuery(
    `
    WITH cohort_items AS (
      select session_id,
        ${cohortDateSql} as cohort_date
      from session
      where website_id = {{websiteId::uuid}}
        and created_at between {{startDate}} and {{endDate}}
    ),
    user_activities AS (
      select distinct
        w.session_id,
        ${dayNumberSql} as day_number
      from website_event w
      join cohort_items c
        on w.session_id = c.session_id
      where website_id = {{websiteId::uuid}}
        and created_at between {{startDate}} and {{endDate}}
    ),
    cohort_size as (
      select cohort_date,
        count(*) as visitors
      from cohort_items
      group by 1
      order by 1
    ),
    cohort_date as (
      select
        c.cohort_date,
        a.day_number,
        count(*) as visitors
      from user_activities a
      join cohort_items c
        on a.session_id = c.session_id
      group by 1, 2
    )
    select
      c.cohort_date as date,
      c.day_number as day,
      s.visitors,
      c.visitors as "returnVisitors",
      ${returnVisitorsFloatSql} * 100 / s.visitors as percentage
    from cohort_date c
    join cohort_size s
      on c.cohort_date = s.cohort_date
    where c.day_number <= 31
    order by 1, 2`,
    {
      websiteId,
      startDate,
      endDate,
    },
  );
}

/**
 * ClickHouse implementation of the retention report.
 *
 * Differs from the relational variant in that the cohort date of a session is
 * the earliest day-truncated `created_at` among its `website_event` rows in
 * the window (there is no separate session lookup), and the day offset is
 * computed as `(activity date - cohort date) / 86400`.
 * NOTE(review): the division by 86400 presumably relies on `getDateSQL`
 * yielding a DateTime so the subtraction is in seconds. Confirm.
 *
 * NOTE(review): `timezone` is spliced into the SQL text by `getDateSQL`;
 * presumably it is validated upstream. Confirm.
 *
 * @param websiteId - Website to report on (bound as a UUID parameter).
 * @param criteria - Date window and optional timezone.
 * @returns Retention rows ordered by cohort date and day offset.
 */
async function clickhouseQuery(
  websiteId: string,
  criteria: RetentionCriteria,
): Promise<RetentionResult[]> {
  const { startDate, endDate, timezone } = criteria;
  const { getDateSQL, rawQuery } = clickhouse;
  const unit = 'day';

  // Same truncation expression for cohort and activity dates keeps them comparable.
  const eventDateSql = getDateSQL('created_at', unit, timezone);

  return rawQuery(
    `
    WITH cohort_items AS (
      select
        min(${eventDateSql}) as cohort_date,
        session_id
      from website_event
      where website_id = {websiteId:UUID}
        and created_at between {startDate:DateTime64} and {endDate:DateTime64}
      group by session_id
    ),
    user_activities AS (
      select distinct
        w.session_id,
        (${eventDateSql} - c.cohort_date) / 86400 as day_number
      from website_event w
      join cohort_items c
        on w.session_id = c.session_id
      where website_id = {websiteId:UUID}
        and created_at between {startDate:DateTime64} and {endDate:DateTime64}
    ),
    cohort_size as (
      select cohort_date,
        count(*) as visitors
      from cohort_items
      group by 1
      order by 1
    ),
    cohort_date as (
      select
        c.cohort_date,
        a.day_number,
        count(*) as visitors
      from user_activities a
      join cohort_items c
        on a.session_id = c.session_id
      group by 1, 2
    )
    select
      c.cohort_date as date,
      c.day_number as day,
      s.visitors as visitors,
      c.visitors returnVisitors,
      c.visitors * 100 / s.visitors as percentage
    from cohort_date c
    join cohort_size s
      on c.cohort_date = s.cohort_date
    where c.day_number <= 31
    order by 1, 2`,
    {
      websiteId,
      startDate,
      endDate,
    },
  );
}