Renamed folder.

This commit is contained in:
Mike Cao 2025-02-05 20:31:48 -08:00
parent 8525188e42
commit dcf0da7b14
39 changed files with 0 additions and 0 deletions

View file

@ -0,0 +1,250 @@
import clickhouse from '@/lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';
const formatResults = (steps: { type: string; value: string }[]) => (results: unknown) => {
return steps.map((step: { type: string; value: string }, i: number) => {
const visitors = Number(results[i]?.count) || 0;
const previous = Number(results[i - 1]?.count) || 0;
const dropped = previous > 0 ? previous - visitors : 0;
const dropoff = 1 - visitors / previous;
const remaining = visitors / Number(results[0].count);
return {
...step,
visitors,
previous,
dropped,
dropoff,
remaining,
};
});
};
export async function getFunnel(
...args: [
websiteId: string,
criteria: {
windowMinutes: number;
startDate: Date;
endDate: Date;
steps: { type: string; value: string }[];
},
]
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
websiteId: string,
criteria: {
windowMinutes: number;
startDate: Date;
endDate: Date;
steps: { type: string; value: string }[];
},
): Promise<
{
value: string;
visitors: number;
dropoff: number;
}[]
> {
const { windowMinutes, startDate, endDate, steps } = criteria;
const { rawQuery, getAddIntervalQuery } = prisma;
const { levelOneQuery, levelQuery, sumQuery, params } = getFunnelQuery(steps, windowMinutes);
function getFunnelQuery(
steps: { type: string; value: string }[],
windowMinutes: number,
): {
levelOneQuery: string;
levelQuery: string;
sumQuery: string;
params: string[];
} {
return steps.reduce(
(pv, cv, i) => {
const levelNumber = i + 1;
const startSum = i > 0 ? 'union ' : '';
const isURL = cv.type === 'url';
const column = isURL ? 'url_path' : 'event_name';
let operator = '=';
let paramValue = cv.value;
if (cv.value.startsWith('*') || cv.value.endsWith('*')) {
operator = 'like';
paramValue = cv.value.replace(/^\*|\*$/g, '%');
}
if (levelNumber === 1) {
pv.levelOneQuery = `
WITH level1 AS (
select distinct session_id, created_at
from website_event
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
and ${column} ${operator} {{${i}}}
)`;
} else {
pv.levelQuery += `
, level${levelNumber} AS (
select distinct we.session_id, we.created_at
from level${i} l
join website_event we
on l.session_id = we.session_id
where we.website_id = {{websiteId::uuid}}
and we.created_at between l.created_at and ${getAddIntervalQuery(
`l.created_at `,
`${windowMinutes} minute`,
)}
and we.${column} ${operator} {{${i}}}
and we.created_at <= {{endDate}}
)`;
}
pv.sumQuery += `\n${startSum}select ${levelNumber} as level, count(distinct(session_id)) as count from level${levelNumber}`;
pv.params.push(paramValue);
return pv;
},
{
levelOneQuery: '',
levelQuery: '',
sumQuery: '',
params: [],
},
);
}
return rawQuery(
`
${levelOneQuery}
${levelQuery}
${sumQuery}
ORDER BY level;
`,
{
websiteId,
startDate,
endDate,
...params,
},
).then(formatResults(steps));
}
async function clickhouseQuery(
websiteId: string,
criteria: {
windowMinutes: number;
startDate: Date;
endDate: Date;
steps: { type: string; value: string }[];
},
): Promise<
{
value: string;
visitors: number;
dropoff: number;
}[]
> {
const { windowMinutes, startDate, endDate, steps } = criteria;
const { rawQuery } = clickhouse;
const { levelOneQuery, levelQuery, sumQuery, stepFilterQuery, params } = getFunnelQuery(
steps,
windowMinutes,
);
function getFunnelQuery(
steps: { type: string; value: string }[],
windowMinutes: number,
): {
levelOneQuery: string;
levelQuery: string;
sumQuery: string;
stepFilterQuery: string;
params: { [key: string]: string };
} {
return steps.reduce(
(pv, cv, i) => {
const levelNumber = i + 1;
const startSum = i > 0 ? 'union all ' : '';
const startFilter = i > 0 ? 'or' : '';
const isURL = cv.type === 'url';
const column = isURL ? 'url_path' : 'event_name';
let operator = '=';
let paramValue = cv.value;
if (cv.value.startsWith('*') || cv.value.endsWith('*')) {
operator = 'like';
paramValue = cv.value.replace(/^\*|\*$/g, '%');
}
if (levelNumber === 1) {
pv.levelOneQuery = `\n
level1 AS (
select *
from level0
where ${column} ${operator} {param${i}:String}
)`;
} else {
pv.levelQuery += `\n
, level${levelNumber} AS (
select distinct y.session_id as session_id,
y.url_path as url_path,
y.referrer_path as referrer_path,
y.event_name,
y.created_at as created_at
from level${i} x
join level0 y
on x.session_id = y.session_id
where y.created_at between x.created_at and x.created_at + interval ${windowMinutes} minute
and y.${column} ${operator} {param${i}:String}
)`;
}
pv.sumQuery += `\n${startSum}select ${levelNumber} as level, count(distinct(session_id)) as count from level${levelNumber}`;
pv.stepFilterQuery += `${startFilter} ${column} ${operator} {param${i}:String} `;
pv.params[`param${i}`] = paramValue;
return pv;
},
{
levelOneQuery: '',
levelQuery: '',
sumQuery: '',
stepFilterQuery: '',
params: {},
},
);
}
return rawQuery(
`
WITH level0 AS (
select distinct session_id, url_path, referrer_path, event_name, created_at
from umami.website_event
where (${stepFilterQuery})
and website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
),
${levelOneQuery}
${levelQuery}
select *
from (
${sumQuery}
) ORDER BY level;
`,
{
websiteId,
startDate,
endDate,
...params,
},
).then(formatResults(steps));
}

View file

@ -0,0 +1,375 @@
import clickhouse from '@/lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';
export async function getGoals(
...args: [
websiteId: string,
criteria: {
startDate: Date;
endDate: Date;
goals: { type: string; value: string; goal: number; operator?: string }[];
},
]
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
websiteId: string,
criteria: {
startDate: Date;
endDate: Date;
goals: { type: string; value: string; goal: number; operator?: string }[];
},
): Promise<any> {
const { startDate, endDate, goals } = criteria;
const { rawQuery } = prisma;
const urls = goals.filter(a => a.type === 'url');
const events = goals.filter(a => a.type === 'event');
const eventData = goals.filter(a => a.type === 'event-data');
const hasUrl = urls.length > 0;
const hasEvent = events.length > 0;
const hasEventData = eventData.length > 0;
function getParameters(
urls: { type: string; value: string; goal: number }[],
events: { type: string; value: string; goal: number }[],
eventData: {
type: string;
value: string;
goal: number;
operator?: string;
property?: string;
}[],
) {
const urlParam = urls.reduce((acc, cv, i) => {
acc[`${cv.type}${i}`] = cv.value;
return acc;
}, {});
const eventParam = events.reduce((acc, cv, i) => {
acc[`${cv.type}${i}`] = cv.value;
return acc;
}, {});
const eventDataParam = eventData.reduce((acc, cv, i) => {
acc[`eventData${i}`] = cv.value;
acc[`property${i}`] = cv.property;
return acc;
}, {});
return {
urls: { ...urlParam, startDate, endDate, websiteId },
events: { ...eventParam, startDate, endDate, websiteId },
eventData: { ...eventDataParam, startDate, endDate, websiteId },
};
}
function getColumns(
urls: { type: string; value: string; goal: number }[],
events: { type: string; value: string; goal: number }[],
eventData: {
type: string;
value: string;
goal: number;
operator?: string;
property?: string;
}[],
) {
const urlColumns = urls
.map((a, i) => `COUNT(CASE WHEN url_path = {{url${i}}} THEN 1 END) AS URL${i},`)
.join('\n')
.slice(0, -1);
const eventColumns = events
.map((a, i) => `COUNT(CASE WHEN event_name = {{event${i}}} THEN 1 END) AS EVENT${i},`)
.join('\n')
.slice(0, -1);
const eventDataColumns = eventData
.map(
(a, i) =>
`${
a.operator === 'average' ? 'avg' : a.operator
}(CASE WHEN event_name = {{eventData${i}}} AND data_key = {{property${i}}} THEN ${
a.operator === 'count' ? '1' : 'number_value'
} END) AS EVENT_DATA${i},`,
)
.join('\n')
.slice(0, -1);
return { urls: urlColumns, events: eventColumns, eventData: eventDataColumns };
}
function getWhere(
urls: { type: string; value: string; goal: number }[],
events: { type: string; value: string; goal: number }[],
eventData: {
type: string;
value: string;
goal: number;
operator?: string;
property?: string;
}[],
) {
const urlWhere = urls.map((a, i) => `{{url${i}}}`).join(',');
const eventWhere = events.map((a, i) => `{{event${i}}}`).join(',');
const eventDataNameWhere = eventData.map((a, i) => `{{eventData${i}}}`).join(',');
const eventDataKeyWhere = eventData.map((a, i) => `{{property${i}}}`).join(',');
return {
urls: `and url_path in (${urlWhere})`,
events: `and event_name in (${eventWhere})`,
eventData: `and event_name in (${eventDataNameWhere}) and data_key in (${eventDataKeyWhere})`,
};
}
const parameters = getParameters(urls, events, eventData);
const columns = getColumns(urls, events, eventData);
const where = getWhere(urls, events, eventData);
const urlResults = hasUrl
? await rawQuery(
`
select
${columns.urls}
from website_event
where website_id = {{websiteId::uuid}}
${where.urls}
and created_at between {{startDate}} and {{endDate}}
`,
parameters.urls,
).then(a => {
const results = a[0];
return Object.keys(results).map((key, i) => ({
...urls[i],
goal: Number(urls[i].goal),
result: Number(results[key]),
}));
})
: [];
const eventResults = hasEvent
? await rawQuery(
`
select
${columns.events}
from website_event
where website_id = {{websiteId::uuid}}
${where.events}
and created_at between {{startDate}} and {{endDate}}
`,
parameters.events,
).then(a => {
const results = a[0];
return Object.keys(results).map((key, i) => {
return { ...events[i], goal: Number(events[i].goal), result: Number(results[key]) };
});
})
: [];
const eventDataResults = hasEventData
? await rawQuery(
`
select
${columns.eventData}
from website_event w
join event_data d
on d.website_event_id = w.event_id
where w.website_id = {{websiteId::uuid}}
${where.eventData}
and w.created_at between {{startDate}} and {{endDate}}
`,
parameters.eventData,
).then(a => {
const results = a[0];
return Object.keys(results).map((key, i) => {
return { ...eventData[i], goal: Number(eventData[i].goal), result: Number(results[key]) };
});
})
: [];
return [...urlResults, ...eventResults, ...eventDataResults];
}
async function clickhouseQuery(
websiteId: string,
criteria: {
startDate: Date;
endDate: Date;
goals: { type: string; value: string; goal: number; operator?: string; property?: string }[];
},
): Promise<{ type: string; value: string; goal: number; result: number }[]> {
const { startDate, endDate, goals } = criteria;
const { rawQuery } = clickhouse;
const urls = goals.filter(a => a.type === 'url');
const events = goals.filter(a => a.type === 'event');
const eventData = goals.filter(a => a.type === 'event-data');
const hasUrl = urls.length > 0;
const hasEvent = events.length > 0;
const hasEventData = eventData.length > 0;
function getParameters(
urls: { type: string; value: string; goal: number }[],
events: { type: string; value: string; goal: number }[],
eventData: {
type: string;
value: string;
goal: number;
operator?: string;
property?: string;
}[],
) {
const urlParam = urls.reduce((acc, cv, i) => {
acc[`${cv.type}${i}`] = cv.value;
return acc;
}, {});
const eventParam = events.reduce((acc, cv, i) => {
acc[`${cv.type}${i}`] = cv.value;
return acc;
}, {});
const eventDataParam = eventData.reduce((acc, cv, i) => {
acc[`eventData${i}`] = cv.value;
acc[`property${i}`] = cv.property;
return acc;
}, {});
return {
urls: { ...urlParam, startDate, endDate, websiteId },
events: { ...eventParam, startDate, endDate, websiteId },
eventData: { ...eventDataParam, startDate, endDate, websiteId },
};
}
function getColumns(
urls: { type: string; value: string; goal: number }[],
events: { type: string; value: string; goal: number }[],
eventData: {
type: string;
value: string;
goal: number;
operator?: string;
property?: string;
}[],
) {
const urlColumns = urls
.map((a, i) => `countIf(url_path = {url${i}:String}) AS URL${i},`)
.join('\n')
.slice(0, -1);
const eventColumns = events
.map((a, i) => `countIf(event_name = {event${i}:String}) AS EVENT${i},`)
.join('\n')
.slice(0, -1);
const eventDataColumns = eventData
.map(
(a, i) =>
`${a.operator === 'average' ? 'avg' : a.operator}If(${
a.operator !== 'count' ? 'number_value, ' : ''
}event_name = {eventData${i}:String} AND data_key = {property${i}:String}) AS EVENT_DATA${i},`,
)
.join('\n')
.slice(0, -1);
return { url: urlColumns, events: eventColumns, eventData: eventDataColumns };
}
function getWhere(
urls: { type: string; value: string; goal: number }[],
events: { type: string; value: string; goal: number }[],
eventData: {
type: string;
value: string;
goal: number;
operator?: string;
property?: string;
}[],
) {
const urlWhere = urls.map((a, i) => `{url${i}:String}`).join(',');
const eventWhere = events.map((a, i) => `{event${i}:String}`).join(',');
const eventDataNameWhere = eventData.map((a, i) => `{eventData${i}:String}`).join(',');
const eventDataKeyWhere = eventData.map((a, i) => `{property${i}:String}`).join(',');
return {
urls: `and url_path in (${urlWhere})`,
events: `and event_name in (${eventWhere})`,
eventData: `and event_name in (${eventDataNameWhere}) and data_key in (${eventDataKeyWhere})`,
};
}
const parameters = getParameters(urls, events, eventData);
const columns = getColumns(urls, events, eventData);
const where = getWhere(urls, events, eventData);
const urlResults = hasUrl
? await rawQuery(
`
select
${columns.url}
from website_event
where website_id = {websiteId:UUID}
${where.urls}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
`,
parameters.urls,
).then(a => {
const results = a[0];
return Object.keys(results).map((key, i) => {
return { ...urls[i], goal: Number(urls[i].goal), result: Number(results[key]) };
});
})
: [];
const eventResults = hasEvent
? await rawQuery(
`
select
${columns.events}
from website_event
where website_id = {websiteId:UUID}
${where.events}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
`,
parameters.events,
).then(a => {
const results = a[0];
return Object.keys(results).map((key, i) => {
return { ...events[i], goal: Number(events[i].goal), result: Number(results[key]) };
});
})
: [];
const eventDataResults = hasEventData
? await rawQuery(
`
select
${columns.eventData}
from event_data
where website_id = {websiteId:UUID}
${where.eventData}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
`,
parameters.eventData,
).then(a => {
const results = a[0];
return Object.keys(results).map((key, i) => {
return { ...eventData[i], goal: Number(eventData[i].goal), result: Number(results[key]) };
});
})
: [];
return [...urlResults, ...eventResults, ...eventDataResults];
}

View file

@ -0,0 +1,138 @@
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';
import clickhouse from '@/lib/clickhouse';
import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from '@/lib/constants';
import { QueryFilters } from '@/lib/types';
export async function getInsights(
...args: [websiteId: string, fields: { name: string; type?: string }[], filters: QueryFilters]
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
websiteId: string,
fields: { name: string; type?: string }[],
filters: QueryFilters,
): Promise<
{
x: string;
y: number;
}[]
> {
const { getTimestampDiffSQL, parseFilters, rawQuery } = prisma;
const { filterQuery, joinSession, params } = await parseFilters(
websiteId,
{
...filters,
eventType: EVENT_TYPE.pageView,
},
{
joinSession: !!fields.find(({ name }) => SESSION_COLUMNS.includes(name)),
},
);
return rawQuery(
`
select
sum(t.c) as "views",
count(distinct t.session_id) as "visitors",
count(distinct t.visit_id) as "visits",
sum(case when t.c = 1 then 1 else 0 end) as "bounces",
sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}) as "totaltime",
${parseFieldsByName(fields)}
from (
select
${parseFields(fields)},
website_event.session_id,
website_event.visit_id,
count(*) as "c",
min(website_event.created_at) as "min_time",
max(website_event.created_at) as "max_time"
from website_event
${joinSession}
where website_event.website_id = {{websiteId::uuid}}
and website_event.created_at between {{startDate}} and {{endDate}}
and event_type = {{eventType}}
${filterQuery}
group by ${parseFieldsByName(fields)},
website_event.session_id, website_event.visit_id
) as t
group by ${parseFieldsByName(fields)}
order by 1 desc, 2 desc
limit 500
`,
params,
);
}
async function clickhouseQuery(
websiteId: string,
fields: { name: string; type?: string }[],
filters: QueryFilters,
): Promise<
{
x: string;
y: number;
}[]
> {
const { parseFilters, rawQuery } = clickhouse;
const { filterQuery, params } = await parseFilters(websiteId, {
...filters,
eventType: EVENT_TYPE.pageView,
});
return rawQuery(
`
select
sum(t.c) as "views",
count(distinct t.session_id) as "visitors",
count(distinct t.visit_id) as "visits",
sum(if(t.c = 1, 1, 0)) as "bounces",
sum(max_time-min_time) as "totaltime",
${parseFieldsByName(fields)}
from (
select
${parseFields(fields)},
session_id,
visit_id,
count(*) c,
min(created_at) min_time,
max(created_at) max_time
from website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and event_type = {eventType:UInt32}
${filterQuery}
group by ${parseFieldsByName(fields)},
session_id, visit_id
) as t
group by ${parseFieldsByName(fields)}
order by 1 desc, 2 desc
limit 500
`,
params,
).then(a => {
return Object.values(a).map(a => {
return {
...a,
views: Number(a.views),
visitors: Number(a.visitors),
visits: Number(a.visits),
bounces: Number(a.bounces),
totaltime: Number(a.totaltime),
};
});
});
}
function parseFields(fields: { name: any }[]) {
return fields.map(({ name }) => `${FILTER_COLUMNS[name]} as "${name}"`).join(',');
}
function parseFieldsByName(fields: { name: any }[]) {
return `${fields.map(({ name }) => name).join(',')}`;
}

View file

@ -0,0 +1,272 @@
import clickhouse from '@/lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';
interface JourneyResult {
e1: string;
e2: string;
e3: string;
e4: string;
e5: string;
e6: string;
e7: string;
count: number;
}
export async function getJourney(
...args: [
websiteId: string,
filters: {
startDate: Date;
endDate: Date;
steps: number;
startStep?: string;
endStep?: string;
},
]
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
websiteId: string,
filters: {
startDate: Date;
endDate: Date;
steps: number;
startStep?: string;
endStep?: string;
},
): Promise<JourneyResult[]> {
const { startDate, endDate, steps, startStep, endStep } = filters;
const { rawQuery } = prisma;
const { sequenceQuery, startStepQuery, endStepQuery, params } = getJourneyQuery(
steps,
startStep,
endStep,
);
function getJourneyQuery(
steps: number,
startStep?: string,
endStep?: string,
): {
sequenceQuery: string;
startStepQuery: string;
endStepQuery: string;
params: { [key: string]: string };
} {
const params = {};
let sequenceQuery = '';
let startStepQuery = '';
let endStepQuery = '';
// create sequence query
let selectQuery = '';
let maxQuery = '';
let groupByQuery = '';
for (let i = 1; i <= steps; i++) {
const endQuery = i < steps ? ',' : '';
selectQuery += `s.e${i},`;
maxQuery += `\nmax(CASE WHEN event_number = ${i} THEN event ELSE NULL END) AS e${i}${endQuery}`;
groupByQuery += `s.e${i}${endQuery} `;
}
sequenceQuery = `\nsequences as (
select ${selectQuery}
count(*) count
FROM (
select visit_id,
${maxQuery}
FROM events
group by visit_id) s
group by ${groupByQuery})
`;
// create start Step params query
if (startStep) {
startStepQuery = `and e1 = {{startStep}}`;
params['startStep'] = startStep;
}
// create end Step params query
if (endStep) {
for (let i = 1; i < steps; i++) {
const startQuery = i === 1 ? 'and (' : '\nor ';
endStepQuery += `${startQuery}(e${i} = {{endStep}} and e${i + 1} is null) `;
}
endStepQuery += `\nor (e${steps} = {{endStep}}))`;
params['endStep'] = endStep;
}
return {
sequenceQuery,
startStepQuery,
endStepQuery,
params,
};
}
return rawQuery(
`
WITH events AS (
select distinct
visit_id,
referrer_path,
coalesce(nullIf(event_name, ''), url_path) event,
row_number() OVER (PARTITION BY visit_id ORDER BY created_at) AS event_number
from website_event
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}),
${sequenceQuery}
select *
from sequences
where 1 = 1
${startStepQuery}
${endStepQuery}
order by count desc
limit 100
`,
{
websiteId,
startDate,
endDate,
...params,
},
).then(parseResult);
}
async function clickhouseQuery(
websiteId: string,
filters: {
startDate: Date;
endDate: Date;
steps: number;
startStep?: string;
endStep?: string;
},
): Promise<JourneyResult[]> {
const { startDate, endDate, steps, startStep, endStep } = filters;
const { rawQuery } = clickhouse;
const { sequenceQuery, startStepQuery, endStepQuery, params } = getJourneyQuery(
steps,
startStep,
endStep,
);
function getJourneyQuery(
steps: number,
startStep?: string,
endStep?: string,
): {
sequenceQuery: string;
startStepQuery: string;
endStepQuery: string;
params: { [key: string]: string };
} {
const params = {};
let sequenceQuery = '';
let startStepQuery = '';
let endStepQuery = '';
// create sequence query
let selectQuery = '';
let maxQuery = '';
let groupByQuery = '';
for (let i = 1; i <= steps; i++) {
const endQuery = i < steps ? ',' : '';
selectQuery += `s.e${i},`;
maxQuery += `\nmax(CASE WHEN event_number = ${i} THEN event ELSE NULL END) AS e${i}${endQuery}`;
groupByQuery += `s.e${i}${endQuery} `;
}
sequenceQuery = `\nsequences as (
select ${selectQuery}
count(*) count
FROM (
select visit_id,
${maxQuery}
FROM events
group by visit_id) s
group by ${groupByQuery})
`;
// create start Step params query
if (startStep) {
startStepQuery = `and e1 = {startStep:String}`;
params['startStep'] = startStep;
}
// create end Step params query
if (endStep) {
for (let i = 1; i < steps; i++) {
const startQuery = i === 1 ? 'and (' : '\nor ';
endStepQuery += `${startQuery}(e${i} = {endStep:String} and e${i + 1} is null) `;
}
endStepQuery += `\nor (e${steps} = {endStep:String}))`;
params['endStep'] = endStep;
}
return {
sequenceQuery,
startStepQuery,
endStepQuery,
params,
};
}
return rawQuery(
`
WITH events AS (
select distinct
visit_id,
coalesce(nullIf(event_name, ''), url_path) event,
row_number() OVER (PARTITION BY visit_id ORDER BY created_at) AS event_number
from umami.website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}),
${sequenceQuery}
select *
from sequences
where 1 = 1
${startStepQuery}
${endStepQuery}
order by count desc
limit 100
`,
{
websiteId,
startDate,
endDate,
...params,
},
).then(parseResult);
}
function combineSequentialDuplicates(array: any) {
if (array.length === 0) return array;
const result = [array[0]];
for (let i = 1; i < array.length; i++) {
if (array[i] !== array[i - 1]) {
result.push(array[i]);
}
}
return result;
}
function parseResult(data: any) {
return data.map(({ e1, e2, e3, e4, e5, e6, e7, count }) => ({
items: combineSequentialDuplicates([e1, e2, e3, e4, e5, e6, e7]),
count: +Number(count),
}));
}

View file

@ -0,0 +1,171 @@
import clickhouse from '@/lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';
export async function getRetention(
...args: [
websiteId: string,
filters: {
startDate: Date;
endDate: Date;
timezone?: string;
},
]
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
websiteId: string,
filters: {
startDate: Date;
endDate: Date;
timezone?: string;
},
): Promise<
{
date: string;
day: number;
visitors: number;
returnVisitors: number;
percentage: number;
}[]
> {
const { startDate, endDate, timezone = 'UTC' } = filters;
const { getDateSQL, getDayDiffQuery, getCastColumnQuery, rawQuery } = prisma;
const unit = 'day';
return rawQuery(
`
WITH cohort_items AS (
select session_id,
${getDateSQL('created_at', unit, timezone)} as cohort_date
from session
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
),
user_activities AS (
select distinct
w.session_id,
${getDayDiffQuery(getDateSQL('created_at', unit, timezone), 'c.cohort_date')} as day_number
from website_event w
join cohort_items c
on w.session_id = c.session_id
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
),
cohort_size as (
select cohort_date,
count(*) as visitors
from cohort_items
group by 1
order by 1
),
cohort_date as (
select
c.cohort_date,
a.day_number,
count(*) as visitors
from user_activities a
join cohort_items c
on a.session_id = c.session_id
group by 1, 2
)
select
c.cohort_date as date,
c.day_number as day,
s.visitors,
c.visitors as "returnVisitors",
${getCastColumnQuery('c.visitors', 'float')} * 100 / s.visitors as percentage
from cohort_date c
join cohort_size s
on c.cohort_date = s.cohort_date
where c.day_number <= 31
order by 1, 2`,
{
websiteId,
startDate,
endDate,
},
);
}
async function clickhouseQuery(
websiteId: string,
filters: {
startDate: Date;
endDate: Date;
timezone?: string;
},
): Promise<
{
date: string;
day: number;
visitors: number;
returnVisitors: number;
percentage: number;
}[]
> {
const { startDate, endDate, timezone = 'UTC' } = filters;
const { getDateSQL, rawQuery } = clickhouse;
const unit = 'day';
return rawQuery(
`
WITH cohort_items AS (
select
min(${getDateSQL('created_at', unit, timezone)}) as cohort_date,
session_id
from website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
group by session_id
),
user_activities AS (
select distinct
w.session_id,
(${getDateSQL('created_at', unit, timezone)} - c.cohort_date) / 86400 as day_number
from website_event w
join cohort_items c
on w.session_id = c.session_id
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
),
cohort_size as (
select cohort_date,
count(*) as visitors
from cohort_items
group by 1
order by 1
),
cohort_date as (
select
c.cohort_date,
a.day_number,
count(*) as visitors
from user_activities a
join cohort_items c
on a.session_id = c.session_id
group by 1, 2
)
select
c.cohort_date as date,
c.day_number as day,
s.visitors as visitors,
c.visitors returnVisitors,
c.visitors * 100 / s.visitors as percentage
from cohort_date c
join cohort_size s
on c.cohort_date = s.cohort_date
where c.day_number <= 31
order by 1, 2`,
{
websiteId,
startDate,
endDate,
},
);
}

View file

@ -0,0 +1,274 @@
import clickhouse from '@/lib/clickhouse';
import { CLICKHOUSE, getDatabaseType, POSTGRESQL, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';
export async function getRevenue(
...args: [
websiteId: string,
criteria: {
startDate: Date;
endDate: Date;
unit: string;
timezone: string;
currency: string;
},
]
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
websiteId: string,
criteria: {
startDate: Date;
endDate: Date;
unit: string;
timezone: string;
currency: string;
},
): Promise<{
chart: { x: string; t: string; y: number }[];
country: { name: string; value: number }[];
total: { sum: number; count: number; unique_count: number };
table: {
currency: string;
sum: number;
count: number;
unique_count: number;
}[];
}> {
const { startDate, endDate, timezone = 'UTC', unit = 'day', currency } = criteria;
const { getDateSQL, rawQuery } = prisma;
const db = getDatabaseType();
const like = db === POSTGRESQL ? 'ilike' : 'like';
const chartRes = await rawQuery(
`
select
we.event_name x,
${getDateSQL('ed.created_at', unit, timezone)} t,
sum(coalesce(cast(number_value as decimal(10,2)), cast(string_value as decimal(10,2)))) y
from event_data ed
join website_event we
on we.event_id = ed.website_event_id
join (select website_event_id
from event_data
where data_key ${like} '%currency%'
and string_value = {{currency}}) currency
on currency.website_event_id = ed.website_event_id
where ed.website_id = {{websiteId::uuid}}
and ed.created_at between {{startDate}} and {{endDate}}
and ed.data_key ${like} '%revenue%'
group by x, t
order by t
`,
{ websiteId, startDate, endDate, unit, timezone, currency },
);
const countryRes = await rawQuery(
`
select
s.country as name,
sum(coalesce(cast(number_value as decimal(10,2)), cast(string_value as decimal(10,2)))) value
from event_data ed
join website_event we
on we.event_id = ed.website_event_id
join session s
on s.session_id = we.session_id
join (select website_event_id
from event_data
where data_key ${like} '%currency%'
and string_value = {{currency}}) currency
on currency.website_event_id = ed.website_event_id
where ed.website_id = {{websiteId::uuid}}
and ed.created_at between {{startDate}} and {{endDate}}
and ed.data_key ${like} '%revenue%'
group by s.country
`,
{ websiteId, startDate, endDate, currency },
);
const totalRes = await rawQuery(
`
select
sum(coalesce(cast(number_value as decimal(10,2)), cast(string_value as decimal(10,2)))) as sum,
count(distinct event_id) as count,
count(distinct session_id) as unique_count
from event_data ed
join website_event we
on we.event_id = ed.website_event_id
join (select website_event_id
from event_data
where data_key ${like} '%currency%'
and string_value = {{currency}}) currency
on currency.website_event_id = ed.website_event_id
where ed.website_id = {{websiteId::uuid}}
and ed.created_at between {{startDate}} and {{endDate}}
and ed.data_key ${like} '%revenue%'
`,
{ websiteId, startDate, endDate, currency },
).then(result => result?.[0]);
const tableRes = await rawQuery(
`
select
c.currency,
sum(coalesce(cast(number_value as decimal(10,2)), cast(string_value as decimal(10,2)))) as sum,
count(distinct ed.website_event_id) as count,
count(distinct we.session_id) as unique_count
from event_data ed
join website_event we
on we.event_id = ed.website_event_id
join (select website_event_id, string_value as currency
from event_data
where data_key ${like} '%currency%') c
on c.website_event_id = ed.website_event_id
where ed.website_id = {{websiteId::uuid}}
and ed.created_at between {{startDate}} and {{endDate}}
and ed.data_key ${like} '%revenue%'
group by c.currency
order by sum desc;
`,
{ websiteId, startDate, endDate, unit, timezone, currency },
);
return { chart: chartRes, country: countryRes, total: totalRes, table: tableRes };
}
async function clickhouseQuery(
websiteId: string,
criteria: {
startDate: Date;
endDate: Date;
unit: string;
timezone: string;
currency: string;
},
): Promise<{
chart: { x: string; t: string; y: number }[];
country: { name: string; value: number }[];
total: { sum: number; count: number; unique_count: number };
table: {
currency: string;
sum: number;
count: number;
unique_count: number;
}[];
}> {
const { startDate, endDate, timezone = 'UTC', unit = 'day', currency } = criteria;
const { getDateSQL, rawQuery } = clickhouse;
const chartRes = await rawQuery<
{
x: string;
t: string;
y: number;
}[]
>(
`
select
event_name x,
${getDateSQL('created_at', unit, timezone)} t,
sum(coalesce(toDecimal64(number_value, 2), toDecimal64(string_value, 2))) y
from event_data
join (select event_id
from event_data
where positionCaseInsensitive(data_key, 'currency') > 0
and string_value = {currency:String}) currency
on currency.event_id = event_data.event_id
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and positionCaseInsensitive(data_key, 'revenue') > 0
group by x, t
order by t
`,
{ websiteId, startDate, endDate, unit, timezone, currency },
);
const countryRes = await rawQuery<
{
name: string;
value: number;
}[]
>(
`
select
s.country as name,
sum(coalesce(toDecimal64(number_value, 2), toDecimal64(string_value, 2))) as value
from event_data ed
join (select event_id
from event_data
where positionCaseInsensitive(data_key, 'currency') > 0
and string_value = {currency:String}) c
on c.event_id = ed.event_id
join (select distinct website_id, session_id, country
from website_event_stats_hourly
where website_id = {websiteId:UUID}) s
on ed.website_id = s.website_id
and ed.session_id = s.session_id
where ed.website_id = {websiteId:UUID}
and ed.created_at between {startDate:DateTime64} and {endDate:DateTime64}
and positionCaseInsensitive(ed.data_key, 'revenue') > 0
group by s.country
`,
{ websiteId, startDate, endDate, currency },
);
const totalRes = await rawQuery<{
sum: number;
avg: number;
count: number;
unique_count: number;
}>(
`
select
sum(coalesce(toDecimal64(number_value, 2), toDecimal64(string_value, 2))) as sum,
uniqExact(event_id) as count,
uniqExact(session_id) as unique_count
from event_data
join (select event_id
from event_data
where positionCaseInsensitive(data_key, 'currency') > 0
and string_value = {currency:String}) currency
on currency.event_id = event_data.event_id
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and positionCaseInsensitive(data_key, 'revenue') > 0
`,
{ websiteId, startDate, endDate, currency },
).then(result => result?.[0]);
const tableRes = await rawQuery<
{
currency: string;
sum: number;
avg: number;
count: number;
unique_count: number;
}[]
>(
`
select
c.currency,
sum(coalesce(toDecimal64(ed.number_value, 2), toDecimal64(ed.string_value, 2))) as sum,
uniqExact(ed.event_id) as count,
uniqExact(ed.session_id) as unique_count
from event_data ed
join (select event_id, string_value as currency
from event_data
where positionCaseInsensitive(data_key, 'currency') > 0) c
on c.event_id = ed.event_id
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and positionCaseInsensitive(data_key, 'revenue') > 0
group by c.currency
order by sum desc;
`,
{ websiteId, startDate, endDate, unit, timezone, currency },
);
return { chart: chartRes, country: countryRes, total: totalRes, table: tableRes };
}

View file

@ -0,0 +1,75 @@
import prisma from '@/lib/prisma';
import clickhouse from '@/lib/clickhouse';
import { runQuery, CLICKHOUSE, PRISMA, getDatabaseType, POSTGRESQL } from '@/lib/db';
export async function getRevenueValues(
...args: [
websiteId: string,
criteria: {
startDate: Date;
endDate: Date;
},
]
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
websiteId: string,
criteria: {
startDate: Date;
endDate: Date;
},
) {
const { rawQuery } = prisma;
const { startDate, endDate } = criteria;
const db = getDatabaseType();
const like = db === POSTGRESQL ? 'ilike' : 'like';
return rawQuery(
`
select distinct string_value as currency
from event_data
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
and data_key ${like} '%currency%'
order by currency
`,
{
websiteId,
startDate,
endDate,
},
);
}
async function clickhouseQuery(
websiteId: string,
criteria: {
startDate: Date;
endDate: Date;
},
) {
const { rawQuery } = clickhouse;
const { startDate, endDate } = criteria;
return rawQuery(
`
select distinct string_value as currency
from event_data
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and positionCaseInsensitive(data_key, 'currency') > 0
order by currency
`,
{
websiteId,
startDate,
endDate,
},
);
}

View file

@ -0,0 +1,102 @@
import clickhouse from '@/lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';
export async function getUTM(
...args: [
websiteId: string,
filters: {
startDate: Date;
endDate: Date;
timezone?: string;
},
]
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
websiteId: string,
filters: {
startDate: Date;
endDate: Date;
timezone?: string;
},
) {
const { startDate, endDate } = filters;
const { rawQuery } = prisma;
return rawQuery(
`
select url_query, count(*) as "num"
from website_event
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
and coalesce(url_query, '') != ''
and event_type = 1
group by 1
`,
{
websiteId,
startDate,
endDate,
},
).then(result => parseParameters(result as any[]));
}
async function clickhouseQuery(
websiteId: string,
filters: {
startDate: Date;
endDate: Date;
timezone?: string;
},
) {
const { startDate, endDate } = filters;
const { rawQuery } = clickhouse;
return rawQuery(
`
select url_query, count(*) as "num"
from website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
and url_query != ''
and event_type = 1
group by 1
`,
{
websiteId,
startDate,
endDate,
},
).then(result => parseParameters(result as any[]));
}
function parseParameters(data: any[]) {
return data.reduce((obj, { url_query, num }) => {
try {
const searchParams = new URLSearchParams(url_query);
for (const [key, value] of searchParams) {
if (key.match(/^utm_(\w+)$/)) {
const name = value;
if (!obj[key]) {
obj[key] = { [name]: Number(num) };
} else if (!obj[key][name]) {
obj[key][name] = Number(num);
} else {
obj[key][name] += Number(num);
}
}
}
} catch {
// Ignore
}
return obj;
}, {});
}