Moved code into src folder. Added build for component library.

This commit is contained in:
Mike Cao 2023-08-21 02:06:09 -07:00
parent 7a7233ead4
commit ede658771e
490 changed files with 749 additions and 442 deletions

View file

@ -0,0 +1,208 @@
import clickhouse from 'lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma';
export async function getFunnel(
...args: [
websiteId: string,
criteria: {
windowMinutes: number;
startDate: Date;
endDate: Date;
urls: string[];
},
]
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
websiteId: string,
criteria: {
windowMinutes: number;
startDate: Date;
endDate: Date;
urls: string[];
},
): Promise<
{
x: string;
y: number;
z: number;
}[]
> {
const { windowMinutes, startDate, endDate, urls } = criteria;
const { rawQuery, getAddMinutesQuery } = prisma;
const { levelQuery, sumQuery } = getFunnelQuery(urls, windowMinutes);
function getFunnelQuery(
urls: string[],
windowMinutes: number,
): {
levelQuery: string;
sumQuery: string;
} {
return urls.reduce(
(pv, cv, i) => {
const levelNumber = i + 1;
const startSum = i > 0 ? 'union ' : '';
if (levelNumber >= 2) {
pv.levelQuery += `
, level${levelNumber} AS (
select distinct we.session_id, we.created_at
from level${i} l
join website_event we
on l.session_id = we.session_id
where we.website_id = {{websiteId::uuid}}
and we.created_at between l.created_at and ${getAddMinutesQuery(
`l.created_at `,
windowMinutes,
)}
and we.referrer_path = {{${i - 1}}}
and we.url_path = {{${i}}}
and we.created_at <= {{endDate}}
)`;
}
pv.sumQuery += `\n${startSum}select ${levelNumber} as level, count(distinct(session_id)) as count from level${levelNumber}`;
return pv;
},
{
levelQuery: '',
sumQuery: '',
},
);
}
return rawQuery(
`
WITH level1 AS (
select distinct session_id, created_at
from website_event
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
and url_path = {{0}}
)
${levelQuery}
${sumQuery}
ORDER BY level;
`,
{
websiteId,
startDate,
endDate,
...urls,
},
).then(results => {
return urls.map((a, i) => ({
x: a,
y: results[i]?.count || 0,
z: (1 - Number(results[i]?.count) / Number(results[i - 1]?.count)) * 100 || 0, // drop off
}));
});
}
async function clickhouseQuery(
websiteId: string,
criteria: {
windowMinutes: number;
startDate: Date;
endDate: Date;
urls: string[];
},
): Promise<
{
x: string;
y: number;
z: number;
}[]
> {
const { windowMinutes, startDate, endDate, urls } = criteria;
const { rawQuery } = clickhouse;
const { levelQuery, sumQuery, urlFilterQuery, urlParams } = getFunnelQuery(urls, windowMinutes);
function getFunnelQuery(
urls: string[],
windowMinutes: number,
): {
levelQuery: string;
sumQuery: string;
urlFilterQuery: string;
urlParams: { [key: string]: string };
} {
return urls.reduce(
(pv, cv, i) => {
const levelNumber = i + 1;
const startSum = i > 0 ? 'union all ' : '';
const startFilter = i > 0 ? ', ' : '';
if (levelNumber >= 2) {
pv.levelQuery += `\n
, level${levelNumber} AS (
select distinct y.session_id as session_id,
y.url_path as url_path,
y.referrer_path as referrer_path,
y.created_at as created_at
from level${i} x
join level0 y
on x.session_id = y.session_id
where y.created_at between x.created_at and x.created_at + interval ${windowMinutes} minute
and y.referrer_path = {url${i - 1}:String}
and y.url_path = {url${i}:String}
)`;
}
pv.sumQuery += `\n${startSum}select ${levelNumber} as level, count(distinct(session_id)) as count from level${levelNumber}`;
pv.urlFilterQuery += `${startFilter}{url${i}:String} `;
pv.urlParams[`url${i}`] = cv;
return pv;
},
{
levelQuery: '',
sumQuery: '',
urlFilterQuery: '',
urlParams: {},
},
);
}
return rawQuery<{ level: number; count: number }[]>(
`
WITH level0 AS (
select distinct session_id, url_path, referrer_path, created_at
from umami.website_event
where url_path in (${urlFilterQuery})
and website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
),
level1 AS (
select *
from level0
where url_path = {url0:String}
)
${levelQuery}
select *
from (
${sumQuery}
) ORDER BY level;
`,
{
websiteId,
startDate,
endDate,
...urlParams,
},
).then(results => {
return urls.map((a, i) => ({
x: a,
y: results[i]?.count || 0,
z: (1 - Number(results[i]?.count) / Number(results[i - 1]?.count)) * 100 || 0, // drop off
}));
});
}

View file

@ -0,0 +1,107 @@
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma';
import clickhouse from 'lib/clickhouse';
import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants';
import { QueryFilters } from 'lib/types';
export async function getInsights(
...args: [websiteId: string, fields: { name: string; type?: string }[], filters: QueryFilters]
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
websiteId: string,
fields: { name: string; type?: string }[],
filters: QueryFilters,
): Promise<
{
x: string;
y: number;
}[]
> {
const { parseFilters, rawQuery } = prisma;
const { filterQuery, joinSession, params } = await parseFilters(
websiteId,
{
...filters,
eventType: EVENT_TYPE.pageView,
},
{
joinSession: !!fields.find(({ name }) => SESSION_COLUMNS.includes(name)),
},
);
return rawQuery(
`
select
${parseFields(fields)}
from website_event
${joinSession}
where website_event.website_id = {{websiteId::uuid}}
and website_event.created_at between {{startDate}} and {{endDate}}
and website_event.event_type = {{eventType}}
${filterQuery}
${parseGroupBy(fields)}
order by 1 desc, 2 desc
limit 500
`,
params,
);
}
async function clickhouseQuery(
websiteId: string,
fields: { name: string; type?: string }[],
filters: QueryFilters,
): Promise<
{
x: string;
y: number;
}[]
> {
const { parseFilters, rawQuery } = clickhouse;
const { filterQuery, params } = await parseFilters(websiteId, {
...filters,
eventType: EVENT_TYPE.pageView,
});
return rawQuery(
`
select
${parseFields(fields)}
from website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime} and {endDate:DateTime}
and event_type = {eventType:UInt32}
${filterQuery}
${parseGroupBy(fields)}
order by 1 desc, 2 desc
limit 500
`,
params,
);
}
function parseFields(fields) {
const query = fields.reduce(
(arr, field) => {
const { name } = field;
return arr.concat(`${FILTER_COLUMNS[name]} as "${name}"`);
},
['count(*) as views', 'count(distinct website_event.session_id) as visitors'],
);
return query.join(',\n');
}
function parseGroupBy(fields) {
if (!fields.length) {
return '';
}
return `group by ${fields.map(({ name }) => FILTER_COLUMNS[name]).join(',')}`;
}

View file

@ -0,0 +1,176 @@
import clickhouse from 'lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma';
export async function getRetention(
...args: [
websiteId: string,
filters: {
startDate: Date;
endDate: Date;
timezone: string;
},
]
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
websiteId: string,
filters: {
startDate: Date;
endDate: Date;
timezone: string;
},
): Promise<
{
date: string;
day: number;
visitors: number;
returnVisitors: number;
percentage: number;
}[]
> {
const { startDate, endDate, timezone = 'UTC' } = filters;
const { getDateQuery, getDayDiffQuery, getCastColumnQuery, rawQuery } = prisma;
const unit = 'day';
return rawQuery(
`
WITH cohort_items AS (
select session_id,
${getDateQuery('created_at', unit, timezone)} as cohort_date
from session
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
),
user_activities AS (
select distinct
w.session_id,
${getDayDiffQuery(
getDateQuery('created_at', unit, timezone),
'c.cohort_date',
)} as day_number
from website_event w
join cohort_items c
on w.session_id = c.session_id
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
),
cohort_size as (
select cohort_date,
count(*) as visitors
from cohort_items
group by 1
order by 1
),
cohort_date as (
select
c.cohort_date,
a.day_number,
count(*) as visitors
from user_activities a
join cohort_items c
on a.session_id = c.session_id
group by 1, 2
)
select
c.cohort_date as date,
c.day_number as day,
s.visitors,
c.visitors as "returnVisitors",
${getCastColumnQuery('c.visitors', 'float')} * 100 / s.visitors as percentage
from cohort_date c
join cohort_size s
on c.cohort_date = s.cohort_date
where c.day_number <= 31
order by 1, 2`,
{
websiteId,
startDate,
endDate,
},
).then(results => {
return results.map(i => ({ ...i, percentage: Number(i.percentage) || 0 }));
});
}
async function clickhouseQuery(
websiteId: string,
filters: {
startDate: Date;
endDate: Date;
timezone: string;
},
): Promise<
{
date: string;
day: number;
visitors: number;
returnVisitors: number;
percentage: number;
}[]
> {
const { startDate, endDate, timezone = 'UTC' } = filters;
const { getDateQuery, getDateStringQuery, rawQuery } = clickhouse;
const unit = 'day';
return rawQuery(
`
WITH cohort_items AS (
select
min(${getDateQuery('created_at', unit, timezone)}) as cohort_date,
session_id
from website_event
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
group by session_id
),
user_activities AS (
select distinct
w.session_id,
(${getDateQuery('created_at', unit, timezone)} - c.cohort_date) / 86400 as day_number
from website_event w
join cohort_items c
on w.session_id = c.session_id
where website_id = {websiteId:UUID}
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
),
cohort_size as (
select cohort_date,
count(*) as visitors
from cohort_items
group by 1
order by 1
),
cohort_date as (
select
c.cohort_date,
a.day_number,
count(*) as visitors
from user_activities a
join cohort_items c
on a.session_id = c.session_id
group by 1, 2
)
select
${getDateStringQuery('c.cohort_date', unit)} as date,
c.day_number as day,
s.visitors as visitors,
c.visitors returnVisitors,
c.visitors * 100 / s.visitors as percentage
from cohort_date c
join cohort_size s
on c.cohort_date = s.cohort_date
where c.day_number <= 31
order by 1, 2`,
{
websiteId,
startDate,
endDate,
},
);
}