Merge branch 'dev' of https://github.com/umami-software/umami into feat/clickhouse-mv

This commit is contained in:
Francis Cao 2024-07-19 11:27:03 -07:00
commit 77fcdc0646
157 changed files with 1780 additions and 1476 deletions

View file

@ -3,6 +3,7 @@ import { DATA_TYPE } from 'lib/constants';
import { uuid } from 'lib/crypto';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import { flattenJSON, getStringValue } from 'lib/data';
import clickhouse from 'lib/clickhouse';
import kafka from 'lib/kafka';
import prisma from 'lib/prisma';
import { DynamicData } from 'lib/types';
@ -59,6 +60,7 @@ async function clickhouseQuery(data: {
}) {
const { websiteId, sessionId, eventId, urlPath, eventName, eventData, createdAt } = data;
const { insert } = clickhouse;
const { getDateFormat, sendMessages } = kafka;
const jsonKeys = flattenJSON(eventData);
@ -79,7 +81,11 @@ async function clickhouseQuery(data: {
};
});
await sendMessages(messages, 'event_data');
if (kafka.enabled) {
await sendMessages('event_data', messages);
} else {
await insert('event_data', messages);
}
return data;
}

View file

@ -15,7 +15,7 @@ export async function getEventMetrics(
async function relationalQuery(websiteId: string, filters: QueryFilters) {
const { timezone = 'utc', unit = 'day' } = filters;
const { rawQuery, getDateQuery, parseFilters } = prisma;
const { rawQuery, getDateSQL, parseFilters } = prisma;
const { filterQuery, joinSession, params } = await parseFilters(websiteId, {
...filters,
eventType: EVENT_TYPE.customEvent,
@ -25,7 +25,7 @@ async function relationalQuery(websiteId: string, filters: QueryFilters) {
`
select
event_name x,
${getDateQuery('website_event.created_at', unit, timezone)} t,
${getDateSQL('website_event.created_at', unit, timezone)} t,
count(*) y
from website_event
${joinSession}
@ -45,7 +45,7 @@ async function clickhouseQuery(
filters: QueryFilters,
): Promise<{ x: string; t: string; y: number }[]> {
const { timezone = 'UTC', unit = 'day' } = filters;
const { rawQuery, getDateQuery, parseFilters } = clickhouse;
const { rawQuery, getDateSQL, parseFilters } = clickhouse;
const { filterQuery, params } = await parseFilters(websiteId, {
...filters,
eventType: EVENT_TYPE.customEvent,
@ -57,7 +57,7 @@ async function clickhouseQuery(
`
select
event_name x,
${getDateQuery('created_at', unit, timezone)} t,
${getDateSQL('created_at', unit, timezone)} t,
count(*) y
from (
select arrayJoin(event_name) as event_name,

View file

@ -1,63 +1,54 @@
import clickhouse from 'lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import prisma from 'lib/prisma';
import { QueryFilters } from 'lib/types';
import { PageParams, QueryFilters } from 'lib/types';
export function getEvents(...args: [websiteId: string, filters: QueryFilters]) {
export function getEvents(
...args: [websiteId: string, filters: QueryFilters, pageParams?: PageParams]
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
function relationalQuery(websiteId: string, filters: QueryFilters) {
const { startDate } = filters;
async function relationalQuery(websiteId: string, filters: QueryFilters, pageParams?: PageParams) {
const { pagedQuery } = prisma;
return prisma.client.websiteEvent
.findMany({
where: {
websiteId,
createdAt: {
gte: startDate,
},
},
orderBy: {
createdAt: 'desc',
},
})
.then(a => {
return Object.values(a).map(a => {
return {
...a,
timestamp: new Date(a.createdAt).getTime() / 1000,
};
});
});
const where = {
...filters,
id: websiteId,
};
return pagedQuery('website_event', { where }, pageParams);
}
function clickhouseQuery(websiteId: string, filters: QueryFilters) {
const { rawQuery } = clickhouse;
const { startDate } = filters;
async function clickhouseQuery(websiteId: string, filters: QueryFilters, pageParams?: PageParams) {
const { pagedQuery, parseFilters, getDateStringSQL } = clickhouse;
const { params, dateQuery, filterQuery } = await parseFilters(websiteId, filters);
return rawQuery(
return pagedQuery(
`
select
event_id as id,
website_id as websiteId,
session_id as sessionId,
created_at as createdAt,
toUnixTimestamp(created_at) as timestamp,
${getDateStringSQL('created_at', 'second', filters.timezone)} as createdAt,
url_path as urlPath,
url_query as urlQuery,
referrer_path as referrerPath,
referrer_query as referrerQuery,
referrer_domain as referrerDomain,
page_title as pageTitle,
event_type as eventType,
event_name as eventName
from website_event
where website_id = {websiteId:UUID}
and created_at >= {startDate:DateTime64}
${dateQuery}
${filterQuery}
order by created_at desc
`,
{
websiteId,
startDate,
},
params,
pageParams,
);
}

View file

@ -1,5 +1,6 @@
import { EVENT_NAME_LENGTH, URL_LENGTH, EVENT_TYPE, PAGE_TITLE_LENGTH } from 'lib/constants';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import clickhouse from 'lib/clickhouse';
import kafka from 'lib/kafka';
import prisma from 'lib/prisma';
import { uuid } from 'lib/crypto';
@ -134,6 +135,7 @@ async function clickhouseQuery(data: {
city,
...args
} = data;
const { insert } = clickhouse;
const { getDateFormat, sendMessage } = kafka;
const eventId = uuid();
const createdAt = getDateFormat(new Date());
@ -164,7 +166,11 @@ async function clickhouseQuery(data: {
created_at: createdAt,
};
await sendMessage(message, 'event');
if (kafka.enabled) {
await sendMessage('event', message);
} else {
await insert('website_event', [message]);
}
if (eventData) {
await saveEventData({

View file

@ -19,15 +19,15 @@ export async function getRealtimeData(
const { startDate, timezone } = criteria;
const filters = { startDate, endDate: new Date(), unit: 'minute', timezone };
const [events, sessions, pageviews, sessionviews] = await Promise.all([
getEvents(websiteId, { startDate }),
getSessions(websiteId, { startDate }),
getEvents(websiteId, { startDate, timezone }, { pageSize: 10000 }),
getSessions(websiteId, { startDate, timezone }, { pageSize: 10000 }),
getPageviewStats(websiteId, filters),
getSessionStats(websiteId, filters),
]);
const uniques = new Set();
const sessionStats = sessions.reduce(
const sessionStats = sessions.data.reduce(
(obj: { visitors: any; countries: any }, session: { id: any; country: any }) => {
const { countries, visitors } = obj;
const { id, country } = session;
@ -49,7 +49,7 @@ export async function getRealtimeData(
},
);
const eventStats = events.reduce(
const eventStats = events.data.reduce(
(
obj: { urls: any; referrers: any; events: any },
event: { urlPath: any; referrerDomain: any },
@ -81,9 +81,9 @@ export async function getRealtimeData(
visitors: sessionviews,
},
totals: {
views: events.filter(e => !e.eventName).length,
views: events.data.filter(e => !e.eventName).length,
visitors: uniques.size,
events: events.filter(e => e.eventName).length,
events: events.data.filter(e => e.eventName).length,
countries: Object.keys(sessionStats.countries).length,
},
timestamp: Date.now(),

View file

@ -18,11 +18,11 @@ async function relationalQuery(
endDate: Date,
search: string,
) {
const { rawQuery, getSearchQuery } = prisma;
const { rawQuery, getSearchSQL } = prisma;
let searchQuery = '';
if (search) {
searchQuery = getSearchQuery(column);
searchQuery = getSearchSQL(column);
}
return rawQuery(

View file

@ -23,7 +23,7 @@ async function relationalQuery(
): Promise<
{ pageviews: number; visitors: number; visits: number; bounces: number; totaltime: number }[]
> {
const { getTimestampDiffQuery, parseFilters, rawQuery } = prisma;
const { getTimestampDiffSQL, parseFilters, rawQuery } = prisma;
const { filterQuery, joinSession, params } = await parseFilters(websiteId, {
...filters,
eventType: EVENT_TYPE.pageView,
@ -36,7 +36,7 @@ async function relationalQuery(
count(distinct t.session_id) as "visitors",
count(distinct t.visit_id) as "visits",
sum(case when t.c = 1 then 1 else 0 end) as "bounces",
sum(${getTimestampDiffQuery('t.min_time', 't.max_time')}) as "totaltime"
sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}) as "totaltime"
from (
select
website_event.session_id,

View file

@ -51,15 +51,18 @@ async function relationalQuery(
const aggregrate = type === 'entry' ? 'min' : 'max';
entryExitQuery = `
JOIN (select visit_id,
${aggregrate}(created_at) target_created_at
from website_event
where website_event.website_id = {{websiteId::uuid}}
and website_event.created_at between {{startDate}} and {{endDate}}
and event_type = {{eventType}}
group by visit_id) x
ON x.visit_id = website_event.visit_id
and x.target_created_at = website_event.created_at`;
join (
select visit_id,
${aggregrate}(created_at) target_created_at
from website_event
where website_event.website_id = {{websiteId::uuid}}
and website_event.created_at between {{startDate}} and {{endDate}}
and event_type = {{eventType}}
group by visit_id
) x
on x.visit_id = website_event.visit_id
and x.target_created_at = website_event.created_at
`;
}
return rawQuery(

View file

@ -13,7 +13,7 @@ export async function getPageviewStats(...args: [websiteId: string, filters: Que
async function relationalQuery(websiteId: string, filters: QueryFilters) {
const { timezone = 'utc', unit = 'day' } = filters;
const { getDateQuery, parseFilters, rawQuery } = prisma;
const { getDateSQL, parseFilters, rawQuery } = prisma;
const { filterQuery, joinSession, params } = await parseFilters(websiteId, {
...filters,
eventType: EVENT_TYPE.pageView,
@ -22,7 +22,7 @@ async function relationalQuery(websiteId: string, filters: QueryFilters) {
return rawQuery(
`
select
${getDateQuery('website_event.created_at', unit, timezone)} x,
${getDateSQL('website_event.created_at', unit, timezone)} x,
count(*) y
from website_event
${joinSession}
@ -41,7 +41,7 @@ async function clickhouseQuery(
filters: QueryFilters,
): Promise<{ x: string; y: number }[]> {
const { timezone = 'UTC', unit = 'day' } = filters;
const { parseFilters, rawQuery, getDateStringQuery, getDateQuery } = clickhouse;
const { parseFilters, rawQuery, getDateStringSQL, getDateSQL } = clickhouse;
const { filterQuery, params } = await parseFilters(websiteId, {
...filters,
eventType: EVENT_TYPE.pageView,
@ -51,11 +51,11 @@ async function clickhouseQuery(
return rawQuery(
`
select
${getDateStringQuery('g.t', unit)} as x,
${getDateStringSQL('g.t', unit)} as x,
g.y as y
from (
select
${getDateQuery('created_at', unit, timezone)} as t,
${getDateSQL('created_at', unit, timezone)} as t,
sum(views) as y
from ${table} website_event
where website_id = {websiteId:UUID}

View file

@ -23,7 +23,7 @@ async function relationalQuery(
y: number;
}[]
> {
const { getTimestampDiffQuery, parseFilters, rawQuery } = prisma;
const { getTimestampDiffSQL, parseFilters, rawQuery } = prisma;
const { filterQuery, joinSession, params } = await parseFilters(
websiteId,
{
@ -42,7 +42,7 @@ async function relationalQuery(
count(distinct t.session_id) as "visitors",
count(distinct t.visit_id) as "visits",
sum(case when t.c = 1 then 1 else 0 end) as "bounces",
sum(${getTimestampDiffQuery('t.min_time', 't.max_time')}) as "totaltime",
sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}) as "totaltime",
${parseFieldsByName(fields)}
from (
select

View file

@ -35,14 +35,14 @@ async function relationalQuery(
}[]
> {
const { startDate, endDate, timezone = 'UTC' } = filters;
const { getDateQuery, getDayDiffQuery, getCastColumnQuery, rawQuery } = prisma;
const { getDateSQL, getDayDiffQuery, getCastColumnQuery, rawQuery } = prisma;
const unit = 'day';
return rawQuery(
`
WITH cohort_items AS (
select session_id,
${getDateQuery('created_at', unit, timezone)} as cohort_date
${getDateSQL('created_at', unit, timezone)} as cohort_date
from session
where website_id = {{websiteId::uuid}}
and created_at between {{startDate}} and {{endDate}}
@ -50,10 +50,7 @@ async function relationalQuery(
user_activities AS (
select distinct
w.session_id,
${getDayDiffQuery(
getDateQuery('created_at', unit, timezone),
'c.cohort_date',
)} as day_number
${getDayDiffQuery(getDateSQL('created_at', unit, timezone), 'c.cohort_date')} as day_number
from website_event w
join cohort_items c
on w.session_id = c.session_id
@ -115,14 +112,14 @@ async function clickhouseQuery(
}[]
> {
const { startDate, endDate, timezone = 'UTC' } = filters;
const { getDateQuery, getDateStringQuery, rawQuery } = clickhouse;
const { getDateSQL, getDateStringSQL, rawQuery } = clickhouse;
const unit = 'day';
return rawQuery(
`
WITH cohort_items AS (
select
min(${getDateQuery('created_at', unit, timezone)}) as cohort_date,
min(${getDateSQL('created_at', unit, timezone)}) as cohort_date,
session_id
from website_event
where website_id = {websiteId:UUID}
@ -132,7 +129,7 @@ async function clickhouseQuery(
user_activities AS (
select distinct
w.session_id,
(${getDateQuery('created_at', unit, timezone)} - c.cohort_date) / 86400 as day_number
(${getDateSQL('created_at', unit, timezone)} - c.cohort_date) / 86400 as day_number
from website_event w
join cohort_items c
on w.session_id = c.session_id
@ -157,7 +154,7 @@ async function clickhouseQuery(
group by 1, 2
)
select
${getDateStringQuery('c.cohort_date', unit)} as date,
${getDateStringSQL('c.cohort_date', unit)} as date,
c.day_number as day,
s.visitors as visitors,
c.visitors returnVisitors,

View file

@ -46,12 +46,12 @@ async function relationalQuery(
timezone = 'UTC',
unit = 'day',
} = criteria;
const { getDateQuery, rawQuery } = prisma;
const { getDateSQL, rawQuery } = prisma;
const chartRes = await rawQuery(
`
select
${getDateQuery('website_event.created_at', unit, timezone)} time,
${getDateSQL('website_event.created_at', unit, timezone)} time,
sum(case when data_key = {{revenueProperty}} then number_value else 0 end) sum,
avg(case when data_key = {{revenueProperty}} then number_value else 0 end) avg,
count(case when data_key = {{revenueProperty}} then 1 else 0 end) count,
@ -110,7 +110,7 @@ async function clickhouseQuery(
timezone = 'UTC',
unit = 'day',
} = criteria;
const { getDateStringQuery, getDateQuery, rawQuery } = clickhouse;
const { getDateStringSQL, getDateSQL, rawQuery } = clickhouse;
const chartRes = await rawQuery<{
time: string;
@ -121,14 +121,14 @@ async function clickhouseQuery(
}>(
`
select
${getDateStringQuery('g.time', unit)} as time,
${getDateStringSQL('g.time', unit)} as time,
g.sum as sum,
g.avg as avg,
g.count as count,
g.uniqueCount as uniqueCount
from (
select
${getDateQuery('created_at', unit, timezone)} as time,
${getDateSQL('created_at', unit, timezone)} as time,
sumIf(number_value, data_key = {revenueProperty:String}) as sum,
avgIf(number_value, data_key = {revenueProperty:String}) as avg,
countIf(data_key = {revenueProperty:String}) as count,

View file

@ -13,7 +13,7 @@ export async function getSessionStats(...args: [websiteId: string, filters: Quer
async function relationalQuery(websiteId: string, filters: QueryFilters) {
const { timezone = 'utc', unit = 'day' } = filters;
const { getDateQuery, parseFilters, rawQuery } = prisma;
const { getDateSQL, parseFilters, rawQuery } = prisma;
const { filterQuery, joinSession, params } = await parseFilters(websiteId, {
...filters,
eventType: EVENT_TYPE.pageView,
@ -22,7 +22,7 @@ async function relationalQuery(websiteId: string, filters: QueryFilters) {
return rawQuery(
`
select
${getDateQuery('website_event.created_at', unit, timezone)} x,
${getDateSQL('website_event.created_at', unit, timezone)} x,
count(distinct website_event.session_id) y
from website_event
${joinSession}
@ -41,7 +41,7 @@ async function clickhouseQuery(
filters: QueryFilters,
): Promise<{ x: string; y: number }[]> {
const { timezone = 'UTC', unit = 'day' } = filters;
const { parseFilters, rawQuery, getDateStringQuery, getDateQuery } = clickhouse;
const { parseFilters, rawQuery, getDateStringSQL, getDateSQL } = clickhouse;
const { filterQuery, params } = await parseFilters(websiteId, {
...filters,
eventType: EVENT_TYPE.pageView,
@ -51,11 +51,11 @@ async function clickhouseQuery(
return rawQuery(
`
select
${getDateStringQuery('g.t', unit)} as x,
${getDateStringSQL('g.t', unit)} as x,
g.y as y
from (
select
${getDateQuery('created_at', unit, timezone)} as t,
${getDateSQL('created_at', unit, timezone)} as t,
uniq(session_id) as y
from ${table} website_event
where website_id = {websiteId:UUID}

View file

@ -1,51 +1,38 @@
import prisma from 'lib/prisma';
import clickhouse from 'lib/clickhouse';
import { runQuery, PRISMA, CLICKHOUSE } from 'lib/db';
import { QueryFilters } from 'lib/types';
import { PageParams, QueryFilters } from 'lib/types';
export async function getSessions(...args: [websiteId: string, filters: QueryFilters]) {
export async function getSessions(
...args: [websiteId: string, filters?: QueryFilters, pageParams?: PageParams]
) {
return runQuery({
[PRISMA]: () => relationalQuery(...args),
[CLICKHOUSE]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(websiteId: string, filters: QueryFilters) {
const { startDate } = filters;
async function relationalQuery(websiteId: string, filters: QueryFilters, pageParams: PageParams) {
const { pagedQuery } = prisma;
return prisma.client.session
.findMany({
where: {
websiteId,
createdAt: {
gte: startDate,
},
},
orderBy: {
createdAt: 'desc',
},
})
.then(a => {
return Object.values(a).map(a => {
return {
...a,
timestamp: new Date(a.createdAt).getTime() / 1000,
};
});
});
const where = {
...filters,
id: websiteId,
};
return pagedQuery('session', { where }, pageParams);
}
async function clickhouseQuery(websiteId: string, filters: QueryFilters) {
const { rawQuery } = clickhouse;
const { startDate } = filters;
async function clickhouseQuery(websiteId: string, filters: QueryFilters, pageParams?: PageParams) {
const { pagedQuery, parseFilters, getDateStringSQL } = clickhouse;
const { params, dateQuery, filterQuery } = await parseFilters(websiteId, filters);
return rawQuery(
return pagedQuery(
`
select
session_id as id,
website_id as websiteId,
created_at as createdAt,
toUnixTimestamp(created_at) as timestamp,
${getDateStringSQL('created_at', 'second', filters.timezone)} as createdAt,
hostname,
browser,
os,
@ -58,12 +45,11 @@ async function clickhouseQuery(websiteId: string, filters: QueryFilters) {
city
from website_event
where website_id = {websiteId:UUID}
and created_at >= {startDate:DateTime64}
${dateQuery}
${filterQuery}
order by created_at desc
`,
{
websiteId,
startDate,
},
params,
pageParams,
);
}

View file

@ -5,6 +5,7 @@ import prisma from 'lib/prisma';
import { DynamicData } from 'lib/types';
import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db';
import kafka from 'lib/kafka';
import clickhouse from 'lib/clickhouse';
export async function saveSessionData(data: {
websiteId: string;
@ -81,6 +82,7 @@ async function clickhouseQuery(data: {
}) {
const { websiteId, sessionId, sessionData, createdAt } = data;
const { insert } = clickhouse;
const { getDateFormat, sendMessages } = kafka;
const jsonKeys = flattenJSON(sessionData);
@ -98,7 +100,11 @@ async function clickhouseQuery(data: {
};
});
await sendMessages(messages, 'session_data');
if (kafka.enabled) {
await sendMessages('session_data', messages);
} else {
await insert('session_data', messages);
}
return data;
}

View file

@ -1,8 +1,8 @@
export * from './admin/report';
export * from './admin/team';
export * from './admin/teamUser';
export * from './admin/user';
export * from './admin/website';
export * from 'queries/prisma/report';
export * from 'queries/prisma/team';
export * from 'queries/prisma/teamUser';
export * from 'queries/prisma/user';
export * from 'queries/prisma/website';
export * from './analytics/events/getEventMetrics';
export * from './analytics/events/getEventUsage';
export * from './analytics/events/getEvents';

View file

@ -17,9 +17,9 @@ export async function getReport(reportId: string): Promise<Report> {
export async function getReports(
criteria: ReportFindManyArgs,
filters: PageParams = {},
pageParams: PageParams = {},
): Promise<PageResult<Report[]>> {
const { query } = filters;
const { query } = pageParams;
const where: Prisma.ReportWhereInput = {
...criteria.where,
@ -45,7 +45,7 @@ export async function getReports(
]),
};
return prisma.pagedQuery('report', { ...criteria, where }, filters);
return prisma.pagedQuery('report', { ...criteria, where }, pageParams);
}
export async function getUserReports(

View file

@ -49,9 +49,9 @@ export async function getUserByUsername(username: string, options: GetUserOption
export async function getUsers(
criteria: UserFindManyArgs,
filters?: PageParams,
pageParams?: PageParams,
): Promise<PageResult<User[]>> {
const { query } = filters;
const { query } = pageParams;
const where: Prisma.UserWhereInput = {
...criteria.where,
@ -68,7 +68,7 @@ export async function getUsers(
{
orderBy: 'createdAt',
sortDescending: true,
...filters,
...pageParams,
},
);
}

View file

@ -27,9 +27,9 @@ export async function getSharedWebsite(shareId: string) {
export async function getWebsites(
criteria: WebsiteFindManyArgs,
filters: PageParams,
pageParams: PageParams,
): Promise<PageResult<Website[]>> {
const { query } = filters;
const { query } = pageParams;
const where: Prisma.WebsiteWhereInput = {
...criteria.where,
@ -42,7 +42,7 @@ export async function getWebsites(
deletedAt: null,
};
return prisma.pagedQuery('website', { ...criteria, where }, filters);
return prisma.pagedQuery('website', { ...criteria, where }, pageParams);
}
export async function getAllWebsites(userId: string) {