Merge branch 'dev' into jajaja

# Conflicts:
#	pnpm-lock.yaml
#	postcss.config.js
#	src/app/(main)/websites/[websiteId]/sessions/SessionsDataTable.tsx
#	src/app/(main)/websites/[websiteId]/sessions/[sessionId]/SessionInfo.tsx
This commit is contained in:
Mike Cao 2025-05-01 03:31:51 -07:00
commit c0ccffeab4
21 changed files with 382 additions and 1281 deletions

1
.gitignore vendored
View file

@ -18,7 +18,6 @@ node_modules
/public/script.js
/geo
/dist
src/generated/prisma/
# misc
.DS_Store

View file

@ -0,0 +1,103 @@
-- add distinct_id column
-- Adds a String column named distinct_id to the raw event table, the hourly
-- stats table and the session_data table. AFTER fixes the column position:
-- directly after "tag" on the two event tables, and directly after
-- "data_type" on session_data.
ALTER TABLE umami.website_event ADD COLUMN "distinct_id" String AFTER "tag";
ALTER TABLE umami.website_event_stats_hourly ADD COLUMN "distinct_id" String AFTER "tag";
ALTER TABLE umami.session_data ADD COLUMN "distinct_id" String AFTER "data_type";
-- update materialized view
-- The existing view is dropped and recreated so that its SELECT carries
-- distinct_id through to umami.website_event_stats_hourly (the TO target).
-- NOTE(review): the DROP has no IF EXISTS, so this script presumably assumes
-- the view already exists — confirm for fresh installs.
DROP TABLE umami.website_event_stats_hourly_mv;
CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv
TO umami.website_event_stats_hourly
AS
-- Outer SELECT: renames the inner query's columns to the names used by the
-- target table (url_paths -> url_path, timestamp -> created_at).
SELECT
website_id,
session_id,
visit_id,
hostname,
browser,
os,
device,
screen,
language,
country,
region,
city,
entry_url,
exit_url,
url_paths as url_path,
url_query,
utm_source,
utm_medium,
utm_campaign,
utm_content,
utm_term,
referrer_domain,
page_title,
gclid,
fbclid,
msclkid,
ttclid,
li_fat_id,
twclid,
event_type,
event_name,
views,
min_time,
max_time,
tag,
distinct_id,
timestamp as created_at
-- Inner SELECT: aggregates umami.website_event rows per hour bucket and per
-- combination of the GROUP BY keys listed at the bottom.
FROM (SELECT
website_id,
session_id,
visit_id,
hostname,
browser,
os,
device,
screen,
language,
country,
region,
city,
-- url_path at the smallest / largest created_at in the group, kept as
-- aggregate-function state (-State combinator).
argMinState(url_path, created_at) entry_url,
argMaxState(url_path, created_at) exit_url,
-- For each of the following columns: collect the group's values into an
-- array and drop empty strings.
arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
arrayFilter(x -> x != '', groupArray(url_query)) url_query,
arrayFilter(x -> x != '', groupArray(utm_source)) utm_source,
arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium,
arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign,
arrayFilter(x -> x != '', groupArray(utm_content)) utm_content,
arrayFilter(x -> x != '', groupArray(utm_term)) utm_term,
arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain,
arrayFilter(x -> x != '', groupArray(page_title)) page_title,
arrayFilter(x -> x != '', groupArray(gclid)) gclid,
arrayFilter(x -> x != '', groupArray(fbclid)) fbclid,
arrayFilter(x -> x != '', groupArray(msclkid)) msclkid,
arrayFilter(x -> x != '', groupArray(ttclid)) ttclid,
arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id,
arrayFilter(x -> x != '', groupArray(twclid)) twclid,
event_type,
-- Event names are only collected when event_type = 2; otherwise an empty
-- array is stored. NOTE(review): 2 is presumably the custom-event type — confirm.
if(event_type = 2, groupArray(event_name), []) event_name,
-- Counts the rows in the group whose event_type is 1.
sumIf(1, event_type = 1) views,
min(created_at) min_time,
max(created_at) max_time,
arrayFilter(x -> x != '', groupArray(tag)) tag,
-- distinct_id is a plain grouping key (see GROUP BY below), not an aggregate.
distinct_id,
-- Hour bucket the row belongs to; exposed as created_at by the outer SELECT.
toStartOfHour(created_at) timestamp
FROM umami.website_event
GROUP BY website_id,
session_id,
visit_id,
hostname,
browser,
os,
device,
screen,
language,
country,
region,
city,
event_type,
distinct_id,
timestamp);

View file

@ -38,6 +38,7 @@ CREATE TABLE umami.website_event
event_type UInt32,
event_name String,
tag String,
distinct_id String,
created_at DateTime('UTC'),
job_id Nullable(UUID)
)
@ -75,6 +76,7 @@ CREATE TABLE umami.session_data
number_value Nullable(Decimal64(4)),
date_value Nullable(DateTime('UTC')),
data_type UInt32,
distinct_id String,
created_at DateTime('UTC'),
job_id Nullable(UUID)
)
@ -120,6 +122,7 @@ CREATE TABLE umami.website_event_stats_hourly
min_time SimpleAggregateFunction(min, DateTime('UTC')),
max_time SimpleAggregateFunction(max, DateTime('UTC')),
tag SimpleAggregateFunction(groupArrayArray, Array(String)),
-- Plain String column, same type as distinct_id on umami.website_event.
distinct_id String,
created_at Datetime('UTC')
)
ENGINE = AggregatingMergeTree
@ -172,6 +175,7 @@ SELECT
min_time,
max_time,
tag,
distinct_id,
timestamp as created_at
FROM (SELECT
website_id,
@ -209,6 +213,7 @@ FROM (SELECT
min(created_at) min_time,
max(created_at) max_time,
arrayFilter(x -> x != '', groupArray(tag)) tag,
-- Selected as-is; distinct_id is also one of the GROUP BY keys below.
distinct_id,
toStartOfHour(created_at) timestamp
FROM umami.website_event
GROUP BY website_id,
@ -224,6 +229,7 @@ GROUP BY website_id,
region,
city,
event_type,
distinct_id,
timestamp);
-- projections

View file

@ -0,0 +1,5 @@
-- AlterTable
-- Adds a nullable distinct_id column (VARCHAR(50)) to `session`.
ALTER TABLE `session` ADD COLUMN `distinct_id` VARCHAR(50) NULL;
-- AlterTable
-- Adds a nullable distinct_id column (VARCHAR(50)) to `session_data`.
ALTER TABLE `session_data` ADD COLUMN `distinct_id` VARCHAR(50) NULL;

View file

@ -1,7 +1,5 @@
generator client {
provider = "prisma-client-js"
output = "../src/generated/prisma"
binaryTargets = ["native"]
}
datasource db {
@ -40,6 +38,7 @@ model Session {
country String? @db.Char(2)
region String? @db.Char(20)
city String? @db.VarChar(50)
distinctId String? @map("distinct_id") @db.VarChar(50)
createdAt DateTime? @default(now()) @map("created_at") @db.Timestamp(0)
websiteEvent WebsiteEvent[]
@ -166,6 +165,7 @@ model SessionData {
numberValue Decimal? @map("number_value") @db.Decimal(19, 4)
dateValue DateTime? @map("date_value") @db.Timestamp(0)
dataType Int @map("data_type") @db.UnsignedInt
distinctId String? @map("distinct_id") @db.VarChar(50)
createdAt DateTime? @default(now()) @map("created_at") @db.Timestamp(0)
website Website @relation(fields: [websiteId], references: [id])

View file

@ -0,0 +1,5 @@
-- AlterTable
-- Adds a distinct_id column (VARCHAR(50)) to "session"; no NOT NULL
-- constraint is declared, so the column accepts NULL.
ALTER TABLE "session" ADD COLUMN "distinct_id" VARCHAR(50);
-- AlterTable
-- Adds a distinct_id column (VARCHAR(50)) to "session_data"; no NOT NULL
-- constraint is declared, so the column accepts NULL.
ALTER TABLE "session_data" ADD COLUMN "distinct_id" VARCHAR(50);

View file

@ -1,7 +1,5 @@
generator client {
provider = "prisma-client-js"
output = "../src/generated/prisma"
binaryTargets = ["native"]
}
datasource db {
@ -40,6 +38,7 @@ model Session {
country String? @db.Char(2)
region String? @db.VarChar(20)
city String? @db.VarChar(50)
distinctId String? @map("distinct_id") @db.VarChar(50)
createdAt DateTime? @default(now()) @map("created_at") @db.Timestamptz(6)
websiteEvent WebsiteEvent[]
@ -166,6 +165,7 @@ model SessionData {
numberValue Decimal? @map("number_value") @db.Decimal(19, 4)
dateValue DateTime? @map("date_value") @db.Timestamptz(6)
dataType Int @map("data_type") @db.Integer
distinctId String? @map("distinct_id") @db.VarChar(50)
createdAt DateTime? @default(now()) @map("created_at") @db.Timestamptz(6)
website Website @relation(fields: [websiteId], references: [id])

1340
pnpm-lock.yaml generated

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,48 @@
-----------------------------------------------------
-- postgreSQL
-----------------------------------------------------
-- Backfills the click-id columns (fbclid, gclid, li_fat_id, msclkid, ttclid,
-- twclid) and the utm_* columns of "website_event" from each row's url_query.
-- The subquery "url" extracts one value per key; the outer UPDATE copies the
-- values back onto the row with the same event_id, session_id and website_id.
UPDATE "website_event" we
SET fbclid = url.fbclid,
gclid = url.gclid,
li_fat_id = url.li_fat_id,
msclkid = url.msclkid,
ttclid = url.ttclid,
twclid = url.twclid,
utm_campaign = url.utm_campaign,
utm_content = url.utm_content,
utm_medium = url.utm_medium,
utm_source = url.utm_source,
utm_term = url.utm_term
-- Each pattern requires the key to sit at the start of the string or right
-- after '&' or '?', and captures the value up to the next '&'. The 'i' flag
-- makes the match case-insensitive; [1] takes the first capture group.
-- NOTE(review): regexp_matches is set-returning, so a row whose url_query
-- matches none of the patterns presumably yields no row here and is left
-- unchanged by the UPDATE — confirm this is intended.
FROM (SELECT event_id, website_id, session_id,
(regexp_matches(url_query, '(?:[&?]|^)fbclid=([^&]+)', 'i'))[1] AS fbclid,
(regexp_matches(url_query, '(?:[&?]|^)gclid=([^&]+)', 'i'))[1] AS gclid,
(regexp_matches(url_query, '(?:[&?]|^)li_fat_id=([^&]+)', 'i'))[1] AS li_fat_id,
(regexp_matches(url_query, '(?:[&?]|^)msclkid=([^&]+)', 'i'))[1] AS msclkid,
(regexp_matches(url_query, '(?:[&?]|^)ttclid=([^&]+)', 'i'))[1] AS ttclid,
(regexp_matches(url_query, '(?:[&?]|^)twclid=([^&]+)', 'i'))[1] AS twclid,
(regexp_matches(url_query, '(?:[&?]|^)utm_campaign=([^&]+)', 'i'))[1] AS utm_campaign,
(regexp_matches(url_query, '(?:[&?]|^)utm_content=([^&]+)', 'i'))[1] AS utm_content,
(regexp_matches(url_query, '(?:[&?]|^)utm_medium=([^&]+)', 'i'))[1] AS utm_medium,
(regexp_matches(url_query, '(?:[&?]|^)utm_source=([^&]+)', 'i'))[1] AS utm_source,
(regexp_matches(url_query, '(?:[&?]|^)utm_term=([^&]+)', 'i'))[1] AS utm_term
FROM "website_event") url
WHERE we.event_id = url.event_id
and we.session_id = url.session_id
and we.website_id = url.website_id;
-----------------------------------------------------
-- mySQL
-----------------------------------------------------
-- Backfills the click-id columns (fbclid, gclid, li_fat_id, msclkid, ttclid,
-- twclid) and the utm_* columns of `website_event` from each row's url_query.
-- For every key: REGEXP_SUBSTR pulls out the "<key>=<value>" fragment (the key
-- must be at the start of the string or follow '&' or '?'), the inner
-- SUBSTRING_INDEX(..., '=', -1) keeps what follows the last '=', and the outer
-- SUBSTRING_INDEX(..., '&', 1) keeps what precedes the first '&'.
-- NOTE(review): because the split is on the LAST '=', a value that itself
-- contains '=' would be cut down to its final segment — confirm acceptable.
UPDATE `website_event`
SET fbclid = SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)fbclid=[^&]+'), '=', -1), '&', 1),
gclid = SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)gclid=[^&]+'), '=', -1), '&', 1),
li_fat_id = SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)li_fat_id=[^&]+'), '=', -1), '&', 1),
msclkid = SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)msclkid=[^&]+'), '=', -1), '&', 1),
ttclid = SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)ttclid=[^&]+'), '=', -1), '&', 1),
twclid = SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)twclid=[^&]+'), '=', -1), '&', 1),
utm_campaign = SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)utm_campaign=[^&]+'), '=', -1), '&', 1),
utm_content = SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)utm_content=[^&]+'), '=', -1), '&', 1),
utm_medium = SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)utm_medium=[^&]+'), '=', -1), '&', 1),
utm_source = SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)utm_source=[^&]+'), '=', -1), '&', 1),
utm_term = SUBSTRING_INDEX(SUBSTRING_INDEX(REGEXP_SUBSTR(url_query, '(?:[&?]|^)utm_term=[^&]+'), '=', -1), '&', 1)
-- Always-true predicate: every row of the table is updated.
WHERE 1 = 1;

View file

@ -14,7 +14,7 @@ export function SessionsDataTable({
const queryResult = useWebsiteSessionsQuery(websiteId);
return (
<DataGrid queryResult={queryResult} allowSearch={false} renderEmpty={() => children}>
<DataGrid queryResult={queryResult} allowSearch={true} renderEmpty={() => children}>
{({ data }) => <SessionsTable data={data} showDomain={!websiteId} />}
</DataGrid>
);

View file

@ -17,6 +17,11 @@ export function SessionInfo({ data }) {
<TextField value={data?.id} allowCopy />
</Box>
<Box>
<Label>{formatMessage(labels.distinctId)}</Label>
<Row>{data?.distinctId}</Row>
</Box>
<Box>
<Label>{formatMessage(labels.lastSeen)}</Label>
<Row>{formatTimezoneDate(data?.lastAt, 'PPPPpp')}</Row>

View file

@ -121,6 +121,7 @@ export async function POST(request: Request) {
country,
region,
city,
distinctId: id,
});
} catch (e: any) {
if (!e.message.toLowerCase().includes('unique constraint')) {
@ -144,7 +145,7 @@ export async function POST(request: Request) {
const base = hostname ? `https://${hostname}` : 'https://localhost';
const currentUrl = new URL(url, base);
let urlPath = currentUrl.pathname;
let urlPath = currentUrl.pathname === '/undefined' ? '' : currentUrl.pathname;
const urlQuery = currentUrl.search.substring(1);
const urlDomain = currentUrl.hostname.replace(/^www./, '');
@ -215,6 +216,7 @@ export async function POST(request: Request) {
region,
city,
tag,
distinctId: id,
createdAt,
});
}
@ -228,6 +230,7 @@ export async function POST(request: Request) {
websiteId,
sessionId,
sessionData: data,
distinctId: id,
createdAt,
});
}

View file

@ -132,6 +132,7 @@ export const labels = defineMessages({
all: { id: 'label.all', defaultMessage: 'All' },
session: { id: 'label.session', defaultMessage: 'Session' },
sessions: { id: 'label.sessions', defaultMessage: 'Sessions' },
distinctId: { id: 'label.distinct-id', defaultMessage: 'Distinct ID' },
pageNotFound: { id: 'message.page-not-found', defaultMessage: 'Page not found' },
activity: { id: 'label.activity', defaultMessage: 'Activity' },
dismiss: { id: 'label.dismiss', defaultMessage: 'Dismiss' },

View file

@ -33,17 +33,7 @@ export const FILTER_REFERRERS = 'filter-referrers';
export const FILTER_PAGES = 'filter-pages';
export const UNIT_TYPES = ['year', 'month', 'hour', 'day', 'minute'];
export const EVENT_COLUMNS = [
'url',
'entry',
'exit',
'referrer',
'title',
'query',
'event',
'tag',
'region',
];
export const EVENT_COLUMNS = ['url', 'entry', 'exit', 'referrer', 'title', 'query', 'event', 'tag'];
export const SESSION_COLUMNS = [
'browser',
@ -53,6 +43,7 @@ export const SESSION_COLUMNS = [
'language',
'country',
'city',
'region',
'host',
];

View file

@ -39,6 +39,7 @@ export async function saveEvent(args: {
region?: string;
city?: string;
tag?: string;
distinctId?: string;
createdAt?: Date;
}) {
return runQuery({
@ -182,6 +183,7 @@ async function clickhouseQuery(data: {
region?: string;
city?: string;
tag?: string;
distinctId?: string;
createdAt?: Date;
}) {
const {
@ -211,6 +213,7 @@ async function clickhouseQuery(data: {
region,
city,
tag,
distinctId,
createdAt,
...args
} = data;
@ -247,6 +250,7 @@ async function clickhouseQuery(data: {
event_type: eventName ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView,
event_name: eventName ? eventName?.substring(0, EVENT_NAME_LENGTH) : null,
tag: tag,
distinct_id: distinctId,
created_at: getUTCString(createdAt),
};

View file

@ -2,7 +2,19 @@ import { Prisma } from '@prisma/client';
import prisma from '@/lib/prisma';
export async function createSession(data: Prisma.SessionCreateInput) {
const { id, websiteId, browser, os, device, screen, language, country, region, city } = data;
const {
id,
websiteId,
browser,
os,
device,
screen,
language,
country,
region,
city,
distinctId,
} = data;
return prisma.client.session.create({
data: {
@ -16,6 +28,7 @@ export async function createSession(data: Prisma.SessionCreateInput) {
country,
region,
city,
distinctId,
},
});
}

View file

@ -15,6 +15,7 @@ async function relationalQuery(websiteId: string, sessionId: string) {
return rawQuery(
`
select id,
distinct_id as "distinctId",
website_id as "websiteId",
hostname,
browser,
@ -33,6 +34,7 @@ async function relationalQuery(websiteId: string, sessionId: string) {
sum(${getTimestampDiffSQL('min_time', 'max_time')}) as "totaltime"
from (select
session.session_id as id,
session.distinct_id,
website_event.visit_id,
session.website_id,
website_event.hostname,
@ -52,8 +54,8 @@ async function relationalQuery(websiteId: string, sessionId: string) {
join website_event on website_event.session_id = session.session_id
where session.website_id = {{websiteId::uuid}}
and session.session_id = {{sessionId::uuid}}
group by session.session_id, visit_id, session.website_id, website_event.hostname, session.browser, session.os, session.device, session.screen, session.language, session.country, session.region, session.city) t
group by id, website_id, hostname, browser, os, device, screen, language, country, region, city;
group by session.session_id, session.distinct_id, visit_id, session.website_id, website_event.hostname, session.browser, session.os, session.device, session.screen, session.language, session.country, session.region, session.city) t
group by id, distinct_id, website_id, hostname, browser, os, device, screen, language, country, region, city;
`,
{ websiteId, sessionId },
).then(result => result?.[0]);
@ -66,6 +68,7 @@ async function clickhouseQuery(websiteId: string, sessionId: string) {
`
select id,
websiteId,
distinctId,
hostname,
browser,
os,
@ -83,6 +86,7 @@ async function clickhouseQuery(websiteId: string, sessionId: string) {
sum(max_time-min_time) as totaltime
from (select
session_id as id,
distinct_id as distinctId,
visit_id,
website_id as websiteId,
hostname,
@ -101,8 +105,8 @@ async function clickhouseQuery(websiteId: string, sessionId: string) {
from website_event_stats_hourly
where website_id = {websiteId:UUID}
and session_id = {sessionId:UUID}
group by session_id, visit_id, website_id, hostname, browser, os, device, screen, language, country, region, city) t
group by id, websiteId, hostname, browser, os, device, screen, language, country, region, city;
group by session_id, distinct_id, visit_id, website_id, hostname, browser, os, device, screen, language, country, region, city) t
group by id, websiteId, distinctId, hostname, browser, os, device, screen, language, country, region, city;
`,
{ websiteId, sessionId },
).then(result => result?.[0]);

View file

@ -1,5 +1,5 @@
import clickhouse from '@/lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
import { CLICKHOUSE, getDatabaseType, POSTGRESQL, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';
import { PageParams, QueryFilters } from '@/lib/types';
@ -14,10 +14,14 @@ export async function getWebsiteSessions(
async function relationalQuery(websiteId: string, filters: QueryFilters, pageParams: PageParams) {
const { pagedRawQuery, parseFilters } = prisma;
const { search } = pageParams;
const { filterQuery, params } = await parseFilters(websiteId, {
...filters,
});
const db = getDatabaseType();
const like = db === POSTGRESQL ? 'ilike' : 'like';
return pagedRawQuery(
`
with sessions as (
@ -43,6 +47,15 @@ async function relationalQuery(websiteId: string, filters: QueryFilters, pagePar
where website_event.website_id = {{websiteId::uuid}}
and website_event.created_at between {{startDate}} and {{endDate}}
${filterQuery}
${
search
? `and (distinct_id ${like} {{search}}
or city ${like} {{search}}
or browser ${like} {{search}}
or os ${like} {{search}}
or device ${like} {{search}})`
: ''
}
group by session.session_id,
session.website_id,
website_event.hostname,
@ -58,7 +71,7 @@ async function relationalQuery(websiteId: string, filters: QueryFilters, pagePar
limit 1000)
select * from sessions
`,
params,
{ ...params, search: `%${search}%` },
pageParams,
);
}
@ -66,6 +79,7 @@ async function relationalQuery(websiteId: string, filters: QueryFilters, pagePar
async function clickhouseQuery(websiteId: string, filters: QueryFilters, pageParams?: PageParams) {
const { pagedQuery, parseFilters, getDateStringSQL } = clickhouse;
const { params, dateQuery, filterQuery } = await parseFilters(websiteId, filters);
const { search } = pageParams;
return pagedQuery(
`
@ -91,12 +105,21 @@ async function clickhouseQuery(websiteId: string, filters: QueryFilters, pagePar
where website_id = {websiteId:UUID}
${dateQuery}
${filterQuery}
${
search
? `and ((positionCaseInsensitive(distinct_id, {search:String}) > 0)
or (positionCaseInsensitive(city, {search:String}) > 0)
or (positionCaseInsensitive(browser, {search:String}) > 0)
or (positionCaseInsensitive(os, {search:String}) > 0)
or (positionCaseInsensitive(device, {search:String}) > 0))`
: ''
}
group by session_id, website_id, hostname, browser, os, device, screen, language, country, region, city
order by lastAt desc
limit 1000)
select * from sessions
`,
params,
{ ...params, search },
pageParams,
);
}

View file

@ -11,6 +11,7 @@ export async function saveSessionData(data: {
websiteId: string;
sessionId: string;
sessionData: DynamicData;
distinctId?: string;
createdAt?: Date;
}) {
return runQuery({
@ -23,10 +24,11 @@ export async function relationalQuery(data: {
websiteId: string;
sessionId: string;
sessionData: DynamicData;
distinctId?: string;
createdAt?: Date;
}) {
const { client } = prisma;
const { websiteId, sessionId, sessionData, createdAt } = data;
const { websiteId, sessionId, sessionData, distinctId, createdAt } = data;
const jsonKeys = flattenJSON(sessionData);
@ -39,6 +41,7 @@ export async function relationalQuery(data: {
numberValue: a.dataType === DATA_TYPE.number ? a.value : null,
dateValue: a.dataType === DATA_TYPE.date ? new Date(a.value) : null,
dataType: a.dataType,
distinctId,
createdAt,
}));
@ -80,9 +83,10 @@ async function clickhouseQuery(data: {
websiteId: string;
sessionId: string;
sessionData: DynamicData;
distinctId?: string;
createdAt?: Date;
}) {
const { websiteId, sessionId, sessionData, createdAt } = data;
const { websiteId, sessionId, sessionData, distinctId, createdAt } = data;
const { insert, getUTCString } = clickhouse;
const { sendMessage } = kafka;
@ -98,6 +102,7 @@ async function clickhouseQuery(data: {
string_value: getStringValue(value, dataType),
number_value: dataType === DATA_TYPE.number ? value : null,
date_value: dataType === DATA_TYPE.date ? getUTCString(value) : null,
distinct_id: distinctId,
created_at: getUTCString(createdAt),
};
});