distinct_id schema changes and search on sessions page

This commit is contained in:
Francis Cao 2025-04-29 08:57:58 -07:00
parent 9d3c926d69
commit c5efc27c07
11 changed files with 176 additions and 39 deletions

View file

@ -0,0 +1,103 @@
-- add distinct_id column
-- IF NOT EXISTS keeps the migration re-runnable if it was only partially applied.
ALTER TABLE umami.website_event ADD COLUMN IF NOT EXISTS "distinct_id" String AFTER "tag";
ALTER TABLE umami.website_event_stats_hourly ADD COLUMN IF NOT EXISTS "distinct_id" String AFTER "tag";
ALTER TABLE umami.session_data ADD COLUMN IF NOT EXISTS "distinct_id" String AFTER "data_type";

-- update materialized view
-- The view is dropped here and recreated by the statement that follows so that
-- it also selects distinct_id. IF EXISTS avoids failing when the view is absent.
DROP TABLE IF EXISTS umami.website_event_stats_hourly_mv;
-- Hourly rollup view: reads umami.website_event, aggregates rows per
-- toStartOfHour(created_at) bucket and grouping key, and writes the result
-- into umami.website_event_stats_hourly.
CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv
TO umami.website_event_stats_hourly
AS
SELECT
    website_id,
    session_id,
    visit_id,
    hostname,
    browser,
    os,
    device,
    screen,
    language,
    country,
    region,
    city,
    entry_url,
    exit_url,
    url_paths AS url_path,
    url_query,
    utm_source,
    utm_medium,
    utm_campaign,
    utm_content,
    utm_term,
    referrer_domain,
    page_title,
    gclid,
    fbclid,
    msclkid,
    ttclid,
    li_fat_id,
    twclid,
    event_type,
    event_name,
    views,
    min_time,
    max_time,
    tag,
    distinct_id,
    timestamp AS created_at
FROM (
    SELECT
        -- grouping keys
        website_id,
        session_id,
        visit_id,
        hostname,
        browser,
        os,
        device,
        screen,
        language,
        country,
        region,
        city,
        -- url_path at the earliest / latest created_at in the group
        argMinState(url_path, created_at) AS entry_url,
        argMaxState(url_path, created_at) AS exit_url,
        -- collected values with empty strings removed
        arrayFilter(x -> x != '', groupArray(url_path)) AS url_paths,
        arrayFilter(x -> x != '', groupArray(url_query)) AS url_query,
        arrayFilter(x -> x != '', groupArray(utm_source)) AS utm_source,
        arrayFilter(x -> x != '', groupArray(utm_medium)) AS utm_medium,
        arrayFilter(x -> x != '', groupArray(utm_campaign)) AS utm_campaign,
        arrayFilter(x -> x != '', groupArray(utm_content)) AS utm_content,
        arrayFilter(x -> x != '', groupArray(utm_term)) AS utm_term,
        arrayFilter(x -> x != '', groupArray(referrer_domain)) AS referrer_domain,
        arrayFilter(x -> x != '', groupArray(page_title)) AS page_title,
        arrayFilter(x -> x != '', groupArray(gclid)) AS gclid,
        arrayFilter(x -> x != '', groupArray(fbclid)) AS fbclid,
        arrayFilter(x -> x != '', groupArray(msclkid)) AS msclkid,
        arrayFilter(x -> x != '', groupArray(ttclid)) AS ttclid,
        arrayFilter(x -> x != '', groupArray(li_fat_id)) AS li_fat_id,
        arrayFilter(x -> x != '', groupArray(twclid)) AS twclid,
        event_type,
        -- event names are collected only for rows with event_type = 2
        if(event_type = 2, groupArray(event_name), []) AS event_name,
        -- counts rows with event_type = 1
        sumIf(1, event_type = 1) AS views,
        min(created_at) AS min_time,
        max(created_at) AS max_time,
        arrayFilter(x -> x != '', groupArray(tag)) AS tag,
        distinct_id,
        toStartOfHour(created_at) AS timestamp
    FROM umami.website_event
    GROUP BY
        website_id,
        session_id,
        visit_id,
        hostname,
        browser,
        os,
        device,
        screen,
        language,
        country,
        region,
        city,
        event_type,
        distinct_id,
        timestamp
);

View file

@ -38,6 +38,7 @@ CREATE TABLE umami.website_event
event_type UInt32,
event_name String,
tag String,
distinct_id String,
created_at DateTime('UTC'),
job_id Nullable(UUID)
)
@ -75,6 +76,7 @@ CREATE TABLE umami.session_data
number_value Nullable(Decimal64(4)),
date_value Nullable(DateTime('UTC')),
data_type UInt32,
distinct_id String,
created_at DateTime('UTC'),
job_id Nullable(UUID)
)
@ -120,6 +122,7 @@ CREATE TABLE umami.website_event_stats_hourly
min_time SimpleAggregateFunction(min, DateTime('UTC')),
max_time SimpleAggregateFunction(max, DateTime('UTC')),
tag SimpleAggregateFunction(groupArrayArray, Array(String)),
-- a column definition needs an explicit type; String matches umami.website_event.distinct_id
distinct_id String,
created_at Datetime('UTC')
)
ENGINE = AggregatingMergeTree
@ -172,6 +175,7 @@ SELECT
min_time,
max_time,
tag,
distinct_id,
timestamp as created_at
FROM (SELECT
website_id,
@ -209,6 +213,7 @@ FROM (SELECT
min(created_at) min_time,
max(created_at) max_time,
arrayFilter(x -> x != '', groupArray(tag)) tag,
-- plain column reference: a trailing type name in a SELECT list is parsed as an
-- alias, which would hide distinct_id from the outer SELECT
distinct_id,
toStartOfHour(created_at) timestamp
FROM umami.website_event
GROUP BY website_id,
@ -224,6 +229,7 @@ GROUP BY website_id,
region,
city,
event_type,
distinct_id,
timestamp);
-- projections

View file

@ -0,0 +1,5 @@
-- AlterTable: add a nullable distinct_id column to `session`
ALTER TABLE `session`
    ADD COLUMN `distinct_id` VARCHAR(50) NULL;

-- AlterTable: add a nullable distinct_id column to `session_data`
ALTER TABLE `session_data`
    ADD COLUMN `distinct_id` VARCHAR(50) NULL;

View file

@ -1,3 +1,3 @@
# Please do not edit this file manually
# It should be added in your version-control system (e.g., Git)
provider = "mysql"
provider = "mysql"

View file

@ -30,17 +30,18 @@ model User {
}
model Session {
id String @id @unique @map("session_id") @db.VarChar(36)
websiteId String @map("website_id") @db.VarChar(36)
browser String? @db.VarChar(20)
os String? @db.VarChar(20)
device String? @db.VarChar(20)
screen String? @db.VarChar(11)
language String? @db.VarChar(35)
country String? @db.Char(2)
region String? @db.Char(20)
city String? @db.VarChar(50)
createdAt DateTime? @default(now()) @map("created_at") @db.Timestamp(0)
id String @id @unique @map("session_id") @db.VarChar(36)
websiteId String @map("website_id") @db.VarChar(36)
browser String? @db.VarChar(20)
os String? @db.VarChar(20)
device String? @db.VarChar(20)
screen String? @db.VarChar(11)
language String? @db.VarChar(35)
country String? @db.Char(2)
region String? @db.Char(20)
city String? @db.VarChar(50)
distinct_id String? @db.VarChar(50)
createdAt DateTime? @default(now()) @map("created_at") @db.Timestamp(0)
websiteEvent WebsiteEvent[]
sessionData SessionData[]
@ -166,6 +167,7 @@ model SessionData {
numberValue Decimal? @map("number_value") @db.Decimal(19, 4)
dateValue DateTime? @map("date_value") @db.Timestamp(0)
dataType Int @map("data_type") @db.UnsignedInt
distinct_id String? @db.VarChar(50)
createdAt DateTime? @default(now()) @map("created_at") @db.Timestamp(0)
website Website @relation(fields: [websiteId], references: [id])

View file

@ -0,0 +1,5 @@
-- AlterTable: add distinct_id to "session" (no NOT NULL constraint is declared)
ALTER TABLE "session"
    ADD COLUMN "distinct_id" VARCHAR(50);

-- AlterTable: add distinct_id to "session_data" (no NOT NULL constraint is declared)
ALTER TABLE "session_data"
    ADD COLUMN "distinct_id" VARCHAR(50);

View file

@ -1,3 +1,3 @@
# Please do not edit this file manually
# It should be added in your version-control system (e.g., Git)
provider = "postgresql"
provider = "postgresql"

View file

@ -30,17 +30,18 @@ model User {
}
model Session {
id String @id @unique @map("session_id") @db.Uuid
websiteId String @map("website_id") @db.Uuid
browser String? @db.VarChar(20)
os String? @db.VarChar(20)
device String? @db.VarChar(20)
screen String? @db.VarChar(11)
language String? @db.VarChar(35)
country String? @db.Char(2)
region String? @db.VarChar(20)
city String? @db.VarChar(50)
createdAt DateTime? @default(now()) @map("created_at") @db.Timestamptz(6)
id String @id @unique @map("session_id") @db.Uuid
websiteId String @map("website_id") @db.Uuid
browser String? @db.VarChar(20)
os String? @db.VarChar(20)
device String? @db.VarChar(20)
screen String? @db.VarChar(11)
language String? @db.VarChar(35)
country String? @db.Char(2)
region String? @db.VarChar(20)
city String? @db.VarChar(50)
distinct_id String? @db.VarChar(50)
createdAt DateTime? @default(now()) @map("created_at") @db.Timestamptz(6)
websiteEvent WebsiteEvent[]
sessionData SessionData[]
@ -166,6 +167,7 @@ model SessionData {
numberValue Decimal? @map("number_value") @db.Decimal(19, 4)
dateValue DateTime? @map("date_value") @db.Timestamptz(6)
dataType Int @map("data_type") @db.Integer
distinct_id String? @db.VarChar(50)
createdAt DateTime? @default(now()) @map("created_at") @db.Timestamptz(6)
website Website @relation(fields: [websiteId], references: [id])

View file

@ -14,7 +14,7 @@ export default function SessionsDataTable({
const queryResult = useWebsiteSessions(websiteId);
return (
<DataTable queryResult={queryResult} allowSearch={false} renderEmpty={() => children}>
<DataTable queryResult={queryResult} allowSearch={true} renderEmpty={() => children}>
{({ data }) => <SessionsTable data={data} showDomain={!websiteId} />}
</DataTable>
);

View file

@ -33,17 +33,7 @@ export const FILTER_REFERRERS = 'filter-referrers';
export const FILTER_PAGES = 'filter-pages';
export const UNIT_TYPES = ['year', 'month', 'hour', 'day', 'minute'];
export const EVENT_COLUMNS = [
'url',
'entry',
'exit',
'referrer',
'title',
'query',
'event',
'tag',
'region',
];
export const EVENT_COLUMNS = ['url', 'entry', 'exit', 'referrer', 'title', 'query', 'event', 'tag'];
export const SESSION_COLUMNS = [
'browser',
@ -53,6 +43,7 @@ export const SESSION_COLUMNS = [
'language',
'country',
'city',
'region',
'host',
];

View file

@ -1,5 +1,5 @@
import clickhouse from '@/lib/clickhouse';
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
import { CLICKHOUSE, getDatabaseType, POSTGRESQL, PRISMA, runQuery } from '@/lib/db';
import prisma from '@/lib/prisma';
import { PageParams, QueryFilters } from '@/lib/types';
@ -14,10 +14,14 @@ export async function getWebsiteSessions(
async function relationalQuery(websiteId: string, filters: QueryFilters, pageParams: PageParams) {
const { pagedRawQuery, parseFilters } = prisma;
const { search } = pageParams;
const { filterQuery, params } = await parseFilters(websiteId, {
...filters,
});
const db = getDatabaseType();
const like = db === POSTGRESQL ? 'ilike' : 'like';
return pagedRawQuery(
`
with sessions as (
@ -43,6 +47,15 @@ async function relationalQuery(websiteId: string, filters: QueryFilters, pagePar
where website_event.website_id = {{websiteId::uuid}}
and website_event.created_at between {{startDate}} and {{endDate}}
${filterQuery}
${
search
? `and (distinct_id ${like} {{search}}
or city ${like} {{search}}
or browser ${like} {{search}}
or os ${like} {{search}}
or device ${like} {{search}})`
: ''
}
group by session.session_id,
session.website_id,
website_event.hostname,
@ -58,7 +71,7 @@ async function relationalQuery(websiteId: string, filters: QueryFilters, pagePar
limit 1000)
select * from sessions
`,
params,
{ ...params, search: `%${search}%` },
pageParams,
);
}
@ -66,6 +79,7 @@ async function relationalQuery(websiteId: string, filters: QueryFilters, pagePar
async function clickhouseQuery(websiteId: string, filters: QueryFilters, pageParams?: PageParams) {
const { pagedQuery, parseFilters, getDateStringSQL } = clickhouse;
const { params, dateQuery, filterQuery } = await parseFilters(websiteId, filters);
// pageParams is declared optional on this function, so read search without
// assuming the object is present; search is undefined when it is omitted.
const search = pageParams?.search;
return pagedQuery(
`
@ -91,12 +105,21 @@ async function clickhouseQuery(websiteId: string, filters: QueryFilters, pagePar
where website_id = {websiteId:UUID}
${dateQuery}
${filterQuery}
${
search
? `and ((positionCaseInsensitive(distinct_id, {search:String}) > 0)
or (positionCaseInsensitive(city, {search:String}) > 0)
or (positionCaseInsensitive(browser, {search:String}) > 0)
or (positionCaseInsensitive(os, {search:String}) > 0)
or (positionCaseInsensitive(device, {search:String}) > 0))`
: ''
}
group by session_id, website_id, hostname, browser, os, device, screen, language, country, region, city
order by lastAt desc
limit 1000)
select * from sessions
`,
params,
{ ...params, search },
pageParams,
);
}