distinct_id schema changes and search on sessions page

This commit is contained in:
Francis Cao 2025-04-29 08:57:58 -07:00
parent 9d3c926d69
commit c5efc27c07
11 changed files with 176 additions and 39 deletions

View file

@ -0,0 +1,103 @@
-- add tag column
ALTER TABLE umami.website_event ADD COLUMN "distinct_id" String AFTER "tag";
ALTER TABLE umami.website_event_stats_hourly ADD COLUMN "distinct_id" String AFTER "tag";
ALTER TABLE umami.session_data ADD COLUMN "distinct_id" String AFTER "data_type";
-- update materialized view
DROP TABLE umami.website_event_stats_hourly_mv;
CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv
TO umami.website_event_stats_hourly
AS
SELECT
website_id,
session_id,
visit_id,
hostname,
browser,
os,
device,
screen,
language,
country,
region,
city,
entry_url,
exit_url,
url_paths as url_path,
url_query,
utm_source,
utm_medium,
utm_campaign,
utm_content,
utm_term,
referrer_domain,
page_title,
gclid,
fbclid,
msclkid,
ttclid,
li_fat_id,
twclid,
event_type,
event_name,
views,
min_time,
max_time,
tag,
distinct_id,
timestamp as created_at
FROM (SELECT
website_id,
session_id,
visit_id,
hostname,
browser,
os,
device,
screen,
language,
country,
region,
city,
argMinState(url_path, created_at) entry_url,
argMaxState(url_path, created_at) exit_url,
arrayFilter(x -> x != '', groupArray(url_path)) as url_paths,
arrayFilter(x -> x != '', groupArray(url_query)) url_query,
arrayFilter(x -> x != '', groupArray(utm_source)) utm_source,
arrayFilter(x -> x != '', groupArray(utm_medium)) utm_medium,
arrayFilter(x -> x != '', groupArray(utm_campaign)) utm_campaign,
arrayFilter(x -> x != '', groupArray(utm_content)) utm_content,
arrayFilter(x -> x != '', groupArray(utm_term)) utm_term,
arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain,
arrayFilter(x -> x != '', groupArray(page_title)) page_title,
arrayFilter(x -> x != '', groupArray(gclid)) gclid,
arrayFilter(x -> x != '', groupArray(fbclid)) fbclid,
arrayFilter(x -> x != '', groupArray(msclkid)) msclkid,
arrayFilter(x -> x != '', groupArray(ttclid)) ttclid,
arrayFilter(x -> x != '', groupArray(li_fat_id)) li_fat_id,
arrayFilter(x -> x != '', groupArray(twclid)) twclid,
event_type,
if(event_type = 2, groupArray(event_name), []) event_name,
sumIf(1, event_type = 1) views,
min(created_at) min_time,
max(created_at) max_time,
arrayFilter(x -> x != '', groupArray(tag)) tag,
distinct_id,
toStartOfHour(created_at) timestamp
FROM umami.website_event
GROUP BY website_id,
session_id,
visit_id,
hostname,
browser,
os,
device,
screen,
language,
country,
region,
city,
event_type,
distinct_id,
timestamp);

View file

@ -38,6 +38,7 @@ CREATE TABLE umami.website_event
event_type UInt32,
event_name String,
tag String,
distinct_id String,
created_at DateTime('UTC'),
job_id Nullable(UUID)
)
@ -75,6 +76,7 @@ CREATE TABLE umami.session_data
number_value Nullable(Decimal64(4)),
date_value Nullable(DateTime('UTC')),
data_type UInt32,
distinct_id String,
created_at DateTime('UTC'),
job_id Nullable(UUID)
)
@ -120,6 +122,7 @@ CREATE TABLE umami.website_event_stats_hourly
min_time SimpleAggregateFunction(min, DateTime('UTC')),
max_time SimpleAggregateFunction(max, DateTime('UTC')),
tag SimpleAggregateFunction(groupArrayArray, Array(String)),
distinct_id,
created_at Datetime('UTC')
)
ENGINE = AggregatingMergeTree
@ -172,6 +175,7 @@ SELECT
min_time,
max_time,
tag,
distinct_id,
timestamp as created_at
FROM (SELECT
website_id,
@ -209,6 +213,7 @@ FROM (SELECT
min(created_at) min_time,
max(created_at) max_time,
arrayFilter(x -> x != '', groupArray(tag)) tag,
distinct_id String,
toStartOfHour(created_at) timestamp
FROM umami.website_event
GROUP BY website_id,
@ -224,6 +229,7 @@ GROUP BY website_id,
region,
city,
event_type,
distinct_id,
timestamp);
-- projections