mirror of
https://github.com/umami-software/umami.git
synced 2026-02-04 04:37:11 +01:00
Merge ecf0b7bef4 into 7e42b5b35e
This commit is contained in:
commit
0e0a0795b7
11 changed files with 306 additions and 39 deletions
|
|
@ -39,6 +39,7 @@ CREATE TABLE umami.website_event
|
|||
event_name String,
|
||||
tag String,
|
||||
distinct_id String,
|
||||
visitor_id String,
|
||||
created_at DateTime('UTC'),
|
||||
job_id Nullable(UUID)
|
||||
)
|
||||
|
|
@ -123,6 +124,7 @@ CREATE TABLE umami.website_event_stats_hourly
|
|||
max_time SimpleAggregateFunction(max, DateTime('UTC')),
|
||||
tag SimpleAggregateFunction(groupArrayArray, Array(String)),
|
||||
distinct_id String,
|
||||
visitor_id String,
|
||||
created_at Datetime('UTC')
|
||||
)
|
||||
ENGINE = AggregatingMergeTree
|
||||
|
|
@ -176,6 +178,7 @@ SELECT
|
|||
max_time,
|
||||
tag,
|
||||
distinct_id,
|
||||
visitor_id,
|
||||
timestamp as created_at
|
||||
FROM (SELECT
|
||||
website_id,
|
||||
|
|
@ -214,6 +217,7 @@ FROM (SELECT
|
|||
max(created_at) max_time,
|
||||
arrayFilter(x -> x != '', groupArray(tag)) tag,
|
||||
distinct_id,
|
||||
visitor_id,
|
||||
toStartOfHour(created_at) timestamp
|
||||
FROM umami.website_event
|
||||
GROUP BY website_id,
|
||||
|
|
@ -230,6 +234,7 @@ GROUP BY website_id,
|
|||
city,
|
||||
event_type,
|
||||
distinct_id,
|
||||
visitor_id,
|
||||
timestamp);
|
||||
|
||||
-- projections
|
||||
|
|
@ -281,3 +286,16 @@ JOIN (SELECT event_id, string_value as currency
|
|||
WHERE positionCaseInsensitive(data_key, 'currency') > 0) c
|
||||
ON c.event_id = ed.event_id
|
||||
WHERE positionCaseInsensitive(data_key, 'revenue') > 0;
|
||||
|
||||
-- identity linking
|
||||
CREATE TABLE umami.identity_link
|
||||
(
|
||||
website_id UUID,
|
||||
visitor_id String,
|
||||
distinct_id String,
|
||||
created_at DateTime('UTC'),
|
||||
linked_at DateTime('UTC')
|
||||
)
|
||||
ENGINE = ReplacingMergeTree(linked_at)
|
||||
ORDER BY (website_id, visitor_id, distinct_id)
|
||||
SETTINGS index_granularity = 8192;
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ model Session {
|
|||
region String? @db.VarChar(20)
|
||||
city String? @db.VarChar(50)
|
||||
distinctId String? @map("distinct_id") @db.VarChar(50)
|
||||
visitorId String? @map("visitor_id") @db.VarChar(50)
|
||||
createdAt DateTime? @default(now()) @map("created_at") @db.Timestamptz(6)
|
||||
|
||||
websiteEvents WebsiteEvent[]
|
||||
|
|
@ -60,6 +61,7 @@ model Session {
|
|||
@@index([websiteId, createdAt, country])
|
||||
@@index([websiteId, createdAt, region])
|
||||
@@index([websiteId, createdAt, city])
|
||||
@@index([websiteId, visitorId])
|
||||
@@map("session")
|
||||
}
|
||||
|
||||
|
|
@ -84,6 +86,7 @@ model Website {
|
|||
revenue Revenue[]
|
||||
segments Segment[]
|
||||
sessionData SessionData[]
|
||||
identityLinks IdentityLink[]
|
||||
|
||||
@@index([userId])
|
||||
@@index([teamId])
|
||||
|
|
@ -316,3 +319,19 @@ model Pixel {
|
|||
@@index([createdAt])
|
||||
@@map("pixel")
|
||||
}
|
||||
|
||||
model IdentityLink {
|
||||
id String @id @unique @map("identity_link_id") @db.Uuid
|
||||
websiteId String @map("website_id") @db.Uuid
|
||||
visitorId String @map("visitor_id") @db.VarChar(50)
|
||||
distinctId String @map("distinct_id") @db.VarChar(50)
|
||||
createdAt DateTime @default(now()) @map("created_at") @db.Timestamptz(6)
|
||||
linkedAt DateTime @default(now()) @updatedAt @map("linked_at") @db.Timestamptz(6)
|
||||
|
||||
website Website @relation(fields: [websiteId], references: [id], onDelete: Cascade)
|
||||
|
||||
@@unique([websiteId, visitorId, distinctId])
|
||||
@@index([websiteId, distinctId])
|
||||
@@index([websiteId, visitorId])
|
||||
@@map("identity_link")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import { secret, uuid, hash } from '@/lib/crypto';
|
|||
import { COLLECTION_TYPE, EVENT_TYPE } from '@/lib/constants';
|
||||
import { anyObjectParam, urlOrPathParam } from '@/lib/schema';
|
||||
import { safeDecodeURI, safeDecodeURIComponent } from '@/lib/url';
|
||||
import { createSession, saveEvent, saveSessionData } from '@/queries/sql';
|
||||
import { createSession, saveEvent, saveSessionData, createIdentityLink } from '@/queries/sql';
|
||||
import { serializeError } from 'serialize-error';
|
||||
|
||||
interface Cache {
|
||||
|
|
@ -41,6 +41,7 @@ const schema = z.object({
|
|||
userAgent: z.string().optional(),
|
||||
timestamp: z.coerce.number().int().optional(),
|
||||
id: z.string().optional(),
|
||||
vid: z.string().max(50).optional(),
|
||||
})
|
||||
.refine(
|
||||
data => {
|
||||
|
|
@ -80,6 +81,7 @@ export async function POST(request: Request) {
|
|||
tag,
|
||||
timestamp,
|
||||
id,
|
||||
vid: visitorId,
|
||||
} = payload;
|
||||
|
||||
const sourceId = websiteId || pixelId || linkId;
|
||||
|
|
@ -146,6 +148,7 @@ export async function POST(request: Request) {
|
|||
region,
|
||||
city,
|
||||
distinctId: id,
|
||||
visitorId,
|
||||
createdAt,
|
||||
});
|
||||
}
|
||||
|
|
@ -226,6 +229,7 @@ export async function POST(request: Request) {
|
|||
|
||||
// Session
|
||||
distinctId: id,
|
||||
visitorId,
|
||||
browser,
|
||||
os,
|
||||
device,
|
||||
|
|
@ -265,6 +269,19 @@ export async function POST(request: Request) {
|
|||
createdAt,
|
||||
});
|
||||
}
|
||||
|
||||
// Create identity link when both visitorId and distinctId are present
|
||||
// Fire-and-forget to avoid adding latency to the tracking endpoint
|
||||
if (visitorId && id && websiteId) {
|
||||
createIdentityLink({
|
||||
websiteId,
|
||||
visitorId,
|
||||
distinctId: id,
|
||||
}).catch(e => {
|
||||
// eslint-disable-next-line no-console
|
||||
console.error('Failed to create identity link:', e);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const token = createToken({ websiteId, sessionId, visitId, iat }, secret());
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ export interface SaveEventArgs {
|
|||
|
||||
// Session
|
||||
distinctId?: string;
|
||||
visitorId?: string;
|
||||
browser?: string;
|
||||
os?: string;
|
||||
device?: string;
|
||||
|
|
@ -164,6 +165,7 @@ async function clickhouseQuery({
|
|||
referrerQuery,
|
||||
referrerDomain,
|
||||
distinctId,
|
||||
visitorId,
|
||||
browser,
|
||||
os,
|
||||
device,
|
||||
|
|
@ -220,6 +222,7 @@ async function clickhouseQuery({
|
|||
event_name: eventName ? eventName?.substring(0, EVENT_NAME_LENGTH) : null,
|
||||
tag: tag,
|
||||
distinct_id: distinctId,
|
||||
visitor_id: visitorId,
|
||||
created_at: getUTCString(createdAt),
|
||||
browser,
|
||||
os,
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ async function relationalQuery(
|
|||
`
|
||||
select
|
||||
cast(coalesce(sum(t.c), 0) as bigint) as "pageviews",
|
||||
count(distinct t.session_id) as "visitors",
|
||||
count(distinct coalesce(t.resolved_identity, t.visitor_id, t.session_id::text)) as "visitors",
|
||||
count(distinct t.visit_id) as "visits",
|
||||
coalesce(sum(case when t.c = 1 then 1 else 0 end), 0) as "bounces",
|
||||
cast(coalesce(sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}), 0) as bigint) as "totaltime"
|
||||
|
|
@ -45,17 +45,23 @@ async function relationalQuery(
|
|||
select
|
||||
website_event.session_id,
|
||||
website_event.visit_id,
|
||||
session.visitor_id,
|
||||
il.distinct_id as "resolved_identity",
|
||||
count(*) as "c",
|
||||
min(website_event.created_at) as "min_time",
|
||||
max(website_event.created_at) as "max_time"
|
||||
from website_event
|
||||
${cohortQuery}
|
||||
${joinSessionQuery}
|
||||
left join session on session.session_id = website_event.session_id
|
||||
and session.website_id = website_event.website_id
|
||||
left join identity_link il on il.visitor_id = session.visitor_id
|
||||
and il.website_id = session.website_id
|
||||
where website_event.website_id = {{websiteId::uuid}}
|
||||
and website_event.created_at between {{startDate}} and {{endDate}}
|
||||
and website_event.event_type != 2
|
||||
${filterQuery}
|
||||
group by 1, 2
|
||||
group by 1, 2, 3, 4
|
||||
) as t
|
||||
`,
|
||||
queryParams,
|
||||
|
|
@ -79,47 +85,55 @@ async function clickhouseQuery(
|
|||
sql = `
|
||||
select
|
||||
sum(t.c) as "pageviews",
|
||||
uniq(t.session_id) as "visitors",
|
||||
uniq(coalesce(t.resolved_identity, t.visitor_id, toString(t.session_id))) as "visitors",
|
||||
uniq(t.visit_id) as "visits",
|
||||
sum(if(t.c = 1, 1, 0)) as "bounces",
|
||||
sum(max_time-min_time) as "totaltime"
|
||||
from (
|
||||
select
|
||||
session_id,
|
||||
visit_id,
|
||||
we.session_id,
|
||||
we.visit_id,
|
||||
we.visitor_id,
|
||||
il.distinct_id as resolved_identity,
|
||||
count(*) c,
|
||||
min(created_at) min_time,
|
||||
max(created_at) max_time
|
||||
from website_event
|
||||
min(we.created_at) min_time,
|
||||
max(we.created_at) max_time
|
||||
from website_event we
|
||||
${cohortQuery}
|
||||
where website_id = {websiteId:UUID}
|
||||
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
and event_type != 2
|
||||
left join identity_link final il on il.visitor_id = we.visitor_id
|
||||
and il.website_id = we.website_id
|
||||
where we.website_id = {websiteId:UUID}
|
||||
and we.created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
and we.event_type != 2
|
||||
${filterQuery}
|
||||
group by session_id, visit_id
|
||||
group by we.session_id, we.visit_id, we.visitor_id, il.distinct_id
|
||||
) as t;
|
||||
`;
|
||||
} else {
|
||||
sql = `
|
||||
select
|
||||
sum(t.c) as "pageviews",
|
||||
uniq(session_id) as "visitors",
|
||||
uniq(coalesce(resolved_identity, visitor_id, toString(session_id))) as "visitors",
|
||||
uniq(visit_id) as "visits",
|
||||
sumIf(1, t.c = 1) as "bounces",
|
||||
sum(max_time-min_time) as "totaltime"
|
||||
from (select
|
||||
session_id,
|
||||
visit_id,
|
||||
sum(views) c,
|
||||
min(min_time) min_time,
|
||||
max(max_time) max_time
|
||||
from website_event_stats_hourly "website_event"
|
||||
we.session_id,
|
||||
we.visit_id,
|
||||
we.visitor_id,
|
||||
il.distinct_id as resolved_identity,
|
||||
sum(we.views) c,
|
||||
min(we.min_time) min_time,
|
||||
max(we.max_time) max_time
|
||||
from website_event_stats_hourly we
|
||||
${cohortQuery}
|
||||
where website_id = {websiteId:UUID}
|
||||
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
and event_type != 2
|
||||
left join identity_link final il on il.visitor_id = we.visitor_id
|
||||
and il.website_id = we.website_id
|
||||
where we.website_id = {websiteId:UUID}
|
||||
and we.created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
and we.event_type != 2
|
||||
${filterQuery}
|
||||
group by session_id, visit_id
|
||||
group by we.session_id, we.visit_id, we.visitor_id, il.distinct_id
|
||||
) as t;
|
||||
`;
|
||||
}
|
||||
|
|
|
|||
76
src/queries/sql/identity/createIdentityLink.ts
Normal file
76
src/queries/sql/identity/createIdentityLink.ts
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
/**
|
||||
* Identity Stitching - Links anonymous browser sessions to authenticated user identities
|
||||
*
|
||||
* Design decisions:
|
||||
* - One visitor can link to multiple distinct_ids (user logs into different accounts)
|
||||
* - One distinct_id can link to multiple visitors (user on multiple devices/browsers)
|
||||
* - Links are additive and never invalidated (preserves historical journey)
|
||||
* - Uses ReplacingMergeTree in ClickHouse with linked_at for deduplication
|
||||
* - Upsert pattern ensures idempotency for repeated identify() calls
|
||||
*
|
||||
* Edge cases handled:
|
||||
* - Safari private browsing: visitorId will be undefined, no link created
|
||||
* - localStorage cleared: new visitorId generated, creates new link
|
||||
* - Multiple tabs: same visitorId shared via localStorage
|
||||
*/
|
||||
import { uuid } from '@/lib/crypto';
|
||||
import prisma from '@/lib/prisma';
|
||||
import clickhouse from '@/lib/clickhouse';
|
||||
import kafka from '@/lib/kafka';
|
||||
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
|
||||
|
||||
export interface CreateIdentityLinkArgs {
|
||||
websiteId: string;
|
||||
visitorId: string;
|
||||
distinctId: string;
|
||||
}
|
||||
|
||||
export async function createIdentityLink(data: CreateIdentityLinkArgs) {
|
||||
return runQuery({
|
||||
[PRISMA]: () => relationalQuery(data),
|
||||
[CLICKHOUSE]: () => clickhouseQuery(data),
|
||||
});
|
||||
}
|
||||
|
||||
async function relationalQuery({ websiteId, visitorId, distinctId }: CreateIdentityLinkArgs) {
|
||||
const { client } = prisma;
|
||||
|
||||
return client.identityLink.upsert({
|
||||
where: {
|
||||
websiteId_visitorId_distinctId: {
|
||||
websiteId,
|
||||
visitorId,
|
||||
distinctId,
|
||||
},
|
||||
},
|
||||
update: {
|
||||
linkedAt: new Date(),
|
||||
},
|
||||
create: {
|
||||
id: uuid(),
|
||||
websiteId,
|
||||
visitorId,
|
||||
distinctId,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
async function clickhouseQuery({ websiteId, visitorId, distinctId }: CreateIdentityLinkArgs) {
|
||||
const { insert, getUTCString } = clickhouse;
|
||||
const { sendMessage } = kafka;
|
||||
|
||||
const now = getUTCString(new Date());
|
||||
const message = {
|
||||
website_id: websiteId,
|
||||
visitor_id: visitorId,
|
||||
distinct_id: distinctId,
|
||||
created_at: now,
|
||||
linked_at: now,
|
||||
};
|
||||
|
||||
if (kafka.enabled) {
|
||||
await sendMessage('identity_link', message);
|
||||
} else {
|
||||
await insert('identity_link', [message]);
|
||||
}
|
||||
}
|
||||
71
src/queries/sql/identity/getLinkedVisitorIds.ts
Normal file
71
src/queries/sql/identity/getLinkedVisitorIds.ts
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
/**
|
||||
* Resolves all visitor IDs linked to a given distinct_id (authenticated user)
|
||||
*
|
||||
* Use cases (for future implementation):
|
||||
* - User journey reports: aggregate sessions across devices
|
||||
* - Cohort analysis: include all linked sessions
|
||||
* - Retroactive attribution: credit conversions to original anonymous session
|
||||
*
|
||||
* Note: Uses FINAL keyword in ClickHouse to ensure deduplication from ReplacingMergeTree
|
||||
*/
|
||||
import prisma from '@/lib/prisma';
|
||||
import clickhouse from '@/lib/clickhouse';
|
||||
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
|
||||
|
||||
export interface GetLinkedVisitorIdsArgs {
|
||||
websiteId: string;
|
||||
distinctId: string;
|
||||
}
|
||||
|
||||
export interface LinkedVisitorId {
|
||||
visitorId: string;
|
||||
linkedAt: Date;
|
||||
}
|
||||
|
||||
export async function getLinkedVisitorIds(
|
||||
data: GetLinkedVisitorIdsArgs,
|
||||
): Promise<LinkedVisitorId[]> {
|
||||
return runQuery({
|
||||
[PRISMA]: () => relationalQuery(data),
|
||||
[CLICKHOUSE]: () => clickhouseQuery(data),
|
||||
});
|
||||
}
|
||||
|
||||
async function relationalQuery({
|
||||
websiteId,
|
||||
distinctId,
|
||||
}: GetLinkedVisitorIdsArgs): Promise<LinkedVisitorId[]> {
|
||||
const { client } = prisma;
|
||||
|
||||
const links = await client.identityLink.findMany({
|
||||
where: {
|
||||
websiteId,
|
||||
distinctId,
|
||||
},
|
||||
select: {
|
||||
visitorId: true,
|
||||
linkedAt: true,
|
||||
},
|
||||
});
|
||||
|
||||
return links;
|
||||
}
|
||||
|
||||
async function clickhouseQuery({
|
||||
websiteId,
|
||||
distinctId,
|
||||
}: GetLinkedVisitorIdsArgs): Promise<LinkedVisitorId[]> {
|
||||
const { rawQuery } = clickhouse;
|
||||
|
||||
return rawQuery<LinkedVisitorId[]>(
|
||||
`
|
||||
select
|
||||
visitor_id as visitorId,
|
||||
linked_at as linkedAt
|
||||
from identity_link final
|
||||
where website_id = {websiteId:UUID}
|
||||
and distinct_id = {distinctId:String}
|
||||
`,
|
||||
{ websiteId, distinctId },
|
||||
);
|
||||
}
|
||||
2
src/queries/sql/identity/index.ts
Normal file
2
src/queries/sql/identity/index.ts
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
export * from './createIdentityLink';
|
||||
export * from './getLinkedVisitorIds';
|
||||
|
|
@ -39,3 +39,5 @@ export * from './getValues';
|
|||
export * from './getWebsiteDateRange';
|
||||
export * from './getWebsiteStats';
|
||||
export * from './getWeeklyTraffic';
|
||||
export * from './identity/createIdentityLink';
|
||||
export * from './identity/getLinkedVisitorIds';
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ export async function createSession(data: Prisma.SessionCreateInput) {
|
|||
region,
|
||||
city,
|
||||
distinct_id,
|
||||
visitor_id,
|
||||
created_at
|
||||
)
|
||||
values (
|
||||
|
|
@ -34,6 +35,7 @@ export async function createSession(data: Prisma.SessionCreateInput) {
|
|||
{{region}},
|
||||
{{city}},
|
||||
{{distinctId}},
|
||||
{{visitorId}},
|
||||
{{createdAt}}
|
||||
)
|
||||
on conflict (session_id) do nothing
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@
|
|||
const excludeHash = attr(_data + 'exclude-hash') === _true;
|
||||
const domain = attr(_data + 'domains') || '';
|
||||
const credentials = attr(_data + 'fetch-credentials') || 'omit';
|
||||
const identityStitching = attr(_data + 'identity-stitching') !== _false;
|
||||
|
||||
const domains = domain.split(',').map(n => n.trim());
|
||||
const host =
|
||||
|
|
@ -41,6 +42,38 @@
|
|||
|
||||
/* Helper functions */
|
||||
|
||||
/**
|
||||
* Identity Stitching: Generates a persistent visitor ID stored in localStorage.
|
||||
* When combined with identify(), links anonymous sessions to authenticated users.
|
||||
* Gracefully degrades when localStorage is unavailable (Safari private browsing).
|
||||
*/
|
||||
const generateUUID = () =>
|
||||
'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, c => {
|
||||
const r = (Math.random() * 16) | 0;
|
||||
return (c === 'x' ? r : (r & 0x3) | 0x8).toString(16);
|
||||
});
|
||||
|
||||
const getVisitorId = () => {
|
||||
if (!identityStitching || !localStorage) return undefined;
|
||||
|
||||
try {
|
||||
const storageKey = 'umami.visitor';
|
||||
let vid = localStorage.getItem(storageKey);
|
||||
|
||||
if (!vid) {
|
||||
vid = typeof crypto !== 'undefined' && crypto.randomUUID ? crypto.randomUUID() : generateUUID();
|
||||
localStorage.setItem(storageKey, vid);
|
||||
}
|
||||
|
||||
return vid;
|
||||
} catch {
|
||||
// localStorage access throws in Safari private browsing
|
||||
return undefined;
|
||||
}
|
||||
};
|
||||
|
||||
const visitorId = getVisitorId();
|
||||
|
||||
const normalize = raw => {
|
||||
if (!raw) return raw;
|
||||
try {
|
||||
|
|
@ -63,6 +96,7 @@
|
|||
referrer: currentRef,
|
||||
tag,
|
||||
id: identity ? identity : undefined,
|
||||
vid: visitorId,
|
||||
});
|
||||
|
||||
const hasDoNotTrack = () => {
|
||||
|
|
@ -141,12 +175,21 @@
|
|||
|
||||
/* Tracking functions */
|
||||
|
||||
const trackingDisabled = () =>
|
||||
const trackingDisabled = () => {
|
||||
let storageDisabled = false;
|
||||
try {
|
||||
storageDisabled = localStorage && localStorage.getItem('umami.disabled');
|
||||
} catch {
|
||||
// localStorage throws in Safari private browsing
|
||||
}
|
||||
return (
|
||||
disabled ||
|
||||
!website ||
|
||||
(localStorage && localStorage.getItem('umami.disabled')) ||
|
||||
storageDisabled ||
|
||||
(domain && !domains.includes(hostname)) ||
|
||||
(dnt && hasDoNotTrack());
|
||||
(dnt && hasDoNotTrack())
|
||||
);
|
||||
};
|
||||
|
||||
const send = async (payload, type = 'event') => {
|
||||
if (trackingDisabled()) return;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue