mirror of
https://github.com/umami-software/umami.git
synced 2025-12-06 01:18:00 +01:00
feat: implement automatic session linking and identity stitching (#3820)
Links anonymous browser sessions to authenticated user identities, enabling unified
user journey tracking across login boundaries. This solves the "logged-out anonymous
session → logged-in session" tracking gap, providing complete funnel visibility and
accurate visitor deduplication.
## Changes
- Client-side: Persistent visitor ID in localStorage (data-identity-stitching attribute)
- Server-side: identity_link table linking visitors to distinct IDs (authenticated users)
- Query updates: getWebsiteStats now deduplicates by resolved identity
- Graceful degradation: Works in Safari private browsing and when localStorage is unavailable
## Implementation Details
Uses hybrid approach combining client-side persistence with server-side linking:
- Visitor ID generated once per browser, persists across sessions
- When user logs in, identify() creates identity link
- Stats queries join through identity_link to deduplicate cross-device sessions
Both PostgreSQL and ClickHouse supported with appropriate query patterns:
- PostgreSQL: normalized schema, joins through session table
- ClickHouse: denormalized with ReplacingMergeTree for deduplication
## Edge Cases Handled
- Safari private browsing: localStorage throws, visitorId undefined, no link created
- localStorage cleared: new visitorId generated, creates new link
- Multiple tabs: same visitorId shared via localStorage
- Multiple devices: one distinct_id can link to multiple visitors
- Multiple accounts: one visitor can link to multiple distinct_ids
## Test Plan
- [ ] Enable feature on test website (default enabled)
- [ ] Anonymous pageview - confirm visitor_id in events table
- [ ] Call umami.identify('user1') - confirm identity_link created
- [ ] Stats show 1 visitor (deduplicated)
- [ ] Log out, browse anonymously, stats still show 1 visitor
- [ ] Test with data-identity-stitching="false" - no visitor_id collected
- [ ] Test in Safari private browsing - no errors, gracefully skips
- [ ] Test ClickHouse: verify identity_link table populated and FINAL keyword works
- [ ] Verify retroactive: historical anonymous session attributed correctly
This commit is contained in:
parent
a902a87c08
commit
34db34759f
7 changed files with 76 additions and 24 deletions
|
|
@ -293,6 +293,7 @@ CREATE TABLE umami.identity_link
|
||||||
website_id UUID,
|
website_id UUID,
|
||||||
visitor_id String,
|
visitor_id String,
|
||||||
distinct_id String,
|
distinct_id String,
|
||||||
|
created_at DateTime('UTC'),
|
||||||
linked_at DateTime('UTC')
|
linked_at DateTime('UTC')
|
||||||
)
|
)
|
||||||
ENGINE = ReplacingMergeTree(linked_at)
|
ENGINE = ReplacingMergeTree(linked_at)
|
||||||
|
|
|
||||||
|
|
@ -321,11 +321,12 @@ model Pixel {
|
||||||
}
|
}
|
||||||
|
|
||||||
model IdentityLink {
|
model IdentityLink {
|
||||||
id String @id @unique @map("identity_link_id") @db.Uuid
|
id String @id @unique @map("identity_link_id") @db.Uuid
|
||||||
websiteId String @map("website_id") @db.Uuid
|
websiteId String @map("website_id") @db.Uuid
|
||||||
visitorId String @map("visitor_id") @db.VarChar(50)
|
visitorId String @map("visitor_id") @db.VarChar(50)
|
||||||
distinctId String @map("distinct_id") @db.VarChar(50)
|
distinctId String @map("distinct_id") @db.VarChar(50)
|
||||||
linkedAt DateTime @default(now()) @map("linked_at") @db.Timestamptz(6)
|
createdAt DateTime @default(now()) @map("created_at") @db.Timestamptz(6)
|
||||||
|
linkedAt DateTime @default(now()) @updatedAt @map("linked_at") @db.Timestamptz(6)
|
||||||
|
|
||||||
website Website @relation(fields: [websiteId], references: [id], onDelete: Cascade)
|
website Website @relation(fields: [websiteId], references: [id], onDelete: Cascade)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -271,11 +271,15 @@ export async function POST(request: Request) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create identity link when both visitorId and distinctId are present
|
// Create identity link when both visitorId and distinctId are present
|
||||||
|
// Fire-and-forget to avoid adding latency to the tracking endpoint
|
||||||
if (visitorId && id && websiteId) {
|
if (visitorId && id && websiteId) {
|
||||||
await createIdentityLink({
|
createIdentityLink({
|
||||||
websiteId,
|
websiteId,
|
||||||
visitorId,
|
visitorId,
|
||||||
distinctId: id,
|
distinctId: id,
|
||||||
|
}).catch(e => {
|
||||||
|
// eslint-disable-next-line no-console
|
||||||
|
console.error('Failed to create identity link:', e);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -84,7 +84,7 @@ async function clickhouseQuery(
|
||||||
sql = `
|
sql = `
|
||||||
select
|
select
|
||||||
sum(t.c) as "pageviews",
|
sum(t.c) as "pageviews",
|
||||||
uniq(coalesce(t.resolved_identity, t.session_id)) as "visitors",
|
uniq(coalesce(t.resolved_identity, toString(t.session_id))) as "visitors",
|
||||||
uniq(t.visit_id) as "visits",
|
uniq(t.visit_id) as "visits",
|
||||||
sum(if(t.c = 1, 1, 0)) as "bounces",
|
sum(if(t.c = 1, 1, 0)) as "bounces",
|
||||||
sum(max_time-min_time) as "totaltime"
|
sum(max_time-min_time) as "totaltime"
|
||||||
|
|
@ -98,7 +98,7 @@ async function clickhouseQuery(
|
||||||
max(we.created_at) max_time
|
max(we.created_at) max_time
|
||||||
from website_event we
|
from website_event we
|
||||||
${cohortQuery}
|
${cohortQuery}
|
||||||
left join identity_link il on il.visitor_id = we.visitor_id
|
left join identity_link final il on il.visitor_id = we.visitor_id
|
||||||
and il.website_id = we.website_id
|
and il.website_id = we.website_id
|
||||||
where we.website_id = {websiteId:UUID}
|
where we.website_id = {websiteId:UUID}
|
||||||
and we.created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
and we.created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||||
|
|
@ -111,7 +111,7 @@ async function clickhouseQuery(
|
||||||
sql = `
|
sql = `
|
||||||
select
|
select
|
||||||
sum(t.c) as "pageviews",
|
sum(t.c) as "pageviews",
|
||||||
uniq(coalesce(resolved_identity, session_id)) as "visitors",
|
uniq(coalesce(resolved_identity, toString(session_id))) as "visitors",
|
||||||
uniq(visit_id) as "visits",
|
uniq(visit_id) as "visits",
|
||||||
sumIf(1, t.c = 1) as "bounces",
|
sumIf(1, t.c = 1) as "bounces",
|
||||||
sum(max_time-min_time) as "totaltime"
|
sum(max_time-min_time) as "totaltime"
|
||||||
|
|
@ -124,7 +124,7 @@ async function clickhouseQuery(
|
||||||
max(we.max_time) max_time
|
max(we.max_time) max_time
|
||||||
from website_event_stats_hourly we
|
from website_event_stats_hourly we
|
||||||
${cohortQuery}
|
${cohortQuery}
|
||||||
left join identity_link il on il.visitor_id = we.visitor_id
|
left join identity_link final il on il.visitor_id = we.visitor_id
|
||||||
and il.website_id = we.website_id
|
and il.website_id = we.website_id
|
||||||
where we.website_id = {websiteId:UUID}
|
where we.website_id = {websiteId:UUID}
|
||||||
and we.created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
and we.created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,18 @@
|
||||||
|
/**
|
||||||
|
* Identity Stitching - Links anonymous browser sessions to authenticated user identities
|
||||||
|
*
|
||||||
|
* Design decisions:
|
||||||
|
* - One visitor can link to multiple distinct_ids (user logs into different accounts)
|
||||||
|
* - One distinct_id can link to multiple visitors (user on multiple devices/browsers)
|
||||||
|
* - Links are additive and never invalidated (preserves historical journey)
|
||||||
|
* - Uses ReplacingMergeTree in ClickHouse with linked_at for deduplication
|
||||||
|
* - Upsert pattern ensures idempotency for repeated identify() calls
|
||||||
|
*
|
||||||
|
* Edge cases handled:
|
||||||
|
* - Safari private browsing: visitorId will be undefined, no link created
|
||||||
|
* - localStorage cleared: new visitorId generated, creates new link
|
||||||
|
* - Multiple tabs: same visitorId shared via localStorage
|
||||||
|
*/
|
||||||
import { uuid } from '@/lib/crypto';
|
import { uuid } from '@/lib/crypto';
|
||||||
import prisma from '@/lib/prisma';
|
import prisma from '@/lib/prisma';
|
||||||
import clickhouse from '@/lib/clickhouse';
|
import clickhouse from '@/lib/clickhouse';
|
||||||
|
|
@ -44,11 +59,13 @@ async function clickhouseQuery({ websiteId, visitorId, distinctId }: CreateIdent
|
||||||
const { insert, getUTCString } = clickhouse;
|
const { insert, getUTCString } = clickhouse;
|
||||||
const { sendMessage } = kafka;
|
const { sendMessage } = kafka;
|
||||||
|
|
||||||
|
const now = getUTCString(new Date());
|
||||||
const message = {
|
const message = {
|
||||||
website_id: websiteId,
|
website_id: websiteId,
|
||||||
visitor_id: visitorId,
|
visitor_id: visitorId,
|
||||||
distinct_id: distinctId,
|
distinct_id: distinctId,
|
||||||
linked_at: getUTCString(new Date()),
|
created_at: now,
|
||||||
|
linked_at: now,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (kafka.enabled) {
|
if (kafka.enabled) {
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,13 @@
|
||||||
|
/**
|
||||||
|
* Resolves all visitor IDs linked to a given distinct_id (authenticated user)
|
||||||
|
*
|
||||||
|
* Use cases (for future implementation):
|
||||||
|
* - User journey reports: aggregate sessions across devices
|
||||||
|
* - Cohort analysis: include all linked sessions
|
||||||
|
* - Retroactive attribution: credit conversions to original anonymous session
|
||||||
|
*
|
||||||
|
* Note: Uses FINAL keyword in ClickHouse to ensure deduplication from ReplacingMergeTree
|
||||||
|
*/
|
||||||
import prisma from '@/lib/prisma';
|
import prisma from '@/lib/prisma';
|
||||||
import clickhouse from '@/lib/clickhouse';
|
import clickhouse from '@/lib/clickhouse';
|
||||||
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
|
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,11 @@
|
||||||
|
|
||||||
/* Helper functions */
|
/* Helper functions */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Identity Stitching: Generates a persistent visitor ID stored in localStorage.
|
||||||
|
* When combined with identify(), links anonymous sessions to authenticated users.
|
||||||
|
* Gracefully degrades when localStorage is unavailable (Safari private browsing).
|
||||||
|
*/
|
||||||
const generateUUID = () =>
|
const generateUUID = () =>
|
||||||
'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, c => {
|
'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, c => {
|
||||||
const r = (Math.random() * 16) | 0;
|
const r = (Math.random() * 16) | 0;
|
||||||
|
|
@ -51,15 +56,20 @@
|
||||||
const getVisitorId = () => {
|
const getVisitorId = () => {
|
||||||
if (!identityStitching || !localStorage) return undefined;
|
if (!identityStitching || !localStorage) return undefined;
|
||||||
|
|
||||||
const storageKey = 'umami.visitor';
|
try {
|
||||||
let vid = localStorage.getItem(storageKey);
|
const storageKey = 'umami.visitor';
|
||||||
|
let vid = localStorage.getItem(storageKey);
|
||||||
|
|
||||||
if (!vid) {
|
if (!vid) {
|
||||||
vid = typeof crypto !== 'undefined' && crypto.randomUUID ? crypto.randomUUID() : generateUUID();
|
vid = typeof crypto !== 'undefined' && crypto.randomUUID ? crypto.randomUUID() : generateUUID();
|
||||||
localStorage.setItem(storageKey, vid);
|
localStorage.setItem(storageKey, vid);
|
||||||
|
}
|
||||||
|
|
||||||
|
return vid;
|
||||||
|
} catch {
|
||||||
|
// localStorage access throws in Safari private browsing
|
||||||
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
return vid;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const visitorId = getVisitorId();
|
const visitorId = getVisitorId();
|
||||||
|
|
@ -165,12 +175,21 @@
|
||||||
|
|
||||||
/* Tracking functions */
|
/* Tracking functions */
|
||||||
|
|
||||||
const trackingDisabled = () =>
|
const trackingDisabled = () => {
|
||||||
disabled ||
|
let storageDisabled = false;
|
||||||
!website ||
|
try {
|
||||||
(localStorage && localStorage.getItem('umami.disabled')) ||
|
storageDisabled = localStorage && localStorage.getItem('umami.disabled');
|
||||||
(domain && !domains.includes(hostname)) ||
|
} catch {
|
||||||
(dnt && hasDoNotTrack());
|
// localStorage throws in Safari private browsing
|
||||||
|
}
|
||||||
|
return (
|
||||||
|
disabled ||
|
||||||
|
!website ||
|
||||||
|
storageDisabled ||
|
||||||
|
(domain && !domains.includes(hostname)) ||
|
||||||
|
(dnt && hasDoNotTrack())
|
||||||
|
);
|
||||||
|
};
|
||||||
|
|
||||||
const send = async (payload, type = 'event') => {
|
const send = async (payload, type = 'event') => {
|
||||||
if (trackingDisabled()) return;
|
if (trackingDisabled()) return;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue