mirror of
https://github.com/umami-software/umami.git
synced 2025-12-06 01:18:00 +01:00
feat: implement automatic session linking and identity stitching (#3820)
Links anonymous browser sessions to authenticated user identities, enabling unified
user journey tracking across login boundaries. This solves the "logged-out anonymous
session → logged-in session" tracking gap, providing complete funnel visibility and
accurate visitor deduplication.
## Changes
- Client-side: Persistent visitor ID in localStorage (data-identity-stitching attribute)
- Server-side: identity_link table linking visitors to distinct IDs (authenticated users)
- Query updates: getWebsiteStats now deduplicates by resolved identity
- Graceful degradation: Works in Safari private browsing and when localStorage is unavailable
## Implementation Details
Uses hybrid approach combining client-side persistence with server-side linking:
- Visitor ID generated once per browser, persists across sessions
- When a user logs in, identify() creates an identity link
- Stats queries join through identity_link to deduplicate cross-device sessions
Both PostgreSQL and ClickHouse supported with appropriate query patterns:
- PostgreSQL: normalized schema, joins through session table
- ClickHouse: denormalized with ReplacingMergeTree for deduplication
## Edge Cases Handled
- Safari private browsing: localStorage throws, visitorId undefined, no link created
- localStorage cleared: new visitorId generated, creates new link
- Multiple tabs: same visitorId shared via localStorage
- Multiple devices: one distinct_id can link to multiple visitors
- Multiple accounts: one visitor can link to multiple distinct_ids
## Test Plan
- [ ] Enable feature on test website (default enabled)
- [ ] Anonymous pageview - confirm visitor_id in events table
- [ ] Call umami.identify('user1') - confirm identity_link created
- [ ] Stats show 1 visitor (deduplicated)
- [ ] Log out, browse anonymously, stats still show 1 visitor
- [ ] Test with data-identity-stitching="false" - no visitor_id collected
- [ ] Test in Safari private browsing - no errors, gracefully skips
- [ ] Test ClickHouse: verify identity_link table populated and FINAL keyword works
- [ ] Verify retroactive: historical anonymous session attributed correctly
This commit is contained in:
parent
a902a87c08
commit
34db34759f
7 changed files with 76 additions and 24 deletions
|
|
@ -293,6 +293,7 @@ CREATE TABLE umami.identity_link
|
|||
website_id UUID,
|
||||
visitor_id String,
|
||||
distinct_id String,
|
||||
created_at DateTime('UTC'),
|
||||
linked_at DateTime('UTC')
|
||||
)
|
||||
ENGINE = ReplacingMergeTree(linked_at)
|
||||
|
|
|
|||
|
|
@ -321,11 +321,12 @@ model Pixel {
|
|||
}
|
||||
|
||||
model IdentityLink {
|
||||
id String @id @unique @map("identity_link_id") @db.Uuid
|
||||
websiteId String @map("website_id") @db.Uuid
|
||||
visitorId String @map("visitor_id") @db.VarChar(50)
|
||||
distinctId String @map("distinct_id") @db.VarChar(50)
|
||||
linkedAt DateTime @default(now()) @map("linked_at") @db.Timestamptz(6)
|
||||
id String @id @unique @map("identity_link_id") @db.Uuid
|
||||
websiteId String @map("website_id") @db.Uuid
|
||||
visitorId String @map("visitor_id") @db.VarChar(50)
|
||||
distinctId String @map("distinct_id") @db.VarChar(50)
|
||||
createdAt DateTime @default(now()) @map("created_at") @db.Timestamptz(6)
|
||||
linkedAt DateTime @default(now()) @updatedAt @map("linked_at") @db.Timestamptz(6)
|
||||
|
||||
website Website @relation(fields: [websiteId], references: [id], onDelete: Cascade)
|
||||
|
||||
|
|
|
|||
|
|
@ -271,11 +271,15 @@ export async function POST(request: Request) {
|
|||
}
|
||||
|
||||
// Create identity link when both visitorId and distinctId are present
|
||||
// Fire-and-forget to avoid adding latency to the tracking endpoint
|
||||
if (visitorId && id && websiteId) {
|
||||
await createIdentityLink({
|
||||
createIdentityLink({
|
||||
websiteId,
|
||||
visitorId,
|
||||
distinctId: id,
|
||||
}).catch(e => {
|
||||
// eslint-disable-next-line no-console
|
||||
console.error('Failed to create identity link:', e);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ async function clickhouseQuery(
|
|||
sql = `
|
||||
select
|
||||
sum(t.c) as "pageviews",
|
||||
uniq(coalesce(t.resolved_identity, t.session_id)) as "visitors",
|
||||
uniq(coalesce(t.resolved_identity, toString(t.session_id))) as "visitors",
|
||||
uniq(t.visit_id) as "visits",
|
||||
sum(if(t.c = 1, 1, 0)) as "bounces",
|
||||
sum(max_time-min_time) as "totaltime"
|
||||
|
|
@ -98,7 +98,7 @@ async function clickhouseQuery(
|
|||
max(we.created_at) max_time
|
||||
from website_event we
|
||||
${cohortQuery}
|
||||
left join identity_link il on il.visitor_id = we.visitor_id
|
||||
left join identity_link final il on il.visitor_id = we.visitor_id
|
||||
and il.website_id = we.website_id
|
||||
where we.website_id = {websiteId:UUID}
|
||||
and we.created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
|
|
@ -111,7 +111,7 @@ async function clickhouseQuery(
|
|||
sql = `
|
||||
select
|
||||
sum(t.c) as "pageviews",
|
||||
uniq(coalesce(resolved_identity, session_id)) as "visitors",
|
||||
uniq(coalesce(resolved_identity, toString(session_id))) as "visitors",
|
||||
uniq(visit_id) as "visits",
|
||||
sumIf(1, t.c = 1) as "bounces",
|
||||
sum(max_time-min_time) as "totaltime"
|
||||
|
|
@ -124,7 +124,7 @@ async function clickhouseQuery(
|
|||
max(we.max_time) max_time
|
||||
from website_event_stats_hourly we
|
||||
${cohortQuery}
|
||||
left join identity_link il on il.visitor_id = we.visitor_id
|
||||
left join identity_link final il on il.visitor_id = we.visitor_id
|
||||
and il.website_id = we.website_id
|
||||
where we.website_id = {websiteId:UUID}
|
||||
and we.created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,18 @@
|
|||
/**
|
||||
* Identity Stitching - Links anonymous browser sessions to authenticated user identities
|
||||
*
|
||||
* Design decisions:
|
||||
* - One visitor can link to multiple distinct_ids (user logs into different accounts)
|
||||
* - One distinct_id can link to multiple visitors (user on multiple devices/browsers)
|
||||
* - Links are additive and never invalidated (preserves historical journey)
|
||||
* - Uses ReplacingMergeTree in ClickHouse with linked_at for deduplication
|
||||
* - Upsert pattern ensures idempotency for repeated identify() calls
|
||||
*
|
||||
* Edge cases handled:
|
||||
* - Safari private browsing: visitorId will be undefined, no link created
|
||||
* - localStorage cleared: new visitorId generated, creates new link
|
||||
* - Multiple tabs: same visitorId shared via localStorage
|
||||
*/
|
||||
import { uuid } from '@/lib/crypto';
|
||||
import prisma from '@/lib/prisma';
|
||||
import clickhouse from '@/lib/clickhouse';
|
||||
|
|
@ -44,11 +59,13 @@ async function clickhouseQuery({ websiteId, visitorId, distinctId }: CreateIdent
|
|||
const { insert, getUTCString } = clickhouse;
|
||||
const { sendMessage } = kafka;
|
||||
|
||||
const now = getUTCString(new Date());
|
||||
const message = {
|
||||
website_id: websiteId,
|
||||
visitor_id: visitorId,
|
||||
distinct_id: distinctId,
|
||||
linked_at: getUTCString(new Date()),
|
||||
created_at: now,
|
||||
linked_at: now,
|
||||
};
|
||||
|
||||
if (kafka.enabled) {
|
||||
|
|
|
|||
|
|
@ -1,3 +1,13 @@
|
|||
/**
|
||||
* Resolves all visitor IDs linked to a given distinct_id (authenticated user)
|
||||
*
|
||||
* Use cases (for future implementation):
|
||||
* - User journey reports: aggregate sessions across devices
|
||||
* - Cohort analysis: include all linked sessions
|
||||
* - Retroactive attribution: credit conversions to original anonymous session
|
||||
*
|
||||
* Note: Uses FINAL keyword in ClickHouse to ensure deduplication from ReplacingMergeTree
|
||||
*/
|
||||
import prisma from '@/lib/prisma';
|
||||
import clickhouse from '@/lib/clickhouse';
|
||||
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
|
||||
|
|
|
|||
|
|
@ -42,6 +42,11 @@
|
|||
|
||||
/* Helper functions */
|
||||
|
||||
/**
|
||||
* Identity Stitching: Generates a persistent visitor ID stored in localStorage.
|
||||
* When combined with identify(), links anonymous sessions to authenticated users.
|
||||
* Gracefully degrades when localStorage is unavailable (Safari private browsing).
|
||||
*/
|
||||
const generateUUID = () =>
|
||||
'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, c => {
|
||||
const r = (Math.random() * 16) | 0;
|
||||
|
|
@ -51,15 +56,20 @@
|
|||
const getVisitorId = () => {
|
||||
if (!identityStitching || !localStorage) return undefined;
|
||||
|
||||
const storageKey = 'umami.visitor';
|
||||
let vid = localStorage.getItem(storageKey);
|
||||
try {
|
||||
const storageKey = 'umami.visitor';
|
||||
let vid = localStorage.getItem(storageKey);
|
||||
|
||||
if (!vid) {
|
||||
vid = typeof crypto !== 'undefined' && crypto.randomUUID ? crypto.randomUUID() : generateUUID();
|
||||
localStorage.setItem(storageKey, vid);
|
||||
if (!vid) {
|
||||
vid = typeof crypto !== 'undefined' && crypto.randomUUID ? crypto.randomUUID() : generateUUID();
|
||||
localStorage.setItem(storageKey, vid);
|
||||
}
|
||||
|
||||
return vid;
|
||||
} catch {
|
||||
// localStorage access throws in Safari private browsing
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return vid;
|
||||
};
|
||||
|
||||
const visitorId = getVisitorId();
|
||||
|
|
@ -165,12 +175,21 @@
|
|||
|
||||
/* Tracking functions */
|
||||
|
||||
const trackingDisabled = () =>
|
||||
disabled ||
|
||||
!website ||
|
||||
(localStorage && localStorage.getItem('umami.disabled')) ||
|
||||
(domain && !domains.includes(hostname)) ||
|
||||
(dnt && hasDoNotTrack());
|
||||
const trackingDisabled = () => {
|
||||
let storageDisabled = false;
|
||||
try {
|
||||
storageDisabled = localStorage && localStorage.getItem('umami.disabled');
|
||||
} catch {
|
||||
// localStorage throws in Safari private browsing
|
||||
}
|
||||
return (
|
||||
disabled ||
|
||||
!website ||
|
||||
storageDisabled ||
|
||||
(domain && !domains.includes(hostname)) ||
|
||||
(dnt && hasDoNotTrack())
|
||||
);
|
||||
};
|
||||
|
||||
const send = async (payload, type = 'event') => {
|
||||
if (trackingDisabled()) return;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue