mirror of
https://github.com/umami-software/umami.git
synced 2025-12-06 01:18:00 +01:00
feat: implement identity stitching for session linking (#3820)
Adds automatic session linking/identity stitching to link anonymous browsing sessions with authenticated user sessions. ## Changes ### Database Schema - Add `identity_link` table (PostgreSQL + ClickHouse) to store mappings between visitor IDs and authenticated user IDs - Add `visitor_id` field to `Session` model - Add `visitor_id` column to ClickHouse `website_event` table ### Client Tracker - Generate and persist `visitor_id` in localStorage - Include `vid` in all tracking payloads - Support opt-out via `data-identity-stitching="false"` attribute ### API - Accept `vid` parameter in `/api/send` endpoint - Auto-create identity links when `identify()` is called with both visitor_id and distinct_id - Store visitor_id in sessions and events ### Query Updates - Update `getWebsiteStats` to deduplicate visitors by resolved identity - Visitors who browse anonymously then log in are now counted as one user ## Usage When a user logs in, call `umami.identify(userId)`. If identity stitching is enabled (default), the tracker automatically links the anonymous visitor_id to the authenticated userId. Stats queries then resolve linked identities to accurately count unique visitors. Resolves #3820
This commit is contained in:
parent
9a269ab811
commit
a902a87c08
11 changed files with 245 additions and 33 deletions
|
|
@ -39,6 +39,7 @@ CREATE TABLE umami.website_event
|
|||
event_name String,
|
||||
tag String,
|
||||
distinct_id String,
|
||||
visitor_id String,
|
||||
created_at DateTime('UTC'),
|
||||
job_id Nullable(UUID)
|
||||
)
|
||||
|
|
@ -123,6 +124,7 @@ CREATE TABLE umami.website_event_stats_hourly
|
|||
max_time SimpleAggregateFunction(max, DateTime('UTC')),
|
||||
tag SimpleAggregateFunction(groupArrayArray, Array(String)),
|
||||
distinct_id String,
|
||||
visitor_id String,
|
||||
created_at Datetime('UTC')
|
||||
)
|
||||
ENGINE = AggregatingMergeTree
|
||||
|
|
@ -176,6 +178,7 @@ SELECT
|
|||
max_time,
|
||||
tag,
|
||||
distinct_id,
|
||||
visitor_id,
|
||||
timestamp as created_at
|
||||
FROM (SELECT
|
||||
website_id,
|
||||
|
|
@ -214,6 +217,7 @@ FROM (SELECT
|
|||
max(created_at) max_time,
|
||||
arrayFilter(x -> x != '', groupArray(tag)) tag,
|
||||
distinct_id,
|
||||
visitor_id,
|
||||
toStartOfHour(created_at) timestamp
|
||||
FROM umami.website_event
|
||||
GROUP BY website_id,
|
||||
|
|
@ -230,6 +234,7 @@ GROUP BY website_id,
|
|||
city,
|
||||
event_type,
|
||||
distinct_id,
|
||||
visitor_id,
|
||||
timestamp);
|
||||
|
||||
-- projections
|
||||
|
|
@ -281,3 +286,15 @@ JOIN (SELECT event_id, string_value as currency
|
|||
WHERE positionCaseInsensitive(data_key, 'currency') > 0) c
|
||||
ON c.event_id = ed.event_id
|
||||
WHERE positionCaseInsensitive(data_key, 'revenue') > 0;
|
||||
|
||||
-- identity linking
|
||||
CREATE TABLE umami.identity_link
|
||||
(
|
||||
website_id UUID,
|
||||
visitor_id String,
|
||||
distinct_id String,
|
||||
linked_at DateTime('UTC')
|
||||
)
|
||||
ENGINE = ReplacingMergeTree(linked_at)
|
||||
ORDER BY (website_id, visitor_id, distinct_id)
|
||||
SETTINGS index_granularity = 8192;
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ model Session {
|
|||
region String? @db.VarChar(20)
|
||||
city String? @db.VarChar(50)
|
||||
distinctId String? @map("distinct_id") @db.VarChar(50)
|
||||
visitorId String? @map("visitor_id") @db.VarChar(50)
|
||||
createdAt DateTime? @default(now()) @map("created_at") @db.Timestamptz(6)
|
||||
|
||||
websiteEvents WebsiteEvent[]
|
||||
|
|
@ -60,6 +61,7 @@ model Session {
|
|||
@@index([websiteId, createdAt, country])
|
||||
@@index([websiteId, createdAt, region])
|
||||
@@index([websiteId, createdAt, city])
|
||||
@@index([websiteId, visitorId])
|
||||
@@map("session")
|
||||
}
|
||||
|
||||
|
|
@ -76,14 +78,15 @@ model Website {
|
|||
updatedAt DateTime? @updatedAt @map("updated_at") @db.Timestamptz(6)
|
||||
deletedAt DateTime? @map("deleted_at") @db.Timestamptz(6)
|
||||
|
||||
user User? @relation("user", fields: [userId], references: [id])
|
||||
createUser User? @relation("createUser", fields: [createdBy], references: [id])
|
||||
team Team? @relation(fields: [teamId], references: [id])
|
||||
eventData EventData[]
|
||||
reports Report[]
|
||||
revenue Revenue[]
|
||||
segments Segment[]
|
||||
sessionData SessionData[]
|
||||
user User? @relation("user", fields: [userId], references: [id])
|
||||
createUser User? @relation("createUser", fields: [createdBy], references: [id])
|
||||
team Team? @relation(fields: [teamId], references: [id])
|
||||
eventData EventData[]
|
||||
reports Report[]
|
||||
revenue Revenue[]
|
||||
segments Segment[]
|
||||
sessionData SessionData[]
|
||||
identityLinks IdentityLink[]
|
||||
|
||||
@@index([userId])
|
||||
@@index([teamId])
|
||||
|
|
@ -316,3 +319,18 @@ model Pixel {
|
|||
@@index([createdAt])
|
||||
@@map("pixel")
|
||||
}
|
||||
|
||||
model IdentityLink {
|
||||
id String @id @unique @map("identity_link_id") @db.Uuid
|
||||
websiteId String @map("website_id") @db.Uuid
|
||||
visitorId String @map("visitor_id") @db.VarChar(50)
|
||||
distinctId String @map("distinct_id") @db.VarChar(50)
|
||||
linkedAt DateTime @default(now()) @map("linked_at") @db.Timestamptz(6)
|
||||
|
||||
website Website @relation(fields: [websiteId], references: [id], onDelete: Cascade)
|
||||
|
||||
@@unique([websiteId, visitorId, distinctId])
|
||||
@@index([websiteId, distinctId])
|
||||
@@index([websiteId, visitorId])
|
||||
@@map("identity_link")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import { secret, uuid, hash } from '@/lib/crypto';
|
|||
import { COLLECTION_TYPE, EVENT_TYPE } from '@/lib/constants';
|
||||
import { anyObjectParam, urlOrPathParam } from '@/lib/schema';
|
||||
import { safeDecodeURI, safeDecodeURIComponent } from '@/lib/url';
|
||||
import { createSession, saveEvent, saveSessionData } from '@/queries/sql';
|
||||
import { createSession, saveEvent, saveSessionData, createIdentityLink } from '@/queries/sql';
|
||||
import { serializeError } from 'serialize-error';
|
||||
|
||||
interface Cache {
|
||||
|
|
@ -41,6 +41,7 @@ const schema = z.object({
|
|||
userAgent: z.string().optional(),
|
||||
timestamp: z.coerce.number().int().optional(),
|
||||
id: z.string().optional(),
|
||||
vid: z.string().max(50).optional(),
|
||||
})
|
||||
.refine(
|
||||
data => {
|
||||
|
|
@ -80,6 +81,7 @@ export async function POST(request: Request) {
|
|||
tag,
|
||||
timestamp,
|
||||
id,
|
||||
vid: visitorId,
|
||||
} = payload;
|
||||
|
||||
const sourceId = websiteId || pixelId || linkId;
|
||||
|
|
@ -146,6 +148,7 @@ export async function POST(request: Request) {
|
|||
region,
|
||||
city,
|
||||
distinctId: id,
|
||||
visitorId,
|
||||
createdAt,
|
||||
});
|
||||
}
|
||||
|
|
@ -226,6 +229,7 @@ export async function POST(request: Request) {
|
|||
|
||||
// Session
|
||||
distinctId: id,
|
||||
visitorId,
|
||||
browser,
|
||||
os,
|
||||
device,
|
||||
|
|
@ -265,6 +269,15 @@ export async function POST(request: Request) {
|
|||
createdAt,
|
||||
});
|
||||
}
|
||||
|
||||
// Create identity link when both visitorId and distinctId are present
|
||||
if (visitorId && id && websiteId) {
|
||||
await createIdentityLink({
|
||||
websiteId,
|
||||
visitorId,
|
||||
distinctId: id,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const token = createToken({ websiteId, sessionId, visitId, iat }, secret());
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ export interface SaveEventArgs {
|
|||
|
||||
// Session
|
||||
distinctId?: string;
|
||||
visitorId?: string;
|
||||
browser?: string;
|
||||
os?: string;
|
||||
device?: string;
|
||||
|
|
@ -164,6 +165,7 @@ async function clickhouseQuery({
|
|||
referrerQuery,
|
||||
referrerDomain,
|
||||
distinctId,
|
||||
visitorId,
|
||||
browser,
|
||||
os,
|
||||
device,
|
||||
|
|
@ -220,6 +222,7 @@ async function clickhouseQuery({
|
|||
event_name: eventName ? eventName?.substring(0, EVENT_NAME_LENGTH) : null,
|
||||
tag: tag,
|
||||
distinct_id: distinctId,
|
||||
visitor_id: visitorId,
|
||||
created_at: getUTCString(createdAt),
|
||||
browser,
|
||||
os,
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ async function relationalQuery(
|
|||
`
|
||||
select
|
||||
cast(coalesce(sum(t.c), 0) as bigint) as "pageviews",
|
||||
count(distinct t.session_id) as "visitors",
|
||||
count(distinct coalesce(t.resolved_identity, t.session_id::text)) as "visitors",
|
||||
count(distinct t.visit_id) as "visits",
|
||||
coalesce(sum(case when t.c = 1 then 1 else 0 end), 0) as "bounces",
|
||||
cast(coalesce(sum(${getTimestampDiffSQL('t.min_time', 't.max_time')}), 0) as bigint) as "totaltime"
|
||||
|
|
@ -45,17 +45,22 @@ async function relationalQuery(
|
|||
select
|
||||
website_event.session_id,
|
||||
website_event.visit_id,
|
||||
il.distinct_id as "resolved_identity",
|
||||
count(*) as "c",
|
||||
min(website_event.created_at) as "min_time",
|
||||
max(website_event.created_at) as "max_time"
|
||||
from website_event
|
||||
${cohortQuery}
|
||||
${joinSessionQuery}
|
||||
${joinSessionQuery}
|
||||
left join session on session.session_id = website_event.session_id
|
||||
and session.website_id = website_event.website_id
|
||||
left join identity_link il on il.visitor_id = session.visitor_id
|
||||
and il.website_id = session.website_id
|
||||
where website_event.website_id = {{websiteId::uuid}}
|
||||
and website_event.created_at between {{startDate}} and {{endDate}}
|
||||
and website_event.event_type != 2
|
||||
${filterQuery}
|
||||
group by 1, 2
|
||||
group by 1, 2, 3
|
||||
) as t
|
||||
`,
|
||||
queryParams,
|
||||
|
|
@ -79,47 +84,53 @@ async function clickhouseQuery(
|
|||
sql = `
|
||||
select
|
||||
sum(t.c) as "pageviews",
|
||||
uniq(t.session_id) as "visitors",
|
||||
uniq(coalesce(t.resolved_identity, t.session_id)) as "visitors",
|
||||
uniq(t.visit_id) as "visits",
|
||||
sum(if(t.c = 1, 1, 0)) as "bounces",
|
||||
sum(max_time-min_time) as "totaltime"
|
||||
from (
|
||||
select
|
||||
session_id,
|
||||
visit_id,
|
||||
we.session_id,
|
||||
we.visit_id,
|
||||
il.distinct_id as resolved_identity,
|
||||
count(*) c,
|
||||
min(created_at) min_time,
|
||||
max(created_at) max_time
|
||||
from website_event
|
||||
min(we.created_at) min_time,
|
||||
max(we.created_at) max_time
|
||||
from website_event we
|
||||
${cohortQuery}
|
||||
where website_id = {websiteId:UUID}
|
||||
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
and event_type != 2
|
||||
left join identity_link il on il.visitor_id = we.visitor_id
|
||||
and il.website_id = we.website_id
|
||||
where we.website_id = {websiteId:UUID}
|
||||
and we.created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
and we.event_type != 2
|
||||
${filterQuery}
|
||||
group by session_id, visit_id
|
||||
group by we.session_id, we.visit_id, il.distinct_id
|
||||
) as t;
|
||||
`;
|
||||
} else {
|
||||
sql = `
|
||||
select
|
||||
sum(t.c) as "pageviews",
|
||||
uniq(session_id) as "visitors",
|
||||
uniq(coalesce(resolved_identity, session_id)) as "visitors",
|
||||
uniq(visit_id) as "visits",
|
||||
sumIf(1, t.c = 1) as "bounces",
|
||||
sum(max_time-min_time) as "totaltime"
|
||||
from (select
|
||||
session_id,
|
||||
visit_id,
|
||||
sum(views) c,
|
||||
min(min_time) min_time,
|
||||
max(max_time) max_time
|
||||
from website_event_stats_hourly "website_event"
|
||||
we.session_id,
|
||||
we.visit_id,
|
||||
il.distinct_id as resolved_identity,
|
||||
sum(we.views) c,
|
||||
min(we.min_time) min_time,
|
||||
max(we.max_time) max_time
|
||||
from website_event_stats_hourly we
|
||||
${cohortQuery}
|
||||
where website_id = {websiteId:UUID}
|
||||
and created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
and event_type != 2
|
||||
left join identity_link il on il.visitor_id = we.visitor_id
|
||||
and il.website_id = we.website_id
|
||||
where we.website_id = {websiteId:UUID}
|
||||
and we.created_at between {startDate:DateTime64} and {endDate:DateTime64}
|
||||
and we.event_type != 2
|
||||
${filterQuery}
|
||||
group by session_id, visit_id
|
||||
group by we.session_id, we.visit_id, il.distinct_id
|
||||
) as t;
|
||||
`;
|
||||
}
|
||||
|
|
|
|||
59
src/queries/sql/identity/createIdentityLink.ts
Normal file
59
src/queries/sql/identity/createIdentityLink.ts
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
import { uuid } from '@/lib/crypto';
|
||||
import prisma from '@/lib/prisma';
|
||||
import clickhouse from '@/lib/clickhouse';
|
||||
import kafka from '@/lib/kafka';
|
||||
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
|
||||
|
||||
export interface CreateIdentityLinkArgs {
|
||||
websiteId: string;
|
||||
visitorId: string;
|
||||
distinctId: string;
|
||||
}
|
||||
|
||||
export async function createIdentityLink(data: CreateIdentityLinkArgs) {
|
||||
return runQuery({
|
||||
[PRISMA]: () => relationalQuery(data),
|
||||
[CLICKHOUSE]: () => clickhouseQuery(data),
|
||||
});
|
||||
}
|
||||
|
||||
async function relationalQuery({ websiteId, visitorId, distinctId }: CreateIdentityLinkArgs) {
|
||||
const { client } = prisma;
|
||||
|
||||
return client.identityLink.upsert({
|
||||
where: {
|
||||
websiteId_visitorId_distinctId: {
|
||||
websiteId,
|
||||
visitorId,
|
||||
distinctId,
|
||||
},
|
||||
},
|
||||
update: {
|
||||
linkedAt: new Date(),
|
||||
},
|
||||
create: {
|
||||
id: uuid(),
|
||||
websiteId,
|
||||
visitorId,
|
||||
distinctId,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
async function clickhouseQuery({ websiteId, visitorId, distinctId }: CreateIdentityLinkArgs) {
|
||||
const { insert, getUTCString } = clickhouse;
|
||||
const { sendMessage } = kafka;
|
||||
|
||||
const message = {
|
||||
website_id: websiteId,
|
||||
visitor_id: visitorId,
|
||||
distinct_id: distinctId,
|
||||
linked_at: getUTCString(new Date()),
|
||||
};
|
||||
|
||||
if (kafka.enabled) {
|
||||
await sendMessage('identity_link', message);
|
||||
} else {
|
||||
await insert('identity_link', [message]);
|
||||
}
|
||||
}
|
||||
61
src/queries/sql/identity/getLinkedVisitorIds.ts
Normal file
61
src/queries/sql/identity/getLinkedVisitorIds.ts
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
import prisma from '@/lib/prisma';
|
||||
import clickhouse from '@/lib/clickhouse';
|
||||
import { CLICKHOUSE, PRISMA, runQuery } from '@/lib/db';
|
||||
|
||||
export interface GetLinkedVisitorIdsArgs {
|
||||
websiteId: string;
|
||||
distinctId: string;
|
||||
}
|
||||
|
||||
export interface LinkedVisitorId {
|
||||
visitorId: string;
|
||||
linkedAt: Date;
|
||||
}
|
||||
|
||||
export async function getLinkedVisitorIds(
|
||||
data: GetLinkedVisitorIdsArgs,
|
||||
): Promise<LinkedVisitorId[]> {
|
||||
return runQuery({
|
||||
[PRISMA]: () => relationalQuery(data),
|
||||
[CLICKHOUSE]: () => clickhouseQuery(data),
|
||||
});
|
||||
}
|
||||
|
||||
async function relationalQuery({
|
||||
websiteId,
|
||||
distinctId,
|
||||
}: GetLinkedVisitorIdsArgs): Promise<LinkedVisitorId[]> {
|
||||
const { client } = prisma;
|
||||
|
||||
const links = await client.identityLink.findMany({
|
||||
where: {
|
||||
websiteId,
|
||||
distinctId,
|
||||
},
|
||||
select: {
|
||||
visitorId: true,
|
||||
linkedAt: true,
|
||||
},
|
||||
});
|
||||
|
||||
return links;
|
||||
}
|
||||
|
||||
async function clickhouseQuery({
|
||||
websiteId,
|
||||
distinctId,
|
||||
}: GetLinkedVisitorIdsArgs): Promise<LinkedVisitorId[]> {
|
||||
const { rawQuery } = clickhouse;
|
||||
|
||||
return rawQuery<LinkedVisitorId[]>(
|
||||
`
|
||||
select
|
||||
visitor_id as visitorId,
|
||||
linked_at as linkedAt
|
||||
from identity_link final
|
||||
where website_id = {websiteId:UUID}
|
||||
and distinct_id = {distinctId:String}
|
||||
`,
|
||||
{ websiteId, distinctId },
|
||||
);
|
||||
}
|
||||
2
src/queries/sql/identity/index.ts
Normal file
2
src/queries/sql/identity/index.ts
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
export * from './createIdentityLink';
|
||||
export * from './getLinkedVisitorIds';
|
||||
|
|
@ -39,3 +39,5 @@ export * from './getValues';
|
|||
export * from './getWebsiteDateRange';
|
||||
export * from './getWebsiteStats';
|
||||
export * from './getWeeklyTraffic';
|
||||
export * from './identity/createIdentityLink';
|
||||
export * from './identity/getLinkedVisitorIds';
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ export async function createSession(data: Prisma.SessionCreateInput) {
|
|||
region,
|
||||
city,
|
||||
distinct_id,
|
||||
visitor_id,
|
||||
created_at
|
||||
)
|
||||
values (
|
||||
|
|
@ -34,6 +35,7 @@ export async function createSession(data: Prisma.SessionCreateInput) {
|
|||
{{region}},
|
||||
{{city}},
|
||||
{{distinctId}},
|
||||
{{visitorId}},
|
||||
{{createdAt}}
|
||||
)
|
||||
on conflict (session_id) do nothing
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@
|
|||
const excludeHash = attr(_data + 'exclude-hash') === _true;
|
||||
const domain = attr(_data + 'domains') || '';
|
||||
const credentials = attr(_data + 'fetch-credentials') || 'omit';
|
||||
const identityStitching = attr(_data + 'identity-stitching') !== _false;
|
||||
|
||||
const domains = domain.split(',').map(n => n.trim());
|
||||
const host =
|
||||
|
|
@ -41,6 +42,28 @@
|
|||
|
||||
/* Helper functions */
|
||||
|
||||
const generateUUID = () =>
|
||||
'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, c => {
|
||||
const r = (Math.random() * 16) | 0;
|
||||
return (c === 'x' ? r : (r & 0x3) | 0x8).toString(16);
|
||||
});
|
||||
|
||||
const getVisitorId = () => {
|
||||
if (!identityStitching || !localStorage) return undefined;
|
||||
|
||||
const storageKey = 'umami.visitor';
|
||||
let vid = localStorage.getItem(storageKey);
|
||||
|
||||
if (!vid) {
|
||||
vid = typeof crypto !== 'undefined' && crypto.randomUUID ? crypto.randomUUID() : generateUUID();
|
||||
localStorage.setItem(storageKey, vid);
|
||||
}
|
||||
|
||||
return vid;
|
||||
};
|
||||
|
||||
const visitorId = getVisitorId();
|
||||
|
||||
const normalize = raw => {
|
||||
if (!raw) return raw;
|
||||
try {
|
||||
|
|
@ -63,6 +86,7 @@
|
|||
referrer: currentRef,
|
||||
tag,
|
||||
id: identity ? identity : undefined,
|
||||
vid: visitorId,
|
||||
});
|
||||
|
||||
const hasDoNotTrack = () => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue