From 0496062be0be30b779bb315547afd7b67e8ac765 Mon Sep 17 00:00:00 2001 From: Viet-Tien Ngoc Date: Mon, 5 Aug 2024 17:29:42 +0700 Subject: [PATCH 1/3] add schema --- .../migrations/05_add_event_data_blob.sql | 52 ++++++++++++++++++ db/clickhouse/schema.sql | 53 +++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 db/clickhouse/migrations/05_add_event_data_blob.sql diff --git a/db/clickhouse/migrations/05_add_event_data_blob.sql b/db/clickhouse/migrations/05_add_event_data_blob.sql new file mode 100644 index 000000000..37b6fb98d --- /dev/null +++ b/db/clickhouse/migrations/05_add_event_data_blob.sql @@ -0,0 +1,52 @@ +CREATE TABLE umami.event_data_blob +( + website_id UUID, + session_id UUID, + visit_id UUID, + event_id UUID, + blob1 String, + blob2 String, + blob3 String, + blob4 String, + blob5 String, + blob5 String, + blob7 String, + blob8 String, + blob9 String, + blob10 String, + blob11 String, + blob12 String, + blob13 String, + blob14 String, + blob15 String, + blob16 String, + blob17 String, + blob18 String, + blob19 String, + blob20 String, + double1 Nullable(Decimal64(4)), + double2 Nullable(Decimal64(4)), + double3 Nullable(Decimal64(4)), + double4 Nullable(Decimal64(4)), + double5 Nullable(Decimal64(4)), + double6 Nullable(Decimal64(4)), + double7 Nullable(Decimal64(4)), + double8 Nullable(Decimal64(4)), + double9 Nullable(Decimal64(4)), + double10 Nullable(Decimal64(4)), + double11 Nullable(Decimal64(4)), + double12 Nullable(Decimal64(4)), + double13 Nullable(Decimal64(4)), + double14 Nullable(Decimal64(4)), + double15 Nullable(Decimal64(4)), + double16 Nullable(Decimal64(4)), + double17 Nullable(Decimal64(4)), + double18 Nullable(Decimal64(4)), + double19 Nullable(Decimal64(4)), + double20 Nullable(Decimal64(4)), + created_at DateTime('UTC'), + job_id Nullable(UUID) +) + engine = MergeTree + ORDER BY (website_id, visit_id, event_id, created_at) + SETTINGS index_granularity = 8192; \ No newline at end of file diff --git a/db/clickhouse/schema.sql b/db/clickhouse/schema.sql index c1e525c13..9aa5b7f1e 100644 --- a/db/clickhouse/schema.sql +++ b/db/clickhouse/schema.sql @@ -69,4 +69,57 @@ CREATE TABLE umami.session_data ) engine = MergeTree ORDER BY (website_id, session_id, data_key, created_at) + SETTINGS index_granularity = 8192; + +CREATE TABLE umami.event_data_blob +( + website_id UUID, + session_id UUID, + visit_id UUID, + event_id UUID, + blob1 String, + blob2 String, + blob3 String, + blob4 String, + blob5 String, + blob5 String, + blob7 String, + blob8 String, + blob9 String, + blob10 String, + blob11 String, + blob12 String, + blob13 String, + blob14 String, + blob15 String, + blob16 String, + blob17 String, + blob18 String, + blob19 String, + blob20 String, + double1 Nullable(Decimal64(4)), + double2 Nullable(Decimal64(4)), + double3 Nullable(Decimal64(4)), + double4 Nullable(Decimal64(4)), + double5 Nullable(Decimal64(4)), + double6 Nullable(Decimal64(4)), + double7 Nullable(Decimal64(4)), + double8 Nullable(Decimal64(4)), + double9 Nullable(Decimal64(4)), + double10 Nullable(Decimal64(4)), + double11 Nullable(Decimal64(4)), + double12 Nullable(Decimal64(4)), + double13 Nullable(Decimal64(4)), + double14 Nullable(Decimal64(4)), + double15 Nullable(Decimal64(4)), + double16 Nullable(Decimal64(4)), + double17 Nullable(Decimal64(4)), + double18 Nullable(Decimal64(4)), + double19 Nullable(Decimal64(4)), + double20 Nullable(Decimal64(4)), + created_at DateTime('UTC'), + job_id Nullable(UUID) +) + engine = MergeTree + ORDER BY (website_id, visit_id, event_id, created_at) SETTINGS index_granularity = 8192; \ No newline at end of file From 891e7f1f5bf93558ea0851ff5f5c709ce6999d71 Mon Sep 17 00:00:00 2001 From: Viet-Tien Ngoc Date: Tue, 6 Aug 2024 10:49:03 +0700 Subject: [PATCH 2/3] save event data into both table --- src/lib/constants.ts | 1 + src/lib/data.ts | 34 +++++++++++++++++++ .../analytics/eventData/saveEventData.ts | 21 ++++++++++-- 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/src/lib/constants.ts b/src/lib/constants.ts index 35917802d..406f119ba 100644 --- a/src/lib/constants.ts +++ b/src/lib/constants.ts @@ -143,6 +143,7 @@ export const REPORT_PARAMETERS = { export const KAFKA_TOPIC = { event: 'event', eventData: 'event_data', + eventDataBlob: 'event_data_blob', } as const; export const ROLES = { diff --git a/src/lib/data.ts b/src/lib/data.ts index cf2722b5c..68e8bc758 100644 --- a/src/lib/data.ts +++ b/src/lib/data.ts @@ -92,3 +92,37 @@ function getKeyName(key: string, parentKey: string) { export function objectToArray(obj: object) { return Object.keys(obj).map(key => obj[key]); } + +export function flattenDynamicData( + data: { key: string; value: any; dataType: DynamicDataType }[], +): { + blobs: string[]; + doubles: number[]; +} { + const blobs: string[] = []; + const doubles: number[] = []; + + data.forEach(({ value, dataType }) => { + switch (dataType) { + case DATA_TYPE.string: + blobs.push(value); + break; + case DATA_TYPE.number: + doubles.push(value); + break; + case DATA_TYPE.date: + doubles.push(new Date(value).getTime()); + break; + case DATA_TYPE.boolean: + doubles.push(value ? 1 : 0); + break; + case DATA_TYPE.array: + blobs.push(JSON.stringify(value)); + break; + default: + break; + } + }); + + return { blobs, doubles }; +} diff --git a/src/queries/analytics/eventData/saveEventData.ts b/src/queries/analytics/eventData/saveEventData.ts index 0cbc750e7..e18021c9f 100644 --- a/src/queries/analytics/eventData/saveEventData.ts +++ b/src/queries/analytics/eventData/saveEventData.ts @@ -2,7 +2,7 @@ import { Prisma } from '@prisma/client'; import { DATA_TYPE } from 'lib/constants'; import { uuid } from 'lib/crypto'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; -import { flattenJSON, getStringValue } from 'lib/data'; +import { flattenDynamicData, flattenJSON, getStringValue } from 'lib/data'; import kafka from 'lib/kafka'; import prisma from 'lib/prisma'; import { DynamicData } from 'lib/types'; @@ -61,7 +61,7 @@ async function clickhouseQuery(data: { }) { const { websiteId, sessionId, visitId, eventId, urlPath, eventName, eventData, createdAt } = data; - const { getDateFormat, sendMessages } = kafka; + const { getDateFormat, sendMessages, sendMessage } = kafka; const jsonKeys = flattenJSON(eventData); @@ -84,5 +84,22 @@ async function clickhouseQuery(data: { await sendMessages(messages, 'event_data'); + const jsonBlobs = flattenDynamicData(jsonKeys); + const message: { [key: string]: string | number } = { + website_id: websiteId, + session_id: sessionId, + event_id: eventId, + visitId: visitId, + event_name: eventName, + }; + jsonBlobs.blobs.forEach((blob, i) => { + message[`blob${i + 1}`] = blob; + }); + jsonBlobs.doubles.forEach((double, i) => { + message[`double${i + 1}`] = double; + }); + + await sendMessage(message, 'event_data_blob'); + return data; } From db93cbf8341b7e2cb9acb2753dd1e45893b3df85 Mon Sep 17 00:00:00 2001 From: Viet-Tien Ngoc Date: Tue, 6 Aug 2024 10:52:28 +0700 Subject: [PATCH 3/3] update limit --- src/queries/analytics/eventData/saveEventData.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/queries/analytics/eventData/saveEventData.ts b/src/queries/analytics/eventData/saveEventData.ts index e18021c9f..482a6ee96 100644 --- a/src/queries/analytics/eventData/saveEventData.ts +++ b/src/queries/analytics/eventData/saveEventData.ts @@ -93,9 +93,11 @@ async function clickhouseQuery(data: { event_name: eventName, }; jsonBlobs.blobs.forEach((blob, i) => { + if (i >= 20) return; // 20 is the max number of blobs message[`blob${i + 1}`] = blob; }); jsonBlobs.doubles.forEach((double, i) => { + if (i >= 20) return; // 20 is the max number of doubles message[`double${i + 1}`] = double; });