diff --git a/db/clickhouse/schema.sql b/db/clickhouse/schema.sql index 53fba1fb7..e36ddf612 100644 --- a/db/clickhouse/schema.sql +++ b/db/clickhouse/schema.sql @@ -66,4 +66,203 @@ CREATE TABLE umami.session_data ) engine = MergeTree ORDER BY (website_id, session_id, data_key, created_at) - SETTINGS index_granularity = 8192; \ No newline at end of file + SETTINGS index_granularity = 8192; + +-- stats hourly +CREATE TABLE umami.website_event_stats_hourly +( + website_id UUID, + session_id UUID, + visit_id UUID, + hostname LowCardinality(String), + browser LowCardinality(String), + os LowCardinality(String), + device LowCardinality(String), + country LowCardinality(String), + subdivision1 LowCardinality(String), + city String, + entry_url AggregateFunction(argMin, String, DateTime('UTC')), + exit_url AggregateFunction(argMax, String, DateTime('UTC')), + url_path SimpleAggregateFunction(groupArrayArray, Array(String)), + url_query SimpleAggregateFunction(groupArrayArray, Array(String)), + referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)), + page_title SimpleAggregateFunction(groupArrayArray, Array(String)), + event_type UInt32, + event_name SimpleAggregateFunction(groupArrayArray, Array(String)), + views SimpleAggregateFunction(sum, UInt64), + min_time SimpleAggregateFunction(min, DateTime('UTC')), + max_time SimpleAggregateFunction(max, DateTime('UTC')), + created_at Datetime('UTC') +) +ENGINE = AggregatingMergeTree +PARTITION BY toYYYYMM(created_at) +ORDER BY ( + website_id, + toStartOfHour(created_at), + cityHash64(visit_id), + visit_id +) +SAMPLE BY cityHash64(visit_id) +TTL created_at + INTERVAL 10 DAY; + +CREATE MATERIALIZED VIEW umami.website_event_stats_hourly_mv +TO umami.website_event_stats_hourly +AS +SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + entry_url, + exit_url, + url_paths as url_path, + url_query, + referrer_domain, + page_title, + event_type, + event_name, + views, + min_time, + max_time, + timestamp as created_at +FROM (SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + argMinState(url_path, created_at) entry_url, + argMaxState(url_path, created_at) exit_url, + arrayFilter(x -> x != '', groupArray(url_path)) as url_paths, + arrayFilter(x -> x != '', groupArray(url_query)) url_query, + arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain, + arrayFilter(x -> x != '', groupArray(page_title)) page_title, + event_type, + if(event_type = 2, groupArray(event_name), []) event_name, + sumIf(1, event_type = 1) views, + min(created_at) min_time, + max(created_at) max_time, + toStartOfHour(created_at) timestamp +FROM umami.website_event +GROUP BY website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + event_type, + timestamp); + +-- stats daily +CREATE TABLE umami.website_event_stats_daily +( + website_id UUID, + session_id UUID, + visit_id UUID, + hostname LowCardinality(String), + browser LowCardinality(String), + os LowCardinality(String), + device LowCardinality(String), + country LowCardinality(String), + subdivision1 LowCardinality(String), + city String, + entry_url AggregateFunction(argMin, String, DateTime('UTC')), + exit_url AggregateFunction(argMax, String, DateTime('UTC')), + url_path SimpleAggregateFunction(groupArrayArray, Array(String)), + url_query SimpleAggregateFunction(groupArrayArray, Array(String)), + referrer_domain SimpleAggregateFunction(groupArrayArray, Array(String)), + page_title SimpleAggregateFunction(groupArrayArray, Array(String)), + event_type UInt32, + event_name SimpleAggregateFunction(groupArrayArray, Array(String)), + views SimpleAggregateFunction(sum, UInt64), + min_time SimpleAggregateFunction(min, DateTime('UTC')), + max_time SimpleAggregateFunction(max, DateTime('UTC')), + created_at Datetime('UTC') +) +ENGINE = AggregatingMergeTree +PARTITION BY toYYYYMM(created_at) +ORDER BY ( + website_id, + toStartOfDay(created_at), + cityHash64(visit_id), + visit_id +) +SAMPLE BY cityHash64(visit_id); + +CREATE MATERIALIZED VIEW umami.website_event_stats_daily_mv +TO umami.website_event_stats_daily +AS +SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + entry_url, + exit_url, + url_paths as url_path, + url_query, + referrer_domain, + page_title, + event_type, + event_name, + views, + min_time, + max_time, + timestamp as created_at +FROM (SELECT + website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + argMinState(url_path, created_at) entry_url, + argMaxState(url_path, created_at) exit_url, + arrayFilter(x -> x != '', groupArray(url_path)) as url_paths, + arrayFilter(x -> x != '', groupArray(url_query)) url_query, + arrayFilter(x -> x != '', groupArray(referrer_domain)) referrer_domain, + arrayFilter(x -> x != '', groupArray(page_title)) page_title, + event_type, + if(event_type = 2, groupArray(event_name), []) event_name, + sumIf(1, event_type = 1) views, + min(created_at) min_time, + max(created_at) max_time, + toStartOfDay(created_at) timestamp +FROM umami.website_event +GROUP BY website_id, + session_id, + visit_id, + hostname, + browser, + os, + device, + country, + subdivision1, + city, + event_type, + timestamp); \ No newline at end of file diff --git a/scripts/start-env.js b/scripts/start-env.js index a2f2b43fc..264c4e92b 100644 --- a/scripts/start-env.js +++ b/scripts/start-env.js @@ -2,6 +2,6 @@ require('dotenv').config(); const cli = require('next/dist/cli/next-start'); cli.nextStart({ - 'port': process.env.PORT || 3000, - 'hostname': process.env.HOSTNAME || '0.0.0.0' + port: process.env.PORT || 3000, + hostname: process.env.HOSTNAME || '0.0.0.0', }); diff --git a/src/lib/constants.ts b/src/lib/constants.ts index 661611bc9..aa1b3c0f2 100644 --- a/src/lib/constants.ts +++ b/src/lib/constants.ts @@ -58,8 +58,8 @@ export const SESSION_COLUMNS = [ export const FILTER_COLUMNS = { url: 'url_path', - entry: 'url_path', - exit: 'url_path', + entry: 'entry_url', + exit: 'exit_url', referrer: 'referrer_domain', host: 'hostname', title: 'page_title', diff --git a/src/pages/api/websites/[websiteId]/metrics.ts b/src/pages/api/websites/[websiteId]/metrics.ts index 3dac217b2..b37c38f74 100644 --- a/src/pages/api/websites/[websiteId]/metrics.ts +++ b/src/pages/api/websites/[websiteId]/metrics.ts @@ -64,7 +64,7 @@ export default async ( await useAuth(req, res); await useValidate(schema, req, res); - const { websiteId, type, limit, offset, search } = req.query; + const { websiteId, type, limit, offset, search, unit } = req.query; if (req.method === 'GET') { if (!(await canViewWebsite(req.auth, websiteId))) { @@ -89,7 +89,7 @@ export default async ( } if (SESSION_COLUMNS.includes(type)) { - const data = await getSessionMetrics(websiteId, type, filters, limit, offset); + const data = await getSessionMetrics(websiteId, type, filters, limit, offset, unit as string); if (type === 'language') { const combined = {}; @@ -111,7 +111,14 @@ export default async ( } if (EVENT_COLUMNS.includes(type)) { - const data = await getPageviewMetrics(websiteId, type, filters, limit, offset); + const data = await getPageviewMetrics( + websiteId, + type, + filters, + limit, + offset, + unit as string, + ); return ok(res, data); } diff --git a/src/pages/api/websites/[websiteId]/stats.ts b/src/pages/api/websites/[websiteId]/stats.ts index 0189627a5..1c684dbed 100644 --- a/src/pages/api/websites/[websiteId]/stats.ts +++ b/src/pages/api/websites/[websiteId]/stats.ts @@ -56,7 +56,7 @@ export default async ( await useAuth(req, res); await useValidate(schema, req, res); - const { websiteId, compare } = req.query; + const { websiteId, compare, unit } = req.query; if (req.method === 'GET') { if (!(await canViewWebsite(req.auth, websiteId))) { @@ -72,9 +72,13 @@ export default async ( const filters = getRequestFilters(req); - const metrics = await getWebsiteStats(websiteId, { ...filters, startDate, endDate }); + const metrics = await getWebsiteStats(websiteId, unit as string, { + ...filters, + startDate, + endDate, + }); - const prevPeriod = await getWebsiteStats(websiteId, { + const prevPeriod = await getWebsiteStats(websiteId, unit as string, { ...filters, startDate: compareStartDate, endDate: compareEndDate, diff --git a/src/queries/analytics/events/getEventMetrics.ts b/src/queries/analytics/events/getEventMetrics.ts index 8efbf7694..c97c11ad2 100644 --- a/src/queries/analytics/events/getEventMetrics.ts +++ b/src/queries/analytics/events/getEventMetrics.ts @@ -51,17 +51,23 @@ async function clickhouseQuery( eventType: EVENT_TYPE.customEvent, }); + const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily'; + return rawQuery( ` select event_name x, ${getDateSQL('created_at', unit, timezone)} t, count(*) y - from website_event - where website_id = {websiteId:UUID} - and created_at between {startDate:DateTime64} and {endDate:DateTime64} - and event_type = {eventType:UInt32} + from ( + select arrayJoin(event_name) as event_name, + created_at + from ${table} website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type = {eventType:UInt32} ${filterQuery} + ) as g group by x, t order by t `, diff --git a/src/queries/analytics/getWebsiteStats.ts b/src/queries/analytics/getWebsiteStats.ts index 84ceaf1c1..125715ca6 100644 --- a/src/queries/analytics/getWebsiteStats.ts +++ b/src/queries/analytics/getWebsiteStats.ts @@ -1,3 +1,4 @@ +/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */ import clickhouse from 'lib/clickhouse'; import { EVENT_TYPE } from 'lib/constants'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; @@ -5,7 +6,7 @@ import prisma from 'lib/prisma'; import { QueryFilters } from 'lib/types'; export async function getWebsiteStats( - ...args: [websiteId: string, filters: QueryFilters] + ...args: [websiteId: string, unit: string, filters: QueryFilters] ): Promise< { pageviews: number; visitors: number; visits: number; bounces: number; totaltime: number }[] > { @@ -17,6 +18,7 @@ export async function getWebsiteStats( async function relationalQuery( websiteId: string, + unit: string, filters: QueryFilters, ): Promise< { pageviews: number; visitors: number; visits: number; bounces: number; totaltime: number }[] @@ -57,6 +59,7 @@ async function relationalQuery( async function clickhouseQuery( websiteId: string, + unit: string, filters: QueryFilters, ): Promise< { pageviews: number; visitors: number; visits: number; bounces: number; totaltime: number }[] @@ -66,29 +69,21 @@ async function clickhouseQuery( ...filters, eventType: EVENT_TYPE.pageView, }); + const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily'; return rawQuery( ` select - sum(t.c) as "pageviews", - uniq(t.session_id) as "visitors", - uniq(t.visit_id) as "visits", - sum(if(t.c = 1, 1, 0)) as "bounces", + sum(views) as "pageviews", + uniq(session_id) as "visitors", + uniq(visit_id) as "visits", + sumIf(1, views = 1) as "bounces", sum(max_time-min_time) as "totaltime" - from ( - select - session_id, - visit_id, - count(*) c, - min(created_at) min_time, - max(created_at) max_time - from website_event - where website_id = {websiteId:UUID} - and created_at between {startDate:DateTime64} and {endDate:DateTime64} - and event_type = {eventType:UInt32} - ${filterQuery} - group by session_id, visit_id - ) as t; + from ${table} "website_event" + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type = {eventType:UInt32} + ${filterQuery}; `, params, ).then(result => { diff --git a/src/queries/analytics/pageviews/getPageviewMetrics.ts b/src/queries/analytics/pageviews/getPageviewMetrics.ts index b3ae633ab..d66ec6aa7 100644 --- a/src/queries/analytics/pageviews/getPageviewMetrics.ts +++ b/src/queries/analytics/pageviews/getPageviewMetrics.ts @@ -1,3 +1,4 @@ +/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */ import clickhouse from 'lib/clickhouse'; import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants'; import { CLICKHOUSE, PRISMA, runQuery } from 'lib/db'; @@ -5,7 +6,14 @@ import prisma from 'lib/prisma'; import { QueryFilters } from 'lib/types'; export async function getPageviewMetrics( - ...args: [websiteId: string, type: string, filters: QueryFilters, limit?: number, offset?: number] + ...args: [ + websiteId: string, + type: string, + filters: QueryFilters, + limit?: number, + offset?: number, + unit?: string, + ] ) { return runQuery({ [PRISMA]: () => relationalQuery(...args), @@ -19,6 +27,7 @@ async function relationalQuery( filters: QueryFilters, limit: number = 500, offset: number = 0, + unit: string, ) { const column = FILTER_COLUMNS[type] || type; const { rawQuery, parseFilters } = prisma; @@ -82,6 +91,7 @@ async function clickhouseQuery( filters: QueryFilters, limit: number = 500, offset: number = 0, + unit: string, ): Promise<{ x: string; y: number }[]> { const column = FILTER_COLUMNS[type] || type; const { rawQuery, parseFilters } = clickhouse; @@ -90,40 +100,42 @@ async function clickhouseQuery( eventType: column === 'event_name' ? EVENT_TYPE.customEvent : EVENT_TYPE.pageView, }); - let entryExitQuery = ''; let excludeDomain = ''; + let groupByQuery = ''; + if (column === 'referrer_domain') { - excludeDomain = `and referrer_domain != {websiteDomain:String} and referrer_domain != ''`; + excludeDomain = `and t != {websiteDomain:String} and t != ''`; + } + + let columnQuery = `arrayJoin(${column})`; + + if (type === 'entry') { + columnQuery = `visit_id x, argMinMerge(${column})`; + } + + if (type === 'exit') { + columnQuery = `visit_id x, argMaxMerge(${column})`; } if (type === 'entry' || type === 'exit') { - const aggregrate = type === 'entry' ? 'min' : 'max'; - - entryExitQuery = ` - join ( - select visit_id, - ${aggregrate}(created_at) target_created_at - from website_event - where website_id = {websiteId:UUID} - and created_at between {startDate:DateTime64} and {endDate:DateTime64} - and event_type = {eventType:UInt32} - group by visit_id - ) x - on x.visit_id = website_event.visit_id - and x.target_created_at = website_event.created_at - `; + groupByQuery = 'group by x'; } + const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily'; + return rawQuery( ` - select ${column} x, count(*) y - from website_event - ${entryExitQuery} - where website_id = {websiteId:UUID} - and created_at between {startDate:DateTime64} and {endDate:DateTime64} - and event_type = {eventType:UInt32} - ${excludeDomain} - ${filterQuery} + select g.t as x, + count(*) as y + from ( + select ${columnQuery} as t + from ${table} website_event + where website_id = {websiteId:UUID} + and created_at between {startDate:DateTime64} and {endDate:DateTime64} + and event_type = {eventType:UInt32} + ${excludeDomain} + ${filterQuery} + ${groupByQuery}) as g group by x order by y desc limit ${limit} diff --git a/src/queries/analytics/pageviews/getPageviewStats.ts b/src/queries/analytics/pageviews/getPageviewStats.ts index 65bc8625f..0bb16ca9b 100644 --- a/src/queries/analytics/pageviews/getPageviewStats.ts +++ b/src/queries/analytics/pageviews/getPageviewStats.ts @@ -46,6 +46,7 @@ async function clickhouseQuery( ...filters, eventType: EVENT_TYPE.pageView, }); + const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily'; return rawQuery( ` @@ -55,8 +56,8 @@ async function clickhouseQuery( from ( select ${getDateSQL('created_at', unit, timezone)} as t, - count(*) as y - from website_event + sum(views) as y + from ${table} website_event where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} and event_type = {eventType:UInt32} diff --git a/src/queries/analytics/sessions/getSessionMetrics.ts b/src/queries/analytics/sessions/getSessionMetrics.ts index e522a7eff..93e36a55f 100644 --- a/src/queries/analytics/sessions/getSessionMetrics.ts +++ b/src/queries/analytics/sessions/getSessionMetrics.ts @@ -1,3 +1,4 @@ +/* eslint-disable no-unused-vars, @typescript-eslint/no-unused-vars */ import prisma from 'lib/prisma'; import clickhouse from 'lib/clickhouse'; import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db'; @@ -5,7 +6,14 @@ import { EVENT_TYPE, FILTER_COLUMNS, SESSION_COLUMNS } from 'lib/constants'; import { QueryFilters } from 'lib/types'; export async function getSessionMetrics( - ...args: [websiteId: string, type: string, filters: QueryFilters, limit?: number, offset?: number] + ...args: [ + websiteId: string, + type: string, + filters: QueryFilters, + limit?: number, + offset?: number, + unit?: string, + ] ) { return runQuery({ [PRISMA]: () => relationalQuery(...args), @@ -19,6 +27,7 @@ async function relationalQuery( filters: QueryFilters, limit: number = 500, offset: number = 0, + unit: string, ) { const column = FILTER_COLUMNS[type] || type; const { parseFilters, rawQuery } = prisma; @@ -62,6 +71,7 @@ async function clickhouseQuery( filters: QueryFilters, limit: number = 500, offset: number = 0, + unit: string, ): Promise<{ x: string; y: number }[]> { const column = FILTER_COLUMNS[type] || type; const { parseFilters, rawQuery } = clickhouse; @@ -70,6 +80,7 @@ async function clickhouseQuery( eventType: EVENT_TYPE.pageView, }); const includeCountry = column === 'city' || column === 'subdivision1'; + const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily'; return rawQuery( ` @@ -77,7 +88,7 @@ async function clickhouseQuery( ${column} x, uniq(session_id) y ${includeCountry ? ', country' : ''} - from website_event + from ${table} website_event where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} and event_type = {eventType:UInt32} diff --git a/src/queries/analytics/sessions/getSessionStats.ts b/src/queries/analytics/sessions/getSessionStats.ts index 54c46c355..2b57b922b 100644 --- a/src/queries/analytics/sessions/getSessionStats.ts +++ b/src/queries/analytics/sessions/getSessionStats.ts @@ -46,6 +46,7 @@ async function clickhouseQuery( ...filters, eventType: EVENT_TYPE.pageView, }); + const table = unit === 'hour' ? 'website_event_stats_hourly' : 'website_event_stats_daily'; return rawQuery( ` @@ -55,8 +56,8 @@ async function clickhouseQuery( from ( select ${getDateSQL('created_at', unit, timezone)} as t, - count(distinct session_id) as y - from website_event + uniq(session_id) as y + from ${table} website_event where website_id = {websiteId:UUID} and created_at between {startDate:DateTime64} and {endDate:DateTime64} and event_type = {eventType:UInt32}