From a2a53cf9f70c426a3dcb4f2bbf757c232ec3af13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carl-Gerhard=20Lindesva=CC=88rd?= Date: Mon, 15 Dec 2025 22:10:21 +0100 Subject: [PATCH] fix: handle revenue better on overview (and remove it from top pages) --- packages/db/src/services/overview.service.ts | 626 +++++++++++-------- 1 file changed, 362 insertions(+), 264 deletions(-) diff --git a/packages/db/src/services/overview.service.ts b/packages/db/src/services/overview.service.ts index d5cb2e4f..f74895e3 100644 --- a/packages/db/src/services/overview.service.ts +++ b/packages/db/src/services/overview.service.ts @@ -7,6 +7,28 @@ import { TABLE_NAMES, ch } from '../clickhouse/client'; import { clix } from '../clickhouse/query-builder'; import { getEventFiltersWhereClause } from './chart.service'; +// Constants +const ROLLUP_DATE_PREFIX = '1970-01-01'; + +const COLUMN_PREFIX_MAP: Record = { + region: 'country', + city: 'country', + browser_version: 'browser', + os_version: 'os', +}; + +// Types +type MetricsRow = { + bounce_rate: number; + unique_visitors: number; + total_sessions: number; + avg_session_duration: number; + total_screen_views: number; + views_per_session: number; +}; + +type MetricsSeriesRow = MetricsRow & { date: string; total_revenue: number }; + export const zGetMetricsInput = z.object({ projectId: z.string(), filters: z.array(z.any()), @@ -85,11 +107,112 @@ export type IGetTopGenericInput = z.infer & { export class OverviewService { constructor(private client: typeof ch) {} + // Helper methods + private isRollupRow(date: string): boolean { + return date.startsWith(ROLLUP_DATE_PREFIX); + } + + private getFillConfig(interval: string, startDate: string, endDate: string) { + const useDateOnly = ['month', 'week'].includes(interval); + return { + from: clix.toStartOf( + clix.datetime(startDate, useDateOnly ? 'toDate' : 'toDateTime'), + interval as any, + ), + to: clix.datetime(endDate, useDateOnly ? 'toDate' : 'toDateTime'), + step: clix.toInterval('1', interval as any), + }; + } + + private createRevenueQuery({ + projectId, + startDate, + endDate, + interval, + timezone, + filters, + }: { + projectId: string; + startDate: string; + endDate: string; + interval: string; + timezone: string; + filters: IChartEventFilter[]; + }) { + return clix(this.client, timezone) + .select<{ date: string; total_revenue: number }>([ + `${clix.toStartOf('created_at', interval as any, timezone)} AS date`, + 'sum(revenue) AS total_revenue', + ]) + .from(TABLE_NAMES.events) + .where('project_id', '=', projectId) + .where('name', '=', 'revenue') + .where('revenue', '>', 0) + .where('created_at', 'BETWEEN', [ + clix.datetime(startDate, 'toDateTime'), + clix.datetime(endDate, 'toDateTime'), + ]) + .rawWhere(this.getRawWhereClause('events', filters)) + .groupBy(['date']) + .rollup() + .transform({ + date: (item) => new Date(item.date).toISOString(), + }); + } + + private mergeRevenueIntoSeries( + series: T[], + revenueData: { date: string; total_revenue: number }[], + ): (T & { total_revenue: number })[] { + const revenueByDate = new Map( + revenueData + .filter((r) => !this.isRollupRow(r.date)) + .map((r) => [r.date, r.total_revenue]), + ); + return series.map((row) => ({ + ...row, + total_revenue: revenueByDate.get(row.date) ?? 0, + })); + } + + private getOverallRevenue( + revenueData: { date: string; total_revenue: number }[], + ): number { + return ( + revenueData.find((r) => this.isRollupRow(r.date))?.total_revenue ?? 0 + ); + } + + private withDistinctSessionsIfNeeded( + query: ReturnType, + params: { + filters: IChartEventFilter[]; + projectId: string; + startDate: string; + endDate: string; + timezone: string; + }, + ): ReturnType { + if (!this.isPageFilter(params.filters)) { + query.rawWhere(this.getRawWhereClause('sessions', params.filters)); + return query; + } + + return clix(this.client, params.timezone) + .with('distinct_sessions', this.getDistinctSessions(params)) + .merge(query) + .where( + 'id', + 'IN', + clix.exp('(SELECT session_id FROM distinct_sessions)'), + ); + } + isPageFilter(filters: IChartEventFilter[]) { return filters.some((filter) => filter.name === 'path' && filter.value); } - getMetrics({ + async getMetrics({ projectId, filters, startDate, @@ -117,169 +240,41 @@ export class OverviewService { total_revenue: number; }[]; }> { - const where = this.getRawWhereClause('sessions', filters); - if (this.isPageFilter(filters)) { - // Session aggregation with bounce rates - const sessionAggQuery = clix(this.client, timezone) - .select([ - `${clix.toStartOf('created_at', interval, timezone)} AS date`, - 'round((countIf(is_bounce = 1 AND sign = 1) * 100.) / countIf(sign = 1), 2) AS bounce_rate', - 'sum(revenue * sign) AS total_revenue', - ]) - .from(TABLE_NAMES.sessions, true) - .where('sign', '=', 1) - .where('project_id', '=', projectId) - .where('created_at', 'BETWEEN', [ - clix.datetime(startDate, 'toDateTime'), - clix.datetime(endDate, 'toDateTime'), - ]) - .rawWhere(where) - .groupBy(['date']) - .rollup() - .orderBy('date', 'ASC'); - - // Overall unique visitors - const overallUniqueVisitorsQuery = clix(this.client, timezone) - .select([ - 'uniq(profile_id) AS unique_visitors', - 'uniq(session_id) AS total_sessions', - ]) - .from(TABLE_NAMES.events) - .where('project_id', '=', projectId) - .where('name', '=', 'screen_view') - .where('created_at', 'BETWEEN', [ - clix.datetime(startDate, 'toDateTime'), - clix.datetime(endDate, 'toDateTime'), - ]) - .rawWhere(this.getRawWhereClause('events', filters)); - - // Use toDate for month/week intervals, toDateTime for others - const rollupDate = - interval === 'month' || interval === 'week' - ? clix.date('1970-01-01') - : clix.datetime('1970-01-01 00:00:00'); - - return clix(this.client, timezone) - .with('session_agg', sessionAggQuery) - .with( - 'overall_bounce_rate', - clix(this.client, timezone) - .select(['bounce_rate']) - .from('session_agg') - .where('date', '=', rollupDate), - ) - .with( - 'overall_total_revenue', - clix(this.client, timezone) - .select(['total_revenue']) - .from('session_agg') - .where('date', '=', rollupDate), - ) - .with( - 'daily_stats', - clix(this.client, timezone) - .select(['date', 'bounce_rate', 'total_revenue']) - .from('session_agg') - .where('date', '!=', rollupDate), - ) - .with('overall_unique_visitors', overallUniqueVisitorsQuery) - .select<{ - date: string; - bounce_rate: number; - unique_visitors: number; - total_sessions: number; - avg_session_duration: number; - total_screen_views: number; - views_per_session: number; - total_revenue: number; - overall_unique_visitors: number; - overall_total_sessions: number; - overall_bounce_rate: number; - overall_total_revenue: number; - }>([ - `${clix.toStartOf('e.created_at', interval)} AS date`, - 'ds.bounce_rate as bounce_rate', - 'uniq(e.profile_id) AS unique_visitors', - 'uniq(e.session_id) AS total_sessions', - 'round(avgIf(duration, duration > 0), 2) / 1000 AS _avg_session_duration', - 'if(isNaN(_avg_session_duration), 0, _avg_session_duration) AS avg_session_duration', - 'count(*) AS total_screen_views', - 'round((count(*) * 1.) / uniq(e.session_id), 2) AS views_per_session', - 'ds.total_revenue AS total_revenue', - '(SELECT unique_visitors FROM overall_unique_visitors) AS overall_unique_visitors', - '(SELECT total_sessions FROM overall_unique_visitors) AS overall_total_sessions', - '(SELECT bounce_rate FROM overall_bounce_rate) AS overall_bounce_rate', - '(SELECT total_revenue FROM overall_total_revenue) AS overall_total_revenue', - ]) - .from(`${TABLE_NAMES.events} AS e`) - .leftJoin( - 'daily_stats AS ds', - `${clix.toStartOf('e.created_at', interval)} = ds.date`, - ) - .where('e.project_id', '=', projectId) - .where('e.name', '=', 'screen_view') - .where('e.created_at', 'BETWEEN', [ - clix.datetime(startDate, 'toDateTime'), - clix.datetime(endDate, 'toDateTime'), - ]) - .rawWhere(this.getRawWhereClause('events', filters)) - .groupBy(['date', 'ds.bounce_rate', 'ds.total_revenue']) - .orderBy('date', 'ASC') - .fill( - clix.toStartOf( - clix.datetime( - startDate, - ['month', 'week'].includes(interval) ? 'toDate' : 'toDateTime', - ), - interval, - ), - clix.datetime( - endDate, - ['month', 'week'].includes(interval) ? 'toDate' : 'toDateTime', - ), - clix.toInterval('1', interval), - ) - .transform({ - date: (item) => new Date(item.date).toISOString(), + return this.isPageFilter(filters) + ? this.getMetricsWithPageFilter({ + projectId, + filters, + startDate, + endDate, + interval, + timezone, }) - .execute() - .then((res) => { - const anyRowWithData = res.find( - (item) => - item.overall_bounce_rate !== null || - item.overall_total_sessions !== null || - item.overall_unique_visitors !== null || - item.overall_total_revenue !== null, - ); - return { - metrics: { - bounce_rate: anyRowWithData?.overall_bounce_rate ?? 0, - unique_visitors: anyRowWithData?.overall_unique_visitors ?? 0, - total_sessions: anyRowWithData?.overall_total_sessions ?? 0, - avg_session_duration: average( - res.map((item) => item.avg_session_duration), - ), - total_screen_views: sum( - res.map((item) => item.total_screen_views), - ), - views_per_session: average( - res.map((item) => item.views_per_session), - ), - total_revenue: anyRowWithData?.overall_total_revenue ?? 0, - }, - series: res.map( - omit([ - 'overall_bounce_rate', - 'overall_unique_visitors', - 'overall_total_sessions', - 'overall_total_revenue', - ]), - ), - }; + : this.getMetricsFromSessions({ + projectId, + filters, + startDate, + endDate, + interval, + timezone, }); - } + } - const query = clix(this.client, timezone) + private async getMetricsFromSessions({ + projectId, + filters, + startDate, + endDate, + interval, + timezone, + }: IGetMetricsInput): Promise<{ + metrics: MetricsRow & { total_revenue: number }; + series: MetricsSeriesRow[]; + }> { + const where = this.getRawWhereClause('sessions', filters); + const fillConfig = this.getFillConfig(interval, startDate, endDate); + + // Session metrics query + const sessionQuery = clix(this.client, timezone) .select<{ date: string; bounce_rate: number; @@ -288,9 +283,8 @@ export class OverviewService { avg_session_duration: number; total_screen_views: number; views_per_session: number; - total_revenue: number; }>([ - `${clix.toStartOf('created_at', interval, timezone)} AS date`, + `${clix.toStartOf('created_at', interval as any, timezone)} AS date`, 'round(sum(sign * is_bounce) * 100.0 / sum(sign), 2) as bounce_rate', 'uniqIf(profile_id, sign > 0) AS unique_visitors', 'sum(sign) AS total_sessions', @@ -298,7 +292,6 @@ export class OverviewService { 'if(isNaN(_avg_session_duration), 0, _avg_session_duration) AS avg_session_duration', 'sum(sign * screen_view_count) AS total_screen_views', 'round(sum(sign * screen_view_count) * 1.0 / sum(sign), 2) AS views_per_session', - 'sum(revenue * sign) AS total_revenue', ]) .from('sessions') .where('created_at', 'BETWEEN', [ @@ -311,41 +304,200 @@ export class OverviewService { .having('sum(sign)', '>', 0) .rollup() .orderBy('date', 'ASC') - .fill( - clix.toStartOf( - clix.datetime( - startDate, - ['month', 'week'].includes(interval) ? 'toDate' : 'toDateTime', - ), - interval, - ), - clix.datetime( - endDate, - ['month', 'week'].includes(interval) ? 'toDate' : 'toDateTime', - ), - clix.toInterval('1', interval), - ) + .fill(fillConfig.from, fillConfig.to, fillConfig.step) .transform({ date: (item) => new Date(item.date).toISOString(), }); - return query.execute().then((res) => { - // First row is the rollup row containing the total values - return { - metrics: { - bounce_rate: res[0]?.bounce_rate ?? 0, - unique_visitors: res[0]?.unique_visitors ?? 0, - total_sessions: res[0]?.total_sessions ?? 0, - avg_session_duration: res[0]?.avg_session_duration ?? 0, - total_screen_views: res[0]?.total_screen_views ?? 0, - views_per_session: res[0]?.views_per_session ?? 0, - total_revenue: res[0]?.total_revenue ?? 0, - }, - series: res - .slice(1) - .map(omit(['overall_bounce_rate', 'overall_unique_visitors'])), - }; + // Revenue query + const revenueQuery = this.createRevenueQuery({ + projectId, + startDate, + endDate, + interval, + timezone, + filters, }); + + // Execute both queries in parallel and merge results + const [sessionRes, revenueRes] = await Promise.all([ + sessionQuery.execute(), + revenueQuery.execute(), + ]); + + const overallRevenue = this.getOverallRevenue(revenueRes); + const series = this.mergeRevenueIntoSeries(sessionRes.slice(1), revenueRes); + + return { + metrics: { + bounce_rate: sessionRes[0]?.bounce_rate ?? 0, + unique_visitors: sessionRes[0]?.unique_visitors ?? 0, + total_sessions: sessionRes[0]?.total_sessions ?? 0, + avg_session_duration: sessionRes[0]?.avg_session_duration ?? 0, + total_screen_views: sessionRes[0]?.total_screen_views ?? 0, + views_per_session: sessionRes[0]?.views_per_session ?? 0, + total_revenue: overallRevenue, + }, + series, + }; + } + + private async getMetricsWithPageFilter({ + projectId, + filters, + startDate, + endDate, + interval, + timezone, + }: IGetMetricsInput): Promise<{ + metrics: MetricsRow & { total_revenue: number }; + series: MetricsSeriesRow[]; + }> { + const where = this.getRawWhereClause('sessions', filters); + const fillConfig = this.getFillConfig(interval, startDate, endDate); + + // Session aggregation with bounce rates + const sessionAggQuery = clix(this.client, timezone) + .select([ + `${clix.toStartOf('created_at', interval as any, timezone)} AS date`, + 'round((countIf(is_bounce = 1 AND sign = 1) * 100.) / countIf(sign = 1), 2) AS bounce_rate', + ]) + .from(TABLE_NAMES.sessions, true) + .where('sign', '=', 1) + .where('project_id', '=', projectId) + .where('created_at', 'BETWEEN', [ + clix.datetime(startDate, 'toDateTime'), + clix.datetime(endDate, 'toDateTime'), + ]) + .rawWhere(where) + .groupBy(['date']) + .rollup() + .orderBy('date', 'ASC'); + + // Overall unique visitors + const overallUniqueVisitorsQuery = clix(this.client, timezone) + .select([ + 'uniq(profile_id) AS unique_visitors', + 'uniq(session_id) AS total_sessions', + ]) + .from(TABLE_NAMES.events) + .where('project_id', '=', projectId) + .where('name', '=', 'screen_view') + .where('created_at', 'BETWEEN', [ + clix.datetime(startDate, 'toDateTime'), + clix.datetime(endDate, 'toDateTime'), + ]) + .rawWhere(this.getRawWhereClause('events', filters)); + + // Use toDate for month/week intervals, toDateTime for others + const rollupDate = + interval === 'month' || interval === 'week' + ? clix.date(ROLLUP_DATE_PREFIX) + : clix.datetime(`${ROLLUP_DATE_PREFIX} 00:00:00`); + + // Main metrics query (without revenue) + const mainQuery = clix(this.client, timezone) + .with('session_agg', sessionAggQuery) + .with( + 'overall_bounce_rate', + clix(this.client, timezone) + .select(['bounce_rate']) + .from('session_agg') + .where('date', '=', rollupDate), + ) + .with( + 'daily_session_stats', + clix(this.client, timezone) + .select(['date', 'bounce_rate']) + .from('session_agg') + .where('date', '!=', rollupDate), + ) + .with('overall_unique_visitors', overallUniqueVisitorsQuery) + .select<{ + date: string; + bounce_rate: number; + unique_visitors: number; + total_sessions: number; + avg_session_duration: number; + total_screen_views: number; + views_per_session: number; + overall_unique_visitors: number; + overall_total_sessions: number; + overall_bounce_rate: number; + }>([ + `${clix.toStartOf('e.created_at', interval as any)} AS date`, + 'dss.bounce_rate as bounce_rate', + 'uniq(e.profile_id) AS unique_visitors', + 'uniq(e.session_id) AS total_sessions', + 'round(avgIf(duration, duration > 0), 2) / 1000 AS _avg_session_duration', + 'if(isNaN(_avg_session_duration), 0, _avg_session_duration) AS avg_session_duration', + 'count(*) AS total_screen_views', + 'round((count(*) * 1.) / uniq(e.session_id), 2) AS views_per_session', + '(SELECT unique_visitors FROM overall_unique_visitors) AS overall_unique_visitors', + '(SELECT total_sessions FROM overall_unique_visitors) AS overall_total_sessions', + '(SELECT bounce_rate FROM overall_bounce_rate) AS overall_bounce_rate', + ]) + .from(`${TABLE_NAMES.events} AS e`) + .leftJoin( + 'daily_session_stats AS dss', + `${clix.toStartOf('e.created_at', interval as any)} = dss.date`, + ) + .where('e.project_id', '=', projectId) + .where('e.name', '=', 'screen_view') + .where('e.created_at', 'BETWEEN', [ + clix.datetime(startDate, 'toDateTime'), + clix.datetime(endDate, 'toDateTime'), + ]) + .rawWhere(this.getRawWhereClause('events', filters)) + .groupBy(['date', 'dss.bounce_rate']) + .orderBy('date', 'ASC') + .fill(fillConfig.from, fillConfig.to, fillConfig.step) + .transform({ + date: (item) => new Date(item.date).toISOString(), + }); + + // Revenue query + const revenueQuery = this.createRevenueQuery({ + projectId, + startDate, + endDate, + interval, + timezone, + filters, + }); + + // Execute both queries in parallel and merge results + const [mainRes, revenueRes] = await Promise.all([ + mainQuery.execute(), + revenueQuery.execute(), + ]); + + const overallRevenue = this.getOverallRevenue(revenueRes); + const series = this.mergeRevenueIntoSeries(mainRes, revenueRes); + + const anyRowWithData = mainRes.find( + (item) => + item.overall_bounce_rate !== null || + item.overall_total_sessions !== null || + item.overall_unique_visitors !== null, + ); + + return { + metrics: { + bounce_rate: anyRowWithData?.overall_bounce_rate ?? 0, + unique_visitors: anyRowWithData?.overall_unique_visitors ?? 0, + total_sessions: anyRowWithData?.overall_total_sessions ?? 0, + avg_session_duration: average( + mainRes.map((item) => item.avg_session_duration), + ), + total_screen_views: sum(mainRes.map((item) => item.total_screen_views)), + views_per_session: average( + mainRes.map((item) => item.views_per_session), + ), + total_revenue: overallRevenue, + }, + series, + }; } getRawWhereClause(type: 'events' | 'sessions', filters: IChartEventFilter[]) { @@ -368,12 +520,6 @@ export class OverviewService { } return item; }), - // .filter((item) => { - // if (this.isPageFilter(filters) && type === 'sessions') { - // return item.name !== 'entry_path' && item.name !== 'entry_origin'; - // } - // return true; - // }), ); return Object.values(where).join(' AND '); @@ -414,7 +560,6 @@ export class OverviewService { 'entry_path', 'entry_origin', 'coalesce(round(countIf(is_bounce = 1 AND sign = 1) * 100.0 / countIf(sign = 1), 2), 0) as bounce_rate', - 'sum(revenue * sign) as revenue', ]) .from(TABLE_NAMES.sessions, true) .where('sign', '=', 1) @@ -446,7 +591,6 @@ export class OverviewService { 'p.avg_duration', 'p.count as sessions', 'b.bounce_rate', - 'coalesce(b.revenue, 0) as revenue', ]) .from('page_stats p', false) .leftJoin( @@ -469,16 +613,6 @@ export class OverviewService { limit = 10, timezone, }: IGetTopEntryExitInput) { - const where = this.getRawWhereClause('sessions', filters); - - const distinctSessionQuery = this.getDistinctSessions({ - projectId, - filters, - startDate, - endDate, - timezone, - }); - const offset = (cursor - 1) * limit; const query = clix(this.client, timezone) @@ -503,25 +637,19 @@ export class OverviewService { clix.datetime(startDate, 'toDateTime'), clix.datetime(endDate, 'toDateTime'), ]) - .rawWhere(where) .groupBy([`${mode}_origin`, `${mode}_path`]) .having('sum(sign)', '>', 0) .orderBy('sessions', 'DESC') .limit(limit) .offset(offset); - let mainQuery = query; - - if (this.isPageFilter(filters)) { - mainQuery = clix(this.client, timezone) - .with('distinct_sessions', distinctSessionQuery) - .merge(query) - .where( - 'id', - 'IN', - clix.exp('(SELECT session_id FROM distinct_sessions)'), - ); - } + const mainQuery = this.withDistinctSessionsIfNeeded(query, { + projectId, + filters, + startDate, + endDate, + timezone, + }); return mainQuery.execute(); } @@ -560,28 +688,7 @@ export class OverviewService { limit = 10, timezone, }: IGetTopGenericInput) { - const distinctSessionQuery = this.getDistinctSessions({ - projectId, - filters, - startDate, - endDate, - timezone, - }); - - const prefixColumn = (() => { - switch (column) { - case 'region': - return 'country'; - case 'city': - return 'country'; - case 'browser_version': - return 'browser'; - case 'os_version': - return 'os'; - } - return null; - })(); - + const prefixColumn = COLUMN_PREFIX_MAP[column] ?? null; const offset = (cursor - 1) * limit; const query = clix(this.client, timezone) @@ -612,24 +719,15 @@ export class OverviewService { .limit(limit) .offset(offset); - let mainQuery = query; + const mainQuery = this.withDistinctSessionsIfNeeded(query, { + projectId, + filters, + startDate, + endDate, + timezone, + }); - if (this.isPageFilter(filters)) { - mainQuery = clix(this.client, timezone) - .with('distinct_sessions', distinctSessionQuery) - .merge(query) - .where( - 'id', - 'IN', - clix.exp('(SELECT session_id FROM distinct_sessions)'), - ); - } else { - mainQuery.rawWhere(this.getRawWhereClause('sessions', filters)); - } - - const res = await mainQuery.execute(); - - return res; + return mainQuery.execute(); } }