feature(dashboard): add new retention chart type

This commit is contained in:
Carl-Gerhard Lindesvärd
2024-10-15 20:40:24 +02:00
committed by Carl-Gerhard Lindesvärd
parent e2065da16e
commit f977c5454a
53 changed files with 1463 additions and 364 deletions

View File

@@ -32,6 +32,19 @@ export function getTimezoneFromDateString(_date: string) {
'-10:00': 'Pacific/Honolulu',
'-11:00': 'Pacific/Midway',
'-12:00': 'Pacific/Tarawa',
// Additional time zones
'+05:30': 'Asia/Kolkata',
'+05:45': 'Asia/Kathmandu',
'+08:45': 'Australia/Eucla',
'+09:30': 'Australia/Darwin',
'+10:30': 'Australia/Adelaide',
'+12:45': 'Pacific/Chatham',
'+13:00': 'Pacific/Apia',
'+14:00': 'Pacific/Kiritimati',
'-02:30': 'America/St_Johns',
'-03:30': 'America/St_Johns',
'-04:30': 'America/Caracas',
'-09:30': 'Pacific/Marquesas',
};
const defaultTimezone = 'UTC';

View File

@@ -5,10 +5,13 @@ export const round = (num: number, decimals = 2) => {
return Math.round((num + Number.EPSILON) * factor) / factor;
};
export const average = (arr: (number | null)[]) => {
export const average = (arr: (number | null)[], includeZero = false) => {
const filtered = arr.filter(
(n): n is number =>
isNumber(n) && !Number.isNaN(n) && Number.isFinite(n) && n !== 0,
isNumber(n) &&
!Number.isNaN(n) &&
Number.isFinite(n) &&
(includeZero || n !== 0),
);
const avg = filtered.reduce((p, c) => p + c, 0) / filtered.length;
return Number.isNaN(avg) ? 0 : avg;
@@ -17,10 +20,10 @@ export const average = (arr: (number | null)[]) => {
export const sum = (arr: (number | null | undefined)[]): number =>
round(arr.filter(isNumber).reduce((acc, item) => acc + item, 0));
export const min = (arr: (number | null)[]): number =>
export const min = (arr: (number | null | undefined)[]): number =>
Math.min(...arr.filter(isNumber));
export const max = (arr: (number | null)[]): number =>
export const max = (arr: (number | null | undefined)[]): number =>
Math.max(...arr.filter(isNumber));
export const isFloat = (n: number) => n % 1 !== 0;

View File

@@ -106,6 +106,7 @@ export const intervals = {
minute: 'minute',
day: 'day',
hour: 'hour',
week: 'week',
month: 'month',
} as const;

View File

@@ -0,0 +1,58 @@
-- +goose Up
-- cohort_events_mv: groups events_v2 rows by project, event name, calendar day
-- (toDate(created_at)) and profile, storing how many events fell in each group.
-- Rows where profile_id equals device_id are excluded (presumably visitors that
-- were never identified -- confirm against the ingestion code).
-- NOTE(review): event_count is a plain COUNT() column, not an aggregate-function
-- state; confirm how AggregatingMergeTree combines rows that share a sorting key
-- before relying on event_count.
-- NOTE(review): POPULATE backfills from existing data at creation time; confirm
-- whether rows inserted while the view is being created can be missed.
-- +goose StatementBegin
CREATE MATERIALIZED VIEW cohort_events_mv ENGINE = AggregatingMergeTree()
ORDER BY (project_id, name, created_at, profile_id) POPULATE AS
SELECT project_id,
name,
toDate(created_at) AS created_at,
profile_id,
COUNT() AS event_count
FROM events_v2
WHERE profile_id != device_id
GROUP BY project_id,
name,
created_at,
profile_id;
-- +goose StatementEnd
-- distinct_event_names_mv: groups events_v2 rows by project and event name,
-- keeping the latest created_at and an event count for each group.
-- NOTE(review): created_at is part of the sorting key but is computed as
-- max(created_at) per group, and event_count is a plain count(); confirm how
-- rows are combined on merge if either column is ever read as an aggregate.
-- +goose StatementBegin
CREATE MATERIALIZED VIEW distinct_event_names_mv ENGINE = AggregatingMergeTree()
ORDER BY (project_id, name, created_at) POPULATE AS
SELECT project_id,
name,
max(created_at) AS created_at,
count() AS event_count
FROM events_v2
GROUP BY project_id,
name;
-- +goose StatementEnd
-- event_property_values_mv: expands each event's `properties` entries into
-- (property_key, property_value) pairs via arrayJoin/untuple and keeps the
-- latest created_at per (project, event name, key, value).
-- Pairs with an empty key or empty value are dropped, as are the keys
-- `__duration_from` and `__properties_from`.
-- +goose StatementBegin
CREATE MATERIALIZED VIEW event_property_values_mv ENGINE = AggregatingMergeTree()
ORDER BY (project_id, name, property_key, property_value) POPULATE AS
select project_id,
name,
key_value.keys as property_key,
key_value.values as property_value,
created_at
from (
SELECT project_id,
name,
untuple(arrayJoin(properties)) as key_value,
max(created_at) as created_at
from events_v2
group by project_id,
name,
key_value
)
where property_value != ''
and property_key != ''
and property_key NOT IN ('__duration_from', '__properties_from')
group by project_id,
name,
property_key,
property_value,
created_at;
-- +goose StatementEnd
-- +goose Down
-- Roll back the Up section by dropping the three materialized views it
-- created, in reverse creation order. IF EXISTS keeps the rollback re-runnable.
-- Each statement is sent on its own because the Up section also issues one
-- statement per StatementBegin/StatementEnd pair.
-- +goose StatementBegin
DROP VIEW IF EXISTS event_property_values_mv;
-- +goose StatementEnd
-- +goose StatementBegin
DROP VIEW IF EXISTS distinct_event_names_mv;
-- +goose StatementEnd
-- +goose StatementBegin
DROP VIEW IF EXISTS cohort_events_mv;
-- +goose StatementEnd

View File

@@ -0,0 +1,2 @@
-- AlterEnum
ALTER TYPE "ChartType" ADD VALUE 'retention';

View File

@@ -0,0 +1,2 @@
-- AlterEnum
ALTER TYPE "Interval" ADD VALUE 'week';

View File

@@ -0,0 +1,2 @@
-- AlterTable
ALTER TABLE "reports" ADD COLUMN "criteria" TEXT;

View File

@@ -192,6 +192,7 @@ enum Interval {
day
month
minute
week
}
enum ChartType {
@@ -203,6 +204,7 @@ enum ChartType {
area
map
funnel
retention
}
model Dashboard {
@@ -243,6 +245,7 @@ model Report {
projectId String
project Project @relation(fields: [projectId], references: [id])
previous Boolean @default(false)
criteria String?
dashboardId String
dashboard Dashboard @relation(fields: [dashboardId], references: [id])

View File

@@ -14,6 +14,9 @@ export const TABLE_NAMES = {
self_hosting: 'self_hosting',
events_bots: 'events_bots',
dau_mv: 'dau_mv',
event_names_mv: 'distinct_event_names_mv',
event_property_values_mv: 'event_property_values_mv',
cohort_events_mv: 'cohort_events_mv',
};
export const originalCh = createClient({
@@ -129,6 +132,10 @@ export function formatClickhouseDate(
_date: Date | string,
skipTime = false,
): string {
if (typeof _date === 'string') {
return _date.slice(0, 19).replace('T', ' ');
}
const date = typeof _date === 'string' ? new Date(_date) : _date;
if (skipTime) {
return date.toISOString().split('T')[0]!;

View File

@@ -81,6 +81,10 @@ export function getChartSql({
sb.select.date = `toStartOfDay(toTimeZone(created_at, '${getTimezoneFromDateString(startDate)}')) as date`;
break;
}
case 'week': {
sb.select.date = `toStartOfWeek(toTimeZone(created_at, '${getTimezoneFromDateString(startDate)}')) as date`;
break;
}
case 'month': {
sb.select.date = `toStartOfMonth(toTimeZone(created_at, '${getTimezoneFromDateString(startDate)}')) as date`;
break;

View File

@@ -10,6 +10,7 @@ import type {
IChartLineType,
IChartProps,
IChartRange,
ICriteria,
} from '@openpanel/validation';
import { db } from '../prisma-client';
@@ -64,6 +65,7 @@ export function transformReport(
formula: report.formula ?? undefined,
metric: report.metric ?? 'sum',
unit: report.unit ?? undefined,
criteria: (report.criteria as ICriteria) ?? undefined,
};
}

View File

@@ -1,4 +1,4 @@
import { flatten, map, pipe, prop, sort, uniq } from 'ramda';
import { flatten, map, pipe, prop, range, sort, uniq } from 'ramda';
import { escape } from 'sqlstring';
import { z } from 'zod';
@@ -7,12 +7,23 @@ import {
chQuery,
createSqlBuilder,
db,
formatClickhouseDate,
getSelectPropertyKey,
toDate,
} from '@openpanel/db';
import { zChartInput, zRange, zTimeInterval } from '@openpanel/validation';
import {
zChartInput,
zCriteria,
zRange,
zTimeInterval,
} from '@openpanel/validation';
import { round } from '@openpanel/common';
import {
differenceInDays,
differenceInMonths,
differenceInWeeks,
formatISO,
} from 'date-fns';
import { getProjectAccessCached } from '../access';
import { TRPCAccessError } from '../errors';
import { createTRPCRouter, protectedProcedure, publicProcedure } from '../trpc';
@@ -24,21 +35,23 @@ import {
getFunnelStep,
} from './chart.helpers';
/**
 * Renders a date as a 19-character `YYYY-MM-DD HH:mm:ss`-shaped string for use
 * in ClickHouse date literals.
 *
 * Strings are used as given; `Date` values are first serialised with
 * date-fns `formatISO`. In both cases the first `T` is replaced by a space and
 * everything past the 19th character is cut off.
 *
 * NOTE(review): despite the name, no timezone conversion is visible here —
 * any offset suffix is simply truncated. Confirm callers pass values that are
 * already in the intended zone.
 *
 * @param date ISO-like date string or `Date` instance.
 * @returns the truncated, space-separated date-time string.
 */
function utc(date: string | Date) {
  const iso = typeof date === 'string' ? date : formatISO(date);
  return iso.replace('T', ' ').slice(0, 19);
}
export const chartRouter = createTRPCRouter({
events: protectedProcedure
.input(
z.object({
projectId: z.string(),
range: zRange,
interval: zTimeInterval,
startDate: z.string().nullish(),
endDate: z.string().nullish(),
}),
)
.query(async ({ input: { projectId, ...input } }) => {
const { startDate, endDate } = getChartStartEndDate(input);
.query(async ({ input: { projectId } }) => {
const events = await chQuery<{ name: string }>(
`SELECT DISTINCT name FROM ${TABLE_NAMES.events} WHERE project_id = ${escape(projectId)} AND ${toDate('created_at', input.interval)} BETWEEN ${toDate(formatClickhouseDate(startDate), input.interval)} AND ${toDate(formatClickhouseDate(endDate), input.interval)};`,
`SELECT DISTINCT name FROM ${TABLE_NAMES.event_names_mv} WHERE project_id = ${escape(projectId)}`,
);
return [
@@ -54,23 +67,22 @@ export const chartRouter = createTRPCRouter({
z.object({
event: z.string().optional(),
projectId: z.string(),
range: zRange,
interval: zTimeInterval,
startDate: z.string().nullish(),
endDate: z.string().nullish(),
}),
)
.query(async ({ input: { projectId, event, ...input } }) => {
const { startDate, endDate } = getChartStartEndDate(input);
const events = await chQuery<{ keys: string[] }>(
`SELECT distinct mapKeys(properties) as keys from ${TABLE_NAMES.events} where ${
event && event !== '*' ? `name = ${escape(event)} AND ` : ''
} project_id = ${escape(projectId)} AND
${toDate('created_at', input.interval)} BETWEEN ${toDate(formatClickhouseDate(startDate), input.interval)} AND ${toDate(formatClickhouseDate(endDate), input.interval)};`,
.query(async ({ input: { projectId, event } }) => {
const res = await chQuery<{ property_key: string; created_at: string }>(
`SELECT
distinct property_key,
max(created_at) as created_at
FROM ${TABLE_NAMES.event_property_values_mv}
WHERE project_id = ${escape(projectId)}
${event && event !== '*' ? `AND name = ${escape(event)}` : ''}
GROUP BY property_key
ORDER BY created_at DESC`,
);
const properties = events
.flatMap((event) => event.keys)
const properties = res
.map((item) => item.property_key)
.map((item) => item.replace(/\.([0-9]+)\./g, '.*.'))
.map((item) => item.replace(/\.([0-9]+)/g, '[*]'))
.map((item) => `properties.${item}`);
@@ -108,36 +120,55 @@ export const chartRouter = createTRPCRouter({
event: z.string(),
property: z.string(),
projectId: z.string(),
range: zRange,
interval: zTimeInterval,
startDate: z.string().nullish(),
endDate: z.string().nullish(),
}),
)
.query(async ({ input: { event, property, projectId, ...input } }) => {
const { startDate, endDate } = getChartStartEndDate(input);
if (property === 'has_profile') {
return {
values: ['true', 'false'],
};
}
const { sb, getSql } = createSqlBuilder();
sb.where.project_id = `project_id = ${escape(projectId)}`;
if (event !== '*') {
sb.where.event = `name = ${escape(event)}`;
const values: string[] = [];
if (property.startsWith('properties.')) {
const propertyKey = property.replace(/^properties\./, '');
const res = await chQuery<{
property_value: string;
created_at: string;
}>(
`SELECT
distinct property_value,
max(created_at) as created_at
FROM ${TABLE_NAMES.event_property_values_mv}
WHERE project_id = ${escape(projectId)}
AND property_key = ${escape(propertyKey)}
${event && event !== '*' ? `AND name = ${escape(event)}` : ''}
GROUP BY property_value
ORDER BY created_at DESC`,
);
values.push(...res.map((e) => e.property_value));
} else {
const { sb, getSql } = createSqlBuilder();
sb.where.project_id = `project_id = ${escape(projectId)}`;
if (event !== '*') {
sb.where.event = `name = ${escape(event)}`;
}
sb.select.values = `distinct ${getSelectPropertyKey(property)} as values`;
sb.where.date = `${toDate('created_at', 'month')} > now() - INTERVAL 6 MONTH`;
const events = await chQuery<{ values: string[] }>(getSql());
values.push(
...pipe(
(data: typeof events) => map(prop('values'), data),
flatten,
uniq,
sort((a, b) => a.length - b.length),
)(events),
);
}
sb.select.values = `distinct ${getSelectPropertyKey(property)} as values`;
sb.where.date = `${toDate('created_at', input.interval)} BETWEEN ${toDate(formatClickhouseDate(startDate), input.interval)} AND ${toDate(formatClickhouseDate(endDate), input.interval)};`;
const events = await chQuery<{ values: string[] }>(getSql());
const values = pipe(
(data: typeof events) => map(prop('values'), data),
flatten,
uniq,
sort((a, b) => a.length - b.length),
)(events);
return {
values,
@@ -204,4 +235,208 @@ export const chartRouter = createTRPCRouter({
return getChart(input);
}),
/**
 * Retention (cohort) report.
 *
 * Profiles that triggered any of `firstEvent` inside the selected date window
 * form one cohort per interval bucket. For every cohort the query counts the
 * distinct profiles that triggered any of `secondEvent` N buckets later, for
 * N in 0..diffInterval. Both sides read from the `cohort_events_mv` table,
 * whose `created_at` is day-granular, so `minute` and `hour` intervals are
 * bucketed as days.
 *
 * `criteria` selects how a profile is attributed to bucket N:
 * `on_or_after` counts activity in bucket N or any later bucket (`>=`),
 * `on` counts activity in exactly bucket N (`=`).
 *
 * @returns the rows produced by `processCohortData` for the query result.
 */
cohort: protectedProcedure
  .input(
    z.object({
      projectId: z.string(),
      firstEvent: z.array(z.string()).min(1),
      secondEvent: z.array(z.string()).min(1),
      criteria: zCriteria.default('on_or_after'),
      startDate: z.string().nullish(),
      endDate: z.string().nullish(),
      interval: zTimeInterval.default('day'),
      range: zRange,
    }),
  )
  .query(async ({ input }) => {
    const { projectId, firstEvent, secondEvent } = input;
    const dates = getChartStartEndDate(input);

    // Number of whole interval buckets between the start and end of the window.
    const diffInterval = {
      minute: () => differenceInDays(dates.endDate, dates.startDate),
      hour: () => differenceInDays(dates.endDate, dates.startDate),
      day: () => differenceInDays(dates.endDate, dates.startDate),
      week: () => differenceInWeeks(dates.endDate, dates.startDate),
      month: () => differenceInMonths(dates.endDate, dates.startDate),
    }[input.interval]();

    // ClickHouse interval unit used for dateDiff / INTERVAL arithmetic.
    const sqlInterval = {
      minute: 'DAY',
      hour: 'DAY',
      day: 'DAY',
      week: 'WEEK',
      month: 'MONTH',
    }[input.interval];

    // ClickHouse function that snaps a date to the start of its bucket.
    const sqlToStartOf = {
      minute: 'toDate',
      hour: 'toDate',
      day: 'toDate',
      week: 'toStartOfWeek',
      month: 'toStartOfMonth',
    }[input.interval];

    const countCriteria = input.criteria === 'on_or_after' ? '>=' : '=';

    // One `interval_N_users` array column per bucket offset.
    const usersSelect = range(0, diffInterval + 1)
      .map(
        (index) =>
          `groupUniqArrayIf(profile_id, x_after_cohort ${countCriteria} ${index}) AS interval_${index}_users`,
      )
      .join(',\n');

    // One `interval_N_user_count` column per bucket offset.
    const countsSelect = range(0, diffInterval + 1)
      .map(
        (index) =>
          `length(interval_${index}_users) AS interval_${index}_user_count`,
      )
      .join(',\n');

    // Builds an escaped `name = …` / `name IN (…)` predicate.
    const whereEventNameIs = (event: string[]) => {
      if (event.length === 1) {
        return `name = ${escape(event[0])}`;
      }
      return `name IN (${event.map((e) => escape(e)).join(',')})`;
    };

    // Unused alternatives, kept for reference:
    // const dropoffsSelect = range(1, diffInterval + 1)
    //   .map(
    //     (index) =>
    //       `arrayFilter(x -> NOT has(interval_${index}_users, x), interval_${index - 1}_users) AS interval_${index}_dropoffs`,
    //   )
    //   .join(',\n');
    // const dropoffCountsSelect = range(1, diffInterval + 1)
    //   .map(
    //     (index) =>
    //       `length(interval_${index}_dropoffs) AS interval_${index}_dropoff_count`,
    //   )
    //   .join(',\n');
    // SELECT
    //   project_id,
    //   profile_id AS userID,
    //   name,
    //   toDate(created_at) AS cohort_interval
    // FROM events_v2
    // WHERE profile_id != device_id
    //   AND ${whereEventNameIs(firstEvent)}
    //   AND project_id = ${escape(projectId)}
    //   AND created_at BETWEEN toDate('${utc(dates.startDate)}') AND toDate('${utc(dates.endDate)}')
    // GROUP BY project_id, name, cohort_interval, userID

    // The date bounds originate from caller-supplied strings, so they go
    // through `escape` like every other interpolated value instead of being
    // wrapped in hand-written quotes.
    const startDateSql = escape(utc(dates.startDate));
    const endDateSql = escape(utc(dates.endDate));

    // NOTE(review): the final SELECT references `cohort_sizes.` while the join
    // aliases that CTE as `cs` — confirm ClickHouse resolves this as intended.
    const cohortQuery = `
      WITH
      cohort_users AS (
        SELECT
          profile_id AS userID,
          project_id,
          ${sqlToStartOf}(created_at) AS cohort_interval
        FROM ${TABLE_NAMES.cohort_events_mv}
        WHERE ${whereEventNameIs(firstEvent)}
          AND project_id = ${escape(projectId)}
          AND created_at BETWEEN toDate(${startDateSql}) AND toDate(${endDateSql})
      ),
      retention_matrix AS (
        SELECT
          c.cohort_interval,
          e.profile_id,
          dateDiff('${sqlInterval}', c.cohort_interval, ${sqlToStartOf}(e.created_at)) AS x_after_cohort
        FROM cohort_users AS c
        INNER JOIN ${TABLE_NAMES.cohort_events_mv} AS e ON c.userID = e.profile_id
        WHERE (${whereEventNameIs(secondEvent)}) AND (e.project_id = ${escape(projectId)})
          AND ((e.created_at >= c.cohort_interval) AND (e.created_at <= (c.cohort_interval + INTERVAL ${diffInterval} ${sqlInterval})))
      ),
      interval_users AS (
        SELECT
          cohort_interval,
          ${usersSelect}
        FROM retention_matrix
        GROUP BY cohort_interval
      ),
      cohort_sizes AS (
        SELECT
          cohort_interval,
          COUNT(DISTINCT userID) AS total_first_event_count
        FROM cohort_users
        GROUP BY cohort_interval
      )
      SELECT
        cohort_interval,
        cohort_sizes.total_first_event_count,
        ${countsSelect}
      FROM interval_users
      LEFT JOIN cohort_sizes AS cs ON cohort_interval = cs.cohort_interval
      ORDER BY cohort_interval ASC
    `;

    const cohortData = await chQuery<{
      cohort_interval: string;
      total_first_event_count: number;
      [key: string]: any;
    }>(cohortQuery);

    return processCohortData(cohortData, diffInterval);
  }),
});
/**
 * Turns raw cohort query rows into the retention table returned to clients.
 *
 * Each input row yields `{ cohort_interval, sum, values, percentages }`, where
 * `sum` is the row's `total_first_event_count`, `values[i]` is
 * `interval_<i>_user_count` (0 when missing or falsy) for i in
 * 0..diffInterval, and `percentages[i]` is `values[i]` as a share of `sum`
 * rounded to 2 decimals (0 when `sum` is not positive).
 *
 * An extra row labelled `'Average'` is prepended. Its `sum` and `values` are
 * per-cohort means rounded to 0 decimals, and its `percentages` are the
 * unweighted mean of the per-cohort percentages rounded to 2 decimals.
 *
 * @param data rows returned by the cohort query.
 * @param diffInterval highest interval offset present in the rows.
 * @returns `[]` for empty input, otherwise the average row followed by one
 *   row per cohort in input order.
 */
function processCohortData(
  data: Array<{
    cohort_interval: string;
    total_first_event_count: number;
    [key: string]: any;
  }>,
  diffInterval: number,
) {
  if (data.length === 0) {
    return [];
  }

  const offsets = range(0, diffInterval + 1);

  const cohortRows = data.map((row) => {
    const cohortSize = row.total_first_event_count;
    const counts = offsets.map(
      (offset) => (row[`interval_${offset}_user_count`] || 0) as number,
    );
    const shares = counts.map((count) =>
      cohortSize > 0 ? round((count / cohortSize) * 100, 2) : 0,
    );
    return {
      cohort_interval: row.cohort_interval,
      sum: cohortSize,
      values: counts,
      percentages: shares,
    };
  });

  // Running totals across all cohorts, one slot per interval offset.
  let sizeTotal = 0;
  const valueTotals: Array<number> = offsets.map(() => 0);
  const percentageTotals: Array<number> = offsets.map(() => 0);

  for (const cohort of cohortRows) {
    sizeTotal += cohort.sum;
    for (const [offset, count] of cohort.values.entries()) {
      valueTotals[offset] += count;
      percentageTotals[offset] += cohort.percentages[offset]!;
    }
  }

  const cohortCount = cohortRows.length;

  const averageRow = {
    cohort_interval: 'Average',
    sum: cohortCount > 0 ? round(sizeTotal / cohortCount, 0) : 0,
    percentages: percentageTotals.map((total) =>
      round(total / cohortCount, 2),
    ),
    values: valueTotals.map((total) => round(total / cohortCount, 0)),
  };

  return [averageRow, ...cohortRows];
}

View File

@@ -44,6 +44,7 @@ export const reportRouter = createTRPCRouter({
range: report.range === 'custom' ? '30d' : report.range,
formula: report.formula,
previous: report.previous ?? false,
unit: report.unit,
},
});
}),
@@ -84,6 +85,7 @@ export const reportRouter = createTRPCRouter({
range: report.range === 'custom' ? '30d' : report.range,
formula: report.formula,
previous: report.previous ?? false,
unit: report.unit,
},
});
}),

View File

@@ -75,10 +75,13 @@ export const zChartInput = z.object({
offset: z.number().optional(),
});
/**
 * Retention attribution criteria.
 *
 * In the cohort query, `on_or_after` compares a profile's interval offset with
 * `>=` (active in that interval or any later one) and `on` compares it with
 * `=` (active in exactly that interval).
 */
export const zCriteria = z.enum(['on_or_after', 'on']);
/**
 * Report payload: the chart input extended with a required `name` and
 * `lineType`, plus an optional `unit` string and optional retention
 * `criteria`.
 */
export const zReportInput = zChartInput.extend({
name: z.string(),
lineType: zLineType,
unit: z.string().optional(),
criteria: zCriteria.optional(),
});
export const zInviteUser = z.object({

View File

@@ -5,6 +5,7 @@ import type {
zChartEvent,
zChartInput,
zChartType,
zCriteria,
zLineType,
zMetric,
zRange,
@@ -41,6 +42,7 @@ export type IGetChartDataInput = {
startDate: string;
endDate: string;
} & Omit<IChartInput, 'events' | 'name' | 'startDate' | 'endDate' | 'range'>;
/** Union of the literal values accepted by `zCriteria`. */
export type ICriteria = z.infer<typeof zCriteria>;
export type PreviousValue =
| {