prep events partition

This commit is contained in:
Carl-Gerhard Lindesvärd
2024-07-19 22:08:22 +02:00
parent ddc2ce338f
commit 3993b493e3
27 changed files with 136 additions and 71 deletions

View File

@@ -34,6 +34,51 @@ CREATE TABLE IF NOT EXISTS openpanel.events (
ORDER BY
(project_id, created_at, profile_id) SETTINGS index_granularity = 8192;
-- events_v2: event table partitioned by calendar month of created_at.
-- Uses the same ORDER BY key and index granularity as openpanel.events.
-- NOTE(review): presumably the partitioned successor of openpanel.events - confirm before migrating data.
CREATE TABLE IF NOT EXISTS openpanel.events_v2 (
`id` UUID DEFAULT generateUUIDv4(),
`name` String,
`device_id` String,
`profile_id` String,
`project_id` String,
`session_id` String,
`path` String,
`origin` String,
`referrer` String,
`referrer_name` String,
`referrer_type` String,
`duration` UInt64,
-- free-form key/value pairs, both keys and values stored as strings
`properties` Map(String, String),
-- DateTime64(3) keeps millisecond precision
`created_at` DateTime64(3),
`country` String,
`city` String,
`region` String,
-- coordinates are nullable, so NULL can be stored when no location is available
`longitude` Nullable(Float32),
`latitude` Nullable(Float32),
`os` String,
`os_version` String,
`browser` String,
`browser_version` String,
-- device: mobile/desktop/tablet
`device` String,
-- brand: (Samsung, OnePlus)
`brand` String,
-- model: (Samsung Galaxy, iPhone X)
`model` String
-- toYYYYMM(created_at) yields one partition per calendar month
) ENGINE = MergeTree() PARTITION BY toYYYYMM(created_at)
ORDER BY
(project_id, created_at, profile_id) SETTINGS index_granularity = 8192;
-- Drop the utm_*, sdk, sdk_version, client_type and continent columns from the events table.
-- The table is qualified with the openpanel database like the other statements in this file,
-- so the statement does not depend on the session's default database.
-- IF EXISTS makes every drop a no-op when the column is already gone, which keeps this
-- script re-runnable alongside the CREATE TABLE IF NOT EXISTS statements.
ALTER TABLE
openpanel.events DROP COLUMN IF EXISTS utm_source,
DROP COLUMN IF EXISTS utm_medium,
DROP COLUMN IF EXISTS utm_campaign,
DROP COLUMN IF EXISTS utm_term,
DROP COLUMN IF EXISTS utm_content,
DROP COLUMN IF EXISTS sdk,
DROP COLUMN IF EXISTS sdk_version,
DROP COLUMN IF EXISTS client_type,
DROP COLUMN IF EXISTS continent;
CREATE TABLE IF NOT EXISTS openpanel.events_bots (
`id` UUID DEFAULT generateUUIDv4(),
`project_id` String,

View File

@@ -4,7 +4,7 @@ import SuperJSON from 'superjson';
import { deepMergeObjects } from '@openpanel/common';
import { redis, redisPub } from '@openpanel/redis';
import { ch } from '../clickhouse-client';
import { ch, TABLE_NAMES } from '../clickhouse-client';
import { transformEvent } from '../services/event.service';
import type {
IClickhouseEvent,
@@ -30,7 +30,7 @@ const sortOldestFirst = (
export class EventBuffer extends RedisBuffer<IClickhouseEvent> {
constructor() {
super({
table: 'events',
table: TABLE_NAMES.events,
redis,
});
}
@@ -176,7 +176,7 @@ export class EventBuffer extends RedisBuffer<IClickhouseEvent> {
}
await ch.insert({
table: 'events',
table: TABLE_NAMES.events,
values: Array.from(itemsToClickhouse).map((item) => item.event),
format: 'JSONEachRow',
});

View File

@@ -1,6 +1,11 @@
import type { ResponseJSON } from '@clickhouse/client';
import { createClient } from '@clickhouse/client';
/**
 * ClickHouse table names used when building queries and inserts.
 *
 * Declared `as const` so the mapping is readonly at compile time and each
 * entry keeps its literal type instead of widening to `string`.
 */
export const TABLE_NAMES = {
  events: 'events',
  profiles: 'profiles',
} as const;
export const originalCh = createClient({
url: process.env.CLICKHOUSE_URL,
username: process.env.CLICKHOUSE_USER,

View File

@@ -6,7 +6,7 @@ import type {
IGetChartDataInput,
} from '@openpanel/validation';
import { formatClickhouseDate } from '../clickhouse-client';
import { formatClickhouseDate, TABLE_NAMES } from '../clickhouse-client';
import { createSqlBuilder } from '../sql-builder';
export function getChartSql({
@@ -94,7 +94,7 @@ export function getChartSql({
if (event.segment === 'one_event_per_user') {
sb.from = `(
SELECT DISTINCT ON (profile_id) * from events WHERE ${join(
SELECT DISTINCT ON (profile_id) * from ${TABLE_NAMES.events} WHERE ${join(
sb.where,
' AND '
)}

View File

@@ -12,6 +12,7 @@ import {
chQuery,
convertClickhouseDateToJs,
formatClickhouseDate,
TABLE_NAMES,
} from '../clickhouse-client';
import type { EventMeta, Prisma } from '../prisma-client';
import { db } from '../prisma-client';
@@ -323,7 +324,7 @@ export async function getEventList({
sb.where.projectId = `project_id = ${escape(projectId)}`;
if (profileId) {
sb.where.deviceId = `device_id IN (SELECT device_id as did FROM events WHERE profile_id = ${escape(profileId)} group by did)`;
sb.where.deviceId = `device_id IN (SELECT device_id as did FROM ${TABLE_NAMES.events} WHERE profile_id = ${escape(profileId)} group by did)`;
}
if (startDate && endDate) {
@@ -448,7 +449,7 @@ export async function getLastScreenViewFromProfileId({
const [eventInDb] = profileId
? await getEvents(
`SELECT * FROM events WHERE name = 'screen_view' AND profile_id = ${escape(profileId)} AND project_id = ${escape(projectId)} AND created_at >= now() - INTERVAL 30 MINUTE ORDER BY created_at DESC LIMIT 1`
`SELECT * FROM ${TABLE_NAMES.events} WHERE name = 'screen_view' AND profile_id = ${escape(profileId)} AND project_id = ${escape(projectId)} AND created_at >= now() - INTERVAL 30 MINUTE ORDER BY created_at DESC LIMIT 1`
)
: [];

View File

@@ -4,7 +4,11 @@ import { toObject } from '@openpanel/common';
import type { IChartEventFilter } from '@openpanel/validation';
import { profileBuffer } from '../buffers';
import { chQuery, formatClickhouseDate } from '../clickhouse-client';
import {
chQuery,
formatClickhouseDate,
TABLE_NAMES,
} from '../clickhouse-client';
import { createSqlBuilder } from '../sql-builder';
export type IProfileMetrics = {
@@ -18,19 +22,19 @@ export type IProfileMetrics = {
export function getProfileMetrics(profileId: string, projectId: string) {
return chQuery<IProfileMetrics>(`
WITH lastSeen AS (
SELECT max(created_at) as lastSeen FROM events WHERE profile_id = ${escape(profileId)} AND project_id = ${escape(projectId)}
SELECT max(created_at) as lastSeen FROM ${TABLE_NAMES.events} WHERE profile_id = ${escape(profileId)} AND project_id = ${escape(projectId)}
),
firstSeen AS (
SELECT min(created_at) as firstSeen FROM events WHERE profile_id = ${escape(profileId)} AND project_id = ${escape(projectId)}
SELECT min(created_at) as firstSeen FROM ${TABLE_NAMES.events} WHERE profile_id = ${escape(profileId)} AND project_id = ${escape(projectId)}
),
screenViews AS (
SELECT count(*) as screenViews FROM events WHERE name = 'screen_view' AND profile_id = ${escape(profileId)} AND project_id = ${escape(projectId)}
SELECT count(*) as screenViews FROM ${TABLE_NAMES.events} WHERE name = 'screen_view' AND profile_id = ${escape(profileId)} AND project_id = ${escape(projectId)}
),
sessions AS (
SELECT count(*) as sessions FROM events WHERE name = 'session_start' AND profile_id = ${escape(profileId)} AND project_id = ${escape(projectId)}
SELECT count(*) as sessions FROM ${TABLE_NAMES.events} WHERE name = 'session_start' AND profile_id = ${escape(profileId)} AND project_id = ${escape(projectId)}
),
duration AS (
SELECT avg(duration) as durationAvg, quantilesExactInclusive(0.9)(duration)[1] as durationP90 FROM events WHERE name = 'session_end' AND duration != 0 AND profile_id = ${escape(profileId)} AND project_id = ${escape(projectId)}
SELECT avg(duration) as durationAvg, quantilesExactInclusive(0.9)(duration)[1] as durationP90 FROM ${TABLE_NAMES.events} WHERE name = 'session_end' AND duration != 0 AND profile_id = ${escape(profileId)} AND project_id = ${escape(projectId)}
)
SELECT lastSeen, firstSeen, screenViews, sessions, durationAvg, durationP90 FROM lastSeen, firstSeen, screenViews,sessions, duration
`).then((data) => data[0]!);

View File

@@ -1,6 +1,6 @@
import { escape } from 'sqlstring';
import { chQuery } from '../clickhouse-client';
import { chQuery, TABLE_NAMES } from '../clickhouse-client';
type IGetWeekRetentionInput = {
projectId: string;
@@ -15,7 +15,7 @@ WITH
SELECT
profile_id,
max(toWeek(created_at)) AS last_seen
FROM events
FROM ${TABLE_NAMES.events}
WHERE (project_id = ${escape(projectId)}) AND (profile_id != device_id)
GROUP BY profile_id
),
@@ -24,7 +24,7 @@ WITH
SELECT
profile_id,
min(toWeek(created_at)) AS first_seen
FROM events
FROM ${TABLE_NAMES.events}
WHERE (project_id = ${escape(projectId)}) AND (profile_id != device_id)
GROUP BY profile_id
),
@@ -79,8 +79,8 @@ export function getRetentionSeries({ projectId }: IGetWeekRetentionInput) {
countDistinct(events.profile_id) AS active_users,
countDistinct(future_events.profile_id) AS retained_users,
(100 * (countDistinct(future_events.profile_id) / CAST(countDistinct(events.profile_id), 'float'))) AS retention
FROM events
LEFT JOIN events AS future_events ON
FROM ${TABLE_NAMES.events} as events
LEFT JOIN ${TABLE_NAMES.events} AS future_events ON
events.profile_id = future_events.profile_id
AND toStartOfWeek(events.created_at) = toStartOfWeek(future_events.created_at - toIntervalWeek(1))
AND future_events.profile_id != future_events.device_id
@@ -140,7 +140,7 @@ export function getRetentionLastSeenSeries({
SELECT
max(created_at) AS last_active,
profile_id
FROM events
FROM ${TABLE_NAMES.events}
WHERE (project_id = ${escape(projectId)}) AND (device_id != profile_id)
GROUP BY profile_id
)

View File

@@ -1,3 +1,5 @@
import { TABLE_NAMES } from './clickhouse-client';
export interface SqlBuilderObject {
where: Record<string, string>;
having: Record<string, string>;
@@ -15,7 +17,7 @@ export function createSqlBuilder() {
const sb: SqlBuilderObject = {
where: {},
from: 'events',
from: TABLE_NAMES.events,
select: {},
groupBy: {},
orderBy: {},