batching events

This commit is contained in:
Carl-Gerhard Lindesvärd
2024-07-17 17:13:07 +02:00
committed by Carl-Gerhard Lindesvärd
parent 244aa3b0d3
commit 5e225b7ae6
58 changed files with 2204 additions and 583 deletions

View File

@@ -0,0 +1,212 @@
import { groupBy } from 'ramda';
import SuperJSON from 'superjson';
import { deepMergeObjects } from '@openpanel/common';
import { redis, redisPub } from '@openpanel/redis';
import { ch } from '../clickhouse-client';
import { transformEvent } from '../services/event.service';
import type {
IClickhouseEvent,
IServiceCreateEventPayload,
} from '../services/event.service';
import type {
Find,
FindMany,
OnCompleted,
OnInsert,
ProcessQueue,
QueueItem,
} from './buffer';
import { RedisBuffer } from './buffer';
const sortOldestFirst = (
a: QueueItem<IClickhouseEvent>,
b: QueueItem<IClickhouseEvent>
) =>
new Date(a.event.created_at).getTime() -
new Date(b.event.created_at).getTime();
export class EventBuffer extends RedisBuffer<IClickhouseEvent> {
constructor() {
super({
table: 'events',
redis,
});
}
public onInsert?: OnInsert<IClickhouseEvent> | undefined = (event) => {
redisPub.publish(
'event:received',
SuperJSON.stringify(transformEvent(event))
);
this.redis.setex(
`live:event:${event.project_id}:${event.profile_id}`,
'',
60 * 5
);
};
public onCompleted?: OnCompleted<IClickhouseEvent> | undefined = (
savedEvents
) => {
for (const event of savedEvents) {
redisPub.publish(
'event:saved',
SuperJSON.stringify(transformEvent(event))
);
}
return savedEvents.map((event) => event.id);
};
public processQueue: ProcessQueue<IClickhouseEvent> = async (queue) => {
const itemsToClickhouse = new Set<QueueItem<IClickhouseEvent>>();
const itemsToStalled = new Set<QueueItem<IClickhouseEvent>>();
// Sort data by created_at
// oldest first
queue.sort(sortOldestFirst);
// All events thats not a screen_view can be sent to clickhouse
// We only need screen_views since we want to calculate the duration of each screen
// To do this we need a minimum of 2 screen_views
queue
.filter(
(item) =>
item.event.name !== 'screen_view' || item.event.device === 'server'
)
.forEach((item) => {
// Find the last event with data and merge it with the current event
// We use profile_id here since this property can be set from backend as well
const lastEventWithData = queue
.slice(0, item.index)
.findLast((lastEvent) => {
return (
lastEvent.event.project_id === item.event.project_id &&
lastEvent.event.profile_id === item.event.profile_id &&
lastEvent.event.path !== ''
);
});
const event = deepMergeObjects<IClickhouseEvent>(
lastEventWithData?.event || {},
item.event
);
if (lastEventWithData) {
event.properties.__properties_from = lastEventWithData.event.id;
}
return itemsToClickhouse.add({
...item,
event,
});
});
// Group screen_view events by session_id
const grouped = groupBy(
(item) => item.event.session_id,
queue.filter(
(item) =>
item.event.name === 'screen_view' && item.event.device !== 'server'
)
);
// Iterate over each group
for (const [sessionId, screenViews] of Object.entries(grouped)) {
if (sessionId === '' || !sessionId) {
continue;
}
// If there is only one screen_view event we can send it back to redis since we can't calculate the duration
const hasSessionEnd = queue.find(
(item) =>
item.event.name === 'session_end' &&
item.event.session_id === sessionId
);
screenViews
?.slice()
.sort(sortOldestFirst)
.forEach((item, index) => {
const nextScreenView = screenViews[index + 1];
// if nextScreenView does not exists we can't calculate the duration (last event in session)
if (nextScreenView) {
const duration =
new Date(nextScreenView.event.created_at).getTime() -
new Date(item.event.created_at).getTime();
const event = {
...item.event,
duration,
};
event.properties.__duration_from = nextScreenView.event.id;
itemsToClickhouse.add({
...item,
event,
});
// push last event in session if we have a session_end event
} else if (hasSessionEnd) {
itemsToClickhouse.add(item);
}
});
} // for of end
// Check if we have any events that has been in the queue for more than 24 hour
// This should not theoretically happen but if it does we should move them to stalled
queue.forEach((item) => {
if (
!itemsToClickhouse.has(item) &&
new Date(item.event.created_at).getTime() <
new Date().getTime() - 1000 * 60 * 60 * 24
) {
itemsToStalled.add(item);
}
});
if (itemsToStalled.size > 0) {
const multi = this.redis.multi();
for (const item of itemsToStalled) {
multi.rpush(this.getKey('stalled'), JSON.stringify(item.event));
}
await multi.exec();
}
await ch.insert({
table: 'events',
values: Array.from(itemsToClickhouse).map((item) => item.event),
format: 'JSONEachRow',
});
return [
...Array.from(itemsToClickhouse).map((item) => item.index),
...Array.from(itemsToStalled).map((item) => item.index),
];
};
public findMany: FindMany<IClickhouseEvent, IServiceCreateEventPayload> =
async (callback) => {
return this.getQueue(-1)
.then((queue) => {
return queue
.filter(callback)
.map((item) => transformEvent(item.event));
})
.catch(() => {
return [];
});
};
public find: Find<IClickhouseEvent, IServiceCreateEventPayload> = async (
callback
) => {
return this.getQueue(-1)
.then((queue) => {
const match = queue.find(callback);
return match ? transformEvent(match.event) : null;
})
.catch(() => {
return null;
});
};
}