test(buffer): testing new buffer (only inserts, no processing)

This commit is contained in:
Carl-Gerhard Lindesvärd
2025-01-31 00:09:25 +00:00
parent 71bf22af51
commit a2b74a9b4d
5 changed files with 95 additions and 104 deletions

View File

@@ -1,13 +1,23 @@
import { TABLE_NAMES, ch } from '../clickhouse-client'; import { TABLE_NAMES, ch } from '../clickhouse-client';
import type { IClickhouseBotEvent } from '../services/event.service'; import type { IClickhouseBotEvent } from '../services/event.service';
import { BotBuffer as NewBotBuffer } from './bot-buffer-psql';
import { RedisBuffer } from './buffer'; import { RedisBuffer } from './buffer';
const testNewBotBuffer = new NewBotBuffer();
type BufferType = IClickhouseBotEvent; type BufferType = IClickhouseBotEvent;
export class BotBuffer extends RedisBuffer<BufferType> { export class BotBuffer extends RedisBuffer<BufferType> {
constructor() { constructor() {
super('events_bots', 500); super('events_bots', 500);
} }
/**
 * Buffers a bot event through the parent RedisBuffer and, when the
 * TEST_NEW_BUFFER environment variable is set, mirrors the same event into
 * the module-level `testNewBotBuffer` instance.
 *
 * Both writes are awaited in sequence, so a rejection from either one
 * rejects the promise returned here.
 *
 * @param event - The bot event to buffer.
 */
async add(event: BufferType) {
  await super.add(event);

  // Environment values are strings, so any non-empty value enables mirroring.
  const mirrorToTestBuffer = Boolean(process.env.TEST_NEW_BUFFER);
  if (!mirrorToTestBuffer) {
    return;
  }

  await testNewBotBuffer.add(event);
}
protected async insertIntoDB(items: BufferType[]): Promise<void> { protected async insertIntoDB(items: BufferType[]): Promise<void> {
await ch.insert({ await ch.insert({
table: TABLE_NAMES.events_bots, table: TABLE_NAMES.events_bots,

View File

@@ -4,7 +4,7 @@ import { type ILogger as Logger, createLogger } from '@openpanel/logger';
import { getRedisCache, getRedisPub, runEvery } from '@openpanel/redis'; import { getRedisCache, getRedisPub, runEvery } from '@openpanel/redis';
import { Prisma } from '@prisma/client'; import { Prisma } from '@prisma/client';
import { ch } from '../clickhouse-client'; import { ch } from '../clickhouse-client';
import { db } from '../prisma-client'; import { type EventBuffer as IPrismaEventBuffer, db } from '../prisma-client';
import { import {
type IClickhouseEvent, type IClickhouseEvent,
type IServiceEvent, type IServiceEvent,
@@ -17,6 +17,7 @@ export class EventBuffer {
private lockKey = `lock:${this.name}`; private lockKey = `lock:${this.name}`;
private lockTimeout = 60; private lockTimeout = 60;
private daysToKeep = 2; private daysToKeep = 2;
private batchSize = 1000;
constructor() { constructor() {
this.logger = createLogger({ name: this.name }); this.logger = createLogger({ name: this.name });
@@ -35,16 +36,16 @@ export class EventBuffer {
}, },
}); });
this.publishEvent('event:received', event); // TODO: UNCOMMENT THIS!!!
// this.publishEvent('event:received', event);
if (event.profile_id) { // if (event.profile_id) {
getRedisCache().set( // getRedisCache().set(
`live:event:${event.project_id}:${event.profile_id}`, // `live:event:${event.project_id}:${event.profile_id}`,
'', // '',
'EX', // 'EX',
60 * 5, // 60 * 5,
); // );
} // }
} catch (error) { } catch (error) {
if (error instanceof Prisma.PrismaClientKnownRequestError) { if (error instanceof Prisma.PrismaClientKnownRequestError) {
if (error.code === 'P2002') { if (error.code === 'P2002') {
@@ -106,109 +107,78 @@ export class EventBuffer {
} }
async processBuffer() { async processBuffer() {
const eventsToProcess = await db.$transaction(async (trx) => { const eventsToProcess = await db.$queryRaw<IPrismaEventBuffer[]>`
// Process all screen_views that have a next event WITH has_2_special AS (
const processableViews = await trx.$queryRaw< SELECT "sessionId"
Array<{ FROM event_buffer
id: string; WHERE "processedAt" IS NULL
payload: IClickhouseEvent; AND name IN ('screen_view', 'session_start', 'session_end')
next_event_time: Date; GROUP BY "sessionId"
}> HAVING COUNT(*) >= 2
>` )
WITH NextEvents AS ( SELECT *
SELECT FROM event_buffer e
id, WHERE e."processedAt" IS NULL
payload, AND (
LEAD("createdAt") OVER ( -- 1) if the event name is NOT in the special set
PARTITION BY "sessionId" e.name NOT IN ('screen_view', 'session_start', 'session_end')
ORDER BY "createdAt" OR
) as next_event_time -- 2) if the event name IS in the special set AND
FROM event_buffer -- the session has >= 2 such unprocessed events
WHERE "name" = 'screen_view' (
AND "processedAt" IS NULL e.name IN ('screen_view', 'session_start', 'session_end')
AND e."sessionId" IN (SELECT "sessionId" FROM has_2_special)
) )
SELECT * )
FROM NextEvents ORDER BY e."createdAt" ASC -- or e.id, whichever "oldest first" logic you use
WHERE next_event_time IS NOT NULL LIMIT ${this.batchSize}
`; `;
// Find screen_views that are last in their session with session_end const toInsert = eventsToProcess.reduce<IPrismaEventBuffer[]>(
const lastViews = await trx.$queryRaw< (acc, event, index, list) => {
Array<{ if (event.name === 'screen_view') {
id: string; const nextScreenView = list.find(
payload: IClickhouseEvent; (e, eIndex) =>
}> (e.name === 'screen_view' || e.name === 'session_end') &&
>` e.sessionId === event.sessionId &&
WITH LastViews AS ( eIndex > index,
SELECT e.id, e.payload, );
EXISTS (
SELECT 1
FROM event_buffer se
WHERE se."name" = 'session_end'
AND se."sessionId" = e."sessionId"
AND se."createdAt" > e."createdAt"
) as has_session_end
FROM event_buffer e
WHERE e."name" = 'screen_view'
AND e."processedAt" IS NULL
AND NOT EXISTS (
SELECT 1
FROM event_buffer next
WHERE next."sessionId" = e."sessionId"
AND next."name" = 'screen_view'
AND next."createdAt" > e."createdAt"
)
)
SELECT * FROM LastViews
WHERE has_session_end = true
`;
// Get all other events if (nextScreenView && nextScreenView.name === 'screen_view') {
const regularEvents = await trx.eventBuffer.findMany({ event.payload.duration =
where: { new Date(nextScreenView.createdAt).getTime() -
processedAt: null, new Date(event.createdAt).getTime();
name: { not: 'screen_view' }, }
},
orderBy: { createdAt: 'asc' },
});
return { // if there is no more screen views nor session_end,
processableViews, // we don't want to insert this event into clickhouse
lastViews, if (!nextScreenView) {
regularEvents, return acc;
}; }
}); }
const toInsert = [ acc.push(event);
...eventsToProcess.processableViews.map((view) => ({
...view.payload, return acc;
duration: },
new Date(view.next_event_time).getTime() - [],
new Date(view.payload.created_at).getTime(), );
})),
...eventsToProcess.lastViews.map((v) => v.payload),
...eventsToProcess.regularEvents.map((e) => e.payload),
];
if (toInsert.length > 0) { if (toInsert.length > 0) {
await ch.insert({ await ch.insert({
table: 'events', table: 'events',
values: toInsert, values: toInsert.map((e) => e.payload),
format: 'JSONEachRow', format: 'JSONEachRow',
}); });
for (const event of toInsert) { for (const event of toInsert) {
this.publishEvent('event:saved', event); this.publishEvent('event:saved', event.payload);
} }
await db.eventBuffer.updateMany({ await db.eventBuffer.updateMany({
where: { where: {
id: { id: {
in: [ in: toInsert.map((e) => e.id),
...eventsToProcess.processableViews.map((v) => v.id),
...eventsToProcess.lastViews.map((v) => v.id),
...eventsToProcess.regularEvents.map((e) => e.id),
],
}, },
}, },
data: { data: {
@@ -218,10 +188,6 @@ export class EventBuffer {
this.logger.info('Processed events', { this.logger.info('Processed events', {
count: toInsert.length, count: toInsert.length,
screenViews:
eventsToProcess.processableViews.length +
eventsToProcess.lastViews.length,
regularEvents: eventsToProcess.regularEvents.length,
}); });
} }
} }

View File

@@ -16,9 +16,12 @@ import type {
} from '../services/event.service'; } from '../services/event.service';
import type { Find, FindMany } from './buffer'; import type { Find, FindMany } from './buffer';
import { RedisBuffer } from './buffer'; import { RedisBuffer } from './buffer';
import { EventBuffer as NewEventBuffer } from './event-buffer-psql';
const STALLED_QUEUE_TIMEOUT = 1000 * 60 * 60 * 24; const STALLED_QUEUE_TIMEOUT = 1000 * 60 * 60 * 24;
const testNewEventBuffer = new NewEventBuffer();
type BufferType = IClickhouseEvent; type BufferType = IClickhouseEvent;
export class EventBuffer extends RedisBuffer<BufferType> { export class EventBuffer extends RedisBuffer<BufferType> {
constructor() { constructor() {
@@ -57,6 +60,9 @@ export class EventBuffer extends RedisBuffer<BufferType> {
public async add(event: BufferType) { public async add(event: BufferType) {
await super.add(event); await super.add(event);
if (process.env.TEST_NEW_BUFFER) {
await testNewEventBuffer.add(event);
}
if (event.name === 'screen_view') { if (event.name === 'screen_view') {
await getRedisCache().set( await getRedisCache().set(
this.getLastEventKey({ this.getLastEventKey({

View File

@@ -1,6 +1,6 @@
import { BotBuffer } from './bot-buffer-psql'; import { BotBuffer } from './bot-buffer';
import { EventBuffer } from './event-buffer-psql'; import { EventBuffer } from './event-buffer';
import { ProfileBuffer } from './profile-buffer-psql'; import { ProfileBuffer } from './profile-buffer';
export const eventBuffer = new EventBuffer(); export const eventBuffer = new EventBuffer();
export const profileBuffer = new ProfileBuffer(); export const profileBuffer = new ProfileBuffer();

View File

@@ -17,17 +17,26 @@ import type {
} from '../services/profile.service'; } from '../services/profile.service';
import type { Find, FindMany } from './buffer'; import type { Find, FindMany } from './buffer';
import { RedisBuffer } from './buffer'; import { RedisBuffer } from './buffer';
import { ProfileBuffer as NewProfileBuffer } from './profile-buffer-psql';
const BATCH_SIZE = process.env.BATCH_SIZE_PROFILES const BATCH_SIZE = process.env.BATCH_SIZE_PROFILES
? Number.parseInt(process.env.BATCH_SIZE_PROFILES, 10) ? Number.parseInt(process.env.BATCH_SIZE_PROFILES, 10)
: 50; : 50;
const testNewProfileBuffer = new NewProfileBuffer();
type BufferType = IClickhouseProfile; type BufferType = IClickhouseProfile;
export class ProfileBuffer extends RedisBuffer<BufferType> { export class ProfileBuffer extends RedisBuffer<BufferType> {
constructor() { constructor() {
super('profiles', BATCH_SIZE); super('profiles', BATCH_SIZE);
} }
/**
 * Buffers a profile through the parent RedisBuffer and, when the
 * TEST_NEW_BUFFER environment variable is set, mirrors the same profile into
 * the module-level `testNewProfileBuffer` instance.
 *
 * Both writes are awaited in sequence, so a rejection from either one
 * rejects the promise returned here.
 *
 * @param profile - The profile to buffer.
 */
async add(profile: BufferType) {
  await super.add(profile);

  // Environment values are strings, so any non-empty value enables mirroring.
  const mirrorToTestBuffer = Boolean(process.env.TEST_NEW_BUFFER);
  if (!mirrorToTestBuffer) {
    return;
  }

  await testNewProfileBuffer.add(profile);
}
// this will do a couple of things: // this will do a couple of things:
// - we slice the queue to maxBufferSize since this queries have a limit on character count // - we slice the queue to maxBufferSize since this queries have a limit on character count
// - check redis cache for profiles // - check redis cache for profiles