feature(worker+api): improve buffer

This commit is contained in:
Carl-Gerhard Lindesvärd
2024-09-17 23:37:11 +02:00
parent 32e91959f6
commit 545cbfd520
18 changed files with 1484 additions and 432 deletions

View File

@@ -1,39 +1,18 @@
import { TABLE_NAMES, ch } from '../clickhouse-client';
import type { IClickhouseBotEvent } from '../services/event.service';
import type {
Find,
FindMany,
OnCompleted,
OnInsert,
ProcessQueue,
} from './buffer';
import { RedisBuffer } from './buffer';
export class BotBuffer extends RedisBuffer<IClickhouseBotEvent> {
type BufferType = IClickhouseBotEvent;
export class BotBuffer extends RedisBuffer<BufferType> {
constructor() {
super({
table: TABLE_NAMES.events_bots,
batchSize: 100,
});
super(TABLE_NAMES.events, 500);
}
public onInsert?: OnInsert<IClickhouseBotEvent> | undefined;
public onCompleted?: OnCompleted<IClickhouseBotEvent> | undefined;
public processQueue: ProcessQueue<IClickhouseBotEvent> = async (queue) => {
protected async insertIntoDB(items: BufferType[]): Promise<void> {
await ch.insert({
table: TABLE_NAMES.events_bots,
values: queue.map((item) => item.event),
values: items,
format: 'JSONEachRow',
});
return queue.map((item) => item.index);
};
public findMany: FindMany<IClickhouseBotEvent, IClickhouseBotEvent> = () => {
return Promise.resolve([]);
};
public find: Find<IClickhouseBotEvent, IClickhouseBotEvent> = () => {
return Promise.resolve(null);
};
}
}

View File

@@ -1,181 +1,219 @@
import { v4 as uuidv4 } from 'uuid';
import type { ILogger } from '@openpanel/logger';
import { createLogger } from '@openpanel/logger';
import { getRedisCache } from '@openpanel/redis';
export const DELETE = '__DELETE__';
export type QueueItem<T> = {
event: T;
index: number;
};
export type OnInsert<T> = (data: T) => unknown;
export type OnCompleted<T> =
| ((data: T[]) => Promise<unknown[]>)
| ((data: T[]) => unknown[]);
export type ProcessQueue<T> = (data: QueueItem<T>[]) => Promise<number[]>;
export type Find<T, R = unknown> = (
callback: (item: QueueItem<T>) => boolean,
callback: (item: T) => boolean,
) => Promise<R | null>;
export type FindMany<T, R = unknown> = (
callback: (item: QueueItem<T>) => boolean,
callback: (item: T) => boolean,
) => Promise<R[]>;
const getError = (e: unknown) => {
if (e instanceof Error) {
return [
`Name: ${e.name}`,
`Message: ${e.message}`,
`Stack: ${e.stack}`,
`Cause: ${e.cause ? String(e.cause) : ''}`,
].join('\n');
}
return 'Unknown error';
};
export class RedisBuffer<T> {
protected prefix = 'op:buffer';
protected bufferKey: string;
private lockKey: string;
protected maxBufferSize: number | null;
protected logger: ILogger;
export abstract class RedisBuffer<T> {
// constructor
public prefix = 'op:buffer';
public table: string;
public batchSize?: number;
public logger: ReturnType<typeof createLogger>;
public disableAutoFlush?: boolean;
// abstract methods
public abstract onInsert?: OnInsert<T>;
public abstract onCompleted?: OnCompleted<T>;
public abstract processQueue: ProcessQueue<T>;
public abstract find: Find<T, unknown>;
public abstract findMany: FindMany<T, unknown>;
constructor(options: {
table: string;
batchSize?: number;
disableAutoFlush?: boolean;
}) {
this.table = options.table;
this.batchSize = options.batchSize;
this.disableAutoFlush = options.disableAutoFlush;
constructor(bufferName: string, maxBufferSize: number | null) {
this.bufferKey = bufferName;
this.lockKey = `lock:${bufferName}`;
this.maxBufferSize = maxBufferSize;
this.logger = createLogger({ name: 'buffer' }).child({
table: this.table,
buffer: bufferName,
});
}
public getKey(name?: string) {
const key = `${this.prefix}:${this.table}`;
protected getKey(name?: string) {
const key = `${this.prefix}:${this.bufferKey}`;
if (name) {
return `${key}:${name}`;
}
return key;
}
public async insert(value: T) {
this.onInsert?.(value);
await getRedisCache().rpush(this.getKey(), JSON.stringify(value));
async add(item: T): Promise<void> {
try {
this.onAdd(item);
await getRedisCache().rpush(this.getKey(), JSON.stringify(item));
const bufferSize = await getRedisCache().llen(this.getKey());
const length = await getRedisCache().llen(this.getKey());
this.logger.debug(
`Inserted item into buffer ${this.table}. Current length: ${length}`,
);
this.logger.debug(`Item added. Current size: ${bufferSize}`);
if (!this.disableAutoFlush && this.batchSize && length >= this.batchSize) {
this.logger.info(
`Buffer ${this.table} reached batch size (${this.batchSize}). Flushing...`,
);
this.flush();
if (this.maxBufferSize && bufferSize >= this.maxBufferSize) {
await this.tryFlush();
}
} catch (error) {
this.logger.error('Failed to add item to buffer', { error, item });
}
}
public async flush() {
try {
const queue = await this.getQueue(this.batchSize || -1);
if (queue.length === 0) {
this.logger.debug(`Flush called on empty buffer ${this.table}`);
return { count: 0, data: [] };
}
this.logger.info(
`Flushing ${queue.length} items from buffer ${this.table}`,
);
public async tryFlush(): Promise<void> {
const lockId = uuidv4();
const acquired = await getRedisCache().set(
this.lockKey,
lockId,
'EX',
8,
'NX',
);
if (acquired === 'OK') {
this.logger.debug('Lock acquired. Attempting to flush.');
try {
const indexes = await this.processQueue(queue);
await this.deleteIndexes(indexes);
const data = indexes
.map((index) => queue[index]?.event)
.filter((event): event is T => event !== null);
await this.flush();
} finally {
await this.releaseLock(lockId);
}
} else {
this.logger.debug('Failed to acquire lock for. Skipping flush.');
}
}
if (this.onCompleted) {
const res = await this.onCompleted(data);
this.logger.info(
`Completed processing ${res.length} items from buffer ${this.table}`,
);
return { count: res.length, data: res };
}
protected async waitForReleasedLock(
maxWaitTime = 8000,
checkInterval = 500,
): Promise<boolean> {
const startTime = performance.now();
this.logger.info(
`Processed ${indexes.length} items from buffer ${this.table}`,
);
return { count: indexes.length, data: indexes };
while (performance.now() - startTime < maxWaitTime) {
const lock = await getRedisCache().get(this.lockKey);
if (!lock) {
return true;
}
await new Promise((resolve) => setTimeout(resolve, checkInterval));
}
this.logger.warn('Timeout waiting for lock release');
return false;
}
private async retryOnce(cb: () => Promise<void>) {
try {
await cb();
} catch (e) {
this.logger.error(`#1 Failed to execute callback: ${cb.name}`, e);
await new Promise((resolve) => setTimeout(resolve, 1000));
try {
await cb();
} catch (e) {
this.logger.error(
`Failed to process queue while flushing buffer ${this.table}:`,
e,
);
const timestamp = new Date().getTime();
await getRedisCache().hset(this.getKey(`failed:${timestamp}`), {
error: getError(e),
data: JSON.stringify(queue.map((item) => item.event)),
retries: 0,
});
this.logger.warn(
`Stored ${queue.length} failed items in ${this.getKey(`failed:${timestamp}`)}`,
this.logger.error(`#2 Failed to execute callback: ${cb.name}`, e);
}
}
}
private async flush(): Promise<void> {
// Use a transaction to ensure atomicity
const result = await getRedisCache()
.multi()
.lrange(this.getKey(), 0, -1)
.del(this.getKey())
.exec();
if (!result) {
throw new Error('Redis transaction failed');
}
const lrange = result[0];
if (!lrange || lrange[0] instanceof Error) {
throw new Error('Redis transaction failed');
}
const items = lrange[1] as string[];
const parsedItems = items.map((item) => JSON.parse(item) as T);
if (parsedItems.length === 0) {
this.logger.debug('No items to flush');
return;
}
this.logger.info(`Flushing ${parsedItems.length} items`);
try {
const { toInsert, toKeep } = await this.processItems(parsedItems);
if (toInsert.length) {
await this.retryOnce(() => this.insertIntoDB(toInsert));
this.onInsert(toInsert);
}
// Add back items to keep
if (toKeep.length > 0) {
await getRedisCache().lpush(
this.getKey(),
...toKeep.map((item) => JSON.stringify(item)),
);
}
} catch (e) {
this.logger.error(
`Failed to get queue while flushing buffer ${this.table}:`,
e,
this.logger.info(
`Inserted ${toInsert.length} items into DB, kept ${toKeep.length} items in buffer`,
{
toInsert: toInsert.length,
toKeep: toKeep.length,
},
);
} catch (error) {
this.logger.error('Failed to process queue while flushing buffer}:', {
error,
queueSize: items.length,
});
if (items.length > 0) {
// Add back items to keep
this.logger.debug('Adding all items back to buffer');
await getRedisCache().lpush(
this.getKey(),
...items.map((item) => JSON.stringify(item)),
);
}
}
}
public async deleteIndexes(indexes: number[]) {
const multi = getRedisCache().multi();
indexes.forEach((index) => {
multi.lset(this.getKey(), index, DELETE);
});
multi.lrem(this.getKey(), 0, DELETE);
await multi.exec();
this.logger.debug(
`Deleted ${indexes.length} items from buffer ${this.table}`,
);
private async releaseLock(lockId: string): Promise<void> {
this.logger.debug(`Released lock for ${this.getKey()}`);
const script = `
if redis.call("get", KEYS[1]) == ARGV[1] then
return redis.call("del", KEYS[1])
else
return 0
end
`;
await getRedisCache().eval(script, 1, this.lockKey, lockId);
}
public async getQueue(limit: number): Promise<QueueItem<T>[]> {
const queue = await getRedisCache().lrange(this.getKey(), 0, limit);
const result = queue
.map((item, index) => ({
event: this.transformQueueItem(item),
index,
}))
.filter((item): item is QueueItem<T> => item.event !== null);
this.logger.debug(
`Retrieved ${result.length} items from buffer ${this.table}`,
);
return result;
protected async getQueue(count?: number): Promise<T[]> {
const items = await getRedisCache().lrange(this.getKey(), 0, count ?? -1);
return items.map((item) => JSON.parse(item) as T);
}
private transformQueueItem(item: string): T | null {
try {
return JSON.parse(item);
} catch (e) {
this.logger.warn(`Failed to parse item in buffer ${this.table}:`, e);
return null;
}
protected processItems(items: T[]): Promise<{ toInsert: T[]; toKeep: T[] }> {
return Promise.resolve({ toInsert: items, toKeep: [] });
}
protected insertIntoDB(_items: T[]): Promise<void> {
throw new Error('Not implemented');
}
protected onAdd(_item: T): void {
// Override in subclass
}
protected onInsert(_item: T[]): void {
// Override in subclass
}
public findMany: FindMany<T, unknown> = () => {
return Promise.resolve([]);
};
public find: Find<T, unknown> = () => {
return Promise.resolve(null);
};
}

View File

@@ -10,31 +10,16 @@ import type {
IClickhouseEvent,
IServiceEvent,
} from '../services/event.service';
import type {
Find,
FindMany,
OnCompleted,
OnInsert,
ProcessQueue,
QueueItem,
} from './buffer';
import type { Find, FindMany } from './buffer';
import { RedisBuffer } from './buffer';
const sortOldestFirst = (
a: QueueItem<IClickhouseEvent>,
b: QueueItem<IClickhouseEvent>,
) =>
new Date(a.event.created_at).getTime() -
new Date(b.event.created_at).getTime();
export class EventBuffer extends RedisBuffer<IClickhouseEvent> {
type BufferType = IClickhouseEvent;
export class EventBuffer extends RedisBuffer<BufferType> {
constructor() {
super({
table: TABLE_NAMES.events,
});
super(TABLE_NAMES.events, null);
}
public onInsert?: OnInsert<IClickhouseEvent> | undefined = (event) => {
public onAdd(event: BufferType) {
getRedisPub().publish(
'event:received',
SuperJSON.stringify(transformEvent(event)),
@@ -47,24 +32,22 @@ export class EventBuffer extends RedisBuffer<IClickhouseEvent> {
60 * 5,
);
}
};
}
public onCompleted?: OnCompleted<IClickhouseEvent> | undefined = (
savedEvents,
) => {
for (const event of savedEvents) {
public onInsert(items: BufferType[]) {
for (const event of items) {
getRedisPub().publish(
'event:saved',
SuperJSON.stringify(transformEvent(event)),
);
}
}
return savedEvents.map((event) => event.id);
};
public processQueue: ProcessQueue<IClickhouseEvent> = async (queue) => {
const itemsToClickhouse = new Set<QueueItem<IClickhouseEvent>>();
const itemsToStalled = new Set<QueueItem<IClickhouseEvent>>();
protected async processItems(
queue: BufferType[],
): Promise<{ toInsert: BufferType[]; toKeep: BufferType[] }> {
const toInsert = new Set<BufferType>();
const itemsToStalled = new Set<BufferType>();
// Sort data by created_at
// oldest first
@@ -74,44 +57,37 @@ export class EventBuffer extends RedisBuffer<IClickhouseEvent> {
// We only need screen_views since we want to calculate the duration of each screen
// To do this we need a minimum of 2 screen_views
queue
.filter(
(item) =>
item.event.name !== 'screen_view' || item.event.device === 'server',
)
.forEach((item) => {
.filter((item) => item.name !== 'screen_view' || item.device === 'server')
.forEach((item, index) => {
// Find the last event with data and merge it with the current event
// We use profile_id here since this property can be set from backend as well
const lastEventWithData = queue
.slice(0, item.index)
.slice(0, index)
.findLast((lastEvent) => {
return (
lastEvent.event.project_id === item.event.project_id &&
lastEvent.event.profile_id === item.event.profile_id &&
lastEvent.event.path !== ''
lastEvent.project_id === item.project_id &&
lastEvent.profile_id === item.profile_id &&
lastEvent.path !== ''
);
});
const event = deepMergeObjects<IClickhouseEvent>(
omit(['properties', 'duration'], lastEventWithData?.event || {}),
item.event,
const event = deepMergeObjects<BufferType>(
omit(['properties', 'duration'], lastEventWithData || {}),
item,
);
if (lastEventWithData) {
// event.properties.__properties_from = lastEventWithData.event.id;
event.properties.__properties_from = lastEventWithData.id;
}
return itemsToClickhouse.add({
...item,
event,
});
return toInsert.add(event);
});
// Group screen_view events by session_id
const grouped = groupBy(
(item) => item.event.session_id,
(item) => item.session_id,
queue.filter(
(item) =>
item.event.name === 'screen_view' && item.event.device !== 'server',
(item) => item.name === 'screen_view' && item.device !== 'server',
),
);
@@ -123,9 +99,7 @@ export class EventBuffer extends RedisBuffer<IClickhouseEvent> {
// If there is only one screen_view event we can send it back to redis since we can't calculate the duration
const hasSessionEnd = queue.find(
(item) =>
item.event.name === 'session_end' &&
item.event.session_id === sessionId,
(item) => item.name === 'session_end' && item.session_id === sessionId,
);
screenViews
@@ -136,20 +110,17 @@ export class EventBuffer extends RedisBuffer<IClickhouseEvent> {
// if nextScreenView does not exists we can't calculate the duration (last event in session)
if (nextScreenView) {
const duration =
new Date(nextScreenView.event.created_at).getTime() -
new Date(item.event.created_at).getTime();
new Date(nextScreenView.created_at).getTime() -
new Date(item.created_at).getTime();
const event = {
...item.event,
...item,
duration,
};
event.properties.__duration_from = nextScreenView.event.id;
itemsToClickhouse.add({
...item,
event,
});
// push last event in session if we have a session_end event
event.properties.__duration_from = nextScreenView.id;
toInsert.add(event);
} else if (hasSessionEnd) {
itemsToClickhouse.add(item);
// push last event in session if we have a session_end event
toInsert.add(item);
}
});
} // for of end
@@ -158,8 +129,8 @@ export class EventBuffer extends RedisBuffer<IClickhouseEvent> {
// This should not theoretically happen but if it does we should move them to stalled
queue.forEach((item) => {
if (
!itemsToClickhouse.has(item) &&
new Date(item.event.created_at).getTime() <
!toInsert.has(item) &&
new Date(item.created_at).getTime() <
new Date().getTime() - 1000 * 60 * 60 * 24
) {
itemsToStalled.add(item);
@@ -169,43 +140,57 @@ export class EventBuffer extends RedisBuffer<IClickhouseEvent> {
if (itemsToStalled.size > 0) {
const multi = getRedisCache().multi();
for (const item of itemsToStalled) {
multi.rpush(this.getKey('stalled'), JSON.stringify(item.event));
multi.rpush(this.getKey('stalled'), JSON.stringify(item));
}
await multi.exec();
}
const toInsertArray = Array.from(toInsert);
return {
toInsert: toInsertArray,
toKeep: queue.filter(
(item) => !toInsertArray.find((i) => i.id === item.id),
),
};
}
protected async insertIntoDB(items: BufferType[]): Promise<void> {
await ch.insert({
table: TABLE_NAMES.events,
values: Array.from(itemsToClickhouse).map((item) => item.event),
values: items,
format: 'JSONEachRow',
});
return [
...Array.from(itemsToClickhouse).map((item) => item.index),
...Array.from(itemsToStalled).map((item) => item.index),
];
};
}
public findMany: FindMany<IClickhouseEvent, IServiceEvent> = async (
callback,
) => {
return this.getQueue(-1)
.then((queue) => {
return queue.filter(callback).map((item) => transformEvent(item.event));
})
.catch(() => {
return [];
});
if (await this.waitForReleasedLock()) {
return this.getQueue()
.then((queue) => {
return queue.filter(callback).map(transformEvent);
})
.catch(() => {
return [];
});
}
return [];
};
public find: Find<IClickhouseEvent, IServiceEvent> = async (callback) => {
return this.getQueue(-1)
.then((queue) => {
const match = queue.find(callback);
return match ? transformEvent(match.event) : null;
})
.catch(() => {
return null;
});
if (await this.waitForReleasedLock()) {
return this.getQueue(-1)
.then((queue) => {
const match = queue.find(callback);
return match ? transformEvent(match) : null;
})
.catch(() => {
return null;
});
}
return null;
};
}
const sortOldestFirst = (a: IClickhouseEvent, b: IClickhouseEvent) =>
new Date(a.created_at).getTime() - new Date(b.created_at).getTime();

View File

@@ -1,56 +1,80 @@
import { mergeDeepRight } from 'ramda';
import { groupBy, mergeDeepRight, prop } from 'ramda';
import { toDots } from '@openpanel/common';
import { getRedisCache } from '@openpanel/redis';
import { escape } from 'sqlstring';
import { TABLE_NAMES, ch, chQuery } from '../clickhouse-client';
import { transformProfile } from '../services/profile.service';
import type {
IClickhouseProfile,
IServiceProfile,
} from '../services/profile.service';
import { transformProfile } from '../services/profile.service';
import type {
Find,
FindMany,
OnCompleted,
OnInsert,
ProcessQueue,
QueueItem,
} from './buffer';
import type { Find, FindMany } from './buffer';
import { RedisBuffer } from './buffer';
export class ProfileBuffer extends RedisBuffer<IClickhouseProfile> {
const BATCH_SIZE = process.env.BATCH_SIZE_PROFILES
? Number.parseInt(process.env.BATCH_SIZE_PROFILES, 10)
: 50;
type BufferType = IClickhouseProfile;
export class ProfileBuffer extends RedisBuffer<BufferType> {
constructor() {
super({
table: TABLE_NAMES.profiles,
batchSize: 100,
disableAutoFlush: true,
});
super(TABLE_NAMES.profiles, BATCH_SIZE);
}
public onInsert?: OnInsert<IClickhouseProfile> | undefined;
public onCompleted?: OnCompleted<IClickhouseProfile> | undefined;
public processQueue: ProcessQueue<IClickhouseProfile> = async (queue) => {
const cleanedQueue = this.combineQueueItems(queue);
const redisProfiles = await this.getCachedProfiles(cleanedQueue);
// this will do a couple of things:
// - we slice the queue to maxBufferSize since this queries have a limit on character count
// - check redis cache for profiles
// - fetch missing profiles from clickhouse
// - merge the incoming profile with existing data
protected async processItems(
items: BufferType[],
): Promise<{ toInsert: BufferType[]; toKeep: BufferType[] }> {
const queue = this.combineQueueItems(items);
const slicedQueue = this.maxBufferSize
? queue.slice(0, this.maxBufferSize)
: queue;
const redisProfiles = await this.getCachedProfiles(slicedQueue);
const dbProfiles = await this.fetchDbProfiles(
cleanedQueue.filter((_, index) => !redisProfiles[index]),
slicedQueue.filter((_, index) => !redisProfiles[index]),
);
const values = this.createProfileValues(
cleanedQueue,
const toInsert = this.createProfileValues(
slicedQueue,
redisProfiles,
dbProfiles,
);
if (values.length > 0) {
await this.updateRedisCache(values);
await this.insertIntoClickhouse(values);
if (toInsert.length > 0) {
await this.updateRedisCache(toInsert);
}
return queue.map((item) => item.index);
};
return Promise.resolve({
toInsert,
toKeep: this.maxBufferSize ? queue.slice(this.maxBufferSize) : [],
});
}
private combineQueueItems(queue: BufferType[]): BufferType[] {
const itemsToClickhouse = new Map<string, BufferType>();
queue.forEach((item) => {
const key = item.project_id + item.id;
const existing = itemsToClickhouse.get(key);
itemsToClickhouse.set(key, mergeDeepRight(existing ?? {}, item));
});
return Array.from(itemsToClickhouse.values());
}
protected async insertIntoDB(items: BufferType[]): Promise<void> {
await ch.insert({
table: TABLE_NAMES.profiles,
values: items,
format: 'JSONEachRow',
});
}
private matchPartialObject(
full: any,
@@ -77,27 +101,16 @@ export class ProfileBuffer extends RedisBuffer<IClickhouseProfile> {
return true;
}
private combineQueueItems(
queue: QueueItem<IClickhouseProfile>[],
): QueueItem<IClickhouseProfile>[] {
const itemsToClickhouse = new Map<string, QueueItem<IClickhouseProfile>>();
queue.forEach((item) => {
const key = item.event.project_id + item.event.id;
const existing = itemsToClickhouse.get(key);
itemsToClickhouse.set(key, mergeDeepRight(existing ?? {}, item));
});
return Array.from(itemsToClickhouse.values());
}
private async getCachedProfiles(
cleanedQueue: QueueItem<IClickhouseProfile>[],
queue: BufferType[],
): Promise<(IClickhouseProfile | null)[]> {
const redisCache = getRedisCache();
const keys = cleanedQueue.map(
(item) => `profile:${item.event.project_id}:${item.event.id}`,
);
const keys = queue.map((item) => `profile:${item.project_id}:${item.id}`);
if (keys.length === 0) {
return [];
}
const cachedProfiles = await redisCache.mget(...keys);
return cachedProfiles.map((profile) => {
try {
@@ -109,34 +122,51 @@ export class ProfileBuffer extends RedisBuffer<IClickhouseProfile> {
}
private async fetchDbProfiles(
cleanedQueue: QueueItem<IClickhouseProfile>[],
queue: IClickhouseProfile[],
): Promise<IClickhouseProfile[]> {
if (cleanedQueue.length === 0) {
if (queue.length === 0) {
return [];
}
// const grouped = groupBy(prop('project_id'), queue);
// const queries = Object.entries(grouped).map(([project_id, items]) => {
// if (!items) {
// return [];
// }
// return chQuery<IClickhouseProfile>(
// `SELECT
// *
// FROM ${TABLE_NAMES.profiles}
// WHERE
// id IN (${items.map((item) => escape(item.id)).join(',')})
// AND created_at > INTERVAL 12 MONTH
// ORDER BY
// created_at DESC`,
// );
// });
return await chQuery<IClickhouseProfile>(
`SELECT
*
FROM ${TABLE_NAMES.profiles}
WHERE
(id, project_id) IN (${cleanedQueue.map((item) => `('${item.event.id}', '${item.event.project_id}')`).join(',')})
(project_id, id) IN (${queue.map((item) => `('${item.project_id}', '${item.id}')`).join(',')})
ORDER BY
created_at DESC`,
);
}
private createProfileValues(
cleanedQueue: QueueItem<IClickhouseProfile>[],
queue: IClickhouseProfile[],
redisProfiles: (IClickhouseProfile | null)[],
dbProfiles: IClickhouseProfile[],
): IClickhouseProfile[] {
return cleanedQueue
return queue
.map((item, index) => {
const cachedProfile = redisProfiles[index];
const dbProfile = dbProfiles.find(
(p) =>
p.id === item.event.id && p.project_id === item.event.project_id,
(p) => p.id === item.id && p.project_id === item.project_id,
);
const profile = cachedProfile || dbProfile;
@@ -145,31 +175,33 @@ export class ProfileBuffer extends RedisBuffer<IClickhouseProfile> {
this.matchPartialObject(
profile,
{
...item.event,
properties: toDots(item.event.properties),
...item,
properties: toDots(item.properties),
},
{
ignore: ['created_at'],
},
)
) {
console.log('Ignoring profile', item.event.id);
this.logger.debug('No changes for profile', {
profile,
});
return null;
}
return {
id: item.event.id,
first_name: item.event.first_name ?? profile?.first_name ?? '',
last_name: item.event.last_name ?? profile?.last_name ?? '',
email: item.event.email ?? profile?.email ?? '',
avatar: item.event.avatar ?? profile?.avatar ?? '',
id: item.id,
first_name: item.first_name ?? profile?.first_name ?? '',
last_name: item.last_name ?? profile?.last_name ?? '',
email: item.email ?? profile?.email ?? '',
avatar: item.avatar ?? profile?.avatar ?? '',
properties: toDots({
...(profile?.properties ?? {}),
...(item.event.properties ?? {}),
...(item.properties ?? {}),
}),
project_id: item.event.project_id ?? profile?.project_id ?? '',
created_at: item.event.created_at ?? profile?.created_at ?? '',
is_external: item.event.is_external,
project_id: item.project_id ?? profile?.project_id ?? '',
created_at: item.created_at ?? profile?.created_at ?? '',
is_external: item.is_external,
};
})
.flatMap((item) => (item ? [item] : []));
@@ -188,24 +220,12 @@ export class ProfileBuffer extends RedisBuffer<IClickhouseProfile> {
await multi.exec();
}
private async insertIntoClickhouse(
values: IClickhouseProfile[],
): Promise<void> {
await ch.insert({
table: TABLE_NAMES.profiles,
values,
format: 'JSONEachRow',
});
}
public findMany: FindMany<IClickhouseProfile, IServiceProfile> = async (
callback,
) => {
return this.getQueue(-1)
.then((queue) => {
return queue
.filter(callback)
.map((item) => transformProfile(item.event));
return queue.filter(callback).map(transformProfile);
})
.catch(() => {
return [];
@@ -216,7 +236,7 @@ export class ProfileBuffer extends RedisBuffer<IClickhouseProfile> {
return this.getQueue(-1)
.then((queue) => {
const match = queue.find(callback);
return match ? transformProfile(match.event) : null;
return match ? transformProfile(match) : null;
})
.catch(() => {
return null;

View File

@@ -0,0 +1,39 @@
import { TABLE_NAMES, ch } from '../../clickhouse-client';
import type { IClickhouseBotEvent } from '../../services/event.service';
import type {
Find,
FindMany,
OnCompleted,
OnInsert,
ProcessQueue,
} from './buffer';
import { RedisBuffer } from './buffer';
/**
 * Buffer for bot events: batches incoming bot events in Redis and writes
 * them to the ClickHouse `events_bots` table in groups of 100.
 */
export class BotBuffer extends RedisBuffer<IClickhouseBotEvent> {
  constructor() {
    super({
      table: TABLE_NAMES.events_bots,
      batchSize: 100,
    });
  }

  // Bot events need no insert/completion hooks.
  public onInsert?: OnInsert<IClickhouseBotEvent> | undefined;
  public onCompleted?: OnCompleted<IClickhouseBotEvent> | undefined;

  // Persist the whole queue to ClickHouse, then acknowledge every index.
  public processQueue: ProcessQueue<IClickhouseBotEvent> = async (queue) => {
    const rows = queue.map(({ event }) => event);
    await ch.insert({
      table: TABLE_NAMES.events_bots,
      values: rows,
      format: 'JSONEachRow',
    });
    return queue.map(({ index }) => index);
  };

  // Lookups are not supported for bot events.
  public findMany: FindMany<IClickhouseBotEvent, IClickhouseBotEvent> =
    async () => [];
  public find: Find<IClickhouseBotEvent, IClickhouseBotEvent> = async () =>
    null;
}

View File

@@ -0,0 +1,181 @@
import { createLogger } from '@openpanel/logger';
import { getRedisCache } from '@openpanel/redis';
// Sentinel value written over list entries so they can be removed in bulk
// with a single LREM (see RedisBuffer.deleteIndexes).
export const DELETE = '__DELETE__';

/** One buffered entry paired with its index in the backing Redis list. */
export type QueueItem<T> = {
  event: T;
  index: number;
};

/** Hook invoked for each item as it is inserted into the buffer. */
export type OnInsert<T> = (data: T) => unknown;

/** Hook invoked with the successfully persisted items after a flush. */
export type OnCompleted<T> =
  | ((data: T[]) => Promise<unknown[]>)
  | ((data: T[]) => unknown[]);

/**
 * Persists a batch of queued items and returns the Redis list indexes that
 * were handled (so the buffer can delete them).
 */
export type ProcessQueue<T> = (data: QueueItem<T>[]) => Promise<number[]>;

/** Finds the first buffered item matching the predicate, or null. */
export type Find<T, R = unknown> = (
  callback: (item: QueueItem<T>) => boolean,
) => Promise<R | null>;

/** Finds all buffered items matching the predicate. */
export type FindMany<T, R = unknown> = (
  callback: (item: QueueItem<T>) => boolean,
) => Promise<R[]>;
// Serialize an unknown thrown value into a readable multi-line string for
// storage alongside failed buffer batches.
const getError = (e: unknown) => {
  if (!(e instanceof Error)) {
    return 'Unknown error';
  }
  const cause = e.cause ? String(e.cause) : '';
  const lines = [
    `Name: ${e.name}`,
    `Message: ${e.message}`,
    `Stack: ${e.stack}`,
    `Cause: ${cause}`,
  ];
  return lines.join('\n');
};
/**
 * Generic Redis-backed write buffer.
 *
 * `insert()` appends items to a Redis list; when the list reaches
 * `batchSize` (and auto-flush is enabled) the buffer is flushed: the
 * subclass-supplied `processQueue` persists the queued items and returns
 * the Redis list indexes it handled, which are then deleted from the list.
 * Batches that fail to process are stashed under a `failed:<timestamp>`
 * hash for later inspection/retry.
 */
export abstract class RedisBuffer<T> {
  // constructor
  public prefix = 'op:buffer';
  public table: string;
  public batchSize?: number;
  public logger: ReturnType<typeof createLogger>;
  public disableAutoFlush?: boolean;

  // abstract methods
  public abstract onInsert?: OnInsert<T>;
  public abstract onCompleted?: OnCompleted<T>;
  public abstract processQueue: ProcessQueue<T>;
  public abstract find: Find<T, unknown>;
  public abstract findMany: FindMany<T, unknown>;

  constructor(options: {
    table: string;
    batchSize?: number;
    disableAutoFlush?: boolean;
  }) {
    this.table = options.table;
    this.batchSize = options.batchSize;
    this.disableAutoFlush = options.disableAutoFlush;
    this.logger = createLogger({ name: 'buffer' }).child({
      table: this.table,
    });
  }

  /** Redis key for this buffer; `name` adds a suffix (e.g. `failed:<ts>`). */
  public getKey(name?: string) {
    const key = `${this.prefix}:${this.table}`;
    if (name) {
      return `${key}:${name}`;
    }
    return key;
  }

  /**
   * Append one item to the buffer and trigger an auto-flush once the list
   * reaches `batchSize` (unless `disableAutoFlush` is set).
   */
  public async insert(value: T) {
    this.onInsert?.(value);
    await getRedisCache().rpush(this.getKey(), JSON.stringify(value));
    const length = await getRedisCache().llen(this.getKey());
    this.logger.debug(
      `Inserted item into buffer ${this.table}. Current length: ${length}`,
    );
    if (!this.disableAutoFlush && this.batchSize && length >= this.batchSize) {
      this.logger.info(
        `Buffer ${this.table} reached batch size (${this.batchSize}). Flushing...`,
      );
      // Fire-and-forget: flush() catches its own errors and never rejects.
      void this.flush();
    }
  }

  /**
   * Drain up to one batch from Redis through `processQueue`.
   *
   * @returns a `{ count, data }` summary, or undefined when the batch
   *          failed (the batch is then parked in a `failed:<ts>` hash) or
   *          the Redis read itself failed.
   */
  public async flush() {
    try {
      const queue = await this.getQueue(this.batchSize || -1);
      if (queue.length === 0) {
        this.logger.debug(`Flush called on empty buffer ${this.table}`);
        return { count: 0, data: [] };
      }

      this.logger.info(
        `Flushing ${queue.length} items from buffer ${this.table}`,
      );

      try {
        const indexes = await this.processQueue(queue);
        await this.deleteIndexes(indexes);

        // processQueue returns Redis *list* indexes. getQueue() drops
        // unparseable entries, so array positions may not line up with
        // list indexes — resolve through each QueueItem's own `index`
        // instead of indexing into `queue` directly.
        const eventsByIndex = new Map(
          queue.map((item) => [item.index, item.event] as [number, T]),
        );
        const data = indexes
          .map((index) => eventsByIndex.get(index))
          .filter((event): event is T => event != null);

        if (this.onCompleted) {
          const res = await this.onCompleted(data);
          this.logger.info(
            `Completed processing ${res.length} items from buffer ${this.table}`,
          );
          return { count: res.length, data: res };
        }

        this.logger.info(
          `Processed ${indexes.length} items from buffer ${this.table}`,
        );
        return { count: indexes.length, data: indexes };
      } catch (e) {
        this.logger.error(
          `Failed to process queue while flushing buffer ${this.table}:`,
          e,
        );
        // Park the failed batch in a hash so it can be inspected/retried.
        const timestamp = new Date().getTime();
        await getRedisCache().hset(this.getKey(`failed:${timestamp}`), {
          error: getError(e),
          data: JSON.stringify(queue.map((item) => item.event)),
          retries: 0,
        });
        this.logger.warn(
          `Stored ${queue.length} failed items in ${this.getKey(`failed:${timestamp}`)}`,
        );
      }
    } catch (e) {
      this.logger.error(
        `Failed to get queue while flushing buffer ${this.table}:`,
        e,
      );
    }
  }

  /**
   * Remove the given list indexes from the buffer: each index is first
   * overwritten with a sentinel via LSET, then all sentinels are removed
   * with a single LREM, inside one MULTI transaction. Head indexes stay
   * stable under concurrent RPUSHes, so this is safe alongside inserts.
   */
  public async deleteIndexes(indexes: number[]) {
    const multi = getRedisCache().multi();
    indexes.forEach((index) => {
      multi.lset(this.getKey(), index, DELETE);
    });
    multi.lrem(this.getKey(), 0, DELETE);
    await multi.exec();
    this.logger.debug(
      `Deleted ${indexes.length} items from buffer ${this.table}`,
    );
  }

  /**
   * Read up to `limit` items (-1 = everything) from the head of the list,
   * keeping each item's Redis list index. Unparseable entries are skipped.
   */
  public async getQueue(limit: number): Promise<QueueItem<T>[]> {
    // LRANGE's stop index is inclusive, so a positive limit must be
    // translated to `limit - 1` to fetch exactly `limit` items (passing
    // `limit` directly fetched one item too many).
    const stop = limit > 0 ? limit - 1 : limit;
    const queue = await getRedisCache().lrange(this.getKey(), 0, stop);
    const result = queue
      .map((item, index) => ({
        event: this.transformQueueItem(item),
        index,
      }))
      .filter((item): item is QueueItem<T> => item.event !== null);
    this.logger.debug(
      `Retrieved ${result.length} items from buffer ${this.table}`,
    );
    return result;
  }

  /** Parse one raw list entry; returns null (and warns) on bad JSON. */
  private transformQueueItem(item: string): T | null {
    try {
      return JSON.parse(item);
    } catch (e) {
      this.logger.warn(`Failed to parse item in buffer ${this.table}:`, e);
      return null;
    }
  }
}

View File

@@ -0,0 +1,211 @@
import { groupBy, omit } from 'ramda';
import SuperJSON from 'superjson';
import { deepMergeObjects } from '@openpanel/common';
import { getRedisCache, getRedisPub } from '@openpanel/redis';
import { TABLE_NAMES, ch } from '../../clickhouse-client';
import { transformEvent } from '../../services/event.service';
import type {
IClickhouseEvent,
IServiceEvent,
} from '../../services/event.service';
import type {
Find,
FindMany,
OnCompleted,
OnInsert,
ProcessQueue,
QueueItem,
} from './buffer';
import { RedisBuffer } from './buffer';
// Comparator for Array#sort: orders queue items by their event's
// created_at timestamp, oldest first.
const sortOldestFirst = (
  a: QueueItem<IClickhouseEvent>,
  b: QueueItem<IClickhouseEvent>,
) => {
  const aTime = new Date(a.event.created_at).getTime();
  const bTime = new Date(b.event.created_at).getTime();
  return aTime - bTime;
};
/**
 * Redis-backed buffer for incoming analytics events, flushed to the
 * ClickHouse `events` table.
 *
 * screen_view events from real devices are held back until the *next*
 * screen_view (or a session_end) arrives, so the time spent on each
 * screen can be computed as a `duration` before insertion. All other
 * events are flushed immediately, enriched with path/location data from
 * the most recent prior event of the same profile.
 */
export class EventBuffer extends RedisBuffer<IClickhouseEvent> {
  constructor() {
    super({
      table: TABLE_NAMES.events,
    });
  }
  // Fires when an event enters the buffer: broadcast it on the
  // "event:received" channel and, when a profile is attached, mark that
  // profile as "live" for 5 minutes via a TTL key.
  public onInsert?: OnInsert<IClickhouseEvent> | undefined = (event) => {
    getRedisPub().publish(
      'event:received',
      SuperJSON.stringify(transformEvent(event)),
    );
    if (event.profile_id) {
      getRedisCache().set(
        `live:event:${event.project_id}:${event.profile_id}`,
        '',
        'EX',
        60 * 5,
      );
    }
  };
  // Fires after a batch was written to ClickHouse: broadcast each saved
  // event on "event:saved" and return the saved event ids.
  public onCompleted?: OnCompleted<IClickhouseEvent> | undefined = (
    savedEvents,
  ) => {
    for (const event of savedEvents) {
      getRedisPub().publish(
        'event:saved',
        SuperJSON.stringify(transformEvent(event)),
      );
    }
    return savedEvents.map((event) => event.id);
  };
  /**
   * Flush handler. Decides, per queued item, whether it is
   *  - inserted into ClickHouse now,
   *  - left in the buffer (screen_views still waiting for a successor), or
   *  - moved to the "stalled" list (older than 24h and still unprocessable).
   * Returns the Redis indexes of every item that was consumed (inserted
   * or stalled) so the caller can delete them from the buffer.
   */
  public processQueue: ProcessQueue<IClickhouseEvent> = async (queue) => {
    const itemsToClickhouse = new Set<QueueItem<IClickhouseEvent>>();
    const itemsToStalled = new Set<QueueItem<IClickhouseEvent>>();
    // Sort data by created_at
    // oldest first
    queue.sort(sortOldestFirst);
    // All events thats not a screen_view can be sent to clickhouse
    // We only need screen_views since we want to calculate the duration of each screen
    // To do this we need a minimum of 2 screen_views
    queue
      .filter(
        (item) =>
          item.event.name !== 'screen_view' || item.event.device === 'server',
      )
      .forEach((item) => {
        // Find the last event with data and merge it with the current event
        // We use profile_id here since this property can be set from backend as well
        // NOTE(review): `item.index` is the item's position in the Redis
        // list (assigned by getQueue *before* the sort above mutated the
        // array), so `slice(0, item.index)` is only "everything before
        // this item" if the Redis order was already chronological —
        // confirm that assumption holds.
        const lastEventWithData = queue
          .slice(0, item.index)
          .findLast((lastEvent) => {
            return (
              lastEvent.event.project_id === item.event.project_id &&
              lastEvent.event.profile_id === item.event.profile_id &&
              lastEvent.event.path !== ''
            );
          });
        // Merge in the prior event's fields (except properties/duration);
        // the current event's own fields take precedence.
        const event = deepMergeObjects<IClickhouseEvent>(
          omit(['properties', 'duration'], lastEventWithData?.event || {}),
          item.event,
        );
        if (lastEventWithData) {
          // Record which event the inherited fields came from.
          event.properties.__properties_from = lastEventWithData.event.id;
        }
        return itemsToClickhouse.add({
          ...item,
          event,
        });
      });
    // Group screen_view events by session_id
    const grouped = groupBy(
      (item) => item.event.session_id,
      queue.filter(
        (item) =>
          item.event.name === 'screen_view' && item.event.device !== 'server',
      ),
    );
    // Iterate over each group
    for (const [sessionId, screenViews] of Object.entries(grouped)) {
      if (sessionId === '' || !sessionId) {
        continue;
      }
      // If there is only one screen_view event we can send it back to redis since we can't calculate the duration
      const hasSessionEnd = queue.find(
        (item) =>
          item.event.name === 'session_end' &&
          item.event.session_id === sessionId,
      );
      screenViews
        ?.slice()
        .sort(sortOldestFirst)
        .forEach((item, index) => {
          const nextScreenView = screenViews[index + 1];
          // if nextScreenView does not exists we can't calculate the duration (last event in session)
          if (nextScreenView) {
            // Duration = time until the next screen_view in the session.
            const duration =
              new Date(nextScreenView.event.created_at).getTime() -
              new Date(item.event.created_at).getTime();
            const event = {
              ...item.event,
              duration,
            };
            event.properties.__duration_from = nextScreenView.event.id;
            itemsToClickhouse.add({
              ...item,
              event,
            });
            // push last event in session if we have a session_end event
          } else if (hasSessionEnd) {
            itemsToClickhouse.add(item);
          }
        });
    } // for of end
    // Check if we have any events that has been in the queue for more than 24 hour
    // This should not theoretically happen but if it does we should move them to stalled
    queue.forEach((item) => {
      if (
        !itemsToClickhouse.has(item) &&
        new Date(item.event.created_at).getTime() <
          new Date().getTime() - 1000 * 60 * 60 * 24
      ) {
        itemsToStalled.add(item);
      }
    });
    if (itemsToStalled.size > 0) {
      // Park stalled events on a separate "<key>:stalled" list so they no
      // longer block the main buffer.
      const multi = getRedisCache().multi();
      for (const item of itemsToStalled) {
        multi.rpush(this.getKey('stalled'), JSON.stringify(item.event));
      }
      await multi.exec();
    }
    await ch.insert({
      table: TABLE_NAMES.events,
      values: Array.from(itemsToClickhouse).map((item) => item.event),
      format: 'JSONEachRow',
    });
    // Both inserted and stalled items are considered handled.
    return [
      ...Array.from(itemsToClickhouse).map((item) => item.index),
      ...Array.from(itemsToStalled).map((item) => item.index),
    ];
  };
  /**
   * Scans the entire buffered queue (-1 = whole list) and returns all
   * matching events in service shape. Errors are swallowed and yield [].
   */
  public findMany: FindMany<IClickhouseEvent, IServiceEvent> = async (
    callback,
  ) => {
    return this.getQueue(-1)
      .then((queue) => {
        return queue.filter(callback).map((item) => transformEvent(item.event));
      })
      .catch(() => {
        return [];
      });
  };
  /**
   * Scans the entire buffered queue for the first matching event and
   * returns it in service shape, or null on no match / error.
   */
  public find: Find<IClickhouseEvent, IServiceEvent> = async (callback) => {
    return this.getQueue(-1)
      .then((queue) => {
        const match = queue.find(callback);
        return match ? transformEvent(match.event) : null;
      })
      .catch(() => {
        return null;
      });
  };
}

View File

@@ -0,0 +1,225 @@
import { mergeDeepRight } from 'ramda';
import { toDots } from '@openpanel/common';
import { getRedisCache } from '@openpanel/redis';
import { TABLE_NAMES, ch, chQuery } from '../../clickhouse-client';
import type {
IClickhouseProfile,
IServiceProfile,
} from '../../services/profile.service';
import { transformProfile } from '../../services/profile.service';
import type {
Find,
FindMany,
OnCompleted,
OnInsert,
ProcessQueue,
QueueItem,
} from './buffer';
import { RedisBuffer } from './buffer';
export class ProfileBuffer extends RedisBuffer<IClickhouseProfile> {
constructor() {
super({
table: TABLE_NAMES.profiles,
batchSize: 100,
disableAutoFlush: true,
});
}
public onInsert?: OnInsert<IClickhouseProfile> | undefined;
public onCompleted?: OnCompleted<IClickhouseProfile> | undefined;
public processQueue: ProcessQueue<IClickhouseProfile> = async (queue) => {
const cleanedQueue = this.combineQueueItems(queue);
const redisProfiles = await this.getCachedProfiles(cleanedQueue);
const dbProfiles = await this.fetchDbProfiles(
cleanedQueue.filter((_, index) => !redisProfiles[index]),
);
const values = this.createProfileValues(
cleanedQueue,
redisProfiles,
dbProfiles,
);
if (values.length > 0) {
await this.updateRedisCache(values);
await this.insertIntoClickhouse(values);
}
return queue.map((item) => item.index);
};
private matchPartialObject(
full: any,
partial: any,
options: { ignore: string[] },
): boolean {
if (typeof partial !== 'object' || partial === null) {
return partial === full;
}
for (const key in partial) {
if (options.ignore.includes(key)) {
continue;
}
if (
!(key in full) ||
!this.matchPartialObject(full[key], partial[key], options)
) {
return false;
}
}
return true;
}
private combineQueueItems(
queue: QueueItem<IClickhouseProfile>[],
): QueueItem<IClickhouseProfile>[] {
const itemsToClickhouse = new Map<string, QueueItem<IClickhouseProfile>>();
queue.forEach((item) => {
const key = item.event.project_id + item.event.id;
const existing = itemsToClickhouse.get(key);
itemsToClickhouse.set(key, mergeDeepRight(existing ?? {}, item));
});
return Array.from(itemsToClickhouse.values());
}
private async getCachedProfiles(
cleanedQueue: QueueItem<IClickhouseProfile>[],
): Promise<(IClickhouseProfile | null)[]> {
const redisCache = getRedisCache();
const keys = cleanedQueue.map(
(item) => `profile:${item.event.project_id}:${item.event.id}`,
);
const cachedProfiles = await redisCache.mget(...keys);
return cachedProfiles.map((profile) => {
try {
return profile ? JSON.parse(profile) : null;
} catch (error) {
return null;
}
});
}
private async fetchDbProfiles(
cleanedQueue: QueueItem<IClickhouseProfile>[],
): Promise<IClickhouseProfile[]> {
if (cleanedQueue.length === 0) {
return [];
}
return await chQuery<IClickhouseProfile>(
`SELECT
*
FROM ${TABLE_NAMES.profiles}
WHERE
(id, project_id) IN (${cleanedQueue.map((item) => `('${item.event.id}', '${item.event.project_id}')`).join(',')})
ORDER BY
created_at DESC`,
);
}
private createProfileValues(
cleanedQueue: QueueItem<IClickhouseProfile>[],
redisProfiles: (IClickhouseProfile | null)[],
dbProfiles: IClickhouseProfile[],
): IClickhouseProfile[] {
return cleanedQueue
.map((item, index) => {
const cachedProfile = redisProfiles[index];
const dbProfile = dbProfiles.find(
(p) =>
p.id === item.event.id && p.project_id === item.event.project_id,
);
const profile = cachedProfile || dbProfile;
if (
profile &&
this.matchPartialObject(
profile,
{
...item.event,
properties: toDots(item.event.properties),
},
{
ignore: ['created_at'],
},
)
) {
console.log('Ignoring profile', item.event.id);
return null;
}
return {
id: item.event.id,
first_name: item.event.first_name ?? profile?.first_name ?? '',
last_name: item.event.last_name ?? profile?.last_name ?? '',
email: item.event.email ?? profile?.email ?? '',
avatar: item.event.avatar ?? profile?.avatar ?? '',
properties: toDots({
...(profile?.properties ?? {}),
...(item.event.properties ?? {}),
}),
project_id: item.event.project_id ?? profile?.project_id ?? '',
created_at: item.event.created_at ?? profile?.created_at ?? '',
is_external: item.event.is_external,
};
})
.flatMap((item) => (item ? [item] : []));
}
private async updateRedisCache(values: IClickhouseProfile[]): Promise<void> {
const redisCache = getRedisCache();
const multi = redisCache.multi();
values.forEach((value) => {
multi.setex(
`profile:${value.project_id}:${value.id}`,
60 * 30, // 30 minutes
JSON.stringify(value),
);
});
await multi.exec();
}
private async insertIntoClickhouse(
values: IClickhouseProfile[],
): Promise<void> {
await ch.insert({
table: TABLE_NAMES.profiles,
values,
format: 'JSONEachRow',
});
}
public findMany: FindMany<IClickhouseProfile, IServiceProfile> = async (
callback,
) => {
return this.getQueue(-1)
.then((queue) => {
return queue
.filter(callback)
.map((item) => transformProfile(item.event));
})
.catch(() => {
return [];
});
};
public find: Find<IClickhouseProfile, IServiceProfile> = async (callback) => {
return this.getQueue(-1)
.then((queue) => {
const match = queue.find(callback);
return match ? transformProfile(match.event) : null;
})
.catch(() => {
return null;
});
};
}