fix: overall perf improvements

* fix: ignore private ips

* fix: performance related fixes

* fix: simplify event buffer

* fix: default to 1 events queue shard

* add: cleanup scripts

* fix: comments

* fix comments

* fix

* fix: groupmq

* wip

* fix: sync cachable

* remove cluster names and add them behind an env flag (if someone wants to scale)

* fix

* wip

* better logger

* remove reqid and user agent

* fix lock

* remove wait_for_async_insert
This commit is contained in:
Carl-Gerhard Lindesvärd
2025-11-15 22:13:59 +01:00
committed by GitHub
parent 38cc53890a
commit da59622dce
66 changed files with 5042 additions and 3860 deletions

View File

@@ -8,18 +8,21 @@ export class BaseBuffer {
lockKey: string;
lockTimeout = 60;
onFlush: () => void;
enableParallelProcessing: boolean;
protected bufferCounterKey: string;
constructor(options: {
name: string;
onFlush: () => Promise<void>;
enableParallelProcessing?: boolean;
}) {
this.logger = createLogger({ name: options.name });
this.name = options.name;
this.lockKey = `lock:${this.name}`;
this.onFlush = options.onFlush;
this.bufferCounterKey = `${this.name}:buffer:count`;
this.enableParallelProcessing = options.enableParallelProcessing ?? false;
}
protected chunks<T>(items: T[], size: number) {
@@ -91,6 +94,26 @@ export class BaseBuffer {
async tryFlush() {
const now = performance.now();
// Parallel mode: No locking, multiple workers can process simultaneously
if (this.enableParallelProcessing) {
try {
this.logger.debug('Processing buffer (parallel mode)...');
await this.onFlush();
this.logger.debug('Flush completed (parallel mode)', {
elapsed: performance.now() - now,
});
} catch (error) {
this.logger.error('Failed to process buffer (parallel mode)', {
error,
});
// In parallel mode, we can't safely reset counter as other workers might be active
// Counter will be resynced automatically by the periodic job
}
return;
}
// Sequential mode: Use lock to ensure only one worker processes at a time
const lockId = generateSecureId('lock');
const acquired = await getRedisCache().set(
this.lockKey,
@@ -101,7 +124,7 @@ export class BaseBuffer {
);
if (acquired === 'OK') {
try {
this.logger.info('Acquired lock. Processing buffer...', {
this.logger.debug('Acquired lock. Processing buffer...', {
lockId,
});
await this.onFlush();
@@ -117,7 +140,7 @@ export class BaseBuffer {
}
} finally {
await this.releaseLock(lockId);
this.logger.info('Flush completed', {
this.logger.debug('Flush completed', {
elapsed: performance.now() - now,
lockId,
});

View File

@@ -71,7 +71,7 @@ export class BotBuffer extends BaseBuffer {
.decrby(this.bufferCounterKey, events.length)
.exec();
this.logger.info('Processed bot events', {
this.logger.debug('Processed bot events', {
count: events.length,
});
} catch (error) {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -12,12 +12,12 @@ export class ProfileBuffer extends BaseBuffer {
private batchSize = process.env.PROFILE_BUFFER_BATCH_SIZE
? Number.parseInt(process.env.PROFILE_BUFFER_BATCH_SIZE, 10)
: 200;
private daysToKeep = process.env.PROFILE_BUFFER_DAYS_TO_KEEP
? Number.parseInt(process.env.PROFILE_BUFFER_DAYS_TO_KEEP, 10)
: 7;
private chunkSize = process.env.PROFILE_BUFFER_CHUNK_SIZE
? Number.parseInt(process.env.PROFILE_BUFFER_CHUNK_SIZE, 10)
: 1000;
private ttlInSeconds = process.env.PROFILE_BUFFER_TTL_IN_SECONDS
? Number.parseInt(process.env.PROFILE_BUFFER_TTL_IN_SECONDS, 10)
: 60 * 60;
private readonly redisKey = 'profile-buffer';
private readonly redisProfilePrefix = 'profile-cache:';
@@ -49,7 +49,7 @@ export class ProfileBuffer extends BaseBuffer {
profileId: profile.id,
projectId: profile.project_id,
});
return (await getRedisCache().exists(cacheKey)) === 1;
return (await this.redis.exists(cacheKey)) === 1;
}
async add(profile: IClickhouseProfile, isFromEvent = false) {
@@ -90,9 +90,6 @@ export class ProfileBuffer extends BaseBuffer {
profile,
});
const cacheTtl = profile.is_external
? 60 * 60 * 24 * this.daysToKeep
: 60 * 60; // 1 hour for internal profiles
const cacheKey = this.getProfileCacheKey({
profileId: profile.id,
projectId: profile.project_id,
@@ -100,7 +97,7 @@ export class ProfileBuffer extends BaseBuffer {
const result = await this.redis
.multi()
.set(cacheKey, JSON.stringify(mergedProfile), 'EX', cacheTtl)
.set(cacheKey, JSON.stringify(mergedProfile), 'EX', this.ttlInSeconds)
.rpush(this.redisKey, JSON.stringify(mergedProfile))
.incr(this.bufferCounterKey)
.llen(this.redisKey)
@@ -120,7 +117,6 @@ export class ProfileBuffer extends BaseBuffer {
batchSize: this.batchSize,
});
if (bufferLength >= this.batchSize) {
this.logger.info('Buffer full, initiating flush');
await this.tryFlush();
}
} catch (error) {
@@ -137,18 +133,33 @@ export class ProfileBuffer extends BaseBuffer {
projectId: profile.project_id,
});
const existingProfile = await getRedisCache().get(cacheKey);
const existingProfile = await this.fetchFromCache(
profile.id,
profile.project_id,
);
if (existingProfile) {
const parsedProfile = getSafeJson<IClickhouseProfile>(existingProfile);
if (parsedProfile) {
logger.debug('Profile found in Redis');
return parsedProfile;
}
logger.debug('Profile found in Redis');
return existingProfile;
}
return this.fetchFromClickhouse(profile, logger);
}
public async fetchFromCache(
profileId: string,
projectId: string,
): Promise<IClickhouseProfile | null> {
const cacheKey = this.getProfileCacheKey({
profileId,
projectId,
});
const existingProfile = await this.redis.get(cacheKey);
if (!existingProfile) {
return null;
}
return getSafeJson<IClickhouseProfile>(existingProfile);
}
private async fetchFromClickhouse(
profile: IClickhouseProfile,
logger: ILogger,
@@ -176,7 +187,7 @@ export class ProfileBuffer extends BaseBuffer {
async processBuffer() {
try {
this.logger.info('Starting profile buffer processing');
this.logger.debug('Starting profile buffer processing');
const profiles = await this.redis.lrange(
this.redisKey,
0,
@@ -188,7 +199,7 @@ export class ProfileBuffer extends BaseBuffer {
return;
}
this.logger.info(`Processing ${profiles.length} profiles in buffer`);
this.logger.debug(`Processing ${profiles.length} profiles in buffer`);
const parsedProfiles = profiles.map((p) =>
getSafeJson<IClickhouseProfile>(p),
);
@@ -208,7 +219,7 @@ export class ProfileBuffer extends BaseBuffer {
.decrby(this.bufferCounterKey, profiles.length)
.exec();
this.logger.info('Successfully completed profile processing', {
this.logger.debug('Successfully completed profile processing', {
totalProfiles: profiles.length,
});
} catch (error) {

View File

@@ -12,6 +12,9 @@ export class SessionBuffer extends BaseBuffer {
private batchSize = process.env.SESSION_BUFFER_BATCH_SIZE
? Number.parseInt(process.env.SESSION_BUFFER_BATCH_SIZE, 10)
: 1000;
private chunkSize = process.env.SESSION_BUFFER_CHUNK_SIZE
? Number.parseInt(process.env.SESSION_BUFFER_CHUNK_SIZE, 10)
: 1000;
private readonly redisKey = 'session-buffer';
private redis: Redis;
@@ -209,7 +212,7 @@ export class SessionBuffer extends BaseBuffer {
};
});
for (const chunk of this.chunks(sessions, 1000)) {
for (const chunk of this.chunks(sessions, this.chunkSize)) {
// Insert to ClickHouse
await ch.insert({
table: TABLE_NAMES.sessions,
@@ -225,7 +228,7 @@ export class SessionBuffer extends BaseBuffer {
.decrby(this.bufferCounterKey, events.length);
await multi.exec();
this.logger.info('Processed sessions', {
this.logger.debug('Processed sessions', {
count: events.length,
});
} catch (error) {

View File

@@ -24,10 +24,13 @@ type WarnLogParams = LogParams & { err?: Error };
class CustomLogger implements Logger {
trace({ message, args }: LogParams) {
logger.info(message, args);
logger.debug(message, args);
}
debug({ message, args }: LogParams) {
logger.info(message, args);
if (message.includes('Query:') && args?.response_status === 200) {
return;
}
logger.debug(message, args);
}
info({ message, args }: LogParams) {
logger.info(message, args);
@@ -157,8 +160,6 @@ export const ch = new Proxy(originalCh, {
return (...args: any[]) =>
withRetry(() => {
args[0].clickhouse_settings = {
// Allow bigger HTTP payloads/time to stream rows
wait_for_async_insert: 1,
// Increase insert timeouts and buffer sizes for large batches
max_execution_time: 300,
max_insert_block_size: '500000',

View File

@@ -1,4 +1,4 @@
import { cacheable } from '@openpanel/redis';
import { cacheable, cacheableLru } from '@openpanel/redis';
import type { Client, Prisma } from '../prisma-client';
import { db } from '../prisma-client';
@@ -34,4 +34,7 @@ export async function getClientById(
});
}
export const getClientByIdCached = cacheable(getClientById, 60 * 60 * 24);
export const getClientByIdCached = cacheableLru(getClientById, {
maxSize: 1000,
ttl: 60 * 5,
});

View File

@@ -19,12 +19,9 @@ import type { EventMeta, Prisma } from '../prisma-client';
import { db } from '../prisma-client';
import { type SqlBuilderObject, createSqlBuilder } from '../sql-builder';
import { getEventFiltersWhereClause } from './chart.service';
import { getOrganizationByProjectIdCached } from './organization.service';
import type { IServiceProfile, IServiceUpsertProfile } from './profile.service';
import {
getProfileById,
getProfileByIdCached,
getProfiles,
getProfilesCached,
upsertProfile,
} from './profile.service';
@@ -156,8 +153,6 @@ export interface IServiceEvent {
properties: Record<string, unknown> & {
hash?: string;
query?: Record<string, unknown>;
__reqId?: string;
__user_agent?: string;
};
createdAt: Date;
country?: string | undefined;
@@ -343,7 +338,7 @@ export async function createEvent(payload: IServiceCreateEventPayload) {
sdk_version: payload.sdkVersion ?? '',
};
await Promise.all([sessionBuffer.add(event), eventBuffer.add(event)]);
const promises = [sessionBuffer.add(event), eventBuffer.add(event)];
if (payload.profileId) {
const profile: IServiceUpsertProfile = {
@@ -374,10 +369,12 @@ export async function createEvent(payload: IServiceCreateEventPayload) {
profile.isExternal ||
(profile.isExternal === false && payload.name === 'session_start')
) {
await upsertProfile(profile, true);
promises.push(upsertProfile(profile, true));
}
}
await Promise.all(promises);
return {
document: event,
};
@@ -395,6 +392,7 @@ export interface GetEventListOptions {
endDate?: Date;
select?: SelectHelper<IServiceEvent>;
custom?: (sb: SqlBuilderObject) => void;
dateIntervalInDays?: number;
}
export async function getEventList(options: GetEventListOptions) {
@@ -408,28 +406,28 @@ export async function getEventList(options: GetEventListOptions) {
filters,
startDate,
endDate,
select: incomingSelect,
custom,
select: incomingSelect,
dateIntervalInDays = 0.5,
} = options;
const { sb, getSql, join } = createSqlBuilder();
const organization = await getOrganizationByProjectIdCached(projectId);
// This will speed up the query quite a lot for big organizations
const dateIntervalInDays =
organization?.subscriptionPeriodEventsLimit &&
organization?.subscriptionPeriodEventsLimit > 1_000_000
? 1
: 7;
const MAX_DATE_INTERVAL_IN_DAYS = 365;
// Cap the date interval to prevent infinity
const safeDateIntervalInDays = Math.min(
dateIntervalInDays,
MAX_DATE_INTERVAL_IN_DAYS,
);
if (typeof cursor === 'number') {
sb.offset = Math.max(0, (cursor ?? 0) * take);
} else if (cursor instanceof Date) {
sb.where.cursorWindow = `created_at >= toDateTime64(${sqlstring.escape(formatClickhouseDate(cursor))}, 3) - INTERVAL ${dateIntervalInDays} DAY`;
sb.where.cursorWindow = `created_at >= toDateTime64(${sqlstring.escape(formatClickhouseDate(cursor))}, 3) - INTERVAL ${safeDateIntervalInDays} DAY`;
sb.where.cursor = `created_at <= ${sqlstring.escape(formatClickhouseDate(cursor))}`;
}
if (!cursor) {
sb.where.cursorWindow = `created_at >= toDateTime64(${sqlstring.escape(formatClickhouseDate(new Date()))}, 3) - INTERVAL ${dateIntervalInDays} DAY`;
sb.where.cursorWindow = `created_at >= toDateTime64(${sqlstring.escape(formatClickhouseDate(new Date()))}, 3) - INTERVAL ${safeDateIntervalInDays} DAY`;
}
sb.limit = take;
@@ -453,6 +451,9 @@ export async function getEventList(options: GetEventListOptions) {
incomingSelect ?? {},
);
sb.select.createdAt = 'created_at';
sb.select.projectId = 'project_id';
if (select.id) {
sb.select.id = 'id';
}
@@ -474,9 +475,6 @@ export async function getEventList(options: GetEventListOptions) {
if (select.properties) {
sb.select.properties = 'properties';
}
if (select.createdAt) {
sb.select.createdAt = 'created_at';
}
if (select.country) {
sb.select.country = 'country';
}
@@ -583,21 +581,20 @@ export async function getEventList(options: GetEventListOptions) {
custom(sb);
}
console.log('getSql()', getSql());
const data = await getEvents(getSql(), {
profile: select.profile ?? true,
meta: select.meta ?? true,
});
// If we dont get any events, try without the cursor window
if (data.length === 0 && sb.where.cursorWindow) {
if (
data.length === 0 &&
sb.where.cursorWindow &&
safeDateIntervalInDays < MAX_DATE_INTERVAL_IN_DAYS
) {
return getEventList({
...options,
custom(sb) {
options.custom?.(sb);
delete sb.where.cursorWindow;
},
dateIntervalInDays: dateIntervalInDays * 2,
});
}
@@ -945,7 +942,7 @@ class EventService {
]);
if (event?.profileId) {
const profile = await getProfileByIdCached(event?.profileId, projectId);
const profile = await getProfileById(event?.profileId, projectId);
if (profile) {
event.profile = profile;
}

View File

@@ -13,7 +13,7 @@ import type {
IServiceCreateEventPayload,
IServiceEvent,
} from './event.service';
import { getProfileById, getProfileByIdCached } from './profile.service';
import { getProfileById } from './profile.service';
import { getProjectByIdCached } from './project.service';
type ICreateNotification = Pick<
@@ -264,10 +264,7 @@ export async function checkNotificationRulesForEvent(
payload.profileId &&
rules.some((rule) => rule.template?.match(/{{profile\.[^}]*}}/))
) {
const profile = await getProfileByIdCached(
payload.profileId,
payload.projectId,
);
const profile = await getProfileById(payload.profileId, payload.projectId);
if (profile) {
(payload as any).profile = profile;
}

View File

@@ -106,6 +106,11 @@ export async function getProfileById(id: string, projectId: string) {
return null;
}
const cachedProfile = await profileBuffer.fetchFromCache(id, projectId);
if (cachedProfile) {
return transformProfile(cachedProfile);
}
const [profile] = await chQuery<IClickhouseProfile>(
`SELECT
id,
@@ -127,8 +132,6 @@ export async function getProfileById(id: string, projectId: string) {
return transformProfile(profile);
}
export const getProfileByIdCached = cacheable(getProfileById, 60 * 30);
interface GetProfileListOptions {
projectId: string;
take: number;
@@ -306,10 +309,5 @@ export async function upsertProfile(
is_external: isExternal,
};
if (!isFromEvent) {
// Save to cache directly since the profile might be used before its saved in clickhouse
getProfileByIdCached.set(id, projectId)(transformProfile(profile));
}
return profileBuffer.add(profile, isFromEvent);
}

View File

@@ -1,6 +1,6 @@
import { generateSalt } from '@openpanel/common/server';
import { getRedisCache } from '@openpanel/redis';
import { cacheableLru } from '@openpanel/redis';
import { db } from '../prisma-client';
export async function getCurrentSalt() {
@@ -17,36 +17,36 @@ export async function getCurrentSalt() {
return salt.salt;
}
export async function getSalts() {
const cache = await getRedisCache().get('op:salt');
if (cache) {
return JSON.parse(cache);
}
export const getSalts = cacheableLru(
'op:salt',
async () => {
const [curr, prev] = await db.salt.findMany({
orderBy: {
createdAt: 'desc',
},
take: 2,
});
const [curr, prev] = await db.salt.findMany({
orderBy: {
createdAt: 'desc',
},
take: 2,
});
if (!curr) {
throw new Error('No salt found');
}
if (!curr) {
throw new Error('No salt found');
}
if (!prev) {
throw new Error('No salt found');
}
if (!prev) {
throw new Error('No salt found');
}
const salts = {
current: curr.salt,
previous: prev.salt,
};
const salts = {
current: curr.salt,
previous: prev.salt,
};
await getRedisCache().set('op:salt', JSON.stringify(salts), 'EX', 60 * 10);
return salts;
}
return salts;
},
{
maxSize: 2,
ttl: 60 * 5,
},
);
export async function createInitialSalts() {
const MAX_RETRIES = 5;