fix(buffer): better merging of profiles in buffer

This commit is contained in:
Carl-Gerhard Lindesvärd
2025-02-22 07:46:06 +01:00
parent 4c938131ca
commit 2022a82f03
3 changed files with 89 additions and 81 deletions

View File

@@ -21,6 +21,7 @@
"@openpanel/validation": "workspace:*", "@openpanel/validation": "workspace:*",
"@prisma/client": "^5.1.1", "@prisma/client": "^5.1.1",
"@prisma/extension-read-replicas": "^0.4.0", "@prisma/extension-read-replicas": "^0.4.0",
"fast-deep-equal": "^3.1.3",
"jiti": "^2.4.1", "jiti": "^2.4.1",
"prisma-json-types-generator": "^3.1.1", "prisma-json-types-generator": "^3.1.1",
"ramda": "^0.29.1", "ramda": "^0.29.1",

View File

@@ -1,12 +1,20 @@
import { createHash } from 'node:crypto'; import { deepMergeObjects } from '@openpanel/common';
import { getSafeJson } from '@openpanel/common'; // import { getSafeJson } from '@openpanel/json';
import { type Redis, getRedisCache } from '@openpanel/redis'; import { type Redis, getRedisCache } from '@openpanel/redis';
import { dissocPath, mergeDeepRight, omit, whereEq } from 'ramda'; import shallowEqual from 'fast-deep-equal';
import { omit } from 'ramda';
import { TABLE_NAMES, ch, chQuery } from '../clickhouse/client'; import { TABLE_NAMES, ch, chQuery } from '../clickhouse/client';
import type { IClickhouseProfile } from '../services/profile.service'; import type { IClickhouseProfile } from '../services/profile.service';
import { BaseBuffer } from './base-buffer'; import { BaseBuffer } from './base-buffer';
import { isPartialMatch } from './partial-json-match';
// TODO: Use @openpanel/json when polar is merged
/**
 * Parse a JSON string, returning `null` instead of throwing on invalid input.
 *
 * Temporary local copy (see the TODO above): to be replaced by
 * `@openpanel/json` once the polar branch is merged.
 *
 * @param str - Raw JSON text (e.g. a value read back from Redis).
 * @returns The parsed value typed as `T`, or `null` when parsing fails.
 *          Note: the cast to `T` is unchecked — callers must trust the shape.
 *          A stored literal `"null"` is indistinguishable from a parse failure.
 */
function getSafeJson<T>(str: string): T | null {
  try {
    // JSON.parse returns `any`; make the caller-supplied cast explicit.
    return JSON.parse(str) as T;
  } catch {
    // Optional catch binding — the error value is deliberately unused.
    return null;
  }
}
export class ProfileBuffer extends BaseBuffer { export class ProfileBuffer extends BaseBuffer {
private batchSize = process.env.PROFILE_BUFFER_BATCH_SIZE private batchSize = process.env.PROFILE_BUFFER_BATCH_SIZE
@@ -34,35 +42,14 @@ export class ProfileBuffer extends BaseBuffer {
this.redis = getRedisCache(); this.redis = getRedisCache();
} }
private excludeKeys( private getProfileCacheKey({
profile: IClickhouseProfile, projectId,
exclude: string[][], profileId,
): IClickhouseProfile { }: {
let filtered = profile; profileId: string;
for (const path of exclude) { projectId: string;
filtered = dissocPath(path, filtered); }) {
} return `${this.redisProfilePrefix}${projectId}:${profileId}`;
return filtered;
}
private match(source: any, partial: any): boolean {
const exclude = [
['created_at'],
['properties', 'browser_version'],
['properties', 'browserVersion'],
['properties', 'latitude'],
['properties', 'longitude'],
['properties', 'os_version'],
['properties', 'osVersion'],
['properties', 'path'],
['properties', 'referrer_name'],
['properties', 'referrerName'],
['properties', 'referrer_type'],
['properties', 'referrerType'],
['properties', 'referrer'],
];
return isPartialMatch(source, this.excludeKeys(partial, exclude));
} }
async add(profile: IClickhouseProfile) { async add(profile: IClickhouseProfile) {
@@ -70,56 +57,54 @@ export class ProfileBuffer extends BaseBuffer {
this.logger.debug('Adding profile', { this.logger.debug('Adding profile', {
projectId: profile.project_id, projectId: profile.project_id,
profileId: profile.id, profileId: profile.id,
profile,
}); });
const cacheKey = `${this.redisProfilePrefix}${profile.project_id}:${profile.id}`;
// Check if we have this profile in Redis cache const existingProfile = await this.fetchFromCache(profile);
const existingProfile = await this.redis.get(cacheKey);
let mergedProfile = profile;
if (!existingProfile) { const mergedProfile: IClickhouseProfile = existingProfile
this.logger.debug('Profile not found in cache, checking Clickhouse', { ? deepMergeObjects(existingProfile, profile)
projectId: profile.project_id, : profile;
profileId: profile.id,
});
// If not in cache, check Clickhouse
const clickhouseProfile = await this.fetchFromClickhouse(profile);
if (clickhouseProfile) {
this.logger.debug('Found existing profile in Clickhouse, merging', {
projectId: profile.project_id,
profileId: profile.id,
});
mergedProfile = mergeDeepRight(clickhouseProfile, profile);
}
} else {
const parsedProfile = getSafeJson<IClickhouseProfile>(existingProfile);
if (parsedProfile) { // Avoid unnecessary updates:
// Only merge if checksums are different // If the profile is less than X minutes old
if (this.match(parsedProfile, profile)) { // and the profiles are the same
return; // Skip if checksums match if (profile.created_at && existingProfile?.created_at) {
} const a = new Date(profile.created_at);
const b = new Date(existingProfile.created_at);
this.logger.debug('Profile changed, merging with cached version', { const diffTime = Math.abs(a.getTime() - b.getTime());
existingProfile: parsedProfile, if (
incomingProfile: profile, diffTime < 1000 * 60 * 10 &&
}); shallowEqual(
mergedProfile = mergeDeepRight(parsedProfile, profile); omit(['created_at'], existingProfile),
omit(['created_at'], mergedProfile),
)
) {
this.logger.debug('Profile not changed, skipping');
return;
} }
} }
this.logger.debug('Merged profile will be inserted', {
mergedProfile,
existingProfile,
profile,
});
const cacheTtl = profile.is_external
? 60 * 60 * 24 * this.daysToKeep
: 60 * 60; // 1 hour for internal profiles
const cacheKey = this.getProfileCacheKey({
profileId: profile.id,
projectId: profile.project_id,
});
const result = await this.redis const result = await this.redis
.multi() .multi()
.set( .set(cacheKey, JSON.stringify(mergedProfile), 'EX', cacheTtl)
cacheKey,
JSON.stringify(mergedProfile),
'EX',
60 * 60 * 24 * this.daysToKeep,
)
.rpush(this.redisBufferKey, JSON.stringify(mergedProfile)) .rpush(this.redisBufferKey, JSON.stringify(mergedProfile))
.llen(this.redisBufferKey) .llen(this.redisBufferKey)
.exec(); .exec();
if (!result) { if (!result) {
this.logger.error('Failed to add profile to Redis', { this.logger.error('Failed to add profile to Redis', {
profile, profile,
@@ -142,6 +127,33 @@ export class ProfileBuffer extends BaseBuffer {
} }
} }
/**
 * Resolve the latest known state of a profile, preferring the Redis cache
 * and falling back to ClickHouse.
 *
 * @param profile - Incoming profile; only `id` and `project_id` are read here.
 * @returns The cached or stored profile, or `null` when neither store has one
 *          (the `null` comes from `fetchFromClickhouse`).
 */
private async fetchFromCache(
  profile: IClickhouseProfile,
): Promise<IClickhouseProfile | null> {
  this.logger.debug('Fetching profile from Redis', {
    projectId: profile.project_id,
    profileId: profile.id,
  });
  // Key is namespaced per project: `<redisProfilePrefix><projectId>:<profileId>`.
  const cacheKey = this.getProfileCacheKey({
    profileId: profile.id,
    projectId: profile.project_id,
  });
  const existingProfile = await getRedisCache().get(cacheKey);
  if (existingProfile) {
    // Cached value is stored as JSON; a corrupt/unparsable entry yields null
    // and falls through to the ClickHouse lookup below.
    const parsedProfile = getSafeJson<IClickhouseProfile>(existingProfile);
    if (parsedProfile) {
      this.logger.debug('Profile found in Redis', {
        projectId: profile.project_id,
        profileId: profile.id,
      });
      return parsedProfile;
    }
  }
  // Cache miss (or unparsable entry): fall back to the persistent store.
  return this.fetchFromClickhouse(profile);
}
private async fetchFromClickhouse( private async fetchFromClickhouse(
profile: IClickhouseProfile, profile: IClickhouseProfile,
): Promise<IClickhouseProfile | null> { ): Promise<IClickhouseProfile | null> {
@@ -190,20 +202,12 @@ export class ProfileBuffer extends BaseBuffer {
getSafeJson<IClickhouseProfile>(p), getSafeJson<IClickhouseProfile>(p),
); );
let processedChunks = 0;
for (const chunk of this.chunks(parsedProfiles, this.chunkSize)) { for (const chunk of this.chunks(parsedProfiles, this.chunkSize)) {
processedChunks++;
this.logger.debug(`Processing chunk ${processedChunks}`, {
size: chunk.length,
});
this.logger.debug('Chunk data', { chunk });
await ch.insert({ await ch.insert({
table: TABLE_NAMES.profiles, table: TABLE_NAMES.profiles,
values: chunk, values: chunk,
format: 'JSONEachRow', format: 'JSONEachRow',
}); });
this.logger.debug(`Successfully inserted chunk ${processedChunks}`);
} }
// Only remove profiles after successful insert // Only remove profiles after successful insert
@@ -211,7 +215,6 @@ export class ProfileBuffer extends BaseBuffer {
this.logger.info('Successfully completed profile processing', { this.logger.info('Successfully completed profile processing', {
totalProfiles: profiles.length, totalProfiles: profiles.length,
totalChunks: processedChunks,
}); });
} catch (error) { } catch (error) {
this.logger.error('Failed to process buffer', { error }); this.logger.error('Failed to process buffer', { error });

4
pnpm-lock.yaml generated
View File

@@ -898,6 +898,9 @@ importers:
'@prisma/extension-read-replicas': '@prisma/extension-read-replicas':
specifier: ^0.4.0 specifier: ^0.4.0
version: 0.4.0(@prisma/client@5.9.1(prisma@5.9.1)) version: 0.4.0(@prisma/client@5.9.1(prisma@5.9.1))
fast-deep-equal:
specifier: ^3.1.3
version: 3.1.3
jiti: jiti:
specifier: ^2.4.1 specifier: ^2.4.1
version: 2.4.1 version: 2.4.1
@@ -8820,6 +8823,7 @@ packages:
lodash.isequal@4.5.0: lodash.isequal@4.5.0:
resolution: {integrity: sha512-pDo3lu8Jhfjqls6GkMgpahsF9kCyayhgykjyLMNFTKWrpVdAQtYyB4muAMWozBB4ig/dtWAmsMxLEI8wuz+DYQ==} resolution: {integrity: sha512-pDo3lu8Jhfjqls6GkMgpahsF9kCyayhgykjyLMNFTKWrpVdAQtYyB4muAMWozBB4ig/dtWAmsMxLEI8wuz+DYQ==}
deprecated: This package is deprecated. Use require('node:util').isDeepStrictEqual instead.
lodash.isinteger@4.0.4: lodash.isinteger@4.0.4:
resolution: {integrity: sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA==} resolution: {integrity: sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA==}