fix(buffer): better merging profiles in buffer
This commit is contained in:
@@ -21,6 +21,7 @@
|
|||||||
"@openpanel/validation": "workspace:*",
|
"@openpanel/validation": "workspace:*",
|
||||||
"@prisma/client": "^5.1.1",
|
"@prisma/client": "^5.1.1",
|
||||||
"@prisma/extension-read-replicas": "^0.4.0",
|
"@prisma/extension-read-replicas": "^0.4.0",
|
||||||
|
"fast-deep-equal": "^3.1.3",
|
||||||
"jiti": "^2.4.1",
|
"jiti": "^2.4.1",
|
||||||
"prisma-json-types-generator": "^3.1.1",
|
"prisma-json-types-generator": "^3.1.1",
|
||||||
"ramda": "^0.29.1",
|
"ramda": "^0.29.1",
|
||||||
|
|||||||
@@ -1,12 +1,20 @@
|
|||||||
import { createHash } from 'node:crypto';
|
import { deepMergeObjects } from '@openpanel/common';
|
||||||
import { getSafeJson } from '@openpanel/common';
|
// import { getSafeJson } from '@openpanel/json';
|
||||||
import { type Redis, getRedisCache } from '@openpanel/redis';
|
import { type Redis, getRedisCache } from '@openpanel/redis';
|
||||||
import { dissocPath, mergeDeepRight, omit, whereEq } from 'ramda';
|
import shallowEqual from 'fast-deep-equal';
|
||||||
|
import { omit } from 'ramda';
|
||||||
import { TABLE_NAMES, ch, chQuery } from '../clickhouse/client';
|
import { TABLE_NAMES, ch, chQuery } from '../clickhouse/client';
|
||||||
import type { IClickhouseProfile } from '../services/profile.service';
|
import type { IClickhouseProfile } from '../services/profile.service';
|
||||||
import { BaseBuffer } from './base-buffer';
|
import { BaseBuffer } from './base-buffer';
|
||||||
import { isPartialMatch } from './partial-json-match';
|
|
||||||
|
// TODO: Use @openpanel/json when polar is merged
|
||||||
|
function getSafeJson<T>(str: string): T | null {
|
||||||
|
try {
|
||||||
|
return JSON.parse(str);
|
||||||
|
} catch (e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
export class ProfileBuffer extends BaseBuffer {
|
export class ProfileBuffer extends BaseBuffer {
|
||||||
private batchSize = process.env.PROFILE_BUFFER_BATCH_SIZE
|
private batchSize = process.env.PROFILE_BUFFER_BATCH_SIZE
|
||||||
@@ -34,35 +42,14 @@ export class ProfileBuffer extends BaseBuffer {
|
|||||||
this.redis = getRedisCache();
|
this.redis = getRedisCache();
|
||||||
}
|
}
|
||||||
|
|
||||||
private excludeKeys(
|
private getProfileCacheKey({
|
||||||
profile: IClickhouseProfile,
|
projectId,
|
||||||
exclude: string[][],
|
profileId,
|
||||||
): IClickhouseProfile {
|
}: {
|
||||||
let filtered = profile;
|
profileId: string;
|
||||||
for (const path of exclude) {
|
projectId: string;
|
||||||
filtered = dissocPath(path, filtered);
|
}) {
|
||||||
}
|
return `${this.redisProfilePrefix}${projectId}:${profileId}`;
|
||||||
return filtered;
|
|
||||||
}
|
|
||||||
|
|
||||||
private match(source: any, partial: any): boolean {
|
|
||||||
const exclude = [
|
|
||||||
['created_at'],
|
|
||||||
['properties', 'browser_version'],
|
|
||||||
['properties', 'browserVersion'],
|
|
||||||
['properties', 'latitude'],
|
|
||||||
['properties', 'longitude'],
|
|
||||||
['properties', 'os_version'],
|
|
||||||
['properties', 'osVersion'],
|
|
||||||
['properties', 'path'],
|
|
||||||
['properties', 'referrer_name'],
|
|
||||||
['properties', 'referrerName'],
|
|
||||||
['properties', 'referrer_type'],
|
|
||||||
['properties', 'referrerType'],
|
|
||||||
['properties', 'referrer'],
|
|
||||||
];
|
|
||||||
|
|
||||||
return isPartialMatch(source, this.excludeKeys(partial, exclude));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async add(profile: IClickhouseProfile) {
|
async add(profile: IClickhouseProfile) {
|
||||||
@@ -70,56 +57,54 @@ export class ProfileBuffer extends BaseBuffer {
|
|||||||
this.logger.debug('Adding profile', {
|
this.logger.debug('Adding profile', {
|
||||||
projectId: profile.project_id,
|
projectId: profile.project_id,
|
||||||
profileId: profile.id,
|
profileId: profile.id,
|
||||||
profile,
|
|
||||||
});
|
});
|
||||||
const cacheKey = `${this.redisProfilePrefix}${profile.project_id}:${profile.id}`;
|
|
||||||
|
|
||||||
// Check if we have this profile in Redis cache
|
const existingProfile = await this.fetchFromCache(profile);
|
||||||
const existingProfile = await this.redis.get(cacheKey);
|
|
||||||
let mergedProfile = profile;
|
|
||||||
|
|
||||||
if (!existingProfile) {
|
const mergedProfile: IClickhouseProfile = existingProfile
|
||||||
this.logger.debug('Profile not found in cache, checking Clickhouse', {
|
? deepMergeObjects(existingProfile, profile)
|
||||||
projectId: profile.project_id,
|
: profile;
|
||||||
profileId: profile.id,
|
|
||||||
});
|
|
||||||
// If not in cache, check Clickhouse
|
|
||||||
const clickhouseProfile = await this.fetchFromClickhouse(profile);
|
|
||||||
if (clickhouseProfile) {
|
|
||||||
this.logger.debug('Found existing profile in Clickhouse, merging', {
|
|
||||||
projectId: profile.project_id,
|
|
||||||
profileId: profile.id,
|
|
||||||
});
|
|
||||||
mergedProfile = mergeDeepRight(clickhouseProfile, profile);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const parsedProfile = getSafeJson<IClickhouseProfile>(existingProfile);
|
|
||||||
|
|
||||||
if (parsedProfile) {
|
// Avoid unnecessary updates:
|
||||||
// Only merge if checksums are different
|
// If the profile is less than X minutes old
|
||||||
if (this.match(parsedProfile, profile)) {
|
// and the profiles are the same
|
||||||
return; // Skip if checksums match
|
if (profile.created_at && existingProfile?.created_at) {
|
||||||
}
|
const a = new Date(profile.created_at);
|
||||||
|
const b = new Date(existingProfile.created_at);
|
||||||
this.logger.debug('Profile changed, merging with cached version', {
|
const diffTime = Math.abs(a.getTime() - b.getTime());
|
||||||
existingProfile: parsedProfile,
|
if (
|
||||||
incomingProfile: profile,
|
diffTime < 1000 * 60 * 10 &&
|
||||||
});
|
shallowEqual(
|
||||||
mergedProfile = mergeDeepRight(parsedProfile, profile);
|
omit(['created_at'], existingProfile),
|
||||||
|
omit(['created_at'], mergedProfile),
|
||||||
|
)
|
||||||
|
) {
|
||||||
|
this.logger.debug('Profile not changed, skipping');
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.logger.debug('Merged profile will be inserted', {
|
||||||
|
mergedProfile,
|
||||||
|
existingProfile,
|
||||||
|
profile,
|
||||||
|
});
|
||||||
|
|
||||||
|
const cacheTtl = profile.is_external
|
||||||
|
? 60 * 60 * 24 * this.daysToKeep
|
||||||
|
: 60 * 60; // 1 hour for internal profiles
|
||||||
|
const cacheKey = this.getProfileCacheKey({
|
||||||
|
profileId: profile.id,
|
||||||
|
projectId: profile.project_id,
|
||||||
|
});
|
||||||
|
|
||||||
const result = await this.redis
|
const result = await this.redis
|
||||||
.multi()
|
.multi()
|
||||||
.set(
|
.set(cacheKey, JSON.stringify(mergedProfile), 'EX', cacheTtl)
|
||||||
cacheKey,
|
|
||||||
JSON.stringify(mergedProfile),
|
|
||||||
'EX',
|
|
||||||
60 * 60 * 24 * this.daysToKeep,
|
|
||||||
)
|
|
||||||
.rpush(this.redisBufferKey, JSON.stringify(mergedProfile))
|
.rpush(this.redisBufferKey, JSON.stringify(mergedProfile))
|
||||||
.llen(this.redisBufferKey)
|
.llen(this.redisBufferKey)
|
||||||
.exec();
|
.exec();
|
||||||
|
|
||||||
if (!result) {
|
if (!result) {
|
||||||
this.logger.error('Failed to add profile to Redis', {
|
this.logger.error('Failed to add profile to Redis', {
|
||||||
profile,
|
profile,
|
||||||
@@ -142,6 +127,33 @@ export class ProfileBuffer extends BaseBuffer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private async fetchFromCache(
|
||||||
|
profile: IClickhouseProfile,
|
||||||
|
): Promise<IClickhouseProfile | null> {
|
||||||
|
this.logger.debug('Fetching profile from Redis', {
|
||||||
|
projectId: profile.project_id,
|
||||||
|
profileId: profile.id,
|
||||||
|
});
|
||||||
|
const cacheKey = this.getProfileCacheKey({
|
||||||
|
profileId: profile.id,
|
||||||
|
projectId: profile.project_id,
|
||||||
|
});
|
||||||
|
|
||||||
|
const existingProfile = await getRedisCache().get(cacheKey);
|
||||||
|
if (existingProfile) {
|
||||||
|
const parsedProfile = getSafeJson<IClickhouseProfile>(existingProfile);
|
||||||
|
if (parsedProfile) {
|
||||||
|
this.logger.debug('Profile found in Redis', {
|
||||||
|
projectId: profile.project_id,
|
||||||
|
profileId: profile.id,
|
||||||
|
});
|
||||||
|
return parsedProfile;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.fetchFromClickhouse(profile);
|
||||||
|
}
|
||||||
|
|
||||||
private async fetchFromClickhouse(
|
private async fetchFromClickhouse(
|
||||||
profile: IClickhouseProfile,
|
profile: IClickhouseProfile,
|
||||||
): Promise<IClickhouseProfile | null> {
|
): Promise<IClickhouseProfile | null> {
|
||||||
@@ -190,20 +202,12 @@ export class ProfileBuffer extends BaseBuffer {
|
|||||||
getSafeJson<IClickhouseProfile>(p),
|
getSafeJson<IClickhouseProfile>(p),
|
||||||
);
|
);
|
||||||
|
|
||||||
let processedChunks = 0;
|
|
||||||
for (const chunk of this.chunks(parsedProfiles, this.chunkSize)) {
|
for (const chunk of this.chunks(parsedProfiles, this.chunkSize)) {
|
||||||
processedChunks++;
|
|
||||||
this.logger.debug(`Processing chunk ${processedChunks}`, {
|
|
||||||
size: chunk.length,
|
|
||||||
});
|
|
||||||
this.logger.debug('Chunk data', { chunk });
|
|
||||||
|
|
||||||
await ch.insert({
|
await ch.insert({
|
||||||
table: TABLE_NAMES.profiles,
|
table: TABLE_NAMES.profiles,
|
||||||
values: chunk,
|
values: chunk,
|
||||||
format: 'JSONEachRow',
|
format: 'JSONEachRow',
|
||||||
});
|
});
|
||||||
this.logger.debug(`Successfully inserted chunk ${processedChunks}`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only remove profiles after successful insert
|
// Only remove profiles after successful insert
|
||||||
@@ -211,7 +215,6 @@ export class ProfileBuffer extends BaseBuffer {
|
|||||||
|
|
||||||
this.logger.info('Successfully completed profile processing', {
|
this.logger.info('Successfully completed profile processing', {
|
||||||
totalProfiles: profiles.length,
|
totalProfiles: profiles.length,
|
||||||
totalChunks: processedChunks,
|
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
this.logger.error('Failed to process buffer', { error });
|
this.logger.error('Failed to process buffer', { error });
|
||||||
|
|||||||
4
pnpm-lock.yaml
generated
4
pnpm-lock.yaml
generated
@@ -898,6 +898,9 @@ importers:
|
|||||||
'@prisma/extension-read-replicas':
|
'@prisma/extension-read-replicas':
|
||||||
specifier: ^0.4.0
|
specifier: ^0.4.0
|
||||||
version: 0.4.0(@prisma/client@5.9.1(prisma@5.9.1))
|
version: 0.4.0(@prisma/client@5.9.1(prisma@5.9.1))
|
||||||
|
fast-deep-equal:
|
||||||
|
specifier: ^3.1.3
|
||||||
|
version: 3.1.3
|
||||||
jiti:
|
jiti:
|
||||||
specifier: ^2.4.1
|
specifier: ^2.4.1
|
||||||
version: 2.4.1
|
version: 2.4.1
|
||||||
@@ -8820,6 +8823,7 @@ packages:
|
|||||||
|
|
||||||
lodash.isequal@4.5.0:
|
lodash.isequal@4.5.0:
|
||||||
resolution: {integrity: sha512-pDo3lu8Jhfjqls6GkMgpahsF9kCyayhgykjyLMNFTKWrpVdAQtYyB4muAMWozBB4ig/dtWAmsMxLEI8wuz+DYQ==}
|
resolution: {integrity: sha512-pDo3lu8Jhfjqls6GkMgpahsF9kCyayhgykjyLMNFTKWrpVdAQtYyB4muAMWozBB4ig/dtWAmsMxLEI8wuz+DYQ==}
|
||||||
|
deprecated: This package is deprecated. Use require('node:util').isDeepStrictEqual instead.
|
||||||
|
|
||||||
lodash.isinteger@4.0.4:
|
lodash.isinteger@4.0.4:
|
||||||
resolution: {integrity: sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA==}
|
resolution: {integrity: sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA==}
|
||||||
|
|||||||
Reference in New Issue
Block a user