fix:buffers
* wip * remove active visitor counter in redis * test * fix profiel query * fix
This commit is contained in:
committed by
GitHub
parent
20665789e1
commit
a1ce71ffb6
@@ -1,9 +1,6 @@
|
||||
import { deepMergeObjects } from '@openpanel/common';
|
||||
import { generateSecureId } from '@openpanel/common/server';
|
||||
import { getSafeJson } from '@openpanel/json';
|
||||
import type { ILogger } from '@openpanel/logger';
|
||||
import { getRedisCache, type Redis } from '@openpanel/redis';
|
||||
import shallowEqual from 'fast-deep-equal';
|
||||
import { omit, uniq } from 'ramda';
|
||||
import sqlstring from 'sqlstring';
|
||||
import { ch, chQuery, TABLE_NAMES } from '../clickhouse/client';
|
||||
@@ -11,29 +8,24 @@ import type { IClickhouseProfile } from '../services/profile.service';
|
||||
import { BaseBuffer } from './base-buffer';
|
||||
|
||||
export class ProfileBuffer extends BaseBuffer {
|
||||
private batchSize = process.env.PROFILE_BUFFER_BATCH_SIZE
|
||||
private readonly batchSize = process.env.PROFILE_BUFFER_BATCH_SIZE
|
||||
? Number.parseInt(process.env.PROFILE_BUFFER_BATCH_SIZE, 10)
|
||||
: 200;
|
||||
private chunkSize = process.env.PROFILE_BUFFER_CHUNK_SIZE
|
||||
private readonly chunkSize = process.env.PROFILE_BUFFER_CHUNK_SIZE
|
||||
? Number.parseInt(process.env.PROFILE_BUFFER_CHUNK_SIZE, 10)
|
||||
: 1000;
|
||||
private ttlInSeconds = process.env.PROFILE_BUFFER_TTL_IN_SECONDS
|
||||
private readonly ttlInSeconds = process.env.PROFILE_BUFFER_TTL_IN_SECONDS
|
||||
? Number.parseInt(process.env.PROFILE_BUFFER_TTL_IN_SECONDS, 10)
|
||||
: 60 * 60;
|
||||
/** Max profiles per ClickHouse IN-clause fetch to keep query size bounded */
|
||||
private readonly fetchChunkSize = process.env.PROFILE_BUFFER_FETCH_CHUNK_SIZE
|
||||
? Number.parseInt(process.env.PROFILE_BUFFER_FETCH_CHUNK_SIZE, 10)
|
||||
: 50;
|
||||
|
||||
private readonly redisKey = 'profile-buffer';
|
||||
private readonly redisProfilePrefix = 'profile-cache:';
|
||||
|
||||
private redis: Redis;
|
||||
private releaseLockSha: string | null = null;
|
||||
|
||||
private readonly releaseLockScript = `
|
||||
if redis.call("get", KEYS[1]) == ARGV[1] then
|
||||
return redis.call("del", KEYS[1])
|
||||
else
|
||||
return 0
|
||||
end
|
||||
`;
|
||||
private readonly redis: Redis;
|
||||
|
||||
constructor() {
|
||||
super({
|
||||
@@ -43,9 +35,6 @@ export class ProfileBuffer extends BaseBuffer {
|
||||
},
|
||||
});
|
||||
this.redis = getRedisCache();
|
||||
this.redis.script('LOAD', this.releaseLockScript).then((sha) => {
|
||||
this.releaseLockSha = sha as string;
|
||||
});
|
||||
}
|
||||
|
||||
private getProfileCacheKey({
|
||||
@@ -58,243 +47,236 @@ export class ProfileBuffer extends BaseBuffer {
|
||||
return `${this.redisProfilePrefix}${projectId}:${profileId}`;
|
||||
}
|
||||
|
||||
private async withProfileLock<T>(
|
||||
public async fetchFromCache(
|
||||
profileId: string,
|
||||
projectId: string,
|
||||
fn: () => Promise<T>
|
||||
): Promise<T> {
|
||||
const lockKey = `profile-lock:${projectId}:${profileId}`;
|
||||
const lockId = generateSecureId('lock');
|
||||
const maxRetries = 20;
|
||||
const retryDelayMs = 50;
|
||||
|
||||
for (let i = 0; i < maxRetries; i++) {
|
||||
const acquired = await this.redis.set(lockKey, lockId, 'EX', 5, 'NX');
|
||||
if (acquired === 'OK') {
|
||||
try {
|
||||
return await fn();
|
||||
} finally {
|
||||
if (this.releaseLockSha) {
|
||||
await this.redis.evalsha(this.releaseLockSha, 1, lockKey, lockId);
|
||||
} else {
|
||||
await this.redis.eval(this.releaseLockScript, 1, lockKey, lockId);
|
||||
}
|
||||
}
|
||||
}
|
||||
await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
|
||||
projectId: string
|
||||
): Promise<IClickhouseProfile | null> {
|
||||
const cacheKey = this.getProfileCacheKey({ profileId, projectId });
|
||||
const cached = await this.redis.get(cacheKey);
|
||||
if (!cached) {
|
||||
return null;
|
||||
}
|
||||
|
||||
this.logger.error(
|
||||
'Failed to acquire profile lock, proceeding without lock',
|
||||
{
|
||||
profileId,
|
||||
projectId,
|
||||
}
|
||||
);
|
||||
return fn();
|
||||
}
|
||||
|
||||
async alreadyExists(profile: IClickhouseProfile) {
|
||||
const cacheKey = this.getProfileCacheKey({
|
||||
profileId: profile.id,
|
||||
projectId: profile.project_id,
|
||||
});
|
||||
return (await this.redis.exists(cacheKey)) === 1;
|
||||
return getSafeJson<IClickhouseProfile>(cached);
|
||||
}
|
||||
|
||||
async add(profile: IClickhouseProfile, isFromEvent = false) {
|
||||
const logger = this.logger.child({
|
||||
projectId: profile.project_id,
|
||||
profileId: profile.id,
|
||||
});
|
||||
|
||||
try {
|
||||
logger.debug('Adding profile');
|
||||
|
||||
if (isFromEvent && (await this.alreadyExists(profile))) {
|
||||
logger.debug('Profile already created, skipping');
|
||||
return;
|
||||
}
|
||||
|
||||
await this.withProfileLock(profile.id, profile.project_id, async () => {
|
||||
const existingProfile = await this.fetchProfile(profile, logger);
|
||||
|
||||
// Delete any properties that are not server related if we have a non-server profile
|
||||
if (
|
||||
existingProfile?.properties.device !== 'server' &&
|
||||
profile.properties.device === 'server'
|
||||
) {
|
||||
profile.properties = omit(
|
||||
[
|
||||
'city',
|
||||
'country',
|
||||
'region',
|
||||
'longitude',
|
||||
'latitude',
|
||||
'os',
|
||||
'osVersion',
|
||||
'browser',
|
||||
'device',
|
||||
'isServer',
|
||||
'os_version',
|
||||
'browser_version',
|
||||
],
|
||||
profile.properties
|
||||
);
|
||||
}
|
||||
|
||||
const mergedProfile: IClickhouseProfile = existingProfile
|
||||
? {
|
||||
...deepMergeObjects(
|
||||
existingProfile,
|
||||
omit(['created_at', 'groups'], profile)
|
||||
),
|
||||
groups: uniq([
|
||||
...(existingProfile.groups ?? []),
|
||||
...(profile.groups ?? []),
|
||||
]),
|
||||
}
|
||||
: profile;
|
||||
|
||||
if (
|
||||
profile &&
|
||||
existingProfile &&
|
||||
shallowEqual(
|
||||
omit(['created_at'], existingProfile),
|
||||
omit(['created_at'], mergedProfile)
|
||||
)
|
||||
) {
|
||||
this.logger.debug('Profile not changed, skipping');
|
||||
return;
|
||||
}
|
||||
|
||||
this.logger.debug('Merged profile will be inserted', {
|
||||
mergedProfile,
|
||||
existingProfile,
|
||||
profile,
|
||||
});
|
||||
|
||||
if (isFromEvent) {
|
||||
const cacheKey = this.getProfileCacheKey({
|
||||
profileId: profile.id,
|
||||
projectId: profile.project_id,
|
||||
});
|
||||
|
||||
const result = await this.redis
|
||||
.multi()
|
||||
.set(cacheKey, JSON.stringify(mergedProfile), 'EX', this.ttlInSeconds)
|
||||
.rpush(this.redisKey, JSON.stringify(mergedProfile))
|
||||
.incr(this.bufferCounterKey)
|
||||
.llen(this.redisKey)
|
||||
.exec();
|
||||
|
||||
if (!result) {
|
||||
this.logger.error('Failed to add profile to Redis', {
|
||||
profile,
|
||||
cacheKey,
|
||||
});
|
||||
const exists = await this.redis.exists(cacheKey);
|
||||
if (exists === 1) {
|
||||
return;
|
||||
}
|
||||
const bufferLength = (result?.[3]?.[1] as number) ?? 0;
|
||||
}
|
||||
|
||||
this.logger.debug('Current buffer length', {
|
||||
bufferLength,
|
||||
batchSize: this.batchSize,
|
||||
});
|
||||
if (bufferLength >= this.batchSize) {
|
||||
await this.tryFlush();
|
||||
}
|
||||
});
|
||||
const result = await this.redis
|
||||
.multi()
|
||||
.rpush(this.redisKey, JSON.stringify(profile))
|
||||
.incr(this.bufferCounterKey)
|
||||
.llen(this.redisKey)
|
||||
.exec();
|
||||
|
||||
if (!result) {
|
||||
this.logger.error('Failed to add profile to Redis', { profile });
|
||||
return;
|
||||
}
|
||||
|
||||
const bufferLength = (result?.[2]?.[1] as number) ?? 0;
|
||||
if (bufferLength >= this.batchSize) {
|
||||
await this.tryFlush();
|
||||
}
|
||||
} catch (error) {
|
||||
this.logger.error('Failed to add profile', { error, profile });
|
||||
}
|
||||
}
|
||||
|
||||
private async fetchProfile(
|
||||
profile: IClickhouseProfile,
|
||||
logger: ILogger
|
||||
): Promise<IClickhouseProfile | null> {
|
||||
const existingProfile = await this.fetchFromCache(
|
||||
profile.id,
|
||||
profile.project_id
|
||||
);
|
||||
if (existingProfile) {
|
||||
logger.debug('Profile found in Redis');
|
||||
return existingProfile;
|
||||
private mergeProfiles(
|
||||
existing: IClickhouseProfile | null,
|
||||
incoming: IClickhouseProfile
|
||||
): IClickhouseProfile {
|
||||
if (!existing) {
|
||||
return incoming;
|
||||
}
|
||||
|
||||
return this.fetchFromClickhouse(profile, logger);
|
||||
}
|
||||
|
||||
public async fetchFromCache(
|
||||
profileId: string,
|
||||
projectId: string
|
||||
): Promise<IClickhouseProfile | null> {
|
||||
const cacheKey = this.getProfileCacheKey({
|
||||
profileId,
|
||||
projectId,
|
||||
});
|
||||
const existingProfile = await this.redis.get(cacheKey);
|
||||
if (!existingProfile) {
|
||||
return null;
|
||||
let profile = incoming;
|
||||
if (
|
||||
existing.properties.device !== 'server' &&
|
||||
incoming.properties.device === 'server'
|
||||
) {
|
||||
profile = {
|
||||
...incoming,
|
||||
properties: omit(
|
||||
[
|
||||
'city',
|
||||
'country',
|
||||
'region',
|
||||
'longitude',
|
||||
'latitude',
|
||||
'os',
|
||||
'osVersion',
|
||||
'browser',
|
||||
'device',
|
||||
'isServer',
|
||||
'os_version',
|
||||
'browser_version',
|
||||
],
|
||||
incoming.properties
|
||||
),
|
||||
};
|
||||
}
|
||||
return getSafeJson<IClickhouseProfile>(existingProfile);
|
||||
|
||||
return {
|
||||
...deepMergeObjects(existing, omit(['created_at', 'groups'], profile)),
|
||||
groups: uniq([...(existing.groups ?? []), ...(incoming.groups ?? [])]),
|
||||
};
|
||||
}
|
||||
|
||||
private async fetchFromClickhouse(
|
||||
profile: IClickhouseProfile,
|
||||
logger: ILogger
|
||||
): Promise<IClickhouseProfile | null> {
|
||||
logger.debug('Fetching profile from Clickhouse');
|
||||
const result = await chQuery<IClickhouseProfile>(
|
||||
`SELECT
|
||||
id,
|
||||
project_id,
|
||||
last_value(nullIf(first_name, '')) as first_name,
|
||||
last_value(nullIf(last_name, '')) as last_name,
|
||||
last_value(nullIf(email, '')) as email,
|
||||
last_value(nullIf(avatar, '')) as avatar,
|
||||
last_value(is_external) as is_external,
|
||||
last_value(properties) as properties,
|
||||
last_value(created_at) as created_at
|
||||
FROM ${TABLE_NAMES.profiles}
|
||||
WHERE
|
||||
id = ${sqlstring.escape(String(profile.id))} AND
|
||||
project_id = ${sqlstring.escape(profile.project_id)}
|
||||
${
|
||||
profile.is_external === false
|
||||
? ' AND profiles.created_at > now() - INTERVAL 2 DAY'
|
||||
: ''
|
||||
private async batchFetchFromClickhouse(
|
||||
profiles: IClickhouseProfile[]
|
||||
): Promise<Map<string, IClickhouseProfile>> {
|
||||
const result = new Map<string, IClickhouseProfile>();
|
||||
|
||||
// Non-external (anonymous/device) profiles get a 2-day recency filter to
|
||||
// avoid pulling stale anonymous sessions from far back.
|
||||
const external = profiles.filter((p) => p.is_external !== false);
|
||||
const nonExternal = profiles.filter((p) => p.is_external === false);
|
||||
|
||||
const fetchGroup = async (
|
||||
group: IClickhouseProfile[],
|
||||
withDateFilter: boolean
|
||||
) => {
|
||||
for (const chunk of this.chunks(group, this.fetchChunkSize)) {
|
||||
const tuples = chunk
|
||||
.map(
|
||||
(p) =>
|
||||
`(${sqlstring.escape(String(p.id))}, ${sqlstring.escape(p.project_id)})`
|
||||
)
|
||||
.join(', ');
|
||||
try {
|
||||
const rows = await chQuery<IClickhouseProfile>(
|
||||
`SELECT
|
||||
id,
|
||||
project_id,
|
||||
argMax(nullIf(first_name, ''), ${TABLE_NAMES.profiles}.created_at) as first_name,
|
||||
argMax(nullIf(last_name, ''), ${TABLE_NAMES.profiles}.created_at) as last_name,
|
||||
argMax(nullIf(email, ''), ${TABLE_NAMES.profiles}.created_at) as email,
|
||||
argMax(nullIf(avatar, ''), ${TABLE_NAMES.profiles}.created_at) as avatar,
|
||||
argMax(is_external, ${TABLE_NAMES.profiles}.created_at) as is_external,
|
||||
argMax(properties, ${TABLE_NAMES.profiles}.created_at) as properties,
|
||||
max(created_at) as created_at
|
||||
FROM ${TABLE_NAMES.profiles}
|
||||
WHERE (id, project_id) IN (${tuples})
|
||||
${withDateFilter ? `AND ${TABLE_NAMES.profiles}.created_at > now() - INTERVAL 2 DAY` : ''}
|
||||
GROUP BY id, project_id`
|
||||
);
|
||||
for (const row of rows) {
|
||||
result.set(`${row.project_id}:${row.id}`, row);
|
||||
}
|
||||
} catch (error) {
|
||||
this.logger.warn(
|
||||
'Failed to batch fetch profiles from Clickhouse, proceeding without existing data',
|
||||
{ error, chunkSize: chunk.length }
|
||||
);
|
||||
}
|
||||
GROUP BY id, project_id
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 1`
|
||||
);
|
||||
logger.debug('Clickhouse fetch result', {
|
||||
found: !!result[0],
|
||||
});
|
||||
return result[0] || null;
|
||||
}
|
||||
};
|
||||
|
||||
await Promise.all([
|
||||
fetchGroup(external, false),
|
||||
fetchGroup(nonExternal, true),
|
||||
]);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
async processBuffer() {
|
||||
try {
|
||||
this.logger.debug('Starting profile buffer processing');
|
||||
const profiles = await this.redis.lrange(
|
||||
const rawProfiles = await this.redis.lrange(
|
||||
this.redisKey,
|
||||
0,
|
||||
this.batchSize - 1
|
||||
);
|
||||
|
||||
if (profiles.length === 0) {
|
||||
if (rawProfiles.length === 0) {
|
||||
this.logger.debug('No profiles to process');
|
||||
return;
|
||||
}
|
||||
|
||||
this.logger.debug(`Processing ${profiles.length} profiles in buffer`);
|
||||
const parsedProfiles = profiles.map((p) =>
|
||||
getSafeJson<IClickhouseProfile>(p)
|
||||
);
|
||||
const parsedProfiles = rawProfiles
|
||||
.map((p) => getSafeJson<IClickhouseProfile>(p))
|
||||
.filter(Boolean) as IClickhouseProfile[];
|
||||
|
||||
for (const chunk of this.chunks(parsedProfiles, this.chunkSize)) {
|
||||
// Merge within batch: collapse multiple updates for the same profile
|
||||
const mergedInBatch = new Map<string, IClickhouseProfile>();
|
||||
for (const profile of parsedProfiles) {
|
||||
const key = `${profile.project_id}:${profile.id}`;
|
||||
mergedInBatch.set(
|
||||
key,
|
||||
this.mergeProfiles(mergedInBatch.get(key) ?? null, profile)
|
||||
);
|
||||
}
|
||||
|
||||
const uniqueProfiles = Array.from(mergedInBatch.values());
|
||||
|
||||
// Check Redis cache for all unique profiles in a single MGET
|
||||
const cacheKeys = uniqueProfiles.map((p) =>
|
||||
this.getProfileCacheKey({ profileId: p.id, projectId: p.project_id })
|
||||
);
|
||||
const cacheResults = await this.redis.mget(...cacheKeys);
|
||||
|
||||
const existingByKey = new Map<string, IClickhouseProfile>();
|
||||
const cacheMisses: IClickhouseProfile[] = [];
|
||||
for (let i = 0; i < uniqueProfiles.length; i++) {
|
||||
const uniqueProfile = uniqueProfiles[i];
|
||||
if (uniqueProfile) {
|
||||
const key = `${uniqueProfile.project_id}:${uniqueProfile.id}`;
|
||||
const cached = cacheResults[i]
|
||||
? getSafeJson<IClickhouseProfile>(cacheResults[i]!)
|
||||
: null;
|
||||
if (cached) {
|
||||
existingByKey.set(key, cached);
|
||||
} else {
|
||||
cacheMisses.push(uniqueProfile);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fetch cache misses from ClickHouse in bounded chunks
|
||||
if (cacheMisses.length > 0) {
|
||||
const clickhouseResults =
|
||||
await this.batchFetchFromClickhouse(cacheMisses);
|
||||
for (const [key, profile] of clickhouseResults) {
|
||||
existingByKey.set(key, profile);
|
||||
}
|
||||
}
|
||||
|
||||
// Final merge: in-batch profile + existing (from cache or ClickHouse)
|
||||
const toInsert: IClickhouseProfile[] = [];
|
||||
const multi = this.redis.multi();
|
||||
|
||||
for (const profile of uniqueProfiles) {
|
||||
const key = `${profile.project_id}:${profile.id}`;
|
||||
const merged = this.mergeProfiles(
|
||||
existingByKey.get(key) ?? null,
|
||||
profile
|
||||
);
|
||||
toInsert.push(merged);
|
||||
multi.set(
|
||||
this.getProfileCacheKey({
|
||||
projectId: profile.project_id,
|
||||
profileId: profile.id,
|
||||
}),
|
||||
JSON.stringify(merged),
|
||||
'EX',
|
||||
this.ttlInSeconds
|
||||
);
|
||||
}
|
||||
|
||||
for (const chunk of this.chunks(toInsert, this.chunkSize)) {
|
||||
await ch.insert({
|
||||
table: TABLE_NAMES.profiles,
|
||||
values: chunk,
|
||||
@@ -302,22 +284,21 @@ export class ProfileBuffer extends BaseBuffer {
|
||||
});
|
||||
}
|
||||
|
||||
// Only remove profiles after successful insert and update counter
|
||||
await this.redis
|
||||
.multi()
|
||||
.ltrim(this.redisKey, profiles.length, -1)
|
||||
.decrby(this.bufferCounterKey, profiles.length)
|
||||
.exec();
|
||||
multi
|
||||
.ltrim(this.redisKey, rawProfiles.length, -1)
|
||||
.decrby(this.bufferCounterKey, rawProfiles.length);
|
||||
await multi.exec();
|
||||
|
||||
this.logger.debug('Successfully completed profile processing', {
|
||||
totalProfiles: profiles.length,
|
||||
totalProfiles: rawProfiles.length,
|
||||
uniqueProfiles: uniqueProfiles.length,
|
||||
});
|
||||
} catch (error) {
|
||||
this.logger.error('Failed to process buffer', { error });
|
||||
}
|
||||
}
|
||||
|
||||
async getBufferSize() {
|
||||
getBufferSize() {
|
||||
return this.getBufferSizeWithCounter(() => this.redis.llen(this.redisKey));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user