feat: backfill profile id on events
This commit is contained in:
@@ -58,6 +58,11 @@ export async function bootCron() {
|
|||||||
type: 'flushSessions',
|
type: 'flushSessions',
|
||||||
pattern: 1000 * 10,
|
pattern: 1000 * 10,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: 'flush',
|
||||||
|
type: 'flushProfileBackfill',
|
||||||
|
pattern: 1000 * 30,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: 'insightsDaily',
|
name: 'insightsDaily',
|
||||||
type: 'insightsDaily',
|
type: 'insightsDaily',
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import type { Job } from 'bullmq';
|
import type { Job } from 'bullmq';
|
||||||
|
|
||||||
import { eventBuffer, profileBuffer, sessionBuffer } from '@openpanel/db';
|
import { eventBuffer, profileBackfillBuffer, profileBuffer, sessionBuffer } from '@openpanel/db';
|
||||||
import type { CronQueuePayload } from '@openpanel/queue';
|
import type { CronQueuePayload } from '@openpanel/queue';
|
||||||
|
|
||||||
import { jobdeleteProjects } from './cron.delete-projects';
|
import { jobdeleteProjects } from './cron.delete-projects';
|
||||||
@@ -23,6 +23,9 @@ export async function cronJob(job: Job<CronQueuePayload>) {
|
|||||||
case 'flushSessions': {
|
case 'flushSessions': {
|
||||||
return await sessionBuffer.tryFlush();
|
return await sessionBuffer.tryFlush();
|
||||||
}
|
}
|
||||||
|
case 'flushProfileBackfill': {
|
||||||
|
return await profileBackfillBuffer.tryFlush();
|
||||||
|
}
|
||||||
case 'ping': {
|
case 'ping': {
|
||||||
return await ping();
|
return await ping();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import {
|
|||||||
getEvents,
|
getEvents,
|
||||||
getHasFunnelRules,
|
getHasFunnelRules,
|
||||||
getNotificationRulesByProjectId,
|
getNotificationRulesByProjectId,
|
||||||
|
profileBackfillBuffer,
|
||||||
sessionBuffer,
|
sessionBuffer,
|
||||||
transformSessionToEvent,
|
transformSessionToEvent,
|
||||||
} from '@openpanel/db';
|
} from '@openpanel/db';
|
||||||
@@ -91,6 +92,22 @@ export async function createSessionEnd(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const profileId = session.profile_id || payload.profileId
|
||||||
|
|
||||||
|
if (
|
||||||
|
profileId !== session.device_id
|
||||||
|
&& process.env.EXPERIMENTAL_PROFILE_BACKFILL === '1'
|
||||||
|
) {
|
||||||
|
const runOnProjects = process.env.EXPERIMENTAL_PROFILE_BACKFILL_PROJECTS?.split(',').filter(Boolean) ?? []
|
||||||
|
if(runOnProjects.length === 0 || runOnProjects.includes(payload.projectId)) {
|
||||||
|
await profileBackfillBuffer.add({
|
||||||
|
projectId: payload.projectId,
|
||||||
|
sessionId: payload.sessionId,
|
||||||
|
profileId: profileId,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Create session end event
|
// Create session end event
|
||||||
return createEvent({
|
return createEvent({
|
||||||
...payload,
|
...payload,
|
||||||
@@ -104,7 +121,7 @@ export async function createSessionEnd(
|
|||||||
createdAt: new Date(
|
createdAt: new Date(
|
||||||
convertClickhouseDateToJs(session.ended_at).getTime() + 1000,
|
convertClickhouseDateToJs(session.ended_at).getTime() + 1000,
|
||||||
),
|
),
|
||||||
profileId: session.profile_id || payload.profileId,
|
profileId: profileId,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import { BotBuffer as BotBufferRedis } from './bot-buffer';
|
import { BotBuffer as BotBufferRedis } from './bot-buffer';
|
||||||
import { EventBuffer as EventBufferRedis } from './event-buffer';
|
import { EventBuffer as EventBufferRedis } from './event-buffer';
|
||||||
|
import { ProfileBackfillBuffer } from './profile-backfill-buffer';
|
||||||
import { ProfileBuffer as ProfileBufferRedis } from './profile-buffer';
|
import { ProfileBuffer as ProfileBufferRedis } from './profile-buffer';
|
||||||
import { SessionBuffer } from './session-buffer';
|
import { SessionBuffer } from './session-buffer';
|
||||||
|
|
||||||
@@ -7,3 +8,6 @@ export const eventBuffer = new EventBufferRedis();
|
|||||||
export const profileBuffer = new ProfileBufferRedis();
|
export const profileBuffer = new ProfileBufferRedis();
|
||||||
export const botBuffer = new BotBufferRedis();
|
export const botBuffer = new BotBufferRedis();
|
||||||
export const sessionBuffer = new SessionBuffer();
|
export const sessionBuffer = new SessionBuffer();
|
||||||
|
export const profileBackfillBuffer = new ProfileBackfillBuffer();
|
||||||
|
|
||||||
|
export type { ProfileBackfillEntry } from './profile-backfill-buffer';
|
||||||
|
|||||||
117
packages/db/src/buffers/profile-backfill-buffer.ts
Normal file
117
packages/db/src/buffers/profile-backfill-buffer.ts
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
import { getSafeJson } from '@openpanel/json';
|
||||||
|
import { type Redis, getRedisCache } from '@openpanel/redis';
|
||||||
|
import sqlstring from 'sqlstring';
|
||||||
|
import { TABLE_NAMES, ch, getReplicatedTableName } from '../clickhouse/client';
|
||||||
|
import { BaseBuffer } from './base-buffer';
|
||||||
|
|
||||||
|
export interface ProfileBackfillEntry {
|
||||||
|
projectId: string;
|
||||||
|
sessionId: string;
|
||||||
|
profileId: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Max session IDs per IN clause before we split into another query
|
||||||
|
const CHUNK_SIZE = 500;
|
||||||
|
|
||||||
|
export class ProfileBackfillBuffer extends BaseBuffer {
|
||||||
|
private batchSize = process.env.PROFILE_BACKFILL_BUFFER_BATCH_SIZE
|
||||||
|
? Number.parseInt(process.env.PROFILE_BACKFILL_BUFFER_BATCH_SIZE, 10)
|
||||||
|
: 1000;
|
||||||
|
|
||||||
|
private readonly redisKey = 'profile-backfill-buffer';
|
||||||
|
private redis: Redis;
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
super({
|
||||||
|
name: 'profile-backfill',
|
||||||
|
onFlush: async () => {
|
||||||
|
await this.processBuffer();
|
||||||
|
},
|
||||||
|
});
|
||||||
|
this.redis = getRedisCache();
|
||||||
|
}
|
||||||
|
|
||||||
|
async add(entry: ProfileBackfillEntry) {
|
||||||
|
try {
|
||||||
|
this.logger.info('Adding profile backfill entry', entry);
|
||||||
|
await this.redis
|
||||||
|
.multi()
|
||||||
|
.rpush(this.redisKey, JSON.stringify(entry))
|
||||||
|
.incr(this.bufferCounterKey)
|
||||||
|
.exec();
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.error('Failed to add profile backfill entry', { error });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async processBuffer() {
|
||||||
|
try {
|
||||||
|
const raw = await this.redis.lrange(this.redisKey, 0, this.batchSize - 1);
|
||||||
|
|
||||||
|
if (raw.length === 0) return;
|
||||||
|
|
||||||
|
// Deduplicate by sessionId — last write wins (most recent profileId)
|
||||||
|
const seen = new Map<string, ProfileBackfillEntry>();
|
||||||
|
for (const r of raw) {
|
||||||
|
const parsed = getSafeJson<ProfileBackfillEntry>(r);
|
||||||
|
if (parsed) {
|
||||||
|
seen.set(parsed.sessionId, parsed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const entries = Array.from(seen.values());
|
||||||
|
|
||||||
|
const table = getReplicatedTableName(TABLE_NAMES.events);
|
||||||
|
|
||||||
|
const chunks = this.chunks(entries, CHUNK_SIZE);
|
||||||
|
let processedChunks = 0;
|
||||||
|
|
||||||
|
for (const chunk of chunks) {
|
||||||
|
const caseClause = chunk
|
||||||
|
.map(({ sessionId, profileId }) => `WHEN ${sqlstring.escape(sessionId)} THEN ${sqlstring.escape(profileId)}`)
|
||||||
|
.join('\n');
|
||||||
|
const tupleList = chunk
|
||||||
|
.map(({ projectId, sessionId }) => `(${sqlstring.escape(projectId)}, ${sqlstring.escape(sessionId)})`)
|
||||||
|
.join(',');
|
||||||
|
|
||||||
|
const query = `
|
||||||
|
UPDATE ${table}
|
||||||
|
SET profile_id = CASE session_id
|
||||||
|
${caseClause}
|
||||||
|
END
|
||||||
|
WHERE (project_id, session_id) IN (${tupleList})
|
||||||
|
AND created_at > now() - INTERVAL 6 HOURS`;
|
||||||
|
|
||||||
|
await ch.command({
|
||||||
|
query,
|
||||||
|
clickhouse_settings: {
|
||||||
|
mutations_sync: '0',
|
||||||
|
allow_experimental_lightweight_update: '1'
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
processedChunks++;
|
||||||
|
this.logger.info('Profile backfill chunk applied', {
|
||||||
|
count: chunk.length,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (processedChunks === chunks.length) {
|
||||||
|
await this.redis
|
||||||
|
.multi()
|
||||||
|
.ltrim(this.redisKey, raw.length, -1)
|
||||||
|
.decrby(this.bufferCounterKey, raw.length)
|
||||||
|
.exec();
|
||||||
|
|
||||||
|
this.logger.info('Profile backfill buffer processed', {
|
||||||
|
total: entries.length,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.error('Failed to process profile backfill buffer', { error });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async getBufferSize() {
|
||||||
|
return this.getBufferSizeWithCounter(() => this.redis.llen(this.redisKey));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -67,6 +67,10 @@ export const TABLE_NAMES = {
|
|||||||
* Non-clustered mode = self-hosted environments
|
* Non-clustered mode = self-hosted environments
|
||||||
*/
|
*/
|
||||||
export function isClickhouseClustered(): boolean {
|
export function isClickhouseClustered(): boolean {
|
||||||
|
if (process.env.CLICKHOUSE_CLUSTER === 'true' || process.env.CLICKHOUSE_CLUSTER === '1') {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
return !(
|
return !(
|
||||||
process.env.SELF_HOSTED === 'true' || process.env.SELF_HOSTED === '1'
|
process.env.SELF_HOSTED === 'true' || process.env.SELF_HOSTED === '1'
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -119,11 +119,16 @@ export type CronQueuePayloadOnboarding = {
|
|||||||
type: 'onboarding';
|
type: 'onboarding';
|
||||||
payload: undefined;
|
payload: undefined;
|
||||||
};
|
};
|
||||||
|
export type CronQueuePayloadFlushProfileBackfill = {
|
||||||
|
type: 'flushProfileBackfill';
|
||||||
|
payload: undefined;
|
||||||
|
};
|
||||||
export type CronQueuePayload =
|
export type CronQueuePayload =
|
||||||
| CronQueuePayloadSalt
|
| CronQueuePayloadSalt
|
||||||
| CronQueuePayloadFlushEvents
|
| CronQueuePayloadFlushEvents
|
||||||
| CronQueuePayloadFlushSessions
|
| CronQueuePayloadFlushSessions
|
||||||
| CronQueuePayloadFlushProfiles
|
| CronQueuePayloadFlushProfiles
|
||||||
|
| CronQueuePayloadFlushProfileBackfill
|
||||||
| CronQueuePayloadPing
|
| CronQueuePayloadPing
|
||||||
| CronQueuePayloadProject
|
| CronQueuePayloadProject
|
||||||
| CronQueuePayloadInsightsDaily
|
| CronQueuePayloadInsightsDaily
|
||||||
|
|||||||
Reference in New Issue
Block a user