batching events

This commit is contained in:
Carl-Gerhard Lindesvärd
2024-07-17 17:13:07 +02:00
committed by Carl-Gerhard Lindesvärd
parent 244aa3b0d3
commit 5e225b7ae6
58 changed files with 2204 additions and 583 deletions

View File

@@ -2,7 +2,7 @@ import { generateSalt } from '@openpanel/common';
import { db, getCurrentSalt } from '@openpanel/db';
export async function salt() {
const oldSalt = await getCurrentSalt();
const oldSalt = await getCurrentSalt().catch(() => null);
const newSalt = await db.salt.create({
data: {
salt: generateSalt(),
@@ -13,7 +13,7 @@ export async function salt() {
await db.salt.deleteMany({
where: {
salt: {
notIn: [newSalt.salt, oldSalt],
notIn: oldSalt ? [newSalt.salt, oldSalt] : [newSalt.salt],
},
},
});

View File

@@ -1,5 +1,6 @@
import type { Job } from 'bullmq';
import { eventBuffer, profileBuffer } from '@openpanel/db';
import type { CronQueuePayload } from '@openpanel/queue/src/queues';
import { salt } from './cron.salt';
@@ -9,5 +10,11 @@ export async function cronJob(job: Job<CronQueuePayload>) {
case 'salt': {
return await salt();
}
case 'flushEvents': {
return await eventBuffer.flush();
}
case 'flushProfiles': {
return await profileBuffer.flush();
}
}
}

View File

@@ -1,23 +1,26 @@
import type { Job } from 'bullmq';
import { getTime } from '@openpanel/common';
import { createEvent, getEvents } from '@openpanel/db';
import { createEvent, eventBuffer, getEvents } from '@openpanel/db';
import type { EventsQueuePayloadCreateSessionEnd } from '@openpanel/queue/src/queues';
export async function createSessionEnd(
job: Job<EventsQueuePayloadCreateSessionEnd>
) {
const payload = job.data.payload;
const eventsInBuffer = await eventBuffer.findMany(
(item) => item.event.session_id === payload.sessionId
);
const sql = `
SELECT * FROM events
WHERE
device_id = '${payload.deviceId}'
session_id = '${payload.sessionId}'
AND created_at >= (
SELECT created_at
FROM events
WHERE
device_id = '${payload.deviceId}'
session_id = '${payload.sessionId}'
AND name = 'session_start'
ORDER BY created_at DESC
LIMIT 1
@@ -25,7 +28,11 @@ export async function createSessionEnd(
ORDER BY created_at DESC
`;
job.log(sql);
const events = await getEvents(sql);
const eventsInDb = await getEvents(sql);
// sort last inserted first
const events = [...eventsInBuffer, ...eventsInDb].sort(
(a, b) => new Date(b.createdAt).getTime() - new Date(a.createdAt).getTime()
);
events.map((event, index) => {
job.log(
@@ -64,7 +71,7 @@ export async function createSessionEnd(
},
name: 'session_end',
duration: sessionDuration,
path: lastEvent.path,
path: screenViews[0]?.path ?? '',
createdAt: new Date(getTime(lastEvent?.createdAt) + 100),
});
}

View File

@@ -1,20 +1,30 @@
import { logger } from '@/utils/logger';
import { getReferrerWithQuery, parseReferrer } from '@/utils/parse-referrer';
import { isUserAgentSet, parseUserAgent } from '@/utils/parse-user-agent';
import { parseUserAgent } from '@/utils/parse-user-agent';
import { isSameDomain, parsePath } from '@/utils/url';
import type { Job, JobsOptions } from 'bullmq';
import type { Job } from 'bullmq';
import { omit } from 'ramda';
import { escape } from 'sqlstring';
import { v4 as uuid } from 'uuid';
import { getTime, toISOString } from '@openpanel/common';
import type { IServiceCreateEventPayload } from '@openpanel/db';
import { createEvent, getEvents } from '@openpanel/db';
import { createEvent } from '@openpanel/db';
import { getLastScreenViewFromProfileId } from '@openpanel/db/src/services/event.service';
import { findJobByPrefix } from '@openpanel/queue';
import { eventsQueue } from '@openpanel/queue/src/queues';
import type { EventsQueuePayloadIncomingEvent } from '@openpanel/queue/src/queues';
import { eventsQueue, sessionsQueue } from '@openpanel/queue/src/queues';
import type {
EventsQueuePayloadCreateSessionEnd,
EventsQueuePayloadIncomingEvent,
} from '@openpanel/queue/src/queues';
import { redis } from '@openpanel/redis';
/**
 * Clamps an event timestamp so it never lies in the future.
 *
 * @param eventDate - The timestamp reported for the event.
 * @returns The current time when `eventDate` is later than now; otherwise
 *   the very same `eventDate` instance that was passed in.
 */
function noDateInFuture(eventDate: Date): Date {
  const now = new Date();
  return eventDate > now ? now : eventDate;
}
const GLOBAL_PROPERTIES = ['__path', '__referrer'];
const SESSION_TIMEOUT = 1000 * 60 * 30;
const SESSION_END_TIMEOUT = SESSION_TIMEOUT + 1000;
@@ -27,12 +37,8 @@ export async function incomingEvent(job: Job<EventsQueuePayloadIncomingEvent>) {
projectId,
currentDeviceId,
previousDeviceId,
// TODO: Remove after 2024-09-26
currentDeviceIdDeprecated,
previousDeviceIdDeprecated,
priority,
} = job.data.payload;
let deviceId: string | null = null;
const properties = body.properties ?? {};
const getProperty = (name: string): string | undefined => {
// replace thing is just for older sdks when we didn't have `__`
@@ -44,22 +50,22 @@ export async function incomingEvent(job: Job<EventsQueuePayloadIncomingEvent>) {
| undefined) ?? undefined
);
};
const { ua } = headers;
const profileId = body.profileId ?? '';
const createdAt = new Date(body.timestamp);
const profileId = body.profileId ? String(body.profileId) : '';
const createdAt = noDateInFuture(new Date(body.timestamp));
const url = getProperty('__path');
const { path, hash, query, origin } = parsePath(url);
const referrer = isSameDomain(getProperty('__referrer'), url)
? null
: parseReferrer(getProperty('__referrer'));
const utmReferrer = getReferrerWithQuery(query);
const uaInfo = ua ? parseUserAgent(ua) : null;
const isServerEvent = ua ? !isUserAgentSet(ua) : true;
const uaInfo = parseUserAgent(headers.ua);
if (isServerEvent) {
const [event] = await getEvents(
`SELECT * FROM events WHERE name = 'screen_view' AND profile_id = ${escape(profileId)} AND project_id = ${escape(projectId)} ORDER BY created_at DESC LIMIT 1`
);
if (uaInfo.isServer) {
const event = await getLastScreenViewFromProfileId({
profileId,
projectId,
});
const payload: Omit<IServiceCreateEventPayload, 'id'> = {
name: body.name,
@@ -67,7 +73,10 @@ export async function incomingEvent(job: Job<EventsQueuePayloadIncomingEvent>) {
sessionId: event?.sessionId || '',
profileId,
projectId,
properties: Object.assign({}, omit(GLOBAL_PROPERTIES, properties)),
properties: {
...omit(GLOBAL_PROPERTIES, properties),
user_agent: headers.ua,
},
createdAt,
country: event?.country || geo.country || '',
city: event?.city || geo.city || '',
@@ -78,7 +87,7 @@ export async function incomingEvent(job: Job<EventsQueuePayloadIncomingEvent>) {
osVersion: event?.osVersion ?? '',
browser: event?.browser ?? '',
browserVersion: event?.browserVersion ?? '',
device: event?.device ?? '',
device: event?.device ?? uaInfo.device ?? '',
brand: event?.brand ?? '',
model: event?.model ?? '',
duration: 0,
@@ -94,82 +103,50 @@ export async function incomingEvent(job: Job<EventsQueuePayloadIncomingEvent>) {
return createEvent(payload);
}
const [sessionEndKeys, eventsKeys] = await Promise.all([
redis.keys(`bull:events:sessionEnd:${projectId}:*`),
redis.keys(`bull:events:event:${projectId}:*`),
]);
const sessionEnd = await getSessionEndWithPriority(priority)({
projectId,
currentDeviceId,
previousDeviceId,
});
const sessionEndJobCurrentDeviceId = await findJobByPrefix(
eventsQueue,
sessionEndKeys,
`sessionEnd:${projectId}:${currentDeviceId}:`
);
const sessionEndJobPreviousDeviceId = await findJobByPrefix(
eventsQueue,
sessionEndKeys,
`sessionEnd:${projectId}:${previousDeviceId}:`
);
// TODO: Remove after 2024-09-26
const sessionEndJobCurrentDeviceIdDeprecated = await findJobByPrefix(
eventsQueue,
sessionEndKeys,
`sessionEnd:${projectId}:${currentDeviceIdDeprecated}:`
);
const sessionEndJobPreviousDeviceIdDeprecated = await findJobByPrefix(
eventsQueue,
sessionEndKeys,
`sessionEnd:${projectId}:${previousDeviceIdDeprecated}:`
);
const sessionEndPayload = (sessionEnd?.job?.data
?.payload as EventsQueuePayloadCreateSessionEnd['payload']) || {
sessionId: uuid(),
deviceId: currentDeviceId,
profileId,
};
let createSessionStart = false;
const sessionEndJobId =
sessionEnd?.job.id ??
`sessionEnd:${projectId}:${sessionEndPayload.deviceId}:${Date.now()}`;
if (sessionEndJobCurrentDeviceId) {
deviceId = currentDeviceId;
sessionEndJobCurrentDeviceId.changeDelay(SESSION_END_TIMEOUT);
} else if (sessionEndJobPreviousDeviceId) {
deviceId = previousDeviceId;
sessionEndJobPreviousDeviceId.changeDelay(SESSION_END_TIMEOUT);
} else if (sessionEndJobCurrentDeviceIdDeprecated) {
deviceId = currentDeviceIdDeprecated;
sessionEndJobCurrentDeviceIdDeprecated.changeDelay(SESSION_END_TIMEOUT);
} else if (sessionEndJobPreviousDeviceIdDeprecated) {
deviceId = previousDeviceIdDeprecated;
sessionEndJobPreviousDeviceIdDeprecated.changeDelay(SESSION_END_TIMEOUT);
if (sessionEnd) {
// If for some reason we have a session end job that is not a createSessionEnd job
if (sessionEnd.job.data.type !== 'createSessionEnd') {
throw new Error('Invalid session end job');
}
await sessionEnd.job.changeDelay(SESSION_TIMEOUT);
} else {
deviceId = currentDeviceId;
createSessionStart = true;
// Queue session end
eventsQueue.add(
'event',
await sessionsQueue.add(
'session',
{
type: 'createSessionEnd',
payload: {
deviceId,
},
payload: sessionEndPayload,
},
{
delay: SESSION_END_TIMEOUT,
jobId: `sessionEnd:${projectId}:${deviceId}:${Date.now()}`,
jobId: sessionEndJobId,
}
);
}
const prevEventJob = await findJobByPrefix(
eventsQueue,
eventsKeys,
`event:${projectId}:${deviceId}:`
);
const [sessionStartEvent] = await getEvents(
`SELECT * FROM events WHERE name = 'session_start' AND device_id = ${escape(deviceId)} AND project_id = ${escape(projectId)} ORDER BY created_at DESC LIMIT 1`
);
const payload: Omit<IServiceCreateEventPayload, 'id'> = {
name: body.name,
deviceId,
deviceId: sessionEndPayload.deviceId,
sessionId: sessionEndPayload.sessionId,
profileId,
projectId,
sessionId: createSessionStart ? uuid() : sessionStartEvent?.sessionId ?? '',
properties: Object.assign({}, omit(GLOBAL_PROPERTIES, properties), {
__hash: hash,
__query: query,
@@ -189,7 +166,7 @@ export async function incomingEvent(job: Job<EventsQueuePayloadIncomingEvent>) {
model: uaInfo?.model ?? '',
duration: 0,
path: path,
origin: origin || sessionStartEvent?.origin || '',
origin: origin,
referrer: referrer?.url,
referrerName: referrer?.name || utmReferrer?.name || '',
referrerType: referrer?.type || utmReferrer?.type || '',
@@ -197,76 +174,7 @@ export async function incomingEvent(job: Job<EventsQueuePayloadIncomingEvent>) {
meta: undefined,
};
const isDelayed = prevEventJob ? await prevEventJob?.isDelayed() : false;
if (isDelayed && prevEventJob && prevEventJob.data.type === 'createEvent') {
const prevEvent = prevEventJob.data.payload;
const duration = getTime(payload.createdAt) - getTime(prevEvent.createdAt);
job.log(`prevEvent ${JSON.stringify(prevEvent, null, 2)}`);
// Set path from prev screen_view event if current event is not a screen_view
if (payload.name != 'screen_view') {
payload.path = prevEvent.path;
}
if (payload.name === 'screen_view') {
if (duration < 0) {
logger.info({ prevEvent, payload }, 'Duration is negative');
} else {
try {
// Skip update duration if it's wrong
// Seems like request is not in right order
await prevEventJob.updateData({
type: 'createEvent',
payload: {
...prevEvent,
duration,
},
});
} catch (error) {
logger.error(
{
error,
prevEventJobStatus: await prevEventJob
.getState()
.catch(() => 'unknown'),
},
`Failed update delayed job`
);
}
}
try {
await prevEventJob.promote();
} catch (error) {
logger.error(
{
error,
prevEventJobStatus: await prevEventJob
.getState()
.catch(() => 'unknown'),
prevEvent,
currEvent: payload,
},
`Failed to promote job`
);
}
}
} else if (payload.name !== 'screen_view') {
job.log(
`no previous job ${JSON.stringify(
{
prevEventJob,
payload,
},
null,
2
)}`
);
}
if (createSessionStart) {
// We do not need to queue session_start
if (!sessionEnd) {
await createEvent({
...payload,
name: 'session_start',
@@ -275,40 +183,78 @@ export async function incomingEvent(job: Job<EventsQueuePayloadIncomingEvent>) {
});
}
const options: JobsOptions = {};
if (payload.name === 'screen_view') {
options.delay = SESSION_TIMEOUT;
options.jobId = `event:${projectId}:${deviceId}:${Date.now()}`;
return createEvent(payload);
}
/**
 * Builds a `getSessionEnd`-compatible lookup that retries for non-priority
 * events.
 *
 * When simultaneous requests arrive, one event gets priority and the others
 * wait for it to finish, which avoids racing on the session end lookup. A
 * non-priority lookup that finds nothing is therefore retried after a 50 ms
 * pause instead of immediately reporting "no session".
 *
 * @param priority - `false` enables the wait-and-retry behaviour; any other
 *   value returns the first lookup result as-is (including `null`).
 * @param count - Number of retries already performed (internal recursion
 *   counter; callers normally omit it).
 * @returns A function with the same signature as `getSessionEnd`.
 * @throws Error `'Failed to get session end'` when the lookup is still empty
 *   after the retry budget is exhausted.
 */
function getSessionEndWithPriority(
  priority: boolean,
  count = 0
): typeof getSessionEnd {
  return async (args) => {
    const res = await getSessionEnd(args);

    // A found job is always usable, and the priority event never waits.
    // Checking this before the retry limit ensures a job found on the final
    // attempt is returned rather than discarded.
    if (res !== null || priority !== false) {
      return res;
    }

    // Still nothing after the last allowed retry: give up
    if (count > 5) {
      throw new Error('Failed to get session end');
    }

    // Give the priority event a moment to create the session end job
    await new Promise((resolve) => setTimeout(resolve, 50));
    return getSessionEndWithPriority(priority, count + 1)(args);
  };
}
async function getSessionEnd({
projectId,
currentDeviceId,
previousDeviceId,
}: {
projectId: string;
currentDeviceId: string;
previousDeviceId: string;
}) {
const sessionEndKeys = await redis.keys(`*:sessionEnd:${projectId}:*`);
const sessionEndJobCurrentDeviceId = await findJobByPrefix(
sessionsQueue,
sessionEndKeys,
`sessionEnd:${projectId}:${currentDeviceId}:`
);
if (sessionEndJobCurrentDeviceId) {
return { deviceId: currentDeviceId, job: sessionEndJobCurrentDeviceId };
}
job.log(
`event is queued ${JSON.stringify(
{
ua,
uaInfo,
referrer,
profileId,
projectId,
deviceId,
geo,
sessionStartEvent,
path,
payload,
},
null,
2
)}`
const sessionEndJobCurrentDeviceId2 = await findJobByPrefix(
eventsQueue,
sessionEndKeys,
`sessionEnd:${projectId}:${currentDeviceId}:`
);
if (sessionEndJobCurrentDeviceId2) {
return { deviceId: currentDeviceId, job: sessionEndJobCurrentDeviceId2 };
}
// Queue event instead of creating it,
// since we want to update duration if we get more events in the same session
// The event will only be delayed if it's a screen_view event
return eventsQueue.add(
'event',
{
type: 'createEvent',
payload,
},
options
const sessionEndJobPreviousDeviceId = await findJobByPrefix(
sessionsQueue,
sessionEndKeys,
`sessionEnd:${projectId}:${previousDeviceId}:`
);
if (sessionEndJobPreviousDeviceId) {
return { deviceId: previousDeviceId, job: sessionEndJobPreviousDeviceId };
}
const sessionEndJobPreviousDeviceId2 = await findJobByPrefix(
eventsQueue,
sessionEndKeys,
`sessionEnd:${projectId}:${previousDeviceId}:`
);
if (sessionEndJobPreviousDeviceId2) {
return { deviceId: previousDeviceId, job: sessionEndJobPreviousDeviceId2 };
}
// Create session
return null;
}

View File

@@ -1,7 +1,7 @@
import type { Job } from 'bullmq';
import { escape } from 'sqlstring';
import { chQuery, createEvent, db } from '@openpanel/db';
import { chQuery, db } from '@openpanel/db';
import type {
EventsQueuePayload,
EventsQueuePayloadCreateSessionEnd,
@@ -16,22 +16,6 @@ export async function eventsJob(job: Job<EventsQueuePayload>) {
case 'incomingEvent': {
return await incomingEvent(job as Job<EventsQueuePayloadIncomingEvent>);
}
case 'createEvent': {
if (job.attemptsStarted > 1 && job.data.payload.duration < 0) {
job.data.payload.duration = 0;
}
const createdEvent = await createEvent(job.data.payload);
try {
await updateEventsCount(job.data.payload.projectId);
} catch (e) {
if (e instanceof Error) {
job.log(`Failed to update events count: ${e.message}`);
} else {
job.log(`Failed to update events count: Unknown issue`);
}
}
return createdEvent;
}
case 'createSessionEnd': {
return await createSessionEnd(
job as Job<EventsQueuePayloadCreateSessionEnd>

View File

@@ -0,0 +1,9 @@
import type { Job } from 'bullmq';
import type { SessionsQueuePayload } from '@openpanel/queue/src/queues';
import { createSessionEnd } from './events.create-session-end';
/**
 * Handler for jobs on the sessions queue.
 *
 * Forwards the job to `createSessionEnd` and resolves with whatever that
 * call resolves to.
 *
 * @param job - The BullMQ job carrying a sessions queue payload.
 */
export async function sessionsJob(job: Job<SessionsQueuePayload>) {
  const result = await createSessionEnd(job);
  return result;
}