fix: overall perf improvements
* fix: ignore private ips * fix: performance related fixes * fix: simplify event buffer * fix: default to 1 events queue shard * add: cleanup scripts * fix: comments * fix comments * fix * fix: groupmq * wip * fix: sync cachable * remove cluster names and add it behind env flag (if someone wants to scale) * fix * wip * better logger * remove reqid and user agent * fix lock * remove wait_for_async_insert
This commit is contained in:
committed by
GitHub
parent
38cc53890a
commit
da59622dce
@@ -2,9 +2,10 @@ import type { Queue, WorkerOptions } from 'bullmq';
|
||||
import { Worker } from 'bullmq';
|
||||
|
||||
import {
|
||||
EVENTS_GROUP_QUEUES_SHARDS,
|
||||
type EventsQueuePayloadIncomingEvent,
|
||||
cronQueue,
|
||||
eventsGroupQueue,
|
||||
eventsGroupQueues,
|
||||
importQueue,
|
||||
miscQueue,
|
||||
notificationQueue,
|
||||
@@ -18,59 +19,179 @@ import { setTimeout as sleep } from 'node:timers/promises';
|
||||
import { Worker as GroupWorker } from 'groupmq';
|
||||
|
||||
import { cronJob } from './jobs/cron';
|
||||
import { eventsJob } from './jobs/events';
|
||||
import { incomingEventPure } from './jobs/events.incoming-event';
|
||||
import { incomingEvent } from './jobs/events.incoming-event';
|
||||
import { importJob } from './jobs/import';
|
||||
import { miscJob } from './jobs/misc';
|
||||
import { notificationJob } from './jobs/notification';
|
||||
import { sessionsJob } from './jobs/sessions';
|
||||
import { eventsGroupJobDuration } from './metrics';
|
||||
import { logger } from './utils/logger';
|
||||
|
||||
const workerOptions: WorkerOptions = {
|
||||
connection: getRedisQueue(),
|
||||
};
|
||||
|
||||
export async function bootWorkers() {
|
||||
const eventsGroupWorker = new GroupWorker<
|
||||
EventsQueuePayloadIncomingEvent['payload']
|
||||
>({
|
||||
concurrency: Number.parseInt(process.env.EVENT_JOB_CONCURRENCY || '1', 10),
|
||||
logger: queueLogger,
|
||||
queue: eventsGroupQueue,
|
||||
handler: async (job) => {
|
||||
logger.info('processing event (group queue)', {
|
||||
groupId: job.groupId,
|
||||
timestamp: job.data.event.timestamp,
|
||||
});
|
||||
await incomingEventPure(job.data);
|
||||
},
|
||||
});
|
||||
eventsGroupWorker.run();
|
||||
const sessionsWorker = new Worker(
|
||||
sessionsQueue.name,
|
||||
sessionsJob,
|
||||
workerOptions,
|
||||
);
|
||||
const cronWorker = new Worker(cronQueue.name, cronJob, workerOptions);
|
||||
const notificationWorker = new Worker(
|
||||
notificationQueue.name,
|
||||
notificationJob,
|
||||
workerOptions,
|
||||
);
|
||||
const miscWorker = new Worker(miscQueue.name, miscJob, workerOptions);
|
||||
const importWorker = new Worker(importQueue.name, importJob, {
|
||||
...workerOptions,
|
||||
concurrency: Number.parseInt(process.env.IMPORT_JOB_CONCURRENCY || '1', 10),
|
||||
});
|
||||
type QueueName = string; // Can be: events, events_N (where N is 0 to shards-1), sessions, cron, notification, misc
|
||||
|
||||
const workers = [
|
||||
sessionsWorker,
|
||||
cronWorker,
|
||||
notificationWorker,
|
||||
miscWorker,
|
||||
importWorker,
|
||||
// eventsGroupWorker,
|
||||
];
|
||||
/**
 * Parses the ENABLED_QUEUES environment variable (comma-separated) and returns
 * the queue names to start. If the variable is unset or blank, every queue is
 * returned.
 *
 * Supported queue names:
 * - events - All event shards (events_0, events_1, ..., events_N)
 * - events_N - Individual event shard (where N is 0 to EVENTS_GROUP_QUEUES_SHARDS-1)
 * - sessions, cron, notification, misc, import
 *
 * Names that match none of the above are still returned unchanged, but a
 * warning is logged so a typo (e.g. `event`, or a shard index out of range)
 * does not go unnoticed.
 *
 * @returns The trimmed, non-empty queue names in the order they were configured.
 */
function getEnabledQueues(): QueueName[] {
  const enabledQueuesEnv = process.env.ENABLED_QUEUES?.trim();

  if (!enabledQueuesEnv) {
    logger.info('No ENABLED_QUEUES specified, starting all queues', {
      totalEventShards: EVENTS_GROUP_QUEUES_SHARDS,
    });
    return ['events', 'sessions', 'cron', 'notification', 'misc', 'import'];
  }

  const queues = enabledQueuesEnv
    .split(',')
    .map((q) => q.trim())
    .filter(Boolean);

  // Build the set of names this process knows how to start, including one
  // entry per configured event shard.
  const knownQueues = new Set<string>([
    'events',
    'sessions',
    'cron',
    'notification',
    'misc',
    'import',
  ]);
  for (let i = 0; i < EVENTS_GROUP_QUEUES_SHARDS; i++) {
    knownQueues.add(`events_${i}`);
  }

  const unknownQueues = queues.filter((q) => !knownQueues.has(q));
  if (unknownQueues.length > 0) {
    logger.warn('Unrecognized queue names in ENABLED_QUEUES', {
      unknownQueues,
      totalEventShards: EVENTS_GROUP_QUEUES_SHARDS,
    });
  }

  logger.info('Starting queues from ENABLED_QUEUES', {
    queues,
    totalEventShards: EVENTS_GROUP_QUEUES_SHARDS,
  });
  return queues;
}
|
||||
|
||||
/**
 * Gets the concurrency setting for a queue from environment variables.
 * Env var format: {QUEUE_NAME}_CONCURRENCY (e.g., EVENTS_0_CONCURRENCY=32).
 * The queue name is upper-cased and every character outside A-Z / 0-9 is
 * replaced with an underscore before the suffix is appended.
 *
 * @param queueName - Queue name used to derive the env var key.
 * @param defaultValue - Fallback used when the env var is missing or is not a
 *   positive integer. Callers may compute this from another env var, so it is
 *   sanitized too: a non-finite value or a value below 1 falls back to 1, and
 *   a fractional value is floored.
 * @returns A positive integer concurrency.
 */
function getConcurrencyFor(queueName: string, defaultValue = 1): number {
  const envKey = `${queueName.toUpperCase().replace(/[^A-Z0-9]/g, '_')}_CONCURRENCY`;
  const value = process.env[envKey];

  if (value) {
    const parsed = Number.parseInt(value, 10);
    if (!Number.isNaN(parsed) && parsed > 0) {
      return parsed;
    }
  }

  // The default may itself be the result of parsing an env var (and therefore
  // NaN or 0); never hand an unusable concurrency back to the worker.
  if (!Number.isFinite(defaultValue) || defaultValue < 1) {
    return 1;
  }
  return Math.floor(defaultValue);
}
|
||||
|
||||
export async function bootWorkers() {
|
||||
const enabledQueues = getEnabledQueues();
|
||||
|
||||
const workers: (Worker | GroupWorker<any>)[] = [];
|
||||
|
||||
// Start event workers based on enabled queues
|
||||
const eventQueuesToStart: number[] = [];
|
||||
|
||||
if (enabledQueues.includes('events')) {
|
||||
// Start all event shards
|
||||
for (let i = 0; i < EVENTS_GROUP_QUEUES_SHARDS; i++) {
|
||||
eventQueuesToStart.push(i);
|
||||
}
|
||||
} else {
|
||||
// Start specific event shards (events_0, events_1, etc.)
|
||||
for (let i = 0; i < EVENTS_GROUP_QUEUES_SHARDS; i++) {
|
||||
if (enabledQueues.includes(`events_${i}`)) {
|
||||
eventQueuesToStart.push(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const index of eventQueuesToStart) {
|
||||
const queue = eventsGroupQueues[index];
|
||||
if (!queue) continue;
|
||||
|
||||
const queueName = `events_${index}`;
|
||||
const concurrency = getConcurrencyFor(
|
||||
queueName,
|
||||
Number.parseInt(process.env.EVENT_JOB_CONCURRENCY || '10', 10),
|
||||
);
|
||||
|
||||
const worker = new GroupWorker<EventsQueuePayloadIncomingEvent['payload']>({
|
||||
queue,
|
||||
concurrency,
|
||||
logger: queueLogger,
|
||||
blockingTimeoutSec: Number.parseFloat(
|
||||
process.env.EVENT_BLOCKING_TIMEOUT_SEC || '1',
|
||||
),
|
||||
handler: async (job) => {
|
||||
return await incomingEvent(job.data);
|
||||
},
|
||||
});
|
||||
|
||||
worker.run();
|
||||
workers.push(worker);
|
||||
logger.info(`Started worker for ${queueName}`, { concurrency });
|
||||
}
|
||||
|
||||
// Start sessions worker
|
||||
if (enabledQueues.includes('sessions')) {
|
||||
const concurrency = getConcurrencyFor('sessions');
|
||||
const sessionsWorker = new Worker(sessionsQueue.name, sessionsJob, {
|
||||
...workerOptions,
|
||||
concurrency,
|
||||
});
|
||||
workers.push(sessionsWorker);
|
||||
logger.info('Started worker for sessions', { concurrency });
|
||||
}
|
||||
|
||||
// Start cron worker
|
||||
if (enabledQueues.includes('cron')) {
|
||||
const concurrency = getConcurrencyFor('cron');
|
||||
const cronWorker = new Worker(cronQueue.name, cronJob, {
|
||||
...workerOptions,
|
||||
concurrency,
|
||||
});
|
||||
workers.push(cronWorker);
|
||||
logger.info('Started worker for cron', { concurrency });
|
||||
}
|
||||
|
||||
// Start notification worker
|
||||
if (enabledQueues.includes('notification')) {
|
||||
const concurrency = getConcurrencyFor('notification');
|
||||
const notificationWorker = new Worker(
|
||||
notificationQueue.name,
|
||||
notificationJob,
|
||||
{ ...workerOptions, concurrency },
|
||||
);
|
||||
workers.push(notificationWorker);
|
||||
logger.info('Started worker for notification', { concurrency });
|
||||
}
|
||||
|
||||
// Start misc worker
|
||||
if (enabledQueues.includes('misc')) {
|
||||
const concurrency = getConcurrencyFor('misc');
|
||||
const miscWorker = new Worker(miscQueue.name, miscJob, {
|
||||
...workerOptions,
|
||||
concurrency,
|
||||
});
|
||||
workers.push(miscWorker);
|
||||
logger.info('Started worker for misc', { concurrency });
|
||||
}
|
||||
|
||||
// Start import worker
|
||||
if (enabledQueues.includes('import')) {
|
||||
const concurrency = getConcurrencyFor('import');
|
||||
const importWorker = new Worker(importQueue.name, importJob, {
|
||||
...workerOptions,
|
||||
concurrency,
|
||||
});
|
||||
workers.push(importWorker);
|
||||
logger.info('Started worker for import', { concurrency });
|
||||
}
|
||||
|
||||
if (workers.length === 0) {
|
||||
logger.warn(
|
||||
'No workers started. Check ENABLED_QUEUES environment variable.',
|
||||
);
|
||||
}
|
||||
|
||||
workers.forEach((worker) => {
|
||||
(worker as Worker).on('error', (error) => {
|
||||
@@ -94,6 +215,13 @@ export async function bootWorkers() {
|
||||
|
||||
(worker as Worker).on('failed', (job) => {
|
||||
if (job) {
|
||||
if (job.processedOn && job.finishedOn) {
|
||||
const elapsed = job.finishedOn - job.processedOn;
|
||||
eventsGroupJobDuration.observe(
|
||||
{ name: worker.name, status: 'failed' },
|
||||
elapsed,
|
||||
);
|
||||
}
|
||||
logger.error('job failed', {
|
||||
jobId: job.id,
|
||||
worker: worker.name,
|
||||
@@ -106,15 +234,18 @@ export async function bootWorkers() {
|
||||
|
||||
(worker as Worker).on('completed', (job) => {
|
||||
if (job) {
|
||||
logger.info('job completed', {
|
||||
jobId: job.id,
|
||||
worker: worker.name,
|
||||
data: job.data,
|
||||
elapsed:
|
||||
job.processedOn && job.finishedOn
|
||||
? job.finishedOn - job.processedOn
|
||||
: undefined,
|
||||
});
|
||||
if (job.processedOn && job.finishedOn) {
|
||||
const elapsed = job.finishedOn - job.processedOn;
|
||||
logger.info('job completed', {
|
||||
jobId: job.id,
|
||||
worker: worker.name,
|
||||
elapsed,
|
||||
});
|
||||
eventsGroupJobDuration.observe(
|
||||
{ name: worker.name, status: 'success' },
|
||||
elapsed,
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@@ -135,8 +266,14 @@ export async function bootWorkers() {
|
||||
});
|
||||
try {
|
||||
const time = performance.now();
|
||||
await waitForQueueToEmpty(cronQueue);
|
||||
|
||||
// Wait for cron queue to empty if it's running
|
||||
if (enabledQueues.includes('cron')) {
|
||||
await waitForQueueToEmpty(cronQueue);
|
||||
}
|
||||
|
||||
await Promise.all(workers.map((worker) => worker.close()));
|
||||
|
||||
logger.info('workers closed successfully', {
|
||||
elapsed: performance.now() - time,
|
||||
});
|
||||
@@ -155,15 +292,7 @@ export async function bootWorkers() {
|
||||
['uncaughtException', 'unhandledRejection', 'SIGTERM', 'SIGINT'].forEach(
|
||||
(evt) => {
|
||||
process.on(evt, (code) => {
|
||||
if (process.env.NODE_ENV === 'production') {
|
||||
exitHandler(evt, code);
|
||||
} else {
|
||||
logger.info('Shutting down for development', {
|
||||
event: evt,
|
||||
code,
|
||||
});
|
||||
process.exit(0);
|
||||
}
|
||||
exitHandler(evt, code);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user