feat: add OpenTelemetry device log capture pipeline

- ClickHouse `logs` table (migration 13) with OTel columns, bloom filter indices
- Zod validation schema for log payloads (severity, body, attributes, trace context); a rough sketch of the payload shape follows this list
- Redis-backed LogBuffer with micro-batching into ClickHouse
- POST /logs API endpoint with client auth, geo + UA enrichment
- BullMQ logs queue + worker job
- cron flushLogs every 10s wired into existing cron system
- SDK captureLog(severity, body, properties) with client-side batching
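
As a concrete illustration of the payload side, here is a rough sketch of what the Zod validation for POST /logs could look like. The schema name, field names, and limits are assumptions for illustration only; the general shape (OTel severity, body, trace context, attribute/resource maps) follows the bullet above and the columns of the `logs` table.

import { z } from 'zod';

// Hypothetical payload schema; names and limits are assumptions, not the actual code.
const logPayloadSchema = z.object({
  severityNumber: z.number().int().min(1).max(24), // OTel SeverityNumber range
  severityText: z.string().max(32).optional(),
  body: z.string(),
  timestamp: z.string().datetime().optional(), // falls back to server receive time
  traceId: z.string().length(32).optional(),
  spanId: z.string().length(16).optional(),
  traceFlags: z.number().int().nonnegative().optional(),
  loggerName: z.string().optional(),
  attributes: z.record(z.string(), z.string()).optional(),
  resource: z.record(z.string(), z.string()).optional(),
});

export type LogPayload = z.infer<typeof logPayloadSchema>;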

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-30 12:04:04 +02:00
parent a1ce71ffb6
commit 0672857974
14 changed files with 652 additions and 2 deletions


@@ -0,0 +1,72 @@
import { createTable, runClickhouseMigrationCommands } from '../src/clickhouse/migration';
import { getIsCluster, getIsSelfHosting, printBoxMessage } from './helpers';
export async function up() {
const replicatedVersion = '1';
const isClustered = getIsCluster();
const sqls: string[] = [];
sqls.push(
...createTable({
name: 'logs',
columns: [
'`id` UUID DEFAULT generateUUIDv4()',
'`project_id` String CODEC(ZSTD(3))',
'`device_id` String CODEC(ZSTD(3))',
'`profile_id` String CODEC(ZSTD(3))',
'`session_id` String CODEC(LZ4)',
// OpenTelemetry log fields
'`timestamp` DateTime64(9) CODEC(DoubleDelta, ZSTD(3))',
'`observed_at` DateTime64(9) CODEC(DoubleDelta, ZSTD(3))',
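// OTel SeverityNumber ranges: 1-4 TRACE, 5-8 DEBUG, 9-12 INFO, 13-16 WARN, 17-20 ERROR, 21-24 FATAL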
'`severity_number` UInt8',
'`severity_text` LowCardinality(String)',
'`body` String CODEC(ZSTD(3))',
'`trace_id` String CODEC(ZSTD(3))',
'`span_id` String CODEC(ZSTD(3))',
'`trace_flags` UInt32 DEFAULT 0',
'`logger_name` LowCardinality(String)',
// OTel attributes (log-level key-value pairs)
'`attributes` Map(String, String) CODEC(ZSTD(3))',
// OTel resource attributes (device/app metadata)
'`resource` Map(String, String) CODEC(ZSTD(3))',
// Server-enriched context
'`sdk_name` LowCardinality(String)',
'`sdk_version` LowCardinality(String)',
'`country` LowCardinality(FixedString(2))',
'`city` String',
'`region` LowCardinality(String)',
'`os` LowCardinality(String)',
'`os_version` LowCardinality(String)',
'`browser` LowCardinality(String)',
'`browser_version` LowCardinality(String)',
'`device` LowCardinality(String)',
'`brand` LowCardinality(String)',
'`model` LowCardinality(String)',
],
indices: [
'INDEX idx_severity_number severity_number TYPE minmax GRANULARITY 1',
'INDEX idx_body body TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 1',
'INDEX idx_trace_id trace_id TYPE bloom_filter GRANULARITY 1',
'INDEX idx_logger_name logger_name TYPE bloom_filter GRANULARITY 1',
],
orderBy: ['project_id', 'toDate(timestamp)', 'severity_number', 'device_id'],
partitionBy: 'toYYYYMM(timestamp)',
settings: {
index_granularity: 8192,
ttl_only_drop_parts: 1,
},
distributionHash: 'cityHash64(project_id, toString(toStartOfHour(timestamp)))',
replicatedVersion,
isClustered,
}),
);
printBoxMessage('Running migration: 13-add-logs', [
'Creates the logs table for OpenTelemetry-compatible device/app log capture.',
]);
if (!process.argv.includes('--dry')) {
await runClickhouseMigrationCommands(sqls);
}
}
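
To illustrate the intended read path: the ORDER BY key (project_id, toDate(timestamp), severity_number, device_id) serves project-scoped, time-bounded, severity-filtered scans, while the tokenbf_v1 index on body and the bloom filters on trace_id and logger_name cover point lookups. A query sketch, assuming the shared `ch` ClickHouse client used elsewhere in the codebase (the query itself is illustrative and not part of this commit):

import { ch } from '../clickhouse/client'; // import path assumed

// Illustrative query: recent ERROR-and-above logs for one trace in a project.
export async function findErrorLogsForTrace(projectId: string, traceId: string) {
  const result = await ch.query({
    query: `
      SELECT timestamp, severity_text, body, logger_name
      FROM logs
      WHERE project_id = {projectId:String}
        AND trace_id = {traceId:String}         -- served by the bloom_filter index
        AND severity_number >= 17               -- ERROR and above in OTel numbering
        AND timestamp >= now() - INTERVAL 1 DAY
      ORDER BY timestamp DESC
      LIMIT 100
    `,
    query_params: { projectId, traceId },
    format: 'JSONEachRow',
  });
  return result.json();
}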


@@ -1,6 +1,7 @@
import { BotBuffer as BotBufferRedis } from './bot-buffer';
import { EventBuffer as EventBufferRedis } from './event-buffer';
import { GroupBuffer } from './group-buffer';
import { LogBuffer } from './log-buffer';
import { ProfileBackfillBuffer } from './profile-backfill-buffer';
import { ProfileBuffer as ProfileBufferRedis } from './profile-buffer';
import { ReplayBuffer } from './replay-buffer';
@@ -13,6 +14,8 @@ export const sessionBuffer = new SessionBuffer();
export const profileBackfillBuffer = new ProfileBackfillBuffer();
export const replayBuffer = new ReplayBuffer();
export const groupBuffer = new GroupBuffer();
export const logBuffer = new LogBuffer();
export type { ProfileBackfillEntry } from './profile-backfill-buffer';
export type { IClickhouseSessionReplayChunk } from './replay-buffer';
export type { IClickhouseLog } from './log-buffer';


@@ -0,0 +1,193 @@
import { getSafeJson } from '@openpanel/json';
import { getRedisCache } from '@openpanel/redis';
import { ch } from '../clickhouse/client';
import { BaseBuffer } from './base-buffer';
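// One row destined for the ClickHouse `logs` table (mirrors the columns created in migration 13).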
export interface IClickhouseLog {
id?: string;
project_id: string;
device_id: string;
profile_id: string;
session_id: string;
timestamp: string;
observed_at: string;
severity_number: number;
severity_text: string;
body: string;
trace_id: string;
span_id: string;
trace_flags: number;
logger_name: string;
attributes: Record<string, string>;
resource: Record<string, string>;
sdk_name: string;
sdk_version: string;
country: string;
city: string;
region: string;
os: string;
os_version: string;
browser: string;
browser_version: string;
device: string;
brand: string;
model: string;
}
export class LogBuffer extends BaseBuffer {
private batchSize = process.env.LOG_BUFFER_BATCH_SIZE
? Number.parseInt(process.env.LOG_BUFFER_BATCH_SIZE, 10)
: 4000;
private chunkSize = process.env.LOG_BUFFER_CHUNK_SIZE
? Number.parseInt(process.env.LOG_BUFFER_CHUNK_SIZE, 10)
: 1000;
private microBatchIntervalMs = process.env.LOG_BUFFER_MICRO_BATCH_MS
? Number.parseInt(process.env.LOG_BUFFER_MICRO_BATCH_MS, 10)
: 10;
private microBatchMaxSize = process.env.LOG_BUFFER_MICRO_BATCH_SIZE
? Number.parseInt(process.env.LOG_BUFFER_MICRO_BATCH_SIZE, 10)
: 100;
private pendingLogs: IClickhouseLog[] = [];
private flushTimer: ReturnType<typeof setTimeout> | null = null;
private isFlushing = false;
private flushRetryCount = 0;
private queueKey = 'log_buffer:queue';
protected bufferCounterKey = 'log_buffer:total_count';
constructor() {
super({
name: 'log',
onFlush: async () => {
await this.processBuffer();
},
});
}
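// Stage 1: add() collects logs in memory and micro-batches them into a Redis
// list via flushLocalBuffer(); processBuffer() later drains that list into
// ClickHouse in bulk.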
add(log: IClickhouseLog) {
this.pendingLogs.push(log);
if (this.pendingLogs.length >= this.microBatchMaxSize) {
this.flushLocalBuffer();
return;
}
if (!this.flushTimer) {
this.flushTimer = setTimeout(() => {
this.flushTimer = null;
this.flushLocalBuffer();
}, this.microBatchIntervalMs);
}
}
public async flush() {
if (this.flushTimer) {
clearTimeout(this.flushTimer);
this.flushTimer = null;
}
await this.flushLocalBuffer();
}
private async flushLocalBuffer() {
if (this.isFlushing || this.pendingLogs.length === 0) {
return;
}
this.isFlushing = true;
const logsToFlush = this.pendingLogs;
this.pendingLogs = [];
try {
const redis = getRedisCache();
const multi = redis.multi();
for (const log of logsToFlush) {
multi.rpush(this.queueKey, JSON.stringify(log));
}
multi.incrby(this.bufferCounterKey, logsToFlush.length);
await multi.exec();
this.flushRetryCount = 0;
} catch (error) {
this.pendingLogs = logsToFlush.concat(this.pendingLogs);
this.flushRetryCount += 1;
this.logger.warn('Failed to flush log buffer to Redis; logs re-queued', {
error,
logCount: logsToFlush.length,
flushRetryCount: this.flushRetryCount,
});
} finally {
this.isFlushing = false;
if (this.pendingLogs.length > 0 && !this.flushTimer) {
this.flushTimer = setTimeout(() => {
this.flushTimer = null;
this.flushLocalBuffer();
}, this.microBatchIntervalMs);
}
}
}
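// Stage 2: read up to batchSize entries from the Redis list, insert them into
// ClickHouse in chunks of chunkSize, and only trim the list and decrement the
// counter once all inserts have succeeded.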
async processBuffer() {
const redis = getRedisCache();
try {
const queueLogs = await redis.lrange(this.queueKey, 0, this.batchSize - 1);
if (queueLogs.length === 0) {
this.logger.debug('No logs to process');
return;
}
const logsToClickhouse: IClickhouseLog[] = [];
for (const logStr of queueLogs) {
const log = getSafeJson<IClickhouseLog>(logStr);
if (log) {
logsToClickhouse.push(log);
}
}
if (logsToClickhouse.length === 0) {
this.logger.debug('No valid logs to process');
return;
}
logsToClickhouse.sort(
(a, b) =>
new Date(a.timestamp || 0).getTime() -
new Date(b.timestamp || 0).getTime(),
);
this.logger.info('Inserting logs into ClickHouse', {
totalLogs: logsToClickhouse.length,
chunks: Math.ceil(logsToClickhouse.length / this.chunkSize),
});
for (const chunk of this.chunks(logsToClickhouse, this.chunkSize)) {
await ch.insert({
table: 'logs',
values: chunk,
format: 'JSONEachRow',
});
}
await redis
.multi()
.ltrim(this.queueKey, queueLogs.length, -1)
.decrby(this.bufferCounterKey, queueLogs.length)
.exec();
this.logger.info('Processed logs from Redis buffer', {
batchSize: this.batchSize,
logsProcessed: logsToClickhouse.length,
});
} catch (error) {
this.logger.error('Error processing log Redis buffer', { error });
}
}
public getBufferSize() {
return this.getBufferSizeWithCounter(async () => {
const redis = getRedisCache();
return await redis.llen(this.queueKey);
});
}
}
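
The BullMQ logs queue, the worker job, and the 10-second flushLogs cron from the commit message are not part of this excerpt. The wiring presumably looks roughly like the sketch below; the queue name, job name, import path, and whether the job calls processBuffer() directly or goes through the BaseBuffer flush helper are all assumptions:

import { Worker } from 'bullmq';
import { logBuffer } from '@openpanel/db'; // import path assumed

// Hypothetical worker: a cron enqueues a flushLogs job every 10 seconds and the
// worker drains the Redis log queue into ClickHouse.
const logsWorker = new Worker(
  'logs',
  async (job) => {
    if (job.name === 'flushLogs') {
      await logBuffer.processBuffer();
    }
  },
  { connection: { host: process.env.REDIS_HOST } },
);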