feat: add OpenTelemetry device log capture pipeline
- ClickHouse `logs` table (migration 13) with OTel columns, bloom filter indices
- Zod validation schema for log payloads (severity, body, attributes, trace context)
- Redis-backed LogBuffer with micro-batching into ClickHouse
- POST /logs API endpoint with client auth, geo + UA enrichment
- BullMQ logs queue + worker job
- cron flushLogs every 10s wired into existing cron system
- SDK captureLog(severity, body, properties) with client-side batching

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
72
packages/db/code-migrations/13-add-logs.ts
Normal file
72
packages/db/code-migrations/13-add-logs.ts
Normal file
@@ -0,0 +1,72 @@
|
||||
import { createTable, runClickhouseMigrationCommands } from '../src/clickhouse/migration';
|
||||
import { getIsCluster, getIsSelfHosting, printBoxMessage } from './helpers';
|
||||
|
||||
export async function up() {
|
||||
const replicatedVersion = '1';
|
||||
const isClustered = getIsCluster();
|
||||
|
||||
const sqls: string[] = [];
|
||||
|
||||
sqls.push(
|
||||
...createTable({
|
||||
name: 'logs',
|
||||
columns: [
|
||||
'`id` UUID DEFAULT generateUUIDv4()',
|
||||
'`project_id` String CODEC(ZSTD(3))',
|
||||
'`device_id` String CODEC(ZSTD(3))',
|
||||
'`profile_id` String CODEC(ZSTD(3))',
|
||||
'`session_id` String CODEC(LZ4)',
|
||||
// OpenTelemetry log fields
|
||||
'`timestamp` DateTime64(9) CODEC(DoubleDelta, ZSTD(3))',
|
||||
'`observed_at` DateTime64(9) CODEC(DoubleDelta, ZSTD(3))',
|
||||
'`severity_number` UInt8',
|
||||
'`severity_text` LowCardinality(String)',
|
||||
'`body` String CODEC(ZSTD(3))',
|
||||
'`trace_id` String CODEC(ZSTD(3))',
|
||||
'`span_id` String CODEC(ZSTD(3))',
|
||||
'`trace_flags` UInt32 DEFAULT 0',
|
||||
'`logger_name` LowCardinality(String)',
|
||||
// OTel attributes (log-level key-value pairs)
|
||||
'`attributes` Map(String, String) CODEC(ZSTD(3))',
|
||||
// OTel resource attributes (device/app metadata)
|
||||
'`resource` Map(String, String) CODEC(ZSTD(3))',
|
||||
// Server-enriched context
|
||||
'`sdk_name` LowCardinality(String)',
|
||||
'`sdk_version` LowCardinality(String)',
|
||||
'`country` LowCardinality(FixedString(2))',
|
||||
'`city` String',
|
||||
'`region` LowCardinality(String)',
|
||||
'`os` LowCardinality(String)',
|
||||
'`os_version` LowCardinality(String)',
|
||||
'`browser` LowCardinality(String)',
|
||||
'`browser_version` LowCardinality(String)',
|
||||
'`device` LowCardinality(String)',
|
||||
'`brand` LowCardinality(String)',
|
||||
'`model` LowCardinality(String)',
|
||||
],
|
||||
indices: [
|
||||
'INDEX idx_severity_number severity_number TYPE minmax GRANULARITY 1',
|
||||
'INDEX idx_body body TYPE tokenbf_v1(32768, 3, 0) GRANULARITY 1',
|
||||
'INDEX idx_trace_id trace_id TYPE bloom_filter GRANULARITY 1',
|
||||
'INDEX idx_logger_name logger_name TYPE bloom_filter GRANULARITY 1',
|
||||
],
|
||||
orderBy: ['project_id', 'toDate(timestamp)', 'severity_number', 'device_id'],
|
||||
partitionBy: 'toYYYYMM(timestamp)',
|
||||
settings: {
|
||||
index_granularity: 8192,
|
||||
ttl_only_drop_parts: 1,
|
||||
},
|
||||
distributionHash: 'cityHash64(project_id, toString(toStartOfHour(timestamp)))',
|
||||
replicatedVersion,
|
||||
isClustered,
|
||||
}),
|
||||
);
|
||||
|
||||
printBoxMessage('Running migration: 13-add-logs', [
|
||||
'Creates the logs table for OpenTelemetry-compatible device/app log capture.',
|
||||
]);
|
||||
|
||||
if (!process.argv.includes('--dry')) {
|
||||
await runClickhouseMigrationCommands(sqls);
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
import { BotBuffer as BotBufferRedis } from './bot-buffer';
|
||||
import { EventBuffer as EventBufferRedis } from './event-buffer';
|
||||
import { GroupBuffer } from './group-buffer';
|
||||
import { LogBuffer } from './log-buffer';
|
||||
import { ProfileBackfillBuffer } from './profile-backfill-buffer';
|
||||
import { ProfileBuffer as ProfileBufferRedis } from './profile-buffer';
|
||||
import { ReplayBuffer } from './replay-buffer';
|
||||
@@ -13,6 +14,8 @@ export const sessionBuffer = new SessionBuffer();
|
||||
// Shared singleton buffer instances used across the app.
export const profileBackfillBuffer = new ProfileBackfillBuffer();
export const replayBuffer = new ReplayBuffer();
export const groupBuffer = new GroupBuffer();
// Redis-backed buffer for OpenTelemetry-style device/app logs
// (introduced together with ClickHouse migration 13-add-logs).
export const logBuffer = new LogBuffer();

// Row/entry types re-exported for consumers of the buffers above.
export type { ProfileBackfillEntry } from './profile-backfill-buffer';
export type { IClickhouseSessionReplayChunk } from './replay-buffer';
export type { IClickhouseLog } from './log-buffer';
||||
193
packages/db/src/buffers/log-buffer.ts
Normal file
193
packages/db/src/buffers/log-buffer.ts
Normal file
@@ -0,0 +1,193 @@
|
||||
import { getSafeJson } from '@openpanel/json';
|
||||
import { getRedisCache } from '@openpanel/redis';
|
||||
import { ch } from '../clickhouse/client';
|
||||
import { BaseBuffer } from './base-buffer';
|
||||
|
||||
/**
 * One row of the ClickHouse `logs` table (see migration 13-add-logs).
 * Field names mirror the table columns exactly so rows can be inserted
 * as-is with format JSONEachRow.
 */
export interface IClickhouseLog {
  // Optional: the table defaults `id` to generateUUIDv4() when omitted.
  id?: string;
  project_id: string;
  device_id: string;
  profile_id: string;
  session_id: string;
  // Event time (DateTime64(9) column) — serialized as a string here;
  // assumed parseable by `new Date(...)` since processBuffer sorts on it.
  timestamp: string;
  // Time the log was observed/received (DateTime64(9) column).
  observed_at: string;
  // OTel severity: numeric level plus its text label (e.g. from the SDK).
  severity_number: number;
  severity_text: string;
  // Log message body.
  body: string;
  // OTel trace context linking the log to a trace/span, if any.
  trace_id: string;
  span_id: string;
  trace_flags: number;
  logger_name: string;
  // OTel log-level attributes (key-value pairs).
  attributes: Record<string, string>;
  // OTel resource attributes (device/app metadata).
  resource: Record<string, string>;
  // Server-enriched context: SDK, geo, and parsed user-agent fields.
  sdk_name: string;
  sdk_version: string;
  // Two-letter country code (FixedString(2) column).
  country: string;
  city: string;
  region: string;
  os: string;
  os_version: string;
  browser: string;
  browser_version: string;
  device: string;
  brand: string;
  model: string;
}
|
||||
|
||||
export class LogBuffer extends BaseBuffer {
|
||||
private batchSize = process.env.LOG_BUFFER_BATCH_SIZE
|
||||
? Number.parseInt(process.env.LOG_BUFFER_BATCH_SIZE, 10)
|
||||
: 4000;
|
||||
private chunkSize = process.env.LOG_BUFFER_CHUNK_SIZE
|
||||
? Number.parseInt(process.env.LOG_BUFFER_CHUNK_SIZE, 10)
|
||||
: 1000;
|
||||
private microBatchIntervalMs = process.env.LOG_BUFFER_MICRO_BATCH_MS
|
||||
? Number.parseInt(process.env.LOG_BUFFER_MICRO_BATCH_MS, 10)
|
||||
: 10;
|
||||
private microBatchMaxSize = process.env.LOG_BUFFER_MICRO_BATCH_SIZE
|
||||
? Number.parseInt(process.env.LOG_BUFFER_MICRO_BATCH_SIZE, 10)
|
||||
: 100;
|
||||
|
||||
private pendingLogs: IClickhouseLog[] = [];
|
||||
private flushTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
private isFlushing = false;
|
||||
private flushRetryCount = 0;
|
||||
|
||||
private queueKey = 'log_buffer:queue';
|
||||
protected bufferCounterKey = 'log_buffer:total_count';
|
||||
|
||||
constructor() {
|
||||
super({
|
||||
name: 'log',
|
||||
onFlush: async () => {
|
||||
await this.processBuffer();
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
add(log: IClickhouseLog) {
|
||||
this.pendingLogs.push(log);
|
||||
|
||||
if (this.pendingLogs.length >= this.microBatchMaxSize) {
|
||||
this.flushLocalBuffer();
|
||||
return;
|
||||
}
|
||||
|
||||
if (!this.flushTimer) {
|
||||
this.flushTimer = setTimeout(() => {
|
||||
this.flushTimer = null;
|
||||
this.flushLocalBuffer();
|
||||
}, this.microBatchIntervalMs);
|
||||
}
|
||||
}
|
||||
|
||||
public async flush() {
|
||||
if (this.flushTimer) {
|
||||
clearTimeout(this.flushTimer);
|
||||
this.flushTimer = null;
|
||||
}
|
||||
await this.flushLocalBuffer();
|
||||
}
|
||||
|
||||
private async flushLocalBuffer() {
|
||||
if (this.isFlushing || this.pendingLogs.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.isFlushing = true;
|
||||
const logsToFlush = this.pendingLogs;
|
||||
this.pendingLogs = [];
|
||||
|
||||
try {
|
||||
const redis = getRedisCache();
|
||||
const multi = redis.multi();
|
||||
for (const log of logsToFlush) {
|
||||
multi.rpush(this.queueKey, JSON.stringify(log));
|
||||
}
|
||||
multi.incrby(this.bufferCounterKey, logsToFlush.length);
|
||||
await multi.exec();
|
||||
this.flushRetryCount = 0;
|
||||
} catch (error) {
|
||||
this.pendingLogs = logsToFlush.concat(this.pendingLogs);
|
||||
this.flushRetryCount += 1;
|
||||
this.logger.warn('Failed to flush log buffer to Redis; logs re-queued', {
|
||||
error,
|
||||
logCount: logsToFlush.length,
|
||||
flushRetryCount: this.flushRetryCount,
|
||||
});
|
||||
} finally {
|
||||
this.isFlushing = false;
|
||||
if (this.pendingLogs.length > 0 && !this.flushTimer) {
|
||||
this.flushTimer = setTimeout(() => {
|
||||
this.flushTimer = null;
|
||||
this.flushLocalBuffer();
|
||||
}, this.microBatchIntervalMs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async processBuffer() {
|
||||
const redis = getRedisCache();
|
||||
|
||||
try {
|
||||
const queueLogs = await redis.lrange(this.queueKey, 0, this.batchSize - 1);
|
||||
|
||||
if (queueLogs.length === 0) {
|
||||
this.logger.debug('No logs to process');
|
||||
return;
|
||||
}
|
||||
|
||||
const logsToClickhouse: IClickhouseLog[] = [];
|
||||
for (const logStr of queueLogs) {
|
||||
const log = getSafeJson<IClickhouseLog>(logStr);
|
||||
if (log) {
|
||||
logsToClickhouse.push(log);
|
||||
}
|
||||
}
|
||||
|
||||
if (logsToClickhouse.length === 0) {
|
||||
this.logger.debug('No valid logs to process');
|
||||
return;
|
||||
}
|
||||
|
||||
logsToClickhouse.sort(
|
||||
(a, b) =>
|
||||
new Date(a.timestamp || 0).getTime() -
|
||||
new Date(b.timestamp || 0).getTime(),
|
||||
);
|
||||
|
||||
this.logger.info('Inserting logs into ClickHouse', {
|
||||
totalLogs: logsToClickhouse.length,
|
||||
chunks: Math.ceil(logsToClickhouse.length / this.chunkSize),
|
||||
});
|
||||
|
||||
for (const chunk of this.chunks(logsToClickhouse, this.chunkSize)) {
|
||||
await ch.insert({
|
||||
table: 'logs',
|
||||
values: chunk,
|
||||
format: 'JSONEachRow',
|
||||
});
|
||||
}
|
||||
|
||||
await redis
|
||||
.multi()
|
||||
.ltrim(this.queueKey, queueLogs.length, -1)
|
||||
.decrby(this.bufferCounterKey, queueLogs.length)
|
||||
.exec();
|
||||
|
||||
this.logger.info('Processed logs from Redis buffer', {
|
||||
batchSize: this.batchSize,
|
||||
logsProcessed: logsToClickhouse.length,
|
||||
});
|
||||
} catch (error) {
|
||||
this.logger.error('Error processing log Redis buffer', { error });
|
||||
}
|
||||
}
|
||||
|
||||
public getBufferSize() {
|
||||
return this.getBufferSizeWithCounter(async () => {
|
||||
const redis = getRedisCache();
|
||||
return await redis.llen(this.queueKey);
|
||||
});
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user