feat: new importer (#214)

This commit is contained in:
Carl-Gerhard Lindesvärd
2025-11-05 09:49:36 +01:00
committed by GitHub
parent b51bc8f3f6
commit 212254d31a
80 changed files with 4884 additions and 842 deletions

View File

@@ -1,3 +1,4 @@
import { Readable } from 'node:stream';
import type { ClickHouseSettings, ResponseJSON } from '@clickhouse/client';
import { ClickHouseLogLevel, createClient } from '@clickhouse/client';
import sqlstring from 'sqlstring';
@@ -23,13 +24,10 @@ type WarnLogParams = LogParams & { err?: Error };
class CustomLogger implements Logger {
trace({ message, args }: LogParams) {
logger.debug(message, args);
logger.info(message, args);
}
debug({ message, args }: LogParams) {
if (message.includes('Query:') && args?.response_status === 200) {
return;
}
logger.debug(message, args);
logger.info(message, args);
}
info({ message, args }: LogParams) {
logger.info(message, args);
@@ -56,14 +54,15 @@ export const TABLE_NAMES = {
event_property_values_mv: 'event_property_values_mv',
cohort_events_mv: 'cohort_events_mv',
sessions: 'sessions',
events_imports: 'events_imports',
};
export const CLICKHOUSE_OPTIONS: NodeClickHouseClientConfigOptions = {
max_open_connections: 30,
request_timeout: 60000,
request_timeout: 300000,
keep_alive: {
enabled: true,
idle_socket_ttl: 8000,
idle_socket_ttl: 60000,
},
compression: {
request: true,
@@ -87,7 +86,7 @@ const cleanQuery = (query?: string) =>
? query.replace(/\n/g, '').replace(/\s+/g, ' ').trim()
: undefined;
async function withRetry<T>(
export async function withRetry<T>(
operation: () => Promise<T>,
maxRetries = 3,
baseDelay = 500,
@@ -132,7 +131,34 @@ export const ch = new Proxy(originalCh, {
const value = Reflect.get(target, property, receiver);
if (property === 'insert') {
return (...args: any[]) => withRetry(() => value.apply(target, args));
return (...args: any[]) =>
withRetry(() => {
args[0].clickhouse_settings = {
// Allow bigger HTTP payloads/time to stream rows
async_insert: 1,
wait_for_async_insert: 1,
// Increase insert timeouts and buffer sizes for large batches
max_execution_time: 300,
max_insert_block_size: '500000',
max_http_get_redirects: '0',
// Ensure JSONEachRow stays efficient
input_format_parallel_parsing: 1,
// Keep long-running inserts/queries from idling out at proxies by sending progress headers
send_progress_in_http_headers: 1,
http_headers_progress_interval_ms: '50000',
// Ensure server holds the connection until the query is finished
wait_end_of_query: 1,
...args[0].clickhouse_settings,
};
return value.apply(target, args);
});
}
if (property === 'command') {
return (...args: any[]) =>
withRetry(() => {
return value.apply(target, args);
});
}
return value;
@@ -177,6 +203,34 @@ export async function chQueryWithMeta<T extends Record<string, any>>(
return response;
}
/**
 * Inserts pre-serialized CSV rows into the given ClickHouse table.
 *
 * Each entry in `rows` is expected to be one already-escaped CSV line
 * (double-quote escaping; single quotes are disabled via settings below).
 *
 * @param tableName - Target ClickHouse table.
 * @param rows - CSV lines WITHOUT trailing newlines; joined with '\n' here.
 * @throws Re-throws any insert error after logging it.
 */
export async function chInsertCSV(tableName: string, rows: string[]) {
  // Nothing to do for an empty batch — avoid streaming an empty body
  // to ClickHouse, which is wasteful and can surface as a parse error.
  if (rows.length === 0) {
    return;
  }
  try {
    const now = performance.now();
    // Stream the payload in binary mode (objectMode: false) so the client
    // sends raw bytes instead of treating each chunk as a row object
    // (same approach as EventBuffer).
    const csvStream = Readable.from(rows.join('\n'), {
      objectMode: false,
    });
    await ch.insert({
      table: tableName,
      values: csvStream,
      format: 'CSV',
      clickhouse_settings: {
        // Rows are produced with double-quote escaping only; explicitly
        // reject single-quote escaping so malformed rows fail fast.
        format_csv_allow_double_quotes: 1,
        format_csv_allow_single_quotes: 0,
      },
    });
    logger.info('CSV Insert successful', {
      elapsed: performance.now() - now,
      rows: rows.length,
    });
  } catch (error) {
    logger.error('CSV Insert failed:', error);
    // Re-throw so callers (e.g. the importer) can retry or abort the batch.
    throw error;
  }
}
export async function chQuery<T extends Record<string, any>>(
query: string,
clickhouseSettings?: ClickHouseSettings,