This commit is contained in:
Carl-Gerhard Lindesvärd
2025-11-20 13:56:58 +01:00
parent 00e25ed4b8
commit dd71fd4e11
15 changed files with 1357 additions and 190 deletions

View File

@@ -0,0 +1,112 @@
import { stdin as input, stdout as output } from 'node:process';
import { createInterface } from 'node:readline/promises';
import { parseArgs } from 'node:util';
import sqlstring from 'sqlstring';
import { ch } from '../src/clickhouse/client';
import { clix } from '../src/clickhouse/query-builder';
/**
 * Interactive helper that builds the `INSERT INTO … SELECT … FROM remote(…)`
 * statements for copying the `sessions` and `events` tables from a remote
 * ClickHouse server.
 *
 * Every setting can be supplied as a CLI flag (`--host`, `--user`,
 * `--password`, `--db`, `--start`, `--end`, `--projects`); anything missing is
 * asked for on stdin. Host, user, password and both dates are mandatory, the
 * DB name falls back to `openpanel`, and an empty project list means "all".
 *
 * The statement is currently only printed: the `ch.command` call below is
 * still commented out.
 *
 * The process exits with code 0 on success and 1 when anything failed.
 */
async function main() {
  const rl = createInterface({ input, output });
  // Stays 0 unless something throws, so shells/CI can detect a failed run.
  let exitCode = 0;
  try {
    const { values } = parseArgs({
      args: process.argv.slice(2),
      options: {
        host: { type: 'string' },
        user: { type: 'string' },
        password: { type: 'string' },
        db: { type: 'string' },
        start: { type: 'string' },
        end: { type: 'string' },
        projects: { type: 'string' },
      },
      strict: false,
    });

    // With `strict: false` a flag may come back as a boolean (e.g. `--host`
    // without a value); only real strings count as provided.
    const getArg = (val: unknown): string | undefined =>
      typeof val === 'string' ? val : undefined;

    console.log('Copy data from remote ClickHouse to local');
    console.log('---------------------------------------');

    const host =
      getArg(values.host) || (await rl.question('Remote Host (IP/Domain): '));
    if (!host) throw new Error('Host is required');

    const user = getArg(values.user) || (await rl.question('Remote User: '));
    if (!user) throw new Error('User is required');

    const password =
      getArg(values.password) || (await rl.question('Remote Password: '));
    if (!password) throw new Error('Password is required');

    const dbName =
      getArg(values.db) ||
      (await rl.question('Remote DB Name (default: openpanel): ')) ||
      'openpanel';

    const startDate =
      getArg(values.start) ||
      (await rl.question('Start Date (YYYY-MM-DD HH:mm:ss): '));
    if (!startDate) throw new Error('Start date is required');

    const endDate =
      getArg(values.end) ||
      (await rl.question('End Date (YYYY-MM-DD HH:mm:ss): '));
    if (!endDate) throw new Error('End date is required');

    const projectIdsInput =
      getArg(values.projects) ||
      (await rl.question(
        'Project IDs (comma separated, leave empty for all): ',
      ));
    // Drop blank entries so input such as "a,,b" or a trailing comma does not
    // add an empty-string project id to the IN filter.
    const projectIds = projectIdsInput
      .split(',')
      .map((s) => s.trim())
      .filter((s) => s.length > 0);

    console.log('\nStarting copy process...');

    const tables = ['sessions', 'events'];
    for (const table of tables) {
      console.log(`Processing table: ${table}`);

      // Build the SELECT part using the query builder.
      // sqlstring escapes the arguments of the remote() table function.
      const remoteTable = `remote(${sqlstring.escape(host)}, ${sqlstring.escape(dbName)}, ${sqlstring.escape(table)}, ${sqlstring.escape(user)}, ${sqlstring.escape(password)})`;

      const queryBuilder = clix(ch)
        .from(remoteTable)
        .select(['*'])
        .where('created_at', 'BETWEEN', [startDate, endDate]);
      if (projectIds.length > 0) {
        queryBuilder.where('project_id', 'IN', projectIds);
      }

      const selectQuery = queryBuilder.toSQL();
      // NOTE(review): the remote DB name is reused as the target database of
      // the INSERT — confirm the local database has the same name.
      const insertQuery = `INSERT INTO ${dbName}.${table} ${selectQuery}`;

      // NOTE(review): the printed statement contains the remote password in
      // clear text; avoid running this where stdout is captured or shared.
      console.log(`Executing: ${insertQuery}`);

      // try {
      //   await ch.command({
      //     query: insertQuery,
      //   });
      //   console.log(`✅ Copied ${table} successfully`);
      // } catch (error) {
      //   console.error(`❌ Failed to copy ${table}:`, error);
      // }
    }

    console.log('\nDone!');
  } catch (error) {
    console.error('\nError:', error);
    exitCode = 1;
  } finally {
    rl.close();
    try {
      await ch.close();
    } catch (closeError) {
      // A failing close must not prevent the explicit exit below.
      console.error('\nFailed to close ClickHouse client:', closeError);
      exitCode = 1;
    }
    process.exit(exitCode);
  }
}

main();

View File

@@ -0,0 +1,96 @@
import { TABLE_NAMES, ch } from '../src/clickhouse/client';
import { clix } from '../src/clickhouse/query-builder';
// Time window (UTC) that main() walks in one-hour steps. START_DATE is the
// first hour processed; END_DATE is exclusive (the loop stops once the current
// hour is no longer before it).
const START_DATE = new Date('2025-11-10T00:00:00Z');
const END_DATE = new Date('2025-11-20T23:00:00Z');
// Upper bound (query LIMIT) on the number of sessions updated per hour.
const SESSIONS_PER_HOUR = 2;
// Revenue bounds in cents: $10 (1000) up to and including $200 (20000).
const MIN_REVENUE = 1000;
const MAX_REVENUE = 20000;

/**
 * Draws a whole-cent revenue amount from the inclusive range
 * [MIN_REVENUE, MAX_REVENUE] using Math.random().
 */
function getRandomRevenue() {
  // Number of distinct integer values in the inclusive range.
  const span = MAX_REVENUE - MIN_REVENUE + 1;
  const offset = Math.floor(Math.random() * span);
  return offset + MIN_REVENUE;
}
/**
 * Wraps a value in single quotes for use as a SQL string literal, escaping
 * backslashes and single quotes so the value cannot terminate the literal.
 */
function quoteSqlString(value: string): string {
  const escaped = String(value).replace(/\\/g, '\\\\').replace(/'/g, "\\'");
  return `'${escaped}'`;
}

/**
 * Walks the [START_DATE, END_DATE) window hour by hour and, for each hour,
 * assigns a random revenue to up to SESSIONS_PER_HOUR sessions of the
 * `public-web` project via an `ALTER TABLE … UPDATE` statement.
 *
 * A failed update for one hour is logged and the loop continues with the next
 * hour. The ClickHouse client is closed when the walk ends, whether it
 * completed or threw.
 */
async function main() {
  console.log(
    `Starting revenue update for sessions between ${START_DATE.toISOString()} and ${END_DATE.toISOString()}`,
  );

  try {
    let currentDate = new Date(START_DATE);
    while (currentDate < END_DATE) {
      const nextHour = new Date(currentDate.getTime() + 60 * 60 * 1000);
      console.log(`Processing hour: ${currentDate.toISOString()}`);

      // 1. Pick sessions for this hour. The query has no ORDER BY, so these
      //    are simply the first rows returned under the LIMIT, not a random
      //    sample.
      const sessions = await clix(ch)
        .from(TABLE_NAMES.sessions)
        .select(['id'])
        .where('created_at', '>=', currentDate)
        .andWhere('created_at', '<', nextHour)
        .where('project_id', '=', 'public-web')
        .limit(SESSIONS_PER_HOUR)
        .execute();

      if (sessions.length === 0) {
        console.log(`No sessions found for ${currentDate.toISOString()}`);
        currentDate = nextHour;
        continue;
      }

      // Row shape comes from the query builder; only `id` was selected.
      const sessionIds: string[] = sessions.map((s: any) => s.id);
      console.log(
        `Found ${sessionIds.length} sessions to update: ${sessionIds.join(', ')}`,
      );

      // 2. Give every session its own random revenue.
      const updates = sessionIds.map((id) => ({
        id,
        revenue: getRandomRevenue(),
      }));

      // 3. Build a single mutation:
      //    ALTER TABLE sessions
      //    UPDATE revenue = multiIf(id='id1', rev1, id='id2', rev2, revenue)
      //    WHERE id IN ('id1', 'id2')
      //    The trailing `revenue` is the else-branch that keeps the current
      //    value. Ids are quoted through quoteSqlString so a quote or
      //    backslash inside an id cannot break the statement.
      const conditions = updates
        .map((u) => `id = ${quoteSqlString(u.id)}, ${u.revenue}`)
        .join(', ');
      const updateExpr = `multiIf(${conditions}, revenue)`;
      const idsStr = sessionIds.map(quoteSqlString).join(', ');
      const query = `ALTER TABLE ${TABLE_NAMES.sessions} UPDATE revenue = ${updateExpr} WHERE id IN (${idsStr})`;

      console.log(`Executing update: ${query}`);
      try {
        await ch.command({
          query,
        });
        console.log('Update command sent.');
        // Wait a bit to not overload mutations if running on a large range
        await new Promise((resolve) => setTimeout(resolve, 500));
      } catch (error) {
        // Best effort: report the failure and move on to the next hour.
        console.error('Failed to update sessions:', error);
      }

      currentDate = nextHour;
    }

    console.log('Done!');
  } finally {
    // Release the client's connections once the walk is over.
    await ch.close();
  }
}

main().catch((error) => {
  console.error('Script failed:', error);
  process.exit(1);
});