wip
This commit is contained in:
263
packages/db/code-migrations/7-order-keys.ts
Normal file
263
packages/db/code-migrations/7-order-keys.ts
Normal file
@@ -0,0 +1,263 @@
|
|||||||
|
import fs from 'node:fs';
|
||||||
|
import path from 'node:path';
|
||||||
|
import {
|
||||||
|
chMigrationClient,
|
||||||
|
createTable,
|
||||||
|
moveDataBetweenTables,
|
||||||
|
renameTable,
|
||||||
|
runClickhouseMigrationCommands,
|
||||||
|
} from '../src/clickhouse/migration';
|
||||||
|
import { getIsCluster } from './helpers';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Migration to update ORDER BY keys for events and sessions tables.
|
||||||
|
*
|
||||||
|
* Changes:
|
||||||
|
* - Events: Remove profile_id from ORDER BY, add created_at for better ordering
|
||||||
|
* Old: ['project_id', 'toDate(created_at)', 'profile_id', 'name']
|
||||||
|
* New: ['project_id', 'toDate(created_at)', 'created_at', 'name']
|
||||||
|
*
|
||||||
|
* - Sessions: Remove profile_id from ORDER BY, reorder to match query patterns
|
||||||
|
* Old: ['project_id', 'id', 'toDate(created_at)', 'profile_id']
|
||||||
|
* New: ['project_id', 'toDate(created_at)', 'created_at', 'id']
|
||||||
|
*
|
||||||
|
* Rationale:
|
||||||
|
* - project_id: Always filtered first (100% of queries)
|
||||||
|
* - toDate(created_at): Almost always filtered (95%+ of queries), good for partitioning
|
||||||
|
* - created_at: Helps with ordering within same day, matches ORDER BY patterns in queries
|
||||||
|
* - name (events): Frequently filtered (screen_view, session_start, etc.), good selectivity
|
||||||
|
* - id (sessions): Used for ordering and uniqueness in session queries
|
||||||
|
*/
|
||||||
|
export async function up() {
|
||||||
|
const isClustered = getIsCluster();
|
||||||
|
|
||||||
|
const sqls: string[] = [];
|
||||||
|
|
||||||
|
const eventTables = createTable({
|
||||||
|
name: 'events_new_20251123',
|
||||||
|
columns: [
|
||||||
|
'`id` UUID DEFAULT generateUUIDv4()',
|
||||||
|
'`name` LowCardinality(String)',
|
||||||
|
'`sdk_name` LowCardinality(String)',
|
||||||
|
'`sdk_version` LowCardinality(String)',
|
||||||
|
'`device_id` String CODEC(ZSTD(3))',
|
||||||
|
'`profile_id` String CODEC(ZSTD(3))',
|
||||||
|
'`project_id` String CODEC(ZSTD(3))',
|
||||||
|
'`session_id` String CODEC(LZ4)',
|
||||||
|
'`path` String CODEC(ZSTD(3))',
|
||||||
|
'`origin` String CODEC(ZSTD(3))',
|
||||||
|
'`referrer` String CODEC(ZSTD(3))',
|
||||||
|
'`referrer_name` String CODEC(ZSTD(3))',
|
||||||
|
'`referrer_type` LowCardinality(String)',
|
||||||
|
'`revenue` UInt64',
|
||||||
|
'`duration` UInt64 CODEC(Delta(4), LZ4)',
|
||||||
|
'`properties` Map(String, String) CODEC(ZSTD(3))',
|
||||||
|
'`created_at` DateTime64(3) CODEC(DoubleDelta, ZSTD(3))',
|
||||||
|
'`country` LowCardinality(FixedString(2))',
|
||||||
|
'`city` String',
|
||||||
|
'`region` LowCardinality(String)',
|
||||||
|
'`longitude` Nullable(Float32) CODEC(Gorilla, LZ4)',
|
||||||
|
'`latitude` Nullable(Float32) CODEC(Gorilla, LZ4)',
|
||||||
|
'`os` LowCardinality(String)',
|
||||||
|
'`os_version` LowCardinality(String)',
|
||||||
|
'`browser` LowCardinality(String)',
|
||||||
|
'`browser_version` LowCardinality(String)',
|
||||||
|
'`device` LowCardinality(String)',
|
||||||
|
'`brand` LowCardinality(String)',
|
||||||
|
'`model` LowCardinality(String)',
|
||||||
|
'`imported_at` Nullable(DateTime) CODEC(Delta(4), LZ4)',
|
||||||
|
],
|
||||||
|
indices: [
|
||||||
|
'INDEX idx_name name TYPE bloom_filter GRANULARITY 1',
|
||||||
|
"INDEX idx_properties_bounce properties['__bounce'] TYPE set(3) GRANULARITY 1",
|
||||||
|
'INDEX idx_origin origin TYPE bloom_filter(0.05) GRANULARITY 1',
|
||||||
|
'INDEX idx_path path TYPE bloom_filter(0.01) GRANULARITY 1',
|
||||||
|
],
|
||||||
|
// New ORDER BY: project_id, toDate(created_at), created_at, name
|
||||||
|
// Removed profile_id, added created_at for better ordering within same day
|
||||||
|
orderBy: ['project_id', 'toDate(created_at)', 'created_at', 'name'],
|
||||||
|
partitionBy: 'toYYYYMM(created_at)',
|
||||||
|
settings: {
|
||||||
|
index_granularity: 8192,
|
||||||
|
// For lightweight updates
|
||||||
|
enable_block_offset_column: 1,
|
||||||
|
enable_block_number_column: 1,
|
||||||
|
},
|
||||||
|
distributionHash:
|
||||||
|
'cityHash64(project_id, toString(toStartOfHour(created_at)))',
|
||||||
|
replicatedVersion: '1',
|
||||||
|
isClustered,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Step 1: Create temporary tables with new ORDER BY keys
|
||||||
|
// Events table with new ORDER BY
|
||||||
|
sqls.push(...eventTables);
|
||||||
|
|
||||||
|
const sessionTables = createTable({
|
||||||
|
name: 'sessions_new_20251123',
|
||||||
|
engine: 'VersionedCollapsingMergeTree(sign, version)',
|
||||||
|
columns: [
|
||||||
|
'`id` String',
|
||||||
|
'`project_id` String CODEC(ZSTD(3))',
|
||||||
|
'`profile_id` String CODEC(ZSTD(3))',
|
||||||
|
'`device_id` String CODEC(ZSTD(3))',
|
||||||
|
'`created_at` DateTime64(3) CODEC(DoubleDelta, ZSTD(3))',
|
||||||
|
'`ended_at` DateTime64(3) CODEC(DoubleDelta, ZSTD(3))',
|
||||||
|
'`is_bounce` Bool',
|
||||||
|
'`entry_origin` LowCardinality(String)',
|
||||||
|
'`entry_path` String CODEC(ZSTD(3))',
|
||||||
|
'`exit_origin` LowCardinality(String)',
|
||||||
|
'`exit_path` String CODEC(ZSTD(3))',
|
||||||
|
'`screen_view_count` Int32',
|
||||||
|
'`revenue` Float64',
|
||||||
|
'`event_count` Int32',
|
||||||
|
'`duration` UInt32',
|
||||||
|
'`country` LowCardinality(FixedString(2))',
|
||||||
|
'`region` LowCardinality(String)',
|
||||||
|
'`city` String',
|
||||||
|
'`longitude` Nullable(Float32) CODEC(Gorilla, LZ4)',
|
||||||
|
'`latitude` Nullable(Float32) CODEC(Gorilla, LZ4)',
|
||||||
|
'`device` LowCardinality(String)',
|
||||||
|
'`brand` LowCardinality(String)',
|
||||||
|
'`model` LowCardinality(String)',
|
||||||
|
'`browser` LowCardinality(String)',
|
||||||
|
'`browser_version` LowCardinality(String)',
|
||||||
|
'`os` LowCardinality(String)',
|
||||||
|
'`os_version` LowCardinality(String)',
|
||||||
|
'`utm_medium` String CODEC(ZSTD(3))',
|
||||||
|
'`utm_source` String CODEC(ZSTD(3))',
|
||||||
|
'`utm_campaign` String CODEC(ZSTD(3))',
|
||||||
|
'`utm_content` String CODEC(ZSTD(3))',
|
||||||
|
'`utm_term` String CODEC(ZSTD(3))',
|
||||||
|
'`referrer` String CODEC(ZSTD(3))',
|
||||||
|
'`referrer_name` String CODEC(ZSTD(3))',
|
||||||
|
'`referrer_type` LowCardinality(String)',
|
||||||
|
'`sign` Int8',
|
||||||
|
'`version` UInt64',
|
||||||
|
'`properties` Map(String, String) CODEC(ZSTD(3))',
|
||||||
|
],
|
||||||
|
// New ORDER BY: project_id, toDate(created_at), created_at, id
|
||||||
|
// Removed profile_id, reordered to match query patterns (date first, then id)
|
||||||
|
orderBy: ['project_id', 'toDate(created_at)', 'created_at'],
|
||||||
|
partitionBy: 'toYYYYMM(created_at)',
|
||||||
|
settings: {
|
||||||
|
index_granularity: 8192,
|
||||||
|
},
|
||||||
|
distributionHash:
|
||||||
|
'cityHash64(project_id, toString(toStartOfHour(created_at)))',
|
||||||
|
replicatedVersion: '1',
|
||||||
|
isClustered,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Sessions table with new ORDER BY
|
||||||
|
sqls.push(...sessionTables);
|
||||||
|
|
||||||
|
const firstEventDateResponse = await chMigrationClient.query({
|
||||||
|
query: 'SELECT min(created_at) as created_at FROM events',
|
||||||
|
format: 'JSONEachRow',
|
||||||
|
});
|
||||||
|
const firstEventDateJson = await firstEventDateResponse.json<{
|
||||||
|
created_at: string;
|
||||||
|
}>();
|
||||||
|
const firstEventDate = new Date(firstEventDateJson[0]?.created_at ?? '');
|
||||||
|
if (firstEventDate) {
|
||||||
|
// Step 2: Copy data from old tables to new tables (partitioned by month for efficiency)
|
||||||
|
sqls.push(
|
||||||
|
...moveDataBetweenTables({
|
||||||
|
from: 'events',
|
||||||
|
to: 'events_new_20251123',
|
||||||
|
batch: {
|
||||||
|
startDate: firstEventDate,
|
||||||
|
column: 'toDate(created_at)',
|
||||||
|
interval: 'month',
|
||||||
|
transform: (date: Date) => {
|
||||||
|
const year = date.getFullYear();
|
||||||
|
const month = String(date.getMonth() + 1).padStart(2, '0');
|
||||||
|
return `${year}-${month}-01`;
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const firstSessionDateResponse = await chMigrationClient.query({
|
||||||
|
query: 'SELECT min(created_at) as created_at FROM sessions',
|
||||||
|
format: 'JSONEachRow',
|
||||||
|
});
|
||||||
|
const firstSessionDateJson = await firstSessionDateResponse.json<{
|
||||||
|
created_at: string;
|
||||||
|
}>();
|
||||||
|
|
||||||
|
const firstSessionDate = new Date(firstSessionDateJson[0]?.created_at ?? '');
|
||||||
|
if (firstSessionDate) {
|
||||||
|
sqls.push(
|
||||||
|
...moveDataBetweenTables({
|
||||||
|
from: 'sessions',
|
||||||
|
to: 'sessions_new_20251123',
|
||||||
|
batch: {
|
||||||
|
startDate: firstSessionDate,
|
||||||
|
column: 'toDate(created_at)',
|
||||||
|
interval: 'month',
|
||||||
|
transform: (date: Date) => {
|
||||||
|
const year = date.getFullYear();
|
||||||
|
const month = String(date.getMonth() + 1).padStart(2, '0');
|
||||||
|
return `${year}-${month}-01`;
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
sqls.push(
|
||||||
|
...renameTable({ from: 'events', to: 'events_20251123', isClustered }),
|
||||||
|
);
|
||||||
|
sqls.push(
|
||||||
|
...renameTable({ from: 'sessions', to: 'sessions_20251123', isClustered }),
|
||||||
|
);
|
||||||
|
|
||||||
|
if (isClustered && sessionTables[1] && eventTables[1]) {
|
||||||
|
sqls.push(
|
||||||
|
// Drop temporary DISTRIBUTED tables (will be recreated)
|
||||||
|
'DROP TABLE IF EXISTS events_new_20251123 ON CLUSTER "{cluster}"',
|
||||||
|
'DROP TABLE IF EXISTS sessions_new_20251123 ON CLUSTER "{cluster}"',
|
||||||
|
// Rename new tables to correct names
|
||||||
|
'RENAME TABLE events_new_20251123_replicated TO events_replicated ON CLUSTER "{cluster}"',
|
||||||
|
'RENAME TABLE sessions_new_20251123_replicated TO sessions_replicated ON CLUSTER "{cluster}"',
|
||||||
|
// Create new distributed tables
|
||||||
|
eventTables[1].replaceAll('events_new_20251123', 'events'), // creates a new distributed table
|
||||||
|
sessionTables[1].replaceAll('sessions_new_20251123', 'sessions'), // creates a new distributed table
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
sqls.push(
|
||||||
|
...renameTable({
|
||||||
|
from: 'events_new_20251123',
|
||||||
|
to: 'events',
|
||||||
|
isClustered,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
sqls.push(
|
||||||
|
...renameTable({
|
||||||
|
from: 'sessions_new_20251123',
|
||||||
|
to: 'sessions',
|
||||||
|
isClustered,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fs.writeFileSync(
|
||||||
|
path.join(__filename.replace('.ts', '.sql')),
|
||||||
|
sqls
|
||||||
|
.map((sql) =>
|
||||||
|
sql
|
||||||
|
.trim()
|
||||||
|
.replace(/;$/, '')
|
||||||
|
.replace(/\n{2,}/g, '\n')
|
||||||
|
.concat(';'),
|
||||||
|
)
|
||||||
|
.join('\n\n---\n\n'),
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!process.argv.includes('--dry')) {
|
||||||
|
await runClickhouseMigrationCommands(sqls);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -56,7 +56,9 @@ async function migrate() {
|
|||||||
|
|
||||||
if (!getIsSelfHosting()) {
|
if (!getIsSelfHosting()) {
|
||||||
printBoxMessage('🕒 Migrations starts in 10 seconds', []);
|
printBoxMessage('🕒 Migrations starts in 10 seconds', []);
|
||||||
await new Promise((resolve) => setTimeout(resolve, 10000));
|
if (!getIsDry()) {
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 10000));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (migration) {
|
if (migration) {
|
||||||
|
|||||||
@@ -247,32 +247,114 @@ export function moveDataBetweenTables({
|
|||||||
let currentDate = endDate;
|
let currentDate = endDate;
|
||||||
const interval = batch.interval || 'day';
|
const interval = batch.interval || 'day';
|
||||||
|
|
||||||
while (currentDate > startDate) {
|
// Helper function to get the start of the week (Monday) for a given date
|
||||||
|
const getWeekStart = (date: Date): Date => {
|
||||||
|
const d = new Date(date);
|
||||||
|
const day = d.getDay();
|
||||||
|
const diff = d.getDate() - day + (day === 0 ? -6 : 1); // Adjust to Monday
|
||||||
|
d.setDate(diff);
|
||||||
|
d.setHours(0, 0, 0, 0); // Normalize to start of day
|
||||||
|
return d;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Helper function to compare dates based on interval
|
||||||
|
const shouldContinue = (
|
||||||
|
current: Date,
|
||||||
|
start: Date,
|
||||||
|
intervalType: string,
|
||||||
|
): boolean => {
|
||||||
|
if (intervalType === 'month') {
|
||||||
|
// For months, compare by year and month
|
||||||
|
// Continue if current month is >= start month
|
||||||
|
const currentYear = current.getFullYear();
|
||||||
|
const currentMonth = current.getMonth();
|
||||||
|
const startYear = start.getFullYear();
|
||||||
|
const startMonth = start.getMonth();
|
||||||
|
return (
|
||||||
|
currentYear > startYear ||
|
||||||
|
(currentYear === startYear && currentMonth >= startMonth)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (intervalType === 'week') {
|
||||||
|
// For weeks, compare by week start dates
|
||||||
|
const currentWeekStart = getWeekStart(current);
|
||||||
|
const startWeekStart = getWeekStart(start);
|
||||||
|
return currentWeekStart >= startWeekStart;
|
||||||
|
}
|
||||||
|
return current > start;
|
||||||
|
};
|
||||||
|
|
||||||
|
while (shouldContinue(currentDate, startDate, interval)) {
|
||||||
const previousDate = new Date(currentDate);
|
const previousDate = new Date(currentDate);
|
||||||
|
|
||||||
switch (interval) {
|
switch (interval) {
|
||||||
case 'month':
|
case 'month':
|
||||||
previousDate.setMonth(previousDate.getMonth() - 1);
|
previousDate.setMonth(previousDate.getMonth() - 1);
|
||||||
break;
|
// If we've gone below startDate's month, adjust to start of startDate's month
|
||||||
case 'week':
|
// This ensures we generate SQL for the month containing startDate
|
||||||
previousDate.setDate(previousDate.getDate() - 7);
|
if (
|
||||||
// Ensure we don't go below startDate
|
previousDate.getFullYear() < startDate.getFullYear() ||
|
||||||
if (previousDate < startDate) {
|
(previousDate.getFullYear() === startDate.getFullYear() &&
|
||||||
previousDate.setTime(startDate.getTime());
|
previousDate.getMonth() < startDate.getMonth())
|
||||||
|
) {
|
||||||
|
previousDate.setFullYear(startDate.getFullYear());
|
||||||
|
previousDate.setMonth(startDate.getMonth());
|
||||||
|
previousDate.setDate(1);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 'week': {
|
||||||
|
previousDate.setDate(previousDate.getDate() - 7);
|
||||||
|
// If we've gone below startDate's week, adjust to start of startDate's week
|
||||||
|
const startWeekStart = getWeekStart(startDate);
|
||||||
|
const prevWeekStart = getWeekStart(previousDate);
|
||||||
|
if (prevWeekStart < startWeekStart) {
|
||||||
|
previousDate.setTime(startWeekStart.getTime());
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
// day
|
// day
|
||||||
default:
|
default:
|
||||||
previousDate.setDate(previousDate.getDate() - 1);
|
previousDate.setDate(previousDate.getDate() - 1);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For monthly/weekly intervals with transform, we need to use the start of the next period for the upper bound
|
||||||
|
let upperBoundDate = currentDate;
|
||||||
|
if (interval === 'month' && batch.transform) {
|
||||||
|
const nextMonth = new Date(currentDate);
|
||||||
|
nextMonth.setMonth(nextMonth.getMonth() + 1);
|
||||||
|
nextMonth.setDate(1);
|
||||||
|
upperBoundDate = nextMonth;
|
||||||
|
} else if (interval === 'week' && batch.transform) {
|
||||||
|
const nextWeek = new Date(currentDate);
|
||||||
|
nextWeek.setDate(nextWeek.getDate() + 7);
|
||||||
|
const nextWeekStart = getWeekStart(nextWeek);
|
||||||
|
upperBoundDate = nextWeekStart;
|
||||||
|
}
|
||||||
|
|
||||||
const sql = `INSERT INTO ${to}
|
const sql = `INSERT INTO ${to}
|
||||||
SELECT * FROM ${from}
|
SELECT * FROM ${from}
|
||||||
WHERE ${batch.column} > '${batch.transform ? batch.transform(previousDate) : formatClickhouseDate(previousDate, true)}'
|
WHERE ${batch.column} > '${batch.transform ? batch.transform(previousDate) : formatClickhouseDate(previousDate, true)}'
|
||||||
AND ${batch.column} <= '${batch.transform ? batch.transform(currentDate) : formatClickhouseDate(currentDate, true)}'`;
|
AND ${batch.column} <= '${batch.transform ? batch.transform(upperBoundDate) : formatClickhouseDate(upperBoundDate, true)}'`;
|
||||||
sqls.push(sql);
|
sqls.push(sql);
|
||||||
|
|
||||||
|
// For monthly/weekly intervals, stop if we've reached the start period
|
||||||
|
if (interval === 'month') {
|
||||||
|
const prevYear = previousDate.getFullYear();
|
||||||
|
const prevMonth = previousDate.getMonth();
|
||||||
|
const startYear = startDate.getFullYear();
|
||||||
|
const startMonth = startDate.getMonth();
|
||||||
|
if (prevYear === startYear && prevMonth === startMonth) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else if (interval === 'week') {
|
||||||
|
const prevWeekStart = getWeekStart(previousDate);
|
||||||
|
const startWeekStart = getWeekStart(startDate);
|
||||||
|
if (prevWeekStart.getTime() === startWeekStart.getTime()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
currentDate = previousDate;
|
currentDate = previousDate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user