Files
stats/packages/db/code-migrations/3-init-ch.ts

373 lines
11 KiB
TypeScript

import fs from 'node:fs';
import path from 'node:path';
import { formatClickhouseDate } from '../src/clickhouse/client';
import {
createDatabase,
createMaterializedView,
createTable,
dropTable,
getExistingTables,
moveDataBetweenTables,
renameTable,
runClickhouseMigrationCommands,
} from '../src/clickhouse/migration';
import { printBoxMessage } from './helpers';
/**
 * Code migration that (re)creates the ClickHouse schema and, for existing
 * self-hosted installations, carries data over from the previous tables.
 *
 * Steps, in order:
 *  1. Inspect the existing tables and derive which kind of installation this
 *     is (clustered, self-hosted pre-cluster, self-hosted post-cluster).
 *  2. For a self-hosted installation that already has tables, queue renames
 *     of every table to `<name>_tmp` so the new tables can take their names.
 *  3. Queue creation of the database, the tables and the materialized views.
 *  4. Queue statements that copy data from the renamed tables into the new
 *     ones.
 *  5. Write every queued statement to `3-init-ch.sql` next to this file.
 *  6. Execute the statements unless `--dry` was passed on the command line.
 *  7. Print the commands needed to drop the leftover `_tmp` tables.
 *
 * Reads `process.env.SELF_HOSTING` and `process.argv`.
 */
export async function up(): Promise<void> {
  const replicatedVersion = '1';
  const existingTables = await getExistingTables();

  const hasSelfHosting = existingTables.includes('self_hosting_distributed');
  const hasEvents = existingTables.includes('events_distributed');
  const hasEventsV2 = existingTables.includes('events_v2');
  const hasEventsBots = existingTables.includes('events_bots_distributed');
  const hasProfiles = existingTables.includes('profiles_distributed');
  const hasProfileAliases = existingTables.includes(
    'profile_aliases_distributed',
  );

  const isSelfHosting = !!process.env.SELF_HOSTING;
  // Anything that is not self-hosted is treated as a clustered deployment.
  const isClustered = !isSelfHosting;
  const isSelfHostingPostCluster =
    existingTables.includes('events_replicated') && isSelfHosting;
  const isSelfHostingPreCluster =
    !isSelfHostingPostCluster &&
    existingTables.includes('events_v2') &&
    isSelfHosting;
  const isSelfHostingOld = existingTables.length !== 0 && isSelfHosting;

  const sqls: string[] = [];

  // Move existing tables out of the way (to `<name>_tmp`) if they exist.
  // Tables that already are `_tmp` tables, or that already have a `_tmp`
  // counterpart, are left untouched.
  if (isSelfHostingOld) {
    sqls.push(
      ...existingTables
        .filter((table) => {
          return (
            !table.endsWith('_tmp') && !existingTables.includes(`${table}_tmp`)
          );
        })
        .flatMap((table) => {
          return renameTable({
            from: table,
            to: `${table}_tmp`,
            isClustered: false,
          });
        }),
    );
  }

  sqls.push(
    createDatabase('openpanel', isClustered),
    // Create new tables
    ...createTable({
      name: 'self_hosting',
      columns: ['`created_at` Date', '`domain` String', '`count` UInt64'],
      orderBy: ['domain', 'created_at'],
      partitionBy: 'toYYYYMM(created_at)',
      distributionHash: 'cityHash64(domain)',
      replicatedVersion,
      isClustered,
    }),
    ...createTable({
      name: 'events',
      columns: [
        '`id` UUID DEFAULT generateUUIDv4()',
        '`name` LowCardinality(String)',
        '`sdk_name` LowCardinality(String)',
        '`sdk_version` LowCardinality(String)',
        '`device_id` String CODEC(ZSTD(3))',
        '`profile_id` String CODEC(ZSTD(3))',
        '`project_id` String CODEC(ZSTD(3))',
        '`session_id` String CODEC(LZ4)',
        '`path` String CODEC(ZSTD(3))',
        '`origin` String CODEC(ZSTD(3))',
        '`referrer` String CODEC(ZSTD(3))',
        '`referrer_name` String CODEC(ZSTD(3))',
        '`referrer_type` LowCardinality(String)',
        '`duration` UInt64 CODEC(Delta(4), LZ4)',
        '`properties` Map(String, String) CODEC(ZSTD(3))',
        '`created_at` DateTime64(3) CODEC(DoubleDelta, ZSTD(3))',
        '`country` LowCardinality(FixedString(2))',
        '`city` String',
        '`region` LowCardinality(String)',
        '`longitude` Nullable(Float32) CODEC(Gorilla, LZ4)',
        '`latitude` Nullable(Float32) CODEC(Gorilla, LZ4)',
        '`os` LowCardinality(String)',
        '`os_version` LowCardinality(String)',
        '`browser` LowCardinality(String)',
        '`browser_version` LowCardinality(String)',
        '`device` LowCardinality(String)',
        '`brand` LowCardinality(String)',
        '`model` LowCardinality(String)',
        '`imported_at` Nullable(DateTime) CODEC(Delta(4), LZ4)',
      ],
      indices: [
        'INDEX idx_name name TYPE bloom_filter GRANULARITY 1',
        "INDEX idx_properties_bounce properties['__bounce'] TYPE set(3) GRANULARITY 1",
        'INDEX idx_origin origin TYPE bloom_filter(0.05) GRANULARITY 1',
        'INDEX idx_path path TYPE bloom_filter(0.01) GRANULARITY 1',
      ],
      orderBy: ['project_id', 'toDate(created_at)', 'profile_id', 'name'],
      partitionBy: 'toYYYYMM(created_at)',
      settings: {
        index_granularity: 8192,
      },
      distributionHash:
        'cityHash64(project_id, toString(toStartOfHour(created_at)))',
      replicatedVersion,
      isClustered,
    }),
    ...createTable({
      name: 'events_bots',
      columns: [
        '`id` UUID DEFAULT generateUUIDv4()',
        '`project_id` String',
        '`name` String',
        '`type` String',
        '`path` String',
        '`created_at` DateTime64(3)',
      ],
      orderBy: ['project_id', 'created_at'],
      settings: {
        index_granularity: 8192,
      },
      distributionHash:
        'cityHash64(project_id, toString(toStartOfDay(created_at)))',
      replicatedVersion,
      isClustered,
    }),
    ...createTable({
      name: 'profiles',
      columns: [
        '`id` String CODEC(ZSTD(3))',
        '`is_external` Bool',
        '`first_name` String CODEC(ZSTD(3))',
        '`last_name` String CODEC(ZSTD(3))',
        '`email` String CODEC(ZSTD(3))',
        '`avatar` String CODEC(ZSTD(3))',
        '`properties` Map(String, String) CODEC(ZSTD(3))',
        '`project_id` String CODEC(ZSTD(3))',
        '`created_at` DateTime64(3) CODEC(Delta(4), LZ4)',
      ],
      indices: [
        'INDEX idx_first_name first_name TYPE bloom_filter GRANULARITY 1',
        'INDEX idx_last_name last_name TYPE bloom_filter GRANULARITY 1',
        'INDEX idx_email email TYPE bloom_filter GRANULARITY 1',
      ],
      engine: 'ReplacingMergeTree(created_at)',
      orderBy: ['project_id', 'id'],
      partitionBy: 'toYYYYMM(created_at)',
      settings: {
        index_granularity: 8192,
      },
      distributionHash: 'cityHash64(project_id)',
      replicatedVersion,
      isClustered,
    }),
    ...createTable({
      name: 'profile_aliases',
      columns: [
        '`project_id` String',
        '`profile_id` String',
        '`alias` String',
        '`created_at` DateTime',
      ],
      orderBy: ['project_id', 'profile_id', 'alias', 'created_at'],
      settings: {
        index_granularity: 8192,
      },
      distributionHash: 'cityHash64(project_id)',
      replicatedVersion,
      isClustered,
    }),
    // Create materialized views
    ...createMaterializedView({
      name: 'dau_mv',
      tableName: 'events',
      orderBy: ['project_id', 'date'],
      partitionBy: 'toYYYYMMDD(date)',
      query: `SELECT
toDate(created_at) as date,
uniqState(profile_id) as profile_id,
project_id
FROM {events}
GROUP BY date, project_id`,
      distributionHash: 'cityHash64(project_id, date)',
      replicatedVersion,
      isClustered,
    }),
    ...createMaterializedView({
      name: 'cohort_events_mv',
      tableName: 'events',
      orderBy: ['project_id', 'name', 'created_at', 'profile_id'],
      query: `SELECT
project_id,
name,
toDate(created_at) AS created_at,
profile_id,
COUNT() AS event_count
FROM {events}
WHERE profile_id != device_id
GROUP BY project_id, name, created_at, profile_id`,
      distributionHash: 'cityHash64(project_id, toString(created_at))',
      replicatedVersion,
      isClustered,
    }),
    ...createMaterializedView({
      name: 'distinct_event_names_mv',
      tableName: 'events',
      orderBy: ['project_id', 'name', 'created_at'],
      query: `SELECT
project_id,
name,
max(created_at) AS created_at,
count() AS event_count
FROM {events}
GROUP BY project_id, name`,
      distributionHash: 'cityHash64(name, created_at)',
      replicatedVersion,
      isClustered,
    }),
    ...createMaterializedView({
      name: 'event_property_values_mv',
      tableName: 'events',
      orderBy: ['project_id', 'name', 'property_key', 'property_value'],
      query: `SELECT
project_id,
name,
key_value.keys as property_key,
key_value.values as property_value,
created_at
FROM (
SELECT
project_id,
name,
untuple(arrayJoin(properties)) as key_value,
max(created_at) as created_at
FROM {events}
GROUP BY project_id, name, key_value
)
WHERE property_value != ''
AND property_key != ''
AND property_key NOT IN ('__duration_from', '__properties_from')
GROUP BY project_id, name, property_key, property_value, created_at`,
      distributionHash: 'cityHash64(project_id, name)',
      replicatedVersion,
      isClustered,
    }),
  );

  if (isSelfHostingPostCluster) {
    sqls.push(
      // Move data between tables. The sources are the `*_replicated` tables
      // after they were renamed to `*_replicated_tmp` above.
      ...(hasSelfHosting
        ? moveDataBetweenTables({
            from: 'self_hosting_replicated_tmp',
            to: 'self_hosting',
            batch: {
              column: 'created_at',
              interval: 'month',
              // `self_hosting.created_at` is a `Date` column, unlike the
              // other tables, so the batch boundary gets a custom format.
              // NOTE(review): the second argument presumably selects a
              // date-only format; confirm against formatClickhouseDate.
              transform: (date) => {
                return formatClickhouseDate(date, true);
              },
            },
          })
        : []),
      ...(hasProfileAliases
        ? moveDataBetweenTables({
            from: 'profile_aliases_replicated_tmp',
            to: 'profile_aliases',
            batch: {
              column: 'created_at',
              interval: 'month',
            },
          })
        : []),
      ...(hasEventsBots
        ? moveDataBetweenTables({
            from: 'events_bots_replicated_tmp',
            to: 'events_bots',
            batch: {
              column: 'created_at',
              interval: 'month',
            },
          })
        : []),
      ...(hasProfiles
        ? moveDataBetweenTables({
            from: 'profiles_replicated_tmp',
            to: 'profiles',
            batch: {
              column: 'created_at',
              interval: 'month',
            },
          })
        : []),
      ...(hasEvents
        ? moveDataBetweenTables({
            from: 'events_replicated_tmp',
            to: 'events',
            batch: {
              column: 'created_at',
              interval: 'week',
            },
          })
        : []),
    );
  }

  if (isSelfHostingPreCluster) {
    // `events_v2` is renamed to `events_v2_tmp` by the rename step above
    // (pre-cluster implies `isSelfHostingOld`), so the data has to be read
    // from the renamed table. The rename is skipped only when
    // `events_v2_tmp` already existed; in that case `events_v2` keeps its
    // name and remains the source.
    const eventsV2Source = existingTables.includes('events_v2_tmp')
      ? 'events_v2'
      : 'events_v2_tmp';

    sqls.push(
      ...(hasEventsV2
        ? moveDataBetweenTables({
            from: eventsV2Source,
            to: 'events',
            batch: {
              column: 'created_at',
              interval: 'week',
            },
          })
        : []),
    );
  }

  // Persist the generated statements next to this file. Each statement is
  // trimmed, blank lines inside it are collapsed, and it is terminated with
  // exactly one semicolon.
  fs.writeFileSync(
    path.join(__dirname, '3-init-ch.sql'),
    sqls
      .map((sql) =>
        sql
          .trim()
          .replace(/;$/, '')
          .replace(/\n{2,}/g, '\n')
          .concat(';'),
      )
      .join('\n\n---\n\n'),
  );

  printBoxMessage('Will start migration for self-hosting setup.', [
    'This will move all data from the old tables to the new ones.',
    'This might take a while depending on your server.',
  ]);

  // `--dry` only generates the SQL file without touching the database.
  if (!process.argv.includes('--dry')) {
    await runClickhouseMigrationCommands(sqls);
  }

  if (isSelfHostingOld) {
    // After the rename step the leftover tables are: every table that
    // already ended in `_tmp`, plus `<name>_tmp` for every other table
    // (either freshly renamed or pre-existing). De-duplicate so each table is
    // listed once and no `<name>_tmp_tmp` command is printed.
    const tmpTables = Array.from(
      new Set(
        existingTables.map((table) =>
          table.endsWith('_tmp') ? table : `${table}_tmp`,
        ),
      ),
    );

    printBoxMessage(
      '⚠️ Please run the following command to clean up unused tables:',
      tmpTables.map(
        (tmpTable) =>
          `docker compose exec -it op-ch clickhouse-client --query "${dropTable(
            `openpanel.${tmpTable}`,
            false,
          )}"`,
      ),
    );
  }
}