fix: view profiles, improve chart service

This commit is contained in:
Carl-Gerhard Lindesvärd
2025-11-27 13:12:14 +01:00
parent d4e3470f7e
commit 620904b4d4
4 changed files with 158 additions and 94 deletions

View File

@@ -95,6 +95,7 @@ export const CLICKHOUSE_OPTIONS: NodeClickHouseClientConfigOptions = {
}, },
clickhouse_settings: { clickhouse_settings: {
date_time_input_format: 'best_effort', date_time_input_format: 'best_effort',
query_plan_convert_any_join_to_semi_or_anti_join: 0,
}, },
log: { log: {
LoggerClass: CustomLogger, LoggerClass: CustomLogger,

View File

@@ -1,3 +1,4 @@
import { uniq } from 'ramda';
import sqlstring from 'sqlstring'; import sqlstring from 'sqlstring';
import { DateTime, stripLeadingAndTrailingSlashes } from '@openpanel/common'; import { DateTime, stripLeadingAndTrailingSlashes } from '@openpanel/common';
@@ -74,6 +75,8 @@ export function getChartSql({
getOrderBy, getOrderBy,
getGroupBy, getGroupBy,
getFill, getFill,
getWith,
with: addCte,
} = createSqlBuilder(); } = createSqlBuilder();
sb.where = getEventFiltersWhereClause(event.filters); sb.where = getEventFiltersWhereClause(event.filters);
@@ -93,14 +96,95 @@ export function getChartSql({
breakdown.name.startsWith('profile.'), breakdown.name.startsWith('profile.'),
); );
// Renders the builder's current WHERE conditions minus the `bar` entry.
// Declared before the CTE definitions so they can call it; it reads
// `sb.where` at call time and never mutates it.
const getWhereWithoutBar = () => {
  // Shallow copy of every condition except `bar`
  const { bar: _omittedBar, ...remainingConditions } = sb.where;
  if (Object.keys(remainingConditions).length === 0) {
    return '';
  }
  return `WHERE ${join(remainingConditions, ' AND ')}`;
};
// Collects the top-level profile columns referenced by the event filters and
// the breakdowns (e.g. 'properties' for 'profile.properties.os').
// Returns the column names in first-seen order, always starting with 'id'.
// Names whose top-level segment is not in the list below are ignored.
const getProfileFields = () => {
  // Columns other than `id` that may be selected
  const selectableFields = ['properties', 'email', 'first_name', 'last_name'];
  // `id` is always needed for the join
  const fields = new Set<string>(['id']);
  // Filters are visited before breakdowns, which fixes the output order
  const profileNames = [...event.filters, ...breakdowns]
    .map((item) => item.name)
    .filter((name) => name.startsWith('profile.'));
  for (const name of profileNames) {
    const fieldName = name.replace('profile.', '').split('.')[0];
    if (fieldName && selectableFields.includes(fieldName)) {
      fields.add(fieldName);
    }
  }
  return Array.from(fields);
};
// Create profiles CTE if profiles are needed (to avoid duplicating the heavy profile join)
// Only select the fields that are actually used
const profilesJoinRef =
anyFilterOnProfile || anyBreakdownOnProfile
? 'LEFT ANY JOIN profile ON profile.id = profile_id'
: '';
if (anyFilterOnProfile || anyBreakdownOnProfile) { if (anyFilterOnProfile || anyBreakdownOnProfile) {
sb.joins.profiles = `LEFT ANY JOIN (SELECT const profileFields = getProfileFields();
id as "profile.id", const selectFields = profileFields.map((field) => {
email as "profile.email", if (field === 'id') {
first_name as "profile.first_name", return 'id as "profile.id"';
last_name as "profile.last_name", }
properties as "profile.properties" if (field === 'properties') {
FROM ${TABLE_NAMES.profiles} FINAL WHERE project_id = ${sqlstring.escape(projectId)}) as profile on profile.id = profile_id`; return 'properties as "profile.properties"';
}
if (field === 'email') {
return 'email as "profile.email"';
}
if (field === 'first_name') {
return 'first_name as "profile.first_name"';
}
if (field === 'last_name') {
return 'last_name as "profile.last_name"';
}
return field;
});
// Add profiles CTE using the builder
addCte(
'profile',
`SELECT ${selectFields.join(', ')}
FROM ${TABLE_NAMES.profiles} FINAL
WHERE project_id = ${sqlstring.escape(projectId)}`,
);
// Use the CTE reference in the main query
sb.joins.profiles = profilesJoinRef;
} }
sb.select.count = 'count(*) as count'; sb.select.count = 'count(*) as count';
@@ -142,16 +226,25 @@ export function getChartSql({
sb.where.endDate = `created_at <= toDateTime('${formatClickhouseDate(endDate)}')`; sb.where.endDate = `created_at <= toDateTime('${formatClickhouseDate(endDate)}')`;
} }
// Use CTE to define top breakdown values once, then reference in WHERE clause
if (breakdowns.length > 0 && limit) { if (breakdowns.length > 0 && limit) {
sb.where.bar = `(${breakdowns.map((b) => getSelectPropertyKey(b.name)).join(',')}) IN ( const breakdownSelects = breakdowns
SELECT ${breakdowns.map((b) => getSelectPropertyKey(b.name)).join(',')} .map((b) => getSelectPropertyKey(b.name))
FROM ${TABLE_NAMES.events} .join(', ');
${getJoins()}
${getWhere()} // Add top_breakdowns CTE using the builder
GROUP BY ${breakdowns.map((b) => getSelectPropertyKey(b.name)).join(',')} addCte(
'top_breakdowns',
`SELECT ${breakdownSelects}
FROM ${TABLE_NAMES.events} e
${profilesJoinRef ? `${profilesJoinRef} ` : ''}${getWhereWithoutBar()}
GROUP BY ${breakdownSelects}
ORDER BY count(*) DESC ORDER BY count(*) DESC
LIMIT ${limit} LIMIT ${limit}`,
)`; );
// Filter main query to only include top breakdown values
sb.where.bar = `(${breakdowns.map((b) => getSelectPropertyKey(b.name)).join(',')}) IN (SELECT * FROM top_breakdowns)`;
} }
breakdowns.forEach((breakdown, index) => { breakdowns.forEach((breakdown, index) => {
@@ -224,69 +317,26 @@ export function getChartSql({
) as subQuery`; ) as subQuery`;
sb.joins = {}; sb.joins = {};
const sql = `${getSelect()} ${getFrom()} ${getJoins()} ${getWhere()} ${getGroupBy()} ${getOrderBy()} ${getFill()}`; const sql = `${getWith()}${getSelect()} ${getFrom()} ${getJoins()} ${getWhere()} ${getGroupBy()} ${getOrderBy()} ${getFill()}`;
console.log('-- Report --'); console.log('-- Report --');
console.log(sql.replaceAll(/[\n\r]/g, ' ')); console.log(sql.replaceAll(/[\n\r]/g, ' '));
console.log('-- End --'); console.log('-- End --');
return sql; return sql;
} }
// Build total_count calculation that accounts for breakdowns
// When breakdowns exist, we need to calculate total_count per breakdown group
if (breakdowns.length > 0) { if (breakdowns.length > 0) {
// Create a subquery that calculates total_count per breakdown group (without date grouping) const breakdownPartitionKeys = breakdowns
// Then reference it in the main query via JOIN .map((_, index) => `label_${index + 1}`)
const breakdownSelects = breakdowns
.map((breakdown, index) => {
const key = `label_${index + 1}`;
const breakdownExpr = getSelectPropertyKey(breakdown.name);
return `${breakdownExpr} as ${key}`;
})
.join(', '); .join(', ');
// GROUP BY needs to use the actual expressions, not aliases sb.select.total_unique_count = `sum(count) OVER (PARTITION BY ${breakdownPartitionKeys}) as total_count`;
const breakdownGroupByExprs = breakdowns
.map((breakdown) => getSelectPropertyKey(breakdown.name))
.join(', ');
// Build the total_count subquery grouped only by breakdowns (no date)
// Extract the count expression without the alias (remove "as count")
const countExpression = sb.select.count.replace(/\s+as\s+count$/i, '');
const totalCountSubquery = `(
SELECT
${breakdownSelects},
${countExpression} as total_count
FROM ${sb.from}
${getJoins()}
${getWhere()}
GROUP BY ${breakdownGroupByExprs}
) as total_counts`;
// Join the total_counts subquery to get total_count per breakdown
// Match on the breakdown column values
const joinConditions = breakdowns
.map((_, index) => {
const outerKey = `label_${index + 1}`;
return `${outerKey} = total_counts.label_${index + 1}`;
})
.join(' AND ');
sb.joins.total_counts = `LEFT JOIN ${totalCountSubquery} ON ${joinConditions}`;
// Use any() aggregate since total_count is the same for all rows in a breakdown group
sb.select.total_unique_count =
'any(total_counts.total_count) as total_count';
} else { } else {
// No breakdowns - use a simple subquery for total count // No breakdowns - use window function without partition to get total across all rows
const totalUniqueSubquery = `( // Sum the count values across all grouped rows
SELECT ${sb.select.count} sb.select.total_unique_count = 'sum(count) OVER () as total_count';
FROM ${sb.from}
${getJoins()}
${getWhere()}
)`;
sb.select.total_unique_count = `${totalUniqueSubquery} as total_count`;
} }
const sql = `${getSelect()} ${getFrom()} ${getJoins()} ${getWhere()} ${getGroupBy()} ${getOrderBy()} ${getFill()}`; const sql = `${getWith()}${getSelect()} ${getFrom()} ${getJoins()} ${getWhere()} ${getGroupBy()} ${getOrderBy()} ${getFill()}`;
console.log('-- Report --'); console.log('-- Report --');
console.log(sql.replaceAll(/[\n\r]/g, ' ')); console.log(sql.replaceAll(/[\n\r]/g, ' '));
console.log('-- End --'); console.log('-- End --');

View File

@@ -8,6 +8,7 @@ export interface SqlBuilderObject {
orderBy: Record<string, string>; orderBy: Record<string, string>;
from: string; from: string;
joins: Record<string, string>; joins: Record<string, string>;
ctes: Record<string, string>;
limit: number | undefined; limit: number | undefined;
offset: number | undefined; offset: number | undefined;
fill: string | undefined; fill: string | undefined;
@@ -25,6 +26,7 @@ export function createSqlBuilder() {
orderBy: {}, orderBy: {},
having: {}, having: {},
joins: {}, joins: {},
ctes: {},
limit: undefined, limit: undefined,
offset: undefined, offset: undefined,
fill: undefined, fill: undefined,
@@ -46,6 +48,14 @@ export function createSqlBuilder() {
const getJoins = () => const getJoins = () =>
Object.keys(sb.joins).length ? join(sb.joins, ' ') : ''; Object.keys(sb.joins).length ? join(sb.joins, ' ') : '';
const getFill = () => (sb.fill ? `WITH FILL ${sb.fill}` : ''); const getFill = () => (sb.fill ? `WITH FILL ${sb.fill}` : '');
// Renders the registered CTEs as a leading `WITH a AS (...), b AS (...) `
// clause (note the trailing space). Returns '' when none are registered.
const getWith = () => {
  const definitions: string[] = [];
  for (const [cteName, cteQuery] of Object.entries(sb.ctes)) {
    definitions.push(`${cteName} AS (${cteQuery})`);
  }
  if (!definitions.length) {
    return '';
  }
  return `WITH ${definitions.join(', ')} `;
};
return { return {
sb, sb,
@@ -58,8 +68,13 @@ export function createSqlBuilder() {
getHaving, getHaving,
getJoins, getJoins,
getFill, getFill,
getWith,
with: (name: string, query: string) => {
sb.ctes[name] = query;
},
getSql: () => { getSql: () => {
const sql = [ const sql = [
getWith(),
getSelect(), getSelect(),
getFrom(), getFrom(),
getJoins(), getJoins(),

View File

@@ -580,36 +580,34 @@ export const chartRouter = createTRPCRouter({
sb.where.eventName = `name = ${sqlstring.escape(serie.name)}`; sb.where.eventName = `name = ${sqlstring.escape(serie.name)}`;
} }
console.log('> breakdowns', input.breakdowns); // Collect profile fields from filters and breakdowns
if (input.breakdowns) { const profileFields = [
Object.entries(input.breakdowns).forEach(([key, value]) => { ...serie.filters
sb.where[`breakdown_${key}`] = `${key} = ${sqlstring.escape(value)}`; .filter((f) => f.name.startsWith('profile.'))
}); .map((f) => f.name.replace('profile.', '')),
...(input.breakdowns
? Object.keys(input.breakdowns)
.filter((key) => key.startsWith('profile.'))
.map((key) => key.replace('profile.', ''))
: []),
];
if (profileFields.length > 0) {
  // Reduce each field path to its top-level column (e.g. 'properties' for
  // 'properties.os') so only the needed columns are selected.
  const profileColumns = uniq(
    profileFields.map((f) => f.split('.')[0] ?? ''),
  );
  // Filter names and breakdown keys come from request input and are
  // interpolated into the SQL text below, so accept plain identifiers only.
  const invalidColumns = profileColumns.filter(
    (column) => !/^[A-Za-z_]\w*$/.test(column),
  );
  if (invalidColumns.length > 0) {
    throw new Error(`Invalid profile field: ${invalidColumns.join(', ')}`);
  }
  // `id` is always selected for the join; do not list it twice when a
  // filter or breakdown also targets profile.id.
  const fieldsToSelect = [
    'id',
    ...profileColumns.filter((column) => column !== 'id'),
  ].join(', ');
  sb.joins.profiles = `LEFT ANY JOIN (SELECT ${fieldsToSelect} FROM ${TABLE_NAMES.profiles} FINAL WHERE project_id = ${sqlstring.escape(projectId)}) as profile on profile.id = profile_id`;
}
// // Handle breakdowns if provided if (input.breakdowns) {
// const anyBreakdownOnProfile = breakdowns.some((breakdown) => Object.entries(input.breakdowns).forEach(([key, value]) => {
// breakdown.name.startsWith('profile.'), // Transform property keys (e.g., properties.method -> properties['method'])
// ); const propertyKey = getSelectPropertyKey(key);
// const anyFilterOnProfile = [...event.filters, ...filters].some((filter) => sb.where[`breakdown_${key}`] =
// filter.name.startsWith('profile.'), `${propertyKey} = ${sqlstring.escape(value)}`;
// ); });
}
// if (anyFilterOnProfile || anyBreakdownOnProfile) {
// sb.joins.profiles = `LEFT ANY JOIN (SELECT
// id as "profile.id",
// email as "profile.email",
// first_name as "profile.first_name",
// last_name as "profile.last_name",
// properties as "profile.properties"
// FROM ${TABLE_NAMES.profiles} FINAL WHERE project_id = ${sqlstring.escape(projectId)}) as profile on profile.id = profile_id`;
// }
// Apply breakdown filters if provided
// breakdowns.forEach((breakdown) => {
// // This is simplified - in reality we'd need to match the breakdown value
// // For now, we'll just get all profiles for the time bucket
// });
// Get unique profile IDs // Get unique profile IDs
const profileIds = await chQuery<{ profile_id: string }>(getSql()); const profileIds = await chQuery<{ profile_id: string }>(getSql());