feat: add Google Search Console integration

This commit is contained in:
Carl-Gerhard Lindesvärd
2026-03-09 20:47:02 +01:00
committed by GitHub
parent 70ca44f039
commit 271d189ed0
51 changed files with 5471 additions and 503 deletions

View File

@@ -0,0 +1,85 @@
import fs from 'node:fs';
import path from 'node:path';
import { createTable, runClickhouseMigrationCommands } from '../src/clickhouse/migration';
import { getIsCluster } from './helpers';
/**
 * Creates the ClickHouse tables backing Google Search Console data:
 * daily totals plus per-page and per-query breakdowns. Also writes the
 * generated SQL next to this file (same basename, `.sql` extension) so the
 * migration can be reviewed, then runs it unless `--dry` was passed.
 */
export async function up() {
  const isClustered = getIsCluster();

  // Metric columns shared by all three GSC tables.
  const commonMetricColumns = [
    '`clicks` UInt32 CODEC(Delta(4), LZ4)',
    '`impressions` UInt32 CODEC(Delta(4), LZ4)',
    '`ctr` Float32 CODEC(Gorilla, LZ4)',
    '`position` Float32 CODEC(Gorilla, LZ4)',
    // ReplacingMergeTree version column: the newest sync wins per key.
    '`synced_at` DateTime DEFAULT now() CODEC(Delta(4), LZ4)',
  ];

  // The three tables differ only by name, an optional dimension column,
  // and the sort key — describe that difference once instead of repeating
  // the full createTable() config three times.
  const tables = [
    // Daily totals — accurate overview numbers
    {
      name: 'gsc_daily',
      dimensionColumns: [] as string[],
      orderBy: ['project_id', 'date'],
    },
    // Per-page breakdown
    {
      name: 'gsc_pages_daily',
      dimensionColumns: ['`page` String CODEC(ZSTD(3))'],
      orderBy: ['project_id', 'date', 'page'],
    },
    // Per-query breakdown
    {
      name: 'gsc_queries_daily',
      dimensionColumns: ['`query` String CODEC(ZSTD(3))'],
      orderBy: ['project_id', 'date', 'query'],
    },
  ];

  const sqls: string[] = tables.flatMap(({ name, dimensionColumns, orderBy }) =>
    createTable({
      name,
      columns: [
        '`project_id` String CODEC(ZSTD(3))',
        '`date` Date CODEC(Delta(2), LZ4)',
        ...dimensionColumns,
        ...commonMetricColumns,
      ],
      orderBy,
      partitionBy: 'toYYYYMM(date)',
      engine: 'ReplacingMergeTree(synced_at)',
      distributionHash: 'cityHash64(project_id)',
      replicatedVersion: '1',
      isClustered,
    }),
  );

  // Anchor the extension replacement to the END of the path: the original
  // `replace('.ts', '.sql')` rewrote the first '.ts' occurrence anywhere in
  // the path (e.g. a directory named `foo.tsx`), not just the extension.
  fs.writeFileSync(
    path.join(__filename.replace(/\.ts$/, '.sql')),
    sqls
      .map((sql) =>
        sql
          .trim()
          .replace(/;$/, '')
          .replace(/\n{2,}/g, '\n')
          .concat(';'),
      )
      .join('\n\n---\n\n'),
  );

  if (!process.argv.includes('--dry')) {
    await runClickhouseMigrationCommands(sqls);
  }
}

View File

@@ -31,3 +31,5 @@ export * from './src/services/overview.service';
export * from './src/services/pages.service';
export * from './src/services/insights';
export * from './src/session-context';
export * from './src/gsc';
export * from './src/encryption';

View File

@@ -0,0 +1,23 @@
-- CreateTable
-- One Google Search Console OAuth connection per project (projectId is
-- unique, see index below). Token columns are opaque TEXT here; presumably
-- the application layer stores them encrypted — confirm against the db
-- package's encrypt/decrypt helpers.
CREATE TABLE "public"."gsc_connections" (
    "id" UUID NOT NULL DEFAULT gen_random_uuid(),
    "projectId" TEXT NOT NULL,
    "siteUrl" TEXT NOT NULL DEFAULT '',
    "accessToken" TEXT NOT NULL,
    "refreshToken" TEXT NOT NULL,
    "accessTokenExpiresAt" TIMESTAMP(3),
    -- Sync bookkeeping: last run time, status and error message (if any)
    "lastSyncedAt" TIMESTAMP(3),
    "lastSyncStatus" TEXT,
    "lastSyncError" TEXT,
    "backfillStatus" TEXT,
    "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updatedAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,

    CONSTRAINT "gsc_connections_pkey" PRIMARY KEY ("id")
);

-- CreateIndex
-- Enforces at most one GSC connection per project
CREATE UNIQUE INDEX "gsc_connections_projectId_key" ON "public"."gsc_connections"("projectId");

-- AddForeignKey
-- Deleting a project cascades to its GSC connection
ALTER TABLE "public"."gsc_connections" ADD CONSTRAINT "gsc_connections_projectId_fkey" FOREIGN KEY ("projectId") REFERENCES "public"."projects"("id") ON DELETE CASCADE ON UPDATE CASCADE;

View File

@@ -203,6 +203,7 @@ model Project {
notificationRules NotificationRule[]
notifications Notification[]
imports Import[]
gscConnection GscConnection?
// When deleteAt > now(), the project will be deleted
deleteAt DateTime?
@@ -612,6 +613,24 @@ model InsightEvent {
@@map("insight_events")
}
/// A project's Google Search Console OAuth connection plus sync state.
/// accessToken/refreshToken hold values written through the application's
/// encrypt() helper — never store raw tokens here.
model GscConnection {
  id                   String    @id @default(dbgenerated("gen_random_uuid()")) @db.Uuid
  projectId            String    @unique
  project              Project   @relation(fields: [projectId], references: [id], onDelete: Cascade)
  /// Selected GSC property, e.g. a sc-domain: or URL-prefix property; empty until chosen
  siteUrl              String    @default("")
  accessToken          String
  refreshToken         String
  accessTokenExpiresAt DateTime?
  /// Sync bookkeeping: last run time, status (e.g. token_expired) and error text
  lastSyncedAt         DateTime?
  lastSyncStatus       String?
  lastSyncError        String?
  backfillStatus       String?
  createdAt            DateTime  @default(now())
  updatedAt            DateTime  @default(now()) @updatedAt

  @@map("gsc_connections")
}
model EmailUnsubscribe {
id String @id @default(dbgenerated("gen_random_uuid()")) @db.Uuid
email String

View File

@@ -58,6 +58,9 @@ export const TABLE_NAMES = {
sessions: 'sessions',
events_imports: 'events_imports',
session_replay_chunks: 'session_replay_chunks',
gsc_daily: 'gsc_daily',
gsc_pages_daily: 'gsc_pages_daily',
gsc_queries_daily: 'gsc_queries_daily',
};
/**

View File

@@ -0,0 +1,44 @@
import { createCipheriv, createDecipheriv, randomBytes } from 'node:crypto';
// AES-256-GCM parameters. GCM provides confidentiality plus integrity: a
// tampered ciphertext fails auth-tag verification instead of decrypting to junk.
const ALGORITHM = 'aes-256-gcm';
const IV_LENGTH = 12; // 96-bit IV — the recommended size for GCM
const TAG_LENGTH = 16; // 128-bit authentication tag
const ENCODING = 'base64';

/**
 * Reads and validates the 32-byte AES key from the ENCRYPTION_KEY env var.
 * @throws Error if the variable is unset or not exactly 64 hex characters.
 */
function getKey(): Buffer {
  const raw = process.env.ENCRYPTION_KEY;
  if (!raw) {
    throw new Error('ENCRYPTION_KEY environment variable is not set');
  }
  const buf = Buffer.from(raw, 'hex');
  if (buf.length !== 32) {
    throw new Error(
      'ENCRYPTION_KEY must be a 64-character hex string (32 bytes)'
    );
  }
  return buf;
}

/**
 * Encrypts a UTF-8 string. A fresh random IV is generated per call, so
 * encrypting the same plaintext twice yields different ciphertexts.
 *
 * Output format: base64(iv + tag + ciphertext).
 */
export function encrypt(plaintext: string): string {
  const key = getKey();
  const iv = randomBytes(IV_LENGTH);
  const cipher = createCipheriv(ALGORITHM, key, iv);
  const encrypted = Buffer.concat([
    cipher.update(plaintext, 'utf8'),
    cipher.final(),
  ]);
  const tag = cipher.getAuthTag();
  // Format: base64(iv + tag + ciphertext)
  return Buffer.concat([iv, tag, encrypted]).toString(ENCODING);
}

/**
 * Decrypts a string produced by {@link encrypt}.
 * @throws Error if the auth tag fails to verify (wrong key, corrupted or
 *   tampered data) or the key env var is invalid.
 */
export function decrypt(ciphertext: string): string {
  const key = getKey();
  const buf = Buffer.from(ciphertext, ENCODING);
  const iv = buf.subarray(0, IV_LENGTH);
  const tag = buf.subarray(IV_LENGTH, IV_LENGTH + TAG_LENGTH);
  const encrypted = buf.subarray(IV_LENGTH + TAG_LENGTH);
  const decipher = createDecipheriv(ALGORITHM, key, iv);
  decipher.setAuthTag(tag);
  // Concatenate the Buffers BEFORE decoding to UTF-8. The original
  // `decipher.update(encrypted) + decipher.final('utf8')` mixed Buffer and
  // string (a strict-TS type error), relying on implicit toString() and
  // risking a multi-byte character split across the two chunks.
  return Buffer.concat([decipher.update(encrypted), decipher.final()]).toString(
    'utf8'
  );
}

554
packages/db/src/gsc.ts Normal file
View File

@@ -0,0 +1,554 @@
import { cacheable } from '@openpanel/redis';
import { originalCh } from './clickhouse/client';
import { decrypt, encrypt } from './encryption';
import { db } from './prisma-client';
// One Search Console property the connected account can see, as returned by
// the GSC sites list endpoint.
export interface GscSite {
  siteUrl: string;
  // Access level string from the Google API (exact values defined by Google)
  permissionLevel: string;
}
/**
 * Exchanges a stored refresh token for a fresh access token via Google's
 * OAuth token endpoint. Throws with the response body on failure.
 */
async function refreshGscToken(
  refreshToken: string
): Promise<{ accessToken: string; expiresAt: Date }> {
  const form = new URLSearchParams({
    client_id: process.env.GOOGLE_CLIENT_ID ?? '',
    client_secret: process.env.GOOGLE_CLIENT_SECRET ?? '',
    refresh_token: refreshToken,
    grant_type: 'refresh_token',
  }).toString();

  const response = await fetch('https://oauth2.googleapis.com/token', {
    method: 'POST',
    headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
    body: form,
  });

  if (!response.ok) {
    const text = await response.text();
    throw new Error(`Failed to refresh GSC token: ${text}`);
  }

  const payload = (await response.json()) as {
    access_token: string;
    expires_in: number;
  };

  return {
    accessToken: payload.access_token,
    expiresAt: new Date(Date.now() + payload.expires_in * 1000),
  };
}
/**
 * Returns a valid (decrypted) GSC access token for the project, refreshing
 * it when it is missing or within a minute of expiring. On refresh failure
 * the connection is flagged `token_expired` and a user-facing error thrown.
 */
export async function getGscAccessToken(projectId: string): Promise<string> {
  const connection = await db.gscConnection.findUniqueOrThrow({
    where: { projectId },
  });

  // Reuse the cached token while it still has more than 60s of life left.
  const expiryMs = connection.accessTokenExpiresAt?.getTime() ?? 0;
  if (expiryMs > Date.now() + 60_000) {
    return decrypt(connection.accessToken);
  }

  try {
    const refreshed = await refreshGscToken(decrypt(connection.refreshToken));
    await db.gscConnection.update({
      where: { projectId },
      data: {
        accessToken: encrypt(refreshed.accessToken),
        accessTokenExpiresAt: refreshed.expiresAt,
      },
    });
    return refreshed.accessToken;
  } catch (error) {
    // Record the failure so the UI can prompt the user to reconnect.
    await db.gscConnection.update({
      where: { projectId },
      data: {
        lastSyncStatus: 'token_expired',
        lastSyncError:
          error instanceof Error ? error.message : 'Failed to refresh token',
      },
    });
    throw new Error(
      'GSC token has expired or been revoked. Please reconnect Google Search Console.'
    );
  }
}
/**
 * Lists the Search Console properties visible to the project's connected
 * Google account.
 */
export async function listGscSites(projectId: string): Promise<GscSite[]> {
  const accessToken = await getGscAccessToken(projectId);

  const response = await fetch('https://www.googleapis.com/webmasters/v3/sites', {
    headers: { Authorization: `Bearer ${accessToken}` },
  });
  if (!response.ok) {
    const text = await response.text();
    throw new Error(`Failed to list GSC sites: ${text}`);
  }

  const payload = (await response.json()) as {
    siteEntry?: Array<{ siteUrl: string; permissionLevel: string }>;
  };
  // Google omits siteEntry entirely when the account has no properties.
  return payload.siteEntry ?? [];
}
// One row from the searchAnalytics.query API. `keys` holds the values of
// the requested dimensions, in the same order they were requested
// (e.g. dimensions ['date','page'] -> keys [date, page]).
interface GscApiRow {
  keys: string[];
  clicks: number;
  impressions: number;
  ctr: number;
  position: number;
}

// A single dimension filter, passed through verbatim to the GSC API,
// e.g. { dimension: 'page', operator: 'equals', expression: url }.
interface GscDimensionFilter {
  dimension: string;
  operator: string;
  expression: string;
}

// A group of filters as accepted by the API's dimensionFilterGroups field.
interface GscFilterGroup {
  filters: GscDimensionFilter[];
}
/**
 * Runs a searchAnalytics.query request against the GSC API and transparently
 * pages through the result set (25k rows per request) until a short page
 * signals the end.
 */
async function queryGscSearchAnalytics(
  accessToken: string,
  siteUrl: string,
  startDate: string,
  endDate: string,
  dimensions: string[],
  dimensionFilterGroups?: GscFilterGroup[]
): Promise<GscApiRow[]> {
  const endpoint = `https://www.googleapis.com/webmasters/v3/sites/${encodeURIComponent(
    siteUrl
  )}/searchAnalytics/query`;
  const rowLimit = 25000;
  const collected: GscApiRow[] = [];

  for (let startRow = 0; ; startRow += rowLimit) {
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${accessToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        startDate,
        endDate,
        dimensions,
        rowLimit,
        startRow,
        // 'all' asks the API for fresh as well as finalized data
        dataState: 'all',
        ...(dimensionFilterGroups && { dimensionFilterGroups }),
      }),
    });

    if (!response.ok) {
      const text = await response.text();
      throw new Error(`GSC query failed for dimensions [${dimensions.join(',')}]: ${text}`);
    }

    const payload = (await response.json()) as { rows?: GscApiRow[] };
    const page = payload.rows ?? [];
    collected.push(...page);

    // A page shorter than the limit means there is nothing left to fetch.
    if (page.length < rowLimit) {
      return collected;
    }
  }
}
/** Formats a Date as 'YYYY-MM-DD' in UTC. */
function formatDate(date: Date): string {
  return date.toISOString().substring(0, 10);
}

/** Current UTC time as 'YYYY-MM-DD HH:MM:SS.mmm' (ClickHouse-friendly). */
function nowString(): string {
  const iso = new Date().toISOString();
  return iso.replace('T', ' ').replace('Z', '');
}
export async function syncGscData(
projectId: string,
startDate: Date,
endDate: Date
): Promise<void> {
const conn = await db.gscConnection.findUniqueOrThrow({
where: { projectId },
});
if (!conn.siteUrl) {
throw new Error('No GSC site URL configured for this project');
}
const accessToken = await getGscAccessToken(projectId);
const start = formatDate(startDate);
const end = formatDate(endDate);
const syncedAt = nowString();
// 1. Daily totals — authoritative numbers for overview chart
const dailyRows = await queryGscSearchAnalytics(
accessToken,
conn.siteUrl,
start,
end,
['date']
);
if (dailyRows.length > 0) {
await originalCh.insert({
table: 'gsc_daily',
values: dailyRows.map((row) => ({
project_id: projectId,
date: row.keys[0] ?? '',
clicks: row.clicks,
impressions: row.impressions,
ctr: row.ctr,
position: row.position,
synced_at: syncedAt,
})),
format: 'JSONEachRow',
});
}
// 2. Per-page breakdown
const pageRows = await queryGscSearchAnalytics(
accessToken,
conn.siteUrl,
start,
end,
['date', 'page']
);
if (pageRows.length > 0) {
await originalCh.insert({
table: 'gsc_pages_daily',
values: pageRows.map((row) => ({
project_id: projectId,
date: row.keys[0] ?? '',
page: row.keys[1] ?? '',
clicks: row.clicks,
impressions: row.impressions,
ctr: row.ctr,
position: row.position,
synced_at: syncedAt,
})),
format: 'JSONEachRow',
});
}
// 3. Per-query breakdown
const queryRows = await queryGscSearchAnalytics(
accessToken,
conn.siteUrl,
start,
end,
['date', 'query']
);
if (queryRows.length > 0) {
await originalCh.insert({
table: 'gsc_queries_daily',
values: queryRows.map((row) => ({
project_id: projectId,
date: row.keys[0] ?? '',
query: row.keys[1] ?? '',
clicks: row.clicks,
impressions: row.impressions,
ctr: row.ctr,
position: row.position,
synced_at: syncedAt,
})),
format: 'JSONEachRow',
});
}
}
/**
 * Overview timeseries from the pre-synced daily totals table, bucketed by
 * day/week/month.
 *
 * CTR and position are computed as impression-weighted aggregates
 * (sum(clicks)/sum(impressions), impression-weighted position) instead of a
 * plain avg() of daily ratios: averaging per-day CTRs over-weights
 * low-traffic days and diverges from Search Console's own reporting. This
 * also matches the weighted merging already done in getGscCannibalization.
 */
export async function getGscOverview(
  projectId: string,
  startDate: string,
  endDate: string,
  interval: 'day' | 'week' | 'month' = 'day'
): Promise<
  Array<{
    date: string;
    clicks: number;
    impressions: number;
    ctr: number;
    position: number;
  }>
> {
  const dateExpr =
    interval === 'month'
      ? 'toStartOfMonth(date)'
      : interval === 'week'
        ? 'toStartOfWeek(date)'
        : 'date';
  const result = await originalCh.query({
    query: `
      SELECT
        ${dateExpr} as date,
        sum(clicks) as clicks,
        sum(impressions) as impressions,
        if(sum(impressions) > 0, sum(clicks) / sum(impressions), 0) as ctr,
        if(sum(impressions) > 0, sum(position * impressions) / sum(impressions), 0) as position
      FROM gsc_daily
      FINAL
      WHERE project_id = {projectId: String}
        AND date >= {startDate: String}
        AND date <= {endDate: String}
      GROUP BY date
      ORDER BY date ASC
    `,
    query_params: { projectId, startDate, endDate },
    format: 'JSONEachRow',
  });
  return result.json();
}
/**
 * Top pages by clicks for the date range, from the pre-synced per-page
 * table.
 *
 * CTR/position are impression-weighted over the range rather than avg()'d
 * per day — a plain average of daily ratios over-weights low-traffic days
 * and differs from Search Console's reporting.
 */
export async function getGscPages(
  projectId: string,
  startDate: string,
  endDate: string,
  limit = 100
): Promise<
  Array<{
    page: string;
    clicks: number;
    impressions: number;
    ctr: number;
    position: number;
  }>
> {
  const result = await originalCh.query({
    query: `
      SELECT
        page,
        sum(clicks) as clicks,
        sum(impressions) as impressions,
        if(sum(impressions) > 0, sum(clicks) / sum(impressions), 0) as ctr,
        if(sum(impressions) > 0, sum(position * impressions) / sum(impressions), 0) as position
      FROM gsc_pages_daily
      FINAL
      WHERE project_id = {projectId: String}
        AND date >= {startDate: String}
        AND date <= {endDate: String}
      GROUP BY page
      ORDER BY clicks DESC
      LIMIT {limit: UInt32}
    `,
    query_params: { projectId, startDate, endDate, limit },
    format: 'JSONEachRow',
  });
  return result.json();
}
// A search query served by multiple distinct pages (potential keyword
// cannibalization), with the aggregate totals and a per-page breakdown.
export interface GscCannibalizedQuery {
  query: string;
  totalImpressions: number;
  totalClicks: number;
  pages: Array<{
    page: string;
    clicks: number;
    impressions: number;
    ctr: number;
    position: number;
  }>;
}
/**
 * Detects keyword cannibalization: queries where two or more distinct pages
 * compete in search results. Data is fetched live from the GSC API with the
 * query+page dimensions, grouped per query, and only queries with >= 2 pages
 * and >= 100 total impressions are returned — sorted by impressions, top 50.
 *
 * Wrapped in `cacheable` with a TTL of 60 * 60 * 4 (presumably seconds,
 * i.e. 4 hours — confirm against the redis package's cacheable contract).
 */
export const getGscCannibalization = cacheable(
  async (
    projectId: string,
    startDate: string,
    endDate: string
  ): Promise<GscCannibalizedQuery[]> => {
    const conn = await db.gscConnection.findUniqueOrThrow({
      where: { projectId },
    });
    const accessToken = await getGscAccessToken(projectId);
    const rows = await queryGscSearchAnalytics(
      accessToken,
      conn.siteUrl,
      startDate,
      endDate,
      ['query', 'page']
    );
    // query -> running totals plus the per-page metric list
    const map = new Map<
      string,
      {
        totalImpressions: number;
        totalClicks: number;
        pages: GscCannibalizedQuery['pages'];
      }
    >();
    for (const row of rows) {
      const query = row.keys[0] ?? '';
      // Strip hash fragments — GSC records heading anchors (e.g. /page#section)
      // as separate URLs but Google treats them as the same page
      let page = row.keys[1] ?? '';
      try {
        const u = new URL(page);
        u.hash = '';
        page = u.toString();
      } catch {
        // Not parseable as an absolute URL — fall back to a string split
        page = page.split('#')[0] ?? page;
      }
      const entry = map.get(query) ?? {
        totalImpressions: 0,
        totalClicks: 0,
        pages: [],
      };
      entry.totalImpressions += row.impressions;
      entry.totalClicks += row.clicks;
      // Merge into existing page entry if already seen (from a different hash variant)
      const existing = entry.pages.find((p) => p.page === page);
      if (existing) {
        // Impression-weighted average position across the merged variants;
        // computed BEFORE the counters are bumped below, so the weights are
        // the pre-merge impressions of each side.
        const totalImpressions = existing.impressions + row.impressions;
        if (totalImpressions > 0) {
          existing.position =
            (existing.position * existing.impressions + row.position * row.impressions) / totalImpressions;
        }
        existing.clicks += row.clicks;
        existing.impressions += row.impressions;
        // Recompute CTR from the merged totals rather than averaging ratios
        existing.ctr =
          existing.impressions > 0 ? existing.clicks / existing.impressions : 0;
      } else {
        entry.pages.push({
          page,
          clicks: row.clicks,
          impressions: row.impressions,
          ctr: row.ctr,
          position: row.position,
        });
      }
      map.set(query, entry);
    }
    return [...map.entries()]
      .filter(([, v]) => v.pages.length >= 2 && v.totalImpressions >= 100)
      .sort(([, a], [, b]) => b.totalImpressions - a.totalImpressions)
      .slice(0, 50)
      .map(([query, v]) => ({
        query,
        totalImpressions: v.totalImpressions,
        totalClicks: v.totalClicks,
        // Best (lowest) position first; ties broken by impressions desc
        pages: v.pages.sort((a, b) =>
          a.position !== b.position
            ? a.position - b.position
            : b.impressions - a.impressions
        ),
      }));
  },
  60 * 60 * 4
);
export async function getGscPageDetails(
projectId: string,
page: string,
startDate: string,
endDate: string
): Promise<{
timeseries: Array<{ date: string; clicks: number; impressions: number; ctr: number; position: number }>;
queries: Array<{ query: string; clicks: number; impressions: number; ctr: number; position: number }>;
}> {
const conn = await db.gscConnection.findUniqueOrThrow({ where: { projectId } });
const accessToken = await getGscAccessToken(projectId);
const filterGroups: GscFilterGroup[] = [{ filters: [{ dimension: 'page', operator: 'equals', expression: page }] }];
const [timeseriesRows, queryRows] = await Promise.all([
queryGscSearchAnalytics(accessToken, conn.siteUrl, startDate, endDate, ['date'], filterGroups),
queryGscSearchAnalytics(accessToken, conn.siteUrl, startDate, endDate, ['query'], filterGroups),
]);
return {
timeseries: timeseriesRows.map((row) => ({
date: row.keys[0] ?? '',
clicks: row.clicks,
impressions: row.impressions,
ctr: row.ctr,
position: row.position,
})),
queries: queryRows.map((row) => ({
query: row.keys[0] ?? '',
clicks: row.clicks,
impressions: row.impressions,
ctr: row.ctr,
position: row.position,
})),
};
}
export async function getGscQueryDetails(
projectId: string,
query: string,
startDate: string,
endDate: string
): Promise<{
timeseries: Array<{ date: string; clicks: number; impressions: number; ctr: number; position: number }>;
pages: Array<{ page: string; clicks: number; impressions: number; ctr: number; position: number }>;
}> {
const conn = await db.gscConnection.findUniqueOrThrow({ where: { projectId } });
const accessToken = await getGscAccessToken(projectId);
const filterGroups: GscFilterGroup[] = [{ filters: [{ dimension: 'query', operator: 'equals', expression: query }] }];
const [timeseriesRows, pageRows] = await Promise.all([
queryGscSearchAnalytics(accessToken, conn.siteUrl, startDate, endDate, ['date'], filterGroups),
queryGscSearchAnalytics(accessToken, conn.siteUrl, startDate, endDate, ['page'], filterGroups),
]);
return {
timeseries: timeseriesRows.map((row) => ({
date: row.keys[0] ?? '',
clicks: row.clicks,
impressions: row.impressions,
ctr: row.ctr,
position: row.position,
})),
pages: pageRows.map((row) => ({
page: row.keys[0] ?? '',
clicks: row.clicks,
impressions: row.impressions,
ctr: row.ctr,
position: row.position,
})),
};
}
/**
 * Top search queries by clicks for the date range, from the pre-synced
 * per-query table.
 *
 * CTR/position are impression-weighted over the range rather than avg()'d
 * per day — a plain average of daily ratios over-weights low-traffic days
 * and differs from Search Console's reporting.
 */
export async function getGscQueries(
  projectId: string,
  startDate: string,
  endDate: string,
  limit = 100
): Promise<
  Array<{
    query: string;
    clicks: number;
    impressions: number;
    ctr: number;
    position: number;
  }>
> {
  const result = await originalCh.query({
    query: `
      SELECT
        query,
        sum(clicks) as clicks,
        sum(impressions) as impressions,
        if(sum(impressions) > 0, sum(clicks) / sum(impressions), 0) as ctr,
        if(sum(impressions) > 0, sum(position * impressions) / sum(impressions), 0) as position
      FROM gsc_queries_daily
      FINAL
      WHERE project_id = {projectId: String}
        AND date >= {startDate: String}
        AND date <= {endDate: String}
      GROUP BY query
      ORDER BY clicks DESC
      LIMIT {limit: UInt32}
    `,
    query_params: { projectId, startDate, endDate, limit },
    format: 'JSONEachRow',
  });
  return result.json();
}

View File

@@ -1,4 +1,5 @@
import { TABLE_NAMES, ch } from '../clickhouse/client';
import type { IInterval } from '@openpanel/validation';
import { ch, TABLE_NAMES } from '../clickhouse/client';
import { clix } from '../clickhouse/query-builder';
export interface IGetPagesInput {
@@ -7,6 +8,15 @@ export interface IGetPagesInput {
endDate: string;
timezone: string;
search?: string;
limit?: number;
}
// One time bucket of the per-page timeseries returned by
// PagesService.getPageTimeseries.
export interface IPageTimeseriesRow {
  origin: string;
  path: string;
  // Bucket start produced by toStartOf(created_at, interval)
  date: string;
  pageviews: number;
  sessions: number;
}
export interface ITopPage {
@@ -28,6 +38,7 @@ export class PagesService {
endDate,
timezone,
search,
limit,
}: IGetPagesInput): Promise<ITopPage[]> {
// CTE: Get titles from the last 30 days for faster retrieval
const titlesCte = clix(this.client, timezone)
@@ -72,7 +83,7 @@ export class PagesService {
.leftJoin(
sessionsSubquery,
'e.session_id = s.id AND e.project_id = s.project_id',
's',
's'
)
.leftJoin('page_titles pt', 'concat(e.origin, e.path) = pt.page_key')
.where('e.project_id', '=', projectId)
@@ -83,14 +94,69 @@ export class PagesService {
clix.datetime(endDate, 'toDateTime'),
])
.when(!!search, (q) => {
q.where('e.path', 'LIKE', `%${search}%`);
const term = `%${search}%`;
q.whereGroup()
.where('e.path', 'LIKE', term)
.orWhere('e.origin', 'LIKE', term)
.orWhere('pt.title', 'LIKE', term)
.end();
})
.groupBy(['e.origin', 'e.path', 'pt.title'])
.orderBy('sessions', 'DESC')
.limit(1000);
.orderBy('sessions', 'DESC');
if (limit !== undefined) {
query.limit(limit);
}
return query.execute();
}
/**
 * Per-page pageview and unique-session counts, bucketed by `interval` in
 * the caller's timezone, with gaps filled so every bucket between start and
 * end is present. Optional exact-match filters narrow to one origin/path.
 */
async getPageTimeseries({
  projectId,
  startDate,
  endDate,
  timezone,
  interval,
  filterOrigin,
  filterPath,
}: IGetPagesInput & {
  interval: IInterval;
  filterOrigin?: string;
  filterPath?: string;
}): Promise<IPageTimeseriesRow[]> {
  // Bucket expression applied to the event timestamp
  const dateExpr = clix.toStartOf('e.created_at', interval, timezone);
  // Week/month buckets are Date-typed, not DateTime — the fill bounds must
  // use the matching type or the WITH FILL range won't line up.
  const useDateOnly = interval === 'month' || interval === 'week';
  const fillFrom = clix.toStartOf(
    clix.datetime(startDate, useDateOnly ? 'toDate' : 'toDateTime'),
    interval
  );
  const fillTo = clix.datetime(
    endDate,
    useDateOnly ? 'toDate' : 'toDateTime'
  );
  const fillStep = clix.toInterval('1', interval);
  return clix(this.client, timezone)
    .select<IPageTimeseriesRow>([
      'e.origin as origin',
      'e.path as path',
      `${dateExpr} AS date`,
      'count() as pageviews',
      'uniq(e.session_id) as sessions',
    ])
    // NOTE(review): second arg `false` — meaning defined by clix.from;
    // presumably toggles a default (e.g. FINAL) — confirm in query-builder
    .from(`${TABLE_NAMES.events} e`, false)
    .where('e.project_id', '=', projectId)
    .where('e.name', '=', 'screen_view')
    .where('e.path', '!=', '')
    .where('e.created_at', 'BETWEEN', [
      clix.datetime(startDate, 'toDateTime'),
      clix.datetime(endDate, 'toDateTime'),
    ])
    // Exact-match narrowing; `!` is safe under the truthiness guard
    .when(!!filterOrigin, (q) => q.where('e.origin', '=', filterOrigin!))
    .when(!!filterPath, (q) => q.where('e.path', '=', filterPath!))
    .groupBy(['e.origin', 'e.path', 'date'])
    .orderBy('date', 'ASC')
    // Fill empty buckets so the chart has a point for every interval step
    .fill(fillFrom, fillTo, fillStep)
    .execute();
}
}
// Shared singleton bound to the default ClickHouse client.
export const pagesService = new PagesService(ch);