feat: added google search console

This commit is contained in:
Carl-Gerhard Lindesvärd
2026-03-09 20:47:02 +01:00
committed by GitHub
parent 70ca44f039
commit 271d189ed0
51 changed files with 5471 additions and 503 deletions

View File

@@ -58,6 +58,9 @@ export const TABLE_NAMES = {
sessions: 'sessions',
events_imports: 'events_imports',
session_replay_chunks: 'session_replay_chunks',
gsc_daily: 'gsc_daily',
gsc_pages_daily: 'gsc_pages_daily',
gsc_queries_daily: 'gsc_queries_daily',
};
/**

View File

@@ -0,0 +1,44 @@
import { createCipheriv, createDecipheriv, randomBytes } from 'node:crypto';
// Cipher name handed to createCipheriv/createDecipheriv.
const ALGORITHM = 'aes-256-gcm';
// Number of random IV bytes generated per encrypt() call and sliced off the front by decrypt().
const IV_LENGTH = 12;
// Number of auth-tag bytes decrypt() reads right after the IV.
const TAG_LENGTH = 16;
// Text encoding of the combined iv + tag + ciphertext payload.
const ENCODING = 'base64';
/**
 * Reads the symmetric key from the `ENCRYPTION_KEY` environment variable.
 *
 * The value must be exactly 64 hexadecimal characters (32 bytes); leading and
 * trailing whitespace is ignored.
 *
 * @returns The 32-byte key.
 * @throws Error when the variable is unset or is not a 64-character hex string.
 */
function getKey(): Buffer {
  const raw = process.env.ENCRYPTION_KEY;
  if (!raw) {
    throw new Error('ENCRYPTION_KEY environment variable is not set');
  }
  const hex = raw.trim();
  // Buffer.from(..., 'hex') silently stops decoding at the first non-hex
  // character, so a malformed value could still yield 32 bytes. Validate the
  // text itself instead of only the decoded length.
  if (!/^[0-9a-fA-F]{64}$/.test(hex)) {
    throw new Error(
      'ENCRYPTION_KEY must be a 64-character hex string (32 bytes)'
    );
  }
  return Buffer.from(hex, 'hex');
}
/**
 * Encrypts a UTF-8 string with the key returned by `getKey()` and a freshly
 * generated random IV.
 *
 * @param plaintext - Text to encrypt.
 * @returns base64(iv + tag + ciphertext).
 */
export function encrypt(plaintext: string): string {
  const secret = getKey();
  const nonce = randomBytes(IV_LENGTH);
  const cipher = createCipheriv(ALGORITHM, secret, nonce);
  const head = cipher.update(plaintext, 'utf8');
  const tail = cipher.final();
  // The auth tag is only available once final() has been called.
  const authTag = cipher.getAuthTag();
  // Layout: iv, then tag, then the ciphertext bytes.
  const payload = Buffer.concat([nonce, authTag, head, tail]);
  return payload.toString(ENCODING);
}
/**
 * Decrypts a payload produced by `encrypt()`.
 *
 * @param ciphertext - base64(iv + tag + ciphertext).
 * @returns The decrypted UTF-8 text.
 * @throws Error when the payload is too short to contain an IV and auth tag,
 *   or when authentication fails (wrong key or tampered data).
 */
export function decrypt(ciphertext: string): string {
  const key = getKey();
  const buf = Buffer.from(ciphertext, ENCODING);
  // Fail with a clear message instead of an opaque tag-length error when the
  // payload cannot even hold the IV and the auth tag.
  if (buf.length < IV_LENGTH + TAG_LENGTH) {
    throw new Error('Invalid ciphertext: payload is too short');
  }
  const iv = buf.subarray(0, IV_LENGTH);
  const tag = buf.subarray(IV_LENGTH, IV_LENGTH + TAG_LENGTH);
  const encrypted = buf.subarray(IV_LENGTH + TAG_LENGTH);
  const decipher = createDecipheriv(ALGORITHM, key, iv, {
    authTagLength: TAG_LENGTH,
  });
  decipher.setAuthTag(tag);
  // Join the raw bytes first and decode once, rather than relying on implicit
  // Buffer-to-string coercion via `+`.
  return Buffer.concat([decipher.update(encrypted), decipher.final()]).toString(
    'utf8'
  );
}

554
packages/db/src/gsc.ts Normal file
View File

@@ -0,0 +1,554 @@
import { cacheable } from '@openpanel/redis';
import { originalCh } from './clickhouse/client';
import { decrypt, encrypt } from './encryption';
import { db } from './prisma-client';
/** One entry of the `siteEntry` list returned by `listGscSites`. */
export interface GscSite {
siteUrl: string;
permissionLevel: string;
}
/**
 * Exchanges a Google OAuth refresh token for a new access token.
 *
 * Client credentials are read from `GOOGLE_CLIENT_ID` and
 * `GOOGLE_CLIENT_SECRET`.
 *
 * @param refreshToken - Plain-text refresh token.
 * @returns The new access token and the instant at which it expires.
 * @throws Error when the token endpoint answers with a non-2xx status or with
 *   a body that lacks a usable `access_token` / `expires_in`.
 */
async function refreshGscToken(
  refreshToken: string
): Promise<{ accessToken: string; expiresAt: Date }> {
  const params = new URLSearchParams({
    client_id: process.env.GOOGLE_CLIENT_ID ?? '',
    client_secret: process.env.GOOGLE_CLIENT_SECRET ?? '',
    refresh_token: refreshToken,
    grant_type: 'refresh_token',
  });
  const res = await fetch('https://oauth2.googleapis.com/token', {
    method: 'POST',
    headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
    body: params.toString(),
  });
  if (!res.ok) {
    const text = await res.text();
    throw new Error(`Failed to refresh GSC token: ${text}`);
  }
  const data = (await res.json()) as {
    access_token?: unknown;
    expires_in?: unknown;
  } | null;
  const accessToken = data?.access_token;
  const expiresIn = data?.expires_in;
  // Guard against a 2xx response with an unexpected body; otherwise callers
  // would receive `undefined` as a token and an Invalid Date as expiry.
  if (typeof accessToken !== 'string' || accessToken === '') {
    throw new Error(
      'Failed to refresh GSC token: response did not include an access token'
    );
  }
  if (typeof expiresIn !== 'number' || !Number.isFinite(expiresIn)) {
    throw new Error(
      'Failed to refresh GSC token: response did not include a valid expires_in'
    );
  }
  const expiresAt = new Date(Date.now() + expiresIn * 1000);
  return { accessToken, expiresAt };
}
/**
 * Returns a usable Google access token for the project's GSC connection.
 *
 * The stored token is decrypted and reused while it remains valid for more
 * than another 60 seconds. Otherwise a new token is requested with the stored
 * refresh token and persisted (encrypted) together with its expiry.
 *
 * @param projectId - Project whose GSC connection is used.
 * @returns The plain-text access token.
 * @throws Error when no connection exists, or with a "please reconnect"
 *   message when refreshing fails. In the latter case the connection is first
 *   marked `token_expired` and the underlying message is stored.
 */
export async function getGscAccessToken(projectId: string): Promise<string> {
  const connection = await db.gscConnection.findUniqueOrThrow({
    where: { projectId },
  });

  const expiry = connection.accessTokenExpiresAt;
  // Keep a one-minute safety margin so the token does not expire mid-request.
  const isFresh = expiry ? expiry.getTime() > Date.now() + 60_000 : false;
  if (isFresh) {
    return decrypt(connection.accessToken);
  }

  try {
    const refreshed = await refreshGscToken(decrypt(connection.refreshToken));
    await db.gscConnection.update({
      where: { projectId },
      data: {
        accessToken: encrypt(refreshed.accessToken),
        accessTokenExpiresAt: refreshed.expiresAt,
      },
    });
    return refreshed.accessToken;
  } catch (error) {
    const lastSyncError =
      error instanceof Error ? error.message : 'Failed to refresh token';
    await db.gscConnection.update({
      where: { projectId },
      data: { lastSyncStatus: 'token_expired', lastSyncError },
    });
    throw new Error(
      'GSC token has expired or been revoked. Please reconnect Google Search Console.'
    );
  }
}
/**
 * Lists the Search Console sites visible to the project's connected Google
 * account.
 *
 * @param projectId - Project whose GSC connection supplies the access token.
 * @returns The `siteEntry` array from the API, or an empty array when absent.
 * @throws Error when the API answers with a non-2xx status.
 */
export async function listGscSites(projectId: string): Promise<GscSite[]> {
  const token = await getGscAccessToken(projectId);
  const response = await fetch(
    'https://www.googleapis.com/webmasters/v3/sites',
    { headers: { Authorization: `Bearer ${token}` } }
  );

  if (response.ok) {
    const payload = (await response.json()) as {
      siteEntry?: Array<{ siteUrl: string; permissionLevel: string }>;
    };
    return payload.siteEntry ?? [];
  }

  const detail = await response.text();
  throw new Error(`Failed to list GSC sites: ${detail}`);
}
/**
 * One element of the `rows` array in a search analytics query response.
 * Callers in this file read `keys` positionally, in the same order as the
 * `dimensions` they requested.
 */
interface GscApiRow {
keys: string[];
clicks: number;
impressions: number;
ctr: number;
position: number;
}
/**
 * A single dimension filter sent to the search analytics query endpoint,
 * e.g. `{ dimension: 'page', operator: 'equals', expression: <url> }`.
 */
interface GscDimensionFilter {
dimension: string;
operator: string;
expression: string;
}
/** A group of dimension filters; sent as one entry of `dimensionFilterGroups`. */
interface GscFilterGroup {
filters: GscDimensionFilter[];
}
/**
 * Runs a search analytics query against the Search Console API and returns
 * every row, following pagination.
 *
 * Pages of 25,000 rows are requested with an increasing `startRow` until a
 * page comes back with fewer rows than the page size.
 *
 * @param accessToken - Bearer token for the Google API.
 * @param siteUrl - Search Console property; URL-encoded into the request path.
 * @param startDate - Range start, passed through to the API unchanged.
 * @param endDate - Range end, passed through to the API unchanged.
 * @param dimensions - Dimensions to group by; determines the order of `keys`.
 * @param dimensionFilterGroups - Optional filters, included only when truthy.
 * @returns All rows from all pages, in response order.
 * @throws Error when any page request answers with a non-2xx status.
 */
async function queryGscSearchAnalytics(
  accessToken: string,
  siteUrl: string,
  startDate: string,
  endDate: string,
  dimensions: string[],
  dimensionFilterGroups?: GscFilterGroup[]
): Promise<GscApiRow[]> {
  const pageSize = 25000;
  const endpoint = `https://www.googleapis.com/webmasters/v3/sites/${encodeURIComponent(siteUrl)}/searchAnalytics/query`;
  const collected: GscApiRow[] = [];

  for (let offset = 0; ; offset += pageSize) {
    const payload: Record<string, unknown> = {
      startDate,
      endDate,
      dimensions,
      rowLimit: pageSize,
      startRow: offset,
      dataState: 'all',
    };
    if (dimensionFilterGroups) {
      payload.dimensionFilterGroups = dimensionFilterGroups;
    }

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${accessToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(payload),
    });
    if (!response.ok) {
      const detail = await response.text();
      throw new Error(`GSC query failed for dimensions [${dimensions.join(',')}]: ${detail}`);
    }

    const parsed = (await response.json()) as { rows?: GscApiRow[] };
    const page = parsed.rows ?? [];
    for (const row of page) {
      collected.push(row);
    }
    // A short page means there is nothing left to fetch.
    if (page.length < pageSize) {
      return collected;
    }
  }
}
/** Returns the first ten characters of the date's ISO-8601 (UTC) string, i.e. `YYYY-MM-DD`. */
function formatDate(date: Date): string {
  const iso = date.toISOString();
  return iso.substring(0, 10);
}
/**
 * Returns the current instant as the ISO-8601 (UTC) string with the `T`
 * separator replaced by a space and the `Z` suffix removed.
 */
function nowString(): string {
  const iso = new Date().toISOString();
  const spaced = iso.replace('T', ' ');
  return spaced.replace('Z', '');
}
/**
 * Pulls search analytics for a date range from Google Search Console and
 * inserts it into ClickHouse.
 *
 * Three queries are issued one after the other: daily totals (`gsc_daily`),
 * per-page rows (`gsc_pages_daily`) and per-query rows (`gsc_queries_daily`).
 * Every inserted row carries the same `synced_at` timestamp. An insert is
 * skipped when its query returned no rows.
 *
 * @param projectId - Project whose GSC connection and site are used.
 * @param startDate - First day of the range (formatted via `formatDate`).
 * @param endDate - Last day of the range (formatted via `formatDate`).
 * @throws Error when the project has no connection or no site URL configured,
 *   or when token retrieval, an API query or an insert fails.
 */
export async function syncGscData(
  projectId: string,
  startDate: Date,
  endDate: Date
): Promise<void> {
  const connection = await db.gscConnection.findUniqueOrThrow({
    where: { projectId },
  });
  const { siteUrl } = connection;
  if (!siteUrl) {
    throw new Error('No GSC site URL configured for this project');
  }

  const accessToken = await getGscAccessToken(projectId);
  const from = formatDate(startDate);
  const to = formatDate(endDate);
  const syncedAt = nowString();

  const fetchRows = (dimensions: string[]) =>
    queryGscSearchAnalytics(accessToken, siteUrl, from, to, dimensions);

  // 1. Daily totals — authoritative numbers for overview chart
  const totals = await fetchRows(['date']);
  if (totals.length > 0) {
    await originalCh.insert({
      table: 'gsc_daily',
      values: totals.map(({ keys, clicks, impressions, ctr, position }) => ({
        project_id: projectId,
        date: keys[0] ?? '',
        clicks,
        impressions,
        ctr,
        position,
        synced_at: syncedAt,
      })),
      format: 'JSONEachRow',
    });
  }

  // 2. Per-page breakdown
  const byPage = await fetchRows(['date', 'page']);
  if (byPage.length > 0) {
    await originalCh.insert({
      table: 'gsc_pages_daily',
      values: byPage.map(({ keys, clicks, impressions, ctr, position }) => ({
        project_id: projectId,
        date: keys[0] ?? '',
        page: keys[1] ?? '',
        clicks,
        impressions,
        ctr,
        position,
        synced_at: syncedAt,
      })),
      format: 'JSONEachRow',
    });
  }

  // 3. Per-query breakdown
  const byQuery = await fetchRows(['date', 'query']);
  if (byQuery.length > 0) {
    await originalCh.insert({
      table: 'gsc_queries_daily',
      values: byQuery.map(({ keys, clicks, impressions, ctr, position }) => ({
        project_id: projectId,
        date: keys[0] ?? '',
        query: keys[1] ?? '',
        clicks,
        impressions,
        ctr,
        position,
        synced_at: syncedAt,
      })),
      format: 'JSONEachRow',
    });
  }
}
/**
 * Returns the project's Search Console totals from `gsc_daily`, bucketed by
 * day, week or month.
 *
 * Two details of the query are deliberate:
 * - The date-range filter runs in an inner query on the raw row date. In the
 *   outer query the output alias `date` holds the bucket start; filtering
 *   there would compare the bucket start, not the row date, against the range
 *   for week/month intervals.
 * - `ctr` is total clicks divided by total impressions and `position` is
 *   weighted by impressions, rather than a plain mean of daily ratios. Buckets
 *   with zero impressions report a `ctr` of 0 and the plain mean position.
 *
 * @param projectId - Project to read.
 * @param startDate - Inclusive range start, compared against the `date` column.
 * @param endDate - Inclusive range end, compared against the `date` column.
 * @param interval - Bucket size; defaults to `'day'`.
 * @returns One row per bucket, ordered by bucket start ascending.
 */
export async function getGscOverview(
  projectId: string,
  startDate: string,
  endDate: string,
  interval: 'day' | 'week' | 'month' = 'day'
): Promise<
  Array<{
    date: string;
    clicks: number;
    impressions: number;
    ctr: number;
    position: number;
  }>
> {
  const bucketExpr =
    interval === 'month'
      ? 'toStartOfMonth(row_date)'
      : interval === 'week'
        ? 'toStartOfWeek(row_date)'
        : 'row_date';
  const result = await originalCh.query({
    query: `
      SELECT
        ${bucketExpr} as date,
        sum(row_clicks) as clicks,
        sum(row_impressions) as impressions,
        if(sum(row_impressions) > 0, sum(row_clicks) / sum(row_impressions), 0) as ctr,
        if(
          sum(row_impressions) > 0,
          sum(row_position * row_impressions) / sum(row_impressions),
          avg(row_position)
        ) as position
      FROM (
        SELECT
          date as row_date,
          clicks as row_clicks,
          impressions as row_impressions,
          position as row_position
        FROM gsc_daily
        FINAL
        WHERE project_id = {projectId: String}
          AND date >= {startDate: String}
          AND date <= {endDate: String}
      )
      GROUP BY date
      ORDER BY date ASC
    `,
    query_params: { projectId, startDate, endDate },
    format: 'JSONEachRow',
  });
  return result.json();
}
/**
 * Returns the top pages by clicks from `gsc_pages_daily` for a date range.
 *
 * `ctr` is total clicks divided by total impressions and `position` is
 * weighted by impressions, so days with little traffic do not skew the
 * result. Source columns are renamed in an inner query so the outer
 * aggregates cannot collide with the output aliases of the same name.
 *
 * @param projectId - Project to read.
 * @param startDate - Inclusive range start.
 * @param endDate - Inclusive range end.
 * @param limit - Maximum number of pages returned; defaults to 100.
 * @returns One row per page, ordered by clicks descending.
 */
export async function getGscPages(
  projectId: string,
  startDate: string,
  endDate: string,
  limit = 100
): Promise<
  Array<{
    page: string;
    clicks: number;
    impressions: number;
    ctr: number;
    position: number;
  }>
> {
  const result = await originalCh.query({
    query: `
      SELECT
        page,
        sum(row_clicks) as clicks,
        sum(row_impressions) as impressions,
        if(sum(row_impressions) > 0, sum(row_clicks) / sum(row_impressions), 0) as ctr,
        if(
          sum(row_impressions) > 0,
          sum(row_position * row_impressions) / sum(row_impressions),
          avg(row_position)
        ) as position
      FROM (
        SELECT
          page,
          clicks as row_clicks,
          impressions as row_impressions,
          position as row_position
        FROM gsc_pages_daily
        FINAL
        WHERE project_id = {projectId: String}
          AND date >= {startDate: String}
          AND date <= {endDate: String}
      )
      GROUP BY page
      ORDER BY clicks DESC
      LIMIT {limit: UInt32}
    `,
    query_params: { projectId, startDate, endDate, limit },
    format: 'JSONEachRow',
  });
  return result.json();
}
/**
 * A search query served by more than one page, as returned by
 * `getGscCannibalization`. The totals are summed over all rows for the query;
 * `pages` holds one entry per page.
 */
export interface GscCannibalizedQuery {
query: string;
totalImpressions: number;
totalClicks: number;
pages: Array<{
page: string;
clicks: number;
impressions: number;
ctr: number;
position: number;
}>;
}
/**
 * Finds search queries for which two or more pages of the site receive
 * impressions, using live data from the Search Console API.
 *
 * Rows are grouped by query. URLs that differ only by hash fragment are merged
 * into one page entry (clicks and impressions summed, position weighted by
 * impressions, ctr recomputed). Only queries with at least two pages and at
 * least 100 total impressions are kept; the 50 with the most impressions are
 * returned, each with its pages sorted by position, then impressions.
 *
 * The function is wrapped in `cacheable` with `60 * 60 * 4` as second argument
 * (presumably a TTL of four hours in seconds — confirm against `cacheable`).
 *
 * @throws Error when the project has no connection or no site URL configured,
 *   or when token retrieval or the API query fails.
 */
export const getGscCannibalization = cacheable(
  async (
    projectId: string,
    startDate: string,
    endDate: string
  ): Promise<GscCannibalizedQuery[]> => {
    const conn = await db.gscConnection.findUniqueOrThrow({
      where: { projectId },
    });
    // Same guard as syncGscData: without a site there is nothing to query, so
    // fail before refreshing a token or calling Google.
    if (!conn.siteUrl) {
      throw new Error('No GSC site URL configured for this project');
    }
    const accessToken = await getGscAccessToken(projectId);
    const rows = await queryGscSearchAnalytics(
      accessToken,
      conn.siteUrl,
      startDate,
      endDate,
      ['query', 'page']
    );
    const map = new Map<
      string,
      {
        totalImpressions: number;
        totalClicks: number;
        pages: GscCannibalizedQuery['pages'];
      }
    >();
    for (const row of rows) {
      const query = row.keys[0] ?? '';
      // Strip hash fragments — GSC records heading anchors (e.g. /page#section)
      // as separate URLs but Google treats them as the same page
      let page = row.keys[1] ?? '';
      try {
        const u = new URL(page);
        u.hash = '';
        page = u.toString();
      } catch {
        // Not an absolute URL: fall back to cutting at the first '#'.
        page = page.split('#')[0] ?? page;
      }
      const entry = map.get(query) ?? {
        totalImpressions: 0,
        totalClicks: 0,
        pages: [],
      };
      entry.totalImpressions += row.impressions;
      entry.totalClicks += row.clicks;
      // Merge into existing page entry if already seen (from a different hash variant)
      const existing = entry.pages.find((p) => p.page === page);
      if (existing) {
        const totalImpressions = existing.impressions + row.impressions;
        if (totalImpressions > 0) {
          // Impression-weighted mean; must run before impressions is updated.
          existing.position =
            (existing.position * existing.impressions +
              row.position * row.impressions) /
            totalImpressions;
        }
        existing.clicks += row.clicks;
        existing.impressions += row.impressions;
        existing.ctr =
          existing.impressions > 0 ? existing.clicks / existing.impressions : 0;
      } else {
        entry.pages.push({
          page,
          clicks: row.clicks,
          impressions: row.impressions,
          ctr: row.ctr,
          position: row.position,
        });
      }
      map.set(query, entry);
    }
    return [...map.entries()]
      .filter(([, v]) => v.pages.length >= 2 && v.totalImpressions >= 100)
      .sort(([, a], [, b]) => b.totalImpressions - a.totalImpressions)
      .slice(0, 50)
      .map(([query, v]) => ({
        query,
        totalImpressions: v.totalImpressions,
        totalClicks: v.totalClicks,
        pages: v.pages.sort((a, b) =>
          a.position !== b.position
            ? a.position - b.position
            : b.impressions - a.impressions
        ),
      }));
  },
  60 * 60 * 4
);
/**
 * Fetches live Search Console data for a single page: a per-date time series
 * and the queries that led to the page.
 *
 * Both API queries use an `equals` filter on the `page` dimension and run in
 * parallel.
 *
 * @param projectId - Project whose GSC connection and site are used.
 * @param page - Page URL to filter on, matched exactly.
 * @param startDate - Range start, passed through to the API.
 * @param endDate - Range end, passed through to the API.
 * @throws Error when the project has no connection or no site URL configured,
 *   or when token retrieval or an API query fails.
 */
export async function getGscPageDetails(
  projectId: string,
  page: string,
  startDate: string,
  endDate: string
): Promise<{
  timeseries: Array<{ date: string; clicks: number; impressions: number; ctr: number; position: number }>;
  queries: Array<{ query: string; clicks: number; impressions: number; ctr: number; position: number }>;
}> {
  const conn = await db.gscConnection.findUniqueOrThrow({ where: { projectId } });
  // Same guard as syncGscData: fail before refreshing a token or calling
  // Google when no site has been selected.
  if (!conn.siteUrl) {
    throw new Error('No GSC site URL configured for this project');
  }
  const accessToken = await getGscAccessToken(projectId);
  const filterGroups: GscFilterGroup[] = [
    { filters: [{ dimension: 'page', operator: 'equals', expression: page }] },
  ];
  const [timeseriesRows, queryRows] = await Promise.all([
    queryGscSearchAnalytics(accessToken, conn.siteUrl, startDate, endDate, ['date'], filterGroups),
    queryGscSearchAnalytics(accessToken, conn.siteUrl, startDate, endDate, ['query'], filterGroups),
  ]);
  return {
    timeseries: timeseriesRows.map((row) => ({
      date: row.keys[0] ?? '',
      clicks: row.clicks,
      impressions: row.impressions,
      ctr: row.ctr,
      position: row.position,
    })),
    queries: queryRows.map((row) => ({
      query: row.keys[0] ?? '',
      clicks: row.clicks,
      impressions: row.impressions,
      ctr: row.ctr,
      position: row.position,
    })),
  };
}
/**
 * Fetches live Search Console data for a single search query: a per-date time
 * series and the pages that were shown for it.
 *
 * Both API queries use an `equals` filter on the `query` dimension and run in
 * parallel.
 *
 * @param projectId - Project whose GSC connection and site are used.
 * @param query - Search query to filter on, matched exactly.
 * @param startDate - Range start, passed through to the API.
 * @param endDate - Range end, passed through to the API.
 * @throws Error when the project has no connection or no site URL configured,
 *   or when token retrieval or an API query fails.
 */
export async function getGscQueryDetails(
  projectId: string,
  query: string,
  startDate: string,
  endDate: string
): Promise<{
  timeseries: Array<{ date: string; clicks: number; impressions: number; ctr: number; position: number }>;
  pages: Array<{ page: string; clicks: number; impressions: number; ctr: number; position: number }>;
}> {
  const conn = await db.gscConnection.findUniqueOrThrow({ where: { projectId } });
  // Same guard as syncGscData: fail before refreshing a token or calling
  // Google when no site has been selected.
  if (!conn.siteUrl) {
    throw new Error('No GSC site URL configured for this project');
  }
  const accessToken = await getGscAccessToken(projectId);
  const filterGroups: GscFilterGroup[] = [
    { filters: [{ dimension: 'query', operator: 'equals', expression: query }] },
  ];
  const [timeseriesRows, pageRows] = await Promise.all([
    queryGscSearchAnalytics(accessToken, conn.siteUrl, startDate, endDate, ['date'], filterGroups),
    queryGscSearchAnalytics(accessToken, conn.siteUrl, startDate, endDate, ['page'], filterGroups),
  ]);
  return {
    timeseries: timeseriesRows.map((row) => ({
      date: row.keys[0] ?? '',
      clicks: row.clicks,
      impressions: row.impressions,
      ctr: row.ctr,
      position: row.position,
    })),
    pages: pageRows.map((row) => ({
      page: row.keys[0] ?? '',
      clicks: row.clicks,
      impressions: row.impressions,
      ctr: row.ctr,
      position: row.position,
    })),
  };
}
/**
 * Returns the top search queries by clicks from `gsc_queries_daily` for a
 * date range.
 *
 * `ctr` is total clicks divided by total impressions and `position` is
 * weighted by impressions, so days with little traffic do not skew the
 * result. Source columns are renamed in an inner query so the outer
 * aggregates cannot collide with the output aliases of the same name.
 *
 * @param projectId - Project to read.
 * @param startDate - Inclusive range start.
 * @param endDate - Inclusive range end.
 * @param limit - Maximum number of queries returned; defaults to 100.
 * @returns One row per query, ordered by clicks descending.
 */
export async function getGscQueries(
  projectId: string,
  startDate: string,
  endDate: string,
  limit = 100
): Promise<
  Array<{
    query: string;
    clicks: number;
    impressions: number;
    ctr: number;
    position: number;
  }>
> {
  const result = await originalCh.query({
    query: `
      SELECT
        query,
        sum(row_clicks) as clicks,
        sum(row_impressions) as impressions,
        if(sum(row_impressions) > 0, sum(row_clicks) / sum(row_impressions), 0) as ctr,
        if(
          sum(row_impressions) > 0,
          sum(row_position * row_impressions) / sum(row_impressions),
          avg(row_position)
        ) as position
      FROM (
        SELECT
          query,
          clicks as row_clicks,
          impressions as row_impressions,
          position as row_position
        FROM gsc_queries_daily
        FINAL
        WHERE project_id = {projectId: String}
          AND date >= {startDate: String}
          AND date <= {endDate: String}
      )
      GROUP BY query
      ORDER BY clicks DESC
      LIMIT {limit: UInt32}
    `,
    query_params: { projectId, startDate, endDate, limit },
    format: 'JSONEachRow',
  });
  return result.json();
}

View File

@@ -1,4 +1,5 @@
import { TABLE_NAMES, ch } from '../clickhouse/client';
import type { IInterval } from '@openpanel/validation';
import { ch, TABLE_NAMES } from '../clickhouse/client';
import { clix } from '../clickhouse/query-builder';
export interface IGetPagesInput {
@@ -7,6 +8,15 @@ export interface IGetPagesInput {
endDate: string;
timezone: string;
search?: string;
limit?: number;
}
/** One row returned by `PagesService.getPageTimeseries`: counts for one origin/path in one time bucket. */
export interface IPageTimeseriesRow {
origin: string;
path: string;
// Start of the time bucket, as produced by the query's date expression.
date: string;
// Number of matching `screen_view` events (`count()`).
pageviews: number;
// Number of distinct session ids (`uniq(e.session_id)`).
sessions: number;
}
export interface ITopPage {
@@ -28,6 +38,7 @@ export class PagesService {
endDate,
timezone,
search,
limit,
}: IGetPagesInput): Promise<ITopPage[]> {
// CTE: Get titles from the last 30 days for faster retrieval
const titlesCte = clix(this.client, timezone)
@@ -72,7 +83,7 @@ export class PagesService {
.leftJoin(
sessionsSubquery,
'e.session_id = s.id AND e.project_id = s.project_id',
's',
's'
)
.leftJoin('page_titles pt', 'concat(e.origin, e.path) = pt.page_key')
.where('e.project_id', '=', projectId)
@@ -83,14 +94,69 @@ export class PagesService {
clix.datetime(endDate, 'toDateTime'),
])
.when(!!search, (q) => {
q.where('e.path', 'LIKE', `%${search}%`);
const term = `%${search}%`;
q.whereGroup()
.where('e.path', 'LIKE', term)
.orWhere('e.origin', 'LIKE', term)
.orWhere('pt.title', 'LIKE', term)
.end();
})
.groupBy(['e.origin', 'e.path', 'pt.title'])
.orderBy('sessions', 'DESC')
.limit(1000);
.orderBy('sessions', 'DESC');
if (limit !== undefined) {
query.limit(limit);
}
return query.execute();
}
/**
 * Builds and executes a per-page time series over `screen_view` events: for
 * every origin/path and time bucket, the event count and the number of
 * distinct session ids.
 *
 * Only `projectId`, `startDate`, `endDate` and `timezone` are read from
 * `IGetPagesInput`; its `search` and `limit` fields are not used here.
 *
 * @param interval - Bucket size passed to `clix.toStartOf` / `clix.toInterval`.
 * @param filterOrigin - When truthy, restricts rows to this exact origin.
 * @param filterPath - When truthy, restricts rows to this exact path.
 * @returns Rows ordered by bucket ascending.
 */
async getPageTimeseries({
projectId,
startDate,
endDate,
timezone,
interval,
filterOrigin,
filterPath,
}: IGetPagesInput & {
interval: IInterval;
filterOrigin?: string;
filterPath?: string;
}): Promise<IPageTimeseriesRow[]> {
const dateExpr = clix.toStartOf('e.created_at', interval, timezone);
// Week and month buckets build their fill bounds with `toDate`; every other
// interval uses `toDateTime`.
const useDateOnly = interval === 'month' || interval === 'week';
const fillFrom = clix.toStartOf(
clix.datetime(startDate, useDateOnly ? 'toDate' : 'toDateTime'),
interval
);
const fillTo = clix.datetime(
endDate,
useDateOnly ? 'toDate' : 'toDateTime'
);
const fillStep = clix.toInterval('1', interval);
return clix(this.client, timezone)
.select<IPageTimeseriesRow>([
'e.origin as origin',
'e.path as path',
`${dateExpr} AS date`,
'count() as pageviews',
'uniq(e.session_id) as sessions',
])
.from(`${TABLE_NAMES.events} e`, false)
.where('e.project_id', '=', projectId)
.where('e.name', '=', 'screen_view')
.where('e.path', '!=', '')
.where('e.created_at', 'BETWEEN', [
clix.datetime(startDate, 'toDateTime'),
clix.datetime(endDate, 'toDateTime'),
])
// The `!` assertions are backed by the `!!` checks passed to `.when`.
.when(!!filterOrigin, (q) => q.where('e.origin', '=', filterOrigin!))
.when(!!filterPath, (q) => q.where('e.path', '=', filterPath!))
.groupBy(['e.origin', 'e.path', 'date'])
.orderBy('date', 'ASC')
// NOTE(review): presumably fills missing buckets between fillFrom and fillTo
// in fillStep increments — confirm against the clix builder.
.fill(fillFrom, fillTo, fillStep)
.execute();
}
}
// Shared PagesService instance constructed with the `ch` client imported from '../clickhouse/client'.
export const pagesService = new PagesService(ch);