feat: new importer (#214)
This commit is contained in:
committed by
GitHub
parent
b51bc8f3f6
commit
212254d31a
@@ -35,6 +35,7 @@ COPY packages/redis/package.json ./packages/redis/
|
||||
COPY packages/queue/package.json ./packages/queue/
|
||||
COPY packages/logger/package.json ./packages/logger/
|
||||
COPY packages/common/package.json ./packages/common/
|
||||
COPY packages/importer/package.json ./packages/importer/
|
||||
COPY packages/constants/package.json ./packages/constants/
|
||||
COPY packages/validation/package.json ./packages/validation/
|
||||
COPY packages/integrations/package.json packages/integrations/
|
||||
@@ -80,9 +81,10 @@ COPY --from=build /app/packages/geo ./packages/geo
|
||||
COPY --from=build /app/packages/json ./packages/json
|
||||
COPY --from=build /app/packages/email ./packages/email
|
||||
COPY --from=build /app/packages/redis ./packages/redis
|
||||
COPY --from=build /app/packages/logger ./packages/logger
|
||||
COPY --from=build /app/packages/queue ./packages/queue
|
||||
COPY --from=build /app/packages/logger ./packages/logger
|
||||
COPY --from=build /app/packages/common ./packages/common
|
||||
COPY --from=build /app/packages/importer ./packages/importer
|
||||
COPY --from=build /app/packages/validation ./packages/validation
|
||||
COPY --from=build /app/packages/integrations ./packages/integrations
|
||||
COPY --from=build /app/tooling/typescript ./tooling/typescript
|
||||
|
||||
@@ -8,8 +8,7 @@
|
||||
"testing": "WORKER_PORT=9999 pnpm dev",
|
||||
"start": "node dist/index.js",
|
||||
"build": "rm -rf dist && tsdown",
|
||||
"typecheck": "tsc --noEmit",
|
||||
"gen:referrers": "jiti scripts/get-referrers.ts && biome format --write ./src/referrers/index.ts"
|
||||
"typecheck": "tsc --noEmit"
|
||||
},
|
||||
"dependencies": {
|
||||
"@bull-board/api": "6.13.1",
|
||||
@@ -20,6 +19,7 @@
|
||||
"@openpanel/integrations": "workspace:^",
|
||||
"@openpanel/json": "workspace:*",
|
||||
"@openpanel/logger": "workspace:*",
|
||||
"@openpanel/importer": "workspace:*",
|
||||
"@openpanel/queue": "workspace:*",
|
||||
"@openpanel/redis": "workspace:*",
|
||||
"bullmq": "^5.8.7",
|
||||
@@ -38,7 +38,7 @@
|
||||
"@types/source-map-support": "^0.5.10",
|
||||
"@types/sqlstring": "^2.3.2",
|
||||
"@types/uuid": "^9.0.8",
|
||||
"tsdown": "^0.14.2",
|
||||
"tsdown": "0.14.2",
|
||||
"typescript": "catalog:"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
type EventsQueuePayloadIncomingEvent,
|
||||
cronQueue,
|
||||
eventsGroupQueue,
|
||||
importQueue,
|
||||
miscQueue,
|
||||
notificationQueue,
|
||||
queueLogger,
|
||||
@@ -19,6 +20,7 @@ import { Worker as GroupWorker } from 'groupmq';
|
||||
import { cronJob } from './jobs/cron';
|
||||
import { eventsJob } from './jobs/events';
|
||||
import { incomingEventPure } from './jobs/events.incoming-event';
|
||||
import { importJob } from './jobs/import';
|
||||
import { miscJob } from './jobs/misc';
|
||||
import { notificationJob } from './jobs/notification';
|
||||
import { sessionsJob } from './jobs/sessions';
|
||||
@@ -56,13 +58,18 @@ export async function bootWorkers() {
|
||||
workerOptions,
|
||||
);
|
||||
const miscWorker = new Worker(miscQueue.name, miscJob, workerOptions);
|
||||
const importWorker = new Worker(importQueue.name, importJob, {
|
||||
...workerOptions,
|
||||
concurrency: Number.parseInt(process.env.IMPORT_JOB_CONCURRENCY || '1', 10),
|
||||
});
|
||||
|
||||
const workers = [
|
||||
sessionsWorker,
|
||||
cronWorker,
|
||||
notificationWorker,
|
||||
miscWorker,
|
||||
eventsGroupWorker,
|
||||
importWorker,
|
||||
// eventsGroupWorker,
|
||||
];
|
||||
|
||||
workers.forEach((worker) => {
|
||||
@@ -148,7 +155,15 @@ export async function bootWorkers() {
|
||||
['uncaughtException', 'unhandledRejection', 'SIGTERM', 'SIGINT'].forEach(
|
||||
(evt) => {
|
||||
process.on(evt, (code) => {
|
||||
exitHandler(evt, code);
|
||||
if (process.env.NODE_ENV === 'production') {
|
||||
exitHandler(evt, code);
|
||||
} else {
|
||||
logger.info('Shutting down for development', {
|
||||
event: evt,
|
||||
code,
|
||||
});
|
||||
process.exit(0);
|
||||
}
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
@@ -5,6 +5,7 @@ import { createInitialSalts } from '@openpanel/db';
|
||||
import {
|
||||
cronQueue,
|
||||
eventsGroupQueue,
|
||||
importQueue,
|
||||
miscQueue,
|
||||
notificationQueue,
|
||||
sessionsQueue,
|
||||
@@ -31,13 +32,14 @@ async function start() {
|
||||
if (process.env.DISABLE_BULLBOARD === undefined) {
|
||||
const serverAdapter = new ExpressAdapter();
|
||||
serverAdapter.setBasePath('/');
|
||||
({
|
||||
createBullBoard({
|
||||
queues: [
|
||||
new BullBoardGroupMQAdapter(eventsGroupQueue) as any,
|
||||
new BullMQAdapter(sessionsQueue),
|
||||
new BullMQAdapter(cronQueue),
|
||||
new BullMQAdapter(notificationQueue),
|
||||
new BullMQAdapter(miscQueue),
|
||||
new BullMQAdapter(importQueue),
|
||||
],
|
||||
serverAdapter: serverAdapter,
|
||||
});
|
||||
|
||||
@@ -54,7 +54,7 @@ export async function deleteProjects(job: Job<CronQueuePayload>) {
|
||||
await ch.command({
|
||||
query,
|
||||
clickhouse_settings: {
|
||||
lightweight_deletes_sync: 0,
|
||||
lightweight_deletes_sync: '0',
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
import { logger as baseLogger } from '@/utils/logger';
|
||||
import { getReferrerWithQuery, parseReferrer } from '@/utils/parse-referrer';
|
||||
import {
|
||||
createSessionEndJob,
|
||||
createSessionStart,
|
||||
getSessionEnd,
|
||||
} from '@/utils/session-handler';
|
||||
import { isSameDomain, parsePath } from '@openpanel/common';
|
||||
import { parseUserAgent } from '@openpanel/common/server';
|
||||
import {
|
||||
getReferrerWithQuery,
|
||||
parseReferrer,
|
||||
parseUserAgent,
|
||||
} from '@openpanel/common/server';
|
||||
import type { IServiceCreateEventPayload, IServiceEvent } from '@openpanel/db';
|
||||
import {
|
||||
checkNotificationRulesForEvent,
|
||||
@@ -15,10 +18,9 @@ import {
|
||||
} from '@openpanel/db';
|
||||
import type { ILogger } from '@openpanel/logger';
|
||||
import type { EventsQueuePayloadIncomingEvent } from '@openpanel/queue';
|
||||
import { getLock } from '@openpanel/redis';
|
||||
import { DelayedError, type Job } from 'bullmq';
|
||||
import { omit } from 'ramda';
|
||||
import type { Job } from 'bullmq';
|
||||
import * as R from 'ramda';
|
||||
import { omit } from 'ramda';
|
||||
import { v4 as uuid } from 'uuid';
|
||||
|
||||
const GLOBAL_PROPERTIES = ['__path', '__referrer'];
|
||||
@@ -115,9 +117,9 @@ export async function incomingEventPure(
|
||||
latitude: geo.latitude,
|
||||
path,
|
||||
origin,
|
||||
referrer: utmReferrer?.url || referrer?.url || '',
|
||||
referrer: referrer?.url || '',
|
||||
referrerName: utmReferrer?.name || referrer?.name || '',
|
||||
referrerType: utmReferrer?.type || referrer?.type || '',
|
||||
referrerType: referrer?.type || utmReferrer?.type || '',
|
||||
os: uaInfo.os,
|
||||
osVersion: uaInfo.osVersion,
|
||||
browser: uaInfo.browser,
|
||||
|
||||
@@ -99,7 +99,7 @@ describe('incomingEvent', () => {
|
||||
origin: 'https://example.com',
|
||||
referrer: '',
|
||||
referrerName: '',
|
||||
referrerType: 'unknown',
|
||||
referrerType: '',
|
||||
sdkName: jobData.payload.headers['openpanel-sdk-name'],
|
||||
sdkVersion: jobData.payload.headers['openpanel-sdk-version'],
|
||||
};
|
||||
@@ -207,7 +207,7 @@ describe('incomingEvent', () => {
|
||||
origin: 'https://example.com',
|
||||
referrer: '',
|
||||
referrerName: '',
|
||||
referrerType: 'unknown',
|
||||
referrerType: '',
|
||||
sdkName: jobData.payload.headers['openpanel-sdk-name'],
|
||||
sdkVersion: jobData.payload.headers['openpanel-sdk-version'],
|
||||
};
|
||||
|
||||
332
apps/worker/src/jobs/import.ts
Normal file
332
apps/worker/src/jobs/import.ts
Normal file
@@ -0,0 +1,332 @@
|
||||
import {
|
||||
type IClickhouseEvent,
|
||||
type ImportSteps,
|
||||
type Prisma,
|
||||
backfillSessionsToProduction,
|
||||
createSessionsStartEndEvents,
|
||||
db,
|
||||
formatClickhouseDate,
|
||||
generateSessionIds,
|
||||
getImportDateBounds,
|
||||
getImportProgress,
|
||||
insertImportBatch,
|
||||
markImportComplete,
|
||||
moveImportsToProduction,
|
||||
updateImportStatus,
|
||||
} from '@openpanel/db';
|
||||
import { MixpanelProvider, UmamiProvider } from '@openpanel/importer';
|
||||
import type { ILogger } from '@openpanel/logger';
|
||||
import type { ImportQueuePayload } from '@openpanel/queue';
|
||||
import type { Job } from 'bullmq';
|
||||
import { logger } from '../utils/logger';
|
||||
|
||||
const BATCH_SIZE = Number.parseInt(process.env.IMPORT_BATCH_SIZE || '5000', 10);
|
||||
|
||||
/**
 * Pauses briefly so the event loop can service other work between chunks of
 * an import; intended to keep long-running jobs from being flagged as stalled.
 *
 * @param delayMs - Time to wait before resolving, in milliseconds. Defaults to
 *   100, which is the delay this helper always used.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function yieldToEventLoop(delayMs = 100): Promise<void> {
  await new Promise<void>((resolve) => {
    setTimeout(resolve, delayMs);
  });
}
|
||||
|
||||
/**
 * Worker entry point for a single import.
 *
 * Loads the import record, builds the matching provider and then runs the
 * import in ordered phases: loading, generating session ids (only when the
 * provider asks for it), creating sessions, moving and backfilling sessions.
 * After every batch / day-window the current step and batch are persisted via
 * `updateImportStatus`, and a job whose record already has a `currentStep`
 * skips the phases that come before that step.
 *
 * @param job - Queue job whose payload carries the `importId` to process.
 * @returns `{ success, totalEvents, processedEvents }` taken from the final
 *   import progress.
 * @throws Rethrows any failure after attempting to mark the import as failed.
 */
export async function importJob(job: Job<ImportQueuePayload>) {
  const { importId } = job.data.payload;

  const record = await db.import.findUniqueOrThrow({
    where: { id: importId },
    include: {
      project: true,
    },
  });

  const jobLogger = logger.child({
    importId,
    config: record.config,
  });

  type ValidStep = Exclude<ImportSteps, 'failed' | 'completed'>;
  // Execution order of the phases; used to decide what a resumed job may skip.
  const steps: Record<ValidStep, number> = {
    loading: 0,
    generating_session_ids: 1,
    creating_sessions: 2,
    moving: 3,
    backfilling_sessions: 4,
  };

  jobLogger.info('Starting import job');

  try {
    // Built inside the try so that an unknown provider is recorded as a
    // failed import instead of escaping before the failure handler exists.
    const providerInstance = createProvider(record, jobLogger);

    // Check if this is a resume operation
    const isNewImport = record.currentStep === null;

    if (isNewImport) {
      await updateImportStatus(jobLogger, job, importId, {
        step: 'loading',
      });
    } else {
      jobLogger.info('Resuming import from previous state', {
        currentStep: record.currentStep,
        currentBatch: record.currentBatch,
      });
    }

    // Try to get a precomputed total for better progress reporting.
    // -1 means "unknown"; the import itself does not depend on it.
    const totalEvents = await providerInstance
      .getTotalEventsCount()
      .catch((error: unknown) => {
        jobLogger.warn('Could not determine total events count', { error });
        return -1;
      });
    let processedEvents = record.processedEvents;

    // Batch to resume from: only defined for the step the record stopped in.
    const resumeFrom = (step: ValidStep) =>
      (record.currentStep === step && record.currentBatch) || undefined;

    // Example:
    // shouldRunStep(0) // currStep = 2 (should not run)
    // shouldRunStep(1) // currStep = 2 (should not run)
    // shouldRunStep(2) // currStep = 2 (should run)
    // shouldRunStep(3) // currStep = 2 (should run)
    // NOTE(review): when currentStep is 'failed' or 'completed' the lookup
    // below is undefined, the comparison is false and every phase is skipped
    // before the import is marked complete — confirm this is intended.
    const shouldRunStep = (step: ValidStep) => {
      if (isNewImport) {
        return true;
      }

      const stepToRunIndex = steps[step];
      const currentStepIndex = steps[record.currentStep as ValidStep];
      return stepToRunIndex >= currentStepIndex;
    };

    // Walks the import's date bounds one day at a time, invoking `callback`
    // with each [cursor, cursor + 1 day) window and pausing after every window.
    async function whileBounds(
      from: string | undefined,
      callback: (from: string, to: string) => Promise<void>,
    ) {
      const bounds = await getImportDateBounds(importId, from);
      if (bounds.min && bounds.max) {
        const start = new Date(bounds.min);
        const end = new Date(bounds.max);
        let cursor = new Date(start);
        while (cursor < end) {
          const next = new Date(cursor);
          next.setDate(next.getDate() + 1);
          await callback(
            formatClickhouseDate(cursor, true),
            formatClickhouseDate(next, true),
          );
          cursor = next;

          // Yield control back to event loop after processing each day
          await yieldToEventLoop();
        }
      }
    }

    // Phase 1: Fetch & Transform - Process events in batches
    if (shouldRunStep('loading')) {
      const eventBatch: any = [];

      // Transforms and inserts whatever is buffered, persists progress
      // (including the running event counters) and empties the buffer.
      const flushBatch = async () => {
        jobLogger.info('Processing batch', { batchSize: eventBatch.length });

        const transformedEvents: IClickhouseEvent[] = eventBatch.map(
          (
            // @ts-expect-error
            event,
          ) => providerInstance.transformEvent(event),
        );

        await insertImportBatch(transformedEvents, importId);

        processedEvents += eventBatch.length;
        eventBatch.length = 0;

        // The day of the batch's first event is the resume checkpoint.
        const createdAt = new Date(transformedEvents[0]?.created_at || '')
          .toISOString()
          .split('T')[0];

        await updateImportStatus(jobLogger, job, importId, {
          step: 'loading',
          batch: createdAt,
          totalEvents,
          processedEvents,
        });

        // Yield control back to event loop after processing each batch
        await yieldToEventLoop();
      };

      for await (const rawEvent of providerInstance.parseSource(
        resumeFrom('loading'),
      )) {
        // Validate event
        if (
          !providerInstance.validate(
            // @ts-expect-error
            rawEvent,
          )
        ) {
          jobLogger.warn('Skipping invalid event', { rawEvent });
          continue;
        }

        eventBatch.push(rawEvent);

        // Process batch when it reaches the batch size
        if (eventBatch.length >= BATCH_SIZE) {
          await flushBatch();
        }
      }

      // Process remaining events in the last batch
      if (eventBatch.length > 0) {
        await flushBatch();
      }
    }

    // Phase 2: Generate session IDs if provider requires it
    if (
      shouldRunStep('generating_session_ids') &&
      providerInstance.shouldGenerateSessionIds()
    ) {
      await whileBounds(resumeFrom('generating_session_ids'), async (from) => {
        jobLogger.info('Generating session IDs', { from });
        await generateSessionIds(importId, from);
        await updateImportStatus(jobLogger, job, importId, {
          step: 'generating_session_ids',
          batch: from,
        });
      });

      jobLogger.info('Session ID generation complete');
    }

    // Phase 3-5: Process in daily batches for robustness
    // (whileBounds already pauses after each day, so the callbacks do not.)

    if (shouldRunStep('creating_sessions')) {
      await whileBounds(resumeFrom('creating_sessions'), async (from) => {
        await createSessionsStartEndEvents(importId, from);
        await updateImportStatus(jobLogger, job, importId, {
          step: 'creating_sessions',
          batch: from,
        });
      });
    }

    if (shouldRunStep('moving')) {
      await whileBounds(resumeFrom('moving'), async (from) => {
        await moveImportsToProduction(importId, from);
        await updateImportStatus(jobLogger, job, importId, {
          step: 'moving',
          batch: from,
        });
      });
    }

    if (shouldRunStep('backfilling_sessions')) {
      await whileBounds(resumeFrom('backfilling_sessions'), async (from) => {
        await backfillSessionsToProduction(importId, from);
        await updateImportStatus(jobLogger, job, importId, {
          step: 'backfilling_sessions',
          batch: from,
        });
      });
    }

    await markImportComplete(importId);
    await updateImportStatus(jobLogger, job, importId, {
      step: 'completed',
    });
    jobLogger.info('Import marked as complete');

    // Get final progress
    const finalProgress = await getImportProgress(importId);

    jobLogger.info('Import job completed successfully', {
      totalEvents: finalProgress.totalEvents,
      insertedEvents: finalProgress.insertedEvents,
      status: finalProgress.status,
    });

    return {
      success: true,
      totalEvents: finalProgress.totalEvents,
      processedEvents: finalProgress.insertedEvents,
    };
  } catch (error) {
    jobLogger.error('Import job failed', { error });

    // Mark import as failed; a failure here is logged but must not hide the
    // original error, which is rethrown below.
    try {
      const errorMsg = error instanceof Error ? error.message : 'Unknown error';
      await updateImportStatus(jobLogger, job, importId, {
        step: 'failed',
        errorMessage: errorMsg,
      });
      jobLogger.warn('Import marked as failed', { error: errorMsg });
    } catch (markError) {
      jobLogger.error('Failed to mark import as failed', { error, markError });
    }

    throw error;
  }
}
|
||||
|
||||
/**
 * Instantiates the importer that matches the provider named in the import's
 * stored config.
 *
 * @param record - Import row (with its project) whose `config` selects the provider.
 * @param jobLogger - Logger handed to the provider instance.
 * @returns A `MixpanelProvider` or `UmamiProvider` bound to the record's project.
 * @throws Error when the configured provider is not one of the cases below.
 */
function createProvider(
  record: Prisma.ImportGetPayload<{ include: { project: true } }>,
  jobLogger: ILogger,
) {
  const { config, projectId } = record;

  switch (config.provider) {
    case 'mixpanel': {
      return new MixpanelProvider(projectId, config, jobLogger);
    }
    case 'umami': {
      return new UmamiProvider(projectId, config, jobLogger);
    }
    default: {
      throw new Error(`Unknown provider: ${config.provider}`);
    }
  }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +0,0 @@
|
||||
# Snowplow Referer Parser
|
||||
|
||||
The file index.ts in this directory is generated from Snowplow's referer database, [Snowplow Referer Parser](https://github.com/snowplow-referer-parser/referer-parser).
|
||||
|
||||
The original [referers.yml](https://github.com/snowplow-referer-parser/referer-parser/blob/master/resources/referers.yml) is based on Piwik's SearchEngines.php and Socials.php, copyright 2012 Matthieu Aubry and available under the GNU General Public License v3.
|
||||
@@ -1,117 +0,0 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { getReferrerWithQuery, parseReferrer } from './parse-referrer';
|
||||
|
||||
describe('parseReferrer', () => {
  // Result shape expected for any URL the referrer table does not recognise.
  const unmatched = (url: string) => ({ name: '', type: 'unknown', url });

  it('should handle undefined or empty URLs', () => {
    const blank = unmatched('');

    expect(parseReferrer(undefined)).toEqual(blank);
    expect(parseReferrer('')).toEqual(blank);
  });

  it('should parse valid referrer URLs', () => {
    const url = 'https://google.com/search?q=test';

    expect(parseReferrer(url)).toEqual({ name: 'Google', type: 'search', url });
  });

  it('should handle www prefix in hostnames', () => {
    // Same site with and without the "www." prefix must resolve identically.
    const urls = ['https://www.twitter.com/user', 'https://twitter.com/user'];

    for (const url of urls) {
      expect(parseReferrer(url)).toEqual({
        name: 'Twitter',
        type: 'social',
        url,
      });
    }
  });

  it('should handle unknown referrers', () => {
    const url = 'https://unknown-site.com';

    expect(parseReferrer(url)).toEqual(unmatched(url));
  });

  it('should handle invalid URLs', () => {
    const url = 'not-a-url';

    expect(parseReferrer(url)).toEqual(unmatched(url));
  });
});
|
||||
|
||||
describe('getReferrerWithQuery', () => {
  // Query-derived referrers never carry a URL, only a name and a type.
  const resolved = (name: string, type: string) => ({ name, type, url: '' });

  it('should handle undefined or empty query', () => {
    const emptyInputs = [undefined, {}];

    for (const query of emptyInputs) {
      expect(getReferrerWithQuery(query)).toBeNull();
    }
  });

  it('should parse utm_source parameter', () => {
    const result = getReferrerWithQuery({ utm_source: 'google' });

    expect(result).toEqual(resolved('Google', 'unknown'));
  });

  it('should parse ref parameter', () => {
    const result = getReferrerWithQuery({ ref: 'facebook' });

    expect(result).toEqual(resolved('Facebook', 'social'));
  });

  it('should parse utm_referrer parameter', () => {
    const result = getReferrerWithQuery({ utm_referrer: 'twitter' });

    expect(result).toEqual(resolved('Twitter', 'social'));
  });

  it('should handle case-insensitive matching', () => {
    const result = getReferrerWithQuery({ utm_source: 'GoOgLe' });

    expect(result).toEqual(resolved('Google', 'unknown'));
  });

  it('should handle unknown sources', () => {
    const result = getReferrerWithQuery({ utm_source: 'unknown-source' });

    expect(result).toEqual(resolved('unknown-source', 'unknown'));
  });

  it('should prioritize utm_source over ref and utm_referrer', () => {
    const query = {
      utm_source: 'google',
      ref: 'facebook',
      utm_referrer: 'twitter',
    };

    expect(getReferrerWithQuery(query)).toEqual(resolved('Google', 'unknown'));
  });
});
|
||||
@@ -1,59 +0,0 @@
|
||||
import { stripTrailingSlash } from '@openpanel/common';
|
||||
|
||||
import referrers from '../referrers';
|
||||
|
||||
/**
 * Extracts the hostname from a URL string.
 *
 * @param url - Absolute URL to inspect; may be undefined or empty.
 * @returns The URL's hostname, or '' when `url` is missing or cannot be parsed.
 */
function getHostname(url: string | undefined) {
  if (!url) {
    return '';
  }

  try {
    return new URL(url).hostname;
  } catch {
    // `new URL` throws on relative or malformed input; treat it as "no hostname".
    return '';
  }
}
|
||||
|
||||
/**
 * Classifies a referrer URL against the referrer table.
 *
 * The hostname is looked up as-is first and then without a leading "www.".
 *
 * @param url - Referrer URL; may be undefined, empty or not a valid URL.
 * @returns `name` and `type` of the matching entry ('' and 'unknown' when
 *   nothing matches) plus the input URL with its trailing slash stripped.
 */
export function parseReferrer(url: string | undefined) {
  const hostname = getHostname(url);

  // Own-property lookup only: a plain `referrers[key]` would also resolve
  // inherited members such as `constructor` for a host literally named that.
  const lookup = (key: string) =>
    Object.prototype.hasOwnProperty.call(referrers, key)
      ? referrers[key]
      : undefined;

  // Anchor the prefix: a string pattern would strip the first "www." found
  // anywhere in the hostname (e.g. "awww.example.com" -> "aexample.com").
  const match = lookup(hostname) ?? lookup(hostname.replace(/^www\./, ''));

  return {
    name: match?.name ?? '',
    type: match?.type ?? 'unknown',
    url: stripTrailingSlash(url ?? ''),
  };
}
|
||||
|
||||
/**
 * Derives a referrer from campaign query parameters.
 *
 * The first non-empty value of `utm_source`, `ref` and `utm_referrer` (in that
 * order) is matched case-insensitively against the names in the referrer
 * table, then as an exact key of that table.
 *
 * @param query - Parsed query-string parameters, if any.
 * @returns `null` when no source parameter is present; otherwise the matched
 *   entry's `name` and `type`, or the raw source with type 'unknown' when
 *   nothing matches. `url` is always ''.
 */
export function getReferrerWithQuery(
  query: Record<string, string> | undefined,
) {
  if (!query) {
    return null;
  }

  // `||` rather than `??`: an empty `utm_source=` must not mask a usable
  // `ref` or `utm_referrer` value.
  const source = query.utm_source || query.ref || query.utm_referrer || '';

  if (source === '') {
    return null;
  }

  // Lower-cased once instead of once per table entry.
  const needle = source.toLowerCase();

  // The key fallback is restricted to own properties so that sources such as
  // "constructor" do not resolve to inherited Object.prototype members.
  const match =
    Object.values(referrers).find(
      (referrer) => referrer.name.toLowerCase() === needle,
    ) ||
    (Object.prototype.hasOwnProperty.call(referrers, source)
      ? referrers[source]
      : undefined);

  if (match) {
    return {
      name: match.name,
      type: match.type,
      url: '',
    };
  }

  return {
    name: source,
    type: 'unknown',
    url: '',
  };
}
|
||||
Reference in New Issue
Block a user