feat: new importer (#214)
This commit is contained in:
committed by
GitHub
parent
b51bc8f3f6
commit
212254d31a
121
packages/importer/src/base-provider.ts
Normal file
121
packages/importer/src/base-provider.ts
Normal file
@@ -0,0 +1,121 @@
|
||||
import type { IClickhouseEvent } from '@openpanel/db';
|
||||
import type { BaseRawEvent, ErrorContext, ImportJobMetadata } from './types';
|
||||
|
||||
/**
 * Base class for import providers.
 *
 * A provider streams raw events from a source (`parseSource`), checks them
 * (`validate`) and converts them to `IClickhouseEvent` (`transformEvent`).
 * The optional hooks have default implementations that subclasses may
 * override.
 */
export abstract class BaseImportProvider<
  TRawEvent extends BaseRawEvent = BaseRawEvent,
> {
  abstract provider: string;
  abstract version: string;

  /**
   * Stream-read and parse source (file/API) → yields raw events.
   * This should be implemented as an async generator to handle large files
   * efficiently.
   *
   * @param overrideFrom - Optional replacement for the configured start of
   *   the import; how it is interpreted is up to the implementation.
   */
  abstract parseSource(
    overrideFrom?: string,
  ): AsyncGenerator<TRawEvent, void, unknown>;

  /**
   * Convert provider format → IClickhouseEvent.
   */
  abstract transformEvent(rawEvent: TRawEvent): IClickhouseEvent;

  /**
   * Validate raw event structure.
   *
   * @returns `true` when the raw event is structurally valid.
   */
  abstract validate(rawEvent: TRawEvent): boolean;

  /**
   * Returns how many events will be imported.
   */
  abstract getTotalEventsCount(): Promise<number>;

  /**
   * Optional hook: pre-process a batch. The default returns the batch
   * unchanged.
   */
  async beforeBatch?(events: TRawEvent[]): Promise<TRawEvent[]> {
    return events;
  }

  /**
   * Optional hook: get import metadata for tracking.
   */
  getImportMetadata?(): ImportJobMetadata;

  /**
   * Optional hook: custom error handling. The default re-throws the error.
   */
  async onError?(error: Error, context?: ErrorContext): Promise<void> {
    // Default: re-throw
    throw error;
  }

  /**
   * Optional hook: estimated total events, for progress tracking.
   * The default returns 0.
   */
  async getEstimatedTotal?(): Promise<number> {
    return 0;
  }

  /**
   * Indicates whether session IDs should be generated in SQL after import.
   * If true, the import job will generate deterministic session IDs based on
   * device_id and timestamp using SQL window functions.
   * If false, assumes the provider already generates session IDs during
   * streaming.
   */
  shouldGenerateSessionIds(): boolean {
    return false; // Default: assume provider handles it
  }

  /**
   * Utility: split a date range into chunks to avoid timeout issues with
   * large imports.
   *
   * All arithmetic is done in UTC. The inputs are parsed by `new Date(...)`
   * (date-only strings are UTC midnight) and formatted via `toISOString()`,
   * so stepping in local time would repeat or skip days around DST changes.
   *
   * @param from - Start date in YYYY-MM-DD format (inclusive)
   * @param to - End date in YYYY-MM-DD format (inclusive)
   * @param options - Optional settings
   * @param options.chunkSizeDays - Number of days per chunk (default: 1)
   * @returns Array of [from, to] date pairs in YYYY-MM-DD format. Empty when
   *   `from` is after `to` or either date cannot be parsed.
   * @throws RangeError when the range has to be iterated and
   *   `chunkSizeDays` is not a positive integer (it would never advance).
   */
  public getDateChunks(
    from: string,
    to: string,
    options?: {
      chunkSizeDays?: number;
    },
  ): Array<[string, string]> {
    const startDate = new Date(from);
    const endDate = new Date(to);
    const chunkSizeDays = options?.chunkSizeDays ?? 1;

    // Handle case where from and to are the same date
    if (startDate.getTime() === endDate.getTime()) {
      return [[from, to]];
    }

    // A step of 0, a negative step or a fractional step (truncated by
    // setUTCDate) would keep the cursor in place and loop forever.
    if (!Number.isInteger(chunkSizeDays) || chunkSizeDays < 1) {
      throw new RangeError(
        `chunkSizeDays must be a positive integer, received ${chunkSizeDays}`,
      );
    }

    const toDay = (date: Date): string => {
      const [day = ''] = date.toISOString().split('T');
      return day;
    };

    const chunks: Array<[string, string]> = [];
    const cursor = new Date(startDate);

    // Comparisons involving an invalid date are false, so unparsable input
    // yields an empty result instead of throwing.
    while (cursor <= endDate) {
      // Last day of the chunk: (chunkSizeDays - 1) days after its first day
      const chunkEndDate = new Date(cursor);
      chunkEndDate.setUTCDate(chunkEndDate.getUTCDate() + (chunkSizeDays - 1));

      // Don't go past the end date
      const lastDay = chunkEndDate > endDate ? endDate : chunkEndDate;
      chunks.push([toDay(cursor), toDay(lastDay)]);

      // Move cursor to the first day after the current chunk
      cursor.setUTCDate(cursor.getUTCDate() + chunkSizeDays);
    }

    return chunks;
  }
}
|
||||
13
packages/importer/src/index.ts
Normal file
13
packages/importer/src/index.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
export { UmamiProvider } from './providers/umami';
|
||||
export { MixpanelProvider } from './providers/mixpanel';
|
||||
export type {
|
||||
ImportConfig,
|
||||
ImportProgress,
|
||||
ImportResult,
|
||||
BatchResult,
|
||||
BaseRawEvent,
|
||||
ErrorContext,
|
||||
EventProperties,
|
||||
ImportJobMetadata,
|
||||
ImportStageResult,
|
||||
} from './types';
|
||||
30
packages/importer/src/providers/metadata.ts
Normal file
30
packages/importer/src/providers/metadata.ts
Normal file
@@ -0,0 +1,30 @@
|
||||
/** Identifier of a supported import provider. */
export type ImportProviderId = 'umami' | 'mixpanel';
|
||||
/**
 * Kind of source a provider imports from.
 * NOTE(review): presumably 'file' = uploaded export, 'api' = remote API — confirm.
 */
export type ImportProviderType = 'file' | 'api';
|
||||
|
||||
/**
 * Static descriptive metadata for one import provider.
 * NOTE(review): the fields look like they are meant for rendering a provider
 * picker in the UI — confirm against the consumer.
 */
export interface ImportProviderMeta {
|
||||
id: ImportProviderId;
|
||||
name: string;
|
||||
description: string;
|
||||
logo: string;
|
||||
backgroundColor: string;
|
||||
types: ImportProviderType[];
|
||||
}
|
||||
|
||||
/**
 * Metadata for every supported import provider: Umami (file based) and
 * Mixpanel (API based). Logos are referenced by remote URL.
 */
export const IMPORT_PROVIDERS: ImportProviderMeta[] = [
|
||||
{
|
||||
id: 'umami',
|
||||
name: 'Umami',
|
||||
description: 'Import your analytics data from Umami',
|
||||
logo: 'https://cdn.brandfetch.io/id_3VEohOm/w/180/h/180/theme/dark/logo.png?c=1dxbfHSJFAPEGdCLU4o5B',
|
||||
backgroundColor: '#fff',
|
||||
types: ['file'],
|
||||
},
|
||||
{
|
||||
id: 'mixpanel',
|
||||
name: 'Mixpanel',
|
||||
description: 'Import your analytics data from Mixpanel API',
|
||||
logo: 'https://cdn.brandfetch.io/idr_rhI2FS/theme/dark/idMJ8uODLv.svg?c=1dxbfHSJFAPEGdCLU4o5B',
|
||||
backgroundColor: '#fff',
|
||||
types: ['api'],
|
||||
},
|
||||
];
|
||||
319
packages/importer/src/providers/mixpanel.test.ts
Normal file
319
packages/importer/src/providers/mixpanel.test.ts
Normal file
@@ -0,0 +1,319 @@
|
||||
import { omit } from 'ramda';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { MixpanelProvider } from './mixpanel';
|
||||
|
||||
describe('mixpanel', () => {
|
||||
it('should chunk date range into day chunks', async () => {
|
||||
const provider = new MixpanelProvider('pid', {
|
||||
from: '2025-01-01',
|
||||
to: '2025-01-04',
|
||||
serviceAccount: 'sa',
|
||||
serviceSecret: 'ss',
|
||||
projectId: '123',
|
||||
provider: 'mixpanel',
|
||||
type: 'api',
|
||||
mapScreenViewProperty: undefined,
|
||||
});
|
||||
|
||||
const chunks = provider.getDateChunks('2025-01-01', '2025-01-04');
|
||||
expect(chunks).toEqual([
|
||||
['2025-01-01', '2025-01-01'],
|
||||
['2025-01-02', '2025-01-02'],
|
||||
['2025-01-03', '2025-01-03'],
|
||||
['2025-01-04', '2025-01-04'],
|
||||
]);
|
||||
});
|
||||
|
||||
it('should transform event', async () => {
|
||||
const provider = new MixpanelProvider('pid', {
|
||||
from: '2025-01-01',
|
||||
to: '2025-01-02',
|
||||
serviceAccount: 'sa',
|
||||
serviceSecret: 'ss',
|
||||
projectId: '123',
|
||||
provider: 'mixpanel',
|
||||
type: 'api',
|
||||
mapScreenViewProperty: undefined,
|
||||
});
|
||||
|
||||
const rawEvent = {
|
||||
event: '$mp_web_page_view',
|
||||
properties: {
|
||||
time: 1746097970,
|
||||
distinct_id: '$device:123',
|
||||
$browser: 'Chrome',
|
||||
$browser_version: 135,
|
||||
$city: 'Mumbai',
|
||||
$current_url:
|
||||
'https://domain.com/state/maharashtra?utm_source=google&utm_medium=cpc&utm_campaignid=890&utm_adgroupid=&utm_adid=&utm_term=&utm_device=m&utm_network=x&utm_location=123&gclid=oqneoqow&gad_sour',
|
||||
$device: 'Android',
|
||||
$device_id: '123',
|
||||
$initial_referrer: 'https://referrer.com/',
|
||||
$initial_referring_domain: 'referrer.com',
|
||||
$insert_id: 'source_id',
|
||||
$lib_version: '2.60.0',
|
||||
$mp_api_endpoint: 'api-js.mixpanel.com',
|
||||
$mp_api_timestamp_ms: 1746078175363,
|
||||
$mp_autocapture: true,
|
||||
$os: 'Android',
|
||||
$referrer: 'https://google.com/',
|
||||
$referring_domain: 'referrer.com',
|
||||
$region: 'Maharashtra',
|
||||
$screen_height: 854,
|
||||
$screen_width: 384,
|
||||
current_domain: 'domain.com',
|
||||
current_page_title:
|
||||
'Landeed: Satbara Utara, 7/12 Extract, Property Card & Index 2',
|
||||
current_url_path: '/state/maharashtra',
|
||||
current_url_protocol: 'https:',
|
||||
current_url_search:
|
||||
'?utm_source=google&utm_medium=cpc&utm_campaignid=890&utm_adgroupid=&utm_adid=&utm_term=&utm_device=m&utm_network=x&utm_location=123&gclid=oqneoqow&gad_source=5&gclid=EAIaIQobChMI6MnvhciBjQMVlS-DAx',
|
||||
gclid: 'oqneoqow',
|
||||
mp_country_code: 'IN',
|
||||
mp_lib: 'web',
|
||||
mp_processing_time_ms: 1746078175546,
|
||||
mp_sent_by_lib_version: '2.60.0',
|
||||
utm_medium: 'cpc',
|
||||
utm_source: 'google',
|
||||
},
|
||||
};
|
||||
|
||||
const res = provider.transformEvent(rawEvent);
|
||||
|
||||
expect(res).toMatchObject({
|
||||
id: expect.any(String),
|
||||
name: 'screen_view',
|
||||
device_id: '123',
|
||||
profile_id: '123',
|
||||
project_id: 'pid',
|
||||
session_id: '',
|
||||
properties: {
|
||||
__source_insert_id: 'source_id',
|
||||
__screen: '384x854',
|
||||
__lib_version: '2.60.0',
|
||||
'__query.utm_source': 'google',
|
||||
'__query.utm_medium': 'cpc',
|
||||
'__query.utm_campaignid': '890',
|
||||
'__query.utm_device': 'm',
|
||||
'__query.utm_network': 'x',
|
||||
'__query.utm_location': '123',
|
||||
'__query.gclid': 'oqneoqow',
|
||||
__title:
|
||||
'Landeed: Satbara Utara, 7/12 Extract, Property Card & Index 2',
|
||||
},
|
||||
created_at: '2025-05-01T11:12:50.000Z',
|
||||
country: 'IN',
|
||||
city: 'Mumbai',
|
||||
region: 'Maharashtra',
|
||||
longitude: null,
|
||||
latitude: null,
|
||||
os: 'Android',
|
||||
os_version: undefined,
|
||||
browser: 'Chrome',
|
||||
browser_version: '',
|
||||
device: 'mobile',
|
||||
brand: '',
|
||||
model: '',
|
||||
duration: 0,
|
||||
path: '/state/maharashtra',
|
||||
origin: 'https://domain.com',
|
||||
referrer: 'https://referrer.com',
|
||||
referrer_name: 'Google',
|
||||
referrer_type: 'search',
|
||||
imported_at: expect.any(String),
|
||||
sdk_name: 'mixpanel (web)',
|
||||
sdk_version: '1.0.0',
|
||||
});
|
||||
});
|
||||
|
||||
it('should parse stringified JSON in properties and flatten them', async () => {
|
||||
const provider = new MixpanelProvider('pid', {
|
||||
from: '2025-01-01',
|
||||
to: '2025-01-02',
|
||||
serviceAccount: 'sa',
|
||||
serviceSecret: 'ss',
|
||||
projectId: '123',
|
||||
provider: 'mixpanel',
|
||||
type: 'api',
|
||||
mapScreenViewProperty: undefined,
|
||||
});
|
||||
|
||||
const rawEvent = {
|
||||
event: 'custom_event',
|
||||
properties: {
|
||||
time: 1746097970,
|
||||
distinct_id: '$device:123',
|
||||
$device_id: '123',
|
||||
$user_id: 'user123',
|
||||
mp_lib: 'web',
|
||||
// Stringified JSON object - should be parsed and flattened
|
||||
area: '{"displayText":"Malab, Nuh, Mewat","id":1189005}',
|
||||
// Stringified JSON array - should be parsed and flattened
|
||||
tags: '["tag1","tag2","tag3"]',
|
||||
// Regular string - should remain as is
|
||||
regularString: 'just a string',
|
||||
// Number - should be converted to string
|
||||
count: 42,
|
||||
// Object - should be flattened
|
||||
nested: { level1: { level2: 'value' } },
|
||||
},
|
||||
};
|
||||
|
||||
const res = provider.transformEvent(rawEvent);
|
||||
|
||||
expect(res.properties).toMatchObject({
|
||||
// Parsed JSON object should be flattened with dot notation
|
||||
'area.displayText': 'Malab, Nuh, Mewat',
|
||||
'area.id': '1189005',
|
||||
// Parsed JSON array should be flattened with numeric indices
|
||||
'tags.0': 'tag1',
|
||||
'tags.1': 'tag2',
|
||||
'tags.2': 'tag3',
|
||||
// Regular values
|
||||
regularString: 'just a string',
|
||||
count: '42',
|
||||
// Nested object flattened
|
||||
'nested.level1.level2': 'value',
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle react-native referrer', async () => {
|
||||
const provider = new MixpanelProvider('pid', {
|
||||
from: '2025-01-01',
|
||||
to: '2025-01-02',
|
||||
serviceAccount: 'sa',
|
||||
serviceSecret: 'ss',
|
||||
projectId: '123',
|
||||
provider: 'mixpanel',
|
||||
type: 'api',
|
||||
mapScreenViewProperty: undefined,
|
||||
});
|
||||
|
||||
const rawEvent = {
|
||||
event: 'ec_search_error',
|
||||
properties: {
|
||||
time: 1759947367,
|
||||
distinct_id: '3385916',
|
||||
$browser: 'Mobile Safari',
|
||||
$browser_version: null,
|
||||
$city: 'Bengaluru',
|
||||
$current_url:
|
||||
'https://web.landeed.com/karnataka/ec-encumbrance-certificate',
|
||||
$device: 'iPhone',
|
||||
$device_id:
|
||||
'199b498af1036c-0e943279a1292e-5c0f4368-51bf4-199b498af1036c',
|
||||
$initial_referrer: 'https://www.google.com/',
|
||||
$initial_referring_domain: 'www.google.com',
|
||||
$insert_id: 'bclkaepeqcfuzt4v',
|
||||
$lib_version: '2.60.0',
|
||||
$mp_api_endpoint: 'api-js.mixpanel.com',
|
||||
$mp_api_timestamp_ms: 1759927570699,
|
||||
$os: 'iOS',
|
||||
$region: 'Karnataka',
|
||||
$screen_height: 852,
|
||||
$screen_width: 393,
|
||||
$search_engine: 'google',
|
||||
$user_id: '3385916',
|
||||
binaryReadableVersion: 'NA',
|
||||
binaryVersion: 'NA',
|
||||
component: '/karnataka/ec-encumbrance-certificate',
|
||||
errMsg: 'Request failed with status code 500',
|
||||
errType: 'SERVER_ERROR',
|
||||
isSilentSearch: false,
|
||||
isTimeout: false,
|
||||
jsVersion: '0.42.0',
|
||||
language: 'english',
|
||||
mp_country_code: 'IN',
|
||||
mp_lib: 'web',
|
||||
mp_processing_time_ms: 1759927592421,
|
||||
mp_sent_by_lib_version: '2.60.0',
|
||||
os: 'web',
|
||||
osVersion:
|
||||
'Mozilla/5.0 (iPhone; CPU iPhone OS 18_7_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/388.0.811331708 Mobile/15E148 Safari/604.1',
|
||||
phoneBrand: 'NA',
|
||||
phoneManufacturer: 'NA',
|
||||
phoneModel: 'NA',
|
||||
searchUuid: '68e65d08-fd81-4ded-37d3-2b08d2bc70c3',
|
||||
serverVersion: 'web2.0',
|
||||
state: 17,
|
||||
stateStr: '17',
|
||||
statusCode: 500,
|
||||
type: 'result_event',
|
||||
utm_medium: 'cpc',
|
||||
utm_source:
|
||||
'google%26utm_medium=cpc%26utm_campaignid=21380769590%26utm_adgroupid=%26utm_adid=%26utm_term=%26utm_device=m%26utm_network=%26utm_location=9062055%26gclid=%26gad_campaignid=21374496705%26gbraid=0AAAAAoV7mTM9mWFripzQ2Od0xXAfrW6p3%26wbraid=CmAKCQjwi4PHBhCUA',
|
||||
},
|
||||
};
|
||||
|
||||
const res = provider.transformEvent(rawEvent);
|
||||
|
||||
expect(res.id.length).toBeGreaterThan(30);
|
||||
expect(res.imported_at).toMatch(
|
||||
/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$/,
|
||||
);
|
||||
expect(omit(['id', 'imported_at'], res)).toEqual({
|
||||
brand: 'Apple',
|
||||
browser: 'GSA',
|
||||
browser_version: 'null',
|
||||
city: 'Bengaluru',
|
||||
country: 'IN',
|
||||
created_at: '2025-10-08T18:16:07.000Z',
|
||||
device: 'mobile',
|
||||
device_id: '199b498af1036c-0e943279a1292e-5c0f4368-51bf4-199b498af1036c',
|
||||
duration: 0,
|
||||
latitude: null,
|
||||
longitude: null,
|
||||
model: 'iPhone',
|
||||
name: 'ec_search_error',
|
||||
origin: 'https://web.landeed.com',
|
||||
os: 'iOS',
|
||||
os_version: '18.7.0',
|
||||
path: '/karnataka/ec-encumbrance-certificate',
|
||||
profile_id: '3385916',
|
||||
project_id: 'pid',
|
||||
properties: {
|
||||
__lib_version: '2.60.0',
|
||||
'__query.gad_campaignid': '21374496705',
|
||||
'__query.gbraid': '0AAAAAoV7mTM9mWFripzQ2Od0xXAfrW6p3',
|
||||
'__query.utm_campaignid': '21380769590',
|
||||
'__query.utm_device': 'm',
|
||||
'__query.utm_location': '9062055',
|
||||
'__query.utm_medium': 'cpc',
|
||||
'__query.utm_source': 'google',
|
||||
'__query.wbraid': 'CmAKCQjwi4PHBhCUA',
|
||||
__screen: '393x852',
|
||||
__source_insert_id: 'bclkaepeqcfuzt4v',
|
||||
__userAgent:
|
||||
'Mozilla/5.0 (iPhone; CPU iPhone OS 18_7_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/388.0.811331708 Mobile/15E148 Safari/604.1',
|
||||
binaryReadableVersion: 'NA',
|
||||
binaryVersion: 'NA',
|
||||
component: '/karnataka/ec-encumbrance-certificate',
|
||||
errMsg: 'Request failed with status code 500',
|
||||
errType: 'SERVER_ERROR',
|
||||
isSilentSearch: 'false',
|
||||
isTimeout: 'false',
|
||||
jsVersion: '0.42.0',
|
||||
language: 'english',
|
||||
os: 'web',
|
||||
osVersion:
|
||||
'Mozilla/5.0 (iPhone; CPU iPhone OS 18_7_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) GSA/388.0.811331708 Mobile/15E148 Safari/604.1',
|
||||
phoneBrand: 'NA',
|
||||
phoneManufacturer: 'NA',
|
||||
phoneModel: 'NA',
|
||||
searchUuid: '68e65d08-fd81-4ded-37d3-2b08d2bc70c3',
|
||||
serverVersion: 'web2.0',
|
||||
state: '17',
|
||||
stateStr: '17',
|
||||
statusCode: '500',
|
||||
type: 'result_event',
|
||||
},
|
||||
referrer: 'https://www.google.com',
|
||||
referrer_name: 'Google',
|
||||
referrer_type: 'search',
|
||||
region: 'Karnataka',
|
||||
sdk_name: 'mixpanel (web)',
|
||||
sdk_version: '1.0.0',
|
||||
session_id: '',
|
||||
});
|
||||
});
|
||||
});
|
||||
452
packages/importer/src/providers/mixpanel.ts
Normal file
452
packages/importer/src/providers/mixpanel.ts
Normal file
@@ -0,0 +1,452 @@
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import { isSameDomain, parsePath, toDots } from '@openpanel/common';
|
||||
import { type UserAgentInfo, parseUserAgent } from '@openpanel/common/server';
|
||||
import { getReferrerWithQuery, parseReferrer } from '@openpanel/common/server';
|
||||
import type { IClickhouseEvent } from '@openpanel/db';
|
||||
import type { ILogger } from '@openpanel/logger';
|
||||
import type { IMixpanelImportConfig } from '@openpanel/validation';
|
||||
import { z } from 'zod';
|
||||
import { BaseImportProvider } from '../base-provider';
|
||||
|
||||
/**
 * Zod schema for one raw Mixpanel event: an event name plus a free-form
 * property bag. Property values are left as `unknown`; only the outer shape
 * is checked.
 */
export const zMixpanelRawEvent = z.object({
|
||||
event: z.string(),
|
||||
properties: z.record(z.unknown()),
|
||||
});
|
||||
|
||||
export type MixpanelRawEvent = z.infer<typeof zMixpanelRawEvent>;
|
||||
|
||||
/**
 * Import provider for Mixpanel.
 *
 * Streams raw events from Mixpanel's export endpoint, one request per date
 * chunk, and maps each raw event to an `IClickhouseEvent`. `session_id` is
 * left empty and is expected to be generated in SQL after the import
 * (see `shouldGenerateSessionIds`).
 */
export class MixpanelProvider extends BaseImportProvider<MixpanelRawEvent> {
  provider = 'mixpanel';
  version = '1.0.0';

  /**
   * @param projectId - Project id written to every imported event
   * @param config - Mixpanel import configuration (credentials, Mixpanel
   *   project id, date range, optional screen-view property mapping)
   * @param logger - Optional logger used for progress information
   */
  constructor(
    private readonly projectId: string,
    private readonly config: IMixpanelImportConfig,
    private readonly logger?: ILogger,
  ) {
    super();
  }

  /**
   * Mixpanel does not provide a good way to get the total event count for a
   * period. JQL would work but is not accurate and will be deprecated at the
   * end of 2025.
   *
   * @returns Always -1 (presumably treated as "unknown" by the caller —
   *   confirm against the import job).
   */
  async getTotalEventsCount(): Promise<number> {
    return -1;
  }

  /**
   * Mixpanel doesn't provide session IDs, so we need to generate them in SQL
   * after all events are imported to ensure deterministic results.
   */
  shouldGenerateSessionIds(): boolean {
    return true;
  }

  /**
   * Streams raw events for the configured date range.
   *
   * @param overrideFrom - Start date to use instead of `config.from`
   */
  async *parseSource(
    overrideFrom?: string,
  ): AsyncGenerator<MixpanelRawEvent, void, unknown> {
    yield* this.fetchEventsFromMixpanel(overrideFrom);
  }

  /**
   * Splits the date range into chunks and streams the events of each chunk
   * in order.
   */
  private async *fetchEventsFromMixpanel(
    overrideFrom?: string,
  ): AsyncGenerator<MixpanelRawEvent, void, unknown> {
    const { serviceAccount, serviceSecret, projectId, from, to } = this.config;

    // Split the date range into chunks for reliability. No chunk size is
    // passed, so the base class default of one day per chunk applies.
    const dateChunks = this.getDateChunks(overrideFrom ?? from, to);

    for (const [chunkFrom, chunkTo] of dateChunks) {
      yield* this.fetchEventsForDateRange(
        serviceAccount,
        serviceSecret,
        projectId,
        chunkFrom,
        chunkTo,
      );
    }
  }

  /**
   * Requests one date range from the export endpoint and yields the events
   * of the newline-delimited JSON response as they arrive.
   *
   * Lines that are not valid JSON are logged and skipped.
   *
   * @throws Error when the response status is not OK or has no body
   */
  private async *fetchEventsForDateRange(
    serviceAccount: string,
    serviceSecret: string,
    projectId: string,
    from: string,
    to: string,
  ): AsyncGenerator<MixpanelRawEvent, void, unknown> {
    const url = 'https://data.mixpanel.com/api/2.0/export';

    const params = new URLSearchParams({
      from_date: from,
      to_date: to,
      project_id: projectId,
    });

    this.logger?.info('Fetching events from Mixpanel', {
      url: `${url}?${params}`,
      from,
      to,
      projectId,
      serviceAccount,
    });

    const response = await fetch(`${url}?${params}`, {
      method: 'GET',
      headers: {
        Authorization: `Basic ${Buffer.from(`${serviceAccount}:${serviceSecret}`).toString('base64')}`,
        Accept: 'application/json',
      },
    });

    if (!response.ok) {
      throw new Error(
        `Failed to fetch events from Mixpanel: ${response.status} ${response.statusText}`,
      );
    }

    if (!response.body) {
      throw new Error('No response body from Mixpanel API');
    }

    // Stream the response line by line
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';
    let finished = false;

    try {
      while (true) {
        const { done, value } = await reader.read();

        if (done) break;

        buffer += decoder.decode(value, { stream: true });

        // Process complete lines; keep the last (possibly incomplete) line
        const lines = buffer.split('\n');
        buffer = lines.pop() ?? '';

        yield* this.parseExportLines(lines, 'Failed to parse Mixpanel event:');
      }

      // Flush bytes the streaming decoder may still hold, then process any
      // remaining line in the buffer
      buffer += decoder.decode();
      yield* this.parseExportLines(
        [buffer],
        'Failed to parse final Mixpanel event:',
      );

      finished = true;
    } finally {
      if (!finished) {
        // The consumer stopped early or an error occurred: cancel the stream
        // so the underlying connection is not left open. Errors from cancel
        // are ignored because we are already unwinding.
        await reader.cancel().catch(() => undefined);
      }
      reader.releaseLock();
    }
  }

  /**
   * Parses newline-delimited JSON lines, skipping blank lines and logging
   * (then skipping) lines that fail to parse.
   */
  private *parseExportLines(
    lines: string[],
    failureMessage: string,
  ): Generator<MixpanelRawEvent, void, unknown> {
    for (const line of lines) {
      if (!line.trim()) continue;

      let event: MixpanelRawEvent;
      try {
        event = JSON.parse(line);
      } catch {
        console.warn(failureMessage, line);
        continue;
      }
      yield event;
    }
  }

  /**
   * Checks the raw event against `zMixpanelRawEvent`.
   */
  validate(rawEvent: MixpanelRawEvent): boolean {
    const res = zMixpanelRawEvent.safeParse(rawEvent);
    return res.success;
  }

  /**
   * Converts a raw Mixpanel event to an `IClickhouseEvent`.
   *
   * @throws when the raw event does not match `zMixpanelRawEvent`, or when
   *   `properties.time` does not produce a valid date
   */
  transformEvent(_rawEvent: MixpanelRawEvent): IClickhouseEvent {
    const projectId = this.projectId;
    const rawEvent = zMixpanelRawEvent.parse(_rawEvent);
    const props = rawEvent.properties as Record<string, any>;
    const deviceId = props.$device_id;
    const profileId = String(props.$user_id || props.distinct_id).replace(
      /^\$device:/,
      '',
    );

    // Derive path/origin/hash/query from $current_url (web only). Without a
    // URL, fall back to the configured screen-view property for the path.
    const fullUrl = props.$current_url;
    let path = '';
    let origin = '';
    let hash = '';
    let query: Record<string, string> = {};

    if (fullUrl) {
      const parsed = parsePath(fullUrl);
      path = parsed.path || '';
      origin = parsed.origin || '';
      hash = parsed.hash || '';
      query = parsed.query || {};
    } else if (this.config.mapScreenViewProperty) {
      path = props[this.config.mapScreenViewProperty] || '';
    }

    // Extract referrer information (web only); same-domain referrers are
    // ignored
    const referrerUrl = props.$initial_referrer || props.$referrer || '';
    const referrer =
      referrerUrl && !isSameDomain(referrerUrl, fullUrl)
        ? parseReferrer(referrerUrl)
        : null;

    // Check for UTM referrer in query params (web only)
    const utmReferrer = getReferrerWithQuery(query);

    // Extract location data
    const country = props.$country || props.mp_country_code || '';
    const city = props.$city || '';
    const region = props.$region || '';

    // The `osVersion` property is treated as the user agent string here.
    // NOTE(review): this matches the client data in the tests — confirm it
    // holds for other Mixpanel projects.
    const userAgent = props.osVersion || '';
    const uaInfo = this.isWebEvent(props.mp_lib)
      ? parseUserAgent(userAgent, props)
      : this.parseServerDeviceInfo(props);

    // Map event name - $mp_web_page_view should be screen_view
    let eventName = rawEvent.event;
    if (eventName === '$mp_web_page_view') {
      eventName = 'screen_view';
    }

    // Build properties object - strip Mixpanel-specific properties
    const properties = this.stripMixpanelProperties(props, query);

    if (props.$insert_id) {
      properties.__source_insert_id = String(props.$insert_id);
    }

    // Add useful properties
    if (props.$screen_width && props.$screen_height) {
      properties.__screen = `${props.$screen_width}x${props.$screen_height}`;
    }

    // Mixpanel property → event property, copied as-is when truthy
    const passthrough: Array<[string, string]> = [
      ['$screen_dpi', '__dpi'],
      ['$language', '__language'],
      ['$timezone', '__timezone'],
      ['$app_version', '__version'],
      ['$app_build_number', '__buildNumber'],
      ['$lib_version', '__lib_version'],
    ];
    for (const [source, target] of passthrough) {
      if (props[source]) {
        properties[target] = props[source];
      }
    }

    if (hash) {
      properties.__hash = hash;
    }

    if (Object.keys(query).length > 0) {
      properties.__query = query;
    }

    if (props.current_page_title) {
      properties.__title = props.current_page_title;
    }

    if (userAgent) {
      properties.__userAgent = userAgent;
    }

    // Always use UUID for id to match ClickHouse UUID column
    const event = {
      id: randomUUID(),
      name: eventName,
      device_id: deviceId,
      profile_id: profileId,
      project_id: projectId,
      session_id: '', // Will be generated in SQL after import
      properties: toDots(properties), // Flatten nested objects/arrays to Map(String, String)
      created_at: new Date(props.time * 1000).toISOString(), // time is in seconds
      country,
      city,
      region,
      longitude: null,
      latitude: null,
      os: uaInfo.os || props.$os,
      // `$os_version` is the property read by parseServerDeviceInfo;
      // `$osVersion` is kept as a last fallback for compatibility.
      os_version: uaInfo.osVersion || props.$os_version || props.$osVersion,
      browser: uaInfo.browser || props.$browser,
      // NOTE(review): `?:` binds looser than `||`, so this evaluates as
      // `(uaInfo.browserVersion || props.$browserVersion) ? String(props.$browser_version) : ''`.
      // That looks unintended, but the existing tests pin this output, so
      // the behaviour is preserved here on purpose.
      browser_version:
        uaInfo.browserVersion || props.$browserVersion
          ? String(props.$browser_version)
          : '',
      device: this.getDeviceType(props.mp_lib, uaInfo, props),
      brand: uaInfo.brand || '',
      model: uaInfo.model || '',
      duration: 0,
      path,
      origin,
      referrer: referrer?.url || '',
      referrer_name: utmReferrer?.name || referrer?.name || '',
      referrer_type: referrer?.type || utmReferrer?.type || '',
      imported_at: new Date().toISOString(),
      sdk_name: props.mp_lib
        ? `${this.provider} (${props.mp_lib})`
        : this.provider,
      sdk_version: this.version,
    };

    // TODO: Remove this
    // Temporary fix for a client
    const isMightBeScreenView = this.getMightBeScreenView(rawEvent);
    if (isMightBeScreenView && event.name === 'Loaded a Screen') {
      event.name = 'screen_view';
      event.path = isMightBeScreenView;
    }

    // TODO: Remove this
    // This is a hack to get utm tags (not sure if this is just the testing
    // project or all mixpanel projects): utm_source sometimes carries a whole
    // URL-encoded query string ("google%26utm_medium=cpc%26...").
    if (props.utm_source && !properties.__query?.utm_source) {
      const rawUtmSource = String(props.utm_source);
      let decodedUtmSource = rawUtmSource;
      try {
        decodedUtmSource = decodeURIComponent(rawUtmSource);
      } catch {
        // Malformed percent-encoding: fall back to the raw value instead of
        // failing the whole event.
      }

      const utmPairs = Object.fromEntries(
        decodedUtmSource.split('&').map((item) => item.split('=')),
      );
      for (const [key, value] of Object.entries(utmPairs)) {
        if (key && value) {
          event.properties[`__query.${key}`] = String(value);
        } else if (
          value === undefined &&
          key &&
          rawUtmSource.startsWith(key)
        ) {
          // The leading segment without "=" is the actual utm_source value
          event.properties['__query.utm_source'] = String(key);
        }
      }
    }

    return event;
  }

  /**
   * Classifies the device as 'mobile', 'tablet', 'desktop' or 'server' from
   * the SDK library name, OS and browser.
   */
  private getDeviceType(
    mp_lib: string,
    uaInfo: UserAgentInfo,
    props: Record<string, any>,
  ) {
    // Normalize lib/os/browser data
    const lib = (mp_lib || '').toLowerCase();
    const os = String(props.$os || uaInfo.os || '').toLowerCase();
    const browser = String(
      props.$browser || uaInfo.browser || '',
    ).toLowerCase();

    const isTabletOs = os === 'ipados' || os === 'ipad os' || os === 'ipad';

    // Strong hint from SDK library
    if (['android', 'iphone', 'react-native', 'swift', 'unity'].includes(lib)) {
      return isTabletOs ? 'tablet' : 'mobile';
    }

    // Web or unknown SDKs: infer from OS/Browser. Tablet signals are checked
    // first because they overlap with the mobile ones (an iPad browser
    // reports "Mobile Safari"), which would otherwise make them unreachable.
    const isTabletSignal =
      isTabletOs ||
      browser.includes('tablet') ||
      // iPad often reports as Mac OS X with Mobile Safari
      (browser.includes('mobile safari') &&
        (os === 'mac os x' || os === 'macos'));

    if (isTabletSignal) {
      return 'tablet';
    }

    // 'mobile' also covers "mobile safari" and "android mobile"
    const isMobileSignal =
      os === 'ios' ||
      os === 'android' ||
      browser.includes('chrome ios') ||
      browser.includes('samsung internet') ||
      browser.includes('mobile');

    if (isMobileSignal) {
      return 'mobile';
    }

    // Default to desktop
    return this.isServerEvent(mp_lib) ? 'server' : 'desktop';
  }

  /**
   * True for client-side SDK libraries. Despite the name this covers the
   * mobile SDKs as well as 'web'.
   */
  private isWebEvent(mp_lib: string) {
    return [
      'web',
      'android',
      'iphone',
      'swift',
      'unity',
      'react-native',
    ].includes(mp_lib);
  }

  /** True for every library that is not a client-side SDK. */
  private isServerEvent(mp_lib: string) {
    return !this.isWebEvent(mp_lib);
  }

  /**
   * Returns the first property key made up only of A-Z, 1-9 and underscore,
   * if any (used by the temporary "Loaded a Screen" mapping).
   * NOTE(review): the character class excludes the digit 0 — confirm
   * whether `0-9` was intended.
   */
  private getMightBeScreenView(rawEvent: MixpanelRawEvent) {
    const props = rawEvent.properties as Record<string, any>;
    return Object.keys(props).find((key) => /^[A-Z1-9_]+$/.test(key));
  }

  /**
   * Builds device information from Mixpanel properties for events that do
   * not come from a client-side SDK (no user agent parsing).
   */
  private parseServerDeviceInfo(props: Record<string, any>): UserAgentInfo {
    const os = props.$os || props.os || '';
    const osVersion = props.$os_version || props.osVersion || '';
    const brand = props.$brand || props.phoneBrand || '';
    const model = props.$model || props.phoneModel || '';
    // String() guards against a non-string `os` property
    const device = String(os).toLowerCase();

    return {
      isServer: true,
      os: os,
      osVersion: osVersion,
      browser: '',
      browserVersion: '',
      device: device,
      brand: brand,
      model: model,
    };
  }

  /**
   * Removes Mixpanel-internal properties (`$…`, `mp_…`, `utm_…`, page/URL
   * fields and anything already present in the URL query) and parses
   * stringified JSON values so they can be flattened later.
   *
   * @param properties - Raw Mixpanel properties
   * @param searchParams - Query parameters parsed from the event URL
   */
  private stripMixpanelProperties(
    properties: Record<string, any>,
    searchParams: Record<string, string>,
  ): Record<string, any> {
    const strip = new Set<string>([
      'time',
      'distinct_id',
      'current_page_title',
      'current_url_path',
      'current_url_protocol',
      'current_url_search',
      'current_domain',
      ...Object.keys(searchParams),
    ]);

    // Parse JSON strings back to objects/arrays so toDots() can flatten them
    const parsed: Record<string, any> = {};
    for (const [key, value] of Object.entries(properties)) {
      if (/^(\$|mp_|utm_)/.test(key) || strip.has(key)) {
        continue;
      }

      if (
        typeof value === 'string' &&
        (value.startsWith('{') || value.startsWith('['))
      ) {
        try {
          parsed[key] = JSON.parse(value);
        } catch {
          parsed[key] = value; // Keep as string if parsing fails
        }
      } else {
        parsed[key] = value;
      }
    }

    return parsed;
  }
}
|
||||
382
packages/importer/src/providers/umami.ts
Normal file
382
packages/importer/src/providers/umami.ts
Normal file
@@ -0,0 +1,382 @@
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import { Readable } from 'node:stream';
|
||||
import { pipeline } from 'node:stream/promises';
|
||||
import { createBrotliDecompress, createGunzip } from 'node:zlib';
|
||||
import { isSameDomain, parsePath } from '@openpanel/common';
|
||||
import { generateDeviceId } from '@openpanel/common/server';
|
||||
import { getReferrerWithQuery, parseReferrer } from '@openpanel/common/server';
|
||||
import type { IClickhouseEvent } from '@openpanel/db';
|
||||
import type { ILogger } from '@openpanel/logger';
|
||||
import type { IUmamiImportConfig } from '@openpanel/validation';
|
||||
import { parse } from 'csv-parse';
|
||||
import { assocPath } from 'ramda';
|
||||
import { z } from 'zod';
|
||||
import { BaseImportProvider } from '../base-provider';
|
||||
|
||||
/** Column that must contain a non-empty string. */
const zRequiredText = z.string().min(1);

/** Column that may be missing from the row. */
const zOptionalText = z.string().optional();

/**
 * Schema for one row of an Umami export. Numeric and date columns are
 * coerced from their textual form.
 */
export const zUmamiRawEvent = z.object({
  // Required fields
  event_type: z.coerce.number(),
  event_name: z.string(),
  created_at: z.coerce.date(),
  event_id: zRequiredText,
  session_id: zRequiredText,
  website_id: zRequiredText,

  // Optional fields that might be empty
  visit_id: zOptionalText,
  distinct_id: zOptionalText,
  url_path: zOptionalText,
  hostname: zOptionalText,
  referrer_domain: zOptionalText,
  referrer_path: zOptionalText,
  referrer_query: zOptionalText,
  referrer_name: zOptionalText,
  referrer_type: zOptionalText,
  country: zOptionalText,
  city: zOptionalText,
  region: zOptionalText,
  browser: zOptionalText,
  os: zOptionalText,
  device: zOptionalText,
  screen: zOptionalText,
  language: zOptionalText,
  utm_source: zOptionalText,
  utm_medium: zOptionalText,
  utm_campaign: zOptionalText,
  utm_content: zOptionalText,
  utm_term: zOptionalText,
  page_title: zOptionalText,
  gclid: zOptionalText,
  fbclid: zOptionalText,
  msclkid: zOptionalText,
  ttclid: zOptionalText,
  li_fat_id: zOptionalText,
  twclid: zOptionalText,
  url_query: zOptionalText,
});

/** A validated Umami row, as inferred from {@link zUmamiRawEvent}. */
export type UmamiRawEvent = z.infer<typeof zUmamiRawEvent>;
|
||||
|
||||
/**
 * Import provider that streams an Umami CSV export from a remote URL and
 * converts each row into an `IClickhouseEvent`.
 */
export class UmamiProvider extends BaseImportProvider<UmamiRawEvent> {
  provider = 'umami';
  version = '1.0.0';

  /** UTM columns that are copied into `properties.__query`. */
  private static readonly UTM_KEYS = [
    'utm_source',
    'utm_medium',
    'utm_campaign',
    'utm_content',
    'utm_term',
  ] as const;

  /** Umami device value -> device value stored on the event. */
  private static readonly DEVICE_MAP: ReadonlyMap<string, string> = new Map([
    ['desktop', 'desktop'],
    ['laptop', 'desktop'],
    ['mobile', 'mobile'],
    ['tablet', 'tablet'],
    ['smarttv', 'smarttv'],
    ['Unknown', 'desktop'],
  ]);

  /** Umami browser identifier -> display name stored on the event. */
  private static readonly BROWSER_MAP: ReadonlyMap<string, string> = new Map([
    ['android', 'Android'],
    ['aol', 'AOL'],
    ['bb10', 'BlackBerry 10'],
    ['beaker', 'Beaker'],
    ['chrome', 'Chrome'],
    ['chromium-webview', 'Chrome (webview)'],
    ['crios', 'Chrome (iOS)'],
    ['curl', 'Curl'],
    ['edge', 'Edge'],
    ['edge-chromium', 'Edge (Chromium)'],
    ['edge-ios', 'Edge (iOS)'],
    ['facebook', 'Facebook'],
    ['firefox', 'Firefox'],
    ['fxios', 'Firefox (iOS)'],
    ['ie', 'IE'],
    ['instagram', 'Instagram'],
    ['ios', 'iOS'],
    ['ios-webview', 'iOS (webview)'],
    ['kakaotalk', 'KakaoTalk'],
    ['miui', 'MIUI'],
    ['opera', 'Opera'],
    ['opera-mini', 'Opera Mini'],
    ['phantomjs', 'PhantomJS'],
    ['safari', 'Safari'],
    ['samsung', 'Samsung'],
    ['searchbot', 'Searchbot'],
    ['silk', 'Silk'],
    ['yandexbrowser', 'Yandex'],
  ]);

  /**
   * @param projectId Project used when no `projectMapper` entry matches a
   *   row's `website_id`.
   * @param config Import configuration (`fileUrl`, `projectMapper`).
   * @param logger Optional logger; warnings fall back to `console` without it.
   */
  constructor(
    private readonly projectId: string,
    private readonly config: IUmamiImportConfig,
    private readonly logger?: ILogger,
  ) {
    super();
  }

  /**
   * The file is streamed, so the number of rows is not known up front.
   *
   * @returns Always `-1`.
   */
  async getTotalEventsCount(): Promise<number> {
    return -1;
  }

  /** Streams raw rows from the configured `fileUrl`. */
  async *parseSource(): AsyncGenerator<UmamiRawEvent, void, unknown> {
    yield* this.parseRemoteFile(this.config.fileUrl);
  }

  /**
   * Fetches `url` and yields one record per CSV row, decompressing gzip or
   * brotli payloads on the fly.
   *
   * @param url Location of the CSV file (optionally `.gz` / `.br`).
   * @param opts.signal Cancels the download and parsing when aborted.
   * @param opts.maxBytes Upper bound on the number of bytes read from the
   *   response body.
   * @param opts.maxRows Upper bound on the number of rows yielded.
   * @throws Error when the request fails, a limit is exceeded or the stream
   *   cannot be parsed.
   */
  private async *parseRemoteFile(
    url: string,
    opts: {
      signal?: AbortSignal;
      maxBytes?: number;
      maxRows?: number;
    } = {},
  ): AsyncGenerator<UmamiRawEvent, void, unknown> {
    const { signal, maxBytes, maxRows } = opts;
    const controller = new AbortController();
    const forwardAbort = () => controller.abort();

    // Link to the caller's signal. An already-aborted signal never emits
    // 'abort' again, so that case has to be handled explicitly.
    if (signal?.aborted) {
      controller.abort();
    } else {
      signal?.addEventListener('abort', forwardAbort, { once: true });
    }

    try {
      const res = await fetch(url, { signal: controller.signal });
      if (!res.ok || !res.body) {
        throw new Error(
          `Failed to fetch remote file: ${res.status} ${res.statusText}`,
        );
      }

      const contentType = res.headers.get('content-type') || '';
      const contentLen = Number(res.headers.get('content-length') ?? 0);

      if (
        contentType &&
        !/text\/csv|text\/plain|application\/gzip|application\/octet-stream/i.test(
          contentType,
        )
      ) {
        const message = `Warning: Content-Type is ${contentType}, expected CSV-ish`;
        if (this.logger) {
          this.logger.warn(message);
        } else {
          console.warn(message);
        }
      }

      if (maxBytes && contentLen && contentLen > maxBytes) {
        throw new Error(
          `Remote file exceeds size limit (${contentLen} > ${maxBytes})`,
        );
      }

      // fetch() has already undone any Content-Encoding applied in transit,
      // so that header says nothing about the bytes received here. Only the
      // payload's own format (file extension / media type) decides whether
      // it still needs to be decompressed.
      const looksGzip =
        /\.gz($|\?)/i.test(url) || /application\/gzip/i.test(contentType);
      const looksBr = /\.br($|\?)/i.test(url);

      // WHATWG -> Node stream (the DOM and Node stream typings differ).
      const body = Readable.fromWeb(res.body as any);

      // Optional size guard during stream
      let seenBytes = 0;
      if (maxBytes) {
        body.on('data', (chunk: Buffer) => {
          seenBytes += chunk.length;
          if (seenBytes > maxBytes) {
            controller.abort();
            body.destroy(
              new Error(
                `Stream exceeded size limit (${seenBytes} > ${maxBytes})`,
              ),
            );
          }
        });
      }

      // Build decode chain (gzip/brotli -> CSV parser)
      const decompress = looksGzip
        ? createGunzip()
        : looksBr
          ? createBrotliDecompress()
          : null;

      const parser = parse({
        columns: true, // objects per row
        bom: true, // handle UTF-8 BOM
        relax_column_count: true,
        skip_empty_lines: true,
      });

      // Run the pipeline in the background for backpressure and error
      // propagation; failures surface through the parser iterator below.
      const running = decompress
        ? pipeline(body, decompress, parser, { signal: controller.signal })
        : pipeline(body, parser, { signal: controller.signal });
      void running.catch((e) => parser.destroy(e as Error));

      let rows = 0;
      try {
        for await (const record of parser) {
          rows++;
          if (maxRows && rows > maxRows) {
            controller.abort();
            throw new Error(`Row limit exceeded (${rows} > ${maxRows})`);
          }
          yield record as UmamiRawEvent;
        }
      } catch (err) {
        throw new Error(
          `Failed to parse remote file from ${url}: ${
            err instanceof Error ? err.message : String(err)
          }`,
        );
      }
    } finally {
      signal?.removeEventListener('abort', forwardAbort);
      controller.abort(); // ensure fetch stream is torn down
    }
  }

  /** Returns whether `rawEvent` satisfies `zUmamiRawEvent`. */
  validate(rawEvent: UmamiRawEvent): boolean {
    return zUmamiRawEvent.safeParse(rawEvent).success;
  }

  /**
   * Converts one Umami row into an `IClickhouseEvent`.
   *
   * @throws ZodError when the row does not satisfy `zUmamiRawEvent`.
   */
  transformEvent(_rawEvent: UmamiRawEvent): IClickhouseEvent {
    const rawEvent = zUmamiRawEvent.parse(_rawEvent);

    const projectId =
      this.config.projectMapper.find(
        (mapper) => mapper.from === rawEvent.website_id,
      )?.to || this.projectId;

    // Use visit_id as device id. When it is missing, derive a stable id from
    // the session id (always present) so that rows without a visit id do not
    // all collapse into the same device.
    const deviceId =
      rawEvent.visit_id ||
      generateDeviceId({
        ip: rawEvent.session_id,
        ua: rawEvent.session_id,
        origin: projectId,
        salt: 'xxx',
      });
    const profileId = rawEvent.distinct_id || deviceId;

    // Rebuild the page URL. The query column may come without its leading
    // '?', in which case it must be restored before parsing.
    const rawQuery = rawEvent.url_query || '';
    const querySuffix =
      rawQuery && !rawQuery.startsWith('?') ? `?${rawQuery}` : rawQuery;
    const url = rawEvent.url_path
      ? `https://${rawEvent.hostname || ''}${rawEvent.url_path}${querySuffix}`
      : '';
    const { path, query, origin } = parsePath(url);

    const referrerUrl = rawEvent.referrer_domain
      ? `https://${rawEvent.referrer_domain}${rawEvent.referrer_path || ''}`
      : '';

    // Referrers from the same domain are ignored.
    const referrer = isSameDomain(referrerUrl, url)
      ? null
      : parseReferrer(referrerUrl);

    // A referrer derived from the query string takes precedence.
    const utmReferrer = getReferrerWithQuery(query);

    let properties: Record<string, any> = {};
    if (query) {
      properties.__query = query;
    }
    if (rawEvent.page_title) properties.__title = rawEvent.page_title;
    if (rawEvent.screen) properties.__screen = rawEvent.screen;
    if (rawEvent.language) properties.__language = rawEvent.language;

    for (const key of UmamiProvider.UTM_KEYS) {
      const value = rawEvent[key];
      if (value) {
        properties = assocPath(['__query', key], value, properties);
      }
    }

    return {
      id: rawEvent.event_id || randomUUID(),
      name: rawEvent.event_type === 1 ? 'screen_view' : rawEvent.event_name,
      device_id: deviceId,
      profile_id: profileId,
      project_id: projectId,
      session_id: rawEvent.session_id || '',
      properties,
      created_at: rawEvent.created_at.toISOString(),
      country: rawEvent.country || '',
      city: rawEvent.city || '',
      region: this.mapRegion(rawEvent.region || ''),
      longitude: null,
      latitude: null,
      os: rawEvent.os || '',
      os_version: '', // Not available in Umami CSV
      browser: this.mapBrowser(rawEvent.browser || ''),
      browser_version: '', // Not available in Umami CSV
      device: this.mapDevice(rawEvent.device || ''),
      brand: '', // Not available in Umami CSV
      model: '', // Not available in Umami CSV
      duration: 0,
      path,
      origin,
      referrer: utmReferrer?.url || referrer?.url || '',
      referrer_name: utmReferrer?.name || referrer?.name || '',
      referrer_type: utmReferrer?.type || referrer?.type || '',
      imported_at: new Date().toISOString(),
      sdk_name: this.provider,
      sdk_version: this.version,
    };
  }

  /** Strips a leading two-letter upper-case prefix such as `US-`. */
  mapRegion(region: string): string {
    return region.replace(/^[A-Z]{2}\-/, '');
  }

  /** Maps an Umami device value; unknown values become `desktop`. */
  mapDevice(device: string): string {
    return UmamiProvider.DEVICE_MAP.get(device) ?? 'desktop';
  }

  /**
   * Maps an Umami browser identifier to its display name. Unmapped values
   * are returned unchanged, and an empty value becomes `Unknown`.
   */
  mapBrowser(browser: string): string {
    return UmamiProvider.BROWSER_MAP.get(browser) ?? (browser || 'Unknown');
  }
}
|
||||
80
packages/importer/src/types.ts
Normal file
80
packages/importer/src/types.ts
Normal file
@@ -0,0 +1,80 @@
|
||||
import type {
|
||||
IImportedEvent,
|
||||
IServiceCreateEventPayload,
|
||||
IServiceImportedEventPayload,
|
||||
} from '@openpanel/db';
|
||||
|
||||
/** Describes where an import reads from and which project it targets. */
export interface ImportConfig {
  /** Identifier of the project the import belongs to. */
  projectId: string;
  /** Name of the import provider. */
  provider: string;
  /** Whether the source is a file or an API. */
  sourceType: 'file' | 'api';
  /** Location of the source; its format presumably depends on `sourceType`. */
  sourceLocation: string;
}
|
||||
|
||||
/** Tracking state for a single session. */
export interface SessionInfo {
  /** Session identifier. */
  id: string;
  /** Timestamp of the most recent event (unit not shown here; presumably epoch milliseconds). */
  lastTimestamp: number;
  /** Most recent event recorded for the session. */
  lastEvent: IServiceImportedEventPayload;
}
|
||||
|
||||
/** Progress counters for an import. */
export interface ImportProgress {
  /** Total number of events. */
  totalEvents: number;
  /** Number of events processed so far. */
  processedEvents: number;
  /** Number of the current batch. */
  currentBatch: number;
  /** Total number of batches. */
  totalBatches: number;
}
|
||||
|
||||
/** Outcome of an import. */
export interface ImportResult {
  /** Whether the import succeeded. */
  success: boolean;
  /** Total number of events. */
  totalEvents: number;
  /** Number of events that were processed. */
  processedEvents: number;
  /** Optional error message. */
  error?: string;
}
|
||||
|
||||
/** Events produced for one batch. */
export interface BatchResult {
  /** Events of the batch. */
  events: IServiceImportedEventPayload[];
  /** Session events of the batch. */
  sessionEvents: IServiceImportedEventPayload[];
}
|
||||
|
||||
/**
 * Generic shape of a raw event coming from any provider: arbitrary string
 * keys whose values must be narrowed before use.
 */
export interface BaseRawEvent {
  [key: string]: unknown;
}
|
||||
|
||||
/** Optional details describing where an import error occurred. */
export interface ErrorContext {
  /** Number of the batch being processed. */
  batchNumber?: number;
  /** Size of that batch. */
  batchSize?: number;
  /** Index of the event involved. */
  eventIndex?: number;
  /** The raw event involved. */
  rawEvent?: BaseRawEvent;
  /** Name of the provider. */
  provider?: string;
}
|
||||
|
||||
/**
 * Properties attached to an event; more specific than `Record<string, any>`.
 * Values are primitives, `null`/`undefined`, or nested records.
 */
export interface EventProperties {
  [key: string]:
    | string
    | number
    | boolean
    | null
    | undefined
    | Record<string, unknown>;
  /** Query parameters. */
  __query?: Record<string, unknown>;
  /** Page title. */
  __title?: string;
  /** Screen value. */
  __screen?: string;
  /** Language value. */
  __language?: string;
}
|
||||
|
||||
/** Metadata used to track the progress of an import job. */
export interface ImportJobMetadata {
  /** Identifier of the import. */
  importId: string;
  /** Current status of the import. */
  importStatus: 'pending' | 'processing' | 'processed' | 'failed';
  /** Import timestamp. */
  importedAt: Date;
}
|
||||
|
||||
/** Result of an import staging operation. */
export interface ImportStageResult {
  /** Identifier of the import. */
  importId: string;
  /** Total number of events. */
  totalEvents: number;
  /** Number of events that were inserted. */
  insertedEvents: number;
}
|
||||
Reference in New Issue
Block a user