update importer script

This commit is contained in:
Carl-Gerhard Lindesvärd
2024-07-23 22:58:00 +02:00
parent d4bbad6eb9
commit 65c464a63c
2 changed files with 121 additions and 22 deletions

View File

@@ -1,15 +1,16 @@
import { randomUUID } from 'crypto';
import fs from 'fs';
import readline from 'readline';
import { glob } from 'glob';
import zlib from 'zlib';
import Progress from 'progress';
import { assocPath, prop, uniqBy } from 'ramda';
import { parsePath } from '@openpanel/common';
import { isSameDomain, parsePath } from '@openpanel/common';
import type { IImportedEvent } from '@openpanel/db';
const BATCH_SIZE = 1000;
const BATCH_SIZE = 30_000;
const SLEEP_TIME = 20;
const MAX_CONCURRENT_REQUESTS = 8;
type IMixpanelEvent = {
event: string;
@@ -179,6 +180,21 @@ function generateSessionEvents(events: IImportedEvent[]): Session[] {
}
function createEventObject(event: IMixpanelEvent): IImportedEvent {
const getReferrer = (referrer: string | undefined) => {
if (!referrer) {
return '';
}
if (referrer === '$direct') {
return '';
}
if (isSameDomain(referrer, event.properties.$current_url)) {
return '';
}
return referrer;
};
const url = parsePath(event.properties.$current_url);
return {
profile_id: event.properties.distinct_id
@@ -203,7 +219,7 @@ function createEventObject(event: IMixpanelEvent): IImportedEvent {
browser_version: event.properties.$browser_version
? String(event.properties.$browser_version)
: '',
referrer: event.properties.$initial_referrer ?? '',
referrer: getReferrer(event.properties.$initial_referrer),
referrer_type: event.properties.$search_engine ? 'search' : '',
referrer_name: event.properties.$search_engine ?? '',
device_id: event.properties.$device_id ?? '',
@@ -295,17 +311,33 @@ function processEvents(events: IImportedEvent[]): IImportedEvent[] {
];
}
async function sendBatchToAPI(batch: IImportedEvent[]) {
async function sendBatchToAPI(
batch: IImportedEvent[],
{
apiUrl,
clientId,
clientSecret,
}: {
apiUrl: string;
clientId: string;
clientSecret: string;
}
) {
try {
const res = await fetch('http://localhost:3333/import/events', {
const res = await fetch(`${apiUrl}/import/events`, {
method: 'POST',
headers: {
'Content-Encoding': 'gzip',
'Content-Type': 'application/json',
'openpanel-client-id': 'dd3db204-dcf6-49e2-9e82-de01cba7e585',
'openpanel-client-secret': 'sec_293b903816e327e10c9d',
'openpanel-client-id': clientId,
'openpanel-client-secret': clientSecret,
},
body: JSON.stringify(batch),
body: zlib.gzipSync(JSON.stringify(batch)),
});
if (!res.ok) {
console.log('Failed to send batch to API');
console.log(await res.text());
}
await new Promise((resolve) => setTimeout(resolve, SLEEP_TIME));
} catch (e) {
console.log('sendBatchToAPI failed');
@@ -313,7 +345,17 @@ async function sendBatchToAPI(batch: IImportedEvent[]) {
}
}
async function processFiles(files: string[]) {
async function processFiles({
files,
apiUrl,
clientId,
clientSecret,
}: {
files: string[];
apiUrl: string;
clientId: string;
clientSecret: string;
}) {
const progress = new Progress(
'Processing (:current/:total) :file [:bar] :percent | :savedEvents saved events | :status',
{
@@ -347,34 +389,57 @@ async function processFiles(files: string[]) {
currentBatch = [];
}
if (apiBatching.length >= 10) {
await Promise.all(apiBatching.map(sendBatchToAPI));
if (apiBatching.length >= MAX_CONCURRENT_REQUESTS) {
await Promise.all(
apiBatching.map((batch) =>
sendBatchToAPI(batch, {
apiUrl,
clientId,
clientSecret,
})
)
);
apiBatching = [];
}
}
}
if (currentBatch.length > 0) {
await sendBatchToAPI(currentBatch);
await sendBatchToAPI(currentBatch, {
apiUrl,
clientId,
clientSecret,
});
savedEvents += currentBatch.length;
progress.render({ file: 'Complete', savedEvents, status: 'Complete' });
}
}
export async function importFiles(matcher: string) {
const files = await glob([matcher], { root: '/' });
export async function importFiles({
files,
apiUrl,
clientId,
clientSecret,
}: {
files: string[];
apiUrl: string;
clientId: string;
clientSecret: string;
}) {
if (files.length === 0) {
console.log('No files found');
return;
}
files.sort((a, b) => a.localeCompare(b));
console.log(`Found ${files.length} files to process`);
const startTime = Date.now();
await processFiles(files);
await processFiles({
files,
apiUrl,
clientId,
clientSecret,
});
const endTime = Date.now();
console.log(

View File

@@ -1,12 +1,19 @@
import path from 'path';
import arg from 'arg';
import { glob } from 'glob';
import { importFiles } from './importer';
export default function importer() {
export default async function importer() {
const args = arg(
{
'--glob': String,
'--api-url': String,
'--client-id': String,
'--client-secret': String,
'--dry-run': Boolean,
'--from': Number,
'--to': Number,
},
{
permissive: true,
@@ -17,9 +24,36 @@ export default function importer() {
throw new Error('Missing --glob argument');
}
if (!args['--client-id']) {
throw new Error('Missing --client-id argument');
}
if (!args['--client-secret']) {
throw new Error('Missing --client-secret argument');
}
const cwd = process.cwd();
const filePath = path.resolve(cwd, args['--glob']);
const fileMatcher = path.resolve(cwd, args['--glob']);
const allFiles = await glob([fileMatcher], { root: '/' });
allFiles.sort((a, b) => a.localeCompare(b));
return importFiles(filePath);
const files = allFiles.slice(
args['--from'] ?? 0,
args['--to'] ?? Number.MAX_SAFE_INTEGER
);
if (args['--dry-run']) {
files.forEach((file, index) => {
console.log(`Would import (index: ${index}): ${file}`);
});
return;
}
return importFiles({
files,
clientId: args['--client-id'],
clientSecret: args['--client-secret'],
apiUrl: args['--api-url'] ?? 'https://api.openpanel.dev',
});
}