update importer script
This commit is contained in:
@@ -1,15 +1,16 @@
|
|||||||
import { randomUUID } from 'crypto';
|
import { randomUUID } from 'crypto';
|
||||||
import fs from 'fs';
|
import fs from 'fs';
|
||||||
import readline from 'readline';
|
import readline from 'readline';
|
||||||
import { glob } from 'glob';
|
import zlib from 'zlib';
|
||||||
import Progress from 'progress';
|
import Progress from 'progress';
|
||||||
import { assocPath, prop, uniqBy } from 'ramda';
|
import { assocPath, prop, uniqBy } from 'ramda';
|
||||||
|
|
||||||
import { parsePath } from '@openpanel/common';
|
import { isSameDomain, parsePath } from '@openpanel/common';
|
||||||
import type { IImportedEvent } from '@openpanel/db';
|
import type { IImportedEvent } from '@openpanel/db';
|
||||||
|
|
||||||
const BATCH_SIZE = 1000;
|
const BATCH_SIZE = 30_000;
|
||||||
const SLEEP_TIME = 20;
|
const SLEEP_TIME = 20;
|
||||||
|
const MAX_CONCURRENT_REQUESTS = 8;
|
||||||
|
|
||||||
type IMixpanelEvent = {
|
type IMixpanelEvent = {
|
||||||
event: string;
|
event: string;
|
||||||
@@ -179,6 +180,21 @@ function generateSessionEvents(events: IImportedEvent[]): Session[] {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function createEventObject(event: IMixpanelEvent): IImportedEvent {
|
function createEventObject(event: IMixpanelEvent): IImportedEvent {
|
||||||
|
const getReferrer = (referrer: string | undefined) => {
|
||||||
|
if (!referrer) {
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (referrer === '$direct') {
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isSameDomain(referrer, event.properties.$current_url)) {
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
|
return referrer;
|
||||||
|
};
|
||||||
const url = parsePath(event.properties.$current_url);
|
const url = parsePath(event.properties.$current_url);
|
||||||
return {
|
return {
|
||||||
profile_id: event.properties.distinct_id
|
profile_id: event.properties.distinct_id
|
||||||
@@ -203,7 +219,7 @@ function createEventObject(event: IMixpanelEvent): IImportedEvent {
|
|||||||
browser_version: event.properties.$browser_version
|
browser_version: event.properties.$browser_version
|
||||||
? String(event.properties.$browser_version)
|
? String(event.properties.$browser_version)
|
||||||
: '',
|
: '',
|
||||||
referrer: event.properties.$initial_referrer ?? '',
|
referrer: getReferrer(event.properties.$initial_referrer),
|
||||||
referrer_type: event.properties.$search_engine ? 'search' : '',
|
referrer_type: event.properties.$search_engine ? 'search' : '',
|
||||||
referrer_name: event.properties.$search_engine ?? '',
|
referrer_name: event.properties.$search_engine ?? '',
|
||||||
device_id: event.properties.$device_id ?? '',
|
device_id: event.properties.$device_id ?? '',
|
||||||
@@ -295,17 +311,33 @@ function processEvents(events: IImportedEvent[]): IImportedEvent[] {
|
|||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
|
||||||
async function sendBatchToAPI(batch: IImportedEvent[]) {
|
async function sendBatchToAPI(
|
||||||
|
batch: IImportedEvent[],
|
||||||
|
{
|
||||||
|
apiUrl,
|
||||||
|
clientId,
|
||||||
|
clientSecret,
|
||||||
|
}: {
|
||||||
|
apiUrl: string;
|
||||||
|
clientId: string;
|
||||||
|
clientSecret: string;
|
||||||
|
}
|
||||||
|
) {
|
||||||
try {
|
try {
|
||||||
const res = await fetch('http://localhost:3333/import/events', {
|
const res = await fetch(`${apiUrl}/import/events`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers: {
|
||||||
|
'Content-Encoding': 'gzip',
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
'openpanel-client-id': 'dd3db204-dcf6-49e2-9e82-de01cba7e585',
|
'openpanel-client-id': clientId,
|
||||||
'openpanel-client-secret': 'sec_293b903816e327e10c9d',
|
'openpanel-client-secret': clientSecret,
|
||||||
},
|
},
|
||||||
body: JSON.stringify(batch),
|
body: zlib.gzipSync(JSON.stringify(batch)),
|
||||||
});
|
});
|
||||||
|
if (!res.ok) {
|
||||||
|
console.log('Failed to send batch to API');
|
||||||
|
console.log(await res.text());
|
||||||
|
}
|
||||||
await new Promise((resolve) => setTimeout(resolve, SLEEP_TIME));
|
await new Promise((resolve) => setTimeout(resolve, SLEEP_TIME));
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log('sendBatchToAPI failed');
|
console.log('sendBatchToAPI failed');
|
||||||
@@ -313,7 +345,17 @@ async function sendBatchToAPI(batch: IImportedEvent[]) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function processFiles(files: string[]) {
|
async function processFiles({
|
||||||
|
files,
|
||||||
|
apiUrl,
|
||||||
|
clientId,
|
||||||
|
clientSecret,
|
||||||
|
}: {
|
||||||
|
files: string[];
|
||||||
|
apiUrl: string;
|
||||||
|
clientId: string;
|
||||||
|
clientSecret: string;
|
||||||
|
}) {
|
||||||
const progress = new Progress(
|
const progress = new Progress(
|
||||||
'Processing (:current/:total) :file [:bar] :percent | :savedEvents saved events | :status',
|
'Processing (:current/:total) :file [:bar] :percent | :savedEvents saved events | :status',
|
||||||
{
|
{
|
||||||
@@ -347,34 +389,57 @@ async function processFiles(files: string[]) {
|
|||||||
currentBatch = [];
|
currentBatch = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (apiBatching.length >= 10) {
|
if (apiBatching.length >= MAX_CONCURRENT_REQUESTS) {
|
||||||
await Promise.all(apiBatching.map(sendBatchToAPI));
|
await Promise.all(
|
||||||
|
apiBatching.map((batch) =>
|
||||||
|
sendBatchToAPI(batch, {
|
||||||
|
apiUrl,
|
||||||
|
clientId,
|
||||||
|
clientSecret,
|
||||||
|
})
|
||||||
|
)
|
||||||
|
);
|
||||||
apiBatching = [];
|
apiBatching = [];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (currentBatch.length > 0) {
|
if (currentBatch.length > 0) {
|
||||||
await sendBatchToAPI(currentBatch);
|
await sendBatchToAPI(currentBatch, {
|
||||||
|
apiUrl,
|
||||||
|
clientId,
|
||||||
|
clientSecret,
|
||||||
|
});
|
||||||
savedEvents += currentBatch.length;
|
savedEvents += currentBatch.length;
|
||||||
progress.render({ file: 'Complete', savedEvents, status: 'Complete' });
|
progress.render({ file: 'Complete', savedEvents, status: 'Complete' });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function importFiles(matcher: string) {
|
export async function importFiles({
|
||||||
const files = await glob([matcher], { root: '/' });
|
files,
|
||||||
|
apiUrl,
|
||||||
|
clientId,
|
||||||
|
clientSecret,
|
||||||
|
}: {
|
||||||
|
files: string[];
|
||||||
|
apiUrl: string;
|
||||||
|
clientId: string;
|
||||||
|
clientSecret: string;
|
||||||
|
}) {
|
||||||
if (files.length === 0) {
|
if (files.length === 0) {
|
||||||
console.log('No files found');
|
console.log('No files found');
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
files.sort((a, b) => a.localeCompare(b));
|
|
||||||
|
|
||||||
console.log(`Found ${files.length} files to process`);
|
console.log(`Found ${files.length} files to process`);
|
||||||
|
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
await processFiles(files);
|
await processFiles({
|
||||||
|
files,
|
||||||
|
apiUrl,
|
||||||
|
clientId,
|
||||||
|
clientSecret,
|
||||||
|
});
|
||||||
const endTime = Date.now();
|
const endTime = Date.now();
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
|
|||||||
@@ -1,12 +1,19 @@
|
|||||||
import path from 'path';
|
import path from 'path';
|
||||||
import arg from 'arg';
|
import arg from 'arg';
|
||||||
|
import { glob } from 'glob';
|
||||||
|
|
||||||
import { importFiles } from './importer';
|
import { importFiles } from './importer';
|
||||||
|
|
||||||
export default function importer() {
|
export default async function importer() {
|
||||||
const args = arg(
|
const args = arg(
|
||||||
{
|
{
|
||||||
'--glob': String,
|
'--glob': String,
|
||||||
|
'--api-url': String,
|
||||||
|
'--client-id': String,
|
||||||
|
'--client-secret': String,
|
||||||
|
'--dry-run': Boolean,
|
||||||
|
'--from': Number,
|
||||||
|
'--to': Number,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
permissive: true,
|
permissive: true,
|
||||||
@@ -17,9 +24,36 @@ export default function importer() {
|
|||||||
throw new Error('Missing --glob argument');
|
throw new Error('Missing --glob argument');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!args['--client-id']) {
|
||||||
|
throw new Error('Missing --client-id argument');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!args['--client-secret']) {
|
||||||
|
throw new Error('Missing --client-secret argument');
|
||||||
|
}
|
||||||
|
|
||||||
const cwd = process.cwd();
|
const cwd = process.cwd();
|
||||||
|
|
||||||
const filePath = path.resolve(cwd, args['--glob']);
|
const fileMatcher = path.resolve(cwd, args['--glob']);
|
||||||
|
const allFiles = await glob([fileMatcher], { root: '/' });
|
||||||
|
allFiles.sort((a, b) => a.localeCompare(b));
|
||||||
|
|
||||||
return importFiles(filePath);
|
const files = allFiles.slice(
|
||||||
|
args['--from'] ?? 0,
|
||||||
|
args['--to'] ?? Number.MAX_SAFE_INTEGER
|
||||||
|
);
|
||||||
|
|
||||||
|
if (args['--dry-run']) {
|
||||||
|
files.forEach((file, index) => {
|
||||||
|
console.log(`Would import (index: ${index}): ${file}`);
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
return importFiles({
|
||||||
|
files,
|
||||||
|
clientId: args['--client-id'],
|
||||||
|
clientSecret: args['--client-secret'],
|
||||||
|
apiUrl: args['--api-url'] ?? 'https://api.openpanel.dev',
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user