feat: new importer (#214)
This commit is contained in:
committed by
GitHub
parent
b51bc8f3f6
commit
212254d31a
@@ -3,9 +3,14 @@
|
||||
"version": "0.0.1",
|
||||
"type": "module",
|
||||
"main": "index.ts",
|
||||
"exports": {
|
||||
".": "./index.ts",
|
||||
"./server": "./server/index.ts"
|
||||
},
|
||||
"scripts": {
|
||||
"test": "vitest",
|
||||
"typecheck": "tsc --noEmit"
|
||||
"typecheck": "tsc --noEmit",
|
||||
"gen:referrers": "jiti scripts/get-referrers.ts && biome format --write ./server/referrers/index.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@openpanel/constants": "workspace:*",
|
||||
|
||||
96
packages/common/scripts/get-referrers.ts
Normal file
96
packages/common/scripts/get-referrers.ts
Normal file
@@ -0,0 +1,96 @@
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// Absolute path of this script file, derived from the module URL.
const __filename = fileURLToPath(import.meta.url);
|
||||
// Directory containing this script; `main` resolves the output file relative to it.
const __dirname = dirname(__filename);
|
||||
|
||||
/**
 * Hand-maintained referrers that supplement the downloaded referer list.
 *
 * Keys are domains; each value uses the same `{ type, name }` shape as the
 * entries of the generated `referrers` module. `main` spreads this object
 * after the transformed upstream data, so an entry listed here wins over an
 * upstream entry for the same domain.
 *
 * The explicit annotation makes the compiler reject an entry that is missing
 * `type` or `name`, or that carries a misspelled property.
 */
const extraReferrers: Record<string, { type: string; name: string }> = {
  'zoom.us': { type: 'social', name: 'Zoom' },
  'apple.com': { type: 'tech', name: 'Apple' },
  'adobe.com': { type: 'tech', name: 'Adobe' },
  'figma.com': { type: 'tech', name: 'Figma' },
  'wix.com': { type: 'commerce', name: 'Wix' },
  'gmail.com': { type: 'email', name: 'Gmail' },
  'notion.so': { type: 'tech', name: 'Notion' },
  'ebay.com': { type: 'commerce', name: 'eBay' },
  'github.com': { type: 'tech', name: 'GitHub' },
  'gitlab.com': { type: 'tech', name: 'GitLab' },
  'slack.com': { type: 'social', name: 'Slack' },
  'etsy.com': { type: 'commerce', name: 'Etsy' },
  'bsky.app': { type: 'social', name: 'Bluesky' },
  'twitch.tv': { type: 'content', name: 'Twitch' },
  'dropbox.com': { type: 'tech', name: 'Dropbox' },
  'outlook.com': { type: 'email', name: 'Outlook' },
  'medium.com': { type: 'content', name: 'Medium' },
  'paypal.com': { type: 'commerce', name: 'PayPal' },
  'discord.com': { type: 'social', name: 'Discord' },
  'stripe.com': { type: 'commerce', name: 'Stripe' },
  'spotify.com': { type: 'content', name: 'Spotify' },
  'netflix.com': { type: 'content', name: 'Netflix' },
  'whatsapp.com': { type: 'social', name: 'WhatsApp' },
  'shopify.com': { type: 'commerce', name: 'Shopify' },
  'microsoft.com': { type: 'tech', name: 'Microsoft' },
  'alibaba.com': { type: 'commerce', name: 'Alibaba' },
  'telegram.org': { type: 'social', name: 'Telegram' },
  'substack.com': { type: 'content', name: 'Substack' },
  'salesforce.com': { type: 'tech', name: 'Salesforce' },
  'instagram.com': { type: 'social', name: 'Instagram' },
  'wikipedia.org': { type: 'content', name: 'Wikipedia' },
  'mastodon.social': { type: 'social', name: 'Mastodon' },
  'office.com': { type: 'tech', name: 'Microsoft Office' },
  'squarespace.com': { type: 'commerce', name: 'Squarespace' },
  'stackoverflow.com': { type: 'tech', name: 'Stack Overflow' },
  'teams.microsoft.com': { type: 'social', name: 'Microsoft Teams' },
};
|
||||
|
||||
/**
 * Flattens a nested referer database into a lookup keyed by domain.
 *
 * The input is expected to be nested as `type -> name -> { domains }`. Every
 * domain listed under a name becomes one key of the result, mapped to
 * `{ type, name }`. When the same domain appears more than once, the entry
 * visited last wins.
 *
 * Entries that are null, or whose `domains` is missing or not an array,
 * contribute nothing instead of throwing.
 *
 * @param data - Parsed referer database (or null/undefined for "no data").
 * @returns A flat map from domain to its referrer type and name.
 */
function transform(
  data:
    | Record<
        string,
        Record<string, { domains?: string[] } | null | undefined> | null | undefined
      >
    | null
    | undefined,
): Record<string, { type: string; name: string }> {
  const byDomain: Record<string, { type: string; name: string }> = {};

  for (const [type, sources] of Object.entries(data ?? {})) {
    for (const [name, source] of Object.entries(sources ?? {})) {
      const domains = source?.domains;
      if (!Array.isArray(domains)) {
        continue;
      }

      for (const domain of domains) {
        byDomain[domain] = { type, name };
      }
    }
  }

  return byDomain;
}
|
||||
|
||||
/**
 * Downloads the referer database, merges it with `extraReferrers` and writes
 * the result as a generated TypeScript module.
 *
 * The module is written to `server/referrers/index.ts` of this package. That
 * is the file `parse-referrer.ts` imports and the file the `gen:referrers`
 * script formats afterwards. The previous target
 * (`../../worker/src/referrers/index.ts`) resolved outside this package.
 *
 * On any failure (network error, non-2xx response, invalid JSON, write error)
 * the error is logged and the process exit code is set to 1, so a chained
 * command such as `&& biome format ...` does not run against stale output.
 */
async function main(): Promise<void> {
  const sourceUrl =
    'https://s3-eu-west-1.amazonaws.com/snowplow-hosted-assets/third-party/referer-parser/referers-latest.json';
  // This script lives in `scripts/`, so `../server` is the sibling directory.
  const outputFile = path.resolve(__dirname, '../server/referrers/index.ts');

  try {
    const res = await fetch(sourceUrl);
    if (!res.ok) {
      // Without this check an HTML/XML error page would only surface as an
      // opaque JSON parse error.
      throw new Error(
        `Failed to download referrers: ${res.status} ${res.statusText}`,
      );
    }
    const data = await res.json();

    // extraReferrers is spread last so manual entries override upstream ones.
    const merged = {
      ...transform(data),
      ...extraReferrers,
    };

    const contents = [
      '// This file is generated by the script get-referrers.ts',
      '',
      '// The data is fetch from snowplow-referer-parser https://github.com/snowplow-referer-parser/referer-parser',
      `// The orginal referers.yml is based on Piwik's SearchEngines.php and Socials.php, copyright 2012 Matthieu Aubry and available under the GNU General Public License v3.`,
      '',
      `const referrers: Record<string, { type: string, name: string }> = ${JSON.stringify(
        merged,
      )} as const;`,
      'export default referrers;',
    ].join('\n');

    fs.writeFileSync(outputFile, contents, 'utf-8');
  } catch (e) {
    console.error(e);
    // Report failure to the shell without cutting pending I/O short.
    process.exitCode = 1;
  }
}
|
||||
|
||||
main();
|
||||
@@ -1,3 +1,5 @@
|
||||
export * from './crypto';
|
||||
export * from './profileId';
|
||||
export * from './parser-user-agent';
|
||||
export * from './parse-referrer';
|
||||
export * from './id';
|
||||
|
||||
117
packages/common/server/parse-referrer.test.ts
Normal file
117
packages/common/server/parse-referrer.test.ts
Normal file
@@ -0,0 +1,117 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { getReferrerWithQuery, parseReferrer } from './parse-referrer';
|
||||
|
||||
// parseReferrer: maps a referrer URL to { name, type, url }.
describe('parseReferrer', () => {
  it('should handle undefined or empty URLs', () => {
    // No referrer at all yields an entirely blank result.
    const blank = { name: '', type: '', url: '' };

    expect(parseReferrer(undefined)).toEqual(blank);
    expect(parseReferrer('')).toEqual(blank);
  });

  it('should parse valid referrer URLs', () => {
    const url = 'https://google.com/search?q=test';

    expect(parseReferrer(url)).toEqual({ name: 'Google', type: 'search', url });
  });

  it('should handle www prefix in hostnames', () => {
    // The same site must be recognised with and without the www label.
    const urls = ['https://www.twitter.com/user', 'https://twitter.com/user'];

    for (const url of urls) {
      expect(parseReferrer(url)).toEqual({
        name: 'Twitter',
        type: 'social',
        url,
      });
    }
  });

  it('should handle unknown referrers', () => {
    const url = 'https://unknown-site.com';

    expect(parseReferrer(url)).toEqual({ name: '', type: '', url });
  });

  it('should handle invalid URLs', () => {
    // Unparseable input is echoed back as `url` with no classification.
    const url = 'not-a-url';

    expect(parseReferrer(url)).toEqual({ name: '', type: '', url });
  });
});
|
||||
|
||||
// getReferrerWithQuery: derives a referrer from utm_source / ref / utm_referrer.
describe('getReferrerWithQuery', () => {
  // Expected results for the known sources used below; `url` is always empty.
  const google = { name: 'Google', type: 'search', url: '' };
  const facebook = { name: 'Facebook', type: 'social', url: '' };
  const twitter = { name: 'Twitter', type: 'social', url: '' };

  it('should handle undefined or empty query', () => {
    expect(getReferrerWithQuery(undefined)).toBeNull();
    expect(getReferrerWithQuery({})).toBeNull();
  });

  it('should parse utm_source parameter', () => {
    expect(getReferrerWithQuery({ utm_source: 'google' })).toEqual(google);
  });

  it('should parse ref parameter', () => {
    expect(getReferrerWithQuery({ ref: 'facebook' })).toEqual(facebook);
  });

  it('should parse utm_referrer parameter', () => {
    expect(getReferrerWithQuery({ utm_referrer: 'twitter' })).toEqual(twitter);
  });

  it('should handle case-insensitive matching', () => {
    expect(getReferrerWithQuery({ utm_source: 'GoOgLe' })).toEqual(google);
  });

  it('should handle unknown sources', () => {
    // An unrecognised source is passed through as the name, with no type.
    expect(getReferrerWithQuery({ utm_source: 'unknown-source' })).toEqual({
      name: 'unknown-source',
      type: '',
      url: '',
    });
  });

  it('should prioritize utm_source over ref and utm_referrer', () => {
    const query = {
      utm_source: 'google',
      ref: 'facebook',
      utm_referrer: 'twitter',
    };

    expect(getReferrerWithQuery(query)).toEqual(google);
  });
});
|
||||
66
packages/common/server/parse-referrer.ts
Normal file
66
packages/common/server/parse-referrer.ts
Normal file
@@ -0,0 +1,66 @@
|
||||
import { stripTrailingSlash } from '../src/string';
|
||||
|
||||
import referrers from './referrers';
|
||||
|
||||
/**
 * Extracts the hostname from a URL string.
 *
 * @param url - Candidate URL; may be undefined or empty.
 * @returns The parsed hostname, or '' when `url` is missing, empty, or
 *   rejected by the `URL` constructor.
 */
function getHostname(url: string | undefined) {
  let hostname = '';

  if (url) {
    try {
      hostname = new URL(url).hostname;
    } catch (e) {
      // Unparseable input is treated like a missing referrer.
    }
  }

  return hostname;
}
|
||||
|
||||
/**
 * Classifies a referrer URL against the generated `referrers` table.
 *
 * The hostname is looked up as-is first, then with a leading `www.` label
 * removed. Only a leading label is stripped: a plain string replace would
 * also rewrite hosts that merely contain "www." further in (for example
 * `awww.example.com`).
 *
 * Lookups are restricted to own keys of the table. A plain property read
 * would also resolve inherited members, so a host such as `constructor`
 * would be reported as a known referrer.
 *
 * @param url - Raw referrer URL; may be undefined, empty or unparseable.
 * @returns `name` and `type` of the matching referrer (both '' when there is
 *   no match) and `url` with its trailing slash stripped.
 */
export function parseReferrer(url: string | undefined) {
  const hostname = getHostname(url);
  const candidates = [hostname, hostname.replace(/^www\./, '')];
  const key = candidates.find((candidate) =>
    Object.prototype.hasOwnProperty.call(referrers, candidate),
  );
  const match = key === undefined ? undefined : referrers[key];

  return {
    name: match?.name ?? '',
    type: match?.type ?? '',
    url: stripTrailingSlash(url ?? ''),
  };
}
|
||||
|
||||
/**
 * Derives a referrer from campaign-style query parameters.
 *
 * The source is the first defined value of `utm_source`, `ref` and
 * `utm_referrer`, lower-cased. It is matched against the `referrers` table
 * in this order: as a domain key, as `<source>.com`, and finally against
 * each entry's lower-cased `name`.
 *
 * Key lookups are restricted to own keys of the table. The source comes
 * straight from a request, and a plain property read would resolve inherited
 * members: `utm_source=constructor` or `__proto__` would produce a bogus
 * match whose `type` is undefined.
 *
 * @param query - Parsed query-string parameters, if any.
 * @returns null when `query` is missing or yields an empty source. Otherwise
 *   `{ name, type, url: '' }`; for an unknown source `name` is the
 *   lower-cased source and `type` is ''.
 */
export function getReferrerWithQuery(
  query: Record<string, string> | undefined,
) {
  if (!query) {
    return null;
  }

  const source = (
    query.utm_source ??
    query.ref ??
    query.utm_referrer ??
    ''
  ).toLowerCase();

  if (source === '') {
    return null;
  }

  const key = [source, `${source}.com`].find((candidate) =>
    Object.prototype.hasOwnProperty.call(referrers, candidate),
  );

  const match =
    key !== undefined
      ? referrers[key]
      : Object.values(referrers).find(
          (referrer) => referrer.name.toLowerCase() === source,
        );

  if (match) {
    return {
      name: match.name,
      type: match.type,
      url: '',
    };
  }

  return {
    name: source,
    type: '',
    url: '',
  };
}
|
||||
@@ -68,6 +68,7 @@ const parse = (ua: string): UAParser.IResult => {
|
||||
return res;
|
||||
};
|
||||
|
||||
export type UserAgentInfo = ReturnType<typeof parseUserAgent>;
|
||||
export function parseUserAgent(
|
||||
ua?: string | null,
|
||||
overrides?: Record<string, unknown>,
|
||||
@@ -80,13 +81,35 @@ export function parseUserAgent(
|
||||
}
|
||||
|
||||
return {
|
||||
os: overrides?.__os || res.os.name,
|
||||
osVersion: overrides?.__osVersion || res.os.version,
|
||||
browser: overrides?.__browser || res.browser.name,
|
||||
browserVersion: overrides?.__browserVersion || res.browser.version,
|
||||
device: overrides?.__device || res.device.type || getDevice(ua),
|
||||
brand: overrides?.__brand || res.device.vendor,
|
||||
model: overrides?.__model || res.device.model,
|
||||
os:
|
||||
typeof overrides?.__os === 'string' && overrides?.__os
|
||||
? overrides?.__os
|
||||
: res.os.name,
|
||||
osVersion:
|
||||
typeof overrides?.__osVersion === 'string' && overrides?.__osVersion
|
||||
? overrides?.__osVersion
|
||||
: res.os.version,
|
||||
browser:
|
||||
typeof overrides?.__browser === 'string' && overrides?.__browser
|
||||
? overrides?.__browser
|
||||
: res.browser.name,
|
||||
browserVersion:
|
||||
typeof overrides?.__browserVersion === 'string' &&
|
||||
overrides?.__browserVersion
|
||||
? overrides?.__browserVersion
|
||||
: res.browser.version,
|
||||
device:
|
||||
typeof overrides?.__device === 'string' && overrides?.__device
|
||||
? overrides?.__device
|
||||
: res.device.type || getDevice(ua),
|
||||
brand:
|
||||
typeof overrides?.__brand === 'string' && overrides?.__brand
|
||||
? overrides?.__brand
|
||||
: res.device.vendor,
|
||||
model:
|
||||
typeof overrides?.__model === 'string' && overrides?.__model
|
||||
? overrides?.__model
|
||||
: res.device.model,
|
||||
isServer: false,
|
||||
} as const;
|
||||
}
|
||||
|
||||
2785
packages/common/server/referrers/index.ts
Normal file
2785
packages/common/server/referrers/index.ts
Normal file
File diff suppressed because it is too large
Load Diff
5
packages/common/server/referrers/referrers.readme.md
Normal file
5
packages/common/server/referrers/referrers.readme.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# Snowplow Referer Parser
|
||||
|
||||
The file `index.ts` in this directory is generated from Snowplow's referer database, [Snowplow Referer Parser](https://github.com/snowplow-referer-parser/referer-parser).
|
||||
|
||||
The original [referers.yml](https://github.com/snowplow-referer-parser/referer-parser/blob/master/resources/referers.yml) is based on Piwik's SearchEngines.php and Socials.php, copyright 2012 Matthieu Aubry and available under the GNU General Public License v3.
|
||||
48
packages/common/src/object.test.ts
Normal file
48
packages/common/src/object.test.ts
Normal file
@@ -0,0 +1,48 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { toDots } from './object';
|
||||
|
||||
// toDots: flattens nested objects/arrays into dot-separated string entries.
describe('toDots', () => {
  it('should convert an object to a dot object', () => {
    const input = {
      a: 1,
      b: 2,
      array: ['1', '2', '3'],
      arrayWithObjects: [{ a: 1 }, { b: 2 }, { c: 3 }],
      objectWithArrays: { a: [1, 2, 3] },
      null: null,
      undefined: undefined,
      empty: '',
      jsonString: '{"a": 1, "b": 2}',
    };

    // null, undefined and '' are expected to vanish; JSON strings to expand.
    const flattened = toDots(input);

    expect(flattened).toEqual({
      a: '1',
      b: '2',
      'array.0': '1',
      'array.1': '2',
      'array.2': '3',
      'arrayWithObjects.0.a': '1',
      'arrayWithObjects.1.b': '2',
      'arrayWithObjects.2.c': '3',
      'objectWithArrays.a.0': '1',
      'objectWithArrays.a.1': '2',
      'objectWithArrays.a.2': '3',
      'jsonString.a': '1',
      'jsonString.b': '2',
    });
  });

  it('should handle malformed JSON strings gracefully', () => {
    const input = {
      validJson: '{"key":"value"}',
      malformedJson: '{"key":"unterminated string',
      startsWithBrace: '{not json at all',
      startsWithBracket: '[also not json',
      regularString: 'normal string',
    };

    // Strings that open like JSON but never close are expected to be dropped.
    const flattened = toDots(input);

    expect(flattened).toEqual({
      'validJson.key': 'value',
      regularString: 'normal string',
    });
  });
});
|
||||
@@ -1,5 +1,18 @@
|
||||
import { anyPass, assocPath, isEmpty, isNil, reject } from 'ramda';
|
||||
|
||||
/**
 * Cheap check for whether a string is delimited like a JSON object or array.
 *
 * Only the first and last characters are inspected; the content in between
 * is not parsed, so a true result does not guarantee `JSON.parse` succeeds.
 *
 * @param value - String to inspect.
 * @returns true when `value` is wrapped in `{...}` or `[...]`.
 */
function isValidJsonString(value: string): boolean {
  const wrappedInBraces = value.startsWith('{') && value.endsWith('}');
  const wrappedInBrackets = value.startsWith('[') && value.endsWith(']');

  return wrappedInBraces || wrappedInBrackets;
}
|
||||
/**
 * Detects strings that open like a JSON object or array but do not end with
 * the matching closing delimiter.
 *
 * Like its sibling check, this looks only at the first and last characters.
 *
 * @param value - String to inspect.
 * @returns true when `value` starts with `{` without ending in `}`, or
 *   starts with `[` without ending in `]`; false for everything else.
 */
function isMalformedJsonString(value: string): boolean {
  if (value.startsWith('{')) {
    return !value.endsWith('}');
  }

  if (value.startsWith('[')) {
    return !value.endsWith(']');
  }

  return false;
}
|
||||
|
||||
export function toDots(
|
||||
obj: Record<string, unknown>,
|
||||
path = '',
|
||||
@@ -19,10 +32,28 @@ export function toDots(
|
||||
};
|
||||
}
|
||||
|
||||
if (value === undefined || value === null) {
|
||||
if (value === undefined || value === null || value === '') {
|
||||
return acc;
|
||||
}
|
||||
|
||||
if (typeof value === 'string' && isMalformedJsonString(value)) {
|
||||
// Skip it
|
||||
return acc;
|
||||
}
|
||||
|
||||
// Fix nested json strings - but catch parse errors for malformed JSON
|
||||
if (typeof value === 'string' && isValidJsonString(value)) {
|
||||
try {
|
||||
return {
|
||||
...acc,
|
||||
...toDots(JSON.parse(value), `${path}${key}.`),
|
||||
};
|
||||
} catch {
|
||||
// Skip it
|
||||
return acc;
|
||||
}
|
||||
}
|
||||
|
||||
const cleanedValue =
|
||||
typeof value === 'string'
|
||||
? removeInvalidSurrogates(value).trim()
|
||||
|
||||
Reference in New Issue
Block a user