feat: new importer (#214)

This commit is contained in:
Carl-Gerhard Lindesvärd
2025-11-05 09:49:36 +01:00
committed by GitHub
parent b51bc8f3f6
commit 212254d31a
80 changed files with 4884 additions and 842 deletions

View File

@@ -1,3 +1,5 @@
export * from './crypto';
export * from './profileId';
export * from './parser-user-agent';
export * from './parse-referrer';
export * from './id';

View File

@@ -0,0 +1,117 @@
import { describe, expect, it } from 'vitest';
import { getReferrerWithQuery, parseReferrer } from './parse-referrer';
// Unit tests for parseReferrer: empty input, known referrers (with and
// without a `www.` prefix), unknown hosts, and strings that are not URLs.
describe('parseReferrer', () => {
  // Expected result when no referrer entry matches: only the URL is echoed back.
  const unmatched = (url: string) => ({ name: '', type: '', url });

  it('should handle undefined or empty URLs', () => {
    for (const input of [undefined, '']) {
      expect(parseReferrer(input)).toEqual(unmatched(''));
    }
  });

  it('should parse valid referrer URLs', () => {
    const url = 'https://google.com/search?q=test';
    expect(parseReferrer(url)).toEqual({ name: 'Google', type: 'search', url });
  });

  it('should handle www prefix in hostnames', () => {
    const urls = ['https://www.twitter.com/user', 'https://twitter.com/user'];
    for (const url of urls) {
      expect(parseReferrer(url)).toEqual({
        name: 'Twitter',
        type: 'social',
        url,
      });
    }
  });

  it('should handle unknown referrers', () => {
    const url = 'https://unknown-site.com';
    expect(parseReferrer(url)).toEqual(unmatched(url));
  });

  it('should handle invalid URLs', () => {
    const url = 'not-a-url';
    expect(parseReferrer(url)).toEqual(unmatched(url));
  });
});
// Unit tests for getReferrerWithQuery: resolution of a referrer from the
// utm_source / ref / utm_referrer query parameters.
describe('getReferrerWithQuery', () => {
  // Expected results for the known sources used below; `url` is always empty
  // because the referrer is derived from a query parameter, not a URL.
  const google = { name: 'Google', type: 'search', url: '' };
  const facebook = { name: 'Facebook', type: 'social', url: '' };
  const twitter = { name: 'Twitter', type: 'social', url: '' };

  it('should handle undefined or empty query', () => {
    for (const query of [undefined, {}]) {
      expect(getReferrerWithQuery(query)).toBeNull();
    }
  });

  it('should parse utm_source parameter', () => {
    const result = getReferrerWithQuery({ utm_source: 'google' });
    expect(result).toEqual(google);
  });

  it('should parse ref parameter', () => {
    const result = getReferrerWithQuery({ ref: 'facebook' });
    expect(result).toEqual(facebook);
  });

  it('should parse utm_referrer parameter', () => {
    const result = getReferrerWithQuery({ utm_referrer: 'twitter' });
    expect(result).toEqual(twitter);
  });

  it('should handle case-insensitive matching', () => {
    const result = getReferrerWithQuery({ utm_source: 'GoOgLe' });
    expect(result).toEqual(google);
  });

  it('should handle unknown sources', () => {
    const result = getReferrerWithQuery({ utm_source: 'unknown-source' });
    expect(result).toEqual({ name: 'unknown-source', type: '', url: '' });
  });

  it('should prioritize utm_source over ref and utm_referrer', () => {
    const query = {
      utm_source: 'google',
      ref: 'facebook',
      utm_referrer: 'twitter',
    };
    expect(getReferrerWithQuery(query)).toEqual(google);
  });
});

View File

@@ -0,0 +1,66 @@
import { stripTrailingSlash } from '../src/string';
import referrers from './referrers';
/**
 * Extracts the hostname from a URL string.
 *
 * @param url - The URL to inspect; may be undefined or empty.
 * @returns The hostname reported by the `URL` constructor, or `''` when `url`
 *   is missing/empty or cannot be parsed.
 */
function getHostname(url: string | undefined) {
  if (url) {
    try {
      const { hostname } = new URL(url);
      return hostname;
    } catch {
      // Unparseable input is treated the same as a missing URL.
    }
  }
  return '';
}
/**
 * Resolves a referrer URL to a known referrer entry.
 *
 * The URL's hostname is looked up in `referrers` as-is first, then again with
 * a leading `www.` removed.
 *
 * @param url - The referrer URL; may be undefined, empty, or not a valid URL.
 * @returns An object with the matched entry's `name` and `type` (both `''`
 *   when nothing matches) and `url` set to the input passed through
 *   `stripTrailingSlash` (`''` when the input is undefined).
 */
export function parseReferrer(url: string | undefined) {
  const hostname = getHostname(url);

  // Only consult the map's own keys: a plain indexed read would also resolve
  // inherited members such as `constructor`, turning a hostname like
  // "constructor" into a bogus match.
  const lookup = (key: string) =>
    Object.prototype.hasOwnProperty.call(referrers, key)
      ? referrers[key]
      : undefined;

  // Strip `www.` only when it is a real prefix; an unanchored replace would
  // also rewrite hostnames that merely contain "www." somewhere inside.
  const match = lookup(hostname) ?? lookup(hostname.replace(/^www\./, ''));

  return {
    name: match?.name ?? '',
    type: match?.type ?? '',
    url: stripTrailingSlash(url ?? ''),
  };
}
/**
 * Derives a referrer from tracking query parameters.
 *
 * The source is taken from the first defined of `utm_source`, `ref` and
 * `utm_referrer` (in that order), lower-cased, and matched against
 * `referrers` by key, by key with `.com` appended, and finally by
 * case-insensitive entry name.
 *
 * @param query - Parsed query parameters, or undefined.
 * @returns `null` when `query` is missing or yields an empty source.
 *   Otherwise an object with `name`, `type` and an always-empty `url`; for an
 *   unknown source, `name` is the lower-cased source and `type` is `''`.
 */
export function getReferrerWithQuery(
  query: Record<string, string> | undefined,
) {
  if (!query) {
    return null;
  }

  const source = (
    query.utm_source ??
    query.ref ??
    query.utm_referrer ??
    ''
  ).toLowerCase();

  if (source === '') {
    return null;
  }

  // Only consult the map's own keys: the source is user-controlled, and a
  // plain indexed read would resolve inherited members such as `constructor`
  // or `__proto__` into a bogus "match" with wrong or missing fields.
  const lookup = (key: string) =>
    Object.prototype.hasOwnProperty.call(referrers, key)
      ? referrers[key]
      : undefined;

  const match =
    lookup(source) ||
    lookup(`${source}.com`) ||
    Object.values(referrers).find(
      (referrer) => referrer.name.toLowerCase() === source,
    );

  if (match) {
    return {
      name: match.name,
      type: match.type,
      url: '',
    };
  }

  // Unknown source: report it verbatim (lower-cased) with no type.
  return {
    name: source,
    type: '',
    url: '',
  };
}

View File

@@ -68,6 +68,7 @@ const parse = (ua: string): UAParser.IResult => {
return res;
};
export type UserAgentInfo = ReturnType<typeof parseUserAgent>;
export function parseUserAgent(
ua?: string | null,
overrides?: Record<string, unknown>,
@@ -80,13 +81,35 @@ export function parseUserAgent(
}
return {
os: overrides?.__os || res.os.name,
osVersion: overrides?.__osVersion || res.os.version,
browser: overrides?.__browser || res.browser.name,
browserVersion: overrides?.__browserVersion || res.browser.version,
device: overrides?.__device || res.device.type || getDevice(ua),
brand: overrides?.__brand || res.device.vendor,
model: overrides?.__model || res.device.model,
os:
typeof overrides?.__os === 'string' && overrides?.__os
? overrides?.__os
: res.os.name,
osVersion:
typeof overrides?.__osVersion === 'string' && overrides?.__osVersion
? overrides?.__osVersion
: res.os.version,
browser:
typeof overrides?.__browser === 'string' && overrides?.__browser
? overrides?.__browser
: res.browser.name,
browserVersion:
typeof overrides?.__browserVersion === 'string' &&
overrides?.__browserVersion
? overrides?.__browserVersion
: res.browser.version,
device:
typeof overrides?.__device === 'string' && overrides?.__device
? overrides?.__device
: res.device.type || getDevice(ua),
brand:
typeof overrides?.__brand === 'string' && overrides?.__brand
? overrides?.__brand
: res.device.vendor,
model:
typeof overrides?.__model === 'string' && overrides?.__model
? overrides?.__model
: res.device.model,
isServer: false,
} as const;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,5 @@
# Snowplow Referer Parser
The file `index.ts` in this directory is generated from Snowplow's referer database, [Snowplow Referer Parser](https://github.com/snowplow-referer-parser/referer-parser).
The original [referers.yml](https://github.com/snowplow-referer-parser/referer-parser/blob/master/resources/referers.yml) is based on Piwik's SearchEngines.php and Socials.php, copyright 2012 Matthieu Aubry and available under the GNU General Public License v3.