This commit is contained in:
Carl-Gerhard Lindesvärd
2025-11-10 10:09:59 +01:00
parent 37246f57f0
commit bb0e413b06
9 changed files with 76 additions and 107 deletions

View File

@@ -1,3 +1,4 @@
import { cacheable } from '@openpanel/redis';
import bots from './bots';
// Pre-compile regex patterns at module load time
@@ -14,59 +15,31 @@ const compiledBots = bots.map((bot) => {
// Partition the compiled bot list by matcher kind so isBot can scan each group separately.
// Entries carrying a `compiledRegex` property are matched with `compiledRegex.test(ua)`.
const regexBots = compiledBots.filter((bot) => 'compiledRegex' in bot);
// Entries carrying an `includes` property are matched with a plain `ua.includes(...)` substring check.
const includesBots = compiledBots.filter((bot) => 'includes' in bot);
// Common legitimate browser patterns - if UA matches these, it's very likely a real browser
// This provides ultra-fast early exit for ~95% of real traffic
// NOTE(review): the ~95% figure cannot be derived from this file - confirm against traffic data.
// NOTE(review): isBot only consults this list after it has already confirmed that the UA
// contains 'Mozilla/5.0', so the first entry makes the `.some()` check always succeed there
// and the remaining entries never influence the result.
const legitimateBrowserPatterns = [
'Mozilla/5.0', // Nearly all modern browsers
'Chrome/', // Chrome/Chromium browsers
'Safari/', // Safari and Chrome-based browsers
'Firefox/', // Firefox
'Edg/', // Edge
];
// Substrings isBot looks for as the mobile half of its "has an OS signature" check.
const mobilePatterns = ['iPhone', 'Android', 'iPad'];
// Substrings isBot looks for as the desktop half of its "has an OS signature" check.
const desktopOSPatterns = ['Windows NT', 'Macintosh', 'X11; Linux'];
// NOTE(review): this span appears to interleave two versions of isBot from a rendered diff
// (a plain function and a `cacheable(...)`-wrapped arrow function); as shown it is not a
// single parseable definition. Both versions take a user-agent string and return
// `{ name, type }` for the first matching bot entry, or `null` when nothing matches.
export function isBot(ua: string) {
// Ultra-fast early exit: check if this looks like a legitimate browser
// Real browsers typically have Mozilla/5.0 + browser name + OS
if (ua.includes('Mozilla/5.0')) {
// Check for browser signature
// NOTE(review): always true at this point - 'Mozilla/5.0' is itself an entry of
// legitimateBrowserPatterns and was just matched by the enclosing condition.
const hasBrowser = legitimateBrowserPatterns.some((pattern) =>
ua.includes(pattern),
);
// Check for OS signature (mobile or desktop)
const hasOS =
mobilePatterns.some((pattern) => ua.includes(pattern)) ||
desktopOSPatterns.some((pattern) => ua.includes(pattern));
// If it has Mozilla/5.0, a known browser, and an OS, it's very likely legitimate
// NOTE(review): this returns before either bot list is consulted, so any UA that contains
// 'Mozilla/5.0' plus one of the OS substrings is reported as not-a-bot even if it would
// also match a bot entry - verify whether that is acceptable.
if (hasBrowser && hasOS) {
return null;
// NOTE(review): cacheable receives a key string ('is-bot'), the matcher function,
// 60 * 60 (commented below as 1 hour - presumably a TTL in seconds) and 'lru'
// (presumably selects an in-memory LRU store) - confirm against @openpanel/redis.
export const isBot = cacheable(
'is-bot',
(ua: string) => {
// Check simple string patterns first (fast)
for (const bot of includesBots) {
if (ua.includes(bot.includes)) {
return {
name: bot.name,
// Entries without a `category` property are reported as 'Unknown'.
type: 'category' in bot ? bot.category : 'Unknown',
};
}
}
}
// Check simple string patterns first (fast)
for (const bot of includesBots) {
if (ua.includes(bot.includes)) {
return {
name: bot.name,
type: 'category' in bot ? bot.category : 'Unknown',
};
// Check regex patterns (slower)
for (const bot of regexBots) {
if (bot.compiledRegex.test(ua)) {
return {
name: bot.name,
type: 'category' in bot ? bot.category : 'Unknown',
};
}
}
}
// Check regex patterns (slower)
for (const bot of regexBots) {
if (bot.compiledRegex.test(ua)) {
return {
name: bot.name,
type: 'category' in bot ? bot.category : 'Unknown',
};
}
}
// No substring or regex entry matched: the UA is not classified as a bot.
return null;
}
return null;
},
60 * 60, // 1 hour
'lru',
);

View File

@@ -1,14 +1,11 @@
import type { FastifyReply, FastifyRequest } from 'fastify';
import { assocPath, pathOr, pick } from 'ramda';
import { logger } from '@/utils/logger';
import { generateId } from '@openpanel/common';
import { generateDeviceId, parseUserAgent } from '@openpanel/common/server';
import { getProfileById, getSalts, upsertProfile } from '@openpanel/db';
import { type GeoLocation, getGeoLocation } from '@openpanel/geo';
import type { ILogger } from '@openpanel/logger';
import { getEventsGroupQueueShard } from '@openpanel/queue';
import { getRedisCache } from '@openpanel/redis';
import type {
DecrementPayload,
IdentifyPayload,
@@ -241,25 +238,6 @@ async function track({
const jobId = [payload.name, timestamp, projectId, currentDeviceId, groupId]
.filter(Boolean)
.join('-');
await getRedisCache().incr('track:counter');
log('track handler', {
jobId: jobId,
groupId: groupId,
timestamp: timestamp,
data: {
projectId,
headers,
event: {
...payload,
timestamp,
isTimestampFromThePast,
},
uaInfo,
geo,
currentDeviceId,
previousDeviceId,
},
});
await getEventsGroupQueueShard(groupId).add({
orderMs: timestamp,
data: {

View File

@@ -6,9 +6,9 @@ import { duplicateHook } from '@/hooks/duplicate.hook';
import { isBotHook } from '@/hooks/is-bot.hook';
const trackRouter: FastifyPluginCallback = async (fastify) => {
fastify.addHook('preHandler', isBotHook);
fastify.addHook('preValidation', duplicateHook);
fastify.addHook('preHandler', clientHook);
fastify.addHook('preHandler', isBotHook);
fastify.route({
method: 'POST',