diff --git a/apps/api/package.json b/apps/api/package.json index 209e74cc..dff2255d 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -6,6 +6,8 @@ "testing": "API_PORT=3333 pnpm dev", "start": "node dist/index.js", "build": "rm -rf dist && tsup", + "gen:referrers": "jiti scripts/get-referrers.ts && biome format --write src/referrers/index.ts", + "gen:bots": "jiti scripts/get-bots.ts && biome format --write src/bots/bots.ts", "typecheck": "tsc --noEmit" }, "dependencies": { @@ -16,8 +18,8 @@ "@fastify/websocket": "^8.3.1", "@openpanel/common": "workspace:*", "@openpanel/db": "workspace:*", - "@openpanel/logger": "workspace:*", "@openpanel/integrations": "workspace:^", + "@openpanel/logger": "workspace:*", "@openpanel/queue": "workspace:*", "@openpanel/redis": "workspace:*", "@openpanel/trpc": "workspace:*", @@ -33,7 +35,6 @@ "sqlstring": "^2.3.3", "superjson": "^1.13.3", "svix": "^1.24.0", - "ua-parser-js": "^1.0.37", "url-metadata": "^4.1.0", "uuid": "^9.0.1", "zod": "^3.22.4" @@ -42,13 +43,14 @@ "@faker-js/faker": "^9.0.1", "@openpanel/sdk": "workspace:*", "@openpanel/tsconfig": "workspace:*", + "@types/js-yaml": "^4.0.9", "@types/jsonwebtoken": "^9.0.6", "@types/ramda": "^0.29.6", "@types/request-ip": "^0.0.41", "@types/sqlstring": "^2.3.2", - "@types/ua-parser-js": "^0.7.39", "@types/uuid": "^9.0.8", "@types/ws": "^8.5.10", + "js-yaml": "^4.1.0", "tsup": "^7.2.0", "typescript": "^5.2.2" }
diff --git a/apps/api/scripts/get-bots.ts b/apps/api/scripts/get-bots.ts
new file mode 100644
index 00000000..f72561ff
--- /dev/null
+++ b/apps/api/scripts/get-bots.ts
@@ -0,0 +1,47 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import yaml from 'js-yaml';
+
+const BOTS_URL =
+  'https://raw.githubusercontent.com/matomo-org/device-detector/master/regexes/bots.yml';
+
+/**
+ * Fetches the bot definitions (YAML) from matomo-org/device-detector and
+ * writes them to src/bots/bots.ts as an `as const` array.
+ *
+ * @throws Error when the download fails or the payload is not a non-empty
+ * list, so an error page can never overwrite the existing bots.ts.
+ */
+async function main() {
+  const res = await fetch(BOTS_URL);
+  if (!res.ok) {
+    throw new Error(
+      `Failed to fetch ${BOTS_URL}: ${res.status} ${res.statusText}`,
+    );
+  }
+
+  // js-yaml v4 `load` uses the safe default schema (no code execution).
+  const bots = yaml.load(await res.text());
+  if (!Array.isArray(bots) || bots.length === 0) {
+    throw new Error(`Unexpected payload from ${BOTS_URL}: expected a list`);
+  }
+
+  fs.writeFileSync(
+    path.resolve(__dirname, '../src/bots/bots.ts'),
+    [
+      '// This file is generated by the script get-bots.ts',
+      '',
+      '// The data is fetch from device-detector https://raw.githubusercontent.com/matomo-org/device-detector/master/regexes/bots.yml',
+      '',
+      `const bots = ${JSON.stringify(bots)} as const;`,
+      'export default bots;',
+    ].join('\n'),
+    'utf-8',
+  );
+}
+
+main().catch((e: unknown) => {
+  // Non-zero exit makes `pnpm gen:bots` stop before `biome format` runs.
+  console.error(e);
+  process.exitCode = 1;
+});
diff --git a/apps/api/scripts/get-organizations.ts b/apps/api/scripts/get-organizations.ts deleted file mode 100644 index 427c4a4e..00000000 --- a/apps/api/scripts/get-organizations.ts +++ /dev/null @@ -1,189 +0,0 @@ -// import { clerkClient } from '@clerk/fastify'; - -import { db } from '@openpanel/db'; - -// import { db } from '@openpanel/db'; - -// type Fn = (args: { limit: number; offset: number }) => Promise<{ -// data: T[]; -// totalCount: number; -// }>; - -// function getAllDataByPagination( -// cb: T -// ): Promise>['data']> { -// const data: Awaited>['data'] = []; -// async function getData(page = 0) { -// console.log(`getData with offset ${page * 100}`); -// const response = await cb({ -// limit: 100, -// offset: page * 100, -// }); -// if (response.data.length !== 0) { -// data.push(...response.data); -// await getData(page + 1); -// } -// await new Promise((resolve) => setTimeout(resolve, 100)); -// } - -// return getData().then(() => data); -// } - -// async function main() { -// const organizations = await getAllDataByPagination( -// clerkClient.organizations.getOrganizationList.bind( -// clerkClient.organizations -// ) -// ); -// const users = await getAllDataByPagination( -// clerkClient.users.getUserList.bind(clerkClient.users) -// ); - -// console.log(`Found ${organizations.length} organizations`); -// console.log(`Found ${users.length} users`); - -// for (const user of users.slice(-10)) { -// const email = user.primaryEmailAddress?.emailAddress; -// console.log('Check', email); - -// try { -// if (email) { -// const exists = await db.user.findUnique({ 
-// where: { -// id: user.id, -// }, -// }); - -// if (exists) { -// console.log('already exists'); -// } else { -// await db.user.create({ -// data: { -// id: user.id, -// email: email, -// firstName: user.firstName, -// lastName: user.lastName, -// }, -// }); -// } -// } else { -// console.log('No email?', user); -// } -// } catch (e) { -// console.log('ERROR'); -// console.log(''); -// console.log(''); -// console.dir(user, { depth: null }); - -// console.log(''); -// console.log(''); -// console.log(''); -// } -// } - -// for (const org of organizations.slice(-20)) { -// try { -// if (org.slug) { -// const exists = await db.organization.findUnique({ -// where: { -// id: org.slug, -// }, -// }); - -// if (exists) { -// console.log('already exists org'); -// } else { -// const clerkOrgMembers = -// await clerkClient.organizations.getOrganizationMembershipList({ -// organizationId: org.id, -// }); - -// const members = clerkOrgMembers.data.map((member) => { -// const user = users.find( -// (u) => u.id === member.publicUserData?.userId -// ); -// return { -// userId: member.publicUserData?.userId, -// role: member.role, -// email: user!.primaryEmailAddress!.emailAddress, -// }; -// }); - -// await db.organization.create({ -// data: { -// id: org.slug, -// name: org.name, -// createdBy: { -// connect: { -// id: org.createdBy, -// }, -// }, -// members: { -// create: members, -// }, -// }, -// }); - -// const invites = -// await clerkClient.organizations.getOrganizationInvitationList({ -// organizationId: org.id, -// status: ['pending'], -// }); - -// for (const invite of invites.data) { -// await db.member.create({ -// data: { -// email: invite.emailAddress, -// organizationId: org.slug, -// role: invite.role, -// userId: null, -// meta: { -// access: invite.publicMetadata?.access as string[], -// invitationId: invite.id, -// }, -// }, -// }); -// } -// } -// } else { -// console.log('org does not have any slug', org); -// } -// } catch (e) { -// 
console.log('ERROR'); -// console.log(''); -// console.log(''); -// console.dir(org, { depth: null }); -// console.log(''); -// console.log(''); -// console.log(''); -// } -// } - -// process.exit(0); -// } - -// main(); - -async function main() { - const organization = await db.organization.findUnique({ - where: { - id: 'openpanel-dev', - members: { - some: { - userId: 'user_2cEoI8b1SuEFbZERGEAyVvC676F', - }, - }, - }, - include: { - members: { - select: { - role: true, - user: true, - }, - }, - }, - }); - - console.dir(organization, { depth: null }); -} -main(); diff --git a/apps/api/scripts/migrate-origins.ts b/apps/api/scripts/migrate-origins.ts deleted file mode 100644 index a88f5950..00000000 --- a/apps/api/scripts/migrate-origins.ts +++ /dev/null @@ -1,65 +0,0 @@ -import { TABLE_NAMES, ch, chQuery } from '@openpanel/db'; - -async function main() { - const projects = await chQuery( - `SELECT distinct project_id FROM ${TABLE_NAMES.events} ORDER BY project_id`, - ); - const withOrigin = []; - - for (const project of projects) { - try { - const [eventWithOrigin, eventWithoutOrigin] = await Promise.all([ - await chQuery( - `SELECT * FROM ${TABLE_NAMES.events} WHERE origin != '' AND project_id = '${project.project_id}' ORDER BY created_at DESC LIMIT 1`, - ), - await chQuery( - `SELECT * FROM ${TABLE_NAMES.events} WHERE origin = '' AND project_id = '${project.project_id}' AND path != '' ORDER BY created_at DESC LIMIT 1`, - ), - ]); - - if (eventWithOrigin[0] && eventWithoutOrigin[0]) { - console.log(`Project ${project.project_id} as events without origin`); - console.log(`- Origin: ${eventWithOrigin[0].origin}`); - withOrigin.push(project.project_id); - const events = await chQuery( - `SELECT count(*) as count FROM ${TABLE_NAMES.events} WHERE project_id = '${project.project_id}' AND path != '' AND origin = ''`, - ); - console.log(`🤠🤠🤠🤠 Will update ${events[0]?.count} events`); - await ch.command({ - query: `ALTER TABLE events UPDATE origin = 
'${eventWithOrigin[0].origin}' WHERE project_id = '${project.project_id}' AND path != '' AND origin = '';`, - clickhouse_settings: { - wait_end_of_query: 1, - }, - }); - } - - if (!eventWithOrigin[0] && eventWithoutOrigin[0]) { - console.log( - `😧 Project ${project.project_id} has no events with origin (last event ${eventWithoutOrigin[0].created_at})`, - ); - console.log('- NO ORIGIN'); - } - - if (!eventWithOrigin[0] && !eventWithoutOrigin[0]) { - console.log( - `🔥 WARNING: Project ${project.project_id} has no events at all?!?!?!`, - ); - } - - if (eventWithOrigin[0] && !eventWithoutOrigin[0]) { - console.log( - `✅ Project ${project.project_id} has all events with origin!!!`, - ); - } - console.log(''); - console.log(''); - - await new Promise((resolve) => setTimeout(resolve, 500)); - } catch (e) { - console.log('🥵 ERROR ORRROR'); - console.log('Error for project', project.project_id); - } - } - process.exit(0); -} -main(); diff --git a/apps/api/src/bots/bots.ts b/apps/api/src/bots/bots.ts index 421891fa..3ce59472 100644 --- a/apps/api/src/bots/bots.ts +++ b/apps/api/src/bots/bots.ts @@ -1,4 +1,14 @@ -export default [ +// This file is generated by the script get-bots.ts + +// The data is fetch from device-detector https://raw.githubusercontent.com/matomo-org/device-detector/master/regexes/bots.yml + +const bots = [ + { + regex: 'WireReaderBot(?:/([\\d+.]+))?', + name: 'WireReaderBot', + category: 'Feed Fetcher', + url: 'https://wirereader.app/', + }, { regex: 'monitoring360bot', name: '360 Monitoring', @@ -14,40 +24,28 @@ export default [ name: 'Cloudflare Health Checks', category: 'Service Agent', url: 'https://developers.cloudflare.com/health-checks/', - producer: { - name: 'CloudFlare', - url: 'https://www.cloudflare.com/', - }, + producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' }, }, { regex: '360Spider', name: '360Spider', category: 'Search bot', url: 'https://www.so.com/help/help_3_2.html', - producer: { - name: 'Online Media Group, Inc.', 
- url: '', - }, + producer: { name: 'Online Media Group, Inc.', url: '' }, }, { regex: 'Aboundex', name: 'Aboundexbot', category: 'Search bot', url: 'http://www.aboundex.com/crawler/', - producer: { - name: 'Aboundex.com', - url: 'http://www.aboundex.com', - }, + producer: { name: 'Aboundex.com', url: 'http://www.aboundex.com' }, }, { regex: 'AcoonBot', name: 'Acoon', category: 'Search bot', url: 'http://www.acoon.de/robot.asp', - producer: { - name: 'Acoon GmbH', - url: 'http://www.acoon.de', - }, + producer: { name: 'Acoon GmbH', url: 'http://www.acoon.de' }, }, { regex: 'AddThis\\.com', @@ -64,69 +62,55 @@ export default [ name: 'aHrefs Bot', category: 'Crawler', url: 'https://ahrefs.com/robot', - producer: { - name: 'Ahrefs Pte Ltd', - url: 'https://ahrefs.com/robot', - }, + producer: { name: 'Ahrefs Pte Ltd', url: 'https://ahrefs.com/robot' }, }, { regex: 'AhrefsSiteAudit/[\\d.]+', name: 'AhrefsSiteAudit', category: 'Site Monitor', url: 'https://ahrefs.com/robot/site-audit', - producer: { - name: 'Ahrefs Pte Ltd', - url: 'https://ahrefs.com/', - }, + producer: { name: 'Ahrefs Pte Ltd', url: 'https://ahrefs.com/' }, }, { regex: 'ia_archiver|alexabot|verifybot', name: 'Alexa Crawler', category: 'Search bot', url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers', - producer: { - name: 'Alexa Internet', - url: 'https://www.alexa.com', - }, + producer: { name: 'Alexa Internet', url: 'https://www.alexa.com' }, }, { regex: 'alexa site audit', name: 'Alexa Site Audit', category: 'Site Monitor', url: 'https://support.alexa.com/hc/en-us/articles/200450194', - producer: { - name: 'Alexa Internet', - url: 'https://www.alexa.com', - }, + producer: { name: 'Alexa Internet', url: 'https://www.alexa.com' }, }, { - regex: 'Amazonbot', + regex: 'Amazonbot/[\\d.]+', name: 'Amazon Bot', category: 'Crawler', url: 'https://developer.amazon.com/support/amazonbot', - producer: { - name: 'Amazon.com, Inc.', - url: 'https://www.amazon.com/', - }, + producer: { name: 
'Amazon.com, Inc.', url: 'https://www.amazon.com/' }, + }, + { + regex: 'AmazonAdBot/[\\d.]+', + name: 'Amazon AdBot', + category: 'Crawler', + url: 'https://adbot.amazon.com/', + producer: { name: 'Amazon.com, Inc.', url: 'https://www.amazon.com/' }, }, { regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service', name: 'Amazon Route53 Health Check', category: 'Service Agent', - producer: { - name: 'Amazon Web Services', - url: 'https://aws.amazon.com/', - }, + producer: { name: 'Amazon Web Services', url: 'https://aws.amazon.com/' }, }, { regex: 'AmorankSpider', name: 'Amorank Spider', category: 'Crawler', url: 'http://amorank.com/webcrawler.html', - producer: { - name: 'Amorank', - url: 'http://www.amorank.com', - }, + producer: { name: 'Amorank', url: 'http://www.amorank.com' }, }, { regex: 'ApacheBench', @@ -142,81 +126,64 @@ export default [ regex: 'Applebot', name: 'Applebot', category: 'Crawler', - url: 'https://support.apple.com/en-us/HT204683', - producer: { - name: 'Apple Inc', - url: 'https://www.apple.com', - }, + url: 'https://support.apple.com/en-us/119829', + producer: { name: 'Apple Inc', url: 'https://www.apple.com/' }, + }, + { + regex: 'iTMS', + name: 'iTMS', + category: 'Crawler', + url: 'https://support.apple.com/en-us/119829', + producer: { name: 'Apple Inc', url: 'https://www.apple.com/' }, }, { regex: 'AppSignalBot', name: 'AppSignalBot', category: 'Site Monitor', url: 'https://docs.appsignal.com/uptime-monitoring/', - producer: { - name: 'AppSignal', - url: 'https://appsignal.com/', - }, + producer: { name: 'AppSignal', url: 'https://appsignal.com/' }, }, { regex: 'Arachni', name: 'Arachni', category: 'Security Checker', url: 'https://www.arachni-scanner.com/', - producer: { - name: 'Sarosys LLC', - url: 'https://www.sarosys.com/', - }, + producer: { name: 'Sarosys LLC', url: 'https://www.sarosys.com/' }, }, { regex: 'AspiegelBot', name: 'AspiegelBot', category: 'Crawler', url: 'https://aspiegel.com/', - producer: { - name: 'Huawei', - 
url: 'https://www.huawei.com/', - }, + producer: { name: 'Huawei', url: 'https://www.huawei.com/' }, }, { regex: 'Castro 2, Episode Duration Lookup', name: 'Castro 2', category: 'Service Agent', url: 'http://supertop.co/castro/', - producer: { - name: 'Supertop', - url: 'http://supertop.co', - }, + producer: { name: 'Supertop', url: 'http://supertop.co' }, }, { regex: 'Curious George', name: 'Analytics SEO Crawler', category: 'Crawler', url: 'http://www.analyticsseo.com/crawler', - producer: { - name: 'Analytics SEO', - url: 'http://www.analyticsseo.com', - }, + producer: { name: 'Analytics SEO', url: 'http://www.analyticsseo.com' }, }, { regex: 'archive\\.org_bot|special_archiver', name: 'archive.org bot', category: 'Crawler', url: 'https://archive.org/details/archive.org_bot', - producer: { - name: 'The Internet Archive', - url: 'https://archive.org', - }, + producer: { name: 'The Internet Archive', url: 'https://archive.org' }, }, { regex: 'Ask Jeeves/Teoma', name: 'Ask Jeeves', category: 'Search bot', url: '', - producer: { - name: 'Ask Jeeves Inc.', - url: 'http://www.ask.com', - }, + producer: { name: 'Ask Jeeves Inc.', url: 'http://www.ask.com' }, }, { regex: 'Backlink-Check\\.de', @@ -233,40 +200,28 @@ export default [ name: 'BacklinkCrawler', category: 'Crawler', url: 'http://www.backlinktest.com/crawler.html', - producer: { - name: '2.0Promotion GbR', - url: 'http://www.backlinktest.com', - }, + producer: { name: '2.0Promotion GbR', url: 'http://www.backlinktest.com' }, }, { regex: 'Baidu.*spider|baidu Transcoder', name: 'Baidu Spider', category: 'Search bot', url: 'http://www.baidu.com/search/spider.htm', - producer: { - name: 'Baidu', - url: 'http://www.baidu.com', - }, + producer: { name: 'Baidu', url: 'http://www.baidu.com' }, }, { regex: 'BazQux', name: 'BazQux Reader', url: 'https://bazqux.com/fetcher', category: 'Feed Fetcher', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'Better Uptime Bot', name: 
'Better Uptime Bot', category: 'Site Monitor', url: 'https://betteruptime.com/faq', - producer: { - name: 'Better Uptime', - url: 'https://betteruptime.com/', - }, + producer: { name: 'Better Uptime', url: 'https://betteruptime.com/' }, }, { regex: @@ -279,74 +234,60 @@ export default [ url: 'http://www.microsoft.com', }, }, + { + regex: 'Blackbox Exporter', + name: 'Blackbox Exporter', + category: 'Site Monitor', + url: 'https://github.com/prometheus/blackbox_exporter', + producer: { name: 'Prometheus', url: 'https://prometheus.io/' }, + }, { regex: 'Blekkobot', name: 'Blekkobot', category: 'Search bot', url: 'http://blekko.com/about/blekkobot', - producer: { - name: 'Blekko', - url: 'http://blekko.com', - }, + producer: { name: 'Blekko', url: 'http://blekko.com' }, }, { regex: 'BLEXBot', name: 'BLEXBot Crawler', category: 'Crawler', url: 'http://webmeup-crawler.com', - producer: { - name: 'WebMeUp', - url: 'http://webmeup.com', - }, + producer: { name: 'WebMeUp', url: 'http://webmeup.com' }, }, { regex: 'Bloglovin', name: 'Bloglovin', url: 'http://www.bloglovin.com', category: 'Feed Fetcher', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'Blogtrottr', name: 'Blogtrottr', url: '', category: 'Feed Fetcher', - producer: { - name: 'Blogtrottr Ltd', - url: 'https://blogtrottr.com/', - }, + producer: { name: 'Blogtrottr Ltd', url: 'https://blogtrottr.com/' }, }, { regex: 'BoardReader Blog Indexer', name: 'BoardReader Blog Indexer', category: 'Crawler', - producer: { - name: 'BoardReader', - url: 'https://boardreader.com/', - }, + producer: { name: 'BoardReader', url: 'https://boardreader.com/' }, }, { regex: 'BountiiBot', name: 'Bountii Bot', category: 'Search bot', url: 'http://bountii.com/contact.php', - producer: { - name: 'Bountii Inc.', - url: 'http://bountii.com', - }, + producer: { name: 'Bountii Inc.', url: 'http://bountii.com' }, }, { regex: 'Browsershots', name: 'Browsershots', category: 'Service Agent', url: 
'http://browsershots.org/faq', - producer: { - name: 'Browsershots.org', - url: 'http://browsershots.org', - }, + producer: { name: 'Browsershots.org', url: 'http://browsershots.org' }, }, { regex: 'BUbiNG', @@ -363,140 +304,112 @@ export default [ name: 'Butterfly Robot', category: 'Search bot', url: 'http://labs.topsy.com/butterfly', - producer: { - name: 'Topsy Labs', - url: 'http://labs.topsy.com', - }, + producer: { name: 'Topsy Labs', url: 'http://labs.topsy.com' }, }, { regex: 'CareerBot', name: 'CareerBot', category: 'Crawler', url: 'http://www.career-x.de/bot.html', - producer: { - name: 'career-x GmbH', - url: 'http://www.career-x.de', - }, + producer: { name: 'career-x GmbH', url: 'http://www.career-x.de' }, }, { regex: 'CCBot', name: 'ccBot crawler', category: 'Crawler', url: 'http://commoncrawl.org/faq/', - producer: { - name: 'reddit inc.', - url: 'http://www.reddit.com', - }, + producer: { name: 'reddit inc.', url: 'http://www.reddit.com' }, }, { regex: 'Cliqzbot', name: 'Cliqzbot', category: 'Crawler', url: 'http://cliqz.com/company/cliqzbot', - producer: { - name: '10betterpages GmbH', - url: 'http://cliqz.com', - }, + producer: { name: '10betterpages GmbH', url: 'http://cliqz.com' }, }, { regex: 'Cloudflare-AMP', name: 'CloudFlare AMP Fetcher', category: 'Crawler', url: 'https://amp.cloudflare.com/doc/fetcher.html', - producer: { - name: 'CloudFlare', - url: 'http://www.cloudflare.com', - }, + producer: { name: 'CloudFlare', url: 'http://www.cloudflare.com' }, }, { regex: 'Cloudflare-?Diagnostics', name: 'Cloudflare Diagnostics', category: 'Site Monitor', url: 'https://www.cloudflare.com/', - producer: { - name: 'Cloudflare', - url: 'https://www.cloudflare.com/', - }, + producer: { name: 'Cloudflare', url: 'https://www.cloudflare.com/' }, }, { regex: 'CloudFlare-AlwaysOnline', name: 'CloudFlare Always Online', category: 'Site Monitor', url: 'https://www.cloudflare.com/always-online', - producer: { - name: 'CloudFlare', - url: 
'https://www.cloudflare.com/', - }, + producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' }, }, { regex: 'Cloudflare-SSLDetector', name: 'Cloudflare SSL Detector', category: 'Site Monitor', url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/', - producer: { - name: 'CloudFlare', - url: 'https://www.cloudflare.com/', - }, + producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' }, }, { regex: 'Cloudflare Custom Hostname Verification', name: 'Cloudflare Custom Hostname Verification', category: 'Service Agent', url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/', - producer: { - name: 'CloudFlare', - url: 'https://www.cloudflare.com/', - }, + producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' }, }, { regex: 'Cloudflare-Traffic-Manager', name: 'Cloudflare Traffic Manager', category: 'Site Monitor', url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/', - producer: { - name: 'CloudFlare', - url: 'https://www.cloudflare.com/', - }, + producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' }, + }, + { + regex: 'Cloudflare-Smart-Transit', + name: 'Cloudflare Smart Transit', + category: 'Site Monitor', + url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/', + producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' }, + }, + { + regex: 'CloudflareObservatory', + name: 'Cloudflare Observatory', + category: 'Site Monitor', + url: 'https://developers.cloudflare.com/speed/speed-test/run-speed-test', + producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' }, }, { regex: 'https://developers\\.cloudflare\\.com/security-center/', name: 'Cloudflare Security Insights', category: 'Site Monitor', url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/', - producer: { - name: 'CloudFlare', - url: 'https://www.cloudflare.com/', - }, + 
producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' }, }, { regex: 'coccoc\\.com', name: 'Cốc Cốc Bot', url: 'https://help.coccoc.com/en/search-engine/coccoc-robots', category: 'Search bot', - producer: { - name: 'Cốc Cốc', - url: 'https://coccoc.com/', - }, + producer: { name: 'Cốc Cốc', url: 'https://coccoc.com/' }, }, { regex: 'collectd', name: 'Collectd', url: 'https://collectd.org/', category: 'Site Monitor', - producer: { - name: 'Collectd', - url: 'https://collectd.org/', - }, + producer: { name: 'Collectd', url: 'https://collectd.org/' }, }, { regex: 'CommaFeed', name: 'CommaFeed', url: 'http://www.commafeed.com', category: 'Feed Fetcher', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'CSS Certificate Spider', @@ -513,20 +426,14 @@ export default [ name: 'Datadog Agent', url: 'https://github.com/DataDog/dd-agent', category: 'Site Monitor', - producer: { - name: 'Datadog', - url: 'https://www.datadoghq.com/', - }, + producer: { name: 'Datadog', url: 'https://www.datadoghq.com/' }, }, { regex: 'Datanyze', name: 'Datanyze', url: '', category: 'Crawler', - producer: { - name: 'Datanyze', - url: 'https://www.datanyze.com', - }, + producer: { name: 'Datanyze', url: 'https://www.datanyze.com' }, }, { regex: 'Dataprovider', @@ -553,20 +460,14 @@ export default [ name: 'Dazoobot', category: 'Search bot', url: '', - producer: { - name: 'DAZOO.FR', - url: 'http://dazoo.fr', - }, + producer: { name: 'DAZOO.FR', url: 'http://dazoo.fr' }, }, { regex: 'discobot', name: 'Discobot', category: 'Search bot', url: 'http://discoveryengine.com/discobot.html', - producer: { - name: 'Discovery Engine', - url: 'http://discoveryengine.com', - }, + producer: { name: 'Discovery Engine', url: 'http://discoveryengine.com' }, }, { regex: 'Domain Re-Animator Bot|support@domainreanimator\\.com', @@ -583,49 +484,41 @@ export default [ name: 'DotBot', category: 'Crawler', url: 'http://www.opensiteexplorer.org/dotbot', - producer: { - 
name: 'SEOmoz, Inc.', - url: 'http://moz.com/', - }, + producer: { name: 'SEOmoz, Inc.', url: 'http://moz.com/' }, }, { regex: 'DuckDuck(?:Go-Favicons-)?Bot', - name: 'DuckDuckGo Bot', + name: 'DuckDuckBot', category: 'Search bot', - url: 'https://duckduckgo.com/duckduckbot', - producer: { - name: 'DuckDuckGo', - url: 'https://duckduckgo.com/', - }, + url: 'https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/', + producer: { name: 'DuckDuckGo', url: 'https://duckduckgo.com/' }, + }, + { + regex: 'DuckAssistBot', + name: 'DuckAssistBot', + category: 'Search bot', + url: 'https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot/', + producer: { name: 'DuckDuckGo', url: 'https://duckduckgo.com/' }, }, { regex: 'EasouSpider', name: 'Easou Spider', category: 'Search bot', url: 'http://www.easou.com/search/spider.html', - producer: { - name: 'easou ICP', - url: 'http://www.easou.com', - }, + producer: { name: 'easou ICP', url: 'http://www.easou.com' }, }, { regex: 'eCairn-Grabber', name: 'eCairn-Grabber', category: 'Crawler', - producer: { - name: 'eCairn', - url: 'https://ecairn.com', - }, + producer: { name: 'eCairn', url: 'https://ecairn.com' }, }, { regex: 'EMail Exractor', name: 'EMail Exractor', category: 'Crawler', url: '', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'evc-batch', @@ -642,10 +535,7 @@ export default [ name: 'ExaBot', category: 'Crawler', url: 'http://www.exabot.com/go/robot', - producer: { - name: 'Dassault Systèmes', - url: 'http://www.3ds.com', - }, + producer: { name: 'Dassault Systèmes', url: 'http://www.3ds.com' }, }, { regex: 'ExactSeek Crawler', @@ -662,51 +552,35 @@ export default [ name: 'Ezooms', category: 'Crawler', url: '', - producer: { - name: 'SEOmoz, Inc.', - url: 'http://moz.com/', - }, + producer: { name: 'SEOmoz, Inc.', url: 'http://moz.com/' }, }, { - regex: - 'facebookexternalhit|facebookplatform|facebookexternalua|facebookcatalog', - name: 'Facebook External 
Hit', + regex: 'facebook(?:catalog|externalhit|externalua|platform|scraper)', + name: 'Facebook Crawler', category: 'Social Media Agent', - url: 'https://www.facebook.com/externalhit_uatext.php', - producer: { - name: 'Meta Platforms, Inc.', - url: 'https://www.meta.com/', - }, + url: 'https://developers.facebook.com/docs/sharing/webmasters/crawler/', + producer: { name: 'Meta Platforms, Inc.', url: 'https://www.meta.com/' }, }, { regex: 'FacebookBot/[\\d.]+', name: 'FacebookBot', category: 'Crawler', url: 'https://developers.facebook.com/docs/sharing/bot', - producer: { - name: 'Meta Platforms, Inc.', - url: 'https://www.meta.com/', - }, + producer: { name: 'Meta Platforms, Inc.', url: 'https://www.meta.com/' }, }, { regex: 'Feedbin', name: 'Feedbin', url: 'http://feedbin.com/', category: 'Feed Fetcher', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'FeedBurner', name: 'FeedBurner', url: 'http://www.feedburner.com', category: 'Feed Fetcher', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'Feed Wrangler', @@ -723,40 +597,28 @@ export default [ name: 'Feedly', url: 'http://www.feedly.com', category: 'Feed Fetcher', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'Feedspot', name: 'Feedspot', url: 'http://www.feedspot.com', category: 'Feed Fetcher', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'Fever/[0-9]', name: 'Fever', url: 'http://feedafever.com/', category: 'Feed Fetcher', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'FlipboardProxy|FlipboardRSS', name: 'Flipboard', url: 'http://flipboard.com/browserproxy', category: 'Feed Fetcher', - producer: { - name: 'Flipboard', - url: 'http://flipboard.com/', - }, + producer: { name: 'Flipboard', url: 'http://flipboard.com/' }, }, { regex: 'Findxbot', @@ -775,30 +637,21 @@ export default [ name: 'Genieo Web 
filter', category: '', url: 'http://www.genieo.com/webfilter.html', - producer: { - name: 'Genieo', - url: 'http://www.genieo.com', - }, + producer: { name: 'Genieo', url: 'http://www.genieo.com' }, }, { regex: 'GigablastOpenSource', name: 'Gigablast', category: 'Search bot', url: 'https://github.com/gigablast/open-source-search-engine', - producer: { - name: 'Matt Wells', - url: 'http://www.gigablast.com/faq.html', - }, + producer: { name: 'Matt Wells', url: 'http://www.gigablast.com/faq.html' }, }, { regex: 'Gluten Free Crawler', name: 'Gluten Free Crawler', category: 'Crawler', url: 'http://glutenfreepleasure.com/', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'gobuster', @@ -810,206 +663,176 @@ export default [ name: 'Goo', category: 'Search bot', url: 'http://search.goo.ne.jp/option/use/sub4/sub4-1', - producer: { - name: 'NTT Resonant', - url: 'http://goo.ne.jp', - }, - }, - { - regex: 'Storebot-Google', - name: 'Google StoreBot', - category: 'Crawler', - }, - { - regex: 'Google Favicon', - name: 'Google Favicon', - category: 'Crawler', + producer: { name: 'NTT Resonant', url: 'http://goo.ne.jp' }, }, + { regex: 'Storebot-Google', name: 'Google StoreBot', category: 'Crawler' }, + { regex: 'Google Favicon', name: 'Google Favicon', category: 'Crawler' }, { regex: 'Google Search Console', name: 'Google Search Console', category: 'Crawler', url: 'https://search.google.com/search-console/about', - producer: { - name: 'Google Inc.', - url: 'https://www.google.com/', - }, + producer: { name: 'Google Inc.', url: 'https://www.google.com/' }, }, { regex: 'Google Page Speed Insights', name: 'Google PageSpeed Insights', category: 'Site Monitor', url: 'http://developers.google.com/speed/pagespeed/insights/', - producer: { - name: 'Google Inc.', - url: 'https://www.google.com/', - }, + producer: { name: 'Google Inc.', url: 'https://www.google.com/' }, }, { regex: 'google_partner_monitoring', name: 'Google Partner Monitoring', 
category: 'Site Monitor', url: '', - producer: { - name: 'Google Inc.', - url: 'https://www.google.com/', - }, + producer: { name: 'Google Inc.', url: 'https://www.google.com/' }, }, { regex: 'Google-Cloud-Scheduler', name: 'Google Cloud Scheduler', category: 'Crawler', url: 'https://cloud.google.com/scheduler', - producer: { - name: 'Google Inc.', - url: 'https://www.google.com', - }, + producer: { name: 'Google Inc.', url: 'https://www.google.com' }, }, { regex: 'Google-Structured-Data-Testing-Tool', name: 'Google Structured Data Testing Tool', category: 'Validator', url: 'https://search.google.com/structured-data/testing-tool', - producer: { - name: 'Google Inc.', - url: 'https://www.google.com/', - }, + producer: { name: 'Google Inc.', url: 'https://www.google.com/' }, }, { regex: 'GoogleStackdriverMonitoring', name: 'Google Stackdriver Monitoring', category: 'Site Monitor', url: 'https://cloud.google.com/monitoring', - producer: { - name: 'Google Inc.', - url: 'https://www.google.com', - }, + producer: { name: 'Google Inc.', url: 'https://www.google.com' }, }, { regex: 'Google-Transparency-Report', name: 'Google Transparency Report', category: 'Site Monitor', url: 'https://transparencyreport.google.com/', - producer: { - name: 'Google Inc.', - url: 'https://www.google.com/', - }, + producer: { name: 'Google Inc.', url: 'https://www.google.com/' }, + }, + { + regex: 'Google-CloudVertexBot', + name: 'Google-CloudVertexBot', + category: 'Crawler', + url: 'https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers#google-cloudvertexbot', + producer: { name: 'Google Inc.', url: 'https://www.google.com/' }, }, { regex: 'via ggpht\\.com GoogleImageProxy', name: 'Gmail Image Proxy', category: 'Crawler', url: '', - producer: { - name: 'Google Inc.', - url: 'https://www.google.com/', - }, + producer: { name: 'Google Inc.', url: 'https://www.google.com/' }, }, { regex: 'SeznamEmailProxy', name: 'Seznam Email Proxy', category: 'Crawler', url: '', - 
producer: { - name: 'Seznam.cz, a.s.', - url: 'http://www.seznam.cz/', - }, + producer: { name: 'Seznam.cz, a.s.', url: 'http://www.seznam.cz/' }, }, { regex: 'Seznam-Zbozi-robot', name: 'Seznam Zbozi.cz', category: 'Crawler', url: '', - producer: { - name: 'Seznam.cz, a.s.', - url: 'https://www.zbozi.cz/', - }, + producer: { name: 'Seznam.cz, a.s.', url: 'https://www.zbozi.cz/' }, }, { regex: 'Heurekabot-Feed', name: 'Heureka Feed', category: 'Crawler', url: 'https://sluzby.heureka.cz/napoveda/heurekabot/', - producer: { - name: 'Heureka.cz, a.s.', - url: 'https://www.heureka.cz/', - }, + producer: { name: 'Heureka.cz, a.s.', url: 'https://www.heureka.cz/' }, }, { regex: 'ShopAlike', name: 'ShopAlike', category: 'Crawler', url: '', - producer: { - name: 'Visual Meta', - url: 'https://www.shopalike.cz/', - }, + producer: { name: 'Visual Meta', url: 'https://www.shopalike.cz/' }, + }, + { + regex: 'deepcrawl\\.com', + name: 'Lumar', + category: 'Crawler', + url: 'https://deepcrawl.com/bot', + producer: { name: 'Lumar', url: 'https://www.lumar.io/' }, + }, + { + regex: 'Googlebot-News', + name: 'Googlebot News', + category: 'Search bot', + url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers', + producer: { name: 'Google Inc.', url: 'https://www.google.com/' }, }, { regex: - 'Adwords-(?:DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(?:Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|PageRenderer|Read-Aloud|Safety|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer)|Google.*/\\+/web/snippet', + 'Adwords-(?:DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ 
-]Plugin|Google-(?:adstxt|Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Shopping-Quality|Site-Verification|Sites-Thumbnails|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer|Sites)|Google.*/\\+/web/snippet', name: 'Googlebot', category: 'Search bot', url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers', - producer: { - name: 'Google Inc.', - url: 'https://www.google.com/', - }, + producer: { name: 'Google Inc.', url: 'https://www.google.com/' }, }, { regex: '^Google$', name: 'Googlebot', category: 'Search bot', url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers', - producer: { - name: 'Google Inc.', - url: 'https://www.google.com/', - }, + producer: { name: 'Google Inc.', url: 'https://www.google.com/' }, }, { - regex: 'heritrix', - name: 'Heritrix', + regex: 'Google-Safety', + name: 'Google-Safety', category: 'Crawler', - url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix', - producer: { - name: 'The Internet Archive', - url: 'https://archive.org', - }, + url: 'https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers', + producer: { name: 'Google Inc.', url: 'https://www.google.com/' }, + }, + { + regex: 'DuplexWeb-Google', + name: 'DuplexWeb-Google', + category: 'Crawler', + url: 'https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers', + producer: { name: 'Google Inc.', url: 'https://www.google.com/' }, + }, + { + regex: 'Google-Area120-PrivacyPolicyFetcher', + name: 'Google Area 120 Privacy Policy Fetcher', + category: 'Crawler', + url: 'https://area120.google.com/', + producer: { name: 'Google Inc.', url: 'https://www.google.com/' }, }, { regex: 'HubSpot ', name: 'HubSpot', category: 'Crawler', - producer: 
{ - name: 'HubSpot Inc.', - url: 'https://www.hubspot.com', - }, + producer: { name: 'HubSpot Inc.', url: 'https://www.hubspot.com' }, }, { - regex: 'vuhuvBot', - name: 'Vuhuv Bot', - category: 'Crawler', - url: 'http://vuhuv.com/bot.html', + regex: 'vuhuv(?:Bot|RBT)', + name: 'vuhuvBot', + category: 'Search bot', + url: 'https://vuhuv.com/bot.html', }, { regex: 'HTTPMon/[\\d.]+', name: 'HTTPMon', category: 'Site Monitor', url: 'http://www.httpmon.com', - producer: { - name: 'towards GmbH', - url: 'http://www.towards.ch/', - }, + producer: { name: 'towards GmbH', url: 'http://www.towards.ch/' }, }, { regex: 'ICC-Crawler', name: 'ICC-Crawler', category: 'Crawler', url: 'http://www.nict.go.jp/en/univ-com/plan/crawl.html', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'inoreader\\.com', @@ -1031,45 +854,29 @@ export default [ regex: 'ips-agent', name: 'IPS Agent', category: 'Crawler', - producer: { - name: 'VeriSign, Inc', - url: 'http://www.verisign.com/', - }, + producer: { name: 'VeriSign, Inc', url: 'http://www.verisign.com/' }, }, { regex: 'IP-Guide\\.com', name: 'IP-Guide Crawler', category: 'Crawler', url: '', - producer: { - name: '', - url: 'https://ip-guide.com', - }, - }, - { - regex: 'k6/[0-9\\.]+', - name: 'K6', - url: 'https://k6.io/', + producer: { name: '', url: 'https://ip-guide.com' }, }, + { regex: 'k6/[0-9\\.]+', name: 'K6', url: 'https://k6.io/' }, { regex: 'kouio', name: 'Kouio', url: 'http://kouio.com/', category: 'Feed Fetcher', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'larbin', name: 'Larbin web crawler', category: 'Crawler', url: 'http://larbin.sourceforge.net', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: '[A-z0-9]*-Lighthouse', @@ -1086,83 +893,59 @@ export default [ name: 'LastMod Bot', category: 'Site Monitor', url: 'https://last-modified.com/en/about', - producer: { - name: '', - url: 
'https://last-modified.com/en', - }, + producer: { name: '', url: 'https://last-modified.com/en' }, }, { regex: 'linkdexbot|linkdex\\.com', name: 'Linkdex Bot', category: 'Search bot', url: 'http://www.linkdex.com/bots', - producer: { - name: 'Mojeek Ltd.', - url: 'http://www.mojeek.com', - }, + producer: { name: 'Mojeek Ltd.', url: 'http://www.mojeek.com' }, }, { regex: 'LinkedInBot', name: 'LinkedIn Bot', category: 'Social Media Agent', url: 'http://www.linkedin.com', - producer: { - name: 'LinkedIn', - url: 'http://www.linkedin.com', - }, + producer: { name: 'LinkedIn', url: 'http://www.linkedin.com' }, }, { regex: 'ltx71', name: 'LTX71', category: 'Security Checker', url: 'https://ltx71.com/', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'Mail\\.RU', name: 'Mail.Ru Bot', category: 'Search bot', url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots', - producer: { - name: 'Mail.Ru Group', - url: 'http://corp.mail.ru', - }, + producer: { name: 'Mail.Ru Group', url: 'http://corp.mail.ru' }, }, { regex: 'magpie-crawler', name: 'Magpie-Crawler', category: 'Social Media Agent', url: 'http://www.brandwatch.com/magpie-crawler/', - producer: { - name: 'Brandwatch', - url: 'http://www.brandwatch.com', - }, + producer: { name: 'Brandwatch', url: 'http://www.brandwatch.com' }, }, { regex: 'MagpieRSS', name: 'MagpieRSS', url: 'http://magpierss.sourceforge.net/', category: 'Feed Parser', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'masscan-ng/[\\d.]+', name: 'masscan-ng', url: 'https://github.com/bi-zone/masscan-ng', category: 'Crawler', - producer: { - name: 'BIZON, OOO', - url: 'https://bi.zone/', - }, + producer: { name: 'BIZON, OOO', url: 'https://bi.zone/' }, }, { - regex: 'masscan', + regex: '.*masscan', name: 'masscan', url: 'https://github.com/robertdavidgraham/masscan', category: 'Crawler', @@ -1171,30 +954,20 @@ export default [ url: 
'https://github.com/robertdavidgraham', }, }, - { - regex: 'Mastodon/', - name: 'Mastodon Bot', - category: 'Social Media Agent', - }, + { regex: 'Mastodon/', name: 'Mastodon Bot', category: 'Social Media Agent' }, { regex: 'meanpathbot', name: 'Meanpath Bot', category: 'Search bot', url: 'http://www.meanpath.com/meanpathbot.html', - producer: { - name: 'Meanpath', - url: 'http://www.meanpath.com', - }, + producer: { name: 'Meanpath', url: 'http://www.meanpath.com' }, }, { regex: 'MetaJobBot', name: 'MetaJobBot', category: 'Crawler', url: 'http://www.metajob.at/the/crawler', - producer: { - name: 'MetaJob', - url: 'http://www.metajob.at', - }, + producer: { name: 'MetaJob', url: 'http://www.metajob.at' }, }, { regex: 'MetaInspector', @@ -1207,60 +980,42 @@ export default [ name: 'Mixrank Bot', category: 'Crawler', url: 'http://mixrank.com', - producer: { - name: 'Online Media Group, Inc.', - url: '', - }, + producer: { name: 'Online Media Group, Inc.', url: '' }, }, { regex: 'MJ12bot', name: 'MJ12 Bot', category: 'Search bot', url: 'http://majestic12.co.uk/bot.php', - producer: { - name: 'Majestic-12', - url: 'http://majestic12.co.uk', - }, + producer: { name: 'Majestic-12', url: 'http://majestic12.co.uk' }, }, { regex: 'Mnogosearch', name: 'Mnogosearch', category: 'Search bot', url: 'http://www.mnogosearch.org/', - producer: { - name: 'Lavtech.Com Corp.', - url: '', - }, + producer: { name: 'Lavtech.Com Corp.', url: '' }, }, { regex: 'MojeekBot', name: 'MojeekBot', category: 'Search bot', url: 'http://www.mojeek.com/bot.html', - producer: { - name: 'Mojeek Ltd.', - url: 'http://www.mojeek.com', - }, + producer: { name: 'Mojeek Ltd.', url: 'http://www.mojeek.com' }, }, { regex: 'munin', name: 'Munin', category: 'Site Monitor', url: 'http://munin-monitoring.org/', - producer: { - name: 'Munin', - url: 'http://munin-monitoring.org/', - }, + producer: { name: 'Munin', url: 'http://munin-monitoring.org/' }, }, { regex: 'NalezenCzBot', name: 'NalezenCzBot', category: 
'Crawler', url: 'http://www.nalezen.cz/about-crawler', - producer: { - name: 'Jaroslav Kuboš', - url: '', - }, + producer: { name: 'Jaroslav Kuboš', url: '' }, }, { regex: 'check_http/v', @@ -1282,123 +1037,91 @@ export default [ name: 'Netcraft Survey Bot', category: 'Search bot', url: '', - producer: { - name: 'Netcraft', - url: 'http://www.netcraft.com', - }, + producer: { name: 'Netcraft', url: 'http://www.netcraft.com' }, }, { regex: 'netEstate NE Crawler', name: 'netEstate', category: 'Crawler', url: 'http://www.website-datenbank.de/Impressum', - producer: { - name: 'netEstate GmbH', - url: 'https://www.netestate.de/en/', - }, + producer: { name: 'netEstate GmbH', url: 'https://www.netestate.de/en/' }, }, { regex: 'Netvibes', name: 'Netvibes', url: 'http://www.netvibes.com/', category: 'Feed Fetcher', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'NewsBlur .*(?:Fetcher|Finder)', name: 'NewsBlur', url: 'http://www.newsblur.com', category: 'Feed Fetcher', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'NewsGatorOnline', name: 'NewsGator', url: 'http://www.newsgator.com', category: 'Feed Fetcher', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'nlcrawler', name: 'NLCrawler', category: 'Crawler', url: '', - producer: { - name: 'Northern Light', - url: 'http://northernlight.com', - }, + producer: { name: 'Northern Light', url: 'http://northernlight.com' }, }, { regex: 'Nmap Scripting Engine', name: 'Nmap', category: 'Security Checker', url: 'https://nmap.org/book/nse.html', - producer: { - name: 'Nmap', - url: 'https://nmap.org/', - }, + producer: { name: 'Nmap', url: 'https://nmap.org/' }, }, { regex: 'Nuzzel', name: 'Nuzzel', category: 'Crawler', - producer: { - name: 'Nuzzel', - url: 'https://www.nuzzel.com/', - }, + producer: { name: 'Nuzzel', url: 'https://www.nuzzel.com/' }, }, { - regex: 'Octopus [0-9]', - name: 'Octopus', + regex: 
'NodePing', + name: 'NodePing', + category: 'Site Monitor', + url: 'https://nodeping.com', + producer: { name: 'NodePing', url: 'https://nodeping.com' }, }, + { regex: 'Octopus [0-9]', name: 'Octopus' }, { regex: 'OnlineOrNot\\.com_bot', name: 'OnlineOrNot Bot', category: 'Site Monitor', url: 'https://onlineornot.com/website-monitoring', - producer: { - name: 'OnlineOrNot', - url: 'https://onlineornot.com', - }, + producer: { name: 'OnlineOrNot', url: 'https://onlineornot.com' }, }, { regex: 'omgili', name: 'Omgili bot', category: 'Search bot', url: 'http://www.omgili.com/Crawler.html', - producer: { - name: 'Omgili', - url: 'http://www.omgili.com', - }, + producer: { name: 'Omgili', url: 'http://www.omgili.com' }, }, { regex: 'OpenindexSpider', name: 'Openindex Spider', category: 'Search bot', url: 'http://www.openindex.io/en/webmasters/spider.html', - producer: { - name: 'Openindex B.V.', - url: 'http://www.openindex.io', - }, + producer: { name: 'Openindex B.V.', url: 'http://www.openindex.io' }, }, { regex: 'spbot', name: 'OpenLinkProfiler', category: 'Crawler', url: 'http://openlinkprofiler.org/bot', - producer: { - name: 'Axandra GmbH', - url: 'http://www.axandra.com', - }, + producer: { name: 'Axandra GmbH', url: 'http://www.axandra.com' }, }, { regex: 'OpenWebSpider', @@ -1415,20 +1138,14 @@ export default [ name: 'Orange Bot', category: 'Search bot', url: 'http://lemoteur.orange.fr', - producer: { - name: 'Orange', - url: 'http://www.orange.fr', - }, + producer: { name: 'Orange', url: 'http://www.orange.fr' }, }, { regex: 'PaperLiBot', name: 'PaperLiBot', category: 'Search bot', url: 'http://support.paper.li/entries/20023257-what-is-paper-li', - producer: { - name: 'Smallrivers SA', - url: 'http://www.paper.li', - }, + producer: { name: 'Smallrivers SA', url: 'http://www.paper.li' }, }, { regex: 'phantomas/', @@ -1451,100 +1168,70 @@ export default [ name: 'Pocket', category: 'Read-it-later Service', url: 'https://getpocket.com/pocketparser_ua', - 
producer: { - name: 'Read It Later, Inc.', - url: 'https://getpocket.com/', - }, + producer: { name: 'Read It Later, Inc.', url: 'https://getpocket.com/' }, }, { regex: 'PritTorrent', name: 'PritTorrent', category: 'Crawler', url: 'https://github.com/astro/prittorrent', - producer: { - name: 'Bitlove', - url: 'http://bitlove.org/', - }, + producer: { name: 'Bitlove', url: 'http://bitlove.org/' }, }, { regex: 'PRTG Network Monitor', name: 'PRTG Network Monitor', category: 'Network Monitor', url: 'https://www.paessler.com/prtg', - producer: { - name: 'Paessler AG', - url: 'https://www.paessler.com', - }, + producer: { name: 'Paessler AG', url: 'https://www.paessler.com' }, }, { regex: 'psbot', name: 'Picsearch bot', category: 'Search bot', url: 'http://www.picsearch.com/bot.html', - producer: { - name: 'Picsearch', - url: 'http://www.picsearch.com', - }, + producer: { name: 'Picsearch', url: 'http://www.picsearch.com' }, }, { regex: 'Pingdom(?:\\.com|TMS)', name: 'Pingdom Bot', category: 'Site Monitor', url: '', - producer: { - name: 'Pingdom AB', - url: 'https://www.pingdom.com', - }, + producer: { name: 'Pingdom AB', url: 'https://www.pingdom.com' }, }, { regex: 'Quora Link Preview', name: 'Quora Link Preview', category: 'Crawler', url: '', - producer: { - name: 'Quora', - url: 'http://www.quora.com', - }, + producer: { name: 'Quora', url: 'http://www.quora.com' }, }, { regex: 'Quora-Bot', name: 'Quora Bot', category: 'Crawler', url: '', - producer: { - name: 'Quora', - url: 'https://www.quora.com/', - }, + producer: { name: 'Quora', url: 'https://www.quora.com/' }, }, { regex: 'RamblerMail', name: 'RamblerMail Image Proxy', category: 'Crawler', url: '', - producer: { - name: 'Rambler&Co', - url: 'https://rambler-co.ru/', - }, + producer: { name: 'Rambler&Co', url: 'https://rambler-co.ru/' }, }, { regex: 'QuerySeekerSpider', name: 'QuerySeekerSpider', category: 'Crawler', url: 'http://queryseeker.com/bot.html', - producer: { - name: 'QueryEye Inc.', - url: 
'http://queryeye.com', - }, + producer: { name: 'QueryEye Inc.', url: 'http://queryeye.com' }, }, { - regex: 'Qwantify', - name: 'Qwantify', + regex: 'Qwantify|Qwantbot', + name: 'Qwantbot', category: 'Crawler', - url: 'https://www.qwant.com/', - producer: { - name: 'Qwant Corporation', - url: 'https://www.qwant.com/', - }, + url: 'https://help.qwant.com/bot/', + producer: { name: 'Qwant Corporation', url: 'https://www.qwant.com/' }, }, { regex: 'Rainmeter', @@ -1557,50 +1244,35 @@ export default [ name: 'Reddit Bot', category: 'Social Media Agent', url: 'http://www.reddit.com/feedback', - producer: { - name: 'reddit inc.', - url: 'http://www.reddit.com', - }, + producer: { name: 'reddit inc.', url: 'http://www.reddit.com' }, }, { regex: 'Riddler', name: 'Riddler', category: 'Security search bot', url: 'https://riddler.io/about', - producer: { - name: 'F-Secure', - url: 'https://www.f-secure.com', - }, + producer: { name: 'F-Secure', url: 'https://www.f-secure.com' }, }, { regex: 'rogerbot', name: 'Rogerbot', category: 'Crawler', url: 'http://moz.com/help/pro/what-is-rogerbot-', - producer: { - name: 'SEOmoz, Inc.', - url: 'http://moz.com/', - }, + producer: { name: 'SEOmoz, Inc.', url: 'http://moz.com/' }, }, { regex: 'ROI Hunter', name: 'ROI Hunter', category: 'Crawler', url: '', - producer: { - name: 'Roihunter a.s.', - url: 'http://roihunter.com/', - }, + producer: { name: 'Roihunter a.s.', url: 'http://roihunter.com/' }, }, { regex: 'SafeDNSBot', name: 'SafeDNSBot', category: 'Crawler', url: 'https://www.safedns.com/searchbot', - producer: { - name: 'SafeDNS, Inc.', - url: 'https://www.safedns.com/', - }, + producer: { name: 'SafeDNS, Inc.', url: 'https://www.safedns.com/' }, }, { regex: 'Scrapy', @@ -1623,50 +1295,56 @@ export default [ name: 'ScreenerBot', category: 'Crawler', url: 'http://www.screenerbot.com', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'SemrushBot', - name: 'Semrush Bot', + name: 'SemrushBot', 
category: 'Crawler', - url: 'http://www.semrush.com/bot.html', - producer: { - name: 'SEMrush', - url: 'http://www.semrush.com', - }, + url: 'https://www.semrush.com/bot/', + producer: { name: 'Semrush Inc.', url: 'https://www.semrush.com/' }, + }, + { + regex: 'SerpReputationManagementAgent/[\\d.]+', + name: 'Semrush Reputation Management', + category: 'Service Agent', + url: 'https://www.semrush.com/bot/', + producer: { name: 'Semrush Inc.', url: 'https://www.semrush.com/' }, + }, + { + regex: 'SplitSignalBot', + name: 'SplitSignalBot', + category: 'Crawler', + url: 'https://www.semrush.com/bot/', + producer: { name: 'Semrush Inc.', url: 'https://www.semrush.com/' }, + }, + { + regex: 'SiteAuditBot/[\\d.]+', + name: 'SiteAuditBot', + category: 'Crawler', + url: 'https://www.semrush.com/bot/', + producer: { name: 'Semrush Inc.', url: 'https://www.semrush.com/' }, }, { regex: 'SensikaBot', name: 'Sensika Bot', category: '', url: '', - producer: { - name: 'Sensika', - url: 'http://sensika.com', - }, + producer: { name: 'Sensika', url: 'http://sensika.com' }, }, { regex: 'SEOENG(?:World)?Bot', name: 'SEOENGBot', category: 'Crawler', url: 'http://www.seoengine.com/seoengbot.htm', - producer: { - name: 'SEO Engine', - url: 'http://www.seoengine.com', - }, + producer: { name: 'SEO Engine', url: 'http://www.seoengine.com' }, }, { regex: 'SEOkicks-Robot', name: 'SEOkicks-Robot', category: 'Crawler', url: 'http://www.seokicks.de/robot.html', - producer: { - name: 'SEOkicks', - url: 'https://www.seokicks.de/', - }, + producer: { name: 'SEOkicks', url: 'https://www.seokicks.de/' }, }, { regex: 'seoscanners\\.net', @@ -1689,70 +1367,49 @@ export default [ name: 'Seznam Bot', category: 'Search bot', url: 'http://www.mapy.cz/cz/seznambot.html', - producer: { - name: 'Seznam.cz, a.s.', - url: 'http://www.seznam.cz/', - }, + producer: { name: 'Seznam.cz, a.s.', url: 'http://www.seznam.cz/' }, }, { regex: 'shopify-partner-homepage-scraper', name: 'Shopify Partner', category: 
'Crawler', url: 'https://www.shopify.com/partners', - producer: { - name: 'Shopify', - url: 'https://www.shopify.com/', - }, + producer: { name: 'Shopify', url: 'https://www.shopify.com/' }, }, { regex: 'ShopWiki', name: 'ShopWiki', category: 'Search tools', url: 'http://www.shopwiki.com/wiki/Help:Bot', - producer: { - name: 'ShopWiki Corp.', - url: 'http://www.shopwiki.com', - }, + producer: { name: 'ShopWiki Corp.', url: 'http://www.shopwiki.com' }, }, { regex: 'SilverReader', name: 'SilverReader', url: 'http://silverreader.com', category: 'Feed Fetcher', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'SimplePie', name: 'SimplePie', url: 'http://www.simplepie.org', category: 'Feed Parser', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'SISTRIX Crawler', name: 'SISTRIX Crawler', category: 'Crawler', url: 'http://crawler.sistrix.net', - producer: { - name: 'SISTRIX GmbH', - url: 'http://www.sistrix.de', - }, + producer: { name: 'SISTRIX GmbH', url: 'http://www.sistrix.de' }, }, { regex: 'compatible; (?:SISTRIX )?Optimizer', name: 'SISTRIX Optimizer', category: 'Crawler', url: 'https://optimizer.sistrix.com', - producer: { - name: 'SISTRIX GmbH', - url: 'http://www.sistrix.de', - }, + producer: { name: 'SISTRIX GmbH', url: 'http://www.sistrix.de' }, }, { regex: 'SiteSucker', @@ -1765,20 +1422,14 @@ export default [ name: 'Sixy.ch', category: 'Site Monitor', url: 'http://sixy.ch', - producer: { - name: 'Manuel Kasper', - url: 'https://neon1.net/', - }, + producer: { name: 'Manuel Kasper', url: 'https://neon1.net/' }, }, { regex: 'Slackbot|Slack-ImgProxy', name: 'Slackbot', category: 'Crawler', url: 'https://api.slack.com/robots', - producer: { - name: 'Slack Technologies', - url: 'http://slack.com', - }, + producer: { name: 'Slack Technologies', url: 'http://slack.com' }, }, { regex: @@ -1786,40 +1437,28 @@ export default [ name: 'Sogou Spider', category: 'Search bot', url: 
'http://www.sogou.com/docs/help/webmasters.htm', - producer: { - name: 'Sohu, Inc.', - url: 'http://www.sogou.com', - }, + producer: { name: 'Sohu, Inc.', url: 'http://www.sogou.com' }, }, { regex: 'Sosospider|Sosoimagespider', name: 'Soso Spider', category: 'Search bot', url: 'http://help.soso.com/webspider.htm', - producer: { - name: 'Tencent Holdings', - url: 'http://www.soso.com', - }, + producer: { name: 'Tencent Holdings', url: 'http://www.soso.com' }, }, { regex: 'Sprinklr', name: 'Sprinklr', category: 'Crawler', url: '', - producer: { - name: 'Sprinklr, Inc.', - url: 'https://www.sprinklr.com/', - }, + producer: { name: 'Sprinklr, Inc.', url: 'https://www.sprinklr.com/' }, }, { regex: 'sqlmap/', name: 'sqlmap', category: 'Security Checker', url: 'http://sqlmap.org/', - producer: { - name: 'sqlmap', - url: 'http://sqlmap.org/', - }, + producer: { name: 'sqlmap', url: 'http://sqlmap.org/' }, }, { regex: 'SSL Labs', @@ -1836,20 +1475,14 @@ export default [ name: 'StatusCake', category: 'Site Monitor', url: 'https://www.statuscake.com', - producer: { - name: 'StatusCake', - url: 'https://www.statuscake.com', - }, + producer: { name: 'StatusCake', url: 'https://www.statuscake.com' }, }, { regex: 'Superfeedr bot', name: 'Superfeedr Bot', category: 'Feed Fetcher', url: '', - producer: { - name: 'Superfeedr', - url: 'https://superfeedr.com/', - }, + producer: { name: 'Superfeedr', url: 'https://superfeedr.com/' }, }, { regex: 'Sparkler/[0-9]', @@ -1862,17 +1495,9 @@ export default [ name: 'Spinn3r', category: 'Crawler', url: 'http://spinn3r.com/robot', - producer: { - name: 'Tailrank Inc', - url: 'http://spinn3r.com', - }, - }, - { - regex: 'SputnikBot', - name: 'Sputnik Bot', - category: 'Crawler', - url: '', + producer: { name: 'Tailrank Inc', url: 'http://spinn3r.com' }, }, + { regex: 'SputnikBot', name: 'Sputnik Bot', category: 'Crawler', url: '' }, { regex: 'SputnikFaviconBot', name: 'Sputnik Favicon Bot', @@ -1890,10 +1515,7 @@ export default [ name: 'Survey 
Bot', category: 'Search bot', url: 'http://www.domaintools.com/webmasters/surveybot.php', - producer: { - name: 'Domain Tools', - url: 'http://www.domaintools.com', - }, + producer: { name: 'Domain Tools', url: 'http://www.domaintools.com' }, }, { regex: 'TarmotGezgin', @@ -1911,30 +1533,21 @@ export default [ name: 'TLSProbe', url: 'https://scan.trustnet.venafi.com/', category: 'Security search bot', - producer: { - name: 'Venafi TrustNet', - url: 'https://www.venafi.com', - }, + producer: { name: 'Venafi TrustNet', url: 'https://www.venafi.com' }, }, { regex: 'TinEye-bot', name: 'TinEye Crawler', category: 'Search bot', url: 'http://www.tineye.com/crawler.html', - producer: { - name: 'Idée Inc.', - url: 'http://ideeinc.com', - }, + producer: { name: 'Idée Inc.', url: 'http://ideeinc.com' }, }, { regex: 'Tiny Tiny RSS', name: 'Tiny Tiny RSS', url: 'http://tt-rss.org', category: 'Feed Fetcher', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'theoldreader\\.com', @@ -1943,242 +1556,170 @@ export default [ url: 'https://theoldreader.com', }, { - regex: 'Trackable/0.1', + regex: 'Trackable/0\\.1', name: 'Chartable', category: 'Site Monitor', url: 'https://help.chartable.com/article/34-what-is-the-trackable-analytics-prefix', - producer: { - name: 'Chartable', - url: 'https://chartable.com', - }, + producer: { name: 'Chartable', url: 'https://chartable.com' }, }, { regex: 'trendictionbot', name: 'Trendiction Bot', category: 'Crawler', url: 'http://www.trendiction.de/bot', - producer: { - name: 'Talkwalker Inc.', - url: 'http://www.talkwalker.com', - }, + producer: { name: 'Talkwalker Inc.', url: 'http://www.talkwalker.com' }, }, { regex: 'TurnitinBot', name: 'TurnitinBot', category: 'Crawler', url: 'http://www.turnitin.com/robot/crawlerinfo.html', - producer: { - name: 'iParadigms, LLC.', - url: 'http://www.turnitin.com', - }, + producer: { name: 'iParadigms, LLC.', url: 'http://www.turnitin.com' }, }, { - regex: 'TweetedTimes 
Bot', + regex: 'TweetedTimes', name: 'TweetedTimes Bot', category: 'Crawler', - url: 'http://tweetedtimes.com', - producer: { - name: 'TweetedTimes', - url: 'http://tweetedtimes.com/', - }, + url: 'https://tweetedtimes.com/', + producer: { name: 'TweetedTimes', url: 'https://tweetedtimes.com/' }, }, { regex: 'TweetmemeBot', name: 'Tweetmeme Bot', category: 'Crawler', url: 'http://tweetmeme.com/', - producer: { - name: 'Mediasift', - url: '', - }, + producer: { name: 'Mediasift', url: '' }, }, { regex: 'Twingly Recon', name: 'Twingly Recon', category: 'Crawler', - producer: { - name: 'Twingly', - url: 'https://www.twingly.com', - }, + producer: { name: 'Twingly', url: 'https://www.twingly.com' }, }, { regex: 'Twitterbot', name: 'Twitterbot', category: 'Social Media Agent', url: 'https://dev.twitter.com/docs/cards/getting-started', - producer: { - name: 'Twitter', - url: 'http://www.twitter.com', - }, + producer: { name: 'Twitter', url: 'http://www.twitter.com' }, }, { regex: 'UniversalFeedParser', name: 'UniversalFeedParser', category: 'Feed Fetcher', url: 'https://github.com/kurtmckee/feedparser', - producer: { - name: 'Kurt McKee', - url: 'https://github.com/kurtmckee', - }, + producer: { name: 'Kurt McKee', url: 'https://github.com/kurtmckee' }, }, { regex: 'via secureurl\\.fwdcdn\\.com', name: 'UkrNet Mail Proxy', category: 'Crawler', url: '', - producer: { - name: 'UkrNet Ltd', - url: 'https://www.ukr.net/', - }, + producer: { name: 'UkrNet Ltd', url: 'https://www.ukr.net/' }, }, { - regex: 'Uptimebot', + regex: 'Uptime(?:bot)?/[\\d.]+', name: 'Uptimebot', category: 'Site Monitor', - url: 'https://uptime.com/uptimebot', - producer: { - name: 'Uptime', - url: 'https://uptime.com', - }, + url: 'https://uptime.com/uptime-bot', + producer: { name: 'Uptime', url: 'https://uptime.com/' }, }, { regex: 'UptimeRobot', - name: 'Uptime Robot', + name: 'UptimeRobot', category: 'Site Monitor', - url: '', - producer: { - name: 'Uptime Robot', - url: 'http://uptimerobot.com', 
- }, + url: 'https://uptimerobot.com/', + producer: { name: 'Uptime Robot', url: 'https://uptimerobot.com/' }, }, { regex: 'URLAppendBot', name: 'URLAppendBot', category: 'Crawler', url: 'http://www.profound.net/urlappendbot.html', - producer: { - name: 'Profound Networks', - url: 'http://www.profound.net', - }, + producer: { name: 'Profound Networks', url: 'http://www.profound.net' }, }, { regex: 'Vagabondo', name: 'Vagabondo', category: 'Crawler', url: '', - producer: { - name: 'WiseGuys', - url: 'http://www.wise-guys.nl/', - }, + producer: { name: 'WiseGuys', url: 'http://www.wise-guys.nl/' }, }, { regex: 'vkShare; ', name: 'VK Share Button', category: 'Crawler', url: 'https://dev.vk.com/en/widgets/share', - producer: { - name: 'VK', - url: 'https://vk.com/', - }, + producer: { name: 'VK', url: 'https://vk.com/' }, }, { regex: 'VKRobot', name: 'VK Robot', category: 'Crawler', url: 'https://dev.vk.com/en/', - producer: { - name: 'VK', - url: 'https://vk.com/', - }, + producer: { name: 'VK', url: 'https://vk.com/' }, }, { regex: 'VSMCrawler', name: 'Visual Site Mapper Crawler', category: 'Crawler', url: 'http://www.visualsitemapper.com/crawler', - producer: { - name: 'Alentum Software Ltd.', - url: 'http://www.alentum.com', - }, + producer: { name: 'Alentum Software Ltd.', url: 'http://www.alentum.com' }, }, { regex: 'Jigsaw', name: 'W3C CSS Validator', category: 'Validator', url: 'http://jigsaw.w3.org/css-validator', - producer: { - name: 'W3C', - url: 'http://www.w3.org', - }, + producer: { name: 'W3C', url: 'http://www.w3.org' }, }, { regex: 'W3C_I18n-Checker', name: 'W3C I18N Checker', category: 'Validator', url: 'http://validator.w3.org/i18n-checker', - producer: { - name: 'W3C', - url: 'http://www.w3.org', - }, + producer: { name: 'W3C', url: 'http://www.w3.org' }, }, { regex: 'W3C-checklink', name: 'W3C Link Checker', category: 'Validator', url: 'http://validator.w3.org/checklink', - producer: { - name: 'W3C', - url: 'http://www.w3.org', - }, + producer: { 
name: 'W3C', url: 'http://www.w3.org' }, }, { regex: 'W3C_Validator|Validator\\.nu', name: 'W3C Markup Validation Service', category: 'Validator', url: 'http://validator.w3.org/services', - producer: { - name: 'W3C', - url: 'http://www.w3.org', - }, + producer: { name: 'W3C', url: 'http://www.w3.org' }, }, { regex: 'W3C-mobileOK', name: 'W3C MobileOK Checker', category: 'Validator', url: 'http://validator.w3.org/mobile', - producer: { - name: 'W3C', - url: 'http://www.w3.org', - }, + producer: { name: 'W3C', url: 'http://www.w3.org' }, }, { regex: 'W3C_Unicorn', name: 'W3C Unified Validator', category: 'Validator', url: 'http://validator.w3.org/unicorn', - producer: { - name: 'W3C', - url: 'http://www.w3.org', - }, + producer: { name: 'W3C', url: 'http://www.w3.org' }, }, { regex: 'P3P Validator', name: 'W3C P3P Validator', category: 'Validator', url: 'https://www.w3.org/P3P/validator.html', - producer: { - name: 'W3C', - url: 'https://www.w3.org', - }, + producer: { name: 'W3C', url: 'https://www.w3.org' }, }, { regex: 'Wappalyzer', name: 'Wappalyzer', url: 'https://github.com/AliasIO/Wappalyzer', - producer: { - name: 'AliasIO', - url: 'https://github.com/AliasIO', - }, + producer: { name: 'AliasIO', url: 'https://github.com/AliasIO' }, }, { regex: 'PTST/', @@ -2191,171 +1732,155 @@ export default [ name: 'WeSEE:Search', category: 'Search bot', url: 'http://www.wesee.com/bot', - producer: { - name: 'WeSEE Ltd', - url: 'http://www.wesee.com', - }, + producer: { name: 'WeSEE Ltd', url: 'http://www.wesee.com' }, }, { regex: 'WebbCrawler', name: 'WebbCrawler', category: 'Crawler', url: 'http://badcheese.com/crawler.html', - producer: { - name: 'Steve Webb', - url: 'http://badcheese.com', - }, + producer: { name: 'Steve Webb', url: 'http://badcheese.com' }, }, { regex: 'websitepulse[+ ]checker', name: 'WebSitePulse', category: 'Site Monitor', url: 'http://badcheese.com/crawler.html', - producer: { - name: 'WebSitePulse', - url: 'http://www.websitepulse.com/', - }, + 
producer: { name: 'WebSitePulse', url: 'http://www.websitepulse.com/' }, }, { regex: 'WordPress.+isitwp\\.com', name: 'IsItWP', category: 'Crawler', url: 'https://www.isitwp.com/', - producer: { - name: 'WPBeginner, LLC', - url: 'https://www.wpbeginner.com/', - }, + producer: { name: 'WPBeginner, LLC', url: 'https://www.wpbeginner.com/' }, + }, + { + regex: 'Automattic Analytics Crawler/[\\d.]+', + name: 'Automattic Analytics', + category: 'Crawler', + url: 'https://wordpress.com/crawler/', + producer: { name: 'Wordpress.org', url: 'https://wordpress.org/' }, + }, + { + regex: 'WordPress\\.com mShots', + name: 'WordPress.com mShots', + category: 'Service Agent', + url: 'https://wordpress.org/', + producer: { name: 'Wordpress.org', url: 'https://wordpress.org/' }, + }, + { + regex: 'wp\\.com feedbot', + name: 'wp.com feedbot', + category: 'Feed Fetcher', + url: 'https://wordpress.com/', + producer: { name: 'Automattic, Inc.', url: 'https://automattic.com/' }, }, { regex: 'WordPress', name: 'WordPress', category: 'Service Agent', url: 'https://wordpress.org/', - producer: { - name: 'Wordpress.org', - url: 'https://wordpress.org/', - }, + producer: { name: 'Wordpress.org', url: 'https://wordpress.org/' }, }, { regex: 'Wotbox', name: 'Wotbox', category: 'Search bot', url: 'http://www.wotbox.com/bot/', - producer: { - name: 'Wotbox', - url: 'http://www.wotbox.com', - }, + producer: { name: 'Wotbox', url: 'http://www.wotbox.com' }, }, { regex: 'XenForo', name: 'XenForo', category: 'Service Agent', url: 'https://xenforo.com/', - producer: { - name: 'XenForo Ltd.', - url: 'https://xenforo.com/', - }, + producer: { name: 'XenForo Ltd.', url: 'https://xenforo.com/' }, }, { regex: 'yacybot', name: 'YaCy', category: 'Search bot', url: 'http://yacy.net/bot.html', - producer: { - name: 'YaCy', - url: 'http://yacy.net', - }, + producer: { name: 'YaCy', url: 'http://yacy.net' }, }, { regex: 'Yahoo! Slurp|Yahoo!-AdCrawler', name: 'Yahoo! 
Slurp', category: 'Search bot', url: 'http://help.yahoo.com/ysearch/slurp', - producer: { - name: 'Yahoo! Inc.', - url: 'http://www.yahoo.com', - }, + producer: { name: 'Yahoo! Inc.', url: 'http://www.yahoo.com' }, }, { regex: 'Yahoo Link Preview|Yahoo:LinkExpander:Slingstone', name: 'Yahoo! Link Preview', category: 'Crawler', url: 'https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html', - producer: { - name: 'Yahoo! Inc.', - url: 'http://www.yahoo.com', - }, + producer: { name: 'Yahoo! Inc.', url: 'http://www.yahoo.com' }, }, { regex: 'YahooMailProxy', name: 'Yahoo! Mail Proxy', category: 'Service Agent', url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html', - producer: { - name: 'Yahoo! Inc.', - url: 'http://www.yahoo.com', - }, + producer: { name: 'Yahoo! Inc.', url: 'http://www.yahoo.com' }, }, { regex: 'YahooCacheSystem', name: 'Yahoo! Cache System', category: 'Crawler', url: '', - producer: { - name: 'Yahoo! Inc.', - url: 'http://www.yahoo.com', - }, + producer: { name: 'Yahoo! Inc.', url: 'http://www.yahoo.com' }, }, { regex: 'Y!J-BRW', name: 'Yahoo! Japan BRW', category: 'Crawler', url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955', - producer: { - name: 'Yahoo! Japan Corp.', - url: 'https://www.yahoo.co.jp/', - }, + producer: { name: 'Yahoo! Japan Corp.', url: 'https://www.yahoo.co.jp/' }, }, { regex: 'Y!J-WSC', name: 'Yahoo! Japan WSC', category: 'Crawler', url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955', - producer: { - name: 'Yahoo! Japan Corp.', - url: 'https://www.yahoo.co.jp/', - }, + producer: { name: 'Yahoo! Japan Corp.', url: 'https://www.yahoo.co.jp/' }, + }, + { + regex: 'Y!J-ASR', + name: 'Yahoo! Japan ASR', + category: 'Crawler', + url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955', + producer: { name: 'Yahoo! Japan Corp.', url: 'https://www.yahoo.co.jp/' }, + }, + { + regex: '^Y!J', + name: 'Yahoo! 
Japan', + category: 'Crawler', + url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955', + producer: { name: 'Yahoo! Japan Corp.', url: 'https://www.yahoo.co.jp/' }, }, { regex: - 'Yandex(?:(?:\\.Gazeta |Accessibility|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:AdNet|Antivirus|Blogs|Calendar|Catalog|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher', + 'Yandex(?:(?:\\.Gazeta |Accessibility|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:AdNet|Antivirus|Blogs|Calendar|Catalog|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Userproxy|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher', name: 'Yandex Bot', category: 'Search bot', url: 'https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html', - producer: { - name: 'Yandex LLC', - url: 'https://yandex.com/company/', - }, + producer: { name: 'Yandex LLC', url: 'https://yandex.com/company/' }, }, { regex: 'Yeti|NaverJapan|AdsBot-Naver', name: 'Yeti/Naverbot', category: 'Search bot', url: 'http://help.naver.com/robots/', - producer: { - name: 'Naver', - url: 'http://www.naver.com', - }, + producer: { name: 'Naver', url: 'http://www.naver.com' }, }, { regex: 'YoudaoBot', name: 'Youdao Bot', category: 'Search bot', url: 'http://www.youdao.com/help/webmaster/spider', - producer: { - name: 'NetEase, Inc.', - url: 'http://corp.163.com', - }, + producer: { name: 'NetEase, Inc.', url: 'http://corp.163.com' }, }, { regex: 'YOURLS v[0-9]', @@ -2368,10 +1893,7 @@ export default [ name: 'Yunyun Bot', category: 'Search bot', url: 'http://www.yunyun.com/SiteInfo.php?r=about', - producer: { - name: 'YunYun', - url: 'http://www.yunyun.com', - }, + producer: { name: 'YunYun', url: 'http://www.yunyun.com' }, }, 
{ regex: 'zgrab', @@ -2384,140 +1906,105 @@ export default [ name: 'Zookabot', category: 'Crawler', url: 'http://zookabot.com', - producer: { - name: 'Hwacha ApS', - url: 'http://hwacha.dk', - }, + producer: { name: 'Hwacha ApS', url: 'http://hwacha.dk' }, }, { regex: 'ZumBot', name: 'ZumBot', category: 'Search bot', url: 'http://help.zum.com/inquiry', - producer: { - name: 'ZUM internet', - url: 'http://www.zuminternet.com/', - }, + producer: { name: 'ZUM internet', url: 'http://www.zuminternet.com/' }, }, { regex: 'YottaaMonitor', name: 'Yottaa Site Monitor', category: 'Site Monitor', url: 'http://www.yottaa.com/products/site-monitor', - producer: { - name: 'Yottaa', - url: 'http://www.yottaa.com/', - }, + producer: { name: 'Yottaa', url: 'http://www.yottaa.com/' }, }, { regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857', name: 'Yahoo Gemini', category: 'Crawler', url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html', - producer: { - name: 'Yahoo! Inc.', - url: 'http://www.yahoo.com', - }, + producer: { name: 'Yahoo! 
Inc.', url: 'http://www.yahoo.com' }, }, { regex: '.*Java.*outbrain', name: 'Outbrain', category: 'Crawler', url: '', - producer: { - name: 'Outbrain', - url: 'http://www.outbrain.com/', - }, + producer: { name: 'Outbrain', url: 'http://www.outbrain.com/' }, }, { regex: 'HubPages.*crawlingpolicy', name: 'HubPages', category: 'Crawler', url: 'https://hubpages.com/help/crawlingpolicy', - producer: { - name: 'HubPages, Inc.', - url: 'https://discover.hubpages.com/', - }, + producer: { name: 'HubPages, Inc.', url: 'https://discover.hubpages.com/' }, }, { regex: 'Pinterest(?:bot)?/[\\d.]+.*www\\.pinterest\\.com', name: 'Pinterest', url: 'https://help.pinterest.com/en/business/article/pinterest-crawler', category: 'Crawler', - producer: { - name: 'Pinterest', - url: 'https://www.pinterest.com/', - }, + producer: { name: 'Pinterest', url: 'https://www.pinterest.com/' }, }, { - regex: 'Site24x7', + regex: '.*Site24x7', name: 'Site24x7 Website Monitoring', category: 'Site Monitor', url: 'https://www.site24x7.com/site24x7-faq.html', - producer: { - name: 'Site24x7', - url: 'https://www.site24x7.com', - }, + producer: { name: 'Site24x7', url: 'https://www.site24x7.com' }, + }, + { + regex: '.* HLB/[\\d.]+', + name: 'Site24x7 Defacement Monitor', + category: 'Site Monitor', + url: 'https://support.site24x7.com/portal/en/kb/articles/default-user-agent-used-in-website-defacement-monitor', + producer: { name: 'Site24x7', url: 'https://www.site24x7.com/' }, }, { regex: 's~snapchat-proxy', name: 'Snapchat Proxy', category: 'Crawler', url: 'https://www.snapchat.com', - producer: { - name: 'Snapchat Inc.', - url: 'https://www.snapchat.com', - }, + producer: { name: 'Snapchat Inc.', url: 'https://www.snapchat.com' }, }, { regex: 'Snap URL Preview Service', name: 'Snap URL Preview Service', category: 'Service Agent', url: 'https://developers.snap.com/robots', - producer: { - name: 'Snapchat Inc.', - url: 'https://www.snapchat.com/', - }, + producer: { name: 'Snapchat Inc.', url: 
'https://www.snapchat.com/' }, }, { regex: 'SnapchatAds/[\\d.]+', name: 'Snapchat Ads', category: 'Crawler', url: 'https://businesshelp.snapchat.com/s/article/adsbot-crawler?language=en_US', - producer: { - name: 'Snapchat Inc.', - url: 'https://www.snapchat.com/', - }, + producer: { name: 'Snapchat Inc.', url: 'https://www.snapchat.com/' }, }, { regex: "Let's Encrypt validation server", name: "Let's Encrypt Validation", category: 'Service Agent', url: 'https://letsencrypt.org/how-it-works/', - producer: { - name: "Let's Encrypt", - url: 'https://letsencrypt.org', - }, + producer: { name: "Let's Encrypt", url: 'https://letsencrypt.org' }, }, { regex: 'GrapeshotCrawler', name: 'Grapeshot', category: 'Crawler', url: 'https://www.grapeshot.com/crawler', - producer: { - name: 'Grapeshot', - url: 'https://www.grapeshot.com', - }, + producer: { name: 'Grapeshot', url: 'https://www.grapeshot.com' }, }, { regex: 'www\\.monitor\\.us', name: 'Monitor.Us', category: 'Site Monitor', url: 'http://www.monitor.us', - producer: { - name: 'Monitor.Us', - url: 'http://www.monitor.us', - }, + producer: { name: 'Monitor.Us', url: 'http://www.monitor.us' }, }, { regex: 'Catchpoint', @@ -2534,84 +2021,32 @@ export default [ name: 'BitlyBot', category: 'Crawler', url: 'https://bitly.com', - producer: { - name: 'Bitly, Inc.', - url: 'https://bitly.com', - }, - }, - { - regex: 'Zao/', - name: 'Zao', - category: 'Crawler', - }, - { - regex: 'lycos', - name: 'Lycos', - }, - { - regex: 'Slurp', - name: 'Inktomi Slurp', - }, - { - regex: 'Speedy Spider', - name: 'Speedy', - }, - { - regex: 'ScoutJet', - name: 'ScoutJet', - }, - { - regex: 'nrsbot|netresearch', - name: 'NetResearchServer', - }, - { - regex: 'scooter', - name: 'Scooter', - }, - { - regex: 'gigabot', - name: 'Gigabot', - }, - { - regex: 'charlotte', - name: 'Charlotte', - }, - { - regex: 'Pompos', - name: 'Pompos', - }, - { - regex: 'ichiro', - name: 'ichiro', + producer: { name: 'Bitly, Inc.', url: 'https://bitly.com' }, }, + { 
regex: 'Zao/', name: 'Zao', category: 'Crawler' }, + { regex: 'lycos', name: 'Lycos' }, + { regex: 'Slurp', name: 'Inktomi Slurp' }, + { regex: 'Speedy Spider', name: 'Speedy' }, + { regex: 'ScoutJet', name: 'ScoutJet' }, + { regex: 'nrsbot|netresearch', name: 'NetResearchServer' }, + { regex: 'scooter', name: 'Scooter' }, + { regex: 'gigabot', name: 'Gigabot' }, + { regex: 'charlotte', name: 'Charlotte' }, + { regex: 'Pompos', name: 'Pompos' }, + { regex: 'ichiro', name: 'ichiro' }, { regex: 'PagePeeker', name: 'PagePeeker', + category: 'Crawler', + url: 'https://pagepeeker.com/robots/', + producer: { name: 'PAGEPEEKER SRL', url: 'https://pagepeeker.com/' }, }, - { - regex: 'WebThumbnail', - name: 'WebThumbnail', - }, - { - regex: 'Willow Internet Crawler', - name: 'Willow Internet Crawler', - }, - { - regex: 'EmailWolf', - name: 'EmailWolf', - }, - { - regex: 'NetLyzer FastProbe', - name: 'NetLyzer FastProbe', - }, - { - regex: 'AdMantX.*admantx\\.com', - name: 'ADMantX', - }, - { - regex: 'Server Density Service Monitoring', - name: 'Server Density', - }, + { regex: 'WebThumbnail', name: 'WebThumbnail' }, + { regex: 'Willow Internet Crawler', name: 'Willow Internet Crawler' }, + { regex: 'EmailWolf', name: 'EmailWolf' }, + { regex: 'NetLyzer FastProbe', name: 'NetLyzer FastProbe' }, + { regex: 'AdMantX.*admantx\\.com', name: 'ADMantX' }, + { regex: 'Server Density Service Monitoring', name: 'Server Density' }, { regex: 'RSSRadio \\(Push Notification Scanner;support@dorada\\.co\\.uk\\)', name: 'RSSRadio Bot', @@ -2619,33 +2054,20 @@ export default [ { regex: '^sentry', name: 'Sentry Bot', - producer: { - name: 'Sentry', - url: 'https://sentry.io', - }, + producer: { name: 'Sentry', url: 'https://sentry.io' }, }, { regex: '^Spotify/[\\d.]+$', name: 'Spotify', - producer: { - name: 'Spotify', - url: 'https://www.spotify.com', - }, - }, - { - regex: 'The Knowledge AI', - name: 'The Knowledge AI', - category: 'Crawler', + producer: { name: 'Spotify', url: 
'https://www.spotify.com' }, }, + { regex: 'The Knowledge AI', name: 'The Knowledge AI', category: 'Crawler' }, { regex: 'Embedly', name: 'Embedly', category: 'Crawler', url: 'https://support.embed.ly/hc/en-us', - producer: { - name: 'A Medium, Corp.', - url: 'https://medium.com/', - }, + producer: { name: 'A Medium, Corp.', url: 'https://medium.com/' }, }, { regex: 'BrandVerity', @@ -2662,50 +2084,28 @@ export default [ name: 'Kaspersky', category: 'Security Checker', url: 'https://www.kaspersky.com/', - producer: { - name: 'AO Kaspersky Lab', - url: 'https://www.kaspersky.com/', - }, + producer: { name: 'AO Kaspersky Lab', url: 'https://www.kaspersky.com/' }, }, { regex: 'eZ Publish Link Validator', name: 'eZ Publish Link Validator', category: 'Crawler', url: 'https://ez.no/', - producer: { - name: 'eZ Systems AS', - url: 'https://ez.no/', - }, + producer: { name: 'eZ Systems AS', url: 'https://ez.no/' }, }, { regex: 'woorankreview', name: 'WooRank', category: 'Search bot', url: 'https://www.woorank.com/', - producer: { - name: 'WooRank sprl', - url: 'https://www.woorank.com/', - }, + producer: { name: 'WooRank sprl', url: 'https://www.woorank.com/' }, }, { regex: 'by Siteimprove\\.com', name: 'Siteimprove', category: 'Search bot', url: 'https://siteimprove.com/', - producer: { - name: 'Siteimprove GmbH', - url: 'https://siteimprove.com/', - }, - }, - { - regex: 'Image size by Siteimprove\\.com', - name: 'Siteimprove', - category: 'Search bot', - url: 'https://siteimprove.com/', - producer: { - name: 'Siteimprove GmbH', - url: 'https://siteimprove.com/', - }, + producer: { name: 'Siteimprove GmbH', url: 'https://siteimprove.com/' }, }, { regex: 'CATExplorador', @@ -2722,70 +2122,49 @@ export default [ name: 'Buck', category: 'Search bot', url: 'https://hypefactors.com/', - producer: { - name: 'Hypefactors A/S', - url: 'https://hypefactors.com/', - }, + producer: { name: 'Hypefactors A/S', url: 'https://hypefactors.com/' }, }, { regex: 'tracemyfile', name: 
'TraceMyFile', category: 'Search bot', url: 'https://www.tracemyfile.com/', - producer: { - name: 'Idee Inc.', - url: 'http://ideeinc.com/', - }, + producer: { name: 'Idee Inc.', url: 'http://ideeinc.com/' }, }, { regex: 'zelist\\.ro feed parser', name: 'Ze List', url: 'https://www.zelist.ro/', category: 'Feed Fetcher', - producer: { - name: 'Treeworks SRL', - url: 'https://www.tree.ro/', - }, + producer: { name: 'Treeworks SRL', url: 'https://www.tree.ro/' }, }, { regex: 'weborama-fetcher', name: 'Weborama', category: 'Search bot', url: 'https://weborama.com/', - producer: { - name: 'Weborama SA', - url: 'https://weborama.com/', - }, + producer: { name: 'Weborama SA', url: 'https://weborama.com/' }, }, { regex: 'BoardReader Favicon Fetcher', name: 'BoardReader', category: 'Search bot', url: 'https://boardreader.com/', - producer: { - name: 'Effyis Inc', - url: 'https://boardreader.com/', - }, + producer: { name: 'Effyis Inc', url: 'https://boardreader.com/' }, }, { regex: 'IDG/IT', name: 'IDG/IT', category: 'Search bot', url: 'https://spaziodati.eu/', - producer: { - name: 'SpazioDati S.r.l.', - url: 'https://spaziodati.eu/', - }, + producer: { name: 'SpazioDati S.r.l.', url: 'https://spaziodati.eu/' }, }, { regex: 'Bytespider', name: 'Bytespider', category: 'Search bot', url: 'https://bytedance.com/', - producer: { - name: 'ByteDance Ltd.', - url: 'https://bytedance.com/', - }, + producer: { name: 'ByteDance Ltd.', url: 'https://bytedance.com/' }, }, { regex: 'WikiDo', @@ -2832,10 +2211,7 @@ export default [ name: 'SMTBot', category: 'Search bot', url: 'https://www.similartech.com/smtbot', - producer: { - name: 'SimilarTech Ltd.', - url: 'https://www.similartech.com/', - }, + producer: { name: 'SimilarTech Ltd.', url: 'https://www.similartech.com/' }, }, { regex: 'LCC', @@ -2852,20 +2228,14 @@ export default [ name: 'Startpagina Linkchecker', category: 'Search bot', url: 'https://www.startpagina.nl/linkchecker', - producer: { - name: 'Startpagina B.V.', - url: 
'https://www.startpagina.nl/', - }, + producer: { name: 'Startpagina B.V.', url: 'https://www.startpagina.nl/' }, }, { regex: 'MoodleBot-Linkchecker', name: 'MoodleBot Linkchecker', category: 'Search bot', url: 'hhttps://docs.moodle.org/en/Usage', - producer: { - name: 'Moodle Pty Ltd', - url: 'https://moodle.org/', - }, + producer: { name: 'Moodle Pty Ltd', url: 'https://moodle.org/' }, }, { regex: 'GTmetrix', @@ -2877,6 +2247,13 @@ export default [ url: 'https://www.carbon60.com/', }, }, + { + regex: 'CyberFind ?Crawler', + name: 'CyberFind Crawler', + category: 'Crawler', + url: 'https://www.cyberfind.net/bot.html', + producer: { name: 'Find.tf', url: 'https://find.tf/' }, + }, { regex: 'Nutch', name: 'Nutch-based Bot', @@ -2905,11 +2282,7 @@ export default [ category: 'Service bot', url: 'https://www.grammarly.com', }, - { - regex: 'Robozilla', - name: 'Robozilla', - category: 'Crawler', - }, + { regex: 'Robozilla', name: 'Robozilla', category: 'Crawler' }, { regex: 'Domains Project', name: 'Domains Project', @@ -2968,20 +2341,14 @@ export default [ name: 'Adbeat', category: 'Crawler', url: 'https://www.adbeat.com/operation_policy', - producer: { - name: 'PPC Labs LLC', - url: 'https://www.adbeat.com/', - }, + producer: { name: 'PPC Labs LLC', url: 'https://www.adbeat.com/' }, }, { - regex: 'BW/[\\d.]+', + regex: '(?:BuiltWith|BW)/[\\d.]+', name: 'BuiltWith', category: 'Crawler', url: 'https://builtwith.com/biup', - producer: { - name: 'BuiltWith Pty Ltd', - url: 'https://builtwith.com/', - }, + producer: { name: 'BuiltWith Pty Ltd', url: 'https://builtwith.com/' }, }, { regex: 'https://whatis\\.contentkingapp\\.com', @@ -2998,10 +2365,7 @@ export default [ name: 'MicroAdBot', category: 'Crawler', url: 'https://www.microad.co.jp/', - producer: { - name: 'MicroAd, Inc.', - url: 'https://www.microad.co.jp/', - }, + producer: { name: 'MicroAd, Inc.', url: 'https://www.microad.co.jp/' }, }, { regex: 'PingAdmin\\.Ru', @@ -3030,10 +2394,7 @@ export default [ name: 
'parse.ly', category: 'Crawler', url: 'https://www.parse.ly/help/integration/crawler', - producer: { - name: 'Parsely, Inc.', - url: 'https://www.parse.ly/', - }, + producer: { name: 'Parsely, Inc.', url: 'https://www.parse.ly/' }, }, { regex: 'Nimbostratus-Bot', @@ -3051,17 +2412,15 @@ export default [ regex: 'Project-Resonance', name: 'Project Resonance', category: 'Crawler', - url: 'http://project-resonance.com', + url: 'https://project-resonance.com/', + producer: { name: 'RedHunt Labs Limited', url: 'https://redhuntlabs.com/' }, }, { regex: 'DataXu/[\\d.]+', name: 'DataXu', category: 'Service Agent', url: 'https://advertising.roku.com/dataxu', - producer: { - name: 'Roku, Inc.', - url: 'https://roku.com', - }, + producer: { name: 'Roku, Inc.', url: 'https://roku.com' }, }, { regex: 'Cocolyzebot', @@ -3078,20 +2437,14 @@ export default [ name: 'VeryHip', category: 'Crawler', url: 'https://veryhip.com/', - producer: { - name: 'VeryHip', - url: 'https://veryhip.com/', - }, + producer: { name: 'VeryHip', url: 'https://veryhip.com/' }, }, { regex: 'LinkpadBot', name: 'LinkpadBot', category: 'Crawler', url: 'https://www.linkpad.org/', - producer: { - name: 'Solomono LLC', - url: 'https://www.linkpad.org/', - }, + producer: { name: 'Solomono LLC', url: 'https://www.linkpad.org/' }, }, { regex: 'MuscatFerret', @@ -3114,40 +2467,28 @@ export default [ name: 'ArchiveBox', url: 'https://archivebox.io/', category: 'Crawler', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'Choosito', name: 'Choosito', url: 'https://www.choosito.com/', category: 'Crawler', - producer: { - name: 'Choosito! Inc.', - url: 'https://www.choosito.com/', - }, + producer: { name: 'Choosito! 
Inc.', url: 'https://www.choosito.com/' }, }, { regex: 'datagnionbot', name: 'datagnionbot', url: 'https://www.datagnion.com/bot.html', category: 'Crawler', - producer: { - name: 'DATAGNION GMBH', - url: 'https://www.datagnion.com/', - }, + producer: { name: 'DATAGNION GMBH', url: 'https://www.datagnion.com/' }, }, { regex: 'WhatCMS', name: 'WhatCMS', url: 'https://whatcms.org/', category: 'Crawler', - producer: { - name: 'Nineteen Ten LLC', - url: 'https://whatcms.org/', - }, + producer: { name: 'Nineteen Ten LLC', url: 'https://whatcms.org/' }, }, { regex: 'httpx', @@ -3155,8 +2496,18 @@ export default [ url: 'https://github.com/projectdiscovery/httpx', category: 'Crawler', producer: { - name: '', - url: '', + name: 'ProjectDiscovery, Inc.', + url: 'https://projectdiscovery.io/', + }, + }, + { + regex: '.*\\.oast\\.', + name: 'Interactsh', + category: 'Security Checker', + url: 'https://github.com/projectdiscovery/interactsh', + producer: { + name: 'ProjectDiscovery, Inc.', + url: 'https://projectdiscovery.io/', }, }, { @@ -3164,10 +2515,7 @@ export default [ name: 'Expanse', category: 'Security Checker', url: 'https://expanse.co/', - producer: { - name: 'Expanse Inc.', - url: 'https://expanse.co/', - }, + producer: { name: 'Expanse Inc.', url: 'https://expanse.co/' }, }, { regex: 'HuaweiWebCatBot', @@ -3184,20 +2532,14 @@ export default [ name: 'Hatena Favicon', category: 'Crawler', url: 'https://www.hatena.ne.jp/faq/', - producer: { - name: 'Hatena Co., Ltd.', - url: 'https://www.hatena.ne.jp', - }, + producer: { name: 'Hatena Co., Ltd.', url: 'https://www.hatena.ne.jp' }, }, { regex: 'Hatena-?Bookmark', name: 'Hatena Bookmark', category: 'Crawler', url: 'https://www.hatena.ne.jp/faq/', - producer: { - name: 'Hatena Co., Ltd.', - url: 'https://www.hatena.ne.jp', - }, + producer: { name: 'Hatena Co., Ltd.', url: 'https://www.hatena.ne.jp' }, }, { regex: 'RyowlEngine/[\\d.]+', @@ -3288,10 +2630,7 @@ export default [ name: 'Cookiebot', category: 'Crawler', url: 
'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent', - producer: { - name: 'Cybot A/S', - url: 'https://www.cybot.com/', - }, + producer: { name: 'Cybot A/S', url: 'https://www.cybot.com/' }, }, { regex: 'NetSystemsResearch', @@ -3308,10 +2647,7 @@ export default [ name: 'CensysInspect', category: 'Security Checker', url: 'https://about.censys.io/', - producer: { - name: 'Censys, Inc.', - url: 'https://censys.io/', - }, + producer: { name: 'Censys, Inc.', url: 'https://censys.io/' }, }, { regex: 'gdnplus\\.com', @@ -3340,20 +2676,14 @@ export default [ name: 'MTRobot', category: 'Crawler', url: 'https://metrics-tools.de/robot.html', - producer: { - name: 'Metrics Tools', - url: 'https://metrics-tools.de/', - }, + producer: { name: 'Metrics Tools', url: 'https://metrics-tools.de/' }, }, { regex: 'serpstatbot/[\\d.]+', name: 'serpstatbot', category: 'Crawler', url: 'http://serpstatbot.com/', - producer: { - name: 'Netpeak Ltd', - url: 'https://netpeak.net/', - }, + producer: { name: 'Netpeak Ltd', url: 'https://netpeak.net/' }, }, { regex: 'colly', @@ -3378,10 +2708,7 @@ export default [ name: 'LeakIX', category: 'Security Checker', url: 'https://leakix.net/', - producer: { - name: 'BaDaaS SRL', - url: 'https://leakix.net/', - }, + producer: { name: 'BaDaaS SRL', url: 'https://leakix.net/' }, }, { regex: 'MegaIndex\\.ru/[\\d.]+', @@ -3394,10 +2721,7 @@ export default [ name: 'Seekport', category: 'Crawler', url: 'https://bot.seekport.com/', - producer: { - name: 'SISTRIX GmbH', - url: 'https://www.sistrix.de/', - }, + producer: { name: 'SISTRIX GmbH', url: 'https://www.sistrix.de/' }, }, { regex: 'seolyt/[\\d.]+', @@ -3410,30 +2734,28 @@ export default [ name: 'YaK', category: 'Crawler', url: 'https://www.linkfluence.com/', - producer: { - name: 'Linkfluence SAS', - url: 'https://www.linkfluence.com/', - }, + producer: { name: 'Linkfluence SAS', url: 'https://www.linkfluence.com/' }, }, { regex: 'KomodiaBot/[\\d.]+', name: 'KomodiaBot', 
category: 'Crawler', url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler', - producer: { - name: 'Komodia Inc.', - url: 'https://www.komodia.com/', - }, + producer: { name: 'Komodia Inc.', url: 'https://www.komodia.com/' }, + }, + { + regex: 'KStandBot/[\\d.]+', + name: 'KStandBot', + category: 'Crawler', + url: 'https://url-classification.io/wiki/index.php?title=URL_server_crawler', + producer: { name: 'Komodia Inc.', url: 'https://www.komodia.com/' }, }, { regex: 'Neevabot/[\\d.]+', name: 'Neevabot', category: 'Search bot', url: 'https://neeva.com/neevabot', - producer: { - name: 'Neeva Inc.', - url: 'https://neeva.com/', - }, + producer: { name: 'Neeva Inc.', url: 'https://neeva.com/' }, }, { regex: 'LinkPreview/[\\d.]+', @@ -3452,10 +2774,7 @@ export default [ name: 'RocketMonitorBot', category: 'Site Monitor', url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html', - producer: { - name: 'Radio Mast, Inc.', - url: 'https://www.radiomast.io/', - }, + producer: { name: 'Radio Mast, Inc.', url: 'https://www.radiomast.io/' }, }, { regex: 'SitemapParser-VIPnytt/[\\d.]+', @@ -3475,11 +2794,7 @@ export default [ category: 'Site Monitor', url: 'https://www.dotcom-monitor.com', }, - { - regex: 'ThinkChaos/', - name: 'ThinkChaos', - category: 'Crawler', - }, + { regex: 'ThinkChaos/', name: 'ThinkChaos', category: 'Crawler' }, { regex: 'DataForSeoBot', name: 'DataForSeoBot', @@ -3519,10 +2834,7 @@ export default [ name: 'IONOS Crawler', category: 'Crawler', url: 'https://www.ionos.de/terms-gtc/faq-crawler-en/', - producer: { - name: 'IONOS SE', - url: 'https://www.ionos.de/', - }, + producer: { name: 'IONOS SE', url: 'https://www.ionos.de/' }, }, { regex: 'Crawldad', @@ -3577,33 +2889,24 @@ export default [ name: 'Onalytica', category: 'Crawler', url: 'https://www.airslate.com/bot/explore/onalytica-bot', - producer: { - name: 'airSlate, Inc.', - url: 'https://www.airslate.com/', - }, + producer: { name: 'airSlate, Inc.', url: 
'https://www.airslate.com/' }, }, { regex: 'deepnoc', name: 'deepnoc', category: 'Crawler', url: 'https://deepnoc.com/bot', - producer: { - name: 'deepnoc, GmbH', - url: 'https://deepnoc.com/', - }, + producer: { name: 'deepnoc, GmbH', url: 'https://deepnoc.com/' }, }, { regex: 'Newslitbot/[\\d.]+', name: 'Newslitbot', category: 'Crawler', url: 'https://www.newslit.co/', - producer: { - name: 'Newslit, LLC.', - url: 'https://www.newslit.co/', - }, + producer: { name: 'Newslit, LLC.', url: 'https://www.newslit.co/' }, }, { - regex: 'um-LN/[\\d.]+', + regex: 'um-(?:ANS|CC|FC|IC|LN)/[\\d.]+', name: 'uMBot', category: 'Crawler', url: 'https://www.ubermetrics-technologies.com/', @@ -3623,10 +2926,7 @@ export default [ name: 'Infegy', category: 'Crawler', url: 'https://infegy.com/', - producer: { - name: 'Infegy, Inc.', - url: 'https://infegy.com/', - }, + producer: { name: 'Infegy, Inc.', url: 'https://infegy.com/' }, }, { regex: 'HTTP Banner Detection \\(https://security\\.ipip\\.net\\)', @@ -3653,20 +2953,14 @@ export default [ name: 'WebPros', category: 'Crawler', url: 'https://webpros.com/', - producer: { - name: 'WebPros Holdco B.V.', - url: 'https://webpros.com/', - }, + producer: { name: 'WebPros Holdco B.V.', url: 'https://webpros.com/' }, }, { regex: 'ELB-HealthChecker', name: 'Amazon ELB', category: 'Site Monitor', url: 'https://aws.amazon.com/elasticloadbalancing/', - producer: { - name: 'Amazon.com, Inc.', - url: 'https://www.amazon.com/', - }, + producer: { name: 'Amazon.com, Inc.', url: 'https://www.amazon.com/' }, }, { regex: 'Wheregoes\\.com Redirect Checker/[\\d.]+', @@ -3721,30 +3015,21 @@ export default [ name: 'adstxtlab.com', category: 'Crawler', url: 'https://adstxtlab.com/validator.php', - producer: { - name: 'Jaohawi AB', - url: 'https://adstxtlab.com/', - }, + producer: { name: 'Jaohawi AB', url: 'https://adstxtlab.com/' }, }, { regex: 'Iframely/[\\d.]+', name: 'Iframely', category: 'Crawler', url: 'https://iframely.com/', - producer: { - name: 
'Itteco Software, Corp.', - url: 'https://iframely.com/', - }, + producer: { name: 'Itteco Software, Corp.', url: 'https://iframely.com/' }, }, { regex: 'DomainStatsBot/[\\d.]+', name: 'DomainStatsBot', category: 'Crawler', url: 'https://domainstats.com/pages/our-bot', - producer: { - name: 'Domainstats Ltd', - url: 'https://domainstats.com/', - }, + producer: { name: 'Domainstats Ltd', url: 'https://domainstats.com/' }, }, { regex: 'aiHitBot/[\\d.]+', @@ -3758,16 +3043,8 @@ export default [ category: 'Crawler', url: 'https://domaincrawler.com/about-us/', }, - { - regex: 'DNSResearchBot', - name: 'DNSResearchBot', - category: 'Crawler', - }, - { - regex: 'GitCrawlerBot', - name: 'GitCrawlerBot', - category: 'Crawler', - }, + { regex: 'DNSResearchBot', name: 'DNSResearchBot', category: 'Crawler' }, + { regex: 'GitCrawlerBot', name: 'GitCrawlerBot', category: 'Crawler' }, { regex: 'AdAuth/[\\d.]+', name: 'AdAuth', @@ -3797,10 +3074,7 @@ export default [ name: 'PayPal IPN', category: 'Service Agent', url: 'https://developer.paypal.com/api/nvp-soap/ipn/IPNIntro/', - producer: { - name: 'PayPal, Inc.', - url: 'https://www.paypal.com/', - }, + producer: { name: 'PayPal, Inc.', url: 'https://www.paypal.com/' }, }, { regex: 'MaCoCu', @@ -3812,6 +3086,16 @@ export default [ url: 'https://www.ijs.si/ijsw/JSI', }, }, + { + regex: 'CLASSLA', + name: 'CLASSLA-web', + category: 'Crawler', + url: 'https://www.clarin.si/info/classla-web-crawler/', + producer: { + name: 'Jožef Stefan Institute', + url: 'https://www.ijs.si/ijsw/JSI', + }, + }, { regex: 'dnt-policy@eff\\.org', name: 'EFF Do Not Track Verifier', @@ -3827,10 +3111,7 @@ export default [ name: 'InfoTigerBot', category: 'Crawler', url: 'https://infotiger.com/bot', - producer: { - name: 'Infotiger UG', - url: 'https://infotiger.com/', - }, + producer: { name: 'Infotiger UG', url: 'https://infotiger.com/' }, }, { regex: '(?:Birdcrawlerbot|CrawlaDeBot)', @@ -3857,10 +3138,7 @@ export default [ name: 'Zaldamo', category: 
'Crawler', url: 'https://www.zaldamo.com/search.html', - producer: { - name: 'Zaldamo, LLC.', - url: 'https://www.zaldamo.com/', - }, + producer: { name: 'Zaldamo, LLC.', url: 'https://www.zaldamo.com/' }, }, { regex: 'AFB/[\\d.]+', @@ -3879,10 +3157,7 @@ export default [ name: 'LinkWalker', category: 'Crawler', url: 'https://www.phishlabs.com/', - producer: { - name: 'PhishLabs, Inc.', - url: 'https://www.phishlabs.com/', - }, + producer: { name: 'PhishLabs, Inc.', url: 'https://www.phishlabs.com/' }, }, { regex: 'RenovateBot/[\\d.]+', @@ -3905,30 +3180,14 @@ export default [ name: 'NETZZAPPEN', category: 'Crawler', url: 'https://www.netzzappen.com/', - producer: { - name: 'Marc Huemer', - url: 'https://www.netzzappen.com/', - }, - }, - { - regex: 'SerpReputationManagementAgent/[\\d.]+', - name: 'SEMrush Reputation Management', - category: 'Service Agent', - url: 'https://www.semrush.com/bot/', - producer: { - name: 'SEMrush', - url: 'https://www.semrush.com/', - }, + producer: { name: 'Marc Huemer', url: 'https://www.netzzappen.com/' }, }, { regex: 'panscient\\.com', name: 'Panscient', category: 'Crawler', url: 'https://www.panscient.com/faq.htm', - producer: { - name: 'Panscient, Inc.', - url: 'https://www.panscient.com/', - }, + producer: { name: 'Panscient, Inc.', url: 'https://www.panscient.com/' }, }, { regex: 'research@pdrlabs\\.net', @@ -3945,110 +3204,77 @@ export default [ name: 'NiceCrawler', category: 'Crawler', url: 'https://www.nicecrawler.com/', - producer: { - name: 'Intelium Corp.', - url: 'https://www.intelium.com/', - }, + producer: { name: 'Intelium Corp.', url: 'https://www.intelium.com/' }, }, { regex: 't3versionsBot/[\\d.]+', name: 't3versions', category: 'Crawler', url: 'https://www.t3versions.com/bot', - producer: { - name: 'Torben Hansen', - url: 'https://www.t3versions.com/', - }, + producer: { name: 'Torben Hansen', url: 'https://www.t3versions.com/' }, }, { regex: 'Crawlson/[\\d.]+', name: 'Crawlson', category: 'Crawler', url: 
'https://www.crawlson.com/about', - producer: { - name: 'Crawlson', - url: 'https://www.crawlson.com/', - }, + producer: { name: 'Crawlson', url: 'https://www.crawlson.com/' }, }, { regex: 'tchelebi/[\\d.]+', name: 'tchelebi', category: 'Crawler', url: 'https://tchelebi.io/', - producer: { - name: 'NormShield, Inc.', - url: 'https://blackkite.com/', - }, + producer: { name: 'NormShield, Inc.', url: 'https://blackkite.com/' }, }, { regex: 'JobboerseBot', name: 'JobboerseBot', category: 'Crawler', url: 'https://www.xing.com/jobs', - producer: { - name: 'New Work SE', - url: 'https://www.xing.com/', - }, + producer: { name: 'New Work SE', url: 'https://www.xing.com/' }, }, { - regex: 'RepoLookoutBot/[\\d.]+', + regex: 'RepoLookoutBot/v?[\\d.]+', name: 'Repo Lookout', category: 'Security Checker', url: 'https://www.repo-lookout.org/', - producer: { - name: 'Crissy Field GmbH', - url: 'https://www.crissyfield.de/', - }, + producer: { name: 'Crissy Field GmbH', url: 'https://www.crissyfield.de/' }, }, { regex: 'PATHspider', name: 'PATHspider', category: 'Security Checker', url: 'https://pathspider.net/', - producer: { - name: 'MAMI Project', - url: 'https://mami-project.eu/', - }, + producer: { name: 'MAMI Project', url: 'https://mami-project.eu/' }, }, { regex: 'everyfeed-spider/[\\d.]+', name: 'Everyfeed', url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/', category: 'Feed Fetcher', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'Exchange check', name: 'Exchange check', category: 'Security Checker', url: 'https://github.com/GossiTheDog/scanning', - producer: { - name: 'Kevin Beaumont', - url: 'https://doublepulsar.com/', - }, + producer: { name: 'Kevin Beaumont', url: 'https://doublepulsar.com/' }, }, { regex: 'Sublinq', name: 'Sublinq', category: 'Crawler', url: 'https://web.archive.org/web/20220626191617/https://sublinq.com/', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' 
}, }, { regex: 'Gregarius/[\\d.]+', name: 'Gregarius', category: 'Feed Fetcher', url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/', - producer: { - name: '', - url: '', - }, + producer: { name: '', url: '' }, }, { regex: 'COMODO DCV', @@ -4061,14 +3287,11 @@ export default [ }, }, { - regex: 'Sectigo DCV', + regex: 'Sectigo DCV|acme\\.sectigo\\.com', name: 'Sectigo DCV', category: 'Service Agent', url: 'https://sectigo.com/', - producer: { - name: 'Sectigo Limited', - url: 'https://sectigo.com/', - }, + producer: { name: 'Sectigo Limited', url: 'https://sectigo.com/' }, }, { regex: @@ -4076,40 +3299,28 @@ export default [ name: 'KlarnaBot', category: 'Crawler', url: 'https://docs.klarna.com/klarna-bot/', - producer: { - name: 'Klarna Bank AB', - url: 'https://www.klarna.com/', - }, + producer: { name: 'Klarna Bank AB', url: 'https://www.klarna.com/' }, }, { regex: 'Taboolabot/[\\d.]+', name: 'Taboolabot', category: 'Crawler', url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler', - producer: { - name: 'Taboola, Inc.', - url: 'https://www.taboola.com/', - }, + producer: { name: 'Taboola, Inc.', url: 'https://www.taboola.com/' }, }, { regex: 'Asana/[\\d.]+', name: 'Asana', category: 'Crawler', url: 'https://asana.com/', - producer: { - name: 'Asana, Inc.', - url: 'https://asana.com/', - }, + producer: { name: 'Asana, Inc.', url: 'https://asana.com/' }, }, { regex: 'Chrome Privacy Preserving Prefetch Proxy', name: 'Chrome Privacy Preserving Prefetch Proxy', category: 'Service Agent', url: 'https://developer.chrome.com/blog/private-prefetch-proxy/', - producer: { - name: 'Google Inc.', - url: 'https://www.google.com/', - }, + producer: { name: 'Google Inc.', url: 'https://www.google.com/' }, }, { regex: 'URLinspectorBot/[\\d.]+', @@ -4126,30 +3337,21 @@ export default [ name: 'Entfer', category: 'Crawler', url: 'https://entfer.com/', - producer: { - name: 'Entfer Ltd.', - url: 'https://entfer.com/', - }, 
+ producer: { name: 'Entfer Ltd.', url: 'https://entfer.com/' }, }, { regex: 'TagInspector/[\\d.]+', name: 'Tag Inspector', category: 'Crawler', url: 'https://taginspector.com/', - producer: { - name: 'InfoTrust, LLC', - url: 'https://infotrust.com/', - }, + producer: { name: 'InfoTrust, LLC', url: 'https://infotrust.com/' }, }, { regex: 'pageburst', name: 'Pageburst', category: 'Crawler', url: 'https://pageburstls.elsevier.com/', - producer: { - name: 'Elsevier Ltd', - url: 'https://www.elsevier.com/', - }, + producer: { name: 'Elsevier Ltd', url: 'https://www.elsevier.com/' }, }, { regex: '.+diffbot', @@ -4166,20 +3368,14 @@ export default [ name: 'Disqus', category: 'Crawler', url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide', - producer: { - name: 'Disqus, Inc.', - url: 'https://disqus.com/', - }, + producer: { name: 'Disqus, Inc.', url: 'https://disqus.com/' }, }, { regex: 'startmebot/[\\d.]+', name: 'start.me', category: 'Crawler', url: 'https://about.start.me/', - producer: { - name: 'start.me BV', - url: 'https://about.start.me/', - }, + producer: { name: 'start.me BV', url: 'https://about.start.me/' }, }, { regex: '2ip bot/[\\d.]+', @@ -4198,10 +3394,7 @@ export default [ name: 'XoviBot', category: 'Crawler', url: 'https://www.xovibot.net', - producer: { - name: 'Xovi GmbH', - url: 'http://www.xovi.de', - }, + producer: { name: 'Xovi GmbH', url: 'http://www.xovi.de' }, }, { regex: 'Overcast/[\\d.]+ Podcast Sync', @@ -4272,10 +3465,7 @@ export default [ name: 'Morningscore Bot', category: 'Crawler', url: 'https://morningscore.io/', - producer: { - name: 'Morningscore', - url: 'https://morningscore.io/', - }, + producer: { name: 'Morningscore', url: 'https://morningscore.io/' }, }, { regex: 'Uptime-Kuma/[\\d.]+', @@ -4284,14 +3474,25 @@ export default [ url: 'https://github.com/louislam/uptime-kuma', }, { - regex: 'ChatGPT-User', - name: 'ChatGPT', + regex: 'OAI-SearchBot', + name: 'OAI-SearchBot', category: 'Crawler', - url: 
'https://platform.openai.com/docs/plugins/bot', - producer: { - name: 'OpenAI OpCo, LLC', - url: 'https://openai.com/', - }, + url: 'https://platform.openai.com/docs/bots', + producer: { name: 'OpenAI OpCo, LLC', url: 'https://openai.com/' }, + }, + { + regex: 'GPTBot/[\\d.]+', + name: 'GPTBot', + category: 'Crawler', + url: 'https://platform.openai.com/docs/bots', + producer: { name: 'OpenAI OpCo, LLC', url: 'https://openai.com/' }, + }, + { + regex: 'ChatGPT-User', + name: 'ChatGPT-User', + category: 'Crawler', + url: 'https://platform.openai.com/docs/bots', + producer: { name: 'OpenAI OpCo, LLC', url: 'https://openai.com/' }, }, { regex: 'BrightEdge Crawler/[\\d.]+', @@ -4314,30 +3515,7 @@ export default [ name: 'Cyberscan', category: 'Security Checker', url: 'https://www.cyberscan.io/', - producer: { - name: 'DGC Verwaltungs GmbH', - url: 'https://dgc.org/', - }, - }, - { - regex: 'deepcrawl\\.com', - name: 'Lumar', - category: 'Crawler', - url: 'https://deepcrawl.com/bot', - producer: { - name: 'Lumar', - url: 'https://www.lumar.io/', - }, - }, - { - regex: 'RepoLookoutBot', - name: 'Repo Lookout', - category: 'Crawler', - url: 'https://www.repo-lookout.org/', - producer: { - name: 'Crissy Field GmbH', - url: 'https://www.crissyfield.de/', - }, + producer: { name: 'DGC Verwaltungs GmbH', url: 'https://dgc.org/' }, }, { regex: 'researchscan\\.comsys\\.rwth-aachen\\.de', @@ -4354,40 +3532,21 @@ export default [ name: 'Scraping Robot', category: 'Crawler', url: 'https://scrapingrobot.com/', - producer: { - name: 'Sprious LLC', - url: 'https://sprious.com/', - }, - }, - { - regex: 'GPTBot/[\\d.]+', - name: 'GPTBot', - category: 'Crawler', - url: 'https://platform.openai.com/docs/gptbot', - producer: { - name: 'OpenAI OpCo, LLC', - url: 'https://openai.com/', - }, + producer: { name: 'Sprious LLC', url: 'https://sprious.com/' }, }, { regex: 'Ant(?:\\.com beta|Bot)(?:/([\\d+.]+))?', name: 'Ant', category: 'Crawler', url: 'https://www.ant.com/', - producer: { - name: 
'Ant.com Ltd.', - url: 'https://www.ant.com/', - }, + producer: { name: 'Ant.com Ltd.', url: 'https://www.ant.com/' }, }, { regex: 'WebwikiBot/[\\d.]+', name: 'Webwiki', category: 'Crawler', url: 'https://www.webwiki.com/', - producer: { - name: 'webwiki GmbH', - url: 'https://www.webwiki.com/', - }, + producer: { name: 'webwiki GmbH', url: 'https://www.webwiki.com/' }, }, { regex: 'phpMyAdmin', @@ -4400,30 +3559,21 @@ export default [ name: 'Matomo', category: 'Service Agent', url: 'https://github.com/matomo-org/matomo', - producer: { - name: 'InnoCraft Ltd', - url: 'https://matomo.org/', - }, + producer: { name: 'InnoCraft Ltd', url: 'https://matomo.org/' }, }, { regex: 'Prometheus/[\\d.]+', name: 'Prometheus', category: 'Service Agent', url: 'https://github.com/prometheus/prometheus', - producer: { - name: 'The Linux Foundation', - url: 'https://www.cncf.io/', - }, + producer: { name: 'The Linux Foundation', url: 'https://www.cncf.io/' }, }, { regex: 'ArchiveTeam ArchiveBot', name: 'ArchiveBot', category: 'Crawler', url: 'https://wiki.archiveteam.org/index.php?title=ArchiveBot', - producer: { - name: 'ArchiveTeam', - url: 'https://wiki.archiveteam.org/', - }, + producer: { name: 'ArchiveTeam', url: 'https://wiki.archiveteam.org/' }, }, { regex: 'MADBbot/[\\d.]+', @@ -4441,7 +3591,7 @@ export default [ }, }, { - regex: '(?:Owler@ows\\.eu|OWLer)/[\\d.]+', + regex: 'owler', name: 'OWLer', category: 'Crawler', url: 'https://openwebsearch.eu/owler/', @@ -4455,20 +3605,14 @@ export default [ name: 'BBC Page Monitor', category: 'Site Monitor', url: 'https://confluence.dev.bbc.co.uk/display/men/Page+Monitor', - producer: { - name: 'BBC', - url: 'https://www.bbc.com/', - }, + producer: { name: 'BBC', url: 'https://www.bbc.com/' }, }, { regex: 'BBC-Forge-URL-Monitor-Twisted', name: 'BBC Forge URL Monitor', category: 'Site Monitor', url: 'https://www.bbc.com/', - producer: { - name: 'BBC', - url: 'https://www.bbc.com/', - }, + producer: { name: 'BBC', url: 
'https://www.bbc.com/' }, }, { regex: 'ClaudeBot', @@ -4481,19 +3625,14 @@ export default [ name: 'ImageSift', category: 'Crawler', url: 'https://imagesift.com/', - producer: { - name: 'Castle Global, Inc.', - url: 'https://thehive.ai/', - }, + producer: { name: 'Castle Global, Inc.', url: 'https://thehive.ai/' }, }, { regex: 'TactiScout', name: 'TactiScout', category: 'Crawler', url: 'https://find-it.world/TempCrawl/Crawltheque.php', - producer: { - name: 'Tactikast', - }, + producer: { name: 'Tactikast' }, }, { regex: 'Brightbot ([\\d+.]+)', @@ -4510,20 +3649,14 @@ export default [ name: 'DaspeedBot', category: 'Crawler', url: 'https://daspeed.io/', - producer: { - name: 'DAWAP SARL', - url: 'https://dawap.fr/', - }, + producer: { name: 'DAWAP SARL', url: 'https://dawap.fr/' }, }, { regex: 'StractBot(?:/([\\d+.]+))?', name: 'Stract', category: 'Crawler', url: 'https://stract.com/webmasters', - producer: { - name: 'Stract', - url: 'https://github.com/StractOrg/stract/', - }, + producer: { name: 'Stract', url: 'https://github.com/StractOrg/stract/' }, }, { regex: 'GeedoBot(?:/([\\d+.]+))?', @@ -4531,35 +3664,32 @@ export default [ category: 'Crawler', url: 'https://geedo.com/bot/', }, + { + regex: 'GeedoProductSearch', + name: 'GeedoProductSearch', + category: 'Crawler', + url: 'https://geedo.com/product-search/', + }, { regex: 'BackupLand(?:/([\\d+.]+))?', name: 'BackupLand', category: 'Crawler', url: 'https://go.backupland.com/', - producer: { - name: 'ООО «КВАРТА»', - url: 'https://go.backupland.com/', - }, + producer: { name: 'ООО «КВАРТА»', url: 'https://go.backupland.com/' }, }, { regex: 'Konturbot(?:/([\\d+.]+))?', name: 'Konturbot', category: 'Crawler', url: 'https://kontur.ru/', - producer: { - name: 'АО «ПФ «СКБ Контур»', - url: 'https://kontur.ru/', - }, + producer: { name: 'АО «ПФ «СКБ Контур»', url: 'https://kontur.ru/' }, }, { regex: 'keys-so-bot', name: 'Keys.so', category: 'Crawler', url: 'https://www.keys.so/', - producer: { - name: 'ООО 
«МОДЕСКО»', - url: 'https://www.modesco.ru/', - }, + producer: { name: 'ООО «МОДЕСКО»', url: 'https://www.modesco.ru/' }, }, { regex: 'LetsearchBot(?:/([\\d+.]+))?', @@ -4588,40 +3718,28 @@ export default [ name: 'Spawning AI', category: 'Crawler', url: 'https://spawning.ai/', - producer: { - name: 'Spawning, Inc', - url: 'https://spawning.ai/', - }, + producer: { name: 'Spawning, Inc', url: 'https://spawning.ai/' }, }, { regex: 'domain research project', name: 'Domain Research Project', category: 'Crawler', url: 'https://trentwil.es/domains.html', - producer: { - name: 'Trent Wiles', - url: 'https://trentwil.es/', - }, + producer: { name: 'Trent Wiles', url: 'https://trentwil.es/' }, }, { regex: 'getodin\\.com', name: 'Odin', category: 'Security Checker', url: 'https://docs.getodin.com/', - producer: { - name: 'Cyble Inc.', - url: 'https://cyble.com/', - }, + producer: { name: 'Cyble Inc.', url: 'https://cyble.com/' }, }, { regex: 'YouBot', name: 'YouBot', category: 'Crawler', url: 'https://about.you.com/youbot/', - producer: { - name: 'SuSea, Inc.', - url: 'https://you.com/', - }, + producer: { name: 'SuSea, Inc.', url: 'https://you.com/' }, }, { regex: 'SiteScoreBot', @@ -4634,20 +3752,14 @@ export default [ name: 'Monitor Backlinks', category: 'Crawler', url: 'https://www.seoptimer.com/monitor-backlinks/', - producer: { - name: 'SEOptimer', - url: 'https://www.seoptimer.com/', - }, + producer: { name: 'SEOptimer', url: 'https://www.seoptimer.com/' }, }, { regex: 'mariadb-mysql-kbs-bot', name: 'MariaDB/MySQL Knowledge Base', category: 'Crawler', url: 'https://github.com/williamdes/mariadb-mysql-kbs', - producer: { - name: 'WDES SAS', - url: 'https://wdes.fr/en/', - }, + producer: { name: 'WDES SAS', url: 'https://wdes.fr/en/' }, }, { regex: 'GitHubCopilotChat', @@ -4655,25 +3767,31 @@ export default [ category: 'Crawler', url: 'https://github.com/aaamoon/copilot-gpt4-service', }, + { + regex: '^pdrl\\.fm', + name: 'Podroll Analyzer', + category: 'Crawler', + 
url: 'https://podroll.fm', + }, + { + regex: 'PodUptime/', + name: 'PodUptime', + category: 'Site Monitor', + url: 'https://poduptime.com', + }, { regex: 'anthropic-ai', name: 'Anthropic AI', category: 'Crawler', url: 'https://www.anthropic.com/', - producer: { - name: 'Anthropic, PBC', - url: 'https://www.anthropic.com/', - }, + producer: { name: 'Anthropic, PBC', url: 'https://www.anthropic.com/' }, }, { regex: 'NetpeakCheckerBot/[\\d.]+', name: 'Netpeak Checker', category: 'Crawler', url: 'https://netpeaksoftware.com/checker', - producer: { - name: 'Netpeak LTD', - url: 'https://netpeaksoftware.com/', - }, + producer: { name: 'Netpeak LTD', url: 'https://netpeaksoftware.com/' }, }, { regex: 'SandobaCrawler/[\\d.]+', @@ -4690,10 +3808,7 @@ export default [ name: 'Sirdata', category: 'Crawler', url: 'https://semantic-api.docs.sirdata.net/contextual-api/contextual-api/introduction', - producer: { - name: 'Sirdata SAS', - url: 'https://www.sirdata.com/', - }, + producer: { name: 'Sirdata SAS', url: 'https://www.sirdata.com/' }, }, { regex: 'CheckMarkNetwork/[\\d.]+', @@ -4710,10 +3825,7 @@ export default [ name: 'Cohere AI', category: 'Crawler', url: 'https://cohere.com/', - producer: { - name: 'Cohere, Inc.', - url: 'https://cohere.com/', - }, + producer: { name: 'Cohere, Inc.', url: 'https://cohere.com/' }, }, { regex: 'PerplexityBot/[\\d.]+', @@ -4740,10 +3852,7 @@ export default [ name: 'Montastic Monitor', category: 'Site Monitor', url: 'https://www.montastic.com/', - producer: { - name: 'Metadot, Corp.', - url: 'https://www.metadot.com/', - }, + producer: { name: 'Metadot, Corp.', url: 'https://www.metadot.com/' }, }, { regex: 'Ruby, Twurly v[\\d.]+', @@ -4761,11 +3870,7 @@ export default [ url: 'https://www.mixnode.com/', }, }, - { - regex: 'CSSCheck/[\\d.]+', - name: 'CSSCheck', - category: 'Validator', - }, + { regex: 'CSSCheck/[\\d.]+', name: 'CSSCheck', category: 'Validator' }, { regex: 'MicrosoftPreview/[\\d.]+', name: 'Microsoft Preview', @@ -4791,10 
+3896,7 @@ export default [ name: 'TinEye', category: 'Crawler', url: 'https://tineye.com/', - producer: { - name: 'Idée, Inc.', - url: 'https://tineye.com/', - }, + producer: { name: 'Idée, Inc.', url: 'https://tineye.com/' }, }, { regex: 'e~arsnova-filter-system', @@ -4811,10 +3913,7 @@ export default [ name: 'Botify', category: 'Crawler', url: 'https://www.botify.com/', - producer: { - name: 'BOTIFY SAS', - url: 'https://www.botify.com/', - }, + producer: { name: 'BOTIFY SAS', url: 'https://www.botify.com/' }, }, { regex: 'adscanner', @@ -4831,10 +3930,7 @@ export default [ name: 'WebCEO', category: 'Crawler', url: 'https://www.webceo.com/', - producer: { - name: 'WebCEO, LLC', - url: 'https://www.webceo.com/', - }, + producer: { name: 'WebCEO, LLC', url: 'https://www.webceo.com/' }, }, { regex: 'NetTrack', @@ -4847,20 +3943,14 @@ export default [ name: 'htmlyse', category: 'Crawler', url: 'https://www.htmlyse.com/', - producer: { - name: 'Vistex LTD', - url: 'https://www.htmlyse.com/', - }, + producer: { name: 'Vistex LTD', url: 'https://www.htmlyse.com/' }, }, { regex: 'TrendsmapResolver/[\\d.]+', name: 'Trendsmap', category: 'Crawler', url: 'https://www.trendsmap.com/', - producer: { - name: 'Trendsmap Pty Ltd', - url: 'https://www.trendsmap.com/', - }, + producer: { name: 'Trendsmap Pty Ltd', url: 'https://www.trendsmap.com/' }, }, { regex: 'Shareaholic(?:bot)?/[\\d.]+', @@ -4873,29 +3963,713 @@ export default [ }, }, { - regex: 'keycdn-tools', + regex: 'keycdn-tools:', name: 'KeyCDN Tools', category: 'Service Agent', url: 'https://tools.keycdn.com/geo', }, + { + regex: 'keycdn-tools/', + name: 'KeyCDN Tools', + category: 'Service Agent', + url: 'https://tools.keycdn.com/', + producer: { name: 'proinity LLC', url: 'https://www.keycdn.com/' }, + }, { regex: 'Arquivo-web-crawler', name: 'Arquivo.pt', category: 'Crawler', url: 'https://sobre.arquivo.pt/en/help/crawling-and-archiving-web-content/', + producer: { name: 'FCT|FCCN', url: 'https://www.fct.pt/' }, + 
}, + { + regex: 'WhatsMyIP\\.org', + name: 'WhatsMyIP.org', + category: 'Service Agent', + url: 'https://www.whatsmyip.org/ua/', + }, + { + regex: 'SenutoBot/[\\d.]+', + name: 'Senuto', + category: 'Crawler', + url: 'https://www.senuto.com/', + producer: { name: 'Senuto Sp. z o.o.', url: 'https://www.senuto.com/' }, + }, + { + regex: 'spaziodati', + name: 'SpazioDati', + category: 'Crawler', + url: 'https://www.spaziodati.eu/', + producer: { name: 'SpazioDati s.r.l.', url: 'https://www.spaziodati.eu/' }, + }, + { + regex: 'GozleBot', + name: 'Gozle', + category: 'Crawler', + url: 'https://gozle.com.tm/en/blog/post/1', + producer: { name: 'Doly Horjun HJ', url: 'https://gozle.com.tm/' }, + }, + { + regex: 'Quantcastbot/[\\d.]+', + name: 'Quantcast', + category: 'Crawler', + url: 'https://www.quantcast.com/bot/', + producer: { name: 'Quantcast Corp.', url: 'https://www.quantcast.com/' }, + }, + { + regex: 'FontRadar', + name: 'FontRadar', + category: 'Crawler', + url: 'https://www.fontradar.com/', + producer: { name: 'EMDASH SAS', url: 'https://www.fontradar.com/' }, + }, + { + regex: 'ViberUrlDownloader', + name: 'Viber Url Downloader', + category: 'Service Agent', + url: 'https://www.viber.com/', + producer: { name: 'Viber Media S.à r.l.', url: 'https://www.viber.com/' }, + }, + { + regex: '^Zeno$', + name: 'Zeno', + category: 'Crawler', + url: 'https://github.com/internetarchive/Zeno', + producer: { name: 'The Internet Archive', url: 'https://archive.org/' }, + }, + { + regex: 'Barracuda Sentinel', + name: 'Barracuda Sentinel', + category: 'Service Agent', + url: 'https://sentinel.barracudanetworks.com/', producer: { - name: 'FCT|FCCN', - url: 'https://www.fct.pt/', + name: 'Barracuda Networks, Inc.', + url: 'https://www.barracudanetworks.com/', }, }, + { + regex: 'RuxitSynthetic/[\\d.]+', + name: 'RuxitSynthetic', + category: 'Site Monitor', + url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164', + producer: { name: 
'Dynatrace LLC', url: 'https://www.dynatrace.com/' }, + }, + { + regex: 'DynatraceSynthetic/[\\d.]+', + name: 'DynatraceSynthetic', + category: 'Site Monitor', + url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164', + producer: { name: 'Dynatrace LLC', url: 'https://www.dynatrace.com/' }, + }, + { + regex: 'sitebulb', + name: 'Sitebulb', + category: 'Crawler', + url: 'https://sitebulb.com/', + producer: { name: 'Sitebulb Limited', url: 'https://sitebulb.com/' }, + }, + { + regex: 'Monsidobot/[\\d.]+', + name: 'Monsidobot', + category: 'Crawler', + url: 'https://monsido.com/bot-html', + producer: { name: 'Monsido LLC', url: 'https://monsido.com/' }, + }, + { + regex: 'AccompanyBot', + name: 'AccompanyBot', + category: 'Crawler', + url: 'https://www.accompany.com/', + producer: { name: 'Accompani, Inc', url: 'https://www.accompany.com/' }, + }, + { + regex: 'Ghost Inspector', + name: 'Ghost Inspector', + category: 'Site Monitor', + url: 'https://docs.ghostinspector.com/faq/#how-do-i-detect-ghost-inspector-test-runner-traffic-on-my-site', + producer: { + name: 'Ghost Inspector, Inc.', + url: 'https://www.ghostinspector.com/', + }, + }, + { + regex: 'Cypress/[\\d.]+', + name: 'Cypress', + category: 'Site Monitor', + url: 'https://github.com/cypress-io/cypress', + producer: { name: 'Cypress.io, Inc.', url: 'https://www.cypress.io/' }, + }, + { + regex: 'Google-Apps-Script', + name: 'Google Apps Script', + category: 'Service Agent', + url: 'https://www.google.com/script/start/', + }, + { + regex: 'SiteOne-Crawler/[\\d.]+', + name: 'SiteOne Crawler', + category: 'Crawler', + url: 'https://crawler.siteone.io/bot/', + producer: { name: 'SiteOne s.r.o.', url: 'https://www.siteone.io/' }, + }, + { + regex: 'Detectify', + name: 'Detectify', + category: 'Security Checker', + url: 'https://support.detectify.com/support/solutions/articles/48001049001-how-to-allow-detectify-to-access-your-site', + producer: { name: 'Detectify AB', url: 
'https://detectify.com/' }, + }, + { + regex: 'DomCopBot', + name: 'DomCop Bot', + category: 'Crawler', + url: 'https://www.domcop.com/bot', + producer: { + name: 'Axeman Technology Solutions LLP', + url: 'https://axemantech.com/', + }, + }, + { + regex: 'Paqlebot/[\\d.]+', + name: 'Paqlebot', + category: 'Crawler', + url: 'https://www.paqle.dk/about/paqlebot', + producer: { name: 'Paqle A/S', url: 'https://www.paqle.dk/' }, + }, + { + regex: 'Wibybot', + name: 'Wibybot', + category: 'Crawler', + url: 'https://www.wiby.me/', + }, + { + regex: 'Synapse', + name: 'Synapse', + category: 'Crawler', + url: 'https://github.com/matrix-org/synapse', + }, + { + regex: 'OSZKbot/[\\d.]+', + name: 'OSZKbot', + category: 'Crawler', + url: 'http://mekosztaly.oszk.hu/mia/', + producer: { + name: 'National Szechenyi Library', + url: 'https://webarchivum.oszk.hu/', + }, + }, + { + regex: 'ZoomBot', + name: 'ZoomBot', + category: 'Crawler', + url: 'https://suite.seozoom.it/bot.html', + producer: { name: 'SEO Cube S.r.l.', url: 'https://www.seocube.it/' }, + }, + { + regex: 'RavenCrawler/[\\d.]+', + name: 'RavenCrawler', + category: 'Crawler', + url: 'https://raventools.com/site-auditor/', + producer: { name: 'TapClicks, Inc.', url: 'https://www.tapclicks.com/' }, + }, + { + regex: 'KadoBot', + name: 'KadoBot', + category: 'Crawler', + url: 'https://www.kadolijst.nl/bot', + producer: { name: 'Kadolijst', url: 'https://www.kadolijst.nl/' }, + }, + { + regex: 'Dubbotbot/[\\d.]+', + name: 'Dubbotbot', + category: 'Crawler', + url: 'https://help.dubbot.com/en/articles/6746594-example-custom-user-agent', + producer: { name: 'DubBot', url: 'https://dubbot.com/' }, + }, + { + regex: 'Swiftbot/[\\d.]+', + name: 'Swiftbot', + category: 'Crawler', + url: 'https://swiftype.com/swiftbot', + producer: { name: 'Elasticsearch, B.V.', url: 'https://www.elastic.co/' }, + }, + { + regex: 'EyeMonIT', + name: 'EyeMonit', + category: 'Site Monitor', + url: 'https://eyemonit.com/', + producer: { name: 
'EyeMonit', url: 'https://eyemonit.com/' }, + }, + { + regex: 'ThousandEyes', + name: 'ThousandEyes', + category: 'Site Monitor', + url: 'https://www.thousandeyes.com/', + producer: { name: 'Cisco Systems, Inc.', url: 'https://www.cisco.com/' }, + }, + { regex: 'OmtrBot/[\\d.]+', name: 'OmtrBot', category: 'Site Monitor' }, + { regex: 'WebMon/[\\d.]+', name: 'WebMon', category: 'Site Monitor' }, + { + regex: 'AdsTxtCrawlerTP/[\\d.]+', + name: 'AdsTxtCrawlerTP', + category: 'Crawler', + }, + { + regex: 'fragFINN', + name: 'fragFINN', + category: 'Crawler', + url: 'https://www.fragfinn.de/', + producer: { name: 'fragFINN e.V.', url: 'https://www.fragfinn.de/' }, + }, + { + regex: 'Clickagy', + name: 'Clickagy', + category: 'Crawler', + url: 'https://www.clickagy.com/', + producer: { name: 'Clickagy, LLC', url: 'https://www.clickagy.com/' }, + }, + { + regex: 'kiwitcms-gitops/[\\d.]+', + name: 'Kiwi TCMS GitOps', + category: 'Service Agent', + url: 'https://kiwitcms.org', + producer: { + name: 'Open Technologies Bulgaria, Ltd.', + url: 'https://kiwitcms.org', + }, + }, + { + regex: 'webtru_crawler', + name: 'webtru', + category: 'Crawler', + url: 'https://webtru.io/', + producer: { name: 'DataSign Inc.', url: 'https://datasign.jp/' }, + }, + { + regex: 'URLSuMaBot', + name: 'URLSuMaBot', + category: 'Crawler', + url: 'https://www.urlsuma.de/', + }, + { + regex: '360JK yunjiankong', + name: '360JK', + category: 'Site Monitor', + url: 'http://jk.cloud.360.cn/', + producer: { + name: '360 Security Technology Inc.', + url: 'https://www.360.cn/', + }, + }, + { + regex: 'UCSBNetworkMeasurement', + name: 'UCSB Network Measurement', + category: 'Crawler', + url: 'https://www.it.ucsb.edu/', + producer: { + name: 'University of California, Santa Barbara', + url: 'https://www.it.ucsb.edu/', + }, + }, + { + regex: 'Plesk screenshot bot', + name: 'Plesk Screenshot Service', + category: 'Service Agent', + url: 
'https://support.plesk.com/hc/en-us/articles/13302778306199-What-is-Plesk-Screenshot-Service', + producer: { + name: 'Plesk International GmbH', + url: 'https://www.plesk.com/', + }, + }, + { + regex: 'Who\\.is', + name: 'Who.is Bot', + category: 'Crawler', + url: 'https://who.is/', + }, + { + regex: 'Probely', + name: 'Probely', + category: 'Security Checker', + url: 'https://probely.com/sos/', + producer: { + name: 'Probely - Soluções de Cibersegurança, S.A.', + url: 'https://probely.com/', + }, + }, + { + regex: 'Uptimia(?:/[\\d.]+)?', + name: 'Uptimia', + category: 'Site Monitor', + url: 'https://www.uptimia.com/', + producer: { name: 'JJ Online GmbH', url: 'https://www.uptimia.com/' }, + }, + { + regex: '2GDPR/[\\d.]+', + name: '2GDPR', + category: 'Service Agent', + url: 'https://2gdpr.com/tos', + producer: { name: '2GDPR', url: 'https://2gdpr.com/' }, + }, + { + regex: 'abuse\\.xmco\\.fr', + name: 'Serenety', + category: 'Security Checker', + url: 'https://abuse.xmco.fr/', + producer: { name: 'XMCO, SASU', url: 'https://www.xmco.fr/' }, + }, + { + regex: 'CheckHost', + name: 'CheckHost', + category: 'Site Monitor', + url: 'https://check-host.net/', + producer: { name: 'CheckHost', url: 'https://check-host.net/' }, + }, + { + regex: 'LAC_IAHarvester/[\\d.]+', + name: 'LAC IA Harvester', + category: 'Crawler', + url: 'https://library-archives.canada.ca/eng/services/government-canada/web-social-media-preservation-program/Pages/web-archive.aspx', + producer: { + name: 'Library and Archives Canada', + url: 'https://library-archives.canada.ca/', + }, + }, + { + regex: 'InsytfulBot/[\\d.]+', + name: 'InsytfulBot', + category: 'Crawler', + url: 'https://www.insytful.com/', + producer: { name: 'Zengenti Limited', url: 'https://www.zengenti.com/' }, + }, + { + regex: 'statista\\.com', + name: 'Statista', + category: 'Crawler', + url: 'https://www.statista.com/', + producer: { name: 'Statista, Inc.', url: 'https://www.statista.com/' }, + }, + { + regex: 
'SubstackContentFetch/[\\d.]+', + name: 'Substack Content Fetch', + category: 'Crawler', + url: 'https://substack.com/', + producer: { name: 'Substack, Inc.', url: 'https://substack.com/' }, + }, + { + regex: '^ds9', + name: 'Deep SEARCH 9', + category: 'Crawler', + url: 'https://www.copyright.com/blog/ccc-expands-corporate-solutions-offering-with-new-technology/', + producer: { + name: 'Copyright Clearance Center, Inc.', + url: 'https://www.copyright.com/', + }, + }, + { + regex: 'LiveJournal\\.com', + name: 'LiveJournal', + url: 'https://www.livejournal.com/', + category: 'Feed Fetcher', + producer: { name: 'ООО "СИМ"', url: 'https://www.livejournal.com/' }, + }, + { + regex: 'bitdiscovery', + name: 'Tenable.asm', + category: 'Security Checker', + url: 'https://bitdiscovery.com/', + producer: { name: 'Tenable, Inc.', url: 'https://www.tenable.com/' }, + }, + { + regex: 'Castopod/[\\d.]+', + name: 'Castopod', + category: 'Crawler', + url: 'https://www.castopod.org/', + }, + { + regex: 'Elastic/Synthetics', + name: 'Elastic Synthetics', + category: 'Site Monitor', + url: 'https://github.com/elastic/synthetics', + producer: { name: 'Elasticsearch B.V.', url: 'https://www.elastic.co/' }, + }, + { + regex: 'WDG_Validator/[\\d.]+', + name: 'WDG HTML Validator', + category: 'Validator', + url: 'http://www.htmlhelp.com/tools/validator/', + }, + { + regex: 'scan@aegis.network', + name: 'Aegis', + category: 'Crawler', + url: 'https://web.archive.org/web/20180910002802/http://www.aegis.network/', + }, + { + regex: 'CrawlyProjectCrawler/[\\d.]+', + name: 'Crawly Project', + category: 'Crawler', + url: 'https://web.archive.org/web/20240326141952/https://crawlyproject.digitaldragon.dev/', + }, + { + regex: 'BDFetch', + name: 'BDFetch', + category: 'Crawler', + url: 'https://web.archive.org/web/20130821043949/http://www.branddimensions.com/', + }, + { + regex: 'PunkMap', + name: 'Punk Map', + category: 'Security Checker', + url: 'https://github.com/openeasm/punkmap', + }, + { + 
regex: 'GenomeCrawlerd/[\\d.]+', + name: 'Deepfield Genome', + category: 'Crawler', + url: 'https://www.nokia.com/networks/ip-networks/deepfield/genome/', + producer: { name: 'Nokia Corporation', url: 'https://www.nokia.com/' }, + }, + { + regex: 'Gaisbot/[\\d.]+', + name: 'Gaisbot', + category: 'Crawler', + url: 'https://web.archive.org/web/20090604121511/https://gais.cs.ccu.edu.tw/robot.php', + }, + { + regex: 'FAST-WebCrawler/[\\d.]+', + name: 'AlltheWeb', + category: 'Crawler', + url: 'https://web.archive.org/web/20041020050801/http://www.alltheweb.com/help/webmaster/crawler', + }, + { + regex: 'ducks\\.party', + name: 'ducks.party', + category: 'Security Checker', + url: 'https://ducks.party/', + }, + { + regex: 'DepSpid/[\\d.]+', + name: 'DepSpid', + category: 'Crawler', + url: 'https://web.archive.org/web/20080321224033/http://about.depspid.net/', + }, + { + regex: 'Website-info\\.net', + name: 'Website-info', + category: 'Crawler', + url: 'https://website-info.net/robot', + producer: { name: 'Meins und Vogel GmbH', url: 'https://muv.com/' }, + }, + { + regex: 'RedekenBot', + name: 'RedekenBot', + category: 'Crawler', + url: 'https://www.redeken.com/en/help/bot.html', + producer: { name: 'Redeken', url: 'https://www.redeken.com/' }, + }, + { + regex: 'semaltbot', + name: 'semaltbot', + category: 'Crawler', + url: 'https://semalt.net/', + producer: { name: 'Semalt LP', url: 'https://semalt.net/' }, + }, + { + regex: 'MakeMerryBot', + name: 'MakeMerryBot', + category: 'Crawler', + url: 'https://makemerry.app/bots', + }, + { + regex: 'Timpibot', + name: 'Timpibot', + category: 'Crawler', + url: 'https://timpi.io/', + producer: { name: 'Timpi Inc.', url: 'https://timpi.io/' }, + }, + { + regex: 'Validbot', + name: 'ValidBot', + category: 'Crawler', + url: 'https://www.validbot.com/', + producer: { name: 'Jake Olefsky LLC', url: 'https://www.validbot.com/' }, + }, + { + regex: 'NPBot', + name: 'NameProtectBot', + category: 'Crawler', + url: 
'https://www.cscglobal.com/cscglobal/home/', + producer: { name: 'NameProtect, Inc.', url: 'https://www.cscglobal.com/' }, + }, + { + regex: 'domaincodex\\.com', + name: 'Domain Codex', + category: 'Crawler', + url: 'https://www.domaincodex.com/', + producer: { + name: 'Erie Data Systems, LLC', + url: 'https://www.eriedatasys.com/', + }, + }, + { + regex: 'Swisscows Favicons', + name: 'Swisscows Favicons', + category: 'Crawler', + url: 'https://swisscows.com/', + producer: { name: 'Swisscows AG', url: 'https://swisscows.com/' }, + }, + { + regex: 'leak\\.info', + name: 'leak.info', + category: 'Crawler', + url: 'http://www.leak.info/', + }, + { + regex: 'workona', + name: 'Workona', + category: 'Crawler', + url: 'https://workona.com/', + producer: { name: 'Workona, Inc.', url: 'https://workona.com/' }, + }, + { + regex: 'Bloglines', + name: 'Bloglines', + category: 'Crawler', + url: 'https://web.archive.org/web/20140309033202/http://www.bloglines.com/', + producer: { name: 'Reply!, Inc.', url: 'https://www.reply.com/' }, + }, + { + regex: 'heritrix', + name: 'Heritrix', + category: 'Crawler', + url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix', + producer: { name: 'The Internet Archive', url: 'https://archive.org' }, + }, + { + regex: 'search\\.marginalia\\.nu', + name: 'Marginalia', + category: 'Crawler', + url: 'https://www.marginalia.nu/marginalia-search/for-webmasters/', + producer: { name: 'Marginalia', url: 'https://www.marginalia.nu/' }, + }, + { + regex: 'vu-server-health-scanner/[\\d.]+', + name: 'VU Server Health Scanner', + category: 'Security Checker', + url: 'https://130.37.198.75/index.html', + producer: { name: 'VU Amsterdam', url: 'https://vu.nl/en' }, + }, + { + regex: 'Functionize', + name: 'Functionize', + category: 'Crawler', + url: 'https://www.functionize.com/', + producer: { + name: 'Functionize, Inc.', + url: 'https://www.functionize.com/', + }, + }, + { + regex: 'Prerender', + name: 'Prerender', + category: 'Crawler', + url: 
'https://docs.prerender.io/docs/33-overview-of-prerender-crawlers', + producer: { name: 'saas.group Inc.', url: 'https://saas.group/' }, + }, + { + regex: 'bl\\.uk_ldfc_bot', + name: 'The British Library Legal Deposit Bot', + category: 'Crawler', + url: 'https://www.bl.uk/', + producer: { name: 'The British Library', url: 'https://www.bl.uk/' }, + }, + { + regex: 'Miniature\\.io', + name: 'Miniature.io', + category: 'Service Agent', + url: 'https://miniature.io/', + producer: { name: 'LCX Ventures Ltd', url: 'https://www.lcxventures.com/' }, + }, + { + regex: 'Convertify', + name: 'Convertify', + category: 'Service Agent', + url: 'https://www.convertify.app/', + producer: { name: 'Convertify', url: 'https://www.convertify.app/' }, + }, + { + regex: 'ZoteroTranslationServer', + name: 'Zotero Translation Server', + category: 'Service Agent', + url: 'https://github.com/wikimedia/mediawiki-services-zotero', + producer: { + name: 'The Wikimedia Foundation, Inc.', + url: 'https://www.wikimedia.org/', + }, + }, + { + regex: 'MuckRack', + name: 'MuckRack', + category: 'Crawler', + url: 'https://muckrack.com/', + producer: { name: 'Muck Rack, LLC', url: 'https://muckrack.com/' }, + }, + { + regex: 'Golfe', + name: 'Golfe', + category: 'Crawler', + url: 'http://www.goo-olfe.ae/bot.html', + }, + { + regex: 'SpiderLing', + name: 'SpiderLing', + category: 'Crawler', + url: 'https://nlp.fi.muni.cz/projects/biwec/', + producer: { + name: 'Natural Language Processing Centre', + url: 'https://nlp.fi.muni.cz/', + }, + }, + { + regex: 'Bravebot', + name: 'Bravebot', + category: 'Search bot', + url: 'https://search.brave.com/help/brave-search-crawler', + producer: { name: 'Brave Software, Inc.', url: 'https://brave.com/' }, + }, + { + regex: '1001FirmsBot', + name: '1001FirmsBot', + category: 'Crawler', + url: 'https://www.1001firms.com/1001firmsbot.php', + }, + { + regex: 'SteamChatURLLookup', + name: 'Steam Chat URL Lookup', + category: 'Service Agent', + url: 
'https://help.steampowered.com/en/faqs/view/595C-42F4-3B66-E02F', + producer: { + name: 'Valve Corporation', + url: 'https://www.valvesoftware.com/', + }, + }, + { + regex: 'ohdear\\.app', + name: 'Oh Dear', + category: 'Site Monitor', + url: 'https://ohdear.app/docs/faq/what-is-the-oh-dear-crawler-doing-in-my-logs', + producer: { name: 'Immutable, SNC', url: 'https://ohdear.app/' }, + }, { regex: - 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\\.com|tweetedtimes\\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\\.o\\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \\(cow\\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|kirkland-signature|^xenu|^ZmEu|^(?:chrome|firefox|Zeus)$', + 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus| CM62| HD65))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\\.o\\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? 
world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \\(cow\\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherx?web|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|xx032_bo9vs83_2a|sslshed|geckotrail|Wordup|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|Node.js|Report Runner|url|Zeus|ZmEu)$', name: 'Generic Bot', }, { regex: - '[a-z0-9_-]*(?:(?