4764 lines
132 KiB
TypeScript
4764 lines
132 KiB
TypeScript
// This file is generated by the script get-bots.ts
|
||
|
||
// The data is fetch from device-detector https://raw.githubusercontent.com/matomo-org/device-detector/master/regexes/bots.yml
|
||
|
||
const bots = [
|
||
{
|
||
regex: 'WireReaderBot(?:/([\\d+.]+))?',
|
||
name: 'WireReaderBot',
|
||
category: 'Feed Fetcher',
|
||
url: 'https://wirereader.app/',
|
||
},
|
||
{
|
||
regex: 'monitoring360bot',
|
||
name: '360 Monitoring',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.360monitoring.io',
|
||
producer: {
|
||
name: 'Plesk International GmbH',
|
||
url: 'https://www.plesk.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Cloudflare-Healthchecks',
|
||
name: 'Cloudflare Health Checks',
|
||
category: 'Service Agent',
|
||
url: 'https://developers.cloudflare.com/health-checks/',
|
||
producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' },
|
||
},
|
||
{
|
||
regex: '360Spider',
|
||
name: '360Spider',
|
||
category: 'Search bot',
|
||
url: 'https://www.so.com/help/help_3_2.html',
|
||
producer: { name: 'Online Media Group, Inc.', url: '' },
|
||
},
|
||
{
|
||
regex: 'Aboundex',
|
||
name: 'Aboundexbot',
|
||
category: 'Search bot',
|
||
url: 'http://www.aboundex.com/crawler/',
|
||
producer: { name: 'Aboundex.com', url: 'http://www.aboundex.com' },
|
||
},
|
||
{
|
||
regex: 'AcoonBot',
|
||
name: 'Acoon',
|
||
category: 'Search bot',
|
||
url: 'http://www.acoon.de/robot.asp',
|
||
producer: { name: 'Acoon GmbH', url: 'http://www.acoon.de' },
|
||
},
|
||
{
|
||
regex: 'AddThis\\.com',
|
||
name: 'AddThis.com',
|
||
category: 'Social Media Agent',
|
||
url: '',
|
||
producer: {
|
||
name: 'Clearspring Technologies, Inc.',
|
||
url: 'http://www.clearspring.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'AhrefsBot',
|
||
name: 'aHrefs Bot',
|
||
category: 'Crawler',
|
||
url: 'https://ahrefs.com/robot',
|
||
producer: { name: 'Ahrefs Pte Ltd', url: 'https://ahrefs.com/robot' },
|
||
},
|
||
{
|
||
regex: 'AhrefsSiteAudit/[\\d.]+',
|
||
name: 'AhrefsSiteAudit',
|
||
category: 'Site Monitor',
|
||
url: 'https://ahrefs.com/robot/site-audit',
|
||
producer: { name: 'Ahrefs Pte Ltd', url: 'https://ahrefs.com/' },
|
||
},
|
||
{
|
||
regex: 'ia_archiver|alexabot|verifybot',
|
||
name: 'Alexa Crawler',
|
||
category: 'Search bot',
|
||
url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers',
|
||
producer: { name: 'Alexa Internet', url: 'https://www.alexa.com' },
|
||
},
|
||
{
|
||
regex: 'alexa site audit',
|
||
name: 'Alexa Site Audit',
|
||
category: 'Site Monitor',
|
||
url: 'https://support.alexa.com/hc/en-us/articles/200450194',
|
||
producer: { name: 'Alexa Internet', url: 'https://www.alexa.com' },
|
||
},
|
||
{
|
||
regex: 'Amazonbot/[\\d.]+',
|
||
name: 'Amazon Bot',
|
||
category: 'Crawler',
|
||
url: 'https://developer.amazon.com/support/amazonbot',
|
||
producer: { name: 'Amazon.com, Inc.', url: 'https://www.amazon.com/' },
|
||
},
|
||
{
|
||
regex: 'AmazonAdBot/[\\d.]+',
|
||
name: 'Amazon AdBot',
|
||
category: 'Crawler',
|
||
url: 'https://adbot.amazon.com/',
|
||
producer: { name: 'Amazon.com, Inc.', url: 'https://www.amazon.com/' },
|
||
},
|
||
{
|
||
regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service',
|
||
name: 'Amazon Route53 Health Check',
|
||
category: 'Service Agent',
|
||
producer: { name: 'Amazon Web Services', url: 'https://aws.amazon.com/' },
|
||
},
|
||
{
|
||
regex: 'AmorankSpider',
|
||
name: 'Amorank Spider',
|
||
category: 'Crawler',
|
||
url: 'http://amorank.com/webcrawler.html',
|
||
producer: { name: 'Amorank', url: 'http://www.amorank.com' },
|
||
},
|
||
{
|
||
regex: 'ApacheBench',
|
||
name: 'ApacheBench',
|
||
category: 'Benchmark',
|
||
url: 'https://httpd.apache.org/docs/2.4/programs/ab.html',
|
||
producer: {
|
||
name: 'The Apache Software Foundation',
|
||
url: 'https://www.apache.org/foundation/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Applebot',
|
||
name: 'Applebot',
|
||
category: 'Crawler',
|
||
url: 'https://support.apple.com/en-us/119829',
|
||
producer: { name: 'Apple Inc', url: 'https://www.apple.com/' },
|
||
},
|
||
{
|
||
regex: 'iTMS',
|
||
name: 'iTMS',
|
||
category: 'Crawler',
|
||
url: 'https://support.apple.com/en-us/119829',
|
||
producer: { name: 'Apple Inc', url: 'https://www.apple.com/' },
|
||
},
|
||
{
|
||
regex: 'AppSignalBot',
|
||
name: 'AppSignalBot',
|
||
category: 'Site Monitor',
|
||
url: 'https://docs.appsignal.com/uptime-monitoring/',
|
||
producer: { name: 'AppSignal', url: 'https://appsignal.com/' },
|
||
},
|
||
{
|
||
regex: 'Arachni',
|
||
name: 'Arachni',
|
||
category: 'Security Checker',
|
||
url: 'https://www.arachni-scanner.com/',
|
||
producer: { name: 'Sarosys LLC', url: 'https://www.sarosys.com/' },
|
||
},
|
||
{
|
||
regex: 'AspiegelBot',
|
||
name: 'AspiegelBot',
|
||
category: 'Crawler',
|
||
url: 'https://aspiegel.com/',
|
||
producer: { name: 'Huawei', url: 'https://www.huawei.com/' },
|
||
},
|
||
{
|
||
regex: 'Castro 2, Episode Duration Lookup',
|
||
name: 'Castro 2',
|
||
category: 'Service Agent',
|
||
url: 'http://supertop.co/castro/',
|
||
producer: { name: 'Supertop', url: 'http://supertop.co' },
|
||
},
|
||
{
|
||
regex: 'Curious George',
|
||
name: 'Analytics SEO Crawler',
|
||
category: 'Crawler',
|
||
url: 'http://www.analyticsseo.com/crawler',
|
||
producer: { name: 'Analytics SEO', url: 'http://www.analyticsseo.com' },
|
||
},
|
||
{
|
||
regex: 'archive\\.org_bot|special_archiver',
|
||
name: 'archive.org bot',
|
||
category: 'Crawler',
|
||
url: 'https://archive.org/details/archive.org_bot',
|
||
producer: { name: 'The Internet Archive', url: 'https://archive.org' },
|
||
},
|
||
{
|
||
regex: 'Ask Jeeves/Teoma',
|
||
name: 'Ask Jeeves',
|
||
category: 'Search bot',
|
||
url: '',
|
||
producer: { name: 'Ask Jeeves Inc.', url: 'http://www.ask.com' },
|
||
},
|
||
{
|
||
regex: 'Backlink-Check\\.de',
|
||
name: 'Backlink-Check.de',
|
||
category: 'Crawler',
|
||
url: 'http://www.backlink-check.de/bot.html',
|
||
producer: {
|
||
name: 'Mediagreen Medienservice',
|
||
url: 'http://www.backlink-check.de',
|
||
},
|
||
},
|
||
{
|
||
regex: 'BacklinkCrawler',
|
||
name: 'BacklinkCrawler',
|
||
category: 'Crawler',
|
||
url: 'http://www.backlinktest.com/crawler.html',
|
||
producer: { name: '2.0Promotion GbR', url: 'http://www.backlinktest.com' },
|
||
},
|
||
{
|
||
regex: 'Baidu.*spider|baidu Transcoder',
|
||
name: 'Baidu Spider',
|
||
category: 'Search bot',
|
||
url: 'http://www.baidu.com/search/spider.htm',
|
||
producer: { name: 'Baidu', url: 'http://www.baidu.com' },
|
||
},
|
||
{
|
||
regex: 'BazQux',
|
||
name: 'BazQux Reader',
|
||
url: 'https://bazqux.com/fetcher',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'Better Uptime Bot',
|
||
name: 'Better Uptime Bot',
|
||
category: 'Site Monitor',
|
||
url: 'https://betteruptime.com/faq',
|
||
producer: { name: 'Better Uptime', url: 'https://betteruptime.com/' },
|
||
},
|
||
{
|
||
regex:
|
||
'MSNBot|msrbot|bingbot|bingadsbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot',
|
||
name: 'BingBot',
|
||
category: 'Search bot',
|
||
url: 'http://search.msn.com/msnbot.htmn',
|
||
producer: {
|
||
name: 'Microsoft Corporation',
|
||
url: 'http://www.microsoft.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Blackbox Exporter',
|
||
name: 'Blackbox Exporter',
|
||
category: 'Site Monitor',
|
||
url: 'https://github.com/prometheus/blackbox_exporter',
|
||
producer: { name: 'Prometheus', url: 'https://prometheus.io/' },
|
||
},
|
||
{
|
||
regex: 'Blekkobot',
|
||
name: 'Blekkobot',
|
||
category: 'Search bot',
|
||
url: 'http://blekko.com/about/blekkobot',
|
||
producer: { name: 'Blekko', url: 'http://blekko.com' },
|
||
},
|
||
{
|
||
regex: 'BLEXBot',
|
||
name: 'BLEXBot Crawler',
|
||
category: 'Crawler',
|
||
url: 'http://webmeup-crawler.com',
|
||
producer: { name: 'WebMeUp', url: 'http://webmeup.com' },
|
||
},
|
||
{
|
||
regex: 'Bloglovin',
|
||
name: 'Bloglovin',
|
||
url: 'http://www.bloglovin.com',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'Blogtrottr',
|
||
name: 'Blogtrottr',
|
||
url: '',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: 'Blogtrottr Ltd', url: 'https://blogtrottr.com/' },
|
||
},
|
||
{
|
||
regex: 'BoardReader Blog Indexer',
|
||
name: 'BoardReader Blog Indexer',
|
||
category: 'Crawler',
|
||
producer: { name: 'BoardReader', url: 'https://boardreader.com/' },
|
||
},
|
||
{
|
||
regex: 'BountiiBot',
|
||
name: 'Bountii Bot',
|
||
category: 'Search bot',
|
||
url: 'http://bountii.com/contact.php',
|
||
producer: { name: 'Bountii Inc.', url: 'http://bountii.com' },
|
||
},
|
||
{
|
||
regex: 'Browsershots',
|
||
name: 'Browsershots',
|
||
category: 'Service Agent',
|
||
url: 'http://browsershots.org/faq',
|
||
producer: { name: 'Browsershots.org', url: 'http://browsershots.org' },
|
||
},
|
||
{
|
||
regex: 'BUbiNG',
|
||
name: 'BUbiNG',
|
||
category: 'Crawler',
|
||
url: 'http://law.di.unimi.it/BUbiNG.html',
|
||
producer: {
|
||
name: 'The Laboratory for Web Algorithmics (LAW)',
|
||
url: 'http://law.di.unimi.it/software.php#buging',
|
||
},
|
||
},
|
||
{
|
||
regex: '(?<!HTC)[ _]Butterfly/',
|
||
name: 'Butterfly Robot',
|
||
category: 'Search bot',
|
||
url: 'http://labs.topsy.com/butterfly',
|
||
producer: { name: 'Topsy Labs', url: 'http://labs.topsy.com' },
|
||
},
|
||
{
|
||
regex: 'CareerBot',
|
||
name: 'CareerBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.career-x.de/bot.html',
|
||
producer: { name: 'career-x GmbH', url: 'http://www.career-x.de' },
|
||
},
|
||
{
|
||
regex: 'CCBot',
|
||
name: 'ccBot crawler',
|
||
category: 'Crawler',
|
||
url: 'http://commoncrawl.org/faq/',
|
||
producer: { name: 'reddit inc.', url: 'http://www.reddit.com' },
|
||
},
|
||
{
|
||
regex: 'Cliqzbot',
|
||
name: 'Cliqzbot',
|
||
category: 'Crawler',
|
||
url: 'http://cliqz.com/company/cliqzbot',
|
||
producer: { name: '10betterpages GmbH', url: 'http://cliqz.com' },
|
||
},
|
||
{
|
||
regex: 'Cloudflare-AMP',
|
||
name: 'CloudFlare AMP Fetcher',
|
||
category: 'Crawler',
|
||
url: 'https://amp.cloudflare.com/doc/fetcher.html',
|
||
producer: { name: 'CloudFlare', url: 'http://www.cloudflare.com' },
|
||
},
|
||
{
|
||
regex: 'Cloudflare-?Diagnostics',
|
||
name: 'Cloudflare Diagnostics',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.cloudflare.com/',
|
||
producer: { name: 'Cloudflare', url: 'https://www.cloudflare.com/' },
|
||
},
|
||
{
|
||
regex: 'CloudFlare-AlwaysOnline',
|
||
name: 'CloudFlare Always Online',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.cloudflare.com/always-online',
|
||
producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' },
|
||
},
|
||
{
|
||
regex: 'Cloudflare-SSLDetector',
|
||
name: 'Cloudflare SSL Detector',
|
||
category: 'Site Monitor',
|
||
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/',
|
||
producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' },
|
||
},
|
||
{
|
||
regex: 'Cloudflare Custom Hostname Verification',
|
||
name: 'Cloudflare Custom Hostname Verification',
|
||
category: 'Service Agent',
|
||
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/',
|
||
producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' },
|
||
},
|
||
{
|
||
regex: 'Cloudflare-Traffic-Manager',
|
||
name: 'Cloudflare Traffic Manager',
|
||
category: 'Site Monitor',
|
||
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/',
|
||
producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' },
|
||
},
|
||
{
|
||
regex: 'Cloudflare-Smart-Transit',
|
||
name: 'Cloudflare Smart Transit',
|
||
category: 'Site Monitor',
|
||
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/',
|
||
producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' },
|
||
},
|
||
{
|
||
regex: 'CloudflareObservatory',
|
||
name: 'Cloudflare Observatory',
|
||
category: 'Site Monitor',
|
||
url: 'https://developers.cloudflare.com/speed/speed-test/run-speed-test',
|
||
producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' },
|
||
},
|
||
{
|
||
regex: 'https://developers\\.cloudflare\\.com/security-center/',
|
||
name: 'Cloudflare Security Insights',
|
||
category: 'Site Monitor',
|
||
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/',
|
||
producer: { name: 'CloudFlare', url: 'https://www.cloudflare.com/' },
|
||
},
|
||
{
|
||
regex: 'coccoc\\.com',
|
||
name: 'Cốc Cốc Bot',
|
||
url: 'https://help.coccoc.com/en/search-engine/coccoc-robots',
|
||
category: 'Search bot',
|
||
producer: { name: 'Cốc Cốc', url: 'https://coccoc.com/' },
|
||
},
|
||
{
|
||
regex: 'collectd',
|
||
name: 'Collectd',
|
||
url: 'https://collectd.org/',
|
||
category: 'Site Monitor',
|
||
producer: { name: 'Collectd', url: 'https://collectd.org/' },
|
||
},
|
||
{
|
||
regex: 'CommaFeed',
|
||
name: 'CommaFeed',
|
||
url: 'http://www.commafeed.com',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'CSS Certificate Spider',
|
||
name: 'CSS Certificate Spider',
|
||
category: 'Crawler',
|
||
url: 'http://www.css-security.com/certificatespider/',
|
||
producer: {
|
||
name: 'Certified Security Solutions',
|
||
url: 'https://www.css-security.com/company/about-us/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Datadog Agent|Datadog/?Synthetics',
|
||
name: 'Datadog Agent',
|
||
url: 'https://github.com/DataDog/dd-agent',
|
||
category: 'Site Monitor',
|
||
producer: { name: 'Datadog', url: 'https://www.datadoghq.com/' },
|
||
},
|
||
{
|
||
regex: 'Datanyze',
|
||
name: 'Datanyze',
|
||
url: '',
|
||
category: 'Crawler',
|
||
producer: { name: 'Datanyze', url: 'https://www.datanyze.com' },
|
||
},
|
||
{
|
||
regex: 'Dataprovider',
|
||
name: 'Dataprovider',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Dataprovider B.V.',
|
||
url: 'https://www.dataprovider.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Daum(?!(?:Apps|Device))',
|
||
name: 'Daum',
|
||
category: 'Search bot',
|
||
url: 'http://tab.search.daum.net/aboutWebSearch_en.html',
|
||
producer: {
|
||
name: 'Daum Communications Corp.',
|
||
url: 'http://www.kakaocorp.com/main',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Dazoobot',
|
||
name: 'Dazoobot',
|
||
category: 'Search bot',
|
||
url: '',
|
||
producer: { name: 'DAZOO.FR', url: 'http://dazoo.fr' },
|
||
},
|
||
{
|
||
regex: 'discobot',
|
||
name: 'Discobot',
|
||
category: 'Search bot',
|
||
url: 'http://discoveryengine.com/discobot.html',
|
||
producer: { name: 'Discovery Engine', url: 'http://discoveryengine.com' },
|
||
},
|
||
{
|
||
regex: 'Domain Re-Animator Bot|support@domainreanimator\\.com',
|
||
name: 'Domain Re-Animator Bot',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Domain Re-Animator, LLC',
|
||
url: 'http://domainreanimator.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'DotBot',
|
||
name: 'DotBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.opensiteexplorer.org/dotbot',
|
||
producer: { name: 'SEOmoz, Inc.', url: 'http://moz.com/' },
|
||
},
|
||
{
|
||
regex: 'DuckDuck(?:Go-Favicons-)?Bot',
|
||
name: 'DuckDuckBot',
|
||
category: 'Search bot',
|
||
url: 'https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/',
|
||
producer: { name: 'DuckDuckGo', url: 'https://duckduckgo.com/' },
|
||
},
|
||
{
|
||
regex: 'DuckAssistBot',
|
||
name: 'DuckAssistBot',
|
||
category: 'Search bot',
|
||
url: 'https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot/',
|
||
producer: { name: 'DuckDuckGo', url: 'https://duckduckgo.com/' },
|
||
},
|
||
{
|
||
regex: 'EasouSpider',
|
||
name: 'Easou Spider',
|
||
category: 'Search bot',
|
||
url: 'http://www.easou.com/search/spider.html',
|
||
producer: { name: 'easou ICP', url: 'http://www.easou.com' },
|
||
},
|
||
{
|
||
regex: 'eCairn-Grabber',
|
||
name: 'eCairn-Grabber',
|
||
category: 'Crawler',
|
||
producer: { name: 'eCairn', url: 'https://ecairn.com' },
|
||
},
|
||
{
|
||
regex: 'EMail Exractor',
|
||
name: 'EMail Exractor',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'evc-batch',
|
||
name: 'evc-batch',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'eVenture Capital Partners II, LLC',
|
||
url: 'http://www.eventures.vc/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Exabot|ExaleadCloudview',
|
||
name: 'ExaBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.exabot.com/go/robot',
|
||
producer: { name: 'Dassault Systèmes', url: 'http://www.3ds.com' },
|
||
},
|
||
{
|
||
regex: 'ExactSeek Crawler',
|
||
name: 'ExactSeek Crawler',
|
||
category: 'Search bot',
|
||
url: 'http://www.exactseek.com',
|
||
producer: {
|
||
name: 'Jayde Online, Inc.',
|
||
url: 'http://www.jaydeonlineinc.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Ezooms',
|
||
name: 'Ezooms',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: 'SEOmoz, Inc.', url: 'http://moz.com/' },
|
||
},
|
||
{
|
||
regex: 'facebook(?:catalog|externalhit|externalua|platform|scraper)',
|
||
name: 'Facebook Crawler',
|
||
category: 'Social Media Agent',
|
||
url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers',
|
||
producer: { name: 'Meta Platforms, Inc.', url: 'https://www.meta.com/' },
|
||
},
|
||
{
|
||
regex: 'meta-externalagent',
|
||
name: 'Meta-ExternalAgent',
|
||
category: 'Crawler',
|
||
url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers',
|
||
producer: { name: 'Meta Platforms, Inc.', url: 'https://www.meta.com/' },
|
||
},
|
||
{
|
||
regex: 'meta-externalfetcher',
|
||
name: 'Meta-ExternalFetcher',
|
||
category: 'Social Media Agent',
|
||
url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers',
|
||
producer: { name: 'Meta Platforms, Inc.', url: 'https://www.meta.com/' },
|
||
},
|
||
{
|
||
regex: 'FacebookBot/[\\d.]+',
|
||
name: 'FacebookBot',
|
||
category: 'Crawler',
|
||
url: 'https://developers.facebook.com/docs/sharing/bot',
|
||
producer: { name: 'Meta Platforms, Inc.', url: 'https://www.meta.com/' },
|
||
},
|
||
{
|
||
regex: 'Feedbin',
|
||
name: 'Feedbin',
|
||
url: 'http://feedbin.com/',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'FeedBurner',
|
||
name: 'FeedBurner',
|
||
url: 'http://www.feedburner.com',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'Feed Wrangler',
|
||
name: 'Feed Wrangler',
|
||
url: 'https://feedwrangler.net/',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: 'David Smith & Developing Perspective, LLC',
|
||
url: 'https://david-smith.org',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Feedly',
|
||
name: 'Feedly',
|
||
url: 'http://www.feedly.com',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'Feedspot',
|
||
name: 'Feedspot',
|
||
url: 'http://www.feedspot.com',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'Fever/[0-9]',
|
||
name: 'Fever',
|
||
url: 'http://feedafever.com/',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'FlipboardProxy|FlipboardRSS',
|
||
name: 'Flipboard',
|
||
url: 'http://flipboard.com/browserproxy',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: 'Flipboard', url: 'http://flipboard.com/' },
|
||
},
|
||
{
|
||
regex: 'Findxbot',
|
||
name: 'Findxbot',
|
||
category: 'Crawler',
|
||
url: 'http://www.findxbot.com',
|
||
},
|
||
{
|
||
regex: 'FreshRSS',
|
||
name: 'FreshRSS',
|
||
category: 'Feed Fetcher',
|
||
url: 'https://freshrss.org/',
|
||
},
|
||
{
|
||
regex: 'Genieo',
|
||
name: 'Genieo Web filter',
|
||
category: '',
|
||
url: 'http://www.genieo.com/webfilter.html',
|
||
producer: { name: 'Genieo', url: 'http://www.genieo.com' },
|
||
},
|
||
{
|
||
regex: 'GigablastOpenSource',
|
||
name: 'Gigablast',
|
||
category: 'Search bot',
|
||
url: 'https://github.com/gigablast/open-source-search-engine',
|
||
producer: { name: 'Matt Wells', url: 'http://www.gigablast.com/faq.html' },
|
||
},
|
||
{
|
||
regex: 'Gluten Free Crawler',
|
||
name: 'Gluten Free Crawler',
|
||
category: 'Crawler',
|
||
url: 'http://glutenfreepleasure.com/',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'gobuster',
|
||
name: 'Gobuster',
|
||
url: 'https://github.com/OJ/gobuster',
|
||
},
|
||
{
|
||
regex: 'ichiro/mobile goo',
|
||
name: 'Goo',
|
||
category: 'Search bot',
|
||
url: 'http://search.goo.ne.jp/option/use/sub4/sub4-1',
|
||
producer: { name: 'NTT Resonant', url: 'http://goo.ne.jp' },
|
||
},
|
||
{ regex: 'Storebot-Google', name: 'Google StoreBot', category: 'Crawler' },
|
||
{ regex: 'Google Favicon', name: 'Google Favicon', category: 'Crawler' },
|
||
{
|
||
regex: 'Google Search Console',
|
||
name: 'Google Search Console',
|
||
category: 'Crawler',
|
||
url: 'https://search.google.com/search-console/about',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'Google Page Speed Insights',
|
||
name: 'Google PageSpeed Insights',
|
||
category: 'Site Monitor',
|
||
url: 'http://developers.google.com/speed/pagespeed/insights/',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'google_partner_monitoring',
|
||
name: 'Google Partner Monitoring',
|
||
category: 'Site Monitor',
|
||
url: '',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'Google-Cloud-Scheduler',
|
||
name: 'Google Cloud Scheduler',
|
||
category: 'Crawler',
|
||
url: 'https://cloud.google.com/scheduler',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com' },
|
||
},
|
||
{
|
||
regex: 'Google-Structured-Data-Testing-Tool',
|
||
name: 'Google Structured Data Testing Tool',
|
||
category: 'Validator',
|
||
url: 'https://search.google.com/structured-data/testing-tool',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'GoogleStackdriverMonitoring',
|
||
name: 'Google Stackdriver Monitoring',
|
||
category: 'Site Monitor',
|
||
url: 'https://cloud.google.com/monitoring',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com' },
|
||
},
|
||
{
|
||
regex: 'Google-Transparency-Report',
|
||
name: 'Google Transparency Report',
|
||
category: 'Site Monitor',
|
||
url: 'https://transparencyreport.google.com/',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'Google-CloudVertexBot',
|
||
name: 'Google-CloudVertexBot',
|
||
category: 'Crawler',
|
||
url: 'https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers#google-cloudvertexbot',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'via ggpht\\.com GoogleImageProxy',
|
||
name: 'Gmail Image Proxy',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'Google-Document-Conversion',
|
||
name: 'Google-Document-Conversion',
|
||
category: 'Service Agent',
|
||
url: 'https://support.google.com/drive/answer/176692?hl=en',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'GoogleDocs; apps-spreadsheets',
|
||
name: 'Google Sheets',
|
||
category: 'Service Agent',
|
||
url: 'https://workspace.google.com/products/sheets/',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'GoogleDocs; apps-presentations',
|
||
name: 'Google Slides',
|
||
category: 'Service Agent',
|
||
url: 'https://workspace.google.com/products/slides/',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'GoogleDocs;',
|
||
name: 'Google Docs',
|
||
category: 'Service Agent',
|
||
url: 'https://docs.google.com/',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'SeznamEmailProxy',
|
||
name: 'Seznam Email Proxy',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: 'Seznam.cz, a.s.', url: 'http://www.seznam.cz/' },
|
||
},
|
||
{
|
||
regex: 'Seznam-Zbozi-robot',
|
||
name: 'Seznam Zbozi.cz',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: 'Seznam.cz, a.s.', url: 'https://www.zbozi.cz/' },
|
||
},
|
||
{
|
||
regex: 'Heurekabot-Feed',
|
||
name: 'Heureka Feed',
|
||
category: 'Crawler',
|
||
url: 'https://sluzby.heureka.cz/napoveda/heurekabot/',
|
||
producer: { name: 'Heureka.cz, a.s.', url: 'https://www.heureka.cz/' },
|
||
},
|
||
{
|
||
regex: 'ShopAlike',
|
||
name: 'ShopAlike',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: 'Visual Meta', url: 'https://www.shopalike.cz/' },
|
||
},
|
||
{
|
||
regex: 'deepcrawl\\.com',
|
||
name: 'Lumar',
|
||
category: 'Crawler',
|
||
url: 'https://deepcrawl.com/bot',
|
||
producer: { name: 'Lumar', url: 'https://www.lumar.io/' },
|
||
},
|
||
{
|
||
regex: 'Googlebot-News',
|
||
name: 'Googlebot News',
|
||
category: 'Search bot',
|
||
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex:
|
||
'Adwords-(?:DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(?:adstxt|Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Shopping-Quality|Site-Verification|Sites-Thumbnails|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer|Sites)|Google.*/\\+/web/snippet',
|
||
name: 'Googlebot',
|
||
category: 'Search bot',
|
||
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: '^Google$',
|
||
name: 'Googlebot',
|
||
category: 'Search bot',
|
||
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'Google-Safety',
|
||
name: 'Google-Safety',
|
||
category: 'Crawler',
|
||
url: 'https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'DuplexWeb-Google',
|
||
name: 'DuplexWeb-Google',
|
||
category: 'Crawler',
|
||
url: 'https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'Google-Area120-PrivacyPolicyFetcher',
|
||
name: 'Google Area 120 Privacy Policy Fetcher',
|
||
category: 'Crawler',
|
||
url: 'https://area120.google.com/',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'HubSpot ',
|
||
name: 'HubSpot',
|
||
category: 'Crawler',
|
||
producer: { name: 'HubSpot Inc.', url: 'https://www.hubspot.com' },
|
||
},
|
||
{
|
||
regex: 'vuhuv(?:Bot|RBT)',
|
||
name: 'vuhuvBot',
|
||
category: 'Search bot',
|
||
url: 'https://vuhuv.com/bot.html',
|
||
},
|
||
{
|
||
regex: 'HTTPMon/[\\d.]+',
|
||
name: 'HTTPMon',
|
||
category: 'Site Monitor',
|
||
url: 'http://www.httpmon.com',
|
||
producer: { name: 'towards GmbH', url: 'http://www.towards.ch/' },
|
||
},
|
||
{
|
||
regex: 'ICC-Crawler',
|
||
name: 'ICC-Crawler',
|
||
category: 'Crawler',
|
||
url: 'http://www.nict.go.jp/en/univ-com/plan/crawl.html',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'inoreader\\.com',
|
||
name: 'inoreader',
|
||
category: 'Feed Reader',
|
||
url: 'https://www.inoreader.com',
|
||
},
|
||
{
|
||
regex: 'iisbot',
|
||
name: 'IIS Site Analysis',
|
||
category: 'Crawler',
|
||
url: 'http://www.iis.net/iisbot.html',
|
||
producer: {
|
||
name: 'Microsoft Corporation',
|
||
url: 'http://www.microsoft.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'ips-agent',
|
||
name: 'IPS Agent',
|
||
category: 'Crawler',
|
||
producer: { name: 'VeriSign, Inc', url: 'http://www.verisign.com/' },
|
||
},
|
||
{
|
||
regex: 'IP-Guide\\.com',
|
||
name: 'IP-Guide Crawler',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: '', url: 'https://ip-guide.com' },
|
||
},
|
||
{ regex: 'k6/[0-9.]+', name: 'K6', url: 'https://k6.io/' },
|
||
{
|
||
regex: 'kouio',
|
||
name: 'Kouio',
|
||
url: 'http://kouio.com/',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'larbin',
|
||
name: 'Larbin web crawler',
|
||
category: 'Crawler',
|
||
url: 'http://larbin.sourceforge.net',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: '[A-z0-9]*-Lighthouse',
|
||
name: 'Lighthouse',
|
||
category: 'Site Monitor',
|
||
url: 'https://developers.google.com/web/tools/lighthouse',
|
||
producer: {
|
||
name: 'Lighthouse',
|
||
url: 'https://developers.google.com/web/tools/lighthouse',
|
||
},
|
||
},
|
||
{
|
||
regex: 'last-modified\\.com',
|
||
name: 'LastMod Bot',
|
||
category: 'Site Monitor',
|
||
url: 'https://last-modified.com/en/about',
|
||
producer: { name: '', url: 'https://last-modified.com/en' },
|
||
},
|
||
{
|
||
regex: 'linkdexbot|linkdex\\.com',
|
||
name: 'Linkdex Bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.linkdex.com/bots',
|
||
producer: { name: 'Mojeek Ltd.', url: 'http://www.mojeek.com' },
|
||
},
|
||
{
|
||
regex: 'LinkedInBot',
|
||
name: 'LinkedIn Bot',
|
||
category: 'Social Media Agent',
|
||
url: 'http://www.linkedin.com',
|
||
producer: { name: 'LinkedIn', url: 'http://www.linkedin.com' },
|
||
},
|
||
{
|
||
regex: 'ltx71',
|
||
name: 'LTX71',
|
||
category: 'Security Checker',
|
||
url: 'https://ltx71.com/',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'Mail\\.RU',
|
||
name: 'Mail.Ru Bot',
|
||
category: 'Search bot',
|
||
url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots',
|
||
producer: { name: 'Mail.Ru Group', url: 'http://corp.mail.ru' },
|
||
},
|
||
{
|
||
regex: 'magpie-crawler',
|
||
name: 'Magpie-Crawler',
|
||
category: 'Social Media Agent',
|
||
url: 'http://www.brandwatch.com/magpie-crawler/',
|
||
producer: { name: 'Brandwatch', url: 'http://www.brandwatch.com' },
|
||
},
|
||
{
|
||
regex: 'MagpieRSS',
|
||
name: 'MagpieRSS',
|
||
url: 'http://magpierss.sourceforge.net/',
|
||
category: 'Feed Parser',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'masscan-ng/[\\d.]+',
|
||
name: 'masscan-ng',
|
||
url: 'https://github.com/bi-zone/masscan-ng',
|
||
category: 'Crawler',
|
||
producer: { name: 'BIZON, OOO', url: 'https://bi.zone/' },
|
||
},
|
||
{
|
||
regex: '.*masscan',
|
||
name: 'masscan',
|
||
url: 'https://github.com/robertdavidgraham/masscan',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'Robert Graham',
|
||
url: 'https://github.com/robertdavidgraham',
|
||
},
|
||
},
|
||
{ regex: 'Mastodon/', name: 'Mastodon Bot', category: 'Social Media Agent' },
|
||
{
|
||
regex: 'meanpathbot',
|
||
name: 'Meanpath Bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.meanpath.com/meanpathbot.html',
|
||
producer: { name: 'Meanpath', url: 'http://www.meanpath.com' },
|
||
},
|
||
{
|
||
regex: 'MetaJobBot',
|
||
name: 'MetaJobBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.metajob.at/the/crawler',
|
||
producer: { name: 'MetaJob', url: 'http://www.metajob.at' },
|
||
},
|
||
{
|
||
regex: 'MetaInspector',
|
||
name: 'MetaInspector',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/jaimeiniesta/metainspector',
|
||
},
|
||
{
|
||
regex: 'MixrankBot',
|
||
name: 'Mixrank Bot',
|
||
category: 'Crawler',
|
||
url: 'http://mixrank.com',
|
||
producer: { name: 'Online Media Group, Inc.', url: '' },
|
||
},
|
||
{
|
||
regex: 'MJ12bot',
|
||
name: 'MJ12 Bot',
|
||
category: 'Search bot',
|
||
url: 'http://majestic12.co.uk/bot.php',
|
||
producer: { name: 'Majestic-12', url: 'http://majestic12.co.uk' },
|
||
},
|
||
{
|
||
regex: 'Mnogosearch',
|
||
name: 'Mnogosearch',
|
||
category: 'Search bot',
|
||
url: 'http://www.mnogosearch.org/',
|
||
producer: { name: 'Lavtech.Com Corp.', url: '' },
|
||
},
|
||
{
|
||
regex: 'MojeekBot',
|
||
name: 'MojeekBot',
|
||
category: 'Search bot',
|
||
url: 'http://www.mojeek.com/bot.html',
|
||
producer: { name: 'Mojeek Ltd.', url: 'http://www.mojeek.com' },
|
||
},
|
||
{
|
||
regex: 'munin',
|
||
name: 'Munin',
|
||
category: 'Site Monitor',
|
||
url: 'http://munin-monitoring.org/',
|
||
producer: { name: 'Munin', url: 'http://munin-monitoring.org/' },
|
||
},
|
||
{
|
||
regex: 'NalezenCzBot',
|
||
name: 'NalezenCzBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.nalezen.cz/about-crawler',
|
||
producer: { name: 'Jaroslav Kuboš', url: '' },
|
||
},
|
||
{
|
||
regex: 'check_http/v',
|
||
name: 'Nagios check_http',
|
||
category: 'Site Monitor',
|
||
url: 'https://nagios.org',
|
||
producer: {
|
||
name: 'Nagios Plugins Development Team',
|
||
url: 'https://nagios.org',
|
||
},
|
||
},
|
||
{
|
||
regex: 'nbertaupete95\\(at\\)gmail\\.com',
|
||
name: 'nbertaupete95',
|
||
category: 'Crawler',
|
||
},
|
||
{
|
||
regex: 'Netcraft(?: Web Server Survey| SSL Server Survey|SurveyAgent)',
|
||
name: 'Netcraft Survey Bot',
|
||
category: 'Search bot',
|
||
url: '',
|
||
producer: { name: 'Netcraft', url: 'http://www.netcraft.com' },
|
||
},
|
||
{
|
||
regex: 'netEstate NE Crawler',
|
||
name: 'netEstate',
|
||
category: 'Crawler',
|
||
url: 'http://www.website-datenbank.de/Impressum',
|
||
producer: { name: 'netEstate GmbH', url: 'https://www.netestate.de/en/' },
|
||
},
|
||
{
|
||
regex: 'Netvibes',
|
||
name: 'Netvibes',
|
||
url: 'http://www.netvibes.com/',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'NewsBlur .*(?:Fetcher|Finder)',
|
||
name: 'NewsBlur',
|
||
url: 'http://www.newsblur.com',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'NewsGatorOnline',
|
||
name: 'NewsGator',
|
||
url: 'http://www.newsgator.com',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'nlcrawler',
|
||
name: 'NLCrawler',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: 'Northern Light', url: 'http://northernlight.com' },
|
||
},
|
||
{
|
||
regex: 'Nmap Scripting Engine',
|
||
name: 'Nmap',
|
||
category: 'Security Checker',
|
||
url: 'https://nmap.org/book/nse.html',
|
||
producer: { name: 'Nmap', url: 'https://nmap.org/' },
|
||
},
|
||
{
|
||
regex: 'Nuzzel',
|
||
name: 'Nuzzel',
|
||
category: 'Crawler',
|
||
producer: { name: 'Nuzzel', url: 'https://www.nuzzel.com/' },
|
||
},
|
||
{
|
||
regex: 'NodePing',
|
||
name: 'NodePing',
|
||
category: 'Site Monitor',
|
||
url: 'https://nodeping.com',
|
||
producer: { name: 'NodePing', url: 'https://nodeping.com' },
|
||
},
|
||
{ regex: 'Octopus [0-9]', name: 'Octopus' },
|
||
{
|
||
regex: 'OnlineOrNot\\.com_bot',
|
||
name: 'OnlineOrNot Bot',
|
||
category: 'Site Monitor',
|
||
url: 'https://onlineornot.com/website-monitoring',
|
||
producer: { name: 'OnlineOrNot', url: 'https://onlineornot.com' },
|
||
},
|
||
{
|
||
regex: 'omgili',
|
||
name: 'Omgili bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.omgili.com/Crawler.html',
|
||
producer: { name: 'Omgili', url: 'http://www.omgili.com' },
|
||
},
|
||
{
|
||
regex: 'OpenindexSpider',
|
||
name: 'Openindex Spider',
|
||
category: 'Search bot',
|
||
url: 'http://www.openindex.io/en/webmasters/spider.html',
|
||
producer: { name: 'Openindex B.V.', url: 'http://www.openindex.io' },
|
||
},
|
||
{
|
||
regex: 'spbot',
|
||
name: 'OpenLinkProfiler',
|
||
category: 'Crawler',
|
||
url: 'http://openlinkprofiler.org/bot',
|
||
producer: { name: 'Axandra GmbH', url: 'http://www.axandra.com' },
|
||
},
|
||
{
|
||
regex: 'OpenWebSpider',
|
||
name: 'OpenWebSpider',
|
||
category: 'Crawler',
|
||
url: 'http://www.openwebspider.org',
|
||
producer: {
|
||
name: 'OpenWebSpider Lab',
|
||
url: 'http://lab.openwebspider.org',
|
||
},
|
||
},
|
||
{
|
||
regex: 'OrangeBot|VoilaBot',
|
||
name: 'Orange Bot',
|
||
category: 'Search bot',
|
||
url: 'http://lemoteur.orange.fr',
|
||
producer: { name: 'Orange', url: 'http://www.orange.fr' },
|
||
},
|
||
{
|
||
regex: 'PaperLiBot',
|
||
name: 'PaperLiBot',
|
||
category: 'Search bot',
|
||
url: 'http://support.paper.li/entries/20023257-what-is-paper-li',
|
||
producer: { name: 'Smallrivers SA', url: 'http://www.paper.li' },
|
||
},
|
||
{
|
||
regex: 'phantomas/',
|
||
name: 'Phantomas',
|
||
category: 'Site Monitor',
|
||
url: 'https://github.com/macbre/phantomas',
|
||
},
|
||
{
|
||
regex: 'phpservermon',
|
||
name: 'PHP Server Monitor',
|
||
category: 'Site Monitor',
|
||
url: 'https://github.com/phpservermon/phpservermon',
|
||
producer: {
|
||
name: 'PHP Server Monitor',
|
||
url: 'http://www.phpservermonitor.org/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Pocket(?:ImageCache|Parser)/[\\d.]+',
|
||
name: 'Pocket',
|
||
category: 'Read-it-later Service',
|
||
url: 'https://getpocket.com/pocketparser_ua',
|
||
producer: { name: 'Read It Later, Inc.', url: 'https://getpocket.com/' },
|
||
},
|
||
{
|
||
regex: 'PritTorrent',
|
||
name: 'PritTorrent',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/astro/prittorrent',
|
||
producer: { name: 'Bitlove', url: 'http://bitlove.org/' },
|
||
},
|
||
{
|
||
regex: 'PRTG Network Monitor',
|
||
name: 'PRTG Network Monitor',
|
||
category: 'Network Monitor',
|
||
url: 'https://www.paessler.com/prtg',
|
||
producer: { name: 'Paessler AG', url: 'https://www.paessler.com' },
|
||
},
|
||
{
|
||
regex: 'psbot',
|
||
name: 'Picsearch bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.picsearch.com/bot.html',
|
||
producer: { name: 'Picsearch', url: 'http://www.picsearch.com' },
|
||
},
|
||
{
|
||
regex: 'Pingdom(?:\\.com|TMS)',
|
||
name: 'Pingdom Bot',
|
||
category: 'Site Monitor',
|
||
url: '',
|
||
producer: { name: 'Pingdom AB', url: 'https://www.pingdom.com' },
|
||
},
|
||
{
|
||
regex: 'Quora Link Preview',
|
||
name: 'Quora Link Preview',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: 'Quora', url: 'http://www.quora.com' },
|
||
},
|
||
{
|
||
regex: 'Quora-Bot',
|
||
name: 'Quora Bot',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: 'Quora', url: 'https://www.quora.com/' },
|
||
},
|
||
{
|
||
regex: 'RamblerMail',
|
||
name: 'RamblerMail Image Proxy',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: 'Rambler&Co', url: 'https://rambler-co.ru/' },
|
||
},
|
||
{
|
||
regex: 'QuerySeekerSpider',
|
||
name: 'QuerySeekerSpider',
|
||
category: 'Crawler',
|
||
url: 'http://queryseeker.com/bot.html',
|
||
producer: { name: 'QueryEye Inc.', url: 'http://queryeye.com' },
|
||
},
|
||
{
|
||
regex: 'Qwantify|Qwantbot',
|
||
name: 'Qwantbot',
|
||
category: 'Crawler',
|
||
url: 'https://help.qwant.com/bot/',
|
||
producer: { name: 'Qwant Corporation', url: 'https://www.qwant.com/' },
|
||
},
|
||
{
|
||
regex: 'Rainmeter',
|
||
name: 'Rainmeter',
|
||
category: 'Crawler',
|
||
url: 'https://www.rainmeter.net',
|
||
},
|
||
{
|
||
regex: 'redditbot',
|
||
name: 'Reddit Bot',
|
||
category: 'Social Media Agent',
|
||
url: 'http://www.reddit.com/feedback',
|
||
producer: { name: 'reddit inc.', url: 'http://www.reddit.com' },
|
||
},
|
||
{
|
||
regex: 'Riddler',
|
||
name: 'Riddler',
|
||
category: 'Security search bot',
|
||
url: 'https://riddler.io/about',
|
||
producer: { name: 'F-Secure', url: 'https://www.f-secure.com' },
|
||
},
|
||
{
|
||
regex: 'rogerbot',
|
||
name: 'Rogerbot',
|
||
category: 'Crawler',
|
||
url: 'http://moz.com/help/pro/what-is-rogerbot-',
|
||
producer: { name: 'SEOmoz, Inc.', url: 'http://moz.com/' },
|
||
},
|
||
{
|
||
regex: 'ROI Hunter',
|
||
name: 'ROI Hunter',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: 'Roihunter a.s.', url: 'http://roihunter.com/' },
|
||
},
|
||
{
|
||
regex: 'SafeDNSBot',
|
||
name: 'SafeDNSBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.safedns.com/searchbot',
|
||
producer: { name: 'SafeDNS, Inc.', url: 'https://www.safedns.com/' },
|
||
},
|
||
{
|
||
regex: 'Scrapy',
|
||
name: 'Scrapy',
|
||
category: 'Crawler',
|
||
url: 'http://scrapy.org',
|
||
},
|
||
{
|
||
regex: 'Screaming Frog SEO Spider',
|
||
name: 'Screaming Frog SEO Spider',
|
||
category: 'Crawler',
|
||
url: 'http://www.screamingfrog.co.uk/seo-spider',
|
||
producer: {
|
||
name: 'Screaming Frog Ltd',
|
||
url: 'http://www.screamingfrog.co.uk',
|
||
},
|
||
},
|
||
{
|
||
regex: 'ScreenerBot',
|
||
name: 'ScreenerBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.screenerbot.com',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'SemrushBot',
|
||
name: 'SemrushBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.semrush.com/bot/',
|
||
producer: { name: 'Semrush Inc.', url: 'https://www.semrush.com/' },
|
||
},
|
||
{
|
||
regex: 'SerpReputationManagementAgent/[\\d.]+',
|
||
name: 'Semrush Reputation Management',
|
||
category: 'Service Agent',
|
||
url: 'https://www.semrush.com/bot/',
|
||
producer: { name: 'Semrush Inc.', url: 'https://www.semrush.com/' },
|
||
},
|
||
{
|
||
regex: 'SplitSignalBot',
|
||
name: 'SplitSignalBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.semrush.com/bot/',
|
||
producer: { name: 'Semrush Inc.', url: 'https://www.semrush.com/' },
|
||
},
|
||
{
|
||
regex: 'SiteAuditBot/[\\d.]+',
|
||
name: 'SiteAuditBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.semrush.com/bot/',
|
||
producer: { name: 'Semrush Inc.', url: 'https://www.semrush.com/' },
|
||
},
|
||
{
|
||
regex: 'SensikaBot',
|
||
name: 'Sensika Bot',
|
||
category: '',
|
||
url: '',
|
||
producer: { name: 'Sensika', url: 'http://sensika.com' },
|
||
},
|
||
{
|
||
regex: 'SEOENG(?:World)?Bot',
|
||
name: 'SEOENGBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.seoengine.com/seoengbot.htm',
|
||
producer: { name: 'SEO Engine', url: 'http://www.seoengine.com' },
|
||
},
|
||
{
|
||
regex: 'SEOkicks-Robot',
|
||
name: 'SEOkicks-Robot',
|
||
category: 'Crawler',
|
||
url: 'http://www.seokicks.de/robot.html',
|
||
producer: { name: 'SEOkicks', url: 'https://www.seokicks.de/' },
|
||
},
|
||
{
|
||
regex: 'seoscanners\\.net',
|
||
name: 'Seoscanners.net',
|
||
category: 'Crawler',
|
||
url: '',
|
||
},
|
||
{
|
||
regex: 'SkypeUriPreview',
|
||
name: 'Skype URI Preview',
|
||
category: 'Service Agent',
|
||
url: '',
|
||
producer: {
|
||
name: 'Skype Communications S.à.r.l.',
|
||
url: 'https://www.skype.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'SeznamBot|SklikBot|Seznam screenshot-generator',
|
||
name: 'Seznam Bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.mapy.cz/cz/seznambot.html',
|
||
producer: { name: 'Seznam.cz, a.s.', url: 'http://www.seznam.cz/' },
|
||
},
|
||
{
|
||
regex: 'shopify-partner-homepage-scraper',
|
||
name: 'Shopify Partner',
|
||
category: 'Crawler',
|
||
url: 'https://www.shopify.com/partners',
|
||
producer: { name: 'Shopify', url: 'https://www.shopify.com/' },
|
||
},
|
||
{
|
||
regex: 'ShopWiki',
|
||
name: 'ShopWiki',
|
||
category: 'Search tools',
|
||
url: 'http://www.shopwiki.com/wiki/Help:Bot',
|
||
producer: { name: 'ShopWiki Corp.', url: 'http://www.shopwiki.com' },
|
||
},
|
||
{
|
||
regex: 'SilverReader',
|
||
name: 'SilverReader',
|
||
url: 'http://silverreader.com',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'SimplePie',
|
||
name: 'SimplePie',
|
||
url: 'http://www.simplepie.org',
|
||
category: 'Feed Parser',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'SISTRIX Crawler',
|
||
name: 'SISTRIX Crawler',
|
||
category: 'Crawler',
|
||
url: 'http://crawler.sistrix.net',
|
||
producer: { name: 'SISTRIX GmbH', url: 'http://www.sistrix.de' },
|
||
},
|
||
{
|
||
regex: 'compatible; (?:SISTRIX )?Optimizer',
|
||
name: 'SISTRIX Optimizer',
|
||
category: 'Crawler',
|
||
url: 'https://optimizer.sistrix.com',
|
||
producer: { name: 'SISTRIX GmbH', url: 'http://www.sistrix.de' },
|
||
},
|
||
{
|
||
regex: 'SiteSucker',
|
||
name: 'SiteSucker',
|
||
category: 'Crawler',
|
||
url: 'http://ricks-apps.com/osx/sitesucker/',
|
||
},
|
||
{
|
||
regex: 'sixy\\.ch',
|
||
name: 'Sixy.ch',
|
||
category: 'Site Monitor',
|
||
url: 'http://sixy.ch',
|
||
producer: { name: 'Manuel Kasper', url: 'https://neon1.net/' },
|
||
},
|
||
{
|
||
regex: 'Slackbot|Slack-ImgProxy',
|
||
name: 'Slackbot',
|
||
category: 'Crawler',
|
||
url: 'https://api.slack.com/robots',
|
||
producer: { name: 'Slack Technologies', url: 'http://slack.com' },
|
||
},
|
||
{
|
||
regex:
|
||
'Sogou[ -](?:head|inst|Orion|Pic|Test|web)[ -]spider|New-Sogou-Spider',
|
||
name: 'Sogou Spider',
|
||
category: 'Search bot',
|
||
url: 'http://www.sogou.com/docs/help/webmasters.htm',
|
||
producer: { name: 'Sohu, Inc.', url: 'http://www.sogou.com' },
|
||
},
|
||
{
|
||
regex: 'Sosospider|Sosoimagespider',
|
||
name: 'Soso Spider',
|
||
category: 'Search bot',
|
||
url: 'http://help.soso.com/webspider.htm',
|
||
producer: { name: 'Tencent Holdings', url: 'http://www.soso.com' },
|
||
},
|
||
{
|
||
regex: 'Sprinklr',
|
||
name: 'Sprinklr',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: 'Sprinklr, Inc.', url: 'https://www.sprinklr.com/' },
|
||
},
|
||
{
|
||
regex: 'sqlmap/',
|
||
name: 'sqlmap',
|
||
category: 'Security Checker',
|
||
url: 'http://sqlmap.org/',
|
||
producer: { name: 'sqlmap', url: 'http://sqlmap.org/' },
|
||
},
|
||
{
|
||
regex: 'SSL Labs',
|
||
name: 'SSL Labs',
|
||
category: 'Validator',
|
||
url: 'https://www.ssllabs.com/about/assessment.html',
|
||
producer: {
|
||
name: 'SSL Labs',
|
||
url: 'https://www.ssllabs.com/about/assessment.html',
|
||
},
|
||
},
|
||
{
|
||
regex: 'StatusCake',
|
||
name: 'StatusCake',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.statuscake.com',
|
||
producer: { name: 'StatusCake', url: 'https://www.statuscake.com' },
|
||
},
|
||
{
|
||
regex: 'Superfeedr bot',
|
||
name: 'Superfeedr Bot',
|
||
category: 'Feed Fetcher',
|
||
url: '',
|
||
producer: { name: 'Superfeedr', url: 'https://superfeedr.com/' },
|
||
},
|
||
{
|
||
regex: 'Sparkler/[0-9]',
|
||
name: 'Sparkler',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/USCDataScience/sparkler',
|
||
},
|
||
{
|
||
regex: 'Spinn3r',
|
||
name: 'Spinn3r',
|
||
category: 'Crawler',
|
||
url: 'http://spinn3r.com/robot',
|
||
producer: { name: 'Tailrank Inc', url: 'http://spinn3r.com' },
|
||
},
|
||
{ regex: 'SputnikBot', name: 'Sputnik Bot', category: 'Crawler', url: '' },
|
||
{
|
||
regex: 'SputnikFaviconBot',
|
||
name: 'Sputnik Favicon Bot',
|
||
category: 'Crawler',
|
||
url: '',
|
||
},
|
||
{
|
||
regex: 'SputnikImageBot',
|
||
name: 'Sputnik Image Bot',
|
||
category: 'Crawler',
|
||
url: '',
|
||
},
|
||
{
|
||
regex: 'SurveyBot',
|
||
name: 'Survey Bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.domaintools.com/webmasters/surveybot.php',
|
||
producer: { name: 'Domain Tools', url: 'http://www.domaintools.com' },
|
||
},
|
||
{
|
||
regex: 'TarmotGezgin',
|
||
name: 'Tarmot Gezgin',
|
||
url: 'http://www.tarmot.com/gezgin/',
|
||
category: 'Search bot',
|
||
},
|
||
{
|
||
regex: 'TelegramBot',
|
||
name: 'TelegramBot',
|
||
url: 'https://telegram.org/blog/bot-revolution',
|
||
},
|
||
{
|
||
regex: 'TLSProbe',
|
||
name: 'TLSProbe',
|
||
url: 'https://scan.trustnet.venafi.com/',
|
||
category: 'Security search bot',
|
||
producer: { name: 'Venafi TrustNet', url: 'https://www.venafi.com' },
|
||
},
|
||
{
|
||
regex: 'TinEye-bot',
|
||
name: 'TinEye Crawler',
|
||
category: 'Search bot',
|
||
url: 'http://www.tineye.com/crawler.html',
|
||
producer: { name: 'Idée Inc.', url: 'http://ideeinc.com' },
|
||
},
|
||
{
|
||
regex: 'Tiny Tiny RSS',
|
||
name: 'Tiny Tiny RSS',
|
||
url: 'http://tt-rss.org',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'theoldreader\\.com',
|
||
name: 'theoldreader',
|
||
category: 'Feed Reader',
|
||
url: 'https://theoldreader.com',
|
||
},
|
||
{
|
||
regex: 'Trackable/0\\.1',
|
||
name: 'Chartable',
|
||
category: 'Site Monitor',
|
||
url: 'https://help.chartable.com/article/34-what-is-the-trackable-analytics-prefix',
|
||
producer: { name: 'Chartable', url: 'https://chartable.com' },
|
||
},
|
||
{
|
||
regex: 'trendictionbot',
|
||
name: 'Trendiction Bot',
|
||
category: 'Crawler',
|
||
url: 'http://www.trendiction.de/bot',
|
||
producer: { name: 'Talkwalker Inc.', url: 'http://www.talkwalker.com' },
|
||
},
|
||
{
|
||
regex: 'TurnitinBot',
|
||
name: 'TurnitinBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.turnitin.com/robot/crawlerinfo.html',
|
||
producer: { name: 'iParadigms, LLC.', url: 'http://www.turnitin.com' },
|
||
},
|
||
{
|
||
regex: 'TweetedTimes',
|
||
name: 'TweetedTimes Bot',
|
||
category: 'Crawler',
|
||
url: 'https://tweetedtimes.com/',
|
||
producer: { name: 'TweetedTimes', url: 'https://tweetedtimes.com/' },
|
||
},
|
||
{
|
||
regex: 'TweetmemeBot',
|
||
name: 'Tweetmeme Bot',
|
||
category: 'Crawler',
|
||
url: 'http://tweetmeme.com/',
|
||
producer: { name: 'Mediasift', url: '' },
|
||
},
|
||
{
|
||
regex: 'Twingly Recon',
|
||
name: 'Twingly Recon',
|
||
category: 'Crawler',
|
||
producer: { name: 'Twingly', url: 'https://www.twingly.com' },
|
||
},
|
||
{
|
||
regex: 'Twitterbot',
|
||
name: 'Twitterbot',
|
||
category: 'Social Media Agent',
|
||
url: 'https://dev.twitter.com/docs/cards/getting-started',
|
||
producer: { name: 'Twitter', url: 'http://www.twitter.com' },
|
||
},
|
||
{
|
||
regex: 'UniversalFeedParser',
|
||
name: 'UniversalFeedParser',
|
||
category: 'Feed Fetcher',
|
||
url: 'https://github.com/kurtmckee/feedparser',
|
||
producer: { name: 'Kurt McKee', url: 'https://github.com/kurtmckee' },
|
||
},
|
||
{
|
||
regex: 'via secureurl\\.fwdcdn\\.com',
|
||
name: 'UkrNet Mail Proxy',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: 'UkrNet Ltd', url: 'https://www.ukr.net/' },
|
||
},
|
||
{
|
||
regex: 'Uptime(?:bot)?/[\\d.]+',
|
||
name: 'Uptimebot',
|
||
category: 'Site Monitor',
|
||
url: 'https://uptime.com/uptime-bot',
|
||
producer: { name: 'Uptime', url: 'https://uptime.com/' },
|
||
},
|
||
{
|
||
regex: 'UptimeRobot',
|
||
name: 'UptimeRobot',
|
||
category: 'Site Monitor',
|
||
url: 'https://uptimerobot.com/',
|
||
producer: { name: 'Uptime Robot', url: 'https://uptimerobot.com/' },
|
||
},
|
||
{
|
||
regex: 'URLAppendBot',
|
||
name: 'URLAppendBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.profound.net/urlappendbot.html',
|
||
producer: { name: 'Profound Networks', url: 'http://www.profound.net' },
|
||
},
|
||
{
|
||
regex: 'Vagabondo',
|
||
name: 'Vagabondo',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: 'WiseGuys', url: 'http://www.wise-guys.nl/' },
|
||
},
|
||
{
|
||
regex: 'vkShare; ',
|
||
name: 'VK Share Button',
|
||
category: 'Crawler',
|
||
url: 'https://dev.vk.com/en/widgets/share',
|
||
producer: { name: 'VK', url: 'https://vk.com/' },
|
||
},
|
||
{
|
||
regex: 'VKRobot',
|
||
name: 'VK Robot',
|
||
category: 'Crawler',
|
||
url: 'https://dev.vk.com/en/',
|
||
producer: { name: 'VK', url: 'https://vk.com/' },
|
||
},
|
||
{
|
||
regex: 'VSMCrawler',
|
||
name: 'Visual Site Mapper Crawler',
|
||
category: 'Crawler',
|
||
url: 'http://www.visualsitemapper.com/crawler',
|
||
producer: { name: 'Alentum Software Ltd.', url: 'http://www.alentum.com' },
|
||
},
|
||
{
|
||
regex: 'Jigsaw',
|
||
name: 'W3C CSS Validator',
|
||
category: 'Validator',
|
||
url: 'http://jigsaw.w3.org/css-validator',
|
||
producer: { name: 'W3C', url: 'http://www.w3.org' },
|
||
},
|
||
{
|
||
regex: 'W3C_I18n-Checker',
|
||
name: 'W3C I18N Checker',
|
||
category: 'Validator',
|
||
url: 'http://validator.w3.org/i18n-checker',
|
||
producer: { name: 'W3C', url: 'http://www.w3.org' },
|
||
},
|
||
{
|
||
regex: 'W3C-checklink',
|
||
name: 'W3C Link Checker',
|
||
category: 'Validator',
|
||
url: 'http://validator.w3.org/checklink',
|
||
producer: { name: 'W3C', url: 'http://www.w3.org' },
|
||
},
|
||
{
|
||
regex: 'W3C_Validator|Validator\\.nu',
|
||
name: 'W3C Markup Validation Service',
|
||
category: 'Validator',
|
||
url: 'http://validator.w3.org/services',
|
||
producer: { name: 'W3C', url: 'http://www.w3.org' },
|
||
},
|
||
{
|
||
regex: 'W3C-mobileOK',
|
||
name: 'W3C MobileOK Checker',
|
||
category: 'Validator',
|
||
url: 'http://validator.w3.org/mobile',
|
||
producer: { name: 'W3C', url: 'http://www.w3.org' },
|
||
},
|
||
{
|
||
regex: 'W3C_Unicorn',
|
||
name: 'W3C Unified Validator',
|
||
category: 'Validator',
|
||
url: 'http://validator.w3.org/unicorn',
|
||
producer: { name: 'W3C', url: 'http://www.w3.org' },
|
||
},
|
||
{
|
||
regex: 'P3P Validator',
|
||
name: 'W3C P3P Validator',
|
||
category: 'Validator',
|
||
url: 'https://www.w3.org/P3P/validator.html',
|
||
producer: { name: 'W3C', url: 'https://www.w3.org' },
|
||
},
|
||
{
|
||
regex: 'Wappalyzer',
|
||
name: 'Wappalyzer',
|
||
url: 'https://github.com/AliasIO/Wappalyzer',
|
||
producer: { name: 'AliasIO', url: 'https://github.com/AliasIO' },
|
||
},
|
||
{
|
||
regex: 'PTST/',
|
||
name: 'WebPageTest',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.webpagetest.org',
|
||
},
|
||
{
|
||
regex: 'WeSEE',
|
||
name: 'WeSEE:Search',
|
||
category: 'Search bot',
|
||
url: 'http://www.wesee.com/bot',
|
||
producer: { name: 'WeSEE Ltd', url: 'http://www.wesee.com' },
|
||
},
|
||
{
|
||
regex: 'WebbCrawler',
|
||
name: 'WebbCrawler',
|
||
category: 'Crawler',
|
||
url: 'http://badcheese.com/crawler.html',
|
||
producer: { name: 'Steve Webb', url: 'http://badcheese.com' },
|
||
},
|
||
{
|
||
regex: 'websitepulse[+ ]checker',
|
||
name: 'WebSitePulse',
|
||
category: 'Site Monitor',
|
||
url: 'http://badcheese.com/crawler.html',
|
||
producer: { name: 'WebSitePulse', url: 'http://www.websitepulse.com/' },
|
||
},
|
||
{
|
||
regex: 'WordPress.+isitwp\\.com',
|
||
name: 'IsItWP',
|
||
category: 'Crawler',
|
||
url: 'https://www.isitwp.com/',
|
||
producer: { name: 'WPBeginner, LLC', url: 'https://www.wpbeginner.com/' },
|
||
},
|
||
{
|
||
regex: 'Automattic Analytics Crawler/[\\d.]+',
|
||
name: 'Automattic Analytics',
|
||
category: 'Crawler',
|
||
url: 'https://wordpress.com/crawler/',
|
||
producer: { name: 'Wordpress.org', url: 'https://wordpress.org/' },
|
||
},
|
||
{
|
||
regex: 'WordPress\\.com mShots',
|
||
name: 'WordPress.com mShots',
|
||
category: 'Service Agent',
|
||
url: 'https://wordpress.org/',
|
||
producer: { name: 'Wordpress.org', url: 'https://wordpress.org/' },
|
||
},
|
||
{
|
||
regex: 'wp\\.com feedbot',
|
||
name: 'wp.com feedbot',
|
||
category: 'Feed Fetcher',
|
||
url: 'https://wordpress.com/',
|
||
producer: { name: 'Automattic, Inc.', url: 'https://automattic.com/' },
|
||
},
|
||
{
|
||
regex: 'WordPress',
|
||
name: 'WordPress',
|
||
category: 'Service Agent',
|
||
url: 'https://wordpress.org/',
|
||
producer: { name: 'Wordpress.org', url: 'https://wordpress.org/' },
|
||
},
|
||
{
|
||
regex: 'Wotbox',
|
||
name: 'Wotbox',
|
||
category: 'Search bot',
|
||
url: 'http://www.wotbox.com/bot/',
|
||
producer: { name: 'Wotbox', url: 'http://www.wotbox.com' },
|
||
},
|
||
{
|
||
regex: 'XenForo',
|
||
name: 'XenForo',
|
||
category: 'Service Agent',
|
||
url: 'https://xenforo.com/',
|
||
producer: { name: 'XenForo Ltd.', url: 'https://xenforo.com/' },
|
||
},
|
||
{
|
||
regex: 'yacybot',
|
||
name: 'YaCy',
|
||
category: 'Search bot',
|
||
url: 'http://yacy.net/bot.html',
|
||
producer: { name: 'YaCy', url: 'http://yacy.net' },
|
||
},
|
||
{
|
||
regex: 'Yahoo! Slurp|Yahoo!-AdCrawler',
|
||
name: 'Yahoo! Slurp',
|
||
category: 'Search bot',
|
||
url: 'http://help.yahoo.com/ysearch/slurp',
|
||
producer: { name: 'Yahoo! Inc.', url: 'http://www.yahoo.com' },
|
||
},
|
||
{
|
||
regex: 'Yahoo Link Preview|Yahoo:LinkExpander:Slingstone',
|
||
name: 'Yahoo! Link Preview',
|
||
category: 'Crawler',
|
||
url: 'https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html',
|
||
producer: { name: 'Yahoo! Inc.', url: 'http://www.yahoo.com' },
|
||
},
|
||
{
|
||
regex: 'YahooMailProxy',
|
||
name: 'Yahoo! Mail Proxy',
|
||
category: 'Service Agent',
|
||
url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html',
|
||
producer: { name: 'Yahoo! Inc.', url: 'http://www.yahoo.com' },
|
||
},
|
||
{
|
||
regex: 'YahooCacheSystem',
|
||
name: 'Yahoo! Cache System',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: 'Yahoo! Inc.', url: 'http://www.yahoo.com' },
|
||
},
|
||
{
|
||
regex: 'Y!J-BRW',
|
||
name: 'Yahoo! Japan BRW',
|
||
category: 'Crawler',
|
||
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955',
|
||
producer: { name: 'Yahoo! Japan Corp.', url: 'https://www.yahoo.co.jp/' },
|
||
},
|
||
{
|
||
regex: 'Y!J-WSC',
|
||
name: 'Yahoo! Japan WSC',
|
||
category: 'Crawler',
|
||
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955',
|
||
producer: { name: 'Yahoo! Japan Corp.', url: 'https://www.yahoo.co.jp/' },
|
||
},
|
||
{
|
||
regex: 'Y!J-ASR',
|
||
name: 'Yahoo! Japan ASR',
|
||
category: 'Crawler',
|
||
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955',
|
||
producer: { name: 'Yahoo! Japan Corp.', url: 'https://www.yahoo.co.jp/' },
|
||
},
|
||
{
|
||
regex: '^Y!J',
|
||
name: 'Yahoo! Japan',
|
||
category: 'Crawler',
|
||
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955',
|
||
producer: { name: 'Yahoo! Japan Corp.', url: 'https://www.yahoo.co.jp/' },
|
||
},
|
||
{
|
||
regex:
|
||
'Yandex(?:(?:\\.Gazeta |Accessibility|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:AdNet|Antivirus|Blogs|Calendar|Catalog|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Userproxy|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher',
|
||
name: 'Yandex Bot',
|
||
category: 'Search bot',
|
||
url: 'https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html',
|
||
producer: { name: 'Yandex LLC', url: 'https://yandex.com/company/' },
|
||
},
|
||
{
|
||
regex: 'Yeti|NaverJapan|AdsBot-Naver',
|
||
name: 'Yeti/Naverbot',
|
||
category: 'Search bot',
|
||
url: 'http://help.naver.com/robots/',
|
||
producer: { name: 'Naver', url: 'http://www.naver.com' },
|
||
},
|
||
{
|
||
regex: 'YoudaoBot',
|
||
name: 'Youdao Bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.youdao.com/help/webmaster/spider',
|
||
producer: { name: 'NetEase, Inc.', url: 'http://corp.163.com' },
|
||
},
|
||
{
|
||
regex: 'YOURLS v[0-9]',
|
||
name: 'Yourls',
|
||
category: 'Crawler',
|
||
url: 'http://yourls.org',
|
||
},
|
||
{
|
||
regex: 'YRSpider|YYSpider',
|
||
name: 'Yunyun Bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.yunyun.com/SiteInfo.php?r=about',
|
||
producer: { name: 'YunYun', url: 'http://www.yunyun.com' },
|
||
},
|
||
{
|
||
regex: 'zgrab',
|
||
name: 'zgrab',
|
||
category: 'Security Checker',
|
||
url: 'https://github.com/zmap/zgrab',
|
||
},
|
||
{
|
||
regex: 'Zookabot',
|
||
name: 'Zookabot',
|
||
category: 'Crawler',
|
||
url: 'http://zookabot.com',
|
||
producer: { name: 'Hwacha ApS', url: 'http://hwacha.dk' },
|
||
},
|
||
{
|
||
regex: 'ZumBot',
|
||
name: 'ZumBot',
|
||
category: 'Search bot',
|
||
url: 'http://help.zum.com/inquiry',
|
||
producer: { name: 'ZUM internet', url: 'http://www.zuminternet.com/' },
|
||
},
|
||
{
|
||
regex: 'YottaaMonitor',
|
||
name: 'Yottaa Site Monitor',
|
||
category: 'Site Monitor',
|
||
url: 'http://www.yottaa.com/products/site-monitor',
|
||
producer: { name: 'Yottaa', url: 'http://www.yottaa.com/' },
|
||
},
|
||
{
|
||
regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857',
|
||
name: 'Yahoo Gemini',
|
||
category: 'Crawler',
|
||
url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html',
|
||
producer: { name: 'Yahoo! Inc.', url: 'http://www.yahoo.com' },
|
||
},
|
||
{
|
||
regex: '.*Java.*outbrain',
|
||
name: 'Outbrain',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: { name: 'Outbrain', url: 'http://www.outbrain.com/' },
|
||
},
|
||
{
|
||
regex: 'HubPages.*crawlingpolicy',
|
||
name: 'HubPages',
|
||
category: 'Crawler',
|
||
url: 'https://hubpages.com/help/crawlingpolicy',
|
||
producer: { name: 'HubPages, Inc.', url: 'https://discover.hubpages.com/' },
|
||
},
|
||
{
|
||
regex: 'Pinterest(?:bot)?/[\\d.]+.*www\\.pinterest\\.com',
|
||
name: 'Pinterest',
|
||
url: 'https://help.pinterest.com/en/business/article/pinterest-crawler',
|
||
category: 'Crawler',
|
||
producer: { name: 'Pinterest', url: 'https://www.pinterest.com/' },
|
||
},
|
||
{
|
||
regex: '.*Site24x7',
|
||
name: 'Site24x7 Website Monitoring',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.site24x7.com/site24x7-faq.html',
|
||
producer: { name: 'Site24x7', url: 'https://www.site24x7.com' },
|
||
},
|
||
{
|
||
regex: '.* HLB/[\\d.]+',
|
||
name: 'Site24x7 Defacement Monitor',
|
||
category: 'Site Monitor',
|
||
url: 'https://support.site24x7.com/portal/en/kb/articles/default-user-agent-used-in-website-defacement-monitor',
|
||
producer: { name: 'Site24x7', url: 'https://www.site24x7.com/' },
|
||
},
|
||
{
|
||
regex: 's~snapchat-proxy',
|
||
name: 'Snapchat Proxy',
|
||
category: 'Crawler',
|
||
url: 'https://www.snapchat.com',
|
||
producer: { name: 'Snapchat Inc.', url: 'https://www.snapchat.com' },
|
||
},
|
||
{
|
||
regex: 'Snap URL Preview Service',
|
||
name: 'Snap URL Preview Service',
|
||
category: 'Service Agent',
|
||
url: 'https://developers.snap.com/robots',
|
||
producer: { name: 'Snapchat Inc.', url: 'https://www.snapchat.com/' },
|
||
},
|
||
{
|
||
regex: 'SnapchatAds/[\\d.]+',
|
||
name: 'Snapchat Ads',
|
||
category: 'Crawler',
|
||
url: 'https://businesshelp.snapchat.com/s/article/adsbot-crawler?language=en_US',
|
||
producer: { name: 'Snapchat Inc.', url: 'https://www.snapchat.com/' },
|
||
},
|
||
{
|
||
regex: "Let's Encrypt validation server",
|
||
name: "Let's Encrypt Validation",
|
||
category: 'Service Agent',
|
||
url: 'https://letsencrypt.org/how-it-works/',
|
||
producer: { name: "Let's Encrypt", url: 'https://letsencrypt.org' },
|
||
},
|
||
{
|
||
regex: 'GrapeshotCrawler',
|
||
name: 'Grapeshot',
|
||
category: 'Crawler',
|
||
url: 'https://www.grapeshot.com/crawler',
|
||
producer: { name: 'Grapeshot', url: 'https://www.grapeshot.com' },
|
||
},
|
||
{
|
||
regex: 'www\\.monitor\\.us',
|
||
name: 'Monitor.Us',
|
||
category: 'Site Monitor',
|
||
url: 'http://www.monitor.us',
|
||
producer: { name: 'Monitor.Us', url: 'http://www.monitor.us' },
|
||
},
|
||
{
|
||
regex: 'Catchpoint',
|
||
name: 'Catchpoint',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.catchpoint.com/',
|
||
producer: {
|
||
name: 'Catchpoint Systems',
|
||
url: 'https://www.catchpoint.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'bitlybot',
|
||
name: 'BitlyBot',
|
||
category: 'Crawler',
|
||
url: 'https://bitly.com',
|
||
producer: { name: 'Bitly, Inc.', url: 'https://bitly.com' },
|
||
},
|
||
{ regex: 'Zao/', name: 'Zao', category: 'Crawler' },
|
||
{ regex: 'lycos', name: 'Lycos' },
|
||
{ regex: 'Slurp', name: 'Inktomi Slurp' },
|
||
{ regex: 'Speedy Spider', name: 'Speedy' },
|
||
{ regex: 'ScoutJet', name: 'ScoutJet' },
|
||
{ regex: 'nrsbot|netresearch', name: 'NetResearchServer' },
|
||
{ regex: 'scooter', name: 'Scooter' },
|
||
{ regex: 'gigabot', name: 'Gigabot' },
|
||
{ regex: 'charlotte', name: 'Charlotte' },
|
||
{ regex: 'Pompos', name: 'Pompos' },
|
||
{ regex: 'ichiro', name: 'ichiro' },
|
||
{
|
||
regex: 'PagePeeker',
|
||
name: 'PagePeeker',
|
||
category: 'Crawler',
|
||
url: 'https://pagepeeker.com/robots/',
|
||
producer: { name: 'PAGEPEEKER SRL', url: 'https://pagepeeker.com/' },
|
||
},
|
||
{ regex: 'WebThumbnail', name: 'WebThumbnail' },
|
||
{ regex: 'Willow Internet Crawler', name: 'Willow Internet Crawler' },
|
||
{ regex: 'EmailWolf', name: 'EmailWolf' },
|
||
{ regex: 'NetLyzer FastProbe', name: 'NetLyzer FastProbe' },
|
||
{ regex: 'AdMantX.*admantx\\.com', name: 'ADMantX' },
|
||
{ regex: 'Server Density Service Monitoring', name: 'Server Density' },
|
||
{
|
||
regex: 'RSSRadio \\(Push Notification Scanner;support@dorada\\.co\\.uk\\)',
|
||
name: 'RSSRadio Bot',
|
||
},
|
||
{
|
||
regex: '^sentry',
|
||
name: 'Sentry Bot',
|
||
producer: { name: 'Sentry', url: 'https://sentry.io' },
|
||
},
|
||
{
|
||
regex: '^Spotify/[\\d.]+$',
|
||
name: 'Spotify',
|
||
producer: { name: 'Spotify', url: 'https://www.spotify.com' },
|
||
},
|
||
{ regex: 'The Knowledge AI', name: 'The Knowledge AI', category: 'Crawler' },
|
||
{
|
||
regex: 'Embedly',
|
||
name: 'Embedly',
|
||
category: 'Crawler',
|
||
url: 'https://support.embed.ly/hc/en-us',
|
||
producer: { name: 'A Medium, Corp.', url: 'https://medium.com/' },
|
||
},
|
||
{
|
||
regex: 'BrandVerity',
|
||
name: 'BrandVerity',
|
||
category: 'Crawler',
|
||
url: 'https://www.brandverity.com/why-is-brandverity-visiting-me',
|
||
producer: {
|
||
name: 'BrandVerity, Inc.',
|
||
url: 'https://www.brandverity.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Kaspersky Lab CFR link resolver',
|
||
name: 'Kaspersky',
|
||
category: 'Security Checker',
|
||
url: 'https://www.kaspersky.com/',
|
||
producer: { name: 'AO Kaspersky Lab', url: 'https://www.kaspersky.com/' },
|
||
},
|
||
{
|
||
regex: 'eZ Publish Link Validator',
|
||
name: 'eZ Publish Link Validator',
|
||
category: 'Crawler',
|
||
url: 'https://ez.no/',
|
||
producer: { name: 'eZ Systems AS', url: 'https://ez.no/' },
|
||
},
|
||
{
|
||
regex: 'woorankreview',
|
||
name: 'WooRank',
|
||
category: 'Search bot',
|
||
url: 'https://www.woorank.com/',
|
||
producer: { name: 'WooRank sprl', url: 'https://www.woorank.com/' },
|
||
},
|
||
{
|
||
regex: 'by Siteimprove\\.com',
|
||
name: 'Siteimprove',
|
||
category: 'Search bot',
|
||
url: 'https://siteimprove.com/',
|
||
producer: { name: 'Siteimprove GmbH', url: 'https://siteimprove.com/' },
|
||
},
|
||
{
|
||
regex: 'CATExplorador',
|
||
name: 'CATExplorador',
|
||
category: 'Search bot',
|
||
url: 'https://fundacio.cat/ca/domini/',
|
||
producer: {
|
||
name: 'Fundació puntCAT',
|
||
url: 'https://fundacio.cat/ca/domini/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Buck',
|
||
name: 'Buck',
|
||
category: 'Search bot',
|
||
url: 'https://hypefactors.com/',
|
||
producer: { name: 'Hypefactors A/S', url: 'https://hypefactors.com/' },
|
||
},
|
||
{
|
||
regex: 'tracemyfile',
|
||
name: 'TraceMyFile',
|
||
category: 'Search bot',
|
||
url: 'https://www.tracemyfile.com/',
|
||
producer: { name: 'Idee Inc.', url: 'http://ideeinc.com/' },
|
||
},
|
||
{
|
||
regex: 'zelist\\.ro feed parser',
|
||
name: 'Ze List',
|
||
url: 'https://www.zelist.ro/',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: 'Treeworks SRL', url: 'https://www.tree.ro/' },
|
||
},
|
||
{
|
||
regex: 'weborama-fetcher',
|
||
name: 'Weborama',
|
||
category: 'Search bot',
|
||
url: 'https://weborama.com/',
|
||
producer: { name: 'Weborama SA', url: 'https://weborama.com/' },
|
||
},
|
||
{
|
||
regex: 'BoardReader Favicon Fetcher',
|
||
name: 'BoardReader',
|
||
category: 'Search bot',
|
||
url: 'https://boardreader.com/',
|
||
producer: { name: 'Effyis Inc', url: 'https://boardreader.com/' },
|
||
},
|
||
{
|
||
regex: 'IDG/IT',
|
||
name: 'IDG/IT',
|
||
category: 'Search bot',
|
||
url: 'https://spaziodati.eu/',
|
||
producer: { name: 'SpazioDati S.r.l.', url: 'https://spaziodati.eu/' },
|
||
},
|
||
{
|
||
regex: 'Bytespider',
|
||
name: 'Bytespider',
|
||
category: 'Search bot',
|
||
url: 'https://bytedance.com/',
|
||
producer: { name: 'ByteDance Ltd.', url: 'https://bytedance.com/' },
|
||
},
|
||
{
|
||
regex: 'WikiDo',
|
||
name: 'WikiDo',
|
||
category: 'Search bot',
|
||
url: 'https://www.wikido.com/',
|
||
producer: {
|
||
name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.',
|
||
url: 'https://www.wikido.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Awario(?:Smart)?Bot',
|
||
name: 'Awario',
|
||
category: 'Search bot',
|
||
url: 'https://awario.com/bots.html',
|
||
producer: {
|
||
name: 'TechFusion Ltd.',
|
||
url: 'https://www.techfusion.com.cy/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'AwarioRssBot',
|
||
name: 'Awario',
|
||
category: 'Feed Fetcher',
|
||
url: 'https://awario.com/bots.html',
|
||
producer: {
|
||
name: 'TechFusion Ltd.',
|
||
url: 'https://www.techfusion.com.cy/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'oBot',
|
||
name: 'oBot',
|
||
category: 'Search bot',
|
||
url: 'https://www.xforce-security.com/crawler/',
|
||
producer: {
|
||
name: 'IBM Germany Research & Development GmbH',
|
||
url: 'https://exchange.xforce.ibmcloud.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'SMTBot',
|
||
name: 'SMTBot',
|
||
category: 'Search bot',
|
||
url: 'https://www.similartech.com/smtbot',
|
||
producer: { name: 'SimilarTech Ltd.', url: 'https://www.similartech.com/' },
|
||
},
|
||
{
|
||
regex: 'LCC',
|
||
name: 'LCC',
|
||
category: 'Search bot',
|
||
url: 'https://corpora.uni-leipzig.de/crawler_faq.html',
|
||
producer: {
|
||
name: 'Universität Leipzig',
|
||
url: 'https://www.uni-leipzig.de/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Startpagina-Linkchecker',
|
||
name: 'Startpagina Linkchecker',
|
||
category: 'Search bot',
|
||
url: 'https://www.startpagina.nl/linkchecker',
|
||
producer: { name: 'Startpagina B.V.', url: 'https://www.startpagina.nl/' },
|
||
},
|
||
{
|
||
regex: 'MoodleBot-Linkchecker',
|
||
name: 'MoodleBot Linkchecker',
|
||
category: 'Search bot',
|
||
url: 'hhttps://docs.moodle.org/en/Usage',
|
||
producer: { name: 'Moodle Pty Ltd', url: 'https://moodle.org/' },
|
||
},
|
||
{
|
||
regex: 'GTmetrix',
|
||
name: 'GTmetrix',
|
||
category: 'Crawler',
|
||
url: 'https://gtmetrix.com/',
|
||
producer: {
|
||
name: 'Carbon60 Operating Co. Ltd.',
|
||
url: 'https://www.carbon60.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'CyberFind ?Crawler',
|
||
name: 'CyberFind Crawler',
|
||
category: 'Crawler',
|
||
url: 'https://www.cyberfind.net/bot.html',
|
||
producer: { name: 'Find.tf', url: 'https://find.tf/' },
|
||
},
|
||
{
|
||
regex: 'Nutch',
|
||
name: 'Nutch-based Bot',
|
||
category: 'Crawler',
|
||
url: 'https://nutch.apache.org',
|
||
producer: {
|
||
name: 'The Apache Software Foundation',
|
||
url: 'https://www.apache.org/foundation/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Seobility',
|
||
name: 'Seobility',
|
||
category: 'Crawler',
|
||
url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot',
|
||
},
|
||
{
|
||
regex: 'Vercelbot',
|
||
name: 'Vercel Bot',
|
||
category: 'Service bot',
|
||
url: 'https://vercel.com',
|
||
},
|
||
{
|
||
regex: 'Grammarly',
|
||
name: 'Grammarly',
|
||
category: 'Service bot',
|
||
url: 'https://www.grammarly.com',
|
||
},
|
||
{ regex: 'Robozilla', name: 'Robozilla', category: 'Crawler' },
|
||
{
|
||
regex: 'Domains Project',
|
||
name: 'Domains Project',
|
||
category: 'Crawler',
|
||
url: 'https://domainsproject.org',
|
||
},
|
||
{
|
||
regex: 'PetalBot',
|
||
name: 'Petal Bot',
|
||
category: 'Crawler',
|
||
url: 'https://aspiegel.com/petalbot',
|
||
},
|
||
{
|
||
regex: 'SerendeputyBot',
|
||
name: 'Serendeputy Bot',
|
||
category: 'Crawler',
|
||
url: 'https://serendeputy.com/about/serendeputy-bot',
|
||
},
|
||
{
|
||
regex:
|
||
'ias-(?:va|sg).*admantx.*service-fetcher|admantx\\.com.*service-fetcher',
|
||
name: 'ADmantX Service Fetcher',
|
||
category: 'Service bot',
|
||
url: 'https://www.admantx.com/service-fetcher.html',
|
||
},
|
||
{
|
||
regex: 'SemanticScholarBot',
|
||
name: 'Semantic Scholar Bot',
|
||
category: 'Crawler',
|
||
url: 'https://www.semanticscholar.org/crawler',
|
||
},
|
||
{
|
||
regex: 'VelenPublicWebCrawler',
|
||
name: 'Velen Public Web Crawler',
|
||
category: 'Crawler',
|
||
url: 'https://hunter.io/robot',
|
||
},
|
||
{
|
||
regex: 'Barkrowler',
|
||
name: 'Barkrowler',
|
||
category: 'Crawler',
|
||
url: 'http://www.exensa.com/crawl',
|
||
},
|
||
{
|
||
regex: 'BDCbot',
|
||
name: 'BDCbot',
|
||
category: 'Crawler',
|
||
url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx',
|
||
producer: {
|
||
name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA',
|
||
url: 'https://bigdatacorp.com.br/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'adbeat',
|
||
name: 'Adbeat',
|
||
category: 'Crawler',
|
||
url: 'https://www.adbeat.com/operation_policy',
|
||
producer: { name: 'PPC Labs LLC', url: 'https://www.adbeat.com/' },
|
||
},
|
||
{
|
||
regex: '(?:BuiltWith|BW)/[\\d.]+',
|
||
name: 'BuiltWith',
|
||
category: 'Crawler',
|
||
url: 'https://builtwith.com/biup',
|
||
producer: { name: 'BuiltWith Pty Ltd', url: 'https://builtwith.com/' },
|
||
},
|
||
{
|
||
regex: 'https://whatis\\.contentkingapp\\.com',
|
||
name: 'ContentKing',
|
||
category: 'Site Monitor',
|
||
url: 'https://whatis.contentkingapp.com/',
|
||
producer: {
|
||
name: 'ContentKing BV',
|
||
url: 'https://www.contentkingapp.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'MicroAdBot',
|
||
name: 'MicroAdBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.microad.co.jp/',
|
||
producer: { name: 'MicroAd, Inc.', url: 'https://www.microad.co.jp/' },
|
||
},
|
||
{
|
||
regex: 'PingAdmin\\.Ru',
|
||
name: 'PingAdmin.Ru',
|
||
category: 'Site Monitor',
|
||
url: 'https://ping-admin.ru/',
|
||
},
|
||
{
|
||
regex: 'notifyninja.+monitoring',
|
||
name: 'Notify Ninja',
|
||
category: 'Site Monitor',
|
||
url: 'http://notifyninja.com',
|
||
},
|
||
{
|
||
regex: 'WebDataStats',
|
||
name: 'WebDataStats',
|
||
category: 'Crawler',
|
||
url: 'https://webdatastats.com/policy.html',
|
||
producer: {
|
||
name: 'WebTehRazrabotka LLC',
|
||
url: 'https://webdatastats.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'parse\\.ly scraper',
|
||
name: 'parse.ly',
|
||
category: 'Crawler',
|
||
url: 'https://www.parse.ly/help/integration/crawler',
|
||
producer: { name: 'Parsely, Inc.', url: 'https://www.parse.ly/' },
|
||
},
|
||
{
|
||
regex: 'Nimbostratus-Bot',
|
||
name: 'Nimbostratus Bot',
|
||
category: 'Site Monitor',
|
||
url: 'http://cloudsystemnetworks.com',
|
||
},
|
||
{
|
||
regex: 'HeartRails_Capture/[\\d.]+',
|
||
name: 'Heart Rails Capture',
|
||
category: 'Service Agent',
|
||
url: 'http://capture.heartrails.com',
|
||
},
|
||
{
|
||
regex: 'Project-Resonance',
|
||
name: 'Project Resonance',
|
||
category: 'Crawler',
|
||
url: 'https://project-resonance.com/',
|
||
producer: { name: 'RedHunt Labs Limited', url: 'https://redhuntlabs.com/' },
|
||
},
|
||
{
|
||
regex: 'DataXu/[\\d.]+',
|
||
name: 'DataXu',
|
||
category: 'Service Agent',
|
||
url: 'https://advertising.roku.com/dataxu',
|
||
producer: { name: 'Roku, Inc.', url: 'https://roku.com' },
|
||
},
|
||
{
|
||
regex: 'Cocolyzebot',
|
||
name: 'Cocolyzebot',
|
||
category: 'Crawler',
|
||
url: 'https://cocolyze.com/en/cocolyzebot',
|
||
producer: {
|
||
name: 'VSI INNOVATION SAS',
|
||
url: 'https://vsi-innovation.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'veryhip',
|
||
name: 'VeryHip',
|
||
category: 'Crawler',
|
||
url: 'https://veryhip.com/',
|
||
producer: { name: 'VeryHip', url: 'https://veryhip.com/' },
|
||
},
|
||
{
|
||
regex: 'LinkpadBot',
|
||
name: 'LinkpadBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.linkpad.org/',
|
||
producer: { name: 'Solomono LLC', url: 'https://www.linkpad.org/' },
|
||
},
|
||
{
|
||
regex: 'MuscatFerret',
|
||
name: 'MuscatFerret',
|
||
category: 'Crawler',
|
||
url: 'http://www.webtop.com/',
|
||
},
|
||
{
|
||
regex: 'PageThing\\.com',
|
||
name: 'PageThing',
|
||
category: 'Crawler',
|
||
url: 'https://www.pagething.com/',
|
||
producer: {
|
||
name: 'SPECIALNOISE LTD',
|
||
url: 'https://www.specialnoise.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'ArchiveBox',
|
||
name: 'ArchiveBox',
|
||
url: 'https://archivebox.io/',
|
||
category: 'Crawler',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'Choosito',
|
||
name: 'Choosito',
|
||
url: 'https://www.choosito.com/',
|
||
category: 'Crawler',
|
||
producer: { name: 'Choosito! Inc.', url: 'https://www.choosito.com/' },
|
||
},
|
||
{
|
||
regex: 'datagnionbot',
|
||
name: 'datagnionbot',
|
||
url: 'https://www.datagnion.com/bot.html',
|
||
category: 'Crawler',
|
||
producer: { name: 'DATAGNION GMBH', url: 'https://www.datagnion.com/' },
|
||
},
|
||
{
|
||
regex: 'WhatCMS',
|
||
name: 'WhatCMS',
|
||
url: 'https://whatcms.org/',
|
||
category: 'Crawler',
|
||
producer: { name: 'Nineteen Ten LLC', url: 'https://whatcms.org/' },
|
||
},
|
||
{
|
||
regex: 'httpx',
|
||
name: 'httpx',
|
||
url: 'https://github.com/projectdiscovery/httpx',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'ProjectDiscovery, Inc.',
|
||
url: 'https://projectdiscovery.io/',
|
||
},
|
||
},
|
||
{
|
||
regex: '.*\\.oast\\.',
|
||
name: 'Interactsh',
|
||
category: 'Security Checker',
|
||
url: 'https://github.com/projectdiscovery/interactsh',
|
||
producer: {
|
||
name: 'ProjectDiscovery, Inc.',
|
||
url: 'https://projectdiscovery.io/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'scaninfo@(?:expanseinc|paloaltonetworks)\\.com',
|
||
name: 'Expanse',
|
||
category: 'Security Checker',
|
||
url: 'https://expanse.co/',
|
||
producer: { name: 'Expanse Inc.', url: 'https://expanse.co/' },
|
||
},
|
||
{
|
||
regex: 'HuaweiWebCatBot',
|
||
name: 'HuaweiWebCatBot',
|
||
category: 'Crawler',
|
||
url: 'https://isecurity.huawei.com',
|
||
producer: {
|
||
name: 'Huawei Technologies Co., Ltd.',
|
||
url: 'https://huawei.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Hatena-Favicon',
|
||
name: 'Hatena Favicon',
|
||
category: 'Crawler',
|
||
url: 'https://www.hatena.ne.jp/faq/',
|
||
producer: { name: 'Hatena Co., Ltd.', url: 'https://www.hatena.ne.jp' },
|
||
},
|
||
{
|
||
regex: 'Hatena-?Bookmark',
|
||
name: 'Hatena Bookmark',
|
||
category: 'Crawler',
|
||
url: 'https://www.hatena.ne.jp/faq/',
|
||
producer: { name: 'Hatena Co., Ltd.', url: 'https://www.hatena.ne.jp' },
|
||
},
|
||
{
|
||
regex: 'RyowlEngine/[\\d.]+',
|
||
name: 'Ryowl',
|
||
category: 'Crawler',
|
||
url: 'https://ryowl.org',
|
||
},
|
||
{
|
||
regex: 'OdklBot/[\\d.]+',
|
||
name: 'Odnoklassniki Bot',
|
||
category: 'Crawler',
|
||
url: 'https://odnoklassniki.ru',
|
||
},
|
||
{
|
||
regex: 'Mediatoolkitbot',
|
||
name: 'Mediatoolkit Bot',
|
||
category: 'Crawler',
|
||
url: 'https://mediatoolkit.com',
|
||
},
|
||
{
|
||
regex: 'ZoominfoBot',
|
||
name: 'ZoominfoBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.zoominfo.com',
|
||
},
|
||
{
|
||
regex: 'WeViKaBot/[\\d.]+',
|
||
name: 'WeViKaBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.wevika.de',
|
||
},
|
||
{
|
||
regex: 'SEOkicks',
|
||
name: 'SEOkicks',
|
||
category: 'Crawler',
|
||
url: 'https://www.seokicks.de/robot.html',
|
||
},
|
||
{
|
||
regex: 'Plukkie/[\\d.]+',
|
||
name: 'Plukkie',
|
||
category: 'Crawler',
|
||
url: 'http://www.botje.com/plukkie.htm',
|
||
},
|
||
{
|
||
regex: 'proximic;',
|
||
name: 'Comscore',
|
||
category: 'Crawler',
|
||
url: 'https://www.comscore.com/Web-Crawler',
|
||
},
|
||
{
|
||
regex: 'SurdotlyBot/[\\d.]+',
|
||
name: 'SurdotlyBot',
|
||
category: 'Crawler',
|
||
url: 'http://sur.ly/bot.html',
|
||
},
|
||
{
|
||
regex: 'Gowikibot/[\\d.]+',
|
||
name: 'Gowikibot',
|
||
category: 'Crawler',
|
||
url: 'http:/www.gowikibot.com',
|
||
},
|
||
{
|
||
regex: 'SabsimBot/[\\d.]+',
|
||
name: 'SabsimBot',
|
||
category: 'Crawler',
|
||
url: 'https://sabsim.com',
|
||
},
|
||
{
|
||
regex: 'LumtelBot/[\\d.]+',
|
||
name: 'LumtelBot',
|
||
category: 'Crawler',
|
||
url: 'https://umtel.com',
|
||
},
|
||
{
|
||
regex: 'PiplBot',
|
||
name: 'PiplBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.pipl.com/bot',
|
||
},
|
||
{
|
||
regex: 'woobot/[\\d.]+',
|
||
name: 'WooRank',
|
||
category: 'Crawler',
|
||
url: 'https://www.woorank.com/bot',
|
||
},
|
||
{
|
||
regex: 'Cookiebot/[\\d.]+',
|
||
name: 'Cookiebot',
|
||
category: 'Crawler',
|
||
url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent',
|
||
producer: { name: 'Cybot A/S', url: 'https://www.cybot.com/' },
|
||
},
|
||
{
|
||
regex: 'NetSystemsResearch',
|
||
name: 'NetSystemsResearch',
|
||
category: 'Security Checker',
|
||
url: 'https://www.netsystemsresearch.com/',
|
||
producer: {
|
||
name: 'NET SYSTEMS RESEARCH LLC',
|
||
url: 'https://www.netsystemsresearch.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'CensysInspect/[\\d.]+',
|
||
name: 'CensysInspect',
|
||
category: 'Security Checker',
|
||
url: 'https://about.censys.io/',
|
||
producer: { name: 'Censys, Inc.', url: 'https://censys.io/' },
|
||
},
|
||
{
|
||
regex: 'gdnplus\\.com',
|
||
name: 'GDNP',
|
||
category: 'Crawler',
|
||
url: 'https://gdnplus.com/',
|
||
producer: {
|
||
name: 'Global Digital Network Plus, LLC',
|
||
url: 'https://gdnplus.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'WellKnownBot/[\\d.]+',
|
||
name: 'WellKnownBot',
|
||
category: 'Crawler',
|
||
url: 'https://well-known.dev',
|
||
},
|
||
{
|
||
regex: 'Adsbot/[\\d.]+',
|
||
name: 'Adsbot',
|
||
category: 'Crawler',
|
||
url: 'https://seostar.co/robot/',
|
||
},
|
||
{
|
||
regex: 'MTRobot/[\\d.]+',
|
||
name: 'MTRobot',
|
||
category: 'Crawler',
|
||
url: 'https://metrics-tools.de/robot.html',
|
||
producer: { name: 'Metrics Tools', url: 'https://metrics-tools.de/' },
|
||
},
|
||
{
|
||
regex: 'serpstatbot/[\\d.]+',
|
||
name: 'serpstatbot',
|
||
category: 'Crawler',
|
||
url: 'http://serpstatbot.com/',
|
||
producer: { name: 'Netpeak Ltd', url: 'https://netpeak.net/' },
|
||
},
|
||
{
|
||
regex: 'colly',
|
||
name: 'colly',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/gocolly/colly/',
|
||
},
|
||
{
|
||
regex: 'l9tcpid/v[\\d.]+',
|
||
name: 'l9tcpid',
|
||
category: 'Security Checker',
|
||
url: 'https://github.com/LeakIX/l9tcpid',
|
||
},
|
||
{
|
||
regex: 'l9explore/[\\d.]+',
|
||
name: 'l9explore',
|
||
category: 'Security Checker',
|
||
url: 'https://github.com/LeakIX/l9explore',
|
||
},
|
||
{
|
||
regex: 'l9scan/|^Lkx-.*/[\\d.]+',
|
||
name: 'LeakIX',
|
||
category: 'Security Checker',
|
||
url: 'https://leakix.net/',
|
||
producer: { name: 'BaDaaS SRL', url: 'https://leakix.net/' },
|
||
},
|
||
{
|
||
regex: 'MegaIndex\\.ru/[\\d.]+',
|
||
name: 'MegaIndex',
|
||
category: 'Crawler',
|
||
url: 'https://megaindex.com/crawler',
|
||
},
|
||
{
|
||
regex: 'Seekport',
|
||
name: 'Seekport',
|
||
category: 'Crawler',
|
||
url: 'https://bot.seekport.com/',
|
||
producer: { name: 'SISTRIX GmbH', url: 'https://www.sistrix.de/' },
|
||
},
|
||
{
|
||
regex: 'seolyt/[\\d.]+',
|
||
name: 'seolyt',
|
||
category: 'Crawler',
|
||
url: 'https://seolyt.com/',
|
||
},
|
||
{
|
||
regex: 'YaK/[\\d.]+',
|
||
name: 'YaK',
|
||
category: 'Crawler',
|
||
url: 'https://www.linkfluence.com/',
|
||
producer: { name: 'Linkfluence SAS', url: 'https://www.linkfluence.com/' },
|
||
},
|
||
{
|
||
regex: 'KomodiaBot/[\\d.]+',
|
||
name: 'KomodiaBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler',
|
||
producer: { name: 'Komodia Inc.', url: 'https://www.komodia.com/' },
|
||
},
|
||
{
|
||
regex: 'KStandBot/[\\d.]+',
|
||
name: 'KStandBot',
|
||
category: 'Crawler',
|
||
url: 'https://url-classification.io/wiki/index.php?title=URL_server_crawler',
|
||
producer: { name: 'Komodia Inc.', url: 'https://www.komodia.com/' },
|
||
},
|
||
{
|
||
regex: 'Neevabot/[\\d.]+',
|
||
name: 'Neevabot',
|
||
category: 'Search bot',
|
||
url: 'https://neeva.com/neevabot',
|
||
producer: { name: 'Neeva Inc.', url: 'https://neeva.com/' },
|
||
},
|
||
{
|
||
regex: 'LinkPreview/[\\d.]+',
|
||
name: 'LinkPreview',
|
||
category: 'Service Agent',
|
||
url: 'https://www.linkpreview.net/',
|
||
},
|
||
{
|
||
regex: 'JungleKeyThumbnail/[\\d.]+',
|
||
name: 'JungleKeyThumbnail',
|
||
category: 'Crawler',
|
||
url: 'https://junglekey.com/',
|
||
},
|
||
{
|
||
regex: 'rocketmonitor(?: |bot/)[\\d.]+',
|
||
name: 'RocketMonitorBot',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html',
|
||
producer: { name: 'Radio Mast, Inc.', url: 'https://www.radiomast.io/' },
|
||
},
|
||
{
|
||
regex: 'SitemapParser-VIPnytt/[\\d.]+',
|
||
name: 'SitemapParser-VIPnytt',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/VIPnytt/SitemapParser/',
|
||
},
|
||
{
|
||
regex: '^Turnitin',
|
||
name: 'Turnitin',
|
||
category: 'Crawler',
|
||
url: 'https://turnitin.com/robot/crawlerinfo.html',
|
||
},
|
||
{
|
||
regex: 'DMBrowser/[\\d.]+|DMBrowser-[UB]V',
|
||
name: 'Dotcom Monitor',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.dotcom-monitor.com',
|
||
},
|
||
{ regex: 'ThinkChaos/', name: 'ThinkChaos', category: 'Crawler' },
|
||
{
|
||
regex: 'DataForSeoBot',
|
||
name: 'DataForSeoBot',
|
||
category: 'Crawler',
|
||
url: 'https://dataforseo.com/dataforseo-bot',
|
||
},
|
||
{
|
||
regex: 'Discordbot/[\\d.]+',
|
||
name: 'Discord Bot',
|
||
category: 'Service Agent',
|
||
url: 'https://discordapp.com',
|
||
},
|
||
{
|
||
regex: 'Linespider/[\\d.]+',
|
||
name: 'Linespider',
|
||
category: 'Crawler',
|
||
url: 'https://lin.ee/4dwXkTH',
|
||
},
|
||
{
|
||
regex: 'Cincraw/[\\d.]+',
|
||
name: 'Cincraw',
|
||
category: 'Crawler',
|
||
url: 'http://cincrawdata.net/bot/',
|
||
},
|
||
{
|
||
regex: 'CISPA Web Analyzer',
|
||
name: 'CISPA Web Analyzer',
|
||
category: 'Crawler',
|
||
url: 'https://notify.cispa.de/',
|
||
producer: {
|
||
name: 'CISPA - Helmholtz-Zentrum für Informationssicherheit gGmbH',
|
||
url: 'https://cispa.de/en',
|
||
},
|
||
},
|
||
{
|
||
regex: 'IonCrawl',
|
||
name: 'IONOS Crawler',
|
||
category: 'Crawler',
|
||
url: 'https://www.ionos.de/terms-gtc/faq-crawler-en/',
|
||
producer: { name: 'IONOS SE', url: 'https://www.ionos.de/' },
|
||
},
|
||
{
|
||
regex: 'Crawldad',
|
||
name: 'Crawldad',
|
||
category: 'Crawler',
|
||
url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972',
|
||
},
|
||
{
|
||
regex: 'https://securitytxt-scan\\.cs\\.hm\\.edu/',
|
||
name: 'security.txt scanserver',
|
||
category: 'Security Checker',
|
||
url: 'https://securitytxt-scan.cs.hm.edu/',
|
||
producer: {
|
||
name: 'Hochschule für angewandte Wissenschaften München',
|
||
url: 'https://www.hm.edu/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'TigerBot/[\\d.]+',
|
||
name: 'TigerBot',
|
||
category: 'Crawler',
|
||
url: 'https://tiger.ch/',
|
||
},
|
||
{
|
||
regex: 'TestCrawler/[\\d.]+',
|
||
name: 'TestCrawler',
|
||
category: 'Crawler',
|
||
url: 'https://www.comcepta.com/',
|
||
},
|
||
{
|
||
regex: 'CrowdTanglebot/[\\d.]+',
|
||
name: 'CrowdTangle',
|
||
category: 'Crawler',
|
||
url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot',
|
||
producer: {
|
||
name: 'CrowdTangle, Inc.',
|
||
url: 'https://www.crowdtangle.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Sellers\\.Guide Crawler by Primis',
|
||
name: 'Sellers.Guide',
|
||
category: 'Crawler',
|
||
url: 'https://sellers.guide/',
|
||
producer: {
|
||
name: 'McCann Disciplines, Ltd.',
|
||
url: 'https://www.primis.tech/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'OnalyticaBot',
|
||
name: 'Onalytica',
|
||
category: 'Crawler',
|
||
url: 'https://www.airslate.com/bot/explore/onalytica-bot',
|
||
producer: { name: 'airSlate, Inc.', url: 'https://www.airslate.com/' },
|
||
},
|
||
{
|
||
regex: 'deepnoc',
|
||
name: 'deepnoc',
|
||
category: 'Crawler',
|
||
url: 'https://deepnoc.com/bot',
|
||
producer: { name: 'deepnoc, GmbH', url: 'https://deepnoc.com/' },
|
||
},
|
||
{
|
||
regex: 'Newslitbot/[\\d.]+',
|
||
name: 'Newslitbot',
|
||
category: 'Crawler',
|
||
url: 'https://www.newslit.co/',
|
||
producer: { name: 'Newslit, LLC.', url: 'https://www.newslit.co/' },
|
||
},
|
||
{
|
||
regex: 'um-(?:ANS|CC|FC|IC|LN)/[\\d.]+',
|
||
name: 'uMBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.ubermetrics-technologies.com/',
|
||
producer: {
|
||
name: 'Ubermetrics Technologies GmbH',
|
||
url: 'https://www.ubermetrics-technologies.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Abonti/[\\d.]+',
|
||
name: 'Abonti',
|
||
category: 'Crawler',
|
||
url: 'http://abonti.com/',
|
||
},
|
||
{
|
||
regex: 'collection@infegy\\.com',
|
||
name: 'Infegy',
|
||
category: 'Crawler',
|
||
url: 'https://infegy.com/',
|
||
producer: { name: 'Infegy, Inc.', url: 'https://infegy.com/' },
|
||
},
|
||
{
|
||
regex: 'HTTP Banner Detection \\(https://security\\.ipip\\.net\\)',
|
||
name: 'IPIP',
|
||
category: 'Security Checker',
|
||
url: 'https://security.ipip.net/',
|
||
producer: {
|
||
name: 'Beijing Tiantexin Tech. Co., Ltd.',
|
||
url: 'https://en.ipip.net/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'ev-crawler/[\\d.]+',
|
||
name: 'Headline',
|
||
category: 'Crawler',
|
||
url: 'https://headline.com/legal/crawler',
|
||
producer: {
|
||
name: 'e.ventures Managementgesellschaft mbH',
|
||
url: 'https://headline.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'webprosbot/[\\d.]+',
|
||
name: 'WebPros',
|
||
category: 'Crawler',
|
||
url: 'https://webpros.com/',
|
||
producer: { name: 'WebPros Holdco B.V.', url: 'https://webpros.com/' },
|
||
},
|
||
{
|
||
regex: 'ELB-HealthChecker',
|
||
name: 'Amazon ELB',
|
||
category: 'Site Monitor',
|
||
url: 'https://aws.amazon.com/elasticloadbalancing/',
|
||
producer: { name: 'Amazon.com, Inc.', url: 'https://www.amazon.com/' },
|
||
},
|
||
{
|
||
regex: 'Wheregoes\\.com Redirect Checker/[\\d.]+',
|
||
name: 'WhereGoes',
|
||
category: 'Crawler',
|
||
url: 'https://wheregoes.com/',
|
||
},
|
||
{
|
||
regex: 'project_patchwatch',
|
||
name: 'Project Patchwatch',
|
||
category: 'Crawler',
|
||
url: 'http://66.240.192.82/',
|
||
},
|
||
{
|
||
regex: 'InternetMeasurement/[\\d.]+',
|
||
name: 'InternetMeasurement',
|
||
category: 'Crawler',
|
||
url: 'https://internet-measurement.com/',
|
||
},
|
||
{
|
||
regex: 'DomainAppender /[\\d.]+',
|
||
name: 'DomainAppender',
|
||
category: 'Crawler',
|
||
url: 'https://www.profound.net/product/domain_append/',
|
||
producer: {
|
||
name: 'Profound Networks, LLC',
|
||
url: 'https://www.profound.net/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'FreeWebMonitoring SiteChecker/[\\d.]+',
|
||
name: 'FreeWebMonitoring',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.freewebmonitoring.com/bot.html',
|
||
producer: {
|
||
name: 'GreenWave Online, Inc.',
|
||
url: 'http://www.greenwaveonline.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Page Modified Pinger',
|
||
name: 'Page Modified Pinger',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.pagemodified.com/',
|
||
producer: {
|
||
name: 'Valley Hosting, LLC',
|
||
url: 'https://www.pagemodified.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'adstxtlab\\.com',
|
||
name: 'adstxtlab.com',
|
||
category: 'Crawler',
|
||
url: 'https://adstxtlab.com/validator.php',
|
||
producer: { name: 'Jaohawi AB', url: 'https://adstxtlab.com/' },
|
||
},
|
||
{
|
||
regex: 'Iframely/[\\d.]+',
|
||
name: 'Iframely',
|
||
category: 'Crawler',
|
||
url: 'https://iframely.com/',
|
||
producer: { name: 'Itteco Software, Corp.', url: 'https://iframely.com/' },
|
||
},
|
||
{
|
||
regex: 'DomainStatsBot/[\\d.]+',
|
||
name: 'DomainStatsBot',
|
||
category: 'Crawler',
|
||
url: 'https://domainstats.com/pages/our-bot',
|
||
producer: { name: 'Domainstats Ltd', url: 'https://domainstats.com/' },
|
||
},
|
||
{
|
||
regex: 'aiHitBot/[\\d.]+',
|
||
name: 'aiHitBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.aihitdata.com/about',
|
||
},
|
||
{
|
||
regex: 'DomainCrawler/',
|
||
name: 'DomainCrawler',
|
||
category: 'Crawler',
|
||
url: 'https://domaincrawler.com/about-us/',
|
||
},
|
||
{ regex: 'DNSResearchBot', name: 'DNSResearchBot', category: 'Crawler' },
|
||
{ regex: 'GitCrawlerBot', name: 'GitCrawlerBot', category: 'Crawler' },
|
||
{
|
||
regex: 'AdAuth/[\\d.]+',
|
||
name: 'AdAuth',
|
||
category: 'Crawler',
|
||
url: 'https://www.adauth.com',
|
||
},
|
||
{
|
||
regex: 'faveeo\\.com',
|
||
name: 'Faveeo',
|
||
category: 'Crawler',
|
||
url: 'http://www.faveeo.com',
|
||
},
|
||
{
|
||
regex: 'kozmonavt\\.',
|
||
name: 'Kozmonavt',
|
||
category: 'Crawler',
|
||
url: 'https://kozmonavt.ml',
|
||
},
|
||
{
|
||
regex: 'CriteoBot/',
|
||
name: 'CriteoBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.criteo.com/criteo-crawler/',
|
||
},
|
||
{
|
||
regex: 'PayPal IPN',
|
||
name: 'PayPal IPN',
|
||
category: 'Service Agent',
|
||
url: 'https://developer.paypal.com/api/nvp-soap/ipn/IPNIntro/',
|
||
producer: { name: 'PayPal, Inc.', url: 'https://www.paypal.com/' },
|
||
},
|
||
{
|
||
regex: 'MaCoCu',
|
||
name: 'MaCoCu',
|
||
category: 'Crawler',
|
||
url: 'https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data/',
|
||
producer: {
|
||
name: 'Jožef Stefan Institute',
|
||
url: 'https://www.ijs.si/ijsw/JSI',
|
||
},
|
||
},
|
||
{
|
||
regex: 'CLASSLA',
|
||
name: 'CLASSLA-web',
|
||
category: 'Crawler',
|
||
url: 'https://www.clarin.si/info/classla-web-crawler/',
|
||
producer: {
|
||
name: 'Jožef Stefan Institute',
|
||
url: 'https://www.ijs.si/ijsw/JSI',
|
||
},
|
||
},
|
||
{
|
||
regex: 'dnt-policy@eff\\.org',
|
||
name: 'EFF Do Not Track Verifier',
|
||
category: 'Crawler',
|
||
url: 'https://www.eff.org/issues/do-not-track',
|
||
producer: {
|
||
name: 'Electronic Frontier Foundation',
|
||
url: 'https://www.eff.org/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'InfoTigerBot',
|
||
name: 'InfoTigerBot',
|
||
category: 'Crawler',
|
||
url: 'https://infotiger.com/bot',
|
||
producer: { name: 'Infotiger UG', url: 'https://infotiger.com/' },
|
||
},
|
||
{
|
||
regex: '(?:Birdcrawlerbot|CrawlaDeBot)',
|
||
name: 'Birdcrawlerbot',
|
||
category: 'Crawler',
|
||
url: 'https://crawla.de/de/index.php',
|
||
producer: {
|
||
name: 'Swoppen Systems GmbH',
|
||
url: 'https://www.swoppen.com/de',
|
||
},
|
||
},
|
||
{
|
||
regex: 'ScamadviserExternalHit/[\\d.]+',
|
||
name: 'Scamadviser External Hit',
|
||
category: 'Crawler',
|
||
url: 'https://www.scamadviser.com/',
|
||
producer: {
|
||
name: 'Ecommerce Operations B.V.',
|
||
url: 'https://www.scamadviser.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'ZaldamoSearchBot',
|
||
name: 'Zaldamo',
|
||
category: 'Crawler',
|
||
url: 'https://www.zaldamo.com/search.html',
|
||
producer: { name: 'Zaldamo, LLC.', url: 'https://www.zaldamo.com/' },
|
||
},
|
||
{
|
||
regex: 'AFB/[\\d.]+',
|
||
name: 'Allloadin Favicon Bot',
|
||
category: 'Crawler',
|
||
url: 'https://allloadin.com/',
|
||
},
|
||
{
|
||
regex: 'SeolytBot/[\\d.]+',
|
||
name: 'Seolyt Bot',
|
||
category: 'Crawler',
|
||
url: 'https://seolyt.com',
|
||
},
|
||
{
|
||
regex: 'LinkWalker/[\\d.]+',
|
||
name: 'LinkWalker',
|
||
category: 'Crawler',
|
||
url: 'https://www.phishlabs.com/',
|
||
producer: { name: 'PhishLabs, Inc.', url: 'https://www.phishlabs.com/' },
|
||
},
|
||
{
|
||
regex: 'RenovateBot/[\\d.]+',
|
||
name: 'RenovateBot',
|
||
category: 'Security Checker',
|
||
url: 'https://github.com/renovatebot/renovate',
|
||
producer: {
|
||
name: 'White Source Ltd.',
|
||
url: 'https://www.mend.io/free-developer-tools/renovate/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'INETDEX-BOT/[\\d.]+',
|
||
name: 'Inetdex Bot',
|
||
category: 'Crawler',
|
||
url: 'https://www.inetdex.com/',
|
||
},
|
||
{
|
||
regex: 'NETZZAPPEN',
|
||
name: 'NETZZAPPEN',
|
||
category: 'Crawler',
|
||
url: 'https://www.netzzappen.com/',
|
||
producer: { name: 'Marc Huemer', url: 'https://www.netzzappen.com/' },
|
||
},
|
||
{
|
||
regex: 'panscient\\.com',
|
||
name: 'Panscient',
|
||
category: 'Crawler',
|
||
url: 'https://www.panscient.com/faq.htm',
|
||
producer: { name: 'Panscient, Inc.', url: 'https://www.panscient.com/' },
|
||
},
|
||
{
|
||
regex: 'research@pdrlabs\\.net',
|
||
name: 'PDR Labs',
|
||
category: 'Security Checker',
|
||
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/',
|
||
producer: {
|
||
name: 'PDR Labs',
|
||
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Nicecrawler/[\\d.]+',
|
||
name: 'NiceCrawler',
|
||
category: 'Crawler',
|
||
url: 'https://www.nicecrawler.com/',
|
||
producer: { name: 'Intelium Corp.', url: 'https://www.intelium.com/' },
|
||
},
|
||
{
|
||
regex: 't3versionsBot/[\\d.]+',
|
||
name: 't3versions',
|
||
category: 'Crawler',
|
||
url: 'https://www.t3versions.com/bot',
|
||
producer: { name: 'Torben Hansen', url: 'https://www.t3versions.com/' },
|
||
},
|
||
{
|
||
regex: 'Crawlson/[\\d.]+',
|
||
name: 'Crawlson',
|
||
category: 'Crawler',
|
||
url: 'https://www.crawlson.com/about',
|
||
producer: { name: 'Crawlson', url: 'https://www.crawlson.com/' },
|
||
},
|
||
{
|
||
regex: 'tchelebi/[\\d.]+',
|
||
name: 'tchelebi',
|
||
category: 'Crawler',
|
||
url: 'https://tchelebi.io/',
|
||
producer: { name: 'NormShield, Inc.', url: 'https://blackkite.com/' },
|
||
},
|
||
{
|
||
regex: 'JobboerseBot',
|
||
name: 'JobboerseBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.xing.com/jobs',
|
||
producer: { name: 'New Work SE', url: 'https://www.xing.com/' },
|
||
},
|
||
{
|
||
regex: 'RepoLookoutBot/v?[\\d.]+',
|
||
name: 'Repo Lookout',
|
||
category: 'Security Checker',
|
||
url: 'https://www.repo-lookout.org/',
|
||
producer: { name: 'Crissy Field GmbH', url: 'https://www.crissyfield.de/' },
|
||
},
|
||
{
|
||
regex: 'PATHspider',
|
||
name: 'PATHspider',
|
||
category: 'Security Checker',
|
||
url: 'https://pathspider.net/',
|
||
producer: { name: 'MAMI Project', url: 'https://mami-project.eu/' },
|
||
},
|
||
{
|
||
regex: 'everyfeed-spider/[\\d.]+',
|
||
name: 'Everyfeed',
|
||
url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'Exchange check',
|
||
name: 'Exchange check',
|
||
category: 'Security Checker',
|
||
url: 'https://github.com/GossiTheDog/scanning',
|
||
producer: { name: 'Kevin Beaumont', url: 'https://doublepulsar.com/' },
|
||
},
|
||
{
|
||
regex: 'Sublinq',
|
||
name: 'Sublinq',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20220626191617/https://sublinq.com/',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'Gregarius/[\\d.]+',
|
||
name: 'Gregarius',
|
||
category: 'Feed Fetcher',
|
||
url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/',
|
||
producer: { name: '', url: '' },
|
||
},
|
||
{
|
||
regex: 'COMODO DCV',
|
||
name: 'COMODO DCV',
|
||
category: 'Service Agent',
|
||
url: 'https://www.comodo.com/',
|
||
producer: {
|
||
name: 'Comodo Security Solutions, Inc.',
|
||
url: 'https://www.comodo.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Sectigo DCV|acme\\.sectigo\\.com',
|
||
name: 'Sectigo DCV',
|
||
category: 'Service Agent',
|
||
url: 'https://sectigo.com/',
|
||
producer: { name: 'Sectigo Limited', url: 'https://sectigo.com/' },
|
||
},
|
||
{
|
||
regex:
|
||
'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/[\\d.]+',
|
||
name: 'KlarnaBot',
|
||
category: 'Crawler',
|
||
url: 'https://docs.klarna.com/klarna-bot/',
|
||
producer: { name: 'Klarna Bank AB', url: 'https://www.klarna.com/' },
|
||
},
|
||
{
|
||
regex: 'Taboolabot/[\\d.]+',
|
||
name: 'Taboolabot',
|
||
category: 'Crawler',
|
||
url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler',
|
||
producer: { name: 'Taboola, Inc.', url: 'https://www.taboola.com/' },
|
||
},
|
||
{
|
||
regex: 'Asana/[\\d.]+',
|
||
name: 'Asana',
|
||
category: 'Crawler',
|
||
url: 'https://asana.com/',
|
||
producer: { name: 'Asana, Inc.', url: 'https://asana.com/' },
|
||
},
|
||
{
|
||
regex: 'Chrome Privacy Preserving Prefetch Proxy',
|
||
name: 'Chrome Privacy Preserving Prefetch Proxy',
|
||
category: 'Service Agent',
|
||
url: 'https://developer.chrome.com/blog/private-prefetch-proxy/',
|
||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||
},
|
||
{
|
||
regex: 'URLinspectorBot/[\\d.]+',
|
||
name: 'URLinspector',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.urlinspector.com/bot/',
|
||
producer: {
|
||
name: 'LinkResearchTools GmbH',
|
||
url: 'https://www.linkresearchtools.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'EntferBot/[\\d.]+',
|
||
name: 'Entfer',
|
||
category: 'Crawler',
|
||
url: 'https://entfer.com/',
|
||
producer: { name: 'Entfer Ltd.', url: 'https://entfer.com/' },
|
||
},
|
||
{
|
||
regex: 'TagInspector/[\\d.]+',
|
||
name: 'Tag Inspector',
|
||
category: 'Crawler',
|
||
url: 'https://taginspector.com/',
|
||
producer: { name: 'InfoTrust, LLC', url: 'https://infotrust.com/' },
|
||
},
|
||
{
|
||
regex: 'pageburst',
|
||
name: 'Pageburst',
|
||
category: 'Crawler',
|
||
url: 'https://pageburstls.elsevier.com/',
|
||
producer: { name: 'Elsevier Ltd', url: 'https://www.elsevier.com/' },
|
||
},
|
||
{
|
||
regex: '.+diffbot',
|
||
name: 'Diffbot',
|
||
category: 'Crawler',
|
||
url: 'https://docs.diffbot.com/docs/getting-started-with-crawl',
|
||
producer: {
|
||
name: 'Diffbot Technologies Corp.',
|
||
url: 'https://www.diffbot.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'DisqusAdstxtCrawler/[\\d.]+',
|
||
name: 'Disqus',
|
||
category: 'Crawler',
|
||
url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide',
|
||
producer: { name: 'Disqus, Inc.', url: 'https://disqus.com/' },
|
||
},
|
||
{
|
||
regex: 'startmebot/[\\d.]+',
|
||
name: 'start.me',
|
||
category: 'Crawler',
|
||
url: 'https://about.start.me/',
|
||
producer: { name: 'start.me BV', url: 'https://about.start.me/' },
|
||
},
|
||
{
|
||
regex: '2ip bot/[\\d.]+',
|
||
name: '2ip',
|
||
category: 'Crawler',
|
||
url: 'https://2ip.io/',
|
||
},
|
||
{
|
||
regex: 'ReqBin Curl Client/[\\d.]+',
|
||
name: 'ReqBin',
|
||
category: 'Crawler',
|
||
url: 'https://reqbin.com/curl',
|
||
},
|
||
{
|
||
regex: 'XoviBot/[\\d.]+',
|
||
name: 'XoviBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.xovibot.net',
|
||
producer: { name: 'Xovi GmbH', url: 'http://www.xovi.de' },
|
||
},
|
||
{
|
||
regex: 'Overcast/[\\d.]+ Podcast Sync',
|
||
name: 'Overcast Podcast Sync',
|
||
category: 'Service Agent',
|
||
url: 'https://overcast.fm/podcasterinfo',
|
||
},
|
||
{
|
||
regex: '^Verity/[\\d.]+',
|
||
name: 'GumGum Verity',
|
||
category: 'Service Agent',
|
||
url: 'https://gumgum.com/verity',
|
||
},
|
||
{
|
||
regex: 'hackermention',
|
||
name: 'hackermention',
|
||
category: 'Feed Reader',
|
||
url: 'https://github.com/snarfed/hackermention',
|
||
},
|
||
{
|
||
regex: 'BitSightBot/[\\d.]+',
|
||
name: 'BitSight',
|
||
category: 'Security Checker',
|
||
url: 'https://www.bitsight.com/',
|
||
producer: {
|
||
name: 'BitSight Technologies, Inc.',
|
||
url: 'https://www.bitsight.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Ezgif/[\\d.]+',
|
||
name: 'Ezgif',
|
||
category: 'Service Agent',
|
||
url: 'https://ezgif.com/about',
|
||
},
|
||
{
|
||
regex: 'intelx\\.io_bot',
|
||
name: 'Intelligence X',
|
||
category: 'Crawler',
|
||
url: 'https://intelx.io/',
|
||
producer: {
|
||
name: 'Kleissner Investments s.r.o.',
|
||
url: 'https://intelx.io/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'FemtosearchBot/[\\d.]+',
|
||
name: 'Femtosearch',
|
||
category: 'Crawler',
|
||
url: 'http://femtosearch.com/',
|
||
producer: {
|
||
name: 'Grier Forensics, LLC',
|
||
url: 'https://www.grierforensics.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'AdsTxtCrawler/[\\d.]+',
|
||
name: 'AdsTxtCrawler',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler',
|
||
producer: {
|
||
name: 'IAB Technology Laboratory, Inc.',
|
||
url: 'https://iabtechlab.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Morningscore',
|
||
name: 'Morningscore Bot',
|
||
category: 'Crawler',
|
||
url: 'https://morningscore.io/',
|
||
producer: { name: 'Morningscore', url: 'https://morningscore.io/' },
|
||
},
|
||
{
|
||
regex: 'Uptime-Kuma/[\\d.]+',
|
||
name: 'Uptime-Kuma',
|
||
category: 'Site Monitor',
|
||
url: 'https://github.com/louislam/uptime-kuma',
|
||
},
|
||
{
|
||
regex: 'OAI-SearchBot',
|
||
name: 'OAI-SearchBot',
|
||
category: 'Crawler',
|
||
url: 'https://platform.openai.com/docs/bots',
|
||
producer: { name: 'OpenAI OpCo, LLC', url: 'https://openai.com/' },
|
||
},
|
||
{
|
||
regex: 'GPTBot/[\\d.]+',
|
||
name: 'GPTBot',
|
||
category: 'Crawler',
|
||
url: 'https://platform.openai.com/docs/bots',
|
||
producer: { name: 'OpenAI OpCo, LLC', url: 'https://openai.com/' },
|
||
},
|
||
{
|
||
regex: 'ChatGPT-User',
|
||
name: 'ChatGPT-User',
|
||
category: 'Crawler',
|
||
url: 'https://platform.openai.com/docs/bots',
|
||
producer: { name: 'OpenAI OpCo, LLC', url: 'https://openai.com/' },
|
||
},
|
||
{
|
||
regex: 'BrightEdge Crawler/[\\d.]+',
|
||
name: 'BrightEdge',
|
||
category: 'Crawler',
|
||
url: 'https://www.brightedge.com/',
|
||
producer: {
|
||
name: 'BrightEdge Technologies, Inc',
|
||
url: 'https://www.brightedge.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'sfFeedReader/[\\d.]+',
|
||
name: 'sfFeedReader',
|
||
url: 'https://github.com/diem-project/sfFeed2Plugin',
|
||
category: 'Feed Fetcher',
|
||
},
|
||
{
|
||
regex: 'cyberscan\\.io',
|
||
name: 'Cyberscan',
|
||
category: 'Security Checker',
|
||
url: 'https://www.cyberscan.io/',
|
||
producer: { name: 'DGC Verwaltungs GmbH', url: 'https://dgc.org/' },
|
||
},
|
||
{
|
||
regex: 'researchscan\\.comsys\\.rwth-aachen\\.de',
|
||
name: 'Research Scan',
|
||
category: 'Crawler',
|
||
url: 'http://researchscan.comsys.rwth-aachen.de/',
|
||
producer: {
|
||
name: 'RWTH Aachen University',
|
||
url: 'https://www.comsys.rwth-aachen.de/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'newspaper/[\\d.]+',
|
||
name: 'Scraping Robot',
|
||
category: 'Crawler',
|
||
url: 'https://scrapingrobot.com/',
|
||
producer: { name: 'Sprious LLC', url: 'https://sprious.com/' },
|
||
},
|
||
{
|
||
regex: 'Ant(?:\\.com beta|Bot)(?:/([\\d+.]+))?',
|
||
name: 'Ant',
|
||
category: 'Crawler',
|
||
url: 'https://www.ant.com/',
|
||
producer: { name: 'Ant.com Ltd.', url: 'https://www.ant.com/' },
|
||
},
|
||
{
|
||
regex: 'WebwikiBot/[\\d.]+',
|
||
name: 'Webwiki',
|
||
category: 'Crawler',
|
||
url: 'https://www.webwiki.com/',
|
||
producer: { name: 'webwiki GmbH', url: 'https://www.webwiki.com/' },
|
||
},
|
||
{
|
||
regex: 'phpMyAdmin',
|
||
name: 'phpMyAdmin',
|
||
category: 'Service Agent',
|
||
url: 'https://www.phpmyadmin.net/',
|
||
},
|
||
{
|
||
regex: 'Matomo/[\\d.]+',
|
||
name: 'Matomo',
|
||
category: 'Service Agent',
|
||
url: 'https://github.com/matomo-org/matomo',
|
||
producer: { name: 'InnoCraft Ltd', url: 'https://matomo.org/' },
|
||
},
|
||
{
|
||
regex: 'Prometheus/[\\d.]+',
|
||
name: 'Prometheus',
|
||
category: 'Service Agent',
|
||
url: 'https://github.com/prometheus/prometheus',
|
||
producer: { name: 'The Linux Foundation', url: 'https://www.cncf.io/' },
|
||
},
|
||
{
|
||
regex: 'ArchiveTeam ArchiveBot',
|
||
name: 'ArchiveBot',
|
||
category: 'Crawler',
|
||
url: 'https://wiki.archiveteam.org/index.php?title=ArchiveBot',
|
||
producer: { name: 'ArchiveTeam', url: 'https://wiki.archiveteam.org/' },
|
||
},
|
||
{
|
||
regex: 'MADBbot/[\\d.]+',
|
||
name: 'MADBbot',
|
||
category: 'Crawler',
|
||
url: 'https://madb.zapto.org/bot.html',
|
||
},
|
||
{
|
||
regex: 'MeltwaterNews',
|
||
name: 'MeltwaterNews',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'Meltwater Deutschland GmbH',
|
||
url: 'https://www.meltwater.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'owler',
|
||
name: 'OWLer',
|
||
category: 'Crawler',
|
||
url: 'https://openwebsearch.eu/owler/',
|
||
producer: {
|
||
name: 'Open Search Foundation e.V.',
|
||
url: 'https://openwebsearch.eu/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'bbc\\.co\\.uk/display/men/Page\\+Monitor',
|
||
name: 'BBC Page Monitor',
|
||
category: 'Site Monitor',
|
||
url: 'https://confluence.dev.bbc.co.uk/display/men/Page+Monitor',
|
||
producer: { name: 'BBC', url: 'https://www.bbc.com/' },
|
||
},
|
||
{
|
||
regex: 'BBC-Forge-URL-Monitor-Twisted',
|
||
name: 'BBC Forge URL Monitor',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.bbc.com/',
|
||
producer: { name: 'BBC', url: 'https://www.bbc.com/' },
|
||
},
|
||
{
|
||
regex: 'ClaudeBot',
|
||
name: 'ClaudeBot',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/ClaudeBot/ClaudeBot',
|
||
},
|
||
{
|
||
regex: 'Imagesift',
|
||
name: 'ImageSift',
|
||
category: 'Crawler',
|
||
url: 'https://imagesift.com/',
|
||
producer: { name: 'Castle Global, Inc.', url: 'https://thehive.ai/' },
|
||
},
|
||
{
|
||
regex: 'TactiScout',
|
||
name: 'TactiScout',
|
||
category: 'Crawler',
|
||
url: 'https://find-it.world/TempCrawl/Crawltheque.php',
|
||
producer: { name: 'Tactikast' },
|
||
},
|
||
{
|
||
regex: 'Brightbot ([\\d+.]+)',
|
||
name: 'BrightBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.brightbot.app/',
|
||
producer: {
|
||
name: 'Bright Interactive Ltd',
|
||
url: 'https://www.builtbybright.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'DaspeedBot/([\\d+.]+)',
|
||
name: 'DaspeedBot',
|
||
category: 'Crawler',
|
||
url: 'https://daspeed.io/',
|
||
producer: { name: 'DAWAP SARL', url: 'https://dawap.fr/' },
|
||
},
|
||
{
|
||
regex: 'StractBot(?:/([\\d+.]+))?',
|
||
name: 'Stract',
|
||
category: 'Crawler',
|
||
url: 'https://stract.com/webmasters',
|
||
producer: { name: 'Stract', url: 'https://github.com/StractOrg/stract/' },
|
||
},
|
||
{
|
||
regex: 'GeedoBot(?:/([\\d+.]+))?',
|
||
name: 'GeedoBot',
|
||
category: 'Crawler',
|
||
url: 'https://geedo.com/bot/',
|
||
},
|
||
{
|
||
regex: 'GeedoProductSearch',
|
||
name: 'GeedoProductSearch',
|
||
category: 'Crawler',
|
||
url: 'https://geedo.com/product-search/',
|
||
},
|
||
{
|
||
regex: 'BackupLand(?:/([\\d+.]+))?',
|
||
name: 'BackupLand',
|
||
category: 'Crawler',
|
||
url: 'https://go.backupland.com/',
|
||
producer: { name: 'ООО «КВАРТА»', url: 'https://go.backupland.com/' },
|
||
},
|
||
{
|
||
regex: 'Konturbot(?:/([\\d+.]+))?',
|
||
name: 'Konturbot',
|
||
category: 'Crawler',
|
||
url: 'https://kontur.ru/',
|
||
producer: { name: 'АО «ПФ «СКБ Контур»', url: 'https://kontur.ru/' },
|
||
},
|
||
{
|
||
regex: 'keys-so-bot',
|
||
name: 'Keys.so',
|
||
category: 'Crawler',
|
||
url: 'https://www.keys.so/',
|
||
producer: { name: 'ООО «МОДЕСКО»', url: 'https://www.modesco.ru/' },
|
||
},
|
||
{
|
||
regex: 'LetsearchBot(?:/([\\d+.]+))?',
|
||
name: 'LetSearch',
|
||
category: 'Crawler',
|
||
url: 'https://letsearch.ru/bots',
|
||
},
|
||
{
|
||
regex: 'Example3(?:/([\\d+.]+))?',
|
||
name: 'Example3',
|
||
category: 'Crawler',
|
||
url: 'https://www.example3.com/',
|
||
},
|
||
{
|
||
regex: 'StatOnlineRuBot(?:/([\\d+.]+))?',
|
||
name: 'StatOnline.ru',
|
||
category: 'Crawler',
|
||
url: 'https://statonline.ru/',
|
||
producer: {
|
||
name: 'ООО «Регистратор доменных имен РЕГ.РУ»',
|
||
url: 'https://statonline.ru/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Spawning-AI',
|
||
name: 'Spawning AI',
|
||
category: 'Crawler',
|
||
url: 'https://spawning.ai/',
|
||
producer: { name: 'Spawning, Inc', url: 'https://spawning.ai/' },
|
||
},
|
||
{
|
||
regex: 'domain research project',
|
||
name: 'Domain Research Project',
|
||
category: 'Crawler',
|
||
url: 'https://trentwil.es/domains.html',
|
||
producer: { name: 'Trent Wiles', url: 'https://trentwil.es/' },
|
||
},
|
||
{
|
||
regex: 'getodin\\.com',
|
||
name: 'Odin',
|
||
category: 'Security Checker',
|
||
url: 'https://docs.getodin.com/',
|
||
producer: { name: 'Cyble Inc.', url: 'https://cyble.com/' },
|
||
},
|
||
{
|
||
regex: 'YouBot',
|
||
name: 'YouBot',
|
||
category: 'Crawler',
|
||
url: 'https://about.you.com/youbot/',
|
||
producer: { name: 'SuSea, Inc.', url: 'https://you.com/' },
|
||
},
|
||
{
|
||
regex: 'SiteScoreBot',
|
||
name: 'SiteScore',
|
||
category: 'Crawler',
|
||
url: 'https://sitescore.ai/',
|
||
},
|
||
{
|
||
regex: 'MBCrawler',
|
||
name: 'Monitor Backlinks',
|
||
category: 'Crawler',
|
||
url: 'https://www.seoptimer.com/monitor-backlinks/',
|
||
producer: { name: 'SEOptimer', url: 'https://www.seoptimer.com/' },
|
||
},
|
||
{
|
||
regex: 'mariadb-mysql-kbs-bot',
|
||
name: 'MariaDB/MySQL Knowledge Base',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/williamdes/mariadb-mysql-kbs',
|
||
producer: { name: 'WDES SAS', url: 'https://wdes.fr/en/' },
|
||
},
|
||
{
|
||
regex: 'GitHubCopilotChat',
|
||
name: 'GitHubCopilotChat',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/aaamoon/copilot-gpt4-service',
|
||
},
|
||
{
|
||
regex: '^pdrl\\.fm',
|
||
name: 'Podroll Analyzer',
|
||
category: 'Crawler',
|
||
url: 'https://podroll.fm',
|
||
},
|
||
{
|
||
regex: 'PodUptime/',
|
||
name: 'PodUptime',
|
||
category: 'Site Monitor',
|
||
url: 'https://poduptime.com',
|
||
},
|
||
{
|
||
regex: 'anthropic-ai',
|
||
name: 'Anthropic AI',
|
||
category: 'Crawler',
|
||
url: 'https://www.anthropic.com/',
|
||
producer: { name: 'Anthropic, PBC', url: 'https://www.anthropic.com/' },
|
||
},
|
||
{
|
||
regex: 'NetpeakCheckerBot/[\\d.]+',
|
||
name: 'Netpeak Checker',
|
||
category: 'Crawler',
|
||
url: 'https://netpeaksoftware.com/checker',
|
||
producer: { name: 'Netpeak LTD', url: 'https://netpeaksoftware.com/' },
|
||
},
|
||
{
|
||
regex: 'SandobaCrawler/[\\d.]+',
|
||
name: 'Sandoba//Crawler',
|
||
category: 'Crawler',
|
||
url: 'https://www.sandoba.com/en/crawler/',
|
||
producer: {
|
||
name: 'SANDOBA//EBUSINESS SOLUTIONS',
|
||
url: 'https://www.sandoba.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'SirdataBot',
|
||
name: 'Sirdata',
|
||
category: 'Crawler',
|
||
url: 'https://semantic-api.docs.sirdata.net/contextual-api/contextual-api/introduction',
|
||
producer: { name: 'Sirdata SAS', url: 'https://www.sirdata.com/' },
|
||
},
|
||
{
|
||
regex: 'CheckMarkNetwork/[\\d.]+',
|
||
name: 'CheckMark Network',
|
||
category: 'Crawler',
|
||
url: 'https://www.checkmarknetwork.com/spider.html/',
|
||
producer: {
|
||
name: 'Exipert, Inc.',
|
||
url: 'https://www.checkmarknetwork.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'cohere-ai',
|
||
name: 'Cohere AI',
|
||
category: 'Crawler',
|
||
url: 'https://cohere.com/',
|
||
producer: { name: 'Cohere, Inc.', url: 'https://cohere.com/' },
|
||
},
|
||
{
|
||
regex: 'PerplexityBot/[\\d.]+',
|
||
name: 'PerplexityBot',
|
||
category: 'Crawler',
|
||
url: 'https://docs.perplexity.ai/docs/perplexitybot',
|
||
producer: {
|
||
name: 'Perplexity AI, Inc.',
|
||
url: 'https://www.perplexity.ai/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'TTD-Content',
|
||
name: 'The Trade Desk Content',
|
||
category: 'Crawler',
|
||
url: 'https://www.thetradedesk.com/us/ttd-content',
|
||
producer: {
|
||
name: 'The Trade Desk, Inc.',
|
||
url: 'https://www.thetradedesk.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'montastic-monitor',
|
||
name: 'Montastic Monitor',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.montastic.com/',
|
||
producer: { name: 'Metadot, Corp.', url: 'https://www.metadot.com/' },
|
||
},
|
||
{
|
||
regex: 'Ruby, Twurly v[\\d.]+',
|
||
name: 'Twurly',
|
||
category: 'Crawler',
|
||
url: 'https://twurly.org/',
|
||
},
|
||
{
|
||
regex: 'Mixnode(?:(?:Cache)?/[\\d.]+)?',
|
||
name: 'Mixnode',
|
||
category: 'Crawler',
|
||
url: 'https://www.mixnode.com/',
|
||
producer: {
|
||
name: 'Mixnode Technologies, Inc.',
|
||
url: 'https://www.mixnode.com/',
|
||
},
|
||
},
|
||
{ regex: 'CSSCheck/[\\d.]+', name: 'CSSCheck', category: 'Validator' },
|
||
{
|
||
regex: 'MicrosoftPreview/[\\d.]+',
|
||
name: 'Microsoft Preview',
|
||
category: 'Service Agent',
|
||
url: 'https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0',
|
||
producer: {
|
||
name: 'Microsoft Corporation',
|
||
url: 'https://www.microsoft.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 's~virustotalcloud',
|
||
name: 'VirusTotal Cloud',
|
||
category: 'Crawler',
|
||
url: 'https://www.virustotal.com/',
|
||
producer: {
|
||
name: 'Chronicle Security Ireland Limited',
|
||
url: 'https://chronicle.security/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'TinEye/[\\d.]+',
|
||
name: 'TinEye',
|
||
category: 'Crawler',
|
||
url: 'https://tineye.com/',
|
||
producer: { name: 'Idée, Inc.', url: 'https://tineye.com/' },
|
||
},
|
||
{
|
||
regex: 'e~arsnova-filter-system',
|
||
name: 'ARSNova Filter System',
|
||
category: 'Crawler',
|
||
url: 'https://particify.de/en/',
|
||
producer: {
|
||
name: 'Particify Gerhardt & Weingarten OHG',
|
||
url: 'https://particify.de/en/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'botify',
|
||
name: 'Botify',
|
||
category: 'Crawler',
|
||
url: 'https://www.botify.com/',
|
||
producer: { name: 'BOTIFY SAS', url: 'https://www.botify.com/' },
|
||
},
|
||
{
|
||
regex: 'adscanner',
|
||
name: 'Adscanner',
|
||
category: 'Crawler',
|
||
url: 'https://www.alleyesonscreens.com/',
|
||
producer: {
|
||
name: 'AdScanner d.o.o',
|
||
url: 'https://www.alleyesonscreens.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'online-webceo-bot/[\\d.]+',
|
||
name: 'WebCEO',
|
||
category: 'Crawler',
|
||
url: 'https://www.webceo.com/',
|
||
producer: { name: 'WebCEO, LLC', url: 'https://www.webceo.com/' },
|
||
},
|
||
{
|
||
regex: 'NetTrack',
|
||
name: 'NetTrack',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20160607151934/https://nettrack.info/',
|
||
},
|
||
{
|
||
regex: 'htmlyse',
|
||
name: 'htmlyse',
|
||
category: 'Crawler',
|
||
url: 'https://www.htmlyse.com/',
|
||
producer: { name: 'Vistex LTD', url: 'https://www.htmlyse.com/' },
|
||
},
|
||
{
|
||
regex: 'TrendsmapResolver/[\\d.]+',
|
||
name: 'Trendsmap',
|
||
category: 'Crawler',
|
||
url: 'https://www.trendsmap.com/',
|
||
producer: { name: 'Trendsmap Pty Ltd', url: 'https://www.trendsmap.com/' },
|
||
},
|
||
{
|
||
regex: 'Shareaholic(?:bot)?/[\\d.]+',
|
||
name: 'Steve Bot',
|
||
category: 'Crawler',
|
||
url: 'https://www.shareaholic.com/steve',
|
||
producer: {
|
||
name: 'Shareaholic, Inc.',
|
||
url: 'https://www.shareaholic.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'keycdn-tools:',
|
||
name: 'KeyCDN Tools',
|
||
category: 'Service Agent',
|
||
url: 'https://tools.keycdn.com/geo',
|
||
},
|
||
{
|
||
regex: 'keycdn-tools/',
|
||
name: 'KeyCDN Tools',
|
||
category: 'Service Agent',
|
||
url: 'https://tools.keycdn.com/',
|
||
producer: { name: 'proinity LLC', url: 'https://www.keycdn.com/' },
|
||
},
|
||
{
|
||
regex: 'Arquivo-web-crawler',
|
||
name: 'Arquivo.pt',
|
||
category: 'Crawler',
|
||
url: 'https://sobre.arquivo.pt/en/help/crawling-and-archiving-web-content/',
|
||
producer: { name: 'FCT|FCCN', url: 'https://www.fct.pt/' },
|
||
},
|
||
{
|
||
regex: 'WhatsMyIP\\.org',
|
||
name: 'WhatsMyIP.org',
|
||
category: 'Service Agent',
|
||
url: 'https://www.whatsmyip.org/ua/',
|
||
},
|
||
{
|
||
regex: 'SenutoBot/[\\d.]+',
|
||
name: 'Senuto',
|
||
category: 'Crawler',
|
||
url: 'https://www.senuto.com/',
|
||
producer: { name: 'Senuto Sp. z o.o.', url: 'https://www.senuto.com/' },
|
||
},
|
||
{
|
||
regex: 'spaziodati',
|
||
name: 'SpazioDati',
|
||
category: 'Crawler',
|
||
url: 'https://www.spaziodati.eu/',
|
||
producer: { name: 'SpazioDati s.r.l.', url: 'https://www.spaziodati.eu/' },
|
||
},
|
||
{
|
||
regex: 'GozleBot',
|
||
name: 'Gozle',
|
||
category: 'Crawler',
|
||
url: 'https://gozle.com.tm/en/blog/post/1',
|
||
producer: { name: 'Doly Horjun HJ', url: 'https://gozle.com.tm/' },
|
||
},
|
||
{
|
||
regex: 'Quantcastbot/[\\d.]+',
|
||
name: 'Quantcast',
|
||
category: 'Crawler',
|
||
url: 'https://www.quantcast.com/bot/',
|
||
producer: { name: 'Quantcast Corp.', url: 'https://www.quantcast.com/' },
|
||
},
|
||
{
|
||
regex: 'FontRadar',
|
||
name: 'FontRadar',
|
||
category: 'Crawler',
|
||
url: 'https://www.fontradar.com/',
|
||
producer: { name: 'EMDASH SAS', url: 'https://www.fontradar.com/' },
|
||
},
|
||
{
|
||
regex: 'ViberUrlDownloader',
|
||
name: 'Viber Url Downloader',
|
||
category: 'Service Agent',
|
||
url: 'https://www.viber.com/',
|
||
producer: { name: 'Viber Media S.à r.l.', url: 'https://www.viber.com/' },
|
||
},
|
||
{
|
||
regex: '^Zeno$',
|
||
name: 'Zeno',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/internetarchive/Zeno',
|
||
producer: { name: 'The Internet Archive', url: 'https://archive.org/' },
|
||
},
|
||
{
|
||
regex: 'Barracuda Sentinel',
|
||
name: 'Barracuda Sentinel',
|
||
category: 'Service Agent',
|
||
url: 'https://sentinel.barracudanetworks.com/',
|
||
producer: {
|
||
name: 'Barracuda Networks, Inc.',
|
||
url: 'https://www.barracudanetworks.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'RuxitSynthetic/[\\d.]+',
|
||
name: 'RuxitSynthetic',
|
||
category: 'Site Monitor',
|
||
url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164',
|
||
producer: { name: 'Dynatrace LLC', url: 'https://www.dynatrace.com/' },
|
||
},
|
||
{
|
||
regex: 'DynatraceSynthetic/[\\d.]+',
|
||
name: 'DynatraceSynthetic',
|
||
category: 'Site Monitor',
|
||
url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164',
|
||
producer: { name: 'Dynatrace LLC', url: 'https://www.dynatrace.com/' },
|
||
},
|
||
{
|
||
regex: 'sitebulb',
|
||
name: 'Sitebulb',
|
||
category: 'Crawler',
|
||
url: 'https://sitebulb.com/',
|
||
producer: { name: 'Sitebulb Limited', url: 'https://sitebulb.com/' },
|
||
},
|
||
{
|
||
regex: 'Monsidobot/[\\d.]+',
|
||
name: 'Monsidobot',
|
||
category: 'Crawler',
|
||
url: 'https://monsido.com/bot-html',
|
||
producer: { name: 'Monsido LLC', url: 'https://monsido.com/' },
|
||
},
|
||
{
|
||
regex: 'AccompanyBot',
|
||
name: 'AccompanyBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.accompany.com/',
|
||
producer: { name: 'Accompani, Inc', url: 'https://www.accompany.com/' },
|
||
},
|
||
{
|
||
regex: 'Ghost Inspector',
|
||
name: 'Ghost Inspector',
|
||
category: 'Site Monitor',
|
||
url: 'https://docs.ghostinspector.com/faq/#how-do-i-detect-ghost-inspector-test-runner-traffic-on-my-site',
|
||
producer: {
|
||
name: 'Ghost Inspector, Inc.',
|
||
url: 'https://www.ghostinspector.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Google-Apps-Script',
|
||
name: 'Google Apps Script',
|
||
category: 'Service Agent',
|
||
url: 'https://www.google.com/script/start/',
|
||
},
|
||
{
|
||
regex: 'SiteOne-Crawler/[\\d.]+',
|
||
name: 'SiteOne Crawler',
|
||
category: 'Crawler',
|
||
url: 'https://crawler.siteone.io/bot/',
|
||
producer: { name: 'SiteOne s.r.o.', url: 'https://www.siteone.io/' },
|
||
},
|
||
{
|
||
regex: 'Detectify',
|
||
name: 'Detectify',
|
||
category: 'Security Checker',
|
||
url: 'https://support.detectify.com/support/solutions/articles/48001049001-how-to-allow-detectify-to-access-your-site',
|
||
producer: { name: 'Detectify AB', url: 'https://detectify.com/' },
|
||
},
|
||
{
|
||
regex: 'DomCopBot',
|
||
name: 'DomCop Bot',
|
||
category: 'Crawler',
|
||
url: 'https://www.domcop.com/bot',
|
||
producer: {
|
||
name: 'Axeman Technology Solutions LLP',
|
||
url: 'https://axemantech.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Paqlebot/[\\d.]+',
|
||
name: 'Paqlebot',
|
||
category: 'Crawler',
|
||
url: 'https://www.paqle.dk/about/paqlebot',
|
||
producer: { name: 'Paqle A/S', url: 'https://www.paqle.dk/' },
|
||
},
|
||
{
|
||
regex: 'Wibybot',
|
||
name: 'Wibybot',
|
||
category: 'Crawler',
|
||
url: 'https://www.wiby.me/',
|
||
},
|
||
{
|
||
regex: 'Synapse',
|
||
name: 'Synapse',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/matrix-org/synapse',
|
||
},
|
||
{
|
||
regex: 'OSZKbot/[\\d.]+',
|
||
name: 'OSZKbot',
|
||
category: 'Crawler',
|
||
url: 'http://mekosztaly.oszk.hu/mia/',
|
||
producer: {
|
||
name: 'National Szechenyi Library',
|
||
url: 'https://webarchivum.oszk.hu/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'ZoomBot',
|
||
name: 'ZoomBot',
|
||
category: 'Crawler',
|
||
url: 'https://suite.seozoom.it/bot.html',
|
||
producer: { name: 'SEO Cube S.r.l.', url: 'https://www.seocube.it/' },
|
||
},
|
||
{
|
||
regex: 'RavenCrawler/[\\d.]+',
|
||
name: 'RavenCrawler',
|
||
category: 'Crawler',
|
||
url: 'https://raventools.com/site-auditor/',
|
||
producer: { name: 'TapClicks, Inc.', url: 'https://www.tapclicks.com/' },
|
||
},
|
||
{
|
||
regex: 'KadoBot',
|
||
name: 'KadoBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.kadolijst.nl/bot',
|
||
producer: { name: 'Kadolijst', url: 'https://www.kadolijst.nl/' },
|
||
},
|
||
{
|
||
regex: 'Dubbotbot/[\\d.]+',
|
||
name: 'Dubbotbot',
|
||
category: 'Crawler',
|
||
url: 'https://help.dubbot.com/en/articles/6746594-example-custom-user-agent',
|
||
producer: { name: 'DubBot', url: 'https://dubbot.com/' },
|
||
},
|
||
{
|
||
regex: 'Swiftbot/[\\d.]+',
|
||
name: 'Swiftbot',
|
||
category: 'Crawler',
|
||
url: 'https://swiftype.com/swiftbot',
|
||
producer: { name: 'Elasticsearch, B.V.', url: 'https://www.elastic.co/' },
|
||
},
|
||
{
|
||
regex: 'EyeMonIT',
|
||
name: 'EyeMonit',
|
||
category: 'Site Monitor',
|
||
url: 'https://eyemonit.com/',
|
||
producer: { name: 'EyeMonit', url: 'https://eyemonit.com/' },
|
||
},
|
||
{
|
||
regex: 'ThousandEyes',
|
||
name: 'ThousandEyes',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.thousandeyes.com/',
|
||
producer: { name: 'Cisco Systems, Inc.', url: 'https://www.cisco.com/' },
|
||
},
|
||
{ regex: 'OmtrBot/[\\d.]+', name: 'OmtrBot', category: 'Site Monitor' },
|
||
{ regex: 'WebMon/[\\d.]+', name: 'WebMon', category: 'Site Monitor' },
|
||
{
|
||
regex: 'AdsTxtCrawlerTP/[\\d.]+',
|
||
name: 'AdsTxtCrawlerTP',
|
||
category: 'Crawler',
|
||
},
|
||
{
|
||
regex: 'fragFINN',
|
||
name: 'fragFINN',
|
||
category: 'Crawler',
|
||
url: 'https://www.fragfinn.de/',
|
||
producer: { name: 'fragFINN e.V.', url: 'https://www.fragfinn.de/' },
|
||
},
|
||
{
|
||
regex: 'Clickagy',
|
||
name: 'Clickagy',
|
||
category: 'Crawler',
|
||
url: 'https://www.clickagy.com/',
|
||
producer: { name: 'Clickagy, LLC', url: 'https://www.clickagy.com/' },
|
||
},
|
||
{
|
||
regex: 'kiwitcms-gitops/[\\d.]+',
|
||
name: 'Kiwi TCMS GitOps',
|
||
category: 'Service Agent',
|
||
url: 'https://kiwitcms.org',
|
||
producer: {
|
||
name: 'Open Technologies Bulgaria, Ltd.',
|
||
url: 'https://kiwitcms.org',
|
||
},
|
||
},
|
||
{
|
||
regex: 'webtru_crawler',
|
||
name: 'webtru',
|
||
category: 'Crawler',
|
||
url: 'https://webtru.io/',
|
||
producer: { name: 'DataSign Inc.', url: 'https://datasign.jp/' },
|
||
},
|
||
{
|
||
regex: 'URLSuMaBot',
|
||
name: 'URLSuMaBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.urlsuma.de/',
|
||
},
|
||
{
|
||
regex: '360JK yunjiankong',
|
||
name: '360JK',
|
||
category: 'Site Monitor',
|
||
url: 'http://jk.cloud.360.cn/',
|
||
producer: {
|
||
name: '360 Security Technology Inc.',
|
||
url: 'https://www.360.cn/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'UCSBNetworkMeasurement',
|
||
name: 'UCSB Network Measurement',
|
||
category: 'Crawler',
|
||
url: 'https://www.it.ucsb.edu/',
|
||
producer: {
|
||
name: 'University of California, Santa Barbara',
|
||
url: 'https://www.it.ucsb.edu/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Plesk screenshot bot',
|
||
name: 'Plesk Screenshot Service',
|
||
category: 'Service Agent',
|
||
url: 'https://support.plesk.com/hc/en-us/articles/13302778306199-What-is-Plesk-Screenshot-Service',
|
||
producer: {
|
||
name: 'Plesk International GmbH',
|
||
url: 'https://www.plesk.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Who\\.is',
|
||
name: 'Who.is Bot',
|
||
category: 'Crawler',
|
||
url: 'https://who.is/',
|
||
},
|
||
{
|
||
regex: 'Probely',
|
||
name: 'Probely',
|
||
category: 'Security Checker',
|
||
url: 'https://probely.com/sos/',
|
||
producer: {
|
||
name: 'Probely - Soluções de Cibersegurança, S.A.',
|
||
url: 'https://probely.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Uptimia(?:/[\\d.]+)?',
|
||
name: 'Uptimia',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.uptimia.com/',
|
||
producer: { name: 'JJ Online GmbH', url: 'https://www.uptimia.com/' },
|
||
},
|
||
{
|
||
regex: '2GDPR/[\\d.]+',
|
||
name: '2GDPR',
|
||
category: 'Service Agent',
|
||
url: 'https://2gdpr.com/tos',
|
||
producer: { name: '2GDPR', url: 'https://2gdpr.com/' },
|
||
},
|
||
{
|
||
regex: 'abuse\\.xmco\\.fr',
|
||
name: 'Serenety',
|
||
category: 'Security Checker',
|
||
url: 'https://abuse.xmco.fr/',
|
||
producer: { name: 'XMCO, SASU', url: 'https://www.xmco.fr/' },
|
||
},
|
||
{
|
||
regex: 'CheckHost',
|
||
name: 'CheckHost',
|
||
category: 'Site Monitor',
|
||
url: 'https://check-host.net/',
|
||
producer: { name: 'CheckHost', url: 'https://check-host.net/' },
|
||
},
|
||
{
|
||
regex: 'LAC_IAHarvester/[\\d.]+',
|
||
name: 'LAC IA Harvester',
|
||
category: 'Crawler',
|
||
url: 'https://library-archives.canada.ca/eng/services/government-canada/web-social-media-preservation-program/Pages/web-archive.aspx',
|
||
producer: {
|
||
name: 'Library and Archives Canada',
|
||
url: 'https://library-archives.canada.ca/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'InsytfulBot/[\\d.]+',
|
||
name: 'InsytfulBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.insytful.com/',
|
||
producer: { name: 'Zengenti Limited', url: 'https://www.zengenti.com/' },
|
||
},
|
||
{
|
||
regex: 'statista\\.com',
|
||
name: 'Statista',
|
||
category: 'Crawler',
|
||
url: 'https://www.statista.com/',
|
||
producer: { name: 'Statista, Inc.', url: 'https://www.statista.com/' },
|
||
},
|
||
{
|
||
regex: 'SubstackContentFetch/[\\d.]+',
|
||
name: 'Substack Content Fetch',
|
||
category: 'Crawler',
|
||
url: 'https://substack.com/',
|
||
producer: { name: 'Substack, Inc.', url: 'https://substack.com/' },
|
||
},
|
||
{
|
||
regex: '^ds9',
|
||
name: 'Deep SEARCH 9',
|
||
category: 'Crawler',
|
||
url: 'https://www.copyright.com/blog/ccc-expands-corporate-solutions-offering-with-new-technology/',
|
||
producer: {
|
||
name: 'Copyright Clearance Center, Inc.',
|
||
url: 'https://www.copyright.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'LiveJournal\\.com',
|
||
name: 'LiveJournal',
|
||
url: 'https://www.livejournal.com/',
|
||
category: 'Feed Fetcher',
|
||
producer: { name: 'ООО "СИМ"', url: 'https://www.livejournal.com/' },
|
||
},
|
||
{
|
||
regex: 'bitdiscovery',
|
||
name: 'Tenable.asm',
|
||
category: 'Security Checker',
|
||
url: 'https://bitdiscovery.com/',
|
||
producer: { name: 'Tenable, Inc.', url: 'https://www.tenable.com/' },
|
||
},
|
||
{
|
||
regex: 'Castopod/[\\d.]+',
|
||
name: 'Castopod',
|
||
category: 'Crawler',
|
||
url: 'https://www.castopod.org/',
|
||
},
|
||
{
|
||
regex: 'Elastic/Synthetics',
|
||
name: 'Elastic Synthetics',
|
||
category: 'Site Monitor',
|
||
url: 'https://github.com/elastic/synthetics',
|
||
producer: { name: 'Elasticsearch B.V.', url: 'https://www.elastic.co/' },
|
||
},
|
||
{
|
||
regex: 'WDG_Validator/[\\d.]+',
|
||
name: 'WDG HTML Validator',
|
||
category: 'Validator',
|
||
url: 'http://www.htmlhelp.com/tools/validator/',
|
||
},
|
||
{
|
||
regex: 'scan@aegis.network',
|
||
name: 'Aegis',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20180910002802/http://www.aegis.network/',
|
||
},
|
||
{
|
||
regex: 'CrawlyProjectCrawler/[\\d.]+',
|
||
name: 'Crawly Project',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20240326141952/https://crawlyproject.digitaldragon.dev/',
|
||
},
|
||
{
|
||
regex: 'BDFetch',
|
||
name: 'BDFetch',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20130821043949/http://www.branddimensions.com/',
|
||
},
|
||
{
|
||
regex: 'PunkMap',
|
||
name: 'Punk Map',
|
||
category: 'Security Checker',
|
||
url: 'https://github.com/openeasm/punkmap',
|
||
},
|
||
{
|
||
regex: 'GenomeCrawlerd/[\\d.]+',
|
||
name: 'Deepfield Genome',
|
||
category: 'Crawler',
|
||
url: 'https://www.nokia.com/networks/ip-networks/deepfield/genome/',
|
||
producer: { name: 'Nokia Corporation', url: 'https://www.nokia.com/' },
|
||
},
|
||
{
|
||
regex: 'Gaisbot/[\\d.]+',
|
||
name: 'Gaisbot',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20090604121511/https://gais.cs.ccu.edu.tw/robot.php',
|
||
},
|
||
{
|
||
regex: 'FAST-WebCrawler/[\\d.]+',
|
||
name: 'AlltheWeb',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20041020050801/http://www.alltheweb.com/help/webmaster/crawler',
|
||
},
|
||
{
|
||
regex: 'ducks\\.party',
|
||
name: 'ducks.party',
|
||
category: 'Security Checker',
|
||
url: 'https://ducks.party/',
|
||
},
|
||
{
|
||
regex: 'DepSpid/[\\d.]+',
|
||
name: 'DepSpid',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20080321224033/http://about.depspid.net/',
|
||
},
|
||
{
|
||
regex: 'Website-info\\.net',
|
||
name: 'Website-info',
|
||
category: 'Crawler',
|
||
url: 'https://website-info.net/robot',
|
||
producer: { name: 'Meins und Vogel GmbH', url: 'https://muv.com/' },
|
||
},
|
||
{
|
||
regex: 'RedekenBot',
|
||
name: 'RedekenBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.redeken.com/en/help/bot.html',
|
||
producer: { name: 'Redeken', url: 'https://www.redeken.com/' },
|
||
},
|
||
{
|
||
regex: 'semaltbot',
|
||
name: 'semaltbot',
|
||
category: 'Crawler',
|
||
url: 'https://semalt.net/',
|
||
producer: { name: 'Semalt LP', url: 'https://semalt.net/' },
|
||
},
|
||
{
|
||
regex: 'MakeMerryBot',
|
||
name: 'MakeMerryBot',
|
||
category: 'Crawler',
|
||
url: 'https://makemerry.app/bots',
|
||
},
|
||
{
|
||
regex: 'Timpibot',
|
||
name: 'Timpibot',
|
||
category: 'Crawler',
|
||
url: 'https://timpi.io/',
|
||
producer: { name: 'Timpi Inc.', url: 'https://timpi.io/' },
|
||
},
|
||
{
|
||
regex: 'Validbot',
|
||
name: 'ValidBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.validbot.com/',
|
||
producer: { name: 'Jake Olefsky LLC', url: 'https://www.validbot.com/' },
|
||
},
|
||
{
|
||
regex: 'NPBot',
|
||
name: 'NameProtectBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.cscglobal.com/cscglobal/home/',
|
||
producer: { name: 'NameProtect, Inc.', url: 'https://www.cscglobal.com/' },
|
||
},
|
||
{
|
||
regex: 'domaincodex\\.com',
|
||
name: 'Domain Codex',
|
||
category: 'Crawler',
|
||
url: 'https://www.domaincodex.com/',
|
||
producer: {
|
||
name: 'Erie Data Systems, LLC',
|
||
url: 'https://www.eriedatasys.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Swisscows Favicons',
|
||
name: 'Swisscows Favicons',
|
||
category: 'Crawler',
|
||
url: 'https://swisscows.com/',
|
||
producer: { name: 'Swisscows AG', url: 'https://swisscows.com/' },
|
||
},
|
||
{
|
||
regex: 'leak\\.info',
|
||
name: 'leak.info',
|
||
category: 'Crawler',
|
||
url: 'http://www.leak.info/',
|
||
},
|
||
{
|
||
regex: 'workona',
|
||
name: 'Workona',
|
||
category: 'Crawler',
|
||
url: 'https://workona.com/',
|
||
producer: { name: 'Workona, Inc.', url: 'https://workona.com/' },
|
||
},
|
||
{
|
||
regex: 'Bloglines',
|
||
name: 'Bloglines',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20140309033202/http://www.bloglines.com/',
|
||
producer: { name: 'Reply!, Inc.', url: 'https://www.reply.com/' },
|
||
},
|
||
{
|
||
regex: 'heritrix',
|
||
name: 'Heritrix',
|
||
category: 'Crawler',
|
||
url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix',
|
||
producer: { name: 'The Internet Archive', url: 'https://archive.org' },
|
||
},
|
||
{
|
||
regex: 'search\\.marginalia\\.nu',
|
||
name: 'Marginalia',
|
||
category: 'Crawler',
|
||
url: 'https://www.marginalia.nu/marginalia-search/for-webmasters/',
|
||
producer: { name: 'Marginalia', url: 'https://www.marginalia.nu/' },
|
||
},
|
||
{
|
||
regex: 'vu-server-health-scanner/[\\d.]+',
|
||
name: 'VU Server Health Scanner',
|
||
category: 'Security Checker',
|
||
url: 'https://130.37.198.75/index.html',
|
||
producer: { name: 'VU Amsterdam', url: 'https://vu.nl/en' },
|
||
},
|
||
{
|
||
regex: 'Functionize',
|
||
name: 'Functionize',
|
||
category: 'Crawler',
|
||
url: 'https://www.functionize.com/',
|
||
producer: {
|
||
name: 'Functionize, Inc.',
|
||
url: 'https://www.functionize.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Prerender',
|
||
name: 'Prerender',
|
||
category: 'Crawler',
|
||
url: 'https://docs.prerender.io/docs/33-overview-of-prerender-crawlers',
|
||
producer: { name: 'saas.group Inc.', url: 'https://saas.group/' },
|
||
},
|
||
{
|
||
regex: 'bl\\.uk_ldfc_bot',
|
||
name: 'The British Library Legal Deposit Bot',
|
||
category: 'Crawler',
|
||
url: 'https://www.bl.uk/',
|
||
producer: { name: 'The British Library', url: 'https://www.bl.uk/' },
|
||
},
|
||
{
|
||
regex: 'Miniature\\.io',
|
||
name: 'Miniature.io',
|
||
category: 'Service Agent',
|
||
url: 'https://miniature.io/',
|
||
producer: { name: 'LCX Ventures Ltd', url: 'https://www.lcxventures.com/' },
|
||
},
|
||
{
|
||
regex: 'Convertify',
|
||
name: 'Convertify',
|
||
category: 'Service Agent',
|
||
url: 'https://www.convertify.app/',
|
||
producer: { name: 'Convertify', url: 'https://www.convertify.app/' },
|
||
},
|
||
{
|
||
regex: 'ZoteroTranslationServer',
|
||
name: 'Zotero Translation Server',
|
||
category: 'Service Agent',
|
||
url: 'https://github.com/wikimedia/mediawiki-services-zotero',
|
||
producer: {
|
||
name: 'The Wikimedia Foundation, Inc.',
|
||
url: 'https://www.wikimedia.org/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'MuckRack',
|
||
name: 'MuckRack',
|
||
category: 'Crawler',
|
||
url: 'https://muckrack.com/',
|
||
producer: { name: 'Muck Rack, LLC', url: 'https://muckrack.com/' },
|
||
},
|
||
{
|
||
regex: 'Golfe',
|
||
name: 'Golfe',
|
||
category: 'Crawler',
|
||
url: 'http://www.goo-olfe.ae/bot.html',
|
||
},
|
||
{
|
||
regex: 'SpiderLing',
|
||
name: 'SpiderLing',
|
||
category: 'Crawler',
|
||
url: 'https://nlp.fi.muni.cz/projects/biwec/',
|
||
producer: {
|
||
name: 'Natural Language Processing Centre',
|
||
url: 'https://nlp.fi.muni.cz/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Bravebot',
|
||
name: 'Bravebot',
|
||
category: 'Search bot',
|
||
url: 'https://search.brave.com/help/brave-search-crawler',
|
||
producer: { name: 'Brave Software, Inc.', url: 'https://brave.com/' },
|
||
},
|
||
{
|
||
regex: '1001FirmsBot',
|
||
name: '1001FirmsBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.1001firms.com/1001firmsbot.php',
|
||
},
|
||
{
|
||
regex: 'SteamChatURLLookup',
|
||
name: 'Steam Chat URL Lookup',
|
||
category: 'Service Agent',
|
||
url: 'https://help.steampowered.com/en/faqs/view/595C-42F4-3B66-E02F',
|
||
producer: {
|
||
name: 'Valve Corporation',
|
||
url: 'https://www.valvesoftware.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'ohdear\\.app',
|
||
name: 'Oh Dear',
|
||
category: 'Site Monitor',
|
||
url: 'https://ohdear.app/docs/faq/what-is-the-oh-dear-crawler-doing-in-my-logs',
|
||
producer: { name: 'Immutable, SNC', url: 'https://ohdear.app/' },
|
||
},
|
||
{
|
||
regex: 'Inspici',
|
||
name: 'Inspici',
|
||
category: 'Crawler',
|
||
url: 'https://www.inspici.com/',
|
||
producer: { name: 'Inspici, LLC', url: 'https://www.inspici.com/' },
|
||
},
|
||
{
|
||
regex: 'peer39_crawler',
|
||
name: 'Peer39',
|
||
category: 'Crawler',
|
||
url: 'https://www.peer39.com/crawler-notice',
|
||
producer: { name: 'Peer39 Tech, LLC', url: 'https://www.peer39.com/' },
|
||
},
|
||
{
|
||
regex: 'Pandalytics',
|
||
name: 'Pandalytics',
|
||
category: 'Crawler',
|
||
url: 'https://www.domainsbot.com/business-intelligence/',
|
||
producer: { name: 'DomainsBot, Inc.', url: 'https://www.domainsbot.com/' },
|
||
},
|
||
{
|
||
regex: 'CloudServerMarketSpider',
|
||
name: 'CloudServerMarketSpider',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20151228225429/https://cloudservermarket.com/spider.html',
|
||
},
|
||
{
|
||
regex: 'Pigafetta',
|
||
name: 'Pigafetta',
|
||
category: 'Crawler',
|
||
url: 'https://visual-seo.com/Pigafetta-Bot',
|
||
producer: {
|
||
name: 'aStonish Studio Srl',
|
||
url: 'http://www.astonishstudio.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Cotoyogi',
|
||
name: 'Cotoyogi',
|
||
category: 'Crawler',
|
||
url: 'https://ds.rois.ac.jp/center8/crawler/',
|
||
producer: {
|
||
name: 'Joint Support-Center for Data Science Research (ROIS-DS)',
|
||
url: 'https://ds.rois.ac.jp/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'SuggestBot',
|
||
name: 'SuggestBot',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/nettrom/suggestbot',
|
||
},
|
||
{
|
||
regex:
|
||
'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus| CM62| HD65))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\\.o\\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \\(cow\\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherx?web|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|xx032_bo9vs83_2a|sslshed|geckotrail|Wordup|Keydrop|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|Node.js|Report Runner|url|Zeus|ZmEu)$',
|
||
name: 'Generic Bot',
|
||
},
|
||
{
|
||
regex:
|
||
'[a-z0-9_-]*(?:(?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|(?<!node-|uclient-|Mikrotik/\\d\\.[x\\d] |electron-)fetch(?:er)?|indexer|inspector|monitor|(?<!Microsoft |banshee-)project(?!or)|(?<!Google Wap |Blue |SpeedMode; )proxy|(?<!P)research|resolver|robots|(?<!Cam)scanner|scraper|script|searcher|(?<!-)security|spider(?! 8)|study|transcoder|uptime|user[ _]?agent|validator)(?:[^a-z]|$)',
|
||
name: 'Generic Bot',
|
||
},
|
||
] as const;
|
||
export default bots;
|