* fix: ignore private ips * fix: performance related fixes * fix: simply event buffer * fix: default to 1 events queue shard * add: cleanup scripts * fix: comments * fix comments * fix * fix: groupmq * wip * fix: sync cachable * remove cluster names and add it behind env flag (if someone want to scale) * fix * wip * better logger * remove reqid and user agent * fix lock * remove wait_for_async_insert
6367 lines
146 KiB
TypeScript
6367 lines
146 KiB
TypeScript
// This file is generated by the script get-bots.ts
|
||
|
||
// The data is fetched from device-detector: https://raw.githubusercontent.com/matomo-org/device-detector/master/regexes/bots.yml
|
||
|
||
const bots = [
|
||
{
|
||
includes: 'WireReaderBot',
|
||
name: 'WireReaderBot',
|
||
category: 'Feed Fetcher',
|
||
url: 'https://wirereader.app/',
|
||
},
|
||
{
|
||
includes: 'monitoring360bot',
|
||
name: '360 Monitoring',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.360monitoring.io',
|
||
producer: {
|
||
name: 'Plesk International GmbH',
|
||
url: 'https://www.plesk.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Cloudflare-Healthchecks',
|
||
name: 'Cloudflare Health Checks',
|
||
category: 'Service Agent',
|
||
url: 'https://developers.cloudflare.com/health-checks/',
|
||
producer: {
|
||
name: 'CloudFlare',
|
||
url: 'https://www.cloudflare.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: '360Spider',
|
||
name: '360Spider',
|
||
category: 'Search bot',
|
||
url: 'https://www.so.com/help/help_3_2.html',
|
||
producer: {
|
||
name: 'Online Media Group, Inc.',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Aboundex',
|
||
name: 'Aboundexbot',
|
||
category: 'Search bot',
|
||
url: 'http://www.aboundex.com/crawler/',
|
||
producer: {
|
||
name: 'Aboundex.com',
|
||
url: 'http://www.aboundex.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'AcoonBot',
|
||
name: 'Acoon',
|
||
category: 'Search bot',
|
||
url: 'http://www.acoon.de/robot.asp',
|
||
producer: {
|
||
name: 'Acoon GmbH',
|
||
url: 'http://www.acoon.de',
|
||
},
|
||
},
|
||
{
|
||
regex: 'AddThis\\.com',
|
||
name: 'AddThis.com',
|
||
category: 'Social Media Agent',
|
||
url: '',
|
||
producer: {
|
||
name: 'Clearspring Technologies, Inc.',
|
||
url: 'http://www.clearspring.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'AhrefsBot',
|
||
name: 'aHrefs Bot',
|
||
category: 'Crawler',
|
||
url: 'https://ahrefs.com/robot',
|
||
producer: {
|
||
name: 'Ahrefs Pte Ltd',
|
||
url: 'https://ahrefs.com/robot',
|
||
},
|
||
},
|
||
{
|
||
includes: 'AhrefsSiteAudit',
|
||
name: 'AhrefsSiteAudit',
|
||
category: 'Site Monitor',
|
||
url: 'https://ahrefs.com/robot/site-audit',
|
||
producer: {
|
||
name: 'Ahrefs Pte Ltd',
|
||
url: 'https://ahrefs.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'ia_archiver|alexabot|verifybot',
|
||
name: 'Alexa Crawler',
|
||
category: 'Search bot',
|
||
url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers',
|
||
producer: {
|
||
name: 'Alexa Internet',
|
||
url: 'https://www.alexa.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'alexa site audit',
|
||
name: 'Alexa Site Audit',
|
||
category: 'Site Monitor',
|
||
url: 'https://support.alexa.com/hc/en-us/articles/200450194',
|
||
producer: {
|
||
name: 'Alexa Internet',
|
||
url: 'https://www.alexa.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Amazonbot',
|
||
name: 'Amazon Bot',
|
||
category: 'Crawler',
|
||
url: 'https://developer.amazon.com/support/amazonbot',
|
||
producer: {
|
||
name: 'Amazon.com, Inc.',
|
||
url: 'https://www.amazon.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'AmazonAdBot',
|
||
name: 'Amazon AdBot',
|
||
category: 'Crawler',
|
||
url: 'https://adbot.amazon.com/',
|
||
producer: {
|
||
name: 'Amazon.com, Inc.',
|
||
url: 'https://www.amazon.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service',
|
||
name: 'Amazon Route53 Health Check',
|
||
category: 'Service Agent',
|
||
producer: {
|
||
name: 'Amazon Web Services',
|
||
url: 'https://aws.amazon.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'AmorankSpider',
|
||
name: 'Amorank Spider',
|
||
category: 'Crawler',
|
||
url: 'http://amorank.com/webcrawler.html',
|
||
producer: {
|
||
name: 'Amorank',
|
||
url: 'http://www.amorank.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ApacheBench',
|
||
name: 'ApacheBench',
|
||
category: 'Benchmark',
|
||
url: 'https://httpd.apache.org/docs/2.4/programs/ab.html',
|
||
producer: {
|
||
name: 'The Apache Software Foundation',
|
||
url: 'https://www.apache.org/foundation/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Applebot',
|
||
name: 'Applebot',
|
||
category: 'Crawler',
|
||
url: 'https://support.apple.com/en-us/119829',
|
||
producer: {
|
||
name: 'Apple Inc',
|
||
url: 'https://www.apple.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'iTMS',
|
||
name: 'iTMS',
|
||
category: 'Crawler',
|
||
url: 'https://support.apple.com/en-us/119829',
|
||
producer: {
|
||
name: 'Apple Inc',
|
||
url: 'https://www.apple.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'AppSignalBot',
|
||
name: 'AppSignalBot',
|
||
category: 'Site Monitor',
|
||
url: 'https://docs.appsignal.com/uptime-monitoring/',
|
||
producer: {
|
||
name: 'AppSignal',
|
||
url: 'https://appsignal.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Arachni',
|
||
name: 'Arachni',
|
||
category: 'Security Checker',
|
||
url: 'https://www.arachni-scanner.com/',
|
||
producer: {
|
||
name: 'Sarosys LLC',
|
||
url: 'https://www.sarosys.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'AspiegelBot',
|
||
name: 'AspiegelBot',
|
||
category: 'Crawler',
|
||
url: 'https://aspiegel.com/',
|
||
producer: {
|
||
name: 'Huawei',
|
||
url: 'https://www.huawei.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Castro 2, Episode Duration Lookup',
|
||
name: 'Castro 2',
|
||
category: 'Service Agent',
|
||
url: 'http://supertop.co/castro/',
|
||
producer: {
|
||
name: 'Supertop',
|
||
url: 'http://supertop.co',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Curious George',
|
||
name: 'Analytics SEO Crawler',
|
||
category: 'Crawler',
|
||
url: 'http://www.analyticsseo.com/crawler',
|
||
producer: {
|
||
name: 'Analytics SEO',
|
||
url: 'http://www.analyticsseo.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'archive\\.org_bot|special_archiver',
|
||
name: 'archive.org bot',
|
||
category: 'Crawler',
|
||
url: 'https://archive.org/details/archive.org_bot',
|
||
producer: {
|
||
name: 'The Internet Archive',
|
||
url: 'https://archive.org',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Ask Jeeves/Teoma',
|
||
name: 'Ask Jeeves',
|
||
category: 'Search bot',
|
||
url: '',
|
||
producer: {
|
||
name: 'Ask Jeeves Inc.',
|
||
url: 'http://www.ask.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Backlink-Check\\.de',
|
||
name: 'Backlink-Check.de',
|
||
category: 'Crawler',
|
||
url: 'http://www.backlink-check.de/bot.html',
|
||
producer: {
|
||
name: 'Mediagreen Medienservice',
|
||
url: 'http://www.backlink-check.de',
|
||
},
|
||
},
|
||
{
|
||
includes: 'BacklinkCrawler',
|
||
name: 'BacklinkCrawler',
|
||
category: 'Crawler',
|
||
url: 'http://www.backlinktest.com/crawler.html',
|
||
producer: {
|
||
name: '2.0Promotion GbR',
|
||
url: 'http://www.backlinktest.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Baidu.*spider|baidu Transcoder',
|
||
name: 'Baidu Spider',
|
||
category: 'Search bot',
|
||
url: 'http://www.baidu.com/search/spider.htm',
|
||
producer: {
|
||
name: 'Baidu',
|
||
url: 'http://www.baidu.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'BazQux',
|
||
name: 'BazQux Reader',
|
||
url: 'https://bazqux.com/fetcher',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Better Uptime Bot',
|
||
name: 'Better Uptime Bot',
|
||
category: 'Site Monitor',
|
||
url: 'https://betteruptime.com/faq',
|
||
producer: {
|
||
name: 'Better Uptime',
|
||
url: 'https://betteruptime.com/',
|
||
},
|
||
},
|
||
{
|
||
regex:
|
||
'MSNBot|msrbot|bingbot|bingadsbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot',
|
||
name: 'BingBot',
|
||
category: 'Search bot',
|
||
url: 'http://search.msn.com/msnbot.htmn',
|
||
producer: {
|
||
name: 'Microsoft Corporation',
|
||
url: 'http://www.microsoft.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Blackbox Exporter',
|
||
name: 'Blackbox Exporter',
|
||
category: 'Site Monitor',
|
||
url: 'https://github.com/prometheus/blackbox_exporter',
|
||
producer: {
|
||
name: 'Prometheus',
|
||
url: 'https://prometheus.io/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Blekkobot',
|
||
name: 'Blekkobot',
|
||
category: 'Search bot',
|
||
url: 'http://blekko.com/about/blekkobot',
|
||
producer: {
|
||
name: 'Blekko',
|
||
url: 'http://blekko.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'BLEXBot',
|
||
name: 'BLEXBot Crawler',
|
||
category: 'Crawler',
|
||
url: 'http://webmeup-crawler.com',
|
||
producer: {
|
||
name: 'WebMeUp',
|
||
url: 'http://webmeup.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Bloglovin',
|
||
name: 'Bloglovin',
|
||
url: 'http://www.bloglovin.com',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Blogtrottr',
|
||
name: 'Blogtrottr',
|
||
url: '',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: 'Blogtrottr Ltd',
|
||
url: 'https://blogtrottr.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'BoardReader Blog Indexer',
|
||
name: 'BoardReader Blog Indexer',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'BoardReader',
|
||
url: 'https://boardreader.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'BountiiBot',
|
||
name: 'Bountii Bot',
|
||
category: 'Search bot',
|
||
url: 'http://bountii.com/contact.php',
|
||
producer: {
|
||
name: 'Bountii Inc.',
|
||
url: 'http://bountii.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Browsershots',
|
||
name: 'Browsershots',
|
||
category: 'Service Agent',
|
||
url: 'http://browsershots.org/faq',
|
||
producer: {
|
||
name: 'Browsershots.org',
|
||
url: 'http://browsershots.org',
|
||
},
|
||
},
|
||
{
|
||
includes: 'BUbiNG',
|
||
name: 'BUbiNG',
|
||
category: 'Crawler',
|
||
url: 'http://law.di.unimi.it/BUbiNG.html',
|
||
producer: {
|
||
name: 'The Laboratory for Web Algorithmics (LAW)',
|
||
url: 'http://law.di.unimi.it/software.php#buging',
|
||
},
|
||
},
|
||
{
|
||
regex: '(?<!HTC)[ _]Butterfly/',
|
||
name: 'Butterfly Robot',
|
||
category: 'Search bot',
|
||
url: 'http://labs.topsy.com/butterfly',
|
||
producer: {
|
||
name: 'Topsy Labs',
|
||
url: 'http://labs.topsy.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'CareerBot',
|
||
name: 'CareerBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.career-x.de/bot.html',
|
||
producer: {
|
||
name: 'career-x GmbH',
|
||
url: 'http://www.career-x.de',
|
||
},
|
||
},
|
||
{
|
||
includes: 'CCBot',
|
||
name: 'ccBot crawler',
|
||
category: 'Crawler',
|
||
url: 'http://commoncrawl.org/faq/',
|
||
producer: {
|
||
name: 'reddit inc.',
|
||
url: 'http://www.reddit.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Cliqzbot',
|
||
name: 'Cliqzbot',
|
||
category: 'Crawler',
|
||
url: 'http://cliqz.com/company/cliqzbot',
|
||
producer: {
|
||
name: '10betterpages GmbH',
|
||
url: 'http://cliqz.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Cloudflare-AMP',
|
||
name: 'CloudFlare AMP Fetcher',
|
||
category: 'Crawler',
|
||
url: 'https://amp.cloudflare.com/doc/fetcher.html',
|
||
producer: {
|
||
name: 'CloudFlare',
|
||
url: 'http://www.cloudflare.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Cloudflare-?Diagnostics',
|
||
name: 'Cloudflare Diagnostics',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.cloudflare.com/',
|
||
producer: {
|
||
name: 'Cloudflare',
|
||
url: 'https://www.cloudflare.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'CloudFlare-AlwaysOnline',
|
||
name: 'CloudFlare Always Online',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.cloudflare.com/always-online',
|
||
producer: {
|
||
name: 'CloudFlare',
|
||
url: 'https://www.cloudflare.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Cloudflare-SSLDetector',
|
||
name: 'Cloudflare SSL Detector',
|
||
category: 'Site Monitor',
|
||
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/',
|
||
producer: {
|
||
name: 'CloudFlare',
|
||
url: 'https://www.cloudflare.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Cloudflare Custom Hostname Verification',
|
||
name: 'Cloudflare Custom Hostname Verification',
|
||
category: 'Service Agent',
|
||
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/',
|
||
producer: {
|
||
name: 'CloudFlare',
|
||
url: 'https://www.cloudflare.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Cloudflare-Traffic-Manager',
|
||
name: 'Cloudflare Traffic Manager',
|
||
category: 'Site Monitor',
|
||
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/',
|
||
producer: {
|
||
name: 'CloudFlare',
|
||
url: 'https://www.cloudflare.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Cloudflare-Smart-Transit',
|
||
name: 'Cloudflare Smart Transit',
|
||
category: 'Site Monitor',
|
||
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/',
|
||
producer: {
|
||
name: 'CloudFlare',
|
||
url: 'https://www.cloudflare.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'CloudflareObservatory',
|
||
name: 'Cloudflare Observatory',
|
||
category: 'Site Monitor',
|
||
url: 'https://developers.cloudflare.com/speed/speed-test/run-speed-test',
|
||
producer: {
|
||
name: 'CloudFlare',
|
||
url: 'https://www.cloudflare.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'https://developers\\.cloudflare\\.com/security-center/',
|
||
name: 'Cloudflare Security Insights',
|
||
category: 'Site Monitor',
|
||
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/',
|
||
producer: {
|
||
name: 'CloudFlare',
|
||
url: 'https://www.cloudflare.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'coccoc\\.com',
|
||
name: 'Cốc Cốc Bot',
|
||
url: 'https://help.coccoc.com/en/search-engine/coccoc-robots',
|
||
category: 'Search bot',
|
||
producer: {
|
||
name: 'Cốc Cốc',
|
||
url: 'https://coccoc.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'collectd',
|
||
name: 'Collectd',
|
||
url: 'https://collectd.org/',
|
||
category: 'Site Monitor',
|
||
producer: {
|
||
name: 'Collectd',
|
||
url: 'https://collectd.org/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'CommaFeed',
|
||
name: 'CommaFeed',
|
||
url: 'http://www.commafeed.com',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'CSS Certificate Spider',
|
||
name: 'CSS Certificate Spider',
|
||
category: 'Crawler',
|
||
url: 'http://www.css-security.com/certificatespider/',
|
||
producer: {
|
||
name: 'Certified Security Solutions',
|
||
url: 'https://www.css-security.com/company/about-us/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Datadog Agent|Datadog/?Synthetics',
|
||
name: 'Datadog Agent',
|
||
url: 'https://github.com/DataDog/dd-agent',
|
||
category: 'Site Monitor',
|
||
producer: {
|
||
name: 'Datadog',
|
||
url: 'https://www.datadoghq.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Datanyze',
|
||
name: 'Datanyze',
|
||
url: '',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'Datanyze',
|
||
url: 'https://www.datanyze.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Dataprovider',
|
||
name: 'Dataprovider',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Dataprovider B.V.',
|
||
url: 'https://www.dataprovider.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Daum(?!(?:Apps|Device))',
|
||
name: 'Daum',
|
||
category: 'Search bot',
|
||
url: 'http://tab.search.daum.net/aboutWebSearch_en.html',
|
||
producer: {
|
||
name: 'Daum Communications Corp.',
|
||
url: 'http://www.kakaocorp.com/main',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Dazoobot',
|
||
name: 'Dazoobot',
|
||
category: 'Search bot',
|
||
url: '',
|
||
producer: {
|
||
name: 'DAZOO.FR',
|
||
url: 'http://dazoo.fr',
|
||
},
|
||
},
|
||
{
|
||
includes: 'discobot',
|
||
name: 'Discobot',
|
||
category: 'Search bot',
|
||
url: 'http://discoveryengine.com/discobot.html',
|
||
producer: {
|
||
name: 'Discovery Engine',
|
||
url: 'http://discoveryengine.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Domain Re-Animator Bot|support@domainreanimator\\.com',
|
||
name: 'Domain Re-Animator Bot',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Domain Re-Animator, LLC',
|
||
url: 'http://domainreanimator.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'DotBot',
|
||
name: 'DotBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.opensiteexplorer.org/dotbot',
|
||
producer: {
|
||
name: 'SEOmoz, Inc.',
|
||
url: 'http://moz.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'DuckDuck(?:Go-Favicons-)?Bot',
|
||
name: 'DuckDuckBot',
|
||
category: 'Search bot',
|
||
url: 'https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/',
|
||
producer: {
|
||
name: 'DuckDuckGo',
|
||
url: 'https://duckduckgo.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'DuckAssistBot',
|
||
name: 'DuckAssistBot',
|
||
category: 'Search bot',
|
||
url: 'https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot/',
|
||
producer: {
|
||
name: 'DuckDuckGo',
|
||
url: 'https://duckduckgo.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'EasouSpider',
|
||
name: 'Easou Spider',
|
||
category: 'Search bot',
|
||
url: 'http://www.easou.com/search/spider.html',
|
||
producer: {
|
||
name: 'easou ICP',
|
||
url: 'http://www.easou.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'eCairn-Grabber',
|
||
name: 'eCairn-Grabber',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'eCairn',
|
||
url: 'https://ecairn.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'EMail Exractor',
|
||
name: 'EMail Exractor',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'evc-batch',
|
||
name: 'evc-batch',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'eVenture Capital Partners II, LLC',
|
||
url: 'http://www.eventures.vc/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Exabot|ExaleadCloudview',
|
||
name: 'ExaBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.exabot.com/go/robot',
|
||
producer: {
|
||
name: 'Dassault Systèmes',
|
||
url: 'http://www.3ds.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ExactSeek Crawler',
|
||
name: 'ExactSeek Crawler',
|
||
category: 'Search bot',
|
||
url: 'http://www.exactseek.com',
|
||
producer: {
|
||
name: 'Jayde Online, Inc.',
|
||
url: 'http://www.jaydeonlineinc.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Ezooms',
|
||
name: 'Ezooms',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'SEOmoz, Inc.',
|
||
url: 'http://moz.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'facebook(?:catalog|externalhit|externalua|platform|scraper)',
|
||
name: 'Facebook Crawler',
|
||
category: 'Social Media Agent',
|
||
url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers',
|
||
producer: {
|
||
name: 'Meta Platforms, Inc.',
|
||
url: 'https://www.meta.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'meta-externalagent',
|
||
name: 'Meta-ExternalAgent',
|
||
category: 'Crawler',
|
||
url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers',
|
||
producer: {
|
||
name: 'Meta Platforms, Inc.',
|
||
url: 'https://www.meta.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'meta-externalfetcher',
|
||
name: 'Meta-ExternalFetcher',
|
||
category: 'Social Media Agent',
|
||
url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers',
|
||
producer: {
|
||
name: 'Meta Platforms, Inc.',
|
||
url: 'https://www.meta.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'meta-webindexer',
|
||
name: 'Meta-WebIndexer',
|
||
category: 'Social Media Agent',
|
||
url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers',
|
||
producer: {
|
||
name: 'Meta Platforms, Inc.',
|
||
url: 'https://www.meta.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'meta-externalads',
|
||
name: 'Meta-ExternalAds',
|
||
category: 'Social Media Agent',
|
||
url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers',
|
||
producer: {
|
||
name: 'Meta Platforms, Inc.',
|
||
url: 'https://www.meta.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'FacebookBot',
|
||
name: 'FacebookBot',
|
||
category: 'Crawler',
|
||
url: 'https://developers.facebook.com/docs/sharing/bot',
|
||
producer: {
|
||
name: 'Meta Platforms, Inc.',
|
||
url: 'https://www.meta.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Feedbin',
|
||
name: 'Feedbin',
|
||
url: 'http://feedbin.com/',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'FeedBurner',
|
||
name: 'FeedBurner',
|
||
url: 'http://www.feedburner.com',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Feed Wrangler',
|
||
name: 'Feed Wrangler',
|
||
url: 'https://feedwrangler.net/',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: 'David Smith & Developing Perspective, LLC',
|
||
url: 'https://david-smith.org',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Feedly',
|
||
name: 'Feedly',
|
||
url: 'http://www.feedly.com',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Feedspot',
|
||
name: 'Feedspot',
|
||
url: 'http://www.feedspot.com',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Fever/',
|
||
name: 'Fever',
|
||
url: 'http://feedafever.com/',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
regex: 'FlipboardProxy|FlipboardRSS',
|
||
name: 'Flipboard',
|
||
url: 'http://flipboard.com/browserproxy',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: 'Flipboard',
|
||
url: 'http://flipboard.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Findxbot',
|
||
name: 'Findxbot',
|
||
category: 'Crawler',
|
||
url: 'http://www.findxbot.com',
|
||
},
|
||
{
|
||
includes: 'FreshRSS',
|
||
name: 'FreshRSS',
|
||
category: 'Feed Fetcher',
|
||
url: 'https://freshrss.org/',
|
||
},
|
||
{
|
||
includes: 'Genieo',
|
||
name: 'Genieo Web filter',
|
||
category: '',
|
||
url: 'http://www.genieo.com/webfilter.html',
|
||
producer: {
|
||
name: 'Genieo',
|
||
url: 'http://www.genieo.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'GigablastOpenSource',
|
||
name: 'Gigablast',
|
||
category: 'Search bot',
|
||
url: 'https://github.com/gigablast/open-source-search-engine',
|
||
producer: {
|
||
name: 'Matt Wells',
|
||
url: 'http://www.gigablast.com/faq.html',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Gluten Free Crawler',
|
||
name: 'Gluten Free Crawler',
|
||
category: 'Crawler',
|
||
url: 'http://glutenfreepleasure.com/',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'gobuster',
|
||
name: 'Gobuster',
|
||
url: 'https://github.com/OJ/gobuster',
|
||
},
|
||
{
|
||
includes: 'ichiro/mobile goo',
|
||
name: 'Goo',
|
||
category: 'Search bot',
|
||
url: 'http://search.goo.ne.jp/option/use/sub4/sub4-1',
|
||
producer: {
|
||
name: 'NTT Resonant',
|
||
url: 'http://goo.ne.jp',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Storebot-Google',
|
||
name: 'Google StoreBot',
|
||
category: 'Crawler',
|
||
},
|
||
{
|
||
includes: 'Google Favicon',
|
||
name: 'Google Favicon',
|
||
category: 'Crawler',
|
||
},
|
||
{
|
||
includes: 'Google Search Console',
|
||
name: 'Google Search Console',
|
||
category: 'Crawler',
|
||
url: 'https://search.google.com/search-console/about',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Google Page Speed Insights',
|
||
name: 'Google PageSpeed Insights',
|
||
category: 'Site Monitor',
|
||
url: 'http://developers.google.com/speed/pagespeed/insights/',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'google_partner_monitoring',
|
||
name: 'Google Partner Monitoring',
|
||
category: 'Site Monitor',
|
||
url: '',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Google-Cloud-Scheduler',
|
||
name: 'Google Cloud Scheduler',
|
||
category: 'Crawler',
|
||
url: 'https://cloud.google.com/scheduler',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Google-Structured-Data-Testing-Tool',
|
||
name: 'Google Structured Data Testing Tool',
|
||
category: 'Validator',
|
||
url: 'https://search.google.com/structured-data/testing-tool',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'GoogleStackdriverMonitoring',
|
||
name: 'Google Stackdriver Monitoring',
|
||
category: 'Site Monitor',
|
||
url: 'https://cloud.google.com/monitoring',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Google-Transparency-Report',
|
||
name: 'Google Transparency Report',
|
||
category: 'Site Monitor',
|
||
url: 'https://transparencyreport.google.com/',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Google-CloudVertexBot',
|
||
name: 'Google-CloudVertexBot',
|
||
category: 'Crawler',
|
||
url: 'https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers#google-cloudvertexbot',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'via ggpht\\.com GoogleImageProxy',
|
||
name: 'Gmail Image Proxy',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Google-Document-Conversion',
|
||
name: 'Google-Document-Conversion',
|
||
category: 'Service Agent',
|
||
url: 'https://support.google.com/drive/answer/176692?hl=en',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'GoogleDocs; apps-spreadsheets',
|
||
name: 'Google Sheets',
|
||
category: 'Service Agent',
|
||
url: 'https://workspace.google.com/products/sheets/',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'GoogleDocs; apps-presentations',
|
||
name: 'Google Slides',
|
||
category: 'Service Agent',
|
||
url: 'https://workspace.google.com/products/slides/',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'GoogleDocs;',
|
||
name: 'Google Docs',
|
||
category: 'Service Agent',
|
||
url: 'https://docs.google.com/',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SeznamEmailProxy',
|
||
name: 'Seznam Email Proxy',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Seznam.cz, a.s.',
|
||
url: 'http://www.seznam.cz/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Seznam-Zbozi-robot',
|
||
name: 'Seznam Zbozi.cz',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Seznam.cz, a.s.',
|
||
url: 'https://www.zbozi.cz/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Heurekabot-Feed',
|
||
name: 'Heureka Feed',
|
||
category: 'Crawler',
|
||
url: 'https://sluzby.heureka.cz/napoveda/heurekabot/',
|
||
producer: {
|
||
name: 'Heureka.cz, a.s.',
|
||
url: 'https://www.heureka.cz/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ShopAlike',
|
||
name: 'ShopAlike',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Visual Meta',
|
||
url: 'https://www.shopalike.cz/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'deepcrawl\\.com',
|
||
name: 'Lumar',
|
||
category: 'Crawler',
|
||
url: 'https://deepcrawl.com/bot',
|
||
producer: {
|
||
name: 'Lumar',
|
||
url: 'https://www.lumar.io/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Googlebot-News',
|
||
name: 'Googlebot News',
|
||
category: 'Search bot',
|
||
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
regex:
|
||
'Adwords-(?:DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(?:adstxt|Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Shopping-Quality|Site-Verification|Sites-Thumbnails|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer|Sites)|Google.*/\\+/web/snippet',
|
||
name: 'Googlebot',
|
||
category: 'Search bot',
|
||
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: '^Google$',
|
||
name: 'Googlebot',
|
||
category: 'Search bot',
|
||
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Google-Safety',
|
||
name: 'Google-Safety',
|
||
category: 'Crawler',
|
||
url: 'https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'DuplexWeb-Google',
|
||
name: 'DuplexWeb-Google',
|
||
category: 'Crawler',
|
||
url: 'https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Google-Area120-PrivacyPolicyFetcher',
|
||
name: 'Google Area 120 Privacy Policy Fetcher',
|
||
category: 'Crawler',
|
||
url: 'https://area120.google.com/',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'HubSpot ',
|
||
name: 'HubSpot',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'HubSpot Inc.',
|
||
url: 'https://www.hubspot.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'vuhuv(?:Bot|RBT)',
|
||
name: 'vuhuvBot',
|
||
category: 'Search bot',
|
||
url: 'https://vuhuv.com/bot.html',
|
||
},
|
||
{
|
||
includes: 'HTTPMon',
|
||
name: 'HTTPMon',
|
||
category: 'Site Monitor',
|
||
url: 'http://www.httpmon.com',
|
||
producer: {
|
||
name: 'towards GmbH',
|
||
url: 'http://www.towards.ch/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ICC-Crawler',
|
||
name: 'ICC-Crawler',
|
||
category: 'Crawler',
|
||
url: 'http://www.nict.go.jp/en/univ-com/plan/crawl.html',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
regex: 'inoreader\\.com',
|
||
name: 'inoreader',
|
||
category: 'Feed Reader',
|
||
url: 'https://www.inoreader.com',
|
||
},
|
||
{
|
||
includes: 'iisbot',
|
||
name: 'IIS Site Analysis',
|
||
category: 'Crawler',
|
||
url: 'http://www.iis.net/iisbot.html',
|
||
producer: {
|
||
name: 'Microsoft Corporation',
|
||
url: 'http://www.microsoft.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ips-agent',
|
||
name: 'IPS Agent',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'VeriSign, Inc',
|
||
url: 'http://www.verisign.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'IP-Guide\\.com',
|
||
name: 'IP-Guide Crawler',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: '',
|
||
url: 'https://ip-guide.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'k6/',
|
||
name: 'K6',
|
||
url: 'https://k6.io/',
|
||
},
|
||
{
|
||
includes: 'kouio',
|
||
name: 'Kouio',
|
||
url: 'http://kouio.com/',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'larbin',
|
||
name: 'Larbin web crawler',
|
||
category: 'Crawler',
|
||
url: 'http://larbin.sourceforge.net',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
regex: '[A-z0-9]*-Lighthouse',
|
||
name: 'Lighthouse',
|
||
category: 'Site Monitor',
|
||
url: 'https://developers.google.com/web/tools/lighthouse',
|
||
producer: {
|
||
name: 'Lighthouse',
|
||
url: 'https://developers.google.com/web/tools/lighthouse',
|
||
},
|
||
},
|
||
{
|
||
regex: 'last-modified\\.com',
|
||
name: 'LastMod Bot',
|
||
category: 'Site Monitor',
|
||
url: 'https://last-modified.com/en/about',
|
||
producer: {
|
||
name: '',
|
||
url: 'https://last-modified.com/en',
|
||
},
|
||
},
|
||
{
|
||
regex: 'linkdexbot|linkdex\\.com',
|
||
name: 'Linkdex Bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.linkdex.com/bots',
|
||
producer: {
|
||
name: 'Mojeek Ltd.',
|
||
url: 'http://www.mojeek.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'LinkedInBot',
|
||
name: 'LinkedIn Bot',
|
||
category: 'Social Media Agent',
|
||
url: 'http://www.linkedin.com',
|
||
producer: {
|
||
name: 'LinkedIn',
|
||
url: 'http://www.linkedin.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ltx71',
|
||
name: 'LTX71',
|
||
category: 'Security Checker',
|
||
url: 'https://ltx71.com/',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Mail\\.RU',
|
||
name: 'Mail.Ru Bot',
|
||
category: 'Search bot',
|
||
url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots',
|
||
producer: {
|
||
name: 'Mail.Ru Group',
|
||
url: 'http://corp.mail.ru',
|
||
},
|
||
},
|
||
{
|
||
includes: 'magpie-crawler',
|
||
name: 'Magpie-Crawler',
|
||
category: 'Social Media Agent',
|
||
url: 'http://www.brandwatch.com/magpie-crawler/',
|
||
producer: {
|
||
name: 'Brandwatch',
|
||
url: 'http://www.brandwatch.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'MagpieRSS',
|
||
name: 'MagpieRSS',
|
||
url: 'http://magpierss.sourceforge.net/',
|
||
category: 'Feed Parser',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'masscan-ng',
|
||
name: 'masscan-ng',
|
||
url: 'https://github.com/bi-zone/masscan-ng',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'BIZON, OOO',
|
||
url: 'https://bi.zone/',
|
||
},
|
||
},
|
||
{
|
||
regex: '.*masscan',
|
||
name: 'masscan',
|
||
url: 'https://github.com/robertdavidgraham/masscan',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'Robert Graham',
|
||
url: 'https://github.com/robertdavidgraham',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Mastodon/',
|
||
name: 'Mastodon Bot',
|
||
category: 'Social Media Agent',
|
||
},
|
||
{
|
||
includes: 'meanpathbot',
|
||
name: 'Meanpath Bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.meanpath.com/meanpathbot.html',
|
||
producer: {
|
||
name: 'Meanpath',
|
||
url: 'http://www.meanpath.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'MetaJobBot',
|
||
name: 'MetaJobBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.metajob.at/the/crawler',
|
||
producer: {
|
||
name: 'MetaJob',
|
||
url: 'http://www.metajob.at',
|
||
},
|
||
},
|
||
{
|
||
includes: 'MetaInspector',
|
||
name: 'MetaInspector',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/jaimeiniesta/metainspector',
|
||
},
|
||
{
|
||
includes: 'MixrankBot',
|
||
name: 'MixRank Bot',
|
||
category: 'Crawler',
|
||
url: 'http://mixrank.com',
|
||
producer: {
|
||
name: 'Online Media Group, Inc.',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'MJ12bot',
|
||
name: 'MJ12 Bot',
|
||
category: 'Search bot',
|
||
url: 'http://majestic12.co.uk/bot.php',
|
||
producer: {
|
||
name: 'Majestic-12',
|
||
url: 'http://majestic12.co.uk',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Mnogosearch',
|
||
name: 'Mnogosearch',
|
||
category: 'Search bot',
|
||
url: 'http://www.mnogosearch.org/',
|
||
producer: {
|
||
name: 'Lavtech.Com Corp.',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'MojeekBot',
|
||
name: 'MojeekBot',
|
||
category: 'Search bot',
|
||
url: 'http://www.mojeek.com/bot.html',
|
||
producer: {
|
||
name: 'Mojeek Ltd.',
|
||
url: 'http://www.mojeek.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'munin',
|
||
name: 'Munin',
|
||
category: 'Site Monitor',
|
||
url: 'http://munin-monitoring.org/',
|
||
producer: {
|
||
name: 'Munin',
|
||
url: 'http://munin-monitoring.org/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'NalezenCzBot',
|
||
name: 'NalezenCzBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.nalezen.cz/about-crawler',
|
||
producer: {
|
||
name: 'Jaroslav Kuboš',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'check_http/v',
|
||
name: 'Nagios check_http',
|
||
category: 'Site Monitor',
|
||
url: 'https://nagios.org',
|
||
producer: {
|
||
name: 'Nagios Plugins Development Team',
|
||
url: 'https://nagios.org',
|
||
},
|
||
},
|
||
{
|
||
regex: 'nbertaupete95\\(at\\)gmail\\.com',
|
||
name: 'nbertaupete95',
|
||
category: 'Crawler',
|
||
},
|
||
{
|
||
regex: 'Netcraft(?: Web Server Survey| SSL Server Survey|SurveyAgent)',
|
||
name: 'Netcraft Survey Bot',
|
||
category: 'Search bot',
|
||
url: '',
|
||
producer: {
|
||
name: 'Netcraft',
|
||
url: 'http://www.netcraft.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'netEstate NE Crawler',
|
||
name: 'netEstate',
|
||
category: 'Crawler',
|
||
url: 'http://www.website-datenbank.de/Impressum',
|
||
producer: {
|
||
name: 'netEstate GmbH',
|
||
url: 'https://www.netestate.de/en/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Netvibes',
|
||
name: 'Netvibes',
|
||
url: 'http://www.netvibes.com/',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
regex: 'NewsBlur .*(?:Fetcher|Finder)',
|
||
name: 'NewsBlur',
|
||
url: 'http://www.newsblur.com',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'NewsGatorOnline',
|
||
name: 'NewsGator',
|
||
url: 'http://www.newsgator.com',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'nlcrawler',
|
||
name: 'NLCrawler',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Northern Light',
|
||
url: 'http://northernlight.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Nmap Scripting Engine',
|
||
name: 'Nmap',
|
||
category: 'Security Checker',
|
||
url: 'https://nmap.org/book/nse.html',
|
||
producer: {
|
||
name: 'Nmap',
|
||
url: 'https://nmap.org/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Nuzzel',
|
||
name: 'Nuzzel',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'Nuzzel',
|
||
url: 'https://www.nuzzel.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'NodePing',
|
||
name: 'NodePing',
|
||
category: 'Site Monitor',
|
||
url: 'https://nodeping.com',
|
||
producer: {
|
||
name: 'NodePing',
|
||
url: 'https://nodeping.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Octopus [\\d.]+',
|
||
name: 'Octopus',
|
||
},
|
||
{
|
||
regex: 'OnlineOrNot\\.com_bot',
|
||
name: 'OnlineOrNot Bot',
|
||
category: 'Site Monitor',
|
||
url: 'https://onlineornot.com/website-monitoring',
|
||
producer: {
|
||
name: 'OnlineOrNot',
|
||
url: 'https://onlineornot.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'omgili',
|
||
name: 'Omgili bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.omgili.com/Crawler.html',
|
||
producer: {
|
||
name: 'Omgili',
|
||
url: 'http://www.omgili.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'OpenindexSpider',
|
||
name: 'Openindex Spider',
|
||
category: 'Search bot',
|
||
url: 'http://www.openindex.io/en/webmasters/spider.html',
|
||
producer: {
|
||
name: 'Openindex B.V.',
|
||
url: 'http://www.openindex.io',
|
||
},
|
||
},
|
||
{
|
||
includes: 'spbot',
|
||
name: 'OpenLinkProfiler',
|
||
category: 'Crawler',
|
||
url: 'http://openlinkprofiler.org/bot',
|
||
producer: {
|
||
name: 'Axandra GmbH',
|
||
url: 'http://www.axandra.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'OpenWebSpider',
|
||
name: 'OpenWebSpider',
|
||
category: 'Crawler',
|
||
url: 'http://www.openwebspider.org',
|
||
producer: {
|
||
name: 'OpenWebSpider Lab',
|
||
url: 'http://lab.openwebspider.org',
|
||
},
|
||
},
|
||
{
|
||
regex: 'OrangeBot|VoilaBot',
|
||
name: 'Orange Bot',
|
||
category: 'Search bot',
|
||
url: 'http://lemoteur.orange.fr',
|
||
producer: {
|
||
name: 'Orange',
|
||
url: 'http://www.orange.fr',
|
||
},
|
||
},
|
||
{
|
||
includes: 'PaperLiBot',
|
||
name: 'PaperLiBot',
|
||
category: 'Search bot',
|
||
url: 'http://support.paper.li/entries/20023257-what-is-paper-li',
|
||
producer: {
|
||
name: 'Smallrivers SA',
|
||
url: 'http://www.paper.li',
|
||
},
|
||
},
|
||
{
|
||
includes: 'phantomas/',
|
||
name: 'Phantomas',
|
||
category: 'Site Monitor',
|
||
url: 'https://github.com/macbre/phantomas',
|
||
},
|
||
{
|
||
includes: 'phpservermon',
|
||
name: 'PHP Server Monitor',
|
||
category: 'Site Monitor',
|
||
url: 'https://github.com/phpservermon/phpservermon',
|
||
producer: {
|
||
name: 'PHP Server Monitor',
|
||
url: 'http://www.phpservermonitor.org/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Pocket(?:ImageCache|Parser)',
|
||
name: 'Pocket',
|
||
category: 'Read-it-later Service',
|
||
url: 'https://getpocket.com/pocketparser_ua',
|
||
producer: {
|
||
name: 'Read It Later, Inc.',
|
||
url: 'https://getpocket.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'PritTorrent',
|
||
name: 'PritTorrent',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/astro/prittorrent',
|
||
producer: {
|
||
name: 'Bitlove',
|
||
url: 'http://bitlove.org/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'PRTG Network Monitor',
|
||
name: 'PRTG Network Monitor',
|
||
category: 'Network Monitor',
|
||
url: 'https://www.paessler.com/prtg',
|
||
producer: {
|
||
name: 'Paessler AG',
|
||
url: 'https://www.paessler.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'psbot',
|
||
name: 'Picsearch bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.picsearch.com/bot.html',
|
||
producer: {
|
||
name: 'Picsearch',
|
||
url: 'http://www.picsearch.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Pingdom(?:\\.com|TMS)',
|
||
name: 'Pingdom Bot',
|
||
category: 'Site Monitor',
|
||
url: '',
|
||
producer: {
|
||
name: 'Pingdom AB',
|
||
url: 'https://www.pingdom.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Quora Link Preview',
|
||
name: 'Quora Link Preview',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Quora',
|
||
url: 'http://www.quora.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Quora-Bot',
|
||
name: 'Quora Bot',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Quora',
|
||
url: 'https://www.quora.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'RamblerMail',
|
||
name: 'RamblerMail Image Proxy',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Rambler&Co',
|
||
url: 'https://rambler-co.ru/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'QuerySeekerSpider',
|
||
name: 'QuerySeekerSpider',
|
||
category: 'Crawler',
|
||
url: 'http://queryseeker.com/bot.html',
|
||
producer: {
|
||
name: 'QueryEye Inc.',
|
||
url: 'http://queryeye.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Qwantify|Qwantbot',
|
||
name: 'Qwantbot',
|
||
category: 'Crawler',
|
||
url: 'https://help.qwant.com/bot/',
|
||
producer: {
|
||
name: 'Qwant Corporation',
|
||
url: 'https://www.qwant.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Rainmeter',
|
||
name: 'Rainmeter',
|
||
category: 'Crawler',
|
||
url: 'https://www.rainmeter.net',
|
||
},
|
||
{
|
||
includes: 'redditbot',
|
||
name: 'Reddit Bot',
|
||
category: 'Social Media Agent',
|
||
url: 'http://www.reddit.com/feedback',
|
||
producer: {
|
||
name: 'reddit inc.',
|
||
url: 'http://www.reddit.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Riddler',
|
||
name: 'Riddler',
|
||
category: 'Security search bot',
|
||
url: 'https://riddler.io/about',
|
||
producer: {
|
||
name: 'F-Secure',
|
||
url: 'https://www.f-secure.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'rogerbot',
|
||
name: 'Rogerbot',
|
||
category: 'Crawler',
|
||
url: 'http://moz.com/help/pro/what-is-rogerbot-',
|
||
producer: {
|
||
name: 'SEOmoz, Inc.',
|
||
url: 'http://moz.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ROI Hunter',
|
||
name: 'ROI Hunter',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Roihunter a.s.',
|
||
url: 'http://roihunter.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SafeDNSBot',
|
||
name: 'SafeDNSBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.safedns.com/searchbot',
|
||
producer: {
|
||
name: 'SafeDNS, Inc.',
|
||
url: 'https://www.safedns.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Scrapy',
|
||
name: 'Scrapy',
|
||
category: 'Crawler',
|
||
url: 'http://scrapy.org',
|
||
},
|
||
{
|
||
includes: 'Screaming Frog SEO Spider',
|
||
name: 'Screaming Frog SEO Spider',
|
||
category: 'Crawler',
|
||
url: 'http://www.screamingfrog.co.uk/seo-spider',
|
||
producer: {
|
||
name: 'Screaming Frog Ltd',
|
||
url: 'http://www.screamingfrog.co.uk',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ScreenerBot',
|
||
name: 'ScreenerBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.screenerbot.com',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SemrushBot',
|
||
name: 'SemrushBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.semrush.com/bot/',
|
||
producer: {
|
||
name: 'Semrush Inc.',
|
||
url: 'https://www.semrush.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'BacklinksExtendedBot',
|
||
name: 'BacklinksExtendedBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.semrush.com/bot/',
|
||
producer: {
|
||
name: 'Semrush Inc.',
|
||
url: 'https://www.semrush.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SerpReputationManagementAgent',
|
||
name: 'Semrush Reputation Management',
|
||
category: 'Service Agent',
|
||
url: 'https://www.semrush.com/bot/',
|
||
producer: {
|
||
name: 'Semrush Inc.',
|
||
url: 'https://www.semrush.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SplitSignalBot',
|
||
name: 'SplitSignalBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.semrush.com/bot/',
|
||
producer: {
|
||
name: 'Semrush Inc.',
|
||
url: 'https://www.semrush.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SiteAuditBot',
|
||
name: 'SiteAuditBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.semrush.com/bot/',
|
||
producer: {
|
||
name: 'Semrush Inc.',
|
||
url: 'https://www.semrush.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SensikaBot',
|
||
name: 'Sensika Bot',
|
||
category: '',
|
||
url: '',
|
||
producer: {
|
||
name: 'Sensika',
|
||
url: 'http://sensika.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'SEOENG(?:World)?Bot',
|
||
name: 'SEOENGBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.seoengine.com/seoengbot.htm',
|
||
producer: {
|
||
name: 'SEO Engine',
|
||
url: 'http://www.seoengine.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'seoscanners\\.net',
|
||
name: 'Seoscanners.net',
|
||
category: 'Crawler',
|
||
url: '',
|
||
},
|
||
{
|
||
includes: 'SkypeUriPreview',
|
||
name: 'Skype URI Preview',
|
||
category: 'Service Agent',
|
||
url: '',
|
||
producer: {
|
||
name: 'Skype Communications S.à.r.l.',
|
||
url: 'https://www.skype.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'SeznamBot|SklikBot|Seznam screenshot-generator',
|
||
name: 'Seznam Bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.mapy.cz/cz/seznambot.html',
|
||
producer: {
|
||
name: 'Seznam.cz, a.s.',
|
||
url: 'http://www.seznam.cz/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'shopify-partner-homepage-scraper',
|
||
name: 'Shopify Partner',
|
||
category: 'Crawler',
|
||
url: 'https://www.shopify.com/partners',
|
||
producer: {
|
||
name: 'Shopify',
|
||
url: 'https://www.shopify.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ShopWiki',
|
||
name: 'ShopWiki',
|
||
category: 'Search tools',
|
||
url: 'http://www.shopwiki.com/wiki/Help:Bot',
|
||
producer: {
|
||
name: 'ShopWiki Corp.',
|
||
url: 'http://www.shopwiki.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SilverReader',
|
||
name: 'SilverReader',
|
||
url: 'http://silverreader.com',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SimplePie',
|
||
name: 'SimplePie',
|
||
url: 'http://www.simplepie.org',
|
||
category: 'Feed Parser',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SISTRIX Crawler',
|
||
name: 'SISTRIX Crawler',
|
||
category: 'Crawler',
|
||
url: 'http://crawler.sistrix.net',
|
||
producer: {
|
||
name: 'SISTRIX GmbH',
|
||
url: 'http://www.sistrix.de',
|
||
},
|
||
},
|
||
{
|
||
regex: 'compatible; (?:SISTRIX )?Optimizer',
|
||
name: 'SISTRIX Optimizer',
|
||
category: 'Crawler',
|
||
url: 'https://optimizer.sistrix.com',
|
||
producer: {
|
||
name: 'SISTRIX GmbH',
|
||
url: 'http://www.sistrix.de',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SiteSucker',
|
||
name: 'SiteSucker',
|
||
category: 'Crawler',
|
||
url: 'http://ricks-apps.com/osx/sitesucker/',
|
||
},
|
||
{
|
||
regex: 'sixy\\.ch',
|
||
name: 'Sixy.ch',
|
||
category: 'Site Monitor',
|
||
url: 'http://sixy.ch',
|
||
producer: {
|
||
name: 'Manuel Kasper',
|
||
url: 'https://neon1.net/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Slackbot|Slack-ImgProxy',
|
||
name: 'Slackbot',
|
||
category: 'Crawler',
|
||
url: 'https://api.slack.com/robots',
|
||
producer: {
|
||
name: 'Slack Technologies',
|
||
url: 'http://slack.com',
|
||
},
|
||
},
|
||
{
|
||
regex:
|
||
'Sogou[ -](?:head|inst|Orion|Pic|Test|web)[ -]spider|New-Sogou-Spider',
|
||
name: 'Sogou Spider',
|
||
category: 'Search bot',
|
||
url: 'http://www.sogou.com/docs/help/webmasters.htm',
|
||
producer: {
|
||
name: 'Sohu, Inc.',
|
||
url: 'http://www.sogou.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Sosospider|Sosoimagespider',
|
||
name: 'Soso Spider',
|
||
category: 'Search bot',
|
||
url: 'http://help.soso.com/webspider.htm',
|
||
producer: {
|
||
name: 'Tencent Holdings',
|
||
url: 'http://www.soso.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Sprinklr',
|
||
name: 'Sprinklr',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Sprinklr, Inc.',
|
||
url: 'https://www.sprinklr.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SSL Labs',
|
||
name: 'SSL Labs',
|
||
category: 'Validator',
|
||
url: 'https://www.ssllabs.com/about/assessment.html',
|
||
producer: {
|
||
name: 'SSL Labs',
|
||
url: 'https://www.ssllabs.com/about/assessment.html',
|
||
},
|
||
},
|
||
{
|
||
includes: 'StatusCake',
|
||
name: 'StatusCake',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.statuscake.com',
|
||
producer: {
|
||
name: 'StatusCake',
|
||
url: 'https://www.statuscake.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Superfeedr bot',
|
||
name: 'Superfeedr Bot',
|
||
category: 'Feed Fetcher',
|
||
url: '',
|
||
producer: {
|
||
name: 'Superfeedr',
|
||
url: 'https://superfeedr.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Sparkler',
|
||
name: 'Sparkler',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/USCDataScience/sparkler',
|
||
},
|
||
{
|
||
includes: 'Spinn3r',
|
||
name: 'Spinn3r',
|
||
category: 'Crawler',
|
||
url: 'http://spinn3r.com/robot',
|
||
producer: {
|
||
name: 'Tailrank Inc',
|
||
url: 'http://spinn3r.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SputnikBot',
|
||
name: 'Sputnik Bot',
|
||
category: 'Crawler',
|
||
url: '',
|
||
},
|
||
{
|
||
includes: 'SputnikFaviconBot',
|
||
name: 'Sputnik Favicon Bot',
|
||
category: 'Crawler',
|
||
url: '',
|
||
},
|
||
{
|
||
includes: 'SputnikImageBot',
|
||
name: 'Sputnik Image Bot',
|
||
category: 'Crawler',
|
||
url: '',
|
||
},
|
||
{
|
||
includes: 'SurveyBot',
|
||
name: 'Survey Bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.domaintools.com/webmasters/surveybot.php',
|
||
producer: {
|
||
name: 'Domain Tools',
|
||
url: 'http://www.domaintools.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'TarmotGezgin',
|
||
name: 'Tarmot Gezgin',
|
||
url: 'http://www.tarmot.com/gezgin/',
|
||
category: 'Search bot',
|
||
},
|
||
{
|
||
includes: 'TelegramBot',
|
||
name: 'TelegramBot',
|
||
url: 'https://telegram.org/blog/bot-revolution',
|
||
},
|
||
{
|
||
includes: 'TLSProbe',
|
||
name: 'TLSProbe',
|
||
url: 'https://scan.trustnet.venafi.com/',
|
||
category: 'Security search bot',
|
||
producer: {
|
||
name: 'Venafi TrustNet',
|
||
url: 'https://www.venafi.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'TinEye-bot',
|
||
name: 'TinEye Crawler',
|
||
category: 'Search bot',
|
||
url: 'http://www.tineye.com/crawler.html',
|
||
producer: {
|
||
name: 'Idée Inc.',
|
||
url: 'http://ideeinc.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Tiny Tiny RSS',
|
||
name: 'Tiny Tiny RSS',
|
||
url: 'http://tt-rss.org',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
regex: 'theoldreader\\.com',
|
||
name: 'theoldreader',
|
||
category: 'Feed Reader',
|
||
url: 'https://theoldreader.com',
|
||
},
|
||
{
|
||
regex: 'Trackable/0\\.1',
|
||
name: 'Chartable',
|
||
category: 'Site Monitor',
|
||
url: 'https://help.chartable.com/article/34-what-is-the-trackable-analytics-prefix',
|
||
producer: {
|
||
name: 'Chartable',
|
||
url: 'https://chartable.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'trendictionbot',
|
||
name: 'Trendiction Bot',
|
||
category: 'Crawler',
|
||
url: 'http://www.trendiction.de/bot',
|
||
producer: {
|
||
name: 'Talkwalker Inc.',
|
||
url: 'http://www.talkwalker.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'TurnitinBot',
|
||
name: 'TurnitinBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.turnitin.com/robot/crawlerinfo.html',
|
||
producer: {
|
||
name: 'iParadigms, LLC.',
|
||
url: 'http://www.turnitin.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'TweetedTimes',
|
||
name: 'TweetedTimes Bot',
|
||
category: 'Crawler',
|
||
url: 'https://tweetedtimes.com/',
|
||
producer: {
|
||
name: 'TweetedTimes',
|
||
url: 'https://tweetedtimes.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'TweetmemeBot',
|
||
name: 'Tweetmeme Bot',
|
||
category: 'Crawler',
|
||
url: 'http://tweetmeme.com/',
|
||
producer: {
|
||
name: 'Mediasift',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Twingly Recon',
|
||
name: 'Twingly Recon',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'Twingly',
|
||
url: 'https://www.twingly.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Twitterbot',
|
||
name: 'Twitterbot',
|
||
category: 'Social Media Agent',
|
||
url: 'https://dev.twitter.com/docs/cards/getting-started',
|
||
producer: {
|
||
name: 'Twitter',
|
||
url: 'http://www.twitter.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'UniversalFeedParser',
|
||
name: 'UniversalFeedParser',
|
||
category: 'Feed Fetcher',
|
||
url: 'https://github.com/kurtmckee/feedparser',
|
||
producer: {
|
||
name: 'Kurt McKee',
|
||
url: 'https://github.com/kurtmckee',
|
||
},
|
||
},
|
||
{
|
||
regex: 'via secureurl\\.fwdcdn\\.com',
|
||
name: 'UkrNet Mail Proxy',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'UkrNet Ltd',
|
||
url: 'https://www.ukr.net/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Uptime(?:bot)?/',
|
||
name: 'Uptimebot',
|
||
category: 'Site Monitor',
|
||
url: 'https://uptime.com/uptime-bot',
|
||
producer: {
|
||
name: 'Uptime',
|
||
url: 'https://uptime.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'UptimeRobot',
|
||
name: 'UptimeRobot',
|
||
category: 'Site Monitor',
|
||
url: 'https://uptimerobot.com/',
|
||
producer: {
|
||
name: 'Uptime Robot',
|
||
url: 'https://uptimerobot.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'URLAppendBot',
|
||
name: 'URLAppendBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.profound.net/urlappendbot.html',
|
||
producer: {
|
||
name: 'Profound Networks',
|
||
url: 'http://www.profound.net',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Vagabondo',
|
||
name: 'Vagabondo',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'WiseGuys',
|
||
url: 'http://www.wise-guys.nl/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'vkShare; ',
|
||
name: 'VK Share Button',
|
||
category: 'Crawler',
|
||
url: 'https://dev.vk.com/en/widgets/share',
|
||
producer: {
|
||
name: 'VK',
|
||
url: 'https://vk.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'VKRobot',
|
||
name: 'VK Robot',
|
||
category: 'Crawler',
|
||
url: 'https://dev.vk.com/en/',
|
||
producer: {
|
||
name: 'VK',
|
||
url: 'https://vk.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'VSMCrawler',
|
||
name: 'Visual Site Mapper Crawler',
|
||
category: 'Crawler',
|
||
url: 'http://www.visualsitemapper.com/crawler',
|
||
producer: {
|
||
name: 'Alentum Software Ltd.',
|
||
url: 'http://www.alentum.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Jigsaw',
|
||
name: 'W3C CSS Validator',
|
||
category: 'Validator',
|
||
url: 'http://jigsaw.w3.org/css-validator',
|
||
producer: {
|
||
name: 'W3C',
|
||
url: 'http://www.w3.org',
|
||
},
|
||
},
|
||
{
|
||
includes: 'W3C_I18n-Checker',
|
||
name: 'W3C I18N Checker',
|
||
category: 'Validator',
|
||
url: 'http://validator.w3.org/i18n-checker',
|
||
producer: {
|
||
name: 'W3C',
|
||
url: 'http://www.w3.org',
|
||
},
|
||
},
|
||
{
|
||
includes: 'W3C-checklink',
|
||
name: 'W3C Link Checker',
|
||
category: 'Validator',
|
||
url: 'http://validator.w3.org/checklink',
|
||
producer: {
|
||
name: 'W3C',
|
||
url: 'http://www.w3.org',
|
||
},
|
||
},
|
||
{
|
||
regex: 'W3C_Validator|Validator\\.nu',
|
||
name: 'W3C Markup Validation Service',
|
||
category: 'Validator',
|
||
url: 'http://validator.w3.org/services',
|
||
producer: {
|
||
name: 'W3C',
|
||
url: 'http://www.w3.org',
|
||
},
|
||
},
|
||
{
|
||
includes: 'W3C-mobileOK',
|
||
name: 'W3C MobileOK Checker',
|
||
category: 'Validator',
|
||
url: 'http://validator.w3.org/mobile',
|
||
producer: {
|
||
name: 'W3C',
|
||
url: 'http://www.w3.org',
|
||
},
|
||
},
|
||
{
|
||
includes: 'W3C_Unicorn',
|
||
name: 'W3C Unified Validator',
|
||
category: 'Validator',
|
||
url: 'http://validator.w3.org/unicorn',
|
||
producer: {
|
||
name: 'W3C',
|
||
url: 'http://www.w3.org',
|
||
},
|
||
},
|
||
{
|
||
includes: 'P3P Validator',
|
||
name: 'W3C P3P Validator',
|
||
category: 'Validator',
|
||
url: 'https://www.w3.org/P3P/validator.html',
|
||
producer: {
|
||
name: 'W3C',
|
||
url: 'https://www.w3.org',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Wappalyzer',
|
||
name: 'Wappalyzer',
|
||
url: 'https://github.com/AliasIO/Wappalyzer',
|
||
producer: {
|
||
name: 'AliasIO',
|
||
url: 'https://github.com/AliasIO',
|
||
},
|
||
},
|
||
{
|
||
includes: 'PTST/',
|
||
name: 'WebPageTest',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.webpagetest.org',
|
||
},
|
||
{
|
||
includes: 'WeSEE',
|
||
name: 'WeSEE:Search',
|
||
category: 'Search bot',
|
||
url: 'http://www.wesee.com/bot',
|
||
producer: {
|
||
name: 'WeSEE Ltd',
|
||
url: 'http://www.wesee.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'WebbCrawler',
|
||
name: 'WebbCrawler',
|
||
category: 'Crawler',
|
||
url: 'http://badcheese.com/crawler.html',
|
||
producer: {
|
||
name: 'Steve Webb',
|
||
url: 'http://badcheese.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'websitepulse[+ ]checker',
|
||
name: 'WebSitePulse',
|
||
category: 'Site Monitor',
|
||
url: 'http://badcheese.com/crawler.html',
|
||
producer: {
|
||
name: 'WebSitePulse',
|
||
url: 'http://www.websitepulse.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'WordPress.+isitwp\\.com',
|
||
name: 'IsItWP',
|
||
category: 'Crawler',
|
||
url: 'https://www.isitwp.com/',
|
||
producer: {
|
||
name: 'WPBeginner, LLC',
|
||
url: 'https://www.wpbeginner.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Automattic Analytics Crawler',
|
||
name: 'Automattic Analytics',
|
||
category: 'Crawler',
|
||
url: 'https://wordpress.com/crawler/',
|
||
producer: {
|
||
name: 'Wordpress.org',
|
||
url: 'https://wordpress.org/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'WordPress\\.com mShots',
|
||
name: 'WordPress.com mShots',
|
||
category: 'Service Agent',
|
||
url: 'https://wordpress.org/',
|
||
producer: {
|
||
name: 'Wordpress.org',
|
||
url: 'https://wordpress.org/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'wp\\.com feedbot',
|
||
name: 'wp.com feedbot',
|
||
category: 'Feed Fetcher',
|
||
url: 'https://wordpress.com/',
|
||
producer: {
|
||
name: 'Automattic, Inc.',
|
||
url: 'https://automattic.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'WordPress',
|
||
name: 'WordPress',
|
||
category: 'Service Agent',
|
||
url: 'https://wordpress.org/',
|
||
producer: {
|
||
name: 'Wordpress.org',
|
||
url: 'https://wordpress.org/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Wotbox',
|
||
name: 'Wotbox',
|
||
category: 'Search bot',
|
||
url: 'http://www.wotbox.com/bot/',
|
||
producer: {
|
||
name: 'Wotbox',
|
||
url: 'http://www.wotbox.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'XenForo',
|
||
name: 'XenForo',
|
||
category: 'Service Agent',
|
||
url: 'https://xenforo.com/',
|
||
producer: {
|
||
name: 'XenForo Ltd.',
|
||
url: 'https://xenforo.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'yacybot',
|
||
name: 'YaCy',
|
||
category: 'Search bot',
|
||
url: 'http://yacy.net/bot.html',
|
||
producer: {
|
||
name: 'YaCy',
|
||
url: 'http://yacy.net',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Yahoo! Slurp|Yahoo!-AdCrawler',
|
||
name: 'Yahoo! Slurp',
|
||
category: 'Search bot',
|
||
url: 'http://help.yahoo.com/ysearch/slurp',
|
||
producer: {
|
||
name: 'Yahoo! Inc.',
|
||
url: 'http://www.yahoo.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Yahoo Link Preview|Yahoo:LinkExpander:Slingstone',
|
||
name: 'Yahoo! Link Preview',
|
||
category: 'Crawler',
|
||
url: 'https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html',
|
||
producer: {
|
||
name: 'Yahoo! Inc.',
|
||
url: 'http://www.yahoo.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'YahooMailProxy',
|
||
name: 'Yahoo! Mail Proxy',
|
||
category: 'Service Agent',
|
||
url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html',
|
||
producer: {
|
||
name: 'Yahoo! Inc.',
|
||
url: 'http://www.yahoo.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'YahooCacheSystem',
|
||
name: 'Yahoo! Cache System',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Yahoo! Inc.',
|
||
url: 'http://www.yahoo.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Y!J-BRW',
|
||
name: 'Yahoo! Japan BRW',
|
||
category: 'Crawler',
|
||
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955',
|
||
producer: {
|
||
name: 'Yahoo! Japan Corp.',
|
||
url: 'https://www.yahoo.co.jp/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Y!J-WSC',
|
||
name: 'Yahoo! Japan WSC',
|
||
category: 'Crawler',
|
||
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955',
|
||
producer: {
|
||
name: 'Yahoo! Japan Corp.',
|
||
url: 'https://www.yahoo.co.jp/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Y!J-ASR',
|
||
name: 'Yahoo! Japan ASR',
|
||
category: 'Crawler',
|
||
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955',
|
||
producer: {
|
||
name: 'Yahoo! Japan Corp.',
|
||
url: 'https://www.yahoo.co.jp/',
|
||
},
|
||
},
|
||
{
|
||
regex: '^Y!J',
|
||
name: 'Yahoo! Japan',
|
||
category: 'Crawler',
|
||
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955',
|
||
producer: {
|
||
name: 'Yahoo! Japan Corp.',
|
||
url: 'https://www.yahoo.co.jp/',
|
||
},
|
||
},
|
||
{
|
||
regex:
|
||
'Yandex(?:(?:\\.Gazeta |Accessibility|Additional|Com|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:Additional|AdNet|Antivirus|Blogs|Calendar|Catalog|Dialogs|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Userproxy|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher',
|
||
name: 'Yandex Bot',
|
||
category: 'Search bot',
|
||
url: 'https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html',
|
||
producer: {
|
||
name: 'Yandex LLC',
|
||
url: 'https://yandex.com/company/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Yeti|NaverJapan|AdsBot-Naver',
|
||
name: 'Yeti/Naverbot',
|
||
category: 'Search bot',
|
||
url: 'http://help.naver.com/robots/',
|
||
producer: {
|
||
name: 'Naver',
|
||
url: 'http://www.naver.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'YoudaoBot',
|
||
name: 'Youdao Bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.youdao.com/help/webmaster/spider',
|
||
producer: {
|
||
name: 'NetEase, Inc.',
|
||
url: 'http://corp.163.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'YOURLS',
|
||
name: 'Yourls',
|
||
category: 'Crawler',
|
||
url: 'http://yourls.org',
|
||
},
|
||
{
|
||
regex: 'YRSpider|YYSpider',
|
||
name: 'Yunyun Bot',
|
||
category: 'Search bot',
|
||
url: 'http://www.yunyun.com/SiteInfo.php?r=about',
|
||
producer: {
|
||
name: 'YunYun',
|
||
url: 'http://www.yunyun.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'zgrab',
|
||
name: 'zgrab',
|
||
category: 'Security Checker',
|
||
url: 'https://github.com/zmap/zgrab',
|
||
},
|
||
{
|
||
includes: 'Zookabot',
|
||
name: 'Zookabot',
|
||
category: 'Crawler',
|
||
url: 'http://zookabot.com',
|
||
producer: {
|
||
name: 'Hwacha ApS',
|
||
url: 'http://hwacha.dk',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ZumBot',
|
||
name: 'ZumBot',
|
||
category: 'Search bot',
|
||
url: 'http://help.zum.com/inquiry',
|
||
producer: {
|
||
name: 'ZUM internet',
|
||
url: 'http://www.zuminternet.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'YottaaMonitor',
|
||
name: 'Yottaa Site Monitor',
|
||
category: 'Site Monitor',
|
||
url: 'http://www.yottaa.com/products/site-monitor',
|
||
producer: {
|
||
name: 'Yottaa',
|
||
url: 'http://www.yottaa.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857',
|
||
name: 'Yahoo Gemini',
|
||
category: 'Crawler',
|
||
url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html',
|
||
producer: {
|
||
name: 'Yahoo! Inc.',
|
||
url: 'http://www.yahoo.com',
|
||
},
|
||
},
|
||
{
|
||
regex: '.*Java.*outbrain',
|
||
name: 'Outbrain',
|
||
category: 'Crawler',
|
||
url: '',
|
||
producer: {
|
||
name: 'Outbrain',
|
||
url: 'http://www.outbrain.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'HubPages.*crawlingpolicy',
|
||
name: 'HubPages',
|
||
category: 'Crawler',
|
||
url: 'https://hubpages.com/help/crawlingpolicy',
|
||
producer: {
|
||
name: 'HubPages, Inc.',
|
||
url: 'https://discover.hubpages.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Pinterest(?:bot)?/.*www\\.pinterest\\.com',
|
||
name: 'Pinterest',
|
||
url: 'https://help.pinterest.com/en/business/article/pinterest-crawler',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'Pinterest',
|
||
url: 'https://www.pinterest.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: '.*Site24x7',
|
||
name: 'Site24x7 Website Monitoring',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.site24x7.com/site24x7-faq.html',
|
||
producer: {
|
||
name: 'Site24x7',
|
||
url: 'https://www.site24x7.com',
|
||
},
|
||
},
|
||
{
|
||
regex: '.* HLB',
|
||
name: 'Site24x7 Defacement Monitor',
|
||
category: 'Site Monitor',
|
||
url: 'https://support.site24x7.com/portal/en/kb/articles/default-user-agent-used-in-website-defacement-monitor',
|
||
producer: {
|
||
name: 'Site24x7',
|
||
url: 'https://www.site24x7.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 's~snapchat-proxy',
|
||
name: 'Snapchat Proxy',
|
||
category: 'Crawler',
|
||
url: 'https://www.snapchat.com',
|
||
producer: {
|
||
name: 'Snapchat Inc.',
|
||
url: 'https://www.snapchat.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Snap URL Preview Service',
|
||
name: 'Snap URL Preview Service',
|
||
category: 'Service Agent',
|
||
url: 'https://developers.snap.com/robots',
|
||
producer: {
|
||
name: 'Snapchat Inc.',
|
||
url: 'https://www.snapchat.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SnapchatAds',
|
||
name: 'Snapchat Ads',
|
||
category: 'Crawler',
|
||
url: 'https://businesshelp.snapchat.com/s/article/adsbot-crawler?language=en_US',
|
||
producer: {
|
||
name: 'Snapchat Inc.',
|
||
url: 'https://www.snapchat.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: "Let's Encrypt validation server",
|
||
name: "Let's Encrypt Validation",
|
||
category: 'Service Agent',
|
||
url: 'https://letsencrypt.org/how-it-works/',
|
||
producer: {
|
||
name: "Let's Encrypt",
|
||
url: 'https://letsencrypt.org',
|
||
},
|
||
},
|
||
{
|
||
includes: 'GrapeshotCrawler',
|
||
name: 'Grapeshot',
|
||
category: 'Crawler',
|
||
url: 'https://www.grapeshot.com/crawler',
|
||
producer: {
|
||
name: 'Grapeshot',
|
||
url: 'https://www.grapeshot.com',
|
||
},
|
||
},
|
||
{
|
||
regex: 'www\\.monitor\\.us',
|
||
name: 'Monitor.Us',
|
||
category: 'Site Monitor',
|
||
url: 'http://www.monitor.us',
|
||
producer: {
|
||
name: 'Monitor.Us',
|
||
url: 'http://www.monitor.us',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Catchpoint',
|
||
name: 'Catchpoint',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.catchpoint.com/',
|
||
producer: {
|
||
name: 'Catchpoint Systems',
|
||
url: 'https://www.catchpoint.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'bitlybot',
|
||
name: 'BitlyBot',
|
||
category: 'Crawler',
|
||
url: 'https://bitly.com',
|
||
producer: {
|
||
name: 'Bitly, Inc.',
|
||
url: 'https://bitly.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Zao/',
|
||
name: 'Zao',
|
||
category: 'Crawler',
|
||
},
|
||
{
|
||
includes: 'lycos',
|
||
name: 'Lycos',
|
||
},
|
||
{
|
||
includes: 'Slurp',
|
||
name: 'Inktomi Slurp',
|
||
},
|
||
{
|
||
includes: 'Speedy Spider',
|
||
name: 'Speedy',
|
||
},
|
||
{
|
||
includes: 'ScoutJet',
|
||
name: 'ScoutJet',
|
||
},
|
||
{
|
||
regex: 'nrsbot|netresearch',
|
||
name: 'NetResearchServer',
|
||
},
|
||
{
|
||
includes: 'scooter',
|
||
name: 'Scooter',
|
||
},
|
||
{
|
||
includes: 'gigabot',
|
||
name: 'Gigabot',
|
||
},
|
||
{
|
||
includes: 'charlotte',
|
||
name: 'Charlotte',
|
||
},
|
||
{
|
||
includes: 'Pompos',
|
||
name: 'Pompos',
|
||
},
|
||
{
|
||
includes: 'ichiro',
|
||
name: 'ichiro',
|
||
},
|
||
{
|
||
includes: 'PagePeeker',
|
||
name: 'PagePeeker',
|
||
category: 'Crawler',
|
||
url: 'https://pagepeeker.com/robots/',
|
||
producer: {
|
||
name: 'PAGEPEEKER SRL',
|
||
url: 'https://pagepeeker.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'WebThumbnail',
|
||
name: 'WebThumbnail',
|
||
},
|
||
{
|
||
includes: 'Willow Internet Crawler',
|
||
name: 'Willow Internet Crawler',
|
||
},
|
||
{
|
||
includes: 'EmailWolf',
|
||
name: 'EmailWolf',
|
||
},
|
||
{
|
||
includes: 'NetLyzer FastProbe',
|
||
name: 'NetLyzer FastProbe',
|
||
},
|
||
{
|
||
regex: 'AdMantX.*admantx\\.com',
|
||
name: 'ADMantX',
|
||
},
|
||
{
|
||
includes: 'Server Density Service Monitoring',
|
||
name: 'Server Density',
|
||
},
|
||
{
|
||
regex: 'RSSRadio \\(Push Notification Scanner;support@dorada\\.co\\.uk\\)',
|
||
name: 'RSSRadio Bot',
|
||
},
|
||
{
|
||
regex: '^sentry',
|
||
name: 'Sentry Bot',
|
||
producer: {
|
||
name: 'Sentry',
|
||
url: 'https://sentry.io',
|
||
},
|
||
},
|
||
{
|
||
regex: '^Spotify/[\\d.]+$',
|
||
name: 'Spotify',
|
||
producer: {
|
||
name: 'Spotify',
|
||
url: 'https://www.spotify.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'The Knowledge AI',
|
||
name: 'The Knowledge AI',
|
||
category: 'Crawler',
|
||
},
|
||
{
|
||
includes: 'Embedly',
|
||
name: 'Embedly',
|
||
category: 'Crawler',
|
||
url: 'https://support.embed.ly/hc/en-us',
|
||
producer: {
|
||
name: 'A Medium, Corp.',
|
||
url: 'https://medium.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'BrandVerity',
|
||
name: 'BrandVerity',
|
||
category: 'Crawler',
|
||
url: 'https://www.brandverity.com/why-is-brandverity-visiting-me',
|
||
producer: {
|
||
name: 'BrandVerity, Inc.',
|
||
url: 'https://www.brandverity.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Kaspersky Lab CFR link resolver',
|
||
name: 'Kaspersky',
|
||
category: 'Security Checker',
|
||
url: 'https://www.kaspersky.com/',
|
||
producer: {
|
||
name: 'AO Kaspersky Lab',
|
||
url: 'https://www.kaspersky.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'eZ Publish Link Validator',
|
||
name: 'eZ Publish Link Validator',
|
||
category: 'Crawler',
|
||
url: 'https://ez.no/',
|
||
producer: {
|
||
name: 'eZ Systems AS',
|
||
url: 'https://ez.no/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'woorankreview',
|
||
name: 'WooRank',
|
||
category: 'Search bot',
|
||
url: 'https://www.woorank.com/',
|
||
producer: {
|
||
name: 'WooRank sprl',
|
||
url: 'https://www.woorank.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Siteimprove',
|
||
name: 'Siteimprove',
|
||
category: 'Search bot',
|
||
url: 'https://siteimprove.com/',
|
||
producer: {
|
||
name: 'Siteimprove GmbH',
|
||
url: 'https://siteimprove.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'CATExplorador',
|
||
name: 'CATExplorador',
|
||
category: 'Search bot',
|
||
url: 'https://fundacio.cat/ca/domini/',
|
||
producer: {
|
||
name: 'Fundació puntCAT',
|
||
url: 'https://fundacio.cat/ca/domini/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Buck',
|
||
name: 'Buck',
|
||
category: 'Search bot',
|
||
url: 'https://hypefactors.com/',
|
||
producer: {
|
||
name: 'Hypefactors A/S',
|
||
url: 'https://hypefactors.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'tracemyfile',
|
||
name: 'TraceMyFile',
|
||
category: 'Search bot',
|
||
url: 'https://www.tracemyfile.com/',
|
||
producer: {
|
||
name: 'Idee Inc.',
|
||
url: 'http://ideeinc.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'zelist\\.ro feed parser',
|
||
name: 'Ze List',
|
||
url: 'https://www.zelist.ro/',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: 'Treeworks SRL',
|
||
url: 'https://www.tree.ro/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'weborama-fetcher',
|
||
name: 'Weborama',
|
||
category: 'Search bot',
|
||
url: 'https://weborama.com/',
|
||
producer: {
|
||
name: 'Weborama SA',
|
||
url: 'https://weborama.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'BoardReader Favicon Fetcher',
|
||
name: 'BoardReader',
|
||
category: 'Search bot',
|
||
url: 'https://boardreader.com/',
|
||
producer: {
|
||
name: 'Effyis Inc',
|
||
url: 'https://boardreader.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'IDG/(?:EU|IT|RU|UK)',
|
||
name: 'IDG',
|
||
category: 'Crawler',
|
||
url: 'https://www.spaziodati.eu/',
|
||
producer: {
|
||
name: 'SpazioDati S.r.l.',
|
||
url: 'https://www.spaziodati.eu/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Bytespider',
|
||
name: 'Bytespider',
|
||
category: 'Search bot',
|
||
url: 'https://bytedance.com/',
|
||
producer: {
|
||
name: 'ByteDance Ltd.',
|
||
url: 'https://bytedance.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'WikiDo',
|
||
name: 'WikiDo',
|
||
category: 'Search bot',
|
||
url: 'https://www.wikido.com/',
|
||
producer: {
|
||
name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.',
|
||
url: 'https://www.wikido.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Awario(?:Smart)?Bot',
|
||
name: 'Awario',
|
||
category: 'Search bot',
|
||
url: 'https://awario.com/bots.html',
|
||
producer: {
|
||
name: 'TechFusion Ltd.',
|
||
url: 'https://www.techfusion.com.cy/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'AwarioRssBot',
|
||
name: 'Awario',
|
||
category: 'Feed Fetcher',
|
||
url: 'https://awario.com/bots.html',
|
||
producer: {
|
||
name: 'TechFusion Ltd.',
|
||
url: 'https://www.techfusion.com.cy/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'oBot',
|
||
name: 'oBot',
|
||
category: 'Search bot',
|
||
url: 'https://www.xforce-security.com/crawler/',
|
||
producer: {
|
||
name: 'IBM Germany Research & Development GmbH',
|
||
url: 'https://exchange.xforce.ibmcloud.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SMTBot',
|
||
name: 'SMTBot',
|
||
category: 'Search bot',
|
||
url: 'https://www.similartech.com/smtbot',
|
||
producer: {
|
||
name: 'SimilarTech Ltd.',
|
||
url: 'https://www.similartech.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'LCC',
|
||
name: 'LCC',
|
||
category: 'Search bot',
|
||
url: 'https://corpora.uni-leipzig.de/crawler_faq.html',
|
||
producer: {
|
||
name: 'Universität Leipzig',
|
||
url: 'https://www.uni-leipzig.de/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Startpagina-Linkchecker',
|
||
name: 'Startpagina Linkchecker',
|
||
category: 'Search bot',
|
||
url: 'https://www.startpagina.nl/linkchecker',
|
||
producer: {
|
||
name: 'Startpagina B.V.',
|
||
url: 'https://www.startpagina.nl/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'MoodleBot-Linkchecker',
|
||
name: 'MoodleBot Linkchecker',
|
||
category: 'Search bot',
|
||
url: 'https://docs.moodle.org/en/Usage',
|
||
producer: {
|
||
name: 'Moodle Pty Ltd',
|
||
url: 'https://moodle.org/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'GTmetrix',
|
||
name: 'GTmetrix',
|
||
category: 'Crawler',
|
||
url: 'https://gtmetrix.com/',
|
||
producer: {
|
||
name: 'Carbon60 Operating Co. Ltd.',
|
||
url: 'https://www.carbon60.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'CyberFind ?Crawler',
|
||
name: 'CyberFind Crawler',
|
||
category: 'Crawler',
|
||
url: 'https://www.cyberfind.net/bot.html',
|
||
producer: {
|
||
name: 'Find.tf',
|
||
url: 'https://find.tf/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Nutch',
|
||
name: 'Nutch-based Bot',
|
||
category: 'Crawler',
|
||
url: 'https://nutch.apache.org',
|
||
producer: {
|
||
name: 'The Apache Software Foundation',
|
||
url: 'https://www.apache.org/foundation/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Seobility',
|
||
name: 'Seobility',
|
||
category: 'Crawler',
|
||
url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot',
|
||
},
|
||
{
|
||
includes: 'Vercelbot',
|
||
name: 'Vercel Bot',
|
||
category: 'Service bot',
|
||
url: 'https://vercel.com',
|
||
},
|
||
{
|
||
includes: 'Grammarly',
|
||
name: 'Grammarly',
|
||
category: 'Service bot',
|
||
url: 'https://www.grammarly.com',
|
||
},
|
||
{
|
||
includes: 'Robozilla',
|
||
name: 'Robozilla',
|
||
category: 'Crawler',
|
||
},
|
||
{
|
||
includes: 'Domains Project',
|
||
name: 'Domains Project',
|
||
category: 'Crawler',
|
||
url: 'https://domainsproject.org',
|
||
},
|
||
{
|
||
includes: 'PetalBot',
|
||
name: 'Petal Bot',
|
||
category: 'Crawler',
|
||
url: 'https://aspiegel.com/petalbot',
|
||
},
|
||
{
|
||
includes: 'SerendeputyBot',
|
||
name: 'Serendeputy Bot',
|
||
category: 'Crawler',
|
||
url: 'https://serendeputy.com/about/serendeputy-bot',
|
||
},
|
||
{
|
||
regex:
|
||
'ias-(?:va|sg).*admantx.*service-fetcher|admantx\\.com.*service-fetcher',
|
||
name: 'ADmantX Service Fetcher',
|
||
category: 'Service bot',
|
||
url: 'https://www.admantx.com/service-fetcher.html',
|
||
},
|
||
{
|
||
includes: 'SemanticScholarBot',
|
||
name: 'Semantic Scholar Bot',
|
||
category: 'Crawler',
|
||
url: 'https://www.semanticscholar.org/crawler',
|
||
},
|
||
{
|
||
includes: 'VelenPublicWebCrawler',
|
||
name: 'Velen Public Web Crawler',
|
||
category: 'Crawler',
|
||
url: 'https://hunter.io/robot',
|
||
},
|
||
{
|
||
includes: 'Barkrowler',
|
||
name: 'Barkrowler',
|
||
category: 'Crawler',
|
||
url: 'http://www.exensa.com/crawl',
|
||
},
|
||
{
|
||
includes: 'BDCbot',
|
||
name: 'BDCbot',
|
||
category: 'Crawler',
|
||
url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx',
|
||
producer: {
|
||
name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA',
|
||
url: 'https://bigdatacorp.com.br/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'adbeat',
|
||
name: 'Adbeat',
|
||
category: 'Crawler',
|
||
url: 'https://www.adbeat.com/operation_policy',
|
||
producer: {
|
||
name: 'PPC Labs LLC',
|
||
url: 'https://www.adbeat.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: '(?:BuiltWith|BW/)',
|
||
name: 'BuiltWith',
|
||
category: 'Crawler',
|
||
url: 'https://builtwith.com/biup',
|
||
producer: {
|
||
name: 'BuiltWith Pty Ltd',
|
||
url: 'https://builtwith.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'https://whatis\\.contentkingapp\\.com',
|
||
name: 'ContentKing',
|
||
category: 'Site Monitor',
|
||
url: 'https://whatis.contentkingapp.com/',
|
||
producer: {
|
||
name: 'ContentKing BV',
|
||
url: 'https://www.contentkingapp.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'MicroAdBot',
|
||
name: 'MicroAdBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.microad.co.jp/',
|
||
producer: {
|
||
name: 'MicroAd, Inc.',
|
||
url: 'https://www.microad.co.jp/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'PingAdmin\\.Ru',
|
||
name: 'PingAdmin.Ru',
|
||
category: 'Site Monitor',
|
||
url: 'https://ping-admin.ru/',
|
||
},
|
||
{
|
||
regex: 'notifyninja.+monitoring',
|
||
name: 'Notify Ninja',
|
||
category: 'Site Monitor',
|
||
url: 'http://notifyninja.com',
|
||
},
|
||
{
|
||
includes: 'WebDataStats',
|
||
name: 'WebDataStats',
|
||
category: 'Crawler',
|
||
url: 'https://webdatastats.com/policy.html',
|
||
producer: {
|
||
name: 'WebTehRazrabotka LLC',
|
||
url: 'https://webdatastats.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'parse\\.ly scraper',
|
||
name: 'parse.ly',
|
||
category: 'Crawler',
|
||
url: 'https://www.parse.ly/help/integration/crawler',
|
||
producer: {
|
||
name: 'Parsely, Inc.',
|
||
url: 'https://www.parse.ly/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Nimbostratus-Bot',
|
||
name: 'Nimbostratus Bot',
|
||
category: 'Site Monitor',
|
||
url: 'http://cloudsystemnetworks.com',
|
||
},
|
||
{
|
||
includes: 'HeartRails_Capture',
|
||
name: 'HeartRails Capture',
|
||
category: 'Service Agent',
|
||
url: 'http://capture.heartrails.com',
|
||
},
|
||
{
|
||
includes: 'Project-Resonance',
|
||
name: 'Project Resonance',
|
||
category: 'Crawler',
|
||
url: 'https://project-resonance.com/',
|
||
producer: {
|
||
name: 'RedHunt Labs Limited',
|
||
url: 'https://redhuntlabs.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'DataXu',
|
||
name: 'DataXu',
|
||
category: 'Service Agent',
|
||
url: 'https://advertising.roku.com/dataxu',
|
||
producer: {
|
||
name: 'Roku, Inc.',
|
||
url: 'https://roku.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Cocolyzebot',
|
||
name: 'Cocolyzebot',
|
||
category: 'Crawler',
|
||
url: 'https://cocolyze.com/en/cocolyzebot',
|
||
producer: {
|
||
name: 'VSI INNOVATION SAS',
|
||
url: 'https://vsi-innovation.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'veryhip',
|
||
name: 'VeryHip',
|
||
category: 'Crawler',
|
||
url: 'https://veryhip.com/',
|
||
producer: {
|
||
name: 'VeryHip',
|
||
url: 'https://veryhip.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'LinkpadBot',
|
||
name: 'LinkpadBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.linkpad.org/',
|
||
producer: {
|
||
name: 'Solomono LLC',
|
||
url: 'https://www.linkpad.org/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'MuscatFerret',
|
||
name: 'MuscatFerret',
|
||
category: 'Crawler',
|
||
url: 'http://www.webtop.com/',
|
||
},
|
||
{
|
||
regex: 'PageThing\\.com',
|
||
name: 'PageThing',
|
||
category: 'Crawler',
|
||
url: 'https://www.pagething.com/',
|
||
producer: {
|
||
name: 'SPECIALNOISE LTD',
|
||
url: 'https://www.specialnoise.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ArchiveBox',
|
||
name: 'ArchiveBox',
|
||
url: 'https://archivebox.io/',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Choosito',
|
||
name: 'Choosito',
|
||
url: 'https://www.choosito.com/',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'Choosito! Inc.',
|
||
url: 'https://www.choosito.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'datagnionbot',
|
||
name: 'datagnionbot',
|
||
url: 'https://www.datagnion.com/bot.html',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'DATAGNION GMBH',
|
||
url: 'https://www.datagnion.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'WhatCMS',
|
||
name: 'WhatCMS',
|
||
url: 'https://whatcms.org/',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'Nineteen Ten LLC',
|
||
url: 'https://whatcms.org/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'httpx',
|
||
name: 'httpx',
|
||
url: 'https://github.com/projectdiscovery/httpx',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'ProjectDiscovery, Inc.',
|
||
url: 'https://projectdiscovery.io/',
|
||
},
|
||
},
|
||
{
|
||
regex: '.*\\.oast\\.',
|
||
name: 'Interactsh',
|
||
category: 'Security Checker',
|
||
url: 'https://github.com/projectdiscovery/interactsh',
|
||
producer: {
|
||
name: 'ProjectDiscovery, Inc.',
|
||
url: 'https://projectdiscovery.io/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'scaninfo@(?:expanseinc|paloaltonetworks)\\.com',
|
||
name: 'Expanse',
|
||
category: 'Security Checker',
|
||
url: 'https://expanse.co/',
|
||
producer: {
|
||
name: 'Expanse Inc.',
|
||
url: 'https://expanse.co/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'HuaweiWebCatBot',
|
||
name: 'HuaweiWebCatBot',
|
||
category: 'Crawler',
|
||
url: 'https://isecurity.huawei.com',
|
||
producer: {
|
||
name: 'Huawei Technologies Co., Ltd.',
|
||
url: 'https://huawei.com',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Hatena-Favicon',
|
||
name: 'Hatena Favicon',
|
||
category: 'Crawler',
|
||
url: 'https://www.hatena.ne.jp/faq/',
|
||
producer: {
|
||
name: 'Hatena Co., Ltd.',
|
||
url: 'https://www.hatena.ne.jp',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Hatena-?Bookmark',
|
||
name: 'Hatena Bookmark',
|
||
category: 'Crawler',
|
||
url: 'https://www.hatena.ne.jp/faq/',
|
||
producer: {
|
||
name: 'Hatena Co., Ltd.',
|
||
url: 'https://www.hatena.ne.jp',
|
||
},
|
||
},
|
||
{
|
||
includes: 'RyowlEngine',
|
||
name: 'Ryowl',
|
||
category: 'Crawler',
|
||
url: 'https://ryowl.org',
|
||
},
|
||
{
|
||
includes: 'OdklBot',
|
||
name: 'Odnoklassniki Bot',
|
||
category: 'Crawler',
|
||
url: 'https://odnoklassniki.ru',
|
||
},
|
||
{
|
||
includes: 'Mediatoolkitbot',
|
||
name: 'Mediatoolkit Bot',
|
||
category: 'Crawler',
|
||
url: 'https://mediatoolkit.com',
|
||
},
|
||
{
|
||
includes: 'ZoominfoBot',
|
||
name: 'ZoominfoBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.zoominfo.com',
|
||
},
|
||
{
|
||
includes: 'WeViKaBot',
|
||
name: 'WeViKaBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.wevika.de',
|
||
},
|
||
{
|
||
includes: 'SEOkicks',
|
||
name: 'SEOkicks',
|
||
category: 'Crawler',
|
||
url: 'https://www.seokicks.de/robot.html',
|
||
producer: {
|
||
name: 'SEOkicks',
|
||
url: 'https://www.seokicks.de/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Plukkie',
|
||
name: 'Plukkie',
|
||
category: 'Crawler',
|
||
url: 'http://www.botje.com/plukkie.htm',
|
||
},
|
||
{
|
||
includes: 'proximic;',
|
||
name: 'Comscore',
|
||
category: 'Crawler',
|
||
url: 'https://www.comscore.com/Web-Crawler',
|
||
},
|
||
{
|
||
includes: 'SurdotlyBot',
|
||
name: 'SurdotlyBot',
|
||
category: 'Crawler',
|
||
url: 'http://sur.ly/bot.html',
|
||
},
|
||
{
|
||
includes: 'Gowikibot',
|
||
name: 'Gowikibot',
|
||
category: 'Crawler',
|
||
url: 'http:/www.gowikibot.com',
|
||
},
|
||
{
|
||
includes: 'SabsimBot',
|
||
name: 'SabsimBot',
|
||
category: 'Crawler',
|
||
url: 'https://sabsim.com',
|
||
},
|
||
{
|
||
includes: 'LumtelBot',
|
||
name: 'LumtelBot',
|
||
category: 'Crawler',
|
||
url: 'https://lumtel.com',
|
||
},
|
||
{
|
||
includes: 'PiplBot',
|
||
name: 'PiplBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.pipl.com/bot',
|
||
},
|
||
{
|
||
includes: 'woobot',
|
||
name: 'WooRank',
|
||
category: 'Crawler',
|
||
url: 'https://www.woorank.com/bot',
|
||
},
|
||
{
|
||
includes: 'Cookiebot',
|
||
name: 'Cookiebot',
|
||
category: 'Crawler',
|
||
url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent',
|
||
producer: {
|
||
name: 'Cybot A/S',
|
||
url: 'https://www.cybot.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'NetSystemsResearch',
|
||
name: 'NetSystemsResearch',
|
||
category: 'Security Checker',
|
||
url: 'https://www.netsystemsresearch.com/',
|
||
producer: {
|
||
name: 'NET SYSTEMS RESEARCH LLC',
|
||
url: 'https://www.netsystemsresearch.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'CensysInspect',
|
||
name: 'CensysInspect',
|
||
category: 'Security Checker',
|
||
url: 'https://about.censys.io/',
|
||
producer: {
|
||
name: 'Censys, Inc.',
|
||
url: 'https://censys.io/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'gdnplus\\.com',
|
||
name: 'GDNP',
|
||
category: 'Crawler',
|
||
url: 'https://gdnplus.com/',
|
||
producer: {
|
||
name: 'Global Digital Network Plus, LLC',
|
||
url: 'https://gdnplus.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'WellKnownBot',
|
||
name: 'WellKnownBot',
|
||
category: 'Crawler',
|
||
url: 'https://well-known.dev',
|
||
},
|
||
{
|
||
includes: 'Adsbot',
|
||
name: 'Adsbot',
|
||
category: 'Crawler',
|
||
url: 'https://seostar.co/robot/',
|
||
},
|
||
{
|
||
includes: 'MTRobot',
|
||
name: 'MTRobot',
|
||
category: 'Crawler',
|
||
url: 'https://metrics-tools.de/robot.html',
|
||
producer: {
|
||
name: 'Metrics Tools',
|
||
url: 'https://metrics-tools.de/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'serpstatbot',
|
||
name: 'serpstatbot',
|
||
category: 'Crawler',
|
||
url: 'http://serpstatbot.com/',
|
||
producer: {
|
||
name: 'Netpeak Ltd',
|
||
url: 'https://netpeak.net/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'colly',
|
||
name: 'colly',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/gocolly/colly/',
|
||
},
|
||
{
|
||
includes: 'l9tcpid',
|
||
name: 'l9tcpid',
|
||
category: 'Security Checker',
|
||
url: 'https://github.com/LeakIX/l9tcpid',
|
||
},
|
||
{
|
||
includes: 'l9explore',
|
||
name: 'l9explore',
|
||
category: 'Security Checker',
|
||
url: 'https://github.com/LeakIX/l9explore',
|
||
},
|
||
{
|
||
regex: 'l9scan/|^Lkx-.*/',
|
||
name: 'LeakIX',
|
||
category: 'Security Checker',
|
||
url: 'https://leakix.net/',
|
||
producer: {
|
||
name: 'BaDaaS SRL',
|
||
url: 'https://leakix.net/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'MegaIndex\\.ru',
|
||
name: 'MegaIndex',
|
||
category: 'Crawler',
|
||
url: 'https://megaindex.com/crawler',
|
||
},
|
||
{
|
||
includes: 'Seekport',
|
||
name: 'Seekport',
|
||
category: 'Crawler',
|
||
url: 'https://bot.seekport.com/',
|
||
producer: {
|
||
name: 'SISTRIX GmbH',
|
||
url: 'https://www.sistrix.de/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Seolyt(?:Bot)?',
|
||
name: 'SeolytBot',
|
||
category: 'Crawler',
|
||
url: 'https://seolyt.com/',
|
||
},
|
||
{
|
||
includes: 'YaK/',
|
||
name: 'YaK',
|
||
category: 'Crawler',
|
||
url: 'https://www.linkfluence.com/',
|
||
producer: {
|
||
name: 'Linkfluence SAS',
|
||
url: 'https://www.linkfluence.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'KomodiaBot',
|
||
name: 'KomodiaBot',
|
||
category: 'Crawler',
|
||
url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler',
|
||
producer: {
|
||
name: 'Komodia Inc.',
|
||
url: 'https://www.komodia.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'KStandBot',
|
||
name: 'KStandBot',
|
||
category: 'Crawler',
|
||
url: 'https://url-classification.io/wiki/index.php?title=URL_server_crawler',
|
||
producer: {
|
||
name: 'Komodia Inc.',
|
||
url: 'https://www.komodia.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Neevabot',
|
||
name: 'Neevabot',
|
||
category: 'Search bot',
|
||
url: 'https://neeva.com/neevabot',
|
||
producer: {
|
||
name: 'Neeva Inc.',
|
||
url: 'https://neeva.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Chatwork LinkPreview',
|
||
name: 'Chatwork LinkPreview',
|
||
category: 'Service Agent',
|
||
url: 'https://go.chatwork.com/en/',
|
||
producer: {
|
||
name: 'kubell Co., Ltd.',
|
||
url: 'https://www.kubell.com/en/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'LinkPreview',
|
||
name: 'LinkPreview',
|
||
category: 'Service Agent',
|
||
url: 'https://www.linkpreview.net/',
|
||
},
|
||
{
|
||
includes: 'JungleKeyThumbnail',
|
||
name: 'JungleKeyThumbnail',
|
||
category: 'Crawler',
|
||
url: 'https://junglekey.com/',
|
||
},
|
||
{
|
||
regex: 'rocketmonitor(?:bot)?',
|
||
name: 'RocketMonitorBot',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html',
|
||
producer: {
|
||
name: 'Radio Mast, Inc.',
|
||
url: 'https://www.radiomast.io/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SitemapParser-VIPnytt',
|
||
name: 'SitemapParser-VIPnytt',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/VIPnytt/SitemapParser/',
|
||
},
|
||
{
|
||
regex: '^Turnitin',
|
||
name: 'Turnitin',
|
||
category: 'Crawler',
|
||
url: 'https://turnitin.com/robot/crawlerinfo.html',
|
||
},
|
||
{
|
||
regex: 'DMBrowser|DMBrowser-[UB]V',
|
||
name: 'Dotcom Monitor',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.dotcom-monitor.com',
|
||
},
|
||
{
|
||
includes: 'ThinkChaos/',
|
||
name: 'ThinkChaos',
|
||
category: 'Crawler',
|
||
},
|
||
{
|
||
includes: 'Thinkbot/',
|
||
name: 'Thinkbot',
|
||
category: 'Crawler',
|
||
},
|
||
{
|
||
includes: 'DataForSeoBot',
|
||
name: 'DataForSeoBot',
|
||
category: 'Crawler',
|
||
url: 'https://dataforseo.com/dataforseo-bot',
|
||
},
|
||
{
|
||
includes: 'Discordbot',
|
||
name: 'Discord Bot',
|
||
category: 'Service Agent',
|
||
url: 'https://discordapp.com',
|
||
},
|
||
{
|
||
includes: 'Linespider',
|
||
name: 'Linespider',
|
||
category: 'Crawler',
|
||
url: 'https://lin.ee/4dwXkTH',
|
||
},
|
||
{
|
||
includes: 'Cincraw',
|
||
name: 'Cincraw',
|
||
category: 'Crawler',
|
||
url: 'http://cincrawdata.net/bot/',
|
||
},
|
||
{
|
||
includes: 'CISPA Web Analyzer',
|
||
name: 'CISPA Web Analyzer',
|
||
category: 'Crawler',
|
||
url: 'https://notify.cispa.de/',
|
||
producer: {
|
||
name: 'CISPA - Helmholtz-Zentrum für Informationssicherheit gGmbH',
|
||
url: 'https://cispa.de/en',
|
||
},
|
||
},
|
||
{
|
||
includes: 'IonCrawl',
|
||
name: 'IONOS Crawler',
|
||
category: 'Crawler',
|
||
url: 'https://www.ionos.de/terms-gtc/faq-crawler-en/',
|
||
producer: {
|
||
name: 'IONOS SE',
|
||
url: 'https://www.ionos.de/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Crawldad',
|
||
name: 'Crawldad',
|
||
category: 'Crawler',
|
||
url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972',
|
||
},
|
||
{
|
||
regex: 'https://securitytxt-scan\\.cs\\.hm\\.edu/',
|
||
name: 'security.txt scanserver',
|
||
category: 'Security Checker',
|
||
url: 'https://securitytxt-scan.cs.hm.edu/',
|
||
producer: {
|
||
name: 'Hochschule für angewandte Wissenschaften München',
|
||
url: 'https://www.hm.edu/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'TigerBot',
|
||
name: 'TigerBot',
|
||
category: 'Crawler',
|
||
url: 'https://tiger.ch/',
|
||
},
|
||
{
|
||
includes: 'TestCrawler',
|
||
name: 'TestCrawler',
|
||
category: 'Crawler',
|
||
url: 'https://www.comcepta.com/',
|
||
},
|
||
{
|
||
includes: 'CrowdTanglebot',
|
||
name: 'CrowdTangle',
|
||
category: 'Crawler',
|
||
url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot',
|
||
producer: {
|
||
name: 'CrowdTangle, Inc.',
|
||
url: 'https://www.crowdtangle.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Sellers\\.Guide Crawler by Primis',
|
||
name: 'Sellers.Guide',
|
||
category: 'Crawler',
|
||
url: 'https://sellers.guide/',
|
||
producer: {
|
||
name: 'McCann Disciplines, Ltd.',
|
||
url: 'https://www.primis.tech/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'OnalyticaBot',
|
||
name: 'Onalytica',
|
||
category: 'Crawler',
|
||
url: 'https://www.airslate.com/bot/explore/onalytica-bot',
|
||
producer: {
|
||
name: 'airSlate, Inc.',
|
||
url: 'https://www.airslate.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'deepnoc',
|
||
name: 'deepnoc',
|
||
category: 'Crawler',
|
||
url: 'https://deepnoc.com/bot',
|
||
producer: {
|
||
name: 'deepnoc, GmbH',
|
||
url: 'https://deepnoc.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Newslitbot',
|
||
name: 'Newslitbot',
|
||
category: 'Crawler',
|
||
url: 'https://www.newslit.co/',
|
||
producer: {
|
||
name: 'Newslit, LLC.',
|
||
url: 'https://www.newslit.co/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'um-(?:ANS|CC|FC|IC|LN)',
|
||
name: 'uMBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.ubermetrics-technologies.com/',
|
||
producer: {
|
||
name: 'Ubermetrics Technologies GmbH',
|
||
url: 'https://www.ubermetrics-technologies.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Abonti',
|
||
name: 'Abonti',
|
||
category: 'Crawler',
|
||
url: 'http://abonti.com/',
|
||
},
|
||
{
|
||
regex: 'collection@infegy\\.com',
|
||
name: 'Infegy',
|
||
category: 'Crawler',
|
||
url: 'https://infegy.com/',
|
||
producer: {
|
||
name: 'Infegy, Inc.',
|
||
url: 'https://infegy.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'HTTP Banner Detection \\(https://security\\.ipip\\.net\\)',
|
||
name: 'IPIP',
|
||
category: 'Security Checker',
|
||
url: 'https://security.ipip.net/',
|
||
producer: {
|
||
name: 'Beijing Tiantexin Tech. Co., Ltd.',
|
||
url: 'https://en.ipip.net/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ev-crawler',
|
||
name: 'Headline',
|
||
category: 'Crawler',
|
||
url: 'https://headline.com/legal/crawler',
|
||
producer: {
|
||
name: 'e.ventures Managementgesellschaft mbH',
|
||
url: 'https://headline.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'webprosbot',
|
||
name: 'WebPros',
|
||
category: 'Crawler',
|
||
url: 'https://webpros.com/',
|
||
producer: {
|
||
name: 'WebPros Holdco B.V.',
|
||
url: 'https://webpros.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ELB-HealthChecker',
|
||
name: 'Amazon ELB',
|
||
category: 'Site Monitor',
|
||
url: 'https://aws.amazon.com/elasticloadbalancing/',
|
||
producer: {
|
||
name: 'Amazon.com, Inc.',
|
||
url: 'https://www.amazon.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Wheregoes\\.com Redirect Checker',
|
||
name: 'WhereGoes',
|
||
category: 'Crawler',
|
||
url: 'https://wheregoes.com/',
|
||
},
|
||
{
|
||
includes: 'project_patchwatch',
|
||
name: 'Project Patchwatch',
|
||
category: 'Crawler',
|
||
url: 'http://66.240.192.82/',
|
||
},
|
||
{
|
||
includes: 'InternetMeasurement',
|
||
name: 'InternetMeasurement',
|
||
category: 'Crawler',
|
||
url: 'https://internet-measurement.com/',
|
||
},
|
||
{
|
||
includes: 'DomainAppender',
|
||
name: 'DomainAppender',
|
||
category: 'Crawler',
|
||
url: 'https://www.profound.net/product/domain_append/',
|
||
producer: {
|
||
name: 'Profound Networks, LLC',
|
||
url: 'https://www.profound.net/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'FreeWebMonitoring SiteChecker',
|
||
name: 'FreeWebMonitoring',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.freewebmonitoring.com/bot.html',
|
||
producer: {
|
||
name: 'GreenWave Online, Inc.',
|
||
url: 'http://www.greenwaveonline.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Page Modified Pinger',
|
||
name: 'Page Modified Pinger',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.pagemodified.com/',
|
||
producer: {
|
||
name: 'Valley Hosting, LLC',
|
||
url: 'https://www.pagemodified.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'adstxtlab\\.com',
|
||
name: 'adstxtlab.com',
|
||
category: 'Crawler',
|
||
url: 'https://adstxtlab.com/validator.php',
|
||
producer: {
|
||
name: 'Jaohawi AB',
|
||
url: 'https://adstxtlab.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Iframely',
|
||
name: 'Iframely',
|
||
category: 'Crawler',
|
||
url: 'https://iframely.com/',
|
||
producer: {
|
||
name: 'Itteco Software, Corp.',
|
||
url: 'https://iframely.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'DomainStatsBot',
|
||
name: 'DomainStatsBot',
|
||
category: 'Crawler',
|
||
url: 'https://domainstats.com/pages/our-bot',
|
||
producer: {
|
||
name: 'Domainstats Ltd',
|
||
url: 'https://domainstats.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'aiHitBot',
|
||
name: 'aiHitBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.aihitdata.com/about',
|
||
},
|
||
{
|
||
includes: 'DomainCrawler/',
|
||
name: 'DomainCrawler',
|
||
category: 'Crawler',
|
||
url: 'https://domaincrawler.com/about-us/',
|
||
},
|
||
{
|
||
includes: 'DNSResearchBot',
|
||
name: 'DNSResearchBot',
|
||
category: 'Crawler',
|
||
},
|
||
{
|
||
includes: 'GitCrawlerBot',
|
||
name: 'GitCrawlerBot',
|
||
category: 'Crawler',
|
||
},
|
||
{
|
||
includes: 'AdAuth',
|
||
name: 'AdAuth',
|
||
category: 'Crawler',
|
||
url: 'https://www.adauth.com',
|
||
},
|
||
{
|
||
regex: 'faveeo\\.com',
|
||
name: 'Faveeo',
|
||
category: 'Crawler',
|
||
url: 'http://www.faveeo.com',
|
||
},
|
||
{
|
||
regex: 'kozmonavt\\.',
|
||
name: 'Kozmonavt',
|
||
category: 'Crawler',
|
||
url: 'https://kozmonavt.ml',
|
||
},
|
||
{
|
||
includes: 'CriteoBot/',
|
||
name: 'CriteoBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.criteo.com/criteo-crawler/',
|
||
},
|
||
{
|
||
includes: 'PayPal IPN',
|
||
name: 'PayPal IPN',
|
||
category: 'Service Agent',
|
||
url: 'https://developer.paypal.com/api/nvp-soap/ipn/IPNIntro/',
|
||
producer: {
|
||
name: 'PayPal, Inc.',
|
||
url: 'https://www.paypal.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'MaCoCu',
|
||
name: 'MaCoCu',
|
||
category: 'Crawler',
|
||
url: 'https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data/',
|
||
producer: {
|
||
name: 'Jožef Stefan Institute',
|
||
url: 'https://www.ijs.si/ijsw/JSI',
|
||
},
|
||
},
|
||
{
|
||
includes: 'CLASSLA',
|
||
name: 'CLASSLA-web',
|
||
category: 'Crawler',
|
||
url: 'https://www.clarin.si/info/classla-web-crawler/',
|
||
producer: {
|
||
name: 'Jožef Stefan Institute',
|
||
url: 'https://www.ijs.si/ijsw/JSI',
|
||
},
|
||
},
|
||
{
|
||
regex: 'dnt-policy@eff\\.org',
|
||
name: 'EFF Do Not Track Verifier',
|
||
category: 'Crawler',
|
||
url: 'https://www.eff.org/issues/do-not-track',
|
||
producer: {
|
||
name: 'Electronic Frontier Foundation',
|
||
url: 'https://www.eff.org/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'InfoTigerBot',
|
||
name: 'InfoTigerBot',
|
||
category: 'Crawler',
|
||
url: 'https://infotiger.com/bot',
|
||
producer: {
|
||
name: 'Infotiger UG',
|
||
url: 'https://infotiger.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: '(?:Birdcrawlerbot|CrawlaDeBot)',
|
||
name: 'Birdcrawlerbot',
|
||
category: 'Crawler',
|
||
url: 'https://crawla.de/de/index.php',
|
||
producer: {
|
||
name: 'Swoppen Systems GmbH',
|
||
url: 'https://www.swoppen.com/de',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ScamadviserExternalHit',
|
||
name: 'Scamadviser External Hit',
|
||
category: 'Crawler',
|
||
url: 'https://www.scamadviser.com/',
|
||
producer: {
|
||
name: 'Ecommerce Operations B.V.',
|
||
url: 'https://www.scamadviser.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ZaldamoSearchBot',
|
||
name: 'Zaldamo',
|
||
category: 'Crawler',
|
||
url: 'https://www.zaldamo.com/search.html',
|
||
producer: {
|
||
name: 'Zaldamo, LLC.',
|
||
url: 'https://www.zaldamo.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'AFB',
|
||
name: 'Allloadin Favicon Bot',
|
||
category: 'Crawler',
|
||
url: 'https://allloadin.com/',
|
||
},
|
||
{
|
||
includes: 'LinkWalker',
|
||
name: 'LinkWalker',
|
||
category: 'Crawler',
|
||
url: 'https://www.phishlabs.com/',
|
||
producer: {
|
||
name: 'PhishLabs, Inc.',
|
||
url: 'https://www.phishlabs.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'RenovateBot',
|
||
name: 'RenovateBot',
|
||
category: 'Security Checker',
|
||
url: 'https://github.com/renovatebot/renovate',
|
||
producer: {
|
||
name: 'White Source Ltd.',
|
||
url: 'https://www.mend.io/free-developer-tools/renovate/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'INETDEX-BOT',
|
||
name: 'Inetdex Bot',
|
||
category: 'Crawler',
|
||
url: 'https://www.inetdex.com/',
|
||
},
|
||
{
|
||
includes: 'NETZZAPPEN',
|
||
name: 'NETZZAPPEN',
|
||
category: 'Crawler',
|
||
url: 'https://www.netzzappen.com/',
|
||
producer: {
|
||
name: 'Marc Huemer',
|
||
url: 'https://www.netzzappen.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'panscient\\.com',
|
||
name: 'Panscient',
|
||
category: 'Crawler',
|
||
url: 'https://www.panscient.com/faq.htm',
|
||
producer: {
|
||
name: 'Panscient, Inc.',
|
||
url: 'https://www.panscient.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'research@pdrlabs\\.net',
|
||
name: 'PDR Labs',
|
||
category: 'Security Checker',
|
||
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/',
|
||
producer: {
|
||
name: 'PDR Labs',
|
||
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Nicecrawler',
|
||
name: 'NiceCrawler',
|
||
category: 'Crawler',
|
||
url: 'https://www.nicecrawler.com/',
|
||
producer: {
|
||
name: 'Intelium Corp.',
|
||
url: 'https://www.intelium.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 't3versionsBot',
|
||
name: 't3versions',
|
||
category: 'Crawler',
|
||
url: 'https://www.t3versions.com/bot',
|
||
producer: {
|
||
name: 'Torben Hansen',
|
||
url: 'https://www.t3versions.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Crawlson',
|
||
name: 'Crawlson',
|
||
category: 'Crawler',
|
||
url: 'https://www.crawlson.com/about',
|
||
producer: {
|
||
name: 'Crawlson',
|
||
url: 'https://www.crawlson.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'tchelebi',
|
||
name: 'tchelebi',
|
||
category: 'Crawler',
|
||
url: 'https://tchelebi.io/',
|
||
producer: {
|
||
name: 'NormShield, Inc.',
|
||
url: 'https://blackkite.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'JobboerseBot',
|
||
name: 'JobboerseBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.xing.com/jobs',
|
||
producer: {
|
||
name: 'New Work SE',
|
||
url: 'https://www.xing.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'RepoLookoutBot',
|
||
name: 'Repo Lookout',
|
||
category: 'Security Checker',
|
||
url: 'https://www.repo-lookout.org/',
|
||
producer: {
|
||
name: 'Crissy Field GmbH',
|
||
url: 'https://www.crissyfield.de/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'PATHspider',
|
||
name: 'PATHspider',
|
||
category: 'Security Checker',
|
||
url: 'https://pathspider.net/',
|
||
producer: {
|
||
name: 'MAMI Project',
|
||
url: 'https://mami-project.eu/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'everyfeed-spider',
|
||
name: 'Everyfeed',
|
||
url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Exchange check',
|
||
name: 'Exchange check',
|
||
category: 'Security Checker',
|
||
url: 'https://github.com/GossiTheDog/scanning',
|
||
producer: {
|
||
name: 'Kevin Beaumont',
|
||
url: 'https://doublepulsar.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Sublinq',
|
||
name: 'Sublinq',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20220626191617/https://sublinq.com/',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Gregarius',
|
||
name: 'Gregarius',
|
||
category: 'Feed Fetcher',
|
||
url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/',
|
||
producer: {
|
||
name: '',
|
||
url: '',
|
||
},
|
||
},
|
||
{
|
||
includes: 'COMODO DCV',
|
||
name: 'COMODO DCV',
|
||
category: 'Service Agent',
|
||
url: 'https://www.comodo.com/',
|
||
producer: {
|
||
name: 'Comodo Security Solutions, Inc.',
|
||
url: 'https://www.comodo.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Sectigo DCV|acme\\.sectigo\\.com',
|
||
name: 'Sectigo DCV',
|
||
category: 'Service Agent',
|
||
url: 'https://sectigo.com/',
|
||
producer: {
|
||
name: 'Sectigo Limited',
|
||
url: 'https://sectigo.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)',
|
||
name: 'KlarnaBot',
|
||
category: 'Crawler',
|
||
url: 'https://docs.klarna.com/klarna-bot/',
|
||
producer: {
|
||
name: 'Klarna Bank AB',
|
||
url: 'https://www.klarna.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Taboolabot',
|
||
name: 'Taboolabot',
|
||
category: 'Crawler',
|
||
url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler',
|
||
producer: {
|
||
name: 'Taboola, Inc.',
|
||
url: 'https://www.taboola.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Asana',
|
||
name: 'Asana',
|
||
category: 'Crawler',
|
||
url: 'https://asana.com/',
|
||
producer: {
|
||
name: 'Asana, Inc.',
|
||
url: 'https://asana.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Chrome Privacy Preserving Prefetch Proxy',
|
||
name: 'Chrome Privacy Preserving Prefetch Proxy',
|
||
category: 'Service Agent',
|
||
url: 'https://developer.chrome.com/blog/private-prefetch-proxy/',
|
||
producer: {
|
||
name: 'Google Inc.',
|
||
url: 'https://www.google.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'URLinspectorBot',
|
||
name: 'URLinspector',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.urlinspector.com/bot/',
|
||
producer: {
|
||
name: 'LinkResearchTools GmbH',
|
||
url: 'https://www.linkresearchtools.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'EntferBot',
|
||
name: 'Entfer',
|
||
category: 'Crawler',
|
||
url: 'https://entfer.com/',
|
||
producer: {
|
||
name: 'Entfer Ltd.',
|
||
url: 'https://entfer.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'TagInspector',
|
||
name: 'Tag Inspector',
|
||
category: 'Crawler',
|
||
url: 'https://taginspector.com/',
|
||
producer: {
|
||
name: 'InfoTrust, LLC',
|
||
url: 'https://infotrust.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'pageburst',
|
||
name: 'Pageburst',
|
||
category: 'Crawler',
|
||
url: 'https://pageburstls.elsevier.com/',
|
||
producer: {
|
||
name: 'Elsevier Ltd',
|
||
url: 'https://www.elsevier.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: '.+diffbot',
|
||
name: 'Diffbot',
|
||
category: 'Crawler',
|
||
url: 'https://docs.diffbot.com/docs/getting-started-with-crawl',
|
||
producer: {
|
||
name: 'Diffbot Technologies Corp.',
|
||
url: 'https://www.diffbot.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'DisqusAdstxtCrawler',
|
||
name: 'Disqus',
|
||
category: 'Crawler',
|
||
url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide',
|
||
producer: {
|
||
name: 'Disqus, Inc.',
|
||
url: 'https://disqus.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'startmebot',
|
||
name: 'start.me',
|
||
category: 'Crawler',
|
||
url: 'https://about.start.me/',
|
||
producer: {
|
||
name: 'start.me BV',
|
||
url: 'https://about.start.me/',
|
||
},
|
||
},
|
||
{
|
||
includes: '2ip bot',
|
||
name: '2ip',
|
||
category: 'Crawler',
|
||
url: 'https://2ip.io/',
|
||
},
|
||
{
|
||
includes: 'ReqBin Curl Client',
|
||
name: 'ReqBin',
|
||
category: 'Crawler',
|
||
url: 'https://reqbin.com/curl',
|
||
},
|
||
{
|
||
includes: 'XoviBot',
|
||
name: 'XoviBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.xovibot.net',
|
||
producer: {
|
||
name: 'Xovi GmbH',
|
||
url: 'http://www.xovi.de',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Overcast/.+Podcast Sync',
|
||
name: 'Overcast Podcast Sync',
|
||
category: 'Service Agent',
|
||
url: 'https://overcast.fm/podcasterinfo',
|
||
},
|
||
{
|
||
regex: '^Verity',
|
||
name: 'GumGum Verity',
|
||
category: 'Service Agent',
|
||
url: 'https://gumgum.com/verity',
|
||
},
|
||
{
|
||
includes: 'hackermention',
|
||
name: 'hackermention',
|
||
category: 'Feed Reader',
|
||
url: 'https://github.com/snarfed/hackermention',
|
||
},
|
||
{
|
||
includes: 'BitSightBot',
|
||
name: 'BitSight',
|
||
category: 'Security Checker',
|
||
url: 'https://www.bitsight.com/',
|
||
producer: {
|
||
name: 'BitSight Technologies, Inc.',
|
||
url: 'https://www.bitsight.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Ezgif',
|
||
name: 'Ezgif',
|
||
category: 'Service Agent',
|
||
url: 'https://ezgif.com/about',
|
||
},
|
||
{
|
||
regex: 'intelx\\.io_bot',
|
||
name: 'Intelligence X',
|
||
category: 'Crawler',
|
||
url: 'https://intelx.io/',
|
||
producer: {
|
||
name: 'Kleissner Investments s.r.o.',
|
||
url: 'https://intelx.io/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'FemtosearchBot',
|
||
name: 'Femtosearch',
|
||
category: 'Crawler',
|
||
url: 'http://femtosearch.com/',
|
||
producer: {
|
||
name: 'Grier Forensics, LLC',
|
||
url: 'https://www.grierforensics.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'AdsTxtCrawler/',
|
||
name: 'AdsTxtCrawler',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler',
|
||
producer: {
|
||
name: 'IAB Technology Laboratory, Inc.',
|
||
url: 'https://iabtechlab.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Morningscore',
|
||
name: 'Morningscore Bot',
|
||
category: 'Crawler',
|
||
url: 'https://morningscore.io/',
|
||
producer: {
|
||
name: 'Morningscore',
|
||
url: 'https://morningscore.io/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Uptime-Kuma',
|
||
name: 'Uptime-Kuma',
|
||
category: 'Site Monitor',
|
||
url: 'https://github.com/louislam/uptime-kuma',
|
||
},
|
||
{
|
||
includes: 'OAI-SearchBot',
|
||
name: 'OAI-SearchBot',
|
||
category: 'Crawler',
|
||
url: 'https://platform.openai.com/docs/bots',
|
||
producer: {
|
||
name: 'OpenAI OpCo, LLC',
|
||
url: 'https://openai.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'GPTBot',
|
||
name: 'GPTBot',
|
||
category: 'Crawler',
|
||
url: 'https://platform.openai.com/docs/bots',
|
||
producer: {
|
||
name: 'OpenAI OpCo, LLC',
|
||
url: 'https://openai.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ChatGPT-User',
|
||
name: 'ChatGPT-User',
|
||
category: 'Crawler',
|
||
url: 'https://platform.openai.com/docs/bots',
|
||
producer: {
|
||
name: 'OpenAI OpCo, LLC',
|
||
url: 'https://openai.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'BrightEdge Crawler',
|
||
name: 'BrightEdge',
|
||
category: 'Crawler',
|
||
url: 'https://www.brightedge.com/',
|
||
producer: {
|
||
name: 'BrightEdge Technologies, Inc',
|
||
url: 'https://www.brightedge.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'sfFeedReader',
|
||
name: 'sfFeedReader',
|
||
url: 'https://github.com/diem-project/sfFeed2Plugin',
|
||
category: 'Feed Fetcher',
|
||
},
|
||
{
|
||
regex: 'cyberscan\\.io',
|
||
name: 'Cyberscan',
|
||
category: 'Security Checker',
|
||
url: 'https://www.cyberscan.io/',
|
||
producer: {
|
||
name: 'DGC Verwaltungs GmbH',
|
||
url: 'https://dgc.org/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'researchscan\\.comsys\\.rwth-aachen\\.de',
|
||
name: 'Research Scan',
|
||
category: 'Crawler',
|
||
url: 'http://researchscan.comsys.rwth-aachen.de/',
|
||
producer: {
|
||
name: 'RWTH Aachen University',
|
||
url: 'https://www.comsys.rwth-aachen.de/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'newspaper',
|
||
name: 'Scraping Robot',
|
||
category: 'Crawler',
|
||
url: 'https://scrapingrobot.com/',
|
||
producer: {
|
||
name: 'Sprious LLC',
|
||
url: 'https://sprious.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Ant(?:\\.com beta|Bot)',
|
||
name: 'Ant',
|
||
category: 'Crawler',
|
||
url: 'https://www.ant.com/',
|
||
producer: {
|
||
name: 'Ant.com Ltd.',
|
||
url: 'https://www.ant.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'WebwikiBot',
|
||
name: 'Webwiki',
|
||
category: 'Crawler',
|
||
url: 'https://www.webwiki.com/',
|
||
producer: {
|
||
name: 'webwiki GmbH',
|
||
url: 'https://www.webwiki.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'phpMyAdmin',
|
||
name: 'phpMyAdmin',
|
||
category: 'Service Agent',
|
||
url: 'https://www.phpmyadmin.net/',
|
||
},
|
||
{
|
||
regex: 'Matomo/[\\d.]+',
|
||
name: 'Matomo',
|
||
category: 'Service Agent',
|
||
url: 'https://github.com/matomo-org/matomo',
|
||
producer: {
|
||
name: 'InnoCraft Ltd',
|
||
url: 'https://matomo.org/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Prometheus',
|
||
name: 'Prometheus',
|
||
category: 'Service Agent',
|
||
url: 'https://github.com/prometheus/prometheus',
|
||
producer: {
|
||
name: 'The Linux Foundation',
|
||
url: 'https://www.cncf.io/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ArchiveTeam ArchiveBot',
|
||
name: 'ArchiveBot',
|
||
category: 'Crawler',
|
||
url: 'https://wiki.archiveteam.org/index.php?title=ArchiveBot',
|
||
producer: {
|
||
name: 'ArchiveTeam',
|
||
url: 'https://wiki.archiveteam.org/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'MADBbot',
|
||
name: 'MADBbot',
|
||
category: 'Crawler',
|
||
url: 'https://madb.zapto.org/bot.html',
|
||
},
|
||
{
|
||
includes: 'MeltwaterNews',
|
||
name: 'MeltwaterNews',
|
||
category: 'Crawler',
|
||
producer: {
|
||
name: 'Meltwater Deutschland GmbH',
|
||
url: 'https://www.meltwater.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'owler',
|
||
name: 'OWLer',
|
||
category: 'Crawler',
|
||
url: 'https://openwebsearch.eu/owler/',
|
||
producer: {
|
||
name: 'Open Search Foundation e.V.',
|
||
url: 'https://openwebsearch.eu/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'bbc\\.co\\.uk/display/men/Page\\+Monitor',
|
||
name: 'BBC Page Monitor',
|
||
category: 'Site Monitor',
|
||
url: 'https://confluence.dev.bbc.co.uk/display/men/Page+Monitor',
|
||
producer: {
|
||
name: 'BBC',
|
||
url: 'https://www.bbc.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'BBC-Forge-URL-Monitor-Twisted',
|
||
name: 'BBC Forge URL Monitor',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.bbc.com/',
|
||
producer: {
|
||
name: 'BBC',
|
||
url: 'https://www.bbc.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ClaudeBot',
|
||
name: 'ClaudeBot',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/ClaudeBot/ClaudeBot',
|
||
},
|
||
{
|
||
includes: 'Imagesift',
|
||
name: 'ImageSift',
|
||
category: 'Crawler',
|
||
url: 'https://imagesift.com/',
|
||
producer: {
|
||
name: 'Castle Global, Inc.',
|
||
url: 'https://thehive.ai/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'TactiScout',
|
||
name: 'TactiScout',
|
||
category: 'Crawler',
|
||
url: 'https://find-it.world/TempCrawl/Crawltheque.php',
|
||
producer: {
|
||
name: 'Tactikast',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Brightbot',
|
||
name: 'BrightBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.brightbot.app/',
|
||
producer: {
|
||
name: 'Bright Interactive Ltd',
|
||
url: 'https://www.builtbybright.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'DaspeedBot',
|
||
name: 'DaspeedBot',
|
||
category: 'Crawler',
|
||
url: 'https://daspeed.io/',
|
||
producer: {
|
||
name: 'DAWAP SARL',
|
||
url: 'https://dawap.fr/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'StractBot',
|
||
name: 'Stract',
|
||
category: 'Crawler',
|
||
url: 'https://stract.com/webmasters',
|
||
producer: {
|
||
name: 'Stract',
|
||
url: 'https://github.com/StractOrg/stract/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'GeedoBot',
|
||
name: 'GeedoBot',
|
||
category: 'Crawler',
|
||
url: 'https://geedo.com/bot/',
|
||
},
|
||
{
|
||
includes: 'GeedoProductSearch',
|
||
name: 'GeedoProductSearch',
|
||
category: 'Crawler',
|
||
url: 'https://geedo.com/product-search/',
|
||
},
|
||
{
|
||
includes: 'BackupLand',
|
||
name: 'BackupLand',
|
||
category: 'Crawler',
|
||
url: 'https://go.backupland.com/',
|
||
producer: {
|
||
name: 'ООО «КВАРТА»',
|
||
url: 'https://go.backupland.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Konturbot',
|
||
name: 'Konturbot',
|
||
category: 'Crawler',
|
||
url: 'https://kontur.ru/',
|
||
producer: {
|
||
name: 'АО «ПФ «СКБ Контур»',
|
||
url: 'https://kontur.ru/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'keys-so-bot',
|
||
name: 'Keys.so',
|
||
category: 'Crawler',
|
||
url: 'https://www.keys.so/',
|
||
producer: {
|
||
name: 'ООО «МОДЕСКО»',
|
||
url: 'https://www.modesco.ru/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'LetsearchBot',
|
||
name: 'LetSearch',
|
||
category: 'Crawler',
|
||
url: 'https://letsearch.ru/bots',
|
||
},
|
||
{
|
||
includes: 'Example3',
|
||
name: 'Example3',
|
||
category: 'Crawler',
|
||
url: 'https://www.example3.com/',
|
||
},
|
||
{
|
||
includes: 'StatOnlineRuBot',
|
||
name: 'StatOnline.ru',
|
||
category: 'Crawler',
|
||
url: 'https://statonline.ru/',
|
||
producer: {
|
||
name: 'ООО «Регистратор доменных имен РЕГ.РУ»',
|
||
url: 'https://statonline.ru/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Spawning-AI',
|
||
name: 'Spawning AI',
|
||
category: 'Crawler',
|
||
url: 'https://spawning.ai/',
|
||
producer: {
|
||
name: 'Spawning, Inc',
|
||
url: 'https://spawning.ai/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'domain research project',
|
||
name: 'Domain Research Project',
|
||
category: 'Crawler',
|
||
url: 'https://trentwil.es/domains.html',
|
||
producer: {
|
||
name: 'Trent Wiles',
|
||
url: 'https://trentwil.es/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'getodin\\.com',
|
||
name: 'Odin',
|
||
category: 'Security Checker',
|
||
url: 'https://docs.getodin.com/',
|
||
producer: {
|
||
name: 'Cyble Inc.',
|
||
url: 'https://cyble.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'YouBot',
|
||
name: 'YouBot',
|
||
category: 'Crawler',
|
||
url: 'https://about.you.com/youbot/',
|
||
producer: {
|
||
name: 'SuSea, Inc.',
|
||
url: 'https://you.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SiteScoreBot',
|
||
name: 'SiteScore',
|
||
category: 'Crawler',
|
||
url: 'https://sitescore.ai/',
|
||
},
|
||
{
|
||
includes: 'MBCrawler',
|
||
name: 'Monitor Backlinks',
|
||
category: 'Crawler',
|
||
url: 'https://www.seoptimer.com/monitor-backlinks/',
|
||
producer: {
|
||
name: 'SEOptimer',
|
||
url: 'https://www.seoptimer.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'mariadb-mysql-kbs-bot',
|
||
name: 'MariaDB/MySQL Knowledge Base',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/williamdes/mariadb-mysql-kbs',
|
||
producer: {
|
||
name: 'WDES SAS',
|
||
url: 'https://wdes.fr/en/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'GitHubCopilotChat',
|
||
name: 'GitHubCopilotChat',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/aaamoon/copilot-gpt4-service',
|
||
},
|
||
{
|
||
regex: '^pdrl\\.fm',
|
||
name: 'Podroll Analyzer',
|
||
category: 'Crawler',
|
||
url: 'https://podroll.fm',
|
||
},
|
||
{
|
||
includes: 'PodUptime/',
|
||
name: 'PodUptime',
|
||
category: 'Site Monitor',
|
||
url: 'https://poduptime.com',
|
||
},
|
||
{
|
||
includes: 'anthropic-ai',
|
||
name: 'Anthropic AI',
|
||
category: 'Crawler',
|
||
url: 'https://www.anthropic.com/',
|
||
producer: {
|
||
name: 'Anthropic, PBC',
|
||
url: 'https://www.anthropic.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'NetpeakCheckerBot',
|
||
name: 'Netpeak Checker',
|
||
category: 'Crawler',
|
||
url: 'https://netpeaksoftware.com/checker',
|
||
producer: {
|
||
name: 'Netpeak LTD',
|
||
url: 'https://netpeaksoftware.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SandobaCrawler',
|
||
name: 'Sandoba//Crawler',
|
||
category: 'Crawler',
|
||
url: 'https://www.sandoba.com/en/crawler/',
|
||
producer: {
|
||
name: 'SANDOBA//EBUSINESS SOLUTIONS',
|
||
url: 'https://www.sandoba.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SirdataBot',
|
||
name: 'Sirdata',
|
||
category: 'Crawler',
|
||
url: 'https://semantic-api.docs.sirdata.net/contextual-api/contextual-api/introduction',
|
||
producer: {
|
||
name: 'Sirdata SAS',
|
||
url: 'https://www.sirdata.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'CheckMarkNetwork',
|
||
name: 'CheckMark Network',
|
||
category: 'Crawler',
|
||
url: 'https://www.checkmarknetwork.com/spider.html/',
|
||
producer: {
|
||
name: 'Exipert, Inc.',
|
||
url: 'https://www.checkmarknetwork.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'cohere-ai',
|
||
name: 'Cohere AI',
|
||
category: 'Crawler',
|
||
url: 'https://cohere.com/',
|
||
producer: {
|
||
name: 'Cohere, Inc.',
|
||
url: 'https://cohere.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'PerplexityBot',
|
||
name: 'PerplexityBot',
|
||
category: 'Crawler',
|
||
url: 'https://docs.perplexity.ai/guides/bots',
|
||
producer: {
|
||
name: 'Perplexity AI, Inc.',
|
||
url: 'https://www.perplexity.ai/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Perplexity-User',
|
||
name: 'Perplexity-User',
|
||
category: 'Crawler',
|
||
url: 'https://docs.perplexity.ai/guides/bots',
|
||
producer: {
|
||
name: 'Perplexity AI, Inc.',
|
||
url: 'https://www.perplexity.ai/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'TTD-Content',
|
||
name: 'The Trade Desk Content',
|
||
category: 'Crawler',
|
||
url: 'https://www.thetradedesk.com/us/ttd-content',
|
||
producer: {
|
||
name: 'The Trade Desk, Inc.',
|
||
url: 'https://www.thetradedesk.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'montastic-monitor',
|
||
name: 'Montastic Monitor',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.montastic.com/',
|
||
producer: {
|
||
name: 'Metadot, Corp.',
|
||
url: 'https://www.metadot.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Ruby, Twurly v',
|
||
name: 'Twurly',
|
||
category: 'Crawler',
|
||
url: 'https://twurly.org/',
|
||
},
|
||
{
|
||
regex: 'Mixnode(?:Cache)?',
|
||
name: 'Mixnode',
|
||
category: 'Crawler',
|
||
url: 'https://www.mixnode.com/',
|
||
producer: {
|
||
name: 'Mixnode Technologies, Inc.',
|
||
url: 'https://www.mixnode.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'CSSCheck',
|
||
name: 'CSSCheck',
|
||
category: 'Validator',
|
||
},
|
||
{
|
||
includes: 'MicrosoftPreview',
|
||
name: 'Microsoft Preview',
|
||
category: 'Service Agent',
|
||
url: 'https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0',
|
||
producer: {
|
||
name: 'Microsoft Corporation',
|
||
url: 'https://www.microsoft.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 's~virustotalcloud',
|
||
name: 'VirusTotal Cloud',
|
||
category: 'Crawler',
|
||
url: 'https://www.virustotal.com/',
|
||
producer: {
|
||
name: 'Chronicle Security Ireland Limited',
|
||
url: 'https://chronicle.security/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'TinEye',
|
||
name: 'TinEye',
|
||
category: 'Crawler',
|
||
url: 'https://tineye.com/',
|
||
producer: {
|
||
name: 'Idée, Inc.',
|
||
url: 'https://tineye.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'e~arsnova-filter-system',
|
||
name: 'ARSNova Filter System',
|
||
category: 'Crawler',
|
||
url: 'https://particify.de/en/',
|
||
producer: {
|
||
name: 'Particify Gerhardt & Weingarten OHG',
|
||
url: 'https://particify.de/en/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'botify',
|
||
name: 'Botify',
|
||
category: 'Crawler',
|
||
url: 'https://www.botify.com/',
|
||
producer: {
|
||
name: 'BOTIFY SAS',
|
||
url: 'https://www.botify.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'adscanner',
|
||
name: 'Adscanner',
|
||
category: 'Crawler',
|
||
url: 'https://www.alleyesonscreens.com/',
|
||
producer: {
|
||
name: 'AdScanner d.o.o',
|
||
url: 'https://www.alleyesonscreens.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'online-webceo-bot',
|
||
name: 'WebCEO',
|
||
category: 'Crawler',
|
||
url: 'https://www.webceo.com/',
|
||
producer: {
|
||
name: 'WebCEO, LLC',
|
||
url: 'https://www.webceo.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'NetTrack',
|
||
name: 'NetTrack',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20160607151934/https://nettrack.info/',
|
||
},
|
||
{
|
||
includes: 'htmlyse',
|
||
name: 'htmlyse',
|
||
category: 'Crawler',
|
||
url: 'https://www.htmlyse.com/',
|
||
producer: {
|
||
name: 'Vistex LTD',
|
||
url: 'https://www.htmlyse.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'TrendsmapResolver',
|
||
name: 'Trendsmap',
|
||
category: 'Crawler',
|
||
url: 'https://www.trendsmap.com/',
|
||
producer: {
|
||
name: 'Trendsmap Pty Ltd',
|
||
url: 'https://www.trendsmap.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Shareaholic(?:bot)?',
|
||
name: 'Steve Bot',
|
||
category: 'Crawler',
|
||
url: 'https://www.shareaholic.com/steve',
|
||
producer: {
|
||
name: 'Shareaholic, Inc.',
|
||
url: 'https://www.shareaholic.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'keycdn-tools:',
|
||
name: 'KeyCDN Tools',
|
||
category: 'Service Agent',
|
||
url: 'https://tools.keycdn.com/geo',
|
||
},
|
||
{
|
||
includes: 'keycdn-tools/',
|
||
name: 'KeyCDN Tools',
|
||
category: 'Service Agent',
|
||
url: 'https://tools.keycdn.com/',
|
||
producer: {
|
||
name: 'proinity LLC',
|
||
url: 'https://www.keycdn.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Arquivo-web-crawler',
|
||
name: 'Arquivo.pt',
|
||
category: 'Crawler',
|
||
url: 'https://sobre.arquivo.pt/en/help/crawling-and-archiving-web-content/',
|
||
producer: {
|
||
name: 'FCT|FCCN',
|
||
url: 'https://www.fct.pt/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'WhatsMyIP\\.org',
|
||
name: 'WhatsMyIP.org',
|
||
category: 'Service Agent',
|
||
url: 'https://www.whatsmyip.org/ua/',
|
||
},
|
||
{
|
||
includes: 'SenutoBot',
|
||
name: 'Senuto',
|
||
category: 'Crawler',
|
||
url: 'https://www.senuto.com/',
|
||
producer: {
|
||
name: 'Senuto Sp. z o.o.',
|
||
url: 'https://www.senuto.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'GozleBot',
|
||
name: 'Gozle',
|
||
category: 'Crawler',
|
||
url: 'https://gozle.com.tm/en/blog/post/1',
|
||
producer: {
|
||
name: 'Doly Horjun HJ',
|
||
url: 'https://gozle.com.tm/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Quantcastbot',
|
||
name: 'Quantcast',
|
||
category: 'Crawler',
|
||
url: 'https://www.quantcast.com/bot/',
|
||
producer: {
|
||
name: 'Quantcast Corp.',
|
||
url: 'https://www.quantcast.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'FontRadar',
|
||
name: 'FontRadar',
|
||
category: 'Crawler',
|
||
url: 'https://www.fontradar.com/',
|
||
producer: {
|
||
name: 'EMDASH SAS',
|
||
url: 'https://www.fontradar.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ViberUrlDownloader',
|
||
name: 'Viber Url Downloader',
|
||
category: 'Service Agent',
|
||
url: 'https://www.viber.com/',
|
||
producer: {
|
||
name: 'Viber Media S.à r.l.',
|
||
url: 'https://www.viber.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: '^Zeno$',
|
||
name: 'Zeno',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/internetarchive/Zeno',
|
||
producer: {
|
||
name: 'The Internet Archive',
|
||
url: 'https://archive.org/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Barracuda Sentinel',
|
||
name: 'Barracuda Sentinel',
|
||
category: 'Service Agent',
|
||
url: 'https://sentinel.barracudanetworks.com/',
|
||
producer: {
|
||
name: 'Barracuda Networks, Inc.',
|
||
url: 'https://www.barracudanetworks.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'RuxitSynthetic',
|
||
name: 'RuxitSynthetic',
|
||
category: 'Site Monitor',
|
||
url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164',
|
||
producer: {
|
||
name: 'Dynatrace LLC',
|
||
url: 'https://www.dynatrace.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'DynatraceSynthetic',
|
||
name: 'DynatraceSynthetic',
|
||
category: 'Site Monitor',
|
||
url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164',
|
||
producer: {
|
||
name: 'Dynatrace LLC',
|
||
url: 'https://www.dynatrace.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'sitebulb',
|
||
name: 'Sitebulb',
|
||
category: 'Crawler',
|
||
url: 'https://sitebulb.com/',
|
||
producer: {
|
||
name: 'Sitebulb Limited',
|
||
url: 'https://sitebulb.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Monsidobot',
|
||
name: 'Monsidobot',
|
||
category: 'Crawler',
|
||
url: 'https://monsido.com/bot-html',
|
||
producer: {
|
||
name: 'Monsido LLC',
|
||
url: 'https://monsido.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'AccompanyBot',
|
||
name: 'AccompanyBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.accompany.com/',
|
||
producer: {
|
||
name: 'Accompani, Inc',
|
||
url: 'https://www.accompany.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Ghost Inspector',
|
||
name: 'Ghost Inspector',
|
||
category: 'Site Monitor',
|
||
url: 'https://docs.ghostinspector.com/faq/#how-do-i-detect-ghost-inspector-test-runner-traffic-on-my-site',
|
||
producer: {
|
||
name: 'Ghost Inspector, Inc.',
|
||
url: 'https://www.ghostinspector.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Google-Apps-Script',
|
||
name: 'Google Apps Script',
|
||
category: 'Service Agent',
|
||
url: 'https://www.google.com/script/start/',
|
||
},
|
||
{
|
||
includes: 'SiteOne-Crawler',
|
||
name: 'SiteOne Crawler',
|
||
category: 'Crawler',
|
||
url: 'https://crawler.siteone.io/bot/',
|
||
producer: {
|
||
name: 'SiteOne s.r.o.',
|
||
url: 'https://www.siteone.io/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Detectify',
|
||
name: 'Detectify',
|
||
category: 'Security Checker',
|
||
url: 'https://support.detectify.com/support/solutions/articles/48001049001-how-to-allow-detectify-to-access-your-site',
|
||
producer: {
|
||
name: 'Detectify AB',
|
||
url: 'https://detectify.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'DomCopBot',
|
||
name: 'DomCop Bot',
|
||
category: 'Crawler',
|
||
url: 'https://www.domcop.com/bot',
|
||
producer: {
|
||
name: 'Axeman Technology Solutions LLP',
|
||
url: 'https://axemantech.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Paqlebot',
|
||
name: 'Paqlebot',
|
||
category: 'Crawler',
|
||
url: 'https://www.paqle.dk/about/paqlebot',
|
||
producer: {
|
||
name: 'Paqle A/S',
|
||
url: 'https://www.paqle.dk/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Wibybot',
|
||
name: 'Wibybot',
|
||
category: 'Crawler',
|
||
url: 'https://www.wiby.me/',
|
||
},
|
||
{
|
||
includes: 'Synapse',
|
||
name: 'Synapse',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/matrix-org/synapse',
|
||
},
|
||
{
|
||
includes: 'OSZKbot',
|
||
name: 'OSZKbot',
|
||
category: 'Crawler',
|
||
url: 'http://mekosztaly.oszk.hu/mia/',
|
||
producer: {
|
||
name: 'National Szechenyi Library',
|
||
url: 'https://webarchivum.oszk.hu/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ZoomBot',
|
||
name: 'ZoomBot',
|
||
category: 'Crawler',
|
||
url: 'https://suite.seozoom.it/bot.html',
|
||
producer: {
|
||
name: 'SEO Cube S.r.l.',
|
||
url: 'https://www.seocube.it/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'RavenCrawler',
|
||
name: 'RavenCrawler',
|
||
category: 'Crawler',
|
||
url: 'https://raventools.com/site-auditor/',
|
||
producer: {
|
||
name: 'TapClicks, Inc.',
|
||
url: 'https://www.tapclicks.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'KadoBot',
|
||
name: 'KadoBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.kadolijst.nl/bot',
|
||
producer: {
|
||
name: 'Kadolijst',
|
||
url: 'https://www.kadolijst.nl/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Dubbotbot',
|
||
name: 'Dubbotbot',
|
||
category: 'Crawler',
|
||
url: 'https://help.dubbot.com/en/articles/6746594-example-custom-user-agent',
|
||
producer: {
|
||
name: 'DubBot',
|
||
url: 'https://dubbot.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Swiftbot',
|
||
name: 'Swiftbot',
|
||
category: 'Crawler',
|
||
url: 'https://swiftype.com/swiftbot',
|
||
producer: {
|
||
name: 'Elasticsearch, B.V.',
|
||
url: 'https://www.elastic.co/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'EyeMonIT',
|
||
name: 'EyeMonit',
|
||
category: 'Site Monitor',
|
||
url: 'https://eyemonit.com/',
|
||
producer: {
|
||
name: 'EyeMonit',
|
||
url: 'https://eyemonit.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ThousandEyes',
|
||
name: 'ThousandEyes',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.thousandeyes.com/',
|
||
producer: {
|
||
name: 'Cisco Systems, Inc.',
|
||
url: 'https://www.cisco.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'OmtrBot',
|
||
name: 'OmtrBot',
|
||
category: 'Site Monitor',
|
||
},
|
||
{
|
||
includes: 'WebMon',
|
||
name: 'WebMon',
|
||
category: 'Site Monitor',
|
||
},
|
||
{
|
||
includes: 'AdsTxtCrawlerTP',
|
||
name: 'AdsTxtCrawlerTP',
|
||
category: 'Crawler',
|
||
},
|
||
{
|
||
includes: 'fragFINN',
|
||
name: 'fragFINN',
|
||
category: 'Crawler',
|
||
url: 'https://www.fragfinn.de/',
|
||
producer: {
|
||
name: 'fragFINN e.V.',
|
||
url: 'https://www.fragfinn.de/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Clickagy',
|
||
name: 'Clickagy',
|
||
category: 'Crawler',
|
||
url: 'https://www.clickagy.com/',
|
||
producer: {
|
||
name: 'Clickagy, LLC',
|
||
url: 'https://www.clickagy.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'kiwitcms-gitops',
|
||
name: 'Kiwi TCMS GitOps',
|
||
category: 'Service Agent',
|
||
url: 'https://kiwitcms.org',
|
||
producer: {
|
||
name: 'Open Technologies Bulgaria, Ltd.',
|
||
url: 'https://kiwitcms.org',
|
||
},
|
||
},
|
||
{
|
||
includes: 'webtru_crawler',
|
||
name: 'webtru',
|
||
category: 'Crawler',
|
||
url: 'https://webtru.io/',
|
||
producer: {
|
||
name: 'DataSign Inc.',
|
||
url: 'https://datasign.jp/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'URLSuMaBot',
|
||
name: 'URLSuMaBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.urlsuma.de/',
|
||
},
|
||
{
|
||
includes: '360JK yunjiankong',
|
||
name: '360JK',
|
||
category: 'Site Monitor',
|
||
url: 'http://jk.cloud.360.cn/',
|
||
producer: {
|
||
name: '360 Security Technology Inc.',
|
||
url: 'https://www.360.cn/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'UCSBNetworkMeasurement',
|
||
name: 'UCSB Network Measurement',
|
||
category: 'Crawler',
|
||
url: 'https://www.it.ucsb.edu/',
|
||
producer: {
|
||
name: 'University of California, Santa Barbara',
|
||
url: 'https://www.it.ucsb.edu/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Plesk screenshot bot',
|
||
name: 'Plesk Screenshot Service',
|
||
category: 'Service Agent',
|
||
url: 'https://support.plesk.com/hc/en-us/articles/13302778306199-What-is-Plesk-Screenshot-Service',
|
||
producer: {
|
||
name: 'Plesk International GmbH',
|
||
url: 'https://www.plesk.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Who\\.is',
|
||
name: 'Who.is Bot',
|
||
category: 'Crawler',
|
||
url: 'https://who.is/',
|
||
},
|
||
{
|
||
includes: 'Probely',
|
||
name: 'Probely',
|
||
category: 'Security Checker',
|
||
url: 'https://probely.com/sos/',
|
||
producer: {
|
||
name: 'Probely - Soluções de Cibersegurança, S.A.',
|
||
url: 'https://probely.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Uptimia',
|
||
name: 'Uptimia',
|
||
category: 'Site Monitor',
|
||
url: 'https://www.uptimia.com/',
|
||
producer: {
|
||
name: 'JJ Online GmbH',
|
||
url: 'https://www.uptimia.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: '2GDPR',
|
||
name: '2GDPR',
|
||
category: 'Service Agent',
|
||
url: 'https://2gdpr.com/tos',
|
||
producer: {
|
||
name: '2GDPR',
|
||
url: 'https://2gdpr.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'abuse\\.xmco\\.fr',
|
||
name: 'Serenety',
|
||
category: 'Security Checker',
|
||
url: 'https://abuse.xmco.fr/',
|
||
producer: {
|
||
name: 'XMCO, SASU',
|
||
url: 'https://www.xmco.fr/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'CheckHost',
|
||
name: 'CheckHost',
|
||
category: 'Site Monitor',
|
||
url: 'https://check-host.net/',
|
||
producer: {
|
||
name: 'CheckHost',
|
||
url: 'https://check-host.net/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'LAC_IAHarvester',
|
||
name: 'LAC IA Harvester',
|
||
category: 'Crawler',
|
||
url: 'https://library-archives.canada.ca/eng/services/government-canada/web-social-media-preservation-program/Pages/web-archive.aspx',
|
||
producer: {
|
||
name: 'Library and Archives Canada',
|
||
url: 'https://library-archives.canada.ca/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'InsytfulBot',
|
||
name: 'InsytfulBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.insytful.com/',
|
||
producer: {
|
||
name: 'Zengenti Limited',
|
||
url: 'https://www.zengenti.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'statista\\.com',
|
||
name: 'Statista',
|
||
category: 'Crawler',
|
||
url: 'https://www.statista.com/',
|
||
producer: {
|
||
name: 'Statista, Inc.',
|
||
url: 'https://www.statista.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SubstackContentFetch',
|
||
name: 'Substack Content Fetch',
|
||
category: 'Crawler',
|
||
url: 'https://substack.com/',
|
||
producer: {
|
||
name: 'Substack, Inc.',
|
||
url: 'https://substack.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: '^ds9',
|
||
name: 'Deep SEARCH 9',
|
||
category: 'Crawler',
|
||
url: 'https://www.copyright.com/blog/ccc-expands-corporate-solutions-offering-with-new-technology/',
|
||
producer: {
|
||
name: 'Copyright Clearance Center, Inc.',
|
||
url: 'https://www.copyright.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'LiveJournal\\.com',
|
||
name: 'LiveJournal',
|
||
url: 'https://www.livejournal.com/',
|
||
category: 'Feed Fetcher',
|
||
producer: {
|
||
name: 'ООО "СИМ"',
|
||
url: 'https://www.livejournal.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'bitdiscovery',
|
||
name: 'Tenable.asm',
|
||
category: 'Security Checker',
|
||
url: 'https://bitdiscovery.com/',
|
||
producer: {
|
||
name: 'Tenable, Inc.',
|
||
url: 'https://www.tenable.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Castopod',
|
||
name: 'Castopod',
|
||
category: 'Crawler',
|
||
url: 'https://www.castopod.org/',
|
||
},
|
||
{
|
||
includes: 'Elastic/Synthetics',
|
||
name: 'Elastic Synthetics',
|
||
category: 'Site Monitor',
|
||
url: 'https://github.com/elastic/synthetics',
|
||
producer: {
|
||
name: 'Elasticsearch B.V.',
|
||
url: 'https://www.elastic.co/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'WDG_Validator',
|
||
name: 'WDG HTML Validator',
|
||
category: 'Validator',
|
||
url: 'http://www.htmlhelp.com/tools/validator/',
|
||
},
|
||
{
|
||
regex: 'scan@aegis.network',
|
||
name: 'Aegis',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20180910002802/http://www.aegis.network/',
|
||
},
|
||
{
|
||
includes: 'CrawlyProjectCrawler',
|
||
name: 'Crawly Project',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20240326141952/https://crawlyproject.digitaldragon.dev/',
|
||
},
|
||
{
|
||
includes: 'BDFetch',
|
||
name: 'BDFetch',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20130821043949/http://www.branddimensions.com/',
|
||
},
|
||
{
|
||
includes: 'PunkMap',
|
||
name: 'Punk Map',
|
||
category: 'Security Checker',
|
||
url: 'https://github.com/openeasm/punkmap',
|
||
},
|
||
{
|
||
includes: 'GenomeCrawlerd',
|
||
name: 'Deepfield Genome',
|
||
category: 'Crawler',
|
||
url: 'https://www.nokia.com/networks/ip-networks/deepfield/genome/',
|
||
producer: {
|
||
name: 'Nokia Corporation',
|
||
url: 'https://www.nokia.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Gaisbot',
|
||
name: 'Gaisbot',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20090604121511/https://gais.cs.ccu.edu.tw/robot.php',
|
||
},
|
||
{
|
||
includes: 'FAST-WebCrawler',
|
||
name: 'AlltheWeb',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20041020050801/http://www.alltheweb.com/help/webmaster/crawler',
|
||
},
|
||
{
|
||
regex: 'ducks\\.party',
|
||
name: 'ducks.party',
|
||
category: 'Security Checker',
|
||
url: 'https://ducks.party/',
|
||
},
|
||
{
|
||
includes: 'DepSpid',
|
||
name: 'DepSpid',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20080321224033/http://about.depspid.net/',
|
||
},
|
||
{
|
||
regex: 'Website-info\\.net',
|
||
name: 'Website-info',
|
||
category: 'Crawler',
|
||
url: 'https://website-info.net/robot',
|
||
producer: {
|
||
name: 'Meins und Vogel GmbH',
|
||
url: 'https://muv.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'RedekenBot',
|
||
name: 'RedekenBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.redeken.com/en/help/bot.html',
|
||
producer: {
|
||
name: 'Redeken',
|
||
url: 'https://www.redeken.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'semaltbot',
|
||
name: 'semaltbot',
|
||
category: 'Crawler',
|
||
url: 'https://semalt.net/',
|
||
producer: {
|
||
name: 'Semalt LP',
|
||
url: 'https://semalt.net/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'MakeMerryBot',
|
||
name: 'MakeMerryBot',
|
||
category: 'Crawler',
|
||
url: 'https://makemerry.app/bots',
|
||
},
|
||
{
|
||
includes: 'Timpibot',
|
||
name: 'Timpibot',
|
||
category: 'Crawler',
|
||
url: 'https://timpi.io/',
|
||
producer: {
|
||
name: 'Timpi Inc.',
|
||
url: 'https://timpi.io/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Validbot',
|
||
name: 'ValidBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.validbot.com/',
|
||
producer: {
|
||
name: 'Jake Olefsky LLC',
|
||
url: 'https://www.validbot.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'NPBot',
|
||
name: 'NameProtectBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.cscglobal.com/cscglobal/home/',
|
||
producer: {
|
||
name: 'NameProtect, Inc.',
|
||
url: 'https://www.cscglobal.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'domaincodex\\.com',
|
||
name: 'Domain Codex',
|
||
category: 'Crawler',
|
||
url: 'https://www.domaincodex.com/',
|
||
producer: {
|
||
name: 'Erie Data Systems, LLC',
|
||
url: 'https://www.eriedatasys.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Swisscows Favicons',
|
||
name: 'Swisscows Favicons',
|
||
category: 'Crawler',
|
||
url: 'https://swisscows.com/',
|
||
producer: {
|
||
name: 'Swisscows AG',
|
||
url: 'https://swisscows.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'leak\\.info',
|
||
name: 'leak.info',
|
||
category: 'Crawler',
|
||
url: 'http://www.leak.info/',
|
||
},
|
||
{
|
||
includes: 'workona',
|
||
name: 'Workona',
|
||
category: 'Crawler',
|
||
url: 'https://workona.com/',
|
||
producer: {
|
||
name: 'Workona, Inc.',
|
||
url: 'https://workona.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Bloglines',
|
||
name: 'Bloglines',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20140309033202/http://www.bloglines.com/',
|
||
producer: {
|
||
name: 'Reply!, Inc.',
|
||
url: 'https://www.reply.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'heritrix',
|
||
name: 'Heritrix',
|
||
category: 'Crawler',
|
||
url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix',
|
||
producer: {
|
||
name: 'The Internet Archive',
|
||
url: 'https://archive.org',
|
||
},
|
||
},
|
||
{
|
||
regex: 'search\\.marginalia\\.nu',
|
||
name: 'Marginalia',
|
||
category: 'Crawler',
|
||
url: 'https://www.marginalia.nu/marginalia-search/for-webmasters/',
|
||
producer: {
|
||
name: 'Marginalia',
|
||
url: 'https://www.marginalia.nu/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'vu-server-health-scanner',
|
||
name: 'VU Server Health Scanner',
|
||
category: 'Security Checker',
|
||
url: 'https://130.37.198.75/index.html',
|
||
producer: {
|
||
name: 'VU Amsterdam',
|
||
url: 'https://vu.nl/en',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Functionize',
|
||
name: 'Functionize',
|
||
category: 'Crawler',
|
||
url: 'https://www.functionize.com/',
|
||
producer: {
|
||
name: 'Functionize, Inc.',
|
||
url: 'https://www.functionize.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Prerender',
|
||
name: 'Prerender',
|
||
category: 'Crawler',
|
||
url: 'https://docs.prerender.io/docs/33-overview-of-prerender-crawlers',
|
||
producer: {
|
||
name: 'saas.group Inc.',
|
||
url: 'https://saas.group/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'bl\\.uk_ldfc_bot',
|
||
name: 'The British Library Legal Deposit Bot',
|
||
category: 'Crawler',
|
||
url: 'https://www.bl.uk/',
|
||
producer: {
|
||
name: 'The British Library',
|
||
url: 'https://www.bl.uk/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'Miniature\\.io',
|
||
name: 'Miniature.io',
|
||
category: 'Service Agent',
|
||
url: 'https://miniature.io/',
|
||
producer: {
|
||
name: 'LCX Ventures Ltd',
|
||
url: 'https://www.lcxventures.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Convertify',
|
||
name: 'Convertify',
|
||
category: 'Service Agent',
|
||
url: 'https://www.convertify.app/',
|
||
producer: {
|
||
name: 'Convertify',
|
||
url: 'https://www.convertify.app/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'ZoteroTranslationServer',
|
||
name: 'Zotero Translation Server',
|
||
category: 'Service Agent',
|
||
url: 'https://github.com/wikimedia/mediawiki-services-zotero',
|
||
producer: {
|
||
name: 'The Wikimedia Foundation, Inc.',
|
||
url: 'https://www.wikimedia.org/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'MuckRack',
|
||
name: 'MuckRack',
|
||
category: 'Crawler',
|
||
url: 'https://muckrack.com/',
|
||
producer: {
|
||
name: 'Muck Rack, LLC',
|
||
url: 'https://muckrack.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Golfe',
|
||
name: 'Golfe',
|
||
category: 'Crawler',
|
||
url: 'http://www.goo-olfe.ae/bot.html',
|
||
},
|
||
{
|
||
includes: 'SpiderLing',
|
||
name: 'SpiderLing',
|
||
category: 'Crawler',
|
||
url: 'https://nlp.fi.muni.cz/projects/biwec/',
|
||
producer: {
|
||
name: 'Natural Language Processing Centre',
|
||
url: 'https://nlp.fi.muni.cz/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Bravebot',
|
||
name: 'Bravebot',
|
||
category: 'Search bot',
|
||
url: 'https://search.brave.com/help/brave-search-crawler',
|
||
producer: {
|
||
name: 'Brave Software, Inc.',
|
||
url: 'https://brave.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: '1001FirmsBot',
|
||
name: '1001FirmsBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.1001firms.com/1001firmsbot.php',
|
||
},
|
||
{
|
||
includes: 'SteamChatURLLookup',
|
||
name: 'Steam Chat URL Lookup',
|
||
category: 'Service Agent',
|
||
url: 'https://help.steampowered.com/en/faqs/view/595C-42F4-3B66-E02F',
|
||
producer: {
|
||
name: 'Valve Corporation',
|
||
url: 'https://www.valvesoftware.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'ohdear\\.app',
|
||
name: 'Oh Dear',
|
||
category: 'Site Monitor',
|
||
url: 'https://ohdear.app/docs/faq/what-is-the-oh-dear-crawler-doing-in-my-logs',
|
||
producer: {
|
||
name: 'Immutable, SNC',
|
||
url: 'https://ohdear.app/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Inspici',
|
||
name: 'Inspici',
|
||
category: 'Crawler',
|
||
url: 'https://www.inspici.com/',
|
||
producer: {
|
||
name: 'Inspici, LLC',
|
||
url: 'https://www.inspici.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'peer39_crawler',
|
||
name: 'Peer39',
|
||
category: 'Crawler',
|
||
url: 'https://www.peer39.com/crawler-notice',
|
||
producer: {
|
||
name: 'Peer39 Tech, LLC',
|
||
url: 'https://www.peer39.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Pandalytics',
|
||
name: 'Pandalytics',
|
||
category: 'Crawler',
|
||
url: 'https://www.domainsbot.com/business-intelligence/',
|
||
producer: {
|
||
name: 'DomainsBot, Inc.',
|
||
url: 'https://www.domainsbot.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'CloudServerMarketSpider',
|
||
name: 'CloudServerMarketSpider',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20151228225429/https://cloudservermarket.com/spider.html',
|
||
},
|
||
{
|
||
includes: 'Pigafetta',
|
||
name: 'Pigafetta',
|
||
category: 'Crawler',
|
||
url: 'https://visual-seo.com/Pigafetta-Bot',
|
||
producer: {
|
||
name: 'aStonish Studio Srl',
|
||
url: 'http://www.astonishstudio.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Cotoyogi',
|
||
name: 'Cotoyogi',
|
||
category: 'Crawler',
|
||
url: 'https://ds.rois.ac.jp/center8/crawler/',
|
||
producer: {
|
||
name: 'Joint Support-Center for Data Science Research (ROIS-DS)',
|
||
url: 'https://ds.rois.ac.jp/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SuggestBot',
|
||
name: 'SuggestBot',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/nettrom/suggestbot',
|
||
},
|
||
{
|
||
includes: 'cms-experiment',
|
||
name: 'CMS Experiment',
|
||
category: 'Security Checker',
|
||
url: 'https://securitee.org/cms-experiment-fall2024/',
|
||
},
|
||
{
|
||
includes: 'SiteCheckerBotCrawler',
|
||
name: 'SiteCheckerBotCrawler',
|
||
category: 'Crawler',
|
||
url: 'https://sitechecker.pro/',
|
||
producer: {
|
||
name: 'Cyber Circus Limited',
|
||
url: 'https://sitechecker.pro/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SBIder',
|
||
name: 'SBIder',
|
||
category: 'Crawler',
|
||
url: 'https://www.sitesell.com/sbider.html',
|
||
producer: {
|
||
name: 'SiteSell Inc.',
|
||
url: 'https://www.sitesell.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'LightspeedSystemsCrawler',
|
||
name: 'LightspeedSystemsCrawler',
|
||
category: 'Crawler',
|
||
url: 'https://www.lightspeedsystems.com/',
|
||
producer: {
|
||
name: 'Lightspeed Systems, Inc.',
|
||
url: 'https://www.lightspeedsystems.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Research JLU',
|
||
name: 'Research JLU',
|
||
category: 'Crawler',
|
||
url: 'https://www.uni-giessen.de/en/research',
|
||
producer: {
|
||
name: 'Justus Liebig University Giessen',
|
||
url: 'https://www.uni-giessen.de/en',
|
||
},
|
||
},
|
||
{
|
||
regex: '(?:hgf|OS)AlphaXCrawl',
|
||
name: 'AlphaXCrawl',
|
||
category: 'Crawler',
|
||
url: 'https://www.fim.uni-passau.de/en/data-science/research/open-search',
|
||
producer: {
|
||
name: 'University of Passau',
|
||
url: 'https://www.uni-passau.de/en/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'WPMU DEV',
|
||
name: 'WPMU DEV',
|
||
category: 'Crawler',
|
||
url: 'https://wpmudev.com/docs/wpmu-dev-plugins/broken-link-checker/#broken-link-checker-user-agent',
|
||
producer: {
|
||
name: 'Incsub, LLC.',
|
||
url: 'https://incsub.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'SnoopSecInspect',
|
||
name: 'SnoopSecInspect',
|
||
category: 'Security Checker',
|
||
url: 'https://web.archive.org/web/20241206193253/https://snoopsec.us.to/',
|
||
},
|
||
{
|
||
includes: 'ModatScanner',
|
||
name: 'ModatScanner',
|
||
category: 'Security Checker',
|
||
url: 'https://www.modat.io/scanning',
|
||
producer: {
|
||
name: 'Modat B.V.',
|
||
url: 'https://www.modat.io/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'researchcyber\\.net',
|
||
name: 'researchcyber.net',
|
||
category: 'Security Checker',
|
||
url: 'https://web.archive.org/web/20241219082407/https://researchcyber.net/',
|
||
},
|
||
{
|
||
includes: 'CrystalSemanticsBot',
|
||
name: 'CrystalSemanticsBot',
|
||
category: 'Crawler',
|
||
url: 'https://web.archive.org/web/20121230203310/http://www.crystalsemantics.com/user-agent/',
|
||
producer: {
|
||
name: 'Crystal Semantics Ltd.',
|
||
url: 'https://web.archive.org/web/20121029062239/http://www.crystalsemantics.com/',
|
||
},
|
||
},
|
||
{
|
||
regex: 'najdu\\.s\\.holubem\\.eu',
|
||
name: 'najdu.s.holubem.eu',
|
||
category: 'Crawler',
|
||
url: 'https://najdu.s.holubem.eu/',
|
||
},
|
||
{
|
||
includes: 'VORTEX/',
|
||
name: 'VORTEX',
|
||
category: 'Crawler',
|
||
url: 'https://marty.anstey.ca/robots/vortex',
|
||
},
|
||
{
|
||
regex: 'xtate/(\\d+\\.[.\\d]+)',
|
||
name: 'xtate',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/babycoff/xtate',
|
||
},
|
||
{
|
||
includes: 'FediList Agent/',
|
||
name: 'FediList',
|
||
category: 'Social Media Agent',
|
||
url: 'https://fedilist.com/',
|
||
},
|
||
{
|
||
regex: 'Grafana/(\\d+\\.[.\\d]+)',
|
||
name: 'Grafana',
|
||
category: 'Site Monitor',
|
||
url: 'https://github.com/grafana/grafana',
|
||
producer: {
|
||
name: 'Grafana Labs',
|
||
url: 'https://grafana.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'github-camo',
|
||
name: 'Github Camo',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/atmos/camo',
|
||
producer: {
|
||
name: 'Github',
|
||
url: 'https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/about-anonymized-urls',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Bluesky',
|
||
name: 'Bluesky',
|
||
category: 'Social Media Agent',
|
||
url: 'https://bsky.app',
|
||
producer: {
|
||
name: 'Bluesky Social PBC',
|
||
url: 'https://bsky.app',
|
||
},
|
||
},
|
||
{
|
||
regex: 'OpenGraph\\.io',
|
||
name: 'OpenGraph.io',
|
||
category: 'Crawler',
|
||
url: 'https://www.opengraph.io',
|
||
producer: {
|
||
name: 'OpenGraph.io',
|
||
url: 'https://www.opengraph.io',
|
||
},
|
||
},
|
||
{
|
||
includes: 'microsoft-flow/',
|
||
name: 'Microsoft Power Automate',
|
||
category: 'Service Agent',
|
||
url: 'https://www.microsoft.com/en-us/power-platform/products/power-automate',
|
||
producer: {
|
||
name: 'Microsoft Corporation',
|
||
url: 'https://www.microsoft.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'Simbiat Software',
|
||
name: 'Simbiat Software',
|
||
category: 'Crawler',
|
||
url: 'https://www.simbiat.eu',
|
||
producer: {
|
||
name: 'Simbiat Software',
|
||
url: 'https://www.simbiat.eu',
|
||
},
|
||
},
|
||
{
|
||
includes: 'IbouBot',
|
||
name: 'IbouBot',
|
||
category: 'Search bot',
|
||
url: 'https://ibou.io/iboubot.html',
|
||
},
|
||
{
|
||
includes: 'AddSearchBot',
|
||
name: 'AddSearchBot',
|
||
category: 'Crawler',
|
||
url: 'https://www.addsearch.com/docs/indexing/whitelisting-addsearch-bot/',
|
||
producer: {
|
||
name: 'AddSearch Oy',
|
||
url: 'https://www.addsearch.com/',
|
||
},
|
||
},
|
||
{
|
||
includes: 'TerraCotta',
|
||
name: 'TerraCotta',
|
||
category: 'Crawler',
|
||
url: 'https://github.com/CeramicTeam/CeramicTerracotta',
|
||
producer: {
|
||
name: 'Ceramic, Inc.',
|
||
url: 'https://ceramic.ai/',
|
||
},
|
||
},
|
||
{
|
||
regex:
|
||
'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\\.o\\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \\(cow\\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherx?web|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|xx032_bo9vs83_2a|sslshed|geckotrail|Wordup|Keydrop|\\(compatible\\)|John Recon|SPARK COMMIT|masjesu|Komaru_The_Cat|Jesus Christ of Nazareth is LORD|Kowai|Hakai|LoliSec|LMAO|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|Node.js|Report Runner|url|Zeus|ZmEu)$|OnlyScans|TheInternetSearchx',
|
||
name: 'Generic Bot',
|
||
},
|
||
{
|
||
regex:
|
||
'[a-z0-9_-]*(?:(?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|(?<!node-|uclient-|Mikrotik/\\d\\.[x\\d] |electron-)fetch(?:er)?|indexer|inspector|monitor|(?<!Microsoft |banshee-)project(?!or)|(?<!Google Wap |Blue |SpeedMode; )proxy|(?<!P)research|resolver|robots|(?<!Cam)scanner|scraper|script|searcher|(?<!-)security|spider(?! 8)|study|transcoder|uptime|user[ _]?agent|validator|-(?:AI|Extended|User)/)(?:[^a-z]|$)',
|
||
name: 'Generic Bot',
|
||
},
|
||
] as const;
|
||
export default bots;
|