chore(api): update bots list
This commit is contained in:
@@ -558,7 +558,21 @@ const bots = [
|
||||
regex: 'facebook(?:catalog|externalhit|externalua|platform|scraper)',
|
||||
name: 'Facebook Crawler',
|
||||
category: 'Social Media Agent',
|
||||
url: 'https://developers.facebook.com/docs/sharing/webmasters/crawler/',
|
||||
url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers',
|
||||
producer: { name: 'Meta Platforms, Inc.', url: 'https://www.meta.com/' },
|
||||
},
|
||||
{
|
||||
regex: 'meta-externalagent',
|
||||
name: 'Meta-ExternalAgent',
|
||||
category: 'Crawler',
|
||||
url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers',
|
||||
producer: { name: 'Meta Platforms, Inc.', url: 'https://www.meta.com/' },
|
||||
},
|
||||
{
|
||||
regex: 'meta-externalfetcher',
|
||||
name: 'Meta-ExternalFetcher',
|
||||
category: 'Social Media Agent',
|
||||
url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers',
|
||||
producer: { name: 'Meta Platforms, Inc.', url: 'https://www.meta.com/' },
|
||||
},
|
||||
{
|
||||
@@ -730,6 +744,34 @@ const bots = [
|
||||
url: '',
|
||||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||||
},
|
||||
{
|
||||
regex: 'Google-Document-Conversion',
|
||||
name: 'Google-Document-Conversion',
|
||||
category: 'Service Agent',
|
||||
url: 'https://support.google.com/drive/answer/176692?hl=en',
|
||||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||||
},
|
||||
{
|
||||
regex: 'GoogleDocs; apps-spreadsheets',
|
||||
name: 'Google Sheets',
|
||||
category: 'Service Agent',
|
||||
url: 'https://workspace.google.com/products/sheets/',
|
||||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||||
},
|
||||
{
|
||||
regex: 'GoogleDocs; apps-presentations',
|
||||
name: 'Google Slides',
|
||||
category: 'Service Agent',
|
||||
url: 'https://workspace.google.com/products/slides/',
|
||||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||||
},
|
||||
{
|
||||
regex: 'GoogleDocs;',
|
||||
name: 'Google Docs',
|
||||
category: 'Service Agent',
|
||||
url: 'https://docs.google.com/',
|
||||
producer: { name: 'Google Inc.', url: 'https://www.google.com/' },
|
||||
},
|
||||
{
|
||||
regex: 'SeznamEmailProxy',
|
||||
name: 'Seznam Email Proxy',
|
||||
@@ -863,7 +905,7 @@ const bots = [
|
||||
url: '',
|
||||
producer: { name: '', url: 'https://ip-guide.com' },
|
||||
},
|
||||
{ regex: 'k6/[0-9\\.]+', name: 'K6', url: 'https://k6.io/' },
|
||||
{ regex: 'k6/[0-9.]+', name: 'K6', url: 'https://k6.io/' },
|
||||
{
|
||||
regex: 'kouio',
|
||||
name: 'Kouio',
|
||||
@@ -4092,13 +4134,6 @@ const bots = [
|
||||
url: 'https://www.ghostinspector.com/',
|
||||
},
|
||||
},
|
||||
{
|
||||
regex: 'Cypress/[\\d.]+',
|
||||
name: 'Cypress',
|
||||
category: 'Site Monitor',
|
||||
url: 'https://github.com/cypress-io/cypress',
|
||||
producer: { name: 'Cypress.io, Inc.', url: 'https://www.cypress.io/' },
|
||||
},
|
||||
{
|
||||
regex: 'Google-Apps-Script',
|
||||
name: 'Google Apps Script',
|
||||
@@ -4661,9 +4696,62 @@ const bots = [
|
||||
url: 'https://ohdear.app/docs/faq/what-is-the-oh-dear-crawler-doing-in-my-logs',
|
||||
producer: { name: 'Immutable, SNC', url: 'https://ohdear.app/' },
|
||||
},
|
||||
{
|
||||
regex: 'Inspici',
|
||||
name: 'Inspici',
|
||||
category: 'Crawler',
|
||||
url: 'https://www.inspici.com/',
|
||||
producer: { name: 'Inspici, LLC', url: 'https://www.inspici.com/' },
|
||||
},
|
||||
{
|
||||
regex: 'peer39_crawler',
|
||||
name: 'Peer39',
|
||||
category: 'Crawler',
|
||||
url: 'https://www.peer39.com/crawler-notice',
|
||||
producer: { name: 'Peer39 Tech, LLC', url: 'https://www.peer39.com/' },
|
||||
},
|
||||
{
|
||||
regex: 'Pandalytics',
|
||||
name: 'Pandalytics',
|
||||
category: 'Crawler',
|
||||
url: 'https://www.domainsbot.com/business-intelligence/',
|
||||
producer: { name: 'DomainsBot, Inc.', url: 'https://www.domainsbot.com/' },
|
||||
},
|
||||
{
|
||||
regex: 'CloudServerMarketSpider',
|
||||
name: 'CloudServerMarketSpider',
|
||||
category: 'Crawler',
|
||||
url: 'https://web.archive.org/web/20151228225429/https://cloudservermarket.com/spider.html',
|
||||
},
|
||||
{
|
||||
regex: 'Pigafetta',
|
||||
name: 'Pigafetta',
|
||||
category: 'Crawler',
|
||||
url: 'https://visual-seo.com/Pigafetta-Bot',
|
||||
producer: {
|
||||
name: 'aStonish Studio Srl',
|
||||
url: 'http://www.astonishstudio.com/',
|
||||
},
|
||||
},
|
||||
{
|
||||
regex: 'Cotoyogi',
|
||||
name: 'Cotoyogi',
|
||||
category: 'Crawler',
|
||||
url: 'https://ds.rois.ac.jp/center8/crawler/',
|
||||
producer: {
|
||||
name: 'Joint Support-Center for Data Science Research (ROIS-DS)',
|
||||
url: 'https://ds.rois.ac.jp/',
|
||||
},
|
||||
},
|
||||
{
|
||||
regex: 'SuggestBot',
|
||||
name: 'SuggestBot',
|
||||
category: 'Crawler',
|
||||
url: 'https://github.com/nettrom/suggestbot',
|
||||
},
|
||||
{
|
||||
regex:
|
||||
'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus| CM62| HD65))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\\.o\\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \\(cow\\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherx?web|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|xx032_bo9vs83_2a|sslshed|geckotrail|Wordup|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|Node.js|Report Runner|url|Zeus|ZmEu)$',
|
||||
'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus| CM62| HD65))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\\.o\\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \\(cow\\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherx?web|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|xx032_bo9vs83_2a|sslshed|geckotrail|Wordup|Keydrop|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|Node.js|Report Runner|url|Zeus|ZmEu)$',
|
||||
name: 'Generic Bot',
|
||||
},
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user