chore(ua-blocker): update robots.json from upstream (#1270)
Co-authored-by: yusukebe <10682+yusukebe@users.noreply.github.com>pull/1271/head
parent
cb084ca3a3
commit
07e50772ec
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
'@hono/ua-blocker': patch
|
||||||
|
---
|
||||||
|
|
||||||
|
chore(ua-blocker): sync `robots.json` with upstream
|
|
@ -55,6 +55,13 @@
|
||||||
"frequency": "Unclear at this time.",
|
"frequency": "Unclear at this time.",
|
||||||
"description": "Apple has a secondary user agent, Applebot-Extended ... [that is] used to train Apple's foundation models powering generative AI features across Apple products, including Apple Intelligence, Services, and Developer Tools."
|
"description": "Apple has a secondary user agent, Applebot-Extended ... [that is] used to train Apple's foundation models powering generative AI features across Apple products, including Apple Intelligence, Services, and Developer Tools."
|
||||||
},
|
},
|
||||||
|
"Awario": {
|
||||||
|
"operator": "Awario",
|
||||||
|
"respect": "Unclear at this time.",
|
||||||
|
"function": "AI Data Scrapers",
|
||||||
|
"frequency": "Unclear at this time.",
|
||||||
|
"description": "Awario is an AI data scraper operated by Awario. It's not currently known to be artificially intelligent or AI-related. If you think that's incorrect or can provide more detail about its purpose, please contact us. More info can be found at https://darkvisitors.com/agents/agents/awario"
|
||||||
|
},
|
||||||
"bedrockbot": {
|
"bedrockbot": {
|
||||||
"operator": "[Amazon](https://amazon.com)",
|
"operator": "[Amazon](https://amazon.com)",
|
||||||
"respect": "[Yes](https://docs.aws.amazon.com/bedrock/latest/userguide/webcrawl-data-source-connector.html#configuration-webcrawl-connector)",
|
"respect": "[Yes](https://docs.aws.amazon.com/bedrock/latest/userguide/webcrawl-data-source-connector.html#configuration-webcrawl-connector)",
|
||||||
|
@ -146,6 +153,20 @@
|
||||||
"frequency": "Unclear at this time.",
|
"frequency": "Unclear at this time.",
|
||||||
"description": "Provides crawling services for any purpose, probably including AI model training."
|
"description": "Provides crawling services for any purpose, probably including AI model training."
|
||||||
},
|
},
|
||||||
|
"Datenbank Crawler": {
|
||||||
|
"operator": "Datenbank",
|
||||||
|
"respect": "Unclear at this time.",
|
||||||
|
"function": "AI Data Scrapers",
|
||||||
|
"frequency": "Unclear at this time.",
|
||||||
|
"description": "Datenbank Crawler is an AI data scraper operated by Datenbank. It's not currently known to be artificially intelligent or AI-related. If you think that's incorrect or can provide more detail about its purpose, please contact us. More info can be found at https://darkvisitors.com/agents/agents/datenbank-crawler"
|
||||||
|
},
|
||||||
|
"Devin": {
|
||||||
|
"operator": "Devin AI",
|
||||||
|
"respect": "Unclear at this time.",
|
||||||
|
"function": "AI Assistants",
|
||||||
|
"frequency": "Unclear at this time.",
|
||||||
|
"description": "Devin is an AI assistant operated by Devin AI. It's not currently known to be artificially intelligent or AI-related. If you think that's incorrect or can provide more detail about its purpose, please contact us. More info can be found at https://darkvisitors.com/agents/agents/devin"
|
||||||
|
},
|
||||||
"Diffbot": {
|
"Diffbot": {
|
||||||
"operator": "[Diffbot](https://www.diffbot.com/)",
|
"operator": "[Diffbot](https://www.diffbot.com/)",
|
||||||
"respect": "At the discretion of Diffbot users.",
|
"respect": "At the discretion of Diffbot users.",
|
||||||
|
@ -160,6 +181,13 @@
|
||||||
"frequency": "Unclear at this time.",
|
"frequency": "Unclear at this time.",
|
||||||
"description": "DuckAssistBot is used by DuckDuckGo's DuckAssist feature to fetch content and generate realtime AI answers to user searches. More info can be found at https://darkvisitors.com/agents/agents/duckassistbot"
|
"description": "DuckAssistBot is used by DuckDuckGo's DuckAssist feature to fetch content and generate realtime AI answers to user searches. More info can be found at https://darkvisitors.com/agents/agents/duckassistbot"
|
||||||
},
|
},
|
||||||
|
"Echobot Bot": {
|
||||||
|
"operator": "Echobox",
|
||||||
|
"respect": "Unclear at this time.",
|
||||||
|
"function": "AI Data Scrapers",
|
||||||
|
"frequency": "Unclear at this time.",
|
||||||
|
"description": "Echobot Bot is an AI data scraper operated by Echobox. It's not currently known to be artificially intelligent or AI-related. If you think that's incorrect or can provide more detail about its purpose, please contact us. More info can be found at https://darkvisitors.com/agents/agents/echobot-bot"
|
||||||
|
},
|
||||||
"EchoboxBot": {
|
"EchoboxBot": {
|
||||||
"operator": "[Echobox](https://echobox.com)",
|
"operator": "[Echobox](https://echobox.com)",
|
||||||
"respect": "Unclear at this time.",
|
"respect": "Unclear at this time.",
|
||||||
|
@ -252,11 +280,11 @@
|
||||||
"respect": "No"
|
"respect": "No"
|
||||||
},
|
},
|
||||||
"ICC-Crawler": {
|
"ICC-Crawler": {
|
||||||
"description": "Use the collected data for artificial intelligence technologies; provide data to third parties, including commercial companies; those companies can use the data for their own business.",
|
|
||||||
"frequency": "No information.",
|
|
||||||
"function": "Scrapes data to train and support AI technologies.",
|
|
||||||
"operator": "[NICT](https://nict.go.jp)",
|
"operator": "[NICT](https://nict.go.jp)",
|
||||||
"respect": "Yes"
|
"respect": "Yes",
|
||||||
|
"function": "Scrapes data to train and support AI technologies.",
|
||||||
|
"frequency": "No information.",
|
||||||
|
"description": "Use the collected data for artificial intelligence technologies; provide data to third parties, including commercial companies; those companies can use the data for their own business."
|
||||||
},
|
},
|
||||||
"ImagesiftBot": {
|
"ImagesiftBot": {
|
||||||
"description": "Once images and text are downloaded from a webpage, ImageSift analyzes this data from the page and stores the information in an index. Our web intelligence products use this index to enable search and retrieval of similar images.",
|
"description": "Once images and text are downloaded from a webpage, ImageSift analyzes this data from the page and stores the information in an index. Our web intelligence products use this index to enable search and retrieval of similar images.",
|
||||||
|
@ -314,6 +342,13 @@
|
||||||
"frequency": "Unclear at this time.",
|
"frequency": "Unclear at this time.",
|
||||||
"description": "Meta-ExternalFetcher is dispatched by Meta AI products in response to user prompts, when they need to fetch an individual links. More info can be found at https://darkvisitors.com/agents/agents/meta-externalfetcher"
|
"description": "Meta-ExternalFetcher is dispatched by Meta AI products in response to user prompts, when they need to fetch an individual links. More info can be found at https://darkvisitors.com/agents/agents/meta-externalfetcher"
|
||||||
},
|
},
|
||||||
|
"MistralAI-User": {
|
||||||
|
"operator": "Mistral",
|
||||||
|
"respect": "Unclear at this time.",
|
||||||
|
"function": "AI Assistants",
|
||||||
|
"frequency": "Unclear at this time.",
|
||||||
|
"description": "MistralAI-User is an AI assistant operated by Mistral. It's not currently known to be artificially intelligent or AI-related. If you think that's incorrect or can provide more detail about its purpose, please contact us. More info can be found at https://darkvisitors.com/agents/agents/mistralai-user"
|
||||||
|
},
|
||||||
"MistralAI-User/1.0": {
|
"MistralAI-User/1.0": {
|
||||||
"operator": "Mistral AI",
|
"operator": "Mistral AI",
|
||||||
"function": "Takes action based on user prompts.",
|
"function": "Takes action based on user prompts.",
|
||||||
|
@ -399,11 +434,11 @@
|
||||||
"description": "Crawls sites to surface as results in Perplexity."
|
"description": "Crawls sites to surface as results in Perplexity."
|
||||||
},
|
},
|
||||||
"PetalBot": {
|
"PetalBot": {
|
||||||
"description": "Operated by Huawei to provide search and AI assistant services.",
|
|
||||||
"frequency": "No explicit frequency provided.",
|
|
||||||
"function": "Used to provide recommendations in Hauwei assistant and AI search services.",
|
|
||||||
"operator": "[Huawei](https://huawei.com/)",
|
"operator": "[Huawei](https://huawei.com/)",
|
||||||
"respect": "Yes"
|
"respect": "Yes",
|
||||||
|
"function": "Used to provide recommendations in Hauwei assistant and AI search services.",
|
||||||
|
"frequency": "No explicit frequency provided.",
|
||||||
|
"description": "Operated by Huawei to provide search and AI assistant services."
|
||||||
},
|
},
|
||||||
"PhindBot": {
|
"PhindBot": {
|
||||||
"description": "Company offers an AI agent that uses AI and generate extra web query on the fly",
|
"description": "Company offers an AI agent that uses AI and generate extra web query on the fly",
|
||||||
|
@ -420,11 +455,11 @@
|
||||||
"respect": "Unclear at this time."
|
"respect": "Unclear at this time."
|
||||||
},
|
},
|
||||||
"QualifiedBot": {
|
"QualifiedBot": {
|
||||||
"description": "Operated by Qualified as part of their suite of AI product offerings.",
|
|
||||||
"frequency": "No explicit frequency provided.",
|
|
||||||
"function": "Company offers AI agents and other related products; usage can be assumed to support said products.",
|
|
||||||
"operator": "[Qualified](https://www.qualified.com)",
|
"operator": "[Qualified](https://www.qualified.com)",
|
||||||
"respect": "Unclear at this time."
|
"respect": "Unclear at this time.",
|
||||||
|
"function": "Company offers AI agents and other related products; usage can be assumed to support said products.",
|
||||||
|
"frequency": "No explicit frequency provided.",
|
||||||
|
"description": "Operated by Qualified as part of their suite of AI product offerings."
|
||||||
},
|
},
|
||||||
"QuillBot": {
|
"QuillBot": {
|
||||||
"description": "Operated by QuillBot as part of their suite of AI product offerings.",
|
"description": "Operated by QuillBot as part of their suite of AI product offerings.",
|
||||||
|
@ -524,6 +559,13 @@
|
||||||
"operator": "[Velen Crawler](https://velen.io)",
|
"operator": "[Velen Crawler](https://velen.io)",
|
||||||
"respect": "[Yes](https://velen.io)"
|
"respect": "[Yes](https://velen.io)"
|
||||||
},
|
},
|
||||||
|
"WARDBot": {
|
||||||
|
"operator": "WEBSPARK",
|
||||||
|
"respect": "Unclear at this time.",
|
||||||
|
"function": "AI Data Scrapers",
|
||||||
|
"frequency": "Unclear at this time.",
|
||||||
|
"description": "WARDBot is an AI data scraper operated by WEBSPARK. It's not currently known to be artificially intelligent or AI-related. If you think that's incorrect or can provide more detail about its purpose, please contact us. More info can be found at https://darkvisitors.com/agents/agents/wardbot"
|
||||||
|
},
|
||||||
"Webzio-Extended": {
|
"Webzio-Extended": {
|
||||||
"operator": "Unclear at this time.",
|
"operator": "Unclear at this time.",
|
||||||
"respect": "Unclear at this time.",
|
"respect": "Unclear at this time.",
|
||||||
|
|
|
@ -7,6 +7,7 @@ User-agent: Andibot
|
||||||
User-agent: anthropic-ai
|
User-agent: anthropic-ai
|
||||||
User-agent: Applebot
|
User-agent: Applebot
|
||||||
User-agent: Applebot-Extended
|
User-agent: Applebot-Extended
|
||||||
|
User-agent: Awario
|
||||||
User-agent: bedrockbot
|
User-agent: bedrockbot
|
||||||
User-agent: Brightbot 1.0
|
User-agent: Brightbot 1.0
|
||||||
User-agent: Bytespider
|
User-agent: Bytespider
|
||||||
|
@ -20,8 +21,11 @@ User-agent: cohere-ai
|
||||||
User-agent: cohere-training-data-crawler
|
User-agent: cohere-training-data-crawler
|
||||||
User-agent: Cotoyogi
|
User-agent: Cotoyogi
|
||||||
User-agent: Crawlspace
|
User-agent: Crawlspace
|
||||||
|
User-agent: Datenbank Crawler
|
||||||
|
User-agent: Devin
|
||||||
User-agent: Diffbot
|
User-agent: Diffbot
|
||||||
User-agent: DuckAssistBot
|
User-agent: DuckAssistBot
|
||||||
|
User-agent: Echobot Bot
|
||||||
User-agent: EchoboxBot
|
User-agent: EchoboxBot
|
||||||
User-agent: FacebookBot
|
User-agent: FacebookBot
|
||||||
User-agent: facebookexternalhit
|
User-agent: facebookexternalhit
|
||||||
|
@ -44,6 +48,7 @@ User-agent: meta-externalagent
|
||||||
User-agent: Meta-ExternalAgent
|
User-agent: Meta-ExternalAgent
|
||||||
User-agent: meta-externalfetcher
|
User-agent: meta-externalfetcher
|
||||||
User-agent: Meta-ExternalFetcher
|
User-agent: Meta-ExternalFetcher
|
||||||
|
User-agent: MistralAI-User
|
||||||
User-agent: MistralAI-User/1.0
|
User-agent: MistralAI-User/1.0
|
||||||
User-agent: MyCentralAIScraperBot
|
User-agent: MyCentralAIScraperBot
|
||||||
User-agent: NovaAct
|
User-agent: NovaAct
|
||||||
|
@ -74,6 +79,7 @@ User-agent: Sidetrade indexer bot
|
||||||
User-agent: TikTokSpider
|
User-agent: TikTokSpider
|
||||||
User-agent: Timpibot
|
User-agent: Timpibot
|
||||||
User-agent: VelenPublicWebCrawler
|
User-agent: VelenPublicWebCrawler
|
||||||
|
User-agent: WARDBot
|
||||||
User-agent: Webzio-Extended
|
User-agent: Webzio-Extended
|
||||||
User-agent: wpbot
|
User-agent: wpbot
|
||||||
User-agent: YandexAdditional
|
User-agent: YandexAdditional
|
||||||
|
@ -81,7 +87,7 @@ User-agent: YandexAdditionalBot
|
||||||
User-agent: YouBot
|
User-agent: YouBot
|
||||||
Disallow: /
|
Disallow: /
|
||||||
`;
|
`;
|
||||||
export const ALL_BOTS = ["AI2Bot", "Ai2Bot-Dolma", "aiHitBot", "Amazonbot", "Andibot", "anthropic-ai", "Applebot", "Applebot-Extended", "bedrockbot", "Brightbot 1.0", "Bytespider", "CCBot", "ChatGPT-User", "Claude-SearchBot", "Claude-User", "Claude-Web", "ClaudeBot", "cohere-ai", "cohere-training-data-crawler", "Cotoyogi", "Crawlspace", "Diffbot", "DuckAssistBot", "EchoboxBot", "FacebookBot", "facebookexternalhit", "Factset_spyderbot", "FirecrawlAgent", "FriendlyCrawler", "Google-CloudVertexBot", "Google-Extended", "GoogleOther", "GoogleOther-Image", "GoogleOther-Video", "GPTBot", "iaskspider/2.0", "ICC-Crawler", "ImagesiftBot", "img2dataset", "ISSCyberRiskCrawler", "Kangaroo Bot", "meta-externalagent", "Meta-ExternalAgent", "meta-externalfetcher", "Meta-ExternalFetcher", "MistralAI-User/1.0", "MyCentralAIScraperBot", "NovaAct", "OAI-SearchBot", "omgili", "omgilibot", "Operator", "PanguBot", "Panscient", "panscient.com", "Perplexity-User", "PerplexityBot", "PetalBot", "PhindBot", "Poseidon Research Crawler", "QualifiedBot", "QuillBot", "quillbot.com", "SBIntuitionsBot", "Scrapy", "SemrushBot", "SemrushBot-BA", "SemrushBot-CT", "SemrushBot-OCOB", "SemrushBot-SI", "SemrushBot-SWA", "Sidetrade indexer bot", "TikTokSpider", "Timpibot", "VelenPublicWebCrawler", "Webzio-Extended", "wpbot", "YandexAdditional", "YandexAdditionalBot", "YouBot"];
|
export const ALL_BOTS = ["AI2Bot", "Ai2Bot-Dolma", "aiHitBot", "Amazonbot", "Andibot", "anthropic-ai", "Applebot", "Applebot-Extended", "Awario", "bedrockbot", "Brightbot 1.0", "Bytespider", "CCBot", "ChatGPT-User", "Claude-SearchBot", "Claude-User", "Claude-Web", "ClaudeBot", "cohere-ai", "cohere-training-data-crawler", "Cotoyogi", "Crawlspace", "Datenbank Crawler", "Devin", "Diffbot", "DuckAssistBot", "Echobot Bot", "EchoboxBot", "FacebookBot", "facebookexternalhit", "Factset_spyderbot", "FirecrawlAgent", "FriendlyCrawler", "Google-CloudVertexBot", "Google-Extended", "GoogleOther", "GoogleOther-Image", "GoogleOther-Video", "GPTBot", "iaskspider/2.0", "ICC-Crawler", "ImagesiftBot", "img2dataset", "ISSCyberRiskCrawler", "Kangaroo Bot", "meta-externalagent", "Meta-ExternalAgent", "meta-externalfetcher", "Meta-ExternalFetcher", "MistralAI-User", "MistralAI-User/1.0", "MyCentralAIScraperBot", "NovaAct", "OAI-SearchBot", "omgili", "omgilibot", "Operator", "PanguBot", "Panscient", "panscient.com", "Perplexity-User", "PerplexityBot", "PetalBot", "PhindBot", "Poseidon Research Crawler", "QualifiedBot", "QuillBot", "quillbot.com", "SBIntuitionsBot", "Scrapy", "SemrushBot", "SemrushBot-BA", "SemrushBot-CT", "SemrushBot-OCOB", "SemrushBot-SI", "SemrushBot-SWA", "Sidetrade indexer bot", "TikTokSpider", "Timpibot", "VelenPublicWebCrawler", "WARDBot", "Webzio-Extended", "wpbot", "YandexAdditional", "YandexAdditionalBot", "YouBot"];
|
||||||
export const NON_RESPECTING_BOTS = ["Andibot", "anthropic-ai", "Applebot", "Brightbot 1.0", "Bytespider", "Claude-Web", "cohere-ai", "cohere-training-data-crawler", "Diffbot", "DuckAssistBot", "EchoboxBot", "facebookexternalhit", "Factset_spyderbot", "iaskspider/2.0", "img2dataset", "ISSCyberRiskCrawler", "Kangaroo Bot", "Meta-ExternalAgent", "meta-externalfetcher", "Meta-ExternalFetcher", "MyCentralAIScraperBot", "NovaAct", "Operator", "PanguBot", "Perplexity-User", "PhindBot", "Poseidon Research Crawler", "QualifiedBot", "QuillBot", "quillbot.com", "Scrapy", "Sidetrade indexer bot", "TikTokSpider", "Timpibot", "Webzio-Extended", "wpbot"];
|
export const NON_RESPECTING_BOTS = ["Andibot", "anthropic-ai", "Applebot", "Awario", "Brightbot 1.0", "Bytespider", "Claude-Web", "cohere-ai", "cohere-training-data-crawler", "Datenbank Crawler", "Devin", "Diffbot", "DuckAssistBot", "Echobot Bot", "EchoboxBot", "facebookexternalhit", "Factset_spyderbot", "iaskspider/2.0", "img2dataset", "ISSCyberRiskCrawler", "Kangaroo Bot", "Meta-ExternalAgent", "meta-externalfetcher", "Meta-ExternalFetcher", "MistralAI-User", "MyCentralAIScraperBot", "NovaAct", "Operator", "PanguBot", "Perplexity-User", "PhindBot", "Poseidon Research Crawler", "QualifiedBot", "QuillBot", "quillbot.com", "Scrapy", "Sidetrade indexer bot", "TikTokSpider", "Timpibot", "WARDBot", "Webzio-Extended", "wpbot"];
|
||||||
export const ALL_BOTS_REGEX = /(AI2BOT|AI2BOT-DOLMA|AIHITBOT|AMAZONBOT|ANDIBOT|ANTHROPIC-AI|APPLEBOT|APPLEBOT-EXTENDED|BEDROCKBOT|BRIGHTBOT 1.0|BYTESPIDER|CCBOT|CHATGPT-USER|CLAUDE-SEARCHBOT|CLAUDE-USER|CLAUDE-WEB|CLAUDEBOT|COHERE-AI|COHERE-TRAINING-DATA-CRAWLER|COTOYOGI|CRAWLSPACE|DIFFBOT|DUCKASSISTBOT|ECHOBOXBOT|FACEBOOKBOT|FACEBOOKEXTERNALHIT|FACTSET_SPYDERBOT|FIRECRAWLAGENT|FRIENDLYCRAWLER|GOOGLE-CLOUDVERTEXBOT|GOOGLE-EXTENDED|GOOGLEOTHER|GOOGLEOTHER-IMAGE|GOOGLEOTHER-VIDEO|GPTBOT|IASKSPIDER\/2.0|ICC-CRAWLER|IMAGESIFTBOT|IMG2DATASET|ISSCYBERRISKCRAWLER|KANGAROO BOT|META-EXTERNALAGENT|META-EXTERNALAGENT|META-EXTERNALFETCHER|META-EXTERNALFETCHER|MISTRALAI-USER\/1.0|MYCENTRALAISCRAPERBOT|NOVAACT|OAI-SEARCHBOT|OMGILI|OMGILIBOT|OPERATOR|PANGUBOT|PANSCIENT|PANSCIENT.COM|PERPLEXITY-USER|PERPLEXITYBOT|PETALBOT|PHINDBOT|POSEIDON RESEARCH CRAWLER|QUALIFIEDBOT|QUILLBOT|QUILLBOT.COM|SBINTUITIONSBOT|SCRAPY|SEMRUSHBOT|SEMRUSHBOT-BA|SEMRUSHBOT-CT|SEMRUSHBOT-OCOB|SEMRUSHBOT-SI|SEMRUSHBOT-SWA|SIDETRADE INDEXER BOT|TIKTOKSPIDER|TIMPIBOT|VELENPUBLICWEBCRAWLER|WEBZIO-EXTENDED|WPBOT|YANDEXADDITIONAL|YANDEXADDITIONALBOT|YOUBOT)/;
|
export const ALL_BOTS_REGEX = /(AI2BOT|AI2BOT-DOLMA|AIHITBOT|AMAZONBOT|ANDIBOT|ANTHROPIC-AI|APPLEBOT|APPLEBOT-EXTENDED|AWARIO|BEDROCKBOT|BRIGHTBOT 1.0|BYTESPIDER|CCBOT|CHATGPT-USER|CLAUDE-SEARCHBOT|CLAUDE-USER|CLAUDE-WEB|CLAUDEBOT|COHERE-AI|COHERE-TRAINING-DATA-CRAWLER|COTOYOGI|CRAWLSPACE|DATENBANK CRAWLER|DEVIN|DIFFBOT|DUCKASSISTBOT|ECHOBOT BOT|ECHOBOXBOT|FACEBOOKBOT|FACEBOOKEXTERNALHIT|FACTSET_SPYDERBOT|FIRECRAWLAGENT|FRIENDLYCRAWLER|GOOGLE-CLOUDVERTEXBOT|GOOGLE-EXTENDED|GOOGLEOTHER|GOOGLEOTHER-IMAGE|GOOGLEOTHER-VIDEO|GPTBOT|IASKSPIDER\/2.0|ICC-CRAWLER|IMAGESIFTBOT|IMG2DATASET|ISSCYBERRISKCRAWLER|KANGAROO BOT|META-EXTERNALAGENT|META-EXTERNALAGENT|META-EXTERNALFETCHER|META-EXTERNALFETCHER|MISTRALAI-USER|MISTRALAI-USER\/1.0|MYCENTRALAISCRAPERBOT|NOVAACT|OAI-SEARCHBOT|OMGILI|OMGILIBOT|OPERATOR|PANGUBOT|PANSCIENT|PANSCIENT.COM|PERPLEXITY-USER|PERPLEXITYBOT|PETALBOT|PHINDBOT|POSEIDON RESEARCH CRAWLER|QUALIFIEDBOT|QUILLBOT|QUILLBOT.COM|SBINTUITIONSBOT|SCRAPY|SEMRUSHBOT|SEMRUSHBOT-BA|SEMRUSHBOT-CT|SEMRUSHBOT-OCOB|SEMRUSHBOT-SI|SEMRUSHBOT-SWA|SIDETRADE INDEXER BOT|TIKTOKSPIDER|TIMPIBOT|VELENPUBLICWEBCRAWLER|WARDBOT|WEBZIO-EXTENDED|WPBOT|YANDEXADDITIONAL|YANDEXADDITIONALBOT|YOUBOT)/;
|
||||||
export const NON_RESPECTING_BOTS_REGEX = /(ANDIBOT|ANTHROPIC-AI|APPLEBOT|BRIGHTBOT 1.0|BYTESPIDER|CLAUDE-WEB|COHERE-AI|COHERE-TRAINING-DATA-CRAWLER|DIFFBOT|DUCKASSISTBOT|ECHOBOXBOT|FACEBOOKEXTERNALHIT|FACTSET_SPYDERBOT|IASKSPIDER\/2.0|IMG2DATASET|ISSCYBERRISKCRAWLER|KANGAROO BOT|META-EXTERNALAGENT|META-EXTERNALFETCHER|META-EXTERNALFETCHER|MYCENTRALAISCRAPERBOT|NOVAACT|OPERATOR|PANGUBOT|PERPLEXITY-USER|PHINDBOT|POSEIDON RESEARCH CRAWLER|QUALIFIEDBOT|QUILLBOT|QUILLBOT.COM|SCRAPY|SIDETRADE INDEXER BOT|TIKTOKSPIDER|TIMPIBOT|WEBZIO-EXTENDED|WPBOT)/;
|
export const NON_RESPECTING_BOTS_REGEX = /(ANDIBOT|ANTHROPIC-AI|APPLEBOT|AWARIO|BRIGHTBOT 1.0|BYTESPIDER|CLAUDE-WEB|COHERE-AI|COHERE-TRAINING-DATA-CRAWLER|DATENBANK CRAWLER|DEVIN|DIFFBOT|DUCKASSISTBOT|ECHOBOT BOT|ECHOBOXBOT|FACEBOOKEXTERNALHIT|FACTSET_SPYDERBOT|IASKSPIDER\/2.0|IMG2DATASET|ISSCYBERRISKCRAWLER|KANGAROO BOT|META-EXTERNALAGENT|META-EXTERNALFETCHER|META-EXTERNALFETCHER|MISTRALAI-USER|MYCENTRALAISCRAPERBOT|NOVAACT|OPERATOR|PANGUBOT|PERPLEXITY-USER|PHINDBOT|POSEIDON RESEARCH CRAWLER|QUALIFIEDBOT|QUILLBOT|QUILLBOT.COM|SCRAPY|SIDETRADE INDEXER BOT|TIKTOKSPIDER|TIMPIBOT|WARDBOT|WEBZIO-EXTENDED|WPBOT)/;
|
||||||
|
|
Loading…
Reference in New Issue