94 lines
6.3 KiB
TypeScript
94 lines
6.3 KiB
TypeScript
// This file is generated by scripts/get-robots-txt.ts. Do not edit manually.
|
|
/**
 * Body of a robots.txt file: one `User-agent` line per listed crawler,
 * followed by a single `Disallow: /` rule and a trailing newline.
 *
 * The `User-agent` lines are kept contiguous, with nothing between them and
 * the `Disallow` rule, so the text contains only robots.txt records.
 */
export const ROBOTS_TXT = `User-agent: AI2Bot
User-agent: Ai2Bot-Dolma
User-agent: aiHitBot
User-agent: Amazonbot
User-agent: Andibot
User-agent: anthropic-ai
User-agent: Applebot
User-agent: Applebot-Extended
User-agent: Awario
User-agent: bedrockbot
User-agent: Brightbot 1.0
User-agent: Bytespider
User-agent: CCBot
User-agent: ChatGPT-User
User-agent: Claude-SearchBot
User-agent: Claude-User
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
User-agent: Cotoyogi
User-agent: Crawlspace
User-agent: Datenbank Crawler
User-agent: Devin
User-agent: Diffbot
User-agent: DuckAssistBot
User-agent: Echobot Bot
User-agent: EchoboxBot
User-agent: FacebookBot
User-agent: facebookexternalhit
User-agent: Factset_spyderbot
User-agent: FirecrawlAgent
User-agent: FriendlyCrawler
User-agent: Gemini-Deep-Research
User-agent: Google-CloudVertexBot
User-agent: Google-Extended
User-agent: GoogleOther
User-agent: GoogleOther-Image
User-agent: GoogleOther-Video
User-agent: GPTBot
User-agent: iaskspider/2.0
User-agent: ICC-Crawler
User-agent: ImagesiftBot
User-agent: img2dataset
User-agent: ISSCyberRiskCrawler
User-agent: Kangaroo Bot
User-agent: meta-externalagent
User-agent: Meta-ExternalAgent
User-agent: meta-externalfetcher
User-agent: Meta-ExternalFetcher
User-agent: MistralAI-User
User-agent: MistralAI-User/1.0
User-agent: MyCentralAIScraperBot
User-agent: netEstate Imprint Crawler
User-agent: NovaAct
User-agent: OAI-SearchBot
User-agent: omgili
User-agent: omgilibot
User-agent: Operator
User-agent: PanguBot
User-agent: Panscient
User-agent: panscient.com
User-agent: Perplexity-User
User-agent: PerplexityBot
User-agent: PetalBot
User-agent: PhindBot
User-agent: Poseidon Research Crawler
User-agent: QualifiedBot
User-agent: QuillBot
User-agent: quillbot.com
User-agent: SBIntuitionsBot
User-agent: Scrapy
User-agent: SemrushBot-OCOB
User-agent: SemrushBot-SWA
User-agent: Sidetrade indexer bot
User-agent: SummalyBot
User-agent: Thinkbot
User-agent: TikTokSpider
User-agent: Timpibot
User-agent: VelenPublicWebCrawler
User-agent: WARDBot
User-agent: Webzio-Extended
User-agent: wpbot
User-agent: YandexAdditional
User-agent: YandexAdditionalBot
User-agent: YouBot
Disallow: /
`;
|
|
/**
 * User-agent names of every crawler covered by this module, in the same
 * order as the `User-agent` lines of `ROBOTS_TXT`.
 *
 * Names are stored with their original casing; variants that differ only by
 * case (e.g. "meta-externalagent" / "Meta-ExternalAgent") are separate entries.
 */
export const ALL_BOTS: string[] = [
  "AI2Bot",
  "Ai2Bot-Dolma",
  "aiHitBot",
  "Amazonbot",
  "Andibot",
  "anthropic-ai",
  "Applebot",
  "Applebot-Extended",
  "Awario",
  "bedrockbot",
  "Brightbot 1.0",
  "Bytespider",
  "CCBot",
  "ChatGPT-User",
  "Claude-SearchBot",
  "Claude-User",
  "Claude-Web",
  "ClaudeBot",
  "cohere-ai",
  "cohere-training-data-crawler",
  "Cotoyogi",
  "Crawlspace",
  "Datenbank Crawler",
  "Devin",
  "Diffbot",
  "DuckAssistBot",
  "Echobot Bot",
  "EchoboxBot",
  "FacebookBot",
  "facebookexternalhit",
  "Factset_spyderbot",
  "FirecrawlAgent",
  "FriendlyCrawler",
  "Gemini-Deep-Research",
  "Google-CloudVertexBot",
  "Google-Extended",
  "GoogleOther",
  "GoogleOther-Image",
  "GoogleOther-Video",
  "GPTBot",
  "iaskspider/2.0",
  "ICC-Crawler",
  "ImagesiftBot",
  "img2dataset",
  "ISSCyberRiskCrawler",
  "Kangaroo Bot",
  "meta-externalagent",
  "Meta-ExternalAgent",
  "meta-externalfetcher",
  "Meta-ExternalFetcher",
  "MistralAI-User",
  "MistralAI-User/1.0",
  "MyCentralAIScraperBot",
  "netEstate Imprint Crawler",
  "NovaAct",
  "OAI-SearchBot",
  "omgili",
  "omgilibot",
  "Operator",
  "PanguBot",
  "Panscient",
  "panscient.com",
  "Perplexity-User",
  "PerplexityBot",
  "PetalBot",
  "PhindBot",
  "Poseidon Research Crawler",
  "QualifiedBot",
  "QuillBot",
  "quillbot.com",
  "SBIntuitionsBot",
  "Scrapy",
  "SemrushBot-OCOB",
  "SemrushBot-SWA",
  "Sidetrade indexer bot",
  "SummalyBot",
  "Thinkbot",
  "TikTokSpider",
  "Timpibot",
  "VelenPublicWebCrawler",
  "WARDBot",
  "Webzio-Extended",
  "wpbot",
  "YandexAdditional",
  "YandexAdditionalBot",
  "YouBot",
];
|
|
/**
 * User-agent names singled out as "non-respecting"; every entry here also
 * appears in `ALL_BOTS`.
 *
 * NOTE(review): presumably these are crawlers that do not honor robots.txt —
 * confirm against the generator script named in the file header.
 */
export const NON_RESPECTING_BOTS: string[] = [
  "Andibot",
  "anthropic-ai",
  "Applebot",
  "Awario",
  "Brightbot 1.0",
  "Bytespider",
  "Claude-Web",
  "cohere-ai",
  "cohere-training-data-crawler",
  "Datenbank Crawler",
  "Devin",
  "Diffbot",
  "DuckAssistBot",
  "Echobot Bot",
  "EchoboxBot",
  "facebookexternalhit",
  "Factset_spyderbot",
  "Gemini-Deep-Research",
  "iaskspider/2.0",
  "img2dataset",
  "ISSCyberRiskCrawler",
  "Kangaroo Bot",
  "Meta-ExternalAgent",
  "meta-externalfetcher",
  "Meta-ExternalFetcher",
  "MistralAI-User",
  "MyCentralAIScraperBot",
  "netEstate Imprint Crawler",
  "NovaAct",
  "Operator",
  "PanguBot",
  "Perplexity-User",
  "PhindBot",
  "Poseidon Research Crawler",
  "QualifiedBot",
  "QuillBot",
  "quillbot.com",
  "Scrapy",
  "Sidetrade indexer bot",
  "SummalyBot",
  "Thinkbot",
  "TikTokSpider",
  "Timpibot",
  "WARDBot",
  "Webzio-Extended",
  "wpbot",
];
|
|
/**
 * Matches any string that contains the upper-cased name of one of the listed
 * crawlers; capture group 1 holds the first alternative that matched.
 *
 * The pattern is unanchored and has no `i` flag, so only upper-case
 * occurrences match.
 * NOTE(review): callers presumably upper-case the User-Agent header before
 * testing it — confirm at the call sites.
 *
 * Literal dots in names ("BRIGHTBOT 1.0", "PANSCIENT.COM", ...) are escaped so
 * they match only a dot, and alternatives that became identical after
 * upper-casing (META-EXTERNALAGENT, META-EXTERNALFETCHER) are listed once.
 * NOTE(review): this file is generated; the same escaping should be applied in
 * the generator so a regeneration does not undo it — confirm.
 */
export const ALL_BOTS_REGEX = /(AI2BOT|AI2BOT-DOLMA|AIHITBOT|AMAZONBOT|ANDIBOT|ANTHROPIC-AI|APPLEBOT|APPLEBOT-EXTENDED|AWARIO|BEDROCKBOT|BRIGHTBOT 1\.0|BYTESPIDER|CCBOT|CHATGPT-USER|CLAUDE-SEARCHBOT|CLAUDE-USER|CLAUDE-WEB|CLAUDEBOT|COHERE-AI|COHERE-TRAINING-DATA-CRAWLER|COTOYOGI|CRAWLSPACE|DATENBANK CRAWLER|DEVIN|DIFFBOT|DUCKASSISTBOT|ECHOBOT BOT|ECHOBOXBOT|FACEBOOKBOT|FACEBOOKEXTERNALHIT|FACTSET_SPYDERBOT|FIRECRAWLAGENT|FRIENDLYCRAWLER|GEMINI-DEEP-RESEARCH|GOOGLE-CLOUDVERTEXBOT|GOOGLE-EXTENDED|GOOGLEOTHER|GOOGLEOTHER-IMAGE|GOOGLEOTHER-VIDEO|GPTBOT|IASKSPIDER\/2\.0|ICC-CRAWLER|IMAGESIFTBOT|IMG2DATASET|ISSCYBERRISKCRAWLER|KANGAROO BOT|META-EXTERNALAGENT|META-EXTERNALFETCHER|MISTRALAI-USER|MISTRALAI-USER\/1\.0|MYCENTRALAISCRAPERBOT|NETESTATE IMPRINT CRAWLER|NOVAACT|OAI-SEARCHBOT|OMGILI|OMGILIBOT|OPERATOR|PANGUBOT|PANSCIENT|PANSCIENT\.COM|PERPLEXITY-USER|PERPLEXITYBOT|PETALBOT|PHINDBOT|POSEIDON RESEARCH CRAWLER|QUALIFIEDBOT|QUILLBOT|QUILLBOT\.COM|SBINTUITIONSBOT|SCRAPY|SEMRUSHBOT-OCOB|SEMRUSHBOT-SWA|SIDETRADE INDEXER BOT|SUMMALYBOT|THINKBOT|TIKTOKSPIDER|TIMPIBOT|VELENPUBLICWEBCRAWLER|WARDBOT|WEBZIO-EXTENDED|WPBOT|YANDEXADDITIONAL|YANDEXADDITIONALBOT|YOUBOT)/;
|
|
/**
 * Matches any string that contains the upper-cased name of one of the
 * crawlers listed in `NON_RESPECTING_BOTS`; capture group 1 holds the first
 * alternative that matched.
 *
 * The pattern is unanchored and has no `i` flag, so only upper-case
 * occurrences match.
 * NOTE(review): callers presumably upper-case the User-Agent header before
 * testing it — confirm at the call sites.
 *
 * Literal dots in names ("BRIGHTBOT 1.0", "QUILLBOT.COM", ...) are escaped so
 * they match only a dot, and the alternative that became identical after
 * upper-casing (META-EXTERNALFETCHER) is listed once.
 * NOTE(review): this file is generated; the same escaping should be applied in
 * the generator so a regeneration does not undo it — confirm.
 */
export const NON_RESPECTING_BOTS_REGEX = /(ANDIBOT|ANTHROPIC-AI|APPLEBOT|AWARIO|BRIGHTBOT 1\.0|BYTESPIDER|CLAUDE-WEB|COHERE-AI|COHERE-TRAINING-DATA-CRAWLER|DATENBANK CRAWLER|DEVIN|DIFFBOT|DUCKASSISTBOT|ECHOBOT BOT|ECHOBOXBOT|FACEBOOKEXTERNALHIT|FACTSET_SPYDERBOT|GEMINI-DEEP-RESEARCH|IASKSPIDER\/2\.0|IMG2DATASET|ISSCYBERRISKCRAWLER|KANGAROO BOT|META-EXTERNALAGENT|META-EXTERNALFETCHER|MISTRALAI-USER|MYCENTRALAISCRAPERBOT|NETESTATE IMPRINT CRAWLER|NOVAACT|OPERATOR|PANGUBOT|PERPLEXITY-USER|PHINDBOT|POSEIDON RESEARCH CRAWLER|QUALIFIEDBOT|QUILLBOT|QUILLBOT\.COM|SCRAPY|SIDETRADE INDEXER BOT|SUMMALYBOT|THINKBOT|TIKTOKSPIDER|TIMPIBOT|WARDBOT|WEBZIO-EXTENDED|WPBOT)/;
|