271 lines
9.1 KiB
TypeScript
271 lines
9.1 KiB
TypeScript
import { Hono } from 'hono'
|
|
import { aiBots, nonRespectingAiBots, AI_ROBOTS_TXT, useAiRobotsTxt } from './ai-bots'
|
|
import {
|
|
ALL_BOTS,
|
|
NON_RESPECTING_BOTS,
|
|
ROBOTS_TXT,
|
|
ALL_BOTS_REGEX,
|
|
NON_RESPECTING_BOTS_REGEX,
|
|
} from './generated'
|
|
|
|
describe('AI Bots module', () => {
|
|
describe('aiBots export', () => {
|
|
it('Should export ALL_BOTS_REGEX from generated', () => {
|
|
expect(aiBots).toBe(ALL_BOTS_REGEX)
|
|
})
|
|
|
|
it('Should be a RegExp object', () => {
|
|
expect(aiBots instanceof RegExp).toBe(true)
|
|
expect(aiBots.source.length).toBeGreaterThan(0)
|
|
expect(aiBots.toString()).toMatch(/^\/.*\/$/)
|
|
})
|
|
|
|
it('Should include known AI bots', () => {
|
|
expect(aiBots.test('GPTBOT')).toBe(true)
|
|
expect(aiBots.test('CLAUDEBOT')).toBe(true)
|
|
expect(aiBots.test('BYTESPIDER')).toBe(true)
|
|
expect(aiBots.test('CHATGPT-USER')).toBe(true)
|
|
})
|
|
|
|
it('Should be properly formatted as a regex', () => {
|
|
expect(aiBots.source).toContain('|')
|
|
})
|
|
})
|
|
|
|
describe('nonRespectingAiBots export', () => {
|
|
it('Should export NON_RESPECTING_BOTS_REGEX from generated', () => {
|
|
expect(nonRespectingAiBots).toBe(NON_RESPECTING_BOTS_REGEX)
|
|
})
|
|
|
|
it('Should be a RegExp object', () => {
|
|
expect(nonRespectingAiBots instanceof RegExp).toBe(true)
|
|
expect(nonRespectingAiBots.source.length).toBeGreaterThan(0)
|
|
expect(nonRespectingAiBots.toString()).toMatch(/^\/.*\/$/)
|
|
})
|
|
|
|
it('Should be a subset of aiBots', () => {
|
|
// Check if all non-respecting bots are included in the aiBots pattern
|
|
// by testing the original string array against both regexes
|
|
NON_RESPECTING_BOTS.forEach((bot) => {
|
|
expect(aiBots.test(bot.toUpperCase())).toBe(true)
|
|
})
|
|
})
|
|
|
|
it('Should include known non-respecting bots', () => {
|
|
expect(nonRespectingAiBots.test('BYTESPIDER')).toBe(true)
|
|
expect(nonRespectingAiBots.test('IASKSPIDER/2.0')).toBe(true)
|
|
})
|
|
|
|
it('Should not include known respecting bots', () => {
|
|
expect(nonRespectingAiBots.test('GPTBOT')).toBe(false)
|
|
expect(nonRespectingAiBots.test('CHATGPT-USER')).toBe(false)
|
|
})
|
|
|
|
it('Should have a pattern that is shorter than aiBots pattern', () => {
|
|
expect(nonRespectingAiBots.source.length).toBeLessThan(aiBots.source.length)
|
|
})
|
|
})
|
|
|
|
describe('AI_ROBOTS_TXT export', () => {
|
|
it('Should export ROBOTS_TXT from generated', () => {
|
|
expect(AI_ROBOTS_TXT).toBe(ROBOTS_TXT)
|
|
})
|
|
|
|
it('Should be a non-empty string', () => {
|
|
expect(typeof AI_ROBOTS_TXT).toBe('string')
|
|
expect(AI_ROBOTS_TXT.length).toBeGreaterThan(0)
|
|
})
|
|
|
|
it('Should contain User-agent directives', () => {
|
|
expect(AI_ROBOTS_TXT).toContain('User-agent:')
|
|
})
|
|
|
|
it('Should contain Disallow directive', () => {
|
|
expect(AI_ROBOTS_TXT).toContain('Disallow: /')
|
|
})
|
|
|
|
it('Should have proper robots.txt format', () => {
|
|
const lines = AI_ROBOTS_TXT.split('\n')
|
|
const userAgentLines = lines.filter((line) => line.startsWith('User-agent:'))
|
|
const disallowIndex = lines.findIndex((line) => line === 'Disallow: /')
|
|
|
|
expect(userAgentLines.length).toBeGreaterThan(0)
|
|
expect(disallowIndex).toBeGreaterThan(0)
|
|
expect(lines[lines.length - 1]).toBe('')
|
|
})
|
|
|
|
it('Should include all AI bots', () => {
|
|
ALL_BOTS.forEach((bot) => {
|
|
expect(AI_ROBOTS_TXT).toContain(`User-agent: ${bot}`)
|
|
})
|
|
})
|
|
})
|
|
|
|
describe('useAiRobotsTxt function', () => {
|
|
it('Should return a function', () => {
|
|
const middleware = useAiRobotsTxt()
|
|
expect(typeof middleware).toBe('function')
|
|
})
|
|
|
|
it('Should create working Hono middleware', async () => {
|
|
const app = new Hono()
|
|
app.use('/robots.txt', useAiRobotsTxt())
|
|
|
|
const res = await app.request('/robots.txt')
|
|
expect(res.status).toBe(200)
|
|
})
|
|
|
|
it('Should serve the correct robots.txt content', async () => {
|
|
const app = new Hono()
|
|
app.use('/robots.txt', useAiRobotsTxt())
|
|
|
|
const res = await app.request('/robots.txt')
|
|
const content = await res.text()
|
|
|
|
expect(content).toBe(AI_ROBOTS_TXT)
|
|
})
|
|
|
|
it('Should set correct content type', async () => {
|
|
const app = new Hono()
|
|
app.use('/robots.txt', useAiRobotsTxt())
|
|
|
|
const res = await app.request('/robots.txt')
|
|
expect(res.headers.get('Content-Type')).toBe('text/plain; charset=UTF-8')
|
|
})
|
|
|
|
it('Should work with different paths', async () => {
|
|
const app = new Hono()
|
|
app.use('/custom-robots.txt', useAiRobotsTxt())
|
|
app.use('/api/robots.txt', useAiRobotsTxt())
|
|
|
|
const res1 = await app.request('/custom-robots.txt')
|
|
const res2 = await app.request('/api/robots.txt')
|
|
|
|
expect(res1.status).toBe(200)
|
|
expect(res2.status).toBe(200)
|
|
expect(await res1.text()).toBe(AI_ROBOTS_TXT)
|
|
expect(await res2.text()).toBe(AI_ROBOTS_TXT)
|
|
})
|
|
|
|
it('Should not interfere with other routes', async () => {
|
|
const app = new Hono()
|
|
app.use('/robots.txt', useAiRobotsTxt())
|
|
app.get('/other', (c) => c.text('other content'))
|
|
app.get('/api/data', (c) => c.json({ data: 'test' }))
|
|
|
|
const robotsRes = await app.request('/robots.txt')
|
|
const otherRes = await app.request('/other')
|
|
const apiRes = await app.request('/api/data')
|
|
|
|
expect(robotsRes.status).toBe(200)
|
|
expect(await robotsRes.text()).toBe(AI_ROBOTS_TXT)
|
|
|
|
expect(otherRes.status).toBe(200)
|
|
expect(await otherRes.text()).toBe('other content')
|
|
|
|
expect(apiRes.status).toBe(200)
|
|
expect(await apiRes.json()).toEqual({ data: 'test' })
|
|
})
|
|
|
|
it('Should handle requests with different methods', async () => {
|
|
const app = new Hono()
|
|
app.use('/robots.txt', useAiRobotsTxt())
|
|
|
|
// GET request
|
|
const getRes = await app.request('/robots.txt', { method: 'GET' })
|
|
expect(getRes.status).toBe(200)
|
|
expect(await getRes.text()).toBe(AI_ROBOTS_TXT)
|
|
|
|
// HEAD request
|
|
const headRes = await app.request('/robots.txt', { method: 'HEAD' })
|
|
expect(headRes.status).toBe(200)
|
|
expect(headRes.headers.get('Content-Type')).toBe('text/plain; charset=UTF-8')
|
|
|
|
// POST request should also work since it's middleware
|
|
const postRes = await app.request('/robots.txt', { method: 'POST' })
|
|
expect(postRes.status).toBe(200)
|
|
expect(await postRes.text()).toBe(AI_ROBOTS_TXT)
|
|
})
|
|
|
|
it('Should serve consistent content across multiple requests', async () => {
|
|
const app = new Hono()
|
|
app.use('/robots.txt', useAiRobotsTxt())
|
|
|
|
const requests = Array.from({ length: 5 }, () => app.request('/robots.txt'))
|
|
const responses = await Promise.all(requests)
|
|
const contents = await Promise.all(responses.map((res) => res.text()))
|
|
|
|
responses.forEach((res) => {
|
|
expect(res.status).toBe(200)
|
|
expect(res.headers.get('Content-Type')).toBe('text/plain; charset=UTF-8')
|
|
})
|
|
|
|
contents.forEach((content) => {
|
|
expect(content).toBe(AI_ROBOTS_TXT)
|
|
})
|
|
|
|
// All contents should be identical
|
|
const uniqueContents = [...new Set(contents)]
|
|
expect(uniqueContents.length).toBe(1)
|
|
})
|
|
})
|
|
|
|
describe('Integration tests', () => {
|
|
it('Should work together with uaBlocker middleware', async () => {
|
|
// This test ensures the ai-bots module integrates well with the main uaBlocker
|
|
const { uaBlocker } = await import('./index')
|
|
|
|
const app = new Hono()
|
|
app.use('/robots.txt', useAiRobotsTxt())
|
|
app.use('*', uaBlocker({ blocklist: nonRespectingAiBots }))
|
|
app.get('/', (c) => c.text('Hello World'))
|
|
|
|
// Should serve robots.txt
|
|
const robotsRes = await app.request('/robots.txt')
|
|
expect(robotsRes.status).toBe(200)
|
|
expect(await robotsRes.text()).toBe(AI_ROBOTS_TXT)
|
|
|
|
// Should block non-respecting bots
|
|
const blockedRes = await app.request('/', {
|
|
headers: { 'User-Agent': 'Bytespider' },
|
|
})
|
|
expect(blockedRes.status).toBe(403)
|
|
|
|
// Should allow respecting bots
|
|
const allowedRes = await app.request('/', {
|
|
headers: { 'User-Agent': 'GPTBot' },
|
|
})
|
|
expect(allowedRes.status).toBe(200)
|
|
expect(await allowedRes.text()).toBe('Hello World')
|
|
})
|
|
|
|
it('Should work with demo pattern', async () => {
|
|
const { uaBlocker } = await import('./index')
|
|
|
|
const app = new Hono()
|
|
app.use('*', uaBlocker({ blocklist: nonRespectingAiBots }))
|
|
app.use('/robots.txt', useAiRobotsTxt())
|
|
app.get('/', (c) => c.text('Hello World'))
|
|
|
|
// Test the same pattern as shown in demo.ts
|
|
const robotsRes = await app.request('/robots.txt')
|
|
expect(robotsRes.status).toBe(200)
|
|
expect(robotsRes.headers.get('Content-Type')).toBe('text/plain; charset=UTF-8')
|
|
|
|
const homeRes = await app.request('/')
|
|
expect(homeRes.status).toBe(200)
|
|
expect(await homeRes.text()).toBe('Hello World')
|
|
})
|
|
})
|
|
|
|
describe('Data consistency validation', () => {
|
|
it('Should have robots.txt that matches bot lists', () => {
|
|
const userAgentLines = AI_ROBOTS_TXT.split('\n')
|
|
.filter((line) => line.startsWith('User-agent:'))
|
|
.map((line) => line.replace('User-agent: ', ''))
|
|
|
|
expect(userAgentLines.sort()).toEqual(ALL_BOTS.sort())
|
|
})
|
|
})
|
|
})
|