Skip to content

Commit c62f49a

Browse files
authored
Block indexing not crawling (#17044)
* Block indexing instead of crawling * Lint * Update deprecated-enterprise-versions.js * Combine loops
1 parent d507eec commit c62f49a

8 files changed

Lines changed: 162 additions & 127 deletions

File tree

middleware/archived-enterprise-versions-assets.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ module.exports = async (req, res, next) => {
2525
res.set('content-type', r.headers['content-type'])
2626
res.set('content-length', r.headers['content-length'])
2727
res.set('x-is-archived', 'true')
28-
res.set('x-robots-tag', 'none')
28+
res.set('x-robots-tag', 'noindex')
2929
res.send(r.body)
3030
} catch (err) {
3131
next()

middleware/archived-enterprise-versions.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ module.exports = async (req, res, next) => {
3636
try {
3737
const r = await got(getProxyPath(req.path, requestedVersion))
3838
res.set('content-type', r.headers['content-type'])
39-
res.set('x-robots-tag', 'none')
39+
res.set('x-robots-tag', 'noindex')
4040

4141
// make the stubbed redirect files added in >=2.18 return 301 instead of 200
4242
const staticRedirect = r.body.match(patterns.staticRedirect)

middleware/block-robots.js

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
const languages = require('../lib/languages')
2+
const products = require('../lib/all-products')
3+
const { deprecated } = require('../lib/enterprise-server-releases.js')
4+
5+
/**
 * Escape regular-expression metacharacters so `value` can be interpolated
 * into a RegExp source and matched literally (e.g. the `.` in `2.13`).
 *
 * @param {*} value - Value to embed; it is converted to a string first.
 * @returns {string} The text with every metacharacter backslash-escaped.
 */
function escapeRegExp (value) {
  return String(value).replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
}

// Request-path patterns that `blockIndex` tests against. Built once at module
// load from the language, product and release data; all are case-insensitive.
const pathRegExps = [
  // Disallow indexing of WIP localized content: `/<code>` itself and
  // everything beneath it
  ...Object.values(languages)
    .filter(language => language.wip)
    .map(language => new RegExp(`^/${escapeRegExp(language.code)}(/.*)?$`, 'i')),

  // Disallow indexing of WIP and hidden products, matched by the product href
  // or by `<version>/<product id>` appearing anywhere in the path
  ...Object.values(products)
    .filter(product => product.wip || product.hidden)
    .flatMap(product => [
      new RegExp(`^/.*?${escapeRegExp(product.href)}`, 'i'),
      ...product.versions.map(
        version => new RegExp(`^/.*?${escapeRegExp(version)}/${escapeRegExp(product.id)}`, 'i')
      )
    ]),

  // Disallow indexing of deprecated enterprise versions. `(?:/|$)` covers the
  // version's root page as well as the pages below it, while still refusing a
  // longer version number that merely starts with the same characters.
  ...deprecated.flatMap(version => {
    const release = escapeRegExp(version)
    return [
      new RegExp(`^/.*?/enterprise-server@${release}(?:/|$)`, 'i'),
      new RegExp(`^/.*?/enterprise/${release}(?:/|$)`, 'i')
    ]
  })
]
28+
29+
/**
 * Report whether a request path matches any of the patterns in `pathRegExps`.
 *
 * @param {string} path - Request path to check.
 * @returns {boolean} `true` as soon as one pattern matches, otherwise `false`.
 */
function blockIndex (path) {
  for (const pattern of pathRegExps) {
    if (pattern.test(path)) {
      return true
    }
  }
  return false
}
32+
33+
/**
 * Express middleware that sets the `x-robots-tag: noindex` response header
 * when `blockIndex` reports a match for `req.path`. It always hands control
 * to `next()` and returns whatever `next()` returns.
 *
 * @param {object} req - Request; only `req.path` is read.
 * @param {object} res - Response; only `res.set` is called.
 * @param {Function} next - Continuation invoked with no arguments.
 * @returns {*} The return value of `next()`.
 */
const middleware = (req, res, next) => {
  const shouldBlock = blockIndex(req.path)
  if (shouldBlock) {
    res.set('x-robots-tag', 'noindex')
  }
  return next()
}
37+
38+
middleware.blockIndex = blockIndex
39+
40+
module.exports = middleware

middleware/index.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ module.exports = function (app) {
5656

5757
// *** Config and context for rendering ***
5858
app.use(require('./find-page')) // Must come before archived-enterprise-versions, breadcrumbs, featured-links, products, render-page
59+
app.use(require('./block-robots'))
5960

6061
// *** Rendering, 2xx responses ***
6162
// I largely ordered these by use frequency

middleware/robots.js

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,4 @@
1-
const languages = require('../lib/languages')
2-
const products = require('../lib/all-products')
3-
const { deprecated } = require('../lib/enterprise-server-releases.js')
4-
5-
let defaultResponse = 'User-agent: *'
6-
7-
// Disallow crawling of WIP localized content
8-
Object.values(languages)
9-
.filter(language => language.wip)
10-
.forEach(language => {
11-
defaultResponse = defaultResponse.concat(`\nDisallow: /${language.code}\nDisallow: /${language.code}/*\n`)
12-
})
13-
14-
// Disallow crawling of WIP products
15-
Object.values(products)
16-
.filter(product => product.wip || product.hidden)
17-
.forEach(product => {
18-
defaultResponse = defaultResponse.concat(`\nDisallow: /*${product.href}`)
19-
product.versions.forEach(version => {
20-
defaultResponse = defaultResponse.concat(`\nDisallow: /*${version}/${product.id}`)
21-
})
22-
})
23-
24-
// Disallow crawling of Deprecated enterprise versions
25-
deprecated
26-
.forEach(version => {
27-
defaultResponse = defaultResponse
28-
.concat(`\nDisallow: /*/enterprise-server@${version}/*`)
29-
.concat(`\nDisallow: /*/enterprise/${version}/*`)
30-
})
1+
const defaultResponse = 'User-agent: *'
312

323
const disallowAll = `User-agent: *
334
Disallow: /`

tests/rendering/block-robots.js

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
const { blockIndex } = require('../../middleware/block-robots')
2+
const languages = require('../../lib/languages')
3+
const products = require('../../lib/all-products')
4+
const enterpriseServerReleases = require('../../lib/enterprise-server-releases')
5+
6+
/**
 * Test helper: the logical negation of `blockIndex` for the given path.
 *
 * @param {string} path - Request path to check.
 * @returns {boolean} `true` when `blockIndex(path)` is falsy.
 */
function allowIndex (path) {
  const isBlocked = blockIndex(path)
  return !isBlocked
}
9+
10+
// Checks, through `allowIndex`, which request paths the block-robots path
// matcher lets through. The titles say "indexing" because the code under test
// decides on a noindex header; it does not control crawling. The callbacks
// are synchronous, so none of them is declared `async`.
describe('block robots', () => {
  it('allows indexing of the homepage and English content', () => {
    expect(allowIndex('/')).toBe(true)
    expect(allowIndex('/en')).toBe(true)
    expect(allowIndex('/en/articles/verifying-your-email-address')).toBe(true)
  })

  it('allows indexing of generally available localized content', () => {
    Object.values(languages)
      .filter(language => !language.wip)
      .forEach(language => {
        expect(allowIndex(`/${language.code}`)).toBe(true)
        expect(allowIndex(`/${language.code}/articles/verifying-your-email-address`)).toBe(true)
      })
  })

  it('disallows indexing of WIP localized content', () => {
    Object.values(languages)
      .filter(language => language.wip)
      .forEach(language => {
        expect(allowIndex(`/${language.code}`)).toBe(false)
        expect(allowIndex(`/${language.code}/articles/verifying-your-email-address`)).toBe(false)
      })
  })

  it('disallows indexing of WIP products', () => {
    const wipProductIds = Object.values(products)
      .filter(product => product.wip)
      .map(product => product.id)

    wipProductIds.forEach(id => {
      const { href } = products[id]
      const blockedPaths = [
        // English
        `/en${href}`,
        `/en${href}/overview`,
        `/en${href}/overview/intro`,
        `/en/enterprise/${enterpriseServerReleases.latest}/user${href}`,
        `/en/enterprise/${enterpriseServerReleases.oldestSupported}/user${href}`,

        // Japanese
        `/ja${href}`,
        `/ja${href}/overview`,
        `/ja${href}/overview/intro`,
        `/ja/enterprise/${enterpriseServerReleases.latest}/user${href}`,
        `/ja/enterprise/${enterpriseServerReleases.oldestSupported}/user${href}`
      ]

      blockedPaths.forEach(path => {
        expect(allowIndex(path)).toBe(false)
      })
    })
  })

  it('disallows indexing of early access "hidden" products', () => {
    const hiddenProductIds = Object.values(products)
      .filter(product => product.hidden)
      .map(product => product.id)

    hiddenProductIds.forEach(id => {
      const { versions } = products[id]
      const blockedPaths = versions.map(version => {
        return [
          // English
          `/en/${version}/${id}`,
          `/en/${version}/${id}/some-early-access-article`,
          // Japanese
          `/ja/${version}/${id}`,
          `/ja/${version}/${id}/some-early-access-article`
        ]
      }).flat()

      blockedPaths.forEach(path => {
        expect(allowIndex(path)).toBe(false)
      })
    })
  })

  it('allows indexing of non-WIP products', () => {
    expect('actions' in products).toBe(true)
    expect(allowIndex('/en/actions')).toBe(true)
    expect(allowIndex('/en/actions/overview')).toBe(true)
    expect(allowIndex('/en/actions/overview/intro')).toBe(true)
    expect(allowIndex(`/en/enterprise/${enterpriseServerReleases.latest}/user/actions`)).toBe(true)
    expect(allowIndex(`/en/enterprise/${enterpriseServerReleases.oldestSupported}/user/actions`)).toBe(true)
  })

  it('disallows indexing of deprecated enterprise releases', () => {
    enterpriseServerReleases.deprecated.forEach(version => {
      const blockedPaths = [
        // English
        `/en/enterprise-server@${version}/actions`,
        `/en/enterprise/${version}/actions`,
        `/en/enterprise-server@${version}/actions/overview`,
        `/en/enterprise/${version}/actions/overview`,
        // Japanese
        `/ja/enterprise-server@${version}/actions`,
        `/ja/enterprise/${version}/actions`,
        `/ja/enterprise-server@${version}/actions/overview`,
        `/ja/enterprise/${version}/actions/overview`
      ]

      blockedPaths.forEach(path => {
        expect(allowIndex(path)).toBe(false)
      })
    })
  })
})

tests/rendering/robots-txt.js

Lines changed: 0 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@ const robotsParser = require('robots-parser')
33
const robotsMiddleware = require('../../middleware/robots')
44
const { get } = require('../helpers/supertest')
55
const MockExpressResponse = require('mock-express-response')
6-
const products = require('../../lib/all-products')
7-
const enterpriseServerReleases = require('../../lib/enterprise-server-releases')
86

97
describe('robots.txt', () => {
108
jest.setTimeout(5 * 60 * 1000)
@@ -31,15 +29,6 @@ describe('robots.txt', () => {
3129
})
3230
})
3331

34-
it('disallows indexing of WIP localized content', async () => {
35-
Object.values(languages)
36-
.filter(language => language.wip)
37-
.forEach(language => {
38-
expect(robots.isAllowed(`https://docs.github.com/${language.code}`)).toBe(false)
39-
expect(robots.isAllowed(`https://docs.github.com/${language.code}/articles/verifying-your-email-address`)).toBe(false)
40-
})
41-
})
42-
4332
it('disallows indexing of herokuapp.com domains', async () => {
4433
const req = {
4534
hostname: 'docs-internal-12345--my-branch.herokuapp.com',
@@ -52,89 +41,6 @@ describe('robots.txt', () => {
5241
expect(res._getString()).toEqual('User-agent: *\nDisallow: /')
5342
})
5443

55-
it('disallows indexing of WIP products', async () => {
56-
const wipProductIds = Object.values(products)
57-
.filter(product => product.wip)
58-
.map(product => product.id)
59-
60-
wipProductIds.forEach(id => {
61-
const { href } = products[id]
62-
const blockedPaths = [
63-
// English
64-
`https://docs.github.com/en${href}`,
65-
`https://docs.github.com/en${href}/overview`,
66-
`https://docs.github.com/en${href}/overview/intro`,
67-
`https://docs.github.com/en/enterprise/${enterpriseServerReleases.latest}/user${href}`,
68-
`https://docs.github.com/en/enterprise/${enterpriseServerReleases.oldestSupported}/user${href}`,
69-
70-
// Japanese
71-
`https://docs.github.com/ja${href}`,
72-
`https://docs.github.com/ja${href}/overview`,
73-
`https://docs.github.com/ja${href}/overview/intro`,
74-
`https://docs.github.com/ja/enterprise/${enterpriseServerReleases.latest}/user${href}`,
75-
`https://docs.github.com/ja/enterprise/${enterpriseServerReleases.oldestSupported}/user${href}`
76-
]
77-
78-
blockedPaths.forEach(path => {
79-
expect(robots.isAllowed(path)).toBe(false)
80-
})
81-
})
82-
})
83-
84-
it('disallows indexing of early access "hidden" products', async () => {
85-
const hiddenProductIds = Object.values(products)
86-
.filter(product => product.hidden)
87-
.map(product => product.id)
88-
89-
hiddenProductIds.forEach(id => {
90-
const { versions } = products[id]
91-
const blockedPaths = versions.map(version => {
92-
return [
93-
// English
94-
`https://docs.github.com/en/${version}/${id}`,
95-
`https://docs.github.com/en/${version}/${id}/some-early-access-article`,
96-
// Japanese
97-
`https://docs.github.com/ja/${version}/${id}`,
98-
`https://docs.github.com/ja/${version}/${id}/some-early-access-article`
99-
]
100-
}).flat()
101-
102-
blockedPaths.forEach(path => {
103-
expect(robots.isAllowed(path)).toBe(false)
104-
})
105-
})
106-
})
107-
108-
it('allows indexing of non-WIP products', async () => {
109-
expect('actions' in products).toBe(true)
110-
expect(robots.isAllowed('https://docs.github.com/en/actions')).toBe(true)
111-
expect(robots.isAllowed('https://docs.github.com/en/actions/overview')).toBe(true)
112-
expect(robots.isAllowed('https://docs.github.com/en/actions/overview/intro')).toBe(true)
113-
expect(robots.isAllowed(`https://docs.github.com/en/enterprise/${enterpriseServerReleases.latest}/user/actions`)).toBe(true)
114-
expect(robots.isAllowed(`https://docs.github.com/en/enterprise/${enterpriseServerReleases.oldestSupported}/user/actions`)).toBe(true)
115-
})
116-
117-
it('disallows indexing of deprecated enterprise releases', async () => {
118-
enterpriseServerReleases.deprecated.forEach(version => {
119-
const blockedPaths = [
120-
// English
121-
`https://docs.github.com/en/enterprise-server@${version}/actions`,
122-
`https://docs.github.com/en/enterprise/${version}/actions`,
123-
`https://docs.github.com/en/enterprise-server@${version}/actions/overview`,
124-
`https://docs.github.com/en/enterprise/${version}/actions/overview`,
125-
// Japanese
126-
`https://docs.github.com/ja/enterprise-server@${version}/actions`,
127-
`https://docs.github.com/ja/enterprise/${version}/actions`,
128-
`https://docs.github.com/ja/enterprise-server@${version}/actions/overview`,
129-
`https://docs.github.com/ja/enterprise/${version}/actions/overview`
130-
]
131-
132-
blockedPaths.forEach(path => {
133-
expect(robots.isAllowed(path)).toBe(false)
134-
})
135-
})
136-
})
137-
13844
it('does not have duplicate lines', () => {
13945
const lines = new Set()
14046
for (const line of res.text.split('\n')) {

tests/routing/deprecated-enterprise-versions.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ describe('enterprise deprecation', () => {
4040
test('sets the expected x-robots-tag header for deprecated Enterprise pages', async () => {
4141
const res = await get('/en/enterprise/2.13/user/articles/about-branches')
4242
expect(res.statusCode).toBe(200)
43-
expect(res.get('x-robots-tag')).toBe('none')
43+
expect(res.get('x-robots-tag')).toBe('noindex')
4444
})
4545

4646
test('handles requests for deprecated Enterprise pages ( <2.13 )', async () => {

0 commit comments

Comments
 (0)