Skip to content

Commit 1b5153c

Browse files
committed
add retrying step and add better formatting
1 parent 472c872 commit 1b5153c

1 file changed

Lines changed: 62 additions & 21 deletions

File tree

script/check-english-links.js

Lines changed: 62 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,21 @@
33
const path = require('path')
44
const fs = require('fs')
55
const linkinator = require('linkinator')
6-
const dedent = require('dedent')
76
const program = require('commander')
8-
const { escapeRegExp } = require('lodash')
7+
const { pull } = require('lodash')
98
const checker = new linkinator.LinkChecker()
109
const rimraf = require('rimraf').sync
10+
const mkdirp = require('mkdirp').sync
1111
const root = 'https://docs.github.com'
1212
const englishRoot = `${root}/en`
1313
const { deprecated } = require('../lib/enterprise-server-releases')
14+
const got = require('got')
15+
16+
// Links with these codes may or may not really be broken
17+
const retryStatusCodes = [429, 503, 'Undefined']
18+
19+
// Broken S3 image URLs result in 403s, broken docs URLs result in 404s
20+
const allBrokenStatusCodes = [403, 404, ...retryStatusCodes]
1421

1522
// [start-readme]
1623
//
@@ -23,11 +30,11 @@ const { deprecated } = require('../lib/enterprise-server-releases')
2330
program
2431
.description('Check all links in the English docs.')
2532
.option('-d, --dry-run', 'Turn off recursion to get a fast minimal report (useful for previewing output).')
33+
.option('-p, --path <PATH>', 'Provide an optional path to check. Best used with --dry-run. If not provided, defaults to the homepage.')
2634
.parse(process.argv)
2735

2836
// Skip excluded links defined in separate file.
2937
const excludedLinks = require('../lib/excluded-links')
30-
.map(link => escapeRegExp(link))
3138

3239
// Skip non-English content.
3340
const languagesToSkip = Object.keys(require('../lib/languages'))
@@ -40,7 +47,7 @@ const languagesToSkip = Object.keys(require('../lib/languages'))
4047
const enterpriseReleasesToSkip = new RegExp(`${root}.+?[/@](${deprecated.join('|')})/`)
4148

4249
const config = {
43-
path: englishRoot,
50+
path: program.path || englishRoot,
4451
concurrency: 300,
4552
// If this is a dry run, turn off recursion.
4653
recurse: !program.dryRun,
@@ -56,36 +63,70 @@ const config = {
5663
main()
5764

5865
// Entry point: runs the link check and reports the broken links.
//
// Reading note: this is a diff rendering. A gutter line ending in "-" marks the
// following line as removed, and one ending in "+" marks it as added.
//
// Flow of the added version:
//   1. Delete and recreate the directory holding .linkinator/full.log.
//   2. Append every 'link' event emitted by the checker to that log, one JSON object per line.
//   3. Run checker.check(config) and keep only the links whose state is 'BROKEN';
//      a falsy status is replaced with the string 'Undefined'.
//   4. Re-request, in parallel via got, every broken link whose status is in retryStatusCodes.
//      A link is removed from brokenLinks when got resolves with a status code
//      that is not in allBrokenStatusCodes.
//   5. Exit with code 0 if no broken links remain; otherwise print them grouped by
//      status code and exit with code 1.
//
// NOTE(review): the report iterates only over allBrokenStatusCodes, so a broken link
// with any other status is counted in the total but never listed.
// NOTE(review): the catch block around got swallows every error; presumably got rejects
// on non-2xx responses, which would keep such links in the list — confirm against got's docs.
async function main () {
59-
const startTime = new Date()
60-
6166
// Clear and recreate a directory for logs.
6267
const logFile = path.join(__dirname, '../.linkinator/full.log')
6368
rimraf(path.dirname(logFile))
64-
fs.mkdirSync(path.dirname(logFile), { recursive: true })
69+
mkdirp(path.dirname(logFile))
6570

6671
// Update CLI output and append to logfile after each checked link.
6772
checker.on('link', result => {
6873
fs.appendFileSync(logFile, JSON.stringify(result) + '\n')
6974
})
7075

7176
// Start the scan; events will be logged as they occur.
72-
const result = await checker.check(config)
77+
const result = (await checker.check(config)).links
78+
79+
// Scan is complete! Filter the results for broken links.
80+
const brokenLinks = result
81+
.filter(link => link.state === 'BROKEN')
82+
// Coerce undefined status codes into strings so we can filter for them like the other status codes.
83+
.map(link => {
84+
if (!link.status) link.status = 'Undefined'
85+
return link
86+
})
7387

74-
// Scan is complete! Display the results.
75-
const endTime = new Date()
76-
const skippedLinks = result.links.filter(x => x.state === 'SKIPPED')
77-
const brokenLinks = result.links.filter(x => x.state === 'BROKEN')
88+
// Links to retry individually.
89+
const linksToRetry = brokenLinks
90+
.filter(link => retryStatusCodes.find(retryStatusCode => link.status === retryStatusCode))
91+
92+
await Promise.all(linksToRetry
93+
.map(async (link) => {
94+
try {
95+
const r = await got(link.url)
96+
// Remove the link from the list if got can access it.
97+
if (!allBrokenStatusCodes.find(brokenStatusCode => r.statusCode === brokenStatusCode)) {
98+
pull(brokenLinks, link)
99+
}
100+
// Do nothing if the URL is invalid, since it's already captured in the broken list.
101+
} catch (err) {
102+
// noop
103+
}
104+
}))
105+
106+
// Exit successfully if no broken links!
107+
if (!brokenLinks.length) {
108+
console.log('All links are good!')
109+
process.exit(0)
110+
}
78111

112+
// Format and display the results.
79113
console.log(`${brokenLinks.length} broken links found on docs.github.com\n`)
114+
allBrokenStatusCodes
115+
.forEach(statusCode => displayBrokenLinks(statusCode, brokenLinks))
80116

81-
if (brokenLinks.length) {
82-
console.log('```')
83-
brokenLinks.forEach(brokenLinkObj => {
84-
console.log(JSON.stringify(brokenLinkObj, null, 2))
85-
})
86-
console.log('```')
87-
process.exit(1)
88-
}
117+
// Exit unsuccessfully if broken links are found.
118+
process.exit(1)
119+
}
120+
121+
// Print the broken links that have one particular status code.
//
// statusCode  - value compared with strict equality against each link's `status`
//               (a number, or the string 'Undefined' used for links without a status).
// brokenLinks - array of link result objects; each is printed as pretty-printed JSON.
//
// Prints nothing when no link matches. Otherwise prints a "## Status code ..." heading
// followed by the matching links between a pair of ``` fence lines.
function displayBrokenLinks (statusCode, brokenLinks) {
122+
const brokenLinksForStatus = brokenLinks.filter(x => x.status === statusCode)
89123

90-
process.exit(0)
124+
if (!brokenLinksForStatus.length) return
125+
126+
console.log(`## Status code ${statusCode}: Found ${brokenLinksForStatus.length} broken links`)
127+
console.log('```')
128+
brokenLinksForStatus.forEach(brokenLinkObj => {
129+
console.log(JSON.stringify(brokenLinkObj, null, 2))
130+
})
131+
console.log('```')
91132
}

0 commit comments

Comments
 (0)