const path = require('path')
const fs = require('fs')
const linkinator = require('linkinator')
const program = require('commander')
const { pull } = require('lodash')
const checker = new linkinator.LinkChecker()
const rimraf = require('rimraf').sync
const mkdirp = require('mkdirp').sync
const root = 'https://docs.github.com'
const englishRoot = `${root}/en`
const { deprecated } = require('../lib/enterprise-server-releases')
const got = require('got')

// Links with these codes may or may not really be broken, so they are
// candidates for a second, individual request. 'Undefined' is the placeholder
// string used for links that came back without a status code.
const retryStatusCodes = [429, 503, 'Undefined']

// Broken S3 image URLs result in 403s, broken docs URLs result in 404s.
const allBrokenStatusCodes = [403, 404, ...retryStatusCodes]
1421
1522// [start-readme]
1623//
@@ -23,11 +30,11 @@ const { deprecated } = require('../lib/enterprise-server-releases')
// Command-line interface: both options are optional.
program
  .description('Check all links in the English docs.')
  .option('-d, --dry-run', 'Turn off recursion to get a fast minimal report (useful for previewing output).')
  .option('-p, --path <PATH>', 'Provide an optional path to check. Best used with --dry-run. If not provided, defaults to the homepage.')
  .parse(process.argv)

// Skip excluded links defined in separate file.
const excludedLinks = require('../lib/excluded-links')
3138
3239// Skip non-English content.
3340const languagesToSkip = Object . keys ( require ( '../lib/languages' ) )
@@ -40,7 +47,7 @@ const languagesToSkip = Object.keys(require('../lib/languages'))
// Matches URLs under the docs root that contain a deprecated Enterprise Server
// release number preceded by "/" or "@" and followed by "/".
const enterpriseReleasesToSkip = new RegExp(`${root}.+?[/@](${deprecated.join('|')})/`)
4148
4249const config = {
43- path : englishRoot ,
50+ path : program . path || englishRoot ,
4451 concurrency : 300 ,
4552 // If this is a dry run, turn off recursion.
4653 recurse : ! program . dryRun ,
@@ -56,36 +63,70 @@ const config = {
// Run the check. Report unexpected failures and exit non-zero instead of
// leaving the returned promise's rejection unhandled.
main().catch(err => {
  console.error(err)
  process.exit(1)
})
5764
/**
 * Run the link check, log every checked link, retry the links whose status is
 * inconclusive, and print the links that are still broken grouped by status.
 *
 * Exits the process with code 0 when no broken links remain, 1 otherwise.
 */
async function main () {
  // Clear and recreate a directory for logs.
  const logFile = path.join(__dirname, '../.linkinator/full.log')
  rimraf(path.dirname(logFile))
  mkdirp(path.dirname(logFile))

  // Append to the logfile after each checked link.
  checker.on('link', result => {
    fs.appendFileSync(logFile, JSON.stringify(result) + '\n')
  })

  // Start the scan; events will be logged as they occur.
  const result = (await checker.check(config)).links

  // Scan is complete! Filter the results for broken links.
  const brokenLinks = result.filter(link => link.state === 'BROKEN')

  // Coerce missing (falsy) status codes into a string so they can be matched
  // and grouped like the other status codes.
  brokenLinks.forEach(link => {
    if (!link.status) link.status = 'Undefined'
  })

  // Links to retry individually.
  const linksToRetry = brokenLinks.filter(link => retryStatusCodes.includes(link.status))

  await Promise.all(linksToRetry.map(async link => {
    try {
      const r = await got(link.url)
      // Remove the link from the list if got can access it.
      if (!allBrokenStatusCodes.includes(r.statusCode)) {
        pull(brokenLinks, link)
      }
    } catch (err) {
      // Deliberate no-op: if the retry fails, the link simply stays in the
      // broken list it is already part of.
    }
  }))

  // Exit successfully if no broken links!
  if (!brokenLinks.length) {
    console.log('All links are good!')
    process.exit(0)
  }

  // Format and display the results.
  console.log(`${brokenLinks.length} broken links found on docs.github.com\n`)

  // Report the known status codes first, followed by any other status that is
  // present, so every link counted in the total above is also listed.
  const statusCodesToReport = new Set([
    ...allBrokenStatusCodes,
    ...brokenLinks.map(link => link.status)
  ])
  statusCodesToReport.forEach(statusCode => displayBrokenLinks(statusCode, brokenLinks))

  // Exit unsuccessfully if broken links are found.
  process.exit(1)
}
120+
/**
 * Print a Markdown section listing the broken links that have a given status.
 * Prints nothing when no link has that status.
 *
 * @param {number|string} statusCode - Status to report; compared to each
 *   link's `status` with strict equality.
 * @param {Array<Object>} brokenLinks - Link objects carrying a `status`
 *   property; each match is printed as pretty-printed JSON.
 */
function displayBrokenLinks (statusCode, brokenLinks) {
  const brokenLinksForStatus = brokenLinks.filter(x => x.status === statusCode)

  if (!brokenLinksForStatus.length) return

  console.log(`## Status code ${statusCode}: Found ${brokenLinksForStatus.length} broken links`)
  // Wrap the JSON in a fenced code block.
  console.log('```')
  brokenLinksForStatus.forEach(brokenLinkObj => {
    console.log(JSON.stringify(brokenLinkObj, null, 2))
  })
  console.log('```')
}
0 commit comments