Skip to content

Commit 5954e72

Browse files
authored
Merge pull request #12502 from github/repo-sync
repo sync
2 parents f149f7f + 95b1b1e commit 5954e72

4 files changed

Lines changed: 64 additions & 18 deletions

File tree

.github/workflows/create-translation-batch-pr.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,8 @@ jobs:
138138

139139
- name: Check in CSV report
140140
run: |
141-
mkdir -p log
142-
csvFile=log/${{ matrix.language_code }}-resets.csv
141+
mkdir -p translations/log
142+
csvFile=translations/log/${{ matrix.language_code }}-resets.csv
143143
script/i18n/report-reset-files.js --report-type=csv --language=${{ matrix.language_code }} --log-file=/tmp/batch.log > $csvFile
144144
git add -f $csvFile && git commit -m "Check in ${{ matrix.language }} CSV report" || echo "Nothing to commit"
145145

lib/excluded-links.js

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,28 @@
1-
// Linkinator treats the following as regex.
1+
/**
2+
* This file exports a mix of strings and regexes. Linkinator relies
3+
* on this in `script/check-english-links.js` when we encounter external
4+
* links that we *specifically ignore*. That means that URLs or patterns
5+
* mentioned in this file might appear within our content but we don't
6+
* bother checking that they actually work.
7+
*/
8+
9+
/* eslint-disable prefer-regex-literals */
10+
211
export default [
312
// Skip GitHub search links.
4-
'https://github.com/search\\?',
5-
'https://github.com/github/gitignore/search\\?',
13+
// E.g. https://github.com/search?foo=bar
14+
new RegExp('https://github\\.com/search\\?'),
15+
new RegExp('https://github\\.com/github/gitignore/search\\?'),
616

717
// These links require auth.
8-
'https://github.com/settings/profile',
9-
'https://github.com/github/docs/edit',
10-
'https://github.com/github/insights-releases/releases/latest',
11-
'https://classroom.github.com/videos',
18+
new RegExp('https://github\\.com/settings/profile'),
19+
new RegExp('https://github\\.com/github/docs/edit'),
20+
new RegExp('https://github\\.com/github/insights-releases/releases/latest'),
21+
new RegExp('https://classroom\\.github.com/videos'),
1222

1323
// Oneoff links that link checkers think are broken but are not.
1424
'https://haveibeenpwned.com/',
15-
'https://www.ilo.org/dyn/normlex/en/f\\?p=NORMLEXPUB:12100:0::NO::P12100_ILO_CODE:P029',
25+
'https://www.ilo.org/dyn/normlex/en/f?p=NORMLEXPUB:12100:0::NO::P12100_ILO_CODE:P029',
1626
'https://www.linkedin.com/company/github',
1727
'https://www.facebook.com/',
1828
'https://ko-fi.com/',

script/check-english-links.js

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ program
5252
// Skip non-English content.
5353
const languagesToSkip = Object.keys(libLanguages)
5454
.filter((code) => code !== 'en')
55-
.map((code) => `${root}/${code}`)
55+
.map((code) => new RegExp(`${root}/${code}`))
5656

5757
// Skip deprecated Enterprise content.
5858
// Capture the old format https://docs.github.com/enterprise/2.1/
@@ -66,7 +66,19 @@ const config = {
6666
recurse: !program.opts().dryRun,
6767
silent: true,
6868
// The values in this array are treated as regexes.
69-
linksToSkip: [enterpriseReleasesToSkip, ...languagesToSkip, ...excludedLinks],
69+
linksToSkip: linksToSkipFactory([enterpriseReleasesToSkip, ...languagesToSkip, ...excludedLinks]),
70+
}
71+
72+
// Return a function that can as quickly as possible check if a certain
73+
// href input should be skipped.
74+
// Do this so we can use a `Set` and an `iterable.some()` for a speedier
75+
// check. With the default implementation in Linkinator, if you set
76+
// the `linksToSkip` config to be an array, it will, for every URL it
77+
// checks, turn that into a new regex every single time.
78+
function linksToSkipFactory(regexAndURLs) {
79+
const set = new Set(regexAndURLs.filter((regexOrURL) => typeof regexOrURL === 'string'))
80+
const regexes = regexAndURLs.filter((regexOrURL) => regexOrURL instanceof RegExp)
81+
return (href) => set.has(href) || regexes.some((regex) => regex.test(href))
7082
}
7183

7284
main()

tests/meta/repository-references.js

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1+
import fs from 'fs'
2+
13
import walkSync from 'walk-sync'
2-
import readFileAsync from '../../lib/readfile-async.js'
34
import minimatch from 'minimatch'
45

56
/*
@@ -75,33 +76,56 @@ const REPO_REGEXP = /\/\/github\.com\/github\/(?!docs[/'"\n])([\w-.]+)/gi
7576
const IGNORE_PATHS = [
7677
'.git',
7778
'.next',
79+
'.vscode', // Not part of the repo but could be for a developer locally
7880
'node_modules',
7981
'translations',
82+
'.linkinator',
8083
'**/*.png', // Do not check images or font files.
8184
'**/*.jpg', // We could just put all of assets/* here, but that would prevent any
8285
'**/*.gif', // READMEs or other text-based files from being checked.
8386
'**/*.pdf',
8487
'**/*.ico',
8588
'**/*.woff',
89+
'**/*.csv',
90+
'**/*.br', // E.g. the search index .json.br files
91+
'**/*.graphql', // E.g. data/graphql/ghec/schema.docs.graphql
92+
'package-lock.json', // At the time of writing it's 1.5MB!
93+
'.linkinator/full.log', // Only present if you've run linkinator
94+
'lib/search/popular-pages.json', // used to build search indexes
95+
'tests/**/*.json',
8696

8797
'content/early-access', // Not committed to public repository.
8898
'data/early-access', // Not committed to public repository.
8999
'data/release-notes', // These include links to many internal issues in Liquid comments.
100+
'lib/redirects/.redirects-cache*',
90101
]
91102

92103
describe('check if a GitHub-owned private repository is referenced', () => {
93104
const filenames = walkSync(process.cwd(), {
94105
directories: false,
95106
ignore: IGNORE_PATHS,
96-
})
107+
}).filter(
108+
(filename) =>
109+
// Skip the large static json files because they're not code.
110+
!(
111+
filename.includes('static') &&
112+
(filename.endsWith('.json') || filename.endsWith('.json.br'))
113+
)
114+
)
97115

98-
test.each(filenames)('in file %s', async (filename) => {
99-
const file = await readFileAsync(filename, 'utf8')
100-
const allowDocs = ALLOW_DOCS_PATHS.some((path) => minimatch(filename, path))
116+
test.each(filenames)('in file %s', (filename) => {
117+
// When you're reading many small files, it's faster to do it
118+
// *synchronously* because the event-loop overhead is less since
119+
// the disk I/O is sufficiently small.
120+
const file = fs.readFileSync(filename, 'utf8')
101121
const matches = Array.from(file.matchAll(REPO_REGEXP))
102122
.map(([, repoName]) => repoName)
103123
.filter((repoName) => !PUBLIC_REPOS.has(repoName))
104-
.filter((repoName) => !(allowDocs && repoName.startsWith('docs')))
124+
.filter((repoName) => {
125+
return !(
126+
repoName.startsWith('docs') && ALLOW_DOCS_PATHS.some((path) => minimatch(filename, path))
127+
)
128+
})
105129
expect(
106130
matches,
107131
`Please edit ${filename} to remove references to ${matches.join(', ')}`

0 commit comments

Comments
 (0)