Skip to content

Commit 6fa5bd6

Browse files
committed
add script to update internal links
1 parent dc8319b commit 6fa5bd6

1 file changed

Lines changed: 173 additions & 0 deletions

File tree

script/update-internal-links.js

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
#!/usr/bin/env node
2+
3+
const fs = require('fs')
4+
const walk = require('walk-sync')
5+
const path = require('path')
6+
const astFromMarkdown = require('mdast-util-from-markdown')
7+
const visit = require('unist-util-visit')
8+
const { loadPages, loadPageMap } = require('../lib/pages')
9+
const loadSiteData = require('../lib/site-data')
10+
const loadRedirects = require('../lib/redirects/precompile')
11+
const { getPathWithoutLanguage, getPathWithoutVersion } = require('../lib/path-utils')
12+
const allVersions = Object.keys(require('../lib/all-versions'))
13+
const frontmatter = require('../lib/read-frontmatter')
14+
const renderContent = require('../lib/render-content')
15+
const patterns = require('../lib/patterns')
16+
17+
// List the Markdown files under `pathToWalk` (resolved against the current working directory).
// Returned paths include the base directory (`includeBasePath: true`); directories are not listed.
// README.md files and anything under an /early-access/ directory are left out.
const walkFiles = (pathToWalk) => {
  const rootDir = path.join(process.cwd(), pathToWalk)

  const isEligible = (file) => {
    if (!file.endsWith('.md')) return false
    if (file.endsWith('README.md')) return false
    return !file.includes('/early-access/') // ignore EA for now
  }

  return walk(rootDir, { includeBasePath: true, directories: false }).filter(isEligible)
}
22+
23+
// Every Markdown file to process: content files first, then data files.
const allFiles = [...walkFiles('content'), ...walkFiles('data')]
24+
25+
// [start-readme]
26+
//
27+
// Run this script to find internal links in all content and data Markdown files, check if either the title or link
28+
// (or both) are outdated, and automatically update them if so.
29+
//
30+
// Exceptions:
31+
// * Links with fragments (e.g., [Bar](/foo#bar)) will get their root links updated if necessary, but the fragment
32+
// and title will be unchanged (e.g., [Bar](/noo#bar)).
33+
// * Links with hardcoded versions (e.g., [Foo](/enterprise-server/baz)) will get their root links updated if
34+
// necessary, but the hardcoded versions will be preserved (e.g., [Foo](/enterprise-server/qux)).
35+
// * Links with Liquid in the titles will have their root links updated if necessary, but the titles will be preserved.
36+
//
37+
// [end-readme]
38+
39+
// main() is async: handle a rejection explicitly so any failure is reported and the script
// exits non-zero instead of leaving a floating promise.
main().catch(error => {
  console.error(error)
  process.exit(1)
})
40+
41+
/**
 * Rewrite internal Markdown links in every file listed in `allFiles`.
 *
 * For each file, the frontmatter is split off, the Markdown body is parsed, and every internal
 * link is rendered once per supported version until a matching page is found in the page map
 * (directly or through a redirect). The link path, and in some cases the title, is then updated
 * in the body and the file is written back with its original frontmatter data.
 *
 * Exits the process with code 1 as soon as a link cannot be resolved to any page.
 *
 * @returns {Promise<void>}
 */
async function main () {
  console.log('Working...')
  const pageList = await loadPages()
  const pageMap = await loadPageMap(pageList)
  const redirects = await loadRedirects(pageList)
  const site = await loadSiteData()

  const context = {
    pages: pageMap,
    redirects,
    site: site.en.site,
    currentLanguage: 'en'
  }

  for (const file of allFiles) {
    const { data, content } = frontmatter(fs.readFileSync(file, 'utf8'))
    const ast = astFromMarkdown(content)

    let newContent = content

    // We can't do async functions within visit, so gather the nodes upfront
    const nodesPerFile = []

    visit(ast, node => {
      if (node.type !== 'link') return
      if (!node.url.startsWith('/')) return
      if (node.url.startsWith('/assets')) return
      if (node.url.startsWith('/public')) return
      if (node.url.includes('/11.10.340/')) return
      if (node.url.includes('/2.1/')) return
      if (node.url === '/') return

      nodesPerFile.push(node)
    })

    // For every Markdown link...
    for (const node of nodesPerFile) {
      const oldLink = node.url
      const oldTitle = getLinkTitle(node)

      // Without a readable title we cannot rebuild the link's Markdown source, so there is
      // nothing to search for in the file (e.g. an empty link or a link wrapping an image).
      if (typeof oldTitle !== 'string') {
        console.error(`Skipping link with no readable title: ${oldLink} in ${file.replace(process.cwd(), '')}`)
        continue
      }

      const oldMarkdownLink = `[${oldTitle}](${oldLink})`

      // As a blanket rule, only update the titles of links with a quote directly before them.
      // Title may be updated: "[Foo](/foo)"
      // Title is preserved:   [Bar](/bar)
      // (The link path is updated in both cases.)
      const noQuotesAroundLink = !newContent.includes(`"${oldMarkdownLink}`)

      // We need to preserve fragments and hardcoded versions if any are found. Both depend only
      // on the original link, so compute them once rather than once per version.
      const fragmentMatch = oldLink.match(/(#.*$)/)
      const versionMatch = oldLink.match(/(enterprise-server[/@].*?)\//)

      let foundPage

      // Run through all supported versions...
      for (const version of allVersions) {
        context.currentVersion = version
        // Render the link for each version using the renderContent pipeline, which includes the rewrite-local-links plugin.
        const $ = await renderContent(oldMarkdownLink, context, { cheerioObject: true })
        const renderedHref = $('a').attr('href')

        // If no anchor with an href was rendered for this version, there is nothing to look up.
        if (typeof renderedHref !== 'string') continue

        // Remove the fragment for now.
        const linkToCheck = renderedHref
          .replace(/#.*$/, '')
          .replace(patterns.trailingSlash, '$1')

        // Try to find the rendered link in the set of pages!
        foundPage = findPage(linkToCheck, pageMap, redirects)

        // Once a page is found for a particular version, exit immediately; we don't need to check the other versions
        // because all we care about is the page title and path.
        if (foundPage) break
      }

      if (!foundPage) {
        console.error(`Can't find link in pageMap! ${oldLink} in ${file.replace(process.cwd(), '')}`)
        process.exit(1)
      }

      // If the original link includes a fragment or the original title includes Liquid, do not change;
      // otherwise, use the found page title. (We don't want to update the title if a fragment is found because
      // the title likely points to the fragment section header, not the page title.)
      const keepOldTitle = fragmentMatch || oldTitle.includes('{%') || noQuotesAroundLink
      const newTitle = keepOldTitle ? oldTitle : foundPage.title

      // If the original link includes a fragment, append it to the found page path.
      // Also remove the language code because Markdown links don't include language codes.
      let newLink = getPathWithoutLanguage(fragmentMatch ? foundPage.path + fragmentMatch[1] : foundPage.path)

      // If the original link includes a hardcoded version, preserve it; otherwise, remove versioning
      // because Markdown links don't include versioning.
      newLink = versionMatch ? `/${versionMatch[1]}${getPathWithoutVersion(newLink)}` : getPathWithoutVersion(newLink)

      let newMarkdownLink = `[${newTitle}](${newLink})`

      // Handle a few misplaced quotation marks.
      if (oldMarkdownLink.includes('["')) {
        newMarkdownLink = `"${newMarkdownLink}`
      }

      if (oldMarkdownLink !== newMarkdownLink) {
        // Stream the results to console as we find them.
        console.log('old link', oldMarkdownLink)
        console.log('new link', newMarkdownLink)
        console.log('-------')

        // Use a replacer function so that `$` sequences in the new title or path are inserted
        // literally instead of being interpreted as replacement patterns ($&, $1, $$, ...).
        newContent = newContent.replace(oldMarkdownLink, () => newMarkdownLink)
      }
    }

    fs.writeFileSync(file, frontmatter.stringify(newContent, data, { lineWidth: 10000 }))
  }

  console.log('Done!')
}

/**
 * Read the text of a link node: the first child's `value`, or failing that the `value` of the
 * first child's own first child (e.g. text wrapped in one level of formatting).
 *
 * @param {Object} node - Link node from the parsed Markdown tree.
 * @returns {string|undefined} The title text, or undefined when it cannot be read.
 */
function getLinkTitle (node) {
  const firstChild = node.children && node.children[0]
  if (!firstChild) return undefined
  if (firstChild.value) return firstChild.value

  const firstGrandchild = firstChild.children && firstChild.children[0]
  return firstGrandchild ? firstGrandchild.value : undefined
}
158+
159+
/**
 * Look up a path in the page map, following a single redirect if the path itself is not a page.
 *
 * @param {string} tryPath - Path to look up; used as a key into `pageMap` and `redirects`.
 * @param {Object} pageMap - Object keyed by path; only each entry's `title` is read here.
 * @param {Object} redirects - Object mapping a path to the path it redirects to.
 * @returns {{title: *, path: string}|undefined} The matching page's title and its path in
 *   `pageMap`, or undefined when neither the path nor its redirect target is a known page.
 */
function findPage (tryPath, pageMap, redirects) {
  const directPage = pageMap[tryPath]
  if (directPage) {
    return {
      title: directPage.title,
      path: tryPath
    }
  }

  // Only consult pageMap when a redirect target actually exists; indexing with a missing
  // target would be coerced to the key "undefined" and could match an unrelated entry.
  const redirectedPath = redirects[tryPath]
  const redirectedPage = typeof redirectedPath === 'string' ? pageMap[redirectedPath] : undefined
  if (redirectedPage) {
    return {
      title: redirectedPage.title,
      path: redirectedPath
    }
  }

  return undefined
}

0 commit comments

Comments
 (0)