Skip to content

Commit 4daa98a

Browse files
heiskr and Copilot authored
Rewrite count-translation-corruptions to output structured JSON (#60117)
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent e5739d3 commit 4daa98a

File tree

1 file changed

+117
-71
lines changed

1 file changed

+117
-71
lines changed

src/languages/scripts/count-translation-corruptions.ts

Lines changed: 117 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@ import path from 'path'
22
import fs from 'fs'
33

44
import { program } from 'commander'
5-
import chalk from 'chalk'
65
import { TokenizationError } from 'liquidjs'
76
import walk from 'walk-sync'
87

@@ -13,42 +12,112 @@ import type { Site } from '@/types'
1312
import { correctTranslatedContentStrings } from '@/languages/lib/correct-translation-content'
1413

1514
program
16-
.description('Tally the number of liquid corruptions in a translation')
15+
.description('Tally the number of liquid corruptions in a translation. Outputs JSON to stdout.')
1716
.argument('[language...]', 'language(s) to compare against')
1817
.action(main)
1918
program.parse(process.argv)
2019

2120
type Reusables = Map<string, string>
2221

22+
interface CorruptionEntry {
23+
file: string
24+
location: string
25+
error: string
26+
illegalTag?: string
27+
}
28+
29+
interface LanguageResult {
30+
language: string
31+
languageName: string
32+
total: number
33+
corruptions: CorruptionEntry[]
34+
byLocation: Record<string, number>
35+
topIllegalTags: Array<{ tag: string; count: number }>
36+
}
37+
38+
interface FrontmatterError {
39+
file: string
40+
message: string
41+
}
42+
43+
interface CorruptionReport {
44+
hasFailures: boolean
45+
totalCount: number
46+
frontmatterErrors: FrontmatterError[]
47+
languages: LanguageResult[]
48+
}
49+
2350
async function main(languageCodes: string[]) {
24-
const langCodes = languageCodes.length
25-
? languageCodes
26-
: Object.keys(languages).filter((x) => x !== 'en')
27-
const site = await warmServer(languageCodes.length ? ['en', ...langCodes] : [])
51+
// Suppress warmServer noise (frontmatter errors from translations)
52+
// and capture them as structured data instead
53+
const originalError = console.error
54+
const originalWarn = console.warn
55+
const originalLog = console.log
56+
const suppressedErrors: string[] = []
57+
console.error = (...args: unknown[]) => {
58+
suppressedErrors.push(args.map(String).join(' '))
59+
}
60+
console.warn = (...args: unknown[]) => {
61+
suppressedErrors.push(args.map(String).join(' '))
62+
}
63+
console.log = () => {}
2864

29-
// When checking reusables, we only want to check the files that
30-
// have an English equivalent.
31-
const reusables = getReusables()
65+
let langCodes: string[]
66+
let site: Site
67+
let reusables: Reusables
3268

33-
const totalErrors = new Map<string, number>()
69+
try {
70+
langCodes = languageCodes.length
71+
? languageCodes
72+
: Object.keys(languages).filter((x) => x !== 'en')
3473

35-
for (const languageCode of langCodes) {
36-
if (!(languageCode in languages)) {
37-
console.error(chalk.red(`Language ${languageCode} not found`))
38-
return process.exit(1)
39-
}
40-
if (languageCode === 'en') {
41-
console.error(chalk.red("Can't test in English ('en')"))
42-
return process.exit(1)
43-
}
44-
const { errors } = run(languageCode, site, reusables)
45-
for (const [error, count] of Array.from(errors.entries())) {
46-
totalErrors.set(error, (totalErrors.get(error) || 0) + count)
74+
for (const code of langCodes) {
75+
if (!(code in languages)) {
76+
throw new Error(`Language ${code} not found`)
77+
}
78+
if (code === 'en') {
79+
throw new Error("Can't test in English ('en')")
80+
}
4781
}
82+
83+
site = await warmServer(languageCodes.length ? ['en', ...langCodes] : [])
84+
reusables = getReusables()
85+
} finally {
86+
console.error = originalError
87+
console.warn = originalWarn
88+
console.log = originalLog
4889
}
4990

50-
const sumTotal = Array.from(totalErrors.values()).reduce((acc, count) => acc + count, 0)
51-
console.log('\nGRAND TOTAL ERRORS:', sumTotal)
91+
const frontmatterErrors = parseFrontmatterErrors(suppressedErrors)
92+
const languageResults: LanguageResult[] = []
93+
94+
for (const languageCode of langCodes) {
95+
languageResults.push(run(languageCode, site, reusables))
96+
}
97+
98+
const totalCount = languageResults.reduce((sum, r) => sum + r.total, 0)
99+
100+
const report: CorruptionReport = {
101+
hasFailures: totalCount > 0 || frontmatterErrors.length > 0,
102+
totalCount,
103+
frontmatterErrors,
104+
languages: languageResults,
105+
}
106+
107+
console.log(JSON.stringify(report, null, 2))
108+
}
109+
110+
function parseFrontmatterErrors(suppressedErrors: string[]): FrontmatterError[] {
111+
const errors: FrontmatterError[] = []
112+
const seen = new Set<string>()
113+
for (const err of suppressedErrors) {
114+
const match = err.match(/translations\/[^\s']+/)
115+
if (match && !seen.has(match[0])) {
116+
seen.add(match[0])
117+
errors.push({ file: match[0], message: 'YML parsing error' })
118+
}
119+
}
120+
return errors
52121
}
53122

54123
function getReusables(): Reusables {
@@ -65,37 +134,32 @@ function getReusables(): Reusables {
65134
return reusables
66135
}
67136

68-
function run(languageCode: string, site: Site, englishReusables: Reusables) {
69-
const PADDING = 60
137+
function run(languageCode: string, site: Site, englishReusables: Reusables): LanguageResult {
70138
const language = languages[languageCode as keyof typeof languages]
71139

72-
console.log(`--- Tallying liquid corruptions in ${languageCode} (${language.name}) ---`)
73-
74-
const pageList = site.pageList
75-
const errors = new Map<string, number>()
140+
const corruptions: CorruptionEntry[] = []
76141
const wheres = new Map<string, number>()
77142
const illegalTags = new Map<string, number>()
78143

79-
function countError(error: TokenizationError, where: string) {
80-
// TokenizationError from liquidjs may have originalError and token.content
81-
// but these aren't in the public type definitions
144+
function countError(error: TokenizationError, where: string, file: string) {
82145
const errorWithExtras = error as TokenizationError & {
83146
originalError?: Error
84147
token?: { content?: string }
85148
}
86149
const originalError = errorWithExtras.originalError
87150
const errorString = originalError ? originalError.message : error.message
151+
152+
let illegalTag: string | undefined
88153
if (errorString.includes('illegal tag syntax') && errorWithExtras.token?.content) {
89-
illegalTags.set(
90-
errorWithExtras.token.content,
91-
(illegalTags.get(errorWithExtras.token.content) || 0) + 1,
92-
)
154+
illegalTag = errorWithExtras.token.content
155+
illegalTags.set(illegalTag, (illegalTags.get(illegalTag) || 0) + 1)
93156
}
94-
errors.set(errorString, (errors.get(errorString) || 0) + 1)
157+
158+
corruptions.push({ file, location: where, error: errorString, illegalTag })
95159
wheres.set(where, (wheres.get(where) || 0) + 1)
96160
}
97161

98-
for (const page of pageList) {
162+
for (const page of site.pageList) {
99163
if (page.languageCode !== languageCode) continue
100164

101165
const strings: string[][] = [
@@ -110,7 +174,7 @@ function run(languageCode: string, site: Site, englishReusables: Reusables) {
110174
getLiquidTokens(string)
111175
} catch (error) {
112176
if (error instanceof TokenizationError) {
113-
countError(error, where)
177+
countError(error, where, page.relativePath)
114178
} else {
115179
throw error
116180
}
@@ -129,7 +193,7 @@ function run(languageCode: string, site: Site, englishReusables: Reusables) {
129193
getLiquidTokens(correctedContent)
130194
} catch (error) {
131195
if (error instanceof TokenizationError) {
132-
countError(error, 'reusable')
196+
countError(error, 'reusable', relativePath)
133197
} else if (error instanceof Error && error.message.startsWith('ENOENT')) {
134198
continue
135199
} else {
@@ -138,35 +202,17 @@ function run(languageCode: string, site: Site, englishReusables: Reusables) {
138202
}
139203
}
140204

141-
const flat = Array.from(errors.entries()).sort((a, b) => b[1] - a[1])
142-
const sumTotal = flat.reduce((acc, [, count]) => acc + count, 0)
143-
144-
console.log('\nMost common errors')
145-
for (let i = 0; i < flat.length; i++) {
146-
const [error, count] = flat[i]
147-
console.log(`${i + 1}.`.padEnd(3), error.padEnd(PADDING), count)
205+
const topIllegalTags = Array.from(illegalTags.entries())
206+
.sort((a, b) => b[1] - a[1])
207+
.slice(0, 10)
208+
.map(([tag, count]) => ({ tag, count }))
209+
210+
return {
211+
language: languageCode,
212+
languageName: language.name,
213+
total: corruptions.length,
214+
corruptions,
215+
byLocation: Object.fromEntries(wheres),
216+
topIllegalTags,
148217
}
149-
console.log(`${'TOTAL:'.padEnd(3 + 1 + PADDING)}`, sumTotal)
150-
151-
if (sumTotal) {
152-
const whereFlat = Array.from(wheres.entries()).sort((a, b) => b[1] - a[1])
153-
console.log('\nMost common places')
154-
for (let i = 0; i < whereFlat.length; i++) {
155-
const [error, count] = whereFlat[i]
156-
console.log(`${i + 1}.`.padEnd(3), error.padEnd(PADDING), count)
157-
}
158-
159-
const illegalTagsFlat = Array.from(illegalTags.entries()).sort((a, b) => b[1] - a[1])
160-
if (illegalTagsFlat.reduce((acc, [, count]) => acc + count, 0)) {
161-
console.log('\nMost common illegal tags', illegalTagsFlat.length > 10 ? ' (Top 10)' : '')
162-
const topIllegalTags = illegalTagsFlat.slice(0, 10)
163-
for (let i = 0; i < topIllegalTags.length; i++) {
164-
const [error, count] = topIllegalTags[i]
165-
console.log(`${i + 1}.`.padEnd(3), error.padEnd(PADDING), count)
166-
}
167-
}
168-
}
169-
console.log('\n')
170-
171-
return { errors }
172218
}

0 commit comments

Comments
 (0)