@@ -2,7 +2,6 @@ import path from 'path'
22import fs from 'fs'
33
44import { program } from 'commander'
5- import chalk from 'chalk'
65import { TokenizationError } from 'liquidjs'
76import walk from 'walk-sync'
87
@@ -13,42 +12,112 @@ import type { Site } from '@/types'
1312import { correctTranslatedContentStrings } from '@/languages/lib/correct-translation-content'
1413
// Register the command-line interface: a description, an optional variadic
// list of language codes, and `main` as the handler. Each call configures the
// same shared `program` instance, so the calls need not be chained.
program.description('Tally the number of liquid corruptions in a translation. Outputs JSON to stdout. ')
program.argument('[language...]', 'language(s) to compare against')
program.action(main)

// Parse the real command line, which dispatches to `main`.
program.parse(process.argv)
2019
/**
 * Reusable lookup built by `getReusables`.
 * NOTE(review): presumably keyed by the reusable's relative path with the
 * file content as value; confirm against `getReusables`.
 */
type Reusables = Map<string, string>

/** One Liquid tokenization failure found in a translated string. */
interface CorruptionEntry {
  /** Relative path of the page or reusable the string came from. */
  file: string
  /** Label for where in the file the string lives (e.g. 'reusable'). */
  location: string
  /** Message of the underlying error when there is one, else the tokenizer's. */
  error: string
  /** Offending token content; only set for 'illegal tag syntax' errors. */
  illegalTag?: string
}

/** Everything tallied for a single language. */
interface LanguageResult {
  /** Language code as passed on the command line. */
  language: string
  /** Human-readable name taken from the `languages` table. */
  languageName: string
  /** Number of entries in `corruptions`. */
  total: number
  corruptions: Array<CorruptionEntry>
  /** Corruption count per `location` label. */
  byLocation: { [location: string]: number }
  /** Most frequent illegal tags, highest count first (at most ten). */
  topIllegalTags: { tag: string; count: number }[]
}

/** A translation file that was mentioned in captured startup log output. */
interface FrontmatterError {
  file: string
  message: string
}

/** Top-level JSON document printed to stdout by `main`. */
interface CorruptionReport {
  /** True when any corruption or any frontmatter error was recorded. */
  hasFailures: boolean
  /** Sum of `total` across all languages. */
  totalCount: number
  frontmatterErrors: Array<FrontmatterError>
  languages: Array<LanguageResult>
}
49+
/**
 * CLI action: tallies Liquid corruptions for the requested languages and
 * prints one JSON report to stdout.
 *
 * @param languageCodes Language codes from the command line. When empty,
 *   every key of `languages` except 'en' is checked.
 * @throws Error when a code is not a key of `languages`, or when it is 'en'.
 */
async function main(languageCodes: string[]) {
  // warmServer is noisy (frontmatter errors from translations). Mute the
  // console while it runs and keep the error/warn text so it can be turned
  // into structured data afterwards.
  const savedConsole = { error: console.error, warn: console.warn, log: console.log }
  const suppressedErrors: string[] = []
  const capture = (...args: unknown[]) => {
    suppressedErrors.push(args.map(String).join(' '))
  }
  console.error = capture
  console.warn = capture
  console.log = () => {}

  let langCodes: string[]
  let site: Site
  let reusables: Reusables

  try {
    if (languageCodes.length) {
      langCodes = languageCodes
    } else {
      langCodes = Object.keys(languages).filter((x) => x !== 'en')
    }

    // Validate every code up front, before the expensive warm-up.
    for (const code of langCodes) {
      if (!(code in languages)) throw new Error(`Language ${code} not found`)
      if (code === 'en') throw new Error("Can't test in English ('en')")
    }

    site = await warmServer(languageCodes.length ? ['en', ...langCodes] : [])
    reusables = getReusables()
  } finally {
    // Put the real console back whether or not setup succeeded.
    console.error = savedConsole.error
    console.warn = savedConsole.warn
    console.log = savedConsole.log
  }

  const frontmatterErrors = parseFrontmatterErrors(suppressedErrors)

  // Run each language in order, summing the per-language totals as we go.
  const languageResults: LanguageResult[] = []
  let totalCount = 0
  for (const languageCode of langCodes) {
    const result = run(languageCode, site, reusables)
    languageResults.push(result)
    totalCount += result.total
  }

  const report: CorruptionReport = {
    hasFailures: totalCount > 0 || frontmatterErrors.length > 0,
    totalCount,
    frontmatterErrors,
    languages: languageResults,
  }

  console.log(JSON.stringify(report, null, 2))
}
109+
/**
 * Turns captured console output into a de-duplicated list of translation
 * files that were mentioned in it.
 *
 * Only the first `translations/...` path in each message is considered, and a
 * given path is reported once, in order of first appearance.
 *
 * @param suppressedErrors Messages captured from console.error/console.warn.
 * @returns One entry per distinct path, each with the fixed message
 *   'YML parsing error'.
 */
function parseFrontmatterErrors(suppressedErrors: string[]): FrontmatterError[] {
  // A path starts at "translations/" and runs up to the next whitespace or
  // single quote, so a path quoted inside a message is cut at the closing quote.
  const translationPath = /translations\/[^\s']+/

  const errors: FrontmatterError[] = []
  const seen = new Set<string>()
  for (const err of suppressedErrors) {
    const match = err.match(translationPath)
    if (!match) continue

    const file = match[0]
    if (seen.has(file)) continue

    seen.add(file)
    errors.push({ file, message: 'YML parsing error' })
  }
  return errors
}
53122
54123function getReusables ( ) : Reusables {
@@ -65,37 +134,32 @@ function getReusables(): Reusables {
65134 return reusables
66135}
67136
68- function run ( languageCode : string , site : Site , englishReusables : Reusables ) {
69- const PADDING = 60
137+ function run ( languageCode : string , site : Site , englishReusables : Reusables ) : LanguageResult {
70138 const language = languages [ languageCode as keyof typeof languages ]
71139
72- console . log ( `--- Tallying liquid corruptions in ${ languageCode } (${ language . name } ) ---` )
73-
74- const pageList = site . pageList
75- const errors = new Map < string , number > ( )
140+ const corruptions : CorruptionEntry [ ] = [ ]
76141 const wheres = new Map < string , number > ( )
77142 const illegalTags = new Map < string , number > ( )
78143
79- function countError ( error : TokenizationError , where : string ) {
80- // TokenizationError from liquidjs may have originalError and token.content
81- // but these aren't in the public type definitions
144+ function countError ( error : TokenizationError , where : string , file : string ) {
82145 const errorWithExtras = error as TokenizationError & {
83146 originalError ?: Error
84147 token ?: { content ?: string }
85148 }
86149 const originalError = errorWithExtras . originalError
87150 const errorString = originalError ? originalError . message : error . message
151+
152+ let illegalTag : string | undefined
88153 if ( errorString . includes ( 'illegal tag syntax' ) && errorWithExtras . token ?. content ) {
89- illegalTags . set (
90- errorWithExtras . token . content ,
91- ( illegalTags . get ( errorWithExtras . token . content ) || 0 ) + 1 ,
92- )
154+ illegalTag = errorWithExtras . token . content
155+ illegalTags . set ( illegalTag , ( illegalTags . get ( illegalTag ) || 0 ) + 1 )
93156 }
94- errors . set ( errorString , ( errors . get ( errorString ) || 0 ) + 1 )
157+
158+ corruptions . push ( { file, location : where , error : errorString , illegalTag } )
95159 wheres . set ( where , ( wheres . get ( where ) || 0 ) + 1 )
96160 }
97161
98- for ( const page of pageList ) {
162+ for ( const page of site . pageList ) {
99163 if ( page . languageCode !== languageCode ) continue
100164
101165 const strings : string [ ] [ ] = [
@@ -110,7 +174,7 @@ function run(languageCode: string, site: Site, englishReusables: Reusables) {
110174 getLiquidTokens ( string )
111175 } catch ( error ) {
112176 if ( error instanceof TokenizationError ) {
113- countError ( error , where )
177+ countError ( error , where , page . relativePath )
114178 } else {
115179 throw error
116180 }
@@ -129,7 +193,7 @@ function run(languageCode: string, site: Site, englishReusables: Reusables) {
129193 getLiquidTokens ( correctedContent )
130194 } catch ( error ) {
131195 if ( error instanceof TokenizationError ) {
132- countError ( error , 'reusable' )
196+ countError ( error , 'reusable' , relativePath )
133197 } else if ( error instanceof Error && error . message . startsWith ( 'ENOENT' ) ) {
134198 continue
135199 } else {
@@ -138,35 +202,17 @@ function run(languageCode: string, site: Site, englishReusables: Reusables) {
138202 }
139203 }
140204
141- const flat = Array . from ( errors . entries ( ) ) . sort ( ( a , b ) => b [ 1 ] - a [ 1 ] )
142- const sumTotal = flat . reduce ( ( acc , [ , count ] ) => acc + count , 0 )
143-
144- console . log ( '\nMost common errors' )
145- for ( let i = 0 ; i < flat . length ; i ++ ) {
146- const [ error , count ] = flat [ i ]
147- console . log ( `${ i + 1 } .` . padEnd ( 3 ) , error . padEnd ( PADDING ) , count )
205+ const topIllegalTags = Array . from ( illegalTags . entries ( ) )
206+ . sort ( ( a , b ) => b [ 1 ] - a [ 1 ] )
207+ . slice ( 0 , 10 )
208+ . map ( ( [ tag , count ] ) => ( { tag, count } ) )
209+
210+ return {
211+ language : languageCode ,
212+ languageName : language . name ,
213+ total : corruptions . length ,
214+ corruptions,
215+ byLocation : Object . fromEntries ( wheres ) ,
216+ topIllegalTags,
148217 }
149- console . log ( `${ 'TOTAL:' . padEnd ( 3 + 1 + PADDING ) } ` , sumTotal )
150-
151- if ( sumTotal ) {
152- const whereFlat = Array . from ( wheres . entries ( ) ) . sort ( ( a , b ) => b [ 1 ] - a [ 1 ] )
153- console . log ( '\nMost common places' )
154- for ( let i = 0 ; i < whereFlat . length ; i ++ ) {
155- const [ error , count ] = whereFlat [ i ]
156- console . log ( `${ i + 1 } .` . padEnd ( 3 ) , error . padEnd ( PADDING ) , count )
157- }
158-
159- const illegalTagsFlat = Array . from ( illegalTags . entries ( ) ) . sort ( ( a , b ) => b [ 1 ] - a [ 1 ] )
160- if ( illegalTagsFlat . reduce ( ( acc , [ , count ] ) => acc + count , 0 ) ) {
161- console . log ( '\nMost common illegal tags' , illegalTagsFlat . length > 10 ? ' (Top 10)' : '' )
162- const topIllegalTags = illegalTagsFlat . slice ( 0 , 10 )
163- for ( let i = 0 ; i < topIllegalTags . length ; i ++ ) {
164- const [ error , count ] = topIllegalTags [ i ]
165- console . log ( `${ i + 1 } .` . padEnd ( 3 ) , error . padEnd ( PADDING ) , count )
166- }
167- }
168- }
169- console . log ( '\n' )
170-
171- return { errors }
172218}
0 commit comments