Skip to content

Commit 63550e1

Browse files
heiskr and Copilot authored
🌎 Fix translation corruption rules: add dash variants, remove redundancies (#60816)
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 1af8a1c commit 63550e1

File tree

2 files changed

+178
-86
lines changed

2 files changed

+178
-86
lines changed

src/languages/lib/correct-translation-content.ts

Lines changed: 66 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -43,10 +43,9 @@ export function correctTranslatedContentStrings(
4343
content = content.replaceAll('{% Datos variables', '{% data variables')
4444
// `{% dato ` — singular form of "datos" = data
4545
content = content.replaceAll('{% dato variables', '{% data variables')
46-
// `{% variables.` — missing "data" prefix
47-
content = content.replaceAll('{% variables.', '{% data variables.')
4846
// Translated Liquid keywords
4947
content = content.replaceAll('{% comentario %}', '{% comment %}')
48+
content = content.replaceAll('{%- comentario %}', '{%- comment %}')
5049
content = content.replaceAll('{% si ', '{% if ')
5150
content = content.replaceAll('{% sin procesar %}', '{% raw %}')
5251
content = content.replaceAll('{% %} sin procesar', '{% raw %}')
@@ -110,6 +109,7 @@ export function correctTranslatedContentStrings(
110109
content = content.replaceAll('{% データ再利用可能.', '{% data reusables.')
111110
content = content.replaceAll('{% データ再利用.', '{% data reusables.')
112111
content = content.replaceAll('{% メモ %}', '{% note %}')
112+
content = content.replaceAll('{%- メモ %}', '{%- note %}')
113113
// Double-brace corruption of `{% data`: `{% {{データ}} variables.` → `{% data variables.`
114114
content = content.replaceAll('{{データ}} variables.', 'data variables.')
115115
// Catch "または" between any plan names in ifversion/elsif tags
@@ -248,7 +248,9 @@ export function correctTranslatedContentStrings(
248248
content = content.replaceAll('{{% dados ', '{% data ')
249249
content = content.replaceAll('{{% datas ', '{% data ')
250250
content = content.replaceAll('{% senão %}', '{% else %}')
251+
content = content.replaceAll('{%- senão %}', '{%- else %}')
251252
content = content.replaceAll('{% mais %}', '{% else %}')
253+
content = content.replaceAll('{%- mais %}', '{%- else %}')
252254
content = content.replaceAll('{% se ', '{% if ')
253255
content = content.replaceAll('{% atribuir ', '{% assign ')
254256
content = content.replaceAll('{% %} bruto', '{% raw %}')
@@ -268,6 +270,7 @@ export function correctTranslatedContentStrings(
268270
content = content.replaceAll('{%- observação %}', '{%- note %}')
269271
// `{% comentário %}` — Portuguese "comment" = comment
270272
content = content.replaceAll('{% comentário %}', '{% comment %}')
273+
content = content.replaceAll('{%- comentário %}', '{%- comment %}')
271274
// `{% nota de fim %}` — Portuguese "end note" = endnote
272275
content = content.replaceAll('{% nota de fim %}', '{% endnote %}')
273276
content = content.replaceAll('{%- nota de fim %}', '{%- endnote %}')
@@ -294,7 +297,9 @@ export function correctTranslatedContentStrings(
294297
content = content.replaceAll('{% 数据可重用s.', '{% data reusables.')
295298
content = content.replaceAll('{% 数据可重用', '{% data reusables')
296299
content = content.replaceAll('{% 其他 %}', '{% else %}')
300+
content = content.replaceAll('{%- 其他 %}', '{%- else %}')
297301
content = content.replaceAll('{% 原始 %}', '{% raw %}')
302+
content = content.replaceAll('{%- 原始 %}', '{%- raw %}')
298303
// `{% 否则 %}` — "otherwise" = else (different Chinese word than 其他)
299304
content = content.replaceAll('{% 否则 %}', '{% else %}')
300305
content = content.replaceAll('{%- 否则 %}', '{%- else %}')
@@ -350,6 +355,7 @@ export function correctTranslatedContentStrings(
350355
// Stray `,` replacing `data`
351356
content = content.replaceAll('{%, variables.', '{% data variables.')
352357
content = content.replaceAll('{% необработанного %}', '{% raw %}')
358+
content = content.replaceAll('{%- необработанного %}', '{%- raw %}')
353359
content = content.replaceAll('{%- ifversion fpt или ghec %}', '{%- ifversion fpt or ghec %}')
354360
content = content.replaceAll('{% ifversion fpt или ghec %}', '{% ifversion fpt or ghec %}')
355361
content = content.replaceAll('{% ifversion ghec или fpt %}', '{% ifversion ghec or fpt %}')
@@ -359,8 +365,8 @@ export function correctTranslatedContentStrings(
359365
content = content.replace(/\{%-? (?:ifversion|elsif|if) [^%]*?или[^%]*?%\}/g, (match) => {
360366
return match.replace(/ или /g, ' or ')
361367
})
362-
content = content.replaceAll('{% endif _%}', '{% endif %}')
363368
content = content.replaceAll('{% конечным %}', '{% endif %}')
369+
content = content.replaceAll('{%- конечным %}', '{%- endif %}')
364370
// `{%- конец %}` — dash-trimmed form of "end" = endif
365371
content = content.replaceAll('{%- конец %}', '{%- endif %}')
366372
// `{%- конец для %}` — "end for" = endfor
@@ -382,6 +388,7 @@ export function correctTranslatedContentStrings(
382388
content = content.replaceAll('{% конец %}', '{% endif %}')
383389
// Cyrillic transliteration of `elsif` (lossy → else, since version param is lost)
384390
content = content.replaceAll('{% Эльсиф %}', '{% else %}')
391+
content = content.replaceAll('{%- Эльсиф %}', '{%- else %}')
385392
// Translated feature flag names
386393
content = content.replaceAll(
387394
'обязательный-2fa-dotcom-участник',
@@ -396,40 +403,31 @@ export function correctTranslatedContentStrings(
396403
content = content.replaceAll('{% переменных данных.', '{% data variables.')
397404
content = content.replaceAll('{% повторно используемых данных.', '{% data reusables.')
398405
content = content.replaceAll('{% примечание %}', '{% note %}')
406+
content = content.replaceAll('{%- примечание %}', '{%- note %}')
399407
content = content.replaceAll('{% конечных головщиков %}', '{% endrowheaders %}')
400408
content = content.replaceAll('{% данных для повторного использования.', '{% data reusables.')
401409
content = content.replaceAll('{% еще %}', '{% else %}')
410+
content = content.replaceAll('{%- еще %}', '{%- else %}')
402411
content = content.replaceAll('{% ещё %}', '{% else %}')
412+
content = content.replaceAll('{%- ещё %}', '{%- else %}')
403413
// `{% иначе %}` — "otherwise" = else
404414
content = content.replaceAll('{% иначе %}', '{% else %}')
405415
content = content.replaceAll('{%- иначе %}', '{%- else %}')
406416
content = content.replaceAll('{% необработанные %}', '{% raw %}')
417+
content = content.replaceAll('{%- необработанные %}', '{%- raw %}')
407418
content = content.replaceAll('{% необработанный %}', '{% raw %}')
419+
content = content.replaceAll('{%- необработанный %}', '{%- raw %}')
408420
content = content.replaceAll('{% сырой %}', '{% raw %}')
421+
content = content.replaceAll('{%- сырой %}', '{%- raw %}')
409422
content = content.replaceAll('{% нарисовать %}', '{% endraw %}')
423+
content = content.replaceAll('{%- нарисовать %}', '{%- endraw %}')
410424
content = content.replaceAll('{% эндкёрл %}', '{% endcurl %}')
425+
content = content.replaceAll('{%- эндкёрл %}', '{%- endcurl %}')
411426
content = content.replaceAll('{% запроса %}', '{% endraw %}')
412-
// `{% Mac %}` — capitalized mac platform tag
413-
content = content.replaceAll('{% Mac %}', '{% mac %}')
414-
// `{% Endwindows %}` — capitalized endwindows
415-
content = content.replaceAll('{% Endwindows %}', '{% endwindows %}')
416-
content = content.replaceAll('{%- Endwindows %}', '{%- endwindows %}')
417-
// `{% Elsif ` — capitalized elsif
418-
content = content.replace(/\{% Elsif /g, '{% elsif ')
419-
// `{% Linux %}` — capitalized linux platform tag
420-
content = content.replaceAll('{% Linux %}', '{% linux %}')
421-
content = content.replaceAll('{%- Linux %}', '{%- linux %}')
427+
content = content.replaceAll('{%- запроса %}', '{%- endraw %}')
422428
// `{% джетмозги %}` — Russian literal translation of "JetBrains" (джет=jet, мозги=brains)
423429
content = content.replaceAll('{% джетмозги %}', '{% jetbrains %}')
424430
content = content.replaceAll('{%- джетмозги %}', '{%- jetbrains %}')
425-
// Fix double quotes in Russian YAML files that cause parsing errors
426-
content = content.replace(/href=""https:\/\//g, 'href="https://')
427-
428-
// Fix empty HTML tags that cause YAML parsing issues
429-
content = content.replaceAll('<b></b>', '')
430-
content = content.replaceAll('<u></u>', '')
431-
content = content.replace(/early_access:\s*"([^"]*)<b><\/b>([^"]*)"/, 'early_access: "$1$2"')
432-
content = content.replace(/(privacy_disclaimer:[^<]*)<u><\/u>/g, '$1')
433431

434432
// Russian translation of github-glossary.md
435433
content = content.replaceAll(
@@ -470,9 +468,12 @@ export function correctTranslatedContentStrings(
470468
content = content.replaceAll('{% données réutilisables.', '{% data reusables.')
471469
content = content.replaceAll('{% variables de données.', '{% data variables.')
472470
content = content.replaceAll('{% autre %}', '{% else %}')
471+
content = content.replaceAll('{%- autre %}', '{%- else %}')
473472
content = content.replaceAll('{% brut %}', '{% raw %}')
473+
content = content.replaceAll('{%- brut %}', '{%- raw %}')
474474
content = content.replaceAll('{% %brut }', '{% raw %}')
475475
content = content.replaceAll('{% redessiner %}', '{% endraw %}')
476+
content = content.replaceAll('{%- redessiner %}', '{%- endraw %}')
476477
content = content.replaceAll('{% données ', '{% data ')
477478
// `{% Données ` — capitalized form
478479
content = content.replaceAll('{% Données variables', '{% data variables')
@@ -553,13 +554,12 @@ export function correctTranslatedContentStrings(
553554
content = content.replaceAll('{% 데이터 reusables.', '{% data reusables.')
554555
content = content.replaceAll('{% 데이터 변수.', '{% data variables.')
555556
content = content.replaceAll('{% 데이터 변숫값.', '{% data variables.')
556-
content = content.replaceAll('{% dada variables', '{% data variables')
557-
// Extra `%` before data: `{% % data` → `{% data`
558-
content = content.replaceAll('{% % data', '{% data')
559557
content = content.replaceAll('{% 기타 %}', '{% else %}')
560558
content = content.replaceAll('{%- 기타 %}', '{%- else %}')
561559
content = content.replaceAll('{% 참고 %}', '{% note %}')
560+
content = content.replaceAll('{%- 참고 %}', '{%- note %}')
562561
content = content.replaceAll('{% 원시 %}', '{% raw %}')
562+
content = content.replaceAll('{%- 원시 %}', '{%- raw %}')
563563
// Catch "또는" between any plan names in ifversion/elsif/if tags
564564
content = content.replace(/\{%-? (?:ifversion|elsif|if) [^%]*?또는[^%]*?%\}/g, (match) => {
565565
return match.replace(/ 또는 /g, ' or ')
@@ -569,13 +569,12 @@ export function correctTranslatedContentStrings(
569569
content = content.replaceAll('{%- 그렇지 않으면 %}', '{%- else %}')
570570
// `{% 옥티콘` — Korean transliteration of "octicon"
571571
content = content.replaceAll('{% 옥티콘 ', '{% octicon ')
572+
content = content.replaceAll('{%- 옥티콘 ', '{%- octicon ')
572573

573574
// Korean translation of github-glossary.md
574575
content = content.replaceAll('{{ 용어집.term }}', '{{ glossary.term }}')
575576
// `{% 데이터 재사용.` — Korean translation of "data reusables" path
576577
content = content.replaceAll('{% 데이터 재사용.', '{% data reusables.')
577-
// `{% datavariable` — compound missing space and plural: "datavariable" → "data variables"
578-
content = content.replaceAll('{% datavariable', '{% data variables')
579578
// `{% 행 머리글 %}` — "row headers" = rowheaders
580579
content = content.replaceAll('{% 행 머리글 %}', '{% rowheaders %}')
581580
content = content.replaceAll('{%- 행 머리글 %}', '{%- rowheaders %}')
@@ -593,13 +592,13 @@ export function correctTranslatedContentStrings(
593592
if (context.code === 'de') {
594593
content = content.replaceAll('{% Daten variables', '{% data variables')
595594
content = content.replaceAll('{% daten variables', '{% data variables')
596-
content = content.replaceAll('{% Data variables', '{% data variables')
597595
content = content.replaceAll('{% Daten reusables', '{% data reusables')
598-
content = content.replaceAll('{% Data reusables', '{% data reusables')
596+
content = content.replaceAll('{%- Daten reusables', '{%- data reusables')
599597
// `wiederverwendbare` is German for "reusables" — fix translated reusables paths
600598
content = content.replaceAll('{% data wiederverwendbare.', '{% data reusables.')
601599
content = content.replaceAll('{% Daten wiederverwendbare.', '{% data reusables.')
602600
content = content.replaceAll('{% Data wiederverwendbare.', '{% data reusables.')
601+
content = content.replaceAll('{%- Daten wiederverwendbare.', '{%- data reusables.')
603602
// `wiederverwendbar.` (without trailing 'e') — alternate German form
604603
content = content.replaceAll('{% Daten wiederverwendbar.', '{% data reusables.')
605604
// `daten wiederverwendbars.` — lowercase with trailing 's'
@@ -613,6 +612,7 @@ export function correctTranslatedContentStrings(
613612
content = content.replaceAll('{% Daten Variablen.', '{% data variables.')
614613
// `{% daten reusables` — lowercase with English "reusables"
615614
content = content.replaceAll('{% daten reusables', '{% data reusables')
615+
content = content.replaceAll('{%- daten reusables', '{%- data reusables')
616616
// `{% unformatierte %}` — "unformatted" = raw
617617
content = content.replaceAll('{% unformatierte %}', '{% raw %}')
618618
content = content.replaceAll('{%- unformatierte %}', '{%- raw %}')
@@ -659,21 +659,9 @@ export function correctTranslatedContentStrings(
659659
content = content.replaceAll('{% Rohdaten %}', '{% raw %}')
660660
content = content.replaceAll('{%- Rohdaten %}', '{%- raw %}')
661661
content = content.replaceAll('{%- Rohdaten -%}', '{%- raw -%}')
662-
// `{% okticon ` — "octicon" transliterated to "okticon"
663-
content = content.replaceAll('{% okticon ', '{% octicon ')
664662
// `{% Endnotiz %}` — "end note" = endnote
665663
content = content.replaceAll('{% Endnotiz %}', '{% endnote %}')
666664
content = content.replaceAll('{%- Endnotiz %}', '{%- endnote %}')
667-
// `{% endifen %}` — garbled "endif" = endif
668-
content = content.replaceAll('{% endifen %}', '{% endif %}')
669-
content = content.replaceAll('{%- endifen %}', '{%- endif %}')
670-
// `{% Endifen %}` — capitalized variant
671-
content = content.replaceAll('{% Endifen %}', '{% endif %}')
672-
content = content.replaceAll('{%- Endifen %}', '{%- endif %}')
673-
// `{% Endif %}` — capitalized endif
674-
content = content.replaceAll('{% Endif %}', '{% endif %}')
675-
content = content.replaceAll('{%- Endif %}', '{%- endif %}')
676-
content = content.replaceAll('{%- Endif -%}', '{%- endif -%}')
677665
// `{% Dateninstanz` — "data instance" = data
678666
content = content.replaceAll('{% Dateninstanz ', '{% data ')
679667
// `{% ifversion-Sicherheitskonfigurationen %}` — hyphenated compound
@@ -697,33 +685,6 @@ export function correctTranslatedContentStrings(
697685
'{%- ifversion-Sicherheitskampagnen %}',
698686
'{%- ifversion security-campaigns %}',
699687
)
700-
// `{% ifversion-repo-policy-rules %}` — missing space before feature flag
701-
content = content.replaceAll(
702-
'{% ifversion-repo-policy-rules %}',
703-
'{% ifversion repo-policy-rules %}',
704-
)
705-
content = content.replaceAll(
706-
'{%- ifversion-repo-policy-rules %}',
707-
'{%- ifversion repo-policy-rules %}',
708-
)
709-
// `{% ifversion-enterprise-installed-apps %}` — missing space before feature flag
710-
content = content.replaceAll(
711-
'{% ifversion-enterprise-installed-apps %}',
712-
'{% ifversion enterprise-installed-apps %}',
713-
)
714-
content = content.replaceAll(
715-
'{%- ifversion-enterprise-installed-apps %}',
716-
'{%- ifversion enterprise-installed-apps %}',
717-
)
718-
// `{% Windows %}` — capitalized platform tag
719-
content = content.replaceAll('{% Windows %}', '{% windows %}')
720-
content = content.replaceAll('{%- Windows %}', '{%- windows %}')
721-
// `{% Linux %}` — capitalized platform tag
722-
content = content.replaceAll('{% Linux %}', '{% linux %}')
723-
content = content.replaceAll('{%- Linux %}', '{%- linux %}')
724-
// `{% Eclipse %}` — capitalized platform tag
725-
content = content.replaceAll('{% Eclipse %}', '{% eclipse %}')
726-
content = content.replaceAll('{%- Eclipse %}', '{%- eclipse %}')
727688
}
728689

729690
// --- Generic fixes (all languages) ---
@@ -734,8 +695,9 @@ export function correctTranslatedContentStrings(
734695
// words don't concatenate.
735696
content = content.replace(/\s*<\|endoftext\|>\s*/g, ' ')
736697

737-
// Capitalized Liquid keyword: `{% Data ` → `{% data `
698+
// Capitalized Liquid keyword: `{% Data ` / `{%- Data ` → `{% data ` / `{%- data `
738699
content = content.replaceAll('{% Data ', '{% data ')
700+
content = content.replaceAll('{%- Data ', '{%- data ')
739701

740702
// Capitalized platform tags (cross-language)
741703
content = content.replaceAll('{% Windows %}', '{% windows %}')
@@ -744,6 +706,40 @@ export function correctTranslatedContentStrings(
744706
content = content.replaceAll('{%- Linux %}', '{%- linux %}')
745707
content = content.replaceAll('{% Eclipse %}', '{% eclipse %}')
746708
content = content.replaceAll('{%- Eclipse %}', '{%- eclipse %}')
709+
content = content.replaceAll('{% Mac %}', '{% mac %}')
710+
content = content.replaceAll('{%- Mac %}', '{%- mac %}')
711+
712+
// Capitalized Liquid keywords (cross-language)
713+
content = content.replaceAll('{% Endwindows %}', '{% endwindows %}')
714+
content = content.replaceAll('{%- Endwindows %}', '{%- endwindows %}')
715+
content = content.replace(/\{% Elsif /g, '{% elsif ')
716+
content = content.replaceAll('{% Endif %}', '{% endif %}')
717+
content = content.replaceAll('{%- Endif %}', '{%- endif %}')
718+
content = content.replaceAll('{%- Endif -%}', '{%- endif -%}')
719+
720+
// Garbled "endif" variants
721+
content = content.replaceAll('{% endifen %}', '{% endif %}')
722+
content = content.replaceAll('{%- endifen %}', '{%- endif %}')
723+
content = content.replaceAll('{% Endifen %}', '{% endif %}')
724+
content = content.replaceAll('{%- Endifen %}', '{%- endif %}')
725+
content = content.replaceAll('{% endif _%}', '{% endif %}')
726+
727+
// Near-miss "octicon" typo
728+
content = content.replaceAll('{% okticon ', '{% octicon ')
729+
730+
// Typos in "data" keyword
731+
content = content.replaceAll('{% dada variables', '{% data variables')
732+
content = content.replaceAll('{% % data', '{% data')
733+
734+
// Double-quote corruption in href attributes
735+
content = content.replace(/href=""https:\/\//g, 'href="https://')
736+
737+
// Empty HTML tags that cause parsing issues
738+
content = content.replaceAll('<b></b>', '')
739+
content = content.replaceAll('<u></u>', '')
740+
741+
// `{% ifversion-FEATURE %}` — hyphen instead of space before lowercase feature flag
742+
content = content.replace(/(\{%-? )ifversion-([a-z][\w-]*\s*%\})/g, '$1ifversion $2')
747743

748744
// These run after per-language fixes so that e.g. `{{% данных variables`
749745
// first becomes `{{% data variables` and then gets caught here.
@@ -810,6 +806,8 @@ export function correctTranslatedContentStrings(
810806
content = content.replaceAll('{% variables.', '{% data variables.')
811807
content = content.replaceAll('{% reusables.', '{% data reusables.')
812808
content = content.replaceAll('{% datavariables', '{% data variables')
809+
content = content.replaceAll('{% datavariable.', '{% data variables.')
810+
content = content.replaceAll('{% datavariable ', '{% data variables ')
813811

814812
// Empty `{% %}` corruptions where the tag name was removed.
815813
content = content.replaceAll('{% %} de dados reusables.', '{% data reusables.')

0 commit comments

Comments
 (0)