Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions .github/workflows/refresh-v3-languages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ concurrency:

env:
REFRESH_BRANCH: chore/refresh-v3-languages
COMMIT_PATHS: data/v3-languages
COMMIT_PATHS: data/v3-languages snippets/language-table.jsx

jobs:
refresh:
Expand Down Expand Up @@ -58,6 +58,9 @@ jobs:
DEEPL_AUTH_KEY: ${{ secrets.DEEPL_API_KEY }}
run: python3 scripts/fetch_v3_languages.py

- name: Regenerate language-table.jsx from vended data
run: python3 scripts/generate_language_table.py

- name: Detect content changes
id: diff
run: |
Expand Down Expand Up @@ -101,7 +104,7 @@ jobs:
--base main \
--head "$REFRESH_BRANCH" \
--title "chore(v3-languages): refresh vended responses" \
--body "Automated refresh of \`data/v3-languages/\` from \`https://api.deepl.com/v3/languages\`, opened by the \`refresh-v3-languages\` workflow.
--body "Automated refresh of \`data/v3-languages/\` from \`https://api.deepl.com/v3/languages\`, with \`snippets/language-table.jsx\` regenerated from the new responses. Opened by the \`refresh-v3-languages\` workflow.

Subsequent runs append new commits to this PR when the responses change again, so the diff against \`main\` represents the cumulative update. Review the latest state and merge when ready.

Expand Down
17 changes: 17 additions & 0 deletions data/translation-memory.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"_comment": "Languages that support Translation Memory in the DeepL platform. Translation Memory is not exposed via /v3/languages, so this list is maintained manually. Codes use the same casing as the generated language-table.jsx (uppercase, hyphen-separated). See docs/learning-how-tos/examples-and-guides/how-to-use-translation-memories.",
"languages": [
"DE",
"EN",
"EN-GB",
"EN-US",
"ES",
"ES-419",
"FR",
"IT",
"JA",
"KO",
"ZH",
"ZH-HANS"
]
}
175 changes: 175 additions & 0 deletions scripts/generate_language_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
#!/usr/bin/env python3
"""Regenerate the inline language data inside snippets/language-table.jsx.

Reads the vended JSON under data/v3-languages/ plus the manual
translation-memory list at data/translation-memory.json, then rewrites the
`languageData` array between the BEGIN/END GENERATED markers in
snippets/language-table.jsx. Everything else in the JSX file is left
alone.

Source-of-truth mappings (per language code returned by /v3/languages):

code uppercased BCP 47 from translate_text.json (de-CH -> DE-CH)
name translate_text "name"
translation true if present in translate_text.json
isVariant translate_text usable_as_source=false and usable_as_target=true
isBeta translate_text status != "stable"
glossaries translate_text features contain "glossary"
tagHandling translate_text features contain "tag_handling"
textImprovement language is present in write.json
styleRules translate_text features contain "style_rules"
translationMemory code is in data/translation-memory.json
"""
from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path

# Two directory levels above this script's resolved location.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Directory containing the vended /v3/languages JSON responses.
DATA_DIR = REPO_ROOT / "data" / "v3-languages"
# Manually maintained list of languages that support translation memory.
TM_FILE = REPO_ROOT / "data" / "translation-memory.json"
# JSX snippet whose languageData array this script rewrites.
JSX_FILE = REPO_ROOT / "snippets" / "language-table.jsx"

# Comment lines that delimit the generated region inside JSX_FILE.
BEGIN_MARKER = " // BEGIN GENERATED: languageData (do not edit; run scripts/generate_language_table.py)"
END_MARKER = " // END GENERATED"


def load_json(path: Path) -> object:
    """Parse the UTF-8 encoded JSON document stored at *path* and return it."""
    text = path.read_text(encoding="utf-8")
    return json.loads(text)


def upper_code(lang: str) -> str:
    """Return *lang* converted to upper case (for example ``de-CH`` -> ``DE-CH``)."""
    code = lang.upper()
    return code


def build_rows(tm_codes: set[str]) -> list[dict]:
    """Build one table row per entry in ``translate_text.json``.

    Reads ``translate_text.json`` and ``write.json`` from ``DATA_DIR``.

    Args:
        tm_codes: Upper-cased language codes that support translation
            memory; a row's ``translationMemory`` flag is true when its
            code is in this set.

    Returns:
        A list of row dicts, non-variant languages first and variants
        after, each group ordered by ``code``. Every row carries ``code``,
        ``name``, ``translation``, ``isVariant``, ``glossaries``,
        ``tagHandling``, ``textImprovement``, ``translationMemory`` and
        ``styleRules``. ``isBeta`` is added (as ``True``) only when the
        entry's ``status`` is not ``"stable"``.

    Raises:
        KeyError: if an entry lacks ``lang``, ``name``,
            ``usable_as_source`` or ``usable_as_target``.
    """
    translate_text = load_json(DATA_DIR / "translate_text.json")
    write_langs = {entry["lang"] for entry in load_json(DATA_DIR / "write.json")}

    rows: list[dict] = []
    for entry in translate_text:
        lang = entry["lang"]
        code = upper_code(lang)
        features = entry.get("features", {})
        row = {
            "code": code,
            "name": entry["name"],
            "translation": True,
            # A variant can be translated into but not used as a source.
            "isVariant": not entry["usable_as_source"] and entry["usable_as_target"],
            "glossaries": "glossary" in features,
            "tagHandling": "tag_handling" in features,
            # write.json is matched on the original (non-uppercased) code.
            "textImprovement": lang in write_langs,
            "translationMemory": code in tm_codes,
            "styleRules": "style_rules" in features,
        }
        # A missing status is treated the same as any non-"stable" status.
        if entry.get("status") != "stable":
            row["isBeta"] = True
        rows.append(row)

    # Existing layout grouped non-variants first, then variants. Preserve
    # a stable, readable order: base languages alphabetically, then variants
    # alphabetically. (False sorts before True, so non-variants come first.)
    rows.sort(key=lambda r: (r["isVariant"], r["code"]))
    return rows


def js_literal(value: object) -> str:
    """Render *value* as JavaScript literal source text.

    ``True``/``False`` become ``true``/``false``. Strings become
    single-quoted literals with backslashes, single quotes, line feeds and
    carriage returns escaped.

    Raises:
        TypeError: if *value* is neither a bool nor a str.
    """
    if value is True:
        return "true"
    if value is False:
        return "false"
    if isinstance(value, str):
        # Backslash must be escaped first so later escapes are not doubled.
        escaped = value.replace("\\", "\\\\").replace("'", "\\'")
        # A raw line break inside a quoted JS string literal is a syntax
        # error, so emit the escape sequences instead.
        escaped = escaped.replace("\n", "\\n").replace("\r", "\\r")
        return f"'{escaped}'"
    raise TypeError(f"Unsupported literal: {value!r}")


def format_row(row: dict) -> str:
    """Render one row dict as a single-line JS object literal ending in a comma.

    Keys are emitted in a fixed order so regenerated output diffs cleanly;
    any listed key that is absent from *row* is skipped.
    """
    key_order = (
        "code",
        "name",
        "translation",
        "isVariant",
        "isBeta",
        "glossaries",
        "tagHandling",
        "textImprovement",
        "translationMemory",
        "styleRules",
    )
    fields = []
    for key in key_order:
        if key not in row:
            continue
        fields.append(f"{key}: {js_literal(row[key])}")
    body = ", ".join(fields)
    return " { " + body + " },"


def render_block(rows: list[dict]) -> str:
    """Return the full generated region as newline-joined text.

    The region is the BEGIN marker, the ``languageData`` array opener, one
    formatted line per row, the array closer and the END marker. No trailing
    newline is appended.
    """
    rendered = [BEGIN_MARKER, " const languageData = ["]
    rendered.extend(format_row(row) for row in rows)
    rendered += [" ]", END_MARKER]
    return "\n".join(rendered)


def replace_block(jsx: str, new_block: str) -> str:
    """Return *jsx* with its ``languageData`` block replaced by *new_block*.

    Two layouts are handled:

    * Marker mode: the text from the first BEGIN_MARKER through the first
      END_MARKER that follows it is replaced.
    * First-time install: when no such marker pair exists, the hand-written
      ``const languageData = [ ... ]`` block (including its leading comment
      line and closing ``]``) is replaced.

    Args:
        jsx: Current contents of the JSX file.
        new_block: Replacement text; it is expected to carry the markers
            itself and to have no trailing newline.

    Raises:
        RuntimeError: if no usable marker pair exists and the hand-written
            block (or its closing ``]``) cannot be located.
    """
    before, begin_found, rest = jsx.partition(BEGIN_MARKER)
    if begin_found:
        # Search for END only in the text after BEGIN. Checking the whole
        # file would accept an END that precedes BEGIN, leaving `after`
        # empty and silently truncating everything past BEGIN.
        _, end_found, after = rest.partition(END_MARKER)
        if end_found:
            return before + new_block + after

    # First-time install: find the existing `const languageData = [ ... ]`
    # block (up to and including its closing `]`) and replace it.
    needle = " // Language data with individual feature support\n const languageData = ["
    start = jsx.find(needle)
    if start == -1:
        raise RuntimeError(
            "Could not locate languageData block to replace. Insert the BEGIN/END "
            "GENERATED markers manually before running this script."
        )
    end_anchor = "\n ]\n"
    end = jsx.find(end_anchor, start)
    if end == -1:
        raise RuntimeError("Could not find closing ']' of languageData array.")
    # end_anchor swallows the newline after `]` and new_block has none of its
    # own, so restore it; otherwise the following line of the file would be
    # appended to the END marker comment line.
    return jsx[:start] + new_block + "\n" + jsx[end + len(end_anchor) :]


def main() -> int:
    """Regenerate the language table, or only verify it with ``--check``.

    Returns:
        0 when the JSX file is already up to date or was rewritten,
        1 when ``--check`` is given and the file would change,
        2 when the translation-memory file lacks a ``languages`` key.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--check",
        action="store_true",
        help="Exit 1 if the file would change instead of writing.",
    )
    options = cli.parse_args()

    tm_payload = load_json(TM_FILE)
    has_languages = isinstance(tm_payload, dict) and "languages" in tm_payload
    if not has_languages:
        print(f"error: {TM_FILE} is missing the 'languages' key", file=sys.stderr)
        return 2
    tm_codes = {code.upper() for code in tm_payload["languages"]}

    table_rows = build_rows(tm_codes)
    generated = render_block(table_rows)

    on_disk = JSX_FILE.read_text(encoding="utf-8")
    regenerated = replace_block(on_disk, generated)

    if regenerated != on_disk:
        if options.check:
            # Check mode reports staleness without touching the file.
            print(f"{JSX_FILE}: out of date", file=sys.stderr)
            return 1
        JSX_FILE.write_text(regenerated, encoding="utf-8")
        print(f"{JSX_FILE}: regenerated ({len(table_rows)} languages)")
    else:
        print(f"{JSX_FILE}: up to date")
    return 0


# When run as a script, use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Loading