Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions .github/scripts/i18n/mdx_repair_scope.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#!/usr/bin/env python3
"""Guard the generated-doc MDX repair step.

Definition:
This script records the locale files that were already untracked before the
MDX repair agent ran, then verifies that the repair only changed allowed
locale-controlled paths and did not create additional untracked locale files.

Parameters:
command: snapshot or enforce.
--baseline: File path used to store the pre-repair untracked locale snapshot.
--workspace: Git workspace root. Default: GITHUB_WORKSPACE or current dir.
--locale: Locale directory name. Default: LOCALE environment variable.

Outputs:
snapshot writes the baseline file and prints its path.
enforce prints a short success message or exits non-zero with offending paths.

Examples:
LOCALE=fr python .github/scripts/i18n/mdx_repair_scope.py snapshot --baseline .openclaw-sync/mdx/fr.repair-baseline.txt
LOCALE=fr python .github/scripts/i18n/mdx_repair_scope.py enforce --baseline .openclaw-sync/mdx/fr.repair-baseline.txt
"""

from __future__ import annotations

import argparse
import os
import subprocess
from pathlib import Path


def git_lines(workspace: Path, args: list[str]) -> list[str]:
    """Run a git command in *workspace* and return its non-empty output lines.

    Args:
        workspace: Directory used as the working directory for git.
        args: Arguments placed after ``git`` (e.g. ``["diff", "--name-only"]``).

    Returns:
        Every stdout line with surrounding whitespace stripped; blank lines
        are dropped.

    Raises:
        subprocess.CalledProcessError: If git exits non-zero (``check=True``).
    """
    # By default git C-quotes path names that contain non-ASCII bytes, so a
    # locale page such as docs/fr/café.md is printed as
    # "docs/fr/caf\303\251.md" (surrounding quotes included).  A quoted path
    # never starts with "docs/<locale>/", so a legitimate repair of that page
    # would be reported as a forbidden path.  Disabling core.quotePath makes
    # git print such names literally.
    result = subprocess.run(
        ["git", "-c", "core.quotePath=false", *args],
        cwd=workspace,
        check=True,
        stdout=subprocess.PIPE,
        # Decode explicitly: unquoted paths are raw bytes (normally UTF-8) and
        # must not depend on the runner's locale; undecodable bytes are
        # replaced instead of aborting the guard.
        encoding="utf-8",
        errors="replace",
    )
    # Split on "\n" only: str.splitlines() also breaks on Unicode separators
    # (U+2028, U+0085, ...) that may now legitimately appear inside a path.
    return [line.strip() for line in result.stdout.split("\n") if line.strip()]


def untracked_locale_files(workspace: Path, locale: str) -> list[str]:
    """Return the sorted untracked, non-ignored paths under ``docs/<locale>``.

    Args:
        workspace: Git workspace the query runs in.
        locale: Locale directory name below ``docs/``.

    Returns:
        Sorted paths reported by ``git ls-files --others --exclude-standard``
        for the locale directory.
    """
    query = ["ls-files", "--others", "--exclude-standard", "--", f"docs/{locale}"]
    found = git_lines(workspace, query)
    found.sort()
    return found


def write_lines(path: Path, lines: list[str]) -> None:
    """Write *lines* to *path* as UTF-8, one entry per newline-terminated line.

    Missing parent directories are created first. An empty *lines* list
    produces an empty file.

    Args:
        path: Destination file.
        lines: Entries to store, in order.
    """
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
    # Terminating every entry yields the same text as joining with "\n" and
    # appending a final newline only when there is at least one entry.
    payload = "".join(f"{entry}\n" for entry in lines)
    path.write_text(payload, encoding="utf-8")


def read_lines(path: Path) -> set[str]:
    """Load the baseline file at *path* as a set of stripped, non-blank lines.

    Args:
        path: Baseline file to read (UTF-8).

    Returns:
        The distinct non-empty lines with surrounding whitespace removed.

    Raises:
        SystemExit: If *path* does not exist.
    """
    if not path.exists():
        raise SystemExit(f"missing repair scope baseline: {path}")

    entries: set[str] = set()
    for raw in path.read_text(encoding="utf-8").splitlines():
        cleaned = raw.strip()
        if cleaned:
            entries.add(cleaned)
    return entries


def is_allowed_changed_path(path: str, locale: str) -> bool:
    """Tell whether the repair step is permitted to modify *path*.

    A path is allowed when it lies below ``docs/<locale>/`` or is exactly
    ``docs/.i18n/<locale>.tm.jsonl``.

    Args:
        path: Repository-relative path as reported by git.
        locale: Locale directory name.

    Returns:
        True for an allowed path, False otherwise.
    """
    tm_file = f"docs/.i18n/{locale}.tm.jsonl"
    if path == tm_file:
        return True
    locale_root = f"docs/{locale}/"
    return path.startswith(locale_root)


def snapshot_scope(workspace: Path, locale: str, baseline: Path) -> list[str]:
    """Record the locale files that are untracked before the repair runs.

    Args:
        workspace: Git workspace root.
        locale: Locale directory name below ``docs/``.
        baseline: File that receives one untracked path per line.

    Returns:
        The sorted list of untracked locale paths written to *baseline*.
    """
    preexisting = untracked_locale_files(workspace, locale)
    write_lines(baseline, preexisting)
    count = len(preexisting)
    print(f"Recorded {count} pre-repair untracked locale file(s) in {baseline}")
    return preexisting


def enforce_scope(workspace: Path, locale: str, baseline: Path) -> None:
    """Fail unless the repair step stayed inside the locale's allowed scope.

    The checks run in this order and stop at the first violation:

    1. nothing may be staged in the index;
    2. every unstaged modified path must satisfy ``is_allowed_changed_path``;
    3. every untracked file under ``docs/<locale>`` must already be listed in
       the baseline.

    Args:
        workspace: Git workspace root.
        locale: Locale directory name below ``docs/``.
        baseline: Snapshot file written by ``snapshot_scope``.

    Raises:
        SystemExit: With a message when the baseline file is missing, or with
            status 1 after printing the offending paths.
    """

    def reject(headline: str, offenders: list[str]) -> None:
        # Shared failure path: report the offending paths, then abort.
        print(headline)
        print("\n".join(offenders))
        raise SystemExit(1)

    # Read the baseline first so a missing snapshot is reported before any
    # git command runs.
    recorded = read_lines(baseline)

    staged = git_lines(workspace, ["diff", "--cached", "--name-only"])
    if staged:
        reject("Docs MDX repair staged files; forbidden:", staged)

    changed = git_lines(workspace, ["diff", "--name-only"])
    forbidden = [entry for entry in changed if not is_allowed_changed_path(entry, locale)]
    if forbidden:
        reject("Docs MDX repair touched forbidden paths:", forbidden)

    untracked_now = set(untracked_locale_files(workspace, locale))
    unexpected = sorted(untracked_now.difference(recorded))
    if unexpected:
        reject("Docs MDX repair created untracked locale files; forbidden:", unexpected)

    # The full translation may legitimately have created new locale pages
    # before the repair ran; comparing against the baseline limits this guard
    # to side effects of the repair stage itself.
    summary = (
        f"Docs MDX repair scope ok: {len(changed)} changed path(s), "
        f"{len(untracked_now)} pre-existing untracked locale file(s)"
    )
    print(summary)


def parse_args() -> argparse.Namespace:
    """Parse the command line of the repair-scope guard.

    Returns:
        A namespace with ``command`` ("snapshot" or "enforce"), ``baseline``
        (required ``Path``), ``workspace`` (``Path``; defaults to the
        ``GITHUB_WORKSPACE`` environment variable or ".") and ``locale``
        (defaults to the ``LOCALE`` environment variable or an empty string).
    """
    fallback_workspace = os.environ.get("GITHUB_WORKSPACE", ".")
    fallback_locale = os.environ.get("LOCALE", "")
    usage_notes = """Outputs:
snapshot writes the baseline file. enforce exits non-zero on forbidden repair edits.

Examples:
LOCALE=fr python .github/scripts/i18n/mdx_repair_scope.py snapshot --baseline .openclaw-sync/mdx/fr.repair-baseline.txt
LOCALE=fr python .github/scripts/i18n/mdx_repair_scope.py enforce --baseline .openclaw-sync/mdx/fr.repair-baseline.txt
"""

    cli = argparse.ArgumentParser(
        description="Snapshot and enforce the translated MDX repair scope.",
        # Keep the epilog's line breaks instead of letting argparse re-wrap it.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_notes,
    )
    cli.add_argument("command", choices=["snapshot", "enforce"])
    cli.add_argument("--baseline", required=True, type=Path)
    cli.add_argument("--workspace", default=fallback_workspace, type=Path)
    cli.add_argument("--locale", default=fallback_locale)
    return cli.parse_args()


def main() -> None:
    """Entry point: parse arguments and run the requested sub-command.

    Raises:
        SystemExit: If no locale was supplied via ``--locale`` or ``LOCALE``,
            or if the selected sub-command rejects the workspace state.
    """
    options = parse_args()
    locale = options.locale
    if not locale:
        raise SystemExit("missing locale: pass --locale or set LOCALE")

    root = options.workspace.resolve()
    # "snapshot" records the baseline; the only other accepted command is
    # "enforce", which validates against it.
    action = snapshot_scope if options.command == "snapshot" else enforce_scope
    action(root, locale, options.baseline)


# Run the CLI only when executed as a script; importing the module (as the
# test suite does) must not parse arguments or touch git.
if __name__ == "__main__":
    main()
46 changes: 46 additions & 0 deletions .github/scripts/i18n/tests/test_i18n_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def load_module(name: str):
prepare = load_module("prepare")
pending = load_module("build_pending_manifest")
package_artifact = load_module("package_artifact")
mdx_repair_scope = load_module("mdx_repair_scope")
apply_artifacts = load_module("apply_artifacts")
read_source_metadata = load_module("read_source_metadata")
prune_stale_locale_pages = load_module("prune_stale_locale_pages")
Expand Down Expand Up @@ -175,9 +176,14 @@ def test_full_workflow_keeps_only_weekly_and_manual_triggers(self) -> None:

def test_full_workflow_gates_batches_after_canary(self) -> None:
text = (REPO_ROOT / ".github/workflows/translate-all.yml").read_text(encoding="utf-8")
reusable = (REPO_ROOT / ".github/workflows/translate-locale-reusable.yml").read_text(encoding="utf-8")
for index in range(1, 7):
self.assertIn(f"translate-batch-{index}:", text)
self.assertIn("needs.translate-canary.result == 'success'", text)
self.assertIn("artifact_role: canary", text)
self.assertIn("inputs.commit_locale || inputs.artifact_role == 'canary'", reusable)
self.assertIn("inputs.artifact_role == 'canary' || steps.apply.outputs.changed_count != '0'", reusable)
self.assertIn("inputs.commit_locale && steps.apply.outputs.changed_count != '0'", reusable)
self.assertIn("provider-preflight:", text)
self.assertIn("Translate Full completed with failed or cancelled work", text)

Expand Down Expand Up @@ -451,6 +457,46 @@ def test_package_artifact_failure_writes_visible_github_status(self) -> None:
self.assertIn("failed=true", output.read_text(encoding="utf-8"))
self.assertIn("failed_reason=translation failed", output.read_text(encoding="utf-8"))

def test_mdx_repair_scope_allows_preexisting_untracked_locale_files_only(self) -> None:
    """Walk enforce_scope through one allowed state and four forbidden ones.

    The phases share a single temporary repository and depend on each other,
    so their order matters.
    """
    with tempfile.TemporaryDirectory() as tmp:
        repo = Path(tmp)
        init_repo(repo)
        baseline = repo / ".openclaw-sync/mdx/fr.repair-baseline.txt"
        # Commit one source page and one locale page so both are tracked.
        (repo / "docs/fr").mkdir(parents=True)
        (repo / "docs/index.md").write_text("# Index\n", encoding="utf-8")
        (repo / "docs/fr/tracked.md").write_text("# Tracked FR\n", encoding="utf-8")
        run_git(repo, "add", ".")
        run_git(repo, "commit", "-m", "initial")

        # An untracked locale page that exists before the snapshot is recorded
        # in the baseline and therefore tolerated later.
        (repo / "docs/fr/from-translation.md").write_text("# New FR\n", encoding="utf-8")
        mdx_repair_scope.snapshot_scope(repo, "fr", baseline)

        # Allowed: an unstaged edit to a tracked file inside docs/fr.
        (repo / "docs/fr/tracked.md").write_text("# Tracked FR repaired\n", encoding="utf-8")
        mdx_repair_scope.enforce_scope(repo, "fr", baseline)

        # Forbidden: an unstaged edit outside the locale directory.
        (repo / "docs/index.md").write_text("# Source side effect\n", encoding="utf-8")
        with self.assertRaises(SystemExit):
            mdx_repair_scope.enforce_scope(repo, "fr", baseline)
        (repo / "docs/index.md").write_text("# Index\n", encoding="utf-8")

        # Forbidden: any staged change; afterwards unstage and restore the file.
        (repo / "docs/index.md").write_text("# Staged source side effect\n", encoding="utf-8")
        run_git(repo, "add", "docs/index.md")
        with self.assertRaises(SystemExit):
            mdx_repair_scope.enforce_scope(repo, "fr", baseline)
        run_git(repo, "restore", "--staged", "docs/index.md")
        (repo / "docs/index.md").write_text("# Index\n", encoding="utf-8")

        # Forbidden: a new untracked locale file that is not in the baseline.
        (repo / "docs/fr/from-repair.md").write_text("# Repair side effect\n", encoding="utf-8")
        with self.assertRaises(SystemExit):
            mdx_repair_scope.enforce_scope(repo, "fr", baseline)

        # Forbidden: staged files are rejected even when they live inside the
        # locale directory and one of them has been added to the baseline.
        baseline.write_text(baseline.read_text(encoding="utf-8") + "docs/fr/from-repair.md\n", encoding="utf-8")
        run_git(repo, "add", "docs/fr/from-repair.md")
        (repo / "docs/fr/staged-from-repair.md").write_text("# Staged repair side effect\n", encoding="utf-8")
        run_git(repo, "add", "docs/fr/staged-from-repair.md")
        with self.assertRaises(SystemExit):
            mdx_repair_scope.enforce_scope(repo, "fr", baseline)

def test_full_summary_ignores_canary_as_locale_success_and_reports_missing(self) -> None:
with tempfile.TemporaryDirectory() as tmp:
artifacts = Path(tmp)
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/translate-finalize-reusable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ jobs:
if: steps.apply.outputs.changed_count != '0'
run: sudo apt-get update && sudo apt-get install -y librsvg2-bin

- name: Install Playwright browser
if: steps.apply.outputs.changed_count != '0'
run: npx playwright install --with-deps chromium

- name: Check aggregate docs
if: steps.apply.outputs.changed_count != '0'
run: npm run docs:check
Expand Down
53 changes: 23 additions & 30 deletions .github/workflows/translate-locale-reusable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,14 @@ jobs:
node .openclaw-sync/check-docs-mdx.mjs "docs/${LOCALE}" \
--json-out ".openclaw-sync/mdx/${LOCALE}.json"

- name: Snapshot translated MDX repair scope
if: steps.stale.outputs.skip != 'true' && steps.translate_docs.outcome == 'success' && steps.mdx_check.outcome == 'failure'
env:
LOCALE: ${{ inputs.locale }}
run: |
python .github/scripts/i18n/mdx_repair_scope.py snapshot \
--baseline "${RUNNER_TEMP}/${LOCALE}.repair-baseline.txt"

- name: Repair translated MDX
id: mdx_repair
if: steps.stale.outputs.skip != 'true' && steps.translate_docs.outcome == 'success' && steps.mdx_check.outcome == 'failure'
Expand All @@ -208,27 +216,8 @@ jobs:
env:
LOCALE: ${{ inputs.locale }}
run: |
set -euo pipefail
bad_paths="$(
git diff --name-only | while IFS= read -r path; do
case "$path" in
"docs/${LOCALE}"/*|"docs/.i18n/${LOCALE}.tm.jsonl") ;;
*) printf '%s\n' "$path" ;;
esac
done
)"
if [ -n "$bad_paths" ]; then
echo "Docs MDX repair touched forbidden paths:"
printf '%s\n' "$bad_paths"
exit 1
fi

untracked_locale="$(git ls-files --others --exclude-standard -- "docs/${LOCALE}")"
if [ -n "$untracked_locale" ]; then
echo "Docs MDX repair created untracked locale files; forbidden:"
printf '%s\n' "$untracked_locale"
exit 1
fi
python .github/scripts/i18n/mdx_repair_scope.py enforce \
--baseline "${RUNNER_TEMP}/${LOCALE}.repair-baseline.txt"

- name: Recheck translated MDX
id: mdx_recheck
Expand Down Expand Up @@ -287,9 +276,9 @@ jobs:
exit 1

commit-locale:
name: Commit ${{ inputs.locale }} artifact
name: Finalize ${{ inputs.locale }} artifact
needs: translate
if: inputs.commit_locale && needs.translate.result == 'success'
if: needs.translate.result == 'success' && (inputs.commit_locale || inputs.artifact_role == 'canary')
runs-on: ubuntu-latest
permissions:
actions: write
Expand Down Expand Up @@ -326,27 +315,31 @@ jobs:
--expected-locales "${EXPECTED_LOCALES}"

- name: Set up Node for locale validation
if: steps.apply.outputs.changed_count != '0'
if: inputs.artifact_role == 'canary' || steps.apply.outputs.changed_count != '0'
uses: actions/setup-node@v6
with:
node-version: 24
cache: npm

- name: Install docs dependencies
if: steps.apply.outputs.changed_count != '0'
if: inputs.artifact_role == 'canary' || steps.apply.outputs.changed_count != '0'
run: npm ci

- name: Install validation system dependencies
if: steps.apply.outputs.changed_count != '0'
if: inputs.artifact_role == 'canary' || steps.apply.outputs.changed_count != '0'
run: sudo apt-get update && sudo apt-get install -y librsvg2-bin

- name: Check docs before locale commit
if: steps.apply.outputs.changed_count != '0'
- name: Install Playwright browser
if: inputs.artifact_role == 'canary' || steps.apply.outputs.changed_count != '0'
run: npx playwright install --with-deps chromium

- name: Check docs before artifact finalization
if: inputs.artifact_role == 'canary' || steps.apply.outputs.changed_count != '0'
run: npm run docs:check

- name: Commit locale refresh
id: locale_commit
if: steps.apply.outputs.changed_count != '0'
if: inputs.commit_locale && steps.apply.outputs.changed_count != '0'
env:
BASE_SOURCE_SHA: ${{ steps.apply.outputs.base_source_sha }}
LOCALE: ${{ inputs.locale }}
Expand All @@ -356,7 +349,7 @@ jobs:
python .github/scripts/i18n/commit_locale_artifact.py

- name: Dispatch locale docs deploy
if: steps.locale_commit.outputs.committed == 'true'
if: inputs.commit_locale && steps.locale_commit.outputs.committed == 'true'
env:
GH_TOKEN: ${{ github.token }}
run: |
Expand Down
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,9 @@ dist/
node_modules/
*.log
.DS_Store

__pycache__/
*.py[cod]

# Local workflow shell extraction is a test artifact, not sync metadata.
.openclaw-sync/workflow-shell-check/
Loading