From 1ca951e74c0c47cb539fbeb1731d4ee555026b61 Mon Sep 17 00:00:00 2001 From: Vincent Koc <25068+vincentkoc@users.noreply.github.com> Date: Sat, 27 Jun 2026 06:44:58 -0700 Subject: [PATCH] fix(ci): let locale finalizers queue independently --- .github/scripts/i18n/dispatch_r2_pages.py | 66 +++++++++--- .../scripts/i18n/tests/test_i18n_scripts.py | 100 ++++++++++++++++++ .github/workflows/r2-pages.yml | 12 +++ .../workflows/translate-locale-reusable.yml | 3 - 4 files changed, 162 insertions(+), 19 deletions(-) diff --git a/.github/scripts/i18n/dispatch_r2_pages.py b/.github/scripts/i18n/dispatch_r2_pages.py index 0984fffca..db9e693e3 100644 --- a/.github/scripts/i18n/dispatch_r2_pages.py +++ b/.github/scripts/i18n/dispatch_r2_pages.py @@ -16,6 +16,7 @@ --force-upload: Force R2 object audit/upload input. Default: true. --live-url: Optional live URL to verify after upload. --expect-h1: Expected h1 text for live URL verification. + --dispatch-attempts: Dispatch/retry count for stale scoped uploads. Default: 3. --timeout-seconds: Maximum wait. Default: 3600. --poll-seconds: Poll interval. Default: 10. @@ -43,6 +44,7 @@ import urllib.error import urllib.request from datetime import UTC, datetime +from uuid import uuid4 RUN_URL_RE = re.compile(r"/actions/runs/([0-9]+)") @@ -74,6 +76,7 @@ def dispatch( force_upload: bool, locale: str = "", page_path: str = "", + request_id: str = "", ) -> str: command = [ "gh", @@ -93,6 +96,8 @@ def dispatch( command.extend(["-f", f"locale={locale}"]) if page_path: command.extend(["-f", f"page_path={page_path}"]) + if request_id: + command.extend(["-f", f"request_id={request_id}"]) result = run(command) output = "\n".join(part for part in [result.stdout.strip(), result.stderr.strip()] if part) if output: @@ -115,7 +120,7 @@ def list_workflow_dispatch_runs(workflow: str, ref: str, repo: str) -> list[dict "--event", "workflow_dispatch", "--json", - "databaseId,createdAt,status,url", + "databaseId,createdAt,displayTitle,status,url", "--limit", "20", ] @@ -123,7 +128,14 @@ def list_workflow_dispatch_runs(workflow: str, ref: str, repo: str) -> list[dict return json.loads(result.stdout or "[]") -def find_dispatched_run(workflow: str, ref: str, repo: str, started_at: datetime, known_run_ids: set[str]) -> str: +def find_dispatched_run( + workflow: str, + ref: str, + repo: str, + started_at: datetime, + known_run_ids: set[str], + request_id: str = "", +) -> str: cutoff = started_at.replace(microsecond=0) for _ in range(12): runs = list_workflow_dispatch_runs(workflow, ref, repo) @@ -131,6 +143,7 @@ def find_dispatched_run(workflow: str, ref: str, repo: str, started_at: datetime item for item in runs if str(item["databaseId"]) not in known_run_ids and parse_time(item["createdAt"]) >= cutoff + and (not request_id or request_id in str(item.get("displayTitle") or "")) ] if len(recent) == 1: run_id = str(recent[0]["databaseId"]) @@ -157,6 +170,14 @@ def known_workflow_dispatch_run_ids(workflow: str, ref: str, repo: str) -> set[s raise SystemExit(f"could not list existing R2 Pages runs before dispatch: {exc}") from exc +def dispatch_request_id(artifact_scope: str, locale: str, page_path: str) -> str: + parts = ["i18n-r2", artifact_scope or "full", locale or "all"] + if page_path: + parts.append(re.sub(r"[^A-Za-z0-9_.-]+", "-", page_path).strip("-")[:48] or "page") + parts.append(uuid4().hex[:12]) + return "-".join(parts) + + def wait_for_run(repo: str, run_id: str, timeout_seconds: int, poll_seconds: int) -> None: deadline = time.monotonic() + timeout_seconds while True: @@ -236,6 +257,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--force-upload", default=True, action=argparse.BooleanOptionalAction) parser.add_argument("--live-url", default="") parser.add_argument("--expect-h1", default="") + parser.add_argument("--dispatch-attempts", default=3, type=int) parser.add_argument("--timeout-seconds", default=3600, type=int) parser.add_argument("--poll-seconds", default=10, type=int) return parser.parse_args() @@ -249,23 +271,35 @@ def main() -> None: raise SystemExit("timeout-seconds must be >= 1") if args.poll_seconds < 1: raise SystemExit("poll-seconds must be >= 1") + if args.dispatch_attempts < 1: + raise SystemExit("dispatch-attempts must be >= 1") # GitHub's dispatch API can omit the new run URL; snapshot first so fallback # resolution cannot attach this deploy gate to a pre-existing R2 run. - known_run_ids = known_workflow_dispatch_run_ids(args.workflow, args.ref, args.repo) - started_at = datetime.now(UTC) - run_id = dispatch( - args.workflow, - args.ref, - args.repo, - args.artifact_scope, - args.force_upload, - args.locale, - args.page_path, - ) - if not run_id: - run_id = find_dispatched_run(args.workflow, args.ref, args.repo, started_at, known_run_ids) - wait_for_run(args.repo, run_id, args.timeout_seconds, args.poll_seconds) + for attempt in range(1, args.dispatch_attempts + 1): + known_run_ids = known_workflow_dispatch_run_ids(args.workflow, args.ref, args.repo) + started_at = datetime.now(UTC) + request_id = dispatch_request_id(args.artifact_scope, args.locale, args.page_path) + try: + run_id = dispatch( + args.workflow, + args.ref, + args.repo, + args.artifact_scope, + args.force_upload, + args.locale, + args.page_path, + request_id, + ) + if not run_id: + run_id = find_dispatched_run(args.workflow, args.ref, args.repo, started_at, known_run_ids, request_id) + wait_for_run(args.repo, run_id, args.timeout_seconds, args.poll_seconds) + break + except SystemExit as exc: + if attempt >= args.dispatch_attempts: + raise + print(f"R2 Pages dispatch attempt {attempt}/{args.dispatch_attempts} failed: {exc}; retrying.") + time.sleep(args.poll_seconds) verify_live_h1(args.live_url, args.expect_h1, args.timeout_seconds, args.poll_seconds) diff --git a/.github/scripts/i18n/tests/test_i18n_scripts.py b/.github/scripts/i18n/tests/test_i18n_scripts.py index 1bb6ef8c8..fb3c4e120 100644 --- a/.github/scripts/i18n/tests/test_i18n_scripts.py +++ b/.github/scripts/i18n/tests/test_i18n_scripts.py @@ -208,6 +208,10 @@ def test_full_workflow_gates_batches_after_canary(self) -> None: self.assertIn('python "${I18N_SCRIPT_DIR}/build_pending_manifest.py"', reusable) self.assertIn('python "${I18N_SCRIPT_DIR}/commit_locale_artifact.py"', reusable) self.assertIn('python "${I18N_SCRIPT_DIR}/dispatch_r2_pages.py" "${args[@]}"', reusable) + commit_locale_block = re.search(r"(?ms)^ commit-locale:.*?(?=^ [a-zA-Z0-9_-]+:|\Z)", reusable) + self.assertIsNotNone(commit_locale_block) + self.assertNotIn("concurrency:", commit_locale_block.group(0)) + self.assertIn("It retries rebase/push conflicts", commit_locale_artifact.__doc__ or "") self.assertIn("--artifact-scope page", reusable) self.assertIn('--ref "${{ github.ref_name }}"', reusable) self.assertIn('--locale "${{ inputs.locale }}"', reusable) @@ -226,6 +230,9 @@ def test_full_workflow_gates_batches_after_canary(self) -> None: self.assertIn("- locale", r2_pages) self.assertIn("- page", r2_pages) self.assertRegex(r2_pages, r"group: r2-pages\s+cancel-in-progress: false") + self.assertIn("run-name: R2 Pages", r2_pages) + self.assertIn("request_id:", r2_pages) + self.assertIn("Fail stale scoped translation deploy", r2_pages) self.assertIn("Refresh scoped docs content from main", r2_pages) self.assertIn("SCOPED_CONTENT_SHA: ${{ steps.scoped-content.outputs.content_sha || '' }}", r2_pages) self.assertIn("R2_UPLOAD_SCOPE: ${{ steps.artifact-scope.outputs.upload_scope }}", r2_pages) @@ -636,6 +643,7 @@ def fake_run(args: list[str], check: bool = True) -> subprocess.CompletedProcess False, "zh-CN", "channels/line", + "request-123", ) self.assertEqual("28277584371", run_id) @@ -643,6 +651,7 @@ def fake_run(args: list[str], check: bool = True) -> subprocess.CompletedProcess self.assertIn("force_upload=false", captured) self.assertIn("locale=zh-CN", captured) self.assertIn("page_path=channels/line", captured) + self.assertIn("request_id=request-123", captured) def test_dispatch_r2_pages_selects_recent_workflow_dispatch(self) -> None: calls = {"count": 0} @@ -682,6 +691,97 @@ def fake_list(workflow: str, ref: str, repo: str) -> list[dict]: self.assertEqual("456", run_id) + def test_dispatch_r2_pages_uses_request_id_to_resolve_concurrent_runs(self) -> None: + now = "2026-06-27T03:43:01Z" + + def fake_list(workflow: str, ref: str, repo: str) -> list[dict]: + return [ + { + "databaseId": 123, + "createdAt": now, + "displayTitle": "R2 Pages i18n-r2-locale-ja-JP-aaa", + "status": "queued", + "url": "https://github.com/openclaw/docs/actions/runs/123", + }, + { + "databaseId": 456, + "createdAt": now, + "displayTitle": "R2 Pages i18n-r2-locale-zh-TW-bbb", + "status": "queued", + "url": "https://github.com/openclaw/docs/actions/runs/456", + }, + ] + + with patch.object(dispatch_r2_pages, "list_workflow_dispatch_runs", fake_list), patch.object(dispatch_r2_pages.time, "sleep", lambda _: None): + run_id = dispatch_r2_pages.find_dispatched_run( + "r2-pages.yml", + "main", + "openclaw/docs", + dispatch_r2_pages.parse_time(now), + set(), + "i18n-r2-locale-zh-TW-bbb", + ) + + self.assertEqual("456", run_id) + + def test_dispatch_r2_pages_retries_failed_dispatch_run(self) -> None: + dispatches: list[str] = [] + waited: list[str] = [] + verified: list[tuple[str, str]] = [] + + def fake_dispatch( + workflow: str, + ref: str, + repo: str, + artifact_scope: str, + force_upload: bool, + locale: str = "", + page_path: str = "", + request_id: str = "", + ) -> str: + dispatches.append(request_id) + return "123" if len(dispatches) == 1 else "456" + + def fake_wait(repo: str, run_id: str, timeout_seconds: int, poll_seconds: int) -> None: + waited.append(run_id) + if run_id == "123": + raise SystemExit("stale scoped deploy") + + def fake_verify(url: str, expected_h1: str, timeout_seconds: int, poll_seconds: int) -> None: + verified.append((url, expected_h1)) + + argv = [ + "dispatch_r2_pages.py", + "--repo", + "openclaw/docs", + "--artifact-scope", + "locale", + "--locale", + "zh-TW", + "--dispatch-attempts", + "2", + "--poll-seconds", + "1", + "--live-url", + "https://docs.openclaw.ai/zh-TW/channels/line", + "--expect-h1", + "LINE", + ] + with ( + patch.object(sys, "argv", argv), + patch.object(dispatch_r2_pages, "known_workflow_dispatch_run_ids", lambda workflow, ref, repo: set()), + patch.object(dispatch_r2_pages, "dispatch", fake_dispatch), + patch.object(dispatch_r2_pages, "wait_for_run", fake_wait), + patch.object(dispatch_r2_pages, "verify_live_h1", fake_verify), + patch.object(dispatch_r2_pages.time, "sleep", lambda _: None), + ): + dispatch_r2_pages.main() + + self.assertEqual(["123", "456"], waited) + self.assertEqual(2, len(dispatches)) + self.assertNotEqual(dispatches[0], dispatches[1]) + self.assertEqual([("https://docs.openclaw.ai/zh-TW/channels/line", "LINE")], verified) + def test_dispatch_r2_pages_rejects_ambiguous_new_runs(self) -> None: now = "2026-06-27T03:43:01Z" diff --git a/.github/workflows/r2-pages.yml b/.github/workflows/r2-pages.yml index 61b41caf9..caf3affed 100644 --- a/.github/workflows/r2-pages.yml +++ b/.github/workflows/r2-pages.yml @@ -1,4 +1,5 @@ name: R2 Pages +run-name: R2 Pages ${{ github.event_name == 'workflow_dispatch' && inputs.request_id || github.sha }} on: push: @@ -45,6 +46,11 @@ on: required: false type: boolean default: false + request_id: + description: "Unique caller id used by dispatch waiters to resolve this run." + required: false + type: string + default: "" permissions: actions: write @@ -220,6 +226,12 @@ jobs: echo "stale=false" >> "${GITHUB_OUTPUT}" fi + - name: Fail stale scoped translation deploy + if: github.event_name == 'workflow_dispatch' && steps.current-main.outputs.stale == 'true' && (steps.artifact-scope.outputs.scope == 'locale' || steps.artifact-scope.outputs.scope == 'page') + run: | + echo "Scoped translation content went stale before upload; retry this dispatch against latest main." >&2 + exit 1 + - name: Resolve R2 credentials if: steps.artifact-scope.outputs.scope != 'none' && steps.current-main.outputs.stale != 'true' env: diff --git a/.github/workflows/translate-locale-reusable.yml b/.github/workflows/translate-locale-reusable.yml index 35f867141..9486cdaf9 100644 --- a/.github/workflows/translate-locale-reusable.yml +++ b/.github/workflows/translate-locale-reusable.yml @@ -314,9 +314,6 @@ jobs: permissions: actions: write contents: write - concurrency: - group: docs-i18n-finalize - cancel-in-progress: false steps: # Finalizers apply artifacts to latest main, but deploy/commit control # logic must come from this workflow ref so branch canaries test the fix.