From 95c192b1f13bb1ea3e784b25d6351f0f8d49657e Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Sat, 20 Jun 2026 17:56:00 -0400 Subject: [PATCH 01/22] feat(configurator): env_params as first-class trial-identity field Make env_params a first-class part of CloudAIGymEnv trial identity so the trajectory cache keys on (action, env_params) rather than action alone, fixing the domain-randomization correctness bug where the same action under a different env_params sample returned a stale reward. - Cache key now includes env_params; cache-key tests pin the contract (formerly the TDD-red specs of #900, folded in here). - Keep env.csv and trajectory.csv 1:1 step-aligned: a single TrajectoryEntry sinks both files coherently, including on constraint failure. - Reject env_params on non-DSE jobs; reject non-finite / negative weights. - Add cache-hit + declared-env_params integration coverage. Folds the test-only PR #900 (cache-key TDD) into this PR so the stack has no permanently-red standalone PR. --- src/cloudai/_core/test_scenario.py | 1 + src/cloudai/cli/handlers.py | 27 +- src/cloudai/configurator/cloudai_gym.py | 66 ++++- src/cloudai/configurator/env_params.py | 169 +++++++++++++ src/cloudai/models/workload.py | 10 + tests/test_cloudaigym.py | 322 ++++++++++++++++++++++++ tests/test_env_params.py | 114 +++++++++ tests/test_handlers.py | 50 ++++ 8 files changed, 755 insertions(+), 4 deletions(-) create mode 100644 src/cloudai/configurator/env_params.py create mode 100644 tests/test_env_params.py diff --git a/src/cloudai/_core/test_scenario.py b/src/cloudai/_core/test_scenario.py index 15efe8b2e..81f2723b0 100644 --- a/src/cloudai/_core/test_scenario.py +++ b/src/cloudai/_core/test_scenario.py @@ -97,6 +97,7 @@ class TestRun: reports: Set[Type[ReportGenerationStrategy]] = field(default_factory=set) extra_srun_args: str | None = None num_nodes_explicit: bool = False + current_env_params: dict[str, Any] = field(default_factory=dict) def __hash__(self) -> int: return hash(self.name + self.test.name + str(self.iterations) + str(self.current_iteration)) diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py index 3fd099dc0..87e4ccfd5 100644 --- a/src/cloudai/cli/handlers.py +++ b/src/cloudai/cli/handlers.py @@ -39,6 +39,7 @@ System, TestParser, TestScenario, + TestScenarioParsingError, ) from cloudai.models.scenario import ReportConfig from cloudai.models.workload import TestDefinition @@ -297,12 +298,35 @@ def _check_installation( return result +def validate_dse_env_params(test_scenario: TestScenario) -> None: + """ + Reject prepped configs that declare env_params on a non-DSE test run. + + env_params are sampled only during DSE (by CloudAIGymEnv); on a non-DSE run they would be + silently ignored. is_dse_job is a property of the fully prepped config, so this is validated + here rather than at parse time. + """ + offenders = [tr.name for tr in test_scenario.test_runs if tr.test.env_params and not tr.is_dse_job] + if offenders: + raise TestScenarioParsingError( + f"Tests {offenders} declare env_params but are not DSE jobs. env_params are sampled only during " + "DSE (by CloudAIGymEnv); add a sweep (a list-valued cmd_args/extra_env_vars entry or num_nodes) " + "or remove env_params." + ) + + def handle_dry_run_and_run(args: argparse.Namespace) -> int: setup_result = _setup_system_and_scenario(args) if setup_result is None: return 1 system, test_scenario, tests = setup_result + try: + validate_dse_env_params(test_scenario) + except TestScenarioParsingError as e: + logging.error(str(e)) + return 1 + if not _handle_single_sbatch(args, system): return 1 @@ -491,7 +515,8 @@ def verify_test_scenarios( tests = Parser.parse_tests(test_tomls, system) hook_tests = Parser.parse_tests(hook_test_tomls, system) hooks = Parser.parse_hooks(hook_tomls, system, {t.name: t for t in hook_tests}) - Parser.parse_test_scenario(scenario_file, system, {t.name: t for t in tests}, hooks) + scenario = Parser.parse_test_scenario(scenario_file, system, {t.name: t for t in tests}, hooks) + validate_dse_env_params(scenario) except Exception: nfailed += 1 diff --git a/src/cloudai/configurator/cloudai_gym.py b/src/cloudai/configurator/cloudai_gym.py index 258a47f3c..80c5210f1 100644 --- a/src/cloudai/configurator/cloudai_gym.py +++ b/src/cloudai/configurator/cloudai_gym.py @@ -19,13 +19,14 @@ import dataclasses import logging from pathlib import Path -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple from cloudai.core import METRIC_ERROR, BaseRunner, Registry, TestRun from cloudai.util.lazy_imports import lazy from .base_agent import RewardOverrides from .base_gym import BaseGym +from .env_params import CsvSink, EnvParamsObserver, EnvParamsSink, StepObserver @dataclasses.dataclass(frozen=True) @@ -36,6 +37,7 @@ class TrajectoryEntry: action: dict[str, Any] reward: float observation: list + env_params: dict[str, Any] = dataclasses.field(default_factory=dict) class CloudAIGymEnv(BaseGym): @@ -61,8 +63,28 @@ def __init__(self, test_run: TestRun, runner: BaseRunner, rewards: RewardOverrid self.max_steps = test_run.test.agent_steps self.reward_function = Registry().get_reward_function(test_run.test.agent_reward_function) self.trajectory: dict[int, list[TrajectoryEntry]] = {} + self._env_sink: EnvParamsSink | None = None + self.observers: List[StepObserver] = self._build_observers() super().__init__() + def _build_observers(self) -> List[StepObserver]: + """ + Construct the per-step observers implied by the TestDefinition. + + Workloads opt in to env_params via a TOML ``[env_params.]`` block; + an empty mapping yields no observers and zero overhead. + """ + observers: List[StepObserver] = [] + if self.test_run.test.env_params: + seed = int((self.test_run.test.agent_config or {}).get("random_seed", 0)) + self._env_sink = CsvSink(self._env_csv_path()) + observers.append(EnvParamsObserver(self.test_run.test.env_params, seed)) + return observers + + def _env_csv_path(self) -> Path: + """``env.csv`` lives alongside ``trajectory.csv`` so a plain ``merge`` joins them.""" + return self.trajectory_file_path.parent / "env.csv" + def define_action_space(self) -> Dict[str, list[Any]]: return self.test_run.param_space @@ -121,6 +143,9 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]: self.test_run.increment_step() self.test_run = self.test_run.apply_params_set(action) + for observer in self.observers: + observer.before_step(self.test_run) + cached_result = self.get_cached_trajectory_result(action) if cached_result is not None: logging.info( @@ -134,8 +159,11 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]: action=action, reward=cached_result.reward, observation=cached_result.observation, + env_params=dict(self.test_run.current_env_params), ) ) + for observer in self.observers: + observer.after_step(self.test_run, cached_result.observation, cached_result.reward) return cached_result.observation, cached_result.reward, False, {} if not self.test_run.test.constraint_check(self.test_run, self.runner.system): @@ -162,6 +190,11 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]: self.test_run.step = new_tr.step self.test_run.output_path = new_tr.output_path + # Rebuilding/replacing test_run above can drop the per-trial env_params sample; + # restore it so the trajectory entry, the cache key, and env.csv all record the + # params the trial actually ran with. + self.test_run.current_env_params = new_tr.current_env_params + observation = self.get_observation(action) reward = self.compute_reward(observation) @@ -171,9 +204,13 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]: action=action, reward=reward, observation=observation, + env_params=dict(self.test_run.current_env_params), ) ) + for observer in self.observers: + observer.after_step(self.test_run, observation, reward) + return observation, reward, False, {} def render(self, mode: str = "human"): @@ -230,7 +267,14 @@ def get_observation(self, action: Any) -> list: return observation def write_trajectory(self, entry: TrajectoryEntry): - """Append the trajectory to the CSV file and to the local attribute.""" + """ + Append the entry to the in-memory cache and trajectory.csv (plus env.csv when declared). + + ``trajectory.csv`` and the ``env.csv`` projection are sunk from the same + ``TrajectoryEntry`` here, so a trial that never produces an entry (e.g. a + constraint failure returns before this call) lands in neither file and the + two stay 1:1 step-aligned. + """ self.current_trajectory.append(entry) file_exists = self.trajectory_file_path.exists() @@ -243,6 +287,9 @@ def write_trajectory(self, entry: TrajectoryEntry): writer.writerow(["step", "action", "reward", "observation"]) writer.writerow([entry.step, entry.action, entry.reward, entry.observation]) + if self._env_sink is not None: + self._env_sink.write(entry.step, entry.env_params) + @property def trajectory_file_path(self) -> Path: return self.runner.scenario_root / self.test_run.name / f"{self.test_run.current_iteration}" / "trajectory.csv" @@ -252,8 +299,21 @@ def current_trajectory(self) -> list[TrajectoryEntry]: return self.trajectory.setdefault(self.test_run.current_iteration, []) def get_cached_trajectory_result(self, action: Any) -> TrajectoryEntry | None: + """ + Return a cached entry only when the full trial identity matches. + + Trial identity is ``(action, env_params)``: env-randomized parameters + change the workload's behaviour, so a trial repeating the same action + under a different ``env_params`` sample must miss and re-run. Empty + env_params on both sides is the back-compat path for workloads that + do not declare any ``[env_params.*]`` block. + """ + current_env_params = getattr(self.test_run, "current_env_params", {}) or {} for entry in self.current_trajectory: - if self._values_match_exact(entry.action, action): + if not self._values_match_exact(entry.action, action): + continue + entry_env = getattr(entry, "env_params", {}) or {} + if self._values_match_exact(entry_env, current_env_params): return entry return None diff --git a/src/cloudai/configurator/env_params.py b/src/cloudai/configurator/env_params.py new file mode 100644 index 000000000..44bf57524 --- /dev/null +++ b/src/cloudai/configurator/env_params.py @@ -0,0 +1,169 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Domain-randomization primitives for CloudAI DSE. + +An env-randomized parameter is a workload knob whose value the environment +samples per trial (categorical, optional weights). It is sibling to +``cmd_args`` on a ``TestDefinition`` and does not enter the agent's action +space; the policy learns a robust mapping under that variation. + +This module owns the data schema (``EnvParamSpec``), the deterministic +sampler (``EnvParamsSampler``), the persistence interface +(``EnvParamsSink`` + ``CsvSink``) and the per-step observer +(``EnvParamsObserver``). ``CloudAIGymEnv`` consumes these directly so the +artifacts (``env.csv``) and the cache key align 1:1 with ``trajectory.csv`` +regardless of agent (PPO, BO, GA, MAB) or workload. +""" + +from __future__ import annotations + +import csv +import math +import random +from pathlib import Path +from typing import Any, Dict, List, Optional, Protocol, runtime_checkable + +from pydantic import BaseModel, ConfigDict, Field, model_validator +from typing_extensions import Self + + +class EnvParamSpec(BaseModel): + """Specification of one env-randomized parameter (categorical).""" + + model_config = ConfigDict(extra="forbid") + + values: List[Any] = Field( + min_length=2, + description="Candidate values; a single-valued parameter is just a fixed cmd_args entry.", + ) + weights: Optional[List[float]] = Field( + default=None, + description="Optional probability weights aligned with values; uniform if omitted.", + ) + + @model_validator(mode="after") + def _validate_weights(self) -> Self: + if self.weights is None: + return self + if len(self.weights) != len(self.values): + raise ValueError( + f"env_params weights length {len(self.weights)} does not match values length {len(self.values)}" + ) + for w in self.weights: + if not math.isfinite(w) or w < 0: + raise ValueError(f"env_params weights must be finite and non-negative; got {w}") + if sum(self.weights) <= 0: + raise ValueError("env_params weights must have a positive sum") + return self + + +class EnvParamsSampler: + """ + Per-trial categorical sampler. + + Determinism contract: ``sample(t)`` returns the same dict on every call + (across processes) for the same ``(seed, env_params, t)``. + + Independence contract: each parameter uses an RNG seeded by + ``f"{seed}:{name}:{trial}"`` so adding or removing an unrelated + parameter does not perturb existing parameters' draw sequences. + """ + + def __init__(self, env_params: Dict[str, EnvParamSpec], seed: int) -> None: + self._env_params = env_params + self._seed = seed + + def sample(self, trial: int) -> Dict[str, Any]: + out: Dict[str, Any] = {} + for name, spec in self._env_params.items(): + rng = random.Random(f"{self._seed}:{name}:{trial}") + if spec.weights is not None: + out[name] = rng.choices(spec.values, weights=spec.weights, k=1)[0] + else: + out[name] = rng.choice(spec.values) + return out + + +@runtime_checkable +class EnvParamsSink(Protocol): + """Persist one trial's env_params sample; empty samples must be no-ops.""" + + def write(self, step: int, sample: Dict[str, Any]) -> None: ... + + +class CsvSink: + """ + Append per-trial env_params samples to a step-aligned CSV. + + The CSV mirrors how ``trajectory.csv`` serialises its ``action`` column + (one row per env.step(), sample dict stringified in a single cell) so the + two files align 1:1 on ``step`` and a plain ``merge`` joins them. + """ + + def __init__(self, path: Path) -> None: + self._path = path + + def write(self, step: int, sample: Dict[str, Any]) -> None: + if step < 1: + raise ValueError(f"step must be a positive trial index (cloudai DSE is 1-based); got {step}") + if not sample: + return + new_file = not self._path.exists() + self._path.parent.mkdir(parents=True, exist_ok=True) + with self._path.open("a", newline="") as f: + writer = csv.writer(f) + if new_file: + writer.writerow(("step", "env")) + writer.writerow([step, sample]) + + +@runtime_checkable +class StepObserver(Protocol): + """ + Hook fired by ``CloudAIGymEnv.step()`` around each trial. + + ``before_step`` runs before the cache lookup and before any workload + execution. ``after_step`` runs after the trajectory row is written. + """ + + def before_step(self, test_run: Any) -> None: ... + + def after_step(self, test_run: Any, observation: list, reward: float) -> None: ... + + +class EnvParamsObserver: + """ + Sample env_params per step and stash them for the cache and the workload. + + Pre-step: samples ``test_run.test.env_params`` for ``test_run.step`` and + stashes the result on ``test_run.current_env_params`` so the cache key and + the workload's substitution both see it. Persistence is owned by + ``CloudAIGymEnv.write_trajectory``, which sinks ``trajectory.csv`` and the + ``env.csv`` projection from the single ``TrajectoryEntry`` - so a trial that + writes no trajectory row writes no env.csv row either, keeping the two files + 1:1 step-aligned. Post-step: no-op. + """ + + def __init__(self, env_params: Dict[str, EnvParamSpec], seed: int) -> None: + self._sampler = EnvParamsSampler(env_params, seed=seed) + + def before_step(self, test_run: Any) -> None: + test_run.current_env_params = self._sampler.sample(test_run.step) + + def after_step(self, test_run: Any, observation: list, reward: float) -> None: + del test_run, observation, reward # persistence handled by CloudAIGymEnv.write_trajectory diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py index 8b981d8ea..99637d4e8 100644 --- a/src/cloudai/models/workload.py +++ b/src/cloudai/models/workload.py @@ -23,6 +23,8 @@ from cloudai.core import GitRepo, Installable, JobStatusResult, PythonExecutable, Registry, System, TestRun +from ..configurator.env_params import EnvParamSpec + class CmdArgs(BaseModel): """Test command arguments.""" @@ -110,6 +112,14 @@ class TestDefinition(BaseModel, ABC): agent_metrics: list[str] = Field(default=["default"]) agent_reward_function: str = "inverse" agent_config: dict[str, Any] | None = Field(default=None, description="Agent configuration.") + env_params: dict[str, EnvParamSpec] = Field( + default_factory=dict, + description=( + "Domain-randomized parameters sampled by the env per trial. Sibling to " + "cmd_args; not part of the agent's action space. CloudAIGymEnv samples, " + "persists to env.csv, and includes them in the trajectory cache key." + ), + ) @property def cmd_args_dict(self) -> Dict[str, Union[str, List[str]]]: diff --git a/tests/test_cloudaigym.py b/tests/test_cloudaigym.py index 4cd7c6fb8..e687d4373 100644 --- a/tests/test_cloudaigym.py +++ b/tests/test_cloudaigym.py @@ -21,6 +21,7 @@ import pytest from cloudai.configurator import CloudAIGymEnv, GridSearchAgent, TrajectoryEntry +from cloudai.configurator.env_params import EnvParamSpec from cloudai.core import BaseRunner, RewardOverrides, Runner, TestRun, TestScenario from cloudai.systems.slurm import SlurmSystem from cloudai.util import flatten_dict @@ -441,3 +442,324 @@ def test_cached_step_appends_trajectory_row(nemorun: NeMoRunTestDefinition, tmp_ contents = csv_path.read_text().strip().splitlines() assert contents[0] == "step,action,reward,observation" assert contents[-1].startswith("5,") + + +def _seed_cached_entry_with_env_params( + env: CloudAIGymEnv, action: dict[str, object], env_params: dict[str, object] +) -> None: + """Seed env.trajectory with one entry carrying the given env_params.""" + entry = TrajectoryEntry(step=1, action=action, reward=0.5, observation=[100.0], env_params=env_params) + env.test_run.current_iteration = 0 + env.trajectory = {0: [entry]} + + +def test_cache_miss_when_env_params_differ(base_tr: TestRun, tmp_path: Path) -> None: + """Cache MUST miss when env_params differ, even if action is identical. + + Without this property the agent receives stale rewards on every cache hit + under domain randomization. PPO/DQN/BO all silently train on labels that + do not correspond to the env they were nominally generated under. + """ + runner = MagicMock() + runner.scenario_root = tmp_path / "scenario" + runner.system = MagicMock() + runner.test_scenario = MagicMock(test_runs=[]) + runner.jobs = {} + runner.testrun_to_job_map = {} + + env = CloudAIGymEnv(test_run=base_tr, runner=runner, rewards=RewardOverrides()) + _seed_cached_entry_with_env_params(env, {"x": 10}, env_params={"drop_rate": 0.001}) + + env.test_run.current_env_params = {"drop_rate": 0.01} + + assert env.get_cached_trajectory_result({"x": 10}) is None, ( + "Cache must include env_params in its key. The current implementation " + "keys on action alone, so trials repeating the same action under a " + "different env_params sample receive a stale cached reward. See " + "env-params-cloudai-corpus-plan.md." + ) + + +def test_cache_hit_when_action_and_env_params_match(base_tr: TestRun, tmp_path: Path) -> None: + """Same action AND same env_params must still HIT the cache.""" + runner = MagicMock() + runner.scenario_root = tmp_path / "scenario" + runner.system = MagicMock() + runner.test_scenario = MagicMock(test_runs=[]) + runner.jobs = {} + runner.testrun_to_job_map = {} + + env = CloudAIGymEnv(test_run=base_tr, runner=runner, rewards=RewardOverrides()) + _seed_cached_entry_with_env_params(env, {"x": 10}, env_params={"drop_rate": 0.001}) + + env.test_run.current_env_params = {"drop_rate": 0.001} + + result = env.get_cached_trajectory_result({"x": 10}) + assert result is not None + assert result.step == 1 + + +def test_cache_hit_when_neither_has_env_params(base_tr: TestRun, tmp_path: Path) -> None: + """Workloads without env_params behave exactly as today (back-compat).""" + runner = MagicMock() + runner.scenario_root = tmp_path / "scenario" + runner.system = MagicMock() + runner.test_scenario = MagicMock(test_runs=[]) + runner.jobs = {} + runner.testrun_to_job_map = {} + + env = CloudAIGymEnv(test_run=base_tr, runner=runner, rewards=RewardOverrides()) + env.test_run.current_iteration = 0 + env.trajectory = {0: [TrajectoryEntry(step=1, action={"x": 10}, reward=0.5, observation=[100.0])]} + # Note: neither the cached entry nor test_run carries env_params -> existing behavior. + + result = env.get_cached_trajectory_result({"x": 10}) + assert result is not None + assert result.step == 1 + + +def test_step_reruns_workload_when_env_params_change(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None: + """Integration: env.step() with same action but different env_params re-runs the workload. + + Counterpart to test_cache_miss_when_env_params_differ but exercising the + full step() flow: increment_step -> apply_params_set -> cache lookup -> + runner.run() -> write_trajectory. + """ + tdef = nemorun.model_copy(deep=True) + tdef.cmd_args.data.global_batch_size = 8 + tdef.agent_metrics = ["default"] + test_run = TestRun( + name="dr_tr", + test=tdef, + num_nodes=1, + nodes=[], + output_path=tmp_path / "out" / "dr_tr" / "0", + reports={NeMoRunReportGenerationStrategy}, + ) + test_scenario = TestScenario(name="dr_scenario", test_runs=[test_run]) + + runner = MagicMock(spec=BaseRunner) + runner.scenario_root = tmp_path / "scenario" + runner.system = MagicMock() + runner.test_scenario = test_scenario + runner.jobs = {} + runner.testrun_to_job_map = {} + runner.shutting_down = False + runner.get_job_output_path.return_value = test_run.output_path + + env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides()) + action = {"trainer.max_steps": 1000} + fake_obs = iter([[100.0], [50.0]]) + + with patch.object(env, "get_observation", side_effect=lambda _action: next(fake_obs)): + env.test_run.step = 0 + env.test_run.current_env_params = {"drop_rate": 0.001} + obs1, _r1, *_ = env.step(action) + + env.test_run.current_env_params = {"drop_rate": 0.01} + obs2, _r2, *_ = env.step(action) + + assert runner.run.call_count == 2, ( + "Different env_params between two env.step() calls with the same action " + "must trigger a workload re-run; the cache lookup must miss." + ) + assert obs1 != obs2, "fresh workload run should produce a fresh observation" + + +def test_env_csv_is_step_aligned_with_trajectory(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None: + """env.csv must have exactly one row per env.step() call, with steps aligned 1:1 to trajectory.csv. + + This pins the corpus-friendly contract: a downstream consumer can + ``pd.merge(traj, env, on="step")`` without losing rows on either side, + independent of whether the trial hit the trajectory cache. + """ + tdef = nemorun.model_copy(deep=True) + tdef.cmd_args.data.global_batch_size = 8 + tdef.agent_metrics = ["default"] + tdef.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])} + tdef.agent_config = {"random_seed": 42} + + test_run = TestRun( + name="dr_tr", + test=tdef, + num_nodes=1, + nodes=[], + output_path=tmp_path / "out" / "dr_tr" / "0", + reports={NeMoRunReportGenerationStrategy}, + ) + test_scenario = TestScenario(name="dr_scenario", test_runs=[test_run]) + + runner = MagicMock(spec=BaseRunner) + runner.scenario_root = tmp_path / "scenario" + runner.system = MagicMock() + runner.test_scenario = test_scenario + runner.jobs, runner.testrun_to_job_map, runner.shutting_down = {}, {}, False + runner.get_job_output_path.return_value = test_run.output_path + + env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides()) + action_a, action_b = {"trainer.max_steps": 1000}, {"trainer.max_steps": 2000} + fake_obs = iter([[100.0], [50.0], [25.0]]) + + with patch.object(env, "get_observation", side_effect=lambda _action: next(fake_obs)): + env.test_run.step = 0 + for action in (action_a, action_b, action_a): + env.step(action) + + env_csv = env._env_csv_path() + traj_csv = env.trajectory_file_path + assert env_csv.exists(), "env.csv must be written when env_params is declared" + + env_steps = [int(line.split(",", 1)[0]) for line in env_csv.read_text().strip().splitlines()[1:]] + traj_steps = [int(line.split(",", 1)[0]) for line in traj_csv.read_text().strip().splitlines()[1:]] + assert env_steps == traj_steps == [1, 2, 3], ( + f"step columns must align 1:1 across env.csv ({env_steps}) and trajectory.csv ({traj_steps})" + ) + + +def test_env_csv_step_alignment_holds_on_constraint_failure(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None: + """A constraint failure must not desync env.csv from trajectory.csv. + + Runs three steps where the middle one fails ``constraint_check`` and the + other two succeed. ``env.csv`` is sunk inside ``write_trajectory`` from the + same ``TrajectoryEntry``, which is never reached on the early-return + constraint-failure path - so the failed step lands in neither file. The + corpus-friendly contract (``pd.merge(traj, env, on="step")`` loses no rows) + therefore holds via shared absence: both files record exactly the surviving + steps, aligned 1:1. + """ + tdef = nemorun.model_copy(deep=True) + tdef.cmd_args.data.global_batch_size = 8 + tdef.agent_metrics = ["default"] + tdef.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])} + tdef.agent_config = {"random_seed": 42} + + test_run = TestRun( + name="dr_tr", + test=tdef, + num_nodes=1, + nodes=[], + output_path=tmp_path / "out" / "dr_tr" / "0", + reports={NeMoRunReportGenerationStrategy}, + ) + test_scenario = TestScenario(name="dr_scenario", test_runs=[test_run]) + + runner = MagicMock(spec=BaseRunner) + runner.scenario_root = tmp_path / "scenario" + runner.system = MagicMock() + runner.test_scenario = test_scenario + runner.jobs, runner.testrun_to_job_map, runner.shutting_down = {}, {}, False + runner.get_job_output_path.return_value = test_run.output_path + + env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides()) + + # Step 2 fails the constraint; steps 1 and 3 survive. get_observation is only + # reached on the surviving steps, so it yields exactly two values. + fake_obs = iter([[100.0], [25.0]]) + with ( + patch.object(NeMoRunTestDefinition, "constraint_check", side_effect=[True, False, True]), + patch.object(env, "get_observation", side_effect=lambda _action: next(fake_obs)), + ): + env.test_run.step = 0 + for action in ({"trainer.max_steps": 1000}, {"trainer.max_steps": 2000}, {"trainer.max_steps": 3000}): + env.step(action) + + env_csv = env._env_csv_path() + traj_csv = env.trajectory_file_path + + assert env_csv.exists(), "surviving steps declare env_params -> env.csv must exist" + env_steps = [int(line.split(",", 1)[0]) for line in env_csv.read_text().strip().splitlines()[1:]] + traj_steps = ( + [int(line.split(",", 1)[0]) for line in traj_csv.read_text().strip().splitlines()[1:]] + if traj_csv.exists() + else [] + ) + assert env_steps == traj_steps == [1, 3], ( + f"the constraint-failed step (2) must appear in neither file; env.csv ({env_steps}) " + f"and trajectory.csv ({traj_steps}) must stay 1:1 aligned on the surviving steps" + ) + + +def test_step_cache_hit_with_declared_env_params_still_writes_env_csv( + nemorun: NeMoRunTestDefinition, tmp_path: Path +) -> None: + """End-to-end: cache HIT under observer-driven env_params still records env.csv. + + A cache hit still calls ``write_trajectory``, which sinks the trajectory row + and the matching env.csv row from the same entry - keeping the two files + step-aligned even when the workload itself is short-circuited. + Asserts: (a) the workload is NOT re-run (cache short-circuit), (b) + env.csv gains a row, (c) trajectory.csv gains a row carrying the + sampled env_params. + """ + import random as _random + + tdef = nemorun.model_copy(deep=True) + tdef.cmd_args.data.global_batch_size = 8 + tdef.agent_metrics = ["default"] + tdef.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])} + tdef.agent_config = {"random_seed": 42} + + test_run = TestRun( + name="dr_tr", + test=tdef, + num_nodes=1, + nodes=[], + output_path=tmp_path / "out" / "dr_tr" / "0", + reports={NeMoRunReportGenerationStrategy}, + ) + runner = MagicMock(spec=BaseRunner) + runner.scenario_root = tmp_path / "scenario" + runner.system = MagicMock() + runner.test_scenario = TestScenario(name="dr_scenario", test_runs=[test_run]) + runner.jobs, runner.testrun_to_job_map, runner.shutting_down = {}, {}, False + runner.get_job_output_path.return_value = test_run.output_path + + env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides()) + assert env.observers, "TestDefinition.env_params declared -> observer must be built" + + expected_sample = {"drop_rate": _random.Random("42:drop_rate:1").choice([0.0, 0.001, 0.01])} + action = {"trainer.max_steps": 1000} + env.test_run.current_iteration = 0 + env.trajectory = { + 0: [TrajectoryEntry(step=0, action=action, reward=0.42, observation=[0.84], env_params=expected_sample)] + } + env.test_run.step = 0 + + with patch.object(env, "get_observation", side_effect=AssertionError("cache miss path must not run")): + obs, reward, _done, _info = env.step(action) + + runner.run.assert_not_called() + assert reward == 0.42 and obs == [0.84] + + env_csv = env._env_csv_path() + assert env_csv.exists(), "cache HIT must NOT skip the observer; env.csv must record the trial" + env_rows = env_csv.read_text().strip().splitlines() + assert env_rows[0] == "step,env" + assert env_rows[1].startswith("1,"), f"expected step 1 row in env.csv, got {env_rows[1]!r}" + + traj_rows = env.trajectory[0] + assert len(traj_rows) == 2 and traj_rows[-1].env_params == expected_sample, ( + "cache-hit trajectory entry must record the per-trial env_params sample" + ) + + +def test_no_env_csv_when_env_params_not_declared(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None: + """Workloads without [env_params.*] pay zero overhead: no observer, no env.csv.""" + tdef = nemorun.model_copy(deep=True) + tdef.cmd_args.data.global_batch_size = 8 + test_run = TestRun( + name="plain_tr", + test=tdef, + num_nodes=1, + nodes=[], + output_path=tmp_path / "out" / "plain_tr" / "0", + reports={NeMoRunReportGenerationStrategy}, + ) + runner = MagicMock(spec=BaseRunner) + runner.scenario_root = tmp_path / "scenario" + runner.system = MagicMock() + + env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides()) + + assert env.observers == [], "no env_params declared -> no per-step observers" + assert not env._env_csv_path().exists() diff --git a/tests/test_env_params.py b/tests/test_env_params.py new file mode 100644 index 000000000..3aa931607 --- /dev/null +++ b/tests/test_env_params.py @@ -0,0 +1,114 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Unit tests for the domain-randomization primitives in cloudai.configurator.env_params.""" + +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace + +import pytest +from pydantic import ValidationError + +from cloudai.configurator.env_params import ( + CsvSink, + EnvParamsObserver, + EnvParamSpec, + EnvParamsSampler, +) + + +def test_env_param_spec_requires_at_least_two_values() -> None: + with pytest.raises(ValidationError): + EnvParamSpec(values=[0.0]) + + +def test_env_param_spec_rejects_mismatched_weights() -> None: + with pytest.raises(ValidationError): + EnvParamSpec(values=[0.0, 0.1], weights=[1.0]) + + +def test_env_param_spec_rejects_zero_sum_weights() -> None: + with pytest.raises(ValidationError): + EnvParamSpec(values=[0.0, 0.1], weights=[0.0, 0.0]) + + +@pytest.mark.parametrize("bad", [float("inf"), float("nan"), float("-inf")]) +def test_env_param_spec_rejects_non_finite_weights(bad: float) -> None: + with pytest.raises(ValidationError): + EnvParamSpec(values=[0.0, 0.1], weights=[bad, 1.0]) + + +def test_sampler_is_deterministic_across_calls() -> None: + spec = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])} + a = EnvParamsSampler(spec, seed=42) + b = EnvParamsSampler(spec, seed=42) + seq_a = [a.sample(t) for t in range(1, 6)] + seq_b = [b.sample(t) for t in range(1, 6)] + assert seq_a == seq_b, "same (seed, trial) must produce the same draw across instances" + + +def test_sampler_each_param_is_independent() -> None: + """Adding an unrelated parameter must not perturb existing parameters' draws.""" + base = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])} + extended = { + "drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01]), + "latency_ms": EnvParamSpec(values=[1, 5, 10]), + } + a = [EnvParamsSampler(base, seed=7).sample(t)["drop_rate"] for t in range(1, 11)] + b = [EnvParamsSampler(extended, seed=7).sample(t)["drop_rate"] for t in range(1, 11)] + assert a == b, "per-parameter RNG seeding must isolate parameters from each other" + + +def test_csv_sink_skips_empty_samples_and_rejects_zero_step(tmp_path: Path) -> None: + sink = CsvSink(tmp_path / "env.csv") + sink.write(1, {}) # empty -> no-op, no file + assert not (tmp_path / "env.csv").exists() + with pytest.raises(ValueError, match="must be a positive trial index"): + sink.write(0, {"drop_rate": 0.0}) + + +def test_csv_sink_writes_header_then_rows(tmp_path: Path) -> None: + sink = CsvSink(tmp_path / "env.csv") + sink.write(1, {"drop_rate": 0.001}) + sink.write(2, {"drop_rate": 0.01}) + contents = (tmp_path / "env.csv").read_text().strip().splitlines() + assert contents[0] == "step,env" + assert contents[1].startswith("1,") + assert contents[2].startswith("2,") + + +def test_observer_samples_and_stashes_current_env_params() -> None: + """before_step samples and stashes the result; persistence is the env's job.""" + spec = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])} + observer = EnvParamsObserver(spec, seed=42) + test_run = SimpleNamespace(step=3, current_env_params={}) + + observer.before_step(test_run) + + assert "drop_rate" in test_run.current_env_params + assert test_run.current_env_params["drop_rate"] in {0.0, 0.001, 0.01} + + +def test_observer_after_step_is_noop() -> None: + """after_step must not mutate test_run; CloudAIGymEnv.write_trajectory owns persistence.""" + observer = EnvParamsObserver({}, seed=0) + test_run = SimpleNamespace(step=1, current_env_params={"x": 1}) + + observer.after_step(test_run, observation=[0.0], reward=0.0) + + assert test_run.current_env_params == {"x": 1} diff --git a/tests/test_handlers.py b/tests/test_handlers.py index bcf01978f..0cd3c20e1 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -26,19 +26,23 @@ from cloudai.cli.handlers import ( handle_dse_job, + validate_dse_env_params, verify_system_configs, verify_test_configs, verify_test_scenarios, ) +from cloudai.configurator.env_params import EnvParamSpec from cloudai.core import ( BaseAgent, BaseAgentConfig, + Parser, Registry, RewardOverrides, Runner, TestDependency, TestRun, TestScenario, + TestScenarioParsingError, ) from cloudai.models.scenario import ReportConfig from cloudai.reporter import StatusReporter @@ -427,3 +431,49 @@ def test_handle_dse_job_documents_failure_in_reports_before_raising( contents = failure_report.read_text() assert "RuntimeError" in contents assert "agent blew up" in contents + + +def test_validate_dse_env_params_rejects_non_dse(base_tr: TestRun) -> None: + base_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])} + scenario = TestScenario(name="s", test_runs=[base_tr]) + with pytest.raises(TestScenarioParsingError, match="declare env_params but are not DSE jobs"): + validate_dse_env_params(scenario) + + +def test_validate_dse_env_params_allows_dse_run(dse_tr: TestRun) -> None: + dse_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])} + assert dse_tr.is_dse_job is True # precondition: DSE + env_params is the allowed combination + validate_dse_env_params(TestScenario(name="s", test_runs=[dse_tr])) # no exception == pass + + +def test_validate_dse_env_params_allows_num_nodes_sweep(base_tr: TestRun) -> None: + base_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])} + base_tr.num_nodes = [1, 2] + assert base_tr.is_dse_job is True # a num_nodes list sweep makes it DSE, so env_params is allowed + validate_dse_env_params(TestScenario(name="s", test_runs=[base_tr])) # no exception == pass + + +def test_validate_dse_env_params_allows_non_dse_without_env_params(base_tr: TestRun) -> None: + assert base_tr.is_dse_job is False # precondition: not DSE, but also no env_params declared + assert not base_tr.test.env_params + validate_dse_env_params(TestScenario(name="s", test_runs=[base_tr])) # no exception == pass + + +def test_verify_test_scenarios_rejects_env_params_without_dse( + base_tr: TestRun, monkeypatch: pytest.MonkeyPatch +) -> None: + base_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])} + bad = TestScenario(name="s", test_runs=[base_tr]) + monkeypatch.setattr(Parser, "parse_tests", lambda *a, **k: []) + monkeypatch.setattr(Parser, "parse_hooks", lambda *a, **k: {}) + monkeypatch.setattr(Parser, "parse_test_scenario", lambda *a, **k: bad) + assert verify_test_scenarios([Path("dummy.toml")], [], [], []) == 1 + + +def test_verify_test_scenarios_allows_env_params_with_dse(dse_tr: TestRun, monkeypatch: pytest.MonkeyPatch) -> None: + dse_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])} + good = TestScenario(name="s", test_runs=[dse_tr]) + monkeypatch.setattr(Parser, "parse_tests", lambda *a, **k: []) + monkeypatch.setattr(Parser, "parse_hooks", lambda *a, **k: {}) + monkeypatch.setattr(Parser, "parse_test_scenario", lambda *a, **k: good) + assert verify_test_scenarios([Path("dummy.toml")], [], [], []) == 0 From 0dabd4e482af47095a6dd3fdcd9ccf27a86837c8 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Tue, 23 Jun 2026 17:44:40 -0400 Subject: [PATCH 02/22] refactor(configurator): env_params as cmd_args annotation, excluded from search space Make env_params a thin annotation over cmd_args fields instead of a holder of candidate values. Candidate values live in cmd_args (the single source of truth, exactly like an action-space dimension); env_params. only marks a field as env-sampled and carries optional sampling weights, never the values. - EnvParamSpec drops `values`; validates weights (finite, non-negative, sum=1.0). - Sampler/observer resolve candidate lists from cmd_args; scalar knobs are no-ops. - TestDefinition.validate_env_params cross-checks annotations against cmd_args (key must be a real field; weights require a list and must match its length). - Exclude env_params keys from both param_space and is_dse_job: an env-sampled list is not a search dimension, so an env-params-only workload is not a DSE job. - validate_dse_env_params rejects env_params on non-DSE runs and on grid_search (exhaustive search cannot exploit per-trial randomization). - Scrub private-implementation references from public docstrings. - Unit tests use generic Atari Breakout semantics (ball_speed / paddle_width). --- src/cloudai/_core/test_scenario.py | 4 +- src/cloudai/cli/handlers.py | 21 ++- src/cloudai/configurator/cloudai_gym.py | 15 +- src/cloudai/configurator/env_params.py | 98 ++++++++---- src/cloudai/models/workload.py | 55 ++++++- tests/test_cloudaigym.py | 155 ++++++++++++++----- tests/test_env_params.py | 197 ++++++++++++++++++++---- tests/test_handlers.py | 34 ++-- 8 files changed, 452 insertions(+), 127 deletions(-) diff --git a/src/cloudai/_core/test_scenario.py b/src/cloudai/_core/test_scenario.py index 81f2723b0..26f22e7e0 100644 --- a/src/cloudai/_core/test_scenario.py +++ b/src/cloudai/_core/test_scenario.py @@ -157,7 +157,9 @@ def param_space(self) -> dict[str, Any]: **{ key: value for key, value in cmd_args_dict.items() - if isinstance(value, list) and not self.test.is_dse_excluded_arg(key) + if isinstance(value, list) + and not self.test.is_dse_excluded_arg(key) + and key.split(".", 1)[0] not in self.test.env_params }, **{f"extra_env_vars.{key}": value for key, value in extra_env_vars_dict.items() if isinstance(value, list)}, } diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py index 87e4ccfd5..f7b9f3844 100644 --- a/src/cloudai/cli/handlers.py +++ b/src/cloudai/cli/handlers.py @@ -300,17 +300,24 @@ def _check_installation( def validate_dse_env_params(test_scenario: TestScenario) -> None: """ - Reject prepped configs that declare env_params on a non-DSE test run. + Reject prepped configs that declare env_params but cannot sample them. - env_params are sampled only during DSE (by CloudAIGymEnv); on a non-DSE run they would be - silently ignored. is_dse_job is a property of the fully prepped config, so this is validated - here rather than at parse time. + env_params are sampled per-trial by CloudAIGymEnv, but only for a DSE run on a *learning* + agent. A non-DSE run has no per-trial loop at all, and grid_search exhaustively enumerates the + search space - sampling env_params there would just inject noise into a deterministic sweep with + no policy to gain robustness. is_dse_job is a property of the fully prepped config, so this is + validated here rather than at parse time. """ - offenders = [tr.name for tr in test_scenario.test_runs if tr.test.env_params and not tr.is_dse_job] + offenders = [ + tr.name + for tr in test_scenario.test_runs + if tr.test.env_params and (not tr.is_dse_job or tr.test.agent == "grid_search") + ] if offenders: raise TestScenarioParsingError( - f"Tests {offenders} declare env_params but are not DSE jobs. env_params are sampled only during " - "DSE (by CloudAIGymEnv); add a sweep (a list-valued cmd_args/extra_env_vars entry or num_nodes) " + f"Tests {offenders} declare env_params but will not sample them. env_params are sampled per-trial " + "only by a DSE run on a learning agent (grid_search searches the whole space and ignores them). " + "Use a learning agent and add a sweep (a list-valued cmd_args/extra_env_vars entry or num_nodes), " "or remove env_params." ) diff --git a/src/cloudai/configurator/cloudai_gym.py b/src/cloudai/configurator/cloudai_gym.py index 80c5210f1..d16a9c63e 100644 --- a/src/cloudai/configurator/cloudai_gym.py +++ b/src/cloudai/configurator/cloudai_gym.py @@ -78,7 +78,7 @@ def _build_observers(self) -> List[StepObserver]: if self.test_run.test.env_params: seed = int((self.test_run.test.agent_config or {}).get("random_seed", 0)) self._env_sink = CsvSink(self._env_csv_path()) - observers.append(EnvParamsObserver(self.test_run.test.env_params, seed)) + observers.append(EnvParamsObserver(self.test_run.test.env_params, self.test_run.test.cmd_args, seed)) return observers def _env_csv_path(self) -> Path: @@ -146,6 +146,19 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]: for observer in self.observers: observer.before_step(self.test_run) + # Overlay this trial's sampled env_params onto cmd_args so the workload actually + # runs with the sampled values - the env-side twin of apply_params_set(action). + # Sampling, persistence (env.csv), and the trajectory cache key are handled + # separately; this is the single, workload-agnostic injection point. Keys are + # validated to be cmd_args fields at TestDefinition build time; we re-filter to + # those fields here so a programmatically-built run can't inject unknown attrs. + if self.test_run.current_env_params: + cmd_args = self.test_run.test.cmd_args + fields = getattr(type(cmd_args), "model_fields", {}) + overlay = {k: v for k, v in self.test_run.current_env_params.items() if k in fields} + if overlay: + self.test_run.test.cmd_args = cmd_args.model_copy(update=overlay) + cached_result = self.get_cached_trajectory_result(action) if cached_result is not None: logging.info( diff --git a/src/cloudai/configurator/env_params.py b/src/cloudai/configurator/env_params.py index 44bf57524..ddd26758e 100644 --- a/src/cloudai/configurator/env_params.py +++ b/src/cloudai/configurator/env_params.py @@ -15,19 +15,23 @@ # limitations under the License. """ -Domain-randomization primitives for CloudAI DSE. - -An env-randomized parameter is a workload knob whose value the environment -samples per trial (categorical, optional weights). It is sibling to -``cmd_args`` on a ``TestDefinition`` and does not enter the agent's action -space; the policy learns a robust mapping under that variation. - -This module owns the data schema (``EnvParamSpec``), the deterministic +Per-trial environment-parameter primitives for CloudAI DSE. + +An env-randomized parameter is a workload knob whose candidate values live in +``cmd_args`` (a plain list - the single source of truth, exactly like an +action-space dimension) but which the environment *samples* per trial rather +than letting the agent search it. ``env_params`` is the annotation that marks +such a field: ``env_params.`` reclassifies ``cmd_args.`` from +action-space to env-sampled and carries only *how* to sample (optional +``weights``), never the values. The policy learns a robust mapping under that +variation; the knob never enters the agent's action space. + +This module owns the annotation schema (``EnvParamSpec``), the deterministic sampler (``EnvParamsSampler``), the persistence interface (``EnvParamsSink`` + ``CsvSink``) and the per-step observer (``EnvParamsObserver``). ``CloudAIGymEnv`` consumes these directly so the artifacts (``env.csv``) and the cache key align 1:1 with ``trajectory.csv`` -regardless of agent (PPO, BO, GA, MAB) or workload. +regardless of agent or workload. """ from __future__ import annotations @@ -43,59 +47,71 @@ class EnvParamSpec(BaseModel): - """Specification of one env-randomized parameter (categorical).""" + """ + Annotation marking one cmd_args field as env-sampled. + + Carries only *how* to sample - the candidate values themselves live in + ``cmd_args.`` as a plain list. ``weights`` (optional) are positional, + aligned 1:1 with that candidate list; omit for uniform sampling. The + length match against the candidate list is a cross-field check enforced by + ``TestDefinition`` (which can see ``cmd_args``); here we validate only the + weights' intrinsic shape. + """ model_config = ConfigDict(extra="forbid") - values: List[Any] = Field( - min_length=2, - description="Candidate values; a single-valued parameter is just a fixed cmd_args entry.", - ) weights: Optional[List[float]] = Field( default=None, - description="Optional probability weights aligned with values; uniform if omitted.", + description="Optional probability weights aligned with the cmd_args candidate list; uniform if omitted.", ) @model_validator(mode="after") def _validate_weights(self) -> Self: if self.weights is None: return self - if len(self.weights) != len(self.values): - raise ValueError( - f"env_params weights length {len(self.weights)} does not match values length {len(self.values)}" - ) for w in self.weights: if not math.isfinite(w) or w < 0: raise ValueError(f"env_params weights must be finite and non-negative; got {w}") - if sum(self.weights) <= 0: - raise ValueError("env_params weights must have a positive sum") + total = sum(self.weights) + if abs(total - 1.0) > 1e-6: + raise ValueError(f"env_params weights must sum to 1.0; got {total}") return self class EnvParamsSampler: """ - Per-trial categorical sampler. + Per-trial categorical sampler over candidate lists. + + Candidates are resolved from ``cmd_args`` by the caller and passed in as + ``{name: [v0, v1, ...]}``; optional ``weights`` mirror that mapping. Determinism contract: ``sample(t)`` returns the same dict on every call - (across processes) for the same ``(seed, env_params, t)``. + (across processes) for the same ``(seed, candidates, t)``. Independence contract: each parameter uses an RNG seeded by ``f"{seed}:{name}:{trial}"`` so adding or removing an unrelated parameter does not perturb existing parameters' draw sequences. """ - def __init__(self, env_params: Dict[str, EnvParamSpec], seed: int) -> None: - self._env_params = env_params + def __init__( + self, + candidates: Dict[str, List[Any]], + seed: int, + weights: Optional[Dict[str, List[float]]] = None, + ) -> None: + self._candidates = candidates + self._weights = weights or {} self._seed = seed def sample(self, trial: int) -> Dict[str, Any]: out: Dict[str, Any] = {} - for name, spec in self._env_params.items(): + for name, choices in self._candidates.items(): rng = random.Random(f"{self._seed}:{name}:{trial}") - if spec.weights is not None: - out[name] = rng.choices(spec.values, weights=spec.weights, k=1)[0] + weights = self._weights.get(name) + if weights is not None: + out[name] = rng.choices(choices, weights=weights, k=1)[0] else: - out[name] = rng.choice(spec.values) + out[name] = rng.choice(choices) return out @@ -150,17 +166,31 @@ class EnvParamsObserver: """ Sample env_params per step and stash them for the cache and the workload. - Pre-step: samples ``test_run.test.env_params`` for ``test_run.step`` and - stashes the result on ``test_run.current_env_params`` so the cache key and - the workload's substitution both see it. Persistence is owned by + Construction: resolves each annotated knob's candidate list from + ``cmd_args`` (the single source of truth) once, up front. A knob whose + ``cmd_args`` value is a scalar (not a list) is fixed - its annotation is a + no-op and it is skipped, so nothing is sampled or stashed for it. + + Pre-step: samples the resolved candidates for ``test_run.step`` and stashes + the result on ``test_run.current_env_params`` so the cache key and the + cmd_args overlay both see it. Persistence is owned by ``CloudAIGymEnv.write_trajectory``, which sinks ``trajectory.csv`` and the ``env.csv`` projection from the single ``TrajectoryEntry`` - so a trial that writes no trajectory row writes no env.csv row either, keeping the two files 1:1 step-aligned. Post-step: no-op. """ - def __init__(self, env_params: Dict[str, EnvParamSpec], seed: int) -> None: - self._sampler = EnvParamsSampler(env_params, seed=seed) + def __init__(self, env_params: Dict[str, EnvParamSpec], cmd_args: Any, seed: int) -> None: + candidates: Dict[str, List[Any]] = {} + weights: Dict[str, List[float]] = {} + for name, spec in env_params.items(): + value = getattr(cmd_args, name, None) + if not isinstance(value, list): + continue # scalar cmd_args knob is fixed; the annotation is a no-op + candidates[name] = value + if spec.weights is not None: + weights[name] = spec.weights + self._sampler = EnvParamsSampler(candidates, seed=seed, weights=weights) def before_step(self, test_run: Any) -> None: test_run.current_env_params = self._sampler.sample(test_run.step) diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py index 99637d4e8..8683cc8bb 100644 --- a/src/cloudai/models/workload.py +++ b/src/cloudai/models/workload.py @@ -115,7 +115,7 @@ class TestDefinition(BaseModel, ABC): env_params: dict[str, EnvParamSpec] = Field( default_factory=dict, description=( - "Domain-randomized parameters sampled by the env per trial. Sibling to " + "Environment parameters sampled by the env per trial. Sibling to " "cmd_args; not part of the agent's action space. CloudAIGymEnv samples, " "persists to env.csv, and includes them in the trajectory cache key." ), @@ -146,17 +146,23 @@ def constraint_check(self, tr: TestRun, system: Optional[System]) -> bool: @property def is_dse_job(self) -> bool: - def check_dict(d: dict, parent_key: str = "") -> bool: + def check_dict(d: dict, parent_key: str = "", skip_env_params: bool = False) -> bool: if isinstance(d, dict): for key, value in d.items(): path = f"{parent_key}.{key}" if parent_key else key if self.is_dse_excluded_arg(path): continue - if isinstance(value, list) or (isinstance(value, dict) and check_dict(value, path)): + # env_params lists are sampled by the env, not searched by the agent, so they + # are not action-space dimensions and must not make a run count as DSE. + if skip_env_params and path.split(".", 1)[0] in self.env_params: + continue + if isinstance(value, list) or ( + isinstance(value, dict) and check_dict(value, path, skip_env_params) + ): return True return False - return check_dict(self.cmd_args_dict) or check_dict(self.extra_env_vars) + return check_dict(self.cmd_args_dict, skip_env_params=True) or check_dict(self.extra_env_vars) @field_validator("dse_excluded_args", mode="before") @classmethod @@ -201,3 +207,44 @@ def validate_agent_config(self) -> Self: agent_config_class = agent_class.get_config_class() agent_config_class.model_validate(self.agent_config) return self + + @model_validator(mode="after") + def validate_env_params(self) -> Self: + """ + Validate env_params annotations against cmd_args. + + ``env_params`` is an annotation: each key names a ``cmd_args`` field whose value is + the candidate set (the single source of truth), and the entry carries only *how* to + sample. So each key must name a real ``cmd_args`` field; and when ``weights`` are + declared, that field must be a candidate list of >= 2 values and the weights must + align 1:1 with it. A scalar (fixed) ``cmd_args`` value is tolerated as a no-op + marker. Sampling, persistence, the per-trial cmd_args overlay, and the cache key all + live in ``CloudAIGymEnv``; keeping this shape check in core lets the overlay stay + agent- and workload-agnostic rather than re-implemented per workload. + """ + if not self.env_params: + return self + + cmd_args_fields = getattr(type(self.cmd_args), "model_fields", None) + if not cmd_args_fields: + return self + + unknown = sorted(k for k in self.env_params if k not in cmd_args_fields) + if unknown: + raise ValueError(f"env_params keys {unknown} are not cmd_args fields on {type(self.cmd_args).__name__}") + + for name, spec in self.env_params.items(): + if spec.weights is None: + continue + value = getattr(self.cmd_args, name, None) + if not isinstance(value, list) or len(value) < 2: + raise ValueError( + f"env_params['{name}'] declares weights but cmd_args.{name} is not a candidate list " + "(need a list of >= 2 values)" + ) + if len(spec.weights) != len(value): + raise ValueError( + f"env_params['{name}'] weights length {len(spec.weights)} does not match " + f"cmd_args.{name} candidate count {len(value)}" + ) + return self diff --git a/tests/test_cloudaigym.py b/tests/test_cloudaigym.py index e687d4373..975efbe48 100644 --- a/tests/test_cloudaigym.py +++ b/tests/test_cloudaigym.py @@ -34,6 +34,7 @@ ) from cloudai.workloads.nemo_run.report_generation_strategy import NeMoRunReportGenerationStrategy from cloudai.workloads.nixl_bench import NIXLBenchCmdArgs, NIXLBenchTestDefinition +from tests.test_env_params import EnvVarCmdArgs, EnvVarTestDefinition @pytest.fixture @@ -457,7 +458,7 @@ def test_cache_miss_when_env_params_differ(base_tr: TestRun, tmp_path: Path) -> """Cache MUST miss when env_params differ, even if action is identical. Without this property the agent receives stale rewards on every cache hit - under domain randomization. PPO/DQN/BO all silently train on labels that + when the env varies per trial: any agent silently trains on labels that do not correspond to the env they were nominally generated under. """ runner = MagicMock() @@ -468,15 +469,14 @@ def test_cache_miss_when_env_params_differ(base_tr: TestRun, tmp_path: Path) -> runner.testrun_to_job_map = {} env = CloudAIGymEnv(test_run=base_tr, runner=runner, rewards=RewardOverrides()) - _seed_cached_entry_with_env_params(env, {"x": 10}, env_params={"drop_rate": 0.001}) + _seed_cached_entry_with_env_params(env, {"x": 10}, env_params={"ball_speed": 1}) - env.test_run.current_env_params = {"drop_rate": 0.01} + env.test_run.current_env_params = {"ball_speed": 2} assert env.get_cached_trajectory_result({"x": 10}) is None, ( - "Cache must include env_params in its key. The current implementation " - "keys on action alone, so trials repeating the same action under a " - "different env_params sample receive a stale cached reward. See " - "env-params-cloudai-corpus-plan.md." + "Cache must include env_params in its key. Keying on action alone means " + "trials repeating the same action under a different env_params sample " + "receive a stale cached reward." ) @@ -490,9 +490,9 @@ def test_cache_hit_when_action_and_env_params_match(base_tr: TestRun, tmp_path: runner.testrun_to_job_map = {} env = CloudAIGymEnv(test_run=base_tr, runner=runner, rewards=RewardOverrides()) - _seed_cached_entry_with_env_params(env, {"x": 10}, env_params={"drop_rate": 0.001}) + _seed_cached_entry_with_env_params(env, {"x": 10}, env_params={"ball_speed": 2}) - env.test_run.current_env_params = {"drop_rate": 0.001} + env.test_run.current_env_params = {"ball_speed": 2} result = env.get_cached_trajectory_result({"x": 10}) assert result is not None @@ -553,10 +553,10 @@ def test_step_reruns_workload_when_env_params_change(nemorun: NeMoRunTestDefinit with patch.object(env, "get_observation", side_effect=lambda _action: next(fake_obs)): env.test_run.step = 0 - env.test_run.current_env_params = {"drop_rate": 0.001} + env.test_run.current_env_params = {"ball_speed": 1} obs1, _r1, *_ = env.step(action) - env.test_run.current_env_params = {"drop_rate": 0.01} + env.test_run.current_env_params = {"ball_speed": 2} obs2, _r2, *_ = env.step(action) assert runner.run.call_count == 2, ( @@ -566,18 +566,22 @@ def test_step_reruns_workload_when_env_params_change(nemorun: NeMoRunTestDefinit assert obs1 != obs2, "fresh workload run should produce a fresh observation" -def test_env_csv_is_step_aligned_with_trajectory(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None: +def test_env_csv_is_step_aligned_with_trajectory(tmp_path: Path) -> None: """env.csv must have exactly one row per env.step() call, with steps aligned 1:1 to trajectory.csv. This pins the corpus-friendly contract: a downstream consumer can ``pd.merge(traj, env, on="step")`` without losing rows on either side, independent of whether the trial hit the trajectory cache. """ - tdef = nemorun.model_copy(deep=True) - tdef.cmd_args.data.global_batch_size = 8 - tdef.agent_metrics = ["default"] - tdef.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])} - tdef.agent_config = {"random_seed": 42} + tdef = EnvVarTestDefinition( + name="dr", + description="dr", + test_template_name="dr_template", + cmd_args=EnvVarCmdArgs(ball_speed=[1, 2, 3]), + env_params={"ball_speed": EnvParamSpec()}, + agent_metrics=["default"], + agent_config={"random_seed": 42}, + ) test_run = TestRun( name="dr_tr", @@ -585,7 +589,6 @@ def test_env_csv_is_step_aligned_with_trajectory(nemorun: NeMoRunTestDefinition, num_nodes=1, nodes=[], output_path=tmp_path / "out" / "dr_tr" / "0", - reports={NeMoRunReportGenerationStrategy}, ) test_scenario = TestScenario(name="dr_scenario", test_runs=[test_run]) @@ -597,7 +600,7 @@ def test_env_csv_is_step_aligned_with_trajectory(nemorun: NeMoRunTestDefinition, runner.get_job_output_path.return_value = test_run.output_path env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides()) - action_a, action_b = {"trainer.max_steps": 1000}, {"trainer.max_steps": 2000} + action_a, action_b = {"paddle_width": 4}, {"paddle_width": 8} fake_obs = iter([[100.0], [50.0], [25.0]]) with patch.object(env, "get_observation", side_effect=lambda _action: next(fake_obs)): @@ -616,7 +619,7 @@ def test_env_csv_is_step_aligned_with_trajectory(nemorun: NeMoRunTestDefinition, ) -def test_env_csv_step_alignment_holds_on_constraint_failure(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None: +def test_env_csv_step_alignment_holds_on_constraint_failure(tmp_path: Path) -> None: """A constraint failure must not desync env.csv from trajectory.csv. Runs three steps where the middle one fails ``constraint_check`` and the @@ -627,11 +630,15 @@ def test_env_csv_step_alignment_holds_on_constraint_failure(nemorun: NeMoRunTest therefore holds via shared absence: both files record exactly the surviving steps, aligned 1:1. """ - tdef = nemorun.model_copy(deep=True) - tdef.cmd_args.data.global_batch_size = 8 - tdef.agent_metrics = ["default"] - tdef.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])} - tdef.agent_config = {"random_seed": 42} + tdef = EnvVarTestDefinition( + name="dr", + description="dr", + test_template_name="dr_template", + cmd_args=EnvVarCmdArgs(ball_speed=[1, 2, 3]), + env_params={"ball_speed": EnvParamSpec()}, + agent_metrics=["default"], + agent_config={"random_seed": 42}, + ) test_run = TestRun( name="dr_tr", @@ -639,7 +646,6 @@ def test_env_csv_step_alignment_holds_on_constraint_failure(nemorun: NeMoRunTest num_nodes=1, nodes=[], output_path=tmp_path / "out" / "dr_tr" / "0", - reports={NeMoRunReportGenerationStrategy}, ) test_scenario = TestScenario(name="dr_scenario", test_runs=[test_run]) @@ -656,11 +662,11 @@ def test_env_csv_step_alignment_holds_on_constraint_failure(nemorun: NeMoRunTest # reached on the surviving steps, so it yields exactly two values. fake_obs = iter([[100.0], [25.0]]) with ( - patch.object(NeMoRunTestDefinition, "constraint_check", side_effect=[True, False, True]), + patch.object(EnvVarTestDefinition, "constraint_check", side_effect=[True, False, True]), patch.object(env, "get_observation", side_effect=lambda _action: next(fake_obs)), ): env.test_run.step = 0 - for action in ({"trainer.max_steps": 1000}, {"trainer.max_steps": 2000}, {"trainer.max_steps": 3000}): + for action in ({"paddle_width": 4}, {"paddle_width": 6}, {"paddle_width": 8}): env.step(action) env_csv = env._env_csv_path() @@ -679,9 +685,7 @@ def test_env_csv_step_alignment_holds_on_constraint_failure(nemorun: NeMoRunTest ) -def test_step_cache_hit_with_declared_env_params_still_writes_env_csv( - nemorun: NeMoRunTestDefinition, tmp_path: Path -) -> None: +def test_step_cache_hit_with_declared_env_params_still_writes_env_csv(tmp_path: Path) -> None: """End-to-end: cache HIT under observer-driven env_params still records env.csv. A cache hit still calls ``write_trajectory``, which sinks the trajectory row @@ -693,11 +697,15 @@ def test_step_cache_hit_with_declared_env_params_still_writes_env_csv( """ import random as _random - tdef = nemorun.model_copy(deep=True) - tdef.cmd_args.data.global_batch_size = 8 - tdef.agent_metrics = ["default"] - tdef.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])} - tdef.agent_config = {"random_seed": 42} + tdef = EnvVarTestDefinition( + name="dr", + description="dr", + test_template_name="dr_template", + cmd_args=EnvVarCmdArgs(ball_speed=[1, 2, 3]), + env_params={"ball_speed": EnvParamSpec()}, + agent_metrics=["default"], + agent_config={"random_seed": 42}, + ) test_run = TestRun( name="dr_tr", @@ -705,7 +713,6 @@ def test_step_cache_hit_with_declared_env_params_still_writes_env_csv( num_nodes=1, nodes=[], output_path=tmp_path / "out" / "dr_tr" / "0", - reports={NeMoRunReportGenerationStrategy}, ) runner = MagicMock(spec=BaseRunner) runner.scenario_root = tmp_path / "scenario" @@ -717,8 +724,8 @@ def test_step_cache_hit_with_declared_env_params_still_writes_env_csv( env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides()) assert env.observers, "TestDefinition.env_params declared -> observer must be built" - expected_sample = {"drop_rate": _random.Random("42:drop_rate:1").choice([0.0, 0.001, 0.01])} - action = {"trainer.max_steps": 1000} + expected_sample = {"ball_speed": _random.Random("42:ball_speed:1").choice([1, 2, 3])} + action = {"paddle_width": 4} env.test_run.current_iteration = 0 env.trajectory = { 0: [TrajectoryEntry(step=0, action=action, reward=0.42, observation=[0.84], env_params=expected_sample)] @@ -743,6 +750,76 @@ def test_step_cache_hit_with_declared_env_params_still_writes_env_csv( ) +def test_step_overlays_env_params_onto_cmd_args(tmp_path: Path) -> None: + """The per-trial env_params sample must be overlaid onto cmd_args before the workload runs. + + env_params are the env-side twin of the action: core overlays the sampled values onto + cmd_args inside step() so the workload actually runs with them, workload-agnostically + (no per-workload injection code). Here ``ball_speed`` is env-randomized (its candidate + list lives in cmd_args), so the value the workload runs with must equal the observer's + sample, not the whole candidate list. + """ + import random as _random + + tdef = EnvVarTestDefinition( + name="overlay", + description="overlay", + test_template_name="dr_template", + cmd_args=EnvVarCmdArgs(ball_speed=[1, 2]), + env_params={"ball_speed": EnvParamSpec()}, + agent_metrics=["default"], + agent_config={"random_seed": 42}, + ) + + test_run = TestRun( + name="overlay_tr", + test=tdef, + num_nodes=1, + nodes=[], + output_path=tmp_path / "out" / "overlay_tr" / "0", + ) + runner = MagicMock(spec=BaseRunner) + runner.scenario_root = tmp_path / "scenario" + runner.system = MagicMock() + runner.test_scenario = TestScenario(name="overlay_scenario", test_runs=[test_run]) + runner.jobs, runner.testrun_to_job_map, runner.shutting_down = {}, {}, False + runner.get_job_output_path.return_value = test_run.output_path + + env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides()) + expected = _random.Random("42:ball_speed:1").choice([1, 2]) + + with patch.object(env, "get_observation", side_effect=lambda _action: [1.0]): + env.test_run.step = 0 + env.step({"paddle_width": 4}) + + ran_cmd_args = runner.test_scenario.test_runs[0].test.cmd_args + assert ran_cmd_args.ball_speed == expected, ( + "step() must overlay the sampled env_params onto cmd_args before runner.run(), so the " + f"workload runs with ball_speed={expected} (the observer's draw), not the candidate list." + ) + + +def test_param_space_excludes_env_params_keys(setup_env: tuple[TestRun, BaseRunner]): + """env_params keys must never surface in the grid/action space (sampled, not searched).""" + tr, _ = setup_env + tr.test = EnvVarTestDefinition( + name="dr", + description="dr", + test_template_name="dr_template", + # paddle_width is an ordinary action-space list; ball_speed is an env-sampled list. + cmd_args=EnvVarCmdArgs(paddle_width=[4, 8], ball_speed=[1, 2]), + env_params={"ball_speed": EnvParamSpec()}, + ) + + action_space = tr.param_space + + assert "paddle_width" in action_space, "an un-annotated cmd_args list must remain an action-space dimension" + assert "ball_speed" not in action_space, ( + "a knob declared in env_params is sampled by the env, not explored by the agent, so it " + "must be excluded from param_space even though its cmd_args value is a list." + ) + + def test_no_env_csv_when_env_params_not_declared(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None: """Workloads without [env_params.*] pay zero overhead: no observer, no env.csv.""" tdef = nemorun.model_copy(deep=True) diff --git a/tests/test_env_params.py b/tests/test_env_params.py index 3aa931607..3cb1ddeab 100644 --- a/tests/test_env_params.py +++ b/tests/test_env_params.py @@ -14,12 +14,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Unit tests for the domain-randomization primitives in cloudai.configurator.env_params.""" +"""Unit tests for the environment-parameter primitives in cloudai.configurator.env_params. + +Annotation model: candidate values live in ``cmd_args`` (the single source of +truth); an ``env_params`` entry is a thin annotation that reclassifies that +field from action-space to env-sampled, optionally carrying sampling +``weights``. These tests use a dedicated, list-capable fixture workload so the +candidate list is a first-class typed field (no serialization warnings). + +Worked example - Atari Breakout, the canonical RL arcade game: +``ball_speed`` is the env-sampled knob - the game serves the ball at a speed the +agent does not control, so we sample it per trial and the policy must stay +robust across ball speeds. ``paddle_width`` is an ordinary action-space +dimension - the knob the agent actually tunes. +""" from __future__ import annotations from pathlib import Path from types import SimpleNamespace +from typing import List, Union import pytest from pydantic import ValidationError @@ -30,33 +44,82 @@ EnvParamSpec, EnvParamsSampler, ) +from cloudai.models.workload import CmdArgs, TestDefinition -def test_env_param_spec_requires_at_least_two_values() -> None: - with pytest.raises(ValidationError): - EnvParamSpec(values=[0.0]) +class EnvVarCmdArgs(CmdArgs): + """cmd_args with top-level, list-capable fields for env_params annotation tests. + ``ball_speed`` is the env-randomized knob; ``paddle_width`` stands in for an + ordinary action-space dimension. Both accept either a scalar or a candidate + list, so a candidate list round-trips through model validation/serialization + cleanly. + """ -def test_env_param_spec_rejects_mismatched_weights() -> None: - with pytest.raises(ValidationError): - EnvParamSpec(values=[0.0, 0.1], weights=[1.0]) + ball_speed: Union[int, List[int]] = 1 + paddle_width: Union[int, List[int]] = 8 -def test_env_param_spec_rejects_zero_sum_weights() -> None: - with pytest.raises(ValidationError): - EnvParamSpec(values=[0.0, 0.1], weights=[0.0, 0.0]) +class EnvVarTestDefinition(TestDefinition): + """Minimal concrete TestDefinition wrapping ``EnvVarCmdArgs``.""" + + cmd_args: EnvVarCmdArgs = EnvVarCmdArgs() + + +def _tdef(env_params: dict, **cmd_args_overrides) -> EnvVarTestDefinition: + """Build the fixture TestDefinition through full model validation (so validators fire).""" + return EnvVarTestDefinition( + name="breakout", + description="breakout", + test_template_name="breakout_template", + cmd_args=EnvVarCmdArgs(**cmd_args_overrides), + env_params=env_params, + ) + + +# --- EnvParamSpec: weights are validated intrinsically (length is cross-field, see below) --- + + +def test_env_param_spec_accepts_normalized_weights() -> None: + spec = EnvParamSpec(weights=[0.7, 0.3]) + assert spec.weights == [0.7, 0.3] + + +def test_env_param_spec_bare_marker_is_valid() -> None: + """An annotation with no weights is a valid uniform marker.""" + assert EnvParamSpec().weights is None + + +def test_env_param_spec_rejects_unnormalized_weights() -> None: + """Strict sum: weights must sum to ~1.0 (relative weights like [7, 3] are rejected).""" + with pytest.raises(ValidationError, match="must sum to"): + EnvParamSpec(weights=[7.0, 3.0]) + + +def test_env_param_spec_rejects_negative_weights() -> None: + with pytest.raises(ValidationError, match="finite and non-negative"): + EnvParamSpec(weights=[-0.1, 1.1]) @pytest.mark.parametrize("bad", [float("inf"), float("nan"), float("-inf")]) def test_env_param_spec_rejects_non_finite_weights(bad: float) -> None: + with pytest.raises(ValidationError, match="finite and non-negative"): + EnvParamSpec(weights=[bad, 1.0]) + + +def test_env_param_spec_rejects_unknown_fields() -> None: + """Candidate values live in cmd_args, never in the annotation (no ``values`` key).""" with pytest.raises(ValidationError): - EnvParamSpec(values=[0.0, 0.1], weights=[bad, 1.0]) + EnvParamSpec(values=[1, 2]) + + +# --- Sampler: draws from candidate lists resolved out of cmd_args --- def test_sampler_is_deterministic_across_calls() -> None: - spec = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])} - a = EnvParamsSampler(spec, seed=42) - b = EnvParamsSampler(spec, seed=42) + candidates = {"ball_speed": [1, 2, 3]} + a = EnvParamsSampler(candidates, seed=42) + b = EnvParamsSampler(candidates, seed=42) seq_a = [a.sample(t) for t in range(1, 6)] seq_b = [b.sample(t) for t in range(1, 6)] assert seq_a == seq_b, "same (seed, trial) must produce the same draw across instances" @@ -64,51 +127,123 @@ def test_sampler_is_deterministic_across_calls() -> None: def test_sampler_each_param_is_independent() -> None: """Adding an unrelated parameter must not perturb existing parameters' draws.""" - base = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])} - extended = { - "drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01]), - "latency_ms": EnvParamSpec(values=[1, 5, 10]), - } - a = [EnvParamsSampler(base, seed=7).sample(t)["drop_rate"] for t in range(1, 11)] - b = [EnvParamsSampler(extended, seed=7).sample(t)["drop_rate"] for t in range(1, 11)] + base = {"ball_speed": [1, 2, 3]} + extended = {"ball_speed": [1, 2, 3], "brick_rows": [3, 4, 5]} + a = [EnvParamsSampler(base, seed=7).sample(t)["ball_speed"] for t in range(1, 11)] + b = [EnvParamsSampler(extended, seed=7).sample(t)["ball_speed"] for t in range(1, 11)] assert a == b, "per-parameter RNG seeding must isolate parameters from each other" +def test_sampler_honors_weights() -> None: + """A degenerate weight ([1, 0]) must always pick the first candidate.""" + sampler = EnvParamsSampler({"ball_speed": [1, 2]}, seed=1, weights={"ball_speed": [1.0, 0.0]}) + assert all(sampler.sample(t)["ball_speed"] == 1 for t in range(1, 20)) + + +# --- CsvSink: unchanged persistence contract --- + + def test_csv_sink_skips_empty_samples_and_rejects_zero_step(tmp_path: Path) -> None: sink = CsvSink(tmp_path / "env.csv") sink.write(1, {}) # empty -> no-op, no file assert not (tmp_path / "env.csv").exists() with pytest.raises(ValueError, match="must be a positive trial index"): - sink.write(0, {"drop_rate": 0.0}) + sink.write(0, {"ball_speed": 1}) def test_csv_sink_writes_header_then_rows(tmp_path: Path) -> None: sink = CsvSink(tmp_path / "env.csv") - sink.write(1, {"drop_rate": 0.001}) - sink.write(2, {"drop_rate": 0.01}) + sink.write(1, {"ball_speed": 2}) + sink.write(2, {"ball_speed": 3}) contents = (tmp_path / "env.csv").read_text().strip().splitlines() assert contents[0] == "step,env" assert contents[1].startswith("1,") assert contents[2].startswith("2,") -def test_observer_samples_and_stashes_current_env_params() -> None: - """before_step samples and stashes the result; persistence is the env's job.""" - spec = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])} - observer = EnvParamsObserver(spec, seed=42) +# --- Observer: resolves candidates from cmd_args, stashes the per-trial sample --- + + +def test_observer_samples_from_cmd_args_and_stashes() -> None: + """before_step samples a candidate that lives in cmd_args; persistence is the env's job.""" + cmd_args = EnvVarCmdArgs(ball_speed=[1, 2, 3]) + observer = EnvParamsObserver({"ball_speed": EnvParamSpec()}, cmd_args, seed=42) test_run = SimpleNamespace(step=3, current_env_params={}) observer.before_step(test_run) - assert "drop_rate" in test_run.current_env_params - assert test_run.current_env_params["drop_rate"] in {0.0, 0.001, 0.01} + assert test_run.current_env_params["ball_speed"] in {1, 2, 3} + + +def test_observer_skips_scalar_annotation() -> None: + """A scalar cmd_args value is fixed; annotating it is a no-op (nothing to sample).""" + cmd_args = EnvVarCmdArgs(ball_speed=2) + observer = EnvParamsObserver({"ball_speed": EnvParamSpec()}, cmd_args, seed=42) + test_run = SimpleNamespace(step=1, current_env_params={}) + + observer.before_step(test_run) + + assert test_run.current_env_params == {}, "a scalar (fixed) cmd_args knob must not be sampled" def test_observer_after_step_is_noop() -> None: """after_step must not mutate test_run; CloudAIGymEnv.write_trajectory owns persistence.""" - observer = EnvParamsObserver({}, seed=0) + observer = EnvParamsObserver({}, EnvVarCmdArgs(), seed=0) test_run = SimpleNamespace(step=1, current_env_params={"x": 1}) observer.after_step(test_run, observation=[0.0], reward=0.0) assert test_run.current_env_params == {"x": 1} + + +# --- TestDefinition.validate_env_params: annotation validity (cross-field with cmd_args) --- + + +def test_env_params_uniform_list_is_accepted() -> None: + """A cmd_args candidate list annotated with a bare marker validates (uniform sampling).""" + tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2]) + assert "ball_speed" in tdef.env_params + + +def test_env_params_weighted_list_is_accepted() -> None: + tdef = _tdef({"ball_speed": EnvParamSpec(weights=[0.7, 0.3])}, ball_speed=[1, 2]) + assert tdef.env_params["ball_speed"].weights == [0.7, 0.3] + + +def test_env_params_scalar_no_op_is_accepted() -> None: + """Annotating a scalar (fixed) knob is tolerated as a no-op marker.""" + tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=2) + assert tdef.cmd_args.ball_speed == 2 + + +def test_env_params_unknown_key_rejected() -> None: + """env_params keys must name real cmd_args fields (the overlay targets cmd_args).""" + with pytest.raises(ValidationError, match="not cmd_args fields"): + _tdef({"ghost": EnvParamSpec()}, ball_speed=[1, 2]) + + +def test_env_params_weights_on_scalar_rejected() -> None: + """Weights require a candidate list; a scalar knob cannot carry them.""" + with pytest.raises(ValidationError, match="not a candidate list"): + _tdef({"ball_speed": EnvParamSpec(weights=[0.7, 0.3])}, ball_speed=2) + + +def test_env_params_weights_length_mismatch_rejected() -> None: + """Weights must align 1:1 with the cmd_args candidate list.""" + with pytest.raises(ValidationError, match="weights length"): + _tdef({"ball_speed": EnvParamSpec(weights=[0.5, 0.3, 0.2])}, ball_speed=[1, 2]) + + +# --- is_dse_job: env-sampled lists are not search dimensions --- + + +def test_is_dse_job_false_for_env_param_only_workload() -> None: + """A cmd_args list that is purely env-sampled is not a search dimension -> not a DSE job.""" + tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2, 3]) + assert tdef.is_dse_job is False + + +def test_is_dse_job_true_when_a_real_action_dimension_exists() -> None: + """An un-annotated cmd_args list is a real action dimension -> DSE, even alongside env_params.""" + tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2, 3], paddle_width=[4, 8]) + assert tdef.is_dse_job is True diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 0cd3c20e1..203a35d67 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -434,20 +434,31 @@ def test_handle_dse_job_documents_failure_in_reports_before_raising( def test_validate_dse_env_params_rejects_non_dse(base_tr: TestRun) -> None: - base_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])} + base_tr.test.env_params = {"ball_speed": EnvParamSpec()} scenario = TestScenario(name="s", test_runs=[base_tr]) - with pytest.raises(TestScenarioParsingError, match="declare env_params but are not DSE jobs"): + with pytest.raises(TestScenarioParsingError, match="will not sample them"): validate_dse_env_params(scenario) -def test_validate_dse_env_params_allows_dse_run(dse_tr: TestRun) -> None: - dse_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])} - assert dse_tr.is_dse_job is True # precondition: DSE + env_params is the allowed combination +def test_validate_dse_env_params_rejects_grid_search(dse_tr: TestRun) -> None: + """A DSE job on grid_search exhaustively searches the space, so env_params are noise -> reject.""" + dse_tr.test.env_params = {"ball_speed": EnvParamSpec()} + assert dse_tr.is_dse_job is True # it IS a DSE job... + assert dse_tr.test.agent == "grid_search" # ...but grid_search ignores env_params + with pytest.raises(TestScenarioParsingError, match="will not sample them"): + validate_dse_env_params(TestScenario(name="s", test_runs=[dse_tr])) + + +def test_validate_dse_env_params_allows_dse_run(dse_tr: TestRun, stub_agent_name: str) -> None: + dse_tr.test.env_params = {"ball_speed": EnvParamSpec()} + dse_tr.test.agent = stub_agent_name # a learning agent (not grid_search) consumes env_params + assert dse_tr.is_dse_job is True # precondition: DSE + learning agent + env_params is allowed validate_dse_env_params(TestScenario(name="s", test_runs=[dse_tr])) # no exception == pass -def test_validate_dse_env_params_allows_num_nodes_sweep(base_tr: TestRun) -> None: - base_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])} +def test_validate_dse_env_params_allows_num_nodes_sweep(base_tr: TestRun, stub_agent_name: str) -> None: + base_tr.test.env_params = {"ball_speed": EnvParamSpec()} + base_tr.test.agent = stub_agent_name base_tr.num_nodes = [1, 2] assert base_tr.is_dse_job is True # a num_nodes list sweep makes it DSE, so env_params is allowed validate_dse_env_params(TestScenario(name="s", test_runs=[base_tr])) # no exception == pass @@ -462,7 +473,7 @@ def test_validate_dse_env_params_allows_non_dse_without_env_params(base_tr: Test def test_verify_test_scenarios_rejects_env_params_without_dse( base_tr: TestRun, monkeypatch: pytest.MonkeyPatch ) -> None: - base_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])} + base_tr.test.env_params = {"ball_speed": EnvParamSpec()} bad = TestScenario(name="s", test_runs=[base_tr]) monkeypatch.setattr(Parser, "parse_tests", lambda *a, **k: []) monkeypatch.setattr(Parser, "parse_hooks", lambda *a, **k: {}) @@ -470,8 +481,11 @@ def test_verify_test_scenarios_rejects_env_params_without_dse( assert verify_test_scenarios([Path("dummy.toml")], [], [], []) == 1 -def test_verify_test_scenarios_allows_env_params_with_dse(dse_tr: TestRun, monkeypatch: pytest.MonkeyPatch) -> None: - dse_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])} +def test_verify_test_scenarios_allows_env_params_with_dse( + dse_tr: TestRun, stub_agent_name: str, monkeypatch: pytest.MonkeyPatch +) -> None: + dse_tr.test.env_params = {"ball_speed": EnvParamSpec()} + dse_tr.test.agent = stub_agent_name # learning agent (not grid_search) good = TestScenario(name="s", test_runs=[dse_tr]) monkeypatch.setattr(Parser, "parse_tests", lambda *a, **k: []) monkeypatch.setattr(Parser, "parse_hooks", lambda *a, **k: {}) From d59572ea188509c8b07da0172b0310f8e0d194c9 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Tue, 23 Jun 2026 20:56:35 -0400 Subject: [PATCH 03/22] fix(configurator): tighten env_params validation, align env.csv, fix pyright - validate_env_params: reject structured (non-leaf) cmd_args targets. The observer cannot sample them, yet param_space/is_dse_job exclude the whole key, which would silently drop nested action dimensions. - CloudAIGymEnv.write_trajectory: rebind the env.csv sink to the current iteration path before each write, so env.csv stays 1:1 aligned with trajectory.csv when the env instance is reused across iterations. - test_env_params: assert the unknown-field rejection via model_validate so the negative test no longer trips pyright's call-arg check (CI Linting fix); add a structured-target rejection test. --- src/cloudai/configurator/cloudai_gym.py | 3 +++ src/cloudai/models/workload.py | 8 +++++++- tests/test_env_params.py | 20 +++++++++++++++++--- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/cloudai/configurator/cloudai_gym.py b/src/cloudai/configurator/cloudai_gym.py index d16a9c63e..895d60ae1 100644 --- a/src/cloudai/configurator/cloudai_gym.py +++ b/src/cloudai/configurator/cloudai_gym.py @@ -301,6 +301,9 @@ def write_trajectory(self, entry: TrajectoryEntry): writer.writerow([entry.step, entry.action, entry.reward, entry.observation]) if self._env_sink is not None: + # current_iteration can advance while this env instance is reused, so rebind the sink to + # the current iteration's env.csv (alongside trajectory.csv) to keep the two 1:1 aligned. + self._env_sink = CsvSink(self._env_csv_path()) self._env_sink.write(entry.step, entry.env_params) @property diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py index 8683cc8bb..ae9699754 100644 --- a/src/cloudai/models/workload.py +++ b/src/cloudai/models/workload.py @@ -234,9 +234,15 @@ def validate_env_params(self) -> Self: raise ValueError(f"env_params keys {unknown} are not cmd_args fields on {type(self.cmd_args).__name__}") for name, spec in self.env_params.items(): + value = getattr(self.cmd_args, name, None) + if isinstance(value, (dict, BaseModel)): + raise ValueError( + f"env_params['{name}'] must target a leaf cmd_args field (scalar or candidate list), " + "not a structured object; param_space/is_dse_job exclude the whole key, which would " + "silently drop nested action dimensions" + ) if spec.weights is None: continue - value = getattr(self.cmd_args, name, None) if not isinstance(value, list) or len(value) < 2: raise ValueError( f"env_params['{name}'] declares weights but cmd_args.{name} is not a candidate list " diff --git a/tests/test_env_params.py b/tests/test_env_params.py index 3cb1ddeab..dedca16e8 100644 --- a/tests/test_env_params.py +++ b/tests/test_env_params.py @@ -36,7 +36,7 @@ from typing import List, Union import pytest -from pydantic import ValidationError +from pydantic import BaseModel, ValidationError from cloudai.configurator.env_params import ( CsvSink, @@ -47,17 +47,24 @@ from cloudai.models.workload import CmdArgs, TestDefinition +class BrickGrid(BaseModel): + """A structured (non-leaf) cmd_args field, used to prove env_params rejects such targets.""" + + rows: int = 3 + + class EnvVarCmdArgs(CmdArgs): """cmd_args with top-level, list-capable fields for env_params annotation tests. ``ball_speed`` is the env-randomized knob; ``paddle_width`` stands in for an ordinary action-space dimension. Both accept either a scalar or a candidate list, so a candidate list round-trips through model validation/serialization - cleanly. + cleanly. ``brick_grid`` is a structured field (not a leaf) for negative tests. """ ball_speed: Union[int, List[int]] = 1 paddle_width: Union[int, List[int]] = 8 + brick_grid: BrickGrid = BrickGrid() class EnvVarTestDefinition(TestDefinition): @@ -110,7 +117,7 @@ def test_env_param_spec_rejects_non_finite_weights(bad: float) -> None: def test_env_param_spec_rejects_unknown_fields() -> None: """Candidate values live in cmd_args, never in the annotation (no ``values`` key).""" with pytest.raises(ValidationError): - EnvParamSpec(values=[1, 2]) + EnvParamSpec.model_validate({"values": [1, 2]}) # --- Sampler: draws from candidate lists resolved out of cmd_args --- @@ -234,6 +241,13 @@ def test_env_params_weights_length_mismatch_rejected() -> None: _tdef({"ball_speed": EnvParamSpec(weights=[0.5, 0.3, 0.2])}, ball_speed=[1, 2]) +def test_env_params_structured_target_rejected() -> None: + """A structured (non-leaf) cmd_args target is rejected: the observer can't sample it, yet + param_space/is_dse_job exclude the whole key, silently dropping nested action dimensions.""" + with pytest.raises(ValidationError, match="must target a leaf cmd_args field"): + _tdef({"brick_grid": EnvParamSpec()}) + + # --- is_dse_job: env-sampled lists are not search dimensions --- From ed42f9d0ad12f632bcfcb7c14710dfa0af81c971 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Wed, 24 Jun 2026 06:32:33 -0400 Subject: [PATCH 04/22] fix(workload): reject empty env_params candidate lists at build time An unweighted env_params spec skipped the candidate-list check, so an empty cmd_args list (e.g. ball_speed = []) passed validation and only failed later in EnvParamsSampler.sample() via rng.choice([]) (IndexError). Guard against an empty candidate list in validate_env_params so the error surfaces at TestDefinition build time. Addresses CodeRabbit feedback. --- src/cloudai/models/workload.py | 5 +++++ tests/test_env_params.py | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py index ae9699754..d952efbd5 100644 --- a/src/cloudai/models/workload.py +++ b/src/cloudai/models/workload.py @@ -241,6 +241,11 @@ def validate_env_params(self) -> Self: "not a structured object; param_space/is_dse_job exclude the whole key, which would " "silently drop nested action dimensions" ) + if isinstance(value, list) and not value: + raise ValueError( + f"env_params['{name}'] references an empty candidate list in cmd_args.{name}; " + "provide at least one candidate (the sampler would otherwise fail on an empty draw)" + ) if spec.weights is None: continue if not isinstance(value, list) or len(value) < 2: diff --git a/tests/test_env_params.py b/tests/test_env_params.py index dedca16e8..726a7ed07 100644 --- a/tests/test_env_params.py +++ b/tests/test_env_params.py @@ -248,6 +248,13 @@ def test_env_params_structured_target_rejected() -> None: _tdef({"brick_grid": EnvParamSpec()}) +def test_env_params_empty_candidate_list_rejected() -> None: + """An empty candidate list is rejected at build time: an unweighted spec would otherwise skip + validation and let the sampler fail later on an empty draw (rng.choice([])).""" + with pytest.raises(ValidationError, match="empty candidate list"): + _tdef({"ball_speed": EnvParamSpec()}, ball_speed=[]) + + # --- is_dse_job: env-sampled lists are not search dimensions --- From 9412c7bae2a0308f2839570a04659c880934e703 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Wed, 24 Jun 2026 18:23:31 -0400 Subject: [PATCH 05/22] refactor(configurator): model env_params as cohesive EnvParam/EnvParams value objects Replace the EnvParamsSampler class and the StepObserver/EnvParamsObserver indirection with two frozen dataclasses: EnvParam (one resolved knob: candidates, optional weights, single draw) and EnvParams (per-run knobs + seed, built via from_test, sampled per trial). The sampling RNG lives in the env: step() draws this trial's values and hands concrete values to TestRun.apply_params_set(action, env_params=...), which overlays action and sample through one deterministic path. Centralize the cmd_args -> env_params lookup in TestDefinition.is_env_sampled and access current_env_params directly. Expand EnvParam/EnvParams unit tests to cover draw, from_test, sample, and immutability. --- src/cloudai/_core/test_scenario.py | 33 +++-- src/cloudai/configurator/cloudai_gym.py | 56 ++------ src/cloudai/configurator/env_params.py | 145 ++++++++----------- src/cloudai/models/workload.py | 8 +- tests/test_cloudaigym.py | 38 ++--- tests/test_env_params.py | 180 +++++++++++++++++++----- 6 files changed, 260 insertions(+), 200 deletions(-) diff --git a/src/cloudai/_core/test_scenario.py b/src/cloudai/_core/test_scenario.py index 26f22e7e0..aa03cc591 100644 --- a/src/cloudai/_core/test_scenario.py +++ b/src/cloudai/_core/test_scenario.py @@ -159,7 +159,7 @@ def param_space(self) -> dict[str, Any]: for key, value in cmd_args_dict.items() if isinstance(value, list) and not self.test.is_dse_excluded_arg(key) - and key.split(".", 1)[0] not in self.test.env_params + and not self.test.is_env_sampled(key) }, **{f"extra_env_vars.{key}": value for key, value in extra_env_vars_dict.items() if isinstance(value, list)}, } @@ -187,20 +187,30 @@ def all_combinations(self) -> list[dict[str, Any]]: return all_combinations - def apply_params_set(self, action: dict[str, Any]) -> "TestRun": + def apply_params_set(self, action: dict[str, Any], env_params: dict[str, Any] | None = None) -> "TestRun": tdef = self.test.model_copy(deep=True) - for key, value in action.items(): + + def _apply(key: str, value: Any) -> None: if key.startswith("extra_env_vars."): tdef.extra_env_vars[key[len("extra_env_vars.") :]] = value + return + attrs = key.split(".") + obj = tdef.cmd_args + for attr in attrs[:-1]: + obj = obj[attr] if isinstance(obj, dict) else getattr(obj, attr) + if isinstance(obj, dict): + obj[attrs[-1]] = value else: - attrs = key.split(".") - obj = tdef.cmd_args - for attr in attrs[:-1]: - obj = obj[attr] if isinstance(obj, dict) else getattr(obj, attr) - if isinstance(obj, dict): - obj[attrs[-1]] = value - else: - setattr(obj, attrs[-1], value) + setattr(obj, attrs[-1], value) + + # The agent's chosen action and this trial's sampled env_params are both just concrete + # values written onto cmd_args, applied through the one path here. env_params keys name + # cmd_args fields; sampling (the RNG) happens in the env before this call, so this method + # stays deterministic. + for key, value in action.items(): + _apply(key, value) + for key, value in (env_params or {}).items(): + _apply(key, value) type(tdef)(**tdef.model_dump()) # trigger validation @@ -208,6 +218,7 @@ def apply_params_set(self, action: dict[str, Any]) -> "TestRun": new_tr.test = tdef if "NUM_NODES" in action: new_tr.num_nodes = action["NUM_NODES"] + new_tr.current_env_params = dict(env_params or {}) return new_tr diff --git a/src/cloudai/configurator/cloudai_gym.py b/src/cloudai/configurator/cloudai_gym.py index 895d60ae1..c85ab0263 100644 --- a/src/cloudai/configurator/cloudai_gym.py +++ b/src/cloudai/configurator/cloudai_gym.py @@ -19,14 +19,14 @@ import dataclasses import logging from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, Optional, Tuple from cloudai.core import METRIC_ERROR, BaseRunner, Registry, TestRun from cloudai.util.lazy_imports import lazy from .base_agent import RewardOverrides from .base_gym import BaseGym -from .env_params import CsvSink, EnvParamsObserver, EnvParamsSink, StepObserver +from .env_params import CsvSink, EnvParams, EnvParamsSink @dataclasses.dataclass(frozen=True) @@ -63,24 +63,12 @@ def __init__(self, test_run: TestRun, runner: BaseRunner, rewards: RewardOverrid self.max_steps = test_run.test.agent_steps self.reward_function = Registry().get_reward_function(test_run.test.agent_reward_function) self.trajectory: dict[int, list[TrajectoryEntry]] = {} - self._env_sink: EnvParamsSink | None = None - self.observers: List[StepObserver] = self._build_observers() + # Resolve env_params once, at formulation: None unless the workload declares something to + # sample, so a non-DR run carries no env-params state and pays zero per-step overhead. + self.params: EnvParams | None = EnvParams.from_test(test_run.test) + self._env_sink: EnvParamsSink | None = CsvSink(self._env_csv_path()) if self.params else None super().__init__() - def _build_observers(self) -> List[StepObserver]: - """ - Construct the per-step observers implied by the TestDefinition. - - Workloads opt in to env_params via a TOML ``[env_params.]`` block; - an empty mapping yields no observers and zero overhead. - """ - observers: List[StepObserver] = [] - if self.test_run.test.env_params: - seed = int((self.test_run.test.agent_config or {}).get("random_seed", 0)) - self._env_sink = CsvSink(self._env_csv_path()) - observers.append(EnvParamsObserver(self.test_run.test.env_params, self.test_run.test.cmd_args, seed)) - return observers - def _env_csv_path(self) -> Path: """``env.csv`` lives alongside ``trajectory.csv`` so a plain ``merge`` joins them.""" return self.trajectory_file_path.parent / "env.csv" @@ -141,23 +129,11 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]: - info (dict): Additional info for debugging. """ self.test_run.increment_step() - self.test_run = self.test_run.apply_params_set(action) - - for observer in self.observers: - observer.before_step(self.test_run) - - # Overlay this trial's sampled env_params onto cmd_args so the workload actually - # runs with the sampled values - the env-side twin of apply_params_set(action). - # Sampling, persistence (env.csv), and the trajectory cache key are handled - # separately; this is the single, workload-agnostic injection point. Keys are - # validated to be cmd_args fields at TestDefinition build time; we re-filter to - # those fields here so a programmatically-built run can't inject unknown attrs. - if self.test_run.current_env_params: - cmd_args = self.test_run.test.cmd_args - fields = getattr(type(cmd_args), "model_fields", {}) - overlay = {k: v for k, v in self.test_run.current_env_params.items() if k in fields} - if overlay: - self.test_run.test.cmd_args = cmd_args.model_copy(update=overlay) + # Sample this trial's env_params (the RNG lives here, in the env), then apply the agent's + # action and the sample together: apply_params_set overlays both onto cmd_args and records + # current_env_params, so the workload runs with the sampled values and the cache key sees them. + sampled_env_params = self.params.sample(self.test_run.step) if self.params else {} + self.test_run = self.test_run.apply_params_set(action, env_params=sampled_env_params) cached_result = self.get_cached_trajectory_result(action) if cached_result is not None: @@ -175,8 +151,6 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]: env_params=dict(self.test_run.current_env_params), ) ) - for observer in self.observers: - observer.after_step(self.test_run, cached_result.observation, cached_result.reward) return cached_result.observation, cached_result.reward, False, {} if not self.test_run.test.constraint_check(self.test_run, self.runner.system): @@ -221,9 +195,6 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]: ) ) - for observer in self.observers: - observer.after_step(self.test_run, observation, reward) - return observation, reward, False, {} def render(self, mode: str = "human"): @@ -324,12 +295,11 @@ def get_cached_trajectory_result(self, action: Any) -> TrajectoryEntry | None: env_params on both sides is the back-compat path for workloads that do not declare any ``[env_params.*]`` block. """ - current_env_params = getattr(self.test_run, "current_env_params", {}) or {} + current_env_params = self.test_run.current_env_params for entry in self.current_trajectory: if not self._values_match_exact(entry.action, action): continue - entry_env = getattr(entry, "env_params", {}) or {} - if self._values_match_exact(entry_env, current_env_params): + if self._values_match_exact(entry.env_params, current_env_params): return entry return None diff --git a/src/cloudai/configurator/env_params.py b/src/cloudai/configurator/env_params.py index ddd26758e..b727212ef 100644 --- a/src/cloudai/configurator/env_params.py +++ b/src/cloudai/configurator/env_params.py @@ -21,22 +21,14 @@ ``cmd_args`` (a plain list - the single source of truth, exactly like an action-space dimension) but which the environment *samples* per trial rather than letting the agent search it. ``env_params`` is the annotation that marks -such a field: ``env_params.`` reclassifies ``cmd_args.`` from -action-space to env-sampled and carries only *how* to sample (optional -``weights``), never the values. The policy learns a robust mapping under that -variation; the knob never enters the agent's action space. - -This module owns the annotation schema (``EnvParamSpec``), the deterministic -sampler (``EnvParamsSampler``), the persistence interface -(``EnvParamsSink`` + ``CsvSink``) and the per-step observer -(``EnvParamsObserver``). ``CloudAIGymEnv`` consumes these directly so the -artifacts (``env.csv``) and the cache key align 1:1 with ``trajectory.csv`` -regardless of agent or workload. +such a field; it carries only *how* to sample (optional ``weights``), never the +values, and the knob never enters the agent's action space. """ from __future__ import annotations import csv +import dataclasses import math import random from pathlib import Path @@ -78,41 +70,67 @@ def _validate_weights(self) -> Self: return self -class EnvParamsSampler: +@dataclasses.dataclass(frozen=True) +class EnvParam: """ - Per-trial categorical sampler over candidate lists. + One env-sampled knob, resolved from cmd_args: its candidate values and optional weights. - Candidates are resolved from ``cmd_args`` by the caller and passed in as - ``{name: [v0, v1, ...]}``; optional ``weights`` mirror that mapping. + Weights (when present) are positional, aligned 1:1 with ``candidates``; ``None`` means + uniform sampling. Keeping the two together makes each knob a self-contained draw. + """ + + candidates: List[Any] + weights: Optional[List[float]] = None + + def draw(self, rng: random.Random) -> Any: + if self.weights is not None: + return rng.choices(self.candidates, weights=self.weights, k=1)[0] + return rng.choice(self.candidates) - Determinism contract: ``sample(t)`` returns the same dict on every call - (across processes) for the same ``(seed, candidates, t)``. - Independence contract: each parameter uses an RNG seeded by - ``f"{seed}:{name}:{trial}"`` so adding or removing an unrelated - parameter does not perturb existing parameters' draw sequences. +@dataclasses.dataclass(frozen=True) +class EnvParams: """ + Resolved env-parameter sampling state for one run. - def __init__( - self, - candidates: Dict[str, List[Any]], - seed: int, - weights: Optional[Dict[str, List[float]]] = None, - ) -> None: - self._candidates = candidates - self._weights = weights or {} - self._seed = seed + Built via ``from_test`` only when a workload actually declares list-valued env_params; + otherwise ``from_test`` returns ``None`` and the env carries no env-params state at all. + Owns the per-parameter :class:`EnvParam` draws (resolved from ``cmd_args``, the single source + of truth) and the seed, and draws one value per parameter per trial. + """ + + params: Dict[str, EnvParam] + seed: int + + @classmethod + def from_test(cls, test: Any) -> Optional["EnvParams"]: + """ + Resolve a TestDefinition's env_params annotations, or ``None`` if nothing is sampled. + + A field whose ``cmd_args`` value is a scalar is fixed: the annotation is a no-op and + the field is skipped. With no list-valued field left, there is nothing to sample and + we return ``None`` so callers stay on the zero-overhead path. + """ + params: Dict[str, EnvParam] = {} + for name, spec in test.env_params.items(): + value = getattr(test.cmd_args, name, None) + if not isinstance(value, list): + continue + params[name] = EnvParam(candidates=value, weights=spec.weights) + if not params: + return None + seed = int((test.agent_config or {}).get("random_seed", 0)) + return cls(params=params, seed=seed) def sample(self, trial: int) -> Dict[str, Any]: - out: Dict[str, Any] = {} - for name, choices in self._candidates.items(): - rng = random.Random(f"{self._seed}:{name}:{trial}") - weights = self._weights.get(name) - if weights is not None: - out[name] = rng.choices(choices, weights=weights, k=1)[0] - else: - out[name] = rng.choice(choices) - return out + """ + Draw this trial's value for each parameter. + + Determinism: the same ``(seed, name, trial)`` yields the same draw across processes. + Independence: each parameter's RNG is seeded ``f"{seed}:{name}:{trial}"`` so adding or + removing one parameter never perturbs the others' draw sequences. + """ + return {name: param.draw(random.Random(f"{self.seed}:{name}:{trial}")) for name, param in self.params.items()} @runtime_checkable @@ -146,54 +164,3 @@ def write(self, step: int, sample: Dict[str, Any]) -> None: if new_file: writer.writerow(("step", "env")) writer.writerow([step, sample]) - - -@runtime_checkable -class StepObserver(Protocol): - """ - Hook fired by ``CloudAIGymEnv.step()`` around each trial. - - ``before_step`` runs before the cache lookup and before any workload - execution. ``after_step`` runs after the trajectory row is written. - """ - - def before_step(self, test_run: Any) -> None: ... - - def after_step(self, test_run: Any, observation: list, reward: float) -> None: ... - - -class EnvParamsObserver: - """ - Sample env_params per step and stash them for the cache and the workload. - - Construction: resolves each annotated knob's candidate list from - ``cmd_args`` (the single source of truth) once, up front. A knob whose - ``cmd_args`` value is a scalar (not a list) is fixed - its annotation is a - no-op and it is skipped, so nothing is sampled or stashed for it. - - Pre-step: samples the resolved candidates for ``test_run.step`` and stashes - the result on ``test_run.current_env_params`` so the cache key and the - cmd_args overlay both see it. Persistence is owned by - ``CloudAIGymEnv.write_trajectory``, which sinks ``trajectory.csv`` and the - ``env.csv`` projection from the single ``TrajectoryEntry`` - so a trial that - writes no trajectory row writes no env.csv row either, keeping the two files - 1:1 step-aligned. Post-step: no-op. - """ - - def __init__(self, env_params: Dict[str, EnvParamSpec], cmd_args: Any, seed: int) -> None: - candidates: Dict[str, List[Any]] = {} - weights: Dict[str, List[float]] = {} - for name, spec in env_params.items(): - value = getattr(cmd_args, name, None) - if not isinstance(value, list): - continue # scalar cmd_args knob is fixed; the annotation is a no-op - candidates[name] = value - if spec.weights is not None: - weights[name] = spec.weights - self._sampler = EnvParamsSampler(candidates, seed=seed, weights=weights) - - def before_step(self, test_run: Any) -> None: - test_run.current_env_params = self._sampler.sample(test_run.step) - - def after_step(self, test_run: Any, observation: list, reward: float) -> None: - del test_run, observation, reward # persistence handled by CloudAIGymEnv.write_trajectory diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py index d952efbd5..eaa971aa6 100644 --- a/src/cloudai/models/workload.py +++ b/src/cloudai/models/workload.py @@ -144,6 +144,10 @@ def installables(self) -> list[Installable]: def constraint_check(self, tr: TestRun, system: Optional[System]) -> bool: return True + def is_env_sampled(self, cmd_args_path: str) -> bool: + """Whether a cmd_args field is env-sampled (env draws it per trial, not the agent).""" + return cmd_args_path in self.env_params + @property def is_dse_job(self) -> bool: def check_dict(d: dict, parent_key: str = "", skip_env_params: bool = False) -> bool: @@ -152,9 +156,7 @@ def check_dict(d: dict, parent_key: str = "", skip_env_params: bool = False) -> path = f"{parent_key}.{key}" if parent_key else key if self.is_dse_excluded_arg(path): continue - # env_params lists are sampled by the env, not searched by the agent, so they - # are not action-space dimensions and must not make a run count as DSE. - if skip_env_params and path.split(".", 1)[0] in self.env_params: + if skip_env_params and self.is_env_sampled(path): continue if isinstance(value, list) or ( isinstance(value, dict) and check_dict(value, path, skip_env_params) diff --git a/tests/test_cloudaigym.py b/tests/test_cloudaigym.py index 975efbe48..8a3d0fb9e 100644 --- a/tests/test_cloudaigym.py +++ b/tests/test_cloudaigym.py @@ -518,23 +518,30 @@ def test_cache_hit_when_neither_has_env_params(base_tr: TestRun, tmp_path: Path) assert result.step == 1 -def test_step_reruns_workload_when_env_params_change(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None: - """Integration: env.step() with same action but different env_params re-runs the workload. +def test_step_reruns_workload_when_env_params_change(tmp_path: Path) -> None: + """Integration: two env.step() calls with the same action but different sampled env_params re-run. Counterpart to test_cache_miss_when_env_params_differ but exercising the - full step() flow: increment_step -> apply_params_set -> cache lookup -> - runner.run() -> write_trajectory. + full step() flow: increment_step -> sample env_params -> apply_params_set -> + cache lookup -> runner.run() -> write_trajectory. With seed 42 the sampler + draws ball_speed=3 then ball_speed=1 on the two consecutive trials, so the + cache key differs and the workload must re-run both times. """ - tdef = nemorun.model_copy(deep=True) - tdef.cmd_args.data.global_batch_size = 8 - tdef.agent_metrics = ["default"] + tdef = EnvVarTestDefinition( + name="dr", + description="dr", + test_template_name="dr_template", + cmd_args=EnvVarCmdArgs(ball_speed=[1, 2, 3]), + env_params={"ball_speed": EnvParamSpec()}, + agent_metrics=["default"], + agent_config={"random_seed": 42}, + ) test_run = TestRun( name="dr_tr", test=tdef, num_nodes=1, nodes=[], output_path=tmp_path / "out" / "dr_tr" / "0", - reports={NeMoRunReportGenerationStrategy}, ) test_scenario = TestScenario(name="dr_scenario", test_runs=[test_run]) @@ -548,19 +555,16 @@ def test_step_reruns_workload_when_env_params_change(nemorun: NeMoRunTestDefinit runner.get_job_output_path.return_value = test_run.output_path env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides()) - action = {"trainer.max_steps": 1000} + action = {"paddle_width": 4} fake_obs = iter([[100.0], [50.0]]) with patch.object(env, "get_observation", side_effect=lambda _action: next(fake_obs)): env.test_run.step = 0 - env.test_run.current_env_params = {"ball_speed": 1} - obs1, _r1, *_ = env.step(action) - - env.test_run.current_env_params = {"ball_speed": 2} - obs2, _r2, *_ = env.step(action) + obs1, _r1, *_ = env.step(action) # samples ball_speed=3 + obs2, _r2, *_ = env.step(action) # samples ball_speed=1 assert runner.run.call_count == 2, ( - "Different env_params between two env.step() calls with the same action " + "Different sampled env_params between two env.step() calls with the same action " "must trigger a workload re-run; the cache lookup must miss." ) assert obs1 != obs2, "fresh workload run should produce a fresh observation" @@ -722,7 +726,7 @@ def test_step_cache_hit_with_declared_env_params_still_writes_env_csv(tmp_path: runner.get_job_output_path.return_value = test_run.output_path env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides()) - assert env.observers, "TestDefinition.env_params declared -> observer must be built" + assert env.params is not None, "TestDefinition.env_params declared -> EnvParams must be built" expected_sample = {"ball_speed": _random.Random("42:ball_speed:1").choice([1, 2, 3])} action = {"paddle_width": 4} @@ -838,5 +842,5 @@ def test_no_env_csv_when_env_params_not_declared(nemorun: NeMoRunTestDefinition, env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides()) - assert env.observers == [], "no env_params declared -> no per-step observers" + assert env.params is None, "no env_params declared -> no EnvParams object" assert not env._env_csv_path().exists() diff --git a/tests/test_env_params.py b/tests/test_env_params.py index 726a7ed07..e0b08f6cb 100644 --- a/tests/test_env_params.py +++ b/tests/test_env_params.py @@ -31,8 +31,9 @@ from __future__ import annotations +import dataclasses +import random from pathlib import Path -from types import SimpleNamespace from typing import List, Union import pytest @@ -40,9 +41,9 @@ from cloudai.configurator.env_params import ( CsvSink, - EnvParamsObserver, + EnvParam, + EnvParams, EnvParamSpec, - EnvParamsSampler, ) from cloudai.models.workload import CmdArgs, TestDefinition @@ -120,31 +121,84 @@ def test_env_param_spec_rejects_unknown_fields() -> None: EnvParamSpec.model_validate({"values": [1, 2]}) -# --- Sampler: draws from candidate lists resolved out of cmd_args --- +# --- EnvParam: one resolved knob - candidate values, optional weights, a single draw --- + + +def test_env_param_defaults_to_unweighted() -> None: + """A knob built without weights samples uniformly (weights is None).""" + assert EnvParam(candidates=[1, 2, 3]).weights is None + + +def test_env_param_draw_returns_a_candidate() -> None: + """draw() always yields one of the knob's own candidate values.""" + knob = EnvParam(candidates=[1, 2, 3]) + assert all(knob.draw(random.Random(s)) in {1, 2, 3} for s in range(50)) + + +def test_env_param_draw_is_reproducible_for_a_given_rng() -> None: + """draw() consumes the caller's RNG (no internal seeding), so equal RNG state yields equal draws.""" + knob = EnvParam(candidates=[10, 20, 30, 40]) + assert knob.draw(random.Random(123)) == knob.draw(random.Random(123)) + + +def test_env_param_draw_honors_degenerate_weights() -> None: + """A degenerate weight ([1, 0]) collapses the draw onto the first candidate, whatever the RNG.""" + knob = EnvParam(candidates=[1, 2], weights=[1.0, 0.0]) + assert all(knob.draw(random.Random(s)) == 1 for s in range(50)) + + +def test_env_param_is_immutable() -> None: + """Frozen value object: a resolved knob cannot be mutated after construction.""" + knob = EnvParam(candidates=[1, 2, 3]) + with pytest.raises(dataclasses.FrozenInstanceError): + knob.candidates = [9] # pyright: ignore[reportAttributeAccessIssue] + + +# --- EnvParams.sample: draws from candidate lists resolved out of cmd_args --- def test_sampler_is_deterministic_across_calls() -> None: - candidates = {"ball_speed": [1, 2, 3]} - a = EnvParamsSampler(candidates, seed=42) - b = EnvParamsSampler(candidates, seed=42) - seq_a = [a.sample(t) for t in range(1, 6)] - seq_b = [b.sample(t) for t in range(1, 6)] - assert seq_a == seq_b, "same (seed, trial) must produce the same draw across instances" + env_params = EnvParams(params={"ball_speed": EnvParam(candidates=[1, 2, 3])}, seed=42) + seq_a = [env_params.sample(t) for t in range(1, 6)] + seq_b = [env_params.sample(t) for t in range(1, 6)] + assert seq_a == seq_b, "same (seed, trial) must produce the same draw across calls" def test_sampler_each_param_is_independent() -> None: """Adding an unrelated parameter must not perturb existing parameters' draws.""" - base = {"ball_speed": [1, 2, 3]} - extended = {"ball_speed": [1, 2, 3], "brick_rows": [3, 4, 5]} - a = [EnvParamsSampler(base, seed=7).sample(t)["ball_speed"] for t in range(1, 11)] - b = [EnvParamsSampler(extended, seed=7).sample(t)["ball_speed"] for t in range(1, 11)] + base = EnvParams(params={"ball_speed": EnvParam(candidates=[1, 2, 3])}, seed=7) + extended = EnvParams( + params={"ball_speed": EnvParam(candidates=[1, 2, 3]), "brick_rows": EnvParam(candidates=[3, 4, 5])}, + seed=7, + ) + a = [base.sample(t)["ball_speed"] for t in range(1, 11)] + b = [extended.sample(t)["ball_speed"] for t in range(1, 11)] assert a == b, "per-parameter RNG seeding must isolate parameters from each other" def test_sampler_honors_weights() -> None: """A degenerate weight ([1, 0]) must always pick the first candidate.""" - sampler = EnvParamsSampler({"ball_speed": [1, 2]}, seed=1, weights={"ball_speed": [1.0, 0.0]}) - assert all(sampler.sample(t)["ball_speed"] == 1 for t in range(1, 20)) + env_params = EnvParams(params={"ball_speed": EnvParam(candidates=[1, 2], weights=[1.0, 0.0])}, seed=1) + assert all(env_params.sample(t)["ball_speed"] == 1 for t in range(1, 20)) + + +def test_sample_covers_all_declared_params() -> None: + """sample() returns exactly one value per declared knob, each drawn from that knob's candidates.""" + env_params = EnvParams( + params={"ball_speed": EnvParam(candidates=[1, 2]), "paddle_width": EnvParam(candidates=[4, 8])}, + seed=3, + ) + drawn = env_params.sample(5) + assert set(drawn) == {"ball_speed", "paddle_width"} + assert drawn["ball_speed"] in {1, 2} + assert drawn["paddle_width"] in {4, 8} + + +def test_env_params_is_immutable() -> None: + """Frozen value object: resolved sampling state cannot be mutated after construction.""" + env_params = EnvParams(params={"ball_speed": EnvParam(candidates=[1, 2])}, seed=0) + with pytest.raises(dataclasses.FrozenInstanceError): + env_params.seed = 1 # pyright: ignore[reportAttributeAccessIssue] # --- CsvSink: unchanged persistence contract --- @@ -168,39 +222,91 @@ def test_csv_sink_writes_header_then_rows(tmp_path: Path) -> None: assert contents[2].startswith("2,") -# --- Observer: resolves candidates from cmd_args, stashes the per-trial sample --- +# --- EnvParams.from_test: resolves candidate lists from cmd_args, once at env formulation --- + +def test_env_params_from_test_resolves_list_candidate() -> None: + """A list-valued cmd_args field resolves to its candidate list; sampling then draws from it.""" + env_params = EnvParams.from_test(_tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2, 3])) -def test_observer_samples_from_cmd_args_and_stashes() -> None: - """before_step samples a candidate that lives in cmd_args; persistence is the env's job.""" - cmd_args = EnvVarCmdArgs(ball_speed=[1, 2, 3]) - observer = EnvParamsObserver({"ball_speed": EnvParamSpec()}, cmd_args, seed=42) - test_run = SimpleNamespace(step=3, current_env_params={}) + assert env_params is not None + assert env_params.params == {"ball_speed": EnvParam(candidates=[1, 2, 3], weights=None)} + assert env_params.sample(3)["ball_speed"] in {1, 2, 3} - observer.before_step(test_run) - assert test_run.current_env_params["ball_speed"] in {1, 2, 3} +def test_env_params_from_test_carries_weights() -> None: + """Weighted annotations propagate their weights alongside the resolved candidates.""" + env_params = EnvParams.from_test(_tdef({"ball_speed": EnvParamSpec(weights=[0.7, 0.3])}, ball_speed=[1, 2])) + assert env_params is not None + assert env_params.params["ball_speed"].weights == [0.7, 0.3] -def test_observer_skips_scalar_annotation() -> None: - """A scalar cmd_args value is fixed; annotating it is a no-op (nothing to sample).""" - cmd_args = EnvVarCmdArgs(ball_speed=2) - observer = EnvParamsObserver({"ball_speed": EnvParamSpec()}, cmd_args, seed=42) - test_run = SimpleNamespace(step=1, current_env_params={}) - observer.before_step(test_run) +def test_env_params_from_test_reads_seed_from_agent_config() -> None: + """The sampler seed comes from agent_config.random_seed so a run is reproducible end to end.""" + tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2, 3]) + tdef.agent_config = {"random_seed": 42} + + env_params = EnvParams.from_test(tdef) + + assert env_params is not None and env_params.seed == 42 + + +def test_env_params_from_test_defaults_seed_to_zero_without_agent_config() -> None: + """No agent_config (the default) -> seed 0, so a declared-but-unseeded run still samples.""" + env_params = EnvParams.from_test(_tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2, 3])) + + assert env_params is not None and env_params.seed == 0 + + +def test_env_params_from_test_defaults_seed_to_zero_when_key_absent() -> None: + """agent_config present but without random_seed still falls back to seed 0.""" + tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2, 3]) + tdef.agent_config = {"other": 1} + + env_params = EnvParams.from_test(tdef) + + assert env_params is not None and env_params.seed == 0 + + +def test_env_params_from_test_resolves_multiple_params() -> None: + """Every list-valued annotation becomes its own knob, candidates resolved from cmd_args.""" + env_params = EnvParams.from_test( + _tdef( + {"ball_speed": EnvParamSpec(), "paddle_width": EnvParamSpec()}, + ball_speed=[1, 2], + paddle_width=[4, 8], + ) + ) + + assert env_params is not None + assert set(env_params.params) == {"ball_speed", "paddle_width"} + assert env_params.params["ball_speed"].candidates == [1, 2] + assert env_params.params["paddle_width"].candidates == [4, 8] + + +def test_env_params_from_test_skips_scalar_keeps_list() -> None: + """A mix of scalar and list annotations yields only the list-valued field as a knob.""" + env_params = EnvParams.from_test( + _tdef( + {"ball_speed": EnvParamSpec(), "paddle_width": EnvParamSpec()}, + ball_speed=[1, 2, 3], + paddle_width=8, + ) + ) - assert test_run.current_env_params == {}, "a scalar (fixed) cmd_args knob must not be sampled" + assert env_params is not None + assert set(env_params.params) == {"ball_speed"} -def test_observer_after_step_is_noop() -> None: - """after_step must not mutate test_run; CloudAIGymEnv.write_trajectory owns persistence.""" - observer = EnvParamsObserver({}, EnvVarCmdArgs(), seed=0) - test_run = SimpleNamespace(step=1, current_env_params={"x": 1}) +def test_env_params_from_test_none_when_only_scalar_annotation() -> None: + """A scalar cmd_args value is fixed; with nothing list-valued to sample, from_test is None.""" + assert EnvParams.from_test(_tdef({"ball_speed": EnvParamSpec()}, ball_speed=2)) is None - observer.after_step(test_run, observation=[0.0], reward=0.0) - assert test_run.current_env_params == {"x": 1} +def test_env_params_from_test_none_when_no_env_params() -> None: + """No annotations declared -> no EnvParams object (the zero-overhead path).""" + assert EnvParams.from_test(_tdef({})) is None # --- TestDefinition.validate_env_params: annotation validity (cross-field with cmd_args) --- From af3b6a811f6080cd9c6ddbfe3dd6c48f11a25807 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Thu, 25 Jun 2026 12:01:22 -0400 Subject: [PATCH 06/22] refactor(configurator): collapse env_params sink into one concrete class Drop the EnvParamsSink Protocol + CsvSink pair (and runtime_checkable) for a single concrete EnvParamsSink, built unconditionally in CloudAIGymEnv. The sink is now stateless: write() takes the record path per call and skips empty samples, so non-DR runs write nothing and write_trajectory needs no branch. Derive both records from a new iteration_dir property and expose the env record via the env_params_record_path property (was _env_csv_path), keeping env.csv and trajectory.csv step-aligned without coupling the name to CSV. --- src/cloudai/configurator/cloudai_gym.py | 22 ++++++++++++---------- src/cloudai/configurator/env_params.py | 25 +++++++++---------------- tests/test_cloudaigym.py | 8 ++++---- tests/test_env_params.py | 22 ++++++++++++---------- 4 files changed, 37 insertions(+), 40 deletions(-) diff --git a/src/cloudai/configurator/cloudai_gym.py b/src/cloudai/configurator/cloudai_gym.py index c85ab0263..ac9d5cd34 100644 --- a/src/cloudai/configurator/cloudai_gym.py +++ b/src/cloudai/configurator/cloudai_gym.py @@ -26,7 +26,7 @@ from .base_agent import RewardOverrides from .base_gym import BaseGym -from .env_params import CsvSink, EnvParams, EnvParamsSink +from .env_params import EnvParams, EnvParamsSink @dataclasses.dataclass(frozen=True) @@ -66,12 +66,13 @@ def __init__(self, test_run: TestRun, runner: BaseRunner, rewards: RewardOverrid # Resolve env_params once, at formulation: None unless the workload declares something to # sample, so a non-DR run carries no env-params state and pays zero per-step overhead. self.params: EnvParams | None = EnvParams.from_test(test_run.test) - self._env_sink: EnvParamsSink | None = CsvSink(self._env_csv_path()) if self.params else None + self.env_params_sink = EnvParamsSink() super().__init__() - def _env_csv_path(self) -> Path: + @property + def env_params_record_path(self) -> Path: """``env.csv`` lives alongside ``trajectory.csv`` so a plain ``merge`` joins them.""" - return self.trajectory_file_path.parent / "env.csv" + return self.iteration_dir / "env.csv" def define_action_space(self) -> Dict[str, list[Any]]: return self.test_run.param_space @@ -271,15 +272,16 @@ def write_trajectory(self, entry: TrajectoryEntry): writer.writerow(["step", "action", "reward", "observation"]) writer.writerow([entry.step, entry.action, entry.reward, entry.observation]) - if self._env_sink is not None: - # current_iteration can advance while this env instance is reused, so rebind the sink to - # the current iteration's env.csv (alongside trajectory.csv) to keep the two 1:1 aligned. - self._env_sink = CsvSink(self._env_csv_path()) - self._env_sink.write(entry.step, entry.env_params) + self.env_params_sink.write(self.env_params_record_path, entry.step, entry.env_params) + + @property + def iteration_dir(self) -> Path: + """Per-iteration output dir; trajectory.csv and env.csv both live here, step-aligned.""" + return self.runner.scenario_root / self.test_run.name / f"{self.test_run.current_iteration}" @property def trajectory_file_path(self) -> Path: - return self.runner.scenario_root / self.test_run.name / f"{self.test_run.current_iteration}" / "trajectory.csv" + return self.iteration_dir / "trajectory.csv" @property def current_trajectory(self) -> list[TrajectoryEntry]: diff --git a/src/cloudai/configurator/env_params.py b/src/cloudai/configurator/env_params.py index b727212ef..c23764cc0 100644 --- a/src/cloudai/configurator/env_params.py +++ b/src/cloudai/configurator/env_params.py @@ -32,7 +32,7 @@ import math import random from pathlib import Path -from typing import Any, Dict, List, Optional, Protocol, runtime_checkable +from typing import Any, Dict, List, Optional from pydantic import BaseModel, ConfigDict, Field, model_validator from typing_extensions import Self @@ -133,33 +133,26 @@ def sample(self, trial: int) -> Dict[str, Any]: return {name: param.draw(random.Random(f"{self.seed}:{name}:{trial}")) for name, param in self.params.items()} -@runtime_checkable -class EnvParamsSink(Protocol): - """Persist one trial's env_params sample; empty samples must be no-ops.""" - - def write(self, step: int, sample: Dict[str, Any]) -> None: ... - - -class CsvSink: +class EnvParamsSink: """ Append per-trial env_params samples to a step-aligned CSV. The CSV mirrors how ``trajectory.csv`` serialises its ``action`` column (one row per env.step(), sample dict stringified in a single cell) so the two files align 1:1 on ``step`` and a plain ``merge`` joins them. - """ - def __init__(self, path: Path) -> None: - self._path = path + Empty samples are skipped, so a run without env_params writes nothing and + callers can sink every trial unconditionally. + """ - def write(self, step: int, sample: Dict[str, Any]) -> None: + def write(self, path: Path, step: int, sample: Dict[str, Any]) -> None: if step < 1: raise ValueError(f"step must be a positive trial index (cloudai DSE is 1-based); got {step}") if not sample: return - new_file = not self._path.exists() - self._path.parent.mkdir(parents=True, exist_ok=True) - with self._path.open("a", newline="") as f: + new_file = not path.exists() + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("a", newline="") as f: writer = csv.writer(f) if new_file: writer.writerow(("step", "env")) diff --git a/tests/test_cloudaigym.py b/tests/test_cloudaigym.py index 8a3d0fb9e..177a39dd7 100644 --- a/tests/test_cloudaigym.py +++ b/tests/test_cloudaigym.py @@ -612,7 +612,7 @@ def test_env_csv_is_step_aligned_with_trajectory(tmp_path: Path) -> None: for action in (action_a, action_b, action_a): env.step(action) - env_csv = env._env_csv_path() + env_csv = env.env_params_record_path traj_csv = env.trajectory_file_path assert env_csv.exists(), "env.csv must be written when env_params is declared" @@ -673,7 +673,7 @@ def test_env_csv_step_alignment_holds_on_constraint_failure(tmp_path: Path) -> N for action in ({"paddle_width": 4}, {"paddle_width": 6}, {"paddle_width": 8}): env.step(action) - env_csv = env._env_csv_path() + env_csv = env.env_params_record_path traj_csv = env.trajectory_file_path assert env_csv.exists(), "surviving steps declare env_params -> env.csv must exist" @@ -742,7 +742,7 @@ def test_step_cache_hit_with_declared_env_params_still_writes_env_csv(tmp_path: runner.run.assert_not_called() assert reward == 0.42 and obs == [0.84] - env_csv = env._env_csv_path() + env_csv = env.env_params_record_path assert env_csv.exists(), "cache HIT must NOT skip the observer; env.csv must record the trial" env_rows = env_csv.read_text().strip().splitlines() assert env_rows[0] == "step,env" @@ -843,4 +843,4 @@ def test_no_env_csv_when_env_params_not_declared(nemorun: NeMoRunTestDefinition, env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides()) assert env.params is None, "no env_params declared -> no EnvParams object" - assert not env._env_csv_path().exists() + assert not env.env_params_record_path.exists() diff --git a/tests/test_env_params.py b/tests/test_env_params.py index e0b08f6cb..483fa2e71 100644 --- a/tests/test_env_params.py +++ b/tests/test_env_params.py @@ -40,10 +40,10 @@ from pydantic import BaseModel, ValidationError from cloudai.configurator.env_params import ( - CsvSink, EnvParam, EnvParams, EnvParamSpec, + EnvParamsSink, ) from cloudai.models.workload import CmdArgs, TestDefinition @@ -201,22 +201,24 @@ def test_env_params_is_immutable() -> None: env_params.seed = 1 # pyright: ignore[reportAttributeAccessIssue] -# --- CsvSink: unchanged persistence contract --- +# --- EnvParamsSink: unchanged persistence contract --- def test_csv_sink_skips_empty_samples_and_rejects_zero_step(tmp_path: Path) -> None: - sink = CsvSink(tmp_path / "env.csv") - sink.write(1, {}) # empty -> no-op, no file - assert not (tmp_path / "env.csv").exists() + sink = EnvParamsSink() + path = tmp_path / "env.csv" + sink.write(path, 1, {}) # empty -> no-op, no file + assert not path.exists() with pytest.raises(ValueError, match="must be a positive trial index"): - sink.write(0, {"ball_speed": 1}) + sink.write(path, 0, {"ball_speed": 1}) def test_csv_sink_writes_header_then_rows(tmp_path: Path) -> None: - sink = CsvSink(tmp_path / "env.csv") - sink.write(1, {"ball_speed": 2}) - sink.write(2, {"ball_speed": 3}) - contents = (tmp_path / "env.csv").read_text().strip().splitlines() + sink = EnvParamsSink() + path = tmp_path / "env.csv" + sink.write(path, 1, {"ball_speed": 2}) + sink.write(path, 2, {"ball_speed": 3}) + contents = path.read_text().strip().splitlines() assert contents[0] == "step,env" assert contents[1].startswith("1,") assert contents[2].startswith("2,") From dead658f85962a9fcdf7430df84a5d380b363a02 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Thu, 25 Jun 2026 13:04:58 -0400 Subject: [PATCH 07/22] refactor(configurator): gate env_params sampling on an agent capability flag Replace the hardcoded `agent == "grid_search"` check with a BaseAgent.samples_env_params capability flag (opt-in, defaults False). Only agents whose search consumes per-trial env_params sampling set it True; enumerating/surrogate agents leave it False, so a config that declares env_params for an agent that would ignore them is rejected up front instead of silently no-op'ing. New agents answer for themselves with no string to maintain. Relocate validate_dse_env_params out of the CLI handlers into configurator/env_params.py next to the logic it guards, looking the agent up via the Registry. Unknown agents are deferred to the dedicated agent-resolution error rather than masked here. Keep all public-facing comments, docstrings, and the error message generic (no internal agent names). Cover the full validator matrix, including the unknown-agent deferral. --- src/cloudai/cli/handlers.py | 25 +--------------- src/cloudai/configurator/base_agent.py | 16 +++++++---- src/cloudai/configurator/env_params.py | 40 +++++++++++++++++++++++++- tests/test_handlers.py | 32 +++++++++++++++++---- 4 files changed, 77 insertions(+), 36 deletions(-) diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py index f7b9f3844..acc88f72c 100644 --- a/src/cloudai/cli/handlers.py +++ b/src/cloudai/cli/handlers.py @@ -27,6 +27,7 @@ import toml import yaml +from cloudai.configurator.env_params import validate_dse_env_params from cloudai.core import ( BaseInstaller, CloudAIGymEnv, @@ -298,30 +299,6 @@ def _check_installation( return result -def validate_dse_env_params(test_scenario: TestScenario) -> None: - """ - Reject prepped configs that declare env_params but cannot sample them. - - env_params are sampled per-trial by CloudAIGymEnv, but only for a DSE run on a *learning* - agent. A non-DSE run has no per-trial loop at all, and grid_search exhaustively enumerates the - search space - sampling env_params there would just inject noise into a deterministic sweep with - no policy to gain robustness. is_dse_job is a property of the fully prepped config, so this is - validated here rather than at parse time. - """ - offenders = [ - tr.name - for tr in test_scenario.test_runs - if tr.test.env_params and (not tr.is_dse_job or tr.test.agent == "grid_search") - ] - if offenders: - raise TestScenarioParsingError( - f"Tests {offenders} declare env_params but will not sample them. env_params are sampled per-trial " - "only by a DSE run on a learning agent (grid_search searches the whole space and ignores them). " - "Use a learning agent and add a sweep (a list-valued cmd_args/extra_env_vars entry or num_nodes), " - "or remove env_params." - ) - - def handle_dry_run_and_run(args: argparse.Namespace) -> int: setup_result = _setup_system_and_scenario(args) if setup_result is None: diff --git a/src/cloudai/configurator/base_agent.py b/src/cloudai/configurator/base_agent.py index 321e6e659..3a6fef75e 100644 --- a/src/cloudai/configurator/base_agent.py +++ b/src/cloudai/configurator/base_agent.py @@ -58,6 +58,11 @@ class BaseAgent(ABC): Provides a unified interface and parameter management for action spaces. """ + # Opt-in capability: only agents whose search consumes per-trial env_params sampling set this + # to True. Defaults False so a config that declares env_params for an agent that would ignore + # them is rejected up front instead of silently having no effect. + samples_env_params: bool = False + def __init__(self, env: BaseGym, config: BaseAgentConfig): """ Initialize the agent with the environment. @@ -94,9 +99,8 @@ def select_action(self, observation: list[float] | None = None) -> tuple[int, di Args: observation: Latest observation produced by the environment (``env.reset()`` on the - first call, then the result of the prior ``env.step()``). Stateless agents such - as grid search or Bayesian optimization may ignore this; observation-conditioned - agents (RL, contextual bandits) should use it. + first call, then the result of the prior ``env.step()``). Stateless agents may + ignore this; observation-conditioned agents should use it. Returns: Tuple[int, Dict[str, Any]] | None: The current step index and a dictionary mapping action keys @@ -120,8 +124,7 @@ def run(self) -> int: Default: a step loop driven by the dispatcher (``select_action`` → ``env.step`` → ``update_policy`` per trial). Agents that drive their - own training loop (e.g. RLlib-based agents calling ``algo.train()``) - override this method. + own training loop override this method. Failure contract (``handle_dse_job`` consumes the result via ``err |= agent.run()``): @@ -131,7 +134,8 @@ def run(self) -> int: accumulated and the next ``TestRun`` still executes. Workload-level failures are already surfaced this way: ``CloudAIGymEnv.step`` maps a failed metric to ``rewards.metric_failure`` rather than raising, and - ``rllib_run`` catches training errors and returns ``rc=1``. + agents with their own training loop should likewise catch training + errors and return a non-zero code. - Raise for *unexpected* failures (framework/agent bugs). Exceptions propagate out of ``handle_dse_job`` and hard-fail the job so the bug is surfaced instead of masked as a penalizing reward. diff --git a/src/cloudai/configurator/env_params.py b/src/cloudai/configurator/env_params.py index c23764cc0..ecd099ecd 100644 --- a/src/cloudai/configurator/env_params.py +++ b/src/cloudai/configurator/env_params.py @@ -32,11 +32,17 @@ import math import random from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional from pydantic import BaseModel, ConfigDict, Field, model_validator from typing_extensions import Self +from cloudai._core.exceptions import TestScenarioParsingError +from cloudai._core.registry import Registry + +if TYPE_CHECKING: + from cloudai._core.test_scenario import TestScenario + class EnvParamSpec(BaseModel): """ @@ -157,3 +163,35 @@ def write(self, path: Path, step: int, sample: Dict[str, Any]) -> None: if new_file: writer.writerow(("step", "env")) writer.writerow([step, sample]) + + +def validate_dse_env_params(test_scenario: "TestScenario") -> None: + """ + Reject prepped configs that declare env_params no agent will sample. + + env_params are sampled per-trial by CloudAIGymEnv, but the sampling only matters for an agent + that opts into it via ``BaseAgent.samples_env_params``. A non-DSE run has no per-trial loop, and + an agent that does not opt in ignores env_params, so declaring them there is a silent no-op. + is_dse_job and the agent both resolve only on the fully prepped config, so this is validated + here rather than at parse time. + """ + agents = Registry().agents_map + + offenders = [] + for tr in test_scenario.test_runs: + if not tr.test.env_params: + continue + + agent = agents.get(tr.test.agent) + # Unknown agent: defer to the dedicated agent-resolution error rather than masking it here. + sampled = tr.is_dse_job and (agent is None or agent.samples_env_params) + if not sampled: + offenders.append(tr.name) + + if offenders: + raise TestScenarioParsingError( + f"Tests {offenders} declare env_params but no agent will sample them. env_params are sampled " + "per-trial only by a DSE run on an agent that opts into env_params sampling. Add a sweep " + "(a list-valued cmd_args/extra_env_vars entry or num_nodes) and use such an agent, or remove " + "env_params." + ) diff --git a/tests/test_handlers.py b/tests/test_handlers.py index 203a35d67..247548287 100644 --- a/tests/test_handlers.py +++ b/tests/test_handlers.py @@ -56,6 +56,7 @@ class StubAgentConfig(BaseAgentConfig): class StubAgent(BaseAgent): received_configs: ClassVar[list[StubAgentConfig]] = [] + samples_env_params: bool = True # stands in for an env-aware learning agent def __init__(self, env, config: StubAgentConfig): self.env = env @@ -436,7 +437,7 @@ def test_handle_dse_job_documents_failure_in_reports_before_raising( def test_validate_dse_env_params_rejects_non_dse(base_tr: TestRun) -> None: base_tr.test.env_params = {"ball_speed": EnvParamSpec()} scenario = TestScenario(name="s", test_runs=[base_tr]) - with pytest.raises(TestScenarioParsingError, match="will not sample them"): + with pytest.raises(TestScenarioParsingError, match="no agent will sample them"): validate_dse_env_params(scenario) @@ -444,15 +445,36 @@ def test_validate_dse_env_params_rejects_grid_search(dse_tr: TestRun) -> None: """A DSE job on grid_search exhaustively searches the space, so env_params are noise -> reject.""" dse_tr.test.env_params = {"ball_speed": EnvParamSpec()} assert dse_tr.is_dse_job is True # it IS a DSE job... - assert dse_tr.test.agent == "grid_search" # ...but grid_search ignores env_params - with pytest.raises(TestScenarioParsingError, match="will not sample them"): + assert dse_tr.test.agent == "grid_search" # ...but grid_search does not sample env_params + with pytest.raises(TestScenarioParsingError, match="no agent will sample them"): validate_dse_env_params(TestScenario(name="s", test_runs=[dse_tr])) +def test_validate_dse_env_params_rejects_non_sampling_agent( + dse_tr: TestRun, stub_agent_name: str, monkeypatch: pytest.MonkeyPatch +) -> None: + """The check keys on the agent capability, not the name: a non-grid agent that opts out is rejected too.""" + monkeypatch.setattr(StubAgent, "samples_env_params", False) + dse_tr.test.env_params = {"ball_speed": EnvParamSpec()} + dse_tr.test.agent = stub_agent_name + assert dse_tr.is_dse_job is True and dse_tr.test.agent != "grid_search" + with pytest.raises(TestScenarioParsingError, match="no agent will sample them"): + validate_dse_env_params(TestScenario(name="s", test_runs=[dse_tr])) + + +def test_validate_dse_env_params_defers_unknown_agent(dse_tr: TestRun) -> None: + """An unknown agent is not flagged here; it is deferred to the dedicated agent-resolution error.""" + dse_tr.test.env_params = {"ball_speed": EnvParamSpec()} + dse_tr.test.agent = "does_not_exist_agent" + assert dse_tr.is_dse_job is True + assert dse_tr.test.agent not in Registry().agents_map # precondition: agent is truly unknown + validate_dse_env_params(TestScenario(name="s", test_runs=[dse_tr])) # no exception == deferred + + def test_validate_dse_env_params_allows_dse_run(dse_tr: TestRun, stub_agent_name: str) -> None: dse_tr.test.env_params = {"ball_speed": EnvParamSpec()} - dse_tr.test.agent = stub_agent_name # a learning agent (not grid_search) consumes env_params - assert dse_tr.is_dse_job is True # precondition: DSE + learning agent + env_params is allowed + dse_tr.test.agent = stub_agent_name # an env-aware agent (samples_env_params=True) consumes env_params + assert dse_tr.is_dse_job is True # precondition: DSE + env-aware agent + env_params is allowed validate_dse_env_params(TestScenario(name="s", test_runs=[dse_tr])) # no exception == pass From 69dd7d4c739f0e8cbd6375905d4fea7a4e143f71 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Thu, 25 Jun 2026 13:13:24 -0400 Subject: [PATCH 08/22] style(configurator): trim verbose comments toward self-documenting code Compress multi-line inline comments down to the single non-obvious rationale (or drop them where the code already speaks), per the self-documenting-code principle. Public API docstrings and test intent comments are left intact. --- src/cloudai/_core/test_scenario.py | 5 +---- src/cloudai/cli/handlers.py | 3 +-- src/cloudai/configurator/base_agent.py | 5 ++--- src/cloudai/configurator/cloudai_gym.py | 10 ++-------- 4 files changed, 6 insertions(+), 17 deletions(-) diff --git a/src/cloudai/_core/test_scenario.py b/src/cloudai/_core/test_scenario.py index aa03cc591..523f2b701 100644 --- a/src/cloudai/_core/test_scenario.py +++ b/src/cloudai/_core/test_scenario.py @@ -203,10 +203,7 @@ def _apply(key: str, value: Any) -> None: else: setattr(obj, attrs[-1], value) - # The agent's chosen action and this trial's sampled env_params are both just concrete - # values written onto cmd_args, applied through the one path here. env_params keys name - # cmd_args fields; sampling (the RNG) happens in the env before this call, so this method - # stays deterministic. + # RNG runs in the env before this call; applying only concrete values keeps this deterministic. for key, value in action.items(): _apply(key, value) for key, value in (env_params or {}).items(): diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py index acc88f72c..fb6f9b7d8 100644 --- a/src/cloudai/cli/handlers.py +++ b/src/cloudai/cli/handlers.py @@ -135,8 +135,7 @@ def handle_dse_job(runner: Runner, args: argparse.Namespace) -> int: return 1 err = 0 - # Recoverable failures return a non-zero rc and are accumulated here; an unexpected exception - # (a bug) is a hard-fail. We capture it so reports still generate, then re-raise below. + # Capture an unexpected error so reports still generate, then re-raise below. run_error: Exception | None = None try: for tr in runner.runner.test_scenario.test_runs: diff --git a/src/cloudai/configurator/base_agent.py b/src/cloudai/configurator/base_agent.py index 3a6fef75e..5fe1059ef 100644 --- a/src/cloudai/configurator/base_agent.py +++ b/src/cloudai/configurator/base_agent.py @@ -58,9 +58,8 @@ class BaseAgent(ABC): Provides a unified interface and parameter management for action spaces. """ - # Opt-in capability: only agents whose search consumes per-trial env_params sampling set this - # to True. Defaults False so a config that declares env_params for an agent that would ignore - # them is rejected up front instead of silently having no effect. + # Opt-in: agents that consume per-trial env_params sampling set this True. Default False so + # env_params declared for a non-sampling agent are rejected rather than silently ignored. samples_env_params: bool = False def __init__(self, env: BaseGym, config: BaseAgentConfig): diff --git a/src/cloudai/configurator/cloudai_gym.py b/src/cloudai/configurator/cloudai_gym.py index ac9d5cd34..0b4a3b0c0 100644 --- a/src/cloudai/configurator/cloudai_gym.py +++ b/src/cloudai/configurator/cloudai_gym.py @@ -63,8 +63,6 @@ def __init__(self, test_run: TestRun, runner: BaseRunner, rewards: RewardOverrid self.max_steps = test_run.test.agent_steps self.reward_function = Registry().get_reward_function(test_run.test.agent_reward_function) self.trajectory: dict[int, list[TrajectoryEntry]] = {} - # Resolve env_params once, at formulation: None unless the workload declares something to - # sample, so a non-DR run carries no env-params state and pays zero per-step overhead. self.params: EnvParams | None = EnvParams.from_test(test_run.test) self.env_params_sink = EnvParamsSink() super().__init__() @@ -130,9 +128,7 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]: - info (dict): Additional info for debugging. """ self.test_run.increment_step() - # Sample this trial's env_params (the RNG lives here, in the env), then apply the agent's - # action and the sample together: apply_params_set overlays both onto cmd_args and records - # current_env_params, so the workload runs with the sampled values and the cache key sees them. + # RNG lives in the env: sample here, then apply action + sample so the run and cache key see them. sampled_env_params = self.params.sample(self.test_run.step) if self.params else {} self.test_run = self.test_run.apply_params_set(action, env_params=sampled_env_params) @@ -178,9 +174,7 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]: self.test_run.step = new_tr.step self.test_run.output_path = new_tr.output_path - # Rebuilding/replacing test_run above can drop the per-trial env_params sample; - # restore it so the trajectory entry, the cache key, and env.csv all record the - # params the trial actually ran with. + # The test_run rebuild above drops the sample; restore it so the entry, cache key, and env.csv match. self.test_run.current_env_params = new_tr.current_env_params observation = self.get_observation(action) From 7fec7f5462fdb6e9ab44e04df204d2d17c842634 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Thu, 25 Jun 2026 15:42:23 -0400 Subject: [PATCH 09/22] fix(test_scenario): skip env_params on post-overlay revalidation apply_params_set overlays sampled scalar draws onto cmd_args, then reconstructs the TestDefinition to validate the applied action values. That pass re-ran validate_env_params, which rejects a weighted env_param whose cmd_args target is no longer a candidate list - exactly what the overlay produces. env_params is already validated at parse time, so drop it from the validation-only dump. Adds a regression test covering a weighted env_param's scalar draw. --- src/cloudai/_core/test_scenario.py | 7 ++++++- tests/test_env_params.py | 13 +++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/cloudai/_core/test_scenario.py b/src/cloudai/_core/test_scenario.py index 523f2b701..7c31406d0 100644 --- a/src/cloudai/_core/test_scenario.py +++ b/src/cloudai/_core/test_scenario.py @@ -209,7 +209,12 @@ def _apply(key: str, value: Any) -> None: for key, value in (env_params or {}).items(): _apply(key, value) - type(tdef)(**tdef.model_dump()) # trigger validation + # env_params is validated at parse time; after the overlay its target cmd_args fields hold + # concrete scalar draws, so re-validating it here would reject weighted specs. Drop it for + # this validation-only pass, which exists to validate the applied action values. + validation_args = tdef.model_dump() + validation_args.pop("env_params", None) + type(tdef)(**validation_args) # trigger validation new_tr = copy.deepcopy(self) new_tr.test = tdef diff --git a/tests/test_env_params.py b/tests/test_env_params.py index 483fa2e71..510a79fe2 100644 --- a/tests/test_env_params.py +++ b/tests/test_env_params.py @@ -45,6 +45,7 @@ EnvParamSpec, EnvParamsSink, ) +from cloudai.core import TestRun from cloudai.models.workload import CmdArgs, TestDefinition @@ -376,3 +377,15 @@ def test_is_dse_job_true_when_a_real_action_dimension_exists() -> None: """An un-annotated cmd_args list is a real action dimension -> DSE, even alongside env_params.""" tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2, 3], paddle_width=[4, 8]) assert tdef.is_dse_job is True + + +def test_apply_params_set_accepts_weighted_env_param_draw() -> None: + """Regression: apply_params_set re-validates after the overlay; a weighted env_param's scalar + draw must not trip validate_env_params (which would reject 'weights but not a candidate list').""" + tdef = _tdef({"ball_speed": EnvParamSpec(weights=[0.7, 0.3])}, ball_speed=[1, 2]) + tr = TestRun(name="tr", test=tdef, num_nodes=1, nodes=[]) + + new_tr = tr.apply_params_set({}, env_params={"ball_speed": 1}) + + assert new_tr.test.cmd_args.ball_speed == 1 + assert new_tr.current_env_params == {"ball_speed": 1} From 8f91664db9338c4df7b8e1d940ca59947a2306f6 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Fri, 26 Jun 2026 11:12:06 -0400 Subject: [PATCH 10/22] fix(workload): reject scalar-only env_params annotations at parse time An env_params entry only reclassifies a list-valued cmd_args sweep as env-sampled; a scalar is already fixed, so annotating it is a meaningless label. Previously such an annotation was tolerated as a silent no-op, which let it slip through parse-time validation and inconsistently trip (or not) the downstream "no agent will sample them" check depending on run mode. Reject it where the contract lives - TestDefinition.validate_env_params - so the failure is immediate and mode-independent. EnvParams.from_test's non-list guard becomes defensive (parse-time now guarantees lists); the post-overlay path already drops env_params before re-validating, so concrete scalar draws are unaffected. Extract the per-field checks into a helper to keep the validator under the complexity limit, and update tests: scalar annotations now assert rejection instead of no-op tolerance. --- src/cloudai/configurator/env_params.py | 7 +-- src/cloudai/models/workload.py | 65 +++++++++++++++----------- tests/test_env_params.py | 28 ++--------- 3 files changed, 46 insertions(+), 54 deletions(-) diff --git a/src/cloudai/configurator/env_params.py b/src/cloudai/configurator/env_params.py index ecd099ecd..6d98c6903 100644 --- a/src/cloudai/configurator/env_params.py +++ b/src/cloudai/configurator/env_params.py @@ -113,9 +113,10 @@ def from_test(cls, test: Any) -> Optional["EnvParams"]: """ Resolve a TestDefinition's env_params annotations, or ``None`` if nothing is sampled. - A field whose ``cmd_args`` value is a scalar is fixed: the annotation is a no-op and - the field is skipped. With no list-valued field left, there is nothing to sample and - we return ``None`` so callers stay on the zero-overhead path. + Annotated fields are guaranteed list-valued by ``TestDefinition`` parse-time validation + (a scalar annotation is rejected there), so the non-list guard below is defensive. With + no annotations declared there is nothing to sample and we return ``None`` so callers + stay on the zero-overhead path. """ params: Dict[str, EnvParam] = {} for name, spec in test.env_params.items(): diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py index eaa971aa6..c5156ccc6 100644 --- a/src/cloudai/models/workload.py +++ b/src/cloudai/models/workload.py @@ -217,10 +217,11 @@ def validate_env_params(self) -> Self: ``env_params`` is an annotation: each key names a ``cmd_args`` field whose value is the candidate set (the single source of truth), and the entry carries only *how* to - sample. So each key must name a real ``cmd_args`` field; and when ``weights`` are - declared, that field must be a candidate list of >= 2 values and the weights must - align 1:1 with it. A scalar (fixed) ``cmd_args`` value is tolerated as a no-op - marker. Sampling, persistence, the per-trial cmd_args overlay, and the cache key all + sample. So each key must name a real ``cmd_args`` field whose value is a candidate + list; a scalar is already fixed, so annotating it is a meaningless label and is + rejected here. When ``weights`` are declared, the list needs >= 2 values and the + weights must align 1:1 with it. Sampling, persistence, the per-trial cmd_args overlay, + and the cache key all live in ``CloudAIGymEnv``; keeping this shape check in core lets the overlay stay agent- and workload-agnostic rather than re-implemented per workload. """ @@ -236,28 +237,36 @@ def validate_env_params(self) -> Self: raise ValueError(f"env_params keys {unknown} are not cmd_args fields on {type(self.cmd_args).__name__}") for name, spec in self.env_params.items(): - value = getattr(self.cmd_args, name, None) - if isinstance(value, (dict, BaseModel)): - raise ValueError( - f"env_params['{name}'] must target a leaf cmd_args field (scalar or candidate list), " - "not a structured object; param_space/is_dse_job exclude the whole key, which would " - "silently drop nested action dimensions" - ) - if isinstance(value, list) and not value: - raise ValueError( - f"env_params['{name}'] references an empty candidate list in cmd_args.{name}; " - "provide at least one candidate (the sampler would otherwise fail on an empty draw)" - ) - if spec.weights is None: - continue - if not isinstance(value, list) or len(value) < 2: - raise ValueError( - f"env_params['{name}'] declares weights but cmd_args.{name} is not a candidate list " - "(need a list of >= 2 values)" - ) - if len(spec.weights) != len(value): - raise ValueError( - f"env_params['{name}'] weights length {len(spec.weights)} does not match " - f"cmd_args.{name} candidate count {len(value)}" - ) + self._validate_env_param_field(name, spec, getattr(self.cmd_args, name, None)) return self + + @staticmethod + def _validate_env_param_field(name: str, spec: Any, value: Any) -> None: + """Reject one env_params entry whose target cmd_args field is not a valid candidate list.""" + if isinstance(value, (dict, BaseModel)): + raise ValueError( + f"env_params['{name}'] must target a leaf cmd_args field (a candidate list), " + "not a structured object; param_space/is_dse_job exclude the whole key, which would " + "silently drop nested action dimensions" + ) + if not isinstance(value, list): + raise ValueError( + f"env_params['{name}'] annotates cmd_args.{name}, which is not a candidate list " + f"(got {type(value).__name__}); the annotation only reclassifies a list-valued sweep as " + f"env-sampled, while a scalar is already fixed. Make cmd_args.{name} a list or remove " + "the annotation" + ) + if not value: + raise ValueError( + f"env_params['{name}'] references an empty candidate list in cmd_args.{name}; " + "provide at least one candidate (the sampler would otherwise fail on an empty draw)" + ) + if spec.weights is None: + return + if len(value) < 2: + raise ValueError(f"env_params['{name}'] declares weights but cmd_args.{name} needs >= 2 candidate values") + if len(spec.weights) != len(value): + raise ValueError( + f"env_params['{name}'] weights length {len(spec.weights)} does not match " + f"cmd_args.{name} candidate count {len(value)}" + ) diff --git a/tests/test_env_params.py b/tests/test_env_params.py index 510a79fe2..66cfd4352 100644 --- a/tests/test_env_params.py +++ b/tests/test_env_params.py @@ -288,25 +288,6 @@ def test_env_params_from_test_resolves_multiple_params() -> None: assert env_params.params["paddle_width"].candidates == [4, 8] -def test_env_params_from_test_skips_scalar_keeps_list() -> None: - """A mix of scalar and list annotations yields only the list-valued field as a knob.""" - env_params = EnvParams.from_test( - _tdef( - {"ball_speed": EnvParamSpec(), "paddle_width": EnvParamSpec()}, - ball_speed=[1, 2, 3], - paddle_width=8, - ) - ) - - assert env_params is not None - assert set(env_params.params) == {"ball_speed"} - - -def test_env_params_from_test_none_when_only_scalar_annotation() -> None: - """A scalar cmd_args value is fixed; with nothing list-valued to sample, from_test is None.""" - assert EnvParams.from_test(_tdef({"ball_speed": EnvParamSpec()}, ball_speed=2)) is None - - def test_env_params_from_test_none_when_no_env_params() -> None: """No annotations declared -> no EnvParams object (the zero-overhead path).""" assert EnvParams.from_test(_tdef({})) is None @@ -326,10 +307,11 @@ def test_env_params_weighted_list_is_accepted() -> None: assert tdef.env_params["ball_speed"].weights == [0.7, 0.3] -def test_env_params_scalar_no_op_is_accepted() -> None: - """Annotating a scalar (fixed) knob is tolerated as a no-op marker.""" - tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=2) - assert tdef.cmd_args.ball_speed == 2 +def test_env_params_scalar_annotation_rejected() -> None: + """A scalar (fixed) knob carries nothing to sample; the annotation is a meaningless label and is + rejected at parse time (it only reclassifies a list-valued sweep as env-sampled).""" + with pytest.raises(ValidationError, match="not a candidate list"): + _tdef({"ball_speed": EnvParamSpec()}, ball_speed=2) def test_env_params_unknown_key_rejected() -> None: From 7db764bb054943f51f375ea9c0de8ce4700f002d Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Tue, 16 Jun 2026 00:44:38 -0400 Subject: [PATCH 11/22] feat(configurator): add ObsLeafDescriptor + structured-observation protocol Add ObsLeafDescriptor (a self-describing observation leaf: "box" of width dim, or "discrete" of size n) and a StructuredObservation Protocol that documents the optional env hooks structured_observation_descriptors() and encode_observation(). These let an env expose a named, per-leaf observation so adapters (e.g. GymnasiumAdapter) can build the matching gymnasium spaces.Dict; the hooks are duck-typed, so envs need not subclass. Both exported via cloudai.core. --- src/cloudai/configurator/env_params.py | 46 +++++++++++++++++++++++++- src/cloudai/core.py | 3 ++ tests/test_env_params.py | 29 ++++++++++++++++ 3 files changed, 77 insertions(+), 1 deletion(-) diff --git a/src/cloudai/configurator/env_params.py b/src/cloudai/configurator/env_params.py index 6d98c6903..c1b7ee0d0 100644 --- a/src/cloudai/configurator/env_params.py +++ b/src/cloudai/configurator/env_params.py @@ -32,7 +32,7 @@ import math import random from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Protocol, runtime_checkable from pydantic import BaseModel, ConfigDict, Field, model_validator from typing_extensions import Self @@ -76,6 +76,50 @@ def _validate_weights(self) -> Self: return self +class ObsLeafDescriptor(BaseModel): + """ + Description of one leaf of a structured (named) observation. + + A structured observation maps each observed name to a self-describing leaf + so adapters can build the matching subspace without guessing: a ``"box"`` + leaf becomes a continuous vector of width ``dim`` (e.g. a log-encoded + env_param as ``dim=2``); a ``"discrete"`` leaf becomes a categorical of + size ``n``. Stateless agents that consume the flat observation ignore this. + """ + + model_config = ConfigDict(extra="forbid") + + kind: Literal["box", "discrete"] + dim: int = 1 + n: Optional[int] = None + + @model_validator(mode="after") + def _validate(self) -> Self: + if self.dim < 1: + raise ValueError(f"ObsLeafDescriptor dim must be >= 1; got {self.dim}") + if self.kind == "discrete" and (self.n is None or self.n < 1): + raise ValueError(f"ObsLeafDescriptor(kind='discrete') requires n >= 1; got n={self.n}") + return self + + +@runtime_checkable +class StructuredObservation(Protocol): + """ + Optional env hooks that expose a structured (per-leaf) observation. + + An env opts in by returning per-leaf :class:`ObsLeafDescriptor` from + ``structured_observation_descriptors`` (``None`` keeps the flat-vector + path) and encoding a raw observation into the matching named leaves via + ``encode_observation``. ``GymnasiumAdapter`` consumes these to expose a + ``gymnasium.spaces.Dict`` observation; the hooks are duck-typed, so envs + need not subclass this Protocol. + """ + + def structured_observation_descriptors(self) -> Optional[Dict[str, ObsLeafDescriptor]]: ... + + def encode_observation(self, observation: list) -> Dict[str, Any]: ... + + @dataclasses.dataclass(frozen=True) class EnvParam: """ diff --git a/src/cloudai/core.py b/src/cloudai/core.py index 752d24972..aefcd0573 100644 --- a/src/cloudai/core.py +++ b/src/cloudai/core.py @@ -51,6 +51,7 @@ from ._core.test_scenario import METRIC_ERROR, MetricErrorSentinel, MetricValue, TestDependency, TestRun, TestScenario from .configurator.base_agent import BaseAgent, BaseAgentConfig, RewardOverrides from .configurator.cloudai_gym import CloudAIGymEnv +from .configurator.env_params import ObsLeafDescriptor, StructuredObservation from .configurator.grid_search import GridSearchAgent from .models.workload import CmdArgs, NsysConfiguration, PredictorConfig, TestDefinition from .parser import Parser @@ -85,6 +86,7 @@ "MetricValue", "MissingTestError", "NsysConfiguration", + "ObsLeafDescriptor", "Parser", "PerTestReporter", "PredictorConfig", @@ -96,6 +98,7 @@ "RewardOverrides", "Runner", "StatusReporter", + "StructuredObservation", "System", "SystemConfigParsingError", "TarballReporter", diff --git a/tests/test_env_params.py b/tests/test_env_params.py index 66cfd4352..3668e9bb8 100644 --- a/tests/test_env_params.py +++ b/tests/test_env_params.py @@ -44,6 +44,7 @@ EnvParams, EnvParamSpec, EnvParamsSink, + ObsLeafDescriptor, ) from cloudai.core import TestRun from cloudai.models.workload import CmdArgs, TestDefinition @@ -371,3 +372,31 @@ def test_apply_params_set_accepts_weighted_env_param_draw() -> None: assert new_tr.test.cmd_args.ball_speed == 1 assert new_tr.current_env_params == {"ball_speed": 1} + + +# --- ObsLeafDescriptor: structured-observation leaf schema --- + + +def test_obs_leaf_descriptor_box_defaults() -> None: + leaf = ObsLeafDescriptor(kind="box", dim=2) + assert leaf.kind == "box" + assert leaf.dim == 2 + assert leaf.n is None + + +def test_obs_leaf_descriptor_discrete_requires_n() -> None: + leaf = ObsLeafDescriptor(kind="discrete", dim=1, n=3) + assert leaf.n == 3 + with pytest.raises(ValidationError, match="requires n"): + ObsLeafDescriptor(kind="discrete", dim=1) + with pytest.raises(ValidationError, match="requires n"): + ObsLeafDescriptor(kind="discrete", dim=1, n=0) + + +def test_obs_leaf_descriptor_rejects_bad_dim_and_extra_fields() -> None: + with pytest.raises(ValidationError, match="dim must be"): + ObsLeafDescriptor(kind="box", dim=0) + with pytest.raises(ValidationError): + ObsLeafDescriptor(kind="box", dim=1, unexpected=1) + with pytest.raises(ValidationError): + ObsLeafDescriptor(kind="categorical", dim=1) From 584f145f0386e38ec86b1843c944a1514e901910 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Tue, 16 Jun 2026 10:57:57 -0400 Subject: [PATCH 12/22] test(env-params): suppress pyright on intentional ObsLeafDescriptor rejection tests Negative tests pass an extra kwarg and an out-of-Literal kind to assert ValidationError; mark the deliberate type violations with type: ignore. --- tests/test_env_params.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_env_params.py b/tests/test_env_params.py index 3668e9bb8..eef3feba0 100644 --- a/tests/test_env_params.py +++ b/tests/test_env_params.py @@ -397,6 +397,6 @@ def test_obs_leaf_descriptor_rejects_bad_dim_and_extra_fields() -> None: with pytest.raises(ValidationError, match="dim must be"): ObsLeafDescriptor(kind="box", dim=0) with pytest.raises(ValidationError): - ObsLeafDescriptor(kind="box", dim=1, unexpected=1) + ObsLeafDescriptor(kind="box", dim=1, unexpected=1) # type: ignore with pytest.raises(ValidationError): - ObsLeafDescriptor(kind="categorical", dim=1) + ObsLeafDescriptor(kind="categorical", dim=1) # type: ignore From 6b6d2bd30276088db9c0f9c35d237afce39b3cc2 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Tue, 16 Jun 2026 01:04:25 -0400 Subject: [PATCH 13/22] feat(configurator): add GymnasiumAdapter for CloudAI envs Wrap a CloudAI BaseGym as a gymnasium.Env-shaped object: a spaces.Dict of Discrete (list params) and Box (ContinuousSpace) actions over the tunable params with fixed (single-value) params injected each step; observations as either a flat float32 Box or, when the env opts in via the structured-obs hooks, a spaces.Dict of per-leaf ObsLeafDescriptor subspaces. Continuous dtype="int" params are quantized (rounded/clamped) at decode_action so the trajectory cache key collapses float jitter. The adapter is a pure pass-through over test_run.step (never mutates it), so contextual-bandit rollouts that reset() per trial keep a monotonic trial index. gymnasium is an optional dependency lazy-imported behind the new [rl] extra (also added to dev); CloudAIGymEnv.define_observation_space() now returns one slot per agent metric so adapters get the right Box shape. Exported via cloudai.core. Caller-contract tests pin the step-monotonicity, observation pass-through, continuous-quantization, and structured-obs invariants. --- pyproject.toml | 2 + src/cloudai/configurator/__init__.py | 2 + src/cloudai/configurator/cloudai_gym.py | 7 +- src/cloudai/configurator/gymnasium_adapter.py | 258 ++++++++++ src/cloudai/core.py | 2 + tests/test_gymnasium_adapter_contract.py | 479 ++++++++++++++++++ uv.lock | 41 +- 7 files changed, 787 insertions(+), 4 deletions(-) create mode 100644 src/cloudai/configurator/gymnasium_adapter.py create mode 100644 tests/test_gymnasium_adapter_contract.py diff --git a/pyproject.toml b/pyproject.toml index 398326fc3..99e01ae81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,9 @@ requires-python = ">=3.10" "import-linter~=2.10", "pytest-deadfixtures~=3.1", "taplo~=0.9.3", + "gymnasium~=1.2", ] + rl = ["gymnasium~=1.2"] docs = [ "sphinx~=8.1", "nvidia-sphinx-theme~=0.0.8", diff --git a/src/cloudai/configurator/__init__.py b/src/cloudai/configurator/__init__.py index f05b65c5b..a88432c41 100644 --- a/src/cloudai/configurator/__init__.py +++ b/src/cloudai/configurator/__init__.py @@ -18,11 +18,13 @@ from .base_gym import BaseGym from .cloudai_gym import CloudAIGymEnv, TrajectoryEntry from .grid_search import GridSearchAgent +from .gymnasium_adapter import GymnasiumAdapter __all__ = [ "BaseAgent", "BaseGym", "CloudAIGymEnv", "GridSearchAgent", + "GymnasiumAdapter", "TrajectoryEntry", ] diff --git a/src/cloudai/configurator/cloudai_gym.py b/src/cloudai/configurator/cloudai_gym.py index 0b4a3b0c0..b32e137cc 100644 --- a/src/cloudai/configurator/cloudai_gym.py +++ b/src/cloudai/configurator/cloudai_gym.py @@ -85,9 +85,10 @@ def define_observation_space(self) -> list: Define the observation space for the environment. Returns: - list: The observation space. + list: One float slot per agent metric (at least one), giving the correct shape + for adapters that derive ``gymnasium.spaces.Box`` from this output. """ - return [0.0] + return [0.0] * max(len(self.test_run.test.agent_metrics), 1) def reset( self, @@ -109,7 +110,7 @@ def reset( if seed is not None: lazy.np.random.seed(seed) self.test_run.current_iteration = 0 - observation = [0.0] + observation = self.define_observation_space() info = {} return observation, info diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py new file mode 100644 index 000000000..590abf11d --- /dev/null +++ b/src/cloudai/configurator/gymnasium_adapter.py @@ -0,0 +1,258 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Gymnasium adapter for CloudAI ``BaseGym`` environments. + +Translates a CloudAI :class:`BaseGym` into the ``gymnasium.Env`` 5-tuple shape +that RLlib-based agents (e.g. PPO / DQN) and external training loops expect. +``gymnasium`` is an optional dependency (the ``[rl]`` extra), so it is imported +lazily and only required when an adapter is actually instantiated. + +Design invariant — adapter is a pure pass-through over ``test_run.step``. +The trial counter is owned by ``TestRun`` and advanced exclusively by +``CloudAIGymEnv.step()``. Adapters that wrote ``test_run.step`` themselves — +mirroring a Gym-protocol episode-local counter — collapsed every +contextual-bandit rollout onto ``step=1`` because RLlib calls ``reset()`` per +trial. This adapter never mutates ``test_run.step``; contract tests pin that +property. +""" + +from __future__ import annotations + +from typing import Any, ClassVar, Optional, cast + +from cloudai._core.action_space import ContinuousSpace +from cloudai.configurator.base_gym import BaseGym +from cloudai.configurator.env_params import StructuredObservation + +_GYMNASIUM_INSTALL_HINT = "gymnasium is required for GymnasiumAdapter. Install it with: pip install 'cloudai[rl]'" + + +def _import_gymnasium(): + """ + Import gymnasium + numpy lazily; raise a clear, actionable error when absent. + + Kept as a single seam so cloudai installs without the ``[rl]`` extra + continue to work for non-RL agents, and tests can patch this helper to + simulate a missing install. + """ + try: + import gymnasium + import numpy as np + from gymnasium import spaces + + return gymnasium, spaces, np + except ImportError as exc: + raise ImportError(_GYMNASIUM_INSTALL_HINT) from exc + + +class GymnasiumAdapter: + """ + Expose a CloudAI :class:`BaseGym` as a ``gymnasium.Env``-shaped object. + + The adapter: + + * Builds a ``gymnasium.spaces.Dict`` of ``Discrete`` action spaces over + the *tunable* parameters (those with more than one candidate value), + and injects the *fixed* parameters (single candidate) automatically on + every step so agents never see them. + * Converts observations to ``float32`` ``numpy`` arrays sized by + ``env.define_observation_space()``. + * Returns the gymnasium 5-tuple ``(obs, reward, terminated, truncated, info)`` + from :meth:`step` and :meth:`step_raw`. + + ``gymnasium`` and ``numpy`` are optional dependencies (the ``[rl]`` extra); + instantiating the adapter without them raises ``ImportError``. + """ + + metadata: ClassVar[dict[str, Any]] = {"render_modes": ["human"]} + + def __init__(self, env: BaseGym) -> None: + _, spaces, np = _import_gymnasium() + + self._np = np + self._spaces = spaces + self._env = env + + raw_action_space = env.define_action_space() + + # Three classes of params from cloudai's param_space: + # list with len > 1 -> discrete tunable, mapped to gym.Discrete. + # list with len == 1 -> fixed (collapsed); injected on every step so + # agents never see them. + # ContinuousSpace -> continuous tunable, mapped to gym.Box(1,); + # ``dtype="int"`` quantizes at decode_action. + self._discrete_params: dict[str, list] = { + k: v for k, v in raw_action_space.items() if isinstance(v, list) and len(v) > 1 + } + self._continuous_params: dict[str, ContinuousSpace] = { + k: v for k, v in raw_action_space.items() if isinstance(v, ContinuousSpace) + } + self._fixed_params: dict[str, Any] = { + k: v[0] for k, v in raw_action_space.items() if isinstance(v, list) and len(v) == 1 + } + + action_space_components: dict[str, Any] = { + name: spaces.Discrete(len(values)) for name, values in self._discrete_params.items() + } + for name, space in self._continuous_params.items(): + action_space_components[name] = spaces.Box( + low=np.array([space.low], dtype=np.float32), + high=np.array([space.high], dtype=np.float32), + shape=(1,), + dtype=np.float32, + ) + self.action_space = spaces.Dict(action_space_components) + + # Observation space: prefer the env's structured (per-leaf) spec so the + # policy sees named, individually-encoded leaves (e.g. a log-encoded + # env_param as Box(2)); RLlib connectors own normalize + flatten. Falls + # back to a flat Box for envs that only expose define_observation_space. + self._obs_descriptors: Optional[dict[str, Any]] = self._structured_obs_descriptors(env) + if self._obs_descriptors: + self.observation_space = spaces.Dict( + {name: self._descriptor_to_space(desc) for name, desc in self._obs_descriptors.items()} + ) + else: + obs_shape = (len(env.define_observation_space()),) + self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=obs_shape, dtype=np.float32) + + @staticmethod + def _structured_obs_descriptors(env: BaseGym) -> Optional[dict[str, Any]]: + """ + Return the env's per-leaf obs descriptors, or ``None`` for the flat-Box path. + + The env owns the opt-in decision via ``structured_observation_descriptors`` + (returns ``None`` unless an observed name is a declared env_param). Envs + without that hook keep the legacy flat-Box path. + """ + getter = getattr(env, "structured_observation_descriptors", None) + if getter is None or not hasattr(env, "encode_observation"): + return None + descriptors = getter() + return descriptors or None + + def _descriptor_to_space(self, descriptor: Any) -> Any: + """Map a framework-agnostic ``ObsLeafDescriptor`` to a gymnasium subspace.""" + if descriptor.kind == "discrete": + return self._spaces.Discrete(descriptor.n) + return self._spaces.Box(low=-self._np.inf, high=self._np.inf, shape=(descriptor.dim,), dtype=self._np.float32) + + @property + def unwrapped(self) -> BaseGym: + return self._env + + def decode_action(self, action: dict[str, Any]) -> dict[str, Any]: + """ + Map raw gym actions back to native parameter values. + + Discrete actions are list indices and resolve to the corresponding list + entry. Continuous actions arrive as a 1-D ``numpy`` array (the Box + shape) and are clamped to the declared range; for ``dtype="int"`` the + scalar is rounded — this is the **only** place quantization happens, so + the cache key collapses float jitter (47.34 vs 47.36) onto the same + emitted int and downstream code stays type-agnostic. + + Raises: + ValueError: if ``action`` is missing tunable params, contains + unknown keys, or carries an out-of-range discrete index. + """ + expected = set(self._discrete_params) | set(self._continuous_params) + self._assert_keys(action.keys(), expected, "action") + decoded: dict[str, Any] = {} + for name, raw in action.items(): + if name in self._discrete_params: + decoded[name] = self._decode_discrete(name, raw) + else: + decoded[name] = self._decode_continuous(name, raw) + return decoded + + def _decode_discrete(self, name: str, raw: Any) -> Any: + values = self._discrete_params[name] + idx = int(raw) + if not 0 <= idx < len(values): + raise ValueError(f"Action index out of range for '{name}': {idx} (expected 0..{len(values) - 1})") + return values[idx] + + def _decode_continuous(self, name: str, raw: Any) -> Any: + space = self._continuous_params[name] + scalar = float(self._np.asarray(raw, dtype=self._np.float32).reshape(-1)[0]) + clamped = min(max(scalar, space.low), space.high) + return round(clamped) if space.dtype == "int" else clamped + + def reset( + self, + *, + seed: Optional[int] = None, + options: Optional[dict[str, Any]] = None, + ) -> tuple[Any, dict[str, Any]]: + obs, info = self._env.reset(seed=seed, options=options) + return self._as_obs_array(obs), info + + def step(self, action: dict[str, int]) -> tuple[Any, float, bool, bool, dict[str, Any]]: + params = {**self._fixed_params, **self.decode_action(action)} + return self._step_with_params(params) + + def step_raw(self, params: dict[str, Any]) -> tuple[Any, float, bool, bool, dict[str, Any]]: + """ + Step the env with an already-decoded parameter dict; bypasses index decoding. + + Raises: + ValueError: if ``params`` does not cover exactly the tunable + + fixed param keys. + """ + expected = set(self._discrete_params) | set(self._continuous_params) | set(self._fixed_params) + self._assert_keys(params.keys(), expected, "raw params") + return self._step_with_params(params) + + def render(self) -> None: + self._env.render() + + @staticmethod + def _assert_keys(received: Any, expected: set[str], ctx: str) -> None: + received_set = set(received) + if received_set == expected: + return + missing = sorted(expected - received_set) + extra = sorted(received_set - expected) + raise ValueError(f"{ctx} keys mismatch; missing={missing}, extra={extra}") + + def _step_with_params(self, params: dict[str, Any]) -> tuple[Any, float, bool, bool, dict[str, Any]]: + obs, reward, done, info = self._env.step(params) + return self._as_obs_array(obs), float(reward), bool(done), False, info + + def _as_obs_array(self, obs: Any) -> Any: + """ + Convert a raw env observation into the policy-facing observation. + + Structured path: the flat raw obs (which feeds the env's reward and + ``trajectory.csv``) is encoded per-leaf by the env and returned as a + ``dict`` keyed to match ``observation_space`` (a ``spaces.Dict``). + Flat path: a single ``float32`` ``Box`` array (legacy behaviour). + """ + descriptors = self._obs_descriptors + if descriptors is None: + return self._np.asarray(obs, dtype=self._np.float32) + env = cast(StructuredObservation, self._env) + encoded = env.encode_observation(list(obs)) + return {name: self._leaf_to_value(descriptors[name], leaf) for name, leaf in encoded.items()} + + def _leaf_to_value(self, descriptor: Any, leaf: Any) -> Any: + """Coerce one encoded leaf to its gymnasium subspace dtype.""" + if descriptor.kind == "discrete": + return int(leaf) + return self._np.asarray(leaf, dtype=self._np.float32) diff --git a/src/cloudai/core.py b/src/cloudai/core.py index aefcd0573..f45f0c3f9 100644 --- a/src/cloudai/core.py +++ b/src/cloudai/core.py @@ -53,6 +53,7 @@ from .configurator.cloudai_gym import CloudAIGymEnv from .configurator.env_params import ObsLeafDescriptor, StructuredObservation from .configurator.grid_search import GridSearchAgent +from .configurator.gymnasium_adapter import GymnasiumAdapter from .models.workload import CmdArgs, NsysConfiguration, PredictorConfig, TestDefinition from .parser import Parser from .reporter import PerTestReporter, StatusReporter, TarballReporter @@ -76,6 +77,7 @@ "Grader", "GradingStrategy", "GridSearchAgent", + "GymnasiumAdapter", "HFModel", "InstallStatusResult", "Installable", diff --git a/tests/test_gymnasium_adapter_contract.py b/tests/test_gymnasium_adapter_contract.py new file mode 100644 index 000000000..cbe48bcfb --- /dev/null +++ b/tests/test_gymnasium_adapter_contract.py @@ -0,0 +1,479 @@ +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Caller-contract tests for ``GymnasiumAdapter``. + +The single invariant every consumer assumes: + + ``test_run.step`` is a **monotonic trial index** across the entire run. + +Gym's ``reset()`` is an *episode boundary*, not a trial boundary. For the +contextual-bandit configs (``agent_steps=1``), RLlib calls ``reset()`` before +*every* trial. An earlier adapter rewound ``test_run.step`` on reset and +collapsed every trial onto step 1 — silently overwriting output dirs and +producing duplicate-step rows in trajectory.csv / env.csv. + +These tests pin the negative invariant: the adapter must not mutate +``test_run.step``. That counter is owned by ``TestRun`` and advanced +exclusively by ``CloudAIGymEnv.step()``. +""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any, Optional + +import pytest + +from cloudai._core.action_space import ContinuousSpace +from cloudai.configurator.base_gym import BaseGym +from cloudai.configurator.env_params import ObsLeafDescriptor + +try: + import gymnasium # noqa: F401 + + _HAS_GYMNASIUM = True +except ImportError: + _HAS_GYMNASIUM = False + +pytestmark = pytest.mark.skipif(not _HAS_GYMNASIUM, reason="gymnasium not installed") + +from cloudai.configurator.gymnasium_adapter import GymnasiumAdapter # noqa: E402 + + +class _StubBaseGym(BaseGym): + """Minimal BaseGym with a ``test_run`` attribute mirroring CloudAIGymEnv.""" + + def __init__(self) -> None: + self._action_space: dict[str, Any] = {"param_a": [1, 2, 3], "param_b": [10, 20]} + self._observation_space: list[float] = [0.0, 0.0, 0.0] + self.test_run = SimpleNamespace(step=0) + super().__init__() + + def define_action_space(self) -> dict[str, Any]: + return self._action_space + + def define_observation_space(self) -> list: + return self._observation_space + + def reset( + self, + seed: Optional[int] = None, + options: Optional[dict[str, Any]] = None, + ) -> tuple[list, dict[str, Any]]: + return [0.0, 0.0, 0.0], {} + + def step(self, action: Any) -> tuple[list, float, bool, dict]: + self.test_run.step += 1 + return [1.0, 2.0, 3.0], 0.5, False, {} + + def render(self, mode: str = "human") -> None: + return None + + def seed(self, seed: Optional[int] = None) -> None: + pass + + +class TestStepIsMonotonicTrialIndex: + """``test_run.step`` is a trial index, not an episode-local counter.""" + + def test_step_advances_within_single_episode(self) -> None: + gym = _StubBaseGym() + adapter = GymnasiumAdapter(gym) + adapter.reset() + + seen: list[int] = [] + for _ in range(3): + adapter.step({"param_a": 0, "param_b": 0}) + seen.append(gym.test_run.step) + + assert seen == [1, 2, 3] + + def test_step_is_monotonic_across_episode_boundaries(self) -> None: + """The bug: ``reset()`` rewinds ``_step_count`` to 0, so the next + ``step()`` writes ``test_run.step = 1`` again. With contextual-bandit + RLlib (one step per episode) this means every trial reports step 1. + """ + gym = _StubBaseGym() + adapter = GymnasiumAdapter(gym) + + seen: list[int] = [] + for _ in range(5): + adapter.reset() + adapter.step({"param_a": 0, "param_b": 0}) + seen.append(gym.test_run.step) + + assert seen == [1, 2, 3, 4, 5], ( + f"test_run.step must be a monotonic trial index across episodes; got {seen}. " + "reset() is a Gym episode boundary, not a trial boundary; rewinding the " + "trial counter collapses every contextual-bandit rollout onto step 1." + ) + + def test_mixed_within_and_across_episode_steps_are_monotonic(self) -> None: + gym = _StubBaseGym() + adapter = GymnasiumAdapter(gym) + + seen: list[int] = [] + for episode_len in (2, 1, 3): + adapter.reset() + for _ in range(episode_len): + adapter.step({"param_a": 0, "param_b": 0}) + seen.append(gym.test_run.step) + + assert seen == [1, 2, 3, 4, 5, 6], ( + f"test_run.step must be a monotonic trial index regardless of episode shape; got {seen}" + ) + + +class _ContextualStubBaseGym(BaseGym): + """BaseGym stub that simulates CloudAIGymEnv's contextual-obs contract. + + ``reset()`` returns an observation with a per-trial context value in + slot 1 (mimicking how the upstream env writes a sampled env_param into + the obs vector built at the trial boundary). Each call to ``reset()`` + advances the simulated trial counter so we can assert the adapter + surfaces the *current* context, not a stale one. + """ + + def __init__(self, contexts: list[float]) -> None: + self._contexts = list(contexts) + self._action_space: dict[str, Any] = {"param_a": [1, 2, 3], "param_b": [10, 20]} + self.test_run = SimpleNamespace(step=0) + super().__init__() + + def define_action_space(self) -> dict[str, Any]: + return self._action_space + + def define_observation_space(self) -> list: + return [0.0, 0.0] + + def reset( + self, + seed: Optional[int] = None, + options: Optional[dict[str, Any]] = None, + ) -> tuple[list, dict[str, Any]]: + ctx = self._contexts[self.test_run.step] + self.test_run.step += 1 + return [0.0, ctx], {} + + def step(self, action: Any) -> tuple[list, float, bool, dict]: + ctx = self._contexts[self.test_run.step - 1] + return [42.0, ctx], 0.5, False, {} + + def render(self, mode: str = "human") -> None: + return None + + def seed(self, seed: Optional[int] = None) -> None: + pass + + +class TestAdapterPropagatesContextualObservation: + """The adapter must pass through env-built observations unchanged. + + With the contextual-bandit fix in cloudai, ``CloudAIGymEnv.reset()`` + samples env_params at the trial boundary and bakes them into the obs + vector before returning. RLlib's policy reads obs from + ``adapter.reset()``, so the adapter must propagate that vector verbatim + (modulo numpy-float32 casting). The same propagation invariant applies + on ``adapter.step()``. + """ + + def test_reset_propagates_context_into_observation(self) -> None: + contexts = [0.001, 0.0, 0.01, 0.001] + gym = _ContextualStubBaseGym(contexts) + adapter = GymnasiumAdapter(gym) + + seen: list[float] = [] + for _ in range(len(contexts)): + obs, _info = adapter.reset() + seen.append(float(obs[1])) + + assert seen == pytest.approx(contexts, rel=1e-5), ( + f"adapter.reset() must surface the trial's context value (slot 1) verbatim " + f"(modulo float32 cast); got {seen}, expected {contexts}" + ) + + def test_step_propagates_context_into_observation(self) -> None: + contexts = [0.0, 0.01, 0.001] + gym = _ContextualStubBaseGym(contexts) + adapter = GymnasiumAdapter(gym) + + for ctx in contexts: + adapter.reset() + obs, _r, _term, _trunc, _info = adapter.step({"param_a": 0, "param_b": 0}) + assert float(obs[0]) == pytest.approx(42.0, rel=1e-5), ( + f"adapter.step() must propagate the env's measured-metric slot; got {obs[0]}" + ) + assert float(obs[1]) == pytest.approx(ctx, rel=1e-5), ( + f"adapter.step() must propagate the trial's context value; got {obs[1]}, expected {ctx}" + ) + + +class _ContinuousStubBaseGym(BaseGym): + """BaseGym stub that surfaces a ContinuousSpace in its action_space. + + Records the params received by ``step`` so the test can assert what the + adapter actually emits to the env (post-rounding / clamping). + """ + + def __init__(self, action_space: dict[str, Any]) -> None: + self._action_space = action_space + self.test_run = SimpleNamespace(step=0) + self.received: list[dict[str, Any]] = [] + super().__init__() + + def define_action_space(self) -> dict[str, Any]: + return self._action_space + + def define_observation_space(self) -> list: + return [0.0] + + def reset( + self, + seed: Optional[int] = None, + options: Optional[dict[str, Any]] = None, + ) -> tuple[list, dict[str, Any]]: + return [0.0], {} + + def step(self, action: Any) -> tuple[list, float, bool, dict]: + self.received.append(dict(action)) + return [1.0], 0.5, False, {} + + def render(self, mode: str = "human") -> None: + return None + + def seed(self, seed: Optional[int] = None) -> None: + pass + + +class TestAdapterDispatchesContinuousSpace: + """``ContinuousSpace`` in cloudai's param_space → ``gym.spaces.Box`` in the adapter. + + The adapter is also responsible for **actuator quantization**: when the + spec is ``dtype="int"``, the float coming out of the policy is rounded at + decode_action, so the env (and trajectory cache) sees only ints. This + collapses float jitter (47.34 vs 47.36) onto the same emitted int and + keeps the cache hit-rate honest. + """ + + @staticmethod + def _action_space() -> dict[str, Any]: + return { + "threshold": ContinuousSpace(low=0.0, high=200.0, dtype="int"), + "discrete": [10, 20, 30], + "fixed": [99], + } + + def test_action_space_uses_box_for_continuous_and_discrete_for_list(self) -> None: + import gymnasium + + gym_env = _ContinuousStubBaseGym(self._action_space()) + adapter = GymnasiumAdapter(gym_env) + + assert isinstance(adapter.action_space["threshold"], gymnasium.spaces.Box) + assert adapter.action_space["threshold"].shape == (1,) + assert float(adapter.action_space["threshold"].low[0]) == pytest.approx(0.0) + assert float(adapter.action_space["threshold"].high[0]) == pytest.approx(200.0) + + assert isinstance(adapter.action_space["discrete"], gymnasium.spaces.Discrete) + assert int(adapter.action_space["discrete"].n) == 3 + + assert "fixed" not in adapter.action_space, "single-element lists are fixed, not tunable" + + def test_decode_action_rounds_dtype_int(self) -> None: + import numpy as np + + gym_env = _ContinuousStubBaseGym(self._action_space()) + adapter = GymnasiumAdapter(gym_env) + + decoded = adapter.decode_action({"threshold": np.array([47.34], dtype=np.float32), "discrete": 1}) + + assert decoded["threshold"] == 47, f"dtype=int must round; got {decoded['threshold']}" + assert isinstance(decoded["threshold"], int), "rounded action must be Python int, not float" + assert decoded["discrete"] == 20, "Discrete index 1 → 20" + + @pytest.mark.parametrize("raw,expected", [(47.34, 47), (47.36, 47), (47.5, 48), (47.4999, 47)]) + def test_cache_key_collapses_float_jitter_to_same_int(self, raw: float, expected: int) -> None: + """Adjacent float actions that round to the same int must collapse identically. + + This is the actuator-quantization invariant from the design doc: the + env (and trajectory cache key) must see the same int for any float in + the rounding interval, so the cache fills with semantic duplicates, + not float-noise duplicates. + """ + import numpy as np + + gym_env = _ContinuousStubBaseGym(self._action_space()) + adapter = GymnasiumAdapter(gym_env) + + decoded = adapter.decode_action({"threshold": np.array([raw], dtype=np.float32), "discrete": 0}) + assert decoded["threshold"] == expected + + @pytest.mark.parametrize("raw,expected", [(-5.0, 0), (250.0, 200), (0.0, 0), (200.0, 200)]) + def test_decode_action_clamps_to_range(self, raw: float, expected: int) -> None: + """Out-of-range continuous actions clamp to ``[low, high]``.""" + import numpy as np + + gym_env = _ContinuousStubBaseGym(self._action_space()) + adapter = GymnasiumAdapter(gym_env) + + decoded = adapter.decode_action({"threshold": np.array([raw], dtype=np.float32), "discrete": 0}) + assert decoded["threshold"] == expected + + def test_step_emits_rounded_int_to_underlying_env(self) -> None: + """The env (and downstream cache) must see the *rounded* int, not the raw float.""" + import numpy as np + + gym_env = _ContinuousStubBaseGym(self._action_space()) + adapter = GymnasiumAdapter(gym_env) + adapter.reset() + + adapter.step({"threshold": np.array([78.6], dtype=np.float32), "discrete": 2}) + + assert gym_env.received, "env.step was not called" + emitted = gym_env.received[-1] + assert emitted["threshold"] == 79, "env must receive rounded int, not raw float" + assert emitted["discrete"] == 30, "Discrete decode unaffected by continuous-action plumbing" + assert emitted["fixed"] == 99, "fixed params must be injected by the adapter" + + def test_dtype_float_preserves_continuous_value(self) -> None: + """``dtype="float"`` clamps but does NOT round; the policy's float reaches the env.""" + import numpy as np + + action_space: dict[str, Any] = { + "knob": ContinuousSpace(low=0.0, high=1.0, dtype="float"), + } + gym_env = _ContinuousStubBaseGym(action_space) + adapter = GymnasiumAdapter(gym_env) + + decoded = adapter.decode_action({"knob": np.array([0.7234], dtype=np.float32)}) + assert decoded["knob"] == pytest.approx(0.7234, rel=1e-4) + assert isinstance(decoded["knob"], float) + + +class _StructuredStubBaseGym(BaseGym): + """BaseGym stub that opts in/out of the structured (Dict) obs path. + + Mirrors ``CloudAIGymEnv``'s gate: ``structured_observation_descriptors`` + returns ``None`` for a metrics-only env (no observed name is a declared + env_param) and a per-leaf descriptor dict otherwise. ``encode_observation`` + produces the matching encoded leaves. + """ + + def __init__(self, descriptors: Optional[dict[str, ObsLeafDescriptor]], obs_dim: int = 2) -> None: + self._descriptors = descriptors + self._obs_dim = obs_dim + self._action_space: dict[str, Any] = {"param_a": [1, 2, 3]} + self.test_run = SimpleNamespace(step=0) + super().__init__() + + def define_action_space(self) -> dict[str, Any]: + return self._action_space + + def define_observation_space(self) -> list: + return [0.0] * self._obs_dim + + def structured_observation_descriptors(self) -> Optional[dict[str, ObsLeafDescriptor]]: + return self._descriptors + + def encode_observation(self, raw_values: list) -> dict[str, Any]: + out: dict[str, Any] = {} + assert self._descriptors is not None + for i, (name, desc) in enumerate(self._descriptors.items()): + raw = raw_values[i] if i < len(raw_values) else 0.0 + if desc.kind == "discrete": + out[name] = int(raw) + elif desc.dim == 2: + out[name] = [1.0 if raw <= 0.0 else 0.0, float(raw)] + else: + out[name] = [float(raw)] + return out + + def reset( + self, + seed: Optional[int] = None, + options: Optional[dict[str, Any]] = None, + ) -> tuple[list, dict[str, Any]]: + return [0.0] * self._obs_dim, {} + + def step(self, action: Any) -> tuple[list, float, bool, dict]: + return [0.0] * self._obs_dim, 0.5, False, {} + + def render(self, mode: str = "human") -> None: + return None + + def seed(self, seed: Optional[int] = None) -> None: + pass + + +class TestStructuredObsGate: + """D1: the structured (Dict) obs space is opt-in; metrics-only stays flat Box.""" + + def test_metrics_only_env_falls_back_to_box(self) -> None: + """An env that opts out (``structured_observation_descriptors`` -> None) keeps a flat Box. + + This is the blast-radius guard: non-DR workloads (BO/GA/MAB on plain + metrics) must NOT silently switch to a Dict obs layout. + """ + import gymnasium + + gym_env = _StructuredStubBaseGym(descriptors=None, obs_dim=3) + adapter = GymnasiumAdapter(gym_env) + + assert isinstance(adapter.observation_space, gymnasium.spaces.Box) + assert adapter.observation_space.shape == (3,) + + def test_env_param_env_uses_dict(self) -> None: + """An env with a declared env_param leaf exposes a ``spaces.Dict``.""" + import gymnasium + + descriptors = { + "bus_bw": ObsLeafDescriptor(kind="box", dim=1), + "drop_rate": ObsLeafDescriptor(kind="box", dim=2), + } + adapter = GymnasiumAdapter(_StructuredStubBaseGym(descriptors=descriptors)) + + assert isinstance(adapter.observation_space, gymnasium.spaces.Dict) + assert set(adapter.observation_space.spaces) == {"bus_bw", "drop_rate"} + assert adapter.observation_space.spaces["drop_rate"].shape == (2,) + + +class TestCategoricalLeafSubspace: + """D3: a categorical (discrete) descriptor maps to ``Discrete(k)`` and decodes to an int.""" + + def test_discrete_descriptor_becomes_discrete_space(self) -> None: + import gymnasium + + descriptors = { + "bus_bw": ObsLeafDescriptor(kind="box", dim=1), + "variant": ObsLeafDescriptor(kind="discrete", dim=1, n=3), + } + adapter = GymnasiumAdapter(_StructuredStubBaseGym(descriptors=descriptors)) + + variant_space = adapter.observation_space.spaces["variant"] + assert isinstance(variant_space, gymnasium.spaces.Discrete) + assert int(variant_space.n) == 3 + + def test_discrete_leaf_emitted_as_int_index(self) -> None: + """The emitted obs for a discrete leaf is an ``int`` the Discrete space accepts.""" + descriptors = {"variant": ObsLeafDescriptor(kind="discrete", dim=1, n=3)} + gym_env = _StructuredStubBaseGym(descriptors=descriptors, obs_dim=1) + adapter = GymnasiumAdapter(gym_env) + + obs, _ = adapter.reset() + assert isinstance(obs["variant"], int) + assert adapter.observation_space.contains(obs) diff --git a/uv.lock b/uv.lock index 7f8a19b64..5a8911dbd 100644 --- a/uv.lock +++ b/uv.lock @@ -282,6 +282,7 @@ dependencies = [ [package.optional-dependencies] dev = [ { name = "build" }, + { name = "gymnasium" }, { name = "import-linter" }, { name = "pandas-stubs" }, { name = "pre-commit" }, @@ -312,6 +313,9 @@ docs-cms = [ { name = "sphinx-rtd-theme" }, { name = "sphinxcontrib-mermaid" }, ] +rl = [ + { name = "gymnasium" }, +] [package.metadata] requires-dist = [ @@ -320,6 +324,8 @@ requires-dist = [ { name = "bokeh", specifier = "~=3.8" }, { name = "build", marker = "extra == 'dev'", specifier = "~=1.4" }, { name = "click", specifier = "~=8.3" }, + { name = "gymnasium", marker = "extra == 'dev'", specifier = "~=1.2" }, + { name = "gymnasium", marker = "extra == 'rl'", specifier = "~=1.2" }, { name = "huggingface-hub", specifier = "~=1.4" }, { name = "import-linter", marker = "extra == 'dev'", specifier = "~=2.10" }, { name = "jinja2", specifier = "~=3.1.6" }, @@ -350,7 +356,16 @@ requires-dist = [ { name = "vulture", marker = "extra == 'dev'", specifier = "==2.14" }, { name = "websockets", specifier = "~=16.0" }, ] -provides-extras = ["dev", "docs", "docs-cms"] +provides-extras = ["dev", "rl", "docs", "docs-cms"] + +[[package]] +name = "cloudpickle" +version = "3.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/27/fb/576f067976d320f5f0114a8d9fa1215425441bb35627b1993e5afd8111e5/cloudpickle-3.1.2.tar.gz", hash = "sha256:7fda9eb655c9c230dab534f1983763de5835249750e85fbcef43aaa30a9a2414", size = 22330, upload-time = "2025-11-03T09:25:26.604Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl", hash = "sha256:9acb47f6afd73f60dc1df93bb801b472f05ff42fa6c84167d25cb206be1fbf4a", size = 22228, upload-time = "2025-11-03T09:25:25.534Z" }, +] [[package]] name = "colorama" @@ -674,6 +689,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, ] +[[package]] +name = "farama-notifications" +version = "0.0.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/91/14397890dde30adc4bee6462158933806207bc5dd10d7b4d09d5c33845cf/farama_notifications-0.0.6.tar.gz", hash = "sha256:b19acac4bb41d76e59e03394b5dd165f4761c86fa327f56307a35cbee3b60158", size = 2517, upload-time = "2026-04-24T08:43:57.603Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/f0/21f81892e4ed10f4ec3ef2e7cf8635fb76e7c0907c55d0da66be50094760/farama_notifications-0.0.6-py3-none-any.whl", hash = "sha256:f84839188efa1ce5bb361c2a84881b2dc2c0d0d7fb661ff00421820170930935", size = 2897, upload-time = "2026-04-24T08:43:56.785Z" }, +] + [[package]] name = "fastapi" version = "0.136.3" @@ -884,6 +908,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/b2/b096ccce418882fbfda4f7496f9357aaa9a5af1896a9a7f60d9f2b275a06/grpcio-1.78.0-cp314-cp314-win_amd64.whl", hash = "sha256:dce09d6116df20a96acfdbf85e4866258c3758180e8c49845d6ba8248b6d0bbb", size = 4929852, upload-time = "2026-02-06T09:56:45.885Z" }, ] +[[package]] +name = "gymnasium" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cloudpickle" }, + { name = "farama-notifications" }, + { name = "numpy" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4d/ff/14b6880d703dfaca204490979d3254ccd280c99550798993319902873658/gymnasium-1.3.0.tar.gz", hash = "sha256:6939e86e835d6b71b6ba6bfd360487420876deafc79bfb7bacba83a7c446bcf3", size = 830646, upload-time = "2026-04-22T13:47:14.155Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/73/fda6a25f3beeb5e49d74330b44092b9e5a547395ccd478d1103ddcbff1fc/gymnasium-1.3.0-py3-none-any.whl", hash = "sha256:6b8c159a8540dcbcb221722d7efda24d78ebbcbc3bd2ea1c2611aa2a34471fc2", size = 953904, upload-time = "2026-04-22T13:47:12.13Z" }, +] + [[package]] name = "h11" version = "0.16.0" From 6c2452fce89aa468391b11cbb20bc481d6d80785 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Tue, 16 Jun 2026 10:59:15 -0400 Subject: [PATCH 14/22] fix(gymnasium-adapter): accept continuous actions in step() type signature step() delegates to decode_action(dict[str, Any]) and exists precisely to round float/continuous policy actions to ints; widen its parameter type from dict[str, int] to dict[str, Any] to match. --- src/cloudai/configurator/gymnasium_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py index 590abf11d..b827e053a 100644 --- a/src/cloudai/configurator/gymnasium_adapter.py +++ b/src/cloudai/configurator/gymnasium_adapter.py @@ -203,7 +203,7 @@ def reset( obs, info = self._env.reset(seed=seed, options=options) return self._as_obs_array(obs), info - def step(self, action: dict[str, int]) -> tuple[Any, float, bool, bool, dict[str, Any]]: + def step(self, action: dict[str, Any]) -> tuple[Any, float, bool, bool, dict[str, Any]]: params = {**self._fixed_params, **self.decode_action(action)} return self._step_with_params(params) From 89a5b8f1122e8b243296aa9498fabd9ad2eca819 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Tue, 16 Jun 2026 11:17:46 -0400 Subject: [PATCH 15/22] fix(gymnasium-adapter): validate structured-observation key parity; preserve traceback on DSE re-raise - _as_obs_array(): assert encoded keys match descriptors before coercion (reuses _assert_keys, same guard as decode_action/step_raw) and materialize output by descriptor keys to avoid KeyError on extra keys and silent partial observations on missing keys. - handlers.py: re-raise the captured hard-fail with its original traceback. Addresses CodeRabbit findings on #930. --- src/cloudai/cli/handlers.py | 2 +- src/cloudai/configurator/gymnasium_adapter.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py index fb6f9b7d8..5fb6796cf 100644 --- a/src/cloudai/cli/handlers.py +++ b/src/cloudai/cli/handlers.py @@ -178,7 +178,7 @@ def handle_dse_job(runner: Runner, args: argparse.Namespace) -> int: ) if run_error is not None: - raise run_error + raise run_error.with_traceback(run_error.__traceback__) logging.info("All jobs are complete.") return err diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py index b827e053a..4ad53d23a 100644 --- a/src/cloudai/configurator/gymnasium_adapter.py +++ b/src/cloudai/configurator/gymnasium_adapter.py @@ -249,7 +249,8 @@ def _as_obs_array(self, obs: Any) -> Any: return self._np.asarray(obs, dtype=self._np.float32) env = cast(StructuredObservation, self._env) encoded = env.encode_observation(list(obs)) - return {name: self._leaf_to_value(descriptors[name], leaf) for name, leaf in encoded.items()} + self._assert_keys(encoded.keys(), set(descriptors), "encoded observation") + return {name: self._leaf_to_value(descriptors[name], encoded[name]) for name in descriptors} def _leaf_to_value(self, descriptor: Any, leaf: Any) -> Any: """Coerce one encoded leaf to its gymnasium subspace dtype.""" From b8684791431fefcfa15f8d7ed888b2268195f912 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Tue, 16 Jun 2026 17:46:18 -0400 Subject: [PATCH 16/22] refactor(configurator): route gymnasium import through LazyImports singleton MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the bespoke _import_gymnasium() in-method seam with the canonical lazy.gymnasium / lazy.np properties; addresses the in-method-import review concern. No behavior change — gymnasium stays an optional [rl] extra. --- src/cloudai/configurator/gymnasium_adapter.py | 27 +++---------------- src/cloudai/util/lazy_imports.py | 15 +++++++++++ 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py index 4ad53d23a..e1b3f9971 100644 --- a/src/cloudai/configurator/gymnasium_adapter.py +++ b/src/cloudai/configurator/gymnasium_adapter.py @@ -38,26 +38,7 @@ from cloudai._core.action_space import ContinuousSpace from cloudai.configurator.base_gym import BaseGym from cloudai.configurator.env_params import StructuredObservation - -_GYMNASIUM_INSTALL_HINT = "gymnasium is required for GymnasiumAdapter. Install it with: pip install 'cloudai[rl]'" - - -def _import_gymnasium(): - """ - Import gymnasium + numpy lazily; raise a clear, actionable error when absent. - - Kept as a single seam so cloudai installs without the ``[rl]`` extra - continue to work for non-RL agents, and tests can patch this helper to - simulate a missing install. - """ - try: - import gymnasium - import numpy as np - from gymnasium import spaces - - return gymnasium, spaces, np - except ImportError as exc: - raise ImportError(_GYMNASIUM_INSTALL_HINT) from exc +from cloudai.util.lazy_imports import lazy class GymnasiumAdapter: @@ -82,10 +63,8 @@ class GymnasiumAdapter: metadata: ClassVar[dict[str, Any]] = {"render_modes": ["human"]} def __init__(self, env: BaseGym) -> None: - _, spaces, np = _import_gymnasium() - - self._np = np - self._spaces = spaces + np = self._np = lazy.np + spaces = self._spaces = lazy.gymnasium.spaces self._env = env raw_action_space = env.define_action_space() diff --git a/src/cloudai/util/lazy_imports.py b/src/cloudai/util/lazy_imports.py index 65ba62df3..1df8059be 100644 --- a/src/cloudai/util/lazy_imports.py +++ b/src/cloudai/util/lazy_imports.py @@ -27,6 +27,7 @@ import bokeh.palettes as bokeh_pallettes import bokeh.plotting as bokeh_plotting import bokeh.transform as bokeh_transform + import gymnasium import kubernetes as k8s import numpy as np import pandas as pd @@ -39,6 +40,7 @@ def __init__(self): self._np: ModuleType | None = None self._pd: ModuleType | None = None self._k8s: ModuleType | None = None + self._gymnasium: ModuleType | None = None self._bokeh: ModuleType | None = None self._bokeh_plotting: ModuleType | None = None self._bokeh_models: ModuleType | None = None @@ -75,6 +77,19 @@ def k8s(self) -> k8s: # type: ignore[no-any-return] return cast("k8s", self._k8s) + @property + def gymnasium(self) -> gymnasium: # type: ignore[no-any-return] + """Lazy import of gymnasium (optional ``cloudai[rl]`` extra).""" + if self._gymnasium is None: + try: + import gymnasium + except ImportError as exc: + raise ImportError( + "gymnasium is required for GymnasiumAdapter. Install it with: pip install 'cloudai[rl]'" + ) from exc + self._gymnasium = gymnasium + return cast("gymnasium", self._gymnasium) + @property def bokeh(self) -> bokeh: # type: ignore[no-any-return] """Lazy import of bokeh.""" From 4b59c499150b41bf5c2e38a88e3392925e3ab7c0 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Tue, 16 Jun 2026 18:33:15 -0400 Subject: [PATCH 17/22] fix(configurator): make lazy-gymnasium refactor land CI-green The lazy.gymnasium.spaces refactor gives the adapter precise gymnasium types instead of Any, which surfaced two latent issues the scoped pre-commit run missed: - pyright now sees Space[Any]/Dict in the adapter contract test, so concrete attribute access (.low/.high/.n/.spaces) is flagged. Narrow via local bindings + isinstance before access. - lazy_imports.py now has a 2026 commit in its history, so the ci_only copyright check requires the year range 2025-2026. --- src/cloudai/util/lazy_imports.py | 2 +- tests/test_gymnasium_adapter_contract.py | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/cloudai/util/lazy_imports.py b/src/cloudai/util/lazy_imports.py index 1df8059be..def790dfc 100644 --- a/src/cloudai/util/lazy_imports.py +++ b/src/cloudai/util/lazy_imports.py @@ -1,5 +1,5 @@ # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES -# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tests/test_gymnasium_adapter_contract.py b/tests/test_gymnasium_adapter_contract.py index cbe48bcfb..ab202dff3 100644 --- a/tests/test_gymnasium_adapter_contract.py +++ b/tests/test_gymnasium_adapter_contract.py @@ -284,13 +284,15 @@ def test_action_space_uses_box_for_continuous_and_discrete_for_list(self) -> Non gym_env = _ContinuousStubBaseGym(self._action_space()) adapter = GymnasiumAdapter(gym_env) - assert isinstance(adapter.action_space["threshold"], gymnasium.spaces.Box) - assert adapter.action_space["threshold"].shape == (1,) - assert float(adapter.action_space["threshold"].low[0]) == pytest.approx(0.0) - assert float(adapter.action_space["threshold"].high[0]) == pytest.approx(200.0) + threshold = adapter.action_space["threshold"] + assert isinstance(threshold, gymnasium.spaces.Box) + assert threshold.shape == (1,) + assert float(threshold.low[0]) == pytest.approx(0.0) + assert float(threshold.high[0]) == pytest.approx(200.0) - assert isinstance(adapter.action_space["discrete"], gymnasium.spaces.Discrete) - assert int(adapter.action_space["discrete"].n) == 3 + discrete = adapter.action_space["discrete"] + assert isinstance(discrete, gymnasium.spaces.Discrete) + assert int(discrete.n) == 3 assert "fixed" not in adapter.action_space, "single-element lists are fixed, not tunable" @@ -464,7 +466,9 @@ def test_discrete_descriptor_becomes_discrete_space(self) -> None: } adapter = GymnasiumAdapter(_StructuredStubBaseGym(descriptors=descriptors)) - variant_space = adapter.observation_space.spaces["variant"] + observation_space = adapter.observation_space + assert isinstance(observation_space, gymnasium.spaces.Dict) + variant_space = observation_space.spaces["variant"] assert isinstance(variant_space, gymnasium.spaces.Discrete) assert int(variant_space.n) == 3 From c30b72357ad63e6245264360ec3ce53ed339ecc7 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Sun, 21 Jun 2026 10:09:30 -0400 Subject: [PATCH 18/22] feat(configurator): make GymnasiumAdapter inherit gymnasium.Env Inherit from gymnasium.Env (guarded import, falls back to object when the optional [rl] extra is absent) so ecosystem tooling that performs isinstance checks (e.g. Stable-Baselines3) accepts the adapter. - Use the TYPE_CHECKING import form so pyright sees a concrete base class while runtime keeps the optional-dependency fallback. - Drop ClassVar on metadata to match Env's attribute shape (noqa RUF012). - Rename the inner-env accessor unwrapped -> cloudai_env; gymnasium's Env.unwrapped (returns self) is the correct base-env semantics, and the old override returned a non-Env (BaseGym), which would mislead ecosystem code calling .unwrapped. --- src/cloudai/configurator/gymnasium_adapter.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py index e1b3f9971..6a05afe5a 100644 --- a/src/cloudai/configurator/gymnasium_adapter.py +++ b/src/cloudai/configurator/gymnasium_adapter.py @@ -33,15 +33,23 @@ from __future__ import annotations -from typing import Any, ClassVar, Optional, cast +from typing import TYPE_CHECKING, Any, Optional, cast from cloudai._core.action_space import ContinuousSpace from cloudai.configurator.base_gym import BaseGym from cloudai.configurator.env_params import StructuredObservation from cloudai.util.lazy_imports import lazy +if TYPE_CHECKING: + from gymnasium import Env as _GymnasiumEnvBase +else: + try: # ``gymnasium`` is an optional [rl] dependency; fall back to ``object`` when absent. + from gymnasium import Env as _GymnasiumEnvBase + except ImportError: + _GymnasiumEnvBase = object -class GymnasiumAdapter: + +class GymnasiumAdapter(_GymnasiumEnvBase): """ Expose a CloudAI :class:`BaseGym` as a ``gymnasium.Env``-shaped object. @@ -60,7 +68,9 @@ class GymnasiumAdapter: instantiating the adapter without them raises ``ImportError``. """ - metadata: ClassVar[dict[str, Any]] = {"render_modes": ["human"]} + # Overrides gymnasium.Env.metadata (a non-ClassVar instance attribute); matching that + # shape satisfies pyright's override check, so RUF012's ClassVar suggestion is silenced. + metadata: dict[str, Any] = {"render_modes": ["human"]} # noqa: RUF012 def __init__(self, env: BaseGym) -> None: np = self._np = lazy.np @@ -132,7 +142,8 @@ def _descriptor_to_space(self, descriptor: Any) -> Any: return self._spaces.Box(low=-self._np.inf, high=self._np.inf, shape=(descriptor.dim,), dtype=self._np.float32) @property - def unwrapped(self) -> BaseGym: + def cloudai_env(self) -> BaseGym: + """Return the wrapped CloudAI :class:`BaseGym` (gymnasium's ``unwrapped`` returns ``self``).""" return self._env def decode_action(self, action: dict[str, Any]) -> dict[str, Any]: From 3fe318ca7f8ea869dc7e34500891f00880caf36d Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Sun, 21 Jun 2026 11:22:55 -0400 Subject: [PATCH 19/22] test(gym): cast action_space to Dict in adapter contract test Inheriting gymnasium.Env widened the static type of action_space from the concrete spaces.Dict the adapter builds to the base spaces.Space, which has no __getitem__. Cast at the test call site to restore subspace indexing for pyright (runtime is unchanged; action_space is always a Dict). --- tests/test_gymnasium_adapter_contract.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_gymnasium_adapter_contract.py b/tests/test_gymnasium_adapter_contract.py index ab202dff3..ad7a8f955 100644 --- a/tests/test_gymnasium_adapter_contract.py +++ b/tests/test_gymnasium_adapter_contract.py @@ -35,7 +35,7 @@ from __future__ import annotations from types import SimpleNamespace -from typing import Any, Optional +from typing import Any, Optional, cast import pytest @@ -284,17 +284,18 @@ def test_action_space_uses_box_for_continuous_and_discrete_for_list(self) -> Non gym_env = _ContinuousStubBaseGym(self._action_space()) adapter = GymnasiumAdapter(gym_env) - threshold = adapter.action_space["threshold"] + action_space = cast(gymnasium.spaces.Dict, adapter.action_space) + threshold = action_space["threshold"] assert isinstance(threshold, gymnasium.spaces.Box) assert threshold.shape == (1,) assert float(threshold.low[0]) == pytest.approx(0.0) assert float(threshold.high[0]) == pytest.approx(200.0) - discrete = adapter.action_space["discrete"] + discrete = action_space["discrete"] assert isinstance(discrete, gymnasium.spaces.Discrete) assert int(discrete.n) == 3 - assert "fixed" not in adapter.action_space, "single-element lists are fixed, not tunable" + assert "fixed" not in action_space, "single-element lists are fixed, not tunable" def test_decode_action_rounds_dtype_int(self) -> None: import numpy as np From 092573ef2d11b04b32013eeb45fe2f3c48808e23 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Sun, 21 Jun 2026 18:25:51 -0400 Subject: [PATCH 20/22] feat(configurator): add GymnasiumAdapter.encode_action as public inverse of decode_action decode_action had no public inverse, so consumers needing value->index encoding (e.g. RLlib warm-start / behavioral cloning) reached into the private _tunable_params dict. When ContinuousSpace support split that internal, those consumers broke with AttributeError. encode_action closes the contract: discrete values map to their candidate index, continuous values wrap into the clamped float32 Box array, so decode_action(encode_action(v)) == v for any native v. Adds round-trip contract tests pinning the invariant and rejection of non-candidate values / key mismatches. --- src/cloudai/configurator/gymnasium_adapter.py | 44 +++++++++++++++++++ tests/test_gymnasium_adapter_contract.py | 44 +++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py index 6a05afe5a..73627334b 100644 --- a/src/cloudai/configurator/gymnasium_adapter.py +++ b/src/cloudai/configurator/gymnasium_adapter.py @@ -184,6 +184,50 @@ def _decode_continuous(self, name: str, raw: Any) -> Any: clamped = min(max(scalar, space.low), space.high) return round(clamped) if space.dtype == "int" else clamped + def encode_action(self, values: dict[str, Any]) -> dict[str, Any]: + """ + Map native parameter values back to raw gym actions; inverse of :meth:`decode_action`. + + Discrete values resolve to their index in the candidate list; continuous + values are wrapped in the 1-D ``float32`` array the ``Box`` space expects + (clamped to the declared range). Together with :meth:`decode_action` this + is an invertible pair on native values: + ``decode_action(encode_action(v)) == v`` for any ``v`` drawn from the + tunable params (continuous ``dtype="int"`` round-trips through the same + rounding ``decode_action`` applies). + + Consumers that need to express known native configs in the policy's + action space — e.g. warm-start / behavioral cloning from a recorded + trajectory — call this instead of reaching into the adapter internals. + + Raises: + ValueError: if ``values`` does not cover exactly the tunable params, + or carries a discrete value absent from its candidate list. + """ + expected = set(self._discrete_params) | set(self._continuous_params) + self._assert_keys(values.keys(), expected, "values") + encoded: dict[str, Any] = {} + for name, value in values.items(): + if name in self._discrete_params: + encoded[name] = self._encode_discrete(name, value) + else: + encoded[name] = self._encode_continuous(name, value) + return encoded + + def _encode_discrete(self, name: str, value: Any) -> int: + values = self._discrete_params[name] + try: + return values.index(value) + except ValueError: + raise ValueError( + f"Value {value!r} for '{name}' is not a candidate; expected one of {values}" + ) from None + + def _encode_continuous(self, name: str, value: Any) -> Any: + space = self._continuous_params[name] + clamped = min(max(float(value), space.low), space.high) + return self._np.asarray([clamped], dtype=self._np.float32) + def reset( self, *, diff --git a/tests/test_gymnasium_adapter_contract.py b/tests/test_gymnasium_adapter_contract.py index ad7a8f955..74048181a 100644 --- a/tests/test_gymnasium_adapter_contract.py +++ b/tests/test_gymnasium_adapter_contract.py @@ -368,6 +368,50 @@ def test_dtype_float_preserves_continuous_value(self) -> None: assert isinstance(decoded["knob"], float) +class TestEncodeDecodeAreInverse: + """``encode_action`` is the inverse of ``decode_action`` on native values. + + Consumers (e.g. RLlib warm-start / behavioral cloning) must be able to + express a recorded native config in the policy's action space without + reaching into adapter internals. The public pair guarantees + ``decode_action(encode_action(v)) == v`` for any native ``v``. + """ + + def test_discrete_round_trip_decode_of_encode_is_identity(self) -> None: + adapter = GymnasiumAdapter(_StubBaseGym()) + native = {"param_a": 3, "param_b": 10} + assert adapter.decode_action(adapter.encode_action(native)) == native + + def test_discrete_encode_of_decode_is_identity(self) -> None: + adapter = GymnasiumAdapter(_StubBaseGym()) + action = {"param_a": 2, "param_b": 1} + assert adapter.encode_action(adapter.decode_action(action)) == action + + def test_encode_maps_value_to_candidate_index(self) -> None: + adapter = GymnasiumAdapter(_StubBaseGym()) + assert adapter.encode_action({"param_a": 2, "param_b": 20}) == {"param_a": 1, "param_b": 1} + + def test_encode_rejects_non_candidate_value(self) -> None: + adapter = GymnasiumAdapter(_StubBaseGym()) + with pytest.raises(ValueError, match="not a candidate"): + adapter.encode_action({"param_a": 7, "param_b": 10}) + + def test_encode_rejects_key_mismatch(self) -> None: + adapter = GymnasiumAdapter(_StubBaseGym()) + with pytest.raises(ValueError, match="keys mismatch"): + adapter.encode_action({"param_a": 1}) # missing param_b + + def test_continuous_int_round_trips_through_rounding(self) -> None: + action_space: dict[str, Any] = { + "threshold": ContinuousSpace(low=0.0, high=200.0, dtype="int"), + "discrete": [10, 20, 30], + "fixed": [99], + } + adapter = GymnasiumAdapter(_ContinuousStubBaseGym(action_space)) + native = {"threshold": 47, "discrete": 20} + assert adapter.decode_action(adapter.encode_action(native)) == native + + class _StructuredStubBaseGym(BaseGym): """BaseGym stub that opts in/out of the structured (Dict) obs path. From 2e2d431c7bd4c8ce844cf235522053c950fddb4b Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Tue, 23 Jun 2026 21:12:00 -0400 Subject: [PATCH 21/22] style(configurator): ruff format gymnasium_adapter.py --- src/cloudai/configurator/gymnasium_adapter.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py index 73627334b..6e0e1d934 100644 --- a/src/cloudai/configurator/gymnasium_adapter.py +++ b/src/cloudai/configurator/gymnasium_adapter.py @@ -219,9 +219,7 @@ def _encode_discrete(self, name: str, value: Any) -> int: try: return values.index(value) except ValueError: - raise ValueError( - f"Value {value!r} for '{name}' is not a candidate; expected one of {values}" - ) from None + raise ValueError(f"Value {value!r} for '{name}' is not a candidate; expected one of {values}") from None def _encode_continuous(self, name: str, value: Any) -> Any: space = self._continuous_params[name] From 4f479dd6564d15bcda0c7e254e59c2bf118be2f7 Mon Sep 17 00:00:00 2001 From: Rutayan Patro Date: Thu, 25 Jun 2026 18:13:10 -0400 Subject: [PATCH 22/22] refactor(gymnasium-adapter): defer continuous-action support with ContinuousSpace The GymnasiumAdapter's continuous-action path depends on ContinuousSpace, which ships separately. Until then nothing constructs a ContinuousSpace, so the continuous branches here are unreachable and the only effect of the import is an ImportError at module load. Drop the continuous import, _continuous_params, the Box action mapping, and decode/encode continuous handling so the adapter builds and ships standalone over discrete + structured-observation support. The continuous support rejoins when ContinuousSpace lands. --- src/cloudai/configurator/gymnasium_adapter.py | 65 ++----- tests/test_gymnasium_adapter_contract.py | 158 +----------------- 2 files changed, 11 insertions(+), 212 deletions(-) diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py index 6e0e1d934..a90a35f15 100644 --- a/src/cloudai/configurator/gymnasium_adapter.py +++ b/src/cloudai/configurator/gymnasium_adapter.py @@ -35,7 +35,6 @@ from typing import TYPE_CHECKING, Any, Optional, cast -from cloudai._core.action_space import ContinuousSpace from cloudai.configurator.base_gym import BaseGym from cloudai.configurator.env_params import StructuredObservation from cloudai.util.lazy_imports import lazy @@ -79,18 +78,13 @@ def __init__(self, env: BaseGym) -> None: raw_action_space = env.define_action_space() - # Three classes of params from cloudai's param_space: + # Two classes of params from cloudai's param_space: # list with len > 1 -> discrete tunable, mapped to gym.Discrete. # list with len == 1 -> fixed (collapsed); injected on every step so # agents never see them. - # ContinuousSpace -> continuous tunable, mapped to gym.Box(1,); - # ``dtype="int"`` quantizes at decode_action. self._discrete_params: dict[str, list] = { k: v for k, v in raw_action_space.items() if isinstance(v, list) and len(v) > 1 } - self._continuous_params: dict[str, ContinuousSpace] = { - k: v for k, v in raw_action_space.items() if isinstance(v, ContinuousSpace) - } self._fixed_params: dict[str, Any] = { k: v[0] for k, v in raw_action_space.items() if isinstance(v, list) and len(v) == 1 } @@ -98,13 +92,6 @@ def __init__(self, env: BaseGym) -> None: action_space_components: dict[str, Any] = { name: spaces.Discrete(len(values)) for name, values in self._discrete_params.items() } - for name, space in self._continuous_params.items(): - action_space_components[name] = spaces.Box( - low=np.array([space.low], dtype=np.float32), - high=np.array([space.high], dtype=np.float32), - shape=(1,), - dtype=np.float32, - ) self.action_space = spaces.Dict(action_space_components) # Observation space: prefer the env's structured (per-leaf) spec so the @@ -151,25 +138,14 @@ def decode_action(self, action: dict[str, Any]) -> dict[str, Any]: Map raw gym actions back to native parameter values. Discrete actions are list indices and resolve to the corresponding list - entry. Continuous actions arrive as a 1-D ``numpy`` array (the Box - shape) and are clamped to the declared range; for ``dtype="int"`` the - scalar is rounded — this is the **only** place quantization happens, so - the cache key collapses float jitter (47.34 vs 47.36) onto the same - emitted int and downstream code stays type-agnostic. + entry. Raises: ValueError: if ``action`` is missing tunable params, contains unknown keys, or carries an out-of-range discrete index. """ - expected = set(self._discrete_params) | set(self._continuous_params) - self._assert_keys(action.keys(), expected, "action") - decoded: dict[str, Any] = {} - for name, raw in action.items(): - if name in self._discrete_params: - decoded[name] = self._decode_discrete(name, raw) - else: - decoded[name] = self._decode_continuous(name, raw) - return decoded + self._assert_keys(action.keys(), set(self._discrete_params), "action") + return {name: self._decode_discrete(name, raw) for name, raw in action.items()} def _decode_discrete(self, name: str, raw: Any) -> Any: values = self._discrete_params[name] @@ -178,23 +154,14 @@ def _decode_discrete(self, name: str, raw: Any) -> Any: raise ValueError(f"Action index out of range for '{name}': {idx} (expected 0..{len(values) - 1})") return values[idx] - def _decode_continuous(self, name: str, raw: Any) -> Any: - space = self._continuous_params[name] - scalar = float(self._np.asarray(raw, dtype=self._np.float32).reshape(-1)[0]) - clamped = min(max(scalar, space.low), space.high) - return round(clamped) if space.dtype == "int" else clamped - def encode_action(self, values: dict[str, Any]) -> dict[str, Any]: """ Map native parameter values back to raw gym actions; inverse of :meth:`decode_action`. - Discrete values resolve to their index in the candidate list; continuous - values are wrapped in the 1-D ``float32`` array the ``Box`` space expects - (clamped to the declared range). Together with :meth:`decode_action` this - is an invertible pair on native values: + Discrete values resolve to their index in the candidate list. Together + with :meth:`decode_action` this is an invertible pair on native values: ``decode_action(encode_action(v)) == v`` for any ``v`` drawn from the - tunable params (continuous ``dtype="int"`` round-trips through the same - rounding ``decode_action`` applies). + tunable params. Consumers that need to express known native configs in the policy's action space — e.g. warm-start / behavioral cloning from a recorded @@ -204,15 +171,8 @@ def encode_action(self, values: dict[str, Any]) -> dict[str, Any]: ValueError: if ``values`` does not cover exactly the tunable params, or carries a discrete value absent from its candidate list. """ - expected = set(self._discrete_params) | set(self._continuous_params) - self._assert_keys(values.keys(), expected, "values") - encoded: dict[str, Any] = {} - for name, value in values.items(): - if name in self._discrete_params: - encoded[name] = self._encode_discrete(name, value) - else: - encoded[name] = self._encode_continuous(name, value) - return encoded + self._assert_keys(values.keys(), set(self._discrete_params), "values") + return {name: self._encode_discrete(name, value) for name, value in values.items()} def _encode_discrete(self, name: str, value: Any) -> int: values = self._discrete_params[name] @@ -221,11 +181,6 @@ def _encode_discrete(self, name: str, value: Any) -> int: except ValueError: raise ValueError(f"Value {value!r} for '{name}' is not a candidate; expected one of {values}") from None - def _encode_continuous(self, name: str, value: Any) -> Any: - space = self._continuous_params[name] - clamped = min(max(float(value), space.low), space.high) - return self._np.asarray([clamped], dtype=self._np.float32) - def reset( self, *, @@ -247,7 +202,7 @@ def step_raw(self, params: dict[str, Any]) -> tuple[Any, float, bool, bool, dict ValueError: if ``params`` does not cover exactly the tunable + fixed param keys. """ - expected = set(self._discrete_params) | set(self._continuous_params) | set(self._fixed_params) + expected = set(self._discrete_params) | set(self._fixed_params) self._assert_keys(params.keys(), expected, "raw params") return self._step_with_params(params) diff --git a/tests/test_gymnasium_adapter_contract.py b/tests/test_gymnasium_adapter_contract.py index 74048181a..a5683a7cc 100644 --- a/tests/test_gymnasium_adapter_contract.py +++ b/tests/test_gymnasium_adapter_contract.py @@ -35,11 +35,10 @@ from __future__ import annotations from types import SimpleNamespace -from typing import Any, Optional, cast +from typing import Any, Optional import pytest -from cloudai._core.action_space import ContinuousSpace from cloudai.configurator.base_gym import BaseGym from cloudai.configurator.env_params import ObsLeafDescriptor @@ -223,151 +222,6 @@ def test_step_propagates_context_into_observation(self) -> None: ) -class _ContinuousStubBaseGym(BaseGym): - """BaseGym stub that surfaces a ContinuousSpace in its action_space. - - Records the params received by ``step`` so the test can assert what the - adapter actually emits to the env (post-rounding / clamping). - """ - - def __init__(self, action_space: dict[str, Any]) -> None: - self._action_space = action_space - self.test_run = SimpleNamespace(step=0) - self.received: list[dict[str, Any]] = [] - super().__init__() - - def define_action_space(self) -> dict[str, Any]: - return self._action_space - - def define_observation_space(self) -> list: - return [0.0] - - def reset( - self, - seed: Optional[int] = None, - options: Optional[dict[str, Any]] = None, - ) -> tuple[list, dict[str, Any]]: - return [0.0], {} - - def step(self, action: Any) -> tuple[list, float, bool, dict]: - self.received.append(dict(action)) - return [1.0], 0.5, False, {} - - def render(self, mode: str = "human") -> None: - return None - - def seed(self, seed: Optional[int] = None) -> None: - pass - - -class TestAdapterDispatchesContinuousSpace: - """``ContinuousSpace`` in cloudai's param_space → ``gym.spaces.Box`` in the adapter. - - The adapter is also responsible for **actuator quantization**: when the - spec is ``dtype="int"``, the float coming out of the policy is rounded at - decode_action, so the env (and trajectory cache) sees only ints. This - collapses float jitter (47.34 vs 47.36) onto the same emitted int and - keeps the cache hit-rate honest. - """ - - @staticmethod - def _action_space() -> dict[str, Any]: - return { - "threshold": ContinuousSpace(low=0.0, high=200.0, dtype="int"), - "discrete": [10, 20, 30], - "fixed": [99], - } - - def test_action_space_uses_box_for_continuous_and_discrete_for_list(self) -> None: - import gymnasium - - gym_env = _ContinuousStubBaseGym(self._action_space()) - adapter = GymnasiumAdapter(gym_env) - - action_space = cast(gymnasium.spaces.Dict, adapter.action_space) - threshold = action_space["threshold"] - assert isinstance(threshold, gymnasium.spaces.Box) - assert threshold.shape == (1,) - assert float(threshold.low[0]) == pytest.approx(0.0) - assert float(threshold.high[0]) == pytest.approx(200.0) - - discrete = action_space["discrete"] - assert isinstance(discrete, gymnasium.spaces.Discrete) - assert int(discrete.n) == 3 - - assert "fixed" not in action_space, "single-element lists are fixed, not tunable" - - def test_decode_action_rounds_dtype_int(self) -> None: - import numpy as np - - gym_env = _ContinuousStubBaseGym(self._action_space()) - adapter = GymnasiumAdapter(gym_env) - - decoded = adapter.decode_action({"threshold": np.array([47.34], dtype=np.float32), "discrete": 1}) - - assert decoded["threshold"] == 47, f"dtype=int must round; got {decoded['threshold']}" - assert isinstance(decoded["threshold"], int), "rounded action must be Python int, not float" - assert decoded["discrete"] == 20, "Discrete index 1 → 20" - - @pytest.mark.parametrize("raw,expected", [(47.34, 47), (47.36, 47), (47.5, 48), (47.4999, 47)]) - def test_cache_key_collapses_float_jitter_to_same_int(self, raw: float, expected: int) -> None: - """Adjacent float actions that round to the same int must collapse identically. - - This is the actuator-quantization invariant from the design doc: the - env (and trajectory cache key) must see the same int for any float in - the rounding interval, so the cache fills with semantic duplicates, - not float-noise duplicates. - """ - import numpy as np - - gym_env = _ContinuousStubBaseGym(self._action_space()) - adapter = GymnasiumAdapter(gym_env) - - decoded = adapter.decode_action({"threshold": np.array([raw], dtype=np.float32), "discrete": 0}) - assert decoded["threshold"] == expected - - @pytest.mark.parametrize("raw,expected", [(-5.0, 0), (250.0, 200), (0.0, 0), (200.0, 200)]) - def test_decode_action_clamps_to_range(self, raw: float, expected: int) -> None: - """Out-of-range continuous actions clamp to ``[low, high]``.""" - import numpy as np - - gym_env = _ContinuousStubBaseGym(self._action_space()) - adapter = GymnasiumAdapter(gym_env) - - decoded = adapter.decode_action({"threshold": np.array([raw], dtype=np.float32), "discrete": 0}) - assert decoded["threshold"] == expected - - def test_step_emits_rounded_int_to_underlying_env(self) -> None: - """The env (and downstream cache) must see the *rounded* int, not the raw float.""" - import numpy as np - - gym_env = _ContinuousStubBaseGym(self._action_space()) - adapter = GymnasiumAdapter(gym_env) - adapter.reset() - - adapter.step({"threshold": np.array([78.6], dtype=np.float32), "discrete": 2}) - - assert gym_env.received, "env.step was not called" - emitted = gym_env.received[-1] - assert emitted["threshold"] == 79, "env must receive rounded int, not raw float" - assert emitted["discrete"] == 30, "Discrete decode unaffected by continuous-action plumbing" - assert emitted["fixed"] == 99, "fixed params must be injected by the adapter" - - def test_dtype_float_preserves_continuous_value(self) -> None: - """``dtype="float"`` clamps but does NOT round; the policy's float reaches the env.""" - import numpy as np - - action_space: dict[str, Any] = { - "knob": ContinuousSpace(low=0.0, high=1.0, dtype="float"), - } - gym_env = _ContinuousStubBaseGym(action_space) - adapter = GymnasiumAdapter(gym_env) - - decoded = adapter.decode_action({"knob": np.array([0.7234], dtype=np.float32)}) - assert decoded["knob"] == pytest.approx(0.7234, rel=1e-4) - assert isinstance(decoded["knob"], float) - - class TestEncodeDecodeAreInverse: """``encode_action`` is the inverse of ``decode_action`` on native values. @@ -401,16 +255,6 @@ def test_encode_rejects_key_mismatch(self) -> None: with pytest.raises(ValueError, match="keys mismatch"): adapter.encode_action({"param_a": 1}) # missing param_b - def test_continuous_int_round_trips_through_rounding(self) -> None: - action_space: dict[str, Any] = { - "threshold": ContinuousSpace(low=0.0, high=200.0, dtype="int"), - "discrete": [10, 20, 30], - "fixed": [99], - } - adapter = GymnasiumAdapter(_ContinuousStubBaseGym(action_space)) - native = {"threshold": 47, "discrete": 20} - assert adapter.decode_action(adapter.encode_action(native)) == native - class _StructuredStubBaseGym(BaseGym): """BaseGym stub that opts in/out of the structured (Dict) obs path.