From 95c192b1f13bb1ea3e784b25d6351f0f8d49657e Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Sat, 20 Jun 2026 17:56:00 -0400
Subject: [PATCH 01/22] feat(configurator): env_params as first-class
 trial-identity field

Make env_params a first-class part of CloudAIGymEnv trial identity so the
trajectory cache keys on (action, env_params) rather than action alone,
fixing the domain-randomization correctness bug where the same action under
a different env_params sample returned a stale reward.

- Cache key now includes env_params; cache-key tests pin the contract
  (formerly the TDD-red specs of #900, folded in here).
- Keep env.csv and trajectory.csv 1:1 step-aligned: a single TrajectoryEntry
  sinks both files coherently, including on constraint failure.
- Reject env_params on non-DSE jobs; reject non-finite / negative weights.
- Add cache-hit + declared-env_params integration coverage.

Folds the test-only PR #900 (cache-key TDD) into this PR so the stack has no
permanently-red standalone PR.
---
 src/cloudai/_core/test_scenario.py      |   1 +
 src/cloudai/cli/handlers.py             |  27 +-
 src/cloudai/configurator/cloudai_gym.py |  66 ++++-
 src/cloudai/configurator/env_params.py  | 169 +++++++++++++
 src/cloudai/models/workload.py          |  10 +
 tests/test_cloudaigym.py                | 322 ++++++++++++++++++++++++
 tests/test_env_params.py                | 114 +++++++++
 tests/test_handlers.py                  |  50 ++++
 8 files changed, 755 insertions(+), 4 deletions(-)
 create mode 100644 src/cloudai/configurator/env_params.py
 create mode 100644 tests/test_env_params.py

diff --git a/src/cloudai/_core/test_scenario.py b/src/cloudai/_core/test_scenario.py
index 15efe8b2e..81f2723b0 100644
--- a/src/cloudai/_core/test_scenario.py
+++ b/src/cloudai/_core/test_scenario.py
@@ -97,6 +97,7 @@ class TestRun:
     reports: Set[Type[ReportGenerationStrategy]] = field(default_factory=set)
     extra_srun_args: str | None = None
     num_nodes_explicit: bool = False
+    current_env_params: dict[str, Any] = field(default_factory=dict)
 
     def __hash__(self) -> int:
         return hash(self.name + self.test.name + str(self.iterations) + str(self.current_iteration))
diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py
index 3fd099dc0..87e4ccfd5 100644
--- a/src/cloudai/cli/handlers.py
+++ b/src/cloudai/cli/handlers.py
@@ -39,6 +39,7 @@
     System,
     TestParser,
     TestScenario,
+    TestScenarioParsingError,
 )
 from cloudai.models.scenario import ReportConfig
 from cloudai.models.workload import TestDefinition
@@ -297,12 +298,35 @@ def _check_installation(
     return result
 
 
+def validate_dse_env_params(test_scenario: TestScenario) -> None:
+    """
+    Reject prepped configs that declare env_params on a non-DSE test run.
+
+    env_params are sampled only during DSE (by CloudAIGymEnv); on a non-DSE run they would be
+    silently ignored. is_dse_job is a property of the fully prepped config, so this is validated
+    here rather than at parse time.
+    """
+    offenders = [tr.name for tr in test_scenario.test_runs if tr.test.env_params and not tr.is_dse_job]
+    if offenders:
+        raise TestScenarioParsingError(
+            f"Tests {offenders} declare env_params but are not DSE jobs. env_params are sampled only during "
+            "DSE (by CloudAIGymEnv); add a sweep (a list-valued cmd_args/extra_env_vars entry or num_nodes) "
+            "or remove env_params."
+        )
+
+
 def handle_dry_run_and_run(args: argparse.Namespace) -> int:
     setup_result = _setup_system_and_scenario(args)
     if setup_result is None:
         return 1
     system, test_scenario, tests = setup_result
 
+    try:
+        validate_dse_env_params(test_scenario)
+    except TestScenarioParsingError as e:
+        logging.error(str(e))
+        return 1
+
     if not _handle_single_sbatch(args, system):
         return 1
 
@@ -491,7 +515,8 @@ def verify_test_scenarios(
             tests = Parser.parse_tests(test_tomls, system)
             hook_tests = Parser.parse_tests(hook_test_tomls, system)
             hooks = Parser.parse_hooks(hook_tomls, system, {t.name: t for t in hook_tests})
-            Parser.parse_test_scenario(scenario_file, system, {t.name: t for t in tests}, hooks)
+            scenario = Parser.parse_test_scenario(scenario_file, system, {t.name: t for t in tests}, hooks)
+            validate_dse_env_params(scenario)
         except Exception:
             nfailed += 1
 
diff --git a/src/cloudai/configurator/cloudai_gym.py b/src/cloudai/configurator/cloudai_gym.py
index 258a47f3c..80c5210f1 100644
--- a/src/cloudai/configurator/cloudai_gym.py
+++ b/src/cloudai/configurator/cloudai_gym.py
@@ -19,13 +19,14 @@
 import dataclasses
 import logging
 from pathlib import Path
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 from cloudai.core import METRIC_ERROR, BaseRunner, Registry, TestRun
 from cloudai.util.lazy_imports import lazy
 
 from .base_agent import RewardOverrides
 from .base_gym import BaseGym
+from .env_params import CsvSink, EnvParamsObserver, EnvParamsSink, StepObserver
 
 
 @dataclasses.dataclass(frozen=True)
@@ -36,6 +37,7 @@ class TrajectoryEntry:
     action: dict[str, Any]
     reward: float
     observation: list
+    env_params: dict[str, Any] = dataclasses.field(default_factory=dict)
 
 
 class CloudAIGymEnv(BaseGym):
@@ -61,8 +63,28 @@ def __init__(self, test_run: TestRun, runner: BaseRunner, rewards: RewardOverrid
         self.max_steps = test_run.test.agent_steps
         self.reward_function = Registry().get_reward_function(test_run.test.agent_reward_function)
         self.trajectory: dict[int, list[TrajectoryEntry]] = {}
+        self._env_sink: EnvParamsSink | None = None
+        self.observers: List[StepObserver] = self._build_observers()
         super().__init__()
 
+    def _build_observers(self) -> List[StepObserver]:
+        """
+        Construct the per-step observers implied by the TestDefinition.
+
+        Workloads opt in to env_params via a TOML ``[env_params.<name>]`` block;
+        an empty mapping yields no observers and zero overhead.
+        """
+        observers: List[StepObserver] = []
+        if self.test_run.test.env_params:
+            seed = int((self.test_run.test.agent_config or {}).get("random_seed", 0))
+            self._env_sink = CsvSink(self._env_csv_path())
+            observers.append(EnvParamsObserver(self.test_run.test.env_params, seed))
+        return observers
+
+    def _env_csv_path(self) -> Path:
+        """``env.csv`` lives alongside ``trajectory.csv`` so a plain ``merge`` joins them."""
+        return self.trajectory_file_path.parent / "env.csv"
+
     def define_action_space(self) -> Dict[str, list[Any]]:
         return self.test_run.param_space
 
@@ -121,6 +143,9 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]:
         self.test_run.increment_step()
         self.test_run = self.test_run.apply_params_set(action)
 
+        for observer in self.observers:
+            observer.before_step(self.test_run)
+
         cached_result = self.get_cached_trajectory_result(action)
         if cached_result is not None:
             logging.info(
@@ -134,8 +159,11 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]:
                     action=action,
                     reward=cached_result.reward,
                     observation=cached_result.observation,
+                    env_params=dict(self.test_run.current_env_params),
                 )
             )
+            for observer in self.observers:
+                observer.after_step(self.test_run, cached_result.observation, cached_result.reward)
             return cached_result.observation, cached_result.reward, False, {}
 
         if not self.test_run.test.constraint_check(self.test_run, self.runner.system):
@@ -162,6 +190,11 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]:
             self.test_run.step = new_tr.step
             self.test_run.output_path = new_tr.output_path
 
+        # Rebuilding/replacing test_run above can drop the per-trial env_params sample;
+        # restore it so the trajectory entry, the cache key, and env.csv all record the
+        # params the trial actually ran with.
+        self.test_run.current_env_params = new_tr.current_env_params
+
         observation = self.get_observation(action)
         reward = self.compute_reward(observation)
 
@@ -171,9 +204,13 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]:
                 action=action,
                 reward=reward,
                 observation=observation,
+                env_params=dict(self.test_run.current_env_params),
             )
         )
 
+        for observer in self.observers:
+            observer.after_step(self.test_run, observation, reward)
+
         return observation, reward, False, {}
 
     def render(self, mode: str = "human"):
@@ -230,7 +267,14 @@ def get_observation(self, action: Any) -> list:
         return observation
 
     def write_trajectory(self, entry: TrajectoryEntry):
-        """Append the trajectory to the CSV file and to the local attribute."""
+        """
+        Append the entry to the in-memory cache and trajectory.csv (plus env.csv when declared).
+
+        ``trajectory.csv`` and the ``env.csv`` projection are sunk from the same
+        ``TrajectoryEntry`` here, so a trial that never produces an entry (e.g. a
+        constraint failure returns before this call) lands in neither file and the
+        two stay 1:1 step-aligned.
+        """
         self.current_trajectory.append(entry)
 
         file_exists = self.trajectory_file_path.exists()
@@ -243,6 +287,9 @@ def write_trajectory(self, entry: TrajectoryEntry):
                 writer.writerow(["step", "action", "reward", "observation"])
             writer.writerow([entry.step, entry.action, entry.reward, entry.observation])
 
+        if self._env_sink is not None:
+            self._env_sink.write(entry.step, entry.env_params)
+
     @property
     def trajectory_file_path(self) -> Path:
         return self.runner.scenario_root / self.test_run.name / f"{self.test_run.current_iteration}" / "trajectory.csv"
@@ -252,8 +299,21 @@ def current_trajectory(self) -> list[TrajectoryEntry]:
         return self.trajectory.setdefault(self.test_run.current_iteration, [])
 
     def get_cached_trajectory_result(self, action: Any) -> TrajectoryEntry | None:
+        """
+        Return a cached entry only when the full trial identity matches.
+
+        Trial identity is ``(action, env_params)``: env-randomized parameters
+        change the workload's behaviour, so a trial repeating the same action
+        under a different ``env_params`` sample must miss and re-run. Empty
+        env_params on both sides is the back-compat path for workloads that
+        do not declare any ``[env_params.*]`` block.
+        """
+        current_env_params = getattr(self.test_run, "current_env_params", {}) or {}
         for entry in self.current_trajectory:
-            if self._values_match_exact(entry.action, action):
+            if not self._values_match_exact(entry.action, action):
+                continue
+            entry_env = getattr(entry, "env_params", {}) or {}
+            if self._values_match_exact(entry_env, current_env_params):
                 return entry
 
         return None
diff --git a/src/cloudai/configurator/env_params.py b/src/cloudai/configurator/env_params.py
new file mode 100644
index 000000000..44bf57524
--- /dev/null
+++ b/src/cloudai/configurator/env_params.py
@@ -0,0 +1,169 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Domain-randomization primitives for CloudAI DSE.
+
+An env-randomized parameter is a workload knob whose value the environment
+samples per trial (categorical, optional weights). It is sibling to
+``cmd_args`` on a ``TestDefinition`` and does not enter the agent's action
+space; the policy learns a robust mapping under that variation.
+
+This module owns the data schema (``EnvParamSpec``), the deterministic
+sampler (``EnvParamsSampler``), the persistence interface
+(``EnvParamsSink`` + ``CsvSink``) and the per-step observer
+(``EnvParamsObserver``). ``CloudAIGymEnv`` consumes these directly so the
+artifacts (``env.csv``) and the cache key align 1:1 with ``trajectory.csv``
+regardless of agent (PPO, BO, GA, MAB) or workload.
+"""
+
+from __future__ import annotations
+
+import csv
+import math
+import random
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Protocol, runtime_checkable
+
+from pydantic import BaseModel, ConfigDict, Field, model_validator
+from typing_extensions import Self
+
+
+class EnvParamSpec(BaseModel):
+    """Specification of one env-randomized parameter (categorical)."""
+
+    model_config = ConfigDict(extra="forbid")
+
+    values: List[Any] = Field(
+        min_length=2,
+        description="Candidate values; a single-valued parameter is just a fixed cmd_args entry.",
+    )
+    weights: Optional[List[float]] = Field(
+        default=None,
+        description="Optional probability weights aligned with values; uniform if omitted.",
+    )
+
+    @model_validator(mode="after")
+    def _validate_weights(self) -> Self:
+        if self.weights is None:
+            return self
+        if len(self.weights) != len(self.values):
+            raise ValueError(
+                f"env_params weights length {len(self.weights)} does not match values length {len(self.values)}"
+            )
+        for w in self.weights:
+            if not math.isfinite(w) or w < 0:
+                raise ValueError(f"env_params weights must be finite and non-negative; got {w}")
+        if sum(self.weights) <= 0:
+            raise ValueError("env_params weights must have a positive sum")
+        return self
+
+
+class EnvParamsSampler:
+    """
+    Per-trial categorical sampler.
+
+    Determinism contract: ``sample(t)`` returns the same dict on every call
+    (across processes) for the same ``(seed, env_params, t)``.
+
+    Independence contract: each parameter uses an RNG seeded by
+    ``f"{seed}:{name}:{trial}"`` so adding or removing an unrelated
+    parameter does not perturb existing parameters' draw sequences.
+    """
+
+    def __init__(self, env_params: Dict[str, EnvParamSpec], seed: int) -> None:
+        self._env_params = env_params
+        self._seed = seed
+
+    def sample(self, trial: int) -> Dict[str, Any]:
+        out: Dict[str, Any] = {}
+        for name, spec in self._env_params.items():
+            rng = random.Random(f"{self._seed}:{name}:{trial}")
+            if spec.weights is not None:
+                out[name] = rng.choices(spec.values, weights=spec.weights, k=1)[0]
+            else:
+                out[name] = rng.choice(spec.values)
+        return out
+
+
+@runtime_checkable
+class EnvParamsSink(Protocol):
+    """Persist one trial's env_params sample; empty samples must be no-ops."""
+
+    def write(self, step: int, sample: Dict[str, Any]) -> None: ...
+
+
+class CsvSink:
+    """
+    Append per-trial env_params samples to a step-aligned CSV.
+
+    The CSV mirrors how ``trajectory.csv`` serialises its ``action`` column
+    (one row per env.step(), sample dict stringified in a single cell) so the
+    two files align 1:1 on ``step`` and a plain ``merge`` joins them.
+    """
+
+    def __init__(self, path: Path) -> None:
+        self._path = path
+
+    def write(self, step: int, sample: Dict[str, Any]) -> None:
+        if step < 1:
+            raise ValueError(f"step must be a positive trial index (cloudai DSE is 1-based); got {step}")
+        if not sample:
+            return
+        new_file = not self._path.exists()
+        self._path.parent.mkdir(parents=True, exist_ok=True)
+        with self._path.open("a", newline="") as f:
+            writer = csv.writer(f)
+            if new_file:
+                writer.writerow(("step", "env"))
+            writer.writerow([step, sample])
+
+
+@runtime_checkable
+class StepObserver(Protocol):
+    """
+    Hook fired by ``CloudAIGymEnv.step()`` around each trial.
+
+    ``before_step`` runs before the cache lookup and before any workload
+    execution. ``after_step`` runs after the trajectory row is written.
+    """
+
+    def before_step(self, test_run: Any) -> None: ...
+
+    def after_step(self, test_run: Any, observation: list, reward: float) -> None: ...
+
+
+class EnvParamsObserver:
+    """
+    Sample env_params per step and stash them for the cache and the workload.
+
+    Pre-step: samples ``test_run.test.env_params`` for ``test_run.step`` and
+    stashes the result on ``test_run.current_env_params`` so the cache key and
+    the workload's substitution both see it. Persistence is owned by
+    ``CloudAIGymEnv.write_trajectory``, which sinks ``trajectory.csv`` and the
+    ``env.csv`` projection from the single ``TrajectoryEntry`` - so a trial that
+    writes no trajectory row writes no env.csv row either, keeping the two files
+    1:1 step-aligned. Post-step: no-op.
+    """
+
+    def __init__(self, env_params: Dict[str, EnvParamSpec], seed: int) -> None:
+        self._sampler = EnvParamsSampler(env_params, seed=seed)
+
+    def before_step(self, test_run: Any) -> None:
+        test_run.current_env_params = self._sampler.sample(test_run.step)
+
+    def after_step(self, test_run: Any, observation: list, reward: float) -> None:
+        del test_run, observation, reward  # persistence handled by CloudAIGymEnv.write_trajectory
diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py
index 8b981d8ea..99637d4e8 100644
--- a/src/cloudai/models/workload.py
+++ b/src/cloudai/models/workload.py
@@ -23,6 +23,8 @@
 
 from cloudai.core import GitRepo, Installable, JobStatusResult, PythonExecutable, Registry, System, TestRun
 
+from ..configurator.env_params import EnvParamSpec
+
 
 class CmdArgs(BaseModel):
     """Test command arguments."""
@@ -110,6 +112,14 @@ class TestDefinition(BaseModel, ABC):
     agent_metrics: list[str] = Field(default=["default"])
     agent_reward_function: str = "inverse"
     agent_config: dict[str, Any] | None = Field(default=None, description="Agent configuration.")
+    env_params: dict[str, EnvParamSpec] = Field(
+        default_factory=dict,
+        description=(
+            "Domain-randomized parameters sampled by the env per trial. Sibling to "
+            "cmd_args; not part of the agent's action space. CloudAIGymEnv samples, "
+            "persists to env.csv, and includes them in the trajectory cache key."
+        ),
+    )
 
     @property
     def cmd_args_dict(self) -> Dict[str, Union[str, List[str]]]:
diff --git a/tests/test_cloudaigym.py b/tests/test_cloudaigym.py
index 4cd7c6fb8..e687d4373 100644
--- a/tests/test_cloudaigym.py
+++ b/tests/test_cloudaigym.py
@@ -21,6 +21,7 @@
 import pytest
 
 from cloudai.configurator import CloudAIGymEnv, GridSearchAgent, TrajectoryEntry
+from cloudai.configurator.env_params import EnvParamSpec
 from cloudai.core import BaseRunner, RewardOverrides, Runner, TestRun, TestScenario
 from cloudai.systems.slurm import SlurmSystem
 from cloudai.util import flatten_dict
@@ -441,3 +442,324 @@ def test_cached_step_appends_trajectory_row(nemorun: NeMoRunTestDefinition, tmp_
     contents = csv_path.read_text().strip().splitlines()
     assert contents[0] == "step,action,reward,observation"
     assert contents[-1].startswith("5,")
+
+
+def _seed_cached_entry_with_env_params(
+    env: CloudAIGymEnv, action: dict[str, object], env_params: dict[str, object]
+) -> None:
+    """Seed env.trajectory with one entry carrying the given env_params."""
+    entry = TrajectoryEntry(step=1, action=action, reward=0.5, observation=[100.0], env_params=env_params)
+    env.test_run.current_iteration = 0
+    env.trajectory = {0: [entry]}
+
+
+def test_cache_miss_when_env_params_differ(base_tr: TestRun, tmp_path: Path) -> None:
+    """Cache MUST miss when env_params differ, even if action is identical.
+
+    Without this property the agent receives stale rewards on every cache hit
+    under domain randomization. PPO/DQN/BO all silently train on labels that
+    do not correspond to the env they were nominally generated under.
+    """
+    runner = MagicMock()
+    runner.scenario_root = tmp_path / "scenario"
+    runner.system = MagicMock()
+    runner.test_scenario = MagicMock(test_runs=[])
+    runner.jobs = {}
+    runner.testrun_to_job_map = {}
+
+    env = CloudAIGymEnv(test_run=base_tr, runner=runner, rewards=RewardOverrides())
+    _seed_cached_entry_with_env_params(env, {"x": 10}, env_params={"drop_rate": 0.001})
+
+    env.test_run.current_env_params = {"drop_rate": 0.01}
+
+    assert env.get_cached_trajectory_result({"x": 10}) is None, (
+        "Cache must include env_params in its key. The current implementation "
+        "keys on action alone, so trials repeating the same action under a "
+        "different env_params sample receive a stale cached reward. See "
+        "env-params-cloudai-corpus-plan.md."
+    )
+
+
+def test_cache_hit_when_action_and_env_params_match(base_tr: TestRun, tmp_path: Path) -> None:
+    """Same action AND same env_params must still HIT the cache."""
+    runner = MagicMock()
+    runner.scenario_root = tmp_path / "scenario"
+    runner.system = MagicMock()
+    runner.test_scenario = MagicMock(test_runs=[])
+    runner.jobs = {}
+    runner.testrun_to_job_map = {}
+
+    env = CloudAIGymEnv(test_run=base_tr, runner=runner, rewards=RewardOverrides())
+    _seed_cached_entry_with_env_params(env, {"x": 10}, env_params={"drop_rate": 0.001})
+
+    env.test_run.current_env_params = {"drop_rate": 0.001}
+
+    result = env.get_cached_trajectory_result({"x": 10})
+    assert result is not None
+    assert result.step == 1
+
+
+def test_cache_hit_when_neither_has_env_params(base_tr: TestRun, tmp_path: Path) -> None:
+    """Workloads without env_params behave exactly as today (back-compat)."""
+    runner = MagicMock()
+    runner.scenario_root = tmp_path / "scenario"
+    runner.system = MagicMock()
+    runner.test_scenario = MagicMock(test_runs=[])
+    runner.jobs = {}
+    runner.testrun_to_job_map = {}
+
+    env = CloudAIGymEnv(test_run=base_tr, runner=runner, rewards=RewardOverrides())
+    env.test_run.current_iteration = 0
+    env.trajectory = {0: [TrajectoryEntry(step=1, action={"x": 10}, reward=0.5, observation=[100.0])]}
+    # Note: neither the cached entry nor test_run carries env_params -> existing behavior.
+
+    result = env.get_cached_trajectory_result({"x": 10})
+    assert result is not None
+    assert result.step == 1
+
+
+def test_step_reruns_workload_when_env_params_change(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None:
+    """Integration: env.step() with same action but different env_params re-runs the workload.
+
+    Counterpart to test_cache_miss_when_env_params_differ but exercising the
+    full step() flow: increment_step -> apply_params_set -> cache lookup ->
+    runner.run() -> write_trajectory.
+    """
+    tdef = nemorun.model_copy(deep=True)
+    tdef.cmd_args.data.global_batch_size = 8
+    tdef.agent_metrics = ["default"]
+    test_run = TestRun(
+        name="dr_tr",
+        test=tdef,
+        num_nodes=1,
+        nodes=[],
+        output_path=tmp_path / "out" / "dr_tr" / "0",
+        reports={NeMoRunReportGenerationStrategy},
+    )
+    test_scenario = TestScenario(name="dr_scenario", test_runs=[test_run])
+
+    runner = MagicMock(spec=BaseRunner)
+    runner.scenario_root = tmp_path / "scenario"
+    runner.system = MagicMock()
+    runner.test_scenario = test_scenario
+    runner.jobs = {}
+    runner.testrun_to_job_map = {}
+    runner.shutting_down = False
+    runner.get_job_output_path.return_value = test_run.output_path
+
+    env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides())
+    action = {"trainer.max_steps": 1000}
+    fake_obs = iter([[100.0], [50.0]])
+
+    with patch.object(env, "get_observation", side_effect=lambda _action: next(fake_obs)):
+        env.test_run.step = 0
+        env.test_run.current_env_params = {"drop_rate": 0.001}
+        obs1, _r1, *_ = env.step(action)
+
+        env.test_run.current_env_params = {"drop_rate": 0.01}
+        obs2, _r2, *_ = env.step(action)
+
+    assert runner.run.call_count == 2, (
+        "Different env_params between two env.step() calls with the same action "
+        "must trigger a workload re-run; the cache lookup must miss."
+    )
+    assert obs1 != obs2, "fresh workload run should produce a fresh observation"
+
+
+def test_env_csv_is_step_aligned_with_trajectory(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None:
+    """env.csv must have exactly one row per env.step() call, with steps aligned 1:1 to trajectory.csv.
+
+    This pins the corpus-friendly contract: a downstream consumer can
+    ``pd.merge(traj, env, on="step")`` without losing rows on either side,
+    independent of whether the trial hit the trajectory cache.
+    """
+    tdef = nemorun.model_copy(deep=True)
+    tdef.cmd_args.data.global_batch_size = 8
+    tdef.agent_metrics = ["default"]
+    tdef.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])}
+    tdef.agent_config = {"random_seed": 42}
+
+    test_run = TestRun(
+        name="dr_tr",
+        test=tdef,
+        num_nodes=1,
+        nodes=[],
+        output_path=tmp_path / "out" / "dr_tr" / "0",
+        reports={NeMoRunReportGenerationStrategy},
+    )
+    test_scenario = TestScenario(name="dr_scenario", test_runs=[test_run])
+
+    runner = MagicMock(spec=BaseRunner)
+    runner.scenario_root = tmp_path / "scenario"
+    runner.system = MagicMock()
+    runner.test_scenario = test_scenario
+    runner.jobs, runner.testrun_to_job_map, runner.shutting_down = {}, {}, False
+    runner.get_job_output_path.return_value = test_run.output_path
+
+    env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides())
+    action_a, action_b = {"trainer.max_steps": 1000}, {"trainer.max_steps": 2000}
+    fake_obs = iter([[100.0], [50.0], [25.0]])
+
+    with patch.object(env, "get_observation", side_effect=lambda _action: next(fake_obs)):
+        env.test_run.step = 0
+        for action in (action_a, action_b, action_a):
+            env.step(action)
+
+    env_csv = env._env_csv_path()
+    traj_csv = env.trajectory_file_path
+    assert env_csv.exists(), "env.csv must be written when env_params is declared"
+
+    env_steps = [int(line.split(",", 1)[0]) for line in env_csv.read_text().strip().splitlines()[1:]]
+    traj_steps = [int(line.split(",", 1)[0]) for line in traj_csv.read_text().strip().splitlines()[1:]]
+    assert env_steps == traj_steps == [1, 2, 3], (
+        f"step columns must align 1:1 across env.csv ({env_steps}) and trajectory.csv ({traj_steps})"
+    )
+
+
+def test_env_csv_step_alignment_holds_on_constraint_failure(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None:
+    """A constraint failure must not desync env.csv from trajectory.csv.
+
+    Runs three steps where the middle one fails ``constraint_check`` and the
+    other two succeed. ``env.csv`` is sunk inside ``write_trajectory`` from the
+    same ``TrajectoryEntry``, which is never reached on the early-return
+    constraint-failure path - so the failed step lands in neither file. The
+    corpus-friendly contract (``pd.merge(traj, env, on="step")`` loses no rows)
+    therefore holds via shared absence: both files record exactly the surviving
+    steps, aligned 1:1.
+    """
+    tdef = nemorun.model_copy(deep=True)
+    tdef.cmd_args.data.global_batch_size = 8
+    tdef.agent_metrics = ["default"]
+    tdef.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])}
+    tdef.agent_config = {"random_seed": 42}
+
+    test_run = TestRun(
+        name="dr_tr",
+        test=tdef,
+        num_nodes=1,
+        nodes=[],
+        output_path=tmp_path / "out" / "dr_tr" / "0",
+        reports={NeMoRunReportGenerationStrategy},
+    )
+    test_scenario = TestScenario(name="dr_scenario", test_runs=[test_run])
+
+    runner = MagicMock(spec=BaseRunner)
+    runner.scenario_root = tmp_path / "scenario"
+    runner.system = MagicMock()
+    runner.test_scenario = test_scenario
+    runner.jobs, runner.testrun_to_job_map, runner.shutting_down = {}, {}, False
+    runner.get_job_output_path.return_value = test_run.output_path
+
+    env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides())
+
+    # Step 2 fails the constraint; steps 1 and 3 survive. get_observation is only
+    # reached on the surviving steps, so it yields exactly two values.
+    fake_obs = iter([[100.0], [25.0]])
+    with (
+        patch.object(NeMoRunTestDefinition, "constraint_check", side_effect=[True, False, True]),
+        patch.object(env, "get_observation", side_effect=lambda _action: next(fake_obs)),
+    ):
+        env.test_run.step = 0
+        for action in ({"trainer.max_steps": 1000}, {"trainer.max_steps": 2000}, {"trainer.max_steps": 3000}):
+            env.step(action)
+
+    env_csv = env._env_csv_path()
+    traj_csv = env.trajectory_file_path
+
+    assert env_csv.exists(), "surviving steps declare env_params -> env.csv must exist"
+    env_steps = [int(line.split(",", 1)[0]) for line in env_csv.read_text().strip().splitlines()[1:]]
+    traj_steps = (
+        [int(line.split(",", 1)[0]) for line in traj_csv.read_text().strip().splitlines()[1:]]
+        if traj_csv.exists()
+        else []
+    )
+    assert env_steps == traj_steps == [1, 3], (
+        f"the constraint-failed step (2) must appear in neither file; env.csv ({env_steps}) "
+        f"and trajectory.csv ({traj_steps}) must stay 1:1 aligned on the surviving steps"
+    )
+
+
+def test_step_cache_hit_with_declared_env_params_still_writes_env_csv(
+    nemorun: NeMoRunTestDefinition, tmp_path: Path
+) -> None:
+    """End-to-end: cache HIT under observer-driven env_params still records env.csv.
+
+    A cache hit still calls ``write_trajectory``, which sinks the trajectory row
+    and the matching env.csv row from the same entry - keeping the two files
+    step-aligned even when the workload itself is short-circuited.
+    Asserts: (a) the workload is NOT re-run (cache short-circuit), (b)
+    env.csv gains a row, (c) trajectory.csv gains a row carrying the
+    sampled env_params.
+    """
+    import random as _random
+
+    tdef = nemorun.model_copy(deep=True)
+    tdef.cmd_args.data.global_batch_size = 8
+    tdef.agent_metrics = ["default"]
+    tdef.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])}
+    tdef.agent_config = {"random_seed": 42}
+
+    test_run = TestRun(
+        name="dr_tr",
+        test=tdef,
+        num_nodes=1,
+        nodes=[],
+        output_path=tmp_path / "out" / "dr_tr" / "0",
+        reports={NeMoRunReportGenerationStrategy},
+    )
+    runner = MagicMock(spec=BaseRunner)
+    runner.scenario_root = tmp_path / "scenario"
+    runner.system = MagicMock()
+    runner.test_scenario = TestScenario(name="dr_scenario", test_runs=[test_run])
+    runner.jobs, runner.testrun_to_job_map, runner.shutting_down = {}, {}, False
+    runner.get_job_output_path.return_value = test_run.output_path
+
+    env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides())
+    assert env.observers, "TestDefinition.env_params declared -> observer must be built"
+
+    expected_sample = {"drop_rate": _random.Random("42:drop_rate:1").choice([0.0, 0.001, 0.01])}
+    action = {"trainer.max_steps": 1000}
+    env.test_run.current_iteration = 0
+    env.trajectory = {
+        0: [TrajectoryEntry(step=0, action=action, reward=0.42, observation=[0.84], env_params=expected_sample)]
+    }
+    env.test_run.step = 0
+
+    with patch.object(env, "get_observation", side_effect=AssertionError("cache miss path must not run")):
+        obs, reward, _done, _info = env.step(action)
+
+    runner.run.assert_not_called()
+    assert reward == 0.42 and obs == [0.84]
+
+    env_csv = env._env_csv_path()
+    assert env_csv.exists(), "cache HIT must NOT skip the observer; env.csv must record the trial"
+    env_rows = env_csv.read_text().strip().splitlines()
+    assert env_rows[0] == "step,env"
+    assert env_rows[1].startswith("1,"), f"expected step 1 row in env.csv, got {env_rows[1]!r}"
+
+    traj_rows = env.trajectory[0]
+    assert len(traj_rows) == 2 and traj_rows[-1].env_params == expected_sample, (
+        "cache-hit trajectory entry must record the per-trial env_params sample"
+    )
+
+
+def test_no_env_csv_when_env_params_not_declared(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None:
+    """Workloads without [env_params.*] pay zero overhead: no observer, no env.csv."""
+    tdef = nemorun.model_copy(deep=True)
+    tdef.cmd_args.data.global_batch_size = 8
+    test_run = TestRun(
+        name="plain_tr",
+        test=tdef,
+        num_nodes=1,
+        nodes=[],
+        output_path=tmp_path / "out" / "plain_tr" / "0",
+        reports={NeMoRunReportGenerationStrategy},
+    )
+    runner = MagicMock(spec=BaseRunner)
+    runner.scenario_root = tmp_path / "scenario"
+    runner.system = MagicMock()
+
+    env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides())
+
+    assert env.observers == [], "no env_params declared -> no per-step observers"
+    assert not env._env_csv_path().exists()
diff --git a/tests/test_env_params.py b/tests/test_env_params.py
new file mode 100644
index 000000000..3aa931607
--- /dev/null
+++ b/tests/test_env_params.py
@@ -0,0 +1,114 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unit tests for the domain-randomization primitives in cloudai.configurator.env_params."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+from pydantic import ValidationError
+
+from cloudai.configurator.env_params import (
+    CsvSink,
+    EnvParamsObserver,
+    EnvParamSpec,
+    EnvParamsSampler,
+)
+
+
+def test_env_param_spec_requires_at_least_two_values() -> None:
+    with pytest.raises(ValidationError):
+        EnvParamSpec(values=[0.0])
+
+
+def test_env_param_spec_rejects_mismatched_weights() -> None:
+    with pytest.raises(ValidationError):
+        EnvParamSpec(values=[0.0, 0.1], weights=[1.0])
+
+
+def test_env_param_spec_rejects_zero_sum_weights() -> None:
+    with pytest.raises(ValidationError):
+        EnvParamSpec(values=[0.0, 0.1], weights=[0.0, 0.0])
+
+
+@pytest.mark.parametrize("bad", [float("inf"), float("nan"), float("-inf")])
+def test_env_param_spec_rejects_non_finite_weights(bad: float) -> None:
+    with pytest.raises(ValidationError):
+        EnvParamSpec(values=[0.0, 0.1], weights=[bad, 1.0])
+
+
+def test_sampler_is_deterministic_across_calls() -> None:
+    spec = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])}
+    a = EnvParamsSampler(spec, seed=42)
+    b = EnvParamsSampler(spec, seed=42)
+    seq_a = [a.sample(t) for t in range(1, 6)]
+    seq_b = [b.sample(t) for t in range(1, 6)]
+    assert seq_a == seq_b, "same (seed, trial) must produce the same draw across instances"
+
+
+def test_sampler_each_param_is_independent() -> None:
+    """Adding an unrelated parameter must not perturb existing parameters' draws."""
+    base = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])}
+    extended = {
+        "drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01]),
+        "latency_ms": EnvParamSpec(values=[1, 5, 10]),
+    }
+    a = [EnvParamsSampler(base, seed=7).sample(t)["drop_rate"] for t in range(1, 11)]
+    b = [EnvParamsSampler(extended, seed=7).sample(t)["drop_rate"] for t in range(1, 11)]
+    assert a == b, "per-parameter RNG seeding must isolate parameters from each other"
+
+
+def test_csv_sink_skips_empty_samples_and_rejects_zero_step(tmp_path: Path) -> None:
+    sink = CsvSink(tmp_path / "env.csv")
+    sink.write(1, {})  # empty -> no-op, no file
+    assert not (tmp_path / "env.csv").exists()
+    with pytest.raises(ValueError, match="must be a positive trial index"):
+        sink.write(0, {"drop_rate": 0.0})
+
+
+def test_csv_sink_writes_header_then_rows(tmp_path: Path) -> None:
+    sink = CsvSink(tmp_path / "env.csv")
+    sink.write(1, {"drop_rate": 0.001})
+    sink.write(2, {"drop_rate": 0.01})
+    contents = (tmp_path / "env.csv").read_text().strip().splitlines()
+    assert contents[0] == "step,env"
+    assert contents[1].startswith("1,")
+    assert contents[2].startswith("2,")
+
+
+def test_observer_samples_and_stashes_current_env_params() -> None:
+    """before_step samples and stashes the result; persistence is the env's job."""
+    spec = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])}
+    observer = EnvParamsObserver(spec, seed=42)
+    test_run = SimpleNamespace(step=3, current_env_params={})
+
+    observer.before_step(test_run)
+
+    assert "drop_rate" in test_run.current_env_params
+    assert test_run.current_env_params["drop_rate"] in {0.0, 0.001, 0.01}
+
+
+def test_observer_after_step_is_noop() -> None:
+    """after_step must not mutate test_run; CloudAIGymEnv.write_trajectory owns persistence."""
+    observer = EnvParamsObserver({}, seed=0)
+    test_run = SimpleNamespace(step=1, current_env_params={"x": 1})
+
+    observer.after_step(test_run, observation=[0.0], reward=0.0)
+
+    assert test_run.current_env_params == {"x": 1}
diff --git a/tests/test_handlers.py b/tests/test_handlers.py
index bcf01978f..0cd3c20e1 100644
--- a/tests/test_handlers.py
+++ b/tests/test_handlers.py
@@ -26,19 +26,23 @@
 
 from cloudai.cli.handlers import (
     handle_dse_job,
+    validate_dse_env_params,
     verify_system_configs,
     verify_test_configs,
     verify_test_scenarios,
 )
+from cloudai.configurator.env_params import EnvParamSpec
 from cloudai.core import (
     BaseAgent,
     BaseAgentConfig,
+    Parser,
     Registry,
     RewardOverrides,
     Runner,
     TestDependency,
     TestRun,
     TestScenario,
+    TestScenarioParsingError,
 )
 from cloudai.models.scenario import ReportConfig
 from cloudai.reporter import StatusReporter
@@ -427,3 +431,49 @@ def test_handle_dse_job_documents_failure_in_reports_before_raising(
     contents = failure_report.read_text()
     assert "RuntimeError" in contents
     assert "agent blew up" in contents
+
+
+def test_validate_dse_env_params_rejects_non_dse(base_tr: TestRun) -> None:
+    base_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])}
+    scenario = TestScenario(name="s", test_runs=[base_tr])
+    with pytest.raises(TestScenarioParsingError, match="declare env_params but are not DSE jobs"):
+        validate_dse_env_params(scenario)
+
+
+def test_validate_dse_env_params_allows_dse_run(dse_tr: TestRun) -> None:
+    dse_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])}
+    assert dse_tr.is_dse_job is True  # precondition: DSE + env_params is the allowed combination
+    validate_dse_env_params(TestScenario(name="s", test_runs=[dse_tr]))  # no exception == pass
+
+
+def test_validate_dse_env_params_allows_num_nodes_sweep(base_tr: TestRun) -> None:
+    base_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])}
+    base_tr.num_nodes = [1, 2]
+    assert base_tr.is_dse_job is True  # a num_nodes list sweep makes it DSE, so env_params is allowed
+    validate_dse_env_params(TestScenario(name="s", test_runs=[base_tr]))  # no exception == pass
+
+
+def test_validate_dse_env_params_allows_non_dse_without_env_params(base_tr: TestRun) -> None:
+    assert base_tr.is_dse_job is False  # precondition: not DSE, but also no env_params declared
+    assert not base_tr.test.env_params
+    validate_dse_env_params(TestScenario(name="s", test_runs=[base_tr]))  # no exception == pass
+
+
+def test_verify_test_scenarios_rejects_env_params_without_dse(
+    base_tr: TestRun, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    base_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])}
+    bad = TestScenario(name="s", test_runs=[base_tr])
+    monkeypatch.setattr(Parser, "parse_tests", lambda *a, **k: [])
+    monkeypatch.setattr(Parser, "parse_hooks", lambda *a, **k: {})
+    monkeypatch.setattr(Parser, "parse_test_scenario", lambda *a, **k: bad)
+    assert verify_test_scenarios([Path("dummy.toml")], [], [], []) == 1
+
+
+def test_verify_test_scenarios_allows_env_params_with_dse(dse_tr: TestRun, monkeypatch: pytest.MonkeyPatch) -> None:
+    dse_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])}
+    good = TestScenario(name="s", test_runs=[dse_tr])
+    monkeypatch.setattr(Parser, "parse_tests", lambda *a, **k: [])
+    monkeypatch.setattr(Parser, "parse_hooks", lambda *a, **k: {})
+    monkeypatch.setattr(Parser, "parse_test_scenario", lambda *a, **k: good)
+    assert verify_test_scenarios([Path("dummy.toml")], [], [], []) == 0

From 0dabd4e482af47095a6dd3fdcd9ccf27a86837c8 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Tue, 23 Jun 2026 17:44:40 -0400
Subject: [PATCH 02/22] refactor(configurator): env_params as cmd_args
 annotation, excluded from search space

Make env_params a thin annotation over cmd_args fields instead of a holder of
candidate values. Candidate values live in cmd_args (the single source of truth,
exactly like an action-space dimension); env_params.<name> only marks a field as
env-sampled and carries optional sampling weights, never the values.

- EnvParamSpec drops `values`; validates weights (finite, non-negative, sum=1.0).
- Sampler/observer resolve candidate lists from cmd_args; scalar knobs are no-ops.
- TestDefinition.validate_env_params cross-checks annotations against cmd_args
  (key must be a real field; weights require a list and must match its length).
- Exclude env_params keys from both param_space and is_dse_job: an env-sampled
  list is not a search dimension, so an env-params-only workload is not a DSE job.
- validate_dse_env_params rejects env_params on non-DSE runs and on grid_search
  (exhaustive search cannot exploit per-trial randomization).
- Scrub private-implementation references from public docstrings.
- Unit tests use generic Atari Breakout semantics (ball_speed / paddle_width).
---
 src/cloudai/_core/test_scenario.py      |   4 +-
 src/cloudai/cli/handlers.py             |  21 ++-
 src/cloudai/configurator/cloudai_gym.py |  15 +-
 src/cloudai/configurator/env_params.py  |  98 ++++++++----
 src/cloudai/models/workload.py          |  55 ++++++-
 tests/test_cloudaigym.py                | 155 ++++++++++++++-----
 tests/test_env_params.py                | 197 ++++++++++++++++++++----
 tests/test_handlers.py                  |  34 ++--
 8 files changed, 452 insertions(+), 127 deletions(-)

diff --git a/src/cloudai/_core/test_scenario.py b/src/cloudai/_core/test_scenario.py
index 81f2723b0..26f22e7e0 100644
--- a/src/cloudai/_core/test_scenario.py
+++ b/src/cloudai/_core/test_scenario.py
@@ -157,7 +157,9 @@ def param_space(self) -> dict[str, Any]:
             **{
                 key: value
                 for key, value in cmd_args_dict.items()
-                if isinstance(value, list) and not self.test.is_dse_excluded_arg(key)
+                if isinstance(value, list)
+                and not self.test.is_dse_excluded_arg(key)
+                and key.split(".", 1)[0] not in self.test.env_params
             },
             **{f"extra_env_vars.{key}": value for key, value in extra_env_vars_dict.items() if isinstance(value, list)},
         }
diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py
index 87e4ccfd5..f7b9f3844 100644
--- a/src/cloudai/cli/handlers.py
+++ b/src/cloudai/cli/handlers.py
@@ -300,17 +300,24 @@ def _check_installation(
 
 def validate_dse_env_params(test_scenario: TestScenario) -> None:
     """
-    Reject prepped configs that declare env_params on a non-DSE test run.
+    Reject prepped configs that declare env_params but cannot sample them.
 
-    env_params are sampled only during DSE (by CloudAIGymEnv); on a non-DSE run they would be
-    silently ignored. is_dse_job is a property of the fully prepped config, so this is validated
-    here rather than at parse time.
+    env_params are sampled per-trial by CloudAIGymEnv, but only for a DSE run on a *learning*
+    agent. A non-DSE run has no per-trial loop at all, and grid_search exhaustively enumerates the
+    search space - sampling env_params there would just inject noise into a deterministic sweep with
+    no policy to gain robustness. is_dse_job is a property of the fully prepped config, so this is
+    validated here rather than at parse time.
     """
-    offenders = [tr.name for tr in test_scenario.test_runs if tr.test.env_params and not tr.is_dse_job]
+    offenders = [
+        tr.name
+        for tr in test_scenario.test_runs
+        if tr.test.env_params and (not tr.is_dse_job or tr.test.agent == "grid_search")
+    ]
     if offenders:
         raise TestScenarioParsingError(
-            f"Tests {offenders} declare env_params but are not DSE jobs. env_params are sampled only during "
-            "DSE (by CloudAIGymEnv); add a sweep (a list-valued cmd_args/extra_env_vars entry or num_nodes) "
+            f"Tests {offenders} declare env_params but will not sample them. env_params are sampled per-trial "
+            "only by a DSE run on a learning agent (grid_search searches the whole space and ignores them). "
+            "Use a learning agent and add a sweep (a list-valued cmd_args/extra_env_vars entry or num_nodes), "
             "or remove env_params."
         )
 
diff --git a/src/cloudai/configurator/cloudai_gym.py b/src/cloudai/configurator/cloudai_gym.py
index 80c5210f1..d16a9c63e 100644
--- a/src/cloudai/configurator/cloudai_gym.py
+++ b/src/cloudai/configurator/cloudai_gym.py
@@ -78,7 +78,7 @@ def _build_observers(self) -> List[StepObserver]:
         if self.test_run.test.env_params:
             seed = int((self.test_run.test.agent_config or {}).get("random_seed", 0))
             self._env_sink = CsvSink(self._env_csv_path())
-            observers.append(EnvParamsObserver(self.test_run.test.env_params, seed))
+            observers.append(EnvParamsObserver(self.test_run.test.env_params, self.test_run.test.cmd_args, seed))
         return observers
 
     def _env_csv_path(self) -> Path:
@@ -146,6 +146,19 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]:
         for observer in self.observers:
             observer.before_step(self.test_run)
 
+        # Overlay this trial's sampled env_params onto cmd_args so the workload actually
+        # runs with the sampled values - the env-side twin of apply_params_set(action).
+        # Sampling, persistence (env.csv), and the trajectory cache key are handled
+        # separately; this is the single, workload-agnostic injection point. Keys are
+        # validated to be cmd_args fields at TestDefinition build time; we re-filter to
+        # those fields here so a programmatically-built run can't inject unknown attrs.
+        if self.test_run.current_env_params:
+            cmd_args = self.test_run.test.cmd_args
+            fields = getattr(type(cmd_args), "model_fields", {})
+            overlay = {k: v for k, v in self.test_run.current_env_params.items() if k in fields}
+            if overlay:
+                self.test_run.test.cmd_args = cmd_args.model_copy(update=overlay)
+
         cached_result = self.get_cached_trajectory_result(action)
         if cached_result is not None:
             logging.info(
diff --git a/src/cloudai/configurator/env_params.py b/src/cloudai/configurator/env_params.py
index 44bf57524..ddd26758e 100644
--- a/src/cloudai/configurator/env_params.py
+++ b/src/cloudai/configurator/env_params.py
@@ -15,19 +15,23 @@
 # limitations under the License.
 
 """
-Domain-randomization primitives for CloudAI DSE.
-
-An env-randomized parameter is a workload knob whose value the environment
-samples per trial (categorical, optional weights). It is sibling to
-``cmd_args`` on a ``TestDefinition`` and does not enter the agent's action
-space; the policy learns a robust mapping under that variation.
-
-This module owns the data schema (``EnvParamSpec``), the deterministic
+Per-trial environment-parameter primitives for CloudAI DSE.
+
+An env-randomized parameter is a workload knob whose candidate values live in
+``cmd_args`` (a plain list - the single source of truth, exactly like an
+action-space dimension) but which the environment *samples* per trial rather
+than letting the agent search it. ``env_params`` is the annotation that marks
+such a field: ``env_params.<name>`` reclassifies ``cmd_args.<name>`` from
+action-space to env-sampled and carries only *how* to sample (optional
+``weights``), never the values. The policy learns a robust mapping under that
+variation; the knob never enters the agent's action space.
+
+This module owns the annotation schema (``EnvParamSpec``), the deterministic
 sampler (``EnvParamsSampler``), the persistence interface
 (``EnvParamsSink`` + ``CsvSink``) and the per-step observer
 (``EnvParamsObserver``). ``CloudAIGymEnv`` consumes these directly so the
 artifacts (``env.csv``) and the cache key align 1:1 with ``trajectory.csv``
-regardless of agent (PPO, BO, GA, MAB) or workload.
+regardless of agent or workload.
 """
 
 from __future__ import annotations
@@ -43,59 +47,71 @@
 
 
 class EnvParamSpec(BaseModel):
-    """Specification of one env-randomized parameter (categorical)."""
+    """
+    Annotation marking one cmd_args field as env-sampled.
+
+    Carries only *how* to sample - the candidate values themselves live in
+    ``cmd_args.<name>`` as a plain list. ``weights`` (optional) are positional,
+    aligned 1:1 with that candidate list; omit for uniform sampling. The
+    length match against the candidate list is a cross-field check enforced by
+    ``TestDefinition`` (which can see ``cmd_args``); here we validate only the
+    weights' intrinsic shape.
+    """
 
     model_config = ConfigDict(extra="forbid")
 
-    values: List[Any] = Field(
-        min_length=2,
-        description="Candidate values; a single-valued parameter is just a fixed cmd_args entry.",
-    )
     weights: Optional[List[float]] = Field(
         default=None,
-        description="Optional probability weights aligned with values; uniform if omitted.",
+        description="Optional probability weights aligned with the cmd_args candidate list; uniform if omitted.",
     )
 
     @model_validator(mode="after")
     def _validate_weights(self) -> Self:
         if self.weights is None:
             return self
-        if len(self.weights) != len(self.values):
-            raise ValueError(
-                f"env_params weights length {len(self.weights)} does not match values length {len(self.values)}"
-            )
         for w in self.weights:
             if not math.isfinite(w) or w < 0:
                 raise ValueError(f"env_params weights must be finite and non-negative; got {w}")
-        if sum(self.weights) <= 0:
-            raise ValueError("env_params weights must have a positive sum")
+        total = sum(self.weights)
+        if abs(total - 1.0) > 1e-6:
+            raise ValueError(f"env_params weights must sum to 1.0; got {total}")
         return self
 
 
 class EnvParamsSampler:
     """
-    Per-trial categorical sampler.
+    Per-trial categorical sampler over candidate lists.
+
+    Candidates are resolved from ``cmd_args`` by the caller and passed in as
+    ``{name: [v0, v1, ...]}``; optional ``weights`` mirror that mapping.
 
     Determinism contract: ``sample(t)`` returns the same dict on every call
-    (across processes) for the same ``(seed, env_params, t)``.
+    (across processes) for the same ``(seed, candidates, t)``.
 
     Independence contract: each parameter uses an RNG seeded by
     ``f"{seed}:{name}:{trial}"`` so adding or removing an unrelated
     parameter does not perturb existing parameters' draw sequences.
     """
 
-    def __init__(self, env_params: Dict[str, EnvParamSpec], seed: int) -> None:
-        self._env_params = env_params
+    def __init__(
+        self,
+        candidates: Dict[str, List[Any]],
+        seed: int,
+        weights: Optional[Dict[str, List[float]]] = None,
+    ) -> None:
+        self._candidates = candidates
+        self._weights = weights or {}
         self._seed = seed
 
     def sample(self, trial: int) -> Dict[str, Any]:
         out: Dict[str, Any] = {}
-        for name, spec in self._env_params.items():
+        for name, choices in self._candidates.items():
             rng = random.Random(f"{self._seed}:{name}:{trial}")
-            if spec.weights is not None:
-                out[name] = rng.choices(spec.values, weights=spec.weights, k=1)[0]
+            weights = self._weights.get(name)
+            if weights is not None:
+                out[name] = rng.choices(choices, weights=weights, k=1)[0]
             else:
-                out[name] = rng.choice(spec.values)
+                out[name] = rng.choice(choices)
         return out
 
 
@@ -150,17 +166,31 @@ class EnvParamsObserver:
     """
     Sample env_params per step and stash them for the cache and the workload.
 
-    Pre-step: samples ``test_run.test.env_params`` for ``test_run.step`` and
-    stashes the result on ``test_run.current_env_params`` so the cache key and
-    the workload's substitution both see it. Persistence is owned by
+    Construction: resolves each annotated knob's candidate list from
+    ``cmd_args`` (the single source of truth) once, up front. A knob whose
+    ``cmd_args`` value is a scalar (not a list) is fixed - its annotation is a
+    no-op and it is skipped, so nothing is sampled or stashed for it.
+
+    Pre-step: samples the resolved candidates for ``test_run.step`` and stashes
+    the result on ``test_run.current_env_params`` so the cache key and the
+    cmd_args overlay both see it. Persistence is owned by
     ``CloudAIGymEnv.write_trajectory``, which sinks ``trajectory.csv`` and the
     ``env.csv`` projection from the single ``TrajectoryEntry`` - so a trial that
     writes no trajectory row writes no env.csv row either, keeping the two files
     1:1 step-aligned. Post-step: no-op.
     """
 
-    def __init__(self, env_params: Dict[str, EnvParamSpec], seed: int) -> None:
-        self._sampler = EnvParamsSampler(env_params, seed=seed)
+    def __init__(self, env_params: Dict[str, EnvParamSpec], cmd_args: Any, seed: int) -> None:
+        candidates: Dict[str, List[Any]] = {}
+        weights: Dict[str, List[float]] = {}
+        for name, spec in env_params.items():
+            value = getattr(cmd_args, name, None)
+            if not isinstance(value, list):
+                continue  # scalar cmd_args knob is fixed; the annotation is a no-op
+            candidates[name] = value
+            if spec.weights is not None:
+                weights[name] = spec.weights
+        self._sampler = EnvParamsSampler(candidates, seed=seed, weights=weights)
 
     def before_step(self, test_run: Any) -> None:
         test_run.current_env_params = self._sampler.sample(test_run.step)
diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py
index 99637d4e8..8683cc8bb 100644
--- a/src/cloudai/models/workload.py
+++ b/src/cloudai/models/workload.py
@@ -115,7 +115,7 @@ class TestDefinition(BaseModel, ABC):
     env_params: dict[str, EnvParamSpec] = Field(
         default_factory=dict,
         description=(
-            "Domain-randomized parameters sampled by the env per trial. Sibling to "
+            "Environment parameters sampled by the env per trial. Sibling to "
             "cmd_args; not part of the agent's action space. CloudAIGymEnv samples, "
             "persists to env.csv, and includes them in the trajectory cache key."
         ),
@@ -146,17 +146,23 @@ def constraint_check(self, tr: TestRun, system: Optional[System]) -> bool:
 
     @property
     def is_dse_job(self) -> bool:
-        def check_dict(d: dict, parent_key: str = "") -> bool:
+        def check_dict(d: dict, parent_key: str = "", skip_env_params: bool = False) -> bool:
             if isinstance(d, dict):
                 for key, value in d.items():
                     path = f"{parent_key}.{key}" if parent_key else key
                     if self.is_dse_excluded_arg(path):
                         continue
-                    if isinstance(value, list) or (isinstance(value, dict) and check_dict(value, path)):
+                    # env_params lists are sampled by the env, not searched by the agent, so they
+                    # are not action-space dimensions and must not make a run count as DSE.
+                    if skip_env_params and path.split(".", 1)[0] in self.env_params:
+                        continue
+                    if isinstance(value, list) or (
+                        isinstance(value, dict) and check_dict(value, path, skip_env_params)
+                    ):
                         return True
             return False
 
-        return check_dict(self.cmd_args_dict) or check_dict(self.extra_env_vars)
+        return check_dict(self.cmd_args_dict, skip_env_params=True) or check_dict(self.extra_env_vars)
 
     @field_validator("dse_excluded_args", mode="before")
     @classmethod
@@ -201,3 +207,44 @@ def validate_agent_config(self) -> Self:
             agent_config_class = agent_class.get_config_class()
             agent_config_class.model_validate(self.agent_config)
         return self
+
+    @model_validator(mode="after")
+    def validate_env_params(self) -> Self:
+        """
+        Validate env_params annotations against cmd_args.
+
+        ``env_params`` is an annotation: each key names a ``cmd_args`` field whose value is
+        the candidate set (the single source of truth), and the entry carries only *how* to
+        sample. So each key must name a real ``cmd_args`` field; and when ``weights`` are
+        declared, that field must be a candidate list of >= 2 values and the weights must
+        align 1:1 with it. A scalar (fixed) ``cmd_args`` value is tolerated as a no-op
+        marker. Sampling, persistence, the per-trial cmd_args overlay, and the cache key all
+        live in ``CloudAIGymEnv``; keeping this shape check in core lets the overlay stay
+        agent- and workload-agnostic rather than re-implemented per workload.
+        """
+        if not self.env_params:
+            return self
+
+        cmd_args_fields = getattr(type(self.cmd_args), "model_fields", None)
+        if not cmd_args_fields:
+            return self
+
+        unknown = sorted(k for k in self.env_params if k not in cmd_args_fields)
+        if unknown:
+            raise ValueError(f"env_params keys {unknown} are not cmd_args fields on {type(self.cmd_args).__name__}")
+
+        for name, spec in self.env_params.items():
+            if spec.weights is None:
+                continue
+            value = getattr(self.cmd_args, name, None)
+            if not isinstance(value, list) or len(value) < 2:
+                raise ValueError(
+                    f"env_params['{name}'] declares weights but cmd_args.{name} is not a candidate list "
+                    "(need a list of >= 2 values)"
+                )
+            if len(spec.weights) != len(value):
+                raise ValueError(
+                    f"env_params['{name}'] weights length {len(spec.weights)} does not match "
+                    f"cmd_args.{name} candidate count {len(value)}"
+                )
+        return self
diff --git a/tests/test_cloudaigym.py b/tests/test_cloudaigym.py
index e687d4373..975efbe48 100644
--- a/tests/test_cloudaigym.py
+++ b/tests/test_cloudaigym.py
@@ -34,6 +34,7 @@
 )
 from cloudai.workloads.nemo_run.report_generation_strategy import NeMoRunReportGenerationStrategy
 from cloudai.workloads.nixl_bench import NIXLBenchCmdArgs, NIXLBenchTestDefinition
+from tests.test_env_params import EnvVarCmdArgs, EnvVarTestDefinition
 
 
 @pytest.fixture
@@ -457,7 +458,7 @@ def test_cache_miss_when_env_params_differ(base_tr: TestRun, tmp_path: Path) ->
     """Cache MUST miss when env_params differ, even if action is identical.
 
     Without this property the agent receives stale rewards on every cache hit
-    under domain randomization. PPO/DQN/BO all silently train on labels that
+    when the env varies per trial: any agent silently trains on labels that
     do not correspond to the env they were nominally generated under.
     """
     runner = MagicMock()
@@ -468,15 +469,14 @@ def test_cache_miss_when_env_params_differ(base_tr: TestRun, tmp_path: Path) ->
     runner.testrun_to_job_map = {}
 
     env = CloudAIGymEnv(test_run=base_tr, runner=runner, rewards=RewardOverrides())
-    _seed_cached_entry_with_env_params(env, {"x": 10}, env_params={"drop_rate": 0.001})
+    _seed_cached_entry_with_env_params(env, {"x": 10}, env_params={"ball_speed": 1})
 
-    env.test_run.current_env_params = {"drop_rate": 0.01}
+    env.test_run.current_env_params = {"ball_speed": 2}
 
     assert env.get_cached_trajectory_result({"x": 10}) is None, (
-        "Cache must include env_params in its key. The current implementation "
-        "keys on action alone, so trials repeating the same action under a "
-        "different env_params sample receive a stale cached reward. See "
-        "env-params-cloudai-corpus-plan.md."
+        "Cache must include env_params in its key. Keying on action alone means "
+        "trials repeating the same action under a different env_params sample "
+        "receive a stale cached reward."
     )
 
 
@@ -490,9 +490,9 @@ def test_cache_hit_when_action_and_env_params_match(base_tr: TestRun, tmp_path:
     runner.testrun_to_job_map = {}
 
     env = CloudAIGymEnv(test_run=base_tr, runner=runner, rewards=RewardOverrides())
-    _seed_cached_entry_with_env_params(env, {"x": 10}, env_params={"drop_rate": 0.001})
+    _seed_cached_entry_with_env_params(env, {"x": 10}, env_params={"ball_speed": 2})
 
-    env.test_run.current_env_params = {"drop_rate": 0.001}
+    env.test_run.current_env_params = {"ball_speed": 2}
 
     result = env.get_cached_trajectory_result({"x": 10})
     assert result is not None
@@ -553,10 +553,10 @@ def test_step_reruns_workload_when_env_params_change(nemorun: NeMoRunTestDefinit
 
     with patch.object(env, "get_observation", side_effect=lambda _action: next(fake_obs)):
         env.test_run.step = 0
-        env.test_run.current_env_params = {"drop_rate": 0.001}
+        env.test_run.current_env_params = {"ball_speed": 1}
         obs1, _r1, *_ = env.step(action)
 
-        env.test_run.current_env_params = {"drop_rate": 0.01}
+        env.test_run.current_env_params = {"ball_speed": 2}
         obs2, _r2, *_ = env.step(action)
 
     assert runner.run.call_count == 2, (
@@ -566,18 +566,22 @@ def test_step_reruns_workload_when_env_params_change(nemorun: NeMoRunTestDefinit
     assert obs1 != obs2, "fresh workload run should produce a fresh observation"
 
 
-def test_env_csv_is_step_aligned_with_trajectory(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None:
+def test_env_csv_is_step_aligned_with_trajectory(tmp_path: Path) -> None:
     """env.csv must have exactly one row per env.step() call, with steps aligned 1:1 to trajectory.csv.
 
     This pins the corpus-friendly contract: a downstream consumer can
     ``pd.merge(traj, env, on="step")`` without losing rows on either side,
     independent of whether the trial hit the trajectory cache.
     """
-    tdef = nemorun.model_copy(deep=True)
-    tdef.cmd_args.data.global_batch_size = 8
-    tdef.agent_metrics = ["default"]
-    tdef.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])}
-    tdef.agent_config = {"random_seed": 42}
+    tdef = EnvVarTestDefinition(
+        name="dr",
+        description="dr",
+        test_template_name="dr_template",
+        cmd_args=EnvVarCmdArgs(ball_speed=[1, 2, 3]),
+        env_params={"ball_speed": EnvParamSpec()},
+        agent_metrics=["default"],
+        agent_config={"random_seed": 42},
+    )
 
     test_run = TestRun(
         name="dr_tr",
@@ -585,7 +589,6 @@ def test_env_csv_is_step_aligned_with_trajectory(nemorun: NeMoRunTestDefinition,
         num_nodes=1,
         nodes=[],
         output_path=tmp_path / "out" / "dr_tr" / "0",
-        reports={NeMoRunReportGenerationStrategy},
     )
     test_scenario = TestScenario(name="dr_scenario", test_runs=[test_run])
 
@@ -597,7 +600,7 @@ def test_env_csv_is_step_aligned_with_trajectory(nemorun: NeMoRunTestDefinition,
     runner.get_job_output_path.return_value = test_run.output_path
 
     env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides())
-    action_a, action_b = {"trainer.max_steps": 1000}, {"trainer.max_steps": 2000}
+    action_a, action_b = {"paddle_width": 4}, {"paddle_width": 8}
     fake_obs = iter([[100.0], [50.0], [25.0]])
 
     with patch.object(env, "get_observation", side_effect=lambda _action: next(fake_obs)):
@@ -616,7 +619,7 @@ def test_env_csv_is_step_aligned_with_trajectory(nemorun: NeMoRunTestDefinition,
     )
 
 
-def test_env_csv_step_alignment_holds_on_constraint_failure(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None:
+def test_env_csv_step_alignment_holds_on_constraint_failure(tmp_path: Path) -> None:
     """A constraint failure must not desync env.csv from trajectory.csv.
 
     Runs three steps where the middle one fails ``constraint_check`` and the
@@ -627,11 +630,15 @@ def test_env_csv_step_alignment_holds_on_constraint_failure(nemorun: NeMoRunTest
     therefore holds via shared absence: both files record exactly the surviving
     steps, aligned 1:1.
     """
-    tdef = nemorun.model_copy(deep=True)
-    tdef.cmd_args.data.global_batch_size = 8
-    tdef.agent_metrics = ["default"]
-    tdef.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])}
-    tdef.agent_config = {"random_seed": 42}
+    tdef = EnvVarTestDefinition(
+        name="dr",
+        description="dr",
+        test_template_name="dr_template",
+        cmd_args=EnvVarCmdArgs(ball_speed=[1, 2, 3]),
+        env_params={"ball_speed": EnvParamSpec()},
+        agent_metrics=["default"],
+        agent_config={"random_seed": 42},
+    )
 
     test_run = TestRun(
         name="dr_tr",
@@ -639,7 +646,6 @@ def test_env_csv_step_alignment_holds_on_constraint_failure(nemorun: NeMoRunTest
         num_nodes=1,
         nodes=[],
         output_path=tmp_path / "out" / "dr_tr" / "0",
-        reports={NeMoRunReportGenerationStrategy},
     )
     test_scenario = TestScenario(name="dr_scenario", test_runs=[test_run])
 
@@ -656,11 +662,11 @@ def test_env_csv_step_alignment_holds_on_constraint_failure(nemorun: NeMoRunTest
     # reached on the surviving steps, so it yields exactly two values.
     fake_obs = iter([[100.0], [25.0]])
     with (
-        patch.object(NeMoRunTestDefinition, "constraint_check", side_effect=[True, False, True]),
+        patch.object(EnvVarTestDefinition, "constraint_check", side_effect=[True, False, True]),
         patch.object(env, "get_observation", side_effect=lambda _action: next(fake_obs)),
     ):
         env.test_run.step = 0
-        for action in ({"trainer.max_steps": 1000}, {"trainer.max_steps": 2000}, {"trainer.max_steps": 3000}):
+        for action in ({"paddle_width": 4}, {"paddle_width": 6}, {"paddle_width": 8}):
             env.step(action)
 
     env_csv = env._env_csv_path()
@@ -679,9 +685,7 @@ def test_env_csv_step_alignment_holds_on_constraint_failure(nemorun: NeMoRunTest
     )
 
 
-def test_step_cache_hit_with_declared_env_params_still_writes_env_csv(
-    nemorun: NeMoRunTestDefinition, tmp_path: Path
-) -> None:
+def test_step_cache_hit_with_declared_env_params_still_writes_env_csv(tmp_path: Path) -> None:
     """End-to-end: cache HIT under observer-driven env_params still records env.csv.
 
     A cache hit still calls ``write_trajectory``, which sinks the trajectory row
@@ -693,11 +697,15 @@ def test_step_cache_hit_with_declared_env_params_still_writes_env_csv(
     """
     import random as _random
 
-    tdef = nemorun.model_copy(deep=True)
-    tdef.cmd_args.data.global_batch_size = 8
-    tdef.agent_metrics = ["default"]
-    tdef.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])}
-    tdef.agent_config = {"random_seed": 42}
+    tdef = EnvVarTestDefinition(
+        name="dr",
+        description="dr",
+        test_template_name="dr_template",
+        cmd_args=EnvVarCmdArgs(ball_speed=[1, 2, 3]),
+        env_params={"ball_speed": EnvParamSpec()},
+        agent_metrics=["default"],
+        agent_config={"random_seed": 42},
+    )
 
     test_run = TestRun(
         name="dr_tr",
@@ -705,7 +713,6 @@ def test_step_cache_hit_with_declared_env_params_still_writes_env_csv(
         num_nodes=1,
         nodes=[],
         output_path=tmp_path / "out" / "dr_tr" / "0",
-        reports={NeMoRunReportGenerationStrategy},
     )
     runner = MagicMock(spec=BaseRunner)
     runner.scenario_root = tmp_path / "scenario"
@@ -717,8 +724,8 @@ def test_step_cache_hit_with_declared_env_params_still_writes_env_csv(
     env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides())
     assert env.observers, "TestDefinition.env_params declared -> observer must be built"
 
-    expected_sample = {"drop_rate": _random.Random("42:drop_rate:1").choice([0.0, 0.001, 0.01])}
-    action = {"trainer.max_steps": 1000}
+    expected_sample = {"ball_speed": _random.Random("42:ball_speed:1").choice([1, 2, 3])}
+    action = {"paddle_width": 4}
     env.test_run.current_iteration = 0
     env.trajectory = {
         0: [TrajectoryEntry(step=0, action=action, reward=0.42, observation=[0.84], env_params=expected_sample)]
@@ -743,6 +750,76 @@ def test_step_cache_hit_with_declared_env_params_still_writes_env_csv(
     )
 
 
+def test_step_overlays_env_params_onto_cmd_args(tmp_path: Path) -> None:
+    """The per-trial env_params sample must be overlaid onto cmd_args before the workload runs.
+
+    env_params are the env-side twin of the action: core overlays the sampled values onto
+    cmd_args inside step() so the workload actually runs with them, workload-agnostically
+    (no per-workload injection code). Here ``ball_speed`` is env-randomized (its candidate
+    list lives in cmd_args), so the value the workload runs with must equal the observer's
+    sample, not the whole candidate list.
+    """
+    import random as _random
+
+    tdef = EnvVarTestDefinition(
+        name="overlay",
+        description="overlay",
+        test_template_name="dr_template",
+        cmd_args=EnvVarCmdArgs(ball_speed=[1, 2]),
+        env_params={"ball_speed": EnvParamSpec()},
+        agent_metrics=["default"],
+        agent_config={"random_seed": 42},
+    )
+
+    test_run = TestRun(
+        name="overlay_tr",
+        test=tdef,
+        num_nodes=1,
+        nodes=[],
+        output_path=tmp_path / "out" / "overlay_tr" / "0",
+    )
+    runner = MagicMock(spec=BaseRunner)
+    runner.scenario_root = tmp_path / "scenario"
+    runner.system = MagicMock()
+    runner.test_scenario = TestScenario(name="overlay_scenario", test_runs=[test_run])
+    runner.jobs, runner.testrun_to_job_map, runner.shutting_down = {}, {}, False
+    runner.get_job_output_path.return_value = test_run.output_path
+
+    env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides())
+    expected = _random.Random("42:ball_speed:1").choice([1, 2])
+
+    with patch.object(env, "get_observation", side_effect=lambda _action: [1.0]):
+        env.test_run.step = 0
+        env.step({"paddle_width": 4})
+
+    ran_cmd_args = runner.test_scenario.test_runs[0].test.cmd_args
+    assert ran_cmd_args.ball_speed == expected, (
+        "step() must overlay the sampled env_params onto cmd_args before runner.run(), so the "
+        f"workload runs with ball_speed={expected} (the observer's draw), not the candidate list."
+    )
+
+
+def test_param_space_excludes_env_params_keys(setup_env: tuple[TestRun, BaseRunner]):
+    """env_params keys must never surface in the grid/action space (sampled, not searched)."""
+    tr, _ = setup_env
+    tr.test = EnvVarTestDefinition(
+        name="dr",
+        description="dr",
+        test_template_name="dr_template",
+        # paddle_width is an ordinary action-space list; ball_speed is an env-sampled list.
+        cmd_args=EnvVarCmdArgs(paddle_width=[4, 8], ball_speed=[1, 2]),
+        env_params={"ball_speed": EnvParamSpec()},
+    )
+
+    action_space = tr.param_space
+
+    assert "paddle_width" in action_space, "an un-annotated cmd_args list must remain an action-space dimension"
+    assert "ball_speed" not in action_space, (
+        "a knob declared in env_params is sampled by the env, not explored by the agent, so it "
+        "must be excluded from param_space even though its cmd_args value is a list."
+    )
+
+
 def test_no_env_csv_when_env_params_not_declared(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None:
     """Workloads without [env_params.*] pay zero overhead: no observer, no env.csv."""
     tdef = nemorun.model_copy(deep=True)
diff --git a/tests/test_env_params.py b/tests/test_env_params.py
index 3aa931607..3cb1ddeab 100644
--- a/tests/test_env_params.py
+++ b/tests/test_env_params.py
@@ -14,12 +14,26 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Unit tests for the domain-randomization primitives in cloudai.configurator.env_params."""
+"""Unit tests for the environment-parameter primitives in cloudai.configurator.env_params.
+
+Annotation model: candidate values live in ``cmd_args`` (the single source of
+truth); an ``env_params`` entry is a thin annotation that reclassifies that
+field from action-space to env-sampled, optionally carrying sampling
+``weights``. These tests use a dedicated, list-capable fixture workload so the
+candidate list is a first-class typed field (no serialization warnings).
+
+Worked example - Atari Breakout, the canonical RL arcade game:
+``ball_speed`` is the env-sampled knob - the game serves the ball at a speed the
+agent does not control, so we sample it per trial and the policy must stay
+robust across ball speeds. ``paddle_width`` is an ordinary action-space
+dimension - the knob the agent actually tunes.
+"""
 
 from __future__ import annotations
 
 from pathlib import Path
 from types import SimpleNamespace
+from typing import List, Union
 
 import pytest
 from pydantic import ValidationError
@@ -30,33 +44,82 @@
     EnvParamSpec,
     EnvParamsSampler,
 )
+from cloudai.models.workload import CmdArgs, TestDefinition
 
 
-def test_env_param_spec_requires_at_least_two_values() -> None:
-    with pytest.raises(ValidationError):
-        EnvParamSpec(values=[0.0])
+class EnvVarCmdArgs(CmdArgs):
+    """cmd_args with top-level, list-capable fields for env_params annotation tests.
 
+    ``ball_speed`` is the env-randomized knob; ``paddle_width`` stands in for an
+    ordinary action-space dimension. Both accept either a scalar or a candidate
+    list, so a candidate list round-trips through model validation/serialization
+    cleanly.
+    """
 
-def test_env_param_spec_rejects_mismatched_weights() -> None:
-    with pytest.raises(ValidationError):
-        EnvParamSpec(values=[0.0, 0.1], weights=[1.0])
+    ball_speed: Union[int, List[int]] = 1
+    paddle_width: Union[int, List[int]] = 8
 
 
-def test_env_param_spec_rejects_zero_sum_weights() -> None:
-    with pytest.raises(ValidationError):
-        EnvParamSpec(values=[0.0, 0.1], weights=[0.0, 0.0])
+class EnvVarTestDefinition(TestDefinition):
+    """Minimal concrete TestDefinition wrapping ``EnvVarCmdArgs``."""
+
+    cmd_args: EnvVarCmdArgs = EnvVarCmdArgs()
+
+
+def _tdef(env_params: dict, **cmd_args_overrides) -> EnvVarTestDefinition:
+    """Build the fixture TestDefinition through full model validation (so validators fire)."""
+    return EnvVarTestDefinition(
+        name="breakout",
+        description="breakout",
+        test_template_name="breakout_template",
+        cmd_args=EnvVarCmdArgs(**cmd_args_overrides),
+        env_params=env_params,
+    )
+
+
+# --- EnvParamSpec: weights are validated intrinsically (length is cross-field, see below) ---
+
+
+def test_env_param_spec_accepts_normalized_weights() -> None:
+    spec = EnvParamSpec(weights=[0.7, 0.3])
+    assert spec.weights == [0.7, 0.3]
+
+
+def test_env_param_spec_bare_marker_is_valid() -> None:
+    """An annotation with no weights is a valid uniform marker."""
+    assert EnvParamSpec().weights is None
+
+
+def test_env_param_spec_rejects_unnormalized_weights() -> None:
+    """Strict sum: weights must sum to ~1.0 (relative weights like [7, 3] are rejected)."""
+    with pytest.raises(ValidationError, match="must sum to"):
+        EnvParamSpec(weights=[7.0, 3.0])
+
+
+def test_env_param_spec_rejects_negative_weights() -> None:
+    with pytest.raises(ValidationError, match="finite and non-negative"):
+        EnvParamSpec(weights=[-0.1, 1.1])
 
 
 @pytest.mark.parametrize("bad", [float("inf"), float("nan"), float("-inf")])
 def test_env_param_spec_rejects_non_finite_weights(bad: float) -> None:
+    with pytest.raises(ValidationError, match="finite and non-negative"):
+        EnvParamSpec(weights=[bad, 1.0])
+
+
+def test_env_param_spec_rejects_unknown_fields() -> None:
+    """Candidate values live in cmd_args, never in the annotation (no ``values`` key)."""
     with pytest.raises(ValidationError):
-        EnvParamSpec(values=[0.0, 0.1], weights=[bad, 1.0])
+        EnvParamSpec(values=[1, 2])
+
+
+# --- Sampler: draws from candidate lists resolved out of cmd_args ---
 
 
 def test_sampler_is_deterministic_across_calls() -> None:
-    spec = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])}
-    a = EnvParamsSampler(spec, seed=42)
-    b = EnvParamsSampler(spec, seed=42)
+    candidates = {"ball_speed": [1, 2, 3]}
+    a = EnvParamsSampler(candidates, seed=42)
+    b = EnvParamsSampler(candidates, seed=42)
     seq_a = [a.sample(t) for t in range(1, 6)]
     seq_b = [b.sample(t) for t in range(1, 6)]
     assert seq_a == seq_b, "same (seed, trial) must produce the same draw across instances"
@@ -64,51 +127,123 @@ def test_sampler_is_deterministic_across_calls() -> None:
 
 def test_sampler_each_param_is_independent() -> None:
     """Adding an unrelated parameter must not perturb existing parameters' draws."""
-    base = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])}
-    extended = {
-        "drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01]),
-        "latency_ms": EnvParamSpec(values=[1, 5, 10]),
-    }
-    a = [EnvParamsSampler(base, seed=7).sample(t)["drop_rate"] for t in range(1, 11)]
-    b = [EnvParamsSampler(extended, seed=7).sample(t)["drop_rate"] for t in range(1, 11)]
+    base = {"ball_speed": [1, 2, 3]}
+    extended = {"ball_speed": [1, 2, 3], "brick_rows": [3, 4, 5]}
+    a = [EnvParamsSampler(base, seed=7).sample(t)["ball_speed"] for t in range(1, 11)]
+    b = [EnvParamsSampler(extended, seed=7).sample(t)["ball_speed"] for t in range(1, 11)]
     assert a == b, "per-parameter RNG seeding must isolate parameters from each other"
 
 
+def test_sampler_honors_weights() -> None:
+    """A degenerate weight ([1, 0]) must always pick the first candidate."""
+    sampler = EnvParamsSampler({"ball_speed": [1, 2]}, seed=1, weights={"ball_speed": [1.0, 0.0]})
+    assert all(sampler.sample(t)["ball_speed"] == 1 for t in range(1, 20))
+
+
+# --- CsvSink: unchanged persistence contract ---
+
+
 def test_csv_sink_skips_empty_samples_and_rejects_zero_step(tmp_path: Path) -> None:
     sink = CsvSink(tmp_path / "env.csv")
     sink.write(1, {})  # empty -> no-op, no file
     assert not (tmp_path / "env.csv").exists()
     with pytest.raises(ValueError, match="must be a positive trial index"):
-        sink.write(0, {"drop_rate": 0.0})
+        sink.write(0, {"ball_speed": 1})
 
 
 def test_csv_sink_writes_header_then_rows(tmp_path: Path) -> None:
     sink = CsvSink(tmp_path / "env.csv")
-    sink.write(1, {"drop_rate": 0.001})
-    sink.write(2, {"drop_rate": 0.01})
+    sink.write(1, {"ball_speed": 2})
+    sink.write(2, {"ball_speed": 3})
     contents = (tmp_path / "env.csv").read_text().strip().splitlines()
     assert contents[0] == "step,env"
     assert contents[1].startswith("1,")
     assert contents[2].startswith("2,")
 
 
-def test_observer_samples_and_stashes_current_env_params() -> None:
-    """before_step samples and stashes the result; persistence is the env's job."""
-    spec = {"drop_rate": EnvParamSpec(values=[0.0, 0.001, 0.01])}
-    observer = EnvParamsObserver(spec, seed=42)
+# --- Observer: resolves candidates from cmd_args, stashes the per-trial sample ---
+
+
+def test_observer_samples_from_cmd_args_and_stashes() -> None:
+    """before_step samples a candidate that lives in cmd_args; persistence is the env's job."""
+    cmd_args = EnvVarCmdArgs(ball_speed=[1, 2, 3])
+    observer = EnvParamsObserver({"ball_speed": EnvParamSpec()}, cmd_args, seed=42)
     test_run = SimpleNamespace(step=3, current_env_params={})
 
     observer.before_step(test_run)
 
-    assert "drop_rate" in test_run.current_env_params
-    assert test_run.current_env_params["drop_rate"] in {0.0, 0.001, 0.01}
+    assert test_run.current_env_params["ball_speed"] in {1, 2, 3}
+
+
+def test_observer_skips_scalar_annotation() -> None:
+    """A scalar cmd_args value is fixed; annotating it is a no-op (nothing to sample)."""
+    cmd_args = EnvVarCmdArgs(ball_speed=2)
+    observer = EnvParamsObserver({"ball_speed": EnvParamSpec()}, cmd_args, seed=42)
+    test_run = SimpleNamespace(step=1, current_env_params={})
+
+    observer.before_step(test_run)
+
+    assert test_run.current_env_params == {}, "a scalar (fixed) cmd_args knob must not be sampled"
 
 
 def test_observer_after_step_is_noop() -> None:
     """after_step must not mutate test_run; CloudAIGymEnv.write_trajectory owns persistence."""
-    observer = EnvParamsObserver({}, seed=0)
+    observer = EnvParamsObserver({}, EnvVarCmdArgs(), seed=0)
     test_run = SimpleNamespace(step=1, current_env_params={"x": 1})
 
     observer.after_step(test_run, observation=[0.0], reward=0.0)
 
     assert test_run.current_env_params == {"x": 1}
+
+
+# --- TestDefinition.validate_env_params: annotation validity (cross-field with cmd_args) ---
+
+
+def test_env_params_uniform_list_is_accepted() -> None:
+    """A cmd_args candidate list annotated with a bare marker validates (uniform sampling)."""
+    tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2])
+    assert "ball_speed" in tdef.env_params
+
+
+def test_env_params_weighted_list_is_accepted() -> None:
+    tdef = _tdef({"ball_speed": EnvParamSpec(weights=[0.7, 0.3])}, ball_speed=[1, 2])
+    assert tdef.env_params["ball_speed"].weights == [0.7, 0.3]
+
+
+def test_env_params_scalar_no_op_is_accepted() -> None:
+    """Annotating a scalar (fixed) knob is tolerated as a no-op marker."""
+    tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=2)
+    assert tdef.cmd_args.ball_speed == 2
+
+
+def test_env_params_unknown_key_rejected() -> None:
+    """env_params keys must name real cmd_args fields (the overlay targets cmd_args)."""
+    with pytest.raises(ValidationError, match="not cmd_args fields"):
+        _tdef({"ghost": EnvParamSpec()}, ball_speed=[1, 2])
+
+
+def test_env_params_weights_on_scalar_rejected() -> None:
+    """Weights require a candidate list; a scalar knob cannot carry them."""
+    with pytest.raises(ValidationError, match="not a candidate list"):
+        _tdef({"ball_speed": EnvParamSpec(weights=[0.7, 0.3])}, ball_speed=2)
+
+
+def test_env_params_weights_length_mismatch_rejected() -> None:
+    """Weights must align 1:1 with the cmd_args candidate list."""
+    with pytest.raises(ValidationError, match="weights length"):
+        _tdef({"ball_speed": EnvParamSpec(weights=[0.5, 0.3, 0.2])}, ball_speed=[1, 2])
+
+
+# --- is_dse_job: env-sampled lists are not search dimensions ---
+
+
+def test_is_dse_job_false_for_env_param_only_workload() -> None:
+    """A cmd_args list that is purely env-sampled is not a search dimension -> not a DSE job."""
+    tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2, 3])
+    assert tdef.is_dse_job is False
+
+
+def test_is_dse_job_true_when_a_real_action_dimension_exists() -> None:
+    """An un-annotated cmd_args list is a real action dimension -> DSE, even alongside env_params."""
+    tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2, 3], paddle_width=[4, 8])
+    assert tdef.is_dse_job is True
diff --git a/tests/test_handlers.py b/tests/test_handlers.py
index 0cd3c20e1..203a35d67 100644
--- a/tests/test_handlers.py
+++ b/tests/test_handlers.py
@@ -434,20 +434,31 @@ def test_handle_dse_job_documents_failure_in_reports_before_raising(
 
 
 def test_validate_dse_env_params_rejects_non_dse(base_tr: TestRun) -> None:
-    base_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])}
+    base_tr.test.env_params = {"ball_speed": EnvParamSpec()}
     scenario = TestScenario(name="s", test_runs=[base_tr])
-    with pytest.raises(TestScenarioParsingError, match="declare env_params but are not DSE jobs"):
+    with pytest.raises(TestScenarioParsingError, match="will not sample them"):
         validate_dse_env_params(scenario)
 
 
-def test_validate_dse_env_params_allows_dse_run(dse_tr: TestRun) -> None:
-    dse_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])}
-    assert dse_tr.is_dse_job is True  # precondition: DSE + env_params is the allowed combination
+def test_validate_dse_env_params_rejects_grid_search(dse_tr: TestRun) -> None:
+    """A DSE job on grid_search exhaustively searches the space, so env_params are noise -> reject."""
+    dse_tr.test.env_params = {"ball_speed": EnvParamSpec()}
+    assert dse_tr.is_dse_job is True  # it IS a DSE job...
+    assert dse_tr.test.agent == "grid_search"  # ...but grid_search ignores env_params
+    with pytest.raises(TestScenarioParsingError, match="will not sample them"):
+        validate_dse_env_params(TestScenario(name="s", test_runs=[dse_tr]))
+
+
+def test_validate_dse_env_params_allows_dse_run(dse_tr: TestRun, stub_agent_name: str) -> None:
+    dse_tr.test.env_params = {"ball_speed": EnvParamSpec()}
+    dse_tr.test.agent = stub_agent_name  # a learning agent (not grid_search) consumes env_params
+    assert dse_tr.is_dse_job is True  # precondition: DSE + learning agent + env_params is allowed
     validate_dse_env_params(TestScenario(name="s", test_runs=[dse_tr]))  # no exception == pass
 
 
-def test_validate_dse_env_params_allows_num_nodes_sweep(base_tr: TestRun) -> None:
-    base_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])}
+def test_validate_dse_env_params_allows_num_nodes_sweep(base_tr: TestRun, stub_agent_name: str) -> None:
+    base_tr.test.env_params = {"ball_speed": EnvParamSpec()}
+    base_tr.test.agent = stub_agent_name
     base_tr.num_nodes = [1, 2]
     assert base_tr.is_dse_job is True  # a num_nodes list sweep makes it DSE, so env_params is allowed
     validate_dse_env_params(TestScenario(name="s", test_runs=[base_tr]))  # no exception == pass
@@ -462,7 +473,7 @@ def test_validate_dse_env_params_allows_non_dse_without_env_params(base_tr: Test
 def test_verify_test_scenarios_rejects_env_params_without_dse(
     base_tr: TestRun, monkeypatch: pytest.MonkeyPatch
 ) -> None:
-    base_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])}
+    base_tr.test.env_params = {"ball_speed": EnvParamSpec()}
     bad = TestScenario(name="s", test_runs=[base_tr])
     monkeypatch.setattr(Parser, "parse_tests", lambda *a, **k: [])
     monkeypatch.setattr(Parser, "parse_hooks", lambda *a, **k: {})
@@ -470,8 +481,11 @@ def test_verify_test_scenarios_rejects_env_params_without_dse(
     assert verify_test_scenarios([Path("dummy.toml")], [], [], []) == 1
 
 
-def test_verify_test_scenarios_allows_env_params_with_dse(dse_tr: TestRun, monkeypatch: pytest.MonkeyPatch) -> None:
-    dse_tr.test.env_params = {"drop_rate": EnvParamSpec(values=[0.0, 0.1])}
+def test_verify_test_scenarios_allows_env_params_with_dse(
+    dse_tr: TestRun, stub_agent_name: str, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    dse_tr.test.env_params = {"ball_speed": EnvParamSpec()}
+    dse_tr.test.agent = stub_agent_name  # learning agent (not grid_search)
     good = TestScenario(name="s", test_runs=[dse_tr])
     monkeypatch.setattr(Parser, "parse_tests", lambda *a, **k: [])
     monkeypatch.setattr(Parser, "parse_hooks", lambda *a, **k: {})

From d59572ea188509c8b07da0172b0310f8e0d194c9 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Tue, 23 Jun 2026 20:56:35 -0400
Subject: [PATCH 03/22] fix(configurator): tighten env_params validation, align
 env.csv, fix pyright

- validate_env_params: reject structured (non-leaf) cmd_args targets. The
  observer cannot sample them, yet param_space/is_dse_job exclude the whole
  key, which would silently drop nested action dimensions.
- CloudAIGymEnv.write_trajectory: rebind the env.csv sink to the current
  iteration path before each write, so env.csv stays 1:1 aligned with
  trajectory.csv when the env instance is reused across iterations.
- test_env_params: assert the unknown-field rejection via model_validate so the
  negative test no longer trips pyright's call-arg check (CI Linting fix); add a
  structured-target rejection test.
---
 src/cloudai/configurator/cloudai_gym.py |  3 +++
 src/cloudai/models/workload.py          |  8 +++++++-
 tests/test_env_params.py                | 20 +++++++++++++++++---
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/cloudai/configurator/cloudai_gym.py b/src/cloudai/configurator/cloudai_gym.py
index d16a9c63e..895d60ae1 100644
--- a/src/cloudai/configurator/cloudai_gym.py
+++ b/src/cloudai/configurator/cloudai_gym.py
@@ -301,6 +301,9 @@ def write_trajectory(self, entry: TrajectoryEntry):
             writer.writerow([entry.step, entry.action, entry.reward, entry.observation])
 
         if self._env_sink is not None:
+            # current_iteration can advance while this env instance is reused, so rebind the sink to
+            # the current iteration's env.csv (alongside trajectory.csv) to keep the two 1:1 aligned.
+            self._env_sink = CsvSink(self._env_csv_path())
             self._env_sink.write(entry.step, entry.env_params)
 
     @property
diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py
index 8683cc8bb..ae9699754 100644
--- a/src/cloudai/models/workload.py
+++ b/src/cloudai/models/workload.py
@@ -234,9 +234,15 @@ def validate_env_params(self) -> Self:
             raise ValueError(f"env_params keys {unknown} are not cmd_args fields on {type(self.cmd_args).__name__}")
 
         for name, spec in self.env_params.items():
+            value = getattr(self.cmd_args, name, None)
+            if isinstance(value, (dict, BaseModel)):
+                raise ValueError(
+                    f"env_params['{name}'] must target a leaf cmd_args field (scalar or candidate list), "
+                    "not a structured object; param_space/is_dse_job exclude the whole key, which would "
+                    "silently drop nested action dimensions"
+                )
             if spec.weights is None:
                 continue
-            value = getattr(self.cmd_args, name, None)
             if not isinstance(value, list) or len(value) < 2:
                 raise ValueError(
                     f"env_params['{name}'] declares weights but cmd_args.{name} is not a candidate list "
diff --git a/tests/test_env_params.py b/tests/test_env_params.py
index 3cb1ddeab..dedca16e8 100644
--- a/tests/test_env_params.py
+++ b/tests/test_env_params.py
@@ -36,7 +36,7 @@
 from typing import List, Union
 
 import pytest
-from pydantic import ValidationError
+from pydantic import BaseModel, ValidationError
 
 from cloudai.configurator.env_params import (
     CsvSink,
@@ -47,17 +47,24 @@
 from cloudai.models.workload import CmdArgs, TestDefinition
 
 
+class BrickGrid(BaseModel):
+    """A structured (non-leaf) cmd_args field, used to prove env_params rejects such targets."""
+
+    rows: int = 3
+
+
 class EnvVarCmdArgs(CmdArgs):
     """cmd_args with top-level, list-capable fields for env_params annotation tests.
 
     ``ball_speed`` is the env-randomized knob; ``paddle_width`` stands in for an
     ordinary action-space dimension. Both accept either a scalar or a candidate
     list, so a candidate list round-trips through model validation/serialization
-    cleanly.
+    cleanly. ``brick_grid`` is a structured field (not a leaf) for negative tests.
     """
 
     ball_speed: Union[int, List[int]] = 1
     paddle_width: Union[int, List[int]] = 8
+    brick_grid: BrickGrid = BrickGrid()
 
 
 class EnvVarTestDefinition(TestDefinition):
@@ -110,7 +117,7 @@ def test_env_param_spec_rejects_non_finite_weights(bad: float) -> None:
 def test_env_param_spec_rejects_unknown_fields() -> None:
     """Candidate values live in cmd_args, never in the annotation (no ``values`` key)."""
     with pytest.raises(ValidationError):
-        EnvParamSpec(values=[1, 2])
+        EnvParamSpec.model_validate({"values": [1, 2]})
 
 
 # --- Sampler: draws from candidate lists resolved out of cmd_args ---
@@ -234,6 +241,13 @@ def test_env_params_weights_length_mismatch_rejected() -> None:
         _tdef({"ball_speed": EnvParamSpec(weights=[0.5, 0.3, 0.2])}, ball_speed=[1, 2])
 
 
+def test_env_params_structured_target_rejected() -> None:
+    """A structured (non-leaf) cmd_args target is rejected: the observer can't sample it, yet
+    param_space/is_dse_job exclude the whole key, silently dropping nested action dimensions."""
+    with pytest.raises(ValidationError, match="must target a leaf cmd_args field"):
+        _tdef({"brick_grid": EnvParamSpec()})
+
+
 # --- is_dse_job: env-sampled lists are not search dimensions ---
 
 

From ed42f9d0ad12f632bcfcb7c14710dfa0af81c971 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Wed, 24 Jun 2026 06:32:33 -0400
Subject: [PATCH 04/22] fix(workload): reject empty env_params candidate lists
 at build time

An unweighted env_params spec skipped the candidate-list check, so an empty cmd_args list
(e.g. ball_speed = []) passed validation and only failed later in EnvParamsSampler.sample()
via rng.choice([]) (IndexError). Guard against an empty candidate list in validate_env_params
so the error surfaces at TestDefinition build time. Addresses CodeRabbit feedback.
---
 src/cloudai/models/workload.py | 5 +++++
 tests/test_env_params.py       | 7 +++++++
 2 files changed, 12 insertions(+)

diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py
index ae9699754..d952efbd5 100644
--- a/src/cloudai/models/workload.py
+++ b/src/cloudai/models/workload.py
@@ -241,6 +241,11 @@ def validate_env_params(self) -> Self:
                     "not a structured object; param_space/is_dse_job exclude the whole key, which would "
                     "silently drop nested action dimensions"
                 )
+            if isinstance(value, list) and not value:
+                raise ValueError(
+                    f"env_params['{name}'] references an empty candidate list in cmd_args.{name}; "
+                    "provide at least one candidate (the sampler would otherwise fail on an empty draw)"
+                )
             if spec.weights is None:
                 continue
             if not isinstance(value, list) or len(value) < 2:
diff --git a/tests/test_env_params.py b/tests/test_env_params.py
index dedca16e8..726a7ed07 100644
--- a/tests/test_env_params.py
+++ b/tests/test_env_params.py
@@ -248,6 +248,13 @@ def test_env_params_structured_target_rejected() -> None:
         _tdef({"brick_grid": EnvParamSpec()})
 
 
+def test_env_params_empty_candidate_list_rejected() -> None:
+    """An empty candidate list is rejected at build time: an unweighted spec would otherwise skip
+    validation and let the sampler fail later on an empty draw (rng.choice([]))."""
+    with pytest.raises(ValidationError, match="empty candidate list"):
+        _tdef({"ball_speed": EnvParamSpec()}, ball_speed=[])
+
+
 # --- is_dse_job: env-sampled lists are not search dimensions ---
 
 

From 9412c7bae2a0308f2839570a04659c880934e703 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Wed, 24 Jun 2026 18:23:31 -0400
Subject: [PATCH 05/22] refactor(configurator): model env_params as cohesive
 EnvParam/EnvParams value objects

Replace the EnvParamsSampler class and the StepObserver/EnvParamsObserver
indirection with two frozen dataclasses: EnvParam (one resolved knob: candidates,
optional weights, single draw) and EnvParams (per-run knobs + seed, built via
from_test, sampled per trial). The sampling RNG lives in the env: step() draws
this trial's values and hands concrete values to TestRun.apply_params_set(action,
env_params=...), which overlays action and sample through one deterministic path.

Centralize the cmd_args -> env_params lookup in TestDefinition.is_env_sampled and
access current_env_params directly. Expand EnvParam/EnvParams unit tests to cover
draw, from_test, sample, and immutability.
---
 src/cloudai/_core/test_scenario.py      |  33 +++--
 src/cloudai/configurator/cloudai_gym.py |  56 ++------
 src/cloudai/configurator/env_params.py  | 145 ++++++++-----------
 src/cloudai/models/workload.py          |   8 +-
 tests/test_cloudaigym.py                |  38 ++---
 tests/test_env_params.py                | 180 +++++++++++++++++++-----
 6 files changed, 260 insertions(+), 200 deletions(-)

diff --git a/src/cloudai/_core/test_scenario.py b/src/cloudai/_core/test_scenario.py
index 26f22e7e0..aa03cc591 100644
--- a/src/cloudai/_core/test_scenario.py
+++ b/src/cloudai/_core/test_scenario.py
@@ -159,7 +159,7 @@ def param_space(self) -> dict[str, Any]:
                 for key, value in cmd_args_dict.items()
                 if isinstance(value, list)
                 and not self.test.is_dse_excluded_arg(key)
-                and key.split(".", 1)[0] not in self.test.env_params
+                and not self.test.is_env_sampled(key)
             },
             **{f"extra_env_vars.{key}": value for key, value in extra_env_vars_dict.items() if isinstance(value, list)},
         }
@@ -187,20 +187,30 @@ def all_combinations(self) -> list[dict[str, Any]]:
 
         return all_combinations
 
-    def apply_params_set(self, action: dict[str, Any]) -> "TestRun":
+    def apply_params_set(self, action: dict[str, Any], env_params: dict[str, Any] | None = None) -> "TestRun":
         tdef = self.test.model_copy(deep=True)
-        for key, value in action.items():
+
+        def _apply(key: str, value: Any) -> None:
             if key.startswith("extra_env_vars."):
                 tdef.extra_env_vars[key[len("extra_env_vars.") :]] = value
+                return
+            attrs = key.split(".")
+            obj = tdef.cmd_args
+            for attr in attrs[:-1]:
+                obj = obj[attr] if isinstance(obj, dict) else getattr(obj, attr)
+            if isinstance(obj, dict):
+                obj[attrs[-1]] = value
             else:
-                attrs = key.split(".")
-                obj = tdef.cmd_args
-                for attr in attrs[:-1]:
-                    obj = obj[attr] if isinstance(obj, dict) else getattr(obj, attr)
-                if isinstance(obj, dict):
-                    obj[attrs[-1]] = value
-                else:
-                    setattr(obj, attrs[-1], value)
+                setattr(obj, attrs[-1], value)
+
+        # The agent's chosen action and this trial's sampled env_params are both just concrete
+        # values written onto cmd_args, applied through the one path here. env_params keys name
+        # cmd_args fields; sampling (the RNG) happens in the env before this call, so this method
+        # stays deterministic.
+        for key, value in action.items():
+            _apply(key, value)
+        for key, value in (env_params or {}).items():
+            _apply(key, value)
 
         type(tdef)(**tdef.model_dump())  # trigger validation
 
@@ -208,6 +218,7 @@ def apply_params_set(self, action: dict[str, Any]) -> "TestRun":
         new_tr.test = tdef
         if "NUM_NODES" in action:
             new_tr.num_nodes = action["NUM_NODES"]
+        new_tr.current_env_params = dict(env_params or {})
         return new_tr
 
 
diff --git a/src/cloudai/configurator/cloudai_gym.py b/src/cloudai/configurator/cloudai_gym.py
index 895d60ae1..c85ab0263 100644
--- a/src/cloudai/configurator/cloudai_gym.py
+++ b/src/cloudai/configurator/cloudai_gym.py
@@ -19,14 +19,14 @@
 import dataclasses
 import logging
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, Optional, Tuple
 
 from cloudai.core import METRIC_ERROR, BaseRunner, Registry, TestRun
 from cloudai.util.lazy_imports import lazy
 
 from .base_agent import RewardOverrides
 from .base_gym import BaseGym
-from .env_params import CsvSink, EnvParamsObserver, EnvParamsSink, StepObserver
+from .env_params import CsvSink, EnvParams, EnvParamsSink
 
 
 @dataclasses.dataclass(frozen=True)
@@ -63,24 +63,12 @@ def __init__(self, test_run: TestRun, runner: BaseRunner, rewards: RewardOverrid
         self.max_steps = test_run.test.agent_steps
         self.reward_function = Registry().get_reward_function(test_run.test.agent_reward_function)
         self.trajectory: dict[int, list[TrajectoryEntry]] = {}
-        self._env_sink: EnvParamsSink | None = None
-        self.observers: List[StepObserver] = self._build_observers()
+        # Resolve env_params once, at formulation: None unless the workload declares something to
+        # sample, so a non-DR run carries no env-params state and pays zero per-step overhead.
+        self.params: EnvParams | None = EnvParams.from_test(test_run.test)
+        self._env_sink: EnvParamsSink | None = CsvSink(self._env_csv_path()) if self.params else None
         super().__init__()
 
-    def _build_observers(self) -> List[StepObserver]:
-        """
-        Construct the per-step observers implied by the TestDefinition.
-
-        Workloads opt in to env_params via a TOML ``[env_params.<name>]`` block;
-        an empty mapping yields no observers and zero overhead.
-        """
-        observers: List[StepObserver] = []
-        if self.test_run.test.env_params:
-            seed = int((self.test_run.test.agent_config or {}).get("random_seed", 0))
-            self._env_sink = CsvSink(self._env_csv_path())
-            observers.append(EnvParamsObserver(self.test_run.test.env_params, self.test_run.test.cmd_args, seed))
-        return observers
-
     def _env_csv_path(self) -> Path:
         """``env.csv`` lives alongside ``trajectory.csv`` so a plain ``merge`` joins them."""
         return self.trajectory_file_path.parent / "env.csv"
@@ -141,23 +129,11 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]:
                 - info (dict): Additional info for debugging.
         """
         self.test_run.increment_step()
-        self.test_run = self.test_run.apply_params_set(action)
-
-        for observer in self.observers:
-            observer.before_step(self.test_run)
-
-        # Overlay this trial's sampled env_params onto cmd_args so the workload actually
-        # runs with the sampled values - the env-side twin of apply_params_set(action).
-        # Sampling, persistence (env.csv), and the trajectory cache key are handled
-        # separately; this is the single, workload-agnostic injection point. Keys are
-        # validated to be cmd_args fields at TestDefinition build time; we re-filter to
-        # those fields here so a programmatically-built run can't inject unknown attrs.
-        if self.test_run.current_env_params:
-            cmd_args = self.test_run.test.cmd_args
-            fields = getattr(type(cmd_args), "model_fields", {})
-            overlay = {k: v for k, v in self.test_run.current_env_params.items() if k in fields}
-            if overlay:
-                self.test_run.test.cmd_args = cmd_args.model_copy(update=overlay)
+        # Sample this trial's env_params (the RNG lives here, in the env), then apply the agent's
+        # action and the sample together: apply_params_set overlays both onto cmd_args and records
+        # current_env_params, so the workload runs with the sampled values and the cache key sees them.
+        sampled_env_params = self.params.sample(self.test_run.step) if self.params else {}
+        self.test_run = self.test_run.apply_params_set(action, env_params=sampled_env_params)
 
         cached_result = self.get_cached_trajectory_result(action)
         if cached_result is not None:
@@ -175,8 +151,6 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]:
                     env_params=dict(self.test_run.current_env_params),
                 )
             )
-            for observer in self.observers:
-                observer.after_step(self.test_run, cached_result.observation, cached_result.reward)
             return cached_result.observation, cached_result.reward, False, {}
 
         if not self.test_run.test.constraint_check(self.test_run, self.runner.system):
@@ -221,9 +195,6 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]:
             )
         )
 
-        for observer in self.observers:
-            observer.after_step(self.test_run, observation, reward)
-
         return observation, reward, False, {}
 
     def render(self, mode: str = "human"):
@@ -324,12 +295,11 @@ def get_cached_trajectory_result(self, action: Any) -> TrajectoryEntry | None:
         env_params on both sides is the back-compat path for workloads that
         do not declare any ``[env_params.*]`` block.
         """
-        current_env_params = getattr(self.test_run, "current_env_params", {}) or {}
+        current_env_params = self.test_run.current_env_params
         for entry in self.current_trajectory:
             if not self._values_match_exact(entry.action, action):
                 continue
-            entry_env = getattr(entry, "env_params", {}) or {}
-            if self._values_match_exact(entry_env, current_env_params):
+            if self._values_match_exact(entry.env_params, current_env_params):
                 return entry
 
         return None
diff --git a/src/cloudai/configurator/env_params.py b/src/cloudai/configurator/env_params.py
index ddd26758e..b727212ef 100644
--- a/src/cloudai/configurator/env_params.py
+++ b/src/cloudai/configurator/env_params.py
@@ -21,22 +21,14 @@
 ``cmd_args`` (a plain list - the single source of truth, exactly like an
 action-space dimension) but which the environment *samples* per trial rather
 than letting the agent search it. ``env_params`` is the annotation that marks
-such a field: ``env_params.<name>`` reclassifies ``cmd_args.<name>`` from
-action-space to env-sampled and carries only *how* to sample (optional
-``weights``), never the values. The policy learns a robust mapping under that
-variation; the knob never enters the agent's action space.
-
-This module owns the annotation schema (``EnvParamSpec``), the deterministic
-sampler (``EnvParamsSampler``), the persistence interface
-(``EnvParamsSink`` + ``CsvSink``) and the per-step observer
-(``EnvParamsObserver``). ``CloudAIGymEnv`` consumes these directly so the
-artifacts (``env.csv``) and the cache key align 1:1 with ``trajectory.csv``
-regardless of agent or workload.
+such a field; it carries only *how* to sample (optional ``weights``), never the
+values, and the knob never enters the agent's action space.
 """
 
 from __future__ import annotations
 
 import csv
+import dataclasses
 import math
 import random
 from pathlib import Path
@@ -78,41 +70,67 @@ def _validate_weights(self) -> Self:
         return self
 
 
-class EnvParamsSampler:
+@dataclasses.dataclass(frozen=True)
+class EnvParam:
     """
-    Per-trial categorical sampler over candidate lists.
+    One env-sampled knob, resolved from cmd_args: its candidate values and optional weights.
 
-    Candidates are resolved from ``cmd_args`` by the caller and passed in as
-    ``{name: [v0, v1, ...]}``; optional ``weights`` mirror that mapping.
+    Weights (when present) are positional, aligned 1:1 with ``candidates``; ``None`` means
+    uniform sampling. Keeping the two together makes each knob a self-contained draw.
+    """
+
+    candidates: List[Any]
+    weights: Optional[List[float]] = None
+
+    def draw(self, rng: random.Random) -> Any:
+        if self.weights is not None:
+            return rng.choices(self.candidates, weights=self.weights, k=1)[0]
+        return rng.choice(self.candidates)
 
-    Determinism contract: ``sample(t)`` returns the same dict on every call
-    (across processes) for the same ``(seed, candidates, t)``.
 
-    Independence contract: each parameter uses an RNG seeded by
-    ``f"{seed}:{name}:{trial}"`` so adding or removing an unrelated
-    parameter does not perturb existing parameters' draw sequences.
+@dataclasses.dataclass(frozen=True)
+class EnvParams:
     """
+    Resolved env-parameter sampling state for one run.
 
-    def __init__(
-        self,
-        candidates: Dict[str, List[Any]],
-        seed: int,
-        weights: Optional[Dict[str, List[float]]] = None,
-    ) -> None:
-        self._candidates = candidates
-        self._weights = weights or {}
-        self._seed = seed
+    Built via ``from_test`` only when a workload actually declares list-valued env_params;
+    otherwise ``from_test`` returns ``None`` and the env carries no env-params state at all.
+    Owns the per-parameter :class:`EnvParam` draws (resolved from ``cmd_args``, the single source
+    of truth) and the seed, and draws one value per parameter per trial.
+    """
+
+    params: Dict[str, EnvParam]
+    seed: int
+
+    @classmethod
+    def from_test(cls, test: Any) -> Optional["EnvParams"]:
+        """
+        Resolve a TestDefinition's env_params annotations, or ``None`` if nothing is sampled.
+
+        A field whose ``cmd_args`` value is a scalar is fixed: the annotation is a no-op and
+        the field is skipped. With no list-valued field left, there is nothing to sample and
+        we return ``None`` so callers stay on the zero-overhead path.
+        """
+        params: Dict[str, EnvParam] = {}
+        for name, spec in test.env_params.items():
+            value = getattr(test.cmd_args, name, None)
+            if not isinstance(value, list):
+                continue
+            params[name] = EnvParam(candidates=value, weights=spec.weights)
+        if not params:
+            return None
+        seed = int((test.agent_config or {}).get("random_seed", 0))
+        return cls(params=params, seed=seed)
 
     def sample(self, trial: int) -> Dict[str, Any]:
-        out: Dict[str, Any] = {}
-        for name, choices in self._candidates.items():
-            rng = random.Random(f"{self._seed}:{name}:{trial}")
-            weights = self._weights.get(name)
-            if weights is not None:
-                out[name] = rng.choices(choices, weights=weights, k=1)[0]
-            else:
-                out[name] = rng.choice(choices)
-        return out
+        """
+        Draw this trial's value for each parameter.
+
+        Determinism: the same ``(seed, name, trial)`` yields the same draw across processes.
+        Independence: each parameter's RNG is seeded ``f"{seed}:{name}:{trial}"`` so adding or
+        removing one parameter never perturbs the others' draw sequences.
+        """
+        return {name: param.draw(random.Random(f"{self.seed}:{name}:{trial}")) for name, param in self.params.items()}
 
 
 @runtime_checkable
@@ -146,54 +164,3 @@ def write(self, step: int, sample: Dict[str, Any]) -> None:
             if new_file:
                 writer.writerow(("step", "env"))
             writer.writerow([step, sample])
-
-
-@runtime_checkable
-class StepObserver(Protocol):
-    """
-    Hook fired by ``CloudAIGymEnv.step()`` around each trial.
-
-    ``before_step`` runs before the cache lookup and before any workload
-    execution. ``after_step`` runs after the trajectory row is written.
-    """
-
-    def before_step(self, test_run: Any) -> None: ...
-
-    def after_step(self, test_run: Any, observation: list, reward: float) -> None: ...
-
-
-class EnvParamsObserver:
-    """
-    Sample env_params per step and stash them for the cache and the workload.
-
-    Construction: resolves each annotated knob's candidate list from
-    ``cmd_args`` (the single source of truth) once, up front. A knob whose
-    ``cmd_args`` value is a scalar (not a list) is fixed - its annotation is a
-    no-op and it is skipped, so nothing is sampled or stashed for it.
-
-    Pre-step: samples the resolved candidates for ``test_run.step`` and stashes
-    the result on ``test_run.current_env_params`` so the cache key and the
-    cmd_args overlay both see it. Persistence is owned by
-    ``CloudAIGymEnv.write_trajectory``, which sinks ``trajectory.csv`` and the
-    ``env.csv`` projection from the single ``TrajectoryEntry`` - so a trial that
-    writes no trajectory row writes no env.csv row either, keeping the two files
-    1:1 step-aligned. Post-step: no-op.
-    """
-
-    def __init__(self, env_params: Dict[str, EnvParamSpec], cmd_args: Any, seed: int) -> None:
-        candidates: Dict[str, List[Any]] = {}
-        weights: Dict[str, List[float]] = {}
-        for name, spec in env_params.items():
-            value = getattr(cmd_args, name, None)
-            if not isinstance(value, list):
-                continue  # scalar cmd_args knob is fixed; the annotation is a no-op
-            candidates[name] = value
-            if spec.weights is not None:
-                weights[name] = spec.weights
-        self._sampler = EnvParamsSampler(candidates, seed=seed, weights=weights)
-
-    def before_step(self, test_run: Any) -> None:
-        test_run.current_env_params = self._sampler.sample(test_run.step)
-
-    def after_step(self, test_run: Any, observation: list, reward: float) -> None:
-        del test_run, observation, reward  # persistence handled by CloudAIGymEnv.write_trajectory
diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py
index d952efbd5..eaa971aa6 100644
--- a/src/cloudai/models/workload.py
+++ b/src/cloudai/models/workload.py
@@ -144,6 +144,10 @@ def installables(self) -> list[Installable]:
     def constraint_check(self, tr: TestRun, system: Optional[System]) -> bool:
         return True
 
+    def is_env_sampled(self, cmd_args_path: str) -> bool:
+        """Whether a cmd_args field is env-sampled (env draws it per trial, not the agent)."""
+        return cmd_args_path in self.env_params
+
     @property
     def is_dse_job(self) -> bool:
         def check_dict(d: dict, parent_key: str = "", skip_env_params: bool = False) -> bool:
@@ -152,9 +156,7 @@ def check_dict(d: dict, parent_key: str = "", skip_env_params: bool = False) ->
                     path = f"{parent_key}.{key}" if parent_key else key
                     if self.is_dse_excluded_arg(path):
                         continue
-                    # env_params lists are sampled by the env, not searched by the agent, so they
-                    # are not action-space dimensions and must not make a run count as DSE.
-                    if skip_env_params and path.split(".", 1)[0] in self.env_params:
+                    if skip_env_params and self.is_env_sampled(path):
                         continue
                     if isinstance(value, list) or (
                         isinstance(value, dict) and check_dict(value, path, skip_env_params)
diff --git a/tests/test_cloudaigym.py b/tests/test_cloudaigym.py
index 975efbe48..8a3d0fb9e 100644
--- a/tests/test_cloudaigym.py
+++ b/tests/test_cloudaigym.py
@@ -518,23 +518,30 @@ def test_cache_hit_when_neither_has_env_params(base_tr: TestRun, tmp_path: Path)
     assert result.step == 1
 
 
-def test_step_reruns_workload_when_env_params_change(nemorun: NeMoRunTestDefinition, tmp_path: Path) -> None:
-    """Integration: env.step() with same action but different env_params re-runs the workload.
+def test_step_reruns_workload_when_env_params_change(tmp_path: Path) -> None:
+    """Integration: two env.step() calls with the same action but different sampled env_params re-run.
 
     Counterpart to test_cache_miss_when_env_params_differ but exercising the
-    full step() flow: increment_step -> apply_params_set -> cache lookup ->
-    runner.run() -> write_trajectory.
+    full step() flow: increment_step -> sample env_params -> apply_params_set ->
+    cache lookup -> runner.run() -> write_trajectory. With seed 42 the sampler
+    draws ball_speed=3 then ball_speed=1 on the two consecutive trials, so the
+    cache key differs and the workload must re-run both times.
     """
-    tdef = nemorun.model_copy(deep=True)
-    tdef.cmd_args.data.global_batch_size = 8
-    tdef.agent_metrics = ["default"]
+    tdef = EnvVarTestDefinition(
+        name="dr",
+        description="dr",
+        test_template_name="dr_template",
+        cmd_args=EnvVarCmdArgs(ball_speed=[1, 2, 3]),
+        env_params={"ball_speed": EnvParamSpec()},
+        agent_metrics=["default"],
+        agent_config={"random_seed": 42},
+    )
     test_run = TestRun(
         name="dr_tr",
         test=tdef,
         num_nodes=1,
         nodes=[],
         output_path=tmp_path / "out" / "dr_tr" / "0",
-        reports={NeMoRunReportGenerationStrategy},
     )
     test_scenario = TestScenario(name="dr_scenario", test_runs=[test_run])
 
@@ -548,19 +555,16 @@ def test_step_reruns_workload_when_env_params_change(nemorun: NeMoRunTestDefinit
     runner.get_job_output_path.return_value = test_run.output_path
 
     env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides())
-    action = {"trainer.max_steps": 1000}
+    action = {"paddle_width": 4}
     fake_obs = iter([[100.0], [50.0]])
 
     with patch.object(env, "get_observation", side_effect=lambda _action: next(fake_obs)):
         env.test_run.step = 0
-        env.test_run.current_env_params = {"ball_speed": 1}
-        obs1, _r1, *_ = env.step(action)
-
-        env.test_run.current_env_params = {"ball_speed": 2}
-        obs2, _r2, *_ = env.step(action)
+        obs1, _r1, *_ = env.step(action)  # samples ball_speed=3
+        obs2, _r2, *_ = env.step(action)  # samples ball_speed=1
 
     assert runner.run.call_count == 2, (
-        "Different env_params between two env.step() calls with the same action "
+        "Different sampled env_params between two env.step() calls with the same action "
         "must trigger a workload re-run; the cache lookup must miss."
     )
     assert obs1 != obs2, "fresh workload run should produce a fresh observation"
@@ -722,7 +726,7 @@ def test_step_cache_hit_with_declared_env_params_still_writes_env_csv(tmp_path:
     runner.get_job_output_path.return_value = test_run.output_path
 
     env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides())
-    assert env.observers, "TestDefinition.env_params declared -> observer must be built"
+    assert env.params is not None, "TestDefinition.env_params declared -> EnvParams must be built"
 
     expected_sample = {"ball_speed": _random.Random("42:ball_speed:1").choice([1, 2, 3])}
     action = {"paddle_width": 4}
@@ -838,5 +842,5 @@ def test_no_env_csv_when_env_params_not_declared(nemorun: NeMoRunTestDefinition,
 
     env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides())
 
-    assert env.observers == [], "no env_params declared -> no per-step observers"
+    assert env.params is None, "no env_params declared -> no EnvParams object"
     assert not env._env_csv_path().exists()
diff --git a/tests/test_env_params.py b/tests/test_env_params.py
index 726a7ed07..e0b08f6cb 100644
--- a/tests/test_env_params.py
+++ b/tests/test_env_params.py
@@ -31,8 +31,9 @@
 
 from __future__ import annotations
 
+import dataclasses
+import random
 from pathlib import Path
-from types import SimpleNamespace
 from typing import List, Union
 
 import pytest
@@ -40,9 +41,9 @@
 
 from cloudai.configurator.env_params import (
     CsvSink,
-    EnvParamsObserver,
+    EnvParam,
+    EnvParams,
     EnvParamSpec,
-    EnvParamsSampler,
 )
 from cloudai.models.workload import CmdArgs, TestDefinition
 
@@ -120,31 +121,84 @@ def test_env_param_spec_rejects_unknown_fields() -> None:
         EnvParamSpec.model_validate({"values": [1, 2]})
 
 
-# --- Sampler: draws from candidate lists resolved out of cmd_args ---
+# --- EnvParam: one resolved knob - candidate values, optional weights, a single draw ---
+
+
+def test_env_param_defaults_to_unweighted() -> None:
+    """A knob built without weights samples uniformly (weights is None)."""
+    assert EnvParam(candidates=[1, 2, 3]).weights is None
+
+
+def test_env_param_draw_returns_a_candidate() -> None:
+    """draw() always yields one of the knob's own candidate values."""
+    knob = EnvParam(candidates=[1, 2, 3])
+    assert all(knob.draw(random.Random(s)) in {1, 2, 3} for s in range(50))
+
+
+def test_env_param_draw_is_reproducible_for_a_given_rng() -> None:
+    """draw() consumes the caller's RNG (no internal seeding), so equal RNG state yields equal draws."""
+    knob = EnvParam(candidates=[10, 20, 30, 40])
+    assert knob.draw(random.Random(123)) == knob.draw(random.Random(123))
+
+
+def test_env_param_draw_honors_degenerate_weights() -> None:
+    """A degenerate weight ([1, 0]) collapses the draw onto the first candidate, whatever the RNG."""
+    knob = EnvParam(candidates=[1, 2], weights=[1.0, 0.0])
+    assert all(knob.draw(random.Random(s)) == 1 for s in range(50))
+
+
+def test_env_param_is_immutable() -> None:
+    """Frozen value object: a resolved knob cannot be mutated after construction."""
+    knob = EnvParam(candidates=[1, 2, 3])
+    with pytest.raises(dataclasses.FrozenInstanceError):
+        knob.candidates = [9]  # pyright: ignore[reportAttributeAccessIssue]
+
+
+# --- EnvParams.sample: draws from candidate lists resolved out of cmd_args ---
 
 
 def test_sampler_is_deterministic_across_calls() -> None:
-    candidates = {"ball_speed": [1, 2, 3]}
-    a = EnvParamsSampler(candidates, seed=42)
-    b = EnvParamsSampler(candidates, seed=42)
-    seq_a = [a.sample(t) for t in range(1, 6)]
-    seq_b = [b.sample(t) for t in range(1, 6)]
-    assert seq_a == seq_b, "same (seed, trial) must produce the same draw across instances"
+    env_params = EnvParams(params={"ball_speed": EnvParam(candidates=[1, 2, 3])}, seed=42)
+    seq_a = [env_params.sample(t) for t in range(1, 6)]
+    seq_b = [env_params.sample(t) for t in range(1, 6)]
+    assert seq_a == seq_b, "same (seed, trial) must produce the same draw across calls"
 
 
 def test_sampler_each_param_is_independent() -> None:
     """Adding an unrelated parameter must not perturb existing parameters' draws."""
-    base = {"ball_speed": [1, 2, 3]}
-    extended = {"ball_speed": [1, 2, 3], "brick_rows": [3, 4, 5]}
-    a = [EnvParamsSampler(base, seed=7).sample(t)["ball_speed"] for t in range(1, 11)]
-    b = [EnvParamsSampler(extended, seed=7).sample(t)["ball_speed"] for t in range(1, 11)]
+    base = EnvParams(params={"ball_speed": EnvParam(candidates=[1, 2, 3])}, seed=7)
+    extended = EnvParams(
+        params={"ball_speed": EnvParam(candidates=[1, 2, 3]), "brick_rows": EnvParam(candidates=[3, 4, 5])},
+        seed=7,
+    )
+    a = [base.sample(t)["ball_speed"] for t in range(1, 11)]
+    b = [extended.sample(t)["ball_speed"] for t in range(1, 11)]
     assert a == b, "per-parameter RNG seeding must isolate parameters from each other"
 
 
 def test_sampler_honors_weights() -> None:
     """A degenerate weight ([1, 0]) must always pick the first candidate."""
-    sampler = EnvParamsSampler({"ball_speed": [1, 2]}, seed=1, weights={"ball_speed": [1.0, 0.0]})
-    assert all(sampler.sample(t)["ball_speed"] == 1 for t in range(1, 20))
+    env_params = EnvParams(params={"ball_speed": EnvParam(candidates=[1, 2], weights=[1.0, 0.0])}, seed=1)
+    assert all(env_params.sample(t)["ball_speed"] == 1 for t in range(1, 20))
+
+
+def test_sample_covers_all_declared_params() -> None:
+    """sample() returns exactly one value per declared knob, each drawn from that knob's candidates."""
+    env_params = EnvParams(
+        params={"ball_speed": EnvParam(candidates=[1, 2]), "paddle_width": EnvParam(candidates=[4, 8])},
+        seed=3,
+    )
+    drawn = env_params.sample(5)
+    assert set(drawn) == {"ball_speed", "paddle_width"}
+    assert drawn["ball_speed"] in {1, 2}
+    assert drawn["paddle_width"] in {4, 8}
+
+
+def test_env_params_is_immutable() -> None:
+    """Frozen value object: resolved sampling state cannot be mutated after construction."""
+    env_params = EnvParams(params={"ball_speed": EnvParam(candidates=[1, 2])}, seed=0)
+    with pytest.raises(dataclasses.FrozenInstanceError):
+        env_params.seed = 1  # pyright: ignore[reportAttributeAccessIssue]
 
 
 # --- CsvSink: unchanged persistence contract ---
@@ -168,39 +222,91 @@ def test_csv_sink_writes_header_then_rows(tmp_path: Path) -> None:
     assert contents[2].startswith("2,")
 
 
-# --- Observer: resolves candidates from cmd_args, stashes the per-trial sample ---
+# --- EnvParams.from_test: resolves candidate lists from cmd_args, once at env formulation ---
+
 
+def test_env_params_from_test_resolves_list_candidate() -> None:
+    """A list-valued cmd_args field resolves to its candidate list; sampling then draws from it."""
+    env_params = EnvParams.from_test(_tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2, 3]))
 
-def test_observer_samples_from_cmd_args_and_stashes() -> None:
-    """before_step samples a candidate that lives in cmd_args; persistence is the env's job."""
-    cmd_args = EnvVarCmdArgs(ball_speed=[1, 2, 3])
-    observer = EnvParamsObserver({"ball_speed": EnvParamSpec()}, cmd_args, seed=42)
-    test_run = SimpleNamespace(step=3, current_env_params={})
+    assert env_params is not None
+    assert env_params.params == {"ball_speed": EnvParam(candidates=[1, 2, 3], weights=None)}
+    assert env_params.sample(3)["ball_speed"] in {1, 2, 3}
 
-    observer.before_step(test_run)
 
-    assert test_run.current_env_params["ball_speed"] in {1, 2, 3}
+def test_env_params_from_test_carries_weights() -> None:
+    """Weighted annotations propagate their weights alongside the resolved candidates."""
+    env_params = EnvParams.from_test(_tdef({"ball_speed": EnvParamSpec(weights=[0.7, 0.3])}, ball_speed=[1, 2]))
 
+    assert env_params is not None
+    assert env_params.params["ball_speed"].weights == [0.7, 0.3]
 
-def test_observer_skips_scalar_annotation() -> None:
-    """A scalar cmd_args value is fixed; annotating it is a no-op (nothing to sample)."""
-    cmd_args = EnvVarCmdArgs(ball_speed=2)
-    observer = EnvParamsObserver({"ball_speed": EnvParamSpec()}, cmd_args, seed=42)
-    test_run = SimpleNamespace(step=1, current_env_params={})
 
-    observer.before_step(test_run)
+def test_env_params_from_test_reads_seed_from_agent_config() -> None:
+    """The sampler seed comes from agent_config.random_seed so a run is reproducible end to end."""
+    tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2, 3])
+    tdef.agent_config = {"random_seed": 42}
+
+    env_params = EnvParams.from_test(tdef)
+
+    assert env_params is not None and env_params.seed == 42
+
+
+def test_env_params_from_test_defaults_seed_to_zero_without_agent_config() -> None:
+    """No agent_config (the default) -> seed 0, so a declared-but-unseeded run still samples."""
+    env_params = EnvParams.from_test(_tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2, 3]))
+
+    assert env_params is not None and env_params.seed == 0
+
+
+def test_env_params_from_test_defaults_seed_to_zero_when_key_absent() -> None:
+    """agent_config present but without random_seed still falls back to seed 0."""
+    tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2, 3])
+    tdef.agent_config = {"other": 1}
+
+    env_params = EnvParams.from_test(tdef)
+
+    assert env_params is not None and env_params.seed == 0
+
+
+def test_env_params_from_test_resolves_multiple_params() -> None:
+    """Every list-valued annotation becomes its own knob, candidates resolved from cmd_args."""
+    env_params = EnvParams.from_test(
+        _tdef(
+            {"ball_speed": EnvParamSpec(), "paddle_width": EnvParamSpec()},
+            ball_speed=[1, 2],
+            paddle_width=[4, 8],
+        )
+    )
+
+    assert env_params is not None
+    assert set(env_params.params) == {"ball_speed", "paddle_width"}
+    assert env_params.params["ball_speed"].candidates == [1, 2]
+    assert env_params.params["paddle_width"].candidates == [4, 8]
+
+
+def test_env_params_from_test_skips_scalar_keeps_list() -> None:
+    """A mix of scalar and list annotations yields only the list-valued field as a knob."""
+    env_params = EnvParams.from_test(
+        _tdef(
+            {"ball_speed": EnvParamSpec(), "paddle_width": EnvParamSpec()},
+            ball_speed=[1, 2, 3],
+            paddle_width=8,
+        )
+    )
 
-    assert test_run.current_env_params == {}, "a scalar (fixed) cmd_args knob must not be sampled"
+    assert env_params is not None
+    assert set(env_params.params) == {"ball_speed"}
 
 
-def test_observer_after_step_is_noop() -> None:
-    """after_step must not mutate test_run; CloudAIGymEnv.write_trajectory owns persistence."""
-    observer = EnvParamsObserver({}, EnvVarCmdArgs(), seed=0)
-    test_run = SimpleNamespace(step=1, current_env_params={"x": 1})
+def test_env_params_from_test_none_when_only_scalar_annotation() -> None:
+    """A scalar cmd_args value is fixed; with nothing list-valued to sample, from_test is None."""
+    assert EnvParams.from_test(_tdef({"ball_speed": EnvParamSpec()}, ball_speed=2)) is None
 
-    observer.after_step(test_run, observation=[0.0], reward=0.0)
 
-    assert test_run.current_env_params == {"x": 1}
+def test_env_params_from_test_none_when_no_env_params() -> None:
+    """No annotations declared -> no EnvParams object (the zero-overhead path)."""
+    assert EnvParams.from_test(_tdef({})) is None
 
 
 # --- TestDefinition.validate_env_params: annotation validity (cross-field with cmd_args) ---

From af3b6a811f6080cd9c6ddbfe3dd6c48f11a25807 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Thu, 25 Jun 2026 12:01:22 -0400
Subject: [PATCH 06/22] refactor(configurator): collapse env_params sink into
 one concrete class

Drop the EnvParamsSink Protocol + CsvSink pair (and runtime_checkable) for a
single concrete EnvParamsSink, built unconditionally in CloudAIGymEnv. The
sink is now stateless: write() takes the record path per call and skips empty
samples, so non-DR runs write nothing and write_trajectory needs no branch.

Derive both records from a new iteration_dir property and expose the env
record via the env_params_record_path property (was _env_csv_path), keeping
env.csv and trajectory.csv step-aligned without coupling the name to CSV.
---
 src/cloudai/configurator/cloudai_gym.py | 22 ++++++++++++----------
 src/cloudai/configurator/env_params.py  | 25 +++++++++----------------
 tests/test_cloudaigym.py                |  8 ++++----
 tests/test_env_params.py                | 22 ++++++++++++----------
 4 files changed, 37 insertions(+), 40 deletions(-)

diff --git a/src/cloudai/configurator/cloudai_gym.py b/src/cloudai/configurator/cloudai_gym.py
index c85ab0263..ac9d5cd34 100644
--- a/src/cloudai/configurator/cloudai_gym.py
+++ b/src/cloudai/configurator/cloudai_gym.py
@@ -26,7 +26,7 @@
 
 from .base_agent import RewardOverrides
 from .base_gym import BaseGym
-from .env_params import CsvSink, EnvParams, EnvParamsSink
+from .env_params import EnvParams, EnvParamsSink
 
 
 @dataclasses.dataclass(frozen=True)
@@ -66,12 +66,13 @@ def __init__(self, test_run: TestRun, runner: BaseRunner, rewards: RewardOverrid
         # Resolve env_params once, at formulation: None unless the workload declares something to
         # sample, so a non-DR run carries no env-params state and pays zero per-step overhead.
         self.params: EnvParams | None = EnvParams.from_test(test_run.test)
-        self._env_sink: EnvParamsSink | None = CsvSink(self._env_csv_path()) if self.params else None
+        self.env_params_sink = EnvParamsSink()
         super().__init__()
 
-    def _env_csv_path(self) -> Path:
+    @property
+    def env_params_record_path(self) -> Path:
         """``env.csv`` lives alongside ``trajectory.csv`` so a plain ``merge`` joins them."""
-        return self.trajectory_file_path.parent / "env.csv"
+        return self.iteration_dir / "env.csv"
 
     def define_action_space(self) -> Dict[str, list[Any]]:
         return self.test_run.param_space
@@ -271,15 +272,16 @@ def write_trajectory(self, entry: TrajectoryEntry):
                 writer.writerow(["step", "action", "reward", "observation"])
             writer.writerow([entry.step, entry.action, entry.reward, entry.observation])
 
-        if self._env_sink is not None:
-            # current_iteration can advance while this env instance is reused, so rebind the sink to
-            # the current iteration's env.csv (alongside trajectory.csv) to keep the two 1:1 aligned.
-            self._env_sink = CsvSink(self._env_csv_path())
-            self._env_sink.write(entry.step, entry.env_params)
+        self.env_params_sink.write(self.env_params_record_path, entry.step, entry.env_params)
+
+    @property
+    def iteration_dir(self) -> Path:
+        """Per-iteration output dir; trajectory.csv and env.csv both live here, step-aligned."""
+        return self.runner.scenario_root / self.test_run.name / f"{self.test_run.current_iteration}"
 
     @property
     def trajectory_file_path(self) -> Path:
-        return self.runner.scenario_root / self.test_run.name / f"{self.test_run.current_iteration}" / "trajectory.csv"
+        return self.iteration_dir / "trajectory.csv"
 
     @property
     def current_trajectory(self) -> list[TrajectoryEntry]:
diff --git a/src/cloudai/configurator/env_params.py b/src/cloudai/configurator/env_params.py
index b727212ef..c23764cc0 100644
--- a/src/cloudai/configurator/env_params.py
+++ b/src/cloudai/configurator/env_params.py
@@ -32,7 +32,7 @@
 import math
 import random
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Protocol, runtime_checkable
+from typing import Any, Dict, List, Optional
 
 from pydantic import BaseModel, ConfigDict, Field, model_validator
 from typing_extensions import Self
@@ -133,33 +133,26 @@ def sample(self, trial: int) -> Dict[str, Any]:
         return {name: param.draw(random.Random(f"{self.seed}:{name}:{trial}")) for name, param in self.params.items()}
 
 
-@runtime_checkable
-class EnvParamsSink(Protocol):
-    """Persist one trial's env_params sample; empty samples must be no-ops."""
-
-    def write(self, step: int, sample: Dict[str, Any]) -> None: ...
-
-
-class CsvSink:
+class EnvParamsSink:
     """
     Append per-trial env_params samples to a step-aligned CSV.
 
     The CSV mirrors how ``trajectory.csv`` serialises its ``action`` column
     (one row per env.step(), sample dict stringified in a single cell) so the
     two files align 1:1 on ``step`` and a plain ``merge`` joins them.
-    """
 
-    def __init__(self, path: Path) -> None:
-        self._path = path
+    Empty samples are skipped, so a run without env_params writes nothing and
+    callers can sink every trial unconditionally.
+    """
 
-    def write(self, step: int, sample: Dict[str, Any]) -> None:
+    def write(self, path: Path, step: int, sample: Dict[str, Any]) -> None:
         if step < 1:
             raise ValueError(f"step must be a positive trial index (cloudai DSE is 1-based); got {step}")
         if not sample:
             return
-        new_file = not self._path.exists()
-        self._path.parent.mkdir(parents=True, exist_ok=True)
-        with self._path.open("a", newline="") as f:
+        new_file = not path.exists()
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with path.open("a", newline="") as f:
             writer = csv.writer(f)
             if new_file:
                 writer.writerow(("step", "env"))
diff --git a/tests/test_cloudaigym.py b/tests/test_cloudaigym.py
index 8a3d0fb9e..177a39dd7 100644
--- a/tests/test_cloudaigym.py
+++ b/tests/test_cloudaigym.py
@@ -612,7 +612,7 @@ def test_env_csv_is_step_aligned_with_trajectory(tmp_path: Path) -> None:
         for action in (action_a, action_b, action_a):
             env.step(action)
 
-    env_csv = env._env_csv_path()
+    env_csv = env.env_params_record_path
     traj_csv = env.trajectory_file_path
     assert env_csv.exists(), "env.csv must be written when env_params is declared"
 
@@ -673,7 +673,7 @@ def test_env_csv_step_alignment_holds_on_constraint_failure(tmp_path: Path) -> N
         for action in ({"paddle_width": 4}, {"paddle_width": 6}, {"paddle_width": 8}):
             env.step(action)
 
-    env_csv = env._env_csv_path()
+    env_csv = env.env_params_record_path
     traj_csv = env.trajectory_file_path
 
     assert env_csv.exists(), "surviving steps declare env_params -> env.csv must exist"
@@ -742,7 +742,7 @@ def test_step_cache_hit_with_declared_env_params_still_writes_env_csv(tmp_path:
     runner.run.assert_not_called()
     assert reward == 0.42 and obs == [0.84]
 
-    env_csv = env._env_csv_path()
+    env_csv = env.env_params_record_path
     assert env_csv.exists(), "cache HIT must NOT skip the observer; env.csv must record the trial"
     env_rows = env_csv.read_text().strip().splitlines()
     assert env_rows[0] == "step,env"
@@ -843,4 +843,4 @@ def test_no_env_csv_when_env_params_not_declared(nemorun: NeMoRunTestDefinition,
     env = CloudAIGymEnv(test_run=test_run, runner=runner, rewards=RewardOverrides())
 
     assert env.params is None, "no env_params declared -> no EnvParams object"
-    assert not env._env_csv_path().exists()
+    assert not env.env_params_record_path.exists()
diff --git a/tests/test_env_params.py b/tests/test_env_params.py
index e0b08f6cb..483fa2e71 100644
--- a/tests/test_env_params.py
+++ b/tests/test_env_params.py
@@ -40,10 +40,10 @@
 from pydantic import BaseModel, ValidationError
 
 from cloudai.configurator.env_params import (
-    CsvSink,
     EnvParam,
     EnvParams,
     EnvParamSpec,
+    EnvParamsSink,
 )
 from cloudai.models.workload import CmdArgs, TestDefinition
 
@@ -201,22 +201,24 @@ def test_env_params_is_immutable() -> None:
         env_params.seed = 1  # pyright: ignore[reportAttributeAccessIssue]
 
 
-# --- CsvSink: unchanged persistence contract ---
+# --- EnvParamsSink: unchanged persistence contract ---
 
 
 def test_csv_sink_skips_empty_samples_and_rejects_zero_step(tmp_path: Path) -> None:
-    sink = CsvSink(tmp_path / "env.csv")
-    sink.write(1, {})  # empty -> no-op, no file
-    assert not (tmp_path / "env.csv").exists()
+    sink = EnvParamsSink()
+    path = tmp_path / "env.csv"
+    sink.write(path, 1, {})  # empty -> no-op, no file
+    assert not path.exists()
     with pytest.raises(ValueError, match="must be a positive trial index"):
-        sink.write(0, {"ball_speed": 1})
+        sink.write(path, 0, {"ball_speed": 1})
 
 
 def test_csv_sink_writes_header_then_rows(tmp_path: Path) -> None:
-    sink = CsvSink(tmp_path / "env.csv")
-    sink.write(1, {"ball_speed": 2})
-    sink.write(2, {"ball_speed": 3})
-    contents = (tmp_path / "env.csv").read_text().strip().splitlines()
+    sink = EnvParamsSink()
+    path = tmp_path / "env.csv"
+    sink.write(path, 1, {"ball_speed": 2})
+    sink.write(path, 2, {"ball_speed": 3})
+    contents = path.read_text().strip().splitlines()
     assert contents[0] == "step,env"
     assert contents[1].startswith("1,")
     assert contents[2].startswith("2,")

From dead658f85962a9fcdf7430df84a5d380b363a02 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Thu, 25 Jun 2026 13:04:58 -0400
Subject: [PATCH 07/22] refactor(configurator): gate env_params sampling on an
 agent capability flag

Replace the hardcoded `agent == "grid_search"` check with a BaseAgent.samples_env_params
capability flag (opt-in, defaults False). Only agents whose search consumes per-trial
env_params sampling set it True; enumerating/surrogate agents leave it False, so a config
that declares env_params for an agent that would ignore them is rejected up front instead
of silently no-op'ing. New agents answer for themselves with no string to maintain.

Relocate validate_dse_env_params out of the CLI handlers into configurator/env_params.py
next to the logic it guards, looking the agent up via the Registry. Unknown agents are
deferred to the dedicated agent-resolution error rather than masked here.

Keep all public-facing comments, docstrings, and the error message generic (no internal
agent names). Cover the full validator matrix, including the unknown-agent deferral.
---
 src/cloudai/cli/handlers.py            | 25 +---------------
 src/cloudai/configurator/base_agent.py | 16 +++++++----
 src/cloudai/configurator/env_params.py | 40 +++++++++++++++++++++++++-
 tests/test_handlers.py                 | 32 +++++++++++++++++----
 4 files changed, 77 insertions(+), 36 deletions(-)

diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py
index f7b9f3844..acc88f72c 100644
--- a/src/cloudai/cli/handlers.py
+++ b/src/cloudai/cli/handlers.py
@@ -27,6 +27,7 @@
 import toml
 import yaml
 
+from cloudai.configurator.env_params import validate_dse_env_params
 from cloudai.core import (
     BaseInstaller,
     CloudAIGymEnv,
@@ -298,30 +299,6 @@ def _check_installation(
     return result
 
 
-def validate_dse_env_params(test_scenario: TestScenario) -> None:
-    """
-    Reject prepped configs that declare env_params but cannot sample them.
-
-    env_params are sampled per-trial by CloudAIGymEnv, but only for a DSE run on a *learning*
-    agent. A non-DSE run has no per-trial loop at all, and grid_search exhaustively enumerates the
-    search space - sampling env_params there would just inject noise into a deterministic sweep with
-    no policy to gain robustness. is_dse_job is a property of the fully prepped config, so this is
-    validated here rather than at parse time.
-    """
-    offenders = [
-        tr.name
-        for tr in test_scenario.test_runs
-        if tr.test.env_params and (not tr.is_dse_job or tr.test.agent == "grid_search")
-    ]
-    if offenders:
-        raise TestScenarioParsingError(
-            f"Tests {offenders} declare env_params but will not sample them. env_params are sampled per-trial "
-            "only by a DSE run on a learning agent (grid_search searches the whole space and ignores them). "
-            "Use a learning agent and add a sweep (a list-valued cmd_args/extra_env_vars entry or num_nodes), "
-            "or remove env_params."
-        )
-
-
 def handle_dry_run_and_run(args: argparse.Namespace) -> int:
     setup_result = _setup_system_and_scenario(args)
     if setup_result is None:
diff --git a/src/cloudai/configurator/base_agent.py b/src/cloudai/configurator/base_agent.py
index 321e6e659..3a6fef75e 100644
--- a/src/cloudai/configurator/base_agent.py
+++ b/src/cloudai/configurator/base_agent.py
@@ -58,6 +58,11 @@ class BaseAgent(ABC):
     Provides a unified interface and parameter management for action spaces.
     """
 
+    # Opt-in capability: only agents whose search consumes per-trial env_params sampling set this
+    # to True. Defaults False so a config that declares env_params for an agent that would ignore
+    # them is rejected up front instead of silently having no effect.
+    samples_env_params: bool = False
+
     def __init__(self, env: BaseGym, config: BaseAgentConfig):
         """
         Initialize the agent with the environment.
@@ -94,9 +99,8 @@ def select_action(self, observation: list[float] | None = None) -> tuple[int, di
 
         Args:
             observation: Latest observation produced by the environment (``env.reset()`` on the
-                first call, then the result of the prior ``env.step()``). Stateless agents such
-                as grid search or Bayesian optimization may ignore this; observation-conditioned
-                agents (RL, contextual bandits) should use it.
+                first call, then the result of the prior ``env.step()``). Stateless agents may
+                ignore this; observation-conditioned agents should use it.
 
         Returns:
             Tuple[int, Dict[str, Any]] | None: The current step index and a dictionary mapping action keys
@@ -120,8 +124,7 @@ def run(self) -> int:
 
         Default: a step loop driven by the dispatcher (``select_action`` →
         ``env.step`` → ``update_policy`` per trial). Agents that drive their
-        own training loop (e.g. RLlib-based agents calling ``algo.train()``)
-        override this method.
+        own training loop override this method.
 
         Failure contract (``handle_dse_job`` consumes the result via
         ``err |= agent.run()``):
@@ -131,7 +134,8 @@ def run(self) -> int:
           accumulated and the next ``TestRun`` still executes. Workload-level
           failures are already surfaced this way: ``CloudAIGymEnv.step`` maps a
           failed metric to ``rewards.metric_failure`` rather than raising, and
-          ``rllib_run`` catches training errors and returns ``rc=1``.
+          agents with their own training loop should likewise catch training
+          errors and return a non-zero code.
         - Raise for *unexpected* failures (framework/agent bugs). Exceptions
           propagate out of ``handle_dse_job`` and hard-fail the job so the bug
           is surfaced instead of masked as a penalizing reward.
diff --git a/src/cloudai/configurator/env_params.py b/src/cloudai/configurator/env_params.py
index c23764cc0..ecd099ecd 100644
--- a/src/cloudai/configurator/env_params.py
+++ b/src/cloudai/configurator/env_params.py
@@ -32,11 +32,17 @@
 import math
 import random
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
 from pydantic import BaseModel, ConfigDict, Field, model_validator
 from typing_extensions import Self
 
+from cloudai._core.exceptions import TestScenarioParsingError
+from cloudai._core.registry import Registry
+
+if TYPE_CHECKING:
+    from cloudai._core.test_scenario import TestScenario
+
 
 class EnvParamSpec(BaseModel):
     """
@@ -157,3 +163,35 @@ def write(self, path: Path, step: int, sample: Dict[str, Any]) -> None:
             if new_file:
                 writer.writerow(("step", "env"))
             writer.writerow([step, sample])
+
+
+def validate_dse_env_params(test_scenario: "TestScenario") -> None:
+    """
+    Reject prepped configs that declare env_params no agent will sample.
+
+    env_params are sampled per-trial by CloudAIGymEnv, but the sampling only matters for an agent
+    that opts into it via ``BaseAgent.samples_env_params``. A non-DSE run has no per-trial loop, and
+    an agent that does not opt in ignores env_params, so declaring them there is a silent no-op.
+    is_dse_job and the agent both resolve only on the fully prepped config, so this is validated
+    here rather than at parse time.
+    """
+    agents = Registry().agents_map
+
+    offenders = []
+    for tr in test_scenario.test_runs:
+        if not tr.test.env_params:
+            continue
+
+        agent = agents.get(tr.test.agent)
+        # Unknown agent: defer to the dedicated agent-resolution error rather than masking it here.
+        sampled = tr.is_dse_job and (agent is None or agent.samples_env_params)
+        if not sampled:
+            offenders.append(tr.name)
+
+    if offenders:
+        raise TestScenarioParsingError(
+            f"Tests {offenders} declare env_params but no agent will sample them. env_params are sampled "
+            "per-trial only by a DSE run on an agent that opts into env_params sampling. Add a sweep "
+            "(a list-valued cmd_args/extra_env_vars entry or num_nodes) and use such an agent, or remove "
+            "env_params."
+        )
diff --git a/tests/test_handlers.py b/tests/test_handlers.py
index 203a35d67..247548287 100644
--- a/tests/test_handlers.py
+++ b/tests/test_handlers.py
@@ -56,6 +56,7 @@ class StubAgentConfig(BaseAgentConfig):
 
 class StubAgent(BaseAgent):
     received_configs: ClassVar[list[StubAgentConfig]] = []
+    samples_env_params: bool = True  # stands in for an env-aware learning agent
 
     def __init__(self, env, config: StubAgentConfig):
         self.env = env
@@ -436,7 +437,7 @@ def test_handle_dse_job_documents_failure_in_reports_before_raising(
 def test_validate_dse_env_params_rejects_non_dse(base_tr: TestRun) -> None:
     base_tr.test.env_params = {"ball_speed": EnvParamSpec()}
     scenario = TestScenario(name="s", test_runs=[base_tr])
-    with pytest.raises(TestScenarioParsingError, match="will not sample them"):
+    with pytest.raises(TestScenarioParsingError, match="no agent will sample them"):
         validate_dse_env_params(scenario)
 
 
@@ -444,15 +445,36 @@ def test_validate_dse_env_params_rejects_grid_search(dse_tr: TestRun) -> None:
     """A DSE job on grid_search exhaustively searches the space, so env_params are noise -> reject."""
     dse_tr.test.env_params = {"ball_speed": EnvParamSpec()}
     assert dse_tr.is_dse_job is True  # it IS a DSE job...
-    assert dse_tr.test.agent == "grid_search"  # ...but grid_search ignores env_params
-    with pytest.raises(TestScenarioParsingError, match="will not sample them"):
+    assert dse_tr.test.agent == "grid_search"  # ...but grid_search does not sample env_params
+    with pytest.raises(TestScenarioParsingError, match="no agent will sample them"):
         validate_dse_env_params(TestScenario(name="s", test_runs=[dse_tr]))
 
 
+def test_validate_dse_env_params_rejects_non_sampling_agent(
+    dse_tr: TestRun, stub_agent_name: str, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """The check keys on the agent capability, not the name: a non-grid agent that opts out is rejected too."""
+    monkeypatch.setattr(StubAgent, "samples_env_params", False)
+    dse_tr.test.env_params = {"ball_speed": EnvParamSpec()}
+    dse_tr.test.agent = stub_agent_name
+    assert dse_tr.is_dse_job is True and dse_tr.test.agent != "grid_search"
+    with pytest.raises(TestScenarioParsingError, match="no agent will sample them"):
+        validate_dse_env_params(TestScenario(name="s", test_runs=[dse_tr]))
+
+
+def test_validate_dse_env_params_defers_unknown_agent(dse_tr: TestRun) -> None:
+    """An unknown agent is not flagged here; it is deferred to the dedicated agent-resolution error."""
+    dse_tr.test.env_params = {"ball_speed": EnvParamSpec()}
+    dse_tr.test.agent = "does_not_exist_agent"
+    assert dse_tr.is_dse_job is True
+    assert dse_tr.test.agent not in Registry().agents_map  # precondition: agent is truly unknown
+    validate_dse_env_params(TestScenario(name="s", test_runs=[dse_tr]))  # no exception == deferred
+
+
 def test_validate_dse_env_params_allows_dse_run(dse_tr: TestRun, stub_agent_name: str) -> None:
     dse_tr.test.env_params = {"ball_speed": EnvParamSpec()}
-    dse_tr.test.agent = stub_agent_name  # a learning agent (not grid_search) consumes env_params
-    assert dse_tr.is_dse_job is True  # precondition: DSE + learning agent + env_params is allowed
+    dse_tr.test.agent = stub_agent_name  # an env-aware agent (samples_env_params=True) consumes env_params
+    assert dse_tr.is_dse_job is True  # precondition: DSE + env-aware agent + env_params is allowed
     validate_dse_env_params(TestScenario(name="s", test_runs=[dse_tr]))  # no exception == pass
 
 

From 69dd7d4c739f0e8cbd6375905d4fea7a4e143f71 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Thu, 25 Jun 2026 13:13:24 -0400
Subject: [PATCH 08/22] style(configurator): trim verbose comments toward
 self-documenting code

Compress multi-line inline comments down to the single non-obvious rationale (or drop
them where the code already speaks), per the self-documenting-code principle. Public API
docstrings and test intent comments are left intact.
---
 src/cloudai/_core/test_scenario.py      |  5 +----
 src/cloudai/cli/handlers.py             |  3 +--
 src/cloudai/configurator/base_agent.py  |  5 ++---
 src/cloudai/configurator/cloudai_gym.py | 10 ++--------
 4 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/src/cloudai/_core/test_scenario.py b/src/cloudai/_core/test_scenario.py
index aa03cc591..523f2b701 100644
--- a/src/cloudai/_core/test_scenario.py
+++ b/src/cloudai/_core/test_scenario.py
@@ -203,10 +203,7 @@ def _apply(key: str, value: Any) -> None:
             else:
                 setattr(obj, attrs[-1], value)
 
-        # The agent's chosen action and this trial's sampled env_params are both just concrete
-        # values written onto cmd_args, applied through the one path here. env_params keys name
-        # cmd_args fields; sampling (the RNG) happens in the env before this call, so this method
-        # stays deterministic.
+        # RNG runs in the env before this call; applying only concrete values keeps this deterministic.
         for key, value in action.items():
             _apply(key, value)
         for key, value in (env_params or {}).items():
diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py
index acc88f72c..fb6f9b7d8 100644
--- a/src/cloudai/cli/handlers.py
+++ b/src/cloudai/cli/handlers.py
@@ -135,8 +135,7 @@ def handle_dse_job(runner: Runner, args: argparse.Namespace) -> int:
         return 1
 
     err = 0
-    # Recoverable failures return a non-zero rc and are accumulated here; an unexpected exception
-    # (a bug) is a hard-fail. We capture it so reports still generate, then re-raise below.
+    # Capture an unexpected error so reports still generate, then re-raise below.
     run_error: Exception | None = None
     try:
         for tr in runner.runner.test_scenario.test_runs:
diff --git a/src/cloudai/configurator/base_agent.py b/src/cloudai/configurator/base_agent.py
index 3a6fef75e..5fe1059ef 100644
--- a/src/cloudai/configurator/base_agent.py
+++ b/src/cloudai/configurator/base_agent.py
@@ -58,9 +58,8 @@ class BaseAgent(ABC):
     Provides a unified interface and parameter management for action spaces.
     """
 
-    # Opt-in capability: only agents whose search consumes per-trial env_params sampling set this
-    # to True. Defaults False so a config that declares env_params for an agent that would ignore
-    # them is rejected up front instead of silently having no effect.
+    # Opt-in: agents that consume per-trial env_params sampling set this True. Default False so
+    # env_params declared for a non-sampling agent are rejected rather than silently ignored.
     samples_env_params: bool = False
 
     def __init__(self, env: BaseGym, config: BaseAgentConfig):
diff --git a/src/cloudai/configurator/cloudai_gym.py b/src/cloudai/configurator/cloudai_gym.py
index ac9d5cd34..0b4a3b0c0 100644
--- a/src/cloudai/configurator/cloudai_gym.py
+++ b/src/cloudai/configurator/cloudai_gym.py
@@ -63,8 +63,6 @@ def __init__(self, test_run: TestRun, runner: BaseRunner, rewards: RewardOverrid
         self.max_steps = test_run.test.agent_steps
         self.reward_function = Registry().get_reward_function(test_run.test.agent_reward_function)
         self.trajectory: dict[int, list[TrajectoryEntry]] = {}
-        # Resolve env_params once, at formulation: None unless the workload declares something to
-        # sample, so a non-DR run carries no env-params state and pays zero per-step overhead.
         self.params: EnvParams | None = EnvParams.from_test(test_run.test)
         self.env_params_sink = EnvParamsSink()
         super().__init__()
@@ -130,9 +128,7 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]:
                 - info (dict): Additional info for debugging.
         """
         self.test_run.increment_step()
-        # Sample this trial's env_params (the RNG lives here, in the env), then apply the agent's
-        # action and the sample together: apply_params_set overlays both onto cmd_args and records
-        # current_env_params, so the workload runs with the sampled values and the cache key sees them.
+        # RNG lives in the env: sample here, then apply action + sample so the run and cache key see them.
         sampled_env_params = self.params.sample(self.test_run.step) if self.params else {}
         self.test_run = self.test_run.apply_params_set(action, env_params=sampled_env_params)
 
@@ -178,9 +174,7 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]:
             self.test_run.step = new_tr.step
             self.test_run.output_path = new_tr.output_path
 
-        # Rebuilding/replacing test_run above can drop the per-trial env_params sample;
-        # restore it so the trajectory entry, the cache key, and env.csv all record the
-        # params the trial actually ran with.
+        # The test_run rebuild above drops the sample; restore it so the entry, cache key, and env.csv match.
         self.test_run.current_env_params = new_tr.current_env_params
 
         observation = self.get_observation(action)

From 7fec7f5462fdb6e9ab44e04df204d2d17c842634 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Thu, 25 Jun 2026 15:42:23 -0400
Subject: [PATCH 09/22] fix(test_scenario): skip env_params on post-overlay
 revalidation

apply_params_set overlays sampled scalar draws onto cmd_args, then
reconstructs the TestDefinition to validate the applied action values.
That pass re-ran validate_env_params, which rejects a weighted env_param
whose cmd_args target is no longer a candidate list - exactly what the
overlay produces. env_params is already validated at parse time, so drop
it from the validation-only dump. Adds a regression test covering a
weighted env_param's scalar draw.
---
 src/cloudai/_core/test_scenario.py |  7 ++++++-
 tests/test_env_params.py           | 13 +++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/cloudai/_core/test_scenario.py b/src/cloudai/_core/test_scenario.py
index 523f2b701..7c31406d0 100644
--- a/src/cloudai/_core/test_scenario.py
+++ b/src/cloudai/_core/test_scenario.py
@@ -209,7 +209,12 @@ def _apply(key: str, value: Any) -> None:
         for key, value in (env_params or {}).items():
             _apply(key, value)
 
-        type(tdef)(**tdef.model_dump())  # trigger validation
+        # env_params is validated at parse time; after the overlay its target cmd_args fields hold
+        # concrete scalar draws, so re-validating it here would reject weighted specs. Drop it for
+        # this validation-only pass, which exists to validate the applied action values.
+        validation_args = tdef.model_dump()
+        validation_args.pop("env_params", None)
+        type(tdef)(**validation_args)  # trigger validation
 
         new_tr = copy.deepcopy(self)
         new_tr.test = tdef
diff --git a/tests/test_env_params.py b/tests/test_env_params.py
index 483fa2e71..510a79fe2 100644
--- a/tests/test_env_params.py
+++ b/tests/test_env_params.py
@@ -45,6 +45,7 @@
     EnvParamSpec,
     EnvParamsSink,
 )
+from cloudai.core import TestRun
 from cloudai.models.workload import CmdArgs, TestDefinition
 
 
@@ -376,3 +377,15 @@ def test_is_dse_job_true_when_a_real_action_dimension_exists() -> None:
     """An un-annotated cmd_args list is a real action dimension -> DSE, even alongside env_params."""
     tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=[1, 2, 3], paddle_width=[4, 8])
     assert tdef.is_dse_job is True
+
+
+def test_apply_params_set_accepts_weighted_env_param_draw() -> None:
+    """Regression: apply_params_set re-validates after the overlay; a weighted env_param's scalar
+    draw must not trip validate_env_params (which would reject 'weights but not a candidate list')."""
+    tdef = _tdef({"ball_speed": EnvParamSpec(weights=[0.7, 0.3])}, ball_speed=[1, 2])
+    tr = TestRun(name="tr", test=tdef, num_nodes=1, nodes=[])
+
+    new_tr = tr.apply_params_set({}, env_params={"ball_speed": 1})
+
+    assert new_tr.test.cmd_args.ball_speed == 1
+    assert new_tr.current_env_params == {"ball_speed": 1}

From 8f91664db9338c4df7b8e1d940ca59947a2306f6 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Fri, 26 Jun 2026 11:12:06 -0400
Subject: [PATCH 10/22] fix(workload): reject scalar-only env_params
 annotations at parse time

An env_params entry only reclassifies a list-valued cmd_args sweep as
env-sampled; a scalar is already fixed, so annotating it is a meaningless
label. Previously such an annotation was tolerated as a silent no-op, which
let it slip through parse-time validation and inconsistently trip (or not)
the downstream "no agent will sample them" check depending on run mode.

Reject it where the contract lives - TestDefinition.validate_env_params -
so the failure is immediate and mode-independent. EnvParams.from_test's
non-list guard becomes defensive (parse-time now guarantees lists); the
post-overlay path already drops env_params before re-validating, so concrete
scalar draws are unaffected.

Extract the per-field checks into a helper to keep the validator under the
complexity limit, and update tests: scalar annotations now assert rejection
instead of no-op tolerance.
---
 src/cloudai/configurator/env_params.py |  7 +--
 src/cloudai/models/workload.py         | 65 +++++++++++++++-----------
 tests/test_env_params.py               | 28 ++---------
 3 files changed, 46 insertions(+), 54 deletions(-)

diff --git a/src/cloudai/configurator/env_params.py b/src/cloudai/configurator/env_params.py
index ecd099ecd..6d98c6903 100644
--- a/src/cloudai/configurator/env_params.py
+++ b/src/cloudai/configurator/env_params.py
@@ -113,9 +113,10 @@ def from_test(cls, test: Any) -> Optional["EnvParams"]:
         """
         Resolve a TestDefinition's env_params annotations, or ``None`` if nothing is sampled.
 
-        A field whose ``cmd_args`` value is a scalar is fixed: the annotation is a no-op and
-        the field is skipped. With no list-valued field left, there is nothing to sample and
-        we return ``None`` so callers stay on the zero-overhead path.
+        Annotated fields are guaranteed list-valued by ``TestDefinition`` parse-time validation
+        (a scalar annotation is rejected there), so the non-list guard below is defensive. With
+        no annotations declared there is nothing to sample and we return ``None`` so callers
+        stay on the zero-overhead path.
         """
         params: Dict[str, EnvParam] = {}
         for name, spec in test.env_params.items():
diff --git a/src/cloudai/models/workload.py b/src/cloudai/models/workload.py
index eaa971aa6..c5156ccc6 100644
--- a/src/cloudai/models/workload.py
+++ b/src/cloudai/models/workload.py
@@ -217,10 +217,11 @@ def validate_env_params(self) -> Self:
 
         ``env_params`` is an annotation: each key names a ``cmd_args`` field whose value is
         the candidate set (the single source of truth), and the entry carries only *how* to
-        sample. So each key must name a real ``cmd_args`` field; and when ``weights`` are
-        declared, that field must be a candidate list of >= 2 values and the weights must
-        align 1:1 with it. A scalar (fixed) ``cmd_args`` value is tolerated as a no-op
-        marker. Sampling, persistence, the per-trial cmd_args overlay, and the cache key all
+        sample. So each key must name a real ``cmd_args`` field whose value is a candidate
+        list; a scalar is already fixed, so annotating it is a meaningless label and is
+        rejected here. When ``weights`` are declared, the list needs >= 2 values and the
+        weights must align 1:1 with it. Sampling, persistence, the per-trial cmd_args overlay,
+        and the cache key all
         live in ``CloudAIGymEnv``; keeping this shape check in core lets the overlay stay
         agent- and workload-agnostic rather than re-implemented per workload.
         """
@@ -236,28 +237,36 @@ def validate_env_params(self) -> Self:
             raise ValueError(f"env_params keys {unknown} are not cmd_args fields on {type(self.cmd_args).__name__}")
 
         for name, spec in self.env_params.items():
-            value = getattr(self.cmd_args, name, None)
-            if isinstance(value, (dict, BaseModel)):
-                raise ValueError(
-                    f"env_params['{name}'] must target a leaf cmd_args field (scalar or candidate list), "
-                    "not a structured object; param_space/is_dse_job exclude the whole key, which would "
-                    "silently drop nested action dimensions"
-                )
-            if isinstance(value, list) and not value:
-                raise ValueError(
-                    f"env_params['{name}'] references an empty candidate list in cmd_args.{name}; "
-                    "provide at least one candidate (the sampler would otherwise fail on an empty draw)"
-                )
-            if spec.weights is None:
-                continue
-            if not isinstance(value, list) or len(value) < 2:
-                raise ValueError(
-                    f"env_params['{name}'] declares weights but cmd_args.{name} is not a candidate list "
-                    "(need a list of >= 2 values)"
-                )
-            if len(spec.weights) != len(value):
-                raise ValueError(
-                    f"env_params['{name}'] weights length {len(spec.weights)} does not match "
-                    f"cmd_args.{name} candidate count {len(value)}"
-                )
+            self._validate_env_param_field(name, spec, getattr(self.cmd_args, name, None))
         return self
+
+    @staticmethod
+    def _validate_env_param_field(name: str, spec: Any, value: Any) -> None:
+        """Reject one env_params entry whose target cmd_args field is not a valid candidate list."""
+        if isinstance(value, (dict, BaseModel)):
+            raise ValueError(
+                f"env_params['{name}'] must target a leaf cmd_args field (a candidate list), "
+                "not a structured object; param_space/is_dse_job exclude the whole key, which would "
+                "silently drop nested action dimensions"
+            )
+        if not isinstance(value, list):
+            raise ValueError(
+                f"env_params['{name}'] annotates cmd_args.{name}, which is not a candidate list "
+                f"(got {type(value).__name__}); the annotation only reclassifies a list-valued sweep as "
+                f"env-sampled, while a scalar is already fixed. Make cmd_args.{name} a list or remove "
+                "the annotation"
+            )
+        if not value:
+            raise ValueError(
+                f"env_params['{name}'] references an empty candidate list in cmd_args.{name}; "
+                "provide at least one candidate (the sampler would otherwise fail on an empty draw)"
+            )
+        if spec.weights is None:
+            return
+        if len(value) < 2:
+            raise ValueError(f"env_params['{name}'] declares weights but cmd_args.{name} needs >= 2 candidate values")
+        if len(spec.weights) != len(value):
+            raise ValueError(
+                f"env_params['{name}'] weights length {len(spec.weights)} does not match "
+                f"cmd_args.{name} candidate count {len(value)}"
+            )
diff --git a/tests/test_env_params.py b/tests/test_env_params.py
index 510a79fe2..66cfd4352 100644
--- a/tests/test_env_params.py
+++ b/tests/test_env_params.py
@@ -288,25 +288,6 @@ def test_env_params_from_test_resolves_multiple_params() -> None:
     assert env_params.params["paddle_width"].candidates == [4, 8]
 
 
-def test_env_params_from_test_skips_scalar_keeps_list() -> None:
-    """A mix of scalar and list annotations yields only the list-valued field as a knob."""
-    env_params = EnvParams.from_test(
-        _tdef(
-            {"ball_speed": EnvParamSpec(), "paddle_width": EnvParamSpec()},
-            ball_speed=[1, 2, 3],
-            paddle_width=8,
-        )
-    )
-
-    assert env_params is not None
-    assert set(env_params.params) == {"ball_speed"}
-
-
-def test_env_params_from_test_none_when_only_scalar_annotation() -> None:
-    """A scalar cmd_args value is fixed; with nothing list-valued to sample, from_test is None."""
-    assert EnvParams.from_test(_tdef({"ball_speed": EnvParamSpec()}, ball_speed=2)) is None
-
-
 def test_env_params_from_test_none_when_no_env_params() -> None:
     """No annotations declared -> no EnvParams object (the zero-overhead path)."""
     assert EnvParams.from_test(_tdef({})) is None
@@ -326,10 +307,11 @@ def test_env_params_weighted_list_is_accepted() -> None:
     assert tdef.env_params["ball_speed"].weights == [0.7, 0.3]
 
 
-def test_env_params_scalar_no_op_is_accepted() -> None:
-    """Annotating a scalar (fixed) knob is tolerated as a no-op marker."""
-    tdef = _tdef({"ball_speed": EnvParamSpec()}, ball_speed=2)
-    assert tdef.cmd_args.ball_speed == 2
+def test_env_params_scalar_annotation_rejected() -> None:
+    """A scalar (fixed) knob carries nothing to sample; the annotation is a meaningless label and is
+    rejected at parse time (it only reclassifies a list-valued sweep as env-sampled)."""
+    with pytest.raises(ValidationError, match="not a candidate list"):
+        _tdef({"ball_speed": EnvParamSpec()}, ball_speed=2)
 
 
 def test_env_params_unknown_key_rejected() -> None:

From 7db764bb054943f51f375ea9c0de8ce4700f002d Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Tue, 16 Jun 2026 00:44:38 -0400
Subject: [PATCH 11/22] feat(configurator): add ObsLeafDescriptor +
 structured-observation protocol

Add ObsLeafDescriptor (a self-describing observation leaf: "box" of width
dim, or "discrete" of size n) and a StructuredObservation Protocol that
documents the optional env hooks structured_observation_descriptors() and
encode_observation(). These let an env expose a named, per-leaf observation
so adapters (e.g. GymnasiumAdapter) can build the matching gymnasium
spaces.Dict; the hooks are duck-typed, so envs need not subclass.

Both exported via cloudai.core.
---
 src/cloudai/configurator/env_params.py | 46 +++++++++++++++++++++++++-
 src/cloudai/core.py                    |  3 ++
 tests/test_env_params.py               | 29 ++++++++++++++++
 3 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/src/cloudai/configurator/env_params.py b/src/cloudai/configurator/env_params.py
index 6d98c6903..c1b7ee0d0 100644
--- a/src/cloudai/configurator/env_params.py
+++ b/src/cloudai/configurator/env_params.py
@@ -32,7 +32,7 @@
 import math
 import random
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Protocol, runtime_checkable
 
 from pydantic import BaseModel, ConfigDict, Field, model_validator
 from typing_extensions import Self
@@ -76,6 +76,50 @@ def _validate_weights(self) -> Self:
         return self
 
 
+class ObsLeafDescriptor(BaseModel):
+    """
+    Description of one leaf of a structured (named) observation.
+
+    A structured observation maps each observed name to a self-describing leaf
+    so adapters can build the matching subspace without guessing: a ``"box"``
+    leaf becomes a continuous vector of width ``dim`` (e.g. a log-encoded
+    env_param as ``dim=2``); a ``"discrete"`` leaf becomes a categorical of
+    size ``n``. Stateless agents that consume the flat observation ignore this.
+    """
+
+    model_config = ConfigDict(extra="forbid")
+
+    kind: Literal["box", "discrete"]
+    dim: int = 1
+    n: Optional[int] = None
+
+    @model_validator(mode="after")
+    def _validate(self) -> Self:
+        if self.dim < 1:
+            raise ValueError(f"ObsLeafDescriptor dim must be >= 1; got {self.dim}")
+        if self.kind == "discrete" and (self.n is None or self.n < 1):
+            raise ValueError(f"ObsLeafDescriptor(kind='discrete') requires n >= 1; got n={self.n}")
+        return self
+
+
+@runtime_checkable
+class StructuredObservation(Protocol):
+    """
+    Optional env hooks that expose a structured (per-leaf) observation.
+
+    An env opts in by returning per-leaf :class:`ObsLeafDescriptor` from
+    ``structured_observation_descriptors`` (``None`` keeps the flat-vector
+    path) and encoding a raw observation into the matching named leaves via
+    ``encode_observation``. ``GymnasiumAdapter`` consumes these to expose a
+    ``gymnasium.spaces.Dict`` observation; the hooks are duck-typed, so envs
+    need not subclass this Protocol.
+    """
+
+    def structured_observation_descriptors(self) -> Optional[Dict[str, ObsLeafDescriptor]]: ...
+
+    def encode_observation(self, observation: list) -> Dict[str, Any]: ...
+
+
 @dataclasses.dataclass(frozen=True)
 class EnvParam:
     """
diff --git a/src/cloudai/core.py b/src/cloudai/core.py
index 752d24972..aefcd0573 100644
--- a/src/cloudai/core.py
+++ b/src/cloudai/core.py
@@ -51,6 +51,7 @@
 from ._core.test_scenario import METRIC_ERROR, MetricErrorSentinel, MetricValue, TestDependency, TestRun, TestScenario
 from .configurator.base_agent import BaseAgent, BaseAgentConfig, RewardOverrides
 from .configurator.cloudai_gym import CloudAIGymEnv
+from .configurator.env_params import ObsLeafDescriptor, StructuredObservation
 from .configurator.grid_search import GridSearchAgent
 from .models.workload import CmdArgs, NsysConfiguration, PredictorConfig, TestDefinition
 from .parser import Parser
@@ -85,6 +86,7 @@
     "MetricValue",
     "MissingTestError",
     "NsysConfiguration",
+    "ObsLeafDescriptor",
     "Parser",
     "PerTestReporter",
     "PredictorConfig",
@@ -96,6 +98,7 @@
     "RewardOverrides",
     "Runner",
     "StatusReporter",
+    "StructuredObservation",
     "System",
     "SystemConfigParsingError",
     "TarballReporter",
diff --git a/tests/test_env_params.py b/tests/test_env_params.py
index 66cfd4352..3668e9bb8 100644
--- a/tests/test_env_params.py
+++ b/tests/test_env_params.py
@@ -44,6 +44,7 @@
     EnvParams,
     EnvParamSpec,
     EnvParamsSink,
+    ObsLeafDescriptor,
 )
 from cloudai.core import TestRun
 from cloudai.models.workload import CmdArgs, TestDefinition
@@ -371,3 +372,31 @@ def test_apply_params_set_accepts_weighted_env_param_draw() -> None:
 
     assert new_tr.test.cmd_args.ball_speed == 1
     assert new_tr.current_env_params == {"ball_speed": 1}
+
+
+# --- ObsLeafDescriptor: structured-observation leaf schema ---
+
+
+def test_obs_leaf_descriptor_box_defaults() -> None:
+    leaf = ObsLeafDescriptor(kind="box", dim=2)
+    assert leaf.kind == "box"
+    assert leaf.dim == 2
+    assert leaf.n is None
+
+
+def test_obs_leaf_descriptor_discrete_requires_n() -> None:
+    leaf = ObsLeafDescriptor(kind="discrete", dim=1, n=3)
+    assert leaf.n == 3
+    with pytest.raises(ValidationError, match="requires n"):
+        ObsLeafDescriptor(kind="discrete", dim=1)
+    with pytest.raises(ValidationError, match="requires n"):
+        ObsLeafDescriptor(kind="discrete", dim=1, n=0)
+
+
+def test_obs_leaf_descriptor_rejects_bad_dim_and_extra_fields() -> None:
+    with pytest.raises(ValidationError, match="dim must be"):
+        ObsLeafDescriptor(kind="box", dim=0)
+    with pytest.raises(ValidationError):
+        ObsLeafDescriptor(kind="box", dim=1, unexpected=1)
+    with pytest.raises(ValidationError):
+        ObsLeafDescriptor(kind="categorical", dim=1)

From 584f145f0386e38ec86b1843c944a1514e901910 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Tue, 16 Jun 2026 10:57:57 -0400
Subject: [PATCH 12/22] test(env-params): suppress pyright on intentional
 ObsLeafDescriptor rejection tests

Negative tests pass an extra kwarg and an out-of-Literal kind to assert
ValidationError; mark the deliberate type violations with type: ignore.
---
 tests/test_env_params.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_env_params.py b/tests/test_env_params.py
index 3668e9bb8..eef3feba0 100644
--- a/tests/test_env_params.py
+++ b/tests/test_env_params.py
@@ -397,6 +397,6 @@ def test_obs_leaf_descriptor_rejects_bad_dim_and_extra_fields() -> None:
     with pytest.raises(ValidationError, match="dim must be"):
         ObsLeafDescriptor(kind="box", dim=0)
     with pytest.raises(ValidationError):
-        ObsLeafDescriptor(kind="box", dim=1, unexpected=1)
+        ObsLeafDescriptor(kind="box", dim=1, unexpected=1)  # type: ignore
     with pytest.raises(ValidationError):
-        ObsLeafDescriptor(kind="categorical", dim=1)
+        ObsLeafDescriptor(kind="categorical", dim=1)  # type: ignore

From 6b6d2bd30276088db9c0f9c35d237afce39b3cc2 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Tue, 16 Jun 2026 01:04:25 -0400
Subject: [PATCH 13/22] feat(configurator): add GymnasiumAdapter for CloudAI
 envs

Wrap a CloudAI BaseGym as a gymnasium.Env-shaped object: a spaces.Dict of
Discrete (list params) and Box (ContinuousSpace) actions over the tunable
params with fixed (single-value) params injected each step; observations as
either a flat float32 Box or, when the env opts in via the structured-obs
hooks, a spaces.Dict of per-leaf ObsLeafDescriptor subspaces. Continuous
dtype="int" params are quantized (rounded/clamped) at decode_action so the
trajectory cache key collapses float jitter. The adapter is a pure
pass-through over test_run.step (never mutates it), so contextual-bandit
rollouts that reset() per trial keep a monotonic trial index.

gymnasium is an optional dependency lazy-imported behind the new [rl] extra
(also added to dev); CloudAIGymEnv.define_observation_space() now returns one
slot per agent metric so adapters get the right Box shape. Exported via
cloudai.core. Caller-contract tests pin the step-monotonicity, observation
pass-through, continuous-quantization, and structured-obs invariants.
---
 pyproject.toml                                |   2 +
 src/cloudai/configurator/__init__.py          |   2 +
 src/cloudai/configurator/cloudai_gym.py       |   7 +-
 src/cloudai/configurator/gymnasium_adapter.py | 258 ++++++++++
 src/cloudai/core.py                           |   2 +
 tests/test_gymnasium_adapter_contract.py      | 479 ++++++++++++++++++
 uv.lock                                       |  41 +-
 7 files changed, 787 insertions(+), 4 deletions(-)
 create mode 100644 src/cloudai/configurator/gymnasium_adapter.py
 create mode 100644 tests/test_gymnasium_adapter_contract.py

diff --git a/pyproject.toml b/pyproject.toml
index 398326fc3..99e01ae81 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,9 @@ requires-python = ">=3.10"
     "import-linter~=2.10",
     "pytest-deadfixtures~=3.1",
     "taplo~=0.9.3",
+    "gymnasium~=1.2",
   ]
+  rl = ["gymnasium~=1.2"]
   docs = [
     "sphinx~=8.1",
     "nvidia-sphinx-theme~=0.0.8",
diff --git a/src/cloudai/configurator/__init__.py b/src/cloudai/configurator/__init__.py
index f05b65c5b..a88432c41 100644
--- a/src/cloudai/configurator/__init__.py
+++ b/src/cloudai/configurator/__init__.py
@@ -18,11 +18,13 @@
 from .base_gym import BaseGym
 from .cloudai_gym import CloudAIGymEnv, TrajectoryEntry
 from .grid_search import GridSearchAgent
+from .gymnasium_adapter import GymnasiumAdapter
 
 __all__ = [
     "BaseAgent",
     "BaseGym",
     "CloudAIGymEnv",
     "GridSearchAgent",
+    "GymnasiumAdapter",
     "TrajectoryEntry",
 ]
diff --git a/src/cloudai/configurator/cloudai_gym.py b/src/cloudai/configurator/cloudai_gym.py
index 0b4a3b0c0..b32e137cc 100644
--- a/src/cloudai/configurator/cloudai_gym.py
+++ b/src/cloudai/configurator/cloudai_gym.py
@@ -85,9 +85,10 @@ def define_observation_space(self) -> list:
         Define the observation space for the environment.
 
         Returns:
-            list: The observation space.
+            list: One float slot per agent metric (at least one), giving the correct shape
+            for adapters that derive ``gymnasium.spaces.Box`` from this output.
         """
-        return [0.0]
+        return [0.0] * max(len(self.test_run.test.agent_metrics), 1)
 
     def reset(
         self,
@@ -109,7 +110,7 @@ def reset(
         if seed is not None:
             lazy.np.random.seed(seed)
         self.test_run.current_iteration = 0
-        observation = [0.0]
+        observation = self.define_observation_space()
         info = {}
         return observation, info
 
diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py
new file mode 100644
index 000000000..590abf11d
--- /dev/null
+++ b/src/cloudai/configurator/gymnasium_adapter.py
@@ -0,0 +1,258 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Gymnasium adapter for CloudAI ``BaseGym`` environments.
+
+Translates a CloudAI :class:`BaseGym` into the ``gymnasium.Env`` 5-tuple shape
+that RLlib-based agents (e.g. PPO / DQN) and external training loops expect.
+``gymnasium`` is an optional dependency (the ``[rl]`` extra), so it is imported
+lazily and only required when an adapter is actually instantiated.
+
+Design invariant — adapter is a pure pass-through over ``test_run.step``.
+The trial counter is owned by ``TestRun`` and advanced exclusively by
+``CloudAIGymEnv.step()``. Adapters that wrote ``test_run.step`` themselves —
+mirroring a Gym-protocol episode-local counter — collapsed every
+contextual-bandit rollout onto ``step=1`` because RLlib calls ``reset()`` per
+trial. This adapter never mutates ``test_run.step``; contract tests pin that
+property.
+"""
+
+from __future__ import annotations
+
+from typing import Any, ClassVar, Optional, cast
+
+from cloudai._core.action_space import ContinuousSpace
+from cloudai.configurator.base_gym import BaseGym
+from cloudai.configurator.env_params import StructuredObservation
+
+_GYMNASIUM_INSTALL_HINT = "gymnasium is required for GymnasiumAdapter. Install it with: pip install 'cloudai[rl]'"
+
+
+def _import_gymnasium():
+    """
+    Import gymnasium + numpy lazily; raise a clear, actionable error when absent.
+
+    Kept as a single seam so cloudai installs without the ``[rl]`` extra
+    continue to work for non-RL agents, and tests can patch this helper to
+    simulate a missing install.
+    """
+    try:
+        import gymnasium
+        import numpy as np
+        from gymnasium import spaces
+
+        return gymnasium, spaces, np
+    except ImportError as exc:
+        raise ImportError(_GYMNASIUM_INSTALL_HINT) from exc
+
+
+class GymnasiumAdapter:
+    """
+    Expose a CloudAI :class:`BaseGym` as a ``gymnasium.Env``-shaped object.
+
+    The adapter:
+
+    * Builds a ``gymnasium.spaces.Dict`` of ``Discrete`` action spaces over
+      the *tunable* parameters (those with more than one candidate value),
+      and injects the *fixed* parameters (single candidate) automatically on
+      every step so agents never see them.
+    * Converts observations to ``float32`` ``numpy`` arrays sized by
+      ``env.define_observation_space()``.
+    * Returns the gymnasium 5-tuple ``(obs, reward, terminated, truncated, info)``
+      from :meth:`step` and :meth:`step_raw`.
+
+    ``gymnasium`` and ``numpy`` are optional dependencies (the ``[rl]`` extra);
+    instantiating the adapter without them raises ``ImportError``.
+    """
+
+    metadata: ClassVar[dict[str, Any]] = {"render_modes": ["human"]}
+
+    def __init__(self, env: BaseGym) -> None:
+        _, spaces, np = _import_gymnasium()
+
+        self._np = np
+        self._spaces = spaces
+        self._env = env
+
+        raw_action_space = env.define_action_space()
+
+        # Three classes of params from cloudai's param_space:
+        #   list with len > 1   -> discrete tunable, mapped to gym.Discrete.
+        #   list with len == 1  -> fixed (collapsed); injected on every step so
+        #                          agents never see them.
+        #   ContinuousSpace     -> continuous tunable, mapped to gym.Box(1,);
+        #                          ``dtype="int"`` quantizes at decode_action.
+        self._discrete_params: dict[str, list] = {
+            k: v for k, v in raw_action_space.items() if isinstance(v, list) and len(v) > 1
+        }
+        self._continuous_params: dict[str, ContinuousSpace] = {
+            k: v for k, v in raw_action_space.items() if isinstance(v, ContinuousSpace)
+        }
+        self._fixed_params: dict[str, Any] = {
+            k: v[0] for k, v in raw_action_space.items() if isinstance(v, list) and len(v) == 1
+        }
+
+        action_space_components: dict[str, Any] = {
+            name: spaces.Discrete(len(values)) for name, values in self._discrete_params.items()
+        }
+        for name, space in self._continuous_params.items():
+            action_space_components[name] = spaces.Box(
+                low=np.array([space.low], dtype=np.float32),
+                high=np.array([space.high], dtype=np.float32),
+                shape=(1,),
+                dtype=np.float32,
+            )
+        self.action_space = spaces.Dict(action_space_components)
+
+        # Observation space: prefer the env's structured (per-leaf) spec so the
+        # policy sees named, individually-encoded leaves (e.g. a log-encoded
+        # env_param as Box(2)); RLlib connectors own normalize + flatten. Falls
+        # back to a flat Box for envs that only expose define_observation_space.
+        self._obs_descriptors: Optional[dict[str, Any]] = self._structured_obs_descriptors(env)
+        if self._obs_descriptors:
+            self.observation_space = spaces.Dict(
+                {name: self._descriptor_to_space(desc) for name, desc in self._obs_descriptors.items()}
+            )
+        else:
+            obs_shape = (len(env.define_observation_space()),)
+            self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=obs_shape, dtype=np.float32)
+
+    @staticmethod
+    def _structured_obs_descriptors(env: BaseGym) -> Optional[dict[str, Any]]:
+        """
+        Return the env's per-leaf obs descriptors, or ``None`` for the flat-Box path.
+
+        The env owns the opt-in decision via ``structured_observation_descriptors``
+        (returns ``None`` unless an observed name is a declared env_param). Envs
+        without that hook keep the legacy flat-Box path.
+        """
+        getter = getattr(env, "structured_observation_descriptors", None)
+        if getter is None or not hasattr(env, "encode_observation"):
+            return None
+        descriptors = getter()
+        return descriptors or None
+
+    def _descriptor_to_space(self, descriptor: Any) -> Any:
+        """Map a framework-agnostic ``ObsLeafDescriptor`` to a gymnasium subspace."""
+        if descriptor.kind == "discrete":
+            return self._spaces.Discrete(descriptor.n)
+        return self._spaces.Box(low=-self._np.inf, high=self._np.inf, shape=(descriptor.dim,), dtype=self._np.float32)
+
+    @property
+    def unwrapped(self) -> BaseGym:
+        return self._env
+
+    def decode_action(self, action: dict[str, Any]) -> dict[str, Any]:
+        """
+        Map raw gym actions back to native parameter values.
+
+        Discrete actions are list indices and resolve to the corresponding list
+        entry. Continuous actions arrive as a 1-D ``numpy`` array (the Box
+        shape) and are clamped to the declared range; for ``dtype="int"`` the
+        scalar is rounded — this is the **only** place quantization happens, so
+        the cache key collapses float jitter (47.34 vs 47.36) onto the same
+        emitted int and downstream code stays type-agnostic.
+
+        Raises:
+            ValueError: if ``action`` is missing tunable params, contains
+                unknown keys, or carries an out-of-range discrete index.
+        """
+        expected = set(self._discrete_params) | set(self._continuous_params)
+        self._assert_keys(action.keys(), expected, "action")
+        decoded: dict[str, Any] = {}
+        for name, raw in action.items():
+            if name in self._discrete_params:
+                decoded[name] = self._decode_discrete(name, raw)
+            else:
+                decoded[name] = self._decode_continuous(name, raw)
+        return decoded
+
+    def _decode_discrete(self, name: str, raw: Any) -> Any:
+        values = self._discrete_params[name]
+        idx = int(raw)
+        if not 0 <= idx < len(values):
+            raise ValueError(f"Action index out of range for '{name}': {idx} (expected 0..{len(values) - 1})")
+        return values[idx]
+
+    def _decode_continuous(self, name: str, raw: Any) -> Any:
+        space = self._continuous_params[name]
+        scalar = float(self._np.asarray(raw, dtype=self._np.float32).reshape(-1)[0])
+        clamped = min(max(scalar, space.low), space.high)
+        return round(clamped) if space.dtype == "int" else clamped
+
+    def reset(
+        self,
+        *,
+        seed: Optional[int] = None,
+        options: Optional[dict[str, Any]] = None,
+    ) -> tuple[Any, dict[str, Any]]:
+        obs, info = self._env.reset(seed=seed, options=options)
+        return self._as_obs_array(obs), info
+
+    def step(self, action: dict[str, int]) -> tuple[Any, float, bool, bool, dict[str, Any]]:
+        params = {**self._fixed_params, **self.decode_action(action)}
+        return self._step_with_params(params)
+
+    def step_raw(self, params: dict[str, Any]) -> tuple[Any, float, bool, bool, dict[str, Any]]:
+        """
+        Step the env with an already-decoded parameter dict; bypasses index decoding.
+
+        Raises:
+            ValueError: if ``params`` does not cover exactly the tunable +
+                fixed param keys.
+        """
+        expected = set(self._discrete_params) | set(self._continuous_params) | set(self._fixed_params)
+        self._assert_keys(params.keys(), expected, "raw params")
+        return self._step_with_params(params)
+
+    def render(self) -> None:
+        self._env.render()
+
+    @staticmethod
+    def _assert_keys(received: Any, expected: set[str], ctx: str) -> None:
+        received_set = set(received)
+        if received_set == expected:
+            return
+        missing = sorted(expected - received_set)
+        extra = sorted(received_set - expected)
+        raise ValueError(f"{ctx} keys mismatch; missing={missing}, extra={extra}")
+
+    def _step_with_params(self, params: dict[str, Any]) -> tuple[Any, float, bool, bool, dict[str, Any]]:
+        obs, reward, done, info = self._env.step(params)
+        return self._as_obs_array(obs), float(reward), bool(done), False, info
+
+    def _as_obs_array(self, obs: Any) -> Any:
+        """
+        Convert a raw env observation into the policy-facing observation.
+
+        Structured path: the flat raw obs (which feeds the env's reward and
+        ``trajectory.csv``) is encoded per-leaf by the env and returned as a
+        ``dict`` keyed to match ``observation_space`` (a ``spaces.Dict``).
+        Flat path: a single ``float32`` ``Box`` array (legacy behaviour).
+        """
+        descriptors = self._obs_descriptors
+        if descriptors is None:
+            return self._np.asarray(obs, dtype=self._np.float32)
+        env = cast(StructuredObservation, self._env)
+        encoded = env.encode_observation(list(obs))
+        return {name: self._leaf_to_value(descriptors[name], leaf) for name, leaf in encoded.items()}
+
+    def _leaf_to_value(self, descriptor: Any, leaf: Any) -> Any:
+        """Coerce one encoded leaf to its gymnasium subspace dtype."""
+        if descriptor.kind == "discrete":
+            return int(leaf)
+        return self._np.asarray(leaf, dtype=self._np.float32)
diff --git a/src/cloudai/core.py b/src/cloudai/core.py
index aefcd0573..f45f0c3f9 100644
--- a/src/cloudai/core.py
+++ b/src/cloudai/core.py
@@ -53,6 +53,7 @@
 from .configurator.cloudai_gym import CloudAIGymEnv
 from .configurator.env_params import ObsLeafDescriptor, StructuredObservation
 from .configurator.grid_search import GridSearchAgent
+from .configurator.gymnasium_adapter import GymnasiumAdapter
 from .models.workload import CmdArgs, NsysConfiguration, PredictorConfig, TestDefinition
 from .parser import Parser
 from .reporter import PerTestReporter, StatusReporter, TarballReporter
@@ -76,6 +77,7 @@
     "Grader",
     "GradingStrategy",
     "GridSearchAgent",
+    "GymnasiumAdapter",
     "HFModel",
     "InstallStatusResult",
     "Installable",
diff --git a/tests/test_gymnasium_adapter_contract.py b/tests/test_gymnasium_adapter_contract.py
new file mode 100644
index 000000000..cbe48bcfb
--- /dev/null
+++ b/tests/test_gymnasium_adapter_contract.py
@@ -0,0 +1,479 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Caller-contract tests for ``GymnasiumAdapter``.
+
+The single invariant every consumer assumes:
+
+    ``test_run.step`` is a **monotonic trial index** across the entire run.
+
+Gym's ``reset()`` is an *episode boundary*, not a trial boundary. For the
+contextual-bandit configs (``agent_steps=1``), RLlib calls ``reset()`` before
+*every* trial. An earlier adapter rewound ``test_run.step`` on reset and
+collapsed every trial onto step 1 — silently overwriting output dirs and
+producing duplicate-step rows in trajectory.csv / env.csv.
+
+These tests pin the negative invariant: the adapter must not mutate
+``test_run.step``. That counter is owned by ``TestRun`` and advanced
+exclusively by ``CloudAIGymEnv.step()``.
+"""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from typing import Any, Optional
+
+import pytest
+
+from cloudai._core.action_space import ContinuousSpace
+from cloudai.configurator.base_gym import BaseGym
+from cloudai.configurator.env_params import ObsLeafDescriptor
+
+try:
+    import gymnasium  # noqa: F401
+
+    _HAS_GYMNASIUM = True
+except ImportError:
+    _HAS_GYMNASIUM = False
+
+pytestmark = pytest.mark.skipif(not _HAS_GYMNASIUM, reason="gymnasium not installed")
+
+from cloudai.configurator.gymnasium_adapter import GymnasiumAdapter  # noqa: E402
+
+
+class _StubBaseGym(BaseGym):
+    """Minimal BaseGym with a ``test_run`` attribute mirroring CloudAIGymEnv."""
+
+    def __init__(self) -> None:
+        self._action_space: dict[str, Any] = {"param_a": [1, 2, 3], "param_b": [10, 20]}
+        self._observation_space: list[float] = [0.0, 0.0, 0.0]
+        self.test_run = SimpleNamespace(step=0)
+        super().__init__()
+
+    def define_action_space(self) -> dict[str, Any]:
+        return self._action_space
+
+    def define_observation_space(self) -> list:
+        return self._observation_space
+
+    def reset(
+        self,
+        seed: Optional[int] = None,
+        options: Optional[dict[str, Any]] = None,
+    ) -> tuple[list, dict[str, Any]]:
+        return [0.0, 0.0, 0.0], {}
+
+    def step(self, action: Any) -> tuple[list, float, bool, dict]:
+        self.test_run.step += 1
+        return [1.0, 2.0, 3.0], 0.5, False, {}
+
+    def render(self, mode: str = "human") -> None:
+        return None
+
+    def seed(self, seed: Optional[int] = None) -> None:
+        pass
+
+
+class TestStepIsMonotonicTrialIndex:
+    """``test_run.step`` is a trial index, not an episode-local counter."""
+
+    def test_step_advances_within_single_episode(self) -> None:
+        gym = _StubBaseGym()
+        adapter = GymnasiumAdapter(gym)
+        adapter.reset()
+
+        seen: list[int] = []
+        for _ in range(3):
+            adapter.step({"param_a": 0, "param_b": 0})
+            seen.append(gym.test_run.step)
+
+        assert seen == [1, 2, 3]
+
+    def test_step_is_monotonic_across_episode_boundaries(self) -> None:
+        """The bug: ``reset()`` rewinds ``_step_count`` to 0, so the next
+        ``step()`` writes ``test_run.step = 1`` again. With contextual-bandit
+        RLlib (one step per episode) this means every trial reports step 1.
+        """
+        gym = _StubBaseGym()
+        adapter = GymnasiumAdapter(gym)
+
+        seen: list[int] = []
+        for _ in range(5):
+            adapter.reset()
+            adapter.step({"param_a": 0, "param_b": 0})
+            seen.append(gym.test_run.step)
+
+        assert seen == [1, 2, 3, 4, 5], (
+            f"test_run.step must be a monotonic trial index across episodes; got {seen}. "
+            "reset() is a Gym episode boundary, not a trial boundary; rewinding the "
+            "trial counter collapses every contextual-bandit rollout onto step 1."
+        )
+
+    def test_mixed_within_and_across_episode_steps_are_monotonic(self) -> None:
+        gym = _StubBaseGym()
+        adapter = GymnasiumAdapter(gym)
+
+        seen: list[int] = []
+        for episode_len in (2, 1, 3):
+            adapter.reset()
+            for _ in range(episode_len):
+                adapter.step({"param_a": 0, "param_b": 0})
+                seen.append(gym.test_run.step)
+
+        assert seen == [1, 2, 3, 4, 5, 6], (
+            f"test_run.step must be a monotonic trial index regardless of episode shape; got {seen}"
+        )
+
+
+class _ContextualStubBaseGym(BaseGym):
+    """BaseGym stub that simulates CloudAIGymEnv's contextual-obs contract.
+
+    ``reset()`` returns an observation with a per-trial context value in
+    slot 1 (mimicking how the upstream env writes a sampled env_param into
+    the obs vector built at the trial boundary). Each call to ``reset()``
+    advances the simulated trial counter so we can assert the adapter
+    surfaces the *current* context, not a stale one.
+    """
+
+    def __init__(self, contexts: list[float]) -> None:
+        self._contexts = list(contexts)
+        self._action_space: dict[str, Any] = {"param_a": [1, 2, 3], "param_b": [10, 20]}
+        self.test_run = SimpleNamespace(step=0)
+        super().__init__()
+
+    def define_action_space(self) -> dict[str, Any]:
+        return self._action_space
+
+    def define_observation_space(self) -> list:
+        return [0.0, 0.0]
+
+    def reset(
+        self,
+        seed: Optional[int] = None,
+        options: Optional[dict[str, Any]] = None,
+    ) -> tuple[list, dict[str, Any]]:
+        ctx = self._contexts[self.test_run.step]
+        self.test_run.step += 1
+        return [0.0, ctx], {}
+
+    def step(self, action: Any) -> tuple[list, float, bool, dict]:
+        ctx = self._contexts[self.test_run.step - 1]
+        return [42.0, ctx], 0.5, False, {}
+
+    def render(self, mode: str = "human") -> None:
+        return None
+
+    def seed(self, seed: Optional[int] = None) -> None:
+        pass
+
+
+class TestAdapterPropagatesContextualObservation:
+    """The adapter must pass through env-built observations unchanged.
+
+    With the contextual-bandit fix in cloudai, ``CloudAIGymEnv.reset()``
+    samples env_params at the trial boundary and bakes them into the obs
+    vector before returning. RLlib's policy reads obs from
+    ``adapter.reset()``, so the adapter must propagate that vector verbatim
+    (modulo numpy-float32 casting). The same propagation invariant applies
+    on ``adapter.step()``.
+    """
+
+    def test_reset_propagates_context_into_observation(self) -> None:
+        contexts = [0.001, 0.0, 0.01, 0.001]
+        gym = _ContextualStubBaseGym(contexts)
+        adapter = GymnasiumAdapter(gym)
+
+        seen: list[float] = []
+        for _ in range(len(contexts)):
+            obs, _info = adapter.reset()
+            seen.append(float(obs[1]))
+
+        assert seen == pytest.approx(contexts, rel=1e-5), (
+            f"adapter.reset() must surface the trial's context value (slot 1) verbatim "
+            f"(modulo float32 cast); got {seen}, expected {contexts}"
+        )
+
+    def test_step_propagates_context_into_observation(self) -> None:
+        contexts = [0.0, 0.01, 0.001]
+        gym = _ContextualStubBaseGym(contexts)
+        adapter = GymnasiumAdapter(gym)
+
+        for ctx in contexts:
+            adapter.reset()
+            obs, _r, _term, _trunc, _info = adapter.step({"param_a": 0, "param_b": 0})
+            assert float(obs[0]) == pytest.approx(42.0, rel=1e-5), (
+                f"adapter.step() must propagate the env's measured-metric slot; got {obs[0]}"
+            )
+            assert float(obs[1]) == pytest.approx(ctx, rel=1e-5), (
+                f"adapter.step() must propagate the trial's context value; got {obs[1]}, expected {ctx}"
+            )
+
+
+class _ContinuousStubBaseGym(BaseGym):
+    """BaseGym stub that surfaces a ContinuousSpace in its action_space.
+
+    Records the params received by ``step`` so the test can assert what the
+    adapter actually emits to the env (post-rounding / clamping).
+    """
+
+    def __init__(self, action_space: dict[str, Any]) -> None:
+        self._action_space = action_space
+        self.test_run = SimpleNamespace(step=0)
+        self.received: list[dict[str, Any]] = []
+        super().__init__()
+
+    def define_action_space(self) -> dict[str, Any]:
+        return self._action_space
+
+    def define_observation_space(self) -> list:
+        return [0.0]
+
+    def reset(
+        self,
+        seed: Optional[int] = None,
+        options: Optional[dict[str, Any]] = None,
+    ) -> tuple[list, dict[str, Any]]:
+        return [0.0], {}
+
+    def step(self, action: Any) -> tuple[list, float, bool, dict]:
+        self.received.append(dict(action))
+        return [1.0], 0.5, False, {}
+
+    def render(self, mode: str = "human") -> None:
+        return None
+
+    def seed(self, seed: Optional[int] = None) -> None:
+        pass
+
+
+class TestAdapterDispatchesContinuousSpace:
+    """``ContinuousSpace`` in cloudai's param_space → ``gym.spaces.Box`` in the adapter.
+
+    The adapter is also responsible for **actuator quantization**: when the
+    spec is ``dtype="int"``, the float coming out of the policy is rounded at
+    decode_action, so the env (and trajectory cache) sees only ints. This
+    collapses float jitter (47.34 vs 47.36) onto the same emitted int and
+    keeps the cache hit-rate honest.
+    """
+
+    @staticmethod
+    def _action_space() -> dict[str, Any]:
+        return {
+            "threshold": ContinuousSpace(low=0.0, high=200.0, dtype="int"),
+            "discrete": [10, 20, 30],
+            "fixed": [99],
+        }
+
+    def test_action_space_uses_box_for_continuous_and_discrete_for_list(self) -> None:
+        import gymnasium
+
+        gym_env = _ContinuousStubBaseGym(self._action_space())
+        adapter = GymnasiumAdapter(gym_env)
+
+        assert isinstance(adapter.action_space["threshold"], gymnasium.spaces.Box)
+        assert adapter.action_space["threshold"].shape == (1,)
+        assert float(adapter.action_space["threshold"].low[0]) == pytest.approx(0.0)
+        assert float(adapter.action_space["threshold"].high[0]) == pytest.approx(200.0)
+
+        assert isinstance(adapter.action_space["discrete"], gymnasium.spaces.Discrete)
+        assert int(adapter.action_space["discrete"].n) == 3
+
+        assert "fixed" not in adapter.action_space, "single-element lists are fixed, not tunable"
+
+    def test_decode_action_rounds_dtype_int(self) -> None:
+        import numpy as np
+
+        gym_env = _ContinuousStubBaseGym(self._action_space())
+        adapter = GymnasiumAdapter(gym_env)
+
+        decoded = adapter.decode_action({"threshold": np.array([47.34], dtype=np.float32), "discrete": 1})
+
+        assert decoded["threshold"] == 47, f"dtype=int must round; got {decoded['threshold']}"
+        assert isinstance(decoded["threshold"], int), "rounded action must be Python int, not float"
+        assert decoded["discrete"] == 20, "Discrete index 1 → 20"
+
+    @pytest.mark.parametrize("raw,expected", [(47.34, 47), (47.36, 47), (47.5, 48), (47.4999, 47)])
+    def test_cache_key_collapses_float_jitter_to_same_int(self, raw: float, expected: int) -> None:
+        """Adjacent float actions that round to the same int must collapse identically.
+
+        This is the actuator-quantization invariant from the design doc: the
+        env (and trajectory cache key) must see the same int for any float in
+        the rounding interval, so the cache fills with semantic duplicates,
+        not float-noise duplicates.
+        """
+        import numpy as np
+
+        gym_env = _ContinuousStubBaseGym(self._action_space())
+        adapter = GymnasiumAdapter(gym_env)
+
+        decoded = adapter.decode_action({"threshold": np.array([raw], dtype=np.float32), "discrete": 0})
+        assert decoded["threshold"] == expected
+
+    @pytest.mark.parametrize("raw,expected", [(-5.0, 0), (250.0, 200), (0.0, 0), (200.0, 200)])
+    def test_decode_action_clamps_to_range(self, raw: float, expected: int) -> None:
+        """Out-of-range continuous actions clamp to ``[low, high]``."""
+        import numpy as np
+
+        gym_env = _ContinuousStubBaseGym(self._action_space())
+        adapter = GymnasiumAdapter(gym_env)
+
+        decoded = adapter.decode_action({"threshold": np.array([raw], dtype=np.float32), "discrete": 0})
+        assert decoded["threshold"] == expected
+
+    def test_step_emits_rounded_int_to_underlying_env(self) -> None:
+        """The env (and downstream cache) must see the *rounded* int, not the raw float."""
+        import numpy as np
+
+        gym_env = _ContinuousStubBaseGym(self._action_space())
+        adapter = GymnasiumAdapter(gym_env)
+        adapter.reset()
+
+        adapter.step({"threshold": np.array([78.6], dtype=np.float32), "discrete": 2})
+
+        assert gym_env.received, "env.step was not called"
+        emitted = gym_env.received[-1]
+        assert emitted["threshold"] == 79, "env must receive rounded int, not raw float"
+        assert emitted["discrete"] == 30, "Discrete decode unaffected by continuous-action plumbing"
+        assert emitted["fixed"] == 99, "fixed params must be injected by the adapter"
+
+    def test_dtype_float_preserves_continuous_value(self) -> None:
+        """``dtype="float"`` clamps but does NOT round; the policy's float reaches the env."""
+        import numpy as np
+
+        action_space: dict[str, Any] = {
+            "knob": ContinuousSpace(low=0.0, high=1.0, dtype="float"),
+        }
+        gym_env = _ContinuousStubBaseGym(action_space)
+        adapter = GymnasiumAdapter(gym_env)
+
+        decoded = adapter.decode_action({"knob": np.array([0.7234], dtype=np.float32)})
+        assert decoded["knob"] == pytest.approx(0.7234, rel=1e-4)
+        assert isinstance(decoded["knob"], float)
+
+
+class _StructuredStubBaseGym(BaseGym):
+    """BaseGym stub that opts in/out of the structured (Dict) obs path.
+
+    Mirrors ``CloudAIGymEnv``'s gate: ``structured_observation_descriptors``
+    returns ``None`` for a metrics-only env (no observed name is a declared
+    env_param) and a per-leaf descriptor dict otherwise. ``encode_observation``
+    produces the matching encoded leaves.
+    """
+
+    def __init__(self, descriptors: Optional[dict[str, ObsLeafDescriptor]], obs_dim: int = 2) -> None:
+        self._descriptors = descriptors
+        self._obs_dim = obs_dim
+        self._action_space: dict[str, Any] = {"param_a": [1, 2, 3]}
+        self.test_run = SimpleNamespace(step=0)
+        super().__init__()
+
+    def define_action_space(self) -> dict[str, Any]:
+        return self._action_space
+
+    def define_observation_space(self) -> list:
+        return [0.0] * self._obs_dim
+
+    def structured_observation_descriptors(self) -> Optional[dict[str, ObsLeafDescriptor]]:
+        return self._descriptors
+
+    def encode_observation(self, raw_values: list) -> dict[str, Any]:
+        out: dict[str, Any] = {}
+        assert self._descriptors is not None
+        for i, (name, desc) in enumerate(self._descriptors.items()):
+            raw = raw_values[i] if i < len(raw_values) else 0.0
+            if desc.kind == "discrete":
+                out[name] = int(raw)
+            elif desc.dim == 2:
+                out[name] = [1.0 if raw <= 0.0 else 0.0, float(raw)]
+            else:
+                out[name] = [float(raw)]
+        return out
+
+    def reset(
+        self,
+        seed: Optional[int] = None,
+        options: Optional[dict[str, Any]] = None,
+    ) -> tuple[list, dict[str, Any]]:
+        return [0.0] * self._obs_dim, {}
+
+    def step(self, action: Any) -> tuple[list, float, bool, dict]:
+        return [0.0] * self._obs_dim, 0.5, False, {}
+
+    def render(self, mode: str = "human") -> None:
+        return None
+
+    def seed(self, seed: Optional[int] = None) -> None:
+        pass
+
+
+class TestStructuredObsGate:
+    """D1: the structured (Dict) obs space is opt-in; metrics-only stays flat Box."""
+
+    def test_metrics_only_env_falls_back_to_box(self) -> None:
+        """An env that opts out (``structured_observation_descriptors`` -> None) keeps a flat Box.
+
+        This is the blast-radius guard: non-DR workloads (BO/GA/MAB on plain
+        metrics) must NOT silently switch to a Dict obs layout.
+        """
+        import gymnasium
+
+        gym_env = _StructuredStubBaseGym(descriptors=None, obs_dim=3)
+        adapter = GymnasiumAdapter(gym_env)
+
+        assert isinstance(adapter.observation_space, gymnasium.spaces.Box)
+        assert adapter.observation_space.shape == (3,)
+
+    def test_env_param_env_uses_dict(self) -> None:
+        """An env with a declared env_param leaf exposes a ``spaces.Dict``."""
+        import gymnasium
+
+        descriptors = {
+            "bus_bw": ObsLeafDescriptor(kind="box", dim=1),
+            "drop_rate": ObsLeafDescriptor(kind="box", dim=2),
+        }
+        adapter = GymnasiumAdapter(_StructuredStubBaseGym(descriptors=descriptors))
+
+        assert isinstance(adapter.observation_space, gymnasium.spaces.Dict)
+        assert set(adapter.observation_space.spaces) == {"bus_bw", "drop_rate"}
+        assert adapter.observation_space.spaces["drop_rate"].shape == (2,)
+
+
+class TestCategoricalLeafSubspace:
+    """D3: a categorical (discrete) descriptor maps to ``Discrete(k)`` and decodes to an int."""
+
+    def test_discrete_descriptor_becomes_discrete_space(self) -> None:
+        import gymnasium
+
+        descriptors = {
+            "bus_bw": ObsLeafDescriptor(kind="box", dim=1),
+            "variant": ObsLeafDescriptor(kind="discrete", dim=1, n=3),
+        }
+        adapter = GymnasiumAdapter(_StructuredStubBaseGym(descriptors=descriptors))
+
+        variant_space = adapter.observation_space.spaces["variant"]
+        assert isinstance(variant_space, gymnasium.spaces.Discrete)
+        assert int(variant_space.n) == 3
+
+    def test_discrete_leaf_emitted_as_int_index(self) -> None:
+        """The emitted obs for a discrete leaf is an ``int`` the Discrete space accepts."""
+        descriptors = {"variant": ObsLeafDescriptor(kind="discrete", dim=1, n=3)}
+        gym_env = _StructuredStubBaseGym(descriptors=descriptors, obs_dim=1)
+        adapter = GymnasiumAdapter(gym_env)
+
+        obs, _ = adapter.reset()
+        assert isinstance(obs["variant"], int)
+        assert adapter.observation_space.contains(obs)
diff --git a/uv.lock b/uv.lock
index 7f8a19b64..5a8911dbd 100644
--- a/uv.lock
+++ b/uv.lock
@@ -282,6 +282,7 @@ dependencies = [
 [package.optional-dependencies]
 dev = [
     { name = "build" },
+    { name = "gymnasium" },
     { name = "import-linter" },
     { name = "pandas-stubs" },
     { name = "pre-commit" },
@@ -312,6 +313,9 @@ docs-cms = [
     { name = "sphinx-rtd-theme" },
     { name = "sphinxcontrib-mermaid" },
 ]
+rl = [
+    { name = "gymnasium" },
+]
 
 [package.metadata]
 requires-dist = [
@@ -320,6 +324,8 @@ requires-dist = [
     { name = "bokeh", specifier = "~=3.8" },
     { name = "build", marker = "extra == 'dev'", specifier = "~=1.4" },
     { name = "click", specifier = "~=8.3" },
+    { name = "gymnasium", marker = "extra == 'dev'", specifier = "~=1.2" },
+    { name = "gymnasium", marker = "extra == 'rl'", specifier = "~=1.2" },
     { name = "huggingface-hub", specifier = "~=1.4" },
     { name = "import-linter", marker = "extra == 'dev'", specifier = "~=2.10" },
     { name = "jinja2", specifier = "~=3.1.6" },
@@ -350,7 +356,16 @@ requires-dist = [
     { name = "vulture", marker = "extra == 'dev'", specifier = "==2.14" },
     { name = "websockets", specifier = "~=16.0" },
 ]
-provides-extras = ["dev", "docs", "docs-cms"]
+provides-extras = ["dev", "rl", "docs", "docs-cms"]
+
+[[package]]
+name = "cloudpickle"
+version = "3.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/27/fb/576f067976d320f5f0114a8d9fa1215425441bb35627b1993e5afd8111e5/cloudpickle-3.1.2.tar.gz", hash = "sha256:7fda9eb655c9c230dab534f1983763de5835249750e85fbcef43aaa30a9a2414", size = 22330, upload-time = "2025-11-03T09:25:26.604Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl", hash = "sha256:9acb47f6afd73f60dc1df93bb801b472f05ff42fa6c84167d25cb206be1fbf4a", size = 22228, upload-time = "2025-11-03T09:25:25.534Z" },
+]
 
 [[package]]
 name = "colorama"
@@ -674,6 +689,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
 ]
 
+[[package]]
+name = "farama-notifications"
+version = "0.0.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ec/91/14397890dde30adc4bee6462158933806207bc5dd10d7b4d09d5c33845cf/farama_notifications-0.0.6.tar.gz", hash = "sha256:b19acac4bb41d76e59e03394b5dd165f4761c86fa327f56307a35cbee3b60158", size = 2517, upload-time = "2026-04-24T08:43:57.603Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7c/f0/21f81892e4ed10f4ec3ef2e7cf8635fb76e7c0907c55d0da66be50094760/farama_notifications-0.0.6-py3-none-any.whl", hash = "sha256:f84839188efa1ce5bb361c2a84881b2dc2c0d0d7fb661ff00421820170930935", size = 2897, upload-time = "2026-04-24T08:43:56.785Z" },
+]
+
 [[package]]
 name = "fastapi"
 version = "0.136.3"
@@ -884,6 +908,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/48/b2/b096ccce418882fbfda4f7496f9357aaa9a5af1896a9a7f60d9f2b275a06/grpcio-1.78.0-cp314-cp314-win_amd64.whl", hash = "sha256:dce09d6116df20a96acfdbf85e4866258c3758180e8c49845d6ba8248b6d0bbb", size = 4929852, upload-time = "2026-02-06T09:56:45.885Z" },
 ]
 
+[[package]]
+name = "gymnasium"
+version = "1.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cloudpickle" },
+    { name = "farama-notifications" },
+    { name = "numpy" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/4d/ff/14b6880d703dfaca204490979d3254ccd280c99550798993319902873658/gymnasium-1.3.0.tar.gz", hash = "sha256:6939e86e835d6b71b6ba6bfd360487420876deafc79bfb7bacba83a7c446bcf3", size = 830646, upload-time = "2026-04-22T13:47:14.155Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e9/73/fda6a25f3beeb5e49d74330b44092b9e5a547395ccd478d1103ddcbff1fc/gymnasium-1.3.0-py3-none-any.whl", hash = "sha256:6b8c159a8540dcbcb221722d7efda24d78ebbcbc3bd2ea1c2611aa2a34471fc2", size = 953904, upload-time = "2026-04-22T13:47:12.13Z" },
+]
+
 [[package]]
 name = "h11"
 version = "0.16.0"

From 6c2452fce89aa468391b11cbb20bc481d6d80785 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Tue, 16 Jun 2026 10:59:15 -0400
Subject: [PATCH 14/22] fix(gymnasium-adapter): accept continuous actions in
 step() type signature

step() delegates to decode_action(dict[str, Any]) and exists precisely to
round float/continuous policy actions to ints; widen its parameter type
from dict[str, int] to dict[str, Any] to match.
---
 src/cloudai/configurator/gymnasium_adapter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py
index 590abf11d..b827e053a 100644
--- a/src/cloudai/configurator/gymnasium_adapter.py
+++ b/src/cloudai/configurator/gymnasium_adapter.py
@@ -203,7 +203,7 @@ def reset(
         obs, info = self._env.reset(seed=seed, options=options)
         return self._as_obs_array(obs), info
 
-    def step(self, action: dict[str, int]) -> tuple[Any, float, bool, bool, dict[str, Any]]:
+    def step(self, action: dict[str, Any]) -> tuple[Any, float, bool, bool, dict[str, Any]]:
         params = {**self._fixed_params, **self.decode_action(action)}
         return self._step_with_params(params)
 

From 89a5b8f1122e8b243296aa9498fabd9ad2eca819 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Tue, 16 Jun 2026 11:17:46 -0400
Subject: [PATCH 15/22] fix(gymnasium-adapter): validate structured-observation
 key parity; preserve traceback on DSE re-raise

- _as_obs_array(): assert encoded keys match descriptors before coercion
  (reuses _assert_keys, same guard as decode_action/step_raw) and
  materialize output by descriptor keys to avoid KeyError on extra keys
  and silent partial observations on missing keys.
- handlers.py: re-raise the captured hard-fail with its original traceback.

Addresses CodeRabbit findings on #930.
---
 src/cloudai/cli/handlers.py                   | 2 +-
 src/cloudai/configurator/gymnasium_adapter.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py
index fb6f9b7d8..5fb6796cf 100644
--- a/src/cloudai/cli/handlers.py
+++ b/src/cloudai/cli/handlers.py
@@ -178,7 +178,7 @@ def handle_dse_job(runner: Runner, args: argparse.Namespace) -> int:
         )
 
     if run_error is not None:
-        raise run_error
+        raise run_error.with_traceback(run_error.__traceback__)
 
     logging.info("All jobs are complete.")
     return err
diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py
index b827e053a..4ad53d23a 100644
--- a/src/cloudai/configurator/gymnasium_adapter.py
+++ b/src/cloudai/configurator/gymnasium_adapter.py
@@ -249,7 +249,8 @@ def _as_obs_array(self, obs: Any) -> Any:
             return self._np.asarray(obs, dtype=self._np.float32)
         env = cast(StructuredObservation, self._env)
         encoded = env.encode_observation(list(obs))
-        return {name: self._leaf_to_value(descriptors[name], leaf) for name, leaf in encoded.items()}
+        self._assert_keys(encoded.keys(), set(descriptors), "encoded observation")
+        return {name: self._leaf_to_value(descriptors[name], encoded[name]) for name in descriptors}
 
     def _leaf_to_value(self, descriptor: Any, leaf: Any) -> Any:
         """Coerce one encoded leaf to its gymnasium subspace dtype."""

From b8684791431fefcfa15f8d7ed888b2268195f912 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Tue, 16 Jun 2026 17:46:18 -0400
Subject: [PATCH 16/22] refactor(configurator): route gymnasium import through
 LazyImports singleton
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the bespoke _import_gymnasium() in-method seam with the canonical
lazy.gymnasium / lazy.np properties; addresses the in-method-import review
concern. No behavior change — gymnasium stays an optional [rl] extra.
---
 src/cloudai/configurator/gymnasium_adapter.py | 27 +++----------------
 src/cloudai/util/lazy_imports.py              | 15 +++++++++++
 2 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py
index 4ad53d23a..e1b3f9971 100644
--- a/src/cloudai/configurator/gymnasium_adapter.py
+++ b/src/cloudai/configurator/gymnasium_adapter.py
@@ -38,26 +38,7 @@
 from cloudai._core.action_space import ContinuousSpace
 from cloudai.configurator.base_gym import BaseGym
 from cloudai.configurator.env_params import StructuredObservation
-
-_GYMNASIUM_INSTALL_HINT = "gymnasium is required for GymnasiumAdapter. Install it with: pip install 'cloudai[rl]'"
-
-
-def _import_gymnasium():
-    """
-    Import gymnasium + numpy lazily; raise a clear, actionable error when absent.
-
-    Kept as a single seam so cloudai installs without the ``[rl]`` extra
-    continue to work for non-RL agents, and tests can patch this helper to
-    simulate a missing install.
-    """
-    try:
-        import gymnasium
-        import numpy as np
-        from gymnasium import spaces
-
-        return gymnasium, spaces, np
-    except ImportError as exc:
-        raise ImportError(_GYMNASIUM_INSTALL_HINT) from exc
+from cloudai.util.lazy_imports import lazy
 
 
 class GymnasiumAdapter:
@@ -82,10 +63,8 @@ class GymnasiumAdapter:
     metadata: ClassVar[dict[str, Any]] = {"render_modes": ["human"]}
 
     def __init__(self, env: BaseGym) -> None:
-        _, spaces, np = _import_gymnasium()
-
-        self._np = np
-        self._spaces = spaces
+        np = self._np = lazy.np
+        spaces = self._spaces = lazy.gymnasium.spaces
         self._env = env
 
         raw_action_space = env.define_action_space()
diff --git a/src/cloudai/util/lazy_imports.py b/src/cloudai/util/lazy_imports.py
index 65ba62df3..1df8059be 100644
--- a/src/cloudai/util/lazy_imports.py
+++ b/src/cloudai/util/lazy_imports.py
@@ -27,6 +27,7 @@
     import bokeh.palettes as bokeh_pallettes
     import bokeh.plotting as bokeh_plotting
     import bokeh.transform as bokeh_transform
+    import gymnasium
     import kubernetes as k8s
     import numpy as np
     import pandas as pd
@@ -39,6 +40,7 @@ def __init__(self):
         self._np: ModuleType | None = None
         self._pd: ModuleType | None = None
         self._k8s: ModuleType | None = None
+        self._gymnasium: ModuleType | None = None
         self._bokeh: ModuleType | None = None
         self._bokeh_plotting: ModuleType | None = None
         self._bokeh_models: ModuleType | None = None
@@ -75,6 +77,19 @@ def k8s(self) -> k8s:  # type: ignore[no-any-return]
 
         return cast("k8s", self._k8s)
 
+    @property
+    def gymnasium(self) -> gymnasium:  # type: ignore[no-any-return]
+        """Lazy import of gymnasium (optional ``cloudai[rl]`` extra)."""
+        if self._gymnasium is None:
+            try:
+                import gymnasium
+            except ImportError as exc:
+                raise ImportError(
+                    "gymnasium is required for GymnasiumAdapter. Install it with: pip install 'cloudai[rl]'"
+                ) from exc
+            self._gymnasium = gymnasium
+        return cast("gymnasium", self._gymnasium)
+
     @property
     def bokeh(self) -> bokeh:  # type: ignore[no-any-return]
         """Lazy import of bokeh."""

From 4b59c499150b41bf5c2e38a88e3392925e3ab7c0 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Tue, 16 Jun 2026 18:33:15 -0400
Subject: [PATCH 17/22] fix(configurator): make lazy-gymnasium refactor land
 CI-green

The lazy.gymnasium.spaces refactor gives the adapter precise gymnasium types
instead of Any, which surfaced two latent issues the scoped pre-commit run
missed:

- pyright now sees Space[Any]/Dict in the adapter contract test, so concrete
  attribute access (.low/.high/.n/.spaces) is flagged. Narrow via local
  bindings + isinstance before access.
- lazy_imports.py now has a 2026 commit in its history, so the ci_only
  copyright check requires the year range 2025-2026.
---
 src/cloudai/util/lazy_imports.py         |  2 +-
 tests/test_gymnasium_adapter_contract.py | 18 +++++++++++-------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/cloudai/util/lazy_imports.py b/src/cloudai/util/lazy_imports.py
index 1df8059be..def790dfc 100644
--- a/src/cloudai/util/lazy_imports.py
+++ b/src/cloudai/util/lazy_imports.py
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/tests/test_gymnasium_adapter_contract.py b/tests/test_gymnasium_adapter_contract.py
index cbe48bcfb..ab202dff3 100644
--- a/tests/test_gymnasium_adapter_contract.py
+++ b/tests/test_gymnasium_adapter_contract.py
@@ -284,13 +284,15 @@ def test_action_space_uses_box_for_continuous_and_discrete_for_list(self) -> Non
         gym_env = _ContinuousStubBaseGym(self._action_space())
         adapter = GymnasiumAdapter(gym_env)
 
-        assert isinstance(adapter.action_space["threshold"], gymnasium.spaces.Box)
-        assert adapter.action_space["threshold"].shape == (1,)
-        assert float(adapter.action_space["threshold"].low[0]) == pytest.approx(0.0)
-        assert float(adapter.action_space["threshold"].high[0]) == pytest.approx(200.0)
+        threshold = adapter.action_space["threshold"]
+        assert isinstance(threshold, gymnasium.spaces.Box)
+        assert threshold.shape == (1,)
+        assert float(threshold.low[0]) == pytest.approx(0.0)
+        assert float(threshold.high[0]) == pytest.approx(200.0)
 
-        assert isinstance(adapter.action_space["discrete"], gymnasium.spaces.Discrete)
-        assert int(adapter.action_space["discrete"].n) == 3
+        discrete = adapter.action_space["discrete"]
+        assert isinstance(discrete, gymnasium.spaces.Discrete)
+        assert int(discrete.n) == 3
 
         assert "fixed" not in adapter.action_space, "single-element lists are fixed, not tunable"
 
@@ -464,7 +466,9 @@ def test_discrete_descriptor_becomes_discrete_space(self) -> None:
         }
         adapter = GymnasiumAdapter(_StructuredStubBaseGym(descriptors=descriptors))
 
-        variant_space = adapter.observation_space.spaces["variant"]
+        observation_space = adapter.observation_space
+        assert isinstance(observation_space, gymnasium.spaces.Dict)
+        variant_space = observation_space.spaces["variant"]
         assert isinstance(variant_space, gymnasium.spaces.Discrete)
         assert int(variant_space.n) == 3
 

From c30b72357ad63e6245264360ec3ce53ed339ecc7 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Sun, 21 Jun 2026 10:09:30 -0400
Subject: [PATCH 18/22] feat(configurator): make GymnasiumAdapter inherit
 gymnasium.Env

Inherit from gymnasium.Env (guarded import, falls back to object when the
optional [rl] extra is absent) so ecosystem tooling that performs isinstance
checks (e.g. Stable-Baselines3) accepts the adapter.

- Use the TYPE_CHECKING import form so pyright sees a concrete base class
  while runtime keeps the optional-dependency fallback.
- Drop ClassVar on metadata to match Env's attribute shape (noqa RUF012).
- Rename the inner-env accessor unwrapped -> cloudai_env; gymnasium's
  Env.unwrapped (returns self) is the correct base-env semantics, and the
  old override returned a non-Env (BaseGym), which would mislead ecosystem
  code calling .unwrapped.
---
 src/cloudai/configurator/gymnasium_adapter.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py
index e1b3f9971..6a05afe5a 100644
--- a/src/cloudai/configurator/gymnasium_adapter.py
+++ b/src/cloudai/configurator/gymnasium_adapter.py
@@ -33,15 +33,23 @@
 
 from __future__ import annotations
 
-from typing import Any, ClassVar, Optional, cast
+from typing import TYPE_CHECKING, Any, Optional, cast
 
 from cloudai._core.action_space import ContinuousSpace
 from cloudai.configurator.base_gym import BaseGym
 from cloudai.configurator.env_params import StructuredObservation
 from cloudai.util.lazy_imports import lazy
 
+if TYPE_CHECKING:
+    from gymnasium import Env as _GymnasiumEnvBase
+else:
+    try:  # ``gymnasium`` is an optional [rl] dependency; fall back to ``object`` when absent.
+        from gymnasium import Env as _GymnasiumEnvBase
+    except ImportError:
+        _GymnasiumEnvBase = object
 
-class GymnasiumAdapter:
+
+class GymnasiumAdapter(_GymnasiumEnvBase):
     """
     Expose a CloudAI :class:`BaseGym` as a ``gymnasium.Env``-shaped object.
 
@@ -60,7 +68,9 @@ class GymnasiumAdapter:
     instantiating the adapter without them raises ``ImportError``.
     """
 
-    metadata: ClassVar[dict[str, Any]] = {"render_modes": ["human"]}
+    # Overrides gymnasium.Env.metadata (a non-ClassVar instance attribute); matching that
+    # shape satisfies pyright's override check, so RUF012's ClassVar suggestion is silenced.
+    metadata: dict[str, Any] = {"render_modes": ["human"]}  # noqa: RUF012
 
     def __init__(self, env: BaseGym) -> None:
         np = self._np = lazy.np
@@ -132,7 +142,8 @@ def _descriptor_to_space(self, descriptor: Any) -> Any:
         return self._spaces.Box(low=-self._np.inf, high=self._np.inf, shape=(descriptor.dim,), dtype=self._np.float32)
 
     @property
-    def unwrapped(self) -> BaseGym:
+    def cloudai_env(self) -> BaseGym:
+        """Return the wrapped CloudAI :class:`BaseGym` (gymnasium's ``unwrapped`` returns ``self``)."""
         return self._env
 
     def decode_action(self, action: dict[str, Any]) -> dict[str, Any]:

From 3fe318ca7f8ea869dc7e34500891f00880caf36d Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Sun, 21 Jun 2026 11:22:55 -0400
Subject: [PATCH 19/22] test(gym): cast action_space to Dict in adapter
 contract test

Inheriting gymnasium.Env widened the static type of action_space from the
concrete spaces.Dict the adapter builds to the base spaces.Space, which has
no __getitem__. Cast at the test call site to restore subspace indexing for
pyright (runtime is unchanged; action_space is always a Dict).
---
 tests/test_gymnasium_adapter_contract.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/test_gymnasium_adapter_contract.py b/tests/test_gymnasium_adapter_contract.py
index ab202dff3..ad7a8f955 100644
--- a/tests/test_gymnasium_adapter_contract.py
+++ b/tests/test_gymnasium_adapter_contract.py
@@ -35,7 +35,7 @@
 from __future__ import annotations
 
 from types import SimpleNamespace
-from typing import Any, Optional
+from typing import Any, Optional, cast
 
 import pytest
 
@@ -284,17 +284,18 @@ def test_action_space_uses_box_for_continuous_and_discrete_for_list(self) -> Non
         gym_env = _ContinuousStubBaseGym(self._action_space())
         adapter = GymnasiumAdapter(gym_env)
 
-        threshold = adapter.action_space["threshold"]
+        action_space = cast(gymnasium.spaces.Dict, adapter.action_space)
+        threshold = action_space["threshold"]
         assert isinstance(threshold, gymnasium.spaces.Box)
         assert threshold.shape == (1,)
         assert float(threshold.low[0]) == pytest.approx(0.0)
         assert float(threshold.high[0]) == pytest.approx(200.0)
 
-        discrete = adapter.action_space["discrete"]
+        discrete = action_space["discrete"]
         assert isinstance(discrete, gymnasium.spaces.Discrete)
         assert int(discrete.n) == 3
 
-        assert "fixed" not in adapter.action_space, "single-element lists are fixed, not tunable"
+        assert "fixed" not in action_space, "single-element lists are fixed, not tunable"
 
     def test_decode_action_rounds_dtype_int(self) -> None:
         import numpy as np

From 092573ef2d11b04b32013eeb45fe2f3c48808e23 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Sun, 21 Jun 2026 18:25:51 -0400
Subject: [PATCH 20/22] feat(configurator): add GymnasiumAdapter.encode_action
 as public inverse of decode_action

decode_action had no public inverse, so consumers needing value->index
encoding (e.g. RLlib warm-start / behavioral cloning) reached into the
private _tunable_params dict. When ContinuousSpace support split that
internal, those consumers broke with AttributeError.

encode_action closes the contract: discrete values map to their candidate
index, continuous values wrap into the clamped float32 Box array, so
decode_action(encode_action(v)) == v for any native v. Adds round-trip
contract tests pinning the invariant and rejection of non-candidate
values / key mismatches.
---
 src/cloudai/configurator/gymnasium_adapter.py | 44 +++++++++++++++++++
 tests/test_gymnasium_adapter_contract.py      | 44 +++++++++++++++++++
 2 files changed, 88 insertions(+)

diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py
index 6a05afe5a..73627334b 100644
--- a/src/cloudai/configurator/gymnasium_adapter.py
+++ b/src/cloudai/configurator/gymnasium_adapter.py
@@ -184,6 +184,50 @@ def _decode_continuous(self, name: str, raw: Any) -> Any:
         clamped = min(max(scalar, space.low), space.high)
         return round(clamped) if space.dtype == "int" else clamped
 
+    def encode_action(self, values: dict[str, Any]) -> dict[str, Any]:
+        """
+        Map native parameter values back to raw gym actions; inverse of :meth:`decode_action`.
+
+        Discrete values resolve to their index in the candidate list; continuous
+        values are wrapped in the 1-D ``float32`` array the ``Box`` space expects
+        (clamped to the declared range). Together with :meth:`decode_action` this
+        is an invertible pair on native values:
+        ``decode_action(encode_action(v)) == v`` for any ``v`` drawn from the
+        tunable params (continuous ``dtype="int"`` round-trips through the same
+        rounding ``decode_action`` applies).
+
+        Consumers that need to express known native configs in the policy's
+        action space — e.g. warm-start / behavioral cloning from a recorded
+        trajectory — call this instead of reaching into the adapter internals.
+
+        Raises:
+            ValueError: if ``values`` does not cover exactly the tunable params,
+                or carries a discrete value absent from its candidate list.
+        """
+        expected = set(self._discrete_params) | set(self._continuous_params)
+        self._assert_keys(values.keys(), expected, "values")
+        encoded: dict[str, Any] = {}
+        for name, value in values.items():
+            if name in self._discrete_params:
+                encoded[name] = self._encode_discrete(name, value)
+            else:
+                encoded[name] = self._encode_continuous(name, value)
+        return encoded
+
+    def _encode_discrete(self, name: str, value: Any) -> int:
+        values = self._discrete_params[name]
+        try:
+            return values.index(value)
+        except ValueError:
+            raise ValueError(
+                f"Value {value!r} for '{name}' is not a candidate; expected one of {values}"
+            ) from None
+
+    def _encode_continuous(self, name: str, value: Any) -> Any:
+        space = self._continuous_params[name]
+        clamped = min(max(float(value), space.low), space.high)
+        return self._np.asarray([clamped], dtype=self._np.float32)
+
     def reset(
         self,
         *,
diff --git a/tests/test_gymnasium_adapter_contract.py b/tests/test_gymnasium_adapter_contract.py
index ad7a8f955..74048181a 100644
--- a/tests/test_gymnasium_adapter_contract.py
+++ b/tests/test_gymnasium_adapter_contract.py
@@ -368,6 +368,50 @@ def test_dtype_float_preserves_continuous_value(self) -> None:
         assert isinstance(decoded["knob"], float)
 
 
+class TestEncodeDecodeAreInverse:
+    """``encode_action`` is the inverse of ``decode_action`` on native values.
+
+    Consumers (e.g. RLlib warm-start / behavioral cloning) must be able to
+    express a recorded native config in the policy's action space without
+    reaching into adapter internals. The public pair guarantees
+    ``decode_action(encode_action(v)) == v`` for any native ``v``.
+    """
+
+    def test_discrete_round_trip_decode_of_encode_is_identity(self) -> None:
+        adapter = GymnasiumAdapter(_StubBaseGym())
+        native = {"param_a": 3, "param_b": 10}
+        assert adapter.decode_action(adapter.encode_action(native)) == native
+
+    def test_discrete_encode_of_decode_is_identity(self) -> None:
+        adapter = GymnasiumAdapter(_StubBaseGym())
+        action = {"param_a": 2, "param_b": 1}
+        assert adapter.encode_action(adapter.decode_action(action)) == action
+
+    def test_encode_maps_value_to_candidate_index(self) -> None:
+        adapter = GymnasiumAdapter(_StubBaseGym())
+        assert adapter.encode_action({"param_a": 2, "param_b": 20}) == {"param_a": 1, "param_b": 1}
+
+    def test_encode_rejects_non_candidate_value(self) -> None:
+        adapter = GymnasiumAdapter(_StubBaseGym())
+        with pytest.raises(ValueError, match="not a candidate"):
+            adapter.encode_action({"param_a": 7, "param_b": 10})
+
+    def test_encode_rejects_key_mismatch(self) -> None:
+        adapter = GymnasiumAdapter(_StubBaseGym())
+        with pytest.raises(ValueError, match="keys mismatch"):
+            adapter.encode_action({"param_a": 1})  # missing param_b
+
+    def test_continuous_int_round_trips_through_rounding(self) -> None:
+        action_space: dict[str, Any] = {
+            "threshold": ContinuousSpace(low=0.0, high=200.0, dtype="int"),
+            "discrete": [10, 20, 30],
+            "fixed": [99],
+        }
+        adapter = GymnasiumAdapter(_ContinuousStubBaseGym(action_space))
+        native = {"threshold": 47, "discrete": 20}
+        assert adapter.decode_action(adapter.encode_action(native)) == native
+
+
 class _StructuredStubBaseGym(BaseGym):
     """BaseGym stub that opts in/out of the structured (Dict) obs path.
 

From 2e2d431c7bd4c8ce844cf235522053c950fddb4b Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Tue, 23 Jun 2026 21:12:00 -0400
Subject: [PATCH 21/22] style(configurator): ruff format gymnasium_adapter.py

---
 src/cloudai/configurator/gymnasium_adapter.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py
index 73627334b..6e0e1d934 100644
--- a/src/cloudai/configurator/gymnasium_adapter.py
+++ b/src/cloudai/configurator/gymnasium_adapter.py
@@ -219,9 +219,7 @@ def _encode_discrete(self, name: str, value: Any) -> int:
         try:
             return values.index(value)
         except ValueError:
-            raise ValueError(
-                f"Value {value!r} for '{name}' is not a candidate; expected one of {values}"
-            ) from None
+            raise ValueError(f"Value {value!r} for '{name}' is not a candidate; expected one of {values}") from None
 
     def _encode_continuous(self, name: str, value: Any) -> Any:
         space = self._continuous_params[name]

From 4f479dd6564d15bcda0c7e254e59c2bf118be2f7 Mon Sep 17 00:00:00 2001
From: Rutayan Patro <rpatro@nvidia.com>
Date: Thu, 25 Jun 2026 18:13:10 -0400
Subject: [PATCH 22/22] refactor(gymnasium-adapter): defer continuous-action
 support with ContinuousSpace

The GymnasiumAdapter's continuous-action path depends on ContinuousSpace, which
ships separately. Until then nothing constructs a ContinuousSpace, so the
continuous branches here are unreachable and the only effect of the import is an
ImportError at module load. Drop the continuous import, _continuous_params, the
Box action mapping, and decode/encode continuous handling so the adapter builds
and ships standalone over discrete + structured-observation support. The
continuous support rejoins when ContinuousSpace lands.
---
 src/cloudai/configurator/gymnasium_adapter.py |  65 ++-----
 tests/test_gymnasium_adapter_contract.py      | 158 +-----------------
 2 files changed, 11 insertions(+), 212 deletions(-)

diff --git a/src/cloudai/configurator/gymnasium_adapter.py b/src/cloudai/configurator/gymnasium_adapter.py
index 6e0e1d934..a90a35f15 100644
--- a/src/cloudai/configurator/gymnasium_adapter.py
+++ b/src/cloudai/configurator/gymnasium_adapter.py
@@ -35,7 +35,6 @@
 
 from typing import TYPE_CHECKING, Any, Optional, cast
 
-from cloudai._core.action_space import ContinuousSpace
 from cloudai.configurator.base_gym import BaseGym
 from cloudai.configurator.env_params import StructuredObservation
 from cloudai.util.lazy_imports import lazy
@@ -79,18 +78,13 @@ def __init__(self, env: BaseGym) -> None:
 
         raw_action_space = env.define_action_space()
 
-        # Three classes of params from cloudai's param_space:
+        # Two classes of params from cloudai's param_space:
         #   list with len > 1   -> discrete tunable, mapped to gym.Discrete.
         #   list with len == 1  -> fixed (collapsed); injected on every step so
         #                          agents never see them.
-        #   ContinuousSpace     -> continuous tunable, mapped to gym.Box(1,);
-        #                          ``dtype="int"`` quantizes at decode_action.
         self._discrete_params: dict[str, list] = {
             k: v for k, v in raw_action_space.items() if isinstance(v, list) and len(v) > 1
         }
-        self._continuous_params: dict[str, ContinuousSpace] = {
-            k: v for k, v in raw_action_space.items() if isinstance(v, ContinuousSpace)
-        }
         self._fixed_params: dict[str, Any] = {
             k: v[0] for k, v in raw_action_space.items() if isinstance(v, list) and len(v) == 1
         }
@@ -98,13 +92,6 @@ def __init__(self, env: BaseGym) -> None:
         action_space_components: dict[str, Any] = {
             name: spaces.Discrete(len(values)) for name, values in self._discrete_params.items()
         }
-        for name, space in self._continuous_params.items():
-            action_space_components[name] = spaces.Box(
-                low=np.array([space.low], dtype=np.float32),
-                high=np.array([space.high], dtype=np.float32),
-                shape=(1,),
-                dtype=np.float32,
-            )
         self.action_space = spaces.Dict(action_space_components)
 
         # Observation space: prefer the env's structured (per-leaf) spec so the
@@ -151,25 +138,14 @@ def decode_action(self, action: dict[str, Any]) -> dict[str, Any]:
         Map raw gym actions back to native parameter values.
 
         Discrete actions are list indices and resolve to the corresponding list
-        entry. Continuous actions arrive as a 1-D ``numpy`` array (the Box
-        shape) and are clamped to the declared range; for ``dtype="int"`` the
-        scalar is rounded — this is the **only** place quantization happens, so
-        the cache key collapses float jitter (47.34 vs 47.36) onto the same
-        emitted int and downstream code stays type-agnostic.
+        entry.
 
         Raises:
             ValueError: if ``action`` is missing tunable params, contains
                 unknown keys, or carries an out-of-range discrete index.
         """
-        expected = set(self._discrete_params) | set(self._continuous_params)
-        self._assert_keys(action.keys(), expected, "action")
-        decoded: dict[str, Any] = {}
-        for name, raw in action.items():
-            if name in self._discrete_params:
-                decoded[name] = self._decode_discrete(name, raw)
-            else:
-                decoded[name] = self._decode_continuous(name, raw)
-        return decoded
+        self._assert_keys(action.keys(), set(self._discrete_params), "action")
+        return {name: self._decode_discrete(name, raw) for name, raw in action.items()}
 
     def _decode_discrete(self, name: str, raw: Any) -> Any:
         values = self._discrete_params[name]
@@ -178,23 +154,14 @@ def _decode_discrete(self, name: str, raw: Any) -> Any:
             raise ValueError(f"Action index out of range for '{name}': {idx} (expected 0..{len(values) - 1})")
         return values[idx]
 
-    def _decode_continuous(self, name: str, raw: Any) -> Any:
-        space = self._continuous_params[name]
-        scalar = float(self._np.asarray(raw, dtype=self._np.float32).reshape(-1)[0])
-        clamped = min(max(scalar, space.low), space.high)
-        return round(clamped) if space.dtype == "int" else clamped
-
     def encode_action(self, values: dict[str, Any]) -> dict[str, Any]:
         """
         Map native parameter values back to raw gym actions; inverse of :meth:`decode_action`.
 
-        Discrete values resolve to their index in the candidate list; continuous
-        values are wrapped in the 1-D ``float32`` array the ``Box`` space expects
-        (clamped to the declared range). Together with :meth:`decode_action` this
-        is an invertible pair on native values:
+        Discrete values resolve to their index in the candidate list. Together
+        with :meth:`decode_action` this is an invertible pair on native values:
         ``decode_action(encode_action(v)) == v`` for any ``v`` drawn from the
-        tunable params (continuous ``dtype="int"`` round-trips through the same
-        rounding ``decode_action`` applies).
+        tunable params.
 
         Consumers that need to express known native configs in the policy's
         action space — e.g. warm-start / behavioral cloning from a recorded
@@ -204,15 +171,8 @@ def encode_action(self, values: dict[str, Any]) -> dict[str, Any]:
             ValueError: if ``values`` does not cover exactly the tunable params,
                 or carries a discrete value absent from its candidate list.
         """
-        expected = set(self._discrete_params) | set(self._continuous_params)
-        self._assert_keys(values.keys(), expected, "values")
-        encoded: dict[str, Any] = {}
-        for name, value in values.items():
-            if name in self._discrete_params:
-                encoded[name] = self._encode_discrete(name, value)
-            else:
-                encoded[name] = self._encode_continuous(name, value)
-        return encoded
+        self._assert_keys(values.keys(), set(self._discrete_params), "values")
+        return {name: self._encode_discrete(name, value) for name, value in values.items()}
 
     def _encode_discrete(self, name: str, value: Any) -> int:
         values = self._discrete_params[name]
@@ -221,11 +181,6 @@ def _encode_discrete(self, name: str, value: Any) -> int:
         except ValueError:
             raise ValueError(f"Value {value!r} for '{name}' is not a candidate; expected one of {values}") from None
 
-    def _encode_continuous(self, name: str, value: Any) -> Any:
-        space = self._continuous_params[name]
-        clamped = min(max(float(value), space.low), space.high)
-        return self._np.asarray([clamped], dtype=self._np.float32)
-
     def reset(
         self,
         *,
@@ -247,7 +202,7 @@ def step_raw(self, params: dict[str, Any]) -> tuple[Any, float, bool, bool, dict
             ValueError: if ``params`` does not cover exactly the tunable +
                 fixed param keys.
         """
-        expected = set(self._discrete_params) | set(self._continuous_params) | set(self._fixed_params)
+        expected = set(self._discrete_params) | set(self._fixed_params)
         self._assert_keys(params.keys(), expected, "raw params")
         return self._step_with_params(params)
 
diff --git a/tests/test_gymnasium_adapter_contract.py b/tests/test_gymnasium_adapter_contract.py
index 74048181a..a5683a7cc 100644
--- a/tests/test_gymnasium_adapter_contract.py
+++ b/tests/test_gymnasium_adapter_contract.py
@@ -35,11 +35,10 @@
 from __future__ import annotations
 
 from types import SimpleNamespace
-from typing import Any, Optional, cast
+from typing import Any, Optional
 
 import pytest
 
-from cloudai._core.action_space import ContinuousSpace
 from cloudai.configurator.base_gym import BaseGym
 from cloudai.configurator.env_params import ObsLeafDescriptor
 
@@ -223,151 +222,6 @@ def test_step_propagates_context_into_observation(self) -> None:
             )
 
 
-class _ContinuousStubBaseGym(BaseGym):
-    """BaseGym stub that surfaces a ContinuousSpace in its action_space.
-
-    Records the params received by ``step`` so the test can assert what the
-    adapter actually emits to the env (post-rounding / clamping).
-    """
-
-    def __init__(self, action_space: dict[str, Any]) -> None:
-        self._action_space = action_space
-        self.test_run = SimpleNamespace(step=0)
-        self.received: list[dict[str, Any]] = []
-        super().__init__()
-
-    def define_action_space(self) -> dict[str, Any]:
-        return self._action_space
-
-    def define_observation_space(self) -> list:
-        return [0.0]
-
-    def reset(
-        self,
-        seed: Optional[int] = None,
-        options: Optional[dict[str, Any]] = None,
-    ) -> tuple[list, dict[str, Any]]:
-        return [0.0], {}
-
-    def step(self, action: Any) -> tuple[list, float, bool, dict]:
-        self.received.append(dict(action))
-        return [1.0], 0.5, False, {}
-
-    def render(self, mode: str = "human") -> None:
-        return None
-
-    def seed(self, seed: Optional[int] = None) -> None:
-        pass
-
-
-class TestAdapterDispatchesContinuousSpace:
-    """``ContinuousSpace`` in cloudai's param_space → ``gym.spaces.Box`` in the adapter.
-
-    The adapter is also responsible for **actuator quantization**: when the
-    spec is ``dtype="int"``, the float coming out of the policy is rounded at
-    decode_action, so the env (and trajectory cache) sees only ints. This
-    collapses float jitter (47.34 vs 47.36) onto the same emitted int and
-    keeps the cache hit-rate honest.
-    """
-
-    @staticmethod
-    def _action_space() -> dict[str, Any]:
-        return {
-            "threshold": ContinuousSpace(low=0.0, high=200.0, dtype="int"),
-            "discrete": [10, 20, 30],
-            "fixed": [99],
-        }
-
-    def test_action_space_uses_box_for_continuous_and_discrete_for_list(self) -> None:
-        import gymnasium
-
-        gym_env = _ContinuousStubBaseGym(self._action_space())
-        adapter = GymnasiumAdapter(gym_env)
-
-        action_space = cast(gymnasium.spaces.Dict, adapter.action_space)
-        threshold = action_space["threshold"]
-        assert isinstance(threshold, gymnasium.spaces.Box)
-        assert threshold.shape == (1,)
-        assert float(threshold.low[0]) == pytest.approx(0.0)
-        assert float(threshold.high[0]) == pytest.approx(200.0)
-
-        discrete = action_space["discrete"]
-        assert isinstance(discrete, gymnasium.spaces.Discrete)
-        assert int(discrete.n) == 3
-
-        assert "fixed" not in action_space, "single-element lists are fixed, not tunable"
-
-    def test_decode_action_rounds_dtype_int(self) -> None:
-        import numpy as np
-
-        gym_env = _ContinuousStubBaseGym(self._action_space())
-        adapter = GymnasiumAdapter(gym_env)
-
-        decoded = adapter.decode_action({"threshold": np.array([47.34], dtype=np.float32), "discrete": 1})
-
-        assert decoded["threshold"] == 47, f"dtype=int must round; got {decoded['threshold']}"
-        assert isinstance(decoded["threshold"], int), "rounded action must be Python int, not float"
-        assert decoded["discrete"] == 20, "Discrete index 1 → 20"
-
-    @pytest.mark.parametrize("raw,expected", [(47.34, 47), (47.36, 47), (47.5, 48), (47.4999, 47)])
-    def test_cache_key_collapses_float_jitter_to_same_int(self, raw: float, expected: int) -> None:
-        """Adjacent float actions that round to the same int must collapse identically.
-
-        This is the actuator-quantization invariant from the design doc: the
-        env (and trajectory cache key) must see the same int for any float in
-        the rounding interval, so the cache fills with semantic duplicates,
-        not float-noise duplicates.
-        """
-        import numpy as np
-
-        gym_env = _ContinuousStubBaseGym(self._action_space())
-        adapter = GymnasiumAdapter(gym_env)
-
-        decoded = adapter.decode_action({"threshold": np.array([raw], dtype=np.float32), "discrete": 0})
-        assert decoded["threshold"] == expected
-
-    @pytest.mark.parametrize("raw,expected", [(-5.0, 0), (250.0, 200), (0.0, 0), (200.0, 200)])
-    def test_decode_action_clamps_to_range(self, raw: float, expected: int) -> None:
-        """Out-of-range continuous actions clamp to ``[low, high]``."""
-        import numpy as np
-
-        gym_env = _ContinuousStubBaseGym(self._action_space())
-        adapter = GymnasiumAdapter(gym_env)
-
-        decoded = adapter.decode_action({"threshold": np.array([raw], dtype=np.float32), "discrete": 0})
-        assert decoded["threshold"] == expected
-
-    def test_step_emits_rounded_int_to_underlying_env(self) -> None:
-        """The env (and downstream cache) must see the *rounded* int, not the raw float."""
-        import numpy as np
-
-        gym_env = _ContinuousStubBaseGym(self._action_space())
-        adapter = GymnasiumAdapter(gym_env)
-        adapter.reset()
-
-        adapter.step({"threshold": np.array([78.6], dtype=np.float32), "discrete": 2})
-
-        assert gym_env.received, "env.step was not called"
-        emitted = gym_env.received[-1]
-        assert emitted["threshold"] == 79, "env must receive rounded int, not raw float"
-        assert emitted["discrete"] == 30, "Discrete decode unaffected by continuous-action plumbing"
-        assert emitted["fixed"] == 99, "fixed params must be injected by the adapter"
-
-    def test_dtype_float_preserves_continuous_value(self) -> None:
-        """``dtype="float"`` clamps but does NOT round; the policy's float reaches the env."""
-        import numpy as np
-
-        action_space: dict[str, Any] = {
-            "knob": ContinuousSpace(low=0.0, high=1.0, dtype="float"),
-        }
-        gym_env = _ContinuousStubBaseGym(action_space)
-        adapter = GymnasiumAdapter(gym_env)
-
-        decoded = adapter.decode_action({"knob": np.array([0.7234], dtype=np.float32)})
-        assert decoded["knob"] == pytest.approx(0.7234, rel=1e-4)
-        assert isinstance(decoded["knob"], float)
-
-
 class TestEncodeDecodeAreInverse:
     """``encode_action`` is the inverse of ``decode_action`` on native values.
 
@@ -401,16 +255,6 @@ def test_encode_rejects_key_mismatch(self) -> None:
         with pytest.raises(ValueError, match="keys mismatch"):
             adapter.encode_action({"param_a": 1})  # missing param_b
 
-    def test_continuous_int_round_trips_through_rounding(self) -> None:
-        action_space: dict[str, Any] = {
-            "threshold": ContinuousSpace(low=0.0, high=200.0, dtype="int"),
-            "discrete": [10, 20, 30],
-            "fixed": [99],
-        }
-        adapter = GymnasiumAdapter(_ContinuousStubBaseGym(action_space))
-        native = {"threshold": 47, "discrete": 20}
-        assert adapter.decode_action(adapter.encode_action(native)) == native
-
 
 class _StructuredStubBaseGym(BaseGym):
     """BaseGym stub that opts in/out of the structured (Dict) obs path.