diff --git a/.claude/skills/add-atomic-action b/.claude/skills/add-atomic-action
new file mode 120000
index 00000000..ee63a4bc
--- /dev/null
+++ b/.claude/skills/add-atomic-action
@@ -0,0 +1 @@
+../../skills/add-atomic-action
\ No newline at end of file
diff --git a/.claude/skills/add-functor b/.claude/skills/add-functor
new file mode 120000
index 00000000..59a2505a
--- /dev/null
+++ b/.claude/skills/add-functor
@@ -0,0 +1 @@
+../../skills/add-functor
\ No newline at end of file
diff --git a/.claude/skills/add-task-env b/.claude/skills/add-task-env
new file mode 120000
index 00000000..c06093df
--- /dev/null
+++ b/.claude/skills/add-task-env
@@ -0,0 +1 @@
+../../skills/add-task-env
\ No newline at end of file
diff --git a/.claude/skills/add-test b/.claude/skills/add-test
new file mode 120000
index 00000000..bc175531
--- /dev/null
+++ b/.claude/skills/add-test
@@ -0,0 +1 @@
+../../skills/add-test
\ No newline at end of file
diff --git a/.claude/skills/benchmark b/.claude/skills/benchmark
new file mode 120000
index 00000000..2735c494
--- /dev/null
+++ b/.claude/skills/benchmark
@@ -0,0 +1 @@
+../../skills/benchmark
\ No newline at end of file
diff --git a/.claude/skills/pr b/.claude/skills/pr
new file mode 120000
index 00000000..5167ba85
--- /dev/null
+++ b/.claude/skills/pr
@@ -0,0 +1 @@
+../../skills/pr
\ No newline at end of file
diff --git a/.claude/skills/pre-commit-check b/.claude/skills/pre-commit-check
new file mode 120000
index 00000000..b0cc815c
--- /dev/null
+++ b/.claude/skills/pre-commit-check
@@ -0,0 +1 @@
+../../skills/pre-commit-check
\ No newline at end of file
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 27483d52..f97b4fa2 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -24,6 +24,7 @@ jobs:
         - "/usr/share/glvnd/egl_vendor.d:/usr/share/glvnd/egl_vendor.d"
         - "/tmp/.X11-unix:/tmp/.X11-unix"
         - "/dev/shm/shared:/dev/shm/shared"
+        - "/usr/share/nvidia:/usr/share/nvidia"
       options: --memory 100g --gpus device=1 --shm-size 53687091200
     steps:
       - uses: actions/checkout@v4
@@ -31,7 +32,7 @@ jobs:
         run: |
           echo "Workspace: ${GITHUB_WORKSPACE}"
           ls
-          pip install black==24.3.0
+          pip install black==26.3.1
           black --check --diff --color ./
           if [ $? -ne 0 ]; then
             echo "Code style check failed, please run [black ./] before commit!"
@@ -45,24 +46,100 @@ jobs:
       NVIDIA_DRIVER_CAPABILITIES: all
       NVIDIA_VISIBLE_DEVICES: all
       NVIDIA_DISABLE_REQUIRE: 1
+      DOCS_MAX_VERSIONS: 5
     container: *container_template
     steps:
       - uses: actions/checkout@v4
+
+      - name: Cache Python dependencies
+        id: cache-pip
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-docs-${{ hashFiles('docs/requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-docs-
+
+      # Restore the full multi-version site from the last successful build.
+      # The key never matches exactly (run_id is unique), so restore-keys is
+      # always used to pick up the most recently saved full site.
+      - name: Restore full multi-version docs site
+        if: github.event_name == 'push'
+        uses: actions/cache/restore@v4
+        with:
+          path: docs/build/html
+          key: docs-full-site-${{ github.repository }}-${{ github.run_id }}
+          restore-keys: |
+            docs-full-site-${{ github.repository }}-
+
+      # Tag-scoped caches are invisible on main; merge live Pages so releases survive.
+      - name: Merge versions from live GitHub Pages
+        if: github.event_name == 'push'
+        shell: bash
+        run: |
+          SITE_URL="https://${{ github.repository_owner }}.github.io/${{ github.event.repository.name }}"
+          SKIP_VERSION="main"
+          if [[ "${GITHUB_REF}" == refs/tags/v* ]]; then
+            SKIP_VERSION="${GITHUB_REF_NAME}"
+          fi
+          python3 ${GITHUB_WORKSPACE}/docs/scripts/merge_published_site.py \
+            --build-dir ${GITHUB_WORKSPACE}/docs/build/html \
+            --site-base-url "${SITE_URL}" \
+            --skip-version "${SKIP_VERSION}"
+
       - name: Build docs
+        shell: bash
         run: |
-          pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site
+          pip install -e ".[gensim]" \
+            --extra-index-url http://pyp.open3dv.site:2345/simple/ \
+            --trusted-host pyp.open3dv.site \
+            --extra-index-url https://download.blender.org/pypi/
           pip install -r docs/requirements.txt
+          python3 docs/scripts/sync_readme.py
           cd ${GITHUB_WORKSPACE}/docs
-          echo "Start Building docs..."
           pip uninstall pymeshlab -y
           pip install pymeshlab==2023.12.post3
-          make html
-      - name: Upload docs artifact
+
+          if [[ "${GITHUB_REF}" == refs/tags/v* ]]; then
+            VERSION="${GITHUB_REF_NAME}"
+            echo "Building docs for release tag ${VERSION}..."
+            sphinx-build source build/html/${VERSION}
+
+            cd build/html
+
+            # Prune old release versions beyond the window
+            mapfile -t TAG_DIRS < <(ls -d v*/ 2>/dev/null | sort -V)
+            while [[ ${#TAG_DIRS[@]} -gt ${DOCS_MAX_VERSIONS:-5} ]]; do
+              echo "Pruning old version: ${TAG_DIRS[0]}"
+              rm -rf "${TAG_DIRS[0]}"
+              TAG_DIRS=("${TAG_DIRS[@]:1}")
+            done
+
+          else
+            echo "Building dev docs for main branch..."
+            # Only rebuild main/ — other versions come from cache + live Pages merge
+            rm -rf build/html/main
+            sphinx-build source build/html/main
+            cd build/html
+          fi
+
+          # Regenerate versions.json and root index.html from all present dirs
+          python3 ${GITHUB_WORKSPACE}/docs/scripts/generate_versions_json.py \
+            --build-dir .
+
+      # Default-branch cache only (tag-scoped caches are not visible on main).
+      - name: Save full multi-version docs site
         if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+        uses: actions/cache/save@v4
+        with:
+          path: docs/build/html
+          key: docs-full-site-${{ github.repository }}-${{ github.run_id }}
+
+      - name: Upload docs artifact
+        if: github.event_name == 'push'
         uses: actions/upload-pages-artifact@v3
-        with: 
+        with:
           path: ${{ github.workspace }}/docs/build/html
-          retention-days: 3
 
   test:
     if: github.event_name == 'pull_request'
@@ -77,71 +154,85 @@ jobs:
       - uses: actions/checkout@v4
       - name: Run tests
         run: |
-          pip install -e .[lerobot] --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site
+          pip install -e ".[gensim]" \
+            --extra-index-url http://pyp.open3dv.site:2345/simple/ \
+            --trusted-host pyp.open3dv.site \
+            --extra-index-url https://download.blender.org/pypi/
           echo "Unit test Start"
           export HF_ENDPOINT=https://hf-mirror.com
-          pip uninstall pymeshlab -y
-          pip install pymeshlab==2023.12.post3
-          pip install numpy==1.26.4
+          pytest tests/docs -q --confcutdir=tests/docs
           pytest tests
 
   publish:
-    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+    if: github.event_name == 'push'
     needs: build
-    runs-on: Linux
+    runs-on: ubuntu-latest
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
     permissions:
       pages: write
-      id-token: write 
-    env:
-      NVIDIA_DRIVER_CAPABILITIES: all
-      NVIDIA_VISIBLE_DEVICES: all
-      NVIDIA_DISABLE_REQUIRE: 1
+      id-token: write
+    steps:
+      - name: Deploy GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
+
+
+  release-build:
+    if: startsWith(github.ref, 'refs/tags/v')
+    needs: lint
+    runs-on: Linux
+
     container: *container_template
+
     steps:
       - uses: actions/checkout@v4
-      - name: Download docs artifact
-        uses: actions/download-artifact@v4
         with:
-          name: github-pages
+          fetch-depth: 0
 
-      - name: Deploy GitHub Pages
-        uses: actions/deploy-pages@v4
+      - name: (Release) Install build tools
+        run: |
+          python -m pip install --upgrade pip
+          pip install build
+
+      - name: (Release) Build sdist and wheel
+        run: |
+          python -m build
+
+      # - name: (Release) Create GitHub Release (draft)
+      #   uses: softprops/action-gh-release@v2
+      #   with:
+      #     draft: true
+      #     generate_release_notes: true
+      #     files: |
+      #       dist/*
+      #   env:
+      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: (Release) Upload distributions
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-distributions
+          path: dist/
+
+  release-publish:
+    if: startsWith(github.ref, 'refs/tags/v')
+    needs: release-build
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/embodichain
+    permissions:
+      contents: read
+      id-token: write   # PyPI Trusted Publishing
 
+    steps:
+      - name: (Release) Download distributions
+        uses: actions/download-artifact@v4
+        with:
+          name: python-distributions
+          path: dist/
 
-  # release:
-  #   if: startsWith(github.ref, 'refs/tags/v')
-  #   runs-on: Linux
-  #   permissions:
-  #     contents: write
-  #     id-token: write   # PyPI Trusted Publishing
-
-  #   container: *container_template
-
-  #   steps:
-  #     - uses: actions/checkout@v4
-  #       with:
-  #         fetch-depth: 0   
-
-  #     - name: (Release) Install build tools
-  #       run: |
-  #         python -m pip install --upgrade pip
-  #         pip install build
-
-  #     - name: (Release) Build sdist and wheel
-  #       run: |
-  #         python -m build --wheel
-
-  #     # - name: (Release) Create GitHub Release (draft)
-  #     #   uses: softprops/action-gh-release@v2
-  #     #   with:
-  #     #     draft: true
-  #     #     generate_release_notes: true
-  #     #     files: |
-  #     #       dist/*
-  #     #   env:
-  #     #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-  #     - name: (Release) Publish to PyPI
-  #       uses: pypa/gh-action-pypi-publish@release/v1
-  #       with:
-  #         password: ${{ secrets.PYPI_API_TOKEN }}
\ No newline at end of file
+      - name: (Release) Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/tests/test_docs_publish.yml b/.github/workflows/tests/test_docs_publish.yml
new file mode 100644
index 00000000..2560048b
--- /dev/null
+++ b/.github/workflows/tests/test_docs_publish.yml
@@ -0,0 +1,226 @@
+name: Test docs publish logic
+
+on:
+  workflow_dispatch:
+
+jobs:
+  unit-tests:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run merge_published_site unit tests
+        run: pytest tests/docs -q --confcutdir=tests/docs
+
+  # -----------------------------------------------------------------------
+  # Scenario A: push to main — existing v0.1.0, v0.2.0 must survive
+  # Simulates: cache holds v0.1.0 + v0.2.0, build adds/updates main/
+  # -----------------------------------------------------------------------
+  test-main-push:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up fake "cache" (previous full-site with versioned dirs)
+        run: |
+          mkdir -p docs/build/html/v0.1.0 docs/build/html/v0.2.0
+          echo "<html>v0.1.0</html>" > docs/build/html/v0.1.0/index.html
+          echo "<html>v0.2.0</html>" > docs/build/html/v0.2.0/index.html
+
+      - name: Simulate build step — update main/ only
+        run: |
+          GITHUB_REF=refs/heads/main
+          DOCS_MAX_VERSIONS=4
+
+          # Mirrors the workflow: rm -rf build/html/main, then build
+          rm -rf docs/build/html/main
+          mkdir -p docs/build/html/main
+          echo "<html><body>main docs (new build)</body></html>" > docs/build/html/main/index.html
+
+          cd docs/build/html
+          python3 ${GITHUB_WORKSPACE}/docs/scripts/generate_versions_json.py --build-dir .
+
+      - name: Assert — v0.1.0 and v0.2.0 still present, main updated
+        run: |
+          echo "=== docs/build/html structure ===" && find docs/build/html -maxdepth 1 | sort
+          [ -d docs/build/html/v0.1.0 ] || (echo "FAIL: v0.1.0 removed!" && exit 1)
+          [ -d docs/build/html/v0.2.0 ] || (echo "FAIL: v0.2.0 removed!" && exit 1)
+          grep -q "new build" docs/build/html/main/index.html || (echo "FAIL: main not updated!" && exit 1)
+          [ -f docs/build/html/versions.json ] || (echo "FAIL: versions.json missing!" && exit 1)
+          echo "=== versions.json ===" && cat docs/build/html/versions.json
+          python3 -c "
+          import json
+          d = json.load(open('docs/build/html/versions.json'))
+          names = [v['name'] for v in d['versions']]
+          assert 'v0.1.0' in names and 'v0.2.0' in names and 'main' in names, f'Missing versions: {names}'
+          print('PASS: all versions present:', names)
+          "
+          echo "PASS: main_push — existing versions preserved"
+
+  # -----------------------------------------------------------------------
+  # Scenario D: main push after tag — stale cache (main only) + live Pages
+  # This is the production bug: tag cache is not on main; merge fixes it.
+  # -----------------------------------------------------------------------
+  test-main-after-tag-merge:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Stale default-branch cache (main/ only)
+        run: |
+          mkdir -p docs/build/html/main
+          echo "<html>stale main</html>" > docs/build/html/main/index.html
+
+      - name: Mock live GitHub Pages (has tag release v0.3.0)
+        run: |
+          PUBLISHED="${GITHUB_WORKSPACE}/mock-published-site"
+          mkdir -p "${PUBLISHED}/v0.3.0" "${PUBLISHED}/main"
+          echo "<html>v0.3.0 live</html>" > "${PUBLISHED}/v0.3.0/index.html"
+          echo "<html>main live</html>" > "${PUBLISHED}/main/index.html"
+          python3 -c "
+          import json, pathlib
+          root = pathlib.Path('${PUBLISHED}')
+          manifest = {
+            'latest': 'v0.3.0',
+            'versions': [
+              {'name': 'v0.3.0', 'url': './v0.3.0/index.html', 'type': 'tag'},
+              {'name': 'main', 'url': './main/index.html', 'type': 'branch'},
+            ],
+          }
+          (root / 'versions.json').write_text(json.dumps(manifest, indent=2))
+          "
+
+      - name: Merge published (skip main — will rebuild)
+        run: |
+          python3 ${GITHUB_WORKSPACE}/docs/scripts/merge_published_site.py \
+            --build-dir ${GITHUB_WORKSPACE}/docs/build/html \
+            --published-root ${GITHUB_WORKSPACE}/mock-published-site \
+            --skip-version main
+
+      - name: Rebuild main/ only
+        run: |
+          rm -rf docs/build/html/main
+          mkdir -p docs/build/html/main
+          echo "<html>main rebuilt</html>" > docs/build/html/main/index.html
+          python3 ${GITHUB_WORKSPACE}/docs/scripts/generate_versions_json.py \
+            --build-dir ${GITHUB_WORKSPACE}/docs/build/html
+
+      - name: Assert — v0.3.0 preserved after main push
+        run: |
+          [ -d docs/build/html/v0.3.0 ] || (echo "FAIL: v0.3.0 missing after merge!" && exit 1)
+          grep -q "v0.3.0 live" docs/build/html/v0.3.0/index.html
+          grep -q "main rebuilt" docs/build/html/main/index.html
+          python3 -c "
+          import json
+          d = json.load(open('docs/build/html/versions.json'))
+          names = [v['name'] for v in d['versions']]
+          assert 'v0.3.0' in names and 'main' in names, names
+          assert d['latest'] == 'v0.3.0', d['latest']
+          "
+          echo "PASS: main_after_tag — release dir restored from published mock"
+
+  # -----------------------------------------------------------------------
+  # Scenario B: tag push v0.3.0 — new version added, old dirs untouched
+  # -----------------------------------------------------------------------
+  test-tag-push:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up fake "cache" (previous full-site)
+        run: |
+          mkdir -p docs/build/html/v0.1.0 docs/build/html/v0.2.0 docs/build/html/main
+          echo "<html>v0.1.0</html>" > docs/build/html/v0.1.0/index.html
+          echo "<html>v0.2.0</html>" > docs/build/html/v0.2.0/index.html
+          echo "<html>main</html>"   > docs/build/html/main/index.html
+
+      - name: Simulate build step — add v0.3.0
+        run: |
+          GITHUB_REF=refs/tags/v0.3.0
+          DOCS_MAX_VERSIONS=4
+
+          mkdir -p docs/build/html/v0.3.0
+          echo "<html>v0.3.0</html>" > docs/build/html/v0.3.0/index.html
+
+          cd docs/build/html
+          mapfile -t TAG_DIRS < <(ls -d v*/ 2>/dev/null | sort -V)
+          while [[ ${#TAG_DIRS[@]} -gt ${DOCS_MAX_VERSIONS} ]]; do
+            echo "Pruning: ${TAG_DIRS[0]}"
+            rm -rf "${TAG_DIRS[0]}"
+            TAG_DIRS=("${TAG_DIRS[@]:1}")
+          done
+
+          python3 ${GITHUB_WORKSPACE}/docs/scripts/generate_versions_json.py --build-dir .
+
+      - name: Assert — all four dirs present, latest is v0.3.0
+        run: |
+          echo "=== docs/build/html structure ===" && find docs/build/html -maxdepth 1 | sort
+          for d in v0.1.0 v0.2.0 v0.3.0 main; do
+            [ -d "docs/build/html/$d" ] || (echo "FAIL: $d missing!" && exit 1)
+          done
+          echo "=== versions.json ===" && cat docs/build/html/versions.json
+          python3 -c "
+          import json
+          d = json.load(open('docs/build/html/versions.json'))
+          names = [v['name'] for v in d['versions']]
+          assert d['latest'] == 'v0.3.0', f'latest should be v0.3.0, got {d[\"latest\"]}'
+          assert all(n in names for n in ['v0.1.0','v0.2.0','v0.3.0','main']), f'Missing: {names}'
+          print('PASS: versions.json correct, latest =', d['latest'])
+          "
+          echo "PASS: tag_push — new version added, others preserved"
+
+  # -----------------------------------------------------------------------
+  # Scenario C: 5th tag triggers pruning — oldest (v0.1.0) removed
+  # -----------------------------------------------------------------------
+  test-prune-old-versions:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up fake "cache" with 4 existing tag dirs (at the limit)
+        run: |
+          for v in v0.1.0 v0.2.0 v0.3.0 v0.4.0; do
+            mkdir -p docs/build/html/${v}
+            echo "<html>${v}</html>" > docs/build/html/${v}/index.html
+          done
+          mkdir -p docs/build/html/main
+          echo "<html>main</html>" > docs/build/html/main/index.html
+
+      - name: Simulate build step — push v0.5.0 triggers prune
+        run: |
+          GITHUB_REF=refs/tags/v0.5.0
+          DOCS_MAX_VERSIONS=4
+
+          mkdir -p docs/build/html/v0.5.0
+          echo "<html>v0.5.0</html>" > docs/build/html/v0.5.0/index.html
+
+          cd docs/build/html
+          mapfile -t TAG_DIRS < <(ls -d v*/ 2>/dev/null | sort -V)
+          while [[ ${#TAG_DIRS[@]} -gt ${DOCS_MAX_VERSIONS} ]]; do
+            echo "Pruning: ${TAG_DIRS[0]}"
+            rm -rf "${TAG_DIRS[0]}"
+            TAG_DIRS=("${TAG_DIRS[@]:1}")
+          done
+
+          python3 ${GITHUB_WORKSPACE}/docs/scripts/generate_versions_json.py --build-dir .
+
+      - name: Assert — v0.1.0 pruned, max 4 tags kept, main untouched
+        run: |
+          echo "=== docs/build/html structure ===" && find docs/build/html -maxdepth 1 | sort
+          [ ! -d docs/build/html/v0.1.0 ] || (echo "FAIL: v0.1.0 should be pruned!" && exit 1)
+          for d in v0.2.0 v0.3.0 v0.4.0 v0.5.0 main; do
+            [ -d "docs/build/html/$d" ] || (echo "FAIL: $d was incorrectly removed!" && exit 1)
+          done
+          TAG_COUNT=$(ls -d docs/build/html/v*/ 2>/dev/null | wc -l)
+          [ "${TAG_COUNT}" -le 4 ] || (echo "FAIL: ${TAG_COUNT} tags exceed DOCS_MAX_VERSIONS=4" && exit 1)
+          echo "=== versions.json ===" && cat docs/build/html/versions.json
+          python3 -c "
+          import json
+          d = json.load(open('docs/build/html/versions.json'))
+          names = [v['name'] for v in d['versions']]
+          assert 'v0.1.0' not in names, f'v0.1.0 should be pruned: {names}'
+          assert d['latest'] == 'v0.5.0', f'latest should be v0.5.0, got {d[\"latest\"]}'
+          tag_count = sum(1 for v in d['versions'] if v['type'] == 'tag')
+          assert tag_count <= 4, f'Too many tags: {tag_count}'
+          print('PASS: pruning correct, latest =', d['latest'], ', tag count =', tag_count)
+          "
+          echo "PASS: prune — oldest removed, within limit, main preserved"
diff --git a/.gitignore b/.gitignore
index 040955d9..7405b279 100644
--- a/.gitignore
+++ b/.gitignore
@@ -198,3 +198,6 @@ wandb/
 .vscode/
 
 embodichain/VERSION
+
+# benchmark results
+scripts/benchmark/rl/reports/*
\ No newline at end of file
diff --git a/AGENTS.md b/AGENTS.md
index 117cd57f..0920a327 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -58,10 +58,11 @@ EmbodiChain/
 
 ### Formatting
 
-- **Formatter**: `black==24.3.0` — run before every commit.
+- **Formatter**: `black==26.3.1` — run before every commit.
   ```bash
   black .
   ```
+- Use the `/pre-commit-check` skill before committing to catch all CI violations locally.
 
 ### File Headers
 
@@ -108,22 +109,14 @@ class MyManagerCfg:
 
 ### Functor / Manager Pattern
 
-Managers (observation, event, reward, randomization) use a `Functor`/`FunctorCfg` pattern:
+Managers (observation, event, reward, randomization) use a `Functor`/`FunctorCfg` pattern with two styles:
 
 - **Function-style**: a plain function with signature `(env, env_ids, ...) -> None`.
 - **Class-style**: a class inheriting `Functor`, with `__init__(cfg, env)` and `__call__(env, env_ids, ...)`.
-- Registered in a manager config via `FunctorCfg(func=..., params={...})`.
 
-```python
-from embodichain.lab.gym.envs.managers import Functor, FunctorCfg
-
-class my_randomizer(Functor):
-    def __init__(self, cfg: FunctorCfg, env):
-        super().__init__(cfg, env)
+Registered in a manager config via `FunctorCfg(func=..., params={...})`.
 
-    def __call__(self, env, env_ids, my_param: float = 0.5):
-        ...
-```
+Use the `/add-functor` skill to scaffold new functors with the correct signature and module placement.
 
 ### Docstrings
 
@@ -200,20 +193,10 @@ Include:
 
 1. **Fork** the repository and create a focused branch.
 2. **Keep PRs small** — one logical change per PR.
-3. **Format** the code with `black==24.3.0` before submitting.
+3. **Format** the code with `black==26.3.1` before submitting.
 4. **Update documentation** for any public API changes.
 5. **Add tests** that prove your fix or feature works.
-6. **Submit** using the PR template (`.github/PULL_REQUEST_TEMPLATE.md`):
-   - Summarize changes and link the related issue (`Fixes #123`).
-   - Specify the type of change (bug fix / enhancement / new feature / breaking change / docs).
-   - Attach before/after screenshots for visual changes.
-   - Complete the checklist:
-     - [ ] `black .` has been run
-     - [ ] Documentation updated
-     - [ ] Tests added
-     - [ ] Dependencies updated (if applicable)
-
-> It is recommended to open an issue and discuss the design before opening a large PR.
+6. Use the `/pr` skill to create PRs following the project's template and label conventions.
 
 ### Adding a New Robot
 
@@ -231,107 +214,25 @@ Also add robot documentation in `docs/source/resources/robot/` (see existing exa
 
 ### Adding a New Task Environment
 
-Refer to `embodichain/lab/gym/envs/tasks/` for existing examples. Tasks subclass `EmbodiedEnv` or `BaseAgentEnv` and implement `_setup_scene`, `_reset_idx`, and evaluation logic.
-
----
-
-## Unit Tests
-
-### Structure
-
-Tests live in `tests/` and mirror the source tree:
-
-```text
-tests/
-├── toolkits/
-│   └── test_pg_grasp.py
-├── gym/
-│   └── action_bank/
-│       └── test_configurable_action.py
-└── sim/
-    ├── objects/
-    │   ├── test_light.py
-    │   └── test_rigid_object_group.py
-    ├── sensors/
-    │   ├── test_camera.py
-    │   └── test_stereo.py
-    └── planners/
-        └── test_motion_generator.py
-```
-
-Place new test files at `tests/<subpackage>/test_<module>.py`, matching the layout of `embodichain/`.
+Use the `/add-task-env` skill to scaffold a new task with the correct file structure, `@register_env` decorator, base class, and test stub.
 
-### Two accepted styles
+### Adding Functors
 
-**pytest style** — for pure-Python logic with no test ordering dependency:
+Use the `/add-functor` skill to scaffold observation, reward, event, action, dataset, or randomization functors with the correct signature, style, and module placement.
 
-```python
-# ----------------------------------------------------------------------------
-# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# ...
-# ----------------------------------------------------------------------------
-
-from embodichain.my_module import my_function
-
-
-def test_expected_output():
-    result = my_function(input_value)
-    assert result == expected_value
-
-
-def test_edge_case():
-    result = my_function(edge_input)
-    assert result is not None
-```
+### Writing Tests
 
-**`Class` style** — when tests must run in a specific order or share `setup_method`/`teardown_method` state:
+Use the `/add-test` skill to scaffold tests with the correct file placement, style (pytest vs class), mock patterns, and project conventions.
 
-```python
-# ----------------------------------------------------------------------------
-# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# ...
-# ----------------------------------------------------------------------------
-
-from embodichain.my_module import MyClass
-
-
-class TestMyClass():
-    def setup_method(self):
-        self.obj = MyClass(param=1.0)
-
-    def teardown_method(self):
-        pass
-
-    def test_basic_behavior(self):
-        result = self.obj.run()
-        assert result == expected_result
-
-    def test_raises_on_bad_input(self):
-        with pytest.raises(ValueError):
-            self.obj.run(bad_input)
-
-### Conventions
-
-- **File header**: include the standard Apache 2.0 copyright block (same as all source files).
-- **Naming**: test files are `test_<module>.py`; test functions/methods are `test_<scenario>`.
-- **Simulation-dependent tests**: tests that require a running `SimulationManager` (GPU, sensors, robots) must initialize and teardown the sim inside `setUp`/`tearDown` or a pytest fixture. Keep them isolated from pure-logic tests.
-- **No magic numbers**: define expected values as named constants or comments explaining their origin.
-- **`if __name__ == "__main__"`**: include this block for tests that support optional visual/interactive output (pass `is_visual=True` manually when debugging).
-
-### Running tests
-
-```bash
-# Run all tests
-pytest tests/
-
-# Run a specific file
-pytest tests/toolkits/test_pg_grasp.py
+---
 
-# Run a specific test function
-pytest tests/toolkits/test_pg_grasp.py::test_antipodal_score_selector
+## Skills Quick Reference
 
-# Run with verbose output
-pytest -v tests/
-```
+| Skill | Command | Purpose |
+|-------|---------|---------|
+| Add Task Env | `/add-task-env` | Scaffold a new `EmbodiedEnv` task |
+| Add Functor | `/add-functor` | Scaffold observation/reward/event/action/dataset/randomization functors |
+| Add Test | `/add-test` | Scaffold tests following project conventions |
+| Pre-Commit Check | `/pre-commit-check` | Run all local CI checks before committing |
+| Create PR | `/pr` | Create a PR following the project template |
+| Benchmark | `/benchmark` | Write benchmark scripts for EmbodiChain modules |
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c8ce9852..af1401cf 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -29,15 +29,31 @@ We welcome pull requests for bug fixes, new features, and documentation improvem
     ```bash
     black .
     ```
-    > Currently, we use black==24.3.0 for formatting. Make sure to use the same version to avoid inconsistencies.
+    > Currently, we use black==26.3.1 for formatting. Make sure to use the same version to avoid inconsistencies.
 4.  **Submit a Pull Request**.
     *   Use the [Pull Request Template](.github/PULL_REQUEST_TEMPLATE.md).
     *   Keep PRs small and focused.
     *   Include a summary of the changes and link to any relevant issues (e.g., `Fixes #123`).
     *   Ensure all checks pass.
 
+
+## Contribute specific robots
+
+To contribute a new robot, please check the documentation on [Adding a New Robot](https://dexforce.github.io/EmbodiChain/guides/add_robot.html).
+
+## Contribute specific environments
+
+To contribute a new environment, please check the documentation on [Embodied Environments](https://dexforce.github.io/EmbodiChain/overview/gym/env.html) and see the tutorial below:
+- [Creating a Basic Environment](https://dexforce.github.io/EmbodiChain/tutorial/basic_env.html) 
+- [Creating a Modular Environment](https://dexforce.github.io/EmbodiChain/tutorial/modular_env.html)
+
+If you want to implement your tasks in a new repo and with some customized functors and utilities, you can also use the [Task Template Repo](https://github.com/DexForce/embodichain_task_template).
+
 ## Using Claude Code for Contributions
 
+<details>
+<summary>Setup, skills, and tips for using Claude Code</summary>
+
 [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview) is an AI-powered CLI that can assist you throughout the contribution workflow — from understanding the codebase to writing, reviewing, and debugging code.
 
 ### Setup
@@ -51,6 +67,33 @@ claude
 
 A `CLAUDE.md` file is present at the root of this repository. Claude Code reads it automatically at startup to load project conventions, structure, and style rules, so it is context-aware from the first prompt.
 
+### Skills
+
+Claude Code skills are built-in slash commands that automate common development tasks. They scaffold code, run checks, and enforce project conventions so you can focus on your logic instead of boilerplate. Invoke any skill by typing its command in the Claude Code prompt.
+
+| Skill | Command | Purpose |
+|-------|---------|---------|
+| Add Functor | `/add-functor` | Scaffold a new observation, reward, event, action, dataset, or randomization functor with the correct signature, style, and module placement |
+| Add Task Env | `/add-task-env` | Scaffold a new task environment with the correct file structure, `@register_env` decorator, base class, and test stub |
+| Add Test | `/add-test` | Scaffold tests with the correct file placement, style (pytest vs class), mock patterns, and project conventions |
+| Pre-Commit Check | `/pre-commit-check` | Run all local CI checks — code style, headers, annotations, exports, and docstrings — before committing |
+| Create PR | `/pr` | Create a pull request following the project template and label conventions |
+| Benchmark | `/benchmark` | Write benchmark scripts for measuring performance of solvers, samplers, and other computationally intensive components |
+
+#### When to use each skill
+
+**`/add-functor`** — Use when adding a new observation, event, reward, action, dataset, or randomization functor to an EmbodiChain environment. The skill will ask for the functor type and name, then generate the function- or class-style implementation with proper docstrings, type hints, and `__all__` exports.
+
+**`/add-task-env`** — Use when creating a new task environment, including expert demonstration tasks, RL tasks, or any `EmbodiedEnv` subclass. The skill scaffolds the task file with `_setup_scene`, `_reset_idx`, and evaluation logic, plus a test stub.
+
+**`/add-test`** — Use when writing tests for any EmbodiChain module — functors, solvers, sensors, environments, or utilities. The skill determines the correct test file location, style (pytest function vs class), and generates tests with the standard Apache 2.0 header and named constants.
+
+**`/pre-commit-check`** — Run this before committing or creating a PR. It verifies code formatting (`black`), file headers, type annotations, `__all__` exports, and docstring completeness — the same checks the CI pipeline enforces.
+
+**`/pr`** — Use after committing your changes to create a pull request. The skill checks git state, determines the PR type, drafts a description following the project template, runs formatting, creates a feature branch, and opens the PR via `gh` CLI with the correct labels.
+
+**`/benchmark`** — Use when you need to measure the performance of a module (IK solvers, grasp samplers, metrics, etc.). The skill generates a well-structured benchmark script following project conventions.
+
 ### Suggested workflows
 
 **Explore the codebase before making changes**
@@ -66,7 +109,7 @@ A `CLAUDE.md` file is present at the root of this repository. Claude Code reads
 ```
 > I want to add a new observation functor that returns the end-effector velocity.
   Which existing functor should I model it after?
-> Generate the functor following the project style, with a proper docstring and type hints.
+> /add-functor
 ```
 
 **Validate style and formatting before submitting**
@@ -74,13 +117,13 @@ A `CLAUDE.md` file is present at the root of this repository. Claude Code reads
 ```
 > Review my changes in embodichain/lab/gym/envs/managers/randomization/visual.py
   for style issues, missing type hints, and docstring completeness.
+> /pre-commit-check
 ```
 
 **Write or update tests**
 
 ```
-> Write a pytest test for the randomize_emission_light function in
-  embodichain/lab/gym/envs/managers/randomization/visual.py.
+> /add-test
 ```
 
 **Understand a bug**
@@ -92,38 +135,17 @@ A `CLAUDE.md` file is present at the root of this repository. Claude Code reads
 
 **Create a pull request**
 
-After you've made your changes and committed them, use the `/pr` command to create a pull request:
+After you've made your changes and committed them:
 
 ```
 > /pr
 ```
 
-This will guide you through:
-1. Checking the current git state and changes
-2. Determining the PR type (bug fix, enhancement, new feature, etc.)
-3. Drafting a proper PR description following the project template
-4. Running code formatting with `black .`
-5. Creating a properly named feature branch
-6. Committing changes with a conventional commit message
-7. Pushing to remote and creating the PR via `gh` CLI
-
-The `/pr` skill ensures your PR follows the EmbodiChain contribution guidelines and populates the required checklist items.
+The `/pr` skill will guide you through checking git state, determining the PR type, drafting a description, running formatting, and creating the PR with proper labels.
 
 ### Tips
 
-*   Always run `black .` after Claude Code generates or edits Python files — Claude Code can do this for you if you ask.
+*   Always run `/pre-commit-check` after making changes — it catches the same issues the CI pipeline checks.
 *   Claude Code respects the `CLAUDE.md` conventions. If you notice it deviating (wrong docstring style, missing `__all__`, etc.), point it out and it will correct the output.
-*   For large features, break the work into small, focused tasks and handle them one at a time.
-*   Claude Code can help draft your PR description and populate the PR checklist once your changes are ready.
-
-## Contribute specific robots
-
-To contribute a new robot, please check the documentation on [Adding a New Robot](https://dexforce.github.io/EmbodiChain/guides/add_robot.html).
-
-## Contribute specific environments
-
-To contribute a new environment, please check the documentation on [Embodied Environments](https://dexforce.github.io/EmbodiChain/overview/gym/env.html) and see the tutorial below:
-- [Creating a Basic Environment](https://dexforce.github.io/EmbodiChain/tutorial/basic_env.html) 
-- [Creating a Modular Environment](https://dexforce.github.io/EmbodiChain/tutorial/modular_env.html)
-
-If you want to implement your tasks in a new repo and with some customized functors and utilities, you can also use the [Task Template Repo](https://github.com/DexForce/embodichain_task_template).
\ No newline at end of file
+*   For large features, break the work into small, focused tasks and handle them one at a time using the appropriate skill for each step.
+*   If you add a new skill to `.claude/skills/`, make sure to also add it to the Skills table and "When to use each skill" list in this document so contributors can discover it.
\ No newline at end of file
diff --git a/README.md b/README.md
index e7c28de7..eae06369 100644
--- a/README.md
+++ b/README.md
@@ -2,18 +2,18 @@
 
 ![teaser](assets/imgs/teaser.jpg)
 
-[![License](https://img.shields.io/github/license/DexForce/EmbodiChain)](LICENSE)
-[![Website](https://img.shields.io/badge/website-dexforce.com-yellow?logo=google-chrome&logoColor=white)](https://dexforce.com/embodichain/index.html#/)
-[![GitHub Pages](https://img.shields.io/badge/GitHub%20Pages-docs-blue?logo=github&logoColor=white)](https://dexforce.github.io/EmbodiChain/introduction.html)
-[![Python](https://img.shields.io/badge/python-3.10%20|%203.11-blue.svg)](https://docs.python.org/3/whatsnew/3.10.html)
-[![Version](https://img.shields.io/github/v/release/DexForce/EmbodiChain?label=version)](https://github.com/DexForce/EmbodiChain/releases)
+[![License](https://img.shields.io/github/license/DexForce/EmbodiChain?style=for-the-badge)](LICENSE)
+[![Website](https://img.shields.io/badge/website-dexforce.com-yellow?style=for-the-badge&logo=google-chrome&logoColor=white)](https://dexforce.com/embodichain/index.html#/)
+[![GitHub Pages](https://img.shields.io/badge/GitHub%20Pages-docs-blue?style=for-the-badge&logo=github&logoColor=white)](https://dexforce.github.io/EmbodiChain/main/index.html)
+[![Python](https://img.shields.io/badge/python-3.10%20|%203.11-blue?style=for-the-badge&logo=python&logoColor=white)](https://docs.python.org/3/whatsnew/3.10.html)
+[![Version](https://img.shields.io/github/v/release/DexForce/EmbodiChain?style=for-the-badge&label=version)](https://github.com/DexForce/EmbodiChain/releases)
 ---
 
-EmbodiChain is an end-to-end, GPU-accelerated framework for Embodied AI. It streamlines research and development by unifying high-performance simulation, real-to-sim data pipelines, modular model architectures, and efficient training workflows. This integration enables rapid experimentation, seamless deployment of intelligent agents, and effective Sim2Real transfer for real-world robotic systems.
+EmbodiChain is an end-to-end, GPU-accelerated framework for Embodied AI. It streamlines research and development by unifying high-performance simulation, automated generative data pipelines, modular model architectures, and efficient training workflows. This integration enables rapid experimentation, seamless deployment of intelligent agents, and effective Sim2Real transfer for real-world robotic systems.
 
 > [!NOTE]
 > EmbodiChain is in Alpha and under active development:
-> * More features will be continually added in the coming months. You can find more details in the [roadmap](https://dexforce.github.io/EmbodiChain/resources/roadmap.html).
+> * More features will be continually added in the coming months. You can find more details in the [roadmap](https://dexforce.github.io/EmbodiChain/main/resources/roadmap.html).
 > * Since this is an early release, we welcome feedback (bug reports, feature requests, etc.) via GitHub Issues.
 
 
@@ -36,14 +36,18 @@ The figure below illustrates the overall architecture of EmbodiChain:
 
 To get started with EmbodiChain, follow these steps:
 
-- [Installation Guide](https://dexforce.github.io/EmbodiChain/quick_start/install.html)
-- [Quick Start Tutorial](https://dexforce.github.io/EmbodiChain/tutorial/index.html)
-- [API Reference](https://dexforce.github.io/EmbodiChain/api_reference/index.html)
+- [Installation Guide](https://dexforce.github.io/EmbodiChain/main/quick_start/install.html)
+- [Quick Start Tutorial](https://dexforce.github.io/EmbodiChain/main/tutorial/index.html)
+- [API Reference](https://dexforce.github.io/EmbodiChain/main/api_reference/index.html)
 
 ## Contribution Guide
 
 We welcome contributions! Please see the [CONTRIBUTING.md](CONTRIBUTING.md) file in this repository for guidelines on how to get started.
 
+## Publications
+
+See [Academic Publications](docs/source/resources/publications/README.md) for a complete list of academic papers related to EmbodiChain.
+
 ## Citation
 
 If you find EmbodiChain helpful for your research, please consider citing our work:
@@ -67,14 +71,4 @@ If you find EmbodiChain helpful for your research, please consider citing our wo
    year = {2025},
    journal = {TechRxiv}
    }
-```
-
-```bibtex
-@inproceedings{Sim2RealVLA,
-    title = {Sim2Real {VLA}: Zero-Shot Generalization of Synthesized Skills to Realistic Manipulation},
-    author = {Runyi Zhao, Sheng Xu, Ruixing Jin, Yueci Deng, Yunxin Tai, Kui Jia, Guiliang Liu},
-    booktitle = {The Fourteenth International Conference on Learning Representations, ICLR},
-    year = {2026},
-    url = {https://openreview.net/forum?id=H4SyKHjd4c}
-}
 ```
\ No newline at end of file
diff --git a/VERSION b/VERSION
index b1e80bb2..0c62199f 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.1.3
+0.2.1
diff --git a/configs/agents/rl/basic/cart_pole/train_config.json b/configs/agents/rl/basic/cart_pole/train_config.json
index 02a302d1..6da5f735 100644
--- a/configs/agents/rl/basic/cart_pole/train_config.json
+++ b/configs/agents/rl/basic/cart_pole/train_config.json
@@ -1,11 +1,10 @@
-{ 
+{
     "trainer": {
         "exp_name": "cart_pole_ppo",
         "gym_config": "configs/agents/rl/basic/cart_pole/gym_config.json",
         "seed": 42,
         "device": "cuda:0",
         "headless": true,
-        "enable_rt": false,
         "gpu_id": 0,
         "num_envs": 64,
         "iterations": 1000,
@@ -22,30 +21,57 @@
                     "interval_step": 1,
                     "params": {
                         "name": "main_cam",
-                        "resolution": [640, 480],
-                        "eye": [-1.4, 1.4, 2.5],
-                        "target": [0, 0, 0.7],
-                        "up": [0, 0, 1],
-                        "intrinsics": [600, 600, 320, 240],
+                        "resolution": [
+                            640,
+                            480
+                        ],
+                        "eye": [
+                            -1.4,
+                            1.4,
+                            2.5
+                        ],
+                        "target": [
+                            0,
+                            0,
+                            0.7
+                        ],
+                        "up": [
+                            0,
+                            0,
+                            1
+                        ],
+                        "intrinsics": [
+                            600,
+                            600,
+                            320,
+                            240
+                        ],
                         "save_path": "./outputs/videos/eval"
                     }
                 }
             }
-        }
+        },
+        "renderer": "fast-rt"
     },
     "policy": {
         "name": "actor_critic",
         "actor": {
             "type": "mlp",
             "network_cfg": {
-                "hidden_sizes": [256, 256],
+                "hidden_sizes": [
+                    256,
+                    256
+                ],
                 "activation": "relu"
             }
         },
         "critic": {
             "type": "mlp",
             "network_cfg": {
-                "hidden_sizes": [256, 256],
+                "hidden_sizes": [
+                    256,
+                    256
+                ],
                 "activation": "relu"
             }
         }
@@ -64,4 +90,4 @@
             "max_grad_norm": 0.5
         }
     }
-}
+}
\ No newline at end of file
diff --git a/configs/agents/rl/basic/cart_pole/train_config_grpo.json b/configs/agents/rl/basic/cart_pole/train_config_grpo.json
index 4da5cab7..86ac34f2 100644
--- a/configs/agents/rl/basic/cart_pole/train_config_grpo.json
+++ b/configs/agents/rl/basic/cart_pole/train_config_grpo.json
@@ -5,7 +5,6 @@
         "seed": 42,
         "device": "cuda:0",
         "headless": true,
-        "enable_rt": false,
         "gpu_id": 0,
         "num_envs": 64,
         "iterations": 1000,
@@ -23,23 +22,47 @@
                     "interval_step": 1,
                     "params": {
                         "name": "main_cam",
-                        "resolution": [640, 480],
-                        "eye": [-1.4, 1.4, 2.5],
-                        "target": [0, 0, 0.7],
-                        "up": [0, 0, 1],
-                        "intrinsics": [600, 600, 320, 240],
+                        "resolution": [
+                            640,
+                            480
+                        ],
+                        "eye": [
+                            -1.4,
+                            1.4,
+                            2.5
+                        ],
+                        "target": [
+                            0,
+                            0,
+                            0.7
+                        ],
+                        "up": [
+                            0,
+                            0,
+                            1
+                        ],
+                        "intrinsics": [
+                            600,
+                            600,
+                            320,
+                            240
+                        ],
                         "save_path": "./outputs/videos/eval"
                     }
                 }
             }
-        }
+        },
+        "renderer": "hybrid"
     },
     "policy": {
         "name": "actor_only",
         "actor": {
             "type": "mlp",
             "network_cfg": {
-                "hidden_sizes": [256, 256],
+                "hidden_sizes": [
+                    256,
+                    256
+                ],
                 "activation": "relu"
             }
         }
@@ -55,7 +78,7 @@
             "ent_coef": 0.01,
             "kl_coef": 0.0,
             "group_size": 4,
-            "eps": 1e-8,
+            "eps": 1e-08,
             "reset_every_rollout": true,
             "max_grad_norm": 0.5,
             "truncate_at_first_done": true
diff --git a/configs/agents/rl/push_cube/gym_config.json b/configs/agents/rl/push_cube/gym_config.json
index 4e8cec4d..a97cc65d 100644
--- a/configs/agents/rl/push_cube/gym_config.json
+++ b/configs/agents/rl/push_cube/gym_config.json
@@ -71,33 +71,33 @@
             "reaching_reward": {
                 "func": "reaching_behind_object",
                 "mode": "add",
-                "weight": 0.1,
+                "weight": 0.03,
                 "params": {
                     "object_cfg": {
                         "uid": "cube"
                     },
                     "target_pose_key": "goal_pose",
-                    "behind_offset": 0.015,
+                    "behind_offset": 0.03,
                     "height_offset": 0.015,
-                    "distance_scale": 5.0,
+                    "distance_scale": 8.0,
                     "part_name": "arm"
                 }
             },
-            "place_reward": {
-                "func": "incremental_distance_to_target",
+            "goal_distance_reward": {
+                "func": "distance_to_target",
                 "mode": "add",
-                "weight": 1.0,
+                "weight": 0.8,
                 "params": {
                     "source_entity_cfg": {
                         "uid": "cube"
                     },
                     "target_pose_key": "goal_pose",
-                    "tanh_scale": 10.0,
-                    "positive_weight": 2.0,
-                    "negative_weight": 0.5,
+                    "exponential": true,
+                    "sigma": 0.12,
                     "use_xy_only": true
                 }
             },
+
             "action_penalty": {
                 "func": "action_smoothness_penalty",
                 "mode": "add",
@@ -175,9 +175,9 @@
             "body_type": "dynamic",
             "init_pos": [-0.6, -0.4, 0.05],
             "attrs": {
-                "mass": 10.0,
-                "static_friction": 3.0,
-                "dynamic_friction": 2.0,
+                "mass": 2.0,
+                "static_friction": 1.0,
+                "dynamic_friction": 0.8,
                 "linear_damping": 2.0,
                 "angular_damping": 2.0,
                 "contact_offset": 0.003,
diff --git a/configs/agents/rl/push_cube/train_config.json b/configs/agents/rl/push_cube/train_config.json
index d44aa0b3..11b0972d 100644
--- a/configs/agents/rl/push_cube/train_config.json
+++ b/configs/agents/rl/push_cube/train_config.json
@@ -1,11 +1,10 @@
-{ 
+{
     "trainer": {
         "exp_name": "push_cube_ppo",
         "gym_config": "configs/agents/rl/push_cube/gym_config.json",
         "seed": 42,
         "device": "cuda:0",
         "headless": true,
-        "enable_rt": false,
         "gpu_id": 0,
         "num_envs": 64,
         "iterations": 1000,
@@ -13,9 +12,9 @@
         "enable_eval": true,
         "num_eval_envs": 16,
         "num_eval_episodes": 3,
-        "eval_freq": 2,
-        "save_freq": 200,
-        "use_wandb": false,
+        "eval_freq": 100,
+        "save_freq": 100,
+        "use_wandb": true,
         "wandb_project_name": "embodichain-push_cube",
         "events": {
             "eval": {
@@ -30,25 +29,32 @@
                         "target": [0, 0, 0],
                         "up": [0, 0, 1],
                         "intrinsics": [600, 600, 320, 240],
-                        "save_path": "./outputs/videos/eval"
+                        "save_path": "./outputs/videos_ppo1/eval"
                     }
                 }
             }
-        }
+        },
+        "renderer": "hybrid"
     },
     "policy": {
         "name": "actor_critic",
         "actor": {
             "type": "mlp",
             "network_cfg": {
-                "hidden_sizes": [256, 256],
+                "hidden_sizes": [
+                    256,
+                    256
+                ],
                 "activation": "relu"
             }
         },
         "critic": {
             "type": "mlp",
             "network_cfg": {
-                "hidden_sizes": [256, 256],
+                "hidden_sizes": [
+                    256,
+                    256
+                ],
                 "activation": "relu"
             }
         }
@@ -67,4 +73,4 @@
             "max_grad_norm": 0.5
         }
     }
-}
+}
\ No newline at end of file
diff --git a/configs/agents/rl/push_cube/train_config_grpo.json b/configs/agents/rl/push_cube/train_config_grpo.json
new file mode 100644
index 00000000..df5f6681
--- /dev/null
+++ b/configs/agents/rl/push_cube/train_config_grpo.json
@@ -0,0 +1,65 @@
+{
+    "trainer": {
+        "exp_name": "push_cube_grpo",
+        "gym_config": "configs/agents/rl/push_cube/gym_config.json",
+        "seed": 42,
+        "device": "cuda:0",
+        "headless": true,
+        "gpu_id": 0,
+        "num_envs": 64,
+        "iterations": 1000,
+        "buffer_size": 1024,
+        "enable_eval": true,
+        "num_eval_envs": 16,
+        "num_eval_episodes": 3,
+        "eval_freq": 200,
+        "save_freq": 200,
+        "use_wandb": false,
+        "wandb_project_name": "embodichain-push_cube",
+        "events": {
+            "eval": {
+                "record_camera": {
+                    "func": "record_camera_data_async",
+                    "mode": "interval",
+                    "interval_step": 1,
+                    "params": {
+                        "name": "main_cam",
+                        "resolution": [640, 480],
+                        "eye": [-1.4, 1.4, 2.0],
+                        "target": [0, 0, 0],
+                        "up": [0, 0, 1],
+                        "intrinsics": [600, 600, 320, 240],
+                        "save_path": "./outputs/videos/eval"
+                    }
+                }
+            }
+        }
+    },
+    "policy": {
+        "name": "actor_only",
+        "actor": {
+            "type": "mlp",
+            "network_cfg": {
+                "hidden_sizes": [256, 256],
+                "activation": "relu"
+            }
+        }
+    },
+    "algorithm": {
+        "name": "grpo",
+        "cfg": {
+            "learning_rate": 0.0001,
+            "n_epochs": 10,
+            "batch_size": 8192,
+            "gamma": 0.99,
+            "clip_coef": 0.2,
+            "ent_coef": 0.01,
+            "kl_coef": 0.0,
+            "group_size": 4,
+            "eps": 1e-8,
+            "reset_every_rollout": true,
+            "max_grad_norm": 0.5,
+            "truncate_at_first_done": true
+        }
+    }
+}
diff --git a/configs/gym/pour_water/gym_config_simple.json b/configs/gym/pour_water/gym_config_simple.json
index ca45e80b..bcce5bc4 100644
--- a/configs/gym/pour_water/gym_config_simple.json
+++ b/configs/gym/pour_water/gym_config_simple.json
@@ -203,7 +203,7 @@
                 "mode": "modify",
                 "name": "robot/qpos",
                 "params": {
-                    "joint_ids": [12, 13, 14, 15]
+                    "joint_ids": [6, 13]
                 }
             }
         },
@@ -227,7 +227,8 @@
                     "use_videos": true
                 }
             }
-        }
+        },
+        "control_parts": ["left_arm", "left_eef", "right_arm", "right_eef"]
     },
     "robot": {
         "uid": "CobotMagic",
diff --git a/conftest.py b/conftest.py
new file mode 100644
index 00000000..00987125
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1,24 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+# Make the scripts/ directory importable so tests can do:
+#   from benchmark.rl.metrics import ...
+sys.path.insert(0, str(Path(__file__).parent / "scripts"))
diff --git a/docs/Makefile b/docs/Makefile
index 864eb2a7..9ded7fad 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -14,8 +14,20 @@ help:
 
 .PHONY: help Makefile
 
+# Sync README.md -> introduction.rst before building
+.PHONY: sync-readme
+sync-readme:
+	@python3 "$(CURDIR)/scripts/sync_readme.py"
+
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
+%: Makefile sync-readme
 	@rm -rf "$(BUILDDIR)"
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+# Build current version only (for local development / PR verification)
+.PHONY: current-docs
+current-docs: sync-readme
+	@rm -rf "$(BUILDDIR)/html"
+	@$(SPHINXBUILD) -W --keep-going "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O)
+	@python3 "$(CURDIR)/scripts/generate_versions_json.py" --build-dir "$(BUILDDIR)/html"
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 53d9dd9d..87db2c49 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -8,4 +8,4 @@ sphinx-autosummary-accessors
 sphinxcontrib-bibtex
 sphinx-design
 sphinx_autodoc_typehints
-sphinx-multiversion
\ No newline at end of file
+pypandoc_binary
\ No newline at end of file
diff --git a/docs/scripts/build_versions.py b/docs/scripts/build_versions.py
new file mode 100644
index 00000000..dbbd7224
--- /dev/null
+++ b/docs/scripts/build_versions.py
@@ -0,0 +1,97 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Helper script for filtering versions to maintain buffer size."""
+
+import re
+from pathlib import Path
+
+
+def parse_version(tag: str) -> tuple[int, int, int]:
+    """Parse a version tag like 'v1.2.3' into a tuple (1, 2, 3)."""
+    match = re.match(r"^v(\d+)\.(\d+)\.(\d+)$", tag)
+    if not match:
+        return (0, 0, 0)
+    return (int(match.group(1)), int(match.group(2)), int(match.group(3)))
+
+
+def filter_versions(
+    all_versions: list[str],
+    buffer_size: int,
+    main_branch: str = "main",
+) -> list[str]:
+    """Filter versions to maintain buffer size.
+
+    Keeps the latest (buffer_size - 1) release versions plus the main branch.
+
+    Args:
+        all_versions: List of all available version references
+        buffer_size: Total number of versions to keep (releases + main)
+        main_branch: Name of the main branch
+
+    Returns:
+        List of versions to keep
+    """
+    # Separate releases from branches
+    releases = [v for v in all_versions if re.match(r"^v\d+\.\d+\.\d+$", v)]
+    branches = [v for v in all_versions if v not in releases]
+
+    # Sort releases by version (newest first)
+    releases.sort(key=parse_version, reverse=True)
+
+    # Keep latest (buffer_size - 1) releases
+    releases_to_keep = releases[: (buffer_size - 1)]
+
+    # Always include main branch if it exists
+    versions_to_keep = releases_to_keep.copy()
+    if main_branch in branches:
+        versions_to_keep.append(main_branch)
+
+    return versions_to_keep
+
+
+def main():
+    """CLI entry point for version filtering."""
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Filter versions for multi-version docs"
+    )
+    parser.add_argument(
+        "--versions",
+        nargs="+",
+        required=True,
+        help="List of all available versions",
+    )
+    parser.add_argument(
+        "--buffer-size",
+        type=int,
+        default=5,
+        help="Total number of versions to keep (releases + main)",
+    )
+    parser.add_argument(
+        "--main-branch",
+        default="main",
+        help="Name of the main branch",
+    )
+    args = parser.parse_args()
+
+    filtered = filter_versions(args.versions, args.buffer_size, args.main_branch)
+    print(" ".join(filtered))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docs/scripts/generate_versions_json.py b/docs/scripts/generate_versions_json.py
new file mode 100644
index 00000000..d4905565
--- /dev/null
+++ b/docs/scripts/generate_versions_json.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+"""Generate versions.json and root index.html for the docs version selector."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import re
+from pathlib import Path
+
+
+def parse_version(tag: str) -> tuple[int, int, int]:
+    """Parse a version tag like 'v1.2.3' into a tuple (1, 2, 3)."""
+    match = re.match(r"^v(\d+)\.(\d+)\.(\d+)$", tag)
+    if not match:
+        return (0, 0, 0)
+    return (int(match.group(1)), int(match.group(2)), int(match.group(3)))
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Generate versions.json and root index.html for multi-version docs"
+    )
+    parser.add_argument(
+        "--build-dir",
+        default="build/html",
+        help="Path to build/html directory (default: build/html)",
+    )
+    parser.add_argument(
+        "--output",
+        default=None,
+        help="Output path for versions.json (default: <build-dir>/versions.json)",
+    )
+    parser.add_argument(
+        "--latest",
+        default=None,
+        help="Name of the latest stable version (default: auto-detected from tags, falls back to main)",
+    )
+    args = parser.parse_args()
+
+    html_dir = Path(args.build_dir)
+    output = Path(args.output) if args.output else html_dir / "versions.json"
+
+    if not html_dir.exists():
+        print(f"Error: Build directory '{html_dir}' does not exist.")
+        raise SystemExit(1)
+
+    versions: list[dict[str, str]] = []
+
+    # Collect tag versions (vX.Y.Z directories), sorted newest-first
+    tag_dirs = sorted(
+        [d for d in html_dir.glob("v*") if d.is_dir()],
+        key=lambda d: parse_version(d.name),
+        reverse=True,
+    )
+    for d in tag_dirs:
+        name = d.name
+        versions.append({"name": name, "url": f"./{name}/index.html", "type": "tag"})
+
+    # Collect main (dev branch)
+    if (html_dir / "main").is_dir():
+        versions.append({"name": "main", "url": "./main/index.html", "type": "branch"})
+
+    # Determine latest: explicit arg > newest tag > main
+    if args.latest:
+        latest = args.latest
+    elif versions:
+        tag_names = [v["name"] for v in versions if v["type"] == "tag"]
+        latest = tag_names[0] if tag_names else "main"
+    else:
+        latest = "main"
+
+    manifest = {
+        "latest": latest,
+        "versions": versions,
+    }
+
+    # Write versions.json
+    output.parent.mkdir(parents=True, exist_ok=True)
+    output.write_text(json.dumps(manifest, indent=2))
+    print(f"Generated {output} with {len(versions)} versions (latest: {latest})")
+
+    # Write root index.html redirect
+    index_path = html_dir / "index.html"
+    index_content = (
+        "<!DOCTYPE html>\n"
+        "<html><head>\n"
+        f"  <title>EmbodiChain Docs</title>\n"
+        f'  <meta http-equiv="refresh" content="0; url=./{latest}/index.html">\n'
+        "</head></html>\n"
+    )
+    index_path.write_text(index_content)
+    print(f"Generated {index_path} (redirects to ./{latest}/index.html)")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docs/scripts/merge_published_site.py b/docs/scripts/merge_published_site.py
new file mode 100644
index 00000000..612f49a6
--- /dev/null
+++ b/docs/scripts/merge_published_site.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+"""Merge version directories from the live docs site into a local build tree.
+
+CI restores an Actions cache and rebuilds only one version (``main`` or a tag).
+Tag-scoped cache entries are not visible on ``main`` pushes, so the cache alone
+cannot hold all versions. This script fills *missing* version directories from
+the currently published GitHub Pages site (or a local directory in tests).
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+from typing import Any
+from urllib.error import HTTPError, URLError
+from urllib.request import urlopen
+
+__all__ = ["load_versions_manifest", "merge_published_site"]
+
+
+def load_versions_manifest(
+    *,
+    site_base_url: str | None = None,
+    published_root: Path | None = None,
+) -> dict[str, Any] | None:
+    """Load ``versions.json`` from a local tree or the live site URL."""
+    if published_root is not None:
+        manifest_path = published_root / "versions.json"
+        if not manifest_path.is_file():
+            return None
+        return json.loads(manifest_path.read_text(encoding="utf-8"))
+
+    if not site_base_url:
+        return None
+
+    manifest_url = f"{site_base_url.rstrip('/')}/versions.json"
+    try:
+        with urlopen(manifest_url, timeout=30) as response:
+            if response.status != 200:
+                return None
+            return json.loads(response.read().decode("utf-8"))
+    except (HTTPError, URLError, TimeoutError, json.JSONDecodeError) as exc:
+        print(f"No published manifest at {manifest_url}: {exc}", file=sys.stderr)
+        return None
+
+
+def _copy_local_version(src: Path, dest: Path) -> None:
+    if dest.exists():
+        shutil.rmtree(dest)
+    shutil.copytree(src, dest)
+
+
+def _download_version_wget(site_base_url: str, version: str, dest: Path) -> None:
+    """Download one version subtree with wget (available in CI containers)."""
+    url = f"{site_base_url.rstrip('/')}/{version}/"
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    if dest.exists():
+        shutil.rmtree(dest)
+
+    # -nH: no host-based dirs; -np: stay under version URL; -P: output prefix
+    result = subprocess.run(
+        [
+            "wget",
+            "-q",
+            "-r",
+            "-l",
+            "50",
+            "-np",
+            "-nH",
+            "-P",
+            str(dest.parent),
+            url,
+        ],
+        check=False,
+    )
+    if result.returncode != 0:
+        print(f"wget failed for {url} (exit {result.returncode})", file=sys.stderr)
+        return
+
+    # wget may create dest.parent/<version>/ or nest extra path segments — normalize
+    if not dest.is_dir():
+        candidates = list(dest.parent.glob(f"*/{version}"))
+        if len(candidates) == 1 and candidates[0].is_dir():
+            candidates[0].rename(dest)
+        else:
+            nested = dest.parent / version
+            if nested.is_dir() and nested != dest:
+                nested.rename(dest)
+
+
+def merge_published_site(
+    build_dir: Path,
+    *,
+    site_base_url: str | None = None,
+    published_root: Path | None = None,
+    skip_versions: frozenset[str] | None = None,
+) -> list[str]:
+    """Copy missing version dirs from published site into ``build_dir``.
+
+    Args:
+        build_dir: Sphinx output root (``docs/build/html``).
+        site_base_url: Live Pages base, e.g. ``https://org.github.io/Repo``.
+        published_root: Local published tree for tests (``versions.json`` + dirs).
+        skip_versions: Version names to leave for a fresh build (e.g. ``main``).
+
+    Returns:
+        Names of versions merged from the published site.
+    """
+    build_dir = build_dir.resolve()
+    build_dir.mkdir(parents=True, exist_ok=True)
+    skip = skip_versions or frozenset()
+
+    manifest = load_versions_manifest(
+        site_base_url=site_base_url,
+        published_root=published_root,
+    )
+    if not manifest:
+        print("No published versions manifest; skipping merge.")
+        return []
+
+    merged: list[str] = []
+    for entry in manifest.get("versions", []):
+        name = entry.get("name")
+        if not name or name in skip:
+            continue
+        if (build_dir / name).is_dir():
+            continue
+
+        if published_root is not None:
+            src = published_root / name
+            if not src.is_dir():
+                print(
+                    f"Published root missing directory {name}; skip.", file=sys.stderr
+                )
+                continue
+            print(f"Merging local published version: {name}")
+            _copy_local_version(src, build_dir / name)
+            merged.append(name)
+        elif site_base_url:
+            print(f"Downloading published version: {name}")
+            _download_version_wget(site_base_url, name, build_dir / name)
+            if (build_dir / name).is_dir():
+                merged.append(name)
+        else:
+            print(
+                "Neither published_root nor site_base_url set; cannot merge.",
+                file=sys.stderr,
+            )
+
+    return merged
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Merge missing doc version dirs from live GitHub Pages into build/html"
+    )
+    parser.add_argument(
+        "--build-dir",
+        type=Path,
+        default=Path("build/html"),
+        help="Local docs build directory (default: build/html)",
+    )
+    parser.add_argument(
+        "--site-base-url",
+        default=None,
+        help="Published site base URL, e.g. https://org.github.io/EmbodiChain",
+    )
+    parser.add_argument(
+        "--published-root",
+        type=Path,
+        default=None,
+        help="Local directory mirroring published site (for tests)",
+    )
+    parser.add_argument(
+        "--skip-version",
+        action="append",
+        default=[],
+        help="Version to skip (repeatable); rebuilt in the same CI run",
+    )
+    args = parser.parse_args()
+
+    merged = merge_published_site(
+        args.build_dir,
+        site_base_url=args.site_base_url,
+        published_root=args.published_root,
+        skip_versions=frozenset(args.skip_version),
+    )
+    if merged:
+        print(f"Merged versions: {', '.join(merged)}")
+    else:
+        print("No versions merged from published site.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docs/scripts/sync_readme.py b/docs/scripts/sync_readme.py
new file mode 100644
index 00000000..ca784513
--- /dev/null
+++ b/docs/scripts/sync_readme.py
@@ -0,0 +1,239 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+"""Synchronize README.md to docs/source/introduction.rst.
+
+Uses pypandoc for Markdown-to-RST conversion, then post-processes the output
+to fix Sphinx-specific formatting issues.
+
+Usage:
+    python docs/scripts/sync_readme.py           # Overwrite introduction.rst
+    python docs/scripts/sync_readme.py --check    # Exit 1 if stale
+"""
+
+from __future__ import annotations
+
+import argparse
+import re
+import sys
+from pathlib import Path
+
+__all__ = ["convert_readme_to_rst", "postprocess_rst"]
+
+# Resolve paths relative to this script
+REPO_ROOT = Path(__file__).resolve().parents[2]
+README_PATH = REPO_ROOT / "README.md"
+RST_PATH = REPO_ROOT / "docs" / "source" / "introduction.rst"
+
+# Prefix to make repo-root-relative paths work from docs/source/
+_DOCS_PATH_PREFIX = "../../"
+
+
+def _fix_image_path(path: str) -> str:
+    """Prefix a repo-root-relative image path for use from docs/source/.
+
+    Args:
+        path: Image path from pandoc output (repo-root-relative).
+
+    Returns:
+        Path adjusted for the RST file location in docs/source/.
+    """
+    if path.startswith(("http://", "https://")):
+        return path
+    return _DOCS_PATH_PREFIX + path
+
+
+def convert_readme_to_rst(readme_content: str) -> str:
+    """Convert Markdown content to RST via pypandoc.
+
+    Args:
+        readme_content: Raw Markdown text from README.md.
+
+    Returns:
+        Raw RST string from pandoc (before post-processing).
+    """
+    import pypandoc
+
+    return pypandoc.convert_text(readme_content, "rst", format="md")
+
+
+def postprocess_rst(rst: str, readme_content: str) -> str:
+    """Fix pandoc RST output for Sphinx compatibility.
+
+    Applies these transformations:
+    1. Strip badge substitution references and definitions.
+    2. Convert ``[!NOTE]`` blockquote to ``.. NOTE::`` directive.
+    3. Convert ``.. raw:: html`` centered-image blocks to ``.. image::``.
+    4. Replace ``.. code:: bibtex`` with ``.. code-block:: bibtex``.
+    5. Convert ``.. figure::`` (with caption) to ``.. image::``.
+
+    Args:
+        rst: Raw RST from pandoc.
+        readme_content: Original Markdown (used to extract image paths).
+
+    Returns:
+        Cleaned RST suitable for Sphinx.
+    """
+    # Extract image paths from README <img> tags for centered HTML blocks
+    readme_images = re.findall(r'<img\s+[^>]*src="([^"]+)"[^>]*>', readme_content)
+
+    lines = rst.split("\n")
+    result_lines: list[str] = []
+    i = 0
+
+    while i < len(lines):
+        line = lines[i]
+
+        # --- 1. Strip badge substitution reference lines ---
+        if re.match(r"^\|.*\|", line):
+            i += 1
+            continue
+
+        # --- 1b. Strip badge substitution definitions at the bottom ---
+        if re.match(r"^\.\. \|\w[\w ]*\w\| image::", line):
+            i += 1
+            while i < len(lines) and lines[i].startswith("   "):
+                i += 1
+            continue
+
+        # --- 2. Convert [!NOTE] blockquote to .. NOTE:: ---
+        if re.match(r"^\s+\[!NOTE\]", line):
+            note_match = re.match(r"^\s+\[!NOTE\]\s*(.*)", line)
+            note_text = note_match.group(1) if note_match else ""
+            note_text = note_text.replace("\\*", "*")
+            note_lines: list[str] = []
+            if note_text:
+                note_lines.append(note_text)
+            i += 1
+            while i < len(lines) and lines[i].startswith("   ") and lines[i].strip():
+                cleaned = lines[i].strip().replace("\\*", "*")
+                note_lines.append(cleaned)
+                i += 1
+            result_lines.append(".. NOTE::")
+            for nl in note_lines:
+                result_lines.append(f"   {nl}")
+            continue
+
+        # --- 3. Convert .. raw:: html centered blocks to .. image:: ---
+        if line.strip() == ".. raw:: html":
+            # Look ahead (skipping blank lines) for <p align="center">
+            j = i + 1
+            while j < len(lines) and lines[j].strip() == "":
+                j += 1
+            if j < len(lines) and "<p align" in lines[j]:
+                # Skip from i through the matching </p> raw block
+                i = j + 1  # skip past <p> line
+                while i < len(lines):
+                    if "</p>" in lines[i]:
+                        i += 1
+                        # Skip any trailing .. raw:: html for </p>
+                        while i < len(lines) and (
+                            lines[i].strip() == ""
+                            or lines[i].strip() == ".. raw:: html"
+                            or "</p>" in lines[i]
+                        ):
+                            i += 1
+                        break
+                    i += 1
+                # Insert images from README source
+                for img_src in readme_images:
+                    result_lines.append(f".. image:: {_fix_image_path(img_src)}")
+                    result_lines.append("   :align: center")
+                result_lines.append("")  # blank line after directive
+                continue
+            elif j < len(lines) and "</p>" in lines[j]:
+                i = j + 1
+                continue
+
+        # --- 4. Replace .. code:: bibtex with .. code-block:: bibtex ---
+        if re.match(r"^\.\. code:: bibtex\s*$", line):
+            result_lines.append(".. code-block:: bibtex")
+            i += 1
+            continue
+
+        # --- 5. Convert .. figure:: with caption to .. image:: ---
+        if re.match(r"^\.\. figure::", line):
+            path_match = re.match(r"^\.\. figure:: (.+)", line)
+            if path_match:
+                img_path = path_match.group(1).strip()
+                result_lines.append(f".. image:: {_fix_image_path(img_path)}")
+                i += 1
+                # Skip :alt:, blank line, and caption lines
+                while i < len(lines):
+                    if lines[i].startswith("   :"):
+                        i += 1
+                        continue
+                    if lines[i].strip() == "":
+                        i += 1
+                        continue
+                    if lines[i].startswith("   "):
+                        i += 1
+                        continue
+                    break
+                continue
+
+        result_lines.append(line)
+        i += 1
+
+    # Clean up excessive blank lines
+    text = "\n".join(result_lines)
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    return text.strip() + "\n"
+
+
+def main() -> None:
+    """CLI entry point for syncing README.md to introduction.rst."""
+    parser = argparse.ArgumentParser(
+        description="Sync README.md to docs/source/introduction.rst"
+    )
+    parser.add_argument(
+        "--check",
+        action="store_true",
+        help="Check if introduction.rst is up-to-date (exit 1 if stale)",
+    )
+    args = parser.parse_args()
+
+    if not README_PATH.exists():
+        print(f"Error: {README_PATH} not found", file=sys.stderr)
+        sys.exit(1)
+
+    readme_content = README_PATH.read_text(encoding="utf-8")
+    raw_rst = convert_readme_to_rst(readme_content)
+    final_rst = postprocess_rst(raw_rst, readme_content)
+
+    if args.check:
+        if not RST_PATH.exists():
+            print(
+                f"Error: {RST_PATH} does not exist. Run without --check to generate.",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+        current = RST_PATH.read_text(encoding="utf-8")
+        if current != final_rst:
+            print(
+                f"Error: {RST_PATH} is out of sync with README.md. "
+                "Run 'python docs/scripts/sync_readme.py' to update.",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+        print(f"OK: {RST_PATH} is up-to-date.")
+    else:
+        RST_PATH.parent.mkdir(parents=True, exist_ok=True)
+        RST_PATH.write_text(final_rst, encoding="utf-8")
+        print(f"Synced: {README_PATH} -> {RST_PATH}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docs/source/_static/version-redirect.js b/docs/source/_static/version-redirect.js
new file mode 100644
index 00000000..effe08cf
--- /dev/null
+++ b/docs/source/_static/version-redirect.js
@@ -0,0 +1,36 @@
+/**
+ * Version redirect script for multi-version documentation.
+ * Redirects to the latest stable release version, or falls back to 'main'.
+ */
+
+(function() {
+    'use strict';
+
+    // Try to fetch versions.json (generated by generate_versions_json.py)
+    fetch('versions.json')
+        .then(response => {
+            if (!response.ok) {
+                throw new Error('versions.json not found');
+            }
+            return response.json();
+        })
+        .then(data => {
+            // Get the latest version from the JSON
+            const latestVersion = data.latest || data.versions?.[0]?.name || 'main';
+
+            const currentPath = window.location.pathname;
+
+            // If we're at root, redirect to latest version
+            if (currentPath === '/' || currentPath.endsWith('/index.html') || currentPath.endsWith('/')) {
+                window.location.href = latestVersion + '/';
+            }
+        })
+        .catch(error => {
+            console.warn('Version redirect failed:', error.message);
+            // Fallback to main on error
+            const currentPath = window.location.pathname;
+            if (currentPath === '/' || currentPath.endsWith('/index.html') || currentPath.endsWith('/')) {
+                window.location.href = 'main/';
+            }
+        });
+})();
diff --git a/docs/source/_templates/index.html b/docs/source/_templates/index.html
new file mode 100644
index 00000000..f1351f20
--- /dev/null
+++ b/docs/source/_templates/index.html
@@ -0,0 +1,8 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <title>Redirecting to the latest EmbodiChain documentation</title>
+    <meta charset="utf-8">
+    <meta http-equiv="refresh" content="0; url=./main/index.html">
+  </head>
+</html>
diff --git a/docs/source/_templates/versioning.html b/docs/source/_templates/versioning.html
new file mode 100644
index 00000000..a6cb2726
--- /dev/null
+++ b/docs/source/_templates/versioning.html
@@ -0,0 +1,56 @@
+<nav class="bd-links bd-docs-nav">
+  <div class="bd-toc-item navbar-nav">
+    <ul class="nav bd-sidenav">
+      <li class="toctree-l1 has-children" style="display:flex;justify-content:center;align-items:center;flex-direction:column;">
+        <div style="text-align:center;">
+          <label for="version-select" style="font-weight:bold;display:block;">Version</label>
+        </div>
+        <select id="version-select" class="version-dropdown" style="margin:0 auto;display:block;">
+          <option>Loading...</option>
+        </select>
+      </li>
+    </ul>
+  </div>
+</nav>
+<script>
+(function() {
+  var sel = document.getElementById("version-select");
+  var path = window.location.pathname;
+
+  // Derive the docs root by stripping the version segment from the current path.
+  // CI layout: /<prefix>/v0.1.3/api.html -> /<prefix>
+  // Local layout (no version segment): keep path as-is and try root
+  var match = path.match(/^(.*\/)(main|v[\d.]+)\//);
+  var base = match ? match[1].replace(/\/$/, "") : "";
+
+  fetch(base + "/versions.json")
+    .then(function(r) {
+      if (!r.ok) throw new Error(r.status);
+      return r.json();
+    })
+    .then(function(data) {
+      if (!data.versions || data.versions.length === 0) throw new Error("no versions");
+      sel.innerHTML = "";
+      data.versions.forEach(function(v) {
+        var opt = document.createElement("option");
+        opt.value = base + "/" + v.name + "/index.html";
+        opt.textContent = v.name + (v.name === data.latest ? " (latest)" : "");
+        if (path.indexOf("/" + v.name + "/") !== -1) {
+          opt.selected = true;
+        }
+        sel.appendChild(opt);
+      });
+    })
+    .catch(function() {
+      // Local build or versions.json unavailable — show current version label
+      var label = match ? match[2] : "local";
+      sel.innerHTML = "";
+      var opt = document.createElement("option");
+      opt.textContent = label;
+      opt.selected = true;
+      opt.disabled = true;
+      sel.appendChild(opt);
+    });
+  sel.onchange = function() { if (this.value) window.location.href = this.value; };
+})();
+</script>
diff --git a/docs/source/api_reference/embodichain/embodichain.agents.rl.algo.rst b/docs/source/api_reference/embodichain/embodichain.agents.rl.algo.rst
index d5a1be05..35b11ab4 100644
--- a/docs/source/api_reference/embodichain/embodichain.agents.rl.algo.rst
+++ b/docs/source/api_reference/embodichain/embodichain.agents.rl.algo.rst
@@ -3,6 +3,11 @@
 
 .. automodule:: embodichain.agents.rl.algo
 
+Overview
+--------
+
+Algorithm registry and algorithm-construction helpers for RL training.
+
    
    .. rubric:: Functions
 
@@ -10,4 +15,9 @@
    
       build_algo
       get_registered_algo_names
+
+.. automodule:: embodichain.agents.rl.algo
+   :members:
+   :undoc-members:
+   :show-inheritance:
    
\ No newline at end of file
diff --git a/docs/source/api_reference/embodichain/embodichain.agents.rl.buffer.rst b/docs/source/api_reference/embodichain/embodichain.agents.rl.buffer.rst
index 0a178379..a79f3706 100644
--- a/docs/source/api_reference/embodichain/embodichain.agents.rl.buffer.rst
+++ b/docs/source/api_reference/embodichain/embodichain.agents.rl.buffer.rst
@@ -3,4 +3,35 @@
 
 .. automodule:: embodichain.agents.rl.buffer
 
+Overview
+--------
+
+The ``buffer`` package provides rollout and replay buffer structures used by
+RL algorithms.
+
+.. rubric:: Submodules
+
+.. autosummary::
+
+   standard_buffer
+   utils
+
+.. currentmodule:: embodichain.agents.rl.buffer
+
+Rollout Buffer Classes
+----------------------
+
+.. automodule:: embodichain.agents.rl.buffer.standard_buffer
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Buffer Utilities
+----------------
+
+.. automodule:: embodichain.agents.rl.buffer.utils
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
    
\ No newline at end of file
diff --git a/docs/source/api_reference/embodichain/embodichain.agents.rl.collector.rst b/docs/source/api_reference/embodichain/embodichain.agents.rl.collector.rst
new file mode 100644
index 00000000..4fd639ed
--- /dev/null
+++ b/docs/source/api_reference/embodichain/embodichain.agents.rl.collector.rst
@@ -0,0 +1,33 @@
+embodichain.agents.rl.collector
+================================
+
+.. automodule:: embodichain.agents.rl.collector
+
+Overview
+--------
+
+Collectors are responsible for interacting with vectorized environments and
+assembling rollout data into a preallocated ``TensorDict`` layout.
+
+.. rubric:: Classes
+
+.. autosummary::
+
+   BaseCollector
+   SyncCollector
+
+.. currentmodule:: embodichain.agents.rl.collector
+
+BaseCollector
+-------------
+
+.. autoclass:: BaseCollector
+   :members:
+   :show-inheritance:
+
+SyncCollector
+-------------
+
+.. autoclass:: SyncCollector
+   :members:
+   :show-inheritance:
diff --git a/docs/source/api_reference/embodichain/embodichain.agents.rl.models.rst b/docs/source/api_reference/embodichain/embodichain.agents.rl.models.rst
index d74efb22..6de1449a 100644
--- a/docs/source/api_reference/embodichain/embodichain.agents.rl.models.rst
+++ b/docs/source/api_reference/embodichain/embodichain.agents.rl.models.rst
@@ -3,6 +3,11 @@
 
 .. automodule:: embodichain.agents.rl.models
 
+Overview
+--------
+
+Policy-network registration and model construction APIs for RL agents.
+
    
    .. rubric:: Functions
 
@@ -13,4 +18,9 @@
       get_policy_class
       get_registered_policy_names
       register_policy
+
+.. automodule:: embodichain.agents.rl.models
+   :members:
+   :undoc-members:
+   :show-inheritance:
    
\ No newline at end of file
diff --git a/docs/source/api_reference/embodichain/embodichain.agents.rl.rst b/docs/source/api_reference/embodichain/embodichain.agents.rl.rst
index 2fa64a6e..7dda1a38 100644
--- a/docs/source/api_reference/embodichain/embodichain.agents.rl.rst
+++ b/docs/source/api_reference/embodichain/embodichain.agents.rl.rst
@@ -3,6 +3,12 @@ embodichain.agents.rl
 
 .. automodule:: embodichain.agents.rl
 
+Overview
+--------
+
+The ``embodichain.agents.rl`` package contains algorithm registries, rollout
+collection logic, policy/model builders, and training entry points.
+
    .. rubric:: Submodules
 
    .. autosummary::
@@ -10,6 +16,7 @@ embodichain.agents.rl
 
       algo
       buffer
+      collector
       models
       train
       utils
@@ -30,6 +37,14 @@ Rollout Buffer
    :undoc-members:
    :show-inheritance:
 
+Collectors
+----------
+
+.. automodule:: embodichain.agents.rl.collector
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 Policy Models
 -------------
 
diff --git a/docs/source/api_reference/embodichain/embodichain.agents.rl.train.rst b/docs/source/api_reference/embodichain/embodichain.agents.rl.train.rst
index 4376c750..7fb189eb 100644
--- a/docs/source/api_reference/embodichain/embodichain.agents.rl.train.rst
+++ b/docs/source/api_reference/embodichain/embodichain.agents.rl.train.rst
@@ -3,6 +3,11 @@
 
 .. automodule:: embodichain.agents.rl.train
 
+Overview
+--------
+
+Training entry points and command-line helpers for launching RL experiments.
+
    
    .. rubric:: Functions
 
@@ -11,4 +16,9 @@
       main
       parse_args
       train_from_config
+
+.. automodule:: embodichain.agents.rl.train
+   :members:
+   :undoc-members:
+   :show-inheritance:
    
\ No newline at end of file
diff --git a/docs/source/api_reference/embodichain/embodichain.agents.rl.utils.rst b/docs/source/api_reference/embodichain/embodichain.agents.rl.utils.rst
index 1f2706a5..b00828a3 100644
--- a/docs/source/api_reference/embodichain/embodichain.agents.rl.utils.rst
+++ b/docs/source/api_reference/embodichain/embodichain.agents.rl.utils.rst
@@ -3,4 +3,42 @@
 
 .. automodule:: embodichain.agents.rl.utils
 
+Overview
+--------
+
+The ``utils`` package contains helper utilities for RL configuration,
+data conversion, and training orchestration.
+
+.. rubric:: Submodules
+
+.. autosummary::
+
+   config
+   helper
+   trainer
+
+Configuration Helpers
+---------------------
+
+.. automodule:: embodichain.agents.rl.utils.config
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+General Helpers
+---------------
+
+.. automodule:: embodichain.agents.rl.utils.helper
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Trainer Utilities
+-----------------
+
+.. automodule:: embodichain.agents.rl.utils.trainer
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
    
\ No newline at end of file
diff --git a/docs/source/api_reference/embodichain/embodichain.agents.rst b/docs/source/api_reference/embodichain/embodichain.agents.rst
index b5942c7e..6b1e5589 100644
--- a/docs/source/api_reference/embodichain/embodichain.agents.rst
+++ b/docs/source/api_reference/embodichain/embodichain.agents.rst
@@ -48,6 +48,7 @@ Reinforcement Learning
 
       algo
       buffer
+      collector
       models
       train
       utils
diff --git a/docs/source/api_reference/embodichain/embodichain.data.rst b/docs/source/api_reference/embodichain/embodichain.data.rst
new file mode 100644
index 00000000..9d8b0984
--- /dev/null
+++ b/docs/source/api_reference/embodichain/embodichain.data.rst
@@ -0,0 +1,51 @@
+embodichain.data
+================
+
+.. automodule:: embodichain.data
+
+Data Package Overview
+---------------------
+
+The ``embodichain.data`` package centralizes dataset resolution and asset download
+helpers used by simulation tasks and training pipelines.
+
+.. rubric:: Submodules
+
+.. autosummary::
+
+   constants
+   dataset
+   download
+   enum
+
+Constants
+---------
+
+.. automodule:: embodichain.data.constants
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Dataset Resolution
+------------------
+
+.. automodule:: embodichain.data.dataset
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Asset Download CLI
+------------------
+
+.. automodule:: embodichain.data.download
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Enums
+-----
+
+.. automodule:: embodichain.data.enum
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/api_reference/embodichain/embodichain.lab.sim.atomic_actions.rst b/docs/source/api_reference/embodichain/embodichain.lab.sim.atomic_actions.rst
new file mode 100644
index 00000000..181086c3
--- /dev/null
+++ b/docs/source/api_reference/embodichain/embodichain.lab.sim.atomic_actions.rst
@@ -0,0 +1,89 @@
+embodichain.lab.sim.atomic_actions
+==================================
+
+.. automodule:: embodichain.lab.sim.atomic_actions
+
+   .. rubric:: Classes
+
+   .. autosummary::
+
+      Affordance
+      InteractionPoints
+      ObjectSemantics
+      ActionCfg
+      AtomicAction
+      MoveActionCfg
+      MoveAction
+      PickUpActionCfg
+      PickUpAction
+      PlaceActionCfg
+      PlaceAction
+      AtomicActionEngine
+
+.. currentmodule:: embodichain.lab.sim.atomic_actions
+
+Core
+----
+
+.. autoclass:: Affordance
+    :members:
+    :show-inheritance:
+
+.. autoclass:: InteractionPoints
+    :members:
+    :show-inheritance:
+
+.. autoclass:: ObjectSemantics
+    :members:
+    :show-inheritance:
+
+.. autoclass:: ActionCfg
+    :members:
+    :exclude-members: __init__, copy, replace, to_dict, validate
+
+.. autoclass:: AtomicAction
+    :members:
+    :show-inheritance:
+
+Actions
+-------
+
+.. autoclass:: MoveActionCfg
+    :members:
+    :exclude-members: __init__, copy, replace, to_dict, validate
+    :show-inheritance:
+
+.. autoclass:: MoveAction
+    :members:
+    :show-inheritance:
+
+.. autoclass:: PickUpActionCfg
+    :members:
+    :exclude-members: __init__, copy, replace, to_dict, validate
+    :show-inheritance:
+
+.. autoclass:: PickUpAction
+    :members:
+    :show-inheritance:
+
+.. autoclass:: PlaceActionCfg
+    :members:
+    :exclude-members: __init__, copy, replace, to_dict, validate
+    :show-inheritance:
+
+.. autoclass:: PlaceAction
+    :members:
+    :show-inheritance:
+
+Engine & Registry
+-----------------
+
+.. autoclass:: AtomicActionEngine
+    :members:
+    :show-inheritance:
+
+.. autofunction:: register_action
+
+.. autofunction:: unregister_action
+
+.. autofunction:: get_registered_actions
diff --git a/docs/source/api_reference/embodichain/embodichain.lab.sim.robots.rst b/docs/source/api_reference/embodichain/embodichain.lab.sim.robots.rst
index d6428af3..c3457108 100644
--- a/docs/source/api_reference/embodichain/embodichain.lab.sim.robots.rst
+++ b/docs/source/api_reference/embodichain/embodichain.lab.sim.robots.rst
@@ -3,4 +3,23 @@
 
 .. automodule:: embodichain.lab.sim.robots
 
+Overview
+--------
+
+This module exposes robot-specific configuration presets for simulation scenes.
+
+.. rubric:: Classes
+
+.. autosummary::
+
+   CobotMagicCfg
+
+.. currentmodule:: embodichain.lab.sim.robots
+
+.. autoclass:: CobotMagicCfg
+   :members:
+   :inherited-members:
+   :show-inheritance:
+   :exclude-members: __init__, copy, replace, to_dict, validate
+
    
\ No newline at end of file
diff --git a/docs/source/api_reference/embodichain/embodichain.lab.sim.rst b/docs/source/api_reference/embodichain/embodichain.lab.sim.rst
index 2a21fcf0..412f570d 100644
--- a/docs/source/api_reference/embodichain/embodichain.lab.sim.rst
+++ b/docs/source/api_reference/embodichain/embodichain.lab.sim.rst
@@ -3,21 +3,30 @@
 
 .. automodule:: embodichain.lab.sim
 
-   .. rubric:: Submodules
-
-   .. autosummary::
-      :toctree: .
-
-      sim_manager
-      cfg
-      common
-      material
-      shapes
-      objects
-      sensors
-      planners
-      solvers
-      utility
+Overview
+--------
+
+The ``sim`` package provides simulation-core APIs including scene/object
+management, materials, sensors, planning/IK utilities, and action helpers.
+
+.. rubric:: Submodules
+
+.. autosummary::
+   :toctree: .
+
+   sim_manager
+   cfg
+   common
+   material
+   shapes
+   objects
+   robots
+   sensors
+   solvers
+   planners
+   atomic_actions
+   types
+   utility
 
 .. currentmodule:: embodichain.lab.sim
 
@@ -35,8 +44,8 @@ Simulation Manager
    :show-inheritance:
    :exclude-members: __init__, copy, replace, to_dict, validate
 
-Configurations
-------------------
+Configuration
+-------------
 
 .. automodule:: embodichain.lab.sim.cfg
    :members:
@@ -44,8 +53,8 @@ Configurations
    :show-inheritance:
    :exclude-members: __init__, copy, replace, to_dict, validate
 
-Common Conponents
-------------------
+Common Components
+-----------------
 
 .. automodule:: embodichain.lab.sim.common
    :members:
@@ -53,7 +62,7 @@ Common Conponents
    :show-inheritance:
 
 Materials
-------------------
+---------
 
 .. automodule:: embodichain.lab.sim.material
    :members:
@@ -61,7 +70,7 @@ Materials
    :show-inheritance:
 
 Shapes
-------------------
+------
 
 .. automodule:: embodichain.lab.sim.shapes
    :members:
@@ -69,6 +78,14 @@ Shapes
    :show-inheritance:
    :exclude-members: __init__, copy, replace, to_dict, validate
 
+Atomic Actions
+--------------
+
+.. automodule:: embodichain.lab.sim.atom_actions
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 Objects
 -------
 
@@ -85,6 +102,14 @@ Sensors
 
    embodichain.lab.sim.sensors
 
+Robot Configurations
+--------------------
+
+.. automodule:: embodichain.lab.sim.robots
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 Solvers
 -------
 
@@ -101,6 +126,21 @@ Planners
 
    embodichain.lab.sim.planners
 
+Atomic Actions
+--------------
+
+.. toctree::
+   :maxdepth: 1
+
+   embodichain.lab.sim.atomic_actions
+Shared Types
+------------
+
+.. automodule:: embodichain.lab.sim.types
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 Utility
 -------
 
diff --git a/docs/source/api_reference/embodichain/embodichain.lab.sim.types.rst b/docs/source/api_reference/embodichain/embodichain.lab.sim.types.rst
index 5b1c4bd8..f01bae1f 100644
--- a/docs/source/api_reference/embodichain/embodichain.lab.sim.types.rst
+++ b/docs/source/api_reference/embodichain/embodichain.lab.sim.types.rst
@@ -3,4 +3,27 @@
 
 .. automodule:: embodichain.lab.sim.types
 
+Overview
+--------
+
+Shared tensor/type aliases used across simulation, environment, and policy
+interfaces.
+
+.. rubric:: Type Aliases
+
+.. autosummary::
+
+   Array
+   Device
+   EnvObs
+   EnvAction
+
+.. autodata:: Array
+
+.. autodata:: Device
+
+.. autodata:: EnvObs
+
+.. autodata:: EnvAction
+
    
\ No newline at end of file
diff --git a/docs/source/api_reference/embodichain/embodichain.lab.sim.utility.rst b/docs/source/api_reference/embodichain/embodichain.lab.sim.utility.rst
index f64d3ce3..2e45ea5d 100644
--- a/docs/source/api_reference/embodichain/embodichain.lab.sim.utility.rst
+++ b/docs/source/api_reference/embodichain/embodichain.lab.sim.utility.rst
@@ -3,21 +3,73 @@ embodichain.lab.sim.utility
 
 .. automodule:: embodichain.lab.sim.utility
 
-Utility Functions
------------------
+Overview
+--------
 
-This module contains utility functions for simulation, mesh processing, and URDF handling.
+This package contains helper utilities for simulation state conversion,
+mesh/geometry handling, configuration transforms, keyboard interaction, and
+action/solver adaptation.
 
 .. rubric:: Submodules
 
 .. autosummary::
 
+   action_utils
+   atom_action_utils
+   cfg_utils
+   gizmo_utils
+   import_utils
+   io_utils
+   keyboard_utils
    sim_utils
    mesh_utils
-   urdf_utils
+   solver_utils
+   tensor
 
 .. currentmodule:: embodichain.lab.sim.utility
 
+Action Utilities
+~~~~~~~~~~~~~~~~
+
+.. automodule:: embodichain.lab.sim.utility.action_utils
+   :members:
+
+Atomic Action Utilities
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: embodichain.lab.sim.utility.atom_action_utils
+   :members:
+
+Configuration Utilities
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. automodule:: embodichain.lab.sim.utility.cfg_utils
+   :members:
+
+Gizmo Utilities
+~~~~~~~~~~~~~~~
+
+.. automodule:: embodichain.lab.sim.utility.gizmo_utils
+   :members:
+
+Import Utilities
+~~~~~~~~~~~~~~~~
+
+.. automodule:: embodichain.lab.sim.utility.import_utils
+   :members:
+
+I/O Utilities
+~~~~~~~~~~~~~
+
+.. automodule:: embodichain.lab.sim.utility.io_utils
+   :members:
+
+Keyboard Utilities
+~~~~~~~~~~~~~~~~~~
+
+.. automodule:: embodichain.lab.sim.utility.keyboard_utils
+   :members:
+
 Simulation Utils
 ~~~~~~~~~~~~~~~~
 
@@ -29,3 +81,15 @@ Mesh Utils
 
 .. automodule:: embodichain.lab.sim.utility.mesh_utils
    :members:
+
+Solver Utilities
+~~~~~~~~~~~~~~~~
+
+.. automodule:: embodichain.lab.sim.utility.solver_utils
+   :members:
+
+Tensor Utilities
+~~~~~~~~~~~~~~~~
+
+.. automodule:: embodichain.lab.sim.utility.tensor
+   :members:
diff --git a/docs/source/api_reference/embodichain/embodichain.utils.rst b/docs/source/api_reference/embodichain/embodichain.utils.rst
index 490962ce..c4d131a1 100644
--- a/docs/source/api_reference/embodichain/embodichain.utils.rst
+++ b/docs/source/api_reference/embodichain/embodichain.utils.rst
@@ -3,13 +3,16 @@
 
 .. automodule:: embodichain.utils
 
-   .. Rubric:: Submodules
+   .. rubric:: Submodules
 
    .. autosummary::
 
       warp
+      cfg
       configclass
+      device_utils
       file
+      img_utils
       logger
       math
       module_utils
diff --git a/docs/source/api_reference/index.rst b/docs/source/api_reference/index.rst
index fa3112ae..f73a7480 100644
--- a/docs/source/api_reference/index.rst
+++ b/docs/source/api_reference/index.rst
@@ -1,7 +1,16 @@
 API Reference
 =============
 
-This page provides detailed documentation for all EmbodiChain modules and classes.
+This section provides the API-level documentation for EmbodiChain's public Python
+modules.
+
+Use this reference when you need:
+
+* module-level overviews and responsibilities,
+* public classes, functions, and configuration objects,
+* links into specialized subpackages (simulation, gym environments, RL, and utilities).
+
+The pages are organized from high-level package namespaces to concrete submodules.
 
 Core Framework
 --------------
@@ -14,6 +23,7 @@ The following modules are available in the core ``embodichain`` framework:
    :toctree: embodichain
 
    agents
+   data
    lab
    toolkits
    utils
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 59145215..a0b23064 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -41,7 +41,6 @@
     "sphinx_design",
     "myst_parser",  # if you prefer Markdown pages
     "sphinx_copybutton",
-    "sphinx_multiversion",
 ]
 # Napoleon settings if using Google/NumPy docstring style:
 napoleon_google_docstring = True
@@ -65,17 +64,45 @@
 exclude_patterns = []
 
 
+# -- Version selector sidebar ---------------------------------------------------
+html_sidebars = {
+    "**": [
+        "navbar-logo.html",
+        "versioning.html",
+        "search-field.html",
+        "sbt-sidebar-nav.html",
+    ]
+}
+
+
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
 
 html_theme = "sphinx_book_theme"
 html_static_path = ["_static"]
+# Don't include version-redirect.js automatically - we add it manually to root
+html_js_files = []
 # html_logo = "_static/logo_e.png"
 
-# -- sphinx-multiversion configuration -------------------------------------------------
-# Only build tags that look like v1.0.0 or branches like main/dev
-smv_tag_whitelist = r"^v\d+\.\d+\.\d+$"
-smv_branch_whitelist = r"^(main|dev)$"
-smv_remote_whitelist = r"^origin$"
-smv_released_pattern = r"^tags/v\d+\.\d+\.\d+$"
-smv_outputdir_format = "{ref.name}"
+# Configure HTML base URL for better local previewing
+# Use empty string to use relative paths from the build directory
+html_baseurl = ""
+
+# HTML context for better path handling
+html_context = {
+    "github_user": "dexforce",
+    "github_repo": "EmbodiChain",
+    "github_version": "main",
+    "doc_path": "docs/source",
+}
+
+html_theme_options = {
+    "title": "EmbodiChain",
+    "logo_only": False,
+    "show_toc_level": 2,
+    "collapse_navigation": True,
+    "sticky_navigation": True,
+    "navigation_depth": 4,
+    "includehidden": True,
+    "prev_next_buttons_location": "bottom",
+}
diff --git a/docs/source/features/agents.md b/docs/source/features/generative_sim/agents.md
similarity index 93%
rename from docs/source/features/agents.md
rename to docs/source/features/generative_sim/agents.md
index 7cb2356d..5c75fee5 100644
--- a/docs/source/features/agents.md
+++ b/docs/source/features/generative_sim/agents.md
@@ -1,4 +1,4 @@
-# EmbodiAgent
+# EmbodiAgent（aborted）
 
 EmbodiAgent is a hierarchical multi-agent system that enables robots to perform complex manipulation tasks through closed-loop planning, code generation, and validation. The system combines vision-language models (VLMs) and large language models (LLMs) to translate high-level goals into executable robot actions.
 
@@ -164,3 +164,12 @@ embodichain/agents/
 │   └── prompt/                # Prompt templates (LangChain)
 └── prompts/                   # Agent prompt templates
 ```
+
+---
+
+## See Also
+
+- [Online Data Streaming](../online_data.md) — Streaming live simulation data for training
+- [RL Architecture](../../overview/rl/index.rst) — RL training pipeline and algorithms
+- [Atomic Actions Tutorial](../../tutorial/atomic_actions.rst) — Action primitives used by the CodeAgent
+- [Supported Tasks](../../resources/task/index.rst) — Available task environments
diff --git a/docs/source/features/generative_sim/index.rst b/docs/source/features/generative_sim/index.rst
new file mode 100644
index 00000000..1f7c759f
--- /dev/null
+++ b/docs/source/features/generative_sim/index.rst
@@ -0,0 +1,9 @@
+Generative Simulation
+=====================
+
+Generative Simulation collects EmbodiChain features for generating simulation-ready assets and executing agent-driven task workflows.
+
+.. toctree::
+   :maxdepth: 2
+
+   SimReady Asset Pipeline <simready_pipeline.md>
diff --git a/docs/source/features/generative_sim/simready_pipeline.md b/docs/source/features/generative_sim/simready_pipeline.md
new file mode 100644
index 00000000..58aa9cf1
--- /dev/null
+++ b/docs/source/features/generative_sim/simready_pipeline.md
@@ -0,0 +1,224 @@
+# SimReady Asset Pipeline
+
+The SimReady asset pipeline converts raw mesh archives into normalized simulation assets. It ingests a source mesh, preserves or bakes visual materials, cleans mesh topology, estimates real-world scale and semantics with multimodal LLMs, and exports assets that can be loaded directly in EmbodiChain simulations.
+
+## Quick Start
+
+Run the pipeline on a single asset directory:
+
+```bash
+python -m embodichain.gen_sim.simready_pipeline.cli.start \
+    --input_dir /path/to/raw_mesh_folder \
+    --output_root /path/to/output_folder \
+    --category YourCategory
+```
+
+Preview the generated SimReady mesh:
+
+```bash
+python -m embodichain preview-asset \
+    --asset_path /path/to/sim_ready_asset_or_usd_asset \
+    --asset_type rigid
+```
+
+## Prerequisites
+
+The full pipeline uses Blender, trimesh, pyrender, and an OpenAI-compatible multimodal chat completions endpoint. Install EmbodiChain with the `gensim` extra and enable both the EmbodiChain package index and Blender package index.
+
+Install from PyPI with `uv`:
+
+```bash
+uv pip install "embodichain[gensim]" \
+    --extra-index-url http://pyp.open3dv.site:2345/simple/ \
+    --trusted-host pyp.open3dv.site \
+    --extra-index-url https://download.blender.org/pypi/
+```
+
+Install from source with `uv`:
+
+```bash
+git clone https://github.com/DexForce/EmbodiChain.git
+cd EmbodiChain
+uv pip install -e ".[gensim]" \
+    --extra-index-url http://pyp.open3dv.site:2345/simple/ \
+    --trusted-host pyp.open3dv.site \
+    --extra-index-url https://download.blender.org/pypi/
+```
+
+Install from PyPI with `pip`:
+
+```bash
+pip install "embodichain[gensim]" \
+    --extra-index-url http://pyp.open3dv.site:2345/simple/ \
+    --trusted-host pyp.open3dv.site \
+    --extra-index-url https://download.blender.org/pypi/
+```
+
+Install from source with `pip`:
+
+```bash
+git clone https://github.com/DexForce/EmbodiChain.git
+cd EmbodiChain
+pip install -e ".[gensim]" \
+    --extra-index-url http://pyp.open3dv.site:2345/simple/ \
+    --trusted-host pyp.open3dv.site \
+    --extra-index-url https://download.blender.org/pypi/
+```
+
+Set the OpenAI-compatible LLM api(OpenAI, Gemini, Doubao, etc.) before running the pipeline, or configure them in `embodichain/gen_sim/simready_pipeline/configs/gen_config.json`. Environment variables override the JSON config.
+
+OpenAI-compatible API example:
+
+```bash
+export OPENAI_API_KEY="your-openai-api-key"
+export OPENAI_MODEL="gpt-4o"
+export OPENAI_BASE_URL="https://api.openai.com/v1"
+```
+
+## Processing Flow
+
+The command above runs the full parser sequence:
+
+- **Ingest**: finds the first parseable mesh (`.glb`, `.gltf`, `.obj`, `.ply`, `.stl`), archives the raw input, and writes a canonical `asset_source/asset.obj`.
+- **Visual processing**: by default, Blender remeshes the source mesh, unwraps UVs, and bakes diffuse and normal textures. With `--simple`, ingest uses trimesh only and skips Blender remesh/bake.
+- **Inspection**: detects whether the normalized source is a mesh, articulation, or scene.
+- **Geometry processing**: cleans topology and applies Blender decimation to the canonical mesh.
+- **SimReady finalization**: renders multi-view images, uses the LLM to infer object orientation, physical dimensions, and semantics, then exports `asset_simready/asset_simready.obj`.
+- **Physics and USD export**: infers physics properties and writes a USD package when possible.
+- **Internal preview assets**: generates thumbnails and internal metadata for asset browsing.
+
+## Output Layout
+
+Each processed asset is written under a generated asset ID:
+
+```text
+simready_car/
+`-- <asset_id>/
+    |-- asset_archive/          # Raw source directory copy
+    |-- asset_source/           # Canonical normalized source mesh and textures
+    |   |-- asset.obj
+    |   |-- asset.mtl
+    |   |-- diffuse.png
+    |   `-- normal.png
+    |-- asset_simready/         # Final oriented and scaled mesh
+    |   `-- asset_simready.obj
+    |-- asset_usd/              # USD export
+    `-- asset.json              # Metadata, geometry, semantics, physics, and paths
+```
+
+Use `asset_simready/asset_simready.obj` or `asset_usd/` for simulation preview and downstream scene construction.
+
+## Command-Line Arguments
+
+| Argument | Description | Default |
+| :--- | :--- | :--- |
+| `--input_dir` | Directory containing the raw asset files. | **required** |
+| `--output_root` | Directory where processed assets are written. | **required** |
+| `--category` | Category hint passed into the pipeline, such as `car`, `bowl`, or `chair`. | **required** |
+| `--simple` | Use trimesh-only ingest and skip Blender remesh/bake during ingest. Geometry cleanup later in the pipeline still uses Blender. | `False` |
+
+## Configuration
+
+Pipeline hyperparameters live in `embodichain/gen_sim/simready_pipeline/configs/gen_config.json`. The main hyperparameters are as follow:
+
+### Mesh Processing
+
+```json
+"mesh_processing": {
+  "blender_remesh_bake": {
+    "remesh": {
+      "voxel_size": 0.01,
+      "min_voxel_size_ratio": 0.005,
+      "use_smooth_shade": true
+    },
+    "decimate": {
+      "ratio": 0.9
+    },
+    "uv": {
+      "angle_limit": 66.0,
+      "island_margin": 0.02
+    },
+    "bake": {
+      "texture_size": 2048,
+      "cage_extrusion_ratio": 0.05
+    }
+  },
+  "blender_cleanup_decimate": {
+    "enabled": true,
+    "cleanup": {
+      "merge_dist": 0.00001,
+      "remove_non_manifold": true,
+      "triangulate": false
+    },
+    "simplify": {
+      "ratio": 0.5,
+      "weld_distance": 0.0001,
+      "collapse_triangulate": true
+    }
+  },
+}
+```
+
+`blender_remesh_bake` controls the default ingest path when `--simple` is not provided. It remeshes the raw mesh, decimates it, unwraps UVs, and bakes textures.
+
+`blender_cleanup_decimate` controls the later geometry parser stage. It uses Blender mesh operators and the Blender Decimate modifier to clean and simplify the canonical mesh.
+
+
+### LLM
+
+```json
+"llm": {
+  "openai_compatible": {
+    "api_key": "",
+    "model": "gpt-4o",
+    "base_url": "https://api.openai.com/v1",
+    "default_query": {}
+  }
+}
+```
+
+This section configures the multimodal LLM used for object classification, orientation selection, dimension inference, semantic annotation, and physics inference. Any provider that supports the OpenAI-compatible chat completions API can be used by changing `api_key`, `model`, `base_url`, and optional `default_query` parameters.
+
+For Azure-style OpenAI-compatible endpoints that require an API version query parameter, use `default_query`:
+
+```json
+"llm": {
+  "openai_compatible": {
+    "api_key": "your-api-key",
+    "model": "gpt-4o",
+    "base_url": "your_api",
+    "default_query": {
+      "api-version": "2025-01-01-preview"
+    }
+  }
+}
+```
+
+## Default vs Simple Ingest
+
+The default command uses Blender during ingest:
+
+```bash
+python -m embodichain.gen_sim.simready_pipeline.cli.start \
+    --input_dir /path/to/raw_mesh_folder \
+    --output_root /path/to/output_folder \
+    --category YourCategory
+```
+
+Use `--simple` when you want faster trimesh-only ingest:
+
+```bash
+python -m embodichain.gen_sim.simready_pipeline.cli.start \
+    --input_dir /path/to/raw_mesh_folder \
+    --output_root /path/to/output_folder \
+    --category YourCategory \
+    --simple
+```
+
+The simple mode only affects the ingest step. The downstream geometry parser still uses Blender cleanup and decimation unless `mesh_processing.blender_cleanup_decimate.enabled` is set to `false`.
+
+## See Also
+
+- [Asset Preview](../interaction/preview_asset.md): Load generated meshes and USD assets in the simulator.
+- [Installation](../../quick_start/install.md): Install EmbodiChain with Blender and rendering dependencies.
+- [Toolkits](../toolkits/index.rst): Other asset preparation utilities.
diff --git a/docs/source/features/interaction/preview_asset.md b/docs/source/features/interaction/preview_asset.md
index 4dc2c4be..df3aa040 100644
--- a/docs/source/features/interaction/preview_asset.md
+++ b/docs/source/features/interaction/preview_asset.md
@@ -75,7 +75,7 @@ asset.set_root_pose(pos=[0, 0, 1.0], rot=[0, 0, 0])
 | `--fix_base`         | Fix the base of articulations                                      | `True`               |
 | `--sim_device`       | Simulation device                                                  | `cpu`                |
 | `--headless`         | Run without rendering window                                       | `False`              |
-| `--enable_rt`        | Enable ray tracing                                                 | `False`              |
+| `--renderer`         | Renderer backend: `hybrid`, `fast-rt` or `rt`            | `hybrid`             |
 | `--preview`          | Enter interactive embed mode after loading                         | `False`              |
 
 ## Examples
diff --git a/docs/source/features/interaction/window.md b/docs/source/features/interaction/window.md
index 6c512186..e19b0da0 100644
--- a/docs/source/features/interaction/window.md
+++ b/docs/source/features/interaction/window.md
@@ -9,6 +9,7 @@ The simulation window comes with a set of default controls that enable users to
 | Events                        | Description                                                                                                                                      |
 |---------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------|
 | **Raycast Information Display** | Press the right mouse button to select a point and the 'C' key to print the raycast distance and hit position of a surface (world coordinates) to the console. Useful for debugging and checking the position of objects in the simulation. |
+| **Viewer recording (toggle)** | Press **`r`** to **start** recording what the interactive viewer shows, and press **`r`** again to **stop** and save as MP4 videos. Recording uses a hidden camera that follows the live viewer camera pose, so the exported videos match the on-screen view. Useful for debugging and recording the demos.|
 
 > **Note:** We will add more interaction features in future releases. Stay tuned for updates!
 
diff --git a/docs/source/features/online_data.md b/docs/source/features/online_data.md
index c186aef6..4c016633 100644
--- a/docs/source/features/online_data.md
+++ b/docs/source/features/online_data.md
@@ -143,3 +143,11 @@ It shows item mode, batch mode, and dynamic chunk sizes. Run it with:
 ```bash
 python examples/agents/datasets/online_dataset_demo.py
 ```
+
+---
+
+## See Also
+
+- [EmbodiAgent](generative_sim/agents.md) — Hierarchical agent that uses online data for training
+- [RL Architecture](../overview/rl/index.rst) — RL training pipeline
+- [Data Generation Tutorial](../tutorial/data_generation.rst) — Generating offline datasets
diff --git a/docs/source/features/toolkits/grasp_generator.rst b/docs/source/features/toolkits/grasp_generator.rst
index ba77e77b..7fde03b9 100644
--- a/docs/source/features/toolkits/grasp_generator.rst
+++ b/docs/source/features/toolkits/grasp_generator.rst
@@ -24,7 +24,7 @@ The Code Explained
 Configuring the simulation
 --------------------------
 
-Command-line arguments are parsed with ``argparse`` to select the number of parallel environments, the compute device, and optional rendering features such as ray tracing and headless mode.
+Command-line arguments are parsed with ``argparse`` to select the number of parallel environments, the compute device, and optional rendering features such as renderer backend and headless mode.
 
 .. literalinclude:: ../../../../scripts/tutorials/grasp/grasp_generator.py
    :language: python
@@ -109,6 +109,12 @@ Configuring GraspGeneratorCfg
    * - ``max_deviation_angle``
      - ``π / 12``
      - Maximum allowed angle (in radians) between the specified approach direction and the axis connecting an antipodal point pair. Pairs that deviate more than this threshold are discarded.
+   * - ``is_partial_annotate``
+     - ``True``
+     - When ``True``, the annotator allows selecting a partial region of the mesh for grasp sampling. If ``False``, the entire mesh is used.
+   * - ``is_filter_ground_collision``
+     - ``True``
+     - Whether to filter out grasp poses that would cause the gripper to  collide.
 
 The ``antipodal_sampler_cfg`` field accepts an :class:`~embodichain.toolkits.graspkit.pg_grasp.AntipodalSamplerCfg` instance, which controls how antipodal point pairs are sampled on the mesh surface.
 
@@ -185,7 +191,7 @@ You can customize the run with additional arguments:
 
 .. code-block:: bash
 
-   python scripts/tutorials/grasp/grasp_generator.py --num_envs <n> --device <cuda/cpu> --enable_rt --headless
+   python scripts/tutorials/grasp/grasp_generator.py --num_envs <n> --device <cuda/cpu> --renderer <legacy|hybrid|fast-rt|rt> --headless
 
 After confirming the grasp region in the browser, the script will compute a grasp pose, print the elapsed time, and then wait for you to press **Enter** before executing the full grasp trajectory in the simulation. Press **Enter** again to exit once the motion is complete.
 
diff --git a/docs/source/features/toolkits/urdf_assembly.md b/docs/source/features/toolkits/urdf_assembly.md
index 76f48ddb..dd504956 100644
--- a/docs/source/features/toolkits/urdf_assembly.md
+++ b/docs/source/features/toolkits/urdf_assembly.md
@@ -18,7 +18,7 @@ The tool provides a programmatic way to:
 ```python
 from pathlib import Path
 import numpy as np
-from embedichain.toolkits.urdf_assembly import URDFAssemblyManager
+from embodichain.toolkits.urdf_assembly import URDFAssemblyManager
 
 # Initialize the assembly manager
 manager = URDFAssemblyManager()
@@ -201,6 +201,72 @@ Get all attached sensors.
 manager.get_attached_sensors() -> dict
 ```
 
+##### Component name prefixes (`component_prefix`)
+
+`URDFAssemblyManager` uses `component_prefix` to configure name prefixes for
+each supported component type. This attribute is a list of 2-tuples:
+
+- Form: `[(component_name, prefix), ...]`
+- The default value is:
+
+    ```python
+    [
+        ("chassis", None),
+        ("legs", None),
+        ("torso", None),
+        ("head", None),
+        ("left_arm", "left_"),
+        ("right_arm", "right_"),
+        ("left_hand", "left_"),
+        ("right_hand", "right_"),
+        ("arm", None),
+        ("hand", None),
+    ]
+    ```
+
+You can configure it in a *patch-style* manner via the property:
+
+```python
+# Only override prefixes for existing components; do not introduce
+# new component names.
+manager.component_prefix = [
+    ("left_arm", "L_"),
+    ("right_arm", "R_"),
+    ("left_hand", "L_"),
+    ("right_hand", "R_"),
+]
+```
+
+Semantics:
+
+- Only components that already exist in the default configuration (e.g. `chassis/torso/left_arm/...`) may be overridden; new component names are not allowed.
+- Components not listed in `new_prefixes` keep their original prefix.
+- If `new_prefixes` contains an unknown component name, a `ValueError` is raised indicating that new component types cannot be introduced.
+
+##### Name casing policy (`name_case`)
+
+`URDFAssemblyManager` supports a global name casing policy that controls how
+link and joint names are normalized during assembly. This is configured on
+the manager instance after construction:
+
+```python
+manager = URDFAssemblyManager()
+manager.name_case = {
+        "joint": "upper",  # or "lower" / "none"
+        "link": "lower",  # or "upper" / "none"
+}
+
+Semantics:
+
+- Valid keys: `"joint"`, `"link"`.
+- Valid values: `"upper"`, `"lower"`, `"none"`.
+- Default behavior matches the legacy implementation:
+  - joints are normalized to **UPPERCASE**,
+  - links are normalized to **lowercase**.
+- This policy is propagated to the internal component and connection managers,
+    and is also included in the assembly signature. Changing `name_case` will
+    therefore force a rebuild of the assembled URDF.
+
 ## Using with URDFCfg for Robot Creation
 
 The URDF Assembly Tool can be used directly with `URDFCfg` to create robots with multiple components in the simulation. This is the recommended approach when building robots from assembled URDF files.
@@ -210,7 +276,7 @@ The URDF Assembly Tool can be used directly with `URDFCfg` to create robots with
 The `URDFCfg` class provides a convenient way to define multi-component robots:
 
 ```python
-from embedichain.lab.sim.cfg import RobotCfg, URDFCfg
+from embodichain.lab.sim.cfg import RobotCfg, URDFCfg
 
 cfg = RobotCfg(
     uid="my_robot",
@@ -232,6 +298,27 @@ cfg = RobotCfg(
 )
 ```
 
+When using `URDFCfg` to build multi-component robots, you can pass custom
+component prefixes to the internal `URDFAssemblyManager` via
+`URDFCfg.component_prefix`. Its semantics are identical to
+`URDFAssemblyManager.component_prefix`:
+
+- Each element is a `(component_name, prefix)` tuple.
+- Only prefixes for components that exist in the default configuration may be overridden; no new component names can be added.
+- Components not explicitly listed keep their original prefix.
+
+Example:
+
+```python
+urdf_cfg = URDFCfg(
+    components=[...],
+)
+urdf_cfg.component_prefix = [
+    ("left_arm", "L_"),
+    ("right_arm", "R_"),
+]
+```
+
 ### Complete Example
 
 Here's a complete example from `scripts/tutorials/sim/create_robot.py`:
@@ -241,14 +328,14 @@ import numpy as np
 import torch
 from scipy.spatial.transform import Rotation as R
 
-from embedichain.lab.sim import SimulationManager, SimulationManagerCfg
-from embedichain.lab.sim.objects import Robot
-from embedichain.lab.sim.cfg import (
+from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
+from embodichain.lab.sim.objects import Robot
+from embodichain.lab.sim.cfg import (
     JointDrivePropertiesCfg,
     RobotCfg,
     URDFCfg,
 )
-from embedichain.data import get_data_path
+from embodichain.data import get_data_path
 
 
 def create_robot(sim):
@@ -269,7 +356,6 @@ def create_robot(sim):
     # Define transformation for hand attachment
     hand_transform = np.eye(4)
     hand_transform[:3, :3] = R.from_rotvec([90, 0, 0], degrees=True).as_matrix()
-    hand_transform[2, 3] = 0.02  # 2cm offset along z-axis
 
     # Create robot configuration
     cfg = RobotCfg(
@@ -300,6 +386,86 @@ def create_robot(sim):
     return robot
 
 
+# Initialize simulation and create robot
+sim = SimulationManager(SimulationManagerCfg(headless=True, num_envs=4))
+robot = create_robot(sim)
+print(f"Robot created with {robot.dof} joints")
+```
+
+```python
+import numpy as np
+import torch
+from scipy.spatial.transform import Rotation as R
+
+from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
+from embodichain.lab.sim.objects import Robot
+from embodichain.lab.sim.cfg import (
+    JointDrivePropertiesCfg,
+    RobotCfg,
+    URDFCfg,
+)
+from embodichain.data import get_data_path
+
+
+def create_robot(sim):
+    """Create and configure a robot with arm and hand components."""
+
+    # Get URDF paths for robot components
+    arm_urdf_path = get_data_path("Rokae/SR5/SR5.urdf")
+    hand_urdf_path = get_data_path(
+        "BrainCoHandRevo1/BrainCoLeftHand/BrainCoLeftHand.urdf"
+    )
+
+    # Define transformation for hand attachment
+    hand_transform = np.eye(4)
+    hand_transform[:3, :3] = R.from_rotvec([90, 0, 0], degrees=True).as_matrix()
+
+    left_arm_base_xpos = np.eye(4)
+    left_arm_base_xpos[1, 3] = 0.3
+
+    right_arm_base_xpos = np.eye(4)
+    right_arm_base_xpos[1, 3] = -0.3
+
+    # Create robot configuration
+    cfg = RobotCfg(
+        uid="dual_sr5",
+        urdf_cfg=URDFCfg(
+            components=[
+                {
+                    "component_type": "left_arm",
+                    "urdf_path": arm_urdf_path,
+                    "transform": left_arm_base_xpos,
+                },
+                {
+                    "component_type": "right_arm",
+                    "urdf_path": arm_urdf_path,
+                    "transform": right_arm_base_xpos,
+                },
+                {
+                    "component_type": "left_hand",
+                    "urdf_path": hand_urdf_path,
+                    "transform": hand_transform,
+                },
+                {
+                    "component_type": "right_hand",
+                    "urdf_path": hand_urdf_path,
+                    "transform": hand_transform,
+                },
+            ],
+            component_prefix=[("left_arm", "L_"), ("right_arm", "R_"), ("left_hand", "left_"), ("right_hand", "right_")],
+            name_case={
+                "joint": "lower",
+                "link": "lower",
+            }
+        ),
+    )
+
+    # Add robot to simulation
+    robot: Robot = sim.add_robot(cfg=cfg)
+
+    return robot
+
+
 # Initialize simulation and create robot
 sim = SimulationManager(SimulationManagerCfg(headless=True, num_envs=4))
 robot = create_robot(sim)
diff --git a/docs/source/guides/add_robot.rst b/docs/source/guides/add_robot.rst
index d58740a1..f437fd0b 100644
--- a/docs/source/guides/add_robot.rst
+++ b/docs/source/guides/add_robot.rst
@@ -1,563 +1,54 @@
-.. _tutorial_add_robot:
+.. _guide_add_robot:
 
-Adding a New Robot
-==================
+Adding a New Robot — Quick Reference
+=====================================
 
-.. currentmodule:: embodichain.lab.sim.robots
+This guide provides a checklist and key reference for adding a new robot to EmbodiChain. For the full step-by-step walkthrough with code examples, see :doc:`/tutorial/add_robot`.
 
-This tutorial guides you through adding a new robot to EmbodiChain. You'll learn the file structure, key components, and patterns used for robot definitions.
+Checklist
+---------
 
-EmbodiChain supports two approaches for defining robots:
+1. **Prepare the URDF** — Place your URDF file (and associated meshes) in the robot assets directory.
+2. **Create the config class** — Inherit from ``RobotCfg``, implement ``from_dict`` and ``_build_default_cfgs``.
+3. **Define control parts** — Group joints into logical sets (e.g., ``arm``, ``gripper``).
+4. **Configure IK solver** — Choose ``OPWSolverCfg``, ``SRSSolverCfg``, or a generic ``SolverCfg``.
+5. **Set drive properties** — Configure stiffness, damping, and max effort per joint group.
+6. **Implement** ``build_pk_serial_chain`` — Required for PyTorch-Kinematics IK support.
+7. **Register in** ``embodichain/lab/sim/robots/__init__.py``.
+8. **Add documentation** — Create ``docs/source/resources/robot/my_robot.md`` and update ``resources/robot/index.rst``.
+9. **Test** — Add a ``__main__`` block or use the ``preview-asset`` CLI to verify.
 
-1. **Single-file approach**: For simpler robots (like ``CobotMagic``)
-2. **Package approach**: For complex robots with multiple variants (like ``DexforceW1``)
+Approaches
+----------
 
-Choose the approach based on your robot's complexity.
+- **Single-file** (simple robots): One ``my_robot.py`` with everything.
+- **Package** (complex robots): Directory with ``types.py``, ``params.py``, ``utils.py``, ``cfg.py``, ``__init__.py``.
 
----
-
-Prerequisites
-~~~~~~~~~~~~~~
-
-Before adding a new robot, ensure you have:
-
-- URDF file(s) for your robot
-- Robot's kinematic parameters (DH parameters or joint limits)
-- Understanding of your robot's joint structure and control parts
-
----
-
-Approach 1: Single-File Robot (Simple Robots)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Use this approach for robots with a single variant and straightforward configuration.
-
-File: ``embodichain/lab/sim/robots/my_robot.py``
-
-.. dropdown:: Complete Example: CobotMagic-style Robot
-   :icon: code
-
-   .. literalinclude:: ../../../embodichain/lab/sim/robots/cobotmagic.py
-      :language: python
-      :linenos:
-
-Step-by-Step Guide
-------------------
-
-1. **Create the configuration class** inheriting from ``RobotCfg``:
-
-   .. code-block:: python
-
-      from __future__ import annotations
-
-      from typing import Dict, List, Any
-      import numpy as np
-
-      from embodichain.lab.sim.cfg import (
-          RobotCfg,
-          URDFCfg,
-          JointDrivePropertiesCfg,
-          RigidBodyAttributesCfg,
-      )
-      from embodichain.lab.sim.solvers import SolverCfg, OPWSolverCfg
-      from embodichain.lab.sim.utility.cfg_utils import merge_robot_cfg
-      from embodichain.data import get_data_path
-      from embodichain.utils import configclass
-
-      @configclass
-      class MyRobotCfg(RobotCfg):
-          urdf_cfg: URDFCfg = None
-          control_parts: Dict[str, List[str]] | None = None
-          solver_cfg: Dict[str, "SolverCfg"] | None = None
-
-2. **Implement the ``from_dict`` class method** for flexible initialization:
-
-   .. code-block:: python
-
-      @classmethod
-      def from_dict(cls, init_dict: Dict[str, Any]) -> "MyRobotCfg":
-          cfg = cls()
-          default_cfgs = cls()._build_default_cfgs()
-          for key, value in default_cfgs.items():
-              setattr(cfg, key, value)
-          cfg = merge_robot_cfg(cfg, init_dict)
-          return cfg
-
-3. **Define ``_build_default_cfgs``** with your robot's defaults:
-
-   .. code-block:: python
-
-      @staticmethod
-      def _build_default_cfgs() -> Dict[str, Any]:
-          # URDF path
-          urdf_path = get_data_path("MyRobot/my_robot.urdf")
-
-          # URDF configuration (for multi-component robots)
-          urdf_cfg = URDFCfg(
-              components=[
-                  {
-                      "component_type": "arm",
-                      "urdf_path": urdf_path,
-                      "transform": np.eye(4),  # 4x4 transform matrix
-                  },
-              ]
-          )
-
-          # Control parts - group joints for control
-          control_parts = {
-              "arm": [
-                  "JOINT1", "JOINT2", "JOINT3",
-                  "JOINT4", "JOINT5", "JOINT6",
-              ],
-              "gripper": ["JOINT7", "JOINT8"],
-          }
-
-          # Solver configuration for IK
-          solver_cfg = {
-              "arm": OPWSolverCfg(
-                  end_link_name="link6",
-                  root_link_name="base_link",
-                  tcp=np.array([...]),  # Tool center point transform
-              ),
-          }
-
-          # Drive properties - joint physics parameters
-          drive_pros = JointDrivePropertiesCfg(
-              stiffness={
-                  "JOINT[1-6]": 7e4,  # Regex pattern for joints 1-6
-                  "JOINT[7-8]": 3e2,
-              },
-              damping={
-                  "JOINT[1-6]": 1e3,
-                  "JOINT[7-8]": 3e1,
-              },
-              max_effort={
-                  "JOINT[1-6]": 3e6,
-                  "JOINT[7-8]": 3e3,
-              },
-          )
-
-          return {
-              "uid": "MyRobot",
-              "urdf_cfg": urdf_cfg,
-              "control_parts": control_parts,
-              "solver_cfg": solver_cfg,
-              "drive_pros": drive_pros,
-              "attrs": RigidBodyAttributesCfg(
-                  mass=0.1,
-                  static_friction=0.95,
-                  dynamic_friction=0.9,
-                  linear_damping=0.7,
-                  angular_damping=0.7,
-              ),
-          }
-
-4. **Implement ``build_pk_serial_chain``** for PyTorch-Kinematics:
-
-   .. code-block:: python
-
-      def build_pk_serial_chain(
-          self, device: torch.device = torch.device("cpu"), **kwargs
-      ) -> Dict[str, "pk.SerialChain"]:
-          from embodichain.lab.sim.utility.solver_utils import (
-              create_pk_chain,
-              create_pk_serial_chain,
-          )
-
-          urdf_path = get_data_path("MyRobot/my_robot.urdf")
-          chain = create_pk_chain(urdf_path, device)
-
-          arm_chain = create_pk_serial_chain(
-              chain=chain,
-              end_link_name="link6",
-              root_link_name="base_link"
-          ).to(device=device)
-
-          return {"arm": arm_chain}
-
-5. **Register in** ``embodichain/lab/sim/robots/__init__.py``:
-
-   .. code-block:: python
-
-      from .my_robot import MyRobotCfg
-
----
-
-Approach 2: Package-Based Robot (Complex Robots)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Use this approach for robots with multiple variants (e.g., different arm types, versions, or configurations).
-
-File Structure
+Key Parameters
 --------------
 
-For complex robots, create a package directory:
-
-.. code-block::
-
-   robots/
-   └── my_robot/
-       ├── __init__.py      # Exports the main config class
-       ├── types.py         # Enums for robot variants
-       ├── params.py        # Kinematics parameters
-       ├── utils.py         # Manager classes and builders
-       └── cfg.py           # Main configuration class
-
-Step-by-Step Guide
------------------
-
-1. **types.py** - Define enums for robot variants:
-
-   .. code-block:: python
-
-      from enum import Enum
-
-      class MyRobotVersion(Enum):
-          V010 = "v010"
-          V020 = "v020"
-
-      class MyRobotArmKind(Enum):
-          STANDARD = "standard"
-          EXTENDED = "extended"
-
-      class MyRobotSide(Enum):
-          LEFT = "left"
-          RIGHT = "right"
-
-2. **params.py** - Define kinematics parameters:
-
-   .. code-block:: python
-
-      from dataclasses import dataclass
-      import numpy as np
-      from typing import Optional
-
-      @dataclass
-      class MyRobotArmKineParams:
-          arm_side: MyRobotSide
-          arm_kind: MyRobotArmKind
-          version: MyRobotVersion
-
-          dh_params: np.ndarray = None  # DH parameters (N x 4)
-          qpos_limits: np.ndarray = None  # Joint limits (N x 2)
-          link_lengths: np.ndarray = None  # Link lengths
-          T_b_ob: np.ndarray = None  # Base to origin transform
-          T_e_oe: np.ndarray = None  # End-effector transform
-
-3. **utils.py** - Manager classes and builder functions:
-
-   .. code-block:: python
-
-      class ArmManager:
-          """Manages arm URDF and configuration."""
-          pass
-
-      def build_my_robot_assembly_urdf_cfg(...):
-          """Build URDF assembly from components."""
-          pass
-
-      def build_my_robot_cfg(...):
-          """Build complete robot configuration."""
-          pass
-
-4. **cfg.py** - Main configuration class:
-
-   .. code-block:: python
-
-      @configclass
-      class MyRobotCfg(RobotCfg):
-          version: MyRobotVersion = MyRobotVersion.V010
-          arm_kind: MyRobotArmKind = MyRobotArmKind.STANDARD
-
-          @classmethod
-          def from_dict(cls, init_dict: Dict) -> "MyRobotCfg":
-              # Implementation similar to single-file approach
-              pass
-
-5. **__init__.py** - Export the config:
-
-   .. code-block:: python
-
-      from .cfg import MyRobotCfg
-
-6. **Register in** ``robots/__init__.py``:
-
-   .. code-block:: python
-
-      from .my_robot import *
-
----
-
-Key Configuration Parameters
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Regardless of the approach, your robot config needs these core parameters:
-
-+---------------------+------------------------+----------------------------------+
-| Parameter           | Type                   | Description                      |
-+=====================+========================+==================================+
-| ``uid``             | str                    | Unique robot identifier         |
-+---------------------+------------------------+----------------------------------+
-| ``urdf_cfg``        | URDFCfg                | URDF file and components        |
-+---------------------+------------------------+----------------------------------+
-| ``control_parts``   | Dict[str, List[str]]   | Joint groups for control        |
-+---------------------+------------------------+----------------------------------+
-| ``solver_cfg``      | Dict[str, SolverCfg]   | IK solver configurations        |
-+---------------------+------------------------+----------------------------------+
-| ``drive_pros``      | JointDrivePropertiesCfg | Joint stiffness, damping, force |
-+---------------------+------------------------+----------------------------------+
-| ``attrs``           | RigidBodyAttributesCfg | Mass, friction, damping         |
-+---------------------+------------------------+----------------------------------+
-
-URDF Configuration
------------------
-
-The ``URDFCfg`` allows composing robots from multiple URDF files:
-
-.. code-block:: python
-
-   urdf_cfg = URDFCfg(
-       components=[
-           {
-               "component_type": "arm",
-               "urdf_path": arm_urdf,
-               "transform": np.eye(4),
-           },
-           {
-               "component_type": "gripper",
-               "urdf_path": gripper_urdf,
-               "transform": gripper_transform,
-           },
-       ]
-   )
-
-Control Parts
--------------
-
-Group joints logically for different control modes:
-
-.. code-block:: python
-
-   control_parts = {
-       "arm": ["JOINT1", "JOINT2", "JOINT3", "JOINT4", "JOINT5", "JOINT6"],
-       "gripper": ["JOINT7", "JOINT8"],
-   }
-
-Use regex patterns for flexible matching:
-- ``"JOINT[1-6]"`` matches JOINT1 through JOINT6
-- ``"(LEFT|RIGHT)_ARM.*"`` matches all arm joints
-
-Drive Properties
-----------------
-
-Configure joint physics behavior:
-
-.. code-block:: python
-
-   drive_pros = JointDrivePropertiesCfg(
-       stiffness={
-           "ARM_JOINTS": 1e4,    # High stiffness for arm joints
-           "GRIPPER_JOINTS": 3e2,  # Lower stiffness for gripper
-       },
-       damping={
-           "ARM_JOINTS": 1e3,
-           "GRIPPER_JOINTS": 3e1,
-       },
-       max_effort={
-           "ARM_JOINTS": 1e5,
-           "GRIPPER_JOINTS": 1e3,
-       },
-   )
-
-IK Solver Configuration
------------------------
-
-Choose the appropriate solver for your robot:
-
-- **OPWSolverCfg**: For 6-axis industrial arms (like CobotMagic)
-- **SRSSolverCfg**: For robots with specific kinematics (like DexforceW1)
-- **SolverCfg**: Generic solver configuration
-
-.. code-block:: python
-
-   solver_cfg = {
-       "arm": OPWSolverCfg(
-           end_link_name="link6",
-           root_link_name="base_link",
-           tcp=np.array([...]),  # Tool center point
-       ),
-   }
-
----
-
-Using Your Robot
-~~~~~~~~~~~~~~~~
-
-After adding the robot, use it in your code:
-
-.. code-block:: python
-
-   from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
-   from embodichain.lab.sim.robots import MyRobotCfg
-
-   # Create simulation
-   sim_cfg = SimulationManagerCfg(headless=False, num_envs=2)
-   sim = SimulationManager(sim_cfg)
-
-   # Create robot config
-   robot_cfg = MyRobotCfg.from_dict({
-       "uid": "my_robot",
-   })
-
-   # Add robot to simulation
-   robot = sim.add_robot(cfg=robot_cfg)
-
----
-
-Testing Your Robot
-~~~~~~~~~~~~~~~~~~
-
-Add a test block at the bottom of your robot config file:
-
-.. code-block:: python
-
-   if __name__ == "__main__":
-       from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
-
-       sim_cfg = SimulationManagerCfg(headless=True, num_envs=2)
-       sim = SimulationManager(sim_cfg)
-
-       robot_cfg = MyRobotCfg.from_dict({"uid": "my_robot"})
-       robot = sim.add_robot(cfg=robot_cfg)
-
-       print("Robot added successfully!")
-
----
-
-Best Practices
-~~~~~~~~~~~~~~
-
-1. **Use the** ``@configclass`` **decorator** for all config classes
-2. **Provide** ``from_dict`` **method** for flexible initialization
-3. **Use regex patterns** for joint names in drive properties
-4. **Keep kinematics parameters** separate in ``params.py`` for complex robots
-5. **Include** ``build_pk_serial_chain`` **method** for IK support
-6. **Add** ``to_dict`` **and** ``save_to_file`` **methods** for serialization
-7. **Test with** ``__main__`` **block** before integrating
-8. **Add robot documentation** in ``docs/source/resources/robot/`` for user reference
-
----
-
-Adding Robot Documentation
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-When adding a new robot, create documentation in ``docs/source/resources/robot/`` to help users understand and use your robot.
-
-File Location
--------------
-
-Create a markdown file: ``docs/source/resources/robot/my_robot.md``
-
-Recommended Structure
----------------------
-
-.. code-block:: markdown
-
-   # MyRobot
-
-   Brief description of the robot and its manufacturer.
-
-   <div style="text-align: center;">
-     <img src="../../_static/robots/my_robot.jpg" alt="MyRobot" style="height: 400px; width: auto;"/>
-     <p><b>MyRobot</b></p>
-   </div>
-
-   ## Key Features
-
-   - Feature 1
-   - Feature 2
-   - Feature 3
-
-   ---
-
-   ## Robot Parameters
-
-   | Parameter | Description |
-   |-----------|-------------|
-   | Joints    | Number of joints |
-   | DOF       | Degrees of freedom |
-   | ...       | ... |
-
-   ---
-
-   ## Quick Initialization Example
-
-   ```python
-   from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
-   from embodichain.lab.sim.robots import MyRobotCfg
-
-   config = SimulationManagerCfg(headless=False, sim_device="cpu", num_envs=2)
-   sim = SimulationManager(config)
-
-   robot = sim.add_robot(cfg=MyRobotCfg.from_dict({}))
-   ```
-
-   ---
-
-   ## Configuration Parameters
-
-   ### Main Configuration Items
-
-   - **uid**: Unique identifier
-   - **urdf_cfg**: URDF configuration
-   - **control_parts**: Control groups
-   - **solver_cfg**: IK solver configuration
-   - **drive_pros**: Joint drive properties
-   - **attrs**: Physical attributes
-
-   ### Custom Usage Example
-
-   ```python
-   custom_cfg = {
-       "uid": "my_robot",
-       # Add parameters
-   }
-   cfg = MyRobotCfg.from_dict(custom_cfg)
-   robot = sim.add_robot(cfg=cfg)
-   ```
-
-   ---
-
-   ## References
-
-   - Manufacturer product page
-   - URDF file paths
-   - Related documentation
-
-Register the Robot in Index
----------------------------
-
-After creating the robot documentation, add it to the index file at ``docs/source/resources/robot/index.rst``:
-
-.. code-block:: rst
-
-   .. toctree::
-      :maxdepth: 1
-
-      Dexforce W1 <dexforce_w1.md>
-      CobotMagic <cobotmagic.md>
-      MyRobot <my_robot.md>  # Add your robot here
-
----
-
-Next Steps
-~~~~~~~~~~
-
-After adding your robot:
-
-- Add robot documentation in ``docs/source/resources/robot/``
-- Update ``docs/source/resources/robot/index.rst`` to include the new robot
-- Add task environments that use your robot
-- Configure sensors (cameras, force sensors)
-- Implement custom IK solvers if needed
-- Add motion planning support
++---------------------+----------------------------+----------------------------------+
+| Parameter           | Type                       | Description                      |
++=====================+============================+==================================+
+| ``uid``             | str                        | Unique robot identifier          |
++---------------------+----------------------------+----------------------------------+
+| ``urdf_cfg``        | URDFCfg                    | URDF file and components         |
++---------------------+----------------------------+----------------------------------+
+| ``control_parts``   | Dict[str, List[str]]       | Joint groups for control         |
++---------------------+----------------------------+----------------------------------+
+| ``solver_cfg``      | Dict[str, SolverCfg]       | IK solver configurations         |
++---------------------+----------------------------+----------------------------------+
+| ``drive_pros``      | JointDrivePropertiesCfg    | Joint stiffness, damping, force  |
++---------------------+----------------------------+----------------------------------+
+
+.. tip::
+
+   See the :doc:`full tutorial </tutorial/add_robot>` for complete code examples of both approaches.
+
+See Also
+--------
+
+- :doc:`/tutorial/add_robot` — Full step-by-step tutorial
+- :doc:`/tutorial/robot` — Using robots in simulation
+- :doc:`/overview/sim/solvers/index` — IK solver reference
+- :doc:`/resources/robot/index` — Existing robot documentation
diff --git a/docs/source/guides/cli.md b/docs/source/guides/cli.md
index debb0078..623704d6 100644
--- a/docs/source/guides/cli.md
+++ b/docs/source/guides/cli.md
@@ -27,6 +27,38 @@ python -m embodichain.data download --all
 
 ---
 
+## SimReady Asset Pipeline
+
+Convert a raw mesh asset directory into sim_ready assets for simulation.
+
+```bash
+# Run the full SimReady pipeline on a single asset directory
+python -m embodichain.gen_sim.simready_pipeline.cli.start \
+    --input_dir /path/to/raw_mesh_folder \
+    --output_root /path/to/output_folder \
+    --category YourCategory
+
+# Use trimesh-only ingest for source normalization
+python -m embodichain.gen_sim.simready_pipeline.cli.start \
+    --input_dir /path/to/raw_mesh_folder \
+    --output_root /path/to/output_folder \
+    --category YourCategory \
+    --simple
+```
+
+### Arguments
+
+| Argument | Default | Description |
+|---|---|---|
+| ``--input_dir`` | *(required)* | Directory containing the raw asset files |
+| ``--output_root`` | *(required)* | Directory where processed assets are written |
+| ``--category`` | *(required)* | Category hint passed into the pipeline |
+| ``--simple`` | ``False`` | Use trimesh-only ingest and skip Blender remesh/bake during ingest |
+
+The generated output contains the canonical source mesh under ``asset_source/``, the final SimReady mesh under ``asset_simready/``, and USD export files under ``asset_usd/`` when export succeeds.
+
+---
+
 ## Preview Asset
 
 Preview a USD or mesh asset in the simulation without writing code.
@@ -64,7 +96,7 @@ python -m embodichain preview-asset \
 | ``--fix_base`` | ``True`` | Fix the base of articulations |
 | ``--sim_device`` | ``cpu`` | Simulation device |
 | ``--headless`` | ``False`` | Run without rendering window |
-| ``--enable_rt`` | ``False`` | Enable ray tracing |
+| ``--renderer`` | ``hybrid`` | Renderer backend: ``legacy``, ``hybrid``, ``fast-rt``, or ``rt`` |
 | ``--preview`` | ``False`` | Enter interactive embed mode after loading |
 
 ### Preview Mode
@@ -108,7 +140,7 @@ python -m embodichain run-env --gym_config config.json --headless
 | ``--num_envs`` | ``1`` | Number of parallel environments |
 | ``--device`` | ``cpu`` | Device (``cpu`` or ``cuda``) |
 | ``--headless`` | ``False`` | Run in headless mode |
-| ``--enable_rt`` | ``False`` | Use RTX rendering backend |
+| ``--renderer`` | ``hybrid`` | Renderer backend: ``legacy``, ``hybrid``, ``fast-rt`` or ``rt`` |
 | ``--arena_space`` | ``5.0`` | Arena space size |
 | ``--gpu_id`` | ``0`` | GPU ID to use |
 | ``--preview`` | ``False`` | Enter interactive preview mode |
diff --git a/docs/source/guides/configuration.md b/docs/source/guides/configuration.md
new file mode 100644
index 00000000..c031b891
--- /dev/null
+++ b/docs/source/guides/configuration.md
@@ -0,0 +1,293 @@
+# Configuration Guide
+
+EmbodiChain uses a declarative configuration system built on Python dataclasses. This guide explains the key patterns: `@configclass`, `FunctorCfg`, and JSON configuration files.
+
+---
+
+## The `@configclass` Decorator
+
+All configuration objects use the `@configclass` decorator, which is similar to Python's `@dataclass` with additional validation and serialization support.
+
+```python
+from embodichain.utils import configclass
+from dataclasses import MISSING
+
+
+@configclass
+class MyManagerCfg:
+    param_a: float = 1.0
+    param_b: str = MISSING  # Required — must be set by caller
+    param_c: int = 10
+```
+
+- **Optional parameters** have default values.
+- **Required parameters** use `MISSING` as the default — callers must provide them.
+- All parameters are typed for IDE auto-completion and static analysis.
+
+---
+
+## Configuration Hierarchy
+
+EmbodiChain configs form a nested hierarchy:
+
+```
+EmbodiedEnvCfg
+├── sim_cfg: SimulationManagerCfg
+│   ├── render_cfg: RenderCfg
+│   ├── physics_config: PhysicsCfg
+│   └── gpu_memory_config: GPUMemoryCfg
+├── robot: RobotCfg
+│   ├── urdf_cfg: URDFCfg
+│   ├── drive_pros: JointDrivePropertiesCfg
+│   └── solver_cfg: Dict[str, SolverCfg]
+├── sensor: List[SensorCfg]
+├── events: EventCfg
+├── observations: ObservationCfg
+├── rewards: RewardCfg
+├── actions: ActionTermCfg
+├── dataset: DatasetFunctorCfg
+└── extensions: Dict[str, Any]
+```
+
+Each sub-config can be set independently, allowing fine-grained control over the environment.
+
+---
+
+## Functor Configuration
+
+Functors are configured through specialized config classes that inherit from `FunctorCfg`. The base class has three fields:
+
+```python
+@configclass
+class FunctorCfg:
+    func: Callable | Functor = MISSING   # The function or class to call
+    params: dict[str, Any] = dict()      # Keyword arguments
+    extra: dict[str, Any] = dict()       # Optional metadata
+```
+
+### Specialized Config Classes
+
+| Config Class | Extra Fields | Used By |
+|---|---|---|
+| `ObservationCfg` | `mode`, `name` | ObservationManager |
+| `EventCfg` | `mode`, `interval_step`, `is_global` | EventManager |
+| `RewardCfg` | `weight`, `mode` | RewardManager |
+| `ActionTermCfg` | `mode` | ActionManager |
+| `DatasetFunctorCfg` | `mode` | DatasetManager |
+
+### Python Config Example
+
+```python
+from embodichain.utils import configclass
+from embodichain.lab.gym.envs.managers.cfg import (
+    ObservationCfg,
+    RewardCfg,
+    EventCfg,
+    SceneEntityCfg,
+)
+from embodichain.lab.gym.envs.managers.observations import get_object_pose
+
+
+@configclass
+class MyObsCfg:
+    object_pose: ObservationCfg = ObservationCfg(
+        func=get_object_pose,
+        mode="add",
+        name="object/pose",
+        params={"entity_cfg": SceneEntityCfg(uid="my_cube")},
+    )
+
+
+@configclass
+class MyRewardCfg:
+    distance: RewardCfg = RewardCfg(
+        func="distance_between_objects",
+        weight=0.5,
+        params={
+            "source_entity_cfg": SceneEntityCfg(uid="cube"),
+            "target_entity_cfg": SceneEntityCfg(uid="target"),
+        },
+    )
+
+
+@configclass
+class MyEventCfg:
+    randomize_light: EventCfg = EventCfg(
+        func="randomize_light",
+        mode="interval",
+        interval_step=5,
+        params={"light_uid": "main_light"},
+    )
+```
+
+---
+
+## JSON Configuration
+
+For RL training and data generation, EmbodiChain uses JSON config files. The JSON config mirrors the Python config structure but uses string names instead of direct function references.
+
+### Environment Config (`gym_config.json`)
+
+```json
+{
+    "max_episodes": 100,
+    "max_episode_steps": 600,
+    "env": {
+        "num_envs": 4,
+        "sim_cfg": {
+            "sim_device": "cuda:0",
+            "headless": true
+        },
+        "robot": {
+            "uid": "robot",
+            "urdf_cfg": {"fpath": "robots/my_robot/my_robot.urdf"}
+        },
+        "control_parts": ["arm"],
+        "sensor": [
+            {
+                "uid": "cam_high",
+                "type": "StereoCamera",
+                "height": 540,
+                "width": 960
+            }
+        ],
+        "actions": {
+            "delta_qpos": {
+                "func": "DeltaQposTerm",
+                "params": {"scale": 0.1}
+            }
+        },
+        "events": {
+            "randomize_table": {
+                "func": "randomize_visual_material",
+                "mode": "interval",
+                "interval_step": 10,
+                "params": {"uid": "table"}
+            }
+        },
+        "observations": {
+            "obj_pose": {
+                "func": "get_object_pose",
+                "mode": "add",
+                "name": "object/pose",
+                "params": {"entity_cfg": {"uid": "cube"}}
+            }
+        },
+        "rewards": {
+            "distance": {
+                "func": "distance_between_objects",
+                "weight": 0.5,
+                "params": {
+                    "source_entity_cfg": {"uid": "cube"},
+                    "target_entity_cfg": {"uid": "target"}
+                }
+            }
+        },
+        "dataset": {
+            "lerobot": {
+                "func": "LeRobotRecorder",
+                "mode": "save",
+                "params": {
+                    "save_path": "/path/to/output",
+                    "robot_meta": {"robot_type": "DexforceW1"},
+                    "use_videos": true
+                }
+            }
+        },
+        "extensions": {
+            "success_threshold": 0.1
+        }
+    }
+}
+```
+
+### RL Training Config (`train_config.json`)
+
+```json
+{
+    "trainer": {
+        "exp_name": "push_cube",
+        "seed": 42,
+        "device": "cuda:0",
+        "iterations": 500,
+        "buffer_size": 1024
+    },
+    "env": {
+        "id": "PushCubeRL",
+        "cfg": {
+            "num_envs": 4,
+            "actions": {
+                "delta_qpos": {
+                    "func": "DeltaQposTerm",
+                    "params": {"scale": 0.1}
+                }
+            }
+        }
+    },
+    "policy": {
+        "name": "actor_critic",
+        "actor": {
+            "type": "mlp",
+            "network_cfg": {"hidden_sizes": [256, 256], "activation": "relu"}
+        },
+        "critic": {
+            "type": "mlp",
+            "network_cfg": {"hidden_sizes": [256, 256], "activation": "relu"}
+        }
+    },
+    "algorithm": {
+        "name": "ppo",
+        "cfg": {
+            "learning_rate": 0.0001,
+            "n_epochs": 10,
+            "batch_size": 64,
+            "gamma": 0.99,
+            "gae_lambda": 0.95,
+            "clip_coef": 0.2
+        }
+    }
+}
+```
+
+---
+
+## String-Based Function Resolution
+
+In JSON configs, functor functions are specified by name (string). EmbodiChain resolves these strings at runtime by searching registered modules. For example:
+
+- `"distance_between_objects"` resolves to `embodichain.lab.gym.envs.managers.rewards.distance_between_objects`
+- `"DeltaQposTerm"` resolves to `embodichain.lab.gym.envs.managers.actions.DeltaQposTerm`
+- `"get_object_pose"` resolves to `embodichain.lab.gym.envs.managers.observations.get_object_pose`
+
+When writing custom functors, make sure they are imported in the module's `__init__.py` so the resolver can find them.
+
+---
+
+## `SceneEntityCfg` in JSON
+
+When referencing scene entities in JSON, use a dictionary with a `uid` key:
+
+```json
+{"uid": "my_cube"}
+```
+
+This is automatically converted to a `SceneEntityCfg` object at runtime.
+
+---
+
+## Tips
+
+1. **Start from an existing config.** Copy a config file from `configs/gym/` and modify it for your task.
+2. **Use Python configs for development.** They provide IDE auto-completion and type checking.
+3. **Use JSON configs for experiments.** They are easier to version, diff, and share.
+4. **Validate configs early.** Run your environment with a short episode count to catch config errors before long training runs.
+5. **Keep config pairs together.** For action-bank tasks, version `gym_config.json` and `action_config.json` together.
+
+---
+
+## See Also
+
+- [Custom Functors Guide](custom_functors.md) — How to write observation, reward, event, and action functors
+- [Embodied Environments](../overview/gym/env.md) — Full environment configuration reference
+- [Tutorial: Modular Environment](../tutorial/modular_env.rst) — Complete example using config-driven setup
+- [Tutorial: RL Training](../tutorial/rl.rst) — RL training configuration walkthrough
diff --git a/docs/source/guides/custom_functors.md b/docs/source/guides/custom_functors.md
new file mode 100644
index 00000000..383754f1
--- /dev/null
+++ b/docs/source/guides/custom_functors.md
@@ -0,0 +1,390 @@
+# Writing Custom Functors
+
+Functors are the building blocks of EmbodiChain's manager system. They define how observations are computed, rewards are calculated, events are triggered, actions are preprocessed, and datasets are recorded.
+
+This guide explains the two functor styles (function and class), how to register them in manager configs, and provides examples for each functor type.
+
+---
+
+## Functor Basics
+
+Every functor is configured through a `FunctorCfg` object with three fields:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `func` | `Callable \| Functor` | The function or class to call. **Required.** |
+| `params` | `dict` | Keyword arguments passed to the function. |
+| `extra` | `dict` | Optional metadata (e.g., observation shapes). |
+
+The `func` field can be:
+- A **function** (callable) — receives the environment as the first argument, plus any `params` as keyword arguments.
+- A **class** inheriting from `Functor` — instantiated with `(cfg, env)`, then called via `__call__`.
+
+---
+
+## Function-Style Functors
+
+Function-style functors are plain Python functions. They are stateless and easy to write. Use them when your functor is a simple computation that doesn't need to maintain state between calls.
+
+### General Pattern
+
+```python
+def my_functor(env, obs, **kwargs) -> torch.Tensor:
+    """Compute something from the environment state.
+
+    Args:
+        env: The environment instance.
+        obs: The current observation dictionary.
+        **kwargs: Additional parameters from FunctorCfg.params.
+
+    Returns:
+        A tensor of shape (num_envs, ...).
+    """
+    # Access environment state
+    value = compute_value(env)
+
+    return value
+```
+
+The exact signature depends on the functor type (see below).
+
+### Example: Observation Functor
+
+Observation functors receive `(env, obs)` plus any params. They must return a tensor.
+
+```python
+from __future__ import annotations
+import torch
+from embodichain.lab.gym.envs import EmbodiedEnv
+from embodichain.lab.gym.envs.managers.observations import EnvObs
+from embodichain.lab.sim.cfg import SceneEntityCfg
+
+
+def get_object_height(
+    env: EmbodiedEnv,
+    obs: EnvObs,
+    entity_cfg: SceneEntityCfg,
+) -> torch.Tensor:
+    """Get the Z-coordinate (height) of an object.
+
+    Args:
+        env: The environment instance.
+        obs: The current observation dictionary.
+        entity_cfg: Scene entity configuration with the object UID.
+
+    Returns:
+        Tensor of shape (num_envs, 1) with the object height.
+    """
+    obj = env.sim.get_rigid_object(entity_cfg.uid)
+    pose = obj.get_local_pose(to_matrix=True)  # (num_envs, 4, 4)
+    height = pose[:, 2, 3:4]  # Extract Z from translation
+    return height
+```
+
+Register it in your environment config:
+
+```python
+from embodichain.lab.gym.envs.managers.cfg import ObservationCfg, SceneEntityCfg
+from embodichain.utils import configclass
+
+
+@configclass
+class MyObsCfg:
+    obj_height: ObservationCfg = ObservationCfg(
+        func=get_object_height,
+        mode="add",
+        name="object/height",
+        params={"entity_cfg": SceneEntityCfg(uid="my_cube")},
+    )
+```
+
+Or in JSON:
+
+```json
+"observations": {
+    "obj_height": {
+        "func": "get_object_height",
+        "mode": "add",
+        "name": "object/height",
+        "params": {"entity_cfg": {"uid": "my_cube"}}
+    }
+}
+```
+
+### Example: Reward Functor
+
+Reward functors receive `(env, obs, action, info)` plus any params. They return a tensor of shape `(num_envs,)`.
+
+```python
+import torch
+from embodichain.lab.gym.envs import EmbodiedEnv
+from embodichain.lab.sim.cfg import SceneEntityCfg
+
+
+def target_height_reward(
+    env: EmbodiedEnv,
+    obs: dict,
+    action,
+    info: dict,
+    entity_cfg: SceneEntityCfg = None,
+    target_height: float = 0.5,
+) -> torch.Tensor:
+    """Reward for lifting an object to a target height.
+
+    Returns:
+        Negative distance to the target height. Shape (num_envs,).
+    """
+    obj = env.sim.get_rigid_object(entity_cfg.uid)
+    pose = obj.get_local_pose(to_matrix=True)
+    current_height = pose[:, 2, 3]
+    return -torch.abs(current_height - target_height)
+```
+
+Register it:
+
+```python
+from embodichain.lab.gym.envs.managers.cfg import RewardCfg
+from embodichain.utils import configclass
+
+
+@configclass
+class MyRewardCfg:
+    lift_reward: RewardCfg = RewardCfg(
+        func=target_height_reward,
+        weight=1.0,
+        params={
+            "entity_cfg": SceneEntityCfg(uid="my_cube"),
+            "target_height": 0.5,
+        },
+    )
+```
+
+---
+
+## Class-Style Functors
+
+Class-style functors inherit from `Functor` and implement `__init__(cfg, env)` and `__call__(...)`. Use them when you need to:
+
+- Maintain state across calls (e.g., caching, counters)
+- Perform expensive initialization once
+- Implement a `reset()` method for per-episode cleanup
+
+### General Pattern
+
+```python
+from embodichain.lab.gym.envs.managers import Functor
+from embodichain.lab.gym.envs.managers.cfg import FunctorCfg
+
+
+class MyFunctor(Functor):
+    """A stateful functor."""
+
+    def __init__(self, cfg: FunctorCfg, env):
+        super().__init__(cfg, env)
+        # Initialize state, buffers, etc.
+        self._counter = 0
+
+    def reset(self, env_ids=None):
+        """Called on environment reset."""
+        self._counter = 0
+
+    def __call__(self, env, obs, **kwargs):
+        """Called every step."""
+        self._counter += 1
+        # Compute and return result
+```
+
+### Example: Observation Functor with Caching
+
+```python
+from __future__ import annotations
+import torch
+from embodichain.lab.gym.envs import EmbodiedEnv
+from embodichain.lab.gym.envs.managers import Functor
+from embodichain.lab.gym.envs.managers.cfg import FunctorCfg, ObservationCfg
+from embodichain.lab.sim.cfg import SceneEntityCfg
+
+
+class get_object_mass(Functor):
+    """Get the mass of a rigid object, with caching.
+
+    Caches the result to avoid repeated queries to the physics engine.
+    Cache is cleared on environment reset.
+    """
+
+    def __init__(self, cfg: FunctorCfg, env: EmbodiedEnv):
+        super().__init__(cfg, env)
+        self._cache = {}
+
+    def reset(self, env_ids=None):
+        self._cache.clear()
+
+    def __call__(
+        self,
+        env: EmbodiedEnv,
+        obs,
+        entity_cfg: SceneEntityCfg,
+    ) -> torch.Tensor:
+        uid = entity_cfg.uid
+        if uid in self._cache:
+            return self._cache[uid].clone()
+
+        obj = env.sim.get_rigid_object(uid)
+        mass = obj.get_mass()  # (num_envs, 1)
+
+        self._cache[uid] = mass.clone()
+        return mass
+```
+
+### Example: Action Functor
+
+Action functors inherit from `ActionTerm` and implement `process_action`. They transform raw policy actions into robot control commands.
+
+```python
+from __future__ import annotations
+import torch
+from embodichain.lab.gym.envs.managers.actions import ActionTerm
+from embodichain.lab.gym.envs.managers.cfg import ActionTermCfg
+
+
+class DeltaQposTerm(ActionTerm):
+    """Delta joint position: current_qpos + scale * action -> target qpos.
+
+    The policy outputs a position offset, which is added to the current
+    joint positions to get the target.
+    """
+
+    def __init__(self, cfg: ActionTermCfg, env):
+        super().__init__(cfg, env)
+        self._scale = cfg.params.get("scale", 1.0)
+
+    @property
+    def input_key(self) -> str:
+        return "qpos"
+
+    @property
+    def action_dim(self) -> int:
+        return len(self._env.active_joint_ids)
+
+    def process_action(self, action: torch.Tensor) -> torch.Tensor:
+        return action * self._scale + self._env.robot.get_qpos()
+```
+
+Register it in JSON config:
+
+```json
+"actions": {
+    "delta_qpos": {
+        "func": "DeltaQposTerm",
+        "params": {"scale": 0.1}
+    }
+}
+```
+
+---
+
+## Functor Signature Reference
+
+Each functor type has a specific call signature:
+
+### Observation Functors
+
+```python
+def my_obs_functor(env, obs, **params) -> torch.Tensor
+```
+
+- `env`: The environment instance.
+- `obs`: The current observation dictionary.
+- Additional params from `ObservationCfg.params`.
+- Returns: tensor of shape `(num_envs, ...)`.
+
+Config class: `ObservationCfg` with `mode` (`"add"` or `"modify"`) and `name`.
+
+### Reward Functors
+
+```python
+def my_reward_functor(env, obs, action, info, **params) -> torch.Tensor
+```
+
+- `env`: The environment instance.
+- `obs`: The current observation dictionary.
+- `action`: The action taken this step.
+- `info`: The info dictionary.
+- Additional params from `RewardCfg.params`.
+- Returns: tensor of shape `(num_envs,)`.
+
+Config class: `RewardCfg` with `weight` and `mode` (`"add"` or `"replace"`).
+
+### Event Functors
+
+```python
+def my_event_functor(env, env_ids, **params) -> None
+```
+
+- `env`: The environment instance.
+- `env_ids`: The environment IDs affected by this event.
+- Additional params from `EventCfg.params`.
+- Returns: `None` (events modify the environment in-place).
+
+Config class: `EventCfg` with `mode` (`"startup"`, `"reset"`, or `"interval"`) and `interval_step`.
+
+### Action Functors
+
+```python
+class MyActionTerm(ActionTerm):
+    def process_action(self, action: torch.Tensor) -> torch.Tensor
+```
+
+- `action`: Raw action from the policy, shape `(num_envs, action_dim)`.
+- Returns: transformed action tensor.
+
+Config class: `ActionTermCfg` with `mode` (`"pre"` or `"post"`).
+
+### Dataset Functors
+
+Dataset functors handle recording and saving. In most cases you should use the built-in `LeRobotRecorder` rather than writing a custom one.
+
+Config class: `DatasetFunctorCfg` with `mode` (`"save"`).
+
+---
+
+## Using `SceneEntityCfg` in Params
+
+Many functors need to reference scene objects (robots, rigid objects, sensors). Instead of passing string UIDs directly, use `SceneEntityCfg`:
+
+```python
+from embodichain.lab.sim.cfg import SceneEntityCfg
+
+params = {
+    "entity_cfg": SceneEntityCfg(uid="my_cube"),
+}
+```
+
+The manager automatically resolves `SceneEntityCfg` objects to the actual simulation entities at runtime.
+
+---
+
+## File Placement
+
+| Functor Type | Recommended Location |
+|---|---|
+| Observation | `embodichain/lab/gym/envs/managers/observations.py` |
+| Reward | `embodichain/lab/gym/envs/managers/rewards.py` |
+| Event | `embodichain/lab/gym/envs/managers/events.py` or `embodichain/lab/gym/envs/managers/randomization/` |
+| Action | `embodichain/lab/gym/envs/managers/actions.py` |
+| Dataset | `embodichain/lab/gym/envs/managers/datasets.py` |
+
+For task-specific functors, place them in the task module file (e.g., alongside the task environment class).
+
+Remember to:
+- Add the functor to `__all__` in the module.
+- Add the Apache 2.0 license header.
+- Use type annotations with `from __future__ import annotations`.
+
+---
+
+## See Also
+
+- [Configuration Guide](configuration.md) — How to set up `@configclass` configs and JSON files
+- [Embodied Environments](../overview/gym/env.md) — Full environment architecture
+- [Tutorial: Modular Environment](../tutorial/modular_env.rst) — Using functors in a complete environment
diff --git a/docs/source/guides/index.rst b/docs/source/guides/index.rst
index e5c0f2de..f44ad5a0 100644
--- a/docs/source/guides/index.rst
+++ b/docs/source/guides/index.rst
@@ -1,10 +1,14 @@
 How-to Guides
-=========
+=============
+
+Practical guides for common tasks in EmbodiChain.
 
 .. toctree::
    :maxdepth: 1
    :hidden:
 
+   custom_functors
+   configuration
    add_robot
    cli
 
diff --git a/docs/source/index.rst b/docs/source/index.rst
index c3a47f2f..f2f2a252 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,7 +1,9 @@
 EmbodiChain Documentation
 =========================
 
-Welcome to the EmbodiChain! 
+EmbodiChain is a GPU-accelerated robotics simulation framework for embodied AI research. It provides tools for building generating and processing simulation assets and scenes, creating robot learning environments, generating expert demonstration data, training policies with imitation learning and reinforcement learning, and deploying models into real world.
+
+The framework is built on top of `DexSim <https://github.com/DexForce>`_, a high-performance physics and rendering engine, designed for Embodied AI research and production use.
 
 Table of Contents
 =================
@@ -38,7 +40,7 @@ Table of Contents
    :glob:
 
    features/online_data.md
-   features/agents.md
+   features/generative_sim/index*
    features/workspace_analyzer/index*
    features/interaction/index*
    features/toolkits/index*
@@ -52,6 +54,7 @@ Table of Contents
    resources/robot/index*
    resources/task/index*
    resources/roadmap.md
+   resources/publications/README.md
 
 .. toctree::
    :maxdepth: 2
@@ -59,4 +62,3 @@ Table of Contents
    :titlesonly:
 
    api_reference/index
-
diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst
index 3f2254d4..d437b4fb 100644
--- a/docs/source/introduction.rst
+++ b/docs/source/introduction.rst
@@ -1,59 +1,73 @@
-.. EmbodiChain documentation master file, created by
-   sphinx-quickstart on Tue Nov 19 11:00:25 2024.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
-
 EmbodiChain
-======================================
+===========
 
 .. image:: ../../assets/imgs/teaser.jpg
-   :alt: teaser
-
----
 
-EmbodiChain is an end-to-end, GPU-accelerated framework for Embodied AI. It streamlines research and development by unifying high-performance simulation, real-to-sim data pipelines, modular model architectures, and efficient training workflows. This integration enables rapid experimentation, seamless deployment of intelligent agents, and effective Sim2Real transfer for real-world robotic systems.
+EmbodiChain is an end-to-end, GPU-accelerated framework for Embodied AI.
+It streamlines research and development by unifying high-performance
+simulation, automated generative data pipelines, modular model
+architectures, and efficient training workflows. This integration
+enables rapid experimentation, seamless deployment of intelligent
+agents, and effective Sim2Real transfer for real-world robotic systems.
 
 .. NOTE::
-   EmbodiChain is in Alpha and under active development:
-
-   * More features will be continually added in the coming months. You can find more details in the `roadmap <https://dexforce.github.io/EmbodiChain/resources/roadmap.html>`_.
-   * Since this is an early release, we welcome feedback (bug reports, feature requests, etc.) via GitHub Issues.
-
+   EmbodiChain is in Alpha and under active development: * More
+   features will be continually added in the coming months. You can find
+   more details in the
+   `roadmap <https://dexforce.github.io/EmbodiChain/resources/roadmap.html>`__.
+   * Since this is an early release, we welcome feedback (bug reports,
+   feature requests, etc.) via GitHub Issues.
 
 Key Features
 ------------
 
-* 🚀 **High-Fidelity GPU Simulation**: Realistic physics for rigid & deformable objects, advanced ray-traced sensors, all GPU-accelerated for high-throughput batch simulation.
-* 🤖 **Unified Robot Learning Environment**: Standardized interfaces for Imitation Learning, Reinforcement Learning, and more.
-* 📊 **Scalable Data Pipeline**: Automated data collection, efficient processing, and large-scale generation for model training.
-* ⚡ **Efficient Training & Evaluation**: Online data streaming, parallel environment rollouts, and modern training paradigms.
-* 🧩 **Modular & Extensible**: Easily integrate new robots, environments, and learning algorithms.
+- 🚀 **High-Fidelity GPU Simulation**: Realistic physics for rigid &
+  deformable objects, advanced ray-traced sensors, all GPU-accelerated
+  for high-throughput batch simulation.
+- 🤖 **Unified Robot Learning Environment**: Standardized interfaces for
+  Imitation Learning, Reinforcement Learning, and more.
+- 📊 **Scalable Data Pipeline**: Automated data collection, efficient
+  processing, and large-scale generation for model training.
+- ⚡ **Efficient Training & Evaluation**: Online data streaming,
+  parallel environment rollouts, and modern training paradigms.
+- 🧩 **Modular & Extensible**: Easily integrate new robots,
+  environments, and learning algorithms.
 
 The figure below illustrates the overall architecture of EmbodiChain:
 
 .. image:: ../../assets/imgs/frameworks.jpg
-   :alt: frameworks
+   :align: center
 
 Getting Started
 ---------------
 
 To get started with EmbodiChain, follow these steps:
 
-* `Installation Guide <https://dexforce.github.io/EmbodiChain/quick_start/install.html>`_
-* `Quick Start Tutorial <https://dexforce.github.io/EmbodiChain/tutorial/index.html>`_
-* `API Reference <https://dexforce.github.io/EmbodiChain/api_reference/index.html>`_
+- `Installation
+  Guide <https://dexforce.github.io/EmbodiChain/main/quick_start/install.html>`__
+- `Quick Start
+  Tutorial <https://dexforce.github.io/EmbodiChain/main/tutorial/index.html>`__
+- `API
+  Reference <https://dexforce.github.io/EmbodiChain/main/api_reference/index.html>`__
 
+Contribution Guide
+------------------
+
+We welcome contributions! Please see the
+`CONTRIBUTING.md <CONTRIBUTING.md>`__ file in this repository for
+guidelines on how to get started.
 
 Citation
 --------
 
-If you find EmbodiChain helpful for your research, please consider citing our work:
+If you find EmbodiChain helpful for your research, please consider
+citing our work:
 
 .. code-block:: bibtex
 
    @misc{EmbodiChain,
      author = {EmbodiChain Developers},
-     title = {EmbodiChain: An end-to-end, GPU-accelerated, and modular platform for building generalized Embodied Intelligence.},
+     title = {EmbodiChain: An end-to-end, GPU-accelerated, and modular platform for building generalized Embodied Intelligence},
      month = {November},
      year = {2025},
      url = {https://github.com/DexForce/EmbodiChain}
@@ -68,15 +82,14 @@ If you find EmbodiChain helpful for your research, please consider citing our wo
       month = {October},
       year = {2025},
       journal = {TechRxiv}
-   }
+      }
 
 .. code-block:: bibtex
 
    @inproceedings{Sim2RealVLA,
-      title = {Sim2Real {VLA}: Zero-Shot Generalization of Synthesized Skills to Realistic Manipulation},
-      author = {Runyi Zhao, Sheng Xu, Ruixing Jin, Yueci Deng, Yunxin Tai, Kui Jia, Guiliang Liu},
-      booktitle = {The Fourteenth International Conference on Learning Representations, ICLR},
-      year = {2026},
-      url = {https://openreview.net/forum?id=H4SyKHjd4c}
+       title = {Sim2Real {VLA}: Zero-Shot Generalization of Synthesized Skills to Realistic Manipulation},
+       author = {Runyi Zhao, Sheng Xu, Ruixing Jin, Yueci Deng, Yunxin Tai, Kui Jia, Guiliang Liu},
+       booktitle = {The Fourteenth International Conference on Learning Representations, ICLR},
+       year = {2026},
+       url = {https://openreview.net/forum?id=H4SyKHjd4c}
    }
-
diff --git a/docs/source/overview/gym/action_functors.md b/docs/source/overview/gym/action_functors.md
index 670fa078..225424da 100644
--- a/docs/source/overview/gym/action_functors.md
+++ b/docs/source/overview/gym/action_functors.md
@@ -5,6 +5,10 @@
 
 This page lists all available action terms that can be used with the Action Manager. Action terms are configured using {class}`~cfg.ActionTermCfg` and are responsible for processing raw actions from the policy and converting them to the format expected by the robot (e.g., qpos, qvel, qf).
 
+````{tip}
+**Using an AI coding agent?** Use the **`/add-functor`** skill to scaffold a new action term with the correct class structure, `ActionTermCfg` registration, and module placement in `actions.py`.
+````
+
 ## Joint Position Control
 
 ```{list-table} Joint Position Action Terms
diff --git a/docs/source/overview/gym/dataset_functors.md b/docs/source/overview/gym/dataset_functors.md
index a418bc6e..c043ee68 100644
--- a/docs/source/overview/gym/dataset_functors.md
+++ b/docs/source/overview/gym/dataset_functors.md
@@ -5,6 +5,10 @@
 
 This page lists all available dataset functors that can be used with the Dataset Manager. Dataset functors are configured using {class}`~cfg.DatasetFunctorCfg` and are responsible for collecting and saving episode data during environment interaction.
 
+````{tip}
+**Using an AI coding agent?** Use the **`/add-functor`** skill to scaffold a new dataset functor with the correct signature, `DatasetFunctorCfg` registration, and module placement in `datasets.py`.
+````
+
 ## Recording Functors
 
 ```{list-table} Dataset Recording Functors
diff --git a/docs/source/overview/gym/env.md b/docs/source/overview/gym/env.md
index fa7c9bc9..88f44fb9 100644
--- a/docs/source/overview/gym/env.md
+++ b/docs/source/overview/gym/env.md
@@ -229,6 +229,16 @@ In JSON config, use the ``actions`` section:
 
 ## Creating a Custom Task
 
+````{tip}
+**Using an AI coding agent?** The following skills can scaffold boilerplate for you:
+
+- **`/add-task-env`** — Generate a new task environment with the correct file structure, `@register_env` decorator, base class methods, `__init__.py` update, and test stub.
+- **`/add-functor`** — Add observation, reward, event, or randomization functors with the correct signature and module placement.
+- **`/add-test`** — Write tests following project conventions (pytest or class style, mock patterns, correct file placement).
+- **`/pre-commit-check`** — Run all local CI checks (black, headers, `__all__`, type annotations) before committing.
+
+````
+
 ### For Reinforcement Learning Tasks
 
 Inherit from {class}`~envs.EmbodiedEnv` and implement the task-specific logic. Configure the Action Manager via ``actions`` in your config:
@@ -295,6 +305,8 @@ For a complete example of a modular environment setup, please refer to the {ref}
 - {ref}`tutorial_modular_env` - Advanced modular environment setup
 - {ref}`tutorial_rl` - Reinforcement learning training guide
 - {doc}`/api_reference/embodichain/embodichain.lab.gym.envs` - Complete API reference for EmbodiedEnv and configurations
+- {doc}`/guides/custom_functors` - How to write custom functors
+- {doc}`/guides/configuration` - Configuration system guide
 
 ```{toctree}
 :maxdepth: 1
diff --git a/docs/source/overview/gym/event_functors.md b/docs/source/overview/gym/event_functors.md
index 2ddbb19f..fb110ef3 100644
--- a/docs/source/overview/gym/event_functors.md
+++ b/docs/source/overview/gym/event_functors.md
@@ -5,6 +5,10 @@
 
 This page lists all available event functors that can be used with the Event Manager. Event functors are configured using {class}`~cfg.EventCfg` and can be triggered at different stages: ``startup``, ``reset``, or ``interval``.
 
+````{tip}
+**Using an AI coding agent?** Use the **`/add-functor`** skill to scaffold a new event or randomization functor with the correct signature (`env, env_ids, ...`), function or class style, and module placement. Use **`/add-test`** to generate mock-based tests.
+````
+
 ## Physics Randomization
 
 ```{list-table} Physics Randomization Functors
@@ -79,6 +83,27 @@ This page lists all available event functors that can be used with the Event Man
      "params": {"color_range": [[0.6, 0.6, 0.6], [1, 1, 1]],
                 "intensity_range": [0.5, 2.0]}}
     ```
+* - {class}`~randomization.visual.randomize_indirect_lighting`
+  - Randomize indirect (IBL) lighting or emissive light. Implemented as a Functor class. Operates in one of two **mutually exclusive** modes — configuring both raises a ``ValueError``:
+
+    **HDR mode** — provide ``path`` pointing to a folder of ``.hdr`` files. A random file is selected on each call and applied as the environment map. The ``path`` is resolved via ``get_data_path``, supporting absolute paths, data-root-relative paths, and dataset-class paths.
+
+    ```json
+    {"func": "randomize_indirect_lighting",
+     "mode": "interval", "interval_step": 10,
+     "params": {"path": "EnvMapHDR/EnvMapHDR"}}
+    ```
+
+    **Emissive mode** — provide ``emissive_color_range`` (pair of RGB lists) and/or ``emissive_intensity_range`` (pair of floats). Color and intensity are sampled uniformly on each call and applied via ``set_emission_light``.
+
+    ```json
+    {"func": "randomize_indirect_lighting",
+     "mode": "interval", "interval_step": 10,
+     "params": {"emissive_color_range": [[0.8, 0.8, 0.8], [1.0, 1.0, 1.0]],
+                "emissive_intensity_range": [80.0, 150.0]}}
+    ```
+
+    Applies the same lighting to all environments.
 * - {func}`~randomization.visual.randomize_camera_extrinsics`
   - Randomize camera poses for viewpoint diversity. Supports both attach mode (pos/euler perturbation) and look_at mode (eye/target/up perturbation).
 
diff --git a/docs/source/overview/gym/observation_functors.md b/docs/source/overview/gym/observation_functors.md
index bf2b7915..bb67cce6 100644
--- a/docs/source/overview/gym/observation_functors.md
+++ b/docs/source/overview/gym/observation_functors.md
@@ -5,6 +5,10 @@
 
 This page lists all available observation functors that can be used with the Observation Manager. Observation functors are configured using {class}`~cfg.ObservationCfg` and can operate in two modes: ``modify`` (update existing observations) or ``add`` (add new observations).
 
+````{tip}
+**Using an AI coding agent?** Use the **`/add-functor`** skill to scaffold a new observation functor with the correct signature (`env, obs, entity_cfg, ...`), module placement in `observations.py`, and `__all__` export. Use **`/add-test`** to generate mock-based tests.
+````
+
 ## Pose Computations
 
 ```{list-table} Pose Computation Functors
diff --git a/docs/source/overview/gym/reward_functors.md b/docs/source/overview/gym/reward_functors.md
index ad0255fd..fb91cbf0 100644
--- a/docs/source/overview/gym/reward_functors.md
+++ b/docs/source/overview/gym/reward_functors.md
@@ -5,6 +5,10 @@
 
 This page lists all available reward functors that can be used with the Reward Manager. Reward functors are configured using {class}`~cfg.RewardCfg` and return scalar reward tensors that are weighted and summed to form the total environment reward.
 
+````{tip}
+**Using an AI coding agent?** Use the **`/add-functor`** skill to scaffold a new reward functor with the correct signature (`env, obs, action, info, ...`), module placement in `rewards.py`, and `__all__` export. Use **`/add-test`** to generate mock-based tests.
+````
+
 ## Distance-Based Rewards
 
 ```{list-table} Distance-Based Reward Functors
diff --git a/docs/source/overview/rl/index.rst b/docs/source/overview/rl/index.rst
index cac282f4..df2fd29e 100644
--- a/docs/source/overview/rl/index.rst
+++ b/docs/source/overview/rl/index.rst
@@ -79,3 +79,11 @@ See also
     config.md
     train_script.md
     multi_gpu.md
+
+See Also
+--------
+
+- :doc:`/tutorial/rl` — Step-by-step RL training tutorial
+- :doc:`/overview/gym/env` — EmbodiedEnv configuration and Action Manager
+- :doc:`/features/online_data` — Online data streaming pipeline
+- :doc:`/resources/task/index` — Available RL task environments
diff --git a/docs/source/overview/sim/atomic_actions.md b/docs/source/overview/sim/atomic_actions.md
new file mode 100644
index 00000000..979df571
--- /dev/null
+++ b/docs/source/overview/sim/atomic_actions.md
@@ -0,0 +1,241 @@
+# Atomic Actions
+
+```{currentmodule} embodichain.lab.sim.atomic_actions
+```
+
+Atomic actions are the building blocks for automated robot motion generation. Each action encapsulates a complete, self-contained motion primitive — such as picking up an object or moving to a pose — that can be chained together to form complex manipulation workflows.
+
+## Design Overview
+
+The module is organized into three layers:
+
+```
+AtomicActionEngine          ← orchestrates a sequence of actions
+    │
+    ├── AtomicAction(s)     ← each action plans one motion primitive
+    │       │
+    │       └── MotionGenerator   ← low-level trajectory planner (IK + trajectory optimization)
+    │
+    └── SemanticAnalyzer    ← resolves object labels → ObjectSemantics
+```
+
+Each action receives a target (object semantics or a pose tensor), runs its planning pipeline,
+and returns a joint trajectory.  The engine threads the end state of each action as the start
+state of the next, then concatenates all trajectories into one contiguous sequence:
+
+```
+ObjectSemantics ──► AffordanceEstimation ──► AtomicAction.execute()
+(label + geometry                              │
+ + affordance                                  ├─ IK solve
+ + entity)                                     ├─ Motion plan
+                                               └─ Gripper interpolation
+                                                      │
+AtomicActionEngine ◄─────────────── PlanResult ───────┘
+(sequences actions, accumulates
+ full-robot trajectory)
+```
+
+### Core Concepts
+
+**`ObjectSemantics`** describes an interaction target. It bundles:
+- `geometry` — mesh data (vertices, triangles) used for grasp annotation
+- `affordance` — *how* to interact with the object (e.g. antipodal grasp poses)
+- `entity` — a live reference to the simulation object, so actions can read its current pose
+
+**`Affordance`** is a data class that encodes a specific interaction capability. The built-in affordance types are:
+
+| Class | Use case |
+|---|---|
+| `AntipodalAffordance` | Parallel-jaw grasping via antipodal point pairs |
+| `InteractionPoints` | Contact-based interactions (push, poke, touch) |
+
+**`AtomicAction`** is the abstract base class for all motion primitives. Every action must implement:
+- `execute(target, start_qpos)` — plan and return a joint trajectory
+- `validate(target, start_qpos)` — fast feasibility check without full planning
+
+**`AtomicActionEngine`** manages a named registry of actions and runs them in sequence via `execute_static()`, threading the end state of each action as the start state of the next.
+
+---
+
+## Built-in Actions
+
+(supported_atomic_actions)=
+
+The following actions are available out of the box:
+
+| Action | Config class | Target type | Motion phases |
+|---|---|---|---|
+| `MoveAction` | `MoveActionCfg` | `Tensor (4,4)` — EEF pose | Move arm to pose |
+| `PickUpAction` | `PickUpActionCfg` | `ObjectSemantics` or `Tensor (4,4)` | Approach → close gripper → lift |
+| `PlaceAction` | `PlaceActionCfg` | `Tensor (4,4)` — EEF release pose | Lower → open gripper → retract |
+
+### `MoveAction`
+
+Moves the end-effector to a target pose in free space.
+
+| Config field | Default | Description |
+|---|---|---|
+| `control_part` | `"arm"` | Robot control part to move |
+| `sample_interval` | `50` | Number of waypoints in the trajectory |
+
+**Target:** `torch.Tensor` of shape `(4, 4)` or `(n_envs, 4, 4)` — a homogeneous EEF pose.
+
+---
+
+### `PickUpAction`
+
+Three-phase grasp motion: *approach → close gripper → lift*.
+
+| Config field | Default | Description |
+|---|---|---|
+| `approach_direction` | `[0, 0, -1]` | Gripper approach direction in object frame |
+| `pre_grasp_distance` | `0.15` | Hover distance before descending (m) |
+| `lift_height` | `0.10` | Lift height after grasping (m) |
+| `hand_open_qpos` | `None` | **Required.** Gripper open joint positions |
+| `hand_close_qpos` | `None` | **Required.** Gripper closed joint positions |
+| `hand_control_part` | `"hand"` | Robot control part for the gripper |
+| `hand_interp_steps` | `5` | Waypoints for the gripper close phase |
+| `sample_interval` | `80` | Total waypoints across all three phases |
+
+**Target:** `ObjectSemantics` (grasp pose computed automatically) **or** a `torch.Tensor` EEF pose.
+
+---
+
+### `PlaceAction`
+
+Three-phase release motion: *lower → open gripper → retract*. Mirrors `PickUpAction`.
+
+Inherits all gripper config fields from `GraspActionCfg`. The `approach_direction` field is not used — the arm moves straight down to the target pose.
+
+**Target:** `torch.Tensor` of shape `(4, 4)` or `(n_envs, 4, 4)` — the EEF pose at release.
+
+---
+
+## Typical Workflow
+
+```python
+from embodichain.lab.sim.atomic_actions import (
+    AtomicActionEngine,
+    ObjectSemantics,
+    AntipodalAffordance,
+    PickUpActionCfg,
+    PlaceActionCfg,
+    MoveActionCfg,
+)
+
+# 1. Configure each action
+pickup_cfg = PickUpActionCfg(
+    control_part="arm",
+    hand_control_part="hand",
+    hand_open_qpos=torch.tensor([0.0, 0.0]),
+    hand_close_qpos=torch.tensor([0.025, 0.025]),
+)
+place_cfg  = PlaceActionCfg(...)
+move_cfg   = MoveActionCfg(control_part="arm")
+
+# 2. Build the engine — action order matches target_list order
+engine = AtomicActionEngine(
+    motion_generator=motion_gen,
+    actions_cfg_list=[pickup_cfg, place_cfg, move_cfg],
+)
+
+# 3. Describe the object to pick
+semantics = ObjectSemantics(
+    label="mug",
+    geometry={"mesh_vertices": ..., "mesh_triangles": ...},
+    affordance=AntipodalAffordance(object_label="mug", ...),
+    entity=mug,
+)
+
+# 4. Plan the full sequence and replay
+is_success, traj = engine.execute_static(
+    target_list=[semantics, place_pose, rest_pose]
+)
+# traj: (n_envs, n_waypoints, dof)
+```
+
+---
+
+## How to Extend: Adding a Custom Action
+
+You can add any motion primitive by subclassing `AtomicAction` and registering it with the engine.
+
+### Step 1 — Define the config
+
+```python
+from embodichain.utils import configclass
+from embodichain.lab.sim.atomic_actions import ActionCfg
+
+@configclass
+class PushActionCfg(ActionCfg):
+    name: str = "push"
+    push_distance: float = 0.05  # metres to push forward
+    push_speed: int = 30          # waypoints for the push phase
+```
+
+### Step 2 — Implement the action
+
+```python
+import torch
+from typing import Optional, Union
+from embodichain.lab.sim.atomic_actions import AtomicAction, ObjectSemantics
+from embodichain.lab.sim.planners import PlanState, MoveType
+
+class PushAction(AtomicAction):
+    def __init__(self, motion_generator, cfg: PushActionCfg | None = None):
+        super().__init__(motion_generator, cfg=cfg or PushActionCfg())
+        self.arm_joint_ids = self.robot.get_joint_ids(name=self.cfg.control_part)
+
+    def execute(
+        self,
+        target: Union[torch.Tensor, ObjectSemantics],
+        start_qpos: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> tuple[bool, torch.Tensor, list]:
+        # Resolve target to a batched [n_envs, 4, 4] EEF pose
+        # ... your planning logic here ...
+        return is_success, trajectory, self.arm_joint_ids
+
+    def validate(self, target, start_qpos=None, **kwargs) -> bool:
+        return True  # add IK check here if needed
+```
+
+### Step 3 — Register and use
+
+```python
+from embodichain.lab.sim.atomic_actions import register_action
+
+register_action("push", PushAction, PushActionCfg)
+
+engine = AtomicActionEngine(
+    motion_generator=motion_gen,
+    actions_cfg_list=[PushActionCfg(push_distance=0.08)],
+)
+is_success, traj = engine.execute_static(target_list=[target_pose])
+```
+
+> **Tip:** The `execute()` return signature is always `(is_success, trajectory, joint_ids)`.  
+> `trajectory` has shape `(n_envs, n_waypoints, len(joint_ids))`.  
+> `joint_ids` tells the engine which columns of the full robot DOF vector the trajectory covers.
+
+---
+
+## Target Resolution
+
+`AtomicActionEngine` accepts several target formats in `target_list`, giving you flexibility without boilerplate:
+
+| Input type | Resolved to |
+|---|---|
+| `torch.Tensor (4,4)` or `(n_envs,4,4)` | EEF pose, broadcast across envs |
+| `ObjectSemantics` | Passed directly to the action |
+| `str` (object label) | Looked up in `SemanticAnalyzer` cache |
+| `dict` with `"pose"` key | Unwrapped to tensor |
+| `dict` with `"label"` key | Analyzed via `SemanticAnalyzer` |
+
+---
+
+## Further Reading
+
+- {doc}`planners/motion_generator` — the trajectory planner used by every action
+- {doc}`sim_robot` — how control parts and IK solvers are configured
+- Tutorial: `scripts/tutorials/sim/atomic_actions.py`
diff --git a/docs/source/overview/sim/index.rst b/docs/source/overview/sim/index.rst
index 56f98ef2..60cdfd56 100644
--- a/docs/source/overview/sim/index.rst
+++ b/docs/source/overview/sim/index.rst
@@ -22,3 +22,4 @@ Overview of the Simulation Framework:
    sim_sensor.md
    solvers/index
    planners/index
+   atomic_actions.md
diff --git a/docs/source/overview/sim/sim_manager.md b/docs/source/overview/sim/sim_manager.md
index b7d86691..5897dfd0 100644
--- a/docs/source/overview/sim/sim_manager.md
+++ b/docs/source/overview/sim/sim_manager.md
@@ -33,9 +33,7 @@ sim_config = SimulationManagerCfg(
 | `width` | `int` | `1920` | The width of the simulation window. |
 | `height` | `int` | `1080` | The height of the simulation window. |
 | `headless` | `bool` | `False` | Whether to run the simulation in headless mode (no Window). |
-| `enable_rt` | `bool` | `False` | Whether to enable ray tracing rendering. |
-| `enable_denoiser` | `bool` | `True` | Whether to enable denoising for ray tracing rendering. |
-| `spp` | `int` | `64` | Samples per pixel for ray tracing rendering. Only valid when ray tracing is enabled and denoiser is False. |
+| `render_cfg` | `RenderCfg` | `RenderCfg()` | The rendering configuration parameters. |
 | `gpu_id` | `int` | `0` | The gpu index that the simulation engine will be used. Affects gpu physics device. |
 | `thread_mode` | `ThreadMode` | `RENDER_SHARE_ENGINE` | The threading mode for the simulation engine. |
 | `cpu_num` | `int` | `1` | The number of CPU threads to use for the simulation engine. |
@@ -60,6 +58,29 @@ The {class}`~cfg.PhysicsCfg` class controls the global physics simulation parame
 
 For more parameters and details, refer to the [PhysicsCfg](https://dexforce.github.io/EmbodiChain/api_reference/embodichain/embodichain.lab.sim.html#embodichain.lab.sim.cfg.PhysicsCfg) documentation.
 
+### Render Configuration
+
+The {class}`~cfg.RenderCfg` class controls the rendering backend and quality settings.
+
+| Parameter | Type | Default | Description |
+| :--- | :--- | :--- | :--- |
+| `renderer` | `str` | `"hybrid"` | Renderer backend to use. Options are `'hybrid'` (ray tracing for shadows/reflections + rasterization), `'fast-rt'` (full ray tracing), and `'rt'` (offline ray-traced renderer for maximum visual fidelity). |
+| `enable_denoiser` | `bool` | `True` | Whether to enable denoising. Only valid when `renderer` is `'hybrid'`, `'fast-rt'` or `'rt'`. |
+| `spp` | `int` | `64` | Samples per pixel for ray tracing rendering. Only valid when `renderer` is `'hybrid'`, `'fast-rt'` or `'rt'` and `enable_denoiser` is `False`. |
+
+```python
+from embodichain.lab.sim import SimulationManagerCfg
+from embodichain.lab.sim.cfg import RenderCfg
+
+sim_config = SimulationManagerCfg(
+    render_cfg=RenderCfg(
+        renderer="fast-rt",    # Use full ray tracing
+        enable_denoiser=True,  # Enable denoising
+        spp=64,                # Samples per pixel (used when denoiser is off)
+    )
+)
+```
+
 
 ## Initialization
 
diff --git a/docs/source/overview/sim/sim_rigid_object.md b/docs/source/overview/sim/sim_rigid_object.md
index af636ab2..185a533d 100644
--- a/docs/source/overview/sim/sim_rigid_object.md
+++ b/docs/source/overview/sim/sim_rigid_object.md
@@ -110,9 +110,12 @@ Rigid objects are observed and controlled via single poses and linear/angular ve
 | `get_local_pose(to_matrix=False)` | `(N, 7)` or `(N, 4, 4)` | Get object local pose as (x, y, z, qw, qx, qy, qz) or 4x4 matrix per environment. |
 | `set_local_pose(pose, env_ids=None)` | `pose: (N, 7)` or `(N, 4, 4)` | Teleport object to given pose (requires calling `sim.update()` to apply). |
 | `body_data.pose` | `(N, 7)` | Access object pose directly (for dynamic/kinematic bodies). |
-| `body_data.lin_vel` | `(N, 3)` | Access linear velocity of object root (for dynamic/kinematic bodies). |
-| `body_data.ang_vel` | `(N, 3)` | Access angular velocity of object root (for dynamic/kinematic bodies). |
+| `body_data.lin_vel` | `(N, 3)` | Access linear velocity of object root (for dynamic bodies). |
+| `body_data.ang_vel` | `(N, 3)` | Access angular velocity of object root (for dynamic bodies). |
 | `body_data.vel` | `(N, 6)` | Concatenated linear and angular velocities. |
+| `body_data.lin_acc` | `(N, 3)` | Access linear acceleration of object root (for dynamic bodies). |
+| `body_data.ang_acc` | `(N, 3)` | Access angular acceleration of object root (for dynamic bodies). |
+| `body_data.acc` | `(N, 6)` | Concatenated linear and angular accelerations. |
 | `body_data.com_pose` | `(N, 7)` | Get center of mass pose of rigid bodies. |
 | `body_data.default_com_pose` | `(N, 7)` | Default center of mass pose. |
 | `body_state` | `(N, 13)` | Get full body state: [x, y, z, qw, qx, qy, qz, lin_x, lin_y, lin_z, ang_x, ang_y, ang_z]. |
diff --git a/docs/source/overview/sim/solvers/srs_solver.md b/docs/source/overview/sim/solvers/srs_solver.md
index 3cabb57e..2b26ee6d 100644
--- a/docs/source/overview/sim/solvers/srs_solver.md
+++ b/docs/source/overview/sim/solvers/srs_solver.md
@@ -51,7 +51,7 @@ cfg = SRSSolverCfg(
     end_link_name="left_ee",
     root_link_name="left_arm_base",
     dh_params=arm_params.dh_params,
-    qpos_limits=arm_params.qpos_limits,
+    user_qpos_limit=arm_params.qpos_limits,
     T_e_oe=arm_params.T_e_oe,
     T_b_ob=arm_params.T_b_ob,
     link_lengths=arm_params.link_lengths,
diff --git a/docs/source/quick_start/docs.md b/docs/source/quick_start/docs.md
index c62a3d71..12d8cb3d 100644
--- a/docs/source/quick_start/docs.md
+++ b/docs/source/quick_start/docs.md
@@ -10,9 +10,49 @@ pip install -r docs/requirements.txt
 
 ## 2. Build the HTML site
 
+### Local development (current version only)
+
 ```bash
 cd docs
-make html
+make current-docs
 ```
 
 Then you can preview the documentation in your browser at `docs/build/html/index.html`.
+
+### Multi-version docs (CI/production)
+
+The production docs site hosts multiple versions side by side. Each version is built independently into its own subdirectory under `docs/build/html/`:
+
+```
+docs/build/html/
+├── index.html           # Redirect → latest stable
+├── versions.json        # Version manifest for the sidebar selector
+├── main/                # Dev docs (latest main branch)
+├── v0.1.3/              # Release docs
+└── v0.1.2/              # Release docs
+```
+
+To build a specific version into this layout:
+
+```bash
+cd docs
+sphinx-build source build/html/<version>
+```
+
+For example, to build the `main` branch docs:
+
+```bash
+sphinx-build source build/html/main
+```
+
+Then generate the version manifest and root redirect:
+
+```bash
+python3 scripts/generate_versions_json.py --build-dir build/html
+```
+
+This generates both `versions.json` (for the sidebar version selector) and `index.html` (redirects to the latest stable version, falling back to `main`).
+
+> Old release versions beyond `DOCS_MAX_VERSIONS` (default: 5 in CI) are automatically pruned during CI builds.
+>
+> CI merges missing version directories from the live GitHub Pages site before each build so a `main` push cannot wipe docs built for release tags. See `docs/scripts/merge_published_site.py` and `tests/docs/test_merge_published_site.py`.
diff --git a/docs/source/quick_start/install.md b/docs/source/quick_start/install.md
index 4c655f2e..ae408f83 100644
--- a/docs/source/quick_start/install.md
+++ b/docs/source/quick_start/install.md
@@ -2,81 +2,135 @@
 
 ## System Requirements
 
-The following minimum system requirements are recommended to run EmbodiChain reliably. These are the tested configurations during development — other Linux distributions and versions may work but are not officially supported.
+| Component | Requirement |
+|-----------|------------|
+| **OS** | Linux (x86_64): Ubuntu 20.04+ |
+| **GPU** | NVIDIA with compute capability 7.0+ |
+| **NVIDIA Driver** | 535 - 570 (580+ is untested and may be unstable) |
+| **Python** | 3.10 or 3.11 |
 
-- Operating System: 
-    - Linux (x86_64): Ubuntu 20.04+
-
-- NVIDIA GPU and drivers:
-    - Hardware: NVIDIA GPU with compute capability 7.0 or higher
-    - NVIDIA Driver: 535 or higher (recommended 570)
+> [!NOTE]
+> Ensure your NVIDIA driver is compatible with your chosen PyTorch wheel. We recommend installing PyTorch from the [official PyTorch instructions](https://pytorch.org/get-started/locally/) for your CUDA version.
 
+## Installation
 
-- Python:
-    - 3.10
-    - 3.11
+### Docker (Recommended)
 
-Notes:
+We strongly recommend using our pre-configured Docker environment, which contains all necessary dependencies including CUDA, Vulkan, and GPU rendering support.
 
-- Ensure your NVIDIA driver is compatible with your chosen PyTorch wheel.
-- We recommend installing PyTorch from the official PyTorch instructions for your CUDA version: https://pytorch.org/get-started/locally/
+**1. Pull the image:**
 
----
+```bash
+docker pull dexforce/embodichain:ubuntu22.04-cuda12.8
+```
 
-### Recommended: Install with Docker 
+**2. Start a container:**
 
-We strongly recommend using our pre-configured Docker environment, which contains all necessary dependencies.
+Use the provided run script ([`docker/docker_run.sh`](../../../docker/docker_run.sh)), which handles GPU driver and Vulkan mounting:
 
 ```bash
-docker pull dexforce/embodichain:ubuntu22.04-cuda12.8
+./docker/docker_run.sh <container_name> <data_path>
 ```
 
-After pulling the Docker image, you can run a container with the provided [scripts](../../../docker/docker_run.sh).
+### uv (Recommended for local development)
+
+> [!TIP]
+> [uv](https://github.com/astral-sh/uv) is an extremely fast Python package manager and project manager. We recommend using `uv` for local development due to its significantly faster dependency resolution and installation times compared to pip.
+
+**Install uv:**
 
 ```bash
-./docker_run.sh [container_name] [data_path]
+curl -LsSf https://astral.sh/uv/install.sh | sh
 ```
 
----
+**Install from PyPI:**
+
+```bash
+uv pip install embodichain --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site
+```
 
+**Install from source (editable mode):**
 
-### Install EmbodiChain
+```bash
+git clone https://github.com/DexForce/EmbodiChain.git
+cd EmbodiChain
+uv pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site
+```
 
-> **We strongly recommend using a virtual environment to avoid dependency conflicts.**
+### pip (PyPI)
 
-To install EmbodiChain from pypi, run:
+> [!TIP]
+> We strongly recommend using a virtual environment to avoid dependency conflicts.
 
 ```bash
 pip install embodichain --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site
+```
+
+### From Source
+
+> [!TIP]
+> We strongly recommend using a virtual environment to avoid dependency conflicts.
+
+```bash
+git clone https://github.com/DexForce/EmbodiChain.git
+cd EmbodiChain
+pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site
+```
+
+### Generative Simulation Dependencies
 
-# Or install with the lerobot extras:
-pip install embodichain[lerobot] --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site
+If you want to use the generative simulation features, install EmbodiChain with the `gensim` extra. This installs the additional rendering and asset-processing dependencies, including `pyrender` and `bpy`. The `bpy` wheel is distributed from Blender's package index, so the Blender index must be included in the install command.
+
+**Install from PyPI with `uv`:**
+
+```bash
+uv pip install "embodichain[gensim]" \
+    --extra-index-url http://pyp.open3dv.site:2345/simple/ \
+    --trusted-host pyp.open3dv.site \
+    --extra-index-url https://download.blender.org/pypi/
 ```
 
-To install the Embodichain from source, clone the EmbodiChain repository:
+**Install from source with `uv`:**
+
 ```bash
 git clone https://github.com/DexForce/EmbodiChain.git
+cd EmbodiChain
+uv pip install -e ".[gensim]" \
+    --extra-index-url http://pyp.open3dv.site:2345/simple/ \
+    --trusted-host pyp.open3dv.site \
+    --extra-index-url https://download.blender.org/pypi/
 ```
 
-Install the project in development mode:
+**Install from PyPI with `pip`:**
 
 ```bash
-pip install -e . --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site
+pip install "embodichain[gensim]" \
+    --extra-index-url http://pyp.open3dv.site:2345/simple/ \
+    --trusted-host pyp.open3dv.site \
+    --extra-index-url https://download.blender.org/pypi/
+```
+
+**Install from source with `pip`:**
 
-# Or install with the lerobot extras:
-pip install -e .[lerobot] --extra-index-url http://pyp.open3dv.site:2345/simple/ --trusted-host pyp.open3dv.site
+```bash
+git clone https://github.com/DexForce/EmbodiChain.git
+cd EmbodiChain
+pip install -e ".[gensim]" \
+    --extra-index-url http://pyp.open3dv.site:2345/simple/ \
+    --trusted-host pyp.open3dv.site \
+    --extra-index-url https://download.blender.org/pypi/
 ```
 
-> [!NOTE]
-> * [LeRobot](https://huggingface.co/docs/lerobot/installation) is an optional module for EmbodiChain that provides data saving and loading functionalities for robot learning tasks. Installing with the `lerobot` extras will include this module and its dependencies.
+## Verify Installation
 
-### Verify Installation
-To verify that EmbodiChain is installed correctly, run a simple demo script to create a simulation scene:
+Run the demo script to confirm everything is set up correctly:
 
 ```bash
 python scripts/tutorials/sim/create_scene.py
+```
 
-# Or run in headless mode.
+If the installation is successful, you will see a simulation window with a rendered scene. To run without a display:
+
+```bash
 python scripts/tutorials/sim/create_scene.py --headless
 ```
----
diff --git a/docs/source/resources/publications/README.md b/docs/source/resources/publications/README.md
new file mode 100644
index 00000000..b2b50848
--- /dev/null
+++ b/docs/source/resources/publications/README.md
@@ -0,0 +1,79 @@
+# Academic Publications
+
+[![DOI](https://img.shields.io/badge/DOI-available-success?style=for-the-badge)](#)
+[![Year](https://img.shields.io/badge/year-2025--2026-blue?style=for-the-badge)](#)
+---
+
+This page contains bibliographic information for academic papers related to EmbodiChain. Papers are ordered by year (newest first).
+
+## Publications
+
+### 2026
+
+#### From Reaction to Anticipation: Proactive Failure Recovery through Agentic Task Graph for Robotic Manipulation
+
+**Authors:** Sheng Xu, Ruixing Jin, Huayi Zhou, Bo Yue, Guanren Qiao, Yueci Deng, Yunxin Tai, Kui Jia, Guiliang Liu
+
+**Venue:** Robotics: Science and Systems (RSS), 2026
+
+```bibtex
+@inproceedings{xu2026agentchord,
+  title = {From Reaction to Anticipation: Proactive Failure Recovery through Agentic Task Graph for Robotic Manipulation},
+  author = {Xu, Sheng and Jin, Ruixing and Zhou, Huayi and Yue, Bo and Qiao, Guanren and Deng, Yueci and Tai, Yunxin and Jia, Kui and Liu, Guiliang},
+  booktitle = {Robotics: Science and Systems (RSS)},
+  year = {2026}
+}
+```
+
+---
+
+#### Sim2Real VLA: Zero-Shot Generalization of Synthesized Skills to Realistic Manipulation
+
+**Authors:** Runyi Zhao, Sheng Xu, Ruixing Jin, Yueci Deng, Yunxin Tai, Kui Jia, Guiliang Liu
+
+**Venue:** The Fourteenth International Conference on Learning Representations (ICLR), 2026
+
+```bibtex
+@inproceedings{zhao2026sim2real,
+  title={Sim2real vla: Zero-shot generalization of synthesized skills to realistic manipulation},
+  author={Zhao, Runyi and Xu, Sheng and Jin, Ruixing and Deng, Yueci and Tai, Yunxin and Jia, Kui and Liu, Guiliang},
+  booktitle={The Fourteenth International Conference on Learning Representations},
+  year={2026}
+}
+```
+
+---
+
+### 2025
+
+#### DexScale: Automating Data Scaling for Sim2Real Generalizable Robot Control
+
+**Authors:** Guiliang Liu, Yueci Deng, Runyi Zhao, Huayi Zhou, Jian Chen, Jietao Chen, Ruiyan Xu, Yunxin Tai, Kui Jia
+
+**Venue:** Forty-Second International Conference on Machine Learning (ICML), 2025
+
+```bibtex
+@inproceedings{liu2025dexscale,
+  title={DexScale: automating data scaling for sim2real generalizable robot control},
+  author={Liu, Guiliang and Deng, Yueci and Zhao, Runyi and Zhou, Huayi and Chen, Jian and Chen, Jietao and Xu, Ruiyan and Tai, Yunxin and Jia, Kui},
+  booktitle={Forty-second international conference on machine learning},
+  year={2025}
+}
+```
+
+---
+
+## Adding a New Paper
+
+To add a new publication:
+
+1. Add a new section under the appropriate year heading
+2. Include the paper title, authors, venue, and BibTeX entry
+3. Keep entries ordered by year (newest first)
+
+## Core Framework Citations
+
+The following citations are kept in the main [README.md](https://github.com/DexForce/EmbodiChain) as they are considered core framework references:
+
+- **EmbodiChain** - The framework itself
+- **GS-World** - The underlying generative simulation paradigm
diff --git a/docs/source/resources/roadmap.md b/docs/source/resources/roadmap.md
index 899a9c56..c4870a9e 100644
--- a/docs/source/resources/roadmap.md
+++ b/docs/source/resources/roadmap.md
@@ -1,36 +1,92 @@
 # Roadmap
 
-Currently, EmbodiChain is under active development. Our roadmap includes the following planned features and enhancements:
-
-- Simulation:
-    - Rendering:
-        - Improve ray-tracing backend performance and fix some konwn issues.
-        - Add a high performance Hybrid rendering backend for better visual quality and speed trade-off.
-        - Support a more efficient real-time denoiser.
-        - Add a new rasterization backend for basic rendering tasks.
-    - Physics:
-        - Improve GPU physics throughput.
-        - We are working on research and development of next-generation physics backend, supporting high-accuracy simulation, differentiable dynamics, and neural physical models for end-to-end AI integration.
-    - Sensors:
-        - Add more physical sensors (eg, force sensor) with examples.
-    - Motion Generation:
-        - Add more advanced motion generation methods with examples.
-    - Useful Tools:
-        - We are working on USD support for EmbodiChain to enable better asset management and interoperability.
-    - Robots Integration:
-        - Add support for more robot models (eg: LeRobot, Unitree H1/G1, etc).
-
-- Data Pipeline Coming Soon:
-    - We will release a Real2Sim pipeline, which enables automatic data generation and scaling from real-world seeding priors.
-    - We will release an agentic skill generation framework for automated expert trajectory generation.
-    - Add assets and scenes generator and the integration with data pipeline.
-
-- Models & Training Infrastructure Coming Soon:
-    - We will release a modular VLA framework for fast prototyping and training of embodied agents.
-    - Add online data streaming pipeline for model training.
-
-- Embodied Tasks Coming Soon:
-    - Add more benchmark tasks for EmbodiChain.
-    - Add more tasks with reinforcement learning support.
-    - Add a set of manipulation tasks for demonstration of data generation pipeline.
-    
\ No newline at end of file
+EmbodiChain is in alpha and under active development. This roadmap summarizes
+the main areas we are improving and the capabilities planned for upcoming
+releases.
+
+The roadmap is organized by product area so new work can be added without
+changing the whole page. Each item should be short, user-facing, and grouped
+under the area it improves.
+
+## Status Legend
+
+| Marker | Status | Meaning |
+| --- | --- | --- |
+| 🚧 | In progress | Work is actively being designed, implemented, or validated. |
+| 📌 | Planned | Work is on the project roadmap but not yet released. |
+| 🔬 | Research | Work is exploratory and may change as the technical approach matures. |
+
+## Simulation
+
+### Rendering
+
+| Status | Planned capability |
+| --- | --- |
+| 🚧 | Support a more efficient real-time denoiser. |
+| 🔬 | Add 3DGS support for rendering and data generation. |
+
+### Physics
+
+| Status | Planned capability |
+| --- | --- |
+| 🔬 | Develop a next-generation physics backend with high-accuracy simulation, differentiable dynamics, and neural physical models for end-to-end AI integration. |
+
+### Sensors
+
+| Status | Planned capability |
+| --- | --- |
+| 📌 | Add more physical sensor models, such as force sensors, with runnable examples. |
+
+### Motion Generation
+
+| Status | Planned capability |
+| --- | --- |
+| 📌 | Add more advanced motion generation methods with examples. |
+
+### Robot Integration
+
+| Status | Planned capability |
+| --- | --- |
+| 📌 | Add support for more robot models, including LeRobot and Unitree H1/G1. |
+
+## Data Pipeline
+
+| Status | Planned capability |
+| --- | --- |
+| 📌 | Release a Real2Sim pipeline for automatic data generation and scaling from real-world seeding priors. |
+| 📌 | Release an agentic skill generation framework for automated expert trajectory generation. |
+| 📌 | Release a sim-ready asset and scene-layout generation framework for fast environment prototyping. |
+
+## Models and Training Infrastructure
+
+| Status | Planned capability |
+| --- | --- |
+| 📌 | Release a modular VLA framework for fast prototyping and training of embodied agents. |
+
+## Embodied Tasks
+
+| Status | Planned capability |
+| --- | --- |
+| 📌 | Add more benchmark tasks for EmbodiChain. |
+| 📌 | Add more tasks with reinforcement learning support. |
+| 📌 | Add manipulation tasks that demonstrate the data generation pipeline. |
+
+## Extending This Roadmap
+
+When adding roadmap items:
+
+- Add the item under the closest existing area before creating a new section.
+- Use one row per user-facing capability.
+- Keep status markers limited to the status legend above unless the legend is
+  updated at the same time.
+- Prefer concrete outcomes over implementation details.
+
+New sections should follow this template:
+
+```md
+## Area Name
+
+| Status | Planned capability |
+| --- | --- |
+| 📌 | Describe the capability and the user-facing outcome. |
+```
diff --git a/docs/source/resources/task/index.rst b/docs/source/resources/task/index.rst
index 998f6614..1c65e7e1 100644
--- a/docs/source/resources/task/index.rst
+++ b/docs/source/resources/task/index.rst
@@ -6,6 +6,5 @@ Supported Tasks
 .. toctree::
    :maxdepth: 1
 
-    Push Cube
     Pour Water
 
diff --git a/docs/source/tutorial/atomic_actions.rst b/docs/source/tutorial/atomic_actions.rst
new file mode 100644
index 00000000..10b8e97c
--- /dev/null
+++ b/docs/source/tutorial/atomic_actions.rst
@@ -0,0 +1,170 @@
+.. _tutorial_atomic_actions:
+
+Atomic Actions
+==============
+
+EmbodiChain's **atomic action** layer provides a high-level, composable interface for common
+manipulation primitives such as *move*, *pick up*, and *place*.  Each action encapsulates the
+full planning pipeline — grasp-pose estimation, IK, trajectory generation, and gripper
+interpolation — behind a single ``execute()`` call, making it straightforward to chain
+multiple actions together into complex robot behaviours.
+
+Key Features
+------------
+
+- **Semantic-aware execution** — actions accept either a raw pose tensor or an
+  ``ObjectSemantics`` descriptor that bundles affordance data (grasp poses, interaction
+  points) with the simulation entity.
+- **Three built-in primitives** — ``MoveAction``, ``PickUpAction``, and ``PlaceAction``
+  cover the most common tabletop manipulation workflows out of the box.
+  See the :ref:`supported_atomic_actions` table for configs and target types.
+- **Extensible registry** — custom actions can be registered globally with
+  ``register_action`` and discovered by the engine at runtime.
+- **Engine orchestration** — ``AtomicActionEngine`` sequences multiple actions,
+  threads ``start_qpos`` from one action to the next, and returns a single concatenated
+  trajectory ready to replay in the simulator.
+
+For the full design overview, architecture diagram, and extension guide see
+:doc:`/overview/sim/atomic_actions`.
+
+The Code
+--------
+
+The tutorial corresponds to the ``atomic_actions.py`` script in the ``scripts/tutorials/sim``
+directory.
+
+.. dropdown:: Code for atomic_actions.py
+    :icon: code
+
+    .. literalinclude:: ../../../scripts/tutorials/sim/atomic_actions.py
+        :language: python
+        :linenos:
+
+Typical Usage
+-------------
+
+Setting up the engine
+~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+   import torch
+   from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg
+   from embodichain.lab.sim.atomic_actions import (
+       AtomicActionEngine,
+       PickUpActionCfg,
+       PlaceActionCfg,
+       MoveActionCfg,
+   )
+
+   motion_gen = MotionGenerator(cfg=MotionGenCfg(...))
+
+   hand_open  = torch.tensor([0.00,  0.00],  dtype=torch.float32, device=device)
+   hand_close = torch.tensor([0.025, 0.025], dtype=torch.float32, device=device)
+
+   pickup_cfg = PickUpActionCfg(
+       hand_open_qpos=hand_open,
+       hand_close_qpos=hand_close,
+       control_part="arm",
+       hand_control_part="hand",
+       approach_direction=torch.tensor([0.0, 0.0, -1.0], dtype=torch.float32, device=device),
+       pre_grasp_distance=0.15,
+       lift_height=0.15,
+   )
+   place_cfg = PlaceActionCfg(
+       hand_open_qpos=hand_open,
+       hand_close_qpos=hand_close,
+       control_part="arm",
+       hand_control_part="hand",
+       lift_height=0.15,
+   )
+   move_cfg = MoveActionCfg(control_part="arm")
+
+   engine = AtomicActionEngine(
+       motion_generator=motion_gen,
+       actions_cfg_list=[pickup_cfg, place_cfg, move_cfg],
+   )
+
+Defining object semantics
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+   from embodichain.lab.sim.atomic_actions import (
+       ObjectSemantics,
+       AntipodalAffordance,
+   )
+   from embodichain.toolkits.graspkit.pg_grasp import GraspGeneratorCfg, AntipodalSamplerCfg
+   from embodichain.toolkits.graspkit.pg_grasp.gripper_collision_checker import GripperCollisionCfg
+
+   affordance = AntipodalAffordance(
+       object_label="mug",
+       force_reannotate=False,
+       custom_config={
+           "gripper_collision_cfg": GripperCollisionCfg(
+               max_open_length=0.088, finger_length=0.078, point_sample_dense=0.012
+           ),
+           "generator_cfg": GraspGeneratorCfg(
+               antipodal_sampler_cfg=AntipodalSamplerCfg(
+                   n_sample=20000, max_length=0.088, min_length=0.003
+               )
+           ),
+       },
+   )
+
+   semantics = ObjectSemantics(
+       label="mug",
+       geometry={
+           "mesh_vertices": mug.get_vertices(env_ids=[0], scale=True)[0],
+           "mesh_triangles": mug.get_triangles(env_ids=[0])[0],
+       },
+       affordance=affordance,
+       entity=mug,   # required so the action can query the live object pose
+   )
+
+Executing a pick-place-move sequence
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+   place_xpos = ...  # torch.Tensor [4, 4] — target placement pose
+   rest_xpos  = ...  # torch.Tensor [4, 4] — resting pose after placing
+
+   is_success, trajectory = engine.execute_static(
+       target_list=[semantics, place_xpos, rest_xpos]
+   )
+   # trajectory: [n_envs, n_waypoints, robot_dof]
+
+   for i in range(trajectory.shape[1]):
+       robot.set_qpos(trajectory[:, i])
+       sim.update(step=4)
+
+Registering custom actions
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+   from embodichain.lab.sim.atomic_actions import AtomicAction, ActionCfg, register_action
+
+   class PushAction(AtomicAction):
+       def execute(self, target, start_qpos=None, **kwargs):
+           # ... your planning logic ...
+           return is_success, trajectory, joint_ids
+
+       def validate(self, target, start_qpos=None, **kwargs):
+           return True   # quick feasibility check
+
+   register_action("push", PushAction)
+
+Notes & Best Practices
+----------------------
+
+- ``PickUpAction`` expects an ``AntipodalAffordance`` with valid mesh data
+  (``mesh_vertices`` / ``mesh_triangles``) so the grasp generator can annotate the object.
+  Set ``force_reannotate=False`` (the default) to reuse cached annotations across episodes.
+- ``ObjectSemantics.entity`` must be set when using semantic targets so the action can read
+  the object's current world pose at planning time.
+- For static (non-physics) playback, iterate over ``trajectory[:, i]`` and call
+  ``robot.set_qpos`` directly; for physics-enabled playback, feed waypoints through your
+  controller or gym wrapper instead.
+- To add a new action type, see :doc:`/overview/sim/atomic_actions`.
diff --git a/docs/source/tutorial/basic_env.rst b/docs/source/tutorial/basic_env.rst
index 6de0c48b..443fbe97 100644
--- a/docs/source/tutorial/basic_env.rst
+++ b/docs/source/tutorial/basic_env.rst
@@ -182,3 +182,14 @@ This tutorial showcases several important features of EmbodiChain environments:
 4. **Custom Objects**: Adding and manipulating scene objects
 5. **Flexible Actions**: Customizable action spaces and execution methods
 6. **Extensible Observations**: Adding task-specific observation data
+
+.. tip::
+   **Using an AI coding agent?** Once you're ready to create your own task environment, use the **/add-task-env** skill to scaffold the file with the correct structure, ``@register_env`` decorator, base class methods, and test stub. Use **/add-test** to write tests and **/pre-commit-check** to verify everything passes CI before committing.
+
+Next Steps
+~~~~~~~~~~
+
+- :doc:`modular_env` — Build advanced config-driven environments with ``EmbodiedEnv``
+- :doc:`rl` — Train RL agents with PPO or GRPO
+- :doc:`/overview/gym/env` — Full environment architecture and manager reference
+- :doc:`/guides/custom_functors` — Write custom observation, reward, and event functors
diff --git a/docs/source/tutorial/create_scene.rst b/docs/source/tutorial/create_scene.rst
index 244bd932..da13d5ec 100644
--- a/docs/source/tutorial/create_scene.rst
+++ b/docs/source/tutorial/create_scene.rst
@@ -89,3 +89,12 @@ You can also pass arguments to customize the simulation. For example, to run in
    python scripts/tutorials/sim/create_scene.py --headless --num_envs <n> --device <cuda/cpu>
 
 Now that we have a basic understanding of how to create a scene, let's move on to more advanced topics.
+
+Next Steps
+~~~~~~~~~~
+
+- :doc:`create_softbody` — Add deformable bodies to your scene
+- :doc:`robot` — Load and control a robot
+- :doc:`sensor` — Add cameras and capture sensor data
+- :doc:`basic_env` — Create your first Gymnasium environment
+- :doc:`/overview/sim/sim_manager` — Full SimulationManager API reference
diff --git a/docs/source/tutorial/data_generation.rst b/docs/source/tutorial/data_generation.rst
new file mode 100644
index 00000000..ca994f3d
--- /dev/null
+++ b/docs/source/tutorial/data_generation.rst
@@ -0,0 +1,189 @@
+.. _tutorial_data_generation:
+
+Data Generation
+===============
+
+.. currentmodule:: embodichain.lab.gym
+
+This tutorial shows how to generate synthetic expert demonstration datasets using EmbodiChain's built-in environment rollout and dataset manager. You will learn how to configure LeRobot recording in ``gym_config.json``, how ``run_env.py`` builds an environment from configuration files, and how completed episodes are automatically saved to disk.
+
+Overview
+~~~~~~~~
+
+EmbodiChain provides a built-in data generation workflow for imitation-learning and manipulation tasks:
+
+- **Gym Configuration**: Describes the scene, robot, sensors, randomization events, observations, dataset recorder, and rollout settings.
+- **Action Configuration**: Describes the task-specific expert action graph for tasks that use the action bank.
+- **Environment Rollout**: Builds the environment directly from configuration files and executes offline generation.
+- **Expert Policy**: Each task provides ``create_demo_action_list()`` or another scripted policy entry to generate expert actions.
+- **Dataset Manager**: Records observation-action pairs during ``env.step()``.
+- **LeRobotRecorder**: Converts completed episodes into LeRobot-compatible datasets, with optional video export.
+
+What This Tutorial Records
+--------------------------
+
+This page documents the full path from task configuration to saved dataset:
+
+1. Prepare a task ``gym_config.json``.
+2. Prepare an ``action_config.json`` if the task uses the action bank.
+3. Launch the environment rollout with ``run-env``.
+4. Let the dataset manager automatically save completed episodes.
+
+Example Task
+------------
+
+As a concrete example, this tutorial uses a real action-bank task shipped in the repository:
+
+- ``configs/gym/pour_water/gym_config.json`` defines the simulation scene and dataset recording behavior.
+- ``configs/gym/pour_water/action_config.json`` defines the action-bank graph used to solve the task.
+
+The Code
+~~~~~~~~
+
+The tutorial corresponds to the ``run_env.py`` script in ``embodichain/lab/scripts``.
+
+.. dropdown:: Code for run_env.py
+   :icon: code
+
+   .. literalinclude:: ../../../embodichain/lab/scripts/run_env.py
+      :language: python
+      :linenos:
+
+
+The Code Explained
+~~~~~~~~~~~~~~~~~~
+
+The rollout script builds the environment from configuration, generates expert trajectories, executes them step by step, and relies on the dataset manager to auto-save valid episodes.
+
+Step 1: Prepare the Task Configuration
+--------------------------------------
+
+The first input to the pipeline is the task ``gym_config.json``. In the example below, the same file contains rollout settings, scene randomization, observations, dataset recording, and robot or sensor definitions.
+
+The rollout settings include the episode count:
+
+.. literalinclude:: ../../../configs/gym/pour_water/gym_config.json
+   :language: json
+   :lines: 2-4
+
+The dataset-related part looks like this:
+
+.. literalinclude:: ../../../configs/gym/pour_water/gym_config.json
+   :language: json
+   :lines: 261-281
+
+Important parameters are:
+
+- **max_episodes**: Number of rollout episodes generated by ``run_env.py``.
+- **max_episode_steps**: Maximum number of environment steps per episode.
+- **dataset.lerobot.params.robot_meta**: Robot metadata such as robot type and control frequency.
+- **dataset.lerobot.params.instruction**: Task language instruction stored together with the dataset.
+- **dataset.lerobot.params.extra**: Additional metadata such as scene type and task description.
+- **dataset.lerobot.params.use_videos**: Whether camera observations should be stored as videos.
+- **env.control_parts**: Controlled robot parts in the environment.
+
+
+In the current implementation, ``LeRobotRecorder`` stores robot state and action features such as ``observation.qpos``, ``observation.qvel``, ``observation.qf``, ``action``, and camera images when sensors are present.
+
+Step 2: Prepare the Action Configuration
+----------------------------------------
+
+For tasks that use the action bank, the second input is ``action_config.json``. This file defines the expert action graph consumed by ``create_demo_action_list()``. In the example below, the file is organized around ``scope``, ``node``, ``edge``, and ``sync``.
+
+.. dropdown:: Action bank structure in the example task Pour_Water
+   :icon: code
+
+   **Scope Configuration**
+
+   .. literalinclude:: ../../../configs/gym/pour_water/action_config.json
+      :language: json
+      :lines: 2-57
+
+   **Node Configuration**
+
+   .. literalinclude:: ../../../configs/gym/pour_water/action_config.json
+      :language: json
+      :lines: 96-177
+
+   **Edge Configuration**
+
+   .. literalinclude:: ../../../configs/gym/pour_water/action_config.json
+      :language: json
+      :lines: 763-790
+
+   **Synchronization**
+
+   .. literalinclude:: ../../../configs/gym/pour_water/action_config.json
+      :language: json
+      :lines: 906-932
+
+This structure defines the expert rollout as follows:
+
+- **Scope**: Defines controllable sub-graphs such as ``right_arm``, ``left_arm``, ``right_eef``, and ``left_eef``.
+- **Node**: Defines key poses, targets computed from object affordances, and IK-generated joint targets.
+- **Edge**: Defines executable transitions between nodes, including duration and execution function.
+- **Sync**: Defines execution order rules between independently configured sub-actions.
+
+Note: Action bank is not the only way to generate demonstrations. Depending on the task design, trajectories can also be produced by other scripted generation methods.
+
+Step 3: Launch the Environment Rollout
+--------------------------------------
+
+The rollout script parses command-line arguments, loads ``gym_config.json`` and ``action_config.json``, converts them into environment configuration objects, creates the environment instance, and then runs offline rollout for ``max_episodes`` episodes:
+
+.. literalinclude:: ../../../embodichain/lab/scripts/run_env.py
+   :language: python
+   :start-at: def cli():
+   :end-at:     main(args, env, gym_config)
+
+Each rollout internally calls ``create_demo_action_list()``, validates the returned sequence, executes actions with ``env.step(action)``, and discards invalid rollouts by resetting with ``save_data=False``.
+
+The recommended CLI entrypoint is:
+
+.. code-block:: bash
+
+   python -m embodichain run-env \
+       --gym_config configs/gym/pour_water/gym_config.json \
+       --action_config configs/gym/pour_water/action_config.json \
+       --headless
+
+For interactive inspection, you can use preview mode: replace ``--headless`` with ``--preview``.
+When ``--preview`` is enabled, the script opens the environment in an interactive debugging mode. This mode is for inspection and does not save datasets.
+
+
+Useful CLI arguments:
+
+- **--gym_config**: Path to the task JSON configuration.
+- **--action_config**: Path to the action-bank configuration.
+- **--num_envs**: Number of environments to run in parallel.
+- **--device**: Simulation device, such as ``cpu`` or ``cuda``.
+- **--headless**: Run without GUI for faster generation.
+- **--enable_rt**: Enable ray tracing for higher-quality visual observations.
+- **--preview**: Launch the environment in interactive preview mode.
+- **--filter_dataset_saving**: Disable dataset saving for debugging.
+
+For the complete CLI argument list, see :doc:`CLI Reference </guides/cli>`.
+
+Outputs
+~~~~~~~
+
+After successful execution, completed episodes are saved under the configured dataset root. A LeRobot dataset typically contains:
+
+If no explicit save path is provided and ``EMBODICHAIN_DATASET_ROOT`` is not set, ``LeRobotRecorder`` uses ``~/.cache/embodichain_datasets`` as the default dataset root.
+
+- **data/**: Recorded action and state data.
+- **videos/**: Camera observations saved as videos when ``use_videos=True``.
+- **meta/**: Dataset metadata such as task information and robot description.
+
+Dataset folders are automatically numbered, which makes it easy to run repeated generations without overwriting previous results.
+
+In a practical workflow, the output of this stage is the synthesized dataset itself. Later training scripts typically consume these saved LeRobot episodes instead of regenerating trajectories each time.
+
+Best Practices
+~~~~~~~~~~~~~~
+
+- **Keep the config pair together**: Version ``gym_config.json`` and ``action_config.json`` together for action-bank tasks.
+- **Use valid scripted policies**: Make sure ``create_demo_action_list()`` returns executable trajectories for the current scene.
+- **Use ``--headless`` for throughput**: Disable the GUI when generating large datasets.
+- **Use ``--preview`` and ``--filter_dataset_saving`` for debugging**: Inspect task logic without writing datasets.
+- **Discard invalid rollouts**: Keep the default validation logic so failed trajectories are not saved.
diff --git a/docs/source/tutorial/gizmo.rst b/docs/source/tutorial/gizmo.rst
index b0d39b2c..6f2a5b7a 100644
--- a/docs/source/tutorial/gizmo.rst
+++ b/docs/source/tutorial/gizmo.rst
@@ -213,7 +213,7 @@ Command-line options:
 - ``--device cpu|cuda``: Choose simulation device
 - ``--num_envs N``: Number of parallel environments
 - ``--headless``: Run without GUI for automated testing
-- ``--enable_rt``: Enable ray tracing for better visuals
+- ``--renderer``: Enable ray tracing for better visuals
 
 Once running:
 
diff --git a/docs/source/tutorial/index.rst b/docs/source/tutorial/index.rst
index ef6efe79..33b95a8b 100644
--- a/docs/source/tutorial/index.rst
+++ b/docs/source/tutorial/index.rst
@@ -1,6 +1,36 @@
 Tutorials
 =========
 
+These tutorials walk you through EmbodiChain step by step, from creating your first simulation scene to training RL agents. Each tutorial includes a complete runnable script and a line-by-line explanation.
+
+Suggested Learning Path
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Follow the tutorials in this order for the best learning experience:
+
+**Phase 1: Simulation Basics**
+
+1. :doc:`create_scene` — Set up a simulation, add objects, and run the render loop. **Start here.**
+2. :doc:`create_softbody` and :doc:`create_cloth` — Add deformable bodies to your scenes.
+3. :doc:`rigid_object_group` — Manage collections of rigid objects efficiently.
+4. :doc:`robot` — Load and control a robot in simulation.
+5. :doc:`sensor` — Add cameras and capture RGB/depth/segmentation data.
+6. :doc:`solver` — Configure IK solvers for end-effector control.
+7. :doc:`motion_gen` — Generate smooth trajectories with motion planners.
+8. :doc:`atomic_actions` — Use built-in action primitives (pick, place, move).
+9. :doc:`gizmo` — Interactively control robots with on-screen gizmos.
+
+**Phase 2: Environments**
+
+10. :doc:`basic_env` — Create a simple Gymnasium environment with ``BaseEnv``. Prerequisite: Phase 1 basics.
+11. :doc:`modular_env` — Build a config-driven environment with ``EmbodiedEnv``, managers, and randomization. Prerequisite: :doc:`basic_env`.
+12. :doc:`data_generation` — Generate expert demonstration datasets for imitation learning. Prerequisite: :doc:`modular_env`.
+13. :doc:`rl` — Train RL agents with PPO or GRPO. Prerequisite: :doc:`basic_env`.
+
+**Phase 3: Extending the Framework**
+
+14. :doc:`add_robot` — Add a new robot model to EmbodiChain.
+
 .. toctree::
    :maxdepth: 1
    :hidden:
@@ -14,8 +44,9 @@ Tutorials
    solver
    sensor
    motion_gen
+   atomic_actions
    gizmo
    basic_env
    modular_env
+   data_generation
    rl
-   
diff --git a/docs/source/tutorial/modular_env.rst b/docs/source/tutorial/modular_env.rst
index 356a7ac4..eef801c3 100644
--- a/docs/source/tutorial/modular_env.rst
+++ b/docs/source/tutorial/modular_env.rst
@@ -64,7 +64,7 @@ The ``randomize_table_mat`` event varies visual appearance:
 - **Mode**: ``"interval"`` - triggers every 10 steps
 - **Features**: Random textures from COCO dataset and base color variations
 
-for more randomization events, please refer
+For more randomization events, please refer to :doc:`/overview/gym/event_functors`.
 
 Observation Configuration
 -------------------------
@@ -235,3 +235,9 @@ This tutorial showcases the most advanced features of EmbodiChain environments:
 
 
 This tutorial demonstrates the full power of EmbodiChain's modular environment system, providing the foundation for creating sophisticated robotic learning scenarios.
+
+.. tip::
+   **Using an AI coding agent?** These skills can help you build on this tutorial:
+
+   - **/add-task-env** — Scaffold a new task environment with the correct file structure, ``@register_env`` decorator, base class methods, ``__init__.py`` update, and test stub.
+   - **/add-functor** — Add observation, reward, event, or randomization functors with the correct signature and module placement.
\ No newline at end of file
diff --git a/docs/source/tutorial/rl.rst b/docs/source/tutorial/rl.rst
index 28054648..db1c7ab1 100644
--- a/docs/source/tutorial/rl.rst
+++ b/docs/source/tutorial/rl.rst
@@ -420,3 +420,11 @@ Best Practices
 
 - **Checkpoints**: Regular checkpoints are saved to ``outputs/<exp_name>/checkpoints/``. Use these to resume training or evaluate policies.
 
+See Also
+--------
+
+- :doc:`/overview/rl/index` — RL module architecture and component reference
+- :doc:`/overview/gym/env` — EmbodiedEnv configuration and Action Manager
+- :doc:`basic_env` — Creating basic Gymnasium environments
+- :doc:`modular_env` — Advanced modular environments with managers
+- :doc:`/resources/task/index` — List of available RL task environments
diff --git a/docs/source/tutorial/robot.rst b/docs/source/tutorial/robot.rst
index 8312ad27..c3a54ab5 100644
--- a/docs/source/tutorial/robot.rst
+++ b/docs/source/tutorial/robot.rst
@@ -116,7 +116,7 @@ You can customize the simulation with various command-line options:
    python scripts/tutorials/sim/create_robot.py --headless
    
    # Enable ray tracing rendering
-   python scripts/tutorials/sim/create_robot.py --enable_rt
+   python scripts/tutorials/sim/create_robot.py --renderer
 
 The simulation will show the robot moving through different poses, demonstrating basic joint control capabilities.
 
diff --git a/docs/source/tutorial/sensor.rst b/docs/source/tutorial/sensor.rst
index 1d5c4dc9..9119d1ea 100644
--- a/docs/source/tutorial/sensor.rst
+++ b/docs/source/tutorial/sensor.rst
@@ -89,7 +89,7 @@ You can customize the simulation with the following command-line options:
    python scripts/tutorials/sim/create_sensor.py --headless
 
    # Enable ray tracing rendering
-   python scripts/tutorials/sim/create_sensor.py --enable_rt
+   python scripts/tutorials/sim/create_sensor.py --renderer
 
    # Attach the camera to the robot end-effector
    python scripts/tutorials/sim/create_sensor.py --attach_sensor
diff --git a/docs/source/tutorial/solver.rst b/docs/source/tutorial/solver.rst
index b3c95807..61300096 100644
--- a/docs/source/tutorial/solver.rst
+++ b/docs/source/tutorial/solver.rst
@@ -95,7 +95,7 @@ API Reference
            """Compute the Jacobian matrix for the given joint positions."""
 
 - **set_ik_nearst_weight**: Set weights for IK nearest neighbor search.
-- **set_position_limits / get_position_limits**: Set or get joint position limits.
+- **set_qpos_limits / get_qpos_limits**: Set or get joint position limits.
 - **set_tcp / get_tcp**: Set or get the tool center point (TCP) transformation.
 
 Configuration
diff --git a/docs/sync_readme.py b/docs/sync_readme.py
index a3198b6e..67620ef2 100644
--- a/docs/sync_readme.py
+++ b/docs/sync_readme.py
@@ -3,6 +3,7 @@
 
 Idempotent copy. Exit code 0 on success.
 """
+
 import shutil
 from pathlib import Path
 import sys
diff --git a/embodichain/agents/datasets/online_data.py b/embodichain/agents/datasets/online_data.py
index ac359020..2f2a117f 100644
--- a/embodichain/agents/datasets/online_data.py
+++ b/embodichain/agents/datasets/online_data.py
@@ -24,7 +24,6 @@
 from embodichain.agents.engine.data import OnlineDataEngine
 from embodichain.agents.datasets.sampler import ChunkSizeSampler
 
-
 __all__ = [
     "OnlineDataset",
 ]
diff --git a/embodichain/agents/datasets/sampler.py b/embodichain/agents/datasets/sampler.py
index 464af009..70385484 100644
--- a/embodichain/agents/datasets/sampler.py
+++ b/embodichain/agents/datasets/sampler.py
@@ -20,7 +20,6 @@
 from abc import ABC, abstractmethod
 from typing import Callable, Iterator, List, Optional, Union
 
-
 __all__ = [
     "ChunkSizeSampler",
     "UniformChunkSampler",
diff --git a/embodichain/agents/engine/data.py b/embodichain/agents/engine/data.py
index f25987ab..c11fb966 100644
--- a/embodichain/agents/engine/data.py
+++ b/embodichain/agents/engine/data.py
@@ -25,6 +25,7 @@
 from tensordict import TensorDict
 from tqdm import tqdm
 
+from embodichain.lab.sim.cfg import RenderCfg
 from embodichain.utils.logger import log_info, log_error
 from embodichain.utils import configclass
 
@@ -112,7 +113,7 @@ def _sim_worker_fn(
     env_cfg.sim_cfg = SimulationManagerCfg(
         headless=gym_config.get("headless", True),
         sim_device=gym_config.get("device", "cpu"),
-        enable_rt=gym_config.get("enable_rt", True),
+        render_cfg=RenderCfg(renderer=gym_config.get("renderer", "hybrid")),
         gpu_id=gym_config.get("gpu_id", 0),
     )
 
diff --git a/embodichain/agents/rl/models/mlp.py b/embodichain/agents/rl/models/mlp.py
index f788dfed..459e08e3 100644
--- a/embodichain/agents/rl/models/mlp.py
+++ b/embodichain/agents/rl/models/mlp.py
@@ -22,7 +22,6 @@
 import torch
 import torch.nn as nn
 
-
 ActivationName = Union[str, None]
 
 
diff --git a/embodichain/agents/rl/train.py b/embodichain/agents/rl/train.py
index fa1f5948..0c74843a 100644
--- a/embodichain/agents/rl/train.py
+++ b/embodichain/agents/rl/train.py
@@ -37,6 +37,7 @@
 from embodichain.utils.utility import load_json
 from embodichain.utils.module_utils import find_function_from_modules
 from embodichain.lab.sim import SimulationManagerCfg
+from embodichain.lab.sim.cfg import RenderCfg
 from embodichain.lab.gym.envs.managers.cfg import EventCfg
 
 
@@ -113,7 +114,7 @@ def train_from_config(config_path: str, distributed: bool | None = None):
     save_freq = int(trainer_cfg.get("save_freq", 50000))
     num_eval_episodes = int(trainer_cfg.get("num_eval_episodes", 5))
     headless = bool(trainer_cfg.get("headless", True))
-    enable_rt = bool(trainer_cfg.get("enable_rt", False))
+    renderer = trainer_cfg.get("renderer", "hybrid")
     gpu_id = int(trainer_cfg.get("gpu_id", 0))
     num_envs = trainer_cfg.get("num_envs", None)
     wandb_project_name = trainer_cfg.get("wandb_project_name", "embodichain-generic")
@@ -205,13 +206,12 @@ def train_from_config(config_path: str, distributed: bool | None = None):
     else:
         gym_env_cfg.sim_cfg.sim_device = torch.device("cpu")
     gym_env_cfg.sim_cfg.headless = headless
-    gym_env_cfg.sim_cfg.enable_rt = enable_rt
-    gym_env_cfg.sim_cfg.gpu_id = local_rank if distributed else gpu_id
+    gym_env_cfg.sim_cfg.render_cfg = RenderCfg(renderer=renderer)
+    gym_env_cfg.sim_cfg.gpu_id = gpu_id
 
-    if rank == 0:
-        logger.log_info(
-            f"Loaded gym_config from {gym_config_path} (env_id={gym_config_data['id']}, num_envs={gym_env_cfg.num_envs}, headless={gym_env_cfg.sim_cfg.headless}, enable_rt={gym_env_cfg.sim_cfg.enable_rt}, sim_device={gym_env_cfg.sim_cfg.sim_device})"
-        )
+    logger.log_info(
+        f"Loaded gym_config from {gym_config_path} (env_id={gym_config_data['id']}, num_envs={gym_env_cfg.num_envs}, headless={gym_env_cfg.sim_cfg.headless}, renderer={gym_env_cfg.sim_cfg.render_cfg.renderer}, sim_device={gym_env_cfg.sim_cfg.sim_device})"
+    )
 
     env = build_env(gym_config_data["id"], base_env_cfg=gym_env_cfg)
     sample_obs, _ = env.reset()
diff --git a/embodichain/agents/rl/utils/trainer.py b/embodichain/agents/rl/utils/trainer.py
index 56ea0db2..93d01acf 100644
--- a/embodichain/agents/rl/utils/trainer.py
+++ b/embodichain/agents/rl/utils/trainer.py
@@ -16,6 +16,7 @@
 
 from __future__ import annotations
 
+from typing import Any, Dict
 import time
 import numpy as np
 import torch
@@ -85,6 +86,11 @@ def __init__(
         self.start_time = time.time()
         self.ret_window = deque(maxlen=100)
         self.len_window = deque(maxlen=100)
+        self.train_history: list[dict[str, float]] = []
+        self.eval_history: list[dict[str, float]] = []
+        self.last_eval_metrics: dict[str, float] = {}
+        self.last_train_metrics: dict[str, float] = {}
+        self.latest_checkpoint_path: str | None = None
         num_envs = getattr(self.env, "num_envs", None)
         if num_envs is None:
             raise RuntimeError("Env must expose num_envs for trainer statistics.")
@@ -146,9 +152,9 @@ def _pack_log_dict(self, prefix: str, data: dict) -> dict:
                 continue
         return out
 
-    def train(self, total_timesteps: int):
+    def train(self, total_timesteps: int) -> Dict[str, Any]:
         if self.rank == 0:
-            logger.log_info(f"Start training, total steps: {total_timesteps}")
+            print(f"Start training, total steps: {total_timesteps}")
         while self.global_step < total_timesteps:
             self._collect_rollout()
             losses = self.algorithm.update(self.buffer.get(flatten=False))
@@ -161,6 +167,7 @@ def train(self, total_timesteps: int):
                 self._eval_once(num_episodes=self.num_eval_episodes)
             if self.global_step % self.save_freq == 0:
                 self.save_checkpoint()
+        return self.get_summary()
 
     @torch.no_grad()
     def _collect_rollout(self):
@@ -197,9 +204,10 @@ def on_step(tensordict: TensorDict, info: dict):
                 if log_dict and self.use_wandb:
                     wandb.log(log_dict, step=self.global_step)
 
+        rollout = self.buffer.start_rollout()
         rollout = self.collector.collect(
             num_steps=self.buffer_size,
-            rollout=self.buffer.start_rollout(),
+            rollout=rollout,
             on_step_callback=on_step,
         )
         self.buffer.add(rollout)
@@ -278,13 +286,23 @@ def _sync_episode_stats(self) -> None:
             self.len_window.extend(all_len[start:])
 
     def _log_train(self, losses: Dict[str, float]):
-        if self.rank != 0:
-            return
+        elapsed = max(1e-6, time.time() - self.start_time)
+        sps = self.global_step / elapsed
+        avgR = np.mean(self.ret_window) if len(self.ret_window) > 0 else float("nan")
+        avgL = np.mean(self.len_window) if len(self.len_window) > 0 else float("nan")
+        history_entry = {
+            "global_step": float(self.global_step),
+            "charts/SPS": float(sps),
+            "charts/episode_reward_avg_100": float(avgR),
+            "charts/episode_length_avg_100": float(avgL),
+        }
+        history_entry.update({f"train/{k}": float(v) for k, v in losses.items()})
+        self.train_history.append(history_entry)
+        self.last_train_metrics = history_entry
+
         if self.writer:
             for k, v in losses.items():
                 self.writer.add_scalar(f"train/{k}", v, self.global_step)
-            elapsed = max(1e-6, time.time() - self.start_time)
-            sps = self.global_step / elapsed
             self.writer.add_scalar("charts/SPS", sps, self.global_step)
             if len(self.ret_window) > 0:
                 self.writer.add_scalar(
@@ -298,26 +316,24 @@ def _log_train(self, losses: Dict[str, float]):
                     float(np.mean(self.len_window)),
                     self.global_step,
                 )
-        # console
-        sps = self.global_step / max(1e-6, time.time() - self.start_time)
-        avgR = np.mean(self.ret_window) if len(self.ret_window) > 0 else float("nan")
-        avgL = np.mean(self.len_window) if len(self.len_window) > 0 else float("nan")
-        print(
-            f"[train] step={self.global_step} sps={sps:.0f} avgReward(100)={avgR:.3f} avgLength(100)={avgL:.1f}"
-        )
+        # console and external logging are rank-0 only in distributed mode.
+        if self.rank == 0:
+            print(
+                f"[train] step={self.global_step} sps={sps:.0f} avgReward(100)={avgR:.3f} avgLength(100)={avgL:.1f}"
+            )
 
-        # wandb (mirror TB logs)
-        if self.use_wandb:
-            log_dict = {f"train/{k}": v for k, v in losses.items()}
-            log_dict["charts/SPS"] = sps
-            if not np.isnan(avgR):
-                log_dict["charts/episode_reward_avg_100"] = float(avgR)
-            if not np.isnan(avgL):
-                log_dict["charts/episode_length_avg_100"] = float(avgL)
-            wandb.log(log_dict, step=self.global_step)
+            # wandb (mirror TB logs)
+            if self.use_wandb:
+                log_dict = {f"train/{k}": v for k, v in losses.items()}
+                log_dict["charts/SPS"] = sps
+                if not np.isnan(avgR):
+                    log_dict["charts/episode_reward_avg_100"] = float(avgR)
+                if not np.isnan(avgL):
+                    log_dict["charts/episode_length_avg_100"] = float(avgL)
+                wandb.log(log_dict, step=self.global_step)
 
     @torch.no_grad()
-    def _eval_once(self, num_episodes: int = 5):
+    def _eval_once(self, num_episodes: int = 5) -> Dict[str, float]:
         """Run evaluation for specified number of episodes.
 
         Each episode runs all parallel environments until completion, allowing
@@ -329,8 +345,11 @@ def _eval_once(self, num_episodes: int = 5):
         self.policy.eval()
         episode_returns = []
         episode_lengths = []
+        episode_successes = []
+        metric_values: dict[str, list[float]] = {}
 
-        self.eval_env.set_rollout_buffer(self.buffer.buffer)
+        # Evaluation does not consume the training rollout buffer; binding it here can
+        # overflow the shared RL buffer when eval episodes are longer than buffer_size.
         for _ in range(num_episodes):
             # Reset and initialize episode tracking
             obs, _ = self.eval_env.reset()
@@ -372,6 +391,17 @@ def _eval_once(self, num_episodes: int = 5):
                 still_running = ~done_mask
                 cumulative_reward[still_running] += reward[still_running].float()
                 step_count[still_running] += 1
+                newly_done = done & (~done_mask)
+                if newly_done.any():
+                    if isinstance(info, dict) and "success" in info:
+                        successes = info["success"][newly_done].detach().cpu().tolist()
+                        episode_successes.extend([float(v) for v in successes])
+                    if isinstance(info, dict) and "metrics" in info:
+                        for key, value in info["metrics"].items():
+                            values = value[newly_done].detach().cpu().tolist()
+                            metric_values.setdefault(key, []).extend(
+                                [float(v) for v in values]
+                            )
                 done_mask |= done
 
                 # Trigger evaluation events (e.g., video recording)
@@ -404,11 +434,44 @@ def _eval_once(self, num_episodes: int = 5):
             self.writer.add_scalar(
                 "eval/avg_length", float(np.mean(episode_lengths)), self.global_step
             )
+            if episode_successes:
+                self.writer.add_scalar(
+                    "eval/success_rate",
+                    float(np.mean(episode_successes)),
+                    self.global_step,
+                )
 
-    def save_checkpoint(self):
-        if self.rank != 0:
-            return
+        summary = {
+            "global_step": float(self.global_step),
+            "eval/avg_reward": (
+                float(np.mean(episode_returns)) if episode_returns else float("nan")
+            ),
+            "eval/avg_length": (
+                float(np.mean(episode_lengths)) if episode_lengths else float("nan")
+            ),
+            "eval/success_rate": (
+                float(np.mean(episode_successes)) if episode_successes else float("nan")
+            ),
+        }
+        for key, values in metric_values.items():
+            if values:
+                summary[f"eval/metrics/{key}"] = float(np.mean(values))
+        self.eval_history.append(summary)
+        self.last_eval_metrics = summary
+        if self.rank == 0 and self.use_wandb:
+            log_dict = {
+                key: value
+                for key, value in summary.items()
+                if key != "global_step" and not np.isnan(value)
+            }
+            if log_dict:
+                wandb.log(log_dict, step=self.global_step)
+        return summary
+
+    def save_checkpoint(self) -> str | None:
         # minimal model-only checkpoint; trainer/algorithm states can be added
+        if self.rank != 0:
+            return None
         path = f"{self.checkpoint_dir}/{self.exp_name}_step_{self.global_step}.pt"
         policy_state = (
             self.policy.module.state_dict()
@@ -422,4 +485,19 @@ def save_checkpoint(self):
             },
             path,
         )
+        self.latest_checkpoint_path = path
         print(f"Checkpoint saved: {path}")
+        return path
+
+    def get_summary(self) -> Dict[str, Any]:
+        elapsed = max(1e-6, time.time() - self.start_time)
+        return {
+            "global_step": int(self.global_step),
+            "elapsed_time_sec": float(elapsed),
+            "training_fps": float(self.global_step / elapsed),
+            "last_train_metrics": dict(self.last_train_metrics),
+            "last_eval_metrics": dict(self.last_eval_metrics),
+            "train_history": list(self.train_history),
+            "eval_history": list(self.eval_history),
+            "latest_checkpoint_path": self.latest_checkpoint_path,
+        }
diff --git a/embodichain/data/assets/eef_assets.py b/embodichain/data/assets/eef_assets.py
index b2644712..75918c7e 100644
--- a/embodichain/data/assets/eef_assets.py
+++ b/embodichain/data/assets/eef_assets.py
@@ -23,7 +23,6 @@
     EMBODICHAIN_DEFAULT_DATA_ROOT,
 )
 
-
 eef_assets = "eef_assets"
 
 
diff --git a/embodichain/data/assets/materials.py b/embodichain/data/assets/materials.py
index 8243cb8a..22183147 100644
--- a/embodichain/data/assets/materials.py
+++ b/embodichain/data/assets/materials.py
@@ -27,7 +27,6 @@
     EMBODICHAIN_DEFAULT_DATA_ROOT,
 )
 
-
 material_assets = "materials"
 
 
@@ -101,6 +100,46 @@ def get_material_list(self) -> List[str]:
         ]
 
 
+class EnvMapHDR(EmbodiChainDataset):
+    def __init__(self, data_root: str = None):
+        data_descriptor = o3d.data.DataDescriptor(
+            os.path.join(EMBODICHAIN_DOWNLOAD_PREFIX, material_assets, "EnvMapHDR.zip"),
+            "ea7abc8e955fe64069073d63834da60e",
+        )
+        prefix = type(self).__name__
+        path = EMBODICHAIN_DEFAULT_DATA_ROOT if data_root is None else data_root
+
+        super().__init__(prefix, data_descriptor, path)
+
+    def get_env_map_path(self, name: str) -> str:
+        """Get the path of an HDR environment map.
+
+        Args:
+            name (str): The name of the HDR environment map.
+
+        Returns:
+            str: The path to the HDR environment map file.
+        """
+        env_map_names = self.get_env_map_list()
+        if name not in env_map_names:
+            logger.log_error(
+                f"Invalid env map name: {name}. Available names are: {env_map_names}"
+            )
+        return str(Path(self.extract_dir) / "EnvMapHDR" / name)
+
+    def get_env_map_list(self) -> List[str]:
+        """Get the names of all HDR environment maps.
+
+        Returns:
+            List[str]: The names of all HDR environment map files.
+        """
+        return [
+            f.name
+            for f in Path(self.extract_dir).glob("EnvMapHDR/*.hdr")
+            if f.is_file()
+        ]
+
+
 class CocoBackground(EmbodiChainDataset):
     def __init__(self, data_root: str = None):
         data_descriptor = o3d.data.DataDescriptor(
diff --git a/embodichain/data/assets/obj_assets.py b/embodichain/data/assets/obj_assets.py
index e81fd252..89f28d0d 100644
--- a/embodichain/data/assets/obj_assets.py
+++ b/embodichain/data/assets/obj_assets.py
@@ -23,7 +23,6 @@
     EMBODICHAIN_DEFAULT_DATA_ROOT,
 )
 
-
 obj_assets = "obj_assets"
 
 
diff --git a/embodichain/data/assets/robot_assets.py b/embodichain/data/assets/robot_assets.py
index 55cd17a7..f37cfd3a 100644
--- a/embodichain/data/assets/robot_assets.py
+++ b/embodichain/data/assets/robot_assets.py
@@ -23,7 +23,6 @@
     EMBODICHAIN_DEFAULT_DATA_ROOT,
 )
 
-
 robot_assets = "robot_assets"
 
 
@@ -54,9 +53,9 @@ class CobotMagicArm(EmbodiChainDataset):
     def __init__(self, data_root: str = None):
         data_descriptor = o3d.data.DataDescriptor(
             os.path.join(
-                EMBODICHAIN_DOWNLOAD_PREFIX, robot_assets, "CobotMagicArmV2.zip"
+                EMBODICHAIN_DOWNLOAD_PREFIX, robot_assets, "CobotMagicArmV3.zip"
             ),
-            "14af3e84b74193680899a59fc74e8337",
+            "12a249e231bfc2faf0fd55f9e2646b8d",
         )
         prefix = type(self).__name__
         path = EMBODICHAIN_DEFAULT_DATA_ROOT if data_root is None else data_root
diff --git a/embodichain/data/assets/scene_assets.py b/embodichain/data/assets/scene_assets.py
index 5b7b90bb..751dc01a 100644
--- a/embodichain/data/assets/scene_assets.py
+++ b/embodichain/data/assets/scene_assets.py
@@ -23,7 +23,6 @@
     EMBODICHAIN_DEFAULT_DATA_ROOT,
 )
 
-
 scene_assets = "scene_assets"
 
 
diff --git a/embodichain/gen_sim/__init__.py b/embodichain/gen_sim/__init__.py
new file mode 100644
index 00000000..015c4151
--- /dev/null
+++ b/embodichain/gen_sim/__init__.py
@@ -0,0 +1,19 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/embodichain/gen_sim/simready_pipeline/__init__.py b/embodichain/gen_sim/simready_pipeline/__init__.py
new file mode 100644
index 00000000..015c4151
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/__init__.py
@@ -0,0 +1,19 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/embodichain/gen_sim/simready_pipeline/cli/__init__.py b/embodichain/gen_sim/simready_pipeline/cli/__init__.py
new file mode 100644
index 00000000..015c4151
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/cli/__init__.py
@@ -0,0 +1,19 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/embodichain/gen_sim/simready_pipeline/cli/start.py b/embodichain/gen_sim/simready_pipeline/cli/start.py
new file mode 100644
index 00000000..ee0372d0
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/cli/start.py
@@ -0,0 +1,85 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+import argparse
+from pathlib import Path
+import os
+
+os.environ["PYOPENGL_PLATFORM"] = "egl"
+
+from embodichain.gen_sim.simready_pipeline.pipeline.ingest import ingest_one_asset
+from embodichain.gen_sim.simready_pipeline.io.json_store import JsonStore
+from embodichain.gen_sim.simready_pipeline.parser.base import ParserManager
+
+
+def cli_ingest_single(
+    input_dir: str, output_dir: str, category: str, simple_ingest: bool
+):
+    input_path = Path(input_dir)
+    output_path = Path(output_dir)
+
+    if not input_path.exists():
+        raise FileNotFoundError(f"Input directory not found: {input_path}")
+
+    output_path.mkdir(parents=True, exist_ok=True)
+    store = JsonStore(output_path)
+    manager = ParserManager()
+
+    print(f"Processing Single Asset: {input_path.name} (Category: {category})")
+
+    asset = ingest_one_asset(
+        asset_dir=input_path,
+        category=category,
+        output_root=output_path,
+        store=store,
+        manager=manager,
+        simple_ingest=simple_ingest,
+    )
+
+    if asset:
+        print(f"Successfully Processed")
+    else:
+        print("no asset returned (might be direct_copy mode)")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="embodichain.gen_sim.simready_pipeline Asset Ingestion Pipeline"
+    )
+
+    parser.add_argument(
+        "--input_dir", type=str, help="Path to the single asset directory"
+    )
+    parser.add_argument("--output_root", type=str, help="Path to the output directory")
+    parser.add_argument(
+        "--category",
+        type=str,
+        required=True,
+        help="Specify the category for this asset (e.g., 'cup', 'chair')",
+    )
+    parser.add_argument(
+        "--simple", action="store_true", help="trimesh only, skip Blender"
+    )
+
+    args = parser.parse_args()
+
+    cli_ingest_single(
+        args.input_dir, args.output_root, args.category, simple_ingest=args.simple
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/embodichain/gen_sim/simready_pipeline/configs/__init__.py b/embodichain/gen_sim/simready_pipeline/configs/__init__.py
new file mode 100644
index 00000000..015c4151
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/configs/__init__.py
@@ -0,0 +1,19 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/embodichain/gen_sim/simready_pipeline/configs/gen_config.json b/embodichain/gen_sim/simready_pipeline/configs/gen_config.json
new file mode 100644
index 00000000..5a2bf634
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/configs/gen_config.json
@@ -0,0 +1,70 @@
+{
+  "ingest": {
+    "canonical_asset_name": "asset.obj",
+    "unprocessed_formats": [".urdf", ".usd"],
+    "parseable_mesh_formats": [".glb", ".gltf", ".obj", ".ply", ".stl"]
+  },
+  "mesh_processing": {
+    "trimesh_ingest": {
+      "scene_mesh_strategy": "first",
+      "mtl_name": "asset.mtl",
+      "visual": {
+        "default_face_color": [128, 128, 128, 255],
+        "pbr_base_color_only": true
+      },
+      "export": {
+        "include_normals": true,
+        "include_color": true,
+        "include_texture": true,
+        "write_texture": false
+      }
+    },
+    "blender_remesh_bake": {
+      "remesh": {
+        "voxel_size": 0.01,
+        "min_voxel_size_ratio": 0.005,
+        "use_smooth_shade": true
+      },
+      "decimate": {
+        "ratio": 0.9
+      },
+      "uv": {
+        "angle_limit": 66.0,
+        "island_margin": 0.02
+      },
+      "bake": {
+        "texture_size": 2048,
+        "diffuse_texture_name": "diffuse.png",
+        "normal_texture_name": "normal.png",
+        "cage_extrusion_ratio": 0.05
+      },
+      "material": {
+        "name": "BakeMat"
+      }
+    },
+    "blender_cleanup_decimate": {
+      "enabled": true,
+      "cleanup": {
+        "merge_dist": 0.00001,
+        "remove_non_manifold": true,
+        "triangulate": false
+      },
+      "simplify": {
+        "ratio": 0.5,
+        "weld_distance": 0.0001,
+        "collapse_triangulate": true
+      }
+    },
+    "simready_finalize": {
+      "render_resolution": 1024
+    }
+  },
+  "llm": {
+    "openai_compatible": {
+      "api_key": "",
+      "model": "gpt-4o",
+      "base_url": "",
+      "default_query": {}
+    }
+  }
+}
diff --git a/embodichain/gen_sim/simready_pipeline/core/__init__.py b/embodichain/gen_sim/simready_pipeline/core/__init__.py
new file mode 100644
index 00000000..015c4151
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/core/__init__.py
@@ -0,0 +1,19 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/embodichain/gen_sim/simready_pipeline/core/asset.py b/embodichain/gen_sim/simready_pipeline/core/asset.py
new file mode 100644
index 00000000..020f696f
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/core/asset.py
@@ -0,0 +1,88 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+from datetime import datetime
+
+
+@dataclass
+class Asset:
+
+    asset_id: str
+
+    identity: Dict[str, Any] = field(default_factory=dict)
+    asset_data: Dict[str, Any] = field(default_factory=dict)
+
+    parsed: Dict[str, Any] = field(default_factory=dict)  # Visual, Geometry, Topology
+    semantics: Dict[str, Any] = field(default_factory=dict)
+    physics: Dict[str, Any] = field(default_factory=dict)
+    simulation: Dict[str, Any] = field(default_factory=dict)
+    affordance: Dict[str, Any] = field(default_factory=dict)
+    usd: Dict[str, Any] = field(default_factory=dict)
+
+    provenance: Dict[str, Any] = field(default_factory=dict)
+    quality: Dict[str, Any] = field(default_factory=dict)
+    status: Dict[str, Any] = field(default_factory=dict)
+    internal: Dict[str, Any] = field(default_factory=dict)
+
+    ingest_info: Dict[str, Any] = field(default_factory=dict)
+
+    def __post_init__(self) -> None:
+        self._init_simulation_defaults()
+        self.touch()
+
+    def _init_simulation_defaults(self) -> None:
+        self.simulation.setdefault("articulation", None)
+        self.simulation.setdefault("sim_ready", {})
+
+    def touch(self) -> None:
+        self.status["last_updated"] = datetime.now().isoformat()
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "asset_id": self.asset_id,
+            "identity": self.identity,
+            "asset_data": self.asset_data,
+            "parsed": self.parsed,
+            "quality": self.quality,
+            "semantics": self.semantics,
+            "physics": self.physics,
+            "simulation": self.simulation,
+            "usd": self.usd,
+            "provenance": self.provenance,
+            "status": self.status,
+            "internal": self.internal,
+            "affordance": self.affordance,
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "Asset":
+        return cls(
+            asset_id=data["asset_id"],
+            identity=data.get("identity", {}),
+            asset_data=data.get("asset_data", []),
+            parsed=data.get("parsed", {}),
+            quality=data.get("quality", {}),
+            semantics=data.get("semantics", {}),
+            physics=data.get("physics", {}),
+            simulation=data.get("simulation", {}),
+            usd=data.get("usd", {}),
+            provenance=data.get("provenance", {}),
+            status=data.get("status", {}),
+            internal=data.get("internal", {}),
+            affordance=data.get("affordance", {}),
+        )
diff --git a/embodichain/gen_sim/simready_pipeline/io/__init__.py b/embodichain/gen_sim/simready_pipeline/io/__init__.py
new file mode 100644
index 00000000..015c4151
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/io/__init__.py
@@ -0,0 +1,19 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/embodichain/gen_sim/simready_pipeline/io/json_store.py b/embodichain/gen_sim/simready_pipeline/io/json_store.py
new file mode 100644
index 00000000..65fee676
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/io/json_store.py
@@ -0,0 +1,80 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+import json
+from pathlib import Path
+from typing import Any, Optional
+
+from embodichain.gen_sim.simready_pipeline.core.asset import Asset
+
+
+class JsonStore:
+    """
+    Simple JSON-based store for Assets and a global registry.
+    """
+
+    def __init__(self, root_dir: str | Path):
+        self.root = Path(root_dir)
+        self.registry_path = self.root / "registry.json"
+
+    def _get_asset_json_path(self, asset_id: str) -> Path:
+        return self.root / asset_id / "asset.json"
+
+    def load_registry(self) -> dict[str, Any]:
+        if not self.registry_path.exists():
+            return {"assets": {}}
+
+        registry = json.loads(self.registry_path.read_text())
+        registry.setdefault("assets", {})
+        return registry
+
+    def _write_registry(self, registry: dict[str, Any]) -> None:
+        self.registry_path.parent.mkdir(parents=True, exist_ok=True)
+        self.registry_path.write_text(json.dumps(registry, indent=2))
+
+    def _register_asset(self, asset_id: str, asset_json: dict[str, Any]) -> None:
+        registry = self.load_registry()
+        registry["assets"][asset_id] = {
+            "path": str(self.root / asset_id),
+            "category": asset_json.get("identity", {}).get("category"),
+        }
+        self._write_registry(registry)
+
+    def save_asset(self, asset: Asset) -> None:
+        asset_path = self._get_asset_json_path(asset.asset_id)
+        asset_path.parent.mkdir(parents=True, exist_ok=True)
+        asset_json = asset.to_dict()
+        asset_path.write_text(json.dumps(asset_json, indent=2))
+        self._register_asset(asset.asset_id, asset_json)
+
+    def load_asset(self, asset_id: str) -> Optional[Asset]:
+        asset_path = self._get_asset_json_path(asset_id)
+        if not asset_path.exists():
+            return None
+        data = json.loads(asset_path.read_text())
+        return Asset.from_dict(data)
+
+    def write_asset(self, asset_id: str, asset_json: dict[str, Any]) -> None:
+        asset_root = self.root / asset_id
+        asset_root.mkdir(parents=True, exist_ok=True)
+
+        asset_path = asset_root / "asset.json"
+        asset_path.write_text(json.dumps(asset_json, indent=2))
+        self._register_asset(asset_id, asset_json)
+
+    def list_asset_ids(self) -> list[str]:
+        registry = self.load_registry()
+        return list(registry.get("assets", {}).keys())
diff --git a/embodichain/gen_sim/simready_pipeline/parser/__init__.py b/embodichain/gen_sim/simready_pipeline/parser/__init__.py
new file mode 100644
index 00000000..015c4151
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/parser/__init__.py
@@ -0,0 +1,19 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/embodichain/gen_sim/simready_pipeline/parser/base.py b/embodichain/gen_sim/simready_pipeline/parser/base.py
new file mode 100644
index 00000000..9583bf7d
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/parser/base.py
@@ -0,0 +1,97 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from typing import Dict, List, Optional
+from abc import ABC, abstractmethod
+from embodichain.gen_sim.simready_pipeline.core.asset import Asset
+from pathlib import Path
+
+
+class AssetParser(ABC):
+    """
+    Parser = capability, no orchestration logic.
+    """
+
+    name: str
+
+    @abstractmethod
+    def parse(self, asset: Asset, asset_root: Path) -> None:
+        """
+        Mutate asset in-place.
+        Must be idempotent.
+        """
+        raise NotImplementedError
+
+
+from embodichain.gen_sim.simready_pipeline.parser.inspector import AssetInspector
+from embodichain.gen_sim.simready_pipeline.parser.geometry import GeometryParser
+from embodichain.gen_sim.simready_pipeline.parser.physics import PhysicsParser
+from embodichain.gen_sim.simready_pipeline.parser.usd import UsdParser
+from embodichain.gen_sim.simready_pipeline.parser.internal import InternalParser
+
+
+class ParserManager:
+    """
+    Central parser dispatcher & pipeline owner.
+    """
+
+    DEFAULT_PIPELINE: List[str] = [
+        "inspector",
+        "geometry",
+        "physics",
+        "usd",
+        "internal",
+    ]
+
+    def __init__(self):
+        self._parsers: Dict[str, object] = {}
+
+        self._register(
+            AssetInspector(),
+            GeometryParser(),
+            PhysicsParser(),
+            UsdParser(),
+            InternalParser(),
+        )
+
+    def _register(self, *parsers):
+        for p in parsers:
+            if not getattr(p, "name", None):
+                raise ValueError(f"Parser missing name: {p}")
+            if p.name in self._parsers:
+                raise ValueError(f"Duplicate parser: {p.name}")
+            self._parsers[p.name] = p
+
+    def parse(
+        self,
+        asset: Asset,
+        asset_root: Path,
+        pipeline: Optional[List[str]] = None,
+    ) -> None:
+        pipeline = pipeline or self.DEFAULT_PIPELINE
+
+        for name in pipeline:
+            self._run(name, asset, asset_root)
+        asset.status["parsed"] = True
+
+    def parse_one(self, name: str, asset: Asset, asset_root: Path) -> None:
+        self._run(name, asset, asset_root)
+
+    def _run(self, name: str, asset: Asset, asset_root: Path):
+        parser = self._parsers.get(name)
+        if not parser:
+            raise KeyError(f"Parser not registered: {name}")
+        parser.parse(asset, asset_root)
diff --git a/embodichain/gen_sim/simready_pipeline/parser/geometry.py b/embodichain/gen_sim/simready_pipeline/parser/geometry.py
new file mode 100644
index 00000000..98fa4117
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/parser/geometry.py
@@ -0,0 +1,151 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import trimesh
+from embodichain.gen_sim.simready_pipeline.parser.base import AssetParser
+from embodichain.gen_sim.simready_pipeline.core.asset import Asset
+from embodichain.gen_sim.simready_pipeline.utils.geometry_utils import process_obj
+
+
+def _load_geometry_cleanup_config() -> dict:
+    config_path = Path(__file__).resolve().parents[1] / "configs" / "gen_config.json"
+    with config_path.open("r", encoding="utf-8") as f:
+        cfg = json.load(f)
+    return cfg.get("mesh_processing", {}).get(
+        "blender_cleanup_decimate", cfg.get("geometry_cleanup", {})
+    )
+
+
+GEOMETRY_CLEANUP_CONFIG = _load_geometry_cleanup_config()
+
+
+class GeometryParser(AssetParser):
+    name = "geometry"
+
+    def __init__(self):
+        super().__init__()
+
+    def _topology_stats(self, mesh: trimesh.Trimesh) -> dict[str, Any]:
+        stats: dict[str, Any] = {
+            "is_empty": bool(mesh.is_empty),
+            "is_watertight": bool(mesh.is_watertight),
+            "is_winding_consistent": bool(mesh.is_winding_consistent),
+            "is_volume": bool(mesh.is_volume),
+            "euler_number": None,
+            "body_count": int(mesh.body_count) if hasattr(mesh, "body_count") else None,
+            "face_component_count": None,
+            "broken_face_count": None,
+            "boundary_edge_count": None,
+            "manifold_edge_count": None,
+            "nonmanifold_edge_count": None,
+            "edge_incidence_hist": None,
+        }
+
+        if mesh.is_empty:
+            return stats
+
+        try:
+            tmp = mesh.copy(include_visual=False)
+            tmp.remove_unreferenced_vertices()
+            stats["euler_number"] = int(tmp.euler_number)
+        except Exception:
+            try:
+                stats["euler_number"] = int(mesh.euler_number)
+            except Exception:
+                stats["euler_number"] = None
+
+        stats["face_component_count"] = None
+
+        try:
+            broken = trimesh.repair.broken_faces(mesh)
+            stats["broken_face_count"] = int(len(broken))
+        except Exception:
+            stats["broken_face_count"] = None
+
+        try:
+            edges = mesh.edges_unique
+            if len(edges) > 0:
+                counts = np.bincount(mesh.edges_unique_inverse)
+                stats["boundary_edge_count"] = int(np.sum(counts == 1))
+                stats["manifold_edge_count"] = int(np.sum(counts == 2))
+                stats["nonmanifold_edge_count"] = int(np.sum(counts > 2))
+        except Exception:
+            pass
+
+        return stats
+
+    def parse(self, asset: Asset, asset_root: Path) -> None:
+        asset.parsed.setdefault("geometry", {})
+
+        if asset.asset_data.get("type") != "mesh":
+            asset.parsed["geometry"] = {"asset dont have a mesh": "skipped"}
+            return
+
+        mesh_path = asset_root / asset.asset_data.get("path")
+        if GEOMETRY_CLEANUP_CONFIG.get("enabled", True):
+            cleanup_config = GEOMETRY_CLEANUP_CONFIG.get("cleanup", {})
+            simplify_config = GEOMETRY_CLEANUP_CONFIG.get("simplify", {})
+            process_obj(
+                input_path=str(mesh_path),
+                output_path=str(mesh_path),
+                ratio=simplify_config.get(
+                    "ratio", GEOMETRY_CLEANUP_CONFIG.get("ratio", 0.5)
+                ),
+                weld_distance=simplify_config.get(
+                    "weld_distance",
+                    GEOMETRY_CLEANUP_CONFIG.get("weld_distance", 0.0001),
+                ),
+                merge_dist=cleanup_config.get(
+                    "merge_dist", GEOMETRY_CLEANUP_CONFIG.get("merge_dist", 1e-5)
+                ),
+                remove_non_manifold=cleanup_config.get(
+                    "remove_non_manifold",
+                    GEOMETRY_CLEANUP_CONFIG.get("remove_non_manifold", True),
+                ),
+                triangulate=cleanup_config.get(
+                    "triangulate",
+                    GEOMETRY_CLEANUP_CONFIG.get("triangulate", False),
+                ),
+                collapse_triangulate=simplify_config.get("collapse_triangulate", True),
+            )
+
+        try:
+
+            mesh = trimesh.load(
+                mesh_path, force="mesh", skip_materials=True, process=False
+            )
+
+            geom_info = {
+                "vertices": int(len(mesh.vertices)),
+                "faces": int(len(mesh.faces)),
+                "bounds": mesh.bounds.tolist() if mesh.bounds is not None else None,
+                "extents": mesh.extents.tolist() if mesh.extents is not None else None,
+                "area": float(mesh.area),
+            }
+
+            geom_info.update(self._topology_stats(mesh))
+            asset.parsed["geometry"] = geom_info
+
+        except Exception as e:
+            print(f"[GEOMETRY PARSER FAILED] {mesh_path}: {str(e)}")
+            asset.parsed["geometry"] = {"error": str(e)}
diff --git a/embodichain/gen_sim/simready_pipeline/parser/inspector.py b/embodichain/gen_sim/simready_pipeline/parser/inspector.py
new file mode 100644
index 00000000..65e113d9
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/parser/inspector.py
@@ -0,0 +1,91 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from pathlib import Path
+from embodichain.gen_sim.simready_pipeline.core.asset import Asset
+from embodichain.gen_sim.simready_pipeline.parser.base import AssetParser
+
+
+class AssetInspector(AssetParser):
+    name = "inspector"
+
+    def _find_first_file(self, root: Path, suffixes: tuple[str, ...]) -> Path | None:
+        candidates: list[Path] = []
+        for suffix in suffixes:
+            candidates.extend(sorted(root.rglob(f"*{suffix}")))
+        return candidates[0] if candidates else None
+
+    def parse(self, asset: Asset, asset_root: Path) -> None:
+        asset_source_dir = asset_root / "asset_source"
+
+        asset.asset_data.clear()
+        asset.simulation.setdefault("articulation", {})
+
+        if not asset_source_dir.exists():
+            print(f"Warning: asset_source not found: {asset_source_dir}")
+            return
+
+        asset_id = asset.asset_id
+        canonical_mesh = asset_source_dir / "asset.obj"
+
+        urdf_file = self._find_first_file(asset_source_dir, (".urdf",))
+        if urdf_file is not None:
+            asset.simulation["articulation"] = {
+                "type": "articulation",
+                "format": "urdf",
+                "file_path": str(urdf_file.relative_to(asset_root)),
+            }
+            asset.asset_data = {
+                "id": asset_id,
+                "type": "articulation",
+                "format": "urdf",
+                "path": str(urdf_file.relative_to(asset_root)),
+            }
+            return
+
+        if canonical_mesh.exists():
+            asset.asset_data = {
+                "id": asset_id,
+                "type": "mesh",
+                "format": "obj",
+                "path": str(canonical_mesh.relative_to(asset_root)),
+            }
+            return
+
+        mesh_file = self._find_first_file(
+            asset_source_dir, (".obj", ".gltf", ".glb", ".ply", ".stl")
+        )
+        if mesh_file is not None:
+            asset.asset_data = {
+                "id": asset_id,
+                "type": "mesh",
+                "format": mesh_file.suffix.lstrip(".").lower(),
+                "path": str(mesh_file.relative_to(asset_root)),
+            }
+            return
+
+        usd_file = self._find_first_file(asset_source_dir, (".usd",))
+
+        if usd_file is not None:
+            asset.asset_data = {
+                "id": asset_id,
+                "type": "scene",
+                "format": "usd",
+                "path": str(usd_file.relative_to(asset_root)),
+            }
+            return
+
+        print(f"Warning: No supported files found in {asset_source_dir}")
diff --git a/embodichain/gen_sim/simready_pipeline/parser/internal.py b/embodichain/gen_sim/simready_pipeline/parser/internal.py
new file mode 100644
index 00000000..fcd3bafd
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/parser/internal.py
@@ -0,0 +1,126 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+import numpy as np
+import trimesh
+import pyrender
+from PIL import Image
+from pathlib import Path
+from embodichain.gen_sim.simready_pipeline.core.asset import Asset
+from embodichain.gen_sim.simready_pipeline.parser.base import AssetParser
+
+
+class InternalParser(AssetParser):
+    name = "internal"
+
+    @staticmethod
+    def _render_thumbnail(mesh: trimesh.Trimesh, output_path: Path) -> None:
+        """
+        Internal static function to handle the rendering logic.
+        Camera is on X-axis positive, looking at the mesh's bounding box center.
+        Z-axis is up.
+        """
+        bounds = mesh.bounds
+        model_center = (bounds[0] + bounds[1]) / 2.0
+        size = bounds[1] - bounds[0]
+
+        target_frustum_size = max(size[1], size[2]) * 1.5
+        yfov = np.pi / 4.0
+        img_width, img_height = 512, 512
+        camera_distance = (target_frustum_size / 2.0) / np.tan(yfov / 2.0)
+
+        eye = model_center + np.array([camera_distance, 0.0, 0.0])
+        target = model_center  # Look at the mesh center, not origin
+        up = np.array([0.0, 0.0, 1.0])  # Z-up
+
+        forward = eye - target
+        forward = forward / np.linalg.norm(forward)
+
+        right = np.cross(up, forward)
+        right = right / np.linalg.norm(right)
+
+        corrected_up = np.cross(forward, right)
+
+        camera_pose = np.eye(4)
+        camera_pose[:3, 0] = right
+        camera_pose[:3, 1] = corrected_up
+        camera_pose[:3, 2] = forward
+        camera_pose[:3, 3] = eye
+
+        scene = pyrender.Scene(bg_color=[1.0, 1.0, 1.0, 1.0])
+        pyrender_mesh = pyrender.Mesh.from_trimesh(mesh, smooth=False)
+        scene.add(pyrender_mesh)
+
+        camera = pyrender.PerspectiveCamera(
+            yfov=yfov, aspectRatio=img_width / img_height
+        )
+        scene.add(camera, pose=camera_pose)
+
+        key_light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=3.0)
+        key_pose = np.eye(4)
+        key_pose[:3, 3] = eye + np.array([0, camera_distance, camera_distance])
+        scene.add(key_light, pose=key_pose)
+
+        fill_light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=1.0)
+        fill_pose = np.eye(4)
+        fill_pose[:3, 3] = eye + np.array([0, -camera_distance, 0.5 * camera_distance])
+        scene.add(fill_light, pose=fill_pose)
+
+        renderer = pyrender.OffscreenRenderer(
+            viewport_width=img_width, viewport_height=img_height
+        )
+        color, _ = renderer.render(scene)
+        renderer.delete()
+
+        Image.fromarray(color).save(output_path)
+
+    def parse(self, asset: Asset, asset_root: Path) -> None:
+        asset.internal.setdefault("thumbnail_path", "")
+        asset.internal.setdefault("rendered", False)
+        asset.internal.setdefault("error", None)
+
+        mesh_path_ori = asset_root / asset.asset_data.get("path")
+        mesh_path_sr = asset_root / "asset_simready" / "asset_simready.obj"
+        mesh_path = None
+        if mesh_path_sr.exists():
+            mesh_path = mesh_path_sr
+        elif mesh_path_ori.exists():
+            mesh_path = mesh_path_ori
+        else:
+            asset.internal["error"] = (
+                "No mesh file found (neither simready nor original)"
+            )
+            return
+
+        try:
+
+            mesh = trimesh.load(str(mesh_path), force="mesh")
+            output_filename = f"{asset.asset_id}.png"
+            output_path = asset_root / output_filename
+            self._render_thumbnail(mesh, output_path)
+
+            asset.internal.update(
+                {
+                    "thumbnail_path": f"{asset.asset_id}/{asset.asset_id}.png",
+                    "rendered": True,
+                    "error": None,
+                }
+            )
+
+        except Exception as e:
+            asset.internal.update({"rendered": False, "error": f"Exception: {str(e)}"})
+
+        return
diff --git a/embodichain/gen_sim/simready_pipeline/parser/physics.py b/embodichain/gen_sim/simready_pipeline/parser/physics.py
new file mode 100644
index 00000000..7118cfbb
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/parser/physics.py
@@ -0,0 +1,479 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import json
+import re
+from copy import deepcopy
+from pathlib import Path
+from typing import Dict, Any, List
+
+from embodichain.gen_sim.simready_pipeline.core.asset import Asset
+from embodichain.gen_sim.simready_pipeline.parser.base import AssetParser
+from embodichain.gen_sim.simready_pipeline.utils.simready_utils import (
+    process_mesh,
+    delete_rendered_pngs,
+    client,
+    DEPLOYMENT,
+)
+
+DEFAULT_RIGID_PHYSICS: Dict[str, Any] = {
+    "mass": 1.0,
+    "density": 1000.0,
+    "linear_damping": 0.7,
+    "angular_damping": 0.7,
+    "enable_collision": True,
+    "enable_ccd": False,
+    "contact_offset": 0.002,
+    "rest_offset": 0.001,
+    "dynamic_friction": 0.5,
+    "static_friction": 0.5,
+    "restitution": 0.0,
+    "max_linear_velocity": 1.0e2,
+    "max_angular_velocity": 1.0e2,
+    "max_depenetration_velocity": 10.0,
+    "solver_min_position_iters": 4,
+    "solver_min_velocity_iters": 1,
+    "sleep_threshold": 0.001,
+}
+
+DEFAULT_SOFTBODY_PHYSICS: Dict[str, Any] = {
+    "triangle_remesh_resolution": 8,
+    "triangle_simplify_target": 0,
+    "maximal_edge_length": 0.0,
+    "simulation_mesh_resolution": 8,
+    "simulation_mesh_output_obj": False,
+    "mass": -1.0,
+    "density": 1000.0,
+    "youngs_modulus": 1.0e6,
+    "poissons_ratio": 0.45,
+    "material_model": "CO_ROTATIONAL",
+    "elasticity_damping": 0.0,
+    "vertex_velocity_damping": 0.005,
+    "linear_damping": 0.0,
+    "enable_ccd": False,
+    "enable_self_collision": False,
+    "self_collision_stress_tolerance": 0.9,
+    "collision_mesh_simplification": True,
+    "self_collision_filter_distance": 0.1,
+    "has_gravity": True,
+    "max_velocity": 100.0,
+    "max_depenetration_velocity": 1.0e6,
+    "sleep_threshold": 0.05,
+    "settling_threshold": 0.1,
+    "settling_damping": 10.0,
+    "solver_min_position_iters": 4,
+    "solver_min_velocity_iters": 1,
+}
+
+ALLOWED_MODES = {"rigid", "softbody", "articulation"}
+RIGID_KEYS = list(DEFAULT_RIGID_PHYSICS.keys())
+SOFT_KEYS = list(DEFAULT_SOFTBODY_PHYSICS.keys())
+
+
+def _load_simready_finalize_config() -> dict:
+    config_path = Path(__file__).resolve().parents[1] / "configs" / "gen_config.json"
+    with config_path.open("r", encoding="utf-8") as f:
+        cfg = json.load(f)
+    return cfg.get("mesh_processing", {}).get("simready_finalize", {})
+
+
+SIMREADY_FINALIZE_CONFIG = _load_simready_finalize_config()
+
+PHYSICS_SYSTEM_PROMPT = """You are a physics annotation model for robot training and simulation-ready asset ingestion.
+
+This task is safety-critical: a wrong physical annotation can cause severe hardware damage, unsafe robot behavior, broken simulation, and large downstream losses.
+
+You must reason from the real physical world:
+- infer the most plausible physics mode from the description
+- estimate realistic values using object material, shape, use case, and expected behavior
+- be conservative and physically plausible
+- do not hallucinate exotic values
+- do not explain your reasoning
+- do not output markdown
+- do not output any extra text outside JSON
+- do not output any keys other than the required keys
+
+CRITICAL COMPLETENESS REQUIREMENT:
+- You MUST return every required property for the chosen mode.
+- Do NOT omit any required key.
+- Do NOT return null for required keys.
+- Do NOT return empty strings for required keys.
+- Do NOT return partial objects.
+- If a field is hard to estimate, still provide your best physically plausible value.
+- Missing even one required property makes the output invalid.
+- The properties object must be fully populated and complete for the selected mode.
+
+You must return EXACTLY one JSON object with this structure:
+{
+  "mode": "rigid" | "softbody" | "articulation",
+  "confidence": 0.0-1.0,
+  "properties": {    
+    "mass": ,
+    "density": ,
+    "linear_damping": ,
+    "angular_damping": ,
+    "enable_collision": True,
+    "enable_ccd": ,
+    "contact_offset": ,
+    "rest_offset": ,
+    "dynamic_friction": ,
+    "static_friction": ,
+    "restitution": ,
+    "max_linear_velocity": ,
+    "max_angular_velocity": ,
+    "max_depenetration_velocity": ,
+    "solver_min_position_iters": 4,
+    "solver_min_velocity_iters": 1,
+    "sleep_threshold": 0.001, }
+}
+
+Important:
+- If the object is clearly deformable, cloth-like, flesh-like, cable-like, or highly elastic, choose "softbody".
+- If it is a mechanically jointed object with distinct links and joints, choose "articulation".
+- Otherwise choose "rigid".
+- Confidence must reflect how much the description supports the decision.
+- The properties object must match the selected mode exactly.
+- The properties object must include ALL required keys for the selected mode, no exceptions.
+
+For rigid mode:
+Return ONLY these keys, exactly once each:
+mass, density, linear_damping, angular_damping, enable_collision, enable_ccd,
+contact_offset, rest_offset, dynamic_friction, static_friction, restitution,
+max_linear_velocity, max_angular_velocity, max_depenetration_velocity,
+solver_min_position_iters, solver_min_velocity_iters, sleep_threshold
+
+Rigid mode completeness rules:
+- Every key listed above is mandatory.
+- No key may be missing.
+- No extra keys may appear.
+- If uncertain, choose a conservative physically plausible value for every field.
+- You must always provide a value for mass, density, damping, collision flags, contact offsets, friction, restitution, velocity limits, solver iterations, and sleep threshold.
+
+Guidance:
+- mass: estimate in kg from size/material/use case; if unknown use a conservative default near 1.0
+- density: use realistic density in kg/m^3 based on material; metals high, wood mid, foam low, plastic medium, stone high
+- linear_damping / angular_damping: higher for unstable / floating / draggy objects, lower for rigid stable objects
+- enable_collision: usually true for physical objects
+- enable_ccd: true only if fast motion or small/thin geometry would cause tunneling
+- contact_offset must be > rest_offset
+- friction: rubber/rough surfaces higher, metal/plastic smoother lower
+- restitution: bouncing materials higher, dead materials near 0
+- sleep_threshold: smaller for stable heavy objects, larger for tiny or soft objects
+
+For softbody mode:
+Return ONLY these keys, exactly once each:
+triangle_remesh_resolution, triangle_simplify_target, maximal_edge_length,
+simulation_mesh_resolution, simulation_mesh_output_obj,
+mass, density, youngs_modulus, poissons_ratio, material_model, elasticity_damping,
+vertex_velocity_damping, linear_damping,
+enable_ccd, enable_self_collision, self_collision_stress_tolerance,
+collision_mesh_simplification, self_collision_filter_distance,
+has_gravity, max_velocity, max_depenetration_velocity,
+sleep_threshold, settling_threshold, settling_damping,
+solver_min_position_iters, solver_min_velocity_iters
+
+Softbody mode completeness rules:
+- Every key listed above is mandatory.
+- No key may be missing.
+- No extra keys may appear.
+- If uncertain, choose a conservative physically plausible value for every field.
+- You must always provide a value for mesh resolution parameters, mass, density, elasticity parameters, collision parameters, gravity flags, damping terms, thresholds, and solver iterations.
+
+Guidance:
+- youngs_modulus: higher for stiffer materials; lower for cloth, flesh, foam, rubber-like objects
+- poissons_ratio: typical soft solids are around 0.3-0.49, avoid invalid values
+- material_model: choose the closest physically plausible model, default CO_ROTATIONAL if unsure
+- enable_self_collision: true for cloth, cables, highly deformable shapes that can fold onto themselves
+- collision_mesh_simplification: usually true for simulation efficiency
+- has_gravity: true unless explicitly suspended or otherwise constrained
+- max_depenetration_velocity: high enough to resolve interpenetration robustly
+
+For articulation mode:
+If you choose articulation, keep the properties object minimal and physically conservative.
+If you do not have enough evidence for articulation, prefer rigid.
+Even in articulation mode, the properties object must still be complete and valid according to the selected schema used by your pipeline.
+Do not omit any field that your downstream system expects for articulation.
+
+Output only JSON, no code fences, no explanation.
+"""
+
+
+def extract_json(text: str) -> Dict[str, Any]:
+    text = re.sub(r"```json|```", "", text).strip()
+    match = re.search(r"\{.*\}", text, re.DOTALL)
+    if not match:
+        raise ValueError("No JSON object found in response:\n" + text)
+    return json.loads(match.group())
+
+
+class PhysicsParser(AssetParser):
+    """
+    Physics inference & completion parser.
+    """
+
+    name = "physics"
+
+    def __init__(self):
+        super().__init__()
+
+    def parse(self, asset: Asset, asset_root: Path) -> None:
+        self._ensure_sections(asset)
+        self._simready_process(asset, asset_root)
+        self._infer_physics(asset)
+        self._ensure_properties(asset)
+        self._update_simulation_status(asset)
+
+    def _ensure_sections(self, asset: Asset) -> None:
+        asset.physics.setdefault("mode", None)
+        asset.physics.setdefault("properties", {})
+        asset.physics.setdefault("source", None)
+        asset.physics.setdefault("confidence", None)
+
+        asset.simulation["sim_ready"].setdefault("is_sim_ready", False)
+        asset.simulation["sim_ready"].setdefault("sim_ready_path", None)
+        asset.simulation.setdefault("blockers", [])
+
+    def _simready_process(self, asset: Asset, asset_root: Path) -> None:
+        mesh_path = asset_root / asset.asset_data.get("path")
+        out_path = asset_root / "asset_simready"
+
+        result = process_mesh(
+            mesh_path,
+            "asset",
+            extra_text=str(asset.ingest_info["extra_info"].get("simready_info", "")),
+            out_dir=out_path,
+            res=int(SIMREADY_FINALIZE_CONFIG.get("render_resolution", 1024)),
+        )
+        print(result)
+        semantics_generated = {}
+        semantics_generated["object_name_generated"] = result["semantics_result"][
+            "object_name"
+        ]
+        semantics_generated["semantic_tag_generated"] = result["semantics_result"][
+            "semantic_tag"
+        ]
+        semantics_generated["description_generated"] = result["semantics_result"][
+            "description"
+        ]
+        semantics_generated["primary_materials_generated"] = result["semantics_result"][
+            "primary_materials"
+        ]
+        asset.semantics.update(semantics_generated)
+        delete_rendered_pngs(out_path)
+        asset.simulation["sim_ready"]["is_sim_ready"] = True
+        sim_ready_path = asset_root / "asset_simready" / "asset_simready.obj"
+        rel_path = sim_ready_path.relative_to(asset_root)
+        asset.simulation["sim_ready"]["sim_ready_path"] = str(rel_path)
+        return
+
+    def _infer_physics(self, asset: Asset) -> None:
+        if asset.physics.get("mode"):
+            return
+
+        description = (
+            asset.semantics.get("description")
+            or asset.semantics.get("description_generated")
+            or ""
+        ).strip()
+
+        try:
+            result = self._call_LLM(description)
+
+            mode = result["mode"]
+            if mode not in ALLOWED_MODES:
+                raise ValueError(f"Invalid mode returned by LLM: {mode}")
+
+            properties = result.get("properties")
+            if not isinstance(properties, dict):
+                raise ValueError("LLM returned non-dict properties")
+
+            properties = self._validate_and_sanitize_properties(mode, properties)
+
+            asset.physics["mode"] = mode
+            asset.physics["properties"] = {
+                "mode": mode,
+                "data": properties,
+            }
+            asset.physics["source"] = "generative"
+            asset.physics["confidence"] = float(result.get("confidence", 0.0))
+
+        except Exception:
+            mode = self._fallback_mode(asset)
+            asset.physics["mode"] = mode
+            asset.physics["properties"] = {
+                "mode": mode,
+                "data": self._default_properties(mode),
+            }
+            asset.physics["source"] = "default"
+            asset.physics["confidence"] = 0.0
+
+    def _call_LLM(self, description: str) -> Dict[str, Any]:
+        if not description:
+            raise ValueError("Missing semantics description for physics inference")
+
+        user_prompt = f"""
+            Asset description:
+            {description}
+
+            Infer the most plausible physics mode and physical properties for this asset.
+
+            Hard constraints:
+            - Output EXACTLY one JSON object.
+            - Do not include markdown, comments, or any extra text.
+            - Do not invent fields.
+            - The returned properties object must match the selected mode exactly.
+            - Use real-world physical intuition.
+            - Prefer conservative, physically plausible values over aggressive or extreme values.
+            - If evidence for articulation is weak, prefer rigid.
+            """
+
+        resp = client.chat.completions.create(
+            model=DEPLOYMENT,
+            temperature=0.0,
+            messages=[
+                {"role": "system", "content": PHYSICS_SYSTEM_PROMPT},
+                {"role": "user", "content": user_prompt},
+            ],
+        )
+
+        content = resp.choices[0].message.content or ""
+        return extract_json(content)
+
+    def _fallback_mode(self, asset: Asset) -> str:
+        if asset.asset_data.get("type") == "articulation":
+            return "articulation"
+        return "rigid"
+
+    def _default_properties(self, mode: str) -> Dict[str, Any]:
+        if mode == "rigid":
+            return deepcopy(DEFAULT_RIGID_PHYSICS)
+        if mode == "softbody":
+            return deepcopy(DEFAULT_SOFTBODY_PHYSICS)
+        return {}
+
+    def _validate_and_sanitize_properties(
+        self, mode: str, properties: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        if mode == "rigid":
+            expected = set(RIGID_KEYS)
+            got = set(properties.keys())
+            if got != expected:
+                print(
+                    f"Rigid properties keys mismatch.\nExpected: {expected}\nGot: {got}"
+                )
+
+            out = deepcopy(DEFAULT_RIGID_PHYSICS)
+            for k in expected:
+                out[k] = properties[k]
+
+            out["contact_offset"] = float(out["contact_offset"])
+            out["rest_offset"] = float(out["rest_offset"])
+            if out["contact_offset"] <= out["rest_offset"]:
+                out["contact_offset"] = max(out["rest_offset"] + 1e-4, 1e-4)
+
+            out["mass"] = float(out["mass"])
+            out["density"] = float(out["density"])
+            out["linear_damping"] = float(out["linear_damping"])
+            out["angular_damping"] = float(out["angular_damping"])
+            out["dynamic_friction"] = float(out["dynamic_friction"])
+            out["static_friction"] = float(out["static_friction"])
+            out["restitution"] = float(out["restitution"])
+            out["max_linear_velocity"] = float(out["max_linear_velocity"])
+            out["max_angular_velocity"] = float(out["max_angular_velocity"])
+            out["max_depenetration_velocity"] = float(out["max_depenetration_velocity"])
+            out["solver_min_position_iters"] = int(out["solver_min_position_iters"])
+            out["solver_min_velocity_iters"] = int(out["solver_min_velocity_iters"])
+            out["sleep_threshold"] = float(out["sleep_threshold"])
+
+            return out
+
+        if mode == "softbody":
+            expected = set(SOFT_KEYS)
+            got = set(properties.keys())
+            if got != expected:
+                raise ValueError(
+                    f"Softbody properties keys mismatch.\nExpected: {expected}\nGot: {got}"
+                )
+
+            out = deepcopy(DEFAULT_SOFTBODY_PHYSICS)
+            for k in expected:
+                out[k] = properties[k]
+
+            out["triangle_remesh_resolution"] = int(out["triangle_remesh_resolution"])
+            out["triangle_simplify_target"] = int(out["triangle_simplify_target"])
+            out["maximal_edge_length"] = float(out["maximal_edge_length"])
+            out["simulation_mesh_resolution"] = int(out["simulation_mesh_resolution"])
+            out["simulation_mesh_output_obj"] = bool(out["simulation_mesh_output_obj"])
+
+            out["mass"] = float(out["mass"])
+            out["density"] = float(out["density"])
+            out["youngs_modulus"] = float(out["youngs_modulus"])
+            out["poissons_ratio"] = float(out["poissons_ratio"])
+            out["poissons_ratio"] = min(max(out["poissons_ratio"], 0.0), 0.49)
+            out["material_model"] = str(out["material_model"])
+            out["elasticity_damping"] = float(out["elasticity_damping"])
+            out["vertex_velocity_damping"] = float(out["vertex_velocity_damping"])
+            out["linear_damping"] = float(out["linear_damping"])
+            out["enable_ccd"] = bool(out["enable_ccd"])
+            out["enable_self_collision"] = bool(out["enable_self_collision"])
+            out["self_collision_stress_tolerance"] = float(
+                out["self_collision_stress_tolerance"]
+            )
+            out["collision_mesh_simplification"] = bool(
+                out["collision_mesh_simplification"]
+            )
+            out["self_collision_filter_distance"] = float(
+                out["self_collision_filter_distance"]
+            )
+            out["has_gravity"] = bool(out["has_gravity"])
+            out["max_velocity"] = float(out["max_velocity"])
+            out["max_depenetration_velocity"] = float(out["max_depenetration_velocity"])
+            out["sleep_threshold"] = float(out["sleep_threshold"])
+            out["settling_threshold"] = float(out["settling_threshold"])
+            out["settling_damping"] = float(out["settling_damping"])
+            out["solver_min_position_iters"] = int(out["solver_min_position_iters"])
+            out["solver_min_velocity_iters"] = int(out["solver_min_velocity_iters"])
+
+            return out
+
+        if properties and not isinstance(properties, dict):
+            raise ValueError("Articulation properties must be a dict")
+        return properties or {}
+
+    def _ensure_properties(self, asset: Asset) -> None:
+        props = asset.physics.get("properties", {})
+        if not props or not props.get("data"):
+            mode = asset.physics.get("mode")
+            asset.physics["properties"] = {
+                "mode": mode,
+                "data": self._default_properties(mode),
+            }
+            asset.physics["source"] = "default"
+
+    def _update_simulation_status(self, asset: Asset) -> None:
+        blockers: List[str] = []
+
+        if not asset.physics.get("mode"):
+            blockers.append("missing_physics_mode")
+
+        props = asset.physics.get("properties", {})
+        if not props.get("data"):
+            blockers.append("missing_physics_properties")
+
+        asset.simulation["blockers"] = blockers
+        # asset.simulation["sim_ready"] = len(blockers) == 0
diff --git a/embodichain/gen_sim/simready_pipeline/parser/usd.py b/embodichain/gen_sim/simready_pipeline/parser/usd.py
new file mode 100644
index 00000000..7c8488ba
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/parser/usd.py
@@ -0,0 +1,146 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from pathlib import Path
+from typing import Any, Dict
+
+import numpy as np
+import trimesh
+from embodichain.gen_sim.simready_pipeline.parser.base import AssetParser
+from embodichain.gen_sim.simready_pipeline.core.asset import Asset
+from embodichain.gen_sim.simready_pipeline.utils.usd_utils import (
+    convert_model_to_usd,
+    DEFAULT_PHYSICS_PARAMS,
+)
+
+
+class UsdParser(AssetParser):
+
+    name = "usd"
+
+    def __init__(self):
+        super().__init__()
+        self.physics_properties = {}
+
+    def build_physics(self, asset: Asset) -> Dict[str, Any]:
+
+        if not isinstance(asset.physics, dict):
+            raise ValueError("asset.physics must be a dict")
+
+        physics_block = asset.physics
+
+        if "properties" not in physics_block:
+            raise KeyError("asset.physics missing 'properties'")
+
+        props_block = physics_block["properties"]
+
+        if not isinstance(props_block, dict):
+            raise ValueError("asset.physics['properties'] must be dict")
+
+        if "data" not in props_block:
+            raise KeyError("asset.physics['properties'] missing 'data'")
+
+        data_block = props_block["data"]
+
+        if not isinstance(data_block, dict):
+            raise ValueError("asset.physics['properties']['data'] must be dict")
+
+        # Required numeric physics keys used by USD pipeline
+        required_keys = [
+            "mass",
+            "density",
+            "static_friction",
+            "dynamic_friction",
+            "restitution",
+            "linear_damping",
+            "angular_damping",
+        ]
+
+        # Merge provided data with defaults so missing keys are filled with safe defaults
+        merged_data = DEFAULT_PHYSICS_PARAMS.copy()
+        # data_block may contain a subset of params; update defaults with provided values
+        merged_data.update({k: v for k, v in data_block.items() if v is not None})
+
+        # Report any keys that were missing and therefore filled from defaults
+        missing = [k for k in required_keys if k not in data_block]
+        if missing:
+            print(
+                f"[Warning] Missing physics keys {missing}; using DEFAULT_PHYSICS_PARAMS for those values."
+            )
+
+        # Validate numeric types for required numeric keys
+        for k in required_keys:
+            if k not in merged_data:
+                # This should not happen because DEFAULT_PHYSICS_PARAMS contains these keys
+                raise KeyError(
+                    f"Missing required physics parameter even after merging defaults: {k}"
+                )
+            if not isinstance(merged_data[k], (int, float)):
+                raise TypeError(
+                    f"Physics param '{k}' must be numeric, got {type(merged_data[k])}"
+                )
+
+        # Use merged_data going forward
+        data_block = merged_data
+
+        self.physics_properties = {
+            "mode": physics_block["mode"],
+            "source": physics_block.get("source"),
+            "confidence": physics_block.get("confidence"),
+            "properties": {
+                "mode": props_block["mode"],
+                "data": data_block,
+            },
+        }
+
+        return self.physics_properties
+
+    def parse(self, asset: Asset, asset_root: Path) -> None:
+        asset.usd.setdefault("is_usd", False)
+        asset.usd.setdefault("usd_path", "")
+        if asset.asset_data.get("type") != "mesh":
+            asset.usd.update({"asset dont have a mesh": "skipped"})
+            return
+
+        mesh_path_ori = asset_root / asset.asset_data.get("path")
+        mesh_path_sr = asset_root / "asset_simready" / "asset_simready.obj"
+        mesh_path = (
+            mesh_path_sr
+            if mesh_path_sr.exists()
+            else mesh_path_ori if mesh_path_ori.exists() else None
+        )
+        out_path = asset_root / "asset_usd"
+        self.build_physics(asset)
+        convert_model_to_usd(
+            mesh_path,
+            out_path,
+            physics_params=self.physics_properties["properties"]["data"],
+        )
+        usd_file = out_path / "asset_simready_inst.usdc"
+        usd_path_str = ""
+        if usd_file.exists():
+            try:
+                usd_path_str = str(usd_file.relative_to(asset_root))
+            except Exception:
+                usd_path_str = str(usd_file)
+
+        asset.usd.update(
+            {
+                "is_usd": True,
+                "usd_path": usd_path_str,
+            }
+        )
+        return
diff --git a/embodichain/gen_sim/simready_pipeline/pipeline/__init__.py b/embodichain/gen_sim/simready_pipeline/pipeline/__init__.py
new file mode 100644
index 00000000..015c4151
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/pipeline/__init__.py
@@ -0,0 +1,19 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/embodichain/gen_sim/simready_pipeline/pipeline/ingest.py b/embodichain/gen_sim/simready_pipeline/pipeline/ingest.py
new file mode 100644
index 00000000..b87a16d1
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/pipeline/ingest.py
@@ -0,0 +1,160 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+from pathlib import Path
+import json
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+from typing import Iterable, Optional
+
+from embodichain.gen_sim.simready_pipeline.core.asset import Asset
+from embodichain.gen_sim.simready_pipeline.utils.ingest_utils import (
+    new_uuid,
+    trimesh_parse_ingest,
+    blender_parser_ingest,
+    inject_semantic_from_config,
+    inject_user_extra_info,
+)
+from embodichain.gen_sim.simready_pipeline.io.json_store import JsonStore
+from embodichain.gen_sim.simready_pipeline.parser.base import ParserManager
+
+
+def _load_ingest_config() -> dict:
+    config_path = Path(__file__).resolve().parents[1] / "configs" / "gen_config.json"
+    with config_path.open("r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+GEN_CONFIG = _load_ingest_config()
+INGEST_CONFIG = GEN_CONFIG.get("ingest", {})
+MESH_PROCESSING_CONFIG = GEN_CONFIG.get("mesh_processing", {})
+CANOCAIL_ASSET_NAME = INGEST_CONFIG.get("canonical_asset_name", "asset.obj")
+UNPROCESSED_FORMATS = INGEST_CONFIG.get(
+    "unprocessed_formats", [".urdf", ".usd"]
+)  # Copy these for now; parsing can be added later.
+PARSEABLE_MESH_FORMATS = INGEST_CONFIG.get(
+    "parseable_mesh_formats", [".glb", ".gltf", ".obj", ".ply", ".stl"]
+)  # Common mesh formats that need processing.
+
+TRIMESH_INGEST_CONFIG = MESH_PROCESSING_CONFIG.get("trimesh_ingest", {})
+BLENDER_REMESH_BAKE_CONFIG = MESH_PROCESSING_CONFIG.get(
+    "blender_remesh_bake", INGEST_CONFIG.get("blender_remesh_bake", {})
+)
+
+
+def ingest_one_asset(
+    asset_dir: str | Path,
+    category: str,
+    output_root: Path,
+    store: JsonStore,
+    manager: ParserManager,
+    simple_ingest: bool = True,
+) -> Optional[Asset]:
+
+    asset_dir = Path(asset_dir)  # source path
+
+    output_root = Path(output_root)
+    output_root.mkdir(parents=True, exist_ok=True)
+
+    asset_id = new_uuid()
+    asset_root = output_root / asset_id
+    asset_root.mkdir(parents=True, exist_ok=False)
+
+    asset_source = asset_root / "asset_source"
+    asset_archive = asset_root / "asset_archive"
+
+    files = [p for p in asset_dir.iterdir() if p.is_file()]
+    file_suffixes = {p.suffix.lower() for p in files}
+
+    has_unprocessed_format = any(
+        suffix in file_suffixes for suffix in UNPROCESSED_FORMATS
+    )
+
+    archive_dst = asset_archive / asset_dir.name
+    if archive_dst.exists():
+        raise RuntimeError(f"Archive destination already exists: {archive_dst}")
+    shutil.copytree(asset_dir, archive_dst)
+
+    def find_first_mesh_file(files, formats):
+        for suffix in formats:
+            candidates = sorted(p for p in files if p.suffix.lower() == suffix)
+            if candidates:
+                return candidates[0]
+        raise RuntimeError("No Valid Mesh File")
+
+    if has_unprocessed_format:
+        source_file = None
+        ingest_mode = "direct_copy"
+        asset_name = asset_dir.stem
+        visual_info = None
+    else:
+        source_file = find_first_mesh_file(files, PARSEABLE_MESH_FORMATS)
+        asset_name = source_file.stem if source_file else None
+        ingest_mode = "unified"
+        if simple_ingest:
+            visual_info = trimesh_parse_ingest(
+                source_file,
+                asset_source,
+                obj_name=CANOCAIL_ASSET_NAME,
+                mtl_name=Path(CANOCAIL_ASSET_NAME).with_suffix(".mtl").name,
+                config=TRIMESH_INGEST_CONFIG,
+            )
+        else:
+            visual_info = blender_parser_ingest(
+                source_file,
+                asset_source,
+                obj_name=CANOCAIL_ASSET_NAME,
+                config=BLENDER_REMESH_BAKE_CONFIG,
+                trimesh_config=TRIMESH_INGEST_CONFIG,
+            )
+
+    asset = Asset(
+        asset_id=asset_id,
+        identity={
+            "name": asset_name,
+            "source_dir": asset_dir.name,
+            "category": category,
+            "ingest_mode": ingest_mode,
+        },
+        parsed={"visual": visual_info},
+    )
+    asset.status["ingested"] = True
+    asset.status.setdefault("parsed", False)
+    asset.status.setdefault("validated", False)
+
+    if ingest_mode == "direct_copy":
+        shutil.copytree(asset_dir, asset_source)
+        asset.identity["normalized_source"] = "raw_copy"
+        asset.identity["source_file"] = None
+        asset.identity["source_type"] = "direct_copy"
+        store.save_asset(asset)
+        return asset  # no parser
+    else:
+        asset.identity["source_file"] = source_file.name
+        asset.identity["source_type"] = source_file.suffix.lower()
+        asset.identity["normalized_source"] = CANOCAIL_ASSET_NAME
+
+    inject_semantic_from_config(asset_dir, asset)
+    inject_user_extra_info(asset_dir, asset)
+    manager.parse(asset, asset_root)
+    store.save_asset(asset)
+
+    return asset
diff --git a/embodichain/gen_sim/simready_pipeline/utils/__init__.py b/embodichain/gen_sim/simready_pipeline/utils/__init__.py
new file mode 100644
index 00000000..015c4151
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/utils/__init__.py
@@ -0,0 +1,19 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+__all__: list[str] = []
diff --git a/embodichain/gen_sim/simready_pipeline/utils/geometry_utils.py b/embodichain/gen_sim/simready_pipeline/utils/geometry_utils.py
new file mode 100644
index 00000000..4fbf7c0b
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/utils/geometry_utils.py
@@ -0,0 +1,205 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import bpy
+from pathlib import Path
+
+
+def clear_scene():
+    bpy.ops.object.select_all(action="SELECT")
+    bpy.ops.object.delete(use_global=False, confirm=False)
+
+    for block in (
+        bpy.data.meshes,
+        bpy.data.materials,
+        bpy.data.images,
+        bpy.data.collections,
+    ):
+        for item in list(block):
+            try:
+                block.remove(item)
+            except:
+                pass
+
+
+def load_obj(filepath):
+    bpy.ops.wm.obj_import(filepath=str(filepath))
+    objs = [o for o in bpy.context.scene.objects if o.type == "MESH"]
+    return objs
+
+
+def join_meshes(objs):
+    if not objs:
+        raise RuntimeError("No mesh objects to join.")
+
+    bpy.ops.object.select_all(action="DESELECT")
+    for o in objs:
+        o.select_set(True)
+
+    bpy.context.view_layer.objects.active = objs[0]
+    bpy.ops.object.join()
+    return bpy.context.active_object
+
+
+def decimate_optimized(
+    obj,
+    ratio: float = 0.5,
+    weld_distance: float = 0.0001,
+    collapse_triangulate: bool = True,
+):
+
+    bpy.context.view_layer.objects.active = obj
+
+    if obj.mode != "OBJECT":
+        bpy.ops.object.mode_set(mode="OBJECT")
+
+    # 1) Weld
+    # weld_mod = obj.modifiers.new(name="Weld", type="WELD")
+    # weld_mod.merge_threshold = weld_distance
+    # bpy.ops.object.modifier_apply(modifier=weld_mod.name)
+    # bpy.ops.object.mode_set(mode="EDIT")
+    # bpy.ops.mesh.select_all(action="SELECT")
+
+    # bpy.ops.mesh.normals_make_consistent(inside=False)
+    # bpy.ops.mesh.customdata_custom_splitnormals_clear()
+
+    # bpy.ops.object.mode_set(mode="OBJECT")
+
+    # 2) remove loose
+    bpy.ops.object.mode_set(mode="EDIT")
+    bpy.ops.mesh.select_all(action="DESELECT")
+    bpy.ops.mesh.select_loose()
+    bpy.ops.mesh.delete(type="VERT")
+    bpy.ops.object.mode_set(mode="OBJECT")
+
+    # 3) decimate
+    print(f"Simplifying mesh (Ratio: {ratio})...")
+    decimate_mod = obj.modifiers.new(name="Decimate", type="DECIMATE")
+    decimate_mod.ratio = ratio
+    decimate_mod.use_collapse_triangulate = collapse_triangulate
+    bpy.ops.object.modifier_apply(modifier=decimate_mod.name)
+
+    # 4) post clean
+    bpy.ops.object.mode_set(mode="EDIT")
+    bpy.ops.mesh.select_all(action="SELECT")
+    bpy.ops.mesh.remove_doubles(threshold=weld_distance)
+    bpy.ops.mesh.delete_loose()
+    bpy.ops.object.mode_set(mode="OBJECT")
+
+    print(
+        f"[Info] Optimized state: Vertices {len(obj.data.vertices)}, Faces {len(obj.data.polygons)}"
+    )
+
+    return obj
+
+
+def clean_mesh(obj, merge_dist=1e-5, remove_non_manifold=True, triangulate=False):
+    bpy.context.view_layer.objects.active = obj
+
+    if obj.mode != "OBJECT":
+        bpy.ops.object.mode_set(mode="OBJECT")
+
+    bpy.ops.object.mode_set(mode="EDIT")
+    bpy.ops.mesh.select_all(action="SELECT")
+
+    bpy.ops.mesh.remove_doubles(threshold=merge_dist)
+
+    bpy.ops.mesh.delete_loose()
+
+    bpy.ops.mesh.dissolve_degenerate()
+
+    bpy.ops.mesh.normals_make_consistent(inside=False)
+
+    if remove_non_manifold:
+        bpy.ops.mesh.select_all(action="DESELECT")
+        bpy.ops.mesh.select_non_manifold()
+        bpy.ops.mesh.delete(type="VERT")
+
+    bpy.ops.mesh.select_all(action="SELECT")
+    bpy.ops.mesh.remove_doubles(threshold=merge_dist)
+    bpy.ops.mesh.delete_loose()
+
+    if triangulate:
+        bpy.ops.mesh.quads_convert_to_tris()
+
+    bpy.ops.object.mode_set(mode="OBJECT")
+    return obj
+
+
+def fill_holes(obj, max_sides=8):
+    bpy.context.view_layer.objects.active = obj
+
+    if obj.mode != "OBJECT":
+        bpy.ops.object.mode_set(mode="OBJECT")
+
+    bpy.ops.object.mode_set(mode="EDIT")
+    bpy.ops.mesh.select_all(action="SELECT")
+
+    bpy.ops.mesh.fill_holes(sides=max_sides)
+
+    bpy.ops.mesh.beautify_fill()
+    bpy.ops.mesh.dissolve_degenerate()
+    bpy.ops.mesh.normals_make_consistent(inside=False)
+
+    bpy.ops.object.mode_set(mode="OBJECT")
+    return obj
+
+
+def export_obj(obj, out_path):
+    bpy.ops.object.select_all(action="DESELECT")
+    obj.select_set(True)
+    bpy.context.view_layer.objects.active = obj
+
+    bpy.ops.wm.obj_export(filepath=str(out_path), export_selected_objects=True)
+
+
+def process_obj(
+    input_path,
+    output_path,
+    ratio=0.5,
+    weld_distance=0.0001,
+    merge_dist=1e-5,
+    remove_non_manifold=True,
+    triangulate=False,
+    collapse_triangulate=True,
+):
+    clear_scene()
+    objs = load_obj(input_path)
+    if not objs:
+        raise RuntimeError("No mesh objects imported.")
+
+    obj = join_meshes(objs)
+
+    bpy.context.view_layer.objects.active = obj
+    bpy.ops.object.transform_apply(location=True, rotation=True, scale=True)
+
+    clean_mesh(
+        obj,
+        merge_dist=merge_dist,
+        remove_non_manifold=remove_non_manifold,
+        triangulate=triangulate,
+    )
+    decimate_optimized(
+        obj,
+        ratio=ratio,
+        weld_distance=weld_distance,
+        collapse_triangulate=collapse_triangulate,
+    )
+
+    export_obj(obj, output_path)
+    print("Clean mesh saved to:", output_path)
diff --git a/embodichain/gen_sim/simready_pipeline/utils/ingest_utils.py b/embodichain/gen_sim/simready_pipeline/utils/ingest_utils.py
new file mode 100644
index 00000000..bb5a80d8
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/utils/ingest_utils.py
@@ -0,0 +1,487 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import uuid
+import trimesh
+import json
+from pathlib import Path
+from typing import Union, Dict, Any
+from embodichain.gen_sim.simready_pipeline.utils.texture_utils import classify_visual
+import hashlib
+import os
+from embodichain.gen_sim.simready_pipeline.core.asset import Asset
+
+
+def new_uuid() -> str:
+    return uuid.uuid4().hex
+
+
+def compute_folder_sha256(folder_path: Union[str, Path]) -> str:
+
+    folder_path = Path(folder_path).resolve()
+
+    if not folder_path.is_dir():
+        raise ValueError(f"Path {folder_path} is not a valid directory.")
+
+    sha256_hash = hashlib.sha256()
+
+    all_files = []
+    for root, dirs, files in os.walk(folder_path):
+        dirs.sort()
+        files.sort()
+        for file_name in files:
+            file_path = Path(root) / file_name
+            relative_path = file_path.relative_to(folder_path)
+            all_files.append(relative_path)
+
+    for rel_path in sorted(all_files):
+        full_path = folder_path / rel_path
+        sha256_hash.update(str(rel_path).encode("utf-8"))
+        with open(full_path, "rb") as f:
+            for byte_block in iter(lambda: f.read(65536), b""):
+                sha256_hash.update(byte_block)
+
+    return sha256_hash.hexdigest()
+
+
+def inject_semantic_from_config(asset_source: Path, asset: Asset) -> None:
+
+    config_path = asset_source / "config.json"
+
+    if not config_path.exists():
+        print(f"[INFO] No config.json found at {config_path}")
+        return
+    try:
+        with open(config_path, "r", encoding="utf-8") as f:
+            config: Dict[str, Any] = json.load(f)
+    except Exception as e:
+        print(f"[WARN] Failed to read config.json: {e}")
+        return
+
+    semantic = config.get("semantic")
+    if not semantic:
+        print("[INFO] No semantic field in config.json")
+        return
+
+    asset.semantics.setdefault("tags", [])
+    asset.semantics.setdefault("description", None)
+
+    if "tags" in semantic and isinstance(semantic["tags"], list):
+        existing_tags = set(asset.semantics.get("tags", []))
+        new_tags = set(semantic["tags"])
+        asset.semantics["tags"] = list(existing_tags | new_tags)
+
+    if "description" in semantic and semantic["description"]:
+        if not asset.semantics.get("description"):
+            asset.semantics["description"] = semantic["description"]
+
+    print(f"[INFO] Injected semantic from {config_path}")
+
+
+def inject_user_extra_info(asset_source: Path, asset: Asset) -> None:
+
+    config_path = asset_source / "config.json"
+    asset.ingest_info.setdefault("extra_info", {})
+    if not config_path.exists():
+        print(f"[INFO] No config.json found at {config_path}")
+        return
+    try:
+        with open(config_path, "r", encoding="utf-8") as f:
+            config: Dict[str, Any] = json.load(f)
+    except Exception as e:
+        print(f"[WARN] Failed to read config.json: {e}")
+        return
+
+    extra_info = config.get("extra_info")
+    if not extra_info:
+        print("[INFO] No extra_info field in config.json")
+        return
+
+    asset.ingest_info["extra_info"].update(extra_info)
+
+    print(f"[INFO] Injected extra_info from {config_path}")
+
+
+def load_one_trimesh(
+    path: str,
+    scene_mesh_strategy: str = "first",
+) -> Union[
+    trimesh.Trimesh, None
+]:  # The input may be a scene; process only the first geometry unless configured to concatenate.
+    try:
+        mesh_or_scene = trimesh.load_mesh(path)
+        if isinstance(mesh_or_scene, trimesh.Scene):
+            if len(mesh_or_scene.geometry) == 0:
+                print(f"No geometry found in Scene: {path}")
+                return None
+            if scene_mesh_strategy == "concatenate":
+                meshes = list(mesh_or_scene.geometry.values())
+                return trimesh.util.concatenate(meshes)
+            first_mesh = list(mesh_or_scene.geometry.values())[0]
+            return first_mesh
+        if isinstance(mesh_or_scene, trimesh.Trimesh):
+            return mesh_or_scene
+        print(f"Unexpected type: {type(mesh_or_scene)}")
+        return None
+
+    except Exception as e:
+        print(f"Failed to load {path}: {e}")
+        return None
+
+
+def trimesh_parse_ingest(
+    source_file: Path,
+    asset_source: Path,
+    obj_name: str = "asset.obj",
+    mtl_name: str = "asset.mtl",
+    write_files: bool = True,
+    config: Dict[str, Any] | None = None,
+):
+    config = config or {}
+    visual_config = config.get("visual", {})
+    export_config = config.get("export", {})
+    scene_mesh_strategy = config.get("scene_mesh_strategy", "first")
+    mtl_name = config.get("mtl_name", mtl_name)
+
+    mesh = load_one_trimesh(source_file, scene_mesh_strategy=scene_mesh_strategy)
+    if mesh is None:
+        return None
+
+    texture_info = classify_visual(mesh)
+    visual_category = texture_info.get("visual_category")
+    material_kind = texture_info.get("material_kind")
+    textures = texture_info.get("material", {}).get("textures", {})
+    uv_present = texture_info.get("uv_present")
+
+    visual = {
+        "visual_category": visual_category,
+        "uv_present": uv_present,
+        "texture_count_total": texture_info.get("texture_count_total"),
+        "material_kind": material_kind,
+        "textures": textures,
+    }
+    visual_ingest = None
+    asset_source = Path(asset_source)
+    asset_source.mkdir(parents=True, exist_ok=True)
+    obj_path = asset_source / obj_name
+
+    # ========= CASE 1: no visual =========
+    if visual_category == "None":
+        print("[INFO] No visual → assign default gray")
+
+        mesh.visual = trimesh.visual.ColorVisuals(
+            mesh,
+            face_colors=visual_config.get("default_face_color", [128, 128, 128, 255]),
+        )
+        visual_ingest = "no visual"
+
+    # ========= CASE 2: color =========
+    elif visual_category in ["color_face", "color_vertex"]:
+        print("[INFO] Vertex/Face color → export directly")
+        visual_ingest = "Color Visual"
+
+    # ========= CASE 3: texture =========
+    elif visual_category == "texture":
+
+        vis = mesh.visual
+
+        if not uv_present:
+            visual_ingest = "no UV! But detected as Visual.Texture"
+            print("[WARN] texture but no UV → export raw")
+
+        else:
+            # ---------- PBR ----------
+            if material_kind == "pbr" and visual_config.get(
+                "pbr_base_color_only", True
+            ):
+                print("[WARN] PBR → only baseColorTexture will be used")
+
+                base_tex = textures.get("baseColorTexture", {})
+
+                if base_tex.get("present"):
+                    base_img = vis.material.baseColorTexture
+
+                    simple_mat = trimesh.visual.material.SimpleMaterial(image=base_img)
+
+                    mesh.visual = trimesh.visual.texture.TextureVisuals(
+                        uv=vis.uv, image=base_img, material=simple_mat
+                    )
+                    visual_ingest = "Basecolor Texture from PBR as Visual"
+                else:
+                    print("[WARN] No baseColorTexture → fallback raw")
+
+            # ---------- Simple ----------
+            else:
+                visual_ingest = "Simple Texture"
+                print("[INFO] Simple texture → use directly")
+
+    else:
+        print("[WARN] Unknown visual type → export raw")
+
+    if write_files:
+        obj_str, tex_dict = trimesh.exchange.obj.export_obj(
+            mesh,
+            include_normals=export_config.get("include_normals", True),
+            include_color=export_config.get("include_color", True),
+            include_texture=export_config.get("include_texture", True),
+            return_texture=True,
+            write_texture=export_config.get("write_texture", False),
+            mtl_name=mtl_name,
+        )
+
+        # ===== Write OBJ =====
+        with open(obj_path, "w") as f:
+            f.write(obj_str)
+
+        # ===== Write texture / MTL =====
+        for name, data in tex_dict.items():
+            file_path = asset_source / name
+
+            if not file_path.exists():
+                with open(file_path, "wb") as f:
+                    f.write(data)
+
+    return {"visual_ingest": visual_ingest, "visual_source": visual}
+
+
+import bpy
+
+
+def modify_mtl_file(mtl_path: Path, diffuse_name: str, normal_name: str) -> None:
+    """Modify an exported OBJ .mtl to reference baked textures."""
+    mtl_path = Path(mtl_path)
+    if not mtl_path.exists():
+        return
+
+    lines = mtl_path.read_text(encoding="utf-8", errors="ignore").splitlines(True)
+
+    new_lines = []
+    for line in lines:
+        if line.startswith("Ns "):
+            new_lines.append("Ns 500.000000\n")
+        elif line.startswith("Ka "):
+            new_lines.append("Ka 1.000000 1.000000 1.000000\n")
+        elif line.startswith("Ks "):
+            new_lines.append("Ks 0.500000 0.500000 0.500000\n")
+        else:
+            new_lines.append(line)
+
+    new_lines.append(f"map_Kd {diffuse_name}\n")
+    new_lines.append(f"map_Bump {normal_name}\n")
+    new_lines.append(f"bump {normal_name} -bm 1.0\n")
+
+    mtl_path.write_text("".join(new_lines), encoding="utf-8")
+
+
+def blender_remesh_bake(
+    source_file: Path,
+    asset_source: Path,
+    texture_size: int | None = None,
+    png_name: str | None = None,
+    voxel_size: float | None = None,
+    decimate_ratio: float | None = None,
+    obj_name: str = "asset.obj",
+    config: Dict[str, Any] | None = None,
+):
+    """Remesh a high-poly mesh into a low-poly one and bake textures via Blender."""
+    config = config or {}
+    remesh_config = config.get("remesh", {})
+    decimate_config = config.get("decimate", {})
+    uv_config = config.get("uv", {})
+    bake_config = config.get("bake", {})
+    material_config = config.get("material", {})
+
+    texture_size = int(
+        texture_size
+        or bake_config.get("texture_size", config.get("texture_size", 2048))
+    )
+    diffuse_texture_name = png_name or bake_config.get(
+        "diffuse_texture_name", config.get("texture_name", "diffuse.png")
+    )
+    normal_texture_name = bake_config.get(
+        "normal_texture_name", config.get("normal_texture_name", "normal.png")
+    )
+    voxel_size = float(
+        voxel_size
+        if voxel_size is not None
+        else remesh_config.get("voxel_size", config.get("voxel_size", 0.01))
+    )
+    min_voxel_size_ratio = float(remesh_config.get("min_voxel_size_ratio", 0.005))
+    use_smooth_shade = bool(remesh_config.get("use_smooth_shade", True))
+    decimate_ratio = float(
+        decimate_ratio
+        if decimate_ratio is not None
+        else decimate_config.get("ratio", config.get("decimate_ratio", 0.5))
+    )
+    angle_limit = float(uv_config.get("angle_limit", 66.0))
+    island_margin = float(uv_config.get("island_margin", 0.02))
+    cage_extrusion_ratio = float(bake_config.get("cage_extrusion_ratio", 0.05))
+    material_name = material_config.get("name", "BakeMat")
+
+    asset_source = Path(asset_source)
+    asset_source.mkdir(parents=True, exist_ok=True)
+    source_file = Path(source_file)
+
+    bpy.ops.wm.read_factory_settings(use_empty=True)
+
+    ext = source_file.suffix.lower()
+    if ext == ".obj":
+        bpy.ops.wm.obj_import(filepath=str(source_file))
+    elif ext == ".fbx":
+        bpy.ops.import_scene.fbx(filepath=str(source_file))
+    elif ext in [".gltf", ".glb"]:
+        bpy.ops.import_scene.gltf(filepath=str(source_file))
+    elif ext == ".ply":
+        bpy.ops.wm.ply_import(filepath=str(source_file))
+    else:
+        raise RuntimeError(f"Unsupported extension: {ext}")
+
+    if bpy.ops.object.mode_set.poll():
+        bpy.ops.object.mode_set(mode="OBJECT")
+
+    imported_meshes = [obj for obj in bpy.context.scene.objects if obj.type == "MESH"]
+    if not imported_meshes:
+        raise RuntimeError("No mesh object after import")
+
+    bpy.ops.object.select_all(action="DESELECT")
+    for obj in imported_meshes:
+        obj.select_set(True)
+    bpy.context.view_layer.objects.active = imported_meshes[0]
+
+    if len(imported_meshes) > 1:
+        bpy.ops.object.join()
+    high_poly = bpy.context.view_layer.objects.active
+    if not high_poly or high_poly.type != "MESH":
+        raise RuntimeError("No active mesh object after import")
+    high_poly.name = "High_Poly"
+
+    auto_extrusion = max(high_poly.dimensions) * cage_extrusion_ratio
+
+    bpy.ops.object.select_all(action="DESELECT")
+    high_poly.select_set(True)
+    bpy.context.view_layer.objects.active = high_poly
+    bpy.ops.object.duplicate()
+    low_poly = bpy.context.active_object
+    if not low_poly:
+        raise RuntimeError("Failed to duplicate object")
+    low_poly.name = "Low_Poly_Target"
+    try:
+        low_poly.data.materials.clear()
+    except Exception:
+        pass
+
+    rem = low_poly.modifiers.new(name="Remesh", type="REMESH")
+    rem.mode = "VOXEL"
+    rem.voxel_size = max(
+        float(voxel_size), max(high_poly.dimensions) * min_voxel_size_ratio
+    )
+    rem.use_smooth_shade = use_smooth_shade
+    bpy.ops.object.modifier_apply(modifier="Remesh")
+
+    dec = low_poly.modifiers.new(name="Decimate", type="DECIMATE")
+    dec.ratio = float(decimate_ratio)
+    bpy.ops.object.modifier_apply(modifier="Decimate")
+
+    bpy.context.view_layer.objects.active = low_poly
+    bpy.ops.object.mode_set(mode="EDIT")
+    bpy.ops.mesh.select_all(action="SELECT")
+    bpy.ops.uv.smart_project(angle_limit=angle_limit, island_margin=island_margin)
+    bpy.ops.object.mode_set(mode="OBJECT")
+
+    mat = bpy.data.materials.new(name=material_name)
+    mat.use_nodes = True
+    low_poly.data.materials.append(mat)
+    nodes = mat.node_tree.nodes
+    nodes.clear()
+
+    def setup_node(name: str, is_color: bool):
+        img = bpy.data.images.new(
+            name, width=int(texture_size), height=int(texture_size)
+        )
+        node = nodes.new("ShaderNodeTexImage")
+        node.image = img
+        if not is_color:
+            img.colorspace_settings.name = "Non-Color"
+        return node, img
+
+    diff_node, diff_img = setup_node(diffuse_texture_name, True)
+    norm_node, norm_img = setup_node(normal_texture_name, False)
+
+    scene = bpy.context.scene
+    scene.render.engine = "CYCLES"
+    scene.render.bake.use_selected_to_active = True
+    scene.render.bake.cage_extrusion = auto_extrusion
+
+    bpy.ops.object.select_all(action="DESELECT")
+    high_poly.select_set(True)
+    low_poly.select_set(True)
+    bpy.context.view_layer.objects.active = low_poly
+
+    nodes.active = diff_node
+    bpy.ops.object.bake(type="DIFFUSE", pass_filter={"COLOR"})
+    diff_img.filepath_raw = str(asset_source / diffuse_texture_name)
+    diff_img.save()
+
+    nodes.active = norm_node
+    bpy.ops.object.bake(type="NORMAL")
+    norm_img.filepath_raw = str(asset_source / normal_texture_name)
+    norm_img.save()
+
+    export_path = asset_source / obj_name
+    bpy.ops.object.select_all(action="DESELECT")
+    low_poly.select_set(True)
+    bpy.ops.wm.obj_export(filepath=str(export_path), export_selected_objects=True)
+
+    mtl_path = asset_source / Path(obj_name).with_suffix(".mtl").name
+    modify_mtl_file(mtl_path, diffuse_texture_name, normal_texture_name)
+
+    return {
+        "png": str(asset_source / diffuse_texture_name),
+        "obj": str(export_path),
+        "mtl": str(mtl_path.name),
+    }
+
+
+def blender_parse_ingest(
+    source_file: Path,
+    asset_source: Path,
+    trimesh_config: Dict[str, Any] | None = None,
+    **kwargs,
+):
+    res = blender_remesh_bake(
+        source_file=source_file,
+        asset_source=asset_source,
+        **kwargs,
+    )
+    try:
+        asset_obj = Path(res["obj"])
+        vis = trimesh_parse_ingest(
+            asset_obj,
+            asset_source,
+            write_files=False,
+            config=trimesh_config,
+        )
+        if isinstance(vis, dict):
+            res.update(vis)
+    except Exception:
+        pass
+    return res
+
+
+def blender_parser_ingest(source_file: Path, asset_source: Path, **kwargs):
+    return blender_parse_ingest(source_file, asset_source, **kwargs)
diff --git a/embodichain/gen_sim/simready_pipeline/utils/simready_utils.py b/embodichain/gen_sim/simready_pipeline/utils/simready_utils.py
new file mode 100644
index 00000000..73db0874
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/utils/simready_utils.py
@@ -0,0 +1,1371 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+import argparse
+import base64
+import json
+import os
+import re
+from pathlib import Path
+import numpy as np
+import trimesh
+import pyrender
+from PIL import Image
+from openai import OpenAI
+import itertools
+from scipy.spatial import ConvexHull
+from typing import Dict, Any, List
+
+
+def _load_gen_config() -> Dict[str, Any]:
+    config_path = Path(__file__).resolve().parents[1] / "configs" / "gen_config.json"
+    if not config_path.exists():
+        raise FileNotFoundError(f"gen_config.json not found: {config_path}")
+
+    with config_path.open("r", encoding="utf-8") as f:
+        raw_cfg = json.load(f)
+
+    cfg = raw_cfg.get("llm", {}).get("openai_compatible", {})
+    cfg["api_key"] = os.getenv("OPENAI_API_KEY") or cfg.get("api_key", "")
+    cfg["model"] = os.getenv("OPENAI_MODEL") or cfg.get("model", "")
+    cfg["base_url"] = os.getenv("OPENAI_BASE_URL") or cfg.get("base_url", "")
+    cfg["default_query"] = cfg.get("default_query", {})
+    if cfg["base_url"]:
+        cfg["base_url"] = cfg["base_url"].rstrip("/")
+
+    required = ["api_key", "model", "base_url"]
+    missing = [k for k in required if k not in cfg or not cfg[k]]
+    if missing:
+        raise ValueError(f"Missing required config keys: {missing}")
+
+    return cfg
+
+
+_GEN_CONFIG = _load_gen_config()
+
+DEPLOYMENT = _GEN_CONFIG["model"]
+
+client = OpenAI(
+    api_key=_GEN_CONFIG["api_key"],
+    base_url=_GEN_CONFIG["base_url"],
+    default_query=_GEN_CONFIG.get("default_query") or None,
+)
+
+STRATEGY = None
+
+diagonal_views = [
+    ("view_from_111", np.array([1.3, 1.3, 1.3], dtype=float)),
+    ("view_from_000", np.array([-0.8, -0.8, -0.8], dtype=float)),
+]
+cardinal_views = [
+    ("view_from_front", np.array([1.8, 0.5, 0.5], dtype=float)),
+    ("view_from_left", np.array([0.5, -1.8, 0.5], dtype=float)),
+    ("view_from_right", np.array([0.5, 1.8, 0.5], dtype=float)),
+    ("view_from_back", np.array([-1.8, 0.5, 0.5], dtype=float)),
+]
+up_down_views = [
+    ("view_from_up_to_bottom", np.array([0.5, 0.5, 2.2], dtype=float)),
+    ("view_from_bottom_to_up", np.array([0.5, 0.5, -1.2], dtype=float)),
+]
+
+up_views = [
+    ("view_from_up_to_bottom", np.array([0.5, 0.5, 2.2], dtype=float)),
+]
+
+down_views = [
+    ("view_from_bottom_to_up", np.array([0.5, 0.5, -1.2], dtype=float)),
+]
+
+front_views = [
+    ("view_from_front", np.array([1.8, 0.5, 0.5], dtype=float)),
+]
+
+side_profile = [
+    ("view_from_up_to_bottom", np.array([0.5, 0.5, 2.2], dtype=float)),
+    ("view_from_front", np.array([1.8, 0.5, 0.5], dtype=float)),
+]
+
+
+def normalize_to_unit_cube(mesh):
+    minb, maxb = mesh.bounds
+    size = maxb - minb
+    size = np.maximum(size, 1e-8)
+    scale = 1.0 / np.max(size)
+    mesh.apply_scale(scale)
+    minb_scaled, maxb_scaled = mesh.bounds
+    center_scaled = (minb_scaled + maxb_scaled) / 2
+    translation = np.array([0.5, 0.5, 0.5]) - center_scaled
+    mesh.apply_translation(translation)
+
+
+def compute_support_area(mesh, eps=1e-2):
+    z_min = mesh.bounds[0][2]
+    verts = np.asarray(mesh.vertices)
+    mask = np.abs(verts[:, 2] - z_min) < eps
+    pts = verts[mask][:, :2]
+    if len(pts) < 3:
+        return 0.0
+    try:
+        hull = ConvexHull(pts)
+        return hull.volume
+    except Exception:
+        return 0.0
+
+
+import numpy as np
+import trimesh
+from pathlib import Path
+
+
+def init_pose(mesh_input):
+
+    fallback_mesh = None
+    mesh: trimesh.Trimesh = None
+
+    if isinstance(mesh_input, trimesh.Trimesh):
+        mesh = mesh_input.copy()
+        fallback_mesh = mesh_input.copy()
+    else:
+        mesh_path = Path(mesh_input).resolve()
+        if not mesh_path.exists():
+            raise FileNotFoundError(f"Mesh file not found: {mesh_path}")
+        mesh = trimesh.load(mesh_path, force="mesh")
+        fallback_mesh = mesh.copy()
+
+    def compute_pca_axes(mesh):
+        verts = np.asarray(mesh.vertices)
+        centroid = verts.mean(axis=0)
+        centered = verts - centroid
+        cov = np.cov(centered.T)
+        U, _, _ = np.linalg.svd(cov)
+        R = U
+        if np.linalg.det(R) < 0:
+            R[:, 2] *= -1
+        return R
+
+    def closest_axis(v):
+        idx = np.argmax(np.abs(v))
+        sign = np.sign(v[idx])
+        axis = np.zeros(3)
+        axis[idx] = sign
+        return axis
+
+    def generate_discrete_flips():
+        rotations = []
+        Rx90 = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
+        Ry90 = np.array([[0, 0, 1], [0, 1, 0], [-1, 0, 0]])
+        I = np.eye(3)
+        rotations.append(I)
+        Rx180 = Rx90 @ Rx90
+        rotations.append(Rx180)
+        rotations.append(Rx90)
+        Rx_neg90 = Rx90.T
+        rotations.append(Rx_neg90)
+        rotations.append(Ry90)
+        Ry_neg90 = Ry90.T
+        rotations.append(Ry_neg90)
+        return rotations
+
+    def compute_support_area(mesh):
+        hull = trimesh.convex.convex_hull(mesh)
+        support_poly = hull.project(plane=[0, 0, 1], origin=[0, 0, 0])
+        return support_poly.area
+
+    def stability_score(mesh):
+        area = compute_support_area(mesh)
+        com_z = mesh.center_mass[2]
+        return -(area / (com_z + 1e-6))
+
+    def normalize_to_unit_cube(mesh):
+        extents = mesh.extents
+        scale = 1.0 / np.max(extents)
+        mesh.apply_scale(scale)
+        mesh.vertices -= mesh.vertices.mean(axis=0)
+        z_min = mesh.bounds[0][2]
+        mesh.apply_translation([0, 0, -z_min])
+
+    def process_alignment(initial_mesh, align_type):
+        m = initial_mesh.copy()
+        if align_type == "pca":
+            R_pca = compute_pca_axes(m)
+            T = np.eye(4)
+            T[:3, :3] = R_pca.T
+            m.apply_transform(T)
+            U = compute_pca_axes(m)
+            x, y, z = U[:, 0], U[:, 1], U[:, 2]
+            nx = closest_axis(x)
+            ny = closest_axis(y)
+            nz = closest_axis(z)
+            nz /= np.linalg.norm(nz)
+            nx = nx - nz * np.dot(nx, nz)
+            nx /= np.linalg.norm(nx)
+            ny = np.cross(nz, nx)
+            R_snap = np.column_stack([nx, ny, nz])
+            m.apply_transform(np.eye(4)[:3, :3] @ R_snap)
+
+        elif align_type == "obb":
+            to_origin, _ = trimesh.bounds.oriented_bounds(m)
+            m.apply_transform(to_origin)
+            R = to_origin[:3, :3]
+            if np.linalg.det(R) < 0:
+                m.apply_transform(np.diag([1, 1, -1, 1]))
+        else:
+            raise ValueError(f"Unknown type {align_type}")
+
+        best_score = float("inf")
+        best = None
+        for Rf in generate_discrete_flips():
+            mc = m.copy()
+            Tf = np.eye(4)
+            Tf[:3, :3] = Rf
+            mc.apply_transform(Tf)
+            zmin = mc.bounds[0][2]
+            mc.apply_translation([0, 0, -zmin])
+            s = stability_score(mc)
+            if s < best_score:
+                best_score = s
+                best = mc.copy()
+        return best, best_score
+
+    try:
+        mesh_pca, score_pca = process_alignment(mesh, "pca")
+        mesh_obb, score_obb = process_alignment(mesh, "obb")
+
+        area_pca = compute_support_area(mesh_pca)
+        area_obb = compute_support_area(mesh_obb)
+
+        result_mesh = mesh_obb
+        STRATEGY = "OBB"
+
+        if area_pca > area_obb * 1.3:
+            result_mesh = mesh_pca
+            STRATEGY = "PCA"
+
+        normalize_to_unit_cube(result_mesh)
+        return result_mesh
+
+    except Exception as e:
+        return fallback_mesh
+
+
+def extract_json(text):
+    text = re.sub(r"```json|```", "", text).strip()
+    match = re.search(r"\{.*\}", text, re.DOTALL)
+    if not match:
+        raise ValueError("No JSON object found in response:\n" + text)
+    return json.loads(match.group())
+
+
+def encode_image(p):
+    img_path = Path(p).resolve()
+    if not img_path.exists():
+        raise FileNotFoundError(f"Image file not found: {img_path}")
+    with open(img_path, "rb") as f:
+        return base64.b64encode(f.read()).decode()
+
+
+def build_image_inputs(views_data):
+    content = []
+    for v in views_data:
+        name = v["name"]
+        img_b64 = encode_image(v["path"])
+        content.append({"type": "text", "text": f'View "{name}"'})
+        content.append(
+            {
+                "type": "image_url",
+                "image_url": {"url": f"data:image/png;base64,{img_b64}"},
+            }
+        )
+    return content
+
+
+def render_views(mesh, views, out_dir, res=512):
+    import numpy as np
+    import pyrender
+    from PIL import Image
+    import trimesh
+
+    mesh = mesh.copy()
+
+    mesh.apply_translation(-mesh.bounds.mean(axis=0))
+    scale = 1.0 / np.max(mesh.extents)
+    mesh.apply_scale(scale)
+    mesh_pyr = pyrender.Mesh.from_trimesh(mesh, smooth=True)
+    renderer = pyrender.OffscreenRenderer(res, res)
+    cam = pyrender.PerspectiveCamera(yfov=np.pi / 3.0)
+    results = []
+    for name, eye in views:
+
+        if name in ["view_from_111", "view_from_000"]:
+            up = np.array([-1.0, -1.0, 0.0]) / np.sqrt(2.0)
+        elif name == "view_from_up_to_bottom":
+            up = np.array([-1.0, 0, 0.0])
+        elif name == "view_from_bottom_to_up":
+            up = np.array([1.0, 0, 0.0])
+        else:
+            up = np.array([0.0, 0.0, 1.0])
+
+        target = np.array([0.0, 0.0, 0.0])
+        f = target - eye
+        f_hat = f / np.linalg.norm(f)
+
+        r = np.cross(f_hat, up)
+        r = r / np.linalg.norm(r)
+        u = np.cross(r, f_hat)
+
+        R = np.column_stack((r, u, -f_hat))
+
+        M = np.eye(4)
+        M[:3, :3] = R
+        M[:3, 3] = eye
+
+        scene = pyrender.Scene(bg_color=[230, 235, 245, 255])
+
+        scene.add(mesh_pyr)
+        scene.add(cam, pose=M)
+
+        scene.add(pyrender.DirectionalLight(color=np.ones(3), intensity=4.0), pose=M)
+
+        fill_pose = np.eye(4)
+        fill_pose[:3, 3] = eye + np.array([1.0, 1.0, 1.0])
+        scene.add(
+            pyrender.DirectionalLight(color=np.ones(3), intensity=1.5), pose=fill_pose
+        )
+
+        back_pose = np.eye(4)
+        back_pose[:3, 3] = eye + np.array([-1.0, -1.0, -1.0])
+        scene.add(
+            pyrender.DirectionalLight(color=np.ones(3), intensity=1.2), pose=back_pose
+        )
+
+        color, _ = renderer.render(scene, flags=pyrender.RenderFlags.RGBA)
+
+        img = Image.fromarray(color)
+        img = img.convert("RGB")
+
+        path = out_dir / f"{name}.png"
+        img.save(path, quality=95)
+
+        results.append({"path": str(path), "name": name, "camera_pose": M.tolist()})
+
+    renderer.delete()
+    return results
+
+
+def ask_mllm_detect_and_classify(views_data, extra_text=""):
+
+    instruction_text = """
+            You are a single-purpose multimodal classifier. You will be given several images (multiple views) of a single object plus an OPTIONAL short text note ("Additional context"). Your job is twofold and must be completed in one step:
+            1) Identify the object in plain short form (e.g. "coffee mug", "soccer ball", "laptop", "rock") and put it into the JSON field "detected_object" (string) or null if you truly cannot identify it.
+            2) Classify the object's placement/orientation constraint into exactly one of three categories (0,1,2) using the provided canonical definitions and examples, and provide the additional fields described below.
+
+            Important behavior constraints:
+            - Return ONLY a single valid JSON string (no extra text, no explanation, no comments, no reasoning).
+            - JSON must be syntactically valid, parseable, and use JSON literals (true/false/null where applicable).
+            - Field order MUST be EXACTLY: detected_object, category, main_surface, orientation_requirement.
+            - If a field is not applicable, use the JSON literal null.
+            - Use common/public default usage (not niche). Follow the TIE-BREAKER rule below if ambiguous.
+            - Use all provided views. If any view contradicts others, prioritize views that reveal human-interaction surfaces (front/diagonals) but still obey tie-breaker.
+            - If an OPTIONAL "Additional context" text is provided, use it as auxiliary information to help identification/classification. If the text conflicts with clear visual evidence, prioritize visual evidence. If the images are ambiguous, allow the text to resolve the ambiguity. Do NOT output the additional context—only use it internally for judgment.
+
+            CATEGORY MAPPING (exact):
+            0 = Omnidirectional, no constraint
+            1 = Rotation-insensitive, upright required
+            2 = Has forward-facing primary use surface
+
+            DECISION DEFINITIONS (the ONLY basis for judgment — use common/public default usage):
+
+            Omnidirectional, no constraint (0)
+            - Object is approx spherical or isotropic; function & appearance essentially identical under arbitrary orientation.
+            - No placement posture (upright/sideways/flipped/rotated) is expected in public use.
+
+            Rotation-insensitive, upright required (1)
+            - Object has a stable upright support and a defined upright posture (flat bottom or center-of-gravity alignment).
+            - Rotating around vertical axis does NOT change its function; but it must be upright (not upside-down or on its side) for normal function.
+
+            Has forward-facing primary use surface (2)
+            - Object has a single unique surface that carries its core function or primary human interaction (viewing, operating, aiming, serving, etc.).
+            - In normal public use the object is expected to be oriented so that this surface faces the user/target/line-of-sight. Multiple equivalent faces mean it does NOT qualify.
+
+            TIE-BREAKER / AMBIGUITY RULE (mandatory):
+            - If more than one category could apply, choose the category with the stricter orientation constraint (precedence: 2 → 1 → 0).
+            - Prefer common/public default usage, not niche setups.
+
+            EXTENSIVE CANONICAL EXAMPLES (STRONG PRIOR — MUST FOLLOW)
+            CATEGORY 0 examples: ball, basketball, soccer ball, tennis ball, marble, pebble, orange, balloon(round)
+            CATEGORY 1 examples: cup, coffee cup,moka pot, drinking glass, bottle, vase, bowl, suitcase(standing), candle
+            CATEGORY 2 examples: monitor, laptop, smartphone, table lamp (head facing), flashlight, camera, car, bicycle, oven(front), speaker(front grille), keyboard, painting, wall clock
+
+            OUTPUT JSON FORMAT (strict — EXACT four fields in this order; use JSON literals):
+            {
+            "detected_object": string or null,
+            "category": integer,          // 0 | 1 | 2
+            "main_surface": string or null,
+            "orientation_requirement": string or null
+            }
+
+            FIELD RULES:
+            - "detected_object": short, common object name (lowercase preferred) representing the model's best identification, or null if unidentifiable.
+            - "category": integer 0|1|2.
+            - "main_surface": Only provide a short, specific name of the forward-facing surface when category == 2 (e.g. "screen", "lamp_head", "door_face", "keyboard_surface"). Otherwise null.
+            - "orientation_requirement": Only provide a concise canonical resting-orientation instruction when category == 2. You MUST choose exactly one of the following three semantic directions for the object's normal real-world static pose:
+              * "face_up"      -> the main surface is intended to face upward toward +Z / gravity opposite, e.g. smartphone lying flat with screen up, keyboard on table, tray-like objects.
+              * "face_forward" -> the main surface is intended to face the user/target in a vertical stance, e.g. monitor screen, oven front, speaker grille, camera front.
+              * "face_down"    -> the main surface is intended to face downward in the usual stable static pose, e.g. brush bristles or contact surface downward when naturally placed/used.
+              If the object is category 1 or 0, set null.
+            - Do NOT add any other fields.
+
+            VALIDATION RULES (model must satisfy):
+            - JSON must be syntactically valid and parseable.
+            - Field order must be exactly as above.
+            - No extraneous text.
+
+            INSTRUCTIONS FOR IMAGE USE:
+            - You will be provided a list of labeled views (each labeled with a short tag like "Front", "Back", "Right", "Left", "Diagonal_1", "Diagonal_2"). Use all images to resolve shape, symmetry, handles, screens, bases, cutouts, wheels, or any directional cues.
+            - Remember the mesh was normalized to the unit cube [0,0,0]→[1,1,1] for rendering—do NOT infer real-world size from pixel dimensions; rely on shape & functional features.
+            - If the object is clearly symmetric with no single primary face and no stable base, prefer category 0. If there is a clear base but no single forward-facing use surface, prefer category 1. If there is a screen, grill, face, nozzle, spout, or other unique human-facing surface, prefer category 2.
+            - For category 2 objects, infer the NORMAL STATIC RESTING ORIENTATION in the real world, not merely the visible camera view. Decide whether the primary surface is usually face_up, face_forward, or face_down in its standard placed state.
+
+            NOW: classify the provided object and identify it using the images and the OPTIONAL Additional context text.
+"""
+
+    content = [{"type": "text", "text": instruction_text}]
+
+    if extra_text and extra_text.strip():
+        content.append(
+            {"type": "text", "text": f"Additional context: {extra_text.strip()}"}
+        )
+
+    content.extend(build_image_inputs(views_data))
+    resp = client.chat.completions.create(
+        model=DEPLOYMENT,
+        temperature=0.2,
+        messages=[{"role": "user", "content": content}],
+    )
+    raw = resp.choices[0].message.content
+    return extract_json(raw)
+
+
+def ask_mllm_primary_surface(
+    views_data,
+    object_name="None",
+    main_surface="None",
+    orientation_requirement="None",
+    extra_text="",
+):
+
+    instruction_text = f"""
+    You are a single-purpose multimodal classifier. You will be given 6 images of a single object, rendered from different views. Your task is to identify **the image that best shows the object's forward-facing primary use surface**, defined as the surface that:
+
+    - Carries the object's core function (viewing, operating, aiming, serving, pressing, interacting, etc.)
+    - Faces the human user or line-of-sight in normal use
+    - Is unique and human-accessible (not a symmetrical or bottom/support surface)
+    - Should prioritize the **front-facing view**, even if other angles also partially show it (e.g., top-down view of a laptop shows screen but front view is preferred)
+
+    Additional guidance based on prior classification:
+    - Detected object: {object_name}
+    - Possible main surface: {main_surface}
+    - Orientation requirement: {orientation_requirement}
+
+    If {main_surface} or {orientation_requirement} are provided (not "None"), use them to help identify which image shows the main functional surface. If they conflict with visual evidence, prioritize visual evidence. 
+
+    Return a single valid JSON string with exactly one field:
+
+    {{
+    "primary_surface_view": string // the name of the image that best shows the forward-facing primary use surface
+    }}
+
+    Rules:
+    - Use only the image IDs (names) provided in input.
+    - If the object has no clear forward-facing primary surface (fully isotropic or omnidirectional), return null.
+    - Do NOT add any extra text, explanation, or comments.
+    - Ensure the JSON is syntactically valid and parseable.
+
+    Use the six views to judge shape, handles, screens, bases, spouts, lenses, doors, or other directional human-facing cues. Prioritize the image that a person would naturally face to use or interact with the object. You can also use any Additional context text provided: {extra_text if extra_text else "None"}.
+    """
+
+    content = [{"type": "text", "text": instruction_text}]
+
+    if extra_text and extra_text.strip():
+        content.append(
+            {"type": "text", "text": f"Additional context: {extra_text.strip()}"}
+        )
+
+    content.extend(build_image_inputs(views_data))
+    resp = client.chat.completions.create(
+        model=DEPLOYMENT,
+        temperature=0.2,
+        messages=[{"role": "user", "content": content}],
+    )
+    raw = resp.choices[0].message.content
+    return extract_json(raw)
+
+
+def ask_llm_upright_2a1(object_name, upright_img_path, flipped_img_path):
+    for p in [upright_img_path, flipped_img_path]:
+        img_path = Path(p).resolve()
+        if not img_path.exists():
+            raise FileNotFoundError(
+                f"Image required by LLM for upright judgment not found: {img_path}"
+            )
+
+    imgs_payload = [
+        {
+            "type": "image_url",
+            "image_url": {
+                "url": f"data:image/png;base64,{encode_image(upright_img_path)}"
+            },
+        },
+        {
+            "type": "image_url",
+            "image_url": {
+                "url": f"data:image/png;base64,{encode_image(flipped_img_path)}"
+            },
+        },
+    ]
+
+    prompt = f"""
+You are a physical-world perception model.
+
+An object of category: "{object_name}" is shown in TWO images.
+
+IMPORTANT:
+- The two images show the SAME object.
+- One image is physically correct (upright).
+- The other image is rotated 180 degrees (upside-down).
+- Exactly ONE image shows the object in its natural real-world upright orientation.
+
+Your task: choose which image is upright based on common human-world object orientation knowledge.
+
+Image A = first image  
+Image B = second image  
+
+Rules:
+- Think about gravity, support base, typical usage posture.
+- Objects are not used upside-down in normal life.
+- Do NOT say "both", "uncertain", or explanations.
+- You MUST choose one.
+
+OUTPUT JSON ONLY:
+
+{{
+  "upright_image": "A" or "B",
+  "confidence": 0.0-1.0
+}}
+"""
+
+    resp = client.chat.completions.create(
+        model=DEPLOYMENT,
+        messages=[
+            {
+                "role": "user",
+                "content": [{"type": "text", "text": prompt}, *imgs_payload],
+            }
+        ],
+        temperature=0.0,
+    )
+    return extract_json(resp.choices[0].message.content)
+
+
+def ask_llm_full_side_profile(object_name, views_data):
+    img_paths = []
+    for v in views_data:
+        name = v["name"]
+        path = v["path"]
+        img_paths.append(path)
+
+    for p in img_paths:
+        img_path = Path(p).resolve()
+        if not img_path.exists():
+            raise FileNotFoundError(
+                f"Image required by LLM for upright judgment not found: {img_path}"
+            )
+    imgs_payload = [
+        {
+            "type": "image_url",
+            "image_url": {"url": f"data:image/png;base64,{encode_image(p)}"},
+        }
+        for p in img_paths
+    ]
+
+    prompt = f"""
+            You are a visual reasoning model. 
+
+            An object of category: "{object_name}" is shown in TWO images. Both images show the same object in upright posture, but from different angles.
+
+            Your task: determine **which image shows the object's full height and side profile**—that is, the complete body shape and natural standing posture.
+
+            Rules:
+            - Choose exactly ONE image that best shows the object's full side profile.
+            - Think about how this object would stand in real life.
+            - Do NOT output explanations.
+            - Only return the index of the image.
+
+            OUTPUT JSON ONLY:
+
+            {{
+            "full_side_profile_image": "A" or "B",
+            "confidence": 0.0-1.0
+            }}
+            """
+
+    resp = client.chat.completions.create(
+        model=DEPLOYMENT,
+        messages=[
+            {
+                "role": "user",
+                "content": [{"type": "text", "text": prompt}, *imgs_payload],
+            }
+        ],
+        temperature=0.0,
+    )
+    return extract_json(resp.choices[0].message.content)
+
+
+def ask_llm_upright_rotation(object_name, rotated_imgs_paths):
+    """
+    rotated_imgs_paths: list of image paths in order [0°, 90°, 180°, 270°]
+    object_name: string, name of the object
+    """
+
+    for p in rotated_imgs_paths:
+        img_path = Path(p).resolve()
+        if not img_path.exists():
+            raise FileNotFoundError(
+                f"Image required by LLM for upright judgment not found: {img_path}"
+            )
+    imgs_payload = [
+        {
+            "type": "image_url",
+            "image_url": {"url": f"data:image/png;base64,{encode_image(p)}"},
+        }
+        for p in rotated_imgs_paths
+    ]
+
+    prompt = f"""
+ou are a physical-world orientation judgment model.
+
+An object of category: "{object_name}" is shown in FOUR images.
+All images show the SAME object from the SAME camera viewpoint.
+
+Your task is to choose the image that best matches the object's natural upright pose in everyday life.
+
+Think about:
+- how the object would normally rest on a table, floor, or other surface
+- gravity and stable support
+- the object's base, feet, bottom, opening, handle, screen, or functional side
+- the orientation people would normally place, hold, or use it in real life
+
+Important:
+- Choose the image that looks most naturally upright and stable in the real world.
+- Do NOT rely on any hidden rotation pattern.
+- Do NOT assume the object is already upright in the original image.
+- Do NOT explain your reasoning.
+- Only return the index of the best upright image.
+The correct answer must be the image that a person would most likely consider the object's normal real-world standing orientation.
+
+Image indices:
+- 0 = first image
+- 1 = second image
+- 2 = third image
+- 3 = fourth image
+
+OUTPUT JSON ONLY:
+
+{{
+  "upright_index": 0|1|2|3,
+  "confidence": 0.0-1.0
+}}
+"""
+    resp = client.chat.completions.create(
+        model=DEPLOYMENT,
+        messages=[
+            {
+                "role": "user",
+                "content": [{"type": "text", "text": prompt}, *imgs_payload],
+            }
+        ],
+        temperature=0.0,
+    )
+    return extract_json(resp.choices[0].message.content)
+
+
+def ask_llm_dimension(object_name, img_paths, user_text_hint, current_bbox_dims):
+
+    if isinstance(img_paths, (str, Path)):
+        img_paths = [{"path": str(img_paths)}]
+
+    imgs_payload = []
+    for item in img_paths:
+        img_path = Path(item["path"]).resolve()
+        if not img_path.exists():
+            raise FileNotFoundError(f"Image required by LLM not found: {img_path}")
+        imgs_payload.append(
+            {
+                "type": "image_url",
+                "image_url": {"url": f"data:image/png;base64,{encode_image(img_path)}"},
+            }
+        )
+
+    current_bbox_json = json.dumps(current_bbox_dims, ensure_ascii=False)
+
+    prompt = f"""
+You are a robotics perception and scene analysis expert.
+Your task is to estimate the REAL-WORLD physical size of the object in meters.
+
+CONTEXT:
+- The mesh has already been normalized for rendering.
+- You are given the object's CURRENT NORMALIZED AABB SIZE (ordinary axis-aligned bounding box, NOT PCA, NOT minimum-volume OBB).
+- Use that normalized bbox size as a STRONG SHAPE PRIOR.
+- Your output MUST be a plausible real-world size in meters for the exact state shown in the images.
+- You must preserve the object's proportions as much as possible; do NOT invent an anisotropic resize. The downstream system will apply ONLY a uniform scale.
+
+CURRENT NORMALIZED AABB SIZE (unitless, from ordinary bbox):
+{current_bbox_json}
+
+DEFINITIONS:
+- height = vertical size when a human faces the object (top -> bottom), Z axis
+- width  = left-to-right size when facing the object, Y axis
+- depth  = front-back thickness, X axis
+
+USER PROVIDED HINT:
+- object_name: {object_name}
+- extra_hint: {user_text_hint}
+
+INSTRUCTIONS:
+1. Analyze ALL provided images together.
+2. Determine the exact visible state first (open/closed/folded/etc.).
+3. Estimate the object's real-world physical dimensions in meters for that exact state.
+4. Use the normalized bbox as a shape prior so the returned dimensions are consistent with the object's proportions.
+5. If uncertain, give the most plausible central estimate. Do not return null unless completely unrecognizable.
+
+Return JSON ONLY with:
+{{
+  "object_name": string,
+  "object_description": string,
+  "dimensions_m": {{
+    "height": float,
+    "width": float,
+    "depth": float
+  }},
+  "confidence": float
+}}
+
+CRITICAL:
+- JSON only.
+- Units must be meters.
+- Output real physical dimensions, not normalized values.
+- Do not explain anything.
+"""
+
+    resp = client.chat.completions.create(
+        model=DEPLOYMENT,
+        messages=[
+            {
+                "role": "user",
+                "content": [{"type": "text", "text": prompt}, *imgs_payload],
+            }
+        ],
+        temperature=0.0,
+    )
+    return extract_json(resp.choices[0].message.content)
+
+
+def rotate_image_deg(input_path, deg, output_path):
+    input_path = Path(input_path).resolve()
+    output_path = Path(output_path).resolve()
+
+    if not input_path.exists():
+        raise FileNotFoundError(
+            f"Input file for image rotation not found: {input_path}"
+        )
+
+    img = Image.open(input_path)
+    img_rot = img.rotate(deg, expand=True)
+    img_rot.save(output_path)
+    return str(output_path)
+
+
+def rot_x(deg):
+    r = np.deg2rad(deg)
+    c, s = np.cos(r), np.sin(r)
+    return np.array([[1, 0, 0], [0, c, -s], [0, s, c]])
+
+
+def rot_y(deg):
+    r = np.deg2rad(deg)
+    c, s = np.cos(r), np.sin(r)
+    return np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]])
+
+
+def rot_z(deg):
+    r = np.deg2rad(deg)
+    c, s = np.cos(r), np.sin(r)
+    return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
+
+
+def apply_rotations(mesh, rotations):
+    R = np.eye(3)
+    T = np.eye(4)
+    T[:3, :3] = rotations
+    mesh.apply_transform(T)
+
+
+def get_aabb_dims(mesh: trimesh.Trimesh):
+
+    bounds = np.asarray(mesh.bounds, dtype=float)
+    extents = bounds[1] - bounds[0]
+    return {
+        "height": float(extents[2]),
+        "width": float(extents[1]),
+        "depth": float(extents[0]),
+    }
+
+
+def dims_dict_to_xyz(dims: dict):
+
+    return np.array(
+        [
+            float(dims.get("depth", np.nan)),
+            float(dims.get("width", np.nan)),
+            float(dims.get("height", np.nan)),
+        ],
+        dtype=float,
+    )
+
+
+def scale_mesh_uniform_to_dimensions(
+    mesh: trimesh.Trimesh,
+    target_dims: dict,
+    current_dims: dict | None = None,
+    eps: float = 1e-8,
+):
+
+    if current_dims is None:
+        current_dims = get_aabb_dims(mesh)
+
+    cur = dims_dict_to_xyz(current_dims)
+    tgt = dims_dict_to_xyz(target_dims)
+
+    valid = np.isfinite(cur) & np.isfinite(tgt) & (cur > eps) & (tgt > eps)
+    if not np.any(valid):
+        raise ValueError(f"Invalid dims. current={current_dims}, target={target_dims}")
+
+    ratios = tgt[valid] / cur[valid]
+
+    scale = float(np.median(ratios))
+
+    center = mesh.bounds.mean(axis=0)
+    mesh.apply_translation(-center)
+    mesh.apply_scale(scale)
+    mesh.apply_translation(center)
+
+    return mesh, scale
+
+
+def ask_llm_semantics_info(object_name, img_paths, user_text_hint=""):
+
+    imgs_payload = []
+    for item in img_paths:
+        img_path = Path(item["path"]).resolve()
+        if not img_path.exists():
+            raise FileNotFoundError(f"Image required by LLM not found: {img_path}")
+        imgs_payload.append(
+            {
+                "type": "image_url",
+                "image_url": {"url": f"data:image/png;base64,{encode_image(img_path)}"},
+            }
+        )
+
+    prompt = f"""
+You are a robotics asset semantics expert.
+
+Your task is to infer semantic information from multiple rendered views of a 3D object.
+The object will later be used for robotics simulation, physical property estimation, and manipulation planning.
+
+INPUTS:
+- object_name: {object_name}
+- user_hint: {user_text_hint}
+
+INSTRUCTIONS:
+1. Use the front view and diagonal views jointly.
+2. Infer the most likely semantic category of the object.
+3. Identify the most likely main material(s) visible from the object appearance.
+4. Write a concise but information-rich description that includes:
+   - object type / category
+   - likely main material(s)
+   - surface finish / texture
+   - rigid or flexible nature
+   - notable functional or structural parts
+5. Be conservative and grounded in visual evidence.
+6. If material is uncertain, provide the most likely hypothesis rather than leaving it empty.
+7. The output will be used later to derive physical properties such as density, mass, friction, etc., so the description should be useful for that purpose.
+
+SEMANTIC TAG RULES:
+- Use lowercase snake_case.
+- Prefer specific tags when possible, e.g.:
+  - ceramic_mug
+  - plastic_storage_box
+  - wooden_chair
+  - metal_tool
+  - glass_bottle
+  - fabric_soft_toy
+  - electronic_device
+- If uncertain, use a broader but still useful tag such as:
+  - container
+  - kitchenware
+  - hand_tool
+  - furniture
+  - toy
+  - household_item
+
+OUTPUT JSON SCHEMA:
+{{
+  "object_name": string,
+  "semantic_tag": string,
+  "description": string,
+  "primary_materials": [string, ...],
+  "material_confidence": float,
+  "confidence": float
+}}
+
+FIELD GUIDANCE:
+- object_name: canonical short name for the object
+- semantic_tag: concise semantic class tag
+- description: 1-3 sentences; mention likely material and structural/functional semantics
+- primary_materials: list of likely materials in descending plausibility
+- material_confidence: confidence in material estimate, from 0.0 to 1.0
+- confidence: confidence in the semantic classification overall, from 0.0 to 1.0
+
+CRITICAL RULES:
+- OUTPUT JSON ONLY.
+- No markdown.
+- No extra text.
+- Do not return null unless the object is completely unrecognizable.
+"""
+
+    resp = client.chat.completions.create(
+        model=DEPLOYMENT,
+        messages=[
+            {
+                "role": "user",
+                "content": [{"type": "text", "text": prompt}, *imgs_payload],
+            }
+        ],
+        temperature=0.0,
+    )
+    return extract_json(resp.choices[0].message.content)
+
+
+def export_final_mesh(mesh, name, out_dir: Path):
+    out_dir = out_dir.resolve()
+    out_dir.mkdir(exist_ok=True, parents=True)
+    bounds = mesh.bounds
+    minb = bounds[0]
+    maxb = bounds[1]
+    bottom_center = np.array(
+        [(minb[0] + maxb[0]) / 2.0, (minb[1] + maxb[1]) / 2.0, minb[2]], dtype=float
+    )
+    T_trans = np.eye(4)
+    T_trans[:3, 3] = -bottom_center
+    mesh.apply_transform(T_trans)
+    out_path = out_dir / f"{name}_simready.obj"
+    out_path = out_path.resolve()
+
+    print(f"Exporting final mesh to: {out_path} (bottom-face center moved to origin)")
+    mesh.export(out_path)
+
+    return str(out_path)
+
+
+def delete_rendered_pngs(output_dir):
+    output_dir = Path(output_dir)
+    if not output_dir.exists():
+        return
+
+    patterns = [
+        "view_*.png",
+        "*_flipped.png",
+    ]
+
+    for pattern in patterns:
+        for p in output_dir.glob(pattern):
+            p.unlink()
+
+
+def process_mesh(file, name=None, extra_text="", out_dir="renders", res=1024):
+    if isinstance(file, (str, Path)):
+        file = Path(file).resolve()
+        name = file.stem
+    out_dir = Path(out_dir).resolve()
+    out_dir.mkdir(exist_ok=True, parents=True)
+    mesh = init_pose(file)
+
+    images_first = render_views(
+        mesh, diagonal_views + cardinal_views + up_down_views, out_dir, res
+    )
+    category_res = ask_mllm_detect_and_classify(images_first, extra_text=extra_text)
+    print(category_res)
+    category = int(category_res.get("category", 0))
+    object_name = str(category_res.get("detected_object", "None"))
+    main_surface = str(category_res.get("main_surface", "None"))
+    orientation_requirement = str(category_res.get("orientation_requirement", "None"))
+
+    if category == 0:
+        pass
+
+    elif category == 1:
+        images_for_1_1 = render_views(mesh, side_profile, out_dir, res)
+        side_profile_result = ask_llm_full_side_profile(object_name, images_for_1_1)
+        print(side_profile_result)
+        side_profile_result = side_profile_result.get("full_side_profile_image", "B")
+        if side_profile_result == "B":
+            upright_img = render_views(mesh, front_views, out_dir, res)
+            upright_img = upright_img[0]["path"]
+            flipped_path = str(
+                Path(upright_img).with_name(
+                    Path(upright_img).stem + f"_180_flipped.png"
+                )
+            )
+            rotate_image_deg(upright_img, 180, flipped_path)
+            upright_result = ask_llm_upright_2a1(object_name, upright_img, flipped_path)
+            print(upright_result)
+            try:
+                upright_choice = upright_result.get("upright_image", "A")
+            except Exception:
+                upright_choice = "A"
+            if upright_choice == "B":
+                x_flip = rot_x(180)
+                apply_rotations(mesh, x_flip)
+
+        elif side_profile_result == "A":
+            upright_img = render_views(
+                mesh,
+                [("view_from_up_to_bottom", np.array([0.5, 0.5, 2.2], dtype=float))],
+                out_dir,
+                res,
+            )
+            upright_img = upright_img[0]["path"]
+            rotated_imgs = []
+            rotated_imgs.append(upright_img)
+            rotate_deg = [90, 180, 270]
+            for deg in rotate_deg:
+                flipped_path = str(
+                    Path(upright_img).with_name(
+                        Path(upright_img).stem + f"_{deg}_flipped.png"
+                    )
+                )
+                rotated_imgs.append(rotate_image_deg(upright_img, deg, flipped_path))
+            side_rotation_result = ask_llm_upright_rotation(object_name, rotated_imgs)
+            side_rotation_result = side_rotation_result.get("upright_index", 0)
+            print("side rotation is", side_rotation_result)
+            if side_rotation_result == 0:
+                pass
+            elif side_rotation_result == 1:
+                side_r = rot_z(90)
+                apply_rotations(mesh, side_r)
+            elif side_rotation_result == 2:
+                side_r = rot_z(180)
+                apply_rotations(mesh, side_r)
+            elif side_rotation_result == 3:
+                side_r = rot_z(270)
+                apply_rotations(mesh, side_r)
+            else:
+                raise ValueError("no upright index choosen")
+            side_r = rot_y(90)
+            apply_rotations(mesh, side_r)
+        else:
+            raise ValueError("no side profil choosen")
+
+    elif category == 2:
+        images_for_2_1 = render_views(
+            mesh, cardinal_views + up_down_views, out_dir, res
+        )
+        result_main_surface = ask_mllm_primary_surface(
+            images_for_2_1, object_name, main_surface, orientation_requirement
+        )
+        print(result_main_surface)
+        primary_view = result_main_surface.get("primary_surface_view", "None")
+
+        if orientation_requirement == "face_forward":
+
+            if primary_view in [i[0] for i in cardinal_views]:
+                if primary_view == "view_from_front":
+                    print("no need to rotate round z")
+                elif primary_view == "view_from_left":  # left
+                    R = rot_z(90)
+                    apply_rotations(mesh, R)
+                elif primary_view == "view_from_right":  # right
+                    R = rot_z(-90)
+                    apply_rotations(mesh, R)
+                elif primary_view == "view_from_back":  # back
+                    R = rot_z(180)
+                    apply_rotations(mesh, R)
+
+                else:
+                    raise ValueError("unknow views")
+
+            elif primary_view in [i[0] for i in up_down_views]:
+                if primary_view == "view_from_up_to_bottom":
+                    R = rot_y(90)
+                    apply_rotations(mesh, R)
+                elif primary_view == "view_from_bottom_to_up":
+                    R = rot_y(-90)
+                    apply_rotations(mesh, R)
+                else:
+                    raise ValueError("unknow views")
+
+            else:
+                raise ValueError("unknow views")
+            normalize_to_unit_cube(mesh)
+            upright_img = render_views(mesh, front_views, out_dir, res)
+            upright_img = upright_img[0]["path"]
+            rotated_imgs = []
+            rotated_imgs.append(upright_img)
+            rotate_deg = [90, 180, 270]
+            for deg in rotate_deg:
+                flipped_path = str(
+                    Path(upright_img).with_name(
+                        Path(upright_img).stem + f"_{deg}_flipped.png"
+                    )
+                )
+                rotated_imgs.append(rotate_image_deg(upright_img, deg, flipped_path))
+            result = ask_llm_upright_rotation(object_name, rotated_imgs)
+            print(result)
+            upright_result = result.get("upright_index", 0)
+            if upright_result == 0:
+                pass
+            elif upright_result == 1:
+                upright_deg = rot_x(90)
+                apply_rotations(mesh, upright_deg)
+            elif upright_result == 2:
+                upright_deg = rot_x(180)
+                apply_rotations(mesh, upright_deg)
+            elif upright_result == 3:
+                upright_deg = rot_x(-90)
+                apply_rotations(mesh, upright_deg)
+            else:
+                raise ValueError("upright index unknow")
+
+        elif orientation_requirement == "face_up":
+
+            if primary_view in [i[0] for i in cardinal_views]:
+                if primary_view == "view_from_front":
+                    R = rot_y(-90)
+                    apply_rotations(mesh, R)
+                elif primary_view == "view_from_left":
+                    R = rot_x(-90)
+                    apply_rotations(mesh, R)
+                elif primary_view == "view_from_right":
+                    R = rot_x(90)
+                    apply_rotations(mesh, R)
+                elif primary_view == "view_from_back":
+                    R = rot_y(90)
+                    apply_rotations(mesh, R)
+                else:
+                    raise ValueError("unknow views")
+
+            elif primary_view in [i[0] for i in up_down_views]:
+                if primary_view == "view_from_up_to_bottom":
+                    print("no need to rotate")
+                elif primary_view == "view_from_bottom_to_up":
+                    R = rot_x(180)
+                    apply_rotations(mesh, R)
+                else:
+                    raise ValueError("unknow views")
+
+            else:
+                raise ValueError("unknow views")
+            normalize_to_unit_cube(mesh)
+            upright_img = render_views(mesh, up_views, out_dir, res)
+            upright_img = upright_img[0]["path"]
+            rotated_imgs = []
+            rotated_imgs.append(upright_img)
+            rotate_deg = [90, 180, 270]
+            for deg in rotate_deg:
+                flipped_path = str(
+                    Path(upright_img).with_name(
+                        Path(upright_img).stem + f"_{deg}_flipped.png"
+                    )
+                )
+                rotated_imgs.append(rotate_image_deg(upright_img, deg, flipped_path))
+            result = ask_llm_upright_rotation(object_name, rotated_imgs)
+            print(result)
+            upright_result = result.get("upright_index", 0)
+            if upright_result == 0:
+                pass
+            elif upright_result == 1:
+                upright_deg = rot_z(90)
+                apply_rotations(mesh, upright_deg)
+            elif upright_result == 2:
+                upright_deg = rot_z(180)
+                apply_rotations(mesh, upright_deg)
+            elif upright_result == 3:
+                upright_deg = rot_z(-90)
+                apply_rotations(mesh, upright_deg)
+            else:
+                raise ValueError("upright index unknow")
+
+        elif orientation_requirement == "face_down":
+            if primary_view in [i[0] for i in cardinal_views]:
+                if primary_view == "view_from_front":
+                    R = rot_y(90)
+                    apply_rotations(mesh, R)
+                elif primary_view == "view_from_left":
+                    R = rot_x(90)
+                    apply_rotations(mesh, R)
+                elif primary_view == "view_from_right":
+                    R = rot_x(-90)
+                    apply_rotations(mesh, R)
+                elif primary_view == "view_from_back":
+                    R = rot_y(-90)
+                    apply_rotations(mesh, R)
+                else:
+                    raise ValueError("unknow views")
+
+            elif primary_view in [i[0] for i in up_down_views]:
+                if primary_view == "view_from_up_to_bottom":
+                    print("no need to rotate")
+                elif primary_view == "view_from_bottom_to_up":
+                    R = rot_x(180)
+                    apply_rotations(mesh, R)
+                else:
+                    raise ValueError("unknow views")
+
+            else:
+                raise ValueError("unknow views")
+            normalize_to_unit_cube(mesh)
+            upright_img = render_views(mesh, down_views, out_dir, res)
+            upright_img = upright_img[0]["path"]
+            rotated_imgs = []
+            rotated_imgs.append(upright_img)
+            rotate_deg = [90, 180, 270]
+            for deg in rotate_deg:
+                flipped_path = str(
+                    Path(upright_img).with_name(
+                        Path(upright_img).stem + f"_{deg}_flipped.png"
+                    )
+                )
+                rotated_imgs.append(rotate_image_deg(upright_img, deg, flipped_path))
+            result = ask_llm_upright_rotation(object_name, rotated_imgs)
+            print(result)
+            upright_result = result.get("upright_index", 0)
+            if upright_result == 0:
+                apply_rotations(mesh, upright_deg)
+            elif upright_result == 1:
+                upright_deg = rot_z(90)
+                apply_rotations(mesh, upright_deg)
+            elif upright_result == 2:
+                upright_deg = rot_z(180)
+                pass
+            elif upright_result == 3:
+                upright_deg = rot_z(-90)
+                apply_rotations(mesh, upright_deg)
+            else:
+                raise ValueError("upright index unknow")
+
+        else:
+            raise ValueError("unknow orientationrequirement")
+
+    else:
+        raise ValueError()
+
+    # TODO: Add alignment analysis to avoid tilted outputs.
+
+    normalize_to_unit_cube(mesh)
+
+    current_bbox_dims = get_aabb_dims(mesh)
+
+    dimension_views = render_views(
+        mesh, diagonal_views + cardinal_views + up_down_views, out_dir, res
+    )
+
+    dimension_result = ask_llm_dimension(
+        object_name=object_name,
+        img_paths=dimension_views,
+        user_text_hint=extra_text,
+        current_bbox_dims=current_bbox_dims,
+    )
+    print(dimension_result)
+
+    target_dims = dimension_result.get("dimensions_m", None)
+    if target_dims is None:
+        raise ValueError("LLM failed to return dimensions_m")
+
+    mesh, uniform_scale = scale_mesh_uniform_to_dimensions(
+        mesh=mesh,
+        target_dims=target_dims,
+        current_dims=current_bbox_dims,
+    )
+
+    print(
+        {
+            "uniform_scale": uniform_scale,
+            "current_bbox_dims": current_bbox_dims,
+            "target_dims_m": target_dims,
+        }
+    )
+
+    out_path = export_final_mesh(mesh, name, out_dir)
+
+    semantics_result = ask_llm_semantics_info(
+        object_name=object_name,
+        img_paths=dimension_views,
+        user_text_hint=extra_text,
+    )
+    return {
+        "Path": out_path,
+        "uniform_scale": uniform_scale,
+        "target_dims_m": target_dims,
+        "semantics_result": semantics_result,
+    }
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument(
+        "--file",
+        required=True,
+        help="Path to input 3D mesh file (absolute path supported)",
+    )
+    ap.add_argument(
+        "--extra_text",
+        default="",
+        help="Text description for your object, mainly describe the dimension and category",
+    )
+    ap.add_argument(
+        "--out_dir",
+        default="renders",
+        help="Output directory (absolute path supported)",
+    )
+    ap.add_argument(
+        "--name",
+        default="test",
+        help="Output directory (absolute path supported)",
+    )
+    ap.add_argument("--res", type=int, default=1024, help="Rendered image resolution")
+    args = ap.parse_args()
+    args.file = Path(args.file).resolve()
+    args.out_dir = Path(args.out_dir).resolve()
+    if not args.file.exists():
+        print(f"Error: Input file does not exist - {args.file}")
+        exit(1)
+
+    process_mesh(args.file, args.name, args.extra_text, args.out_dir, args.res)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/embodichain/gen_sim/simready_pipeline/utils/texture_utils.py b/embodichain/gen_sim/simready_pipeline/utils/texture_utils.py
new file mode 100644
index 00000000..7a2898e8
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/utils/texture_utils.py
@@ -0,0 +1,296 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional, Tuple
+import trimesh
+
+PBR_TEXTURE_FIELDS = (
+    "baseColorTexture",
+    "metallicRoughnessTexture",
+    "normalTexture",
+    "occlusionTexture",
+    "emissiveTexture",
+)
+
+PBR_SCALAR_FIELDS = (
+    "baseColorFactor",
+    "metallicFactor",
+    "roughnessFactor",
+    "emissiveFactor",
+    "alphaMode",
+    "alphaCutoff",
+    "doubleSided",
+)
+
+SIMPLE_SCALAR_FIELDS = (
+    "diffuse",
+    "ambient",
+    "specular",
+    "glossiness",
+)
+
+
+def _shape(x: Any) -> Optional[Tuple[int, ...]]:
+    try:
+        return tuple(x.shape)  # numpy / array-like
+    except Exception:
+        return None
+
+
+def _to_jsonable(x: Any) -> Any:
+
+    if x is None:
+        return None
+
+    if hasattr(x, "tolist"):
+        try:
+            return x.tolist()
+        except Exception:
+            pass
+
+    if hasattr(x, "size") and hasattr(x, "mode"):
+        try:
+            return {
+                "type": type(x).__name__,
+                "size": list(x.size),
+                "mode": x.mode,
+            }
+        except Exception:
+            return {"type": type(x).__name__}
+
+    if isinstance(x, (str, int, float, bool)):
+        return x
+
+    return str(x)
+
+
+def _describe_texture_value(value: Any) -> Dict[str, Any]:
+
+    info: Dict[str, Any] = {
+        "present": value is not None,
+        "type": None,
+        "meta": None,
+    }
+
+    if value is None:
+        return info
+
+    info["type"] = type(value).__name__
+    info["meta"] = _to_jsonable(value)
+    return info
+
+
+def _inspect_material(material: Any) -> Dict[str, Any]:
+    """
+    Recursively inspect trimesh materials.
+    """
+    out: Dict[str, Any] = {
+        "material_class": type(material).__name__ if material is not None else None,
+        "material_kind": None,
+        "name": getattr(material, "name", None) if material is not None else None,
+        "main_color": None,
+        "texture_count": 0,
+        "textures": {},
+        "scalars": {},
+        "children": None,
+    }
+
+    if material is None:
+        return out
+
+    out["main_color"] = _to_jsonable(getattr(material, "main_color", None))
+
+    # MultiMaterial: wrapper around a list of Materials
+    if isinstance(material, trimesh.visual.material.MultiMaterial):
+        out["material_kind"] = "multi"
+        children: List[Dict[str, Any]] = []
+        total = 0
+
+        mats = getattr(material, "materials", None) or []
+        for idx, child in enumerate(mats):
+            child_info = _inspect_material(child)
+            child_info["index"] = idx
+            children.append(child_info)
+            total += int(child_info.get("texture_count", 0))
+
+        out["children"] = children
+        out["texture_count"] = total
+        return out
+
+    # PBRMaterial
+    if isinstance(material, trimesh.visual.material.PBRMaterial):
+        out["material_kind"] = "pbr"
+        for field in PBR_SCALAR_FIELDS:
+            out["scalars"][field] = _to_jsonable(getattr(material, field, None))
+
+        texture_count = 0
+        for field in PBR_TEXTURE_FIELDS:
+            tex_value = getattr(material, field, None)
+            out["textures"][field] = _describe_texture_value(tex_value)
+            if tex_value is not None:
+                texture_count += 1
+
+        out["texture_count"] = texture_count
+        return out
+
+    # SimpleMaterial
+    if isinstance(material, trimesh.visual.material.SimpleMaterial):
+        out["material_kind"] = "simple"
+        for field in SIMPLE_SCALAR_FIELDS:
+            out["scalars"][field] = _to_jsonable(getattr(material, field, None))
+
+        image = getattr(material, "image", None)
+        out["textures"]["image"] = _describe_texture_value(image)
+        out["texture_count"] = 1 if image is not None else 0
+        return out
+
+    # Generic Material or unknown subclass
+    out["material_kind"] = "generic_or_unknown"
+    # Collect anything that looks texture-like or important
+    for key, value in getattr(material, "__dict__", {}).items():
+        if "texture" in key.lower() or key.lower() in {"image", "name"}:
+            out["textures"][key] = _describe_texture_value(value)
+
+    return out
+
+
+def classify_visual(mesh: trimesh.Trimesh) -> Dict[str, Any]:
+    """
+    Returns a nested dict with:
+      - top-level visual category
+      - color mode / texture mode
+      - uv presence
+      - material type
+      - material texture slots
+      - total texture count
+      - completeness flags
+    """
+    vis = getattr(mesh, "visual", None)
+
+    result: Dict[str, Any] = {
+        "visual_class": type(vis).__name__ if vis is not None else None,
+        "visual_category": "none",
+        "visual_kind": None,
+        "visual_defined": False,
+        "is_color_visual": False,
+        "is_texture_visual": False,
+        "uv_present": False,
+        "uv_shape": None,
+        "material": None,
+        "material_type": None,
+        "material_kind": None,
+        "texture_count_total": 0,
+        "texture_state": "none",
+        "face_materials_present": False,
+        "face_materials_shape_or_len": None,
+        "color_mode": None,
+        "face_colors_shape": None,
+        "vertex_colors_shape": None,
+        "has_transparency": None,
+        "main_color": None,
+        "notes": [],
+    }
+
+    if vis is None:
+        result["notes"].append("mesh.visual is None")
+        return result
+
+    result["visual_kind"] = getattr(vis, "kind", None)
+    result["visual_defined"] = bool(getattr(vis, "defined", False))
+
+    # -------- TextureVisuals --------
+    if isinstance(vis, trimesh.visual.texture.TextureVisuals):
+        result["visual_category"] = "texture"
+        result["is_texture_visual"] = True
+
+        uv = getattr(vis, "uv", None)
+        result["uv_present"] = uv is not None
+        result["uv_shape"] = _shape(uv)
+
+        # face_materials is an optional constructor arg; inspect defensively
+        face_materials = getattr(vis, "face_materials", None)
+        result["face_materials_present"] = face_materials is not None
+        if face_materials is not None:
+            try:
+                result["face_materials_shape_or_len"] = len(face_materials)
+            except Exception:
+                result["face_materials_shape_or_len"] = _shape(face_materials)
+
+        material = getattr(vis, "material", None)
+        result["material"] = (
+            _inspect_material(material) if material is not None else None
+        )
+        if material is not None:
+            result["material_type"] = type(material).__name__
+            result["material_kind"] = result["material"]["material_kind"]
+            result["main_color"] = result["material"]["main_color"]
+            result["texture_count_total"] = int(result["material"]["texture_count"])
+
+        # TextureVisuals is only really usable when UV exists.
+        if not result["uv_present"]:
+            result["texture_state"] = "texture_visual_missing_uv"
+            result["notes"].append("TextureVisuals exists, but uv is missing.")
+        elif material is None:
+            result["texture_state"] = "texture_visual_missing_material"
+            result["notes"].append("TextureVisuals has uv, but material is missing.")
+        elif result["texture_count_total"] == 0:
+            result["texture_state"] = "texture_visual_material_no_textures"
+            result["notes"].append(
+                "TextureVisuals has uv and material, but material contains no texture slots/images."
+            )
+        else:
+            result["texture_state"] = "texture_visual_complete_or_partially_complete"
+
+        # If the visual has alpha/transparency info through material, expose it.
+        if material is not None and hasattr(material, "alphaMode"):
+            result["notes"].append(f"alphaMode={getattr(material, 'alphaMode', None)}")
+        return result
+
+    # -------- ColorVisuals --------
+    if isinstance(vis, trimesh.visual.color.ColorVisuals):
+        result["visual_category"] = "color"
+        result["is_color_visual"] = True
+        result["color_mode"] = getattr(vis, "kind", None)
+
+        result["face_colors_shape"] = _shape(getattr(vis, "face_colors", None))
+        result["vertex_colors_shape"] = _shape(getattr(vis, "vertex_colors", None))
+        result["has_transparency"] = bool(getattr(vis, "transparency", False))
+        result["main_color"] = _to_jsonable(getattr(vis, "main_color", None))
+
+        if result["color_mode"] == "face":
+            result["texture_state"] = "color_face"
+        elif result["color_mode"] == "vertex":
+            result["texture_state"] = "color_vertex"
+        else:
+            result["texture_state"] = "color_unset_or_default"
+
+        return result
+
+    # -------- Unknown visual subclass --------
+    result["visual_category"] = "unknown"
+    result["notes"].append(
+        f"Unhandled visual type: {type(vis).__name__}. Inspect __dict__ for custom extension."
+    )
+
+    # Best-effort generic dump for custom visuals
+    if hasattr(vis, "__dict__"):
+        result["material"] = {
+            "raw_attributes": {k: _to_jsonable(v) for k, v in vis.__dict__.items()}
+        }
+
+    return result
diff --git a/embodichain/gen_sim/simready_pipeline/utils/usd_utils.py b/embodichain/gen_sim/simready_pipeline/utils/usd_utils.py
new file mode 100644
index 00000000..ed1286de
--- /dev/null
+++ b/embodichain/gen_sim/simready_pipeline/utils/usd_utils.py
@@ -0,0 +1,412 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+import argparse
+import json
+import shutil
+import tempfile
+from pathlib import Path
+from typing import Dict, Any, Optional, Union
+
+import numpy as np
+import trimesh
+from pxr import Gf, Sdf, Usd, UsdGeom, UsdPhysics, UsdShade, UsdUtils, Vt
+
+DEFAULT_PHYSICS_PARAMS = {
+    "mass": 1.0,
+    "density": 1000.0,
+    "static_friction": 0.5,
+    "dynamic_friction": 0.5,
+    "restitution": 0.0,
+    "linear_damping": 0.7,
+    "angular_damping": 0.7,
+    "enable_collision": True,
+    "enable_ccd": False,
+    "contact_offset": 0.001,
+    "rest_offset": 0.0,
+    "max_linear_velocity": 100.0,
+    "max_angular_velocity": 50.0,
+    "max_depenetration_velocity": 100.0,
+    "solver_min_position_iters": 4,
+    "solver_min_velocity_iters": 1,
+    "sleep_threshold": 0.001,
+}
+
+
+def parse_glb_with_trimesh(path: Path, texture_dir: Path) -> Dict[str, Any]:
+    scene = trimesh.load(str(path))
+    mesh = scene.dump(concatenate=True) if isinstance(scene, trimesh.Scene) else scene
+
+    tex_filename = "diffuse.png"
+    tex_path = texture_dir / tex_filename
+
+    material = mesh.visual.material
+    if hasattr(material, "image") and material.image is not None:
+        material.image.save(str(tex_path))
+    elif (
+        hasattr(material, "baseColorTexture") and material.baseColorTexture is not None
+    ):
+        material.baseColorTexture.save(str(tex_path))
+
+    return {
+        "vertices": np.asarray(mesh.vertices),
+        "faces": np.asarray(mesh.faces),
+        "uv": (
+            np.asarray(mesh.visual.uv)
+            if getattr(mesh.visual, "uv", None) is not None
+            else None
+        ),
+        "tex_path": f"./textures/{tex_filename}",
+    }
+
+
+def build_clean_usd(
+    data: Dict[str, Any], output_path: Path, physics_params: Dict[str, float]
+) -> None:
+    stage = Usd.Stage.CreateNew(str(output_path))
+    UsdGeom.SetStageUpAxis(stage, UsdGeom.Tokens.z)
+    UsdGeom.SetStageMetersPerUnit(stage, 1.0)
+    UsdPhysics.Scene.Define(stage, "/PhysicsScene")
+
+    root_prim = UsdGeom.Xform.Define(stage, "/RootNode")
+    stage.SetDefaultPrim(root_prim.GetPrim())
+
+    stage.DefinePrim("/RootNode/Looks", "Scope")
+    UsdGeom.Xform.Define(stage, "/RootNode/geometry_inst")
+
+    new_mat_path = "/RootNode/Looks/Material_0"
+    new_geo_path = "/RootNode/geometry_inst/geometry_0"
+
+    # --- A. Mesh Definition ---
+    mesh = UsdGeom.Mesh.Define(stage, new_geo_path)
+    mesh.CreatePointsAttr(Vt.Vec3fArray([Gf.Vec3f(*v) for v in data["vertices"]]))
+    mesh.CreateFaceVertexIndicesAttr(Vt.IntArray(data["faces"].flatten().tolist()))
+    mesh.CreateFaceVertexCountsAttr(Vt.IntArray([3] * len(data["faces"])))
+
+    if data.get("uv") is not None:
+        tex_coords = UsdGeom.PrimvarsAPI(mesh).CreatePrimvar(
+            "st", Sdf.ValueTypeNames.TexCoord2fArray, UsdGeom.Tokens.varying
+        )
+        tex_coords.Set(Vt.Vec2fArray([Gf.Vec2f(*uv) for uv in data["uv"]]))
+
+    mesh.CreateDoubleSidedAttr(True)
+
+    # --- B. Material Definition ---
+    material = UsdShade.Material.Define(stage, new_mat_path)
+    pbr_shader = UsdShade.Shader.Define(stage, f"{new_mat_path}/PBRShader")
+    pbr_shader.CreateIdAttr("UsdPreviewSurface")
+
+    st_reader = UsdShade.Shader.Define(stage, f"{new_mat_path}/STReader")
+    st_reader.CreateIdAttr("UsdPrimvarReader_float2")
+    st_reader.CreateInput("varname", Sdf.ValueTypeNames.Token).Set("st")
+
+    tex_sampler = UsdShade.Shader.Define(stage, f"{new_mat_path}/DiffuseSampler")
+    tex_sampler.CreateIdAttr("UsdUVTexture")
+    tex_sampler.CreateInput("file", Sdf.ValueTypeNames.Asset).Set(data["tex_path"])
+    tex_sampler.CreateInput("st", Sdf.ValueTypeNames.Float2).ConnectToSource(
+        st_reader.ConnectableAPI(), "result"
+    )
+
+    pbr_shader.CreateInput("diffuseColor", Sdf.ValueTypeNames.Color3f).ConnectToSource(
+        tex_sampler.ConnectableAPI(), "rgb"
+    )
+    material.CreateSurfaceOutput().ConnectToSource(
+        pbr_shader.ConnectableAPI(), "surface"
+    )
+    UsdShade.MaterialBindingAPI.Apply(mesh.GetPrim()).Bind(material)
+
+    # --- C. Physics Material Injection ---
+    binding_api = UsdShade.MaterialBindingAPI(mesh.GetPrim())
+    bound_material, _ = binding_api.ComputeBoundMaterial()
+
+    if bound_material:
+        bound_prim = bound_material.GetPrim()
+        UsdPhysics.MaterialAPI.Apply(bound_prim)
+        material_api = UsdPhysics.MaterialAPI(bound_prim)
+        material_api.CreateDensityAttr().Set(physics_params["density"])
+        material_api.CreateRestitutionAttr().Set(physics_params["restitution"])
+        material_api.CreateStaticFrictionAttr().Set(physics_params["static_friction"])
+        material_api.CreateDynamicFrictionAttr().Set(physics_params["dynamic_friction"])
+
+    # --- D. Core Rigid Body ---
+    prim = mesh.GetPrim()
+
+    prim.SetMetadata(
+        "apiSchemas",
+        Sdf.TokenListOp.CreateExplicit(
+            ["PhysicsRigidBodyAPI", "PhysicsMassAPI", "PhysxRigidBodyAPI"]
+        ),
+    )
+
+    prim.SetMetadata("kind", "component")
+
+    collision_api = UsdPhysics.CollisionAPI.Apply(prim)
+    collision_api.CreateCollisionEnabledAttr(physics_params["enable_collision"])
+
+    mesh_collision_api = UsdPhysics.MeshCollisionAPI.Apply(prim)
+    mesh_collision_api.CreateApproximationAttr().Set(
+        UsdPhysics.Tokens.convexDecomposition
+    )
+
+    def set_attr(name, type_name, value):
+        attr = prim.CreateAttribute(name, type_name)
+        attr.Set(value)
+
+    set_attr("physics:rigidBodyEnabled", Sdf.ValueTypeNames.Bool, True)
+    set_attr("physics:kinematicEnabled", Sdf.ValueTypeNames.Bool, False)
+    set_attr("physics:startsAsleep", Sdf.ValueTypeNames.Bool, False)
+
+    set_attr("physics:velocity", Sdf.ValueTypeNames.Vector3f, Gf.Vec3f(0, 0, 0))
+    set_attr("physics:angularVelocity", Sdf.ValueTypeNames.Vector3f, Gf.Vec3f(0, 0, 0))
+    set_attr("physics:centerOfMass", Sdf.ValueTypeNames.Point3f, Gf.Vec3f(0, 0, 0))
+    set_attr("physics:mass", Sdf.ValueTypeNames.Float, physics_params["mass"])
+
+    def set_physx(name, type_name, value):
+        attr = prim.CreateAttribute(f"physxRigidBody:{name}", type_name)
+        attr.Set(value)
+
+    set_physx(
+        "linearDamping", Sdf.ValueTypeNames.Float, physics_params["linear_damping"]
+    )
+    set_physx(
+        "angularDamping", Sdf.ValueTypeNames.Float, physics_params["angular_damping"]
+    )
+
+    set_physx(
+        "maxLinearVelocity",
+        Sdf.ValueTypeNames.Float,
+        physics_params["max_linear_velocity"],
+    )
+    set_physx(
+        "maxAngularVelocity",
+        Sdf.ValueTypeNames.Float,
+        physics_params["max_angular_velocity"],
+    )
+    set_physx(
+        "maxDepenetrationVelocity",
+        Sdf.ValueTypeNames.Float,
+        physics_params["max_depenetration_velocity"],
+    )
+
+    set_physx("enableCCD", Sdf.ValueTypeNames.Bool, physics_params["enable_ccd"])
+    set_physx("enableSpeculativeCCD", Sdf.ValueTypeNames.Bool, False)
+
+    set_physx(
+        "sleepThreshold", Sdf.ValueTypeNames.Float, physics_params["sleep_threshold"]
+    )
+    set_physx("stabilizationThreshold", Sdf.ValueTypeNames.Float, 0.001)
+
+    set_physx(
+        "solverPositionIterationCount",
+        Sdf.ValueTypeNames.Int,
+        physics_params["solver_min_position_iters"],
+    )
+    set_physx(
+        "solverVelocityIterationCount",
+        Sdf.ValueTypeNames.Int,
+        physics_params["solver_min_velocity_iters"],
+    )
+
+    set_physx("lockedPosAxis", Sdf.ValueTypeNames.Int, 0)
+    set_physx("lockedRotAxis", Sdf.ValueTypeNames.Int, 0)
+
+    # --- E. Collision ---
+    collision_api = UsdPhysics.CollisionAPI.Apply(prim)
+    collision_api.CreateCollisionEnabledAttr(physics_params["enable_collision"])
+
+    mesh_collision_api = UsdPhysics.MeshCollisionAPI.Apply(prim)
+    mesh_collision_api.CreateApproximationAttr().Set(
+        UsdPhysics.Tokens.convexDecomposition
+    )
+
+    # --- F. Extended ---
+    prim.CreateAttribute("sim:linearDamping", Sdf.ValueTypeNames.Float).Set(
+        float(physics_params["linear_damping"])
+    )
+    prim.CreateAttribute("sim:angularDamping", Sdf.ValueTypeNames.Float).Set(
+        float(physics_params["angular_damping"])
+    )
+    prim.CreateAttribute("sim:contactOffset", Sdf.ValueTypeNames.Float).Set(
+        float(physics_params["contact_offset"])
+    )
+    prim.CreateAttribute("sim:restOffset", Sdf.ValueTypeNames.Float).Set(
+        float(physics_params["rest_offset"])
+    )
+
+    prim.CreateAttribute("physx:enableCCD", Sdf.ValueTypeNames.Bool).Set(
+        physics_params["enable_ccd"]
+    )
+    prim.CreateAttribute("physx:maxLinearVelocity", Sdf.ValueTypeNames.Float).Set(
+        physics_params["max_linear_velocity"]
+    )
+    prim.CreateAttribute("physx:maxAngularVelocity", Sdf.ValueTypeNames.Float).Set(
+        physics_params["max_angular_velocity"]
+    )
+    prim.CreateAttribute(
+        "physx:solverPositionIterationCount", Sdf.ValueTypeNames.Int
+    ).Set(physics_params["solver_min_position_iters"])
+    prim.CreateAttribute(
+        "physx:solverVelocityIterationCount", Sdf.ValueTypeNames.Int
+    ).Set(physics_params["solver_min_velocity_iters"])
+    prim.CreateAttribute(
+        "physx:maxDepenetrationVelocity", Sdf.ValueTypeNames.Float
+    ).Set(physics_params["max_depenetration_velocity"])
+    prim.CreateAttribute("physx:sleepThreshold", Sdf.ValueTypeNames.Float).Set(
+        physics_params["sleep_threshold"]
+    )
+
+    stage.GetRootLayer().Save()
+    print(f"--- Exported base USD: {output_path} ---")
+
+
+def convert_model_to_usd(
+    input_path: Union[str, Path],
+    out_dir: Union[str, Path] = "./output_usd",
+    physics_params: Optional[Dict[str, float]] = None,
+) -> Dict[str, Path]:
+    """
+    Importable conversion entry point.
+
+    Args:
+        input_path: source .glb / mesh path
+        out_dir: output directory
+        physics_params: optional override of DEFAULT_PHYSICS_PARAMS
+
+    Returns:
+        dict with output paths
+    """
+    input_path = Path(input_path).resolve()
+    output_dir = Path(out_dir).resolve()
+    base_name = input_path.stem
+
+    final_params = DEFAULT_PHYSICS_PARAMS.copy()
+    if physics_params:
+        final_params.update(physics_params)
+
+    if not input_path.exists():
+        raise FileNotFoundError(f"Input file not found: {input_path}")
+
+    with tempfile.TemporaryDirectory() as temp_str:
+        temp_dir = Path(temp_str)
+        print(f"\n>>> Processing: {base_name}")
+
+        temp_tex_dir = temp_dir / "textures"
+        temp_tex_dir.mkdir(parents=True, exist_ok=True)
+
+        temp_base_usd = temp_dir / f"{base_name}_inst_base.usda"
+        temp_inst_usdc = temp_dir / f"{base_name}_inst.usdc"
+        temp_usdz = temp_dir / f"{base_name}_inst.usdz"
+
+        mesh_data = parse_glb_with_trimesh(input_path, temp_tex_dir)
+        build_clean_usd(mesh_data, temp_base_usd, final_params)
+
+        inst_stage = Usd.Stage.CreateNew(str(temp_inst_usdc))
+        UsdGeom.SetStageUpAxis(inst_stage, UsdGeom.Tokens.z)
+        UsdGeom.SetStageMetersPerUnit(inst_stage, 1.0)
+
+        inst_root = UsdGeom.Xform.Define(inst_stage, "/RootNode")
+        inst_stage.SetDefaultPrim(inst_root.GetPrim())
+        inst_root.GetPrim().GetReferences().AddReference(f"./{temp_base_usd.name}")
+        inst_stage.GetRootLayer().Save()
+
+        UsdUtils.CreateNewUsdzPackage(
+            Sdf.AssetPath(str(temp_inst_usdc)), str(temp_usdz)
+        )
+
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        shutil.copy2(temp_base_usd, output_dir / temp_base_usd.name)
+        shutil.copy2(temp_inst_usdc, output_dir / temp_inst_usdc.name)
+
+        if temp_usdz.exists():
+            shutil.copy2(temp_usdz, output_dir / temp_usdz.name)
+        if temp_tex_dir.exists():
+            shutil.copytree(temp_tex_dir, output_dir / "textures", dirs_exist_ok=True)
+
+        print(f"\n>>> Pipeline completed successfully: {output_dir}")
+
+        return {
+            "output_dir": output_dir,
+            "base_usd": output_dir / temp_base_usd.name,
+            "inst_usdc": output_dir / temp_inst_usdc.name,
+            "usdz": output_dir / temp_usdz.name,
+            "textures_dir": output_dir / "textures",
+        }
+
+
+def load_physics_from_json(json_path: Optional[Path]) -> Optional[Dict[str, Any]]:
+
+    if not json_path:
+        return None
+
+    if not json_path.exists():
+        print(
+            f"[Warning] JSON file not found: {json_path}, using default physics params."
+        )
+        return None
+
+    try:
+        with open(json_path, "r", encoding="utf-8") as f:
+            json_data = json.load(f)
+
+        physics_data = json_data.get("physics", {}).get("properties", {}).get("data")
+
+        if physics_data and isinstance(physics_data, dict):
+            print(f"[Info] Successfully loaded physics params from JSON.")
+            return physics_data
+        else:
+            print(
+                f"[Warning] Invalid JSON structure: missing physics.properties.data, using default params."
+            )
+            return None
+
+    except Exception as e:
+        print(
+            f"[Warning] Failed to parse JSON file: {str(e)}, using default physics params."
+        )
+        return None
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="3D Assets to USD/USDZ conversion pipeline with full physics support."
+    )
+    parser.add_argument(
+        "--input", required=True, type=Path, help="Path to the source .glb mesh file."
+    )
+    parser.add_argument(
+        "--json",
+        type=Path,
+        default=None,
+        help="Path to the metadata JSON file (optional, for physics params).",
+    )
+    parser.add_argument(
+        "--out_dir",
+        default=Path("./output_usd"),
+        type=Path,
+        help="Target directory for final USD/USDZ assets.",
+    )
+    args = parser.parse_args()
+
+    user_physics_params = load_physics_from_json(args.json)
+
+    convert_model_to_usd(
+        input_path=args.input, out_dir=args.out_dir, physics_params=user_physics_params
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/embodichain/lab/gym/envs/action_bank/configurable_action.py b/embodichain/lab/gym/envs/action_bank/configurable_action.py
index c0e7130d..964c2b1c 100644
--- a/embodichain/lab/gym/envs/action_bank/configurable_action.py
+++ b/embodichain/lab/gym/envs/action_bank/configurable_action.py
@@ -997,7 +997,7 @@ def get_xpos_name(affordance_name: str) -> str:
 
 
 def get_control_part(env, agent_uid):
-    control_parts = env.metadata["dataset"]["robot_meta"].get("control_parts", [])
+    control_parts = env.cfg.control_parts
 
     if agent_uid in control_parts:
         return agent_uid
@@ -1324,7 +1324,8 @@ def plan_trajectory(
 
         if len(filtered_keyposes) == 1 and len(ref_poses) == 0:
 
-            ret = np.array([filtered_keyposes[0]] * duration)
+            return np.array([filtered_keyposes[0]] * duration).T
+
         else:
             mo_gen = MotionGenerator(
                 cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=env.robot.uid))
diff --git a/embodichain/lab/gym/envs/action_bank/utils.py b/embodichain/lab/gym/envs/action_bank/utils.py
index 8e7d149e..58cfb368 100644
--- a/embodichain/lab/gym/envs/action_bank/utils.py
+++ b/embodichain/lab/gym/envs/action_bank/utils.py
@@ -20,7 +20,6 @@
 from embodichain.utils import logger
 from embodichain.lab.gym.utils.misc import validation_with_process_from_name
 
-
 """Node Generation Utils"""
 
 
diff --git a/embodichain/lab/gym/envs/base_env.py b/embodichain/lab/gym/envs/base_env.py
index fcd89c98..1a0fa89e 100644
--- a/embodichain/lab/gym/envs/base_env.py
+++ b/embodichain/lab/gym/envs/base_env.py
@@ -239,8 +239,7 @@ def add_camera_group_id(self, group_id: int) -> None:
         """
         if not hasattr(self, "_camera_group_ids"):
             self._camera_group_ids: List[int] = []
-        if self.sim.is_rt_enabled:
-            self._camera_group_ids.append(group_id)
+        self._camera_group_ids.append(group_id)
 
     def _setup_scene(self, **kwargs):
         # Init sim manager.
@@ -273,10 +272,9 @@ def _setup_scene(self, **kwargs):
 
         # Setup camera groups for rendering.
         self._camera_group_ids: List[int] = []
-        if self.sim.is_rt_enabled:
-            for sensor in self.sensors.values():
-                if isinstance(sensor, Camera):
-                    self._camera_group_ids.append(sensor.group_id)
+        for sensor in self.sensors.values():
+            if isinstance(sensor, Camera):
+                self._camera_group_ids.append(sensor.group_id)
 
     def _setup_robot(self, **kwargs) -> Robot:
         """Load the robot agent, setup the controller and action space.
@@ -367,10 +365,8 @@ def _get_sensor_obs(self, **kwargs) -> TensorDict[str, any]:
         """
         obs = TensorDict({}, batch_size=[self.num_envs], device=self.device)
 
-        fetch_only = False
-        if self.sim.is_rt_enabled:
-            fetch_only = True
-            self.sim.render_camera_group(self._camera_group_ids)
+        fetch_only = True
+        self.sim.render_camera_group(self._camera_group_ids)
 
         for sensor_name, sensor in self.sensors.items():
             sensor.update(fetch_only=fetch_only)
diff --git a/embodichain/lab/gym/envs/embodied_env.py b/embodichain/lab/gym/envs/embodied_env.py
index 3e699620..18137d87 100644
--- a/embodichain/lab/gym/envs/embodied_env.py
+++ b/embodichain/lab/gym/envs/embodied_env.py
@@ -15,6 +15,7 @@
 # ----------------------------------------------------------------------------
 
 from math import log
+from functools import wraps
 import os
 import torch
 import numpy as np
@@ -53,7 +54,7 @@
     init_rollout_buffer_from_gym_space,
 )
 from embodichain.utils import configclass, logger
-
+from embodichain.data import get_data_path
 
 __all__ = ["EmbodiedEnvCfg", "EmbodiedEnv"]
 
@@ -231,6 +232,27 @@ class EmbodiedEnv(BaseEnv):
     - affordance_datas: The affordance data that can be used to store the intermediate results or information
     """
 
+    @classmethod
+    def __init_subclass__(cls, **kwargs):
+        """Automatically wrap subclass demo-action builders with shape checks.
+
+        Any subclass overriding ``create_demo_action_list`` will be wrapped so its
+        returned action sequence is validated and, when possible, converted to the
+        environment action dimension.
+        """
+        super().__init_subclass__(**kwargs)
+        method = cls.__dict__.get("create_demo_action_list")
+        if method is None or getattr(method, "_demo_action_shape_wrapped", False):
+            return
+
+        @wraps(method)
+        def wrapped_create_demo_action_list(self, *args, **kwargs):
+            action_list = method(self, *args, **kwargs)
+            return self._normalize_demo_action_list(action_list)
+
+        wrapped_create_demo_action_list._demo_action_shape_wrapped = True
+        setattr(cls, "create_demo_action_list", wrapped_create_demo_action_list)
+
     def __init__(self, cfg: EmbodiedEnvCfg, **kwargs):
         self.affordance_datas = {}
         self.action_bank = None
@@ -624,6 +646,112 @@ def _write_rl_rollout_step(
             : self.num_envs, self.current_rollout_step
         ].copy_(truncateds.to(buffer_device), non_blocking=True)
 
+    def _normalize_demo_action_list(
+        self, action_list: Sequence[EnvAction] | torch.Tensor | None
+    ) -> Sequence[EnvAction] | torch.Tensor | None:
+        """Validate/convert demo action outputs to match single action-space dim."""
+        if action_list is None:
+            return None
+
+        expected_dim = int(np.prod(self.action_space.shape))
+
+        if isinstance(action_list, torch.Tensor):
+            return self._normalize_demo_action_tensor(action_list, expected_dim)
+
+        if not isinstance(action_list, Sequence):
+            raise TypeError(
+                "create_demo_action_list must return None, a torch.Tensor, or a sequence of actions. "
+                f"Got {type(action_list)}."
+            )
+
+        normalized_action_list = [
+            self._normalize_demo_action_tensor(action, expected_dim)
+            for action in action_list
+        ]
+        return type(action_list)(normalized_action_list)
+
+    def _normalize_demo_action_tensor(
+        self, action: EnvAction | torch.Tensor, expected_dim: int
+    ) -> EnvAction | torch.Tensor:
+        """Normalize one action tensor to the expected action dimension.
+
+        Conversion rule:
+        - If last-dim equals action-space dim, keep as-is.
+        - If last-dim is larger, slice with ``active_joint_ids``.
+        - If last-dim is smaller, raise ``ValueError``.
+        """
+        if isinstance(action, TensorDict):
+            return self._normalize_demo_action_tensordict(action, expected_dim)
+
+        if not isinstance(action, torch.Tensor):
+            raise TypeError(
+                "Each demo action must be a torch.Tensor or TensorDict. "
+                f"Got {type(action)}."
+            )
+
+        if action.ndim == 0:
+            raise ValueError(
+                "Demo action tensor must have at least one dimension with action features on the last axis."
+            )
+
+        action_dim = int(action.shape[-1])
+        if action_dim == expected_dim:
+            return action
+        if action_dim < expected_dim:
+            raise ValueError(
+                "Demo action dim is smaller than action space dim and cannot be auto-converted. "
+                f"Got action dim={action_dim}, expected={expected_dim}."
+            )
+        return self._slice_action_with_active_joint_ids(
+            action, action_dim, expected_dim
+        )
+
+    def _normalize_demo_action_tensordict(
+        self, action: TensorDict, expected_dim: int
+    ) -> TensorDict:
+        """Normalize tensor entries in a TensorDict action payload."""
+        converted_action = action.clone()
+        for key in ("qpos", "qvel", "qf"):
+            if key not in converted_action:
+                continue
+            value = converted_action[key]
+            if value.ndim == 0:
+                raise ValueError(
+                    f"Demo action TensorDict['{key}'] must have at least one dimension."
+                )
+            action_dim = int(value.shape[-1])
+            if action_dim == expected_dim:
+                continue
+            if action_dim < expected_dim:
+                raise ValueError(
+                    f"Demo action TensorDict['{key}'] dim={action_dim} is smaller than expected action dim={expected_dim}."
+                )
+            converted_action[key] = self._slice_action_with_active_joint_ids(
+                value, action_dim, expected_dim
+            )
+        return converted_action
+
+    def _slice_action_with_active_joint_ids(
+        self, action: torch.Tensor, action_dim: int, expected_dim: int
+    ) -> torch.Tensor:
+        """Slice a high-dimensional action to active joints.
+
+        This is used when demo actions are generated in full-DoF form while the
+        environment action-space only controls active joints.
+        """
+        if len(self.active_joint_ids) != expected_dim:
+            raise ValueError(
+                "Cannot convert demo action by active_joint_ids because their length does not match the action space dim. "
+                f"len(active_joint_ids)={len(self.active_joint_ids)}, expected={expected_dim}."
+            )
+
+        if len(self.active_joint_ids) == 0:
+            raise ValueError(
+                "Cannot convert demo action by active_joint_ids because active_joint_ids is empty."
+            )
+
+        return action[..., self.active_joint_ids]
+
     def _step_action(self, action: EnvAction) -> EnvAction:
         """Set action control command into simulation.
 
@@ -805,6 +933,9 @@ def _setup_lights(self) -> None:
         if self.cfg.light.indirect is not None:
             if "emission_light" in self.cfg.light.indirect:
                 self.sim.set_emission_light(**self.cfg.light.indirect["emission_light"])
+            if "env_map" in self.cfg.light.indirect:
+                path = get_data_path(self.cfg.light.indirect["env_map"])
+                self.sim.set_indirect_lighting(path)
 
     def _setup_background(self) -> None:
         """Setup the static rigid objects in the environment."""
@@ -907,6 +1038,12 @@ def create_demo_action_list(self, *args, **kwargs) -> Sequence[EnvAction] | None
 
         Returns:
             Sequence[EnvAction] | None: A list of actions if a demonstration is available, otherwise None.
+
+        Note:
+            Subclass outputs are automatically post-processed by the base class:
+            action last-dimension must match ``single_action_space``. If larger,
+            actions are sliced by ``active_joint_ids``; if smaller, ``ValueError``
+            is raised.
         """
         raise NotImplementedError(
             "The method 'create_demo_action_list' must be implemented in subclasses."
diff --git a/embodichain/lab/gym/envs/managers/randomization/physics.py b/embodichain/lab/gym/envs/managers/randomization/physics.py
index 7088c25a..1eea74e0 100644
--- a/embodichain/lab/gym/envs/managers/randomization/physics.py
+++ b/embodichain/lab/gym/envs/managers/randomization/physics.py
@@ -25,7 +25,6 @@
 from embodichain.utils.string import resolve_matching_names
 from embodichain.utils import logger
 
-
 if TYPE_CHECKING:
     from embodichain.lab.gym.envs import EmbodiedEnv
 
diff --git a/embodichain/lab/gym/envs/managers/randomization/spatial.py b/embodichain/lab/gym/envs/managers/randomization/spatial.py
index 0b732f5c..1af1c09f 100644
--- a/embodichain/lab/gym/envs/managers/randomization/spatial.py
+++ b/embodichain/lab/gym/envs/managers/randomization/spatial.py
@@ -25,7 +25,6 @@
 from embodichain.utils.math import sample_uniform, matrix_from_euler, matrix_from_quat
 from embodichain.utils import logger
 
-
 if TYPE_CHECKING:
     from embodichain.lab.gym.envs import EmbodiedEnv
 
diff --git a/embodichain/lab/gym/envs/managers/randomization/visual.py b/embodichain/lab/gym/envs/managers/randomization/visual.py
index 66d3d6fb..c49707bd 100644
--- a/embodichain/lab/gym/envs/managers/randomization/visual.py
+++ b/embodichain/lab/gym/envs/managers/randomization/visual.py
@@ -21,6 +21,7 @@
 import random
 import copy
 import numpy as np
+from pathlib import Path
 
 from typing import TYPE_CHECKING, Literal, Union, Dict
 
@@ -59,6 +60,7 @@
     "set_rigid_object_visual_material",
     "set_rigid_object_group_visual_material",
     "randomize_visual_material",
+    "randomize_indirect_lighting",
 ]
 
 
@@ -658,8 +660,6 @@ def __call__(
         roughness_range: tuple[float, float] | None = None,
         ior_range: tuple[float, float] | None = None,
     ):
-        from embodichain.lab.sim.utility import is_rt_enabled
-
         if self.entity_cfg.uid != "default_plane" and self.entity is None:
             return
 
@@ -700,7 +700,7 @@ def __call__(
             )
             randomize_plan["roughness"] = roughness
 
-        if ior_range and is_rt_enabled():
+        if ior_range:
             ior = sample_uniform(
                 lower=torch.tensor(ior_range[0], dtype=torch.float32),
                 upper=torch.tensor(ior_range[1], dtype=torch.float32),
@@ -741,3 +741,137 @@ def __call__(
                         random_texture_prob=random_texture_prob,
                         idx=i,
                     )
+
+        env = self._env.sim.get_env()
+        env.clean_materials()
+
+
+class randomize_indirect_lighting(Functor):
+    """Randomize the environment's indirect (IBL) lighting or emissive light.
+
+    This functor operates in one of two mutually exclusive modes:
+
+    * **HDR mode** — ``path`` is provided. A random ``.hdr`` file is chosen from
+      the folder on every call and applied via :meth:`set_indirect_lighting`.
+    * **Emissive mode** — ``emissive_color_range`` and/or
+      ``emissive_intensity_range`` are provided. The emissive light color and
+      intensity are sampled uniformly on every call and applied via
+      :meth:`set_emission_light`.
+
+    Providing both ``path`` and emissive parameters simultaneously is an error.
+
+    .. attention::
+        This functor applies the same lighting to all environments.
+
+    .. tip::
+        The ``path`` parameter is resolved via :func:`get_data_path`, so it
+        supports absolute paths, data-root-relative paths, and dataset-class
+        paths (e.g. ``"EnvMapHDR"``).
+
+        ``emissive_color_range`` is a pair of ``[r, g, b]`` lists representing
+        the lower and upper bounds for sampling the emissive color, e.g.
+        ``[[0.8, 0.8, 0.8], [1.0, 1.0, 1.0]]``.
+
+        ``emissive_intensity_range`` is a ``[min, max]`` pair for the emissive
+        intensity scalar, e.g. ``[80.0, 150.0]``.
+    """
+
+    def __init__(self, cfg: FunctorCfg, env: EmbodiedEnv):
+        """Initialize the functor.
+
+        Args:
+            cfg: The configuration of the functor.
+
+                * **HDR mode**: set ``params["path"]`` to a folder of ``.hdr`` files.
+                * **Emissive mode**: set ``params["emissive_color_range"]``
+                  (pair of RGB lists) and/or ``params["emissive_intensity_range"]``
+                  (pair of floats).
+
+            env: The environment instance.
+
+        Raises:
+            ValueError: If both HDR and emissive params are provided, or if
+                neither is provided.
+        """
+        super().__init__(cfg, env)
+
+        has_hdr = cfg.params.get("path", None) is not None
+        has_emissive = (
+            cfg.params.get("emissive_color_range", None) is not None
+            or cfg.params.get("emissive_intensity_range", None) is not None
+        )
+
+        if has_hdr and has_emissive:
+            raise ValueError(
+                "randomize_indirect_lighting: 'path' (HDR mode) and emissive "
+                "parameters ('emissive_color_range', 'emissive_intensity_range') "
+                "are mutually exclusive. Configure only one mode."
+            )
+        if not has_hdr and not has_emissive:
+            raise ValueError(
+                "randomize_indirect_lighting: provide either 'path' for HDR "
+                "mode, or 'emissive_color_range'/'emissive_intensity_range' for "
+                "emissive mode."
+            )
+
+        # HDR mode state
+        self._hdr_files: list[Path] = []
+        if has_hdr:
+            path = get_data_path(cfg.params["path"])
+            self._hdr_files = sorted(Path(path).glob("*.hdr"))
+            if not self._hdr_files:
+                logger.log_warning(
+                    f"No .hdr files found in '{path}'. "
+                    f"Indirect lighting randomization will be a no-op."
+                )
+
+        # Emissive mode state
+        self._emissive_color_range: tuple[list[float], list[float]] | None = (
+            cfg.params.get("emissive_color_range", None)
+        )
+        self._emissive_intensity_range: tuple[float, float] | None = cfg.params.get(
+            "emissive_intensity_range", None
+        )
+
+    def __call__(
+        self,
+        env: EmbodiedEnv,
+        env_ids: Union[torch.Tensor, None],
+        path: str | None = None,
+    ) -> None:
+        """Randomize lighting according to the configured mode.
+
+        In HDR mode a random ``.hdr`` file is selected and applied. In emissive
+        mode the emissive color and/or intensity are sampled and applied.
+
+        Args:
+            env: The environment instance.
+            env_ids: Target environment IDs (unused — lighting is global).
+            path: Ignored. Kept for interface compatibility with the event system.
+        """
+        if self._hdr_files:
+            # HDR mode
+            selected = random.choice(self._hdr_files)
+            env.sim.set_indirect_lighting(str(selected))
+            return
+
+        # Emissive mode
+        emissive_color: list[float] | None = None
+        if self._emissive_color_range is not None:
+            color_tensor = sample_uniform(
+                lower=torch.tensor(self._emissive_color_range[0]),
+                upper=torch.tensor(self._emissive_color_range[1]),
+                size=(1, 3),
+            )
+            emissive_color = color_tensor.squeeze(0).tolist()
+
+        emissive_intensity: float | None = None
+        if self._emissive_intensity_range is not None:
+            emissive_intensity = float(
+                np.random.uniform(
+                    self._emissive_intensity_range[0],
+                    self._emissive_intensity_range[1],
+                )
+            )
+
+        env.sim.set_emission_light(color=emissive_color, intensity=emissive_intensity)
diff --git a/embodichain/lab/gym/envs/managers/record.py b/embodichain/lab/gym/envs/managers/record.py
index 7c07ecfd..370645a3 100644
--- a/embodichain/lab/gym/envs/managers/record.py
+++ b/embodichain/lab/gym/envs/managers/record.py
@@ -80,8 +80,7 @@ def __init__(self, cfg: FunctorCfg, env: EmbodiedEnv):
         )
 
         # Add this camera's group ID to the environment for batch rendering when RT is enabled.
-        if getattr(env.sim, "is_rt_enabled", False):
-            env.add_camera_group_id(self.camera.group_id)
+        env.add_camera_group_id(self.camera.group_id)
 
         self._save_path = cfg.params.get("save_path", "./outputs/videos")
         self._current_episode = 0
@@ -158,7 +157,7 @@ def __call__(
         max_env_num: int = 16,
         save_path: str = "./outputs/videos",
     ):
-        self.camera.update(fetch_only=self.camera.is_rt_enabled)
+        self.camera.update(fetch_only=True)
         data = self.camera.get_data()
         rgb = data["color"]
 
@@ -199,7 +198,7 @@ def __call__(
         max_env_num: int = 16,
         save_path: str = "./outputs/videos",
     ):
-        self.camera.update(fetch_only=self.camera.is_rt_enabled)
+        self.camera.update(fetch_only=True)
         data = self.camera.get_data()
         rgb = data["color"]  # shape: (num_envs, H, W, 4)
         if isinstance(rgb, torch.Tensor):
diff --git a/embodichain/lab/gym/envs/tasks/tableware/pour_water/action_bank.py b/embodichain/lab/gym/envs/tasks/tableware/pour_water/action_bank.py
index 20c8a2d7..1a467133 100644
--- a/embodichain/lab/gym/envs/tasks/tableware/pour_water/action_bank.py
+++ b/embodichain/lab/gym/envs/tasks/tableware/pour_water/action_bank.py
@@ -42,7 +42,6 @@
 )
 from embodichain.utils import logger
 
-
 __all__ = ["PourWaterActionBank"]
 
 
diff --git a/embodichain/lab/gym/utils/gym_utils.py b/embodichain/lab/gym/utils/gym_utils.py
index 0a1e2033..fc9a5ffe 100644
--- a/embodichain/lab/gym/utils/gym_utils.py
+++ b/embodichain/lab/gym/utils/gym_utils.py
@@ -737,7 +737,7 @@ def add_env_launcher_args_to_parser(parser: argparse.ArgumentParser) -> None:
         --num_envs: Number of environments to run in parallel (default: 1)
         --device: Device to run the environment on (default: 'cpu')
         --headless: Whether to perform the simulation in headless mode (default: False)
-        --enable_rt: Whether to use RTX rendering backend for the simulation (default: False)
+        --renderer: Renderer backend to use for the simulation. Options are 'hybrid', 'fast-rt', and 'rt'. (default: 'hybrid')
         --gpu_id: The GPU ID to use for the simulation (default: 0)
         --gym_config: Path to gym config file (default: '')
         --action_config: Path to action config file (default: None)
@@ -769,18 +769,19 @@ def add_env_launcher_args_to_parser(parser: argparse.ArgumentParser) -> None:
         default=False,
         action="store_true",
     )
+    parser.add_argument(
+        "--renderer",
+        type=str,
+        choices=["hybrid", "fast-rt", "rt"],
+        default="hybrid",
+        help="Renderer backend to use for the simulation.",
+    )
     parser.add_argument(
         "--arena_space",
         help="The size of the arena space.",
         default=5.0,
         type=float,
     )
-    parser.add_argument(
-        "--enable_rt",
-        help="Whether to use RTX rendering backend for the simulation.",
-        default=False,
-        action="store_true",
-    )
     parser.add_argument(
         "--gpu_id",
         help="The GPU ID to use for the simulation.",
@@ -792,7 +793,7 @@ def add_env_launcher_args_to_parser(parser: argparse.ArgumentParser) -> None:
         type=str,
         help="Path to gym config file.",
         default="",
-        required=True,
+        required=False,
     )
     parser.add_argument(
         "--action_config", type=str, help="Path to action config file.", default=None
@@ -833,7 +834,7 @@ def merge_args_with_gym_config(args: argparse.Namespace, gym_config: dict) -> di
     merged_config["num_envs"] = args.num_envs
     merged_config["device"] = args.device
     merged_config["headless"] = args.headless
-    merged_config["enable_rt"] = args.enable_rt
+    merged_config["renderer"] = args.renderer
     merged_config["gpu_id"] = args.gpu_id
     merged_config["arena_space"] = args.arena_space
     return merged_config
@@ -854,6 +855,7 @@ def build_env_cfg_from_args(
     from embodichain.utils.utility import load_json
     from embodichain.lab.gym.envs import EmbodiedEnvCfg
     from embodichain.lab.sim import SimulationManagerCfg
+    from embodichain.lab.sim.cfg import RenderCfg
 
     gym_config = load_json(args.gym_config)
     gym_config = merge_args_with_gym_config(args, gym_config)
@@ -876,7 +878,7 @@ def build_env_cfg_from_args(
     cfg.sim_cfg = SimulationManagerCfg(
         headless=gym_config["headless"],
         sim_device=gym_config["device"],
-        enable_rt=gym_config["enable_rt"],
+        render_cfg=RenderCfg(renderer=gym_config["renderer"]),
         gpu_id=gym_config["gpu_id"],
         arena_space=gym_config["arena_space"],
     )
diff --git a/embodichain/lab/scripts/preview_asset.py b/embodichain/lab/scripts/preview_asset.py
index 472dca87..49c86de5 100644
--- a/embodichain/lab/scripts/preview_asset.py
+++ b/embodichain/lab/scripts/preview_asset.py
@@ -34,6 +34,16 @@
     python -m embodichain.lab.scripts.preview_asset \\
         --asset_path /path/to/asset.usda \\
         --headless
+
+    # Preview with a built-in environment map
+    python -m embodichain.lab.scripts.preview_asset \\
+        --asset_path /path/to/sugar_box.usda \\
+        --env_map "Studio"
+
+    # Preview with a custom HDR environment map
+    python -m embodichain.lab.scripts.preview_asset \\
+        --asset_path /path/to/sugar_box.usda \\
+        --env_map /path/to/environment.hdr
 """
 
 from __future__ import annotations
@@ -58,12 +68,13 @@ def build_sim_cfg(args: argparse.Namespace):
     Returns:
         SimulationManagerCfg: Simulation configuration.
     """
+    from embodichain.lab.sim.cfg import RenderCfg
     from embodichain.lab.sim.sim_manager import SimulationManagerCfg
 
     return SimulationManagerCfg(
         headless=args.headless,
-        enable_rt=args.enable_rt,
         sim_device=args.sim_device,
+        render_cfg=RenderCfg(renderer=args.renderer),
     )
 
 
@@ -88,9 +99,6 @@ def load_assets(sim: SimulationManager, args: argparse.Namespace):
     )
     from embodichain.lab.sim.shapes import MeshCfg
 
-    # --- light -----------------------------------------------------------
-    sim.set_emission_light(intensity=150)
-
     asset_paths = args.asset_path
     init_pos = tuple(args.init_pos)
     init_rot = tuple(args.init_rot)
@@ -210,6 +218,10 @@ def main(args: argparse.Namespace) -> None:
     sim = SimulationManager(sim_cfg)
 
     try:
+        if args.env_map:
+            log_info(f"Setting environment map: {args.env_map} ...", color="green")
+            sim.set_indirect_lighting(args.env_map)
+
         assets = load_assets(sim, args)
         log_info(f"Loaded {len(assets)} asset(s) successfully.", color="green")
 
@@ -286,7 +298,7 @@ def cli():
         "--body_type",
         type=str,
         choices=["dynamic", "kinematic", "static"],
-        default="kinematic",
+        default="dynamic",
         help="Body type for rigid objects (default: kinematic).",
     )
     parser.add_argument(
@@ -314,10 +326,20 @@ def cli():
         help="Run without rendering window.",
     )
     parser.add_argument(
-        "--enable_rt",
-        action="store_true",
-        default=False,
-        help="Enable ray tracing.",
+        "--renderer",
+        type=str,
+        choices=["hybrid", "fast-rt", "rt"],
+        default="hybrid",
+        help="Renderer backend (default: hybrid).",
+    )
+    parser.add_argument(
+        "--env_map",
+        type=str,
+        default=None,
+        help=(
+            "Environment map for indirect lighting. Accepts a built-in IBL resource "
+            "name (e.g. 'Studio') or an absolute file path (.hdr/.png/.exr)."
+        ),
     )
     parser.add_argument(
         "--preview",
diff --git a/embodichain/lab/scripts/run_agent.py b/embodichain/lab/scripts/run_agent.py
index 912100ef..73c1eacd 100644
--- a/embodichain/lab/scripts/run_agent.py
+++ b/embodichain/lab/scripts/run_agent.py
@@ -27,7 +27,6 @@
 from embodichain.utils.logger import log_error
 from .run_env import main
 
-
 if __name__ == "__main__":
     np.set_printoptions(5, suppress=True)
     torch.set_printoptions(precision=5, sci_mode=False)
diff --git a/embodichain/lab/sim/atom_actions.py b/embodichain/lab/sim/atom_actions.py
index a60a6dbc..2abefea9 100644
--- a/embodichain/lab/sim/atom_actions.py
+++ b/embodichain/lab/sim/atom_actions.py
@@ -39,7 +39,6 @@
     extract_drive_calls,
 )
 
-
 """
 --------------------------------------------Atom action functions----------------------------------------------------
 --------------------------------------------Atom action functions----------------------------------------------------
diff --git a/embodichain/lab/sim/atomic_actions/__init__.py b/embodichain/lab/sim/atomic_actions/__init__.py
new file mode 100644
index 00000000..cf1e60ce
--- /dev/null
+++ b/embodichain/lab/sim/atomic_actions/__init__.py
@@ -0,0 +1,67 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Atomic action abstraction layer for embodied AI motion generation.
+
+This module provides a unified interface for atomic actions like reach, grasp,
+move, etc., with support for semantic object understanding and extensible
+custom action registration.
+"""
+
+from .core import (
+    Affordance,
+    AntipodalAffordance,
+    InteractionPoints,
+    ObjectSemantics,
+    ActionCfg,
+    AtomicAction,
+)
+from .actions import (
+    MoveAction,
+    PickUpAction,
+    PlaceAction,
+    MoveActionCfg,
+    PickUpActionCfg,
+    PlaceActionCfg,
+)
+from .engine import (
+    AtomicActionEngine,
+    register_action,
+    unregister_action,
+    get_registered_actions,
+)
+
+__all__ = [
+    # Core classes
+    "Affordance",
+    "GraspPose",
+    "InteractionPoints",
+    "ObjectSemantics",
+    "ActionCfg",
+    "AtomicAction",
+    # Action implementations
+    "MoveAction",
+    "PickUpAction",
+    "PlaceAction",
+    "MoveActionCfg",
+    "PickUpActionCfg",
+    "PlaceActionCfg",
+    # Engine
+    "AtomicActionEngine",
+    "register_action",
+    "unregister_action",
+    "get_registered_actions",
+]
diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py
new file mode 100644
index 00000000..4f2698de
--- /dev/null
+++ b/embodichain/lab/sim/atomic_actions/actions.py
@@ -0,0 +1,634 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import torch
+from typing import Optional, Union, TYPE_CHECKING, Any
+
+from embodichain.lab.sim.planners import PlanResult, PlanState, MoveType
+from embodichain.lab.sim.planners.motion_generator import MotionGenOptions
+from embodichain.lab.sim.planners.toppra_planner import ToppraPlanOptions
+from .core import AtomicAction, ObjectSemantics, AntipodalAffordance, ActionCfg
+from embodichain.utils import logger
+from embodichain.utils import configclass
+from embodichain.lab.sim.utility.action_utils import interpolate_with_distance
+import numpy as np
+
+if TYPE_CHECKING:
+    from embodichain.lab.sim.planners import MotionGenerator
+    from embodichain.lab.sim.objects import Robot
+
+
+@configclass
+class MoveActionCfg(ActionCfg):
+    name: str = "move"
+    """Name of the action, used for identification and logging."""
+
+    sample_interval: int = 50
+    """Number of waypoints to sample for the motion trajectory. Should be large enough to ensure smooth motion, but not too large to cause unnecessary computation overhead."""
+
+
+@configclass
+class GraspActionCfg(MoveActionCfg):
+    """Shared configuration for actions that involve gripper open/close motions."""
+
+    hand_open_qpos: torch.Tensor | None = None
+    """[hand_dof,] of float. Joint positions for open hand state."""
+
+    hand_close_qpos: torch.Tensor | None = None
+    """[hand_dof,] of float. Joint positions for closed hand state."""
+
+    hand_control_part: str = "hand"
+    """Name of the robot part that controls the hand joints."""
+
+    lift_height: float = 0.1
+    """Height (m) to lift the end-effector after the gripper phase."""
+
+    sample_interval: int = 80
+    """Number of waypoints for the full trajectory (approach + hand + lift/back)."""
+
+    hand_interp_steps: int = 5
+    """Number of waypoints for the gripper open/close interpolation phase."""
+
+
+class MoveAction(AtomicAction):
+    def __init__(
+        self,
+        motion_generator: MotionGenerator,
+        cfg: MoveActionCfg | None = None,
+    ):
+        """
+        Initialize the atomic action.
+        Args:
+            motion_generator: The motion generator instance to use for planning.
+            cfg: Configuration for the action.
+        """
+        super().__init__(
+            motion_generator, cfg=cfg if cfg is not None else MoveActionCfg()
+        )
+
+        self.n_envs = self.robot.get_qpos().shape[0]
+        self.arm_joint_ids = self.robot.get_joint_ids(name=self.cfg.control_part)
+        self.dof = len(self.arm_joint_ids)
+
+    def _resolve_pose_target(
+        self,
+        target: Union[ObjectSemantics, torch.Tensor],
+        *,
+        action_name: str,
+    ) -> tuple[bool, torch.Tensor]:
+        """Resolve a pose target into a batched homogeneous transform tensor."""
+        if isinstance(target, ObjectSemantics):
+            logger.log_error(
+                f"{action_name} currently does not support ObjectSemantics target. "
+                f"Please provide target pose as torch.Tensor of shape (4, 4) or "
+                f"(n_envs, 4, 4)",
+                NotImplementedError,
+            )
+        if not isinstance(target, torch.Tensor):
+            logger.log_error(
+                "Target must be either ObjectSemantics or torch.Tensor of shape "
+                f"(4, 4) or ({self.n_envs}, 4, 4)",
+                TypeError,
+            )
+
+        if target.shape == (4, 4):
+            target = target.unsqueeze(0).repeat(self.n_envs, 1, 1)
+        if target.shape != (self.n_envs, 4, 4):
+            logger.log_error(
+                f"Target tensor must have shape (4, 4) or ({self.n_envs}, 4, 4), but got {target.shape}",
+                ValueError,
+            )
+        return True, target
+
+    def _resolve_start_qpos(
+        self,
+        start_qpos: Optional[torch.Tensor],
+        arm_dof: Optional[int] = None,
+    ) -> torch.Tensor:
+        """Resolve planning start joint positions into batched arm joint positions."""
+        arm_dof = self.dof if arm_dof is None else arm_dof
+        if start_qpos is None:
+            start_qpos = self.robot.get_qpos(name=self.cfg.control_part)
+        if start_qpos.shape == (arm_dof,):
+            start_qpos = start_qpos.unsqueeze(0).repeat(self.n_envs, 1)
+        if start_qpos.shape != (self.n_envs, arm_dof):
+            logger.log_error(
+                f"start_qpos must have shape ({self.n_envs}, {arm_dof}), but got {start_qpos.shape}",
+                ValueError,
+            )
+        return start_qpos
+
+    def _compute_three_phase_waypoints(
+        self,
+        hand_interp_steps: int,
+        *,
+        first_phase_name: str,
+        third_phase_name: str,
+        first_phase_ratio: float = 0.6,
+    ) -> tuple[int, int, int]:
+        """Split total sample interval into motion, hand interpolation, and motion phases."""
+        first_phase_waypoint = int(
+            np.round(self.cfg.sample_interval - hand_interp_steps) * first_phase_ratio
+        )
+        if first_phase_waypoint < 2:
+            logger.log_error(
+                f"Not enough waypoints for {first_phase_name} trajectory. "
+                "Please increase sample_interval or decrease hand_interp_steps.",
+                ValueError,
+            )
+        second_phase_waypoint = hand_interp_steps
+        third_phase_waypoint = (
+            self.cfg.sample_interval - first_phase_waypoint - second_phase_waypoint
+        )
+        if third_phase_waypoint < 2:
+            logger.log_error(
+                f"Not enough waypoints for {third_phase_name} trajectory. "
+                "Please increase sample_interval or decrease hand_interp_steps.",
+                ValueError,
+            )
+        return first_phase_waypoint, second_phase_waypoint, third_phase_waypoint
+
+    def _build_motion_gen_options(
+        self,
+        start_qpos: torch.Tensor,
+        sample_interval: int,
+    ) -> MotionGenOptions:
+        """Build default motion generation options for an atomic action."""
+        return MotionGenOptions(
+            start_qpos=start_qpos[0],
+            control_part=self.cfg.control_part,
+            is_interpolate=True,
+            is_linear=False,
+            interpolate_position_step=0.001,
+            plan_opts=ToppraPlanOptions(
+                sample_interval=sample_interval,
+            ),
+        )
+
+    def _plan_arm_trajectory(
+        self,
+        target_states_list: list[list[PlanState]],
+        start_qpos: torch.Tensor,
+        n_waypoints: int,
+        arm_dof: Optional[int] = None,
+    ) -> tuple[bool, torch.Tensor]:
+        """Plan batched arm trajectories for all environments."""
+        arm_dof = self.dof if arm_dof is None else arm_dof
+
+        n_state = len(target_states_list[0])
+        xpos_traj = torch.zeros(
+            size=(self.n_envs, n_state, 4, 4), dtype=torch.float32, device=self.device
+        )
+        for i, target_states in enumerate(target_states_list):
+            for j, target_state in enumerate(target_states):
+                # [env_i, state_j, 4, 4]
+                xpos_traj[i, j] = target_state.xpos
+
+        trajectory = torch.zeros(
+            size=(self.n_envs, n_state, arm_dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        qpos_seed = start_qpos
+        for j in range(n_state):
+            is_success, qpos = self.robot.compute_ik(
+                pose=xpos_traj[:, j], name=self.cfg.control_part, joint_seed=qpos_seed
+            )
+            if not is_success:
+                logger.log_warning(
+                    f"Failed to compute IK for target state {j} in some environments. "
+                    "The resulting trajectory may be invalid."
+                )
+                return False, trajectory
+            else:
+                trajectory[:, j] = qpos
+                qpos_seed = qpos
+        trajectory = torch.concatenate([start_qpos.unsqueeze(1), trajectory], dim=1)
+        interp_traj = interpolate_with_distance(
+            trajectory=trajectory, interp_num=n_waypoints, device=self.device
+        )
+        return True, interp_traj
+
+    def _interpolate_hand_qpos(
+        self,
+        start_hand_qpos: torch.Tensor,
+        end_hand_qpos: torch.Tensor,
+        n_waypoints: int,
+    ) -> torch.Tensor:
+        """Interpolate hand joint positions between two gripper states."""
+        weights = torch.linspace(0, 1, steps=n_waypoints, device=self.device)
+        hand_qpos_list = [
+            torch.lerp(start_hand_qpos, end_hand_qpos, weight) for weight in weights
+        ]
+        return torch.stack(hand_qpos_list, dim=0)
+
+    def execute(
+        self,
+        target: Union[ObjectSemantics, torch.Tensor],
+        start_qpos: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> tuple[bool, torch.Tensor, list[float]]:
+        """execute pick up action
+
+        Args:
+            target (ObjectSemantics): object semantics containing grasp affordance and entity information
+            start_qpos (Optional[torch.Tensor], optional): Planning start qpos. Defaults to None.
+
+        Returns:
+            tuple[bool, torch.Tensor, list[float]]:
+            is_success,
+            trajectory of shape (n_envs, n_waypoints, dof),
+            joint_ids corresponding to trajectory
+        """
+        is_success, move_xpos = self._resolve_pose_target(
+            target, action_name=self.__class__.__name__
+        )
+        start_qpos = self._resolve_start_qpos(start_qpos)
+
+        # TODO: warning and fallback if no valid grasp pose found
+        if not is_success:
+            logger.log_warning(
+                "Failed to resolve grasp pose, using default approach pose"
+            )
+            return False, torch.empty(0), self.arm_joint_ids
+
+        target_states_list = [
+            [
+                PlanState(xpos=move_xpos[i], move_type=MoveType.EEF_MOVE),
+            ]
+            for i in range(self.n_envs)
+        ]
+        is_plan_success, trajectory = self._plan_arm_trajectory(
+            target_states_list, start_qpos, self.cfg.sample_interval
+        )
+        return is_plan_success, trajectory, self.arm_joint_ids
+
+    def validate(self, target, start_qpos=None, **kwargs):
+        # TODO: implement proper validation logic for pick up action
+        return True
+
+
+@configclass
+class PickUpActionCfg(GraspActionCfg):
+    name: str = "pick_up"
+    """Name of the action, used for identification and logging."""
+
+    pre_grasp_distance: float = 0.15
+    """Distance to offset back from the grasp pose along the approach direction to get
+    the pre-grasp pose. Should be large enough to avoid collision during approach."""
+
+    approach_direction: torch.Tensor = torch.tensor([0, 0, -1], dtype=torch.float32)
+    """Direction from which the gripper approaches the object for grasping, expressed
+    in the object local frame. Default [0, 0, -1] means approaching from above."""
+
+
+class PickUpAction(MoveAction):
+    def __init__(
+        self,
+        motion_generator: MotionGenerator,
+        cfg: PickUpActionCfg | None = None,
+    ):
+        """
+        Initialize the atomic action.
+        Args:
+            motion_generator: The motion generator instance to use for planning.
+            cfg: Configuration for the action.
+        """
+        super().__init__(
+            motion_generator, cfg=cfg if cfg is not None else PickUpActionCfg()
+        )
+        self.cfg = cfg
+        self.approach_direction = self.cfg.approach_direction.to(self.device)
+        if self.cfg.hand_open_qpos is None:
+            logger.log_error("hand_open_qpos must be specified in PickUpActionCfg")
+        if self.cfg.hand_close_qpos is None:
+            logger.log_error("hand_close_qpos must be specified in PickUpActionCfg")
+        self.hand_open_qpos = self.cfg.hand_open_qpos.to(self.device)
+        self.hand_close_qpos = self.cfg.hand_close_qpos.to(self.device)
+
+        self.hand_joint_ids = self.robot.get_joint_ids(name=self.cfg.hand_control_part)
+        self.joint_ids = self.arm_joint_ids + self.hand_joint_ids
+        self.arm_dof = len(self.arm_joint_ids)
+        self.dof = len(self.joint_ids)
+
+    def execute(
+        self,
+        target: Union[ObjectSemantics, torch.Tensor],
+        start_qpos: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> tuple[bool, torch.Tensor, list[float]]:
+        """execute pick up action
+
+        Args:
+            target (Union[ObjectSemantics, torch.Tensor]): target object semantics or target pose for grasping
+            start_qpos (Optional[torch.Tensor], optional): Planning start qpos. Defaults to None.
+
+        Returns:
+            tuple[bool, torch.Tensor, list[float]]:
+            is_success,
+            trajectory of shape (n_envs, n_waypoints, dof),
+            joint_ids corresponding to trajectory
+        """
+
+        # Resolve grasp pose
+        if isinstance(target, ObjectSemantics):
+            is_success, grasp_xpos, open_length = self._resolve_grasp_pose(target)
+        else:
+            is_success, grasp_xpos = self._resolve_pose_target(
+                target, action_name=self.__class__.__name__
+            )
+
+        # TODO: warning and fallback if no valid grasp pose found
+        if not is_success:
+            logger.log_warning(
+                "Failed to resolve grasp pose, using default approach pose"
+            )
+            return False, torch.empty(0), self.joint_ids
+
+        # Compute pre-grasp pose
+        # TODO: only for parallel gripper, approach in negative grasp z direction
+        grasp_z = grasp_xpos[:, :3, 2]
+        pre_grasp_xpos = self._apply_offset(
+            pose=grasp_xpos,
+            offset=-grasp_z * self.cfg.pre_grasp_distance,
+        )
+        # Compute lift pose
+        start_qpos = self._resolve_start_qpos(start_qpos, self.arm_dof)
+
+        # compute waypoint number for each phase
+        n_approach_waypoint, n_close_waypoint, n_lift_waypoint = (
+            self._compute_three_phase_waypoints(
+                self.cfg.hand_interp_steps,
+                first_phase_name="approach",
+                third_phase_name="lift",
+            )
+        )
+
+        # get pick trajectory
+        target_states_list = [
+            [
+                PlanState(xpos=pre_grasp_xpos[i], move_type=MoveType.EEF_MOVE),
+                PlanState(xpos=grasp_xpos[i], move_type=MoveType.EEF_MOVE),
+            ]
+            for i in range(self.n_envs)
+        ]
+        pick_trajectory = torch.zeros(
+            size=(self.n_envs, n_approach_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        is_success, plan_traj = self._plan_arm_trajectory(
+            target_states_list,
+            start_qpos,
+            n_approach_waypoint,
+            self.arm_dof,
+        )
+        if not is_success:
+            logger.log_warning("Failed to plan approach trajectory.")
+            return False, pick_trajectory, self.joint_ids
+        pick_trajectory[:, :, : self.arm_dof] = plan_traj
+        # Padding hand open qpos to pick trajectory
+        pick_trajectory[:, :, self.arm_dof :] = self.hand_open_qpos
+
+        # get hand closing trajectory
+        grasp_qpos = pick_trajectory[
+            :, -1, : self.arm_dof
+        ]  # Assuming the last point of pick trajectory is the grasp pose
+        hand_close_path = self._interpolate_hand_qpos(
+            self.hand_open_qpos,
+            self.hand_close_qpos,
+            n_close_waypoint,
+        )
+        hand_close_trajectory = torch.zeros(
+            size=(self.n_envs, n_close_waypoint, self.dof),
+            device=self.device,
+        )
+        hand_close_trajectory[:, :, : self.arm_dof] = grasp_qpos
+        hand_close_trajectory[:, :, self.arm_dof :] = hand_close_path
+
+        # get lift trajectory
+        lift_trajectory = torch.zeros(
+            size=(self.n_envs, n_lift_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        # lift_xpos = self._compute_lift_xpos(grasp_xpos)
+        lift_xpos = self._apply_offset(
+            pose=grasp_xpos,
+            offset=torch.tensor([0, 0, 1], device=self.device) * self.cfg.lift_height,
+        )
+        target_states_list = [
+            [
+                PlanState(xpos=lift_xpos[i], move_type=MoveType.EEF_MOVE),
+            ]
+            for i in range(self.n_envs)
+        ]
+        is_success, plan_traj = self._plan_arm_trajectory(
+            target_states_list,
+            grasp_qpos,
+            n_lift_waypoint,
+            self.arm_dof,
+        )
+        if not is_success:
+            logger.log_warning("Failed to plan lift trajectory.")
+            return False, lift_trajectory, self.joint_ids
+        lift_trajectory[:, :, : self.arm_dof] = plan_traj
+        # padding hand close qpos to lift trajectory
+        lift_trajectory[:, :, self.arm_dof :] = self.hand_close_qpos
+
+        # concatenate trajectories
+        trajectory = torch.cat(
+            [pick_trajectory, hand_close_trajectory, lift_trajectory], dim=1
+        )
+        return True, trajectory, self.joint_ids
+
+    def _resolve_grasp_pose(
+        self, semantics: ObjectSemantics
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        if not isinstance(semantics.affordance, AntipodalAffordance):
+            logger.log_error(
+                "Grasp pose affordance must be of type AntipodalAffordance"
+            )
+        if semantics.entity is None:
+            logger.log_error(
+                "ObjectSemantics must be associated with an entity to get object pose"
+            )
+        obj_poses = semantics.entity.get_local_pose(to_matrix=True)
+
+        is_success, grasp_xpos, open_length = semantics.affordance.get_best_grasp_poses(
+            obj_poses=obj_poses, approach_direction=self.approach_direction
+        )
+        return is_success, grasp_xpos, open_length
+
+    def validate(self, target, start_qpos=None, **kwargs):
+        # TODO: implement proper validation logic for pick up action
+        return True
+
+
+@configclass
+class PlaceActionCfg(GraspActionCfg):
+    name: str = "place"
+    """Name of the action, used for identification and logging."""
+
+
+class PlaceAction(MoveAction):
+    def __init__(
+        self,
+        motion_generator: MotionGenerator,
+        cfg: PlaceActionCfg | None = None,
+    ):
+        """
+        Initialize the atomic action.
+        Args:
+            motion_generator: The motion generator instance to use for planning.
+            cfg: Configuration for the action.
+        """
+        super().__init__(
+            motion_generator, cfg=cfg if cfg is not None else PlaceActionCfg()
+        )
+        self.cfg = cfg
+        if self.cfg.hand_open_qpos is None:
+            logger.log_error("hand_open_qpos must be specified in PlaceActionCfg")
+        if self.cfg.hand_close_qpos is None:
+            logger.log_error("hand_close_qpos must be specified in PlaceActionCfg")
+        self.hand_open_qpos = self.cfg.hand_open_qpos.to(self.device)
+        self.hand_close_qpos = self.cfg.hand_close_qpos.to(self.device)
+
+        self.hand_joint_ids = self.robot.get_joint_ids(name=self.cfg.hand_control_part)
+        self.joint_ids = self.arm_joint_ids + self.hand_joint_ids
+        self.arm_dof = len(self.arm_joint_ids)
+        self.dof = len(self.joint_ids)
+
+    def execute(
+        self,
+        target: Union[ObjectSemantics, torch.Tensor],
+        start_qpos: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> tuple[bool, torch.Tensor, list[float]]:
+        """execute pick up action
+
+        Args:
+            target (ObjectSemantics): object semantics containing grasp affordance and entity information
+            start_qpos (Optional[torch.Tensor], optional): Planning start qpos. Defaults to None.
+
+        Returns:
+            tuple[bool, torch.Tensor, list[float]]:
+            is_success,
+            trajectory of shape (n_envs, n_waypoints, dof),
+            joint_ids corresponding to trajectory
+        """
+        is_success, place_xpos = self._resolve_pose_target(
+            target, action_name=self.__class__.__name__
+        )
+        start_qpos = self._resolve_start_qpos(start_qpos, self.arm_dof)
+
+        # TODO: warning and fallback if no valid grasp pose found
+        if not is_success:
+            logger.log_warning(
+                "Failed to resolve grasp pose, using default approach pose"
+            )
+            return False, torch.empty(0), self.joint_ids
+
+        # compute waypoint number for each phase
+        n_down_waypoint, n_open_waypoint, n_lift_waypoint = (
+            self._compute_three_phase_waypoints(
+                self.cfg.hand_interp_steps,
+                first_phase_name="approach",
+                third_phase_name="lift",
+            )
+        )
+
+        down_trajectory = torch.zeros(
+            size=(self.n_envs, n_down_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        lift_xpos = self._apply_offset(
+            pose=place_xpos,
+            offset=torch.tensor([0, 0, 1], device=self.device) * self.cfg.lift_height,
+        )
+        target_states_list = [
+            [
+                PlanState(xpos=lift_xpos[i], move_type=MoveType.EEF_MOVE),
+                PlanState(xpos=place_xpos[i], move_type=MoveType.EEF_MOVE),
+            ]
+            for i in range(self.n_envs)
+        ]
+        is_success, plan_traj = self._plan_arm_trajectory(
+            target_states_list,
+            start_qpos,
+            n_down_waypoint,
+            self.arm_dof,
+        )
+        if not is_success:
+            logger.log_warning("Failed to plan down trajectory.")
+            return False, down_trajectory, self.joint_ids
+        down_trajectory[:, :, : self.arm_dof] = plan_traj
+        # Padding hand open qpos to pick trajectory
+        down_trajectory[:, :, self.arm_dof :] = self.hand_close_qpos
+
+        # get hand closing trajectory
+        reach_qpos = down_trajectory[
+            :, -1, : self.arm_dof
+        ]  # Assuming the last point of pick trajectory is the grasp pose
+        hand_open_path = self._interpolate_hand_qpos(
+            self.hand_close_qpos,
+            self.hand_open_qpos,
+            n_open_waypoint,
+        )
+        hand_open_trajectory = torch.zeros(
+            size=(self.n_envs, n_open_waypoint, self.dof),
+            device=self.device,
+        )
+        hand_open_trajectory[:, :, : self.arm_dof] = reach_qpos
+        hand_open_trajectory[:, :, self.arm_dof :] = hand_open_path
+
+        # get lift trajectory
+        back_trajectory = torch.zeros(
+            size=(self.n_envs, n_lift_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        target_states_list = [
+            [
+                PlanState(xpos=lift_xpos[i], move_type=MoveType.EEF_MOVE),
+            ]
+            for i in range(self.n_envs)
+        ]
+        is_success, plan_traj = self._plan_arm_trajectory(
+            target_states_list,
+            reach_qpos,
+            n_lift_waypoint,
+            self.arm_dof,
+        )
+        if not is_success:
+            logger.log_warning("Failed to plan back trajectory.")
+            return False, back_trajectory, self.joint_ids
+        back_trajectory[:, :, : self.arm_dof] = plan_traj
+        # padding hand open qpos to back trajectory
+        back_trajectory[:, :, self.arm_dof :] = self.hand_open_qpos
+
+        # concatenate trajectories
+        trajectory = torch.cat(
+            [down_trajectory, hand_open_trajectory, back_trajectory], dim=1
+        )
+        return True, trajectory, self.joint_ids
+
+    def validate(self, target, start_qpos=None, **kwargs):
+        # TODO: implement proper validation logic for pick up action
+        return True
diff --git a/embodichain/lab/sim/atomic_actions/core.py b/embodichain/lab/sim/atomic_actions/core.py
new file mode 100644
index 00000000..08a22fc5
--- /dev/null
+++ b/embodichain/lab/sim/atomic_actions/core.py
@@ -0,0 +1,468 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import torch
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING
+
+from embodichain.lab.sim.planners import PlanResult, PlanState, MoveType
+from embodichain.utils import configclass
+
+from embodichain.toolkits.graspkit.pg_grasp import (
+    GraspGenerator,
+    GraspGeneratorCfg,
+)
+from embodichain.toolkits.graspkit.pg_grasp.gripper_collision_checker import (
+    GripperCollisionCfg,
+)
+from embodichain.lab.sim.common import BatchEntity
+from embodichain.utils import logger
+
+if TYPE_CHECKING:
+    from embodichain.lab.sim.planners import MotionGenerator, MotionGenOptions
+    from embodichain.lab.sim.objects import Robot
+
+
+# =============================================================================
+# Affordance Classes
+# =============================================================================
+
+
+@dataclass
+class Affordance:
+    """Base class for affordance data.
+
+    Affordance represents interaction possibilities for an object.
+    This is the base class for specific affordance types.
+    """
+
+    object_label: str = ""
+    """Label of the object this affordance belongs to."""
+
+    geometry: Dict[str, Any] = field(default_factory=dict)
+    """Geometry dictionary shared with ObjectSemantics.
+
+    The mesh payload is expected to be stored in:
+    - ``mesh_vertices``: torch.Tensor with shape [N, 3]
+    - ``mesh_triangles``: torch.Tensor with shape [M, 3]
+    """
+
+    custom_config: Dict[str, Any] = field(default_factory=dict)
+    """User-defined configuration payload for affordance creation and usage."""
+
+    @property
+    def mesh_vertices(self) -> torch.Tensor | None:
+        """Get mesh vertices from geometry.
+
+        Returns:
+            Mesh vertices tensor [N, 3], or None if unavailable.
+
+        Raises:
+            TypeError: If ``mesh_vertices`` exists but is not a torch tensor.
+        """
+        vertices = self.geometry.get("mesh_vertices")
+        if vertices is None:
+            return None
+        if not isinstance(vertices, torch.Tensor):
+            raise TypeError("geometry['mesh_vertices'] must be a torch.Tensor")
+        return vertices
+
+    @property
+    def mesh_triangles(self) -> torch.Tensor | None:
+        """Get mesh triangles from geometry.
+
+        Returns:
+            Mesh triangle index tensor [M, 3], or None if unavailable.
+
+        Raises:
+            TypeError: If ``mesh_triangles`` exists but is not a torch tensor.
+        """
+        triangles = self.geometry.get("mesh_triangles")
+        if triangles is None:
+            return None
+        if not isinstance(triangles, torch.Tensor):
+            raise TypeError("geometry['mesh_triangles'] must be a torch.Tensor")
+        return triangles
+
+    def set_custom_config(self, key: str, value: Any) -> None:
+        """Set a custom affordance configuration value."""
+        self.custom_config[key] = value
+
+    def get_custom_config(self, key: str, default: Any = None) -> Any:
+        """Get a custom affordance configuration value."""
+        return self.custom_config.get(key, default)
+
+    def get_batch_size(self) -> int:
+        """Return the batch size of this affordance data."""
+        return 1
+
+
+@dataclass
+class AntipodalAffordance(Affordance):
+    generator: GraspGenerator | None = None
+    """Grasp generator instance, initialized lazily when needed."""
+
+    force_reannotate: bool = False
+    """Whether to force re-annotation of grasp generator on each access."""
+
+    is_draw_grasp_xpos: bool = False
+    """Whether to visualize grasp poses in the simulator."""
+
+    def _init_generator(self):
+        if (
+            self.geometry.get("mesh_vertices", None) is None
+            or self.geometry.get("mesh_triangles", None) is None
+        ):
+            logger.log_error(
+                "Mesh vertices and triangles must be provided in geometry to initialize AntipodalAffordance."
+            )
+        self.generator = GraspGenerator(
+            vertices=self.geometry.get("mesh_vertices"),
+            triangles=self.geometry.get("mesh_triangles"),
+            cfg=self.custom_config.get("generator_cfg", None),
+            gripper_collision_cfg=self.custom_config.get("gripper_collision_cfg", None),
+        )
+        if self.force_reannotate:
+            self.generator.annotate()
+        else:
+            if self.generator._hit_point_pairs is None:
+                self.generator.annotate()
+
+    def get_best_grasp_poses(
+        self,
+        obj_poses: torch.Tensor,
+        approach_direction: torch.Tensor = torch.tensor(
+            [0, 0, -1], dtype=torch.float32
+        ),
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        if self.generator is None:
+            self._init_generator()
+
+        grasp_xpos_list = []
+        is_success_list = []
+        open_length_list = []
+        for i, obj_pose in enumerate(obj_poses):
+            is_success, grasp_xpos, open_length = self.generator.get_grasp_poses(
+                obj_pose, approach_direction
+            )
+            if is_success:
+                grasp_xpos_list.append(grasp_xpos.unsqueeze(0))
+            else:
+                logger.log_warning(f"No valid grasp pose found for {i}-th object.")
+                grasp_xpos_list.append(
+                    torch.eye(
+                        4, dtype=torch.float32, device=self.generator.device
+                    ).unsqueeze(0)
+                )  # Default to identity pose if no grasp found
+            is_success_list.append(is_success)
+            open_length_list.append(open_length)
+        is_success = torch.tensor(
+            is_success_list, dtype=torch.bool, device=self.generator.device
+        )
+        grasp_xpos = torch.concatenate(grasp_xpos_list, dim=0)  # [B, 4, 4]
+        open_length = torch.tensor(
+            open_length_list, dtype=torch.float32, device=self.generator.device
+        )
+        if self.is_draw_grasp_xpos:
+            self._draw_grasp_xpos(grasp_xpos, open_length)
+        return is_success, grasp_xpos, open_length
+
+    def _draw_grasp_xpos(self, grasp_xpos: torch.Tensor, open_length: torch.Tensor):
+        sim = SimulationManager.get_instance()
+        axis_xpos = []
+        for i in range(grasp_xpos.shape[0]):
+            axis_xpos.append(grasp_xpos[i].to("cpu").numpy())
+        sim.draw_marker(
+            cfg=MarkerCfg(
+                name="grasp_xpos",
+                axis_xpos=axis_xpos,
+                axis_len=0.05,
+            )
+        )
+
+
+@dataclass
+class InteractionPoints(Affordance):
+    """Interaction points affordance containing a batch of 3D positions.
+
+    Interaction points define specific locations on an object surface
+    that can be used for contact-based interactions (pushing, poking,
+    touching) rather than full grasping.
+    """
+
+    points: torch.Tensor = field(default_factory=lambda: torch.zeros(1, 3))
+    """Batch of 3D interaction points with shape [B, 3].
+
+    Each point is a 3D coordinate in the object's local coordinate frame.
+    """
+
+    normals: torch.Tensor | None = None
+    """Optional surface normals at each interaction point with shape [B, 3].
+
+    Normals indicate the surface orientation at each point,
+    useful for determining approach directions.
+    """
+
+    point_types: List[str] = field(default_factory=list)
+    """Optional labels for each point's interaction type.
+
+    Examples: "push", "poke", "touch", "pinch"
+    """
+
+    def get_points_by_type(self, point_type: str) -> torch.Tensor | None:
+        """Get points by their interaction type.
+
+        Args:
+            point_type: Type of interaction (e.g., "push", "poke")
+
+        Returns:
+            Tensor of points if found, None otherwise
+        """
+        if point_type in self.point_types:
+            indices = [i for i, t in enumerate(self.point_types) if t == point_type]
+            return self.points[indices]
+        return None
+
+    def get_batch_size(self) -> int:
+        """Return the number of interaction points in this affordance."""
+        return self.points.shape[0]
+
+    def get_approach_direction(self, point_idx: int) -> torch.Tensor:
+        """Get recommended approach direction for a given point.
+
+        Args:
+            point_idx: Index of the point
+
+        Returns:
+            3D approach direction vector (normalized)
+        """
+        if self.normals is not None:
+            # Approach from the opposite direction of the surface normal
+            return -self.normals[point_idx]
+        # Default: approach from positive z
+        return torch.tensor(
+            [0, 0, 1], dtype=self.points.dtype, device=self.points.device
+        )
+
+
+# =============================================================================
+# ObjectSemantics
+# =============================================================================
+
+
+@dataclass
+class ObjectSemantics:
+    """Semantic information about interaction target.
+
+    This class encapsulates all semantic and geometric information about
+    an object needed for intelligent interaction planning.
+    """
+
+    affordance: Affordance
+    """Affordance data (GraspPose, InteractionPoints, etc.)."""
+
+    geometry: Dict[str, Any]
+    """Geometric information including bounding box, mesh data."""
+
+    properties: Dict[str, Any] = field(default_factory=dict)
+    """Physical properties: mass, friction, etc."""
+
+    label: str = "none"
+    """Object category label (e.g., 'apple', 'bottle')."""
+
+    entity: BatchEntity | None = None
+    """Optional reference to the underlying simulation entity representing this object."""
+
+    def __post_init__(self) -> None:
+        """Bind affordance metadata to this semantic object.
+
+        The affordance shares the same geometry dict instance as
+        ``ObjectSemantics.geometry`` so mesh tensors are authored in one place.
+        """
+        self.affordance.object_label = self.label
+        self.affordance.geometry = self.geometry
+
+
+# =============================================================================
+# ActionCfg and AtomicAction
+# =============================================================================
+
+
+@configclass
+class ActionCfg:
+    """Configuration for atomic actions."""
+
+    name: str = "default"
+    """Name of the action, used for identification and logging."""
+
+    control_part: str = "arm"
+    """Control part name for the action."""
+
+    interpolation_type: str = "linear"
+    """Interpolation type: 'linear', 'cubic'."""
+
+    velocity_limit: Optional[float] = None
+    """Optional velocity limit for the motion."""
+
+    acceleration_limit: Optional[float] = None
+    """Optional acceleration limit for the motion."""
+
+
+class AtomicAction(ABC):
+    """Abstract base class for atomic actions.
+
+    All atomic actions use PlanResult from embodichain.lab.sim.planners
+    as the return type for execute() method, ensuring consistency with
+    the existing motion planning infrastructure.
+    """
+
+    def __init__(
+        self,
+        motion_generator: MotionGenerator,
+        cfg: ActionCfg = ActionCfg(),
+    ):
+        """
+        Initialize the atomic action.
+        Args:
+            motion_generator: The motion generator instance to use for planning.
+            cfg: Configuration for the action.
+        """
+        self.motion_generator = motion_generator
+        self.cfg = cfg
+        self.robot = motion_generator.robot
+        self.control_part = cfg.control_part
+        self.device = self.robot.device
+
+    @abstractmethod
+    def execute(
+        self,
+        target: Union[torch.Tensor, ObjectSemantics],
+        start_qpos: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> tuple[bool, torch.Tensor, list[float]]:
+        """execute pick up action
+
+        Args:
+            target (ObjectSemantics): object semantics containing grasp affordance and entity information
+            start_qpos (Optional[torch.Tensor], optional): Planning start qpos. Defaults to None.
+
+        Returns:
+            tuple[bool, torch.Tensor, list[float]]:
+            is_success,
+            trajectory of shape (n_envs, n_waypoints, dof),
+            joint_ids corresponding to trajectory
+        """
+
+    @abstractmethod
+    def validate(
+        self,
+        target: Union[torch.Tensor, ObjectSemantics],
+        start_qpos: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> bool:
+        """Validate if the action is feasible without executing.
+
+        This method performs a quick feasibility check (e.g., IK solvability)
+        without generating a full trajectory.
+
+        Returns:
+            True if action appears feasible, False otherwise
+        """
+        pass
+
+    def _ik_solve(
+        self, target_pose: torch.Tensor, qpos_seed: Optional[torch.Tensor] = None
+    ) -> torch.Tensor:
+        """Solve IK for target pose.
+
+        Args:
+            target_pose: Target pose [4, 4]
+            qpos_seed: Seed configuration [DOF]
+
+        Returns:
+            Joint configuration [DOF]
+
+        Raises:
+            RuntimeError: If IK fails to find a solution
+        """
+        if qpos_seed is None:
+            qpos_seed = self.robot.get_qpos()
+
+        success, qpos = self.robot.compute_ik(
+            pose=target_pose.unsqueeze(0),
+            qpos_seed=qpos_seed.unsqueeze(0),
+            name=self.control_part,
+        )
+
+        if not success.all():
+            raise RuntimeError(f"IK failed for target pose: {target_pose}")
+
+        return qpos.squeeze(0)
+
+    def _fk_compute(self, qpos: torch.Tensor) -> torch.Tensor:
+        """Compute forward kinematics.
+
+        Args:
+            qpos: Joint configuration [DOF] or [B, DOF]
+
+        Returns:
+            End-effector pose [4, 4] or [B, 4, 4]
+        """
+        if qpos.dim() == 1:
+            qpos = qpos.unsqueeze(0)
+
+        xpos = self.robot.compute_fk(
+            qpos=qpos,
+            name=self.control_part,
+            to_matrix=True,
+        )
+
+        return xpos.squeeze(0) if xpos.shape[0] == 1 else xpos
+
+    def _apply_offset(self, pose: torch.Tensor, offset: torch.Tensor) -> torch.Tensor:
+        """Apply offset to pose in local frame.
+
+        Args:
+            pose: Base pose [N, 4, 4]
+            offset: Offset in local frame [N, 3] or [3]
+
+        Returns:
+            Pose with offset applied [N, 4, 4]
+        """
+        if not len(pose.shape) == 3 or pose.shape[1:] != (4, 4):
+            logger.log_error("pose must have shape [N, 4, 4]")
+        if len(offset.shape) == 1:
+            offset = offset.unsqueeze(0)
+        if not len(offset.shape) == 2 or offset.shape[1] != 3:
+            logger.log_error("offset must have shape [N, 3] or [3]")
+        result = pose.clone()
+        result[:, :3, 3] += offset
+        return result
+
+    def plan_trajectory(
+        self,
+        target_states: List[PlanState],
+        options: Optional["MotionGenOptions"] = None,
+    ) -> "PlanResult":
+        """Plan trajectory using motion generator."""
+        from embodichain.lab.sim.planners import MotionGenOptions
+
+        if options is None:
+            options = MotionGenOptions(control_part=self.control_part)
+        return self.motion_generator.generate(target_states, options)
diff --git a/embodichain/lab/sim/atomic_actions/engine.py b/embodichain/lab/sim/atomic_actions/engine.py
new file mode 100644
index 00000000..15b868a8
--- /dev/null
+++ b/embodichain/lab/sim/atomic_actions/engine.py
@@ -0,0 +1,340 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import torch
+from typing import Any, Dict, List, Optional, Type, Union, TYPE_CHECKING
+
+from embodichain.lab.sim.planners import PlanResult
+from embodichain.utils import logger
+from .core import AtomicAction, ObjectSemantics, ActionCfg
+
+if TYPE_CHECKING:
+    from embodichain.lab.sim.planners import MotionGenerator
+    from embodichain.lab.sim.objects import Robot
+
+
+# =============================================================================
+# Global Action Registry
+# =============================================================================
+
+_global_action_registry: Dict[str, Type[AtomicAction]] = {}
+_global_action_configs: Dict[str, Type[ActionCfg]] = {}
+
+
+def register_action(
+    name: str,
+    action_class: Type[AtomicAction],
+    config_class: Optional[Type[ActionCfg]] = None,
+) -> None:
+    """Register a custom atomic action class globally.
+
+    This function allows registration of custom action types that can then
+    be instantiated by the AtomicActionEngine.
+
+    Args:
+        name: Unique identifier for the action type
+        action_class: The AtomicAction subclass to register
+        config_class: Optional configuration class for the action
+
+    Example:
+        >>> class MyCustomAction(AtomicAction):
+        ...     def execute(self, target, **kwargs):
+        ...         # Implementation
+        ...         pass
+        ...     def validate(self, target, **kwargs):
+        ...         return True
+        >>> register_action("my_custom", MyCustomAction)
+    """
+    _global_action_registry[name] = action_class
+    if config_class is not None:
+        _global_action_configs[name] = config_class
+
+
+def unregister_action(name: str) -> None:
+    """Unregister an action type.
+
+    Args:
+        name: The action type identifier to remove
+    """
+    _global_action_registry.pop(name, None)
+    _global_action_configs.pop(name, None)
+
+
+def get_registered_actions() -> Dict[str, Type[AtomicAction]]:
+    """Get all registered action types.
+
+    Returns:
+        Dictionary mapping action names to their classes
+    """
+    return _global_action_registry.copy()
+
+
+# =============================================================================
+# Semantic Analyzer
+# =============================================================================
+
+
+class SemanticAnalyzer:
+    """Analyzes objects and provides ObjectSemantics for atomic actions."""
+
+    def __init__(self):
+        self._object_cache: Dict[str, ObjectSemantics] = {}
+
+    def analyze(
+        self,
+        label: str,
+        geometry: Optional[Dict[str, Any]] = None,
+        custom_config: Optional[Dict[str, Any]] = None,
+        use_cache: bool = True,
+    ) -> ObjectSemantics:
+        """Analyze object by label and return ObjectSemantics.
+
+        This is a placeholder implementation that should be extended
+        with actual object detection and affordance computation.
+
+        Args:
+            label: Object category label (e.g., "apple", "bottle")
+            geometry: Optional geometry payload. Can include mesh tensors:
+                ``mesh_vertices`` [N, 3] and ``mesh_triangles`` [M, 3].
+            custom_config: Optional user-defined affordance configuration.
+            use_cache: Whether to use cached semantics when available.
+
+        Returns:
+            ObjectSemantics containing affordance data
+        """
+        # Only use cache for default analyze path
+        if (
+            use_cache
+            and geometry is None
+            and custom_config is None
+            and label in self._object_cache
+        ):
+            return self._object_cache[label]
+
+        # Create default semantics (placeholder implementation)
+        from .core import AntipodalAffordance
+
+        # Generate default grasp poses based on object type
+        default_poses = torch.eye(4).unsqueeze(0)
+        default_poses[0, 2, 3] = 0.1  # Default offset
+
+        default_geometry: Dict[str, Any] = {"bounding_box": [0.1, 0.1, 0.1]}
+        if geometry is not None:
+            default_geometry.update(geometry)
+
+        grasp_affordance = AntipodalAffordance(
+            object_label=label,
+            custom_config=custom_config or {},
+        )
+
+        semantics = ObjectSemantics(
+            label=label,
+            affordance=grasp_affordance,
+            geometry=default_geometry,
+            properties={"mass": 1.0, "friction": 0.5},
+        )
+
+        # Cache only default path
+        if use_cache and geometry is None and custom_config is None:
+            self._object_cache[label] = semantics
+        return semantics
+
+    def clear_cache(self) -> None:
+        """Clear the object semantics cache."""
+        self._object_cache.clear()
+
+
+# =============================================================================
+# Atomic Action Engine
+# =============================================================================
+
+
+class AtomicActionEngine:
+    """Central engine for managing and executing atomic actions."""
+
+    def __init__(
+        self,
+        motion_generator: "MotionGenerator",
+        actions_cfg_list: Optional[List[ActionCfg]] = None,
+    ):
+        self.motion_generator = motion_generator
+        self.robot = self.motion_generator.robot
+        self.device = self.motion_generator.device
+
+        # Semantic analyzer for object understanding
+        self._semantic_analyzer = SemanticAnalyzer()
+
+        # Initialize default actions
+        self._actions: Dict[str, AtomicAction] = self._init_actions(actions_cfg_list)
+
+    def _init_actions(
+        self, actions_cfg_list: Optional[List[ActionCfg]] = None
+    ) -> Dict[str, "AtomicAction"]:
+        actions: Dict[str, AtomicAction] = {}
+        from .actions import MoveAction, PickUpAction, PlaceAction
+
+        builtin_action_map: Dict[str, Type[AtomicAction]] = {
+            "move": MoveAction,
+            "pick_up": PickUpAction,
+            "place": PlaceAction,
+        }
+        if actions_cfg_list is not None:
+            for cfg in actions_cfg_list:
+                action_class = builtin_action_map.get(
+                    cfg.name
+                ) or _global_action_registry.get(cfg.name)
+                if action_class is None:
+                    logger.log_error(f"Unknown action name in config: {cfg.name}")
+                    continue
+                instance = action_class(motion_generator=self.motion_generator, cfg=cfg)
+                actions[cfg.name] = instance
+        return actions
+
+    def execute_static(
+        self,
+        target_list: List[Union[torch.Tensor, str, ObjectSemantics, Dict[str, Any]]],
+    ) -> tuple[bool, torch.Tensor]:
+        """Execute a sequence of actions to target poses.
+
+        Each element in ``target_list`` corresponds to an action in the order they
+        were registered via ``actions_cfg_list``.
+        """
+        action_names = list(self._actions.keys())
+        if len(target_list) != len(action_names):
+            logger.log_error(
+                f"Length of target_list ({len(target_list)}) must match number of actions ({len(action_names)})."
+            )
+        start_qpos = self.motion_generator.robot.get_qpos()
+        n_envs = start_qpos.shape[0]
+        all_dof = self.motion_generator.robot.dof
+        all_trajectory = torch.empty(
+            size=(n_envs, 0, all_dof), dtype=torch.float32, device=self.device
+        )
+
+        for action_name, target in zip(action_names, target_list):
+            atom_action = self._actions[action_name]
+            target = self._resolve_target(target)
+            control_part = atom_action.control_part
+            arm_joint_ids = self.motion_generator.robot.get_joint_ids(name=control_part)
+            start_qpos_part = start_qpos[:, arm_joint_ids]
+            is_success, traj, joint_ids = atom_action.execute(
+                target=target, start_qpos=start_qpos_part
+            )
+            if not is_success:
+                return False, all_trajectory
+            n_waypoints = traj.shape[1]
+
+            traj_full = torch.zeros(
+                size=(n_envs, n_waypoints, all_dof),
+                dtype=torch.float32,
+                device=self.device,
+            )
+            traj_full[:, :] = start_qpos
+            traj_full[:, :, joint_ids] = traj
+            all_trajectory = torch.cat((all_trajectory, traj_full), dim=1)
+            # update start qpos for the next action
+            start_qpos[:, joint_ids] = traj[:, -1, :]
+        return True, all_trajectory
+
+    def validate(
+        self,
+        action_name: str,
+        target: Union[torch.Tensor, str, ObjectSemantics, Dict[str, Any]],
+        **kwargs,
+    ) -> bool:
+        """Validate if a named action is feasible without executing."""
+        if action_name not in self._actions:
+            logger.log_warning(f"Action '{action_name}' is not registered.")
+            return False
+
+        action = self._actions[action_name]
+        target = self._resolve_target(target)
+        return action.validate(target, **kwargs)
+
+    def _resolve_target(
+        self,
+        target: Union[torch.Tensor, str, ObjectSemantics, Dict[str, Any]],
+    ) -> Union[torch.Tensor, ObjectSemantics]:
+        """Resolve user target input into tensor pose or ObjectSemantics.
+
+        Supports the convenience dict format in ``execute`` and ``validate``.
+        """
+        if isinstance(target, torch.Tensor):
+            return target
+
+        if isinstance(target, ObjectSemantics):
+            return target
+
+        if isinstance(target, str):
+            return self._semantic_analyzer.analyze(target)
+
+        if isinstance(target, dict):
+            if "pose" in target:
+                pose = target["pose"]
+                if not isinstance(pose, torch.Tensor):
+                    raise TypeError("target['pose'] must be a torch.Tensor")
+                return pose
+
+            if "semantics" in target:
+                semantics = target["semantics"]
+                if not isinstance(semantics, ObjectSemantics):
+                    raise TypeError(
+                        "target['semantics'] must be an ObjectSemantics instance"
+                    )
+                return semantics
+
+            label = target.get("label")
+            if label is None:
+                raise ValueError(
+                    "Dict target must provide 'label', or use 'pose'/'semantics'."
+                )
+            if not isinstance(label, str):
+                raise TypeError("target['label'] must be a string")
+
+            geometry = target.get("geometry")
+            custom_config = target.get("custom_config")
+            use_cache = target.get("use_cache", True)
+
+            semantics = self._semantic_analyzer.analyze(
+                label=label,
+                geometry=geometry,
+                custom_config=custom_config,
+                use_cache=use_cache,
+            )
+
+            properties = target.get("properties")
+            if properties is not None:
+                semantics.properties.update(properties)
+
+            uid = target.get("uid")
+            if uid is not None:
+                semantics.uid = uid
+
+            return semantics
+
+        raise TypeError(
+            "target must be torch.Tensor, str, ObjectSemantics, or Dict[str, Any]"
+        )
+
+    def get_semantic_analyzer(self) -> SemanticAnalyzer:
+        """Get the semantic analyzer for object understanding."""
+        return self._semantic_analyzer
+
+    def set_semantic_analyzer(self, analyzer: SemanticAnalyzer) -> None:
+        """Set a custom semantic analyzer."""
+        self._semantic_analyzer = analyzer
diff --git a/embodichain/lab/sim/cfg.py b/embodichain/lab/sim/cfg.py
index 72a755f2..fd653d35 100644
--- a/embodichain/lab/sim/cfg.py
+++ b/embodichain/lab/sim/cfg.py
@@ -23,6 +23,7 @@
 from dataclasses import field, MISSING
 
 from dexsim.types import (
+    Renderer,
     PhysicalAttr,
     ActorType,
     AxisArrowType,
@@ -40,6 +41,40 @@
 
 from .shapes import ShapeCfg, MeshCfg
 
+# Global default renderer settings for simulation
+DEFAULT_RENDERER: Literal["hybrid", "fast-rt", "rt"] = "hybrid"
+
+
+@configclass
+class RenderCfg:
+    renderer: Literal["hybrid", "fast-rt", "rt"] = "hybrid"
+    """Renderer backend to use for the simulation. Options are 'hybrid', 'fast-rt', and 'rt'.
+
+    Note:
+    - 'hybrid' uses ray tracing for shadows and reflections while keeping rasterization for primary rendering,
+        providing a balance between performance and visual quality.
+    - 'fast-rt' is a fully ray-traced renderer for maximum visual fidelity, but may have higher computational cost.
+    - 'rt' is an offline ray-traced renderer for maximum visual fidelity, suitable for high-quality rendering tasks.
+    """
+
+    enable_denoiser: bool = True
+    """Whether to enable denoising. Only valid when renderer is 'hybrid' or 'fast-rt'."""
+
+    spp: int = 64
+    """Samples per pixel for ray tracing rendering. This parameter is only valid when renderer is 'hybrid' or 'fast-rt' and enable_denoiser is False."""
+
+    def to_dexsim_flags(self):
+        if self.renderer == "hybrid":
+            return Renderer.HYBRID
+        elif self.renderer == "fast-rt":
+            return Renderer.FASTRT
+        elif self.renderer == "rt":
+            return Renderer.OFFLINERT
+        else:
+            logger.log_error(
+                f"Invalid renderer type '{self.renderer}' specified. Must be one of 'hybrid', 'fast-rt', or 'rt'."
+            )
+
 
 @configclass
 class PhysicsCfg:
@@ -126,6 +161,43 @@ class MarkerCfg:
     """Index of the arena where the marker should be placed. -1 means all arenas."""
 
 
+@configclass
+class WindowRecordCfg:
+    """Configuration for interactive viewer window recording."""
+
+    enable_hotkey: bool = True
+    """Whether to register the ``r`` hotkey for viewer recording when the window opens."""
+
+    save_path: str | None = None
+    """Optional output path for viewer recordings. If None, use the default outputs directory."""
+
+    fps: int = 20
+    """Frames per second for viewer recording."""
+
+    max_memory: int = 1024
+    """Maximum buffered recording memory in MB before auto-stopping capture."""
+
+    video_prefix: str = "viewer_record"
+    """Video file prefix used when no explicit save path is provided."""
+
+
+@configclass
+class WindowCaptureCfg:
+    """Configuration for interactive viewer window capture."""
+
+    enable_hotkey: bool = True
+    """Whether to register the ``p`` hotkey for single-frame capture when the window opens."""
+
+    save_path: str | None = None
+    """Optional output image path. If None, use the default outputs directory with a timestamp."""
+
+    image_prefix: str = "window_capture"
+    """Image file prefix used when no explicit save path is provided."""
+
+    use_window_pose: bool = True
+    """Whether the hidden capture camera follows the current viewer pose."""
+
+
 @configclass
 class GPUMemoryCfg:
     """A gpu memory configuration dataclass that neatly holds all parameters that configure physics GPU memory for simulation"""
@@ -200,7 +272,7 @@ class RigidBodyAttributesCfg:
     contact_offset: float = 0.002
     """Contact offset for collision detection."""
 
-    rest_offset: float = 0.001
+    rest_offset: float = 0.0
     """Rest offset for collision detection."""
 
     enable_collision: bool = True
@@ -846,6 +918,34 @@ class URDFCfg:
     fpath_prefix: str = EMBODICHAIN_DEFAULT_DATA_ROOT + "/assembled"
     """Output directory prefix for the assembled URDF file."""
 
+    component_prefix: List[tuple[str, Union[str, None]]] = field(
+        default_factory=lambda: [
+            ("chassis", None),
+            ("legs", None),
+            ("torso", None),
+            ("head", None),
+            ("left_arm", "left_"),
+            ("right_arm", "right_"),
+            ("left_hand", "left_"),
+            ("right_hand", "right_"),
+            ("arm", None),
+            ("hand", None),
+        ]
+    )
+    """Component name prefixes used during URDF assembly.
+
+    Preferred form is a list of ``(component_name, prefix)`` tuples. For
+    convenience, a mapping ``{component_name: prefix}`` is also accepted when
+    constructing :class:`URDFCfg` and will be normalized internally.
+    """
+
+    name_case: dict[str, str] = field(
+        default_factory=lambda: {
+            "joint": "upper",
+            "link": "lower",
+        }
+    )
+
     def __init__(
         self,
         components: list[dict[str, str | np.ndarray]] | None = None,
@@ -855,6 +955,8 @@ def __init__(
         fpath_prefix: str = EMBODICHAIN_DEFAULT_DATA_ROOT + "/assembled",
         use_signature_check: bool = True,
         base_link_name: str = "base_link",
+        component_prefix: list[tuple[str, str | None]] | None = None,
+        name_case: dict[str, str] | None = None,
     ):
         """
         Initialize URDFCfg with optional list of components and output path settings.
@@ -871,6 +973,9 @@ def __init__(
             fpath_prefix (str): Output directory prefix for the assembled URDF file.
             use_signature_check (bool): Whether to use signature check when merging URDFs.
             base_link_name (str): Name of the base link in the assembled robot.
+            component_prefix (list[tuple[str, str | None]] | None): Optional
+                list of (component_type, prefix) pairs to override default
+                component name prefixes.
         """
         self.components = {}
         self.sensors = sensors or {}
@@ -880,6 +985,36 @@ def __init__(
         self.fname = fname
         self.fpath_prefix = fpath_prefix
 
+        # Initialize component prefixes (patch-style mapping per component type)
+        if component_prefix is None:
+            # Use the same default as the dataclass field
+            self.component_prefix = [
+                ("chassis", None),
+                ("legs", None),
+                ("torso", None),
+                ("head", None),
+                ("left_arm", "left_"),
+                ("right_arm", "right_"),
+                ("left_hand", "left_"),
+                ("right_hand", "right_"),
+                ("arm", None),
+                ("hand", None),
+            ]
+        elif isinstance(component_prefix, dict):
+            # Allow dict-style config: {"left_hand": "l_", ...}
+            self.component_prefix = list(component_prefix.items())
+        else:
+            # Assume caller provided a list of (component_name, prefix) tuples
+            self.component_prefix = component_prefix
+
+        if name_case is None:
+            self.name_case = {
+                "joint": "upper",
+                "link": "lower",
+            }
+        else:
+            self.name_case = name_case
+
         # Auto-add components if provided
         if components:
             for comp_config in components:
@@ -1041,6 +1176,22 @@ def assemble_urdf(self) -> str:
         # If there are multiple components, merge them into a single URDF file.
         manager = URDFAssemblyManager()
         manager.base_link_name = self.base_link_name
+
+        if self.component_prefix is None:
+            self.component_prefix = [
+                ("left_arm", "left_"),
+                ("right_arm", "right_"),
+                ("left_hand", "left_"),
+                ("right_hand", "right_"),
+            ]
+        if isinstance(self.component_prefix, dict):
+            self.component_prefix = list(self.component_prefix.items())
+        # Forward configured component prefixes to the assembly manager
+        manager.component_prefix = self.component_prefix
+
+        if self.name_case is not None:
+            manager.name_case = self.name_case
+
         for comp_type, comp_config in components:
             params = comp_config.get("params", {})
             success = manager.add_component(
@@ -1094,12 +1245,16 @@ def from_dict(cls, init_dict: Dict) -> "URDFCfg":
         fpath = init_dict.get("fpath", None)
         use_signature_check = init_dict.get("use_signature_check", True)
         base_link_name = init_dict.get("base_link_name", "base_link")
+        component_prefix = init_dict.get("component_prefix", None)
+        name_case = init_dict.get("name_case", None)
         return cls(
             components=components,
             sensors=sensors,
             fpath=fpath,
             use_signature_check=use_signature_check,
             base_link_name=base_link_name,
+            component_prefix=component_prefix,
+            name_case=name_case,
         )
 
 
diff --git a/embodichain/lab/sim/material.py b/embodichain/lab/sim/material.py
index 08c8cb93..7daddb8f 100644
--- a/embodichain/lab/sim/material.py
+++ b/embodichain/lab/sim/material.py
@@ -25,7 +25,6 @@
 from functools import cached_property
 
 from dexsim.engine import MaterialInst, Material
-from embodichain.lab.sim.utility import is_rt_enabled
 from embodichain.utils import configclass, logger
 
 
@@ -42,7 +41,7 @@ class VisualMaterialCfg:
     metallic: float = 0.0
     """Metallic factor (0.0 = dielectric, 1.0 = metallic)"""
 
-    roughness: float = 0.5
+    roughness: float = 0.7
     """Surface roughness (0.0 = smooth, 1.0 = rough)"""
 
     # Additional PBR properties
@@ -120,10 +119,6 @@ def __init__(self, cfg: VisualMaterialCfg, mat: Material):
 
         self._default_mat_inst = self.create_instance(self.uid)
 
-    @cached_property
-    def is_rt_enabled(self) -> bool:
-        return is_rt_enabled()
-
     @property
     def mat(self) -> Material:
         return self._mat
@@ -147,11 +142,8 @@ def set_default_properties(
         mat_inst.set_normal_texture(cfg.normal_texture)
         mat_inst.set_ao_texture(cfg.ao_texture)
 
-        if self.is_rt_enabled:
-            mat_inst.set_ior(cfg.ior)
-            mat_inst.mat.update_pbr_material_type(
-                self.MAT_TYPE_MAPPING[cfg.material_type]
-            )
+        mat_inst.set_ior(cfg.ior)
+        mat_inst.mat.update_pbr_material_type(self.MAT_TYPE_MAPPING[cfg.material_type])
 
     def create_instance(self, uid: str) -> VisualMaterialInst:
         """Create a new material instance from this material template.
@@ -400,9 +392,7 @@ def set_ao_texture(
 
     def set_ior(self, ior: float) -> None:
         """Set index of refraction."""
-        if is_rt_enabled() is False:
-            logger.log_debug("Ray Tracing rendering not enabled, ignoring IOR setting.")
-            return
+
         self.ior = ior
         inst = self._mat.get_inst(self.uid)
-        inst.set_rt_param("ior", ior)
+        inst.set_pbr_param("ior", ior)
diff --git a/embodichain/lab/sim/objects/articulation.py b/embodichain/lab/sim/objects/articulation.py
index 6488fd59..b763bcc4 100644
--- a/embodichain/lab/sim/objects/articulation.py
+++ b/embodichain/lab/sim/objects/articulation.py
@@ -42,7 +42,6 @@
 from embodichain.lab.sim.utility.sim_utils import (
     get_dexsim_drive_type,
     set_dexsim_articulation_cfg,
-    is_rt_enabled,
 )
 from embodichain.lab.sim.utility.solver_utils import (
     create_pk_chain,
@@ -907,7 +906,6 @@ def set_local_pose(
                 logger.log_error(
                     f"Invalid pose shape {pose.shape}. Expected (N, 7) or (N, 4, 4)."
                 )
-
             # TODO: in manual physics mode, the update should be explicitly called after
             # setting the pose to synchronize the state to renderer.
             self._world.update(0.001)
@@ -935,15 +933,6 @@ def set_local_pose(
             )
             self._ps.gpu_compute_articulation_kinematic(gpu_indices=indices)
 
-            # TODO: To be removed when gpu articulation data sync is supported.
-            if is_rt_enabled() is False:
-                self.body_data.body_link_pose
-                link_pose = self.body_data._body_link_pose[local_env_ids]
-                self._world.sync_poses_gpu_to_cpu(
-                    link_pose=CudaArray(link_pose),
-                    articulation_gpu_indices=CudaArray(indices),
-                )
-
     def get_local_pose(self, to_matrix=False) -> torch.Tensor:
         """Get local pose (root link pose) of the articulation.
 
@@ -1056,6 +1045,8 @@ def set_qpos(
         # (e.g., support specifying which methods should be decorated for auto-conversion.)
         if not isinstance(qpos, torch.Tensor):
             qpos = torch.as_tensor(qpos, dtype=torch.float32, device=self.device)
+        else:
+            qpos = qpos.to(device=self.device, dtype=torch.float32)
 
         if joint_ids is None:
             local_joint_ids = torch.arange(
@@ -1066,7 +1057,7 @@ def set_qpos(
                 joint_ids, dtype=torch.int32, device=self.device
             )
         else:
-            local_joint_ids = joint_ids
+            local_joint_ids = joint_ids.to(device=self.device, dtype=torch.int32)
 
         local_env_ids = self._all_indices if env_ids is None else env_ids
 
@@ -1564,16 +1555,6 @@ def reset(self, env_ids: Sequence[int] | None = None) -> None:
             self._ps.gpu_compute_articulation_kinematic(
                 gpu_indices=self.body_data.gpu_indices[local_env_ids]
             )
-
-            # TODO: To be removed when gpu articulation data sync is supported.
-            if is_rt_enabled() is False:
-                self.body_data.body_link_pose
-                link_pose = self.body_data._body_link_pose[local_env_ids]
-                indices = self.body_data.gpu_indices[local_env_ids]
-                self._world.sync_poses_gpu_to_cpu(
-                    link_pose=CudaArray(link_pose),
-                    articulation_gpu_indices=CudaArray(indices),
-                )
         else:
             self._world.update(0.001)
 
@@ -1680,6 +1661,7 @@ def compute_fk(
                 chain=self.pk_chain,
                 root_link_name=root_link_name,
                 end_link_name=end_link_name,
+                device=self.device,
             )
             result = pk_serial_chain.forward_kinematics(th=qpos, end_only=True)
 
@@ -1780,9 +1762,10 @@ def compute_jacobian(
 
         # Create pk_serial_chain
         pk_serial_chain = create_pk_serial_chain(
-            chain=self.pk_chain,
+            urdf_path=self.cfg.fpath,
             root_link_name=root_link_name,
             end_link_name=end_link_name,
+            device=self.device,
         )
 
         # Compute the Jacobian using the kinematics chain
diff --git a/embodichain/lab/sim/objects/gizmo.py b/embodichain/lab/sim/objects/gizmo.py
index 15067772..dc7fea00 100644
--- a/embodichain/lab/sim/objects/gizmo.py
+++ b/embodichain/lab/sim/objects/gizmo.py
@@ -17,7 +17,6 @@
 Gizmo: A reusable controller for interactive manipulation of simulation elements (object, robot, camera, etc.)
 """
 
-
 import numpy as np
 import torch
 import dexsim
@@ -213,10 +212,7 @@ def _setup_camera_gizmo(self):
             camera_pos, camera_rot_matrix, "Camera"
         )
         # New API uses set_flush_localpose_callback
-        try:
-            self._gizmo.set_flush_localpose_callback(self._proxy_gizmo_callback)
-        except Exception as e:
-            logger.log_warning(f"Failed to set gizmo callback for camera: {e}")
+        self._gizmo.set_flush_localpose_callback(self._proxy_gizmo_callback)
 
     def _proxy_gizmo_callback(self, *args):
         """Generic callback for proxy-based gizmo.
diff --git a/embodichain/lab/sim/objects/rigid_object.py b/embodichain/lab/sim/objects/rigid_object.py
index 24de293b..2202bbec 100644
--- a/embodichain/lab/sim/objects/rigid_object.py
+++ b/embodichain/lab/sim/objects/rigid_object.py
@@ -31,7 +31,6 @@
     VisualMaterialInst,
     BatchEntity,
 )
-from embodichain.lab.sim.utility import is_rt_enabled
 from embodichain.utils.math import convert_quat
 from embodichain.utils.math import matrix_from_quat, quat_from_matrix, matrix_from_euler
 from embodichain.utils import logger
@@ -81,6 +80,12 @@ def __init__(
         self._ang_vel = torch.zeros(
             (self.num_instances, 3), dtype=torch.float32, device=self.device
         )
+        self._lin_acc = torch.zeros(
+            (self.num_instances, 3), dtype=torch.float32, device=self.device
+        )
+        self._ang_acc = torch.zeros(
+            (self.num_instances, 3), dtype=torch.float32, device=self.device
+        )
         # center of mass pose in format (x, y, z, qw, qx, qy, qz)
         self.default_com_pose = torch.zeros(
             (self.num_instances, 7), dtype=torch.float32, device=self.device
@@ -162,6 +167,51 @@ def vel(self) -> torch.Tensor:
         """
         return torch.cat((self.lin_vel, self.ang_vel), dim=-1)
 
+    @property
+    def lin_acc(self) -> torch.Tensor:
+        if self.device.type == "cpu":
+            self._lin_acc = torch.as_tensor(
+                np.array(
+                    [entity.get_linear_acceleration() for entity in self.entities],
+                ),
+                dtype=torch.float32,
+                device=self.device,
+            )
+        else:
+            self.ps.gpu_fetch_rigid_body_data(
+                data=self._lin_acc,
+                gpu_indices=self.gpu_indices,
+                data_type=RigidBodyGPUAPIReadType.LINEAR_ACCELERATION,
+            )
+        return self._lin_acc
+
+    @property
+    def ang_acc(self) -> torch.Tensor:
+        if self.device.type == "cpu":
+            self._ang_acc = torch.as_tensor(
+                np.array(
+                    [entity.get_angular_acceleration() for entity in self.entities],
+                ),
+                dtype=torch.float32,
+                device=self.device,
+            )
+        else:
+            self.ps.gpu_fetch_rigid_body_data(
+                data=self._ang_acc,
+                gpu_indices=self.gpu_indices,
+                data_type=RigidBodyGPUAPIReadType.ANGULAR_ACCELERATION,
+            )
+        return self._ang_acc
+
+    @property
+    def acc(self) -> torch.Tensor:
+        """Get the linear and angular accelerations of the rigid bodies.
+
+        Returns:
+            torch.Tensor: The linear and angular accelerations concatenated, with shape (N, 6).
+        """
+        return torch.cat((self.lin_acc, self.ang_acc), dim=-1)
+
     @property
     def com_pose(self) -> torch.Tensor:
         """Get the center of mass pose of the rigid bodies.
@@ -410,10 +460,6 @@ def set_local_pose(
                 gpu_indices=indices,
                 data_type=RigidBodyGPUAPIWriteType.POSE,
             )
-            if is_rt_enabled() is False:
-                self._world.sync_poses_gpu_to_cpu(
-                    rigid_pose=CudaArray(pose), rigid_gpu_indices=CudaArray(indices)
-                )
 
     def get_local_pose(self, to_matrix: bool = False) -> torch.Tensor:
         """Get local pose of the rigid object.
@@ -888,12 +934,9 @@ def set_body_scale(
                 f"Length of env_ids {len(local_env_ids)} does not match scale length {len(scale)}."
             )
 
-        if self.device.type == "cpu":
-            for i, env_idx in enumerate(local_env_ids):
-                scale_np = scale[i].cpu().numpy()
-                self._entities[env_idx].set_body_scale(*scale_np)
-        else:
-            logger.log_error(f"Setting body scale on GPU is not supported yet.")
+        for i, env_idx in enumerate(local_env_ids):
+            scale_np = scale[i].cpu().numpy()
+            self._entities[env_idx].set_body_scale(*scale_np)
 
     def set_com_pose(
         self, com_pose: torch.Tensor, env_ids: Sequence[int] | None = None
diff --git a/embodichain/lab/sim/objects/robot.py b/embodichain/lab/sim/objects/robot.py
index e6dac158..07273e80 100644
--- a/embodichain/lab/sim/objects/robot.py
+++ b/embodichain/lab/sim/objects/robot.py
@@ -934,6 +934,9 @@ def init_solver(self, cfg: Union[SolverCfg, Dict[str, SolverCfg]]) -> None:
                     ):
                         solver_cfg.joint_names = self.cfg.control_parts[part_name]
                     self._solvers[name] = solver_cfg.init_solver(device=self.device)
+                    joint_ids = self.get_joint_ids(name=part_name)
+                    joint_limits = self._data.qpos_limits[0][joint_ids]
+                    self._solvers[name].update_with_robot_limit(joint_limits)
 
     def get_solver(self, name: str | None = None) -> BaseSolver | None:
         """Get the kinematic solver for a specific control part.
diff --git a/embodichain/lab/sim/planners/motion_generator.py b/embodichain/lab/sim/planners/motion_generator.py
index 0682c492..220deeca 100644
--- a/embodichain/lab/sim/planners/motion_generator.py
+++ b/embodichain/lab/sim/planners/motion_generator.py
@@ -33,7 +33,6 @@
 from .utils import MovePart, MoveType, PlanState, PlanResult
 from .utils import calculate_point_allocations, interpolate_xpos
 
-
 __all__ = ["MotionGenerator", "MotionGenCfg", "MotionGenOptions"]
 
 
@@ -508,7 +507,11 @@ def interpolate_trajectory(
 
         qpos_seed = options.start_qpos
         if qpos_seed is None and qpos_list is not None:
+            # first waypoint as seed
             qpos_seed = qpos_list[0]
+        if qpos_seed is None:
+            # fallback to current robot state as seed
+            qpos_seed = self.robot.get_qpos(name=control_part)[0]
 
         # Generate trajectory
         interpolate_qpos_list = []
@@ -551,9 +554,14 @@ def interpolate_trajectory(
             # compute_batch_ik expects (n_envs, n_batch, 7) or (n_envs, n_batch, 4, 4)
             # Here we assume n_envs = 1 or we want to apply this to all envs if available.
             # Since MotionGenerator usually works with self.robot.device, we use its batching capabilities.
+            qpos_seed_repeat = (
+                qpos_seed.unsqueeze(0)
+                .repeat(total_interpolated_poses.shape[0], 1)
+                .unsqueeze(0)
+            )
             success_batch, qpos_batch = self.robot.compute_batch_ik(
                 pose=total_interpolated_poses.unsqueeze(0),
-                joint_seed=None,  # Or use qpos_seed if properly shaped
+                joint_seed=qpos_seed_repeat,  # Or use qpos_seed if properly shaped
                 name=control_part,
             )
 
diff --git a/embodichain/lab/sim/planners/toppra_planner.py b/embodichain/lab/sim/planners/toppra_planner.py
index 0c20ccf9..218d17ed 100644
--- a/embodichain/lab/sim/planners/toppra_planner.py
+++ b/embodichain/lab/sim/planners/toppra_planner.py
@@ -191,11 +191,9 @@ def plan(
             )
 
         # Build waypoints
-        waypoints = []
-        for target in target_states:
-            waypoints.append(np.array(target.qpos))
-
-        waypoints = np.array(waypoints)
+        waypoints = np.array(
+            [target.qpos.to("cpu").numpy() for target in target_states]
+        )
         # Create spline interpolation
         # NOTE: Suitable for dense waypoints
         ss = np.linspace(0, 1, len(waypoints))
diff --git a/embodichain/lab/sim/planners/utils.py b/embodichain/lab/sim/planners/utils.py
index 6e8e4ceb..cfeee443 100644
--- a/embodichain/lab/sim/planners/utils.py
+++ b/embodichain/lab/sim/planners/utils.py
@@ -23,7 +23,6 @@
 
 from embodichain.utils import logger
 
-
 __all__ = [
     "TrajectorySampleMethod",
     "MovePart",
diff --git a/embodichain/lab/sim/robots/cobotmagic.py b/embodichain/lab/sim/robots/cobotmagic.py
index 1ffdcd71..ca8e7f6c 100644
--- a/embodichain/lab/sim/robots/cobotmagic.py
+++ b/embodichain/lab/sim/robots/cobotmagic.py
@@ -181,11 +181,17 @@ def build_pk_serial_chain(
 
 if __name__ == "__main__":
     from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
+    from embodichain.lab.sim.cfg import RenderCfg
     from embodichain.lab.sim.robots import CobotMagicCfg
 
     torch.set_printoptions(precision=5, sci_mode=False)
 
-    config = SimulationManagerCfg(headless=False, sim_device="cuda", num_envs=2)
+    config = SimulationManagerCfg(
+        headless=False,
+        sim_device="cpu",
+        num_envs=2,
+        render_cfg=RenderCfg(renderer="fast-rt"),
+    )
     sim = SimulationManager(config)
 
     config = {
@@ -195,7 +201,6 @@ def build_pk_serial_chain(
     cfg = CobotMagicCfg.from_dict(config)
     robot = sim.add_robot(cfg=cfg)
 
-    sim.init_gpu_physics()
     print("CobotMagic added to the simulation.")
 
     from IPython import embed
diff --git a/embodichain/lab/sim/robots/dexforce_w1/cfg.py b/embodichain/lab/sim/robots/dexforce_w1/cfg.py
index c6586b4e..40f95b09 100644
--- a/embodichain/lab/sim/robots/dexforce_w1/cfg.py
+++ b/embodichain/lab/sim/robots/dexforce_w1/cfg.py
@@ -159,7 +159,7 @@ def _build_default_solver_cfg(is_industrial: bool) -> SolverCfg:
                 end_link_name="right_ee",
                 root_link_name="right_arm_base",
                 dh_params=w1_right_arm_params.dh_params,
-                qpos_limits=w1_right_arm_params.qpos_limits,
+                user_qpos_limits=w1_right_arm_params.qpos_limits,
                 T_e_oe=w1_right_arm_params.T_e_oe,
                 T_b_ob=w1_right_arm_params.T_b_ob,
                 link_lengths=w1_right_arm_params.link_lengths,
@@ -170,7 +170,7 @@ def _build_default_solver_cfg(is_industrial: bool) -> SolverCfg:
                 end_link_name="left_ee",
                 root_link_name="left_arm_base",
                 dh_params=w1_left_arm_params.dh_params,
-                qpos_limits=w1_left_arm_params.qpos_limits,
+                user_qpos_limits=w1_left_arm_params.qpos_limits,
                 T_e_oe=w1_left_arm_params.T_e_oe,
                 T_b_ob=w1_left_arm_params.T_b_ob,
                 link_lengths=w1_left_arm_params.link_lengths,
diff --git a/embodichain/lab/sim/robots/dexforce_w1/utils.py b/embodichain/lab/sim/robots/dexforce_w1/utils.py
index c5ebbd0d..58fcbe70 100644
--- a/embodichain/lab/sim/robots/dexforce_w1/utils.py
+++ b/embodichain/lab/sim/robots/dexforce_w1/utils.py
@@ -28,7 +28,6 @@
 from embodichain.lab.sim.solvers import SolverCfg
 from embodichain.lab.sim.cfg import RobotCfg, URDFCfg
 
-
 all = [
     "ChassisManager",
     "TorsoManager",
diff --git a/embodichain/lab/sim/sensors/camera.py b/embodichain/lab/sim/sensors/camera.py
index c5baed17..e672532e 100644
--- a/embodichain/lab/sim/sensors/camera.py
+++ b/embodichain/lab/sim/sensors/camera.py
@@ -17,19 +17,15 @@
 from __future__ import annotations
 
 import dexsim
-import math
 import torch
 import dexsim.render as dr
-import warp as wp
 
 from functools import cached_property
-from typing import Union, Tuple, Sequence, List
+from typing import Tuple, Sequence, List
 
 from embodichain.lab.sim.sensors import BaseSensor, SensorCfg
 from embodichain.utils.math import matrix_from_quat, quat_from_matrix, look_at_to_pose
-from embodichain.utils.warp.kernels import reshape_tiled_image
 from embodichain.utils import logger, configclass
-from embodichain.lab.sim.utility.sim_utils import is_rt_enabled
 
 
 @configclass
@@ -97,17 +93,12 @@ def get_view_attrib(self) -> dr.ViewFlags:
             The view attributes for the camera.
         """
         view_attrib: dr.ViewFlags = dr.ViewFlags.COLOR
-        # TODO: change for fast-rt renderer backend.
         if self.enable_color:
             view_attrib |= dr.ViewFlags.COLOR
         if self.enable_depth:
-            if is_rt_enabled() is False:
-                view_attrib |= dr.ViewFlags.NORMAL
             view_attrib |= dr.ViewFlags.DEPTH
         if self.enable_mask:
             view_attrib |= dr.ViewFlags.MASK
-            if is_rt_enabled() is False:
-                view_attrib |= dr.ViewFlags.DEPTH
         if self.enable_normal:
             view_attrib |= dr.ViewFlags.NORMAL
         if self.enable_position:
@@ -152,55 +143,25 @@ def _build_sensor_from_config(
             arenas = [env]
         num_instances = len(arenas)
 
-        if self.is_rt_enabled:
-            self._frame_buffer = self._world.create_camera_group(
-                [config.width, config.height], num_instances, True
-            )
-
-            view_attrib = config.get_view_attrib()
-            for i, arena in enumerate(arenas):
-                view_name = f"{self.uid}_view{i + 1}"
-                view = arena.create_camera(
-                    view_name,
-                    config.width,
-                    config.height,
-                    True,
-                    view_attrib,
-                    self._frame_buffer,
-                )
-                view.set_intrinsic(config.intrinsics)
-                view.set_near(config.near)
-                view.set_far(config.far)
-                self._entities[i] = view
+        self._frame_buffer = self._world.create_camera_group(
+            [config.width, config.height], num_instances, True
+        )
 
-        else:
-            self._grid_size = math.ceil(math.sqrt(num_instances))
-            frame_width = self._grid_size * config.width
-            frame_height = self._grid_size * config.height
-            view_attrib = config.get_view_attrib()
-            # Create the data frame
-            self._frame_buffer = self._world.create_frame_buffer(
-                [frame_width, frame_height], view_attrib, True
+        view_attrib = config.get_view_attrib()
+        for i, arena in enumerate(arenas):
+            view_name = f"{self.uid}_view{i + 1}"
+            view = arena.create_camera(
+                view_name,
+                config.width,
+                config.height,
+                True,
+                view_attrib,
+                self._frame_buffer,
             )
-            self._frame_buffer.set_read_able(view_attrib)
-
-            # Create camera views
-            for i, arena in enumerate(arenas):
-                col = i // self._grid_size
-                row = i % self._grid_size
-                x = row * config.width
-                y = col * config.height
-                view_name = f"{self.uid}_view{i + 1}"
-
-                view = arena.create_camera_view(
-                    view_name, (x, y), (config.width, config.height), self._frame_buffer
-                )
-                view.set_intrinsic(config.intrinsics)
-                view.set_near(config.near)
-                view.set_far(config.far)
-                view.enable_postprocessing(True)
-
-                self._entities[i] = view
+            view.set_intrinsic(config.intrinsics)
+            view.set_near(config.near)
+            view.set_far(config.far)
+            self._entities[i] = view
 
         # Define a mapping of data types to their respective shapes and dtypes
         buffer_specs = {
@@ -239,15 +200,6 @@ def _build_sensor_from_config(
         if self.cfg.extrinsics.parent is not None:
             self._attach_to_entity()
 
-    @cached_property
-    def is_rt_enabled(self) -> bool:
-        """Check if Ray Tracing rendering backend is enabled in the default dexsim world.
-
-        Returns:
-            bool: True if Ray Tracing rendering is enabled, False otherwise.
-        """
-        return is_rt_enabled()
-
     @cached_property
     def group_id(self) -> int:
         """Get the camera group ID in the dexsim world.
@@ -255,13 +207,7 @@ def group_id(self) -> int:
         Returns:
             int: The camera group ID.
         """
-        if self.is_rt_enabled:
-            return self._frame_buffer.get_group_id()
-        else:
-            logger.log_warning(
-                "Camera group ID is only available for Ray Tracing renderer. Returning -1 for non-RT renderer."
-            )
-            return -1
+        return self._frame_buffer.get_group_id()
 
     @property
     def is_attached(self) -> bool:
@@ -284,81 +230,38 @@ def update(self, **kwargs) -> None:
 
         Args:
             **kwargs: Additional keyword arguments for sensor update.
-                - fetch_only (bool): If True, only fetch the data from dexsim internal frame buffer without performing rendering.
         """
         fetch_only = kwargs.get("fetch_only", False)
         if not fetch_only:
-            if self.is_rt_enabled:
-                self._frame_buffer.apply()
-            else:
-                self._frame_buffer.apply_frame()
-
+            self._frame_buffer.apply()
         self.cfg: CameraCfg
-        # TODO: support fetch data from gpu buffer directly.
+
         if self.cfg.enable_color:
-            if self.is_rt_enabled:
-                self._data_buffer["color"] = self._frame_buffer.get_rgb_gpu_buffer().to(
-                    self.device
-                )
-            else:
-                data = self._frame_buffer.get_color_gpu_buffer().to(self.device)
-                self._update_buffer_impl(data, self._data_buffer["color"])
+            self._data_buffer["color"] = self._frame_buffer.get_rgb_gpu_buffer().to(
+                self.device
+            )
 
         if self.cfg.enable_depth:
-            data = self._frame_buffer.get_depth_gpu_buffer().to(self.device)
-            if self.is_rt_enabled:
-                self._data_buffer["depth"] = data
-            else:
-                self._update_buffer_impl(
-                    data, self._data_buffer["depth"].unsqueeze_(-1)
-                )
-                self._data_buffer["depth"].squeeze_(-1)
+            self._data_buffer["depth"] = self._frame_buffer.get_depth_gpu_buffer().to(
+                self.device
+            )
 
         if self.cfg.enable_mask:
-            if self.is_rt_enabled:
-                data = self._frame_buffer.get_visible_mask_gpu_buffer().to(
-                    self.device, torch.int32
-                )
-                self._data_buffer["mask"] = data
-            else:
-                data = self._frame_buffer.get_visible_gpu_buffer().to(
-                    self.device, torch.int32
-                )
-                self._update_buffer_impl(data, self._data_buffer["mask"].unsqueeze_(-1))
-                self._data_buffer["mask"].squeeze_(-1)
+            self._data_buffer[
+                "mask"
+            ] = self._frame_buffer.get_visible_mask_gpu_buffer().to(
+                self.device, torch.int32
+            )
 
         if self.cfg.enable_normal:
-            data = self._frame_buffer.get_normal_gpu_buffer().to(self.device)
-            if self.is_rt_enabled:
-                self._data_buffer["normal"] = data
-            else:
-                self._update_buffer_impl(data, self._data_buffer["normal"])
+            self._data_buffer["normal"] = self._frame_buffer.get_normal_gpu_buffer().to(
+                self.device
+            )[..., :3]
 
         if self.cfg.enable_position:
-            data = self._frame_buffer.get_position_gpu_buffer().to(self.device)
-            if self.is_rt_enabled:
-                self._data_buffer["position"] = data
-            else:
-                self._update_buffer_impl(data, self._data_buffer["position"])
-
-    def _update_buffer_impl(
-        self, data_buffer: torch.Tensor, data_buffer_out: torch.Tensor
-    ) -> None:
-        device = str(self.device)
-        channel = data_buffer.shape[-1] if data_buffer.dim() >= 3 else 1
-        wp.launch(
-            kernel=reshape_tiled_image,
-            dim=(self.num_instances, self.cfg.height, self.cfg.width),
-            inputs=[
-                wp.from_torch(data_buffer).flatten(),
-                wp.from_torch(data_buffer_out),
-                self.cfg.height,
-                self.cfg.width,
-                channel,
-                self._grid_size,
-            ],
-            device="cuda:0" if device == "cuda" else device,
-        )
+            self._data_buffer["position"] = (
+                self._frame_buffer.get_position_gpu_buffer().to(self.device)[..., :3]
+            )
 
     def _attach_to_entity(self) -> None:
         """Attach the sensor to the parent entity in each environment."""
diff --git a/embodichain/lab/sim/sensors/contact_sensor.py b/embodichain/lab/sim/sensors/contact_sensor.py
index 9b448d57..49ebbe1c 100644
--- a/embodichain/lab/sim/sensors/contact_sensor.py
+++ b/embodichain/lab/sim/sensors/contact_sensor.py
@@ -561,7 +561,7 @@ def set_contact_point_visibility(
             self._visualizer.add_points(
                 points=contact_position_world.to("cpu").numpy(), color=rgba
             )
-            # self._visualizer.set_point_size(point_size)
+            self._visualizer.set_point_size(point_size)
         else:
             if isinstance(self._visualizer, dexsim.models.PointCloud):
                 self._visualizer.clear()
diff --git a/embodichain/lab/sim/sensors/stereo.py b/embodichain/lab/sim/sensors/stereo.py
index dfea8a86..999bedca 100644
--- a/embodichain/lab/sim/sensors/stereo.py
+++ b/embodichain/lab/sim/sensors/stereo.py
@@ -17,21 +17,16 @@
 from __future__ import annotations
 
 import dexsim
-import math
 import torch
 import numpy as np
-import warp as wp
 import dexsim.render as dr
 
 from typing import Dict, Tuple, List, Sequence
-from tensordict import TensorDict
 
 from dexsim.utility import inv_transform
 from embodichain.lab.sim.sensors import Camera, CameraCfg
-from embodichain.utils.warp.kernels import reshape_tiled_image
 from embodichain.utils.math import matrix_from_euler
 from embodichain.utils import logger, configclass
-from embodichain.lab.sim.utility.sim_utils import is_rt_enabled
 
 
 @configclass
@@ -177,97 +172,46 @@ def _build_sensor_from_config(
             arenas = [env]
         num_instances = len(arenas)
 
-        if self.is_rt_enabled:
-            self._frame_buffer = self._world.create_camera_group(
-                [config.width, config.height], num_instances * 2, True
+        self._frame_buffer = self._world.create_camera_group(
+            [config.width, config.height], num_instances * 2, True
+        )
+        view_attrib = config.get_view_attrib()
+        left_list = []
+        right_list = []
+        for i, arena in enumerate(arenas):
+            left_view_name = f"{self.uid}_left_view{i + 1}"
+            left_view = arena.create_camera(
+                left_view_name,
+                config.width,
+                config.height,
+                True,
+                view_attrib,
+                self._frame_buffer,
             )
-            view_attrib = config.get_view_attrib()
-            left_list = []
-            right_list = []
-            for i, arena in enumerate(arenas):
-                left_view_name = f"{self.uid}_left_view{i + 1}"
-                left_view = arena.create_camera(
-                    left_view_name,
-                    config.width,
-                    config.height,
-                    True,
-                    view_attrib,
-                    self._frame_buffer,
-                )
-                left_view.set_intrinsic(config.intrinsics)
-                left_view.set_near(config.near)
-                left_view.set_far(config.far)
-                left_list.append(left_view)
-
-            for i, arena in enumerate(arenas):
-                right_view_name = f"{self.uid}_right_view{i + 1}"
-                right_view = arena.create_camera(
-                    right_view_name,
-                    config.width,
-                    config.height,
-                    True,
-                    view_attrib,
-                    self._frame_buffer,
-                )
-                right_view.set_intrinsic(config.intrinsics_right)
-                right_view.set_near(config.near)
-                right_view.set_far(config.far)
-                right_list.append(right_view)
-
-            for i in range(num_instances):
-                self._entities[i] = PairCameraView(
-                    left_list[i], right_list[i], config.left_to_right.cpu().numpy()
-                )
-
-        else:
-            self._grid_size = math.ceil(math.sqrt(num_instances))
-
-            # stereo camera has two views, we append the right camera to the left camera's view list
-            frame_width = self._grid_size * config.width * 2
-            frame_height = self._grid_size * config.height
-            view_attrib = config.get_view_attrib()
-
-            # Create the data frame
-            self._frame_buffer = self._world.create_frame_buffer(
-                [frame_width, frame_height], view_attrib, True
+            left_view.set_intrinsic(config.intrinsics)
+            left_view.set_near(config.near)
+            left_view.set_far(config.far)
+            left_list.append(left_view)
+
+        for i, arena in enumerate(arenas):
+            right_view_name = f"{self.uid}_right_view{i + 1}"
+            right_view = arena.create_camera(
+                right_view_name,
+                config.width,
+                config.height,
+                True,
+                view_attrib,
+                self._frame_buffer,
+            )
+            right_view.set_intrinsic(config.intrinsics_right)
+            right_view.set_near(config.near)
+            right_view.set_far(config.far)
+            right_list.append(right_view)
+
+        for i in range(num_instances):
+            self._entities[i] = PairCameraView(
+                left_list[i], right_list[i], config.left_to_right.cpu().numpy()
             )
-            self._frame_buffer.set_read_able(view_attrib)
-
-            # Create camera views
-            for i, arena in enumerate(arenas):
-                col = i // self._grid_size
-                row = i % self._grid_size
-                x = row * config.width * 2
-                y = col * config.height
-                left_view_name = f"{self.uid}_left_view{i + 1}"
-
-                left_view = arena.create_camera_view(
-                    left_view_name,
-                    (x, y),
-                    (config.width, config.height),
-                    self._frame_buffer,
-                )
-
-                left_view.set_intrinsic(config.intrinsics)
-                left_view.set_near(config.near)
-                left_view.set_far(config.far)
-                left_view.enable_postprocessing(True)
-
-                right_view_name = f"{self.uid}_right_view{i + 1}"
-                right_view = arena.create_camera_view(
-                    right_view_name,
-                    (x + config.width, y),
-                    (config.width, config.height),
-                    self._frame_buffer,
-                )
-                right_view.set_intrinsic(config.intrinsics_right)
-                right_view.set_near(config.near)
-                right_view.set_far(config.far)
-                right_view.enable_postprocessing(True)
-
-                self._entities[i] = PairCameraView(
-                    left_view, right_view, config.left_to_right.cpu().numpy()
-                )
 
         # Define a mapping of data types to their respective shapes and dtypes
         buffer_specs = {
@@ -348,66 +292,38 @@ def update(self, **kwargs) -> None:
             - disparity: Disparity images with shape (B, H, W, 1) and dtype torch.float32
         Args:
             **kwargs: Additional keyword arguments for sensor update.
-                - fetch_only (bool): If True, only fetch the data from dexsim internal frame buffer without performing rendering.
         """
-
         fetch_only = kwargs.get("fetch_only", False)
         if not fetch_only:
-            if self.is_rt_enabled:
-                self._frame_buffer.apply()
-            else:
-                self._frame_buffer.apply_frame()
+            self._frame_buffer.apply()
 
         self.cfg: StereoCameraCfg
         if self.cfg.enable_color:
-            if self.is_rt_enabled:
-                data = self._frame_buffer.get_rgb_gpu_buffer().to(self.device)
-                self._data_buffer["color"] = data[: self.num_instances, ...]
-                self._data_buffer[f"color_right"] = data[self.num_instances :, ...]
-            else:
-                data = self._frame_buffer.get_color_gpu_buffer().to(self.device)
-                self._update_buffer_impl(data, self._data_buffer_stereo["color"])
+            data = self._frame_buffer.get_rgb_gpu_buffer().to(self.device)
+            self._data_buffer["color"] = data[: self.num_instances, ...]
+            self._data_buffer[f"color_right"] = data[self.num_instances :, ...]
         if self.cfg.enable_depth:
             data = self._frame_buffer.get_depth_gpu_buffer().to(self.device)
-            if self.is_rt_enabled:
-                self._data_buffer["depth"] = data[: self.num_instances, ...].unsqueeze_(
-                    -1
-                )
-                self._data_buffer[f"depth_right"] = data[
-                    self.num_instances :, ...
-                ].unsqueeze_(-1)
-            else:
-                self._update_buffer_impl(data, self._data_buffer_stereo["depth"])
+            self._data_buffer["depth"] = data[: self.num_instances, ...].unsqueeze_(-1)
+            self._data_buffer[f"depth_right"] = data[
+                self.num_instances :, ...
+            ].unsqueeze_(-1)
         if self.cfg.enable_mask:
-            if self.is_rt_enabled:
-                data = self._frame_buffer.get_visible_mask_gpu_buffer().to(
-                    self.device, torch.int32
-                )
-                self._data_buffer["mask"] = data[: self.num_instances, ...].unsqueeze_(
-                    -1
-                )
-                self._data_buffer[f"mask_right"] = data[
-                    self.num_instances :, ...
-                ].unsqueeze_(-1)
-            else:
-                data = self._frame_buffer.get_visible_gpu_buffer().to(
-                    self.device, torch.int32
-                )
-                self._update_buffer_impl(data, self._data_buffer_stereo["mask"])
+            data = self._frame_buffer.get_visible_mask_gpu_buffer().to(
+                self.device, torch.int32
+            )
+            self._data_buffer["mask"] = data[: self.num_instances, ...].unsqueeze_(-1)
+            self._data_buffer[f"mask_right"] = data[
+                self.num_instances :, ...
+            ].unsqueeze_(-1)
         if self.cfg.enable_normal:
-            data = self._frame_buffer.get_normal_gpu_buffer().to(self.device)
-            if self.is_rt_enabled:
-                self._data_buffer["normal"] = data[: self.num_instances, ...]
-                self._data_buffer[f"normal_right"] = data[self.num_instances :, ...]
-            else:
-                self._update_buffer_impl(data, self._data_buffer_stereo["normal"])
+            data = self._frame_buffer.get_normal_gpu_buffer().to(self.device)[..., :3]
+            self._data_buffer["normal"] = data[: self.num_instances, ...]
+            self._data_buffer[f"normal_right"] = data[self.num_instances :, ...]
         if self.cfg.enable_position:
-            data = self._frame_buffer.get_position_gpu_buffer().to(self.device)
-            if self.is_rt_enabled:
-                self._data_buffer["position"] = data[: self.num_instances, ...]
-                self._data_buffer[f"position_right"] = data[self.num_instances :, ...]
-            else:
-                self._update_buffer_impl(data, self._data_buffer_stereo["position"])
+            data = self._frame_buffer.get_position_gpu_buffer().to(self.device)[..., :3]
+            self._data_buffer["position"] = data[: self.num_instances, ...]
+            self._data_buffer[f"position_right"] = data[self.num_instances :, ...]
         if self.cfg.enable_disparity:
             disparity = self._data_buffer["disparity"]
             disparity.fill_(0.0)
@@ -421,25 +337,6 @@ def update(self, **kwargs) -> None:
                 self.cfg.fx * distance / depth[valid_depth_mask]
             )
 
-    def _update_buffer_impl(
-        self, data_buffer: torch.Tensor, data_buffer_out: torch.Tensor
-    ) -> None:
-        device = str(self.device)
-        channel = data_buffer.shape[-1] if data_buffer.dim() >= 3 else 1
-        wp.launch(
-            kernel=reshape_tiled_image,
-            dim=(self.num_instances, self.cfg.height, self.cfg.width * 2),
-            inputs=[
-                wp.from_torch(data_buffer).flatten(),
-                wp.from_torch(data_buffer_out),
-                self.cfg.height,
-                self.cfg.width * 2,
-                channel,
-                self._grid_size,
-            ],
-            device="cuda:0" if device == "cuda" else device,
-        )
-
     def get_left_right_arena_pose(self) -> torch.Tensor:
         """Get the local pose of the left and right cameras.
 
diff --git a/embodichain/lab/sim/sim_manager.py b/embodichain/lab/sim/sim_manager.py
index bee36aa5..fbfb13c2 100644
--- a/embodichain/lab/sim/sim_manager.py
+++ b/embodichain/lab/sim/sim_manager.py
@@ -17,7 +17,11 @@
 from __future__ import annotations
 
 import os
+import gc
 import sys
+import queue
+import time
+import threading
 import dexsim
 import torch
 import numpy as np
@@ -26,6 +30,7 @@
 from tqdm import tqdm
 from pathlib import Path
 from copy import deepcopy
+from datetime import datetime
 from functools import cached_property
 from typing import List, Union, Dict, Union, Sequence
 from dataclasses import dataclass, asdict, field, MISSING
@@ -45,9 +50,10 @@
     RigidBodyGPUAPIReadType,
     ArticulationGPUAPIReadType,
 )
+from dexsim.core import TASK_RETURN
 from dexsim.engine import CudaArray, Material
 from dexsim.models import MeshObject
-from dexsim.render import Light as _Light, LightType, Windows
+from dexsim.render import Light as _Light, LightType, Windows, ViewFlags
 from dexsim.engine import GizmoController, ObjectManipulator
 
 from embodichain.lab.sim.objects import (
@@ -68,9 +74,12 @@
     ContactSensor,
 )
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     PhysicsCfg,
     MarkerCfg,
     GPUMemoryCfg,
+    WindowRecordCfg,
+    WindowCaptureCfg,
     LightCfg,
     RigidObjectCfg,
     SoftObjectCfg,
@@ -80,6 +89,7 @@
     RobotCfg,
 )
 from embodichain.lab.sim import VisualMaterial, VisualMaterialCfg
+from embodichain.utils.math import look_at_to_pose
 from embodichain.utils import configclass, logger
 
 __all__ = [
@@ -105,14 +115,8 @@ class SimulationManagerCfg:
     headless: bool = False
     """Whether to run the simulation in headless mode (no Window)."""
 
-    enable_rt: bool = False
-    """Whether to enable ray tracing rendering."""
-
-    enable_denoiser: bool = True
-    """Whether to enable denoising for ray tracing rendering."""
-
-    spp: int = 64
-    """Samples per pixel for ray tracing rendering. This parameter is only valid when ray tracing is enabled and enable_denoiser is False."""
+    render_cfg: RenderCfg = field(default_factory=RenderCfg)
+    """The rendering configuration parameters."""
 
     gpu_id: int = 0
     """The gpu index that the simulation engine will be used. 
@@ -147,6 +151,52 @@ class SimulationManagerCfg:
     gpu_memory_config: GPUMemoryCfg = field(default_factory=GPUMemoryCfg)
     """The GPU memory configuration parameters."""
 
+    window_record: WindowRecordCfg = field(default_factory=WindowRecordCfg)
+    """Viewer window recording settings (hotkey, paths, FPS, memory budget)."""
+
+    window_capture: WindowCaptureCfg = field(default_factory=WindowCaptureCfg)
+    """Viewer window single-frame capture settings (hotkey, paths)."""
+
+
+@dataclass
+class _WindowCaptureRequest:
+    """Pending single-frame window capture parameters."""
+
+    save_path: str | None
+    width: int | None
+    height: int | None
+    camera_pose: np.ndarray | torch.Tensor | None
+    eye: Sequence[float] | None
+    target: Sequence[float] | None
+    up: Sequence[float] | None
+    use_window_pose: bool
+
+
+@dataclass
+class _WindowCaptureAsyncState:
+    """Internal state for async viewer-window capture."""
+
+    pending: _WindowCaptureRequest | None = None
+    task_status: int = TASK_RETURN.TASK_LOOP
+    loop_handle: object | None = None
+
+
+@dataclass
+class _WindowRecordState:
+    """Internal state for viewer-window recording."""
+
+    time_step: float
+    max_memory_bytes: int
+    output_dir: str
+    video_name: str
+    save_kwargs: dict[str, object]
+    record_camera: object | None = None
+    frames: list[np.ndarray] = field(default_factory=list)
+    current_memory_bytes: int = 0
+    last_capture_time: float = field(default_factory=time.time)
+    task_status: int = TASK_RETURN.TASK_LOOP
+    loop_handle: object | None = None
+
 
 class SimulationManager:
     r"""Global Embodied AI simulation manager.
@@ -166,6 +216,8 @@ class SimulationManager:
 
     _instances = {}
 
+    _cleanup_queue: queue.Queue = queue.Queue()
+
     SUPPORTED_SENSOR_TYPES = {
         "Camera": Camera,
         "StereoCamera": StereoCamera,
@@ -189,11 +241,6 @@ def __init__(
         # Mark as initialized
         self.instance_id = instance_id
 
-        if sim_config.enable_rt and instance_id > 0:
-            logger.log_error(
-                f"Ray Tracing rendering backend is only supported for single instance (instance_id=0). "
-            )
-
         # Cache paths
         self._sim_cache_dir = SIM_CACHE_DIR
         self._material_cache_dir = MATERIAL_CACHE_DIR
@@ -220,11 +267,37 @@ def __init__(
 
         self._window: Windows | None = None
         self._is_registered_window_control = False
+        self._window_record_state: _WindowRecordState | None = None
+        self._window_record_camera: object | None = None
+        self._window_capture_camera: object | None = None
+        self._window_capture_camera_size: tuple[int, int] | None = None
+        wr = sim_config.window_record
+        self._window_record_hotkey_cfg: dict[str, object] | None = (
+            {
+                "save_path": wr.save_path,
+                "fps": wr.fps,
+                "max_memory": wr.max_memory,
+                "video_prefix": wr.video_prefix,
+            }
+            if wr.enable_hotkey
+            else None
+        )
+        self._window_record_input_control: ObjectManipulator | None = None
+        self._window_record_save_threads: list[threading.Thread] = []
+        self._window_capture_async_state: _WindowCaptureAsyncState | None = None
+        self._window_capture_input_control: ObjectManipulator | None = None
+        self._window_capture_save_threads: list[threading.Thread] = []
+        wc = sim_config.window_capture
+        self._window_capture_hotkey_cfg: dict[str, object] | None = (
+            {
+                "save_path": wc.save_path,
+                "image_prefix": wc.image_prefix,
+                "use_window_pose": wc.use_window_pose,
+            }
+            if wc.enable_hotkey
+            else None
+        )
 
-        fps = int(1.0 / sim_config.physics_dt)
-        self._world.set_physics_fps(fps)
-
-        self._world.set_time_scale(1.0)
         self._world.set_delta_time(sim_config.physics_dt)
         self._world.show_coordinate_axis(False)
 
@@ -239,13 +312,6 @@ def __init__(
 
         self._env = self._world.get_env()
 
-        # set unique material path to accelerate material creation.
-        # TODO: This will be removed.
-        if self.sim_config.enable_rt is False:
-            self._env.set_unique_mat_path(
-                os.path.join(self._material_cache_dir, "default_mat")
-            )
-
         # arena is used as a standalone space for robots to simulate in.
         self._arenas: List[dexsim.environment.Arena] = []
 
@@ -284,7 +350,7 @@ def __init__(
 
         if sim_config.headless is False:
             self._window = self._world.get_windows()
-            self._register_default_window_control()
+            # self._register_default_window_control()
 
     @classmethod
     def get_instance(cls, instance_id: int = 0) -> SimulationManager:
@@ -334,7 +400,7 @@ def is_instantiated(cls, instance_id: int = 0) -> bool:
         """
         return instance_id in cls._instances
 
-    @property
+    @cached_property
     def num_envs(self) -> int:
         """Get the number of arenas in the simulation.
 
@@ -343,16 +409,10 @@ def num_envs(self) -> int:
         """
         return len(self._arenas) if len(self._arenas) > 0 else 1
 
-    @cached_property
+    @property
     def is_use_gpu_physics(self) -> bool:
         """Check if the physics simulation is using GPU."""
-        world_config = dexsim.get_world_config()
-        return self.device.type == "cuda" and world_config.enable_gpu_sim
-
-    @property
-    def is_rt_enabled(self) -> bool:
-        """Check if Ray Tracing rendering backend is enabled."""
-        return self.sim_config.enable_rt
+        return self.device.type == "cuda"
 
     @property
     def is_physics_manually_update(self) -> bool:
@@ -395,11 +455,10 @@ def _convert_sim_config(
         world_config.length_tolerance = sim_config.physics_config.length_tolerance
         world_config.speed_tolerance = sim_config.physics_config.speed_tolerance
 
-        if sim_config.enable_rt:
-            world_config.renderer = dexsim.types.Renderer.FASTRT
-            if sim_config.enable_denoiser is False:
-                world_config.raytrace_config.spp = sim_config.spp
-                world_config.raytrace_config.open_denoise = False
+        world_config.renderer = sim_config.render_cfg.to_dexsim_flags()
+        if sim_config.render_cfg.enable_denoiser is False:
+            world_config.raytrace_config.spp = sim_config.render_cfg.spp
+            world_config.raytrace_config.open_denoise = False
 
         if type(sim_config.sim_device) is str:
             self.device = torch.device(sim_config.sim_device)
@@ -458,28 +517,6 @@ def init_gpu_physics(self) -> None:
         if self._is_initialized_gpu_physics:
             return
 
-        # init rigid body.
-        rigid_body_num = (
-            0
-            if self._get_non_static_rigid_obj_num() == 0
-            else len(self._ps.get_gpu_rigid_indices())
-        )
-        self._rigid_body_pose = torch.zeros(
-            (rigid_body_num, 7), dtype=torch.float32, device=self.device
-        )
-
-        # init articulation.
-        articulation_num = (
-            0
-            if len(self._articulations) == 0 and len(self._robots) == 0
-            else len(self._ps.get_gpu_articulation_indices())
-        )
-        max_link_count = self._ps.gpu_get_articulation_max_link_count()
-        self._link_pose = torch.zeros(
-            (articulation_num, max_link_count, 7),
-            dtype=torch.float32,
-            device=self.device,
-        )
         for art in self._articulations.values():
             art.reallocate_body_data()
         for robot in self._robots.values():
@@ -498,12 +535,7 @@ def render_camera_group(self, group_ids: list[int]) -> None:
         Note: This interface is only valid when Ray Tracing rendering backend is enabled.
         """
 
-        if self.is_rt_enabled:
-            self._world.render_camera_group(group_ids)
-        else:
-            logger.log_warning(
-                "This interface is only valid when Ray Tracing rendering backend is enabled."
-            )
+        self._world.render_camera_group(group_ids)
 
     def update(self, physics_dt: float | None = None, step: int = 10) -> None:
         """Update the physics.
@@ -524,43 +556,9 @@ def update(self, physics_dt: float | None = None, step: int = 10) -> None:
             for i in range(step):
                 self._world.update(physics_dt)
 
-            if self.sim_config.enable_rt is False:
-                self._sync_gpu_data()
-
         else:
             logger.log_warning("Physics simulation is not manually updated.")
 
-    def _sync_gpu_data(self) -> None:
-        if not self.is_use_gpu_physics:
-            return
-
-        if not self._is_initialized_gpu_physics:
-            logger.log_warning(
-                "GPU physics is not initialized. Skipping GPU data synchronization."
-            )
-            return
-
-        if self.is_window_opened or self._sensors:
-            if len(self._rigid_body_pose) > 0:
-                self._ps.gpu_fetch_rigid_body_data(
-                    data=CudaArray(self._rigid_body_pose),
-                    gpu_indices=self._ps.get_gpu_rigid_indices(),
-                    data_type=RigidBodyGPUAPIReadType.POSE,
-                )
-
-            if len(self._link_pose) > 0:
-                self._ps.gpu_fetch_link_data(
-                    data=CudaArray(self._link_pose),
-                    gpu_indices=self._ps.get_gpu_articulation_indices(),
-                    data_type=ArticulationGPUAPIReadType.LINK_GLOBAL_POSE,
-                )
-
-            # TODO: might be optimized.
-            self._world.sync_poses_gpu_to_cpu(
-                rigid_pose=CudaArray(self._rigid_body_pose),
-                link_pose=CudaArray(self._link_pose),
-            )
-
     def get_env(self, arena_index: int = -1) -> dexsim.environment.Arena:
         """Get the arena or env by index.
 
@@ -589,12 +587,30 @@ def open_window(self) -> None:
         """Open the simulation window."""
         self._world.open_window()
         self._window = self._world.get_windows()
-        self._register_default_window_control()
+
+        # TODO: will open these features after fix the related blocking issues.
+        # self._register_default_window_control()
+        # if (
+        #     self._window_record_hotkey_cfg is not None
+        #     and self._window_record_input_control is None
+        # ):
+        #     self.enable_window_record_hotkey(**self._window_record_hotkey_cfg)
+        if (
+            self._window_capture_hotkey_cfg is not None
+            and self._window_capture_input_control is None
+        ):
+            self.enable_window_capture_hotkey(**self._window_capture_hotkey_cfg)
         self.is_window_opened = True
 
     def close_window(self) -> None:
         """Close the simulation window."""
+        if self.is_window_recording():
+            self.stop_window_record()
+        self._stop_window_capture_async_loop()
         self._world.close_window()
+        self._window = None
+        self._window_record_input_control = None
+        self._window_capture_input_control = None
         self.is_window_opened = False
 
     def _build_multiple_arenas(self, num: int, space: float | None = None) -> None:
@@ -662,6 +678,7 @@ def _create_default_plane(self):
         plane_collision = self._env.create_cube(
             default_length, default_length, default_length / 10
         )
+        plane_collision.set_visible(False)
         plane_collision_pose = np.eye(4, dtype=float)
         plane_collision_pose[2, 3] = -default_length / 20 - 0.001
         plane_collision.set_local_pose(plane_collision_pose)
@@ -682,17 +699,26 @@ def set_default_background(self) -> None:
                 uid=mat_name,
                 base_color_texture=color_texture,
                 roughness_texture=roughness_texture,
+                roughness=0.7,
             )
         )
 
-        if self.sim_config.enable_rt:
-            self.set_emission_light([1.0, 1.0, 1.0], 80.0)
-        else:
-            self.set_indirect_lighting("lab_day")
+        self.set_emission_light([1.0, 1.0, 1.0], 120.0)
 
         self._default_plane.set_material(mat.get_instance("plane_mat").mat)
         self._visual_materials[mat_name] = mat
 
+    def set_ground_plane_visibility(self, visible: bool) -> None:
+        """_summary_
+
+        Args:
+            visible (bool): _description_
+        """
+        if visible:
+            self._default_plane.set_visible(True)
+        else:
+            self._default_plane.set_visible(False)
+
     def set_texture_cache(
         self, key: str, texture: Union[torch.Tensor, List[torch.Tensor]]
     ) -> None:
@@ -1064,17 +1090,20 @@ def arena_offsets(self) -> torch.Tensor:
             )
         return arena_offsets
 
-    def _get_non_static_rigid_obj_num(self) -> int:
-        """Get the number of non-static rigid objects in the scene.
+    def has_non_static_rigid_object(self) -> bool:
+        """Check if there is any non-static rigid object in the simulation.
 
         Returns:
-            int: The number of non-static rigid objects.
+            bool: True if there is at least one non-static rigid object, False otherwise.
         """
-        count = 0
-        for obj in self._rigid_objects.values():
-            if obj.cfg.body_type != "static":
-                count += 1
-        return count
+        for rigid_obj in self._rigid_objects.values():
+            if rigid_obj.body_type != "static":
+                return True
+
+        if len(self._rigid_object_groups) > 0:
+            return True
+
+        return False
 
     def add_articulation(
         self,
@@ -1105,7 +1134,9 @@ def add_articulation(
             if len(env_list) > 1:
                 logger.log_error(f"Currently not supporting multiple arenas for USD.")
             env = self._env
-            results = env.import_from_usd_file(cfg.fpath, return_object=True)
+            results = env.import_from_usd_file(
+                cfg.fpath, return_object=True, cache_dir=self._convex_decomp_dir
+            )
             # print("USD import results:", results)
 
             articulations_found = []
@@ -1558,6 +1589,13 @@ def draw_marker(
             return False
 
         draw_xpos = deepcopy(cfg.axis_xpos)
+        if isinstance(draw_xpos, torch.Tensor):
+            draw_xpos = draw_xpos.detach().cpu().numpy()
+        elif isinstance(draw_xpos, (list, tuple)):
+            draw_xpos = [
+                item.detach().cpu().numpy() if isinstance(item, torch.Tensor) else item
+                for item in draw_xpos
+            ]
         draw_xpos = np.array(draw_xpos)
         if draw_xpos.ndim == 2:
             if draw_xpos.shape == (4, 4):
@@ -1657,11 +1695,6 @@ def _register_default_window_control(self) -> None:
         """Register default window controls for better simulation interaction."""
         from dexsim.types import InputKey
 
-        # TODO: window control has stucking issue with extra sensor under Raster renderer backend.
-        # Will be fixed in next dexsim release.
-        if self.is_rt_enabled is False:
-            return
-
         if self._is_registered_window_control:
             return
 
@@ -1699,6 +1732,547 @@ def add_custom_window_control(self, controls: list[ObjectManipulator]) -> None:
         for control in controls:
             self._window.add_input_control(control)
 
+    def _build_window_record_output(
+        self, save_path: str | None, video_prefix: str
+    ) -> tuple[str, str]:
+        """Resolve the output directory and file name for viewer recording."""
+        if save_path is None:
+            output_dir = os.path.join(os.getcwd(), "outputs", "videos")
+            timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+            video_name = f"{video_prefix}_{timestamp}"
+        else:
+            output_dir = os.path.dirname(save_path) or os.getcwd()
+            video_name = Path(os.path.basename(save_path)).stem
+        return output_dir, video_name
+
+    def _build_window_capture_output(
+        self, save_path: str | None, image_prefix: str
+    ) -> str:
+        """Resolve the output image path for a single-frame window capture."""
+        if save_path is not None:
+            return save_path
+        output_dir = os.path.join(os.getcwd(), "outputs", "window_capture")
+        timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+        return os.path.join(output_dir, f"{image_prefix}_{timestamp}.png")
+
+    def is_window_recording(self) -> bool:
+        """Check whether the viewer window is currently recording."""
+        return self._window_record_state is not None
+
+    def _get_window_capture_camera(self, width: int, height: int) -> object:
+        """Get or create the hidden camera used for window capture."""
+        camera_size = (width, height)
+        if (
+            self._window_capture_camera is None
+            or self._window_capture_camera_size != camera_size
+        ):
+            # TODO: Will change ViewFlags to ViewFlags.COLOR in the future.
+            camera_name = f"window_capture_camera_{self.instance_id}"
+            self._window_capture_camera = self._env.create_camera(
+                camera_name, width, height, True, ViewFlags.ALL
+            )
+            self._window_capture_camera_size = camera_size
+        return self._window_capture_camera
+
+    def _capture_rgb_from_camera(
+        self, record_camera: object, camera_pose: np.ndarray
+    ) -> np.ndarray | None:
+        """Render an RGB frame from a hidden DexSim camera."""
+        if hasattr(record_camera, "is_open") and record_camera.is_open() is False:
+            record_camera.open_camera()
+
+        record_camera.set_world_pose(camera_pose)
+        record_camera.render()
+        rgb = np.asarray(record_camera.get_rgb_map())
+        if rgb.size == 0:
+            return None
+        return np.ascontiguousarray(rgb[..., :3])
+
+    def _resolve_window_capture_pose(
+        self,
+        camera_pose: np.ndarray | torch.Tensor | None,
+        eye: Sequence[float] | None,
+        target: Sequence[float] | None,
+        up: Sequence[float] | None,
+        use_window_pose: bool,
+    ) -> np.ndarray:
+        """Resolve the camera pose for single-frame window capture."""
+        if camera_pose is not None:
+            if isinstance(camera_pose, torch.Tensor):
+                return camera_pose.detach().cpu().numpy().astype(np.float32)
+            return np.asarray(camera_pose, dtype=np.float32)
+
+        if use_window_pose and self._window is not None:
+            return np.asarray(self._window.get_pose_matrix(), dtype=np.float32)
+
+        if eye is None:
+            eye = (2.5, -3.0, 2.0)
+        if target is None:
+            target = (0.0, 0.0, 0.5)
+        if up is None:
+            up = (0.0, 0.0, 1.0)
+
+        pose = look_at_to_pose(eye, target, up)
+        pose[:, :3, 1] = -pose[:, :3, 1]
+        pose[:, :3, 2] = -pose[:, :3, 2]
+        return pose.squeeze(0).cpu().numpy().astype(np.float32)
+
+    def _render_window_capture_frame(
+        self,
+        width: int | None = None,
+        height: int | None = None,
+        camera_pose: np.ndarray | torch.Tensor | None = None,
+        eye: Sequence[float] | None = None,
+        target: Sequence[float] | None = None,
+        up: Sequence[float] | None = None,
+        use_window_pose: bool = True,
+    ) -> np.ndarray | None:
+        """Render a single RGB frame from the hidden window capture camera."""
+        width = self.sim_config.width if width is None else width
+        height = self.sim_config.height if height is None else height
+        record_camera = self._get_window_capture_camera(width, height)
+        resolved_pose = self._resolve_window_capture_pose(
+            camera_pose=camera_pose,
+            eye=eye,
+            target=target,
+            up=up,
+            use_window_pose=use_window_pose,
+        )
+        return self._capture_rgb_from_camera(record_camera, resolved_pose)
+
+    @staticmethod
+    def _save_window_capture_frame(frame: np.ndarray, save_path: str) -> None:
+        """Save a captured RGB frame to disk."""
+        from PIL import Image
+
+        output_dir = os.path.dirname(save_path)
+        if output_dir:
+            os.makedirs(output_dir, exist_ok=True)
+        Image.fromarray(frame).save(save_path)
+
+    def _save_window_capture_worker(self, frame: np.ndarray, save_path: str) -> None:
+        """Save a captured frame in a background thread."""
+        try:
+            self._save_window_capture_frame(frame, save_path)
+            logger.log_info(f"Window capture saved to {save_path}")
+        except Exception as exc:
+            logger.log_error(f"Failed to save window capture: {exc}")
+
+    def capture_window(
+        self,
+        save_path: str | None = None,
+        width: int | None = None,
+        height: int | None = None,
+        camera_pose: np.ndarray | torch.Tensor | None = None,
+        eye: Sequence[float] | None = None,
+        target: Sequence[float] | None = None,
+        up: Sequence[float] | None = None,
+        use_window_pose: bool = True,
+    ) -> np.ndarray | None:
+        """Capture a single RGB frame using a hidden render camera.
+
+        When a viewer window is open, the hidden camera follows the current window
+        pose by default. In headless mode, pass ``camera_pose`` or use the
+        ``eye``/``target``/``up`` look-at parameters to render without opening a
+        window.
+
+        Args:
+            save_path: Optional image path. Parent directories are created.
+            width: Capture width. Defaults to the simulation window width.
+            height: Capture height. Defaults to the simulation window height.
+            camera_pose: Optional 4x4 world pose for the hidden camera.
+            eye: Optional look-at camera position used when no window pose exists.
+            target: Optional look-at target used when no window pose exists.
+            up: Optional look-at up vector used when no window pose exists.
+            use_window_pose: Whether to follow the current viewer pose when a
+                window is open and ``camera_pose`` is not provided.
+
+        Returns:
+            Captured RGB frame with shape ``(height, width, 3)``, or ``None`` if
+            rendering did not produce an image.
+        """
+        frame = self._render_window_capture_frame(
+            width=width,
+            height=height,
+            camera_pose=camera_pose,
+            eye=eye,
+            target=target,
+            up=up,
+            use_window_pose=use_window_pose,
+        )
+        if frame is None:
+            logger.log_warning("Window capture did not produce an RGB frame.")
+            return None
+
+        if save_path is not None:
+            self._save_window_capture_frame(frame, save_path)
+            logger.log_info(f"Window capture saved to {save_path}")
+
+        return frame
+
+    def _ensure_window_capture_async_loop(self) -> None:
+        """Start the render-thread loop that processes pending capture requests."""
+        if self._window_capture_async_state is not None:
+            return
+
+        state = _WindowCaptureAsyncState()
+
+        def _window_capture_loop(_: float) -> int:
+            return self._step_window_capture_async(state)
+
+        state.loop_handle = self._world.thread_rt().add_loop(
+            _window_capture_loop, 1.0 / 60.0
+        )
+        self._window_capture_async_state = state
+
+    def _stop_window_capture_async_loop(self) -> None:
+        """Stop the async window capture loop and clear pending requests."""
+        if self._window_capture_async_state is None:
+            return
+        state = self._window_capture_async_state
+        state.task_status = TASK_RETURN.TASK_EXIT
+        state.pending = None
+        self._window_capture_async_state = None
+
+    def _step_window_capture_async(self, state: _WindowCaptureAsyncState) -> int:
+        """Process a pending capture request on the render thread."""
+        if state.task_status != TASK_RETURN.TASK_LOOP:
+            return state.task_status
+        if state.pending is None:
+            return state.task_status
+
+        request = state.pending
+        state.pending = None
+        frame = self._render_window_capture_frame(
+            width=request.width,
+            height=request.height,
+            camera_pose=request.camera_pose,
+            eye=request.eye,
+            target=request.target,
+            up=request.up,
+            use_window_pose=request.use_window_pose,
+        )
+        if frame is None:
+            logger.log_warning("Window capture did not produce an RGB frame.")
+            return state.task_status
+
+        save_path = request.save_path
+        if save_path is None:
+            return state.task_status
+
+        self._window_capture_save_threads = [
+            thread for thread in self._window_capture_save_threads if thread.is_alive()
+        ]
+        save_thread = threading.Thread(
+            target=self._save_window_capture_worker,
+            args=(frame.copy(), save_path),
+            daemon=False,
+        )
+        save_thread.start()
+        self._window_capture_save_threads.append(save_thread)
+        return state.task_status
+
+    def request_window_capture_async(
+        self,
+        save_path: str | None = None,
+        width: int | None = None,
+        height: int | None = None,
+        camera_pose: np.ndarray | torch.Tensor | None = None,
+        eye: Sequence[float] | None = None,
+        target: Sequence[float] | None = None,
+        up: Sequence[float] | None = None,
+        use_window_pose: bool = True,
+        image_prefix: str = "window_capture",
+    ) -> bool:
+        """Queue a single-frame window capture on the render thread.
+
+        The capture uses the current viewer pose when a window is open. Saving to
+        disk is performed in a background thread so the UI loop is not blocked.
+
+        Args:
+            save_path: Optional image path. If None, a timestamped path is used.
+            width: Capture width. Defaults to the simulation window width.
+            height: Capture height. Defaults to the simulation window height.
+            camera_pose: Optional 4x4 world pose for the hidden camera.
+            eye: Optional look-at camera position used when no window pose exists.
+            target: Optional look-at target used when no window pose exists.
+            up: Optional look-at up vector used when no window pose exists.
+            use_window_pose: Whether to follow the current viewer pose when a
+                window is open and ``camera_pose`` is not provided.
+            image_prefix: Image file prefix when ``save_path`` is None.
+
+        Returns:
+            True if the capture request was queued, False otherwise.
+        """
+        if self._window is None:
+            logger.log_warning(
+                "No simulation window available for async window capture."
+            )
+            return False
+
+        resolved_save_path = self._build_window_capture_output(save_path, image_prefix)
+        self._ensure_window_capture_async_loop()
+        if self._window_capture_async_state is None:
+            return False
+
+        self._window_capture_async_state.pending = _WindowCaptureRequest(
+            save_path=resolved_save_path,
+            width=width,
+            height=height,
+            camera_pose=camera_pose,
+            eye=eye,
+            target=target,
+            up=up,
+            use_window_pose=use_window_pose,
+        )
+        logger.log_info(
+            f"Window capture queued. It will be saved to {resolved_save_path}."
+        )
+        return True
+
+    def enable_window_capture_hotkey(
+        self,
+        save_path: str | None = None,
+        image_prefix: str = "window_capture",
+        use_window_pose: bool = True,
+    ) -> bool:
+        """Register the ``p`` key to queue a single-frame window capture."""
+        self._window_capture_hotkey_cfg = {
+            "save_path": save_path,
+            "image_prefix": image_prefix,
+            "use_window_pose": use_window_pose,
+        }
+        if self._window is None:
+            logger.log_warning(
+                "No simulation window available yet. The window capture hotkey "
+                "will be registered after `open_window()`."
+            )
+            return False
+        if self._window_capture_input_control is not None:
+            self._ensure_window_capture_async_loop()
+            return True
+
+        from dexsim.types import InputKey
+
+        sim = self
+        hotkey_cfg = dict(self._window_capture_hotkey_cfg)
+
+        class WindowCaptureEvent(ObjectManipulator):
+            def on_key_down(self, key):
+                if key == InputKey.SCANCODE_P.value:
+                    sim.request_window_capture_async(**hotkey_cfg)
+
+        self._window_capture_input_control = WindowCaptureEvent()
+        self._window.add_input_control(self._window_capture_input_control)
+        self._ensure_window_capture_async_loop()
+        logger.log_info(
+            "Window capture hotkey registered. Press 'p' to capture the current view."
+        )
+        return True
+
+    def _step_window_record(self, state: _WindowRecordState) -> int:
+        """Capture frames in the render thread without blocking the UI loop."""
+        if state.task_status != TASK_RETURN.TASK_LOOP:
+            return state.task_status
+
+        now = time.time()
+        if now - state.last_capture_time < state.time_step:
+            return state.task_status
+
+        state.last_capture_time = now
+        frame: np.ndarray | None = None
+        if self._window is not None and state.record_camera is not None:
+            pose = np.asarray(self._window.get_pose_matrix(), dtype=np.float32)
+            frame = self._capture_rgb_from_camera(state.record_camera, pose)
+        if frame is None:
+            return state.task_status
+
+        state.frames.append(frame)
+        state.current_memory_bytes += frame.nbytes
+        if state.current_memory_bytes > state.max_memory_bytes:
+            logger.log_warning(
+                "Viewer recording exceeded the configured memory budget. "
+                "Press 'r' again to flush the buffered frames to disk."
+            )
+            state.task_status = TASK_RETURN.TASK_EXIT
+
+        return state.task_status
+
+    def _save_window_record_worker(
+        self,
+        frames: list[np.ndarray],
+        output_dir: str,
+        video_name: str,
+        save_kwargs: dict[str, object],
+    ) -> None:
+        """Encode buffered frames into a video file in a background thread."""
+        from dexsim.utility import images_to_video
+
+        try:
+            os.makedirs(output_dir, exist_ok=True)
+            images_to_video(
+                images=frames,
+                output_dir=output_dir,
+                video_name=video_name,
+                **save_kwargs,
+            )
+            logger.log_info(
+                f"Viewer recording saved to {os.path.join(output_dir, video_name + '.mp4')}"
+            )
+        except Exception as exc:
+            logger.log_error(f"Failed to save viewer recording: {exc}")
+
+    def start_window_record(
+        self,
+        save_path: str | None = None,
+        fps: int = 20,
+        max_memory: int = 1024,
+        video_prefix: str = "viewer_record",
+    ) -> bool:
+        """Start asynchronously recording the viewer by buffering frames from a hidden camera
+        that follows the live window camera pose.
+        """
+        if self._window is None:
+            logger.log_warning("No simulation window available for viewer recording.")
+            return False
+        width = self.sim_config.width
+        height = self.sim_config.height
+        if self._window_record_camera is None:
+            camera_name = f"viewer_record_camera_{self.instance_id}"
+            self._window_record_camera = self._env.create_camera(
+                camera_name, width, height, True, ViewFlags.ALL
+            )
+        record_camera = self._window_record_camera
+        if hasattr(record_camera, "is_open") and record_camera.is_open() is False:
+            record_camera.open_camera()
+
+        time_step = 1.0 / float(fps)
+        output_dir, video_name = self._build_window_record_output(
+            save_path, video_prefix
+        )
+        state = _WindowRecordState(
+            time_step=time_step,
+            max_memory_bytes=max_memory * 1024 * 1024,
+            output_dir=output_dir,
+            video_name=video_name,
+            save_kwargs={"fps": fps},
+            record_camera=record_camera,
+            last_capture_time=time.time() - time_step,
+        )
+
+        def _window_record_loop(_: float) -> int:
+            return self._step_window_record(state)
+
+        state.loop_handle = self._world.thread_rt().add_loop(
+            _window_record_loop, time_step
+        )
+        self._window_record_state = state
+
+        logger.log_info(
+            f"Viewer recording started. Press 'r' again to stop and save to "
+            f"{os.path.join(output_dir, video_name + '.mp4')}"
+        )
+        return True
+
+    def stop_window_record(self, save_path: str | None = None) -> bool:
+        """Stop the active viewer recording and save frames in the background."""
+        if self._window_record_state is None:
+            logger.log_warning("No active viewer recording session found.")
+            return False
+
+        state = self._window_record_state
+        state.task_status = TASK_RETURN.TASK_EXIT
+        if save_path is not None:
+            output_dir, video_name = self._build_window_record_output(
+                save_path, "viewer_record"
+            )
+        else:
+            output_dir, video_name = state.output_dir, state.video_name
+
+        if state.record_camera is not None and hasattr(state.record_camera, "is_open"):
+            if state.record_camera.is_open():
+                state.record_camera.close_camera()
+
+        frames = list(state.frames)
+        self._window_record_state = None
+        if len(frames) == 0:
+            logger.log_warning(
+                "Viewer recording stopped, but no frames were captured. Skipping video export."
+            )
+            return False
+
+        self._window_record_save_threads = [
+            thread for thread in self._window_record_save_threads if thread.is_alive()
+        ]
+        save_thread = threading.Thread(
+            target=self._save_window_record_worker,
+            args=(frames, output_dir, video_name, dict(state.save_kwargs)),
+            daemon=False,
+        )
+        save_thread.start()
+        self._window_record_save_threads.append(save_thread)
+        logger.log_info(
+            "Viewer recording stopped. Saving video to "
+            f"{os.path.join(output_dir, video_name + '.mp4')} in background."
+        )
+        return True
+
+    def toggle_window_record(
+        self,
+        save_path: str | None = None,
+        fps: int = 20,
+        max_memory: int = 1024,
+        video_prefix: str = "viewer_record",
+    ) -> bool:
+        """Toggle viewer recording on or off."""
+        if self.is_window_recording():
+            return self.stop_window_record(save_path=save_path)
+        return self.start_window_record(
+            save_path=save_path,
+            fps=fps,
+            max_memory=max_memory,
+            video_prefix=video_prefix,
+        )
+
+    def enable_window_record_hotkey(
+        self,
+        save_path: str | None = None,
+        fps: int = 20,
+        max_memory: int = 1024,
+        video_prefix: str = "viewer_record",
+    ) -> bool:
+        """Register the ``r`` key to start/stop viewer recording."""
+        self._window_record_hotkey_cfg = {
+            "save_path": save_path,
+            "fps": fps,
+            "max_memory": max_memory,
+            "video_prefix": video_prefix,
+        }
+        if self._window is None:
+            logger.log_warning(
+                "No simulation window available yet. The viewer record hotkey will be registered after `open_window()`."
+            )
+            return False
+        if self._window_record_input_control is not None:
+            return True
+
+        from dexsim.types import InputKey
+
+        sim = self
+        hotkey_cfg = dict(self._window_record_hotkey_cfg)
+
+        class WindowRecordEvent(ObjectManipulator):
+            def on_key_down(self, key):
+                if key == InputKey.SCANCODE_R.value:
+                    sim.toggle_window_record(**hotkey_cfg)
+
+        self._window_record_input_control = WindowRecordEvent()
+        self._window.add_input_control(self._window_record_input_control)
+        logger.log_info(
+            "Viewer record hotkey registered. Press 'r' to start/stop recording."
+        )
+        return True
+
     def create_visual_material(self, cfg: VisualMaterialCfg) -> VisualMaterial:
         """Create a visual material with given configuration.
 
@@ -1735,7 +2309,8 @@ def get_visual_material(self, uid: str) -> VisualMaterial:
 
     def clean_materials(self):
         self._visual_materials = {}
-        self._env.clean_materials()
+        if self._env:
+            self._env.clean_materials()
 
     def reset_objects_state(
         self,
@@ -1785,15 +2360,136 @@ def export_usd(self, fpath: str) -> bool:
             logger.log_error(f"Failed to export simulation scene to USD: {e}")
             return False
 
+    @staticmethod
+    def wait_scene_destruction(timeout_ms: int = 10000) -> None:
+        """A public helper to wait for the underlying C++ scenes (dexsim.World) to destruct completely."""
+        import dexsim
+        import gc
+
+        # Force garbage collection to break cycle references
+        gc.collect()
+
+        import time
+
+        wait_times = 0
+        scene_count = dexsim.get_world_num()
+        max_loops = timeout_ms // 10
+        while scene_count > 0 and wait_times < max_loops:
+            time.sleep(0.01)
+            scene_count = dexsim.get_world_num()
+            wait_times += 1
+            if wait_times % 50 == 0:
+                from embodichain.utils import logger
+
+                logger.log_info(
+                    f"Waiting for dexsim.World scenes to destruct. Remaining scenes: {scene_count}"
+                )
+        if scene_count > 0:
+            from embodichain.utils import logger
+
+            logger.log_warning(
+                f"Scene destruction wait timeout, {scene_count} C++ scene(s) still alive!"
+            )
+
     def destroy(self) -> None:
+        """
+        No longer destructs C++ objects in place due to lingering deep local variables;
+        instead, packages itself into a destruction task, submits to the cleanup queue,
+        and waits for top-level delayed consumption.
+        """
+        self._is_pending_kill = True
+
+        # Transfer the actual destruction logic to the cleanup queue
+        SimulationManager._cleanup_queue.put(self._deferred_destroy)
+
+    def _deferred_destroy(self) -> None:
         """Destroy all simulated assets and release resources."""
         # Clean up all gizmos before destroying the simulation
         for uid in list(self._gizmos.keys()):
             self.disable_gizmo(uid)
 
+        import sys, gc
+
         self.clean_materials()
 
-        self._env.clean()
-        self._world.quit()
+        if self._env:
+            self._env.clean()
+        if self._world:
+            self._world.quit()
+
+        # REMOVE INSTANCE FROM POOL
+        instance_id = getattr(self, "instance_id", 0)
+        SimulationManager.reset(instance_id)
+
+        # Helper to aggressively decouple C++ wrapped objects
+        def _sever_wrapper_refs(obj_registry):
+            if not hasattr(self, obj_registry):
+                return
+            registry = getattr(self, obj_registry)
+            if not isinstance(registry, dict):
+                return
+            for uid, obj in registry.items():
+                if hasattr(obj, "_world"):
+                    obj._world = None
+                if hasattr(obj, "_ps"):
+                    obj._ps = None
+                if hasattr(obj, "_env"):
+                    obj._env = None
+                if hasattr(obj, "_entities"):
+                    obj._entities = []
+            registry.clear()
+
+        _sever_wrapper_refs("_gizmos")
+        _sever_wrapper_refs("_markers")
+        _sever_wrapper_refs("_rigid_objects")
+        _sever_wrapper_refs("_rigid_object_groups")
+        _sever_wrapper_refs("_soft_objects")
+        _sever_wrapper_refs("_cloth_objects")
+        _sever_wrapper_refs("_articulations")
+        _sever_wrapper_refs("_robots")
+        _sever_wrapper_refs("_sensors")
+        _sever_wrapper_refs("_lights")
+
+        # Explicitly clear Python references to trigger C++ object destructors
+        self._ps = None
+        self._env = None
+        self._world = None
+        self._default_plane = None
+
+        # Try to break ANY possible frame cycle
+        gc.collect()
+
+        self._visual_materials.clear()
+        self._texture_cache.clear()
+        self._arenas.clear()
+        self._markers.clear()
+        self._gizmos.clear()
 
         SimulationManager.reset(self.instance_id)
+
+        # Forcefully drop underlying C++ object wrappers
+        self._env = None
+        self._world = None
+
+        gc.collect()
+
+    @staticmethod
+    def flush_cleanup_queue():
+        """Dequeue executor and synchronization barrier provided for top-level main loop / Pytest Fixture calls"""
+        import gc
+
+        while not SimulationManager._cleanup_queue.empty():
+            task = SimulationManager._cleanup_queue.get_nowait()
+            try:
+                task()
+            except Exception as e:
+                from embodichain.utils import logger
+
+                logger.log_error(f"Error during delayed destruction: {e}")
+                pass
+
+        # After the queue is emptied, perform a top-level full GC to thoroughly reclaim dead objects that haven't released their RefPtrs yet
+        gc.collect()
+
+        # At this point, wait for the C++ Scene to return to zero, since the stack is at the top level, there will definitely be no deadlock
+        SimulationManager.wait_scene_destruction()
diff --git a/embodichain/lab/sim/solvers/base_solver.py b/embodichain/lab/sim/solvers/base_solver.py
index 143e3a89..c7fc70f2 100644
--- a/embodichain/lab/sim/solvers/base_solver.py
+++ b/embodichain/lab/sim/solvers/base_solver.py
@@ -72,6 +72,13 @@ class SolverCfg:
     when multiple solutions are available.
     """
 
+    user_qpos_limits: List[float] | None = None
+    """
+        User defined Joint position limits [2, DOF] for the solver. 
+        If not provided (None), this value will replace by joint limits defined in urdf when solver init from robot.
+        If provided, the solver will use the intersection of user defined limits and urdf limits as the final joint limits.
+    """
+
     @abstractmethod
     def init_solver(self, device: torch.device, **kwargs) -> "BaseSolver":
         pass
@@ -165,6 +172,14 @@ def __init__(self, cfg: SolverCfg = None, device: str = None, **kwargs):
                 device=self.device,
             )
 
+            self.compiled_fk = torch.compile(
+                self.pk_serial_chain.forward_kinematics_tensor,
+                fullgraph=True,
+                dynamic=True,
+            )
+
+        self._init_qpos_limits()
+
     def set_ik_nearest_weight(
         self, ik_weight: np.ndarray, joint_ids: np.ndarray | None = None
     ) -> bool:
@@ -223,51 +238,126 @@ def get_ik_nearest_weight(self):
         """
         return self.ik_nearest_weight
 
-    def set_position_limits(
+    def _init_qpos_limits(self):
+        self.lower_qpos_limits = None
+        self.upper_qpos_limits = None
+        if self.cfg.user_qpos_limits is not None:
+            # robot qpos limits from config, expected shape [DOF, 2]
+            user_qpos_limits = torch.tensor(
+                self.cfg.user_qpos_limits, dtype=torch.float32, device=self.device
+            )
+            if user_qpos_limits.shape == (2, self.dof):
+                self.set_qpos_limits(
+                    lower_qpos_limits=user_qpos_limits[0],
+                    upper_qpos_limits=user_qpos_limits[1],
+                )
+            elif user_qpos_limits.shape == (self.dof, 2):
+                self.set_qpos_limits(
+                    lower_qpos_limits=user_qpos_limits[:, 0],
+                    upper_qpos_limits=user_qpos_limits[:, 1],
+                )
+            else:
+                logger.log_error(
+                    f"user_qpos_limits must have shape (2, {self.dof}) or ({self.dof}, 2), but got {user_qpos_limits.shape}."
+                )
+        elif self.pk_serial_chain is not None:
+            self.set_qpos_limits(
+                lower_qpos_limits=self.pk_serial_chain.low,
+                upper_qpos_limits=self.pk_serial_chain.high,
+            )
+
+    def update_with_robot_limit(self, robot_qpos_limits: torch.Tensor):
+        """Update with robot joint limits.
+            Make sure the solver's joint limits are within the robot's joint limits.
+
+        Args:
+            robot_qpos_limits (torch.Tensor): [DOF, 2] tensor of joint limits from the robot data
+        """
+        robot_lower_limits = robot_qpos_limits[:, 0]
+        robot_upper_limits = robot_qpos_limits[:, 1]
+        if self.lower_qpos_limits is not None:
+            if torch.any(self.lower_qpos_limits < robot_lower_limits):
+                logger.log_warning(
+                    "Solver lower_qpos_limits are smaller than robot limits. Clamping to robot limits."
+                )
+                self.lower_qpos_limits = torch.max(
+                    self.lower_qpos_limits, robot_lower_limits
+                )
+        else:
+            self.lower_qpos_limits = robot_lower_limits
+        if self.upper_qpos_limits is not None:
+            if torch.any(self.upper_qpos_limits > robot_upper_limits):
+                logger.log_warning(
+                    "Solver upper_qpos_limits are larger than robot limits. Clamping to robot limits."
+                )
+                self.upper_qpos_limits = torch.min(
+                    self.upper_qpos_limits, robot_upper_limits
+                )
+        else:
+            self.upper_qpos_limits = robot_upper_limits
+
+    def set_qpos_limits(
         self,
-        lower_position_limits: List[float],
-        upper_position_limits: List[float],
+        lower_qpos_limits: List[float],
+        upper_qpos_limits: List[float],
     ) -> bool:
         r"""Sets the upper and lower joint position limits.
 
         Parameters:
-            lower_position_limits (List[float]): A list of lower limits for each joint.
-            upper_position_limits (List[float]): A list of upper limits for each joint.
+            lower_qpos_limits (List[float]): A list of lower limits for each joint.
+            upper_qpos_limits (List[float]): A list of upper limits for each joint.
 
         Returns:
             bool: True if limits are successfully set, False if the input is invalid.
         """
-        if (
-            len(lower_position_limits) != self.model.nq
-            or len(upper_position_limits) != self.model.nq
-        ):
-            logger.log_warning("Length of limits must match the number of joints.")
-            return False
 
         if any(
-            lower > upper
-            for lower, upper in zip(lower_position_limits, upper_position_limits)
+            lower > upper for lower, upper in zip(lower_qpos_limits, upper_qpos_limits)
         ):
             logger.log_warning(
                 "Each lower limit must be less than or equal to the corresponding upper limit."
             )
             return False
 
-        self.lower_position_limits = np.array(lower_position_limits)
-        self.upper_position_limits = np.array(upper_position_limits)
+        if isinstance(lower_qpos_limits, list) or isinstance(
+            lower_qpos_limits, np.ndarray
+        ):
+            self.lower_qpos_limits = torch.tensor(
+                lower_qpos_limits, dtype=float, device=self.device
+            )
+        elif isinstance(lower_qpos_limits, torch.Tensor):
+            self.lower_qpos_limits = lower_qpos_limits.clone().to(device=self.device)
+        else:
+            logger.log_error(
+                f"Invalid type for lower_qpos_limits: {type(lower_qpos_limits)}. Must be list, np.ndarray, or torch.Tensor."
+            )
+
+        if isinstance(upper_qpos_limits, list) or isinstance(
+            upper_qpos_limits, np.ndarray
+        ):
+            self.upper_qpos_limits = torch.tensor(
+                upper_qpos_limits, dtype=float, device=self.device
+            )
+        elif isinstance(upper_qpos_limits, torch.Tensor):
+            self.upper_qpos_limits = upper_qpos_limits.clone().to(device=self.device)
+        else:
+            logger.log_error(
+                f"Invalid type for upper_qpos_limits: {type(upper_qpos_limits)}. Must be list, np.ndarray, or torch.Tensor."
+            )
+
         return True
 
-    def get_position_limits(self) -> dict:
+    def get_qpos_limits(self) -> dict:
         r"""Returns the current joint position limits.
 
         Returns:
             dict: A dictionary containing:
-                - lower_position_limits (List[float]): The current lower limits for each joint.
-                - upper_position_limits (List[float]): The current upper limits for each joint.
+                - lower_qpos_limits (List[float]): The current lower limits for each joint.
+                - upper_qpos_limits (List[float]): The current upper limits for each joint.
         """
         return {
-            "lower_position_limits": self.lower_position_limits.tolist(),
-            "upper_position_limits": self.upper_position_limits.tolist(),
+            "lower_qpos_limits": self.lower_qpos_limits.tolist(),
+            "upper_qpos_limits": self.upper_qpos_limits.tolist(),
         }
 
     def set_tcp(self, xpos: np.ndarray):
@@ -339,35 +429,18 @@ def get_fk(self, qpos: torch.tensor, **kwargs) -> torch.Tensor:
         )
         qpos = torch.as_tensor(qpos, dtype=torch.float32, device=self.device)
 
+        if self.pk_serial_chain is None:
+            logger.log_error("Kinematic chain is not initialized.")
+            return torch.eye(4, device=self.device)
         # Compute forward kinematics
-        result = self.pk_serial_chain.forward_kinematics(
-            qpos, end_only=(self.end_link_name is None)
-        )
-
-        # Extract transformation matrices
-        if isinstance(result, dict):
-            matrices = result[self.end_link_name].get_matrix()
-        elif isinstance(result, list):
-            matrices = torch.stack([xpos.get_matrix().squeeze() for xpos in result])
-        else:
-            matrices = result.get_matrix()
-
-        # Ensure batch format
-        if matrices.dim() == 2:
-            matrices = matrices.unsqueeze(0)
-
-        # Create result tensor with proper homogeneous coordinates
-        result = (
-            torch.eye(4, device=self.device).expand(matrices.shape[0], 4, 4).clone()
-        )
-        result[:, :3, :] = matrices[:, :3, :]
+        ee_link_xpos = self.compiled_fk(qpos)[-1, :, :, :]
 
         # Ensure batch format for TCP
-        batch_size = result.shape[0]
+        batch_size = qpos.shape[0]
         tcp_xpos_batch = tcp_xpos.unsqueeze(0).expand(batch_size, -1, -1)
 
         # Apply TCP transformation
-        return torch.bmm(result, tcp_xpos_batch)
+        return torch.bmm(ee_link_xpos, tcp_xpos_batch)
 
     def get_jacobian(
         self,
diff --git a/embodichain/lab/sim/solvers/differential_solver.py b/embodichain/lab/sim/solvers/differential_solver.py
index fc6e596b..12e51bcb 100644
--- a/embodichain/lab/sim/solvers/differential_solver.py
+++ b/embodichain/lab/sim/solvers/differential_solver.py
@@ -25,7 +25,6 @@
     compute_pose_error,
 )
 
-
 if TYPE_CHECKING:
     from typing import Self
 
diff --git a/embodichain/lab/sim/solvers/opw_solver.py b/embodichain/lab/sim/solvers/opw_solver.py
index 4d8f9047..e3202597 100644
--- a/embodichain/lab/sim/solvers/opw_solver.py
+++ b/embodichain/lab/sim/solvers/opw_solver.py
@@ -29,12 +29,11 @@
     OPWparam,
     opw_fk_kernel,
     opw_ik_kernel,
-    opw_best_ik_kernel,
+    opw_ik_select_kernel,
     wp_vec6f,
 )
 from embodichain.utils.device_utils import standardize_device_string
 
-
 if TYPE_CHECKING:
     from typing import Self
 
@@ -72,6 +71,9 @@ class OPWSolverCfg(SolverCfg):
     # Parameters for the inverse-kinematics method.
     ik_params: dict | None = None
 
+    # safe margin for joint limits, in radians
+    safe_margin: float = 0.0  # 5.0 * np.pi / 180.0
+
     def init_solver(
         self, device: torch.device = torch.device("cpu"), **kwargs
     ) -> "OPWSolver":
@@ -125,7 +127,7 @@ def set_tcp(self, xpos: np.ndarray):
         self._tcp_warp = wp.mat44f(self.tcp_xpos)
         tcp_inv = np.eye(4, dtype=float)
         tcp_inv[:3, :3] = self.tcp_xpos[:3, :3].T
-        tcp_inv[:3, 3] = -tcp_inv[:3, :3].T @ self.tcp_xpos[:3, 3]
+        tcp_inv[:3, 3] = -tcp_inv[:3, :3] @ self.tcp_xpos[:3, 3]
         self._tcp_inv_warp = wp.mat44f(tcp_inv)
 
     def _init_warp_solver(self, cfg: OPWSolverCfg, **kwargs):
@@ -247,23 +249,44 @@ def get_ik_warp(
         N_SOL = 8
         DOF = 6
         n_sample = target_xpos.shape[0]
+        kernel_device = standardize_device_string(self.device)
 
         if target_xpos.shape == (4, 4):
-            target_xpos_batch = target_xpos[None, :, :]
+            target_xpos_batch = target_xpos[None, :, :].to(kernel_device)
         else:
-            target_xpos_batch = target_xpos
+            target_xpos_batch = target_xpos.to(kernel_device)
         target_xpos_wp = wp.from_torch(target_xpos_batch.reshape(-1))
 
         all_qpos_wp = wp.zeros(
             n_sample * N_SOL * DOF,
             dtype=float,
-            device=standardize_device_string(self.device),
+            device=standardize_device_string(kernel_device),
         )
         all_ik_valid_wp = wp.zeros(
-            n_sample * N_SOL, dtype=int, device=standardize_device_string(self.device)
+            n_sample * N_SOL, dtype=int, device=standardize_device_string(kernel_device)
         )
 
         # TODO: whether require gradient
+        offsets_ = self.offsets.to(standardize_device_string(kernel_device))
+        sign_corrections_ = self.sign_corrections.to(
+            standardize_device_string(kernel_device)
+        )
+        lower_limits_ = wp_vec6f(
+            self.lower_qpos_limits[0],
+            self.lower_qpos_limits[1],
+            self.lower_qpos_limits[2],
+            self.lower_qpos_limits[3],
+            self.lower_qpos_limits[4],
+            self.lower_qpos_limits[5],
+        )
+        upper_limits_ = wp_vec6f(
+            self.upper_qpos_limits[0],
+            self.upper_qpos_limits[1],
+            self.upper_qpos_limits[2],
+            self.upper_qpos_limits[3],
+            self.upper_qpos_limits[4],
+            self.upper_qpos_limits[5],
+        )
         wp.launch(
             kernel=opw_ik_kernel,
             dim=(n_sample),
@@ -271,26 +294,42 @@ def get_ik_warp(
                 target_xpos_wp,
                 self._tcp_inv_warp,
                 self.params,
-                self.offsets,
-                self.sign_corrections,
+                offsets_,
+                sign_corrections_,
+                lower_limits_,
+                upper_limits_,
+                self.cfg.safe_margin,
             ),
             outputs=[all_qpos_wp, all_ik_valid_wp],
-            device=standardize_device_string(self.device),
+            device=standardize_device_string(kernel_device),
         )
 
         if return_all_solutions:
             all_qpos = wp.to_torch(all_qpos_wp).reshape(n_sample, N_SOL, DOF)
             all_ik_valid = wp.to_torch(all_ik_valid_wp).reshape(n_sample, N_SOL)
             return all_ik_valid, all_qpos
-
         if qpos_seed is not None:
-            qpos_seed_wp = wp.from_torch(qpos_seed.reshape(-1))
+            if qpos_seed.shape == (
+                n_sample,
+                DOF,
+            ):
+                qpos_seed_ = qpos_seed.to(kernel_device)
+            elif qpos_seed.shape == (DOF,):
+                qpos_seed_ = (
+                    qpos_seed.unsqueeze(0).repeat(n_sample, 1).to(kernel_device)
+                )
+            else:
+                logger.log_error(
+                    f"Invalid shape for qpos_seed: {qpos_seed.shape}. Expected ({n_sample}, {DOF}) or ({DOF},)."
+                )
+            qpos_seed_wp = wp.from_torch(qpos_seed_)
         else:
-            qpos_seed_wp = wp.zeros(
-                n_sample * DOF,
-                dtype=float,
-                device=standardize_device_string(self.device),
+            qpos_seed = torch.zeros(
+                (n_sample, DOF), dtype=torch.float32, device=kernel_device
             )
+            qpos_seed_wp = wp.from_torch(qpos_seed)
+        all_qpos_wp = all_qpos_wp.reshape((n_sample, N_SOL, DOF))
+        all_ik_valid_wp = all_ik_valid_wp.reshape((n_sample, N_SOL))
         joint_weight = kwargs.get("joint_weight", torch.ones(size=(DOF,), dtype=float))
         joint_weight_wp = wp_vec6f(
             joint_weight[0],
@@ -301,13 +340,13 @@ def get_ik_warp(
             joint_weight[5],
         )
         best_ik_result_wp = wp.zeros(
-            n_sample * 6, dtype=float, device=standardize_device_string(self.device)
+            (n_sample, 6), dtype=float, device=standardize_device_string(kernel_device)
         )
         best_ik_valid_wp = wp.zeros(
-            n_sample, dtype=int, device=standardize_device_string(self.device)
+            n_sample, dtype=int, device=standardize_device_string(kernel_device)
         )
         wp.launch(
-            kernel=opw_best_ik_kernel,
+            kernel=opw_ik_select_kernel,
             dim=(n_sample),
             inputs=[
                 all_qpos_wp,
@@ -315,11 +354,17 @@ def get_ik_warp(
                 qpos_seed_wp,
                 joint_weight_wp,
             ],
-            outputs=[best_ik_result_wp, best_ik_valid_wp],
-            device=standardize_device_string(self.device),
+            outputs=[
+                best_ik_result_wp,
+                best_ik_valid_wp,
+            ],
+            device=standardize_device_string(kernel_device),
+        )
+
+        best_ik_result = (
+            wp.to_torch(best_ik_result_wp).reshape(n_sample, 1, 6).to(self.device)
         )
-        best_ik_result = wp.to_torch(best_ik_result_wp).reshape(n_sample, 1, 6)
-        best_ik_valid = wp.to_torch(best_ik_valid_wp)
+        best_ik_valid = wp.to_torch(best_ik_valid_wp).to(self.device)
         return best_ik_valid, best_ik_result
 
     def _calculate_dynamic_weights(
diff --git a/embodichain/lab/sim/solvers/pinocchio_solver.py b/embodichain/lab/sim/solvers/pinocchio_solver.py
index ec7e345a..9ddde65b 100644
--- a/embodichain/lab/sim/solvers/pinocchio_solver.py
+++ b/embodichain/lab/sim/solvers/pinocchio_solver.py
@@ -35,7 +35,6 @@
     compute_pinocchio_fk,
 )
 
-
 if TYPE_CHECKING:
     from typing import Self
 
@@ -129,9 +128,6 @@ def __init__(self, cfg: PinocchioSolverCfg, **kwargs):
             self.robot.model.njoints - 1
         )  # Degrees of freedom of reduced robot joints
 
-        self.upper_position_limits = self.robot.model.upperPositionLimit
-        self.lower_position_limits = self.robot.model.lowerPositionLimit
-
         self.ik_nearest_weight = np.ones(self.dof)
 
         # TODO: The Casadi-based solver is currently disabled due to stability issues.
@@ -325,12 +321,14 @@ def qpos_to_limits(
 
         # Generate possible values for each joint
         dof_num = len(q)
+        lower_limits = self.lower_qpos_limits.to("cpu").numpy()
+        upper_limits = self.upper_qpos_limits.to("cpu").numpy()
         for i in range(dof_num):
             current_possible_values = []
 
             # Calculate how many 2π fits into the adjustment to the limits
-            lower_adjustment = (q[i] - self.lower_position_limits[i]) // (2 * np.pi)
-            upper_adjustment = (self.upper_position_limits[i] - q[i]) // (2 * np.pi)
+            lower_adjustment = (q[i] - lower_limits[i]) // (2 * np.pi)
+            upper_adjustment = (upper_limits[i] - q[i]) // (2 * np.pi)
 
             # Consider the current value and its periodic adjustments
             for offset in range(
@@ -339,15 +337,11 @@ def qpos_to_limits(
                 adjusted_value = q[i] + offset * (2 * np.pi)
 
                 # Check if the adjusted value is within limits
-                if (
-                    self.lower_position_limits[i]
-                    <= adjusted_value
-                    <= self.upper_position_limits[i]
-                ):
+                if lower_limits[i] <= adjusted_value <= upper_limits[i]:
                     current_possible_values.append(adjusted_value)
 
             # Also check the original value
-            if self.lower_position_limits[i] <= q[i] <= self.upper_position_limits[i]:
+            if lower_limits[i] <= q[i] <= upper_limits[i]:
                 current_possible_values.append(q[i])
 
             if not current_possible_values:
diff --git a/embodichain/lab/sim/solvers/pytorch_solver.py b/embodichain/lab/sim/solvers/pytorch_solver.py
index cdcdc562..c0fcf465 100644
--- a/embodichain/lab/sim/solvers/pytorch_solver.py
+++ b/embodichain/lab/sim/solvers/pytorch_solver.py
@@ -170,13 +170,11 @@ def __init__(
             max_iterations=self._max_iterations,
             lr=self._dt,
             num_retries=1,
+            use_compile=True,
         )
 
         self.dof = self.pk_serial_chain.n_joints
 
-        self.upper_position_limits = self.pk_serial_chain.high
-        self.lower_position_limits = self.pk_serial_chain.low
-
     def get_iteration_params(self) -> dict:
         r"""Returns the current iteration parameters.
 
@@ -247,6 +245,7 @@ def set_iteration_params(
             max_iterations=self._max_iterations,
             lr=self._dt,
             num_retries=1,
+            use_compile=True,
         )
 
         return True
@@ -284,106 +283,40 @@ def _compute_inverse_kinematics(
         self.pik.initial_config = joint_seed
 
         result = self.pik.solve(tf)
+        return result.converged_any, result.solutions[:, 0, :].squeeze(0)
 
-        if result.converged_any.any().item():
-            return result.converged_any, result.solutions[:, 0, :].squeeze(0)
-
-        return False, torch.empty(0)
-
-    @staticmethod
-    def _qpos_to_limits_single(
-        q: torch.Tensor,
-        joint_seed: torch.Tensor,
-        lower_position_limits: torch.Tensor,
-        upper_position_limits: torch.Tensor,
-        ik_nearest_weight: torch.Tensor,
-        periodic_mask: torch.Tensor = None,  # Optional mask for periodic joints
-    ) -> torch.Tensor:
-        """
-        Adjusts the given joint positions (q) to fit within the specified limits while minimizing the difference to the seed position.
-
-        Args:
-            q (torch.Tensor): The initial joint positions.
-            joint_seed (torch.Tensor): The seed joint positions for comparison.
-            lower_position_limits (torch.Tensor): The lower bounds for the joint positions.
-            upper_position_limits (torch.Tensor): The upper bounds for the joint positions.
-            ik_nearest_weight (torch.Tensor): The weights for the inverse kinematics nearest calculation.
-            periodic_mask (torch.Tensor, optional): Boolean mask indicating which joints are periodic.
-
-        Returns:
-            torch.Tensor: The adjusted joint positions that fit within the limits.
-        """
-        device = q.device
-        joint_seed = joint_seed.to(device)
-        lower = lower_position_limits.to(device)
-        upper = upper_position_limits.to(device)
-        weight = ik_nearest_weight.to(device)
-
-        # If periodic_mask is not provided, assume all joints are periodic
-        if periodic_mask is None:
-            periodic_mask = torch.ones_like(q, dtype=torch.bool, device=device)
-
-        # Only enumerate [-2π, 0, 2π] for periodic joints, single value for non-periodic
-        offsets = torch.tensor([-2 * torch.pi, 0, 2 * torch.pi], device=device)
-        candidate_list = []
-        for i in range(q.size(0)):
-            if periodic_mask[i]:
-                candidate_list.append(q[i] + offsets)
-            else:
-                candidate_list.append(q[i].unsqueeze(0))
-        # Generate all possible combinations
-        mesh = torch.meshgrid(*candidate_list, indexing="ij")
-        candidates = torch.stack([m.reshape(-1) for m in mesh], dim=1)
-        # Filter candidates that are out of limits
-        mask = (candidates >= lower) & (candidates <= upper)
-        valid_mask = mask.all(dim=1)
-        valid_candidates = candidates[valid_mask]
-        if valid_candidates.shape[0] == 0:
-            return torch.tensor([]).to(device)
-        # Compute weighted distance to seed and select the closest
-        diffs = torch.abs(valid_candidates - joint_seed) * weight
-        distances = torch.sum(diffs, dim=1)
-        min_idx = torch.argmin(distances)
-        return valid_candidates[min_idx]
-
-    def _qpos_to_limits(
-        self, qpos_list_split: torch.Tensor, joint_seed: torch.Tensor
-    ) -> torch.Tensor:
-        r"""Adjusts a batch of joint positions to fit within joint limits and minimize the weighted distance to the seed position.
+    def _qpos_map_to_limits(
+        self, qpos: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        r"""Maps a batch of joint positions to fit within joint limits and computes the distance to the seed position.
 
         Args:
-            qpos_list_split (torch.Tensor): Batch of candidate joint positions, shape (N, dof).
-            joint_seed (torch.Tensor): The reference joint positions for comparison, shape (dof,).
-
+            qpos (torch.Tensor): Batch of candidate joint positions, shape (N, dof).
         Returns:
-            torch.Tensor: Batch of adjusted joint positions that fit within the limits, shape (M, dof),
-                        where M <= N (invalid candidates are filtered out).
+            tuple[torch.Tensor, torch.Tensor]: A tuple containing:
+                - torch.Tensor: whether qpos exactly within joint limit, shape (N).
+                - torch.Tensor: qpos that roughly mapped into joint limit, shape (N, dof).
         """
-
-        periodic_mask = torch.ones_like(
-            qpos_list_split[0], dtype=torch.bool, device=self.device
+        two_pi = 2.0 * torch.pi
+        k = torch.ceil((self.lower_qpos_limits - qpos) / two_pi)
+        qpos_mapped = qpos + k * two_pi
+        is_within_limits = (qpos_mapped >= self.lower_qpos_limits) & (
+            qpos_mapped <= self.upper_qpos_limits
         )
 
-        adjusted_qpos_list = [
-            self._qpos_to_limits_single(
-                q,
-                joint_seed,
-                self.lower_position_limits,
-                self.upper_position_limits,
-                self.ik_nearest_weight,
-                periodic_mask,
-            )
-            for q in qpos_list_split
+        # if qpos_mapped is valid near zero, use it
+        k_zero = torch.ceil(
+            (-torch.pi - qpos) / two_pi
+        )  # [-pi, pi] is the valid range near zero
+        qpos_mapped_near_zero = qpos + k_zero * two_pi
+        is_within_limits_near_zero = (
+            qpos_mapped_near_zero >= self.lower_qpos_limits
+        ) & (qpos_mapped_near_zero <= self.upper_qpos_limits)
+        qpos_mapped[is_within_limits_near_zero] = qpos_mapped_near_zero[
+            is_within_limits_near_zero
         ]
 
-        # Filter out empty results
-        adjusted_qpos_list = [q for q in adjusted_qpos_list if q.numel() > 0]
-
-        return (
-            torch.stack(adjusted_qpos_list).to(qpos_list_split.device)
-            if adjusted_qpos_list
-            else torch.tensor([], device=self.device)
-        )
+        return is_within_limits.all(dim=1), qpos_mapped
 
     @ensure_pose_shape
     def get_ik(
@@ -433,27 +366,28 @@ def get_ik(
             qpos_seed = torch.as_tensor(qpos_seed, device=self.device)
 
         # Check qpos_seed dimensions
-        if qpos_seed.dim() == 1:
-            qpos_seed = qpos_seed.unsqueeze(0)
-            qpos_seed_ndim = 1
-        elif qpos_seed.dim() == 2:
-            qpos_seed_ndim = 2
-            if qpos_seed.shape[0] != target_xpos.shape[0]:
-                raise ValueError(
-                    "Batch size of qpos_seed must match batch size of target_xpos when qpos_seed is a 2D tensor."
-                )
+        n_batch = target_xpos.shape[0]
+        if qpos_seed.shape == (n_batch, self.dof):
+            qpos_seed = qpos_seed
+        elif qpos_seed.shape == (self.dof,):
+            qpos_seed = qpos_seed.unsqueeze(0).repeat(n_batch, 1)
         else:
-            raise ValueError("`qpos_seed` must be a tensor of shape (n,) or (n, n).")
+            logger.log_error(
+                f"Invalid qpos_seed shape {qpos_seed.shape} for batch_size {n_batch} and dof {self.dof}",
+                ValueError,
+            )
+        # output qpos_seed shape: (batch_size, dof)
 
         # Transform target_xpos by TCP
         tcp_xpos = torch.as_tensor(
-            deepcopy(self.tcp_xpos), device=self.device, dtype=torch.float32
+            self.tcp_xpos, device=self.device, dtype=torch.float32
         )
-        target_xpos = target_xpos @ torch.inverse(tcp_xpos)
+        tcp_xpos_inv = tcp_xpos.clone()
+        tcp_xpos_inv[:3, :3] = tcp_xpos_inv[:3, :3].T
+        tcp_xpos_inv[:3, 3] = -tcp_xpos_inv[:3, :3] @ tcp_xpos_inv[:3, 3]
+        target_xpos = target_xpos @ tcp_xpos_inv
 
         # Get joint limits and ensure shape matches dof
-        upper_limits = self.upper_position_limits.float()
-        lower_limits = self.lower_position_limits.float()
 
         batch_size = target_xpos.shape[0]
 
@@ -461,79 +395,43 @@ def get_ik(
             num_samples=self._num_samples, dof=self.dof, device=self.device
         )
         random_qpos_seeds = sampler.sample(
-            qpos_seed, lower_limits, upper_limits, batch_size
+            qpos_seed,
+            self.lower_qpos_limits,
+            self.upper_qpos_limits,
+            batch_size,
         )
         target_xpos_repeated = sampler.repeat_target_xpos(
             target_xpos, self._num_samples
         )
 
         # Compute IK solutions for all samples
-        res_list, qpos_list = self._compute_inverse_kinematics(
+        is_ik_success, ik_qpos = self._compute_inverse_kinematics(
             target_xpos_repeated, random_qpos_seeds
         )
-
-        if not isinstance(res_list, torch.Tensor) or not res_list.any():
-            logger.log_warning(
-                "Pk: No valid solutions found for the given target poses and joint seeds."
-            )
-            return torch.zeros(
-                batch_size, dtype=torch.bool, device=self.device
-            ), torch.zeros((batch_size, self.dof), device=self.device)
-
-        # Split res_list and qpos_list according to self._num_samples
-        res_list_split = torch.split(res_list, self._num_samples)
-        qpos_list_split = torch.split(qpos_list, self._num_samples)
-
-        # Initialize the final results and the closest joint positions
-        final_results = []
-        final_qpos = []
-
-        # For each batch, select the closest valid solution to qpos_seed
-        for i in range(batch_size):
-            target_qpos_seed = qpos_seed[i] if qpos_seed_ndim == 2 else qpos_seed
-
-            if not res_list_split[i].any():
-                final_results.append(False)
-                final_qpos.append(torch.zeros((1, self.dof), device=self.device))
-                continue
-
-            result_qpos_limit = self._qpos_to_limits(
-                qpos_list_split[i], target_qpos_seed
-            )
-
-            if result_qpos_limit.shape[0] == 0:
-                final_results.append(False)
-                final_qpos.append(torch.zeros((self.dof), device=self.device))
-                continue
-
-            distances = torch.norm(result_qpos_limit - target_qpos_seed, dim=1)
-            sorted_indices = torch.argsort(distances)
-            # shape: (N, dof)
-            sorted_qpos_array = result_qpos_limit[sorted_indices]
-            final_qpos.append(sorted_qpos_array)
-            final_results.append(True)
-
-        # Pad all batches to the same number of solutions for stacking
-        max_solutions = max([q.shape[0] for q in final_qpos]) if final_qpos else 1
-        final_qpos_tensor = torch.zeros(
-            (batch_size, max_solutions, self.dof), device=self.device
-        )
-        for i, q in enumerate(final_qpos):
-            n = q.shape[0]
-            final_qpos_tensor[i, :n, :] = q
-
-        final_results = torch.tensor(
-            final_results, dtype=torch.bool, device=self.device
-        )
+        if is_ik_success.any().item() is False:
+            logger.log_warning("No IK solutions found for any of the target poses.")
+            failed_state = is_ik_success.reshape(batch_size, self._num_samples)[:, 0]
+            failed_qpos = ik_qpos.reshape(batch_size, self._num_samples, self.dof)[
+                :, 0, :
+            ]
+            return failed_state, failed_qpos
+        # map ik_qpos to within limits and check validity
+        is_mask_valid, ik_qpos_mapped = self._qpos_map_to_limits(ik_qpos)
+        is_success = torch.logical_and(is_ik_success, is_mask_valid)
+
+        all_is_success = is_success.reshape(batch_size, self._num_samples)
+        all_results = ik_qpos_mapped.reshape(batch_size, self._num_samples, self.dof)
 
         if return_all_solutions:
-            # Return all sorted solutions for each batch (shape: batch_size, max_solutions, dof)
-            return final_results, final_qpos_tensor
-
-        # Only return the closest solution for each batch (shape: batch_size, 1, dof)
-        # If multiple solutions, take the first (closest)
-        final_qpos_tensor = final_qpos_tensor[:, :1, :]
-        return final_results, final_qpos_tensor
+            return all_is_success.any(dim=1), all_results
+        qpos_seed_repeat = qpos_seed.unsqueeze(1).repeat(1, self._num_samples, 1)
+        weighed_diff = self.ik_nearest_weight * (all_results - qpos_seed_repeat)
+        qpos_seed_dis = torch.norm(weighed_diff, dim=2)
+        # Tricky: mask out invalid solutions by setting distance to inf, so they won't be selected as closest
+        qpos_seed_dis[~all_is_success] = float("inf")
+        closest_indices = torch.argmin(qpos_seed_dis, dim=1)
+        closest_qpos = all_results[torch.arange(batch_size), closest_indices]
+        return all_is_success.any(dim=1), closest_qpos[:, None, :]
 
     def get_all_fk(self, qpos: torch.tensor) -> torch.tensor:
         r"""Get the forward kinematics for all links from root to end link.
diff --git a/embodichain/lab/sim/solvers/qpos_seed_sampler.py b/embodichain/lab/sim/solvers/qpos_seed_sampler.py
index c6a4ef30..03674506 100644
--- a/embodichain/lab/sim/solvers/qpos_seed_sampler.py
+++ b/embodichain/lab/sim/solvers/qpos_seed_sampler.py
@@ -15,6 +15,7 @@
 # ----------------------------------------------------------------------------
 
 import torch
+from embodichain.utils import logger
 
 
 class QposSeedSampler:
@@ -52,22 +53,29 @@ def sample(
         Returns:
             torch.Tensor: (batch_size * num_samples, dof) joint seeds.
         """
-        joint_seeds_list = []
-        for i in range(batch_size):
-            current_seed = (
-                qpos_seed[i].unsqueeze(0)
-                if qpos_seed.shape[0] == batch_size
-                else qpos_seed
+        if qpos_seed.shape == (batch_size, self.dof):
+            seed_head = qpos_seed[:, None, :]
+        elif qpos_seed.shape == (self.dof,):
+            seed_head = qpos_seed.unsqueeze(0).repeat(batch_size, 1)[:, None, :]
+        else:
+            logger.log_error(
+                f"Invalid qpos_seed shape {qpos_seed.shape} for batch_size {batch_size} and dof {self.dof}",
+                ValueError,
             )
-            if self.num_samples > 1:
-                rand_part = lower_limits + (upper_limits - lower_limits) * torch.rand(
-                    (self.num_samples - 1, self.dof), device=self.device
-                )
-            else:
-                rand_part = torch.empty((0, self.dof), device=self.device)
-            seeds = torch.cat([current_seed, rand_part], dim=0)
-            joint_seeds_list.append(seeds)
-        return torch.cat(joint_seeds_list, dim=0)
+        n_random_samples = self.num_samples - 1
+
+        # seed_random = torch.rand(
+        #     size=(batch_size, n_random_samples, self.dof), device=self.device
+        # )
+
+        # save sampling time, repeat for each batch and sample in one go
+        seed_random = torch.rand(
+            size=(1, n_random_samples, self.dof), device=self.device
+        )
+        seed_random = seed_random.repeat(batch_size, 1, 1)
+        seed_random = lower_limits + (upper_limits - lower_limits) * seed_random
+        joint_seeds = torch.cat([seed_head, seed_random], dim=1)
+        return joint_seeds.reshape(-1, self.dof)
 
     def repeat_target_xpos(
         self, target_xpos: torch.Tensor, num_samples: int
@@ -81,8 +89,6 @@ def repeat_target_xpos(
         Returns:
             torch.Tensor: (batch_size * num_samples, 4, 4) or (batch_size * num_samples, 3, 3)
         """
-        repeated_list = [
-            target_xpos[i].unsqueeze(0).repeat(num_samples, 1, 1)
-            for i in range(target_xpos.shape[0])
-        ]
-        return torch.cat(repeated_list, dim=0)
+
+        target_xpos_repeated = target_xpos.unsqueeze(1).repeat(1, num_samples, 1, 1)
+        return target_xpos_repeated.reshape(-1, 4, 4)
diff --git a/embodichain/lab/sim/solvers/srs_solver.py b/embodichain/lab/sim/solvers/srs_solver.py
index 64c4f492..d68f470b 100644
--- a/embodichain/lab/sim/solvers/srs_solver.py
+++ b/embodichain/lab/sim/solvers/srs_solver.py
@@ -51,9 +51,6 @@ class SRSSolverCfg(SolverCfg):
     dh_params = []
     """Denavit-Hartenberg parameters for the robot's kinematic chain."""
 
-    qpos_limits = []
-    """Joint position limits for the robot."""
-
     T_b_ob = np.eye(4)
     """Base to observed base transform."""
 
@@ -107,9 +104,7 @@ def __init__(self, cfg: SRSSolverCfg, device: torch.device):
         self.device = device
         self.dofs = 7
         self.dh_params = cfg.dh_params
-        self.qpos_limits = cfg.qpos_limits
         self.tcp_xpos = np.eye(4)
-
         # Initialize transformation matrices
         self._parse_params()
 
@@ -122,7 +117,6 @@ def _parse_params(self):
 
         # Convert configuration parameters to numpy arrays for efficient computation.
         self.dh_params_np = np.asarray(self.cfg.dh_params)
-        self.qpos_limits_np = np.asarray(self.cfg.qpos_limits)
         self.link_lengths_np = np.asarray(self.cfg.link_lengths)
         self.rotation_directions_np = np.asarray(self.cfg.rotation_directions)
 
@@ -628,11 +622,6 @@ def _parse_params(self):
             dtype=float,
             device=standardize_device_string(self.device),
         )
-        self.qpos_limits_wp = wp.array(
-            self.qpos_limits_np,
-            dtype=wp.vec2,
-            device=standardize_device_string(self.device),
-        )
         self.link_lengths_wp = wp.array(
             self.link_lengths_np.flatten(),
             dtype=float,
@@ -1197,6 +1186,21 @@ def __init__(self, cfg: SRSSolverCfg, num_envs: int, device: str, **kwargs):
         else:
             self.impl = _CPUSRSSolverImpl(cfg, self.device)
 
+        self._update_impl_qpos_limits()
+
+    def _update_impl_qpos_limits(self):
+        qpos_limits = torch.vstack([self.lower_qpos_limits, self.upper_qpos_limits]).T
+        self.impl.qpos_limits_np = qpos_limits.cpu().numpy()
+        self.impl.qpos_limits_wp = wp.array(
+            self.impl.qpos_limits_np,
+            dtype=wp.vec2,
+            device=standardize_device_string(self.device),
+        )
+
+    def update_with_robot_limit(self, robot_qpos_limits):
+        super().update_with_robot_limit(robot_qpos_limits)
+        self._update_impl_qpos_limits()
+
     def get_ik(
         self,
         target_xpos: torch.Tensor,
diff --git a/embodichain/lab/sim/types.py b/embodichain/lab/sim/types.py
index 0a7f0c22..c727ea83 100644
--- a/embodichain/lab/sim/types.py
+++ b/embodichain/lab/sim/types.py
@@ -20,7 +20,6 @@
 from typing import Sequence, Union
 from tensordict import TensorDict
 
-
 Array = Union[torch.Tensor, np.ndarray, Sequence]
 Device = Union[str, torch.device]
 
diff --git a/embodichain/lab/sim/utility/keyboard_utils.py b/embodichain/lab/sim/utility/keyboard_utils.py
index f0646b25..d64eca18 100644
--- a/embodichain/lab/sim/utility/keyboard_utils.py
+++ b/embodichain/lab/sim/utility/keyboard_utils.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 # ----------------------------------------------------------------------------
 
+from __future__ import annotations
+
 import select
 import sys
 import tty
@@ -24,8 +26,11 @@
 import numpy as np
 
 from scipy.spatial.transform import Rotation as R
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from embodichain.lab.sim.sensors import Camera
 
-from embodichain.lab.sim.sensors import Camera
 from embodichain.utils.logger import log_info, log_error, log_warning
 
 
@@ -47,12 +52,6 @@ def run_keyboard_control_for_camera(
 
     sim = SimulationManager.get_instance()
 
-    if vis_pose and sim.is_rt_enabled:
-        log_warning(
-            "'vis_pose' is not fully supported with ray tracing enabled. Will be fixed in future updates."
-        )
-        return
-
     if isinstance(sensor, str):
         sensor = sim.get_sensor(uid=sensor)
 
@@ -269,12 +268,6 @@ def run_keyboard_control_for_light(
 
     sim = SimulationManager.get_instance()
 
-    if vis_pose and sim.is_rt_enabled:
-        log_warning(
-            "'vis_pose' is not fully supported with ray tracing enabled. Will be fixed in future updates."
-        )
-        return
-
     if isinstance(light, str):
         light: Light = sim.get_light(uid=light)
 
diff --git a/embodichain/lab/sim/utility/sim_utils.py b/embodichain/lab/sim/utility/sim_utils.py
index 088709c3..9a3f1eea 100644
--- a/embodichain/lab/sim/utility/sim_utils.py
+++ b/embodichain/lab/sim/utility/sim_utils.py
@@ -152,7 +152,11 @@ def is_rt_enabled() -> bool:
     """
     config = dexsim.get_world_config()
 
-    return config.renderer == dexsim.types.Renderer.FASTRT
+    return (
+        config.renderer == dexsim.types.Renderer.FASTRT
+        or config.renderer == dexsim.types.Renderer.HYBRID
+        or config.renderer == dexsim.types.Renderer.OFFLINERT
+    )
 
 
 def create_cube(
diff --git a/embodichain/lab/sim/utility/solver_utils.py b/embodichain/lab/sim/utility/solver_utils.py
index 9cdf1bc4..b6eac155 100644
--- a/embodichain/lab/sim/utility/solver_utils.py
+++ b/embodichain/lab/sim/utility/solver_utils.py
@@ -109,7 +109,7 @@ def create_pk_serial_chain(
     else:
         return pk.SerialChain(
             chain=chain, end_frame_name=end_link_name, root_frame_name=root_link_name
-        )
+        ).to(device=device)
 
 
 def build_reduced_pinocchio_robot(
diff --git a/embodichain/lab/sim/utility/workspace_analyzer/caches/base_cache.py b/embodichain/lab/sim/utility/workspace_analyzer/caches/base_cache.py
index 63e40349..20eb407e 100644
--- a/embodichain/lab/sim/utility/workspace_analyzer/caches/base_cache.py
+++ b/embodichain/lab/sim/utility/workspace_analyzer/caches/base_cache.py
@@ -18,7 +18,6 @@
 from typing import List
 import numpy as np
 
-
 all = [
     "BaseCache",
 ]
diff --git a/embodichain/lab/sim/utility/workspace_analyzer/caches/cache_manager.py b/embodichain/lab/sim/utility/workspace_analyzer/caches/cache_manager.py
index 40fb56a2..13397246 100644
--- a/embodichain/lab/sim/utility/workspace_analyzer/caches/cache_manager.py
+++ b/embodichain/lab/sim/utility/workspace_analyzer/caches/cache_manager.py
@@ -25,7 +25,6 @@
     CacheConfig,
 )
 
-
 all = [
     "CacheManager",
 ]
diff --git a/embodichain/lab/sim/utility/workspace_analyzer/configs/__init__.py b/embodichain/lab/sim/utility/workspace_analyzer/configs/__init__.py
index f07ad587..549bc124 100644
--- a/embodichain/lab/sim/utility/workspace_analyzer/configs/__init__.py
+++ b/embodichain/lab/sim/utility/workspace_analyzer/configs/__init__.py
@@ -36,7 +36,6 @@
     DensityConfig,
 )
 
-
 __all__ = [
     "CacheConfig",
     "DimensionConstraint",
diff --git a/embodichain/lab/sim/utility/workspace_analyzer/constraints/base_constraint.py b/embodichain/lab/sim/utility/workspace_analyzer/constraints/base_constraint.py
index a2e59704..8eb55a9d 100644
--- a/embodichain/lab/sim/utility/workspace_analyzer/constraints/base_constraint.py
+++ b/embodichain/lab/sim/utility/workspace_analyzer/constraints/base_constraint.py
@@ -21,7 +21,6 @@
 
 from embodichain.utils import logger
 
-
 __all__ = [
     "IConstraintChecker",
     "BaseConstraintChecker",
diff --git a/embodichain/lab/sim/utility/workspace_analyzer/constraints/workspace_constraint.py b/embodichain/lab/sim/utility/workspace_analyzer/constraints/workspace_constraint.py
index aa564cfb..0e9f8d5e 100644
--- a/embodichain/lab/sim/utility/workspace_analyzer/constraints/workspace_constraint.py
+++ b/embodichain/lab/sim/utility/workspace_analyzer/constraints/workspace_constraint.py
@@ -24,7 +24,6 @@
     DimensionConstraint,
 )
 
-
 __all__ = [
     "WorkspaceConstraintChecker",
 ]
@@ -139,6 +138,19 @@ def check_collision(
 
         return valid
 
+    def check_constraints(
+        self, points: torch.Tensor | np.ndarray
+    ) -> torch.Tensor | np.ndarray:
+        """Check all constraints (bounds + collision) in a single call.
+
+        Args:
+            points: Array of shape (N, 3) containing 3D point positions.
+
+        Returns:
+            Boolean array of shape (N,) indicating which points satisfy all constraints.
+        """
+        return self.check_bounds(points) & self.check_collision(points)
+
     def filter_points(
         self, points: torch.Tensor | np.ndarray
     ) -> torch.Tensor | np.ndarray:
diff --git a/embodichain/lab/sim/utility/workspace_analyzer/metrics/density_metric.py b/embodichain/lab/sim/utility/workspace_analyzer/metrics/density_metric.py
index 8b82d857..f91236fe 100644
--- a/embodichain/lab/sim/utility/workspace_analyzer/metrics/density_metric.py
+++ b/embodichain/lab/sim/utility/workspace_analyzer/metrics/density_metric.py
@@ -92,35 +92,42 @@ def compute(
     def _compute_local_density(self, points: np.ndarray) -> np.ndarray:
         """Compute local density for each point.
 
+        Uses scipy.spatial.cKDTree for O(N log N) performance instead of
+        the O(N^2) brute-force approach. Falls back to brute-force if
+        scipy is unavailable.
+
         Args:
             points: Point cloud, shape (N, 3).
 
         Returns:
             Local densities, shape (N,).
         """
-        n_points = len(points)
-        densities = np.zeros(n_points)
-
-        # Use radius-based density estimation for better performance
         radius = self.config.radius
-
-        for i in range(n_points):
-            # Compute distances to all other points
-            distances = np.linalg.norm(points - points[i], axis=1)
-
-            # Count neighbors within radius
-            num_neighbors = np.sum(distances <= radius) - 1  # Exclude self
-
-            # Density = neighbors / volume of sphere
-            volume = (4.0 / 3.0) * np.pi * (radius**3)
-            densities[i] = num_neighbors / volume if volume > 0 else 0.0
-
-        return densities
+        volume = (4.0 / 3.0) * np.pi * (radius**3)
+
+        try:
+            from scipy.spatial import cKDTree
+
+            tree = cKDTree(points)
+            # Count neighbors within radius for all points at once
+            counts = tree.query_ball_point(points, r=radius, return_length=True)
+            # Subtract 1 to exclude self
+            densities = (counts - 1) / volume if volume > 0 else np.zeros(len(points))
+            return densities
+        except ImportError:
+            # Fallback: brute-force O(N^2)
+            n_points = len(points)
+            densities = np.zeros(n_points)
+            for i in range(n_points):
+                distances = np.linalg.norm(points - points[i], axis=1)
+                num_neighbors = np.sum(distances <= radius) - 1
+                densities[i] = num_neighbors / volume if volume > 0 else 0.0
+            return densities
 
     def _compute_knn_density(self, points: np.ndarray) -> np.ndarray:
         """Compute k-nearest neighbors density.
 
-        Alternative method using k-nearest neighbors instead of fixed radius.
+        Uses scipy.spatial.cKDTree for O(N log N) performance.
 
         Args:
             points: Point cloud, shape (N, 3).
@@ -134,19 +141,25 @@ def _compute_knn_density(self, points: np.ndarray) -> np.ndarray:
         if k <= 0:
             return np.zeros(n_points)
 
-        densities = np.zeros(n_points)
-
-        for i in range(n_points):
-            # Compute distances to all other points
-            distances = np.linalg.norm(points - points[i], axis=1)
-
-            # Find k-nearest neighbors (excluding self)
-            distances[i] = np.inf
-            knn_distances = np.partition(distances, k)[:k]
-
-            # Density = k / volume of sphere containing k neighbors
-            max_distance = knn_distances.max()
-            volume = (4.0 / 3.0) * np.pi * (max_distance**3)
-            densities[i] = k / volume if volume > 0 else 0.0
-
-        return densities
+        try:
+            from scipy.spatial import cKDTree
+
+            tree = cKDTree(points)
+            # Query k+1 nearest (includes self)
+            distances, _ = tree.query(points, k=k + 1)
+            # Use the k-th nearest distance (index k, since 0 is self)
+            max_distances = distances[:, -1]
+            max_distances = np.maximum(max_distances, 1e-10)
+            volumes = (4.0 / 3.0) * np.pi * (max_distances**3)
+            densities = k / volumes
+            return densities
+        except ImportError:
+            densities = np.zeros(n_points)
+            for i in range(n_points):
+                distances = np.linalg.norm(points - points[i], axis=1)
+                distances[i] = np.inf
+                knn_distances = np.partition(distances, k)[:k]
+                max_distance = knn_distances.max()
+                volume = (4.0 / 3.0) * np.pi * (max_distance**3)
+                densities[i] = k / volume if volume > 0 else 0.0
+            return densities
diff --git a/embodichain/lab/sim/utility/workspace_analyzer/metrics/manipulability_metric.py b/embodichain/lab/sim/utility/workspace_analyzer/metrics/manipulability_metric.py
index 16c71c5f..5b0e8d0b 100644
--- a/embodichain/lab/sim/utility/workspace_analyzer/metrics/manipulability_metric.py
+++ b/embodichain/lab/sim/utility/workspace_analyzer/metrics/manipulability_metric.py
@@ -95,6 +95,9 @@ def compute(
             valid_mask = manipulability_scores >= self.config.jacobian_threshold
             valid_scores = manipulability_scores[valid_mask]
 
+            if len(valid_scores) == 0:
+                valid_scores = np.array([0.0])
+
         self.results = {
             "mean_manipulability": float(valid_scores.mean()),
             "std_manipulability": float(valid_scores.std()),
@@ -112,40 +115,46 @@ def compute(
         return self.results
 
     def _compute_manipulability_index(self, jacobians: np.ndarray) -> np.ndarray:
-        """Compute Yoshikawa manipulability index.
+        """Compute Yoshikawa manipulability index with batched operations.
 
         Args:
-            jacobians: Jacobian matrices, shape (N, 6, num_joints).
+            jacobians: Jacobian matrices, shape (N, rows, cols).
 
         Returns:
             Manipulability indices, shape (N,).
         """
-        # Manipulability index: sqrt(det(J * J^T))
-        manipulability = np.zeros(len(jacobians))
+        # Batch matrix multiply: J @ J^T for all samples
+        JJT = np.matmul(jacobians, np.swapaxes(jacobians, -2, -1))
 
-        for i, J in enumerate(jacobians):
-            JJT = J @ J.T
-            det = np.linalg.det(JJT)
-            manipulability[i] = np.sqrt(max(det, 0))
+        # Batch determinant
+        dets = np.linalg.det(JJT)
 
-        return manipulability
+        # sqrt(max(0, det))
+        return np.sqrt(np.maximum(dets, 0.0))
 
     def _compute_condition_numbers(self, jacobians: np.ndarray) -> np.ndarray:
-        """Compute condition numbers of Jacobian matrices.
+        """Compute condition numbers of Jacobian matrices with batched SVD.
 
         Args:
-            jacobians: Jacobian matrices, shape (N, 6, num_joints).
+            jacobians: Jacobian matrices, shape (N, rows, cols).
 
         Returns:
             Condition numbers, shape (N,).
         """
-        condition_numbers = np.zeros(len(jacobians))
-
-        for i, J in enumerate(jacobians):
-            try:
-                condition_numbers[i] = np.linalg.cond(J)
-            except np.linalg.LinAlgError:
-                # Singular matrix, use infinity as condition number
-                condition_numbers[i] = np.inf
-
-        return condition_numbers
+        try:
+            _, singular_values, _ = np.linalg.svd(jacobians, full_matrices=False)
+            # Condition number = max singular value / min singular value
+            max_sv = singular_values[:, 0]
+            min_sv = singular_values[:, -1]
+            # Avoid division by zero
+            min_sv = np.maximum(min_sv, 1e-15)
+            return max_sv / min_sv
+        except np.linalg.LinAlgError:
+            # Fallback to per-matrix computation if batch SVD fails
+            condition_numbers = np.zeros(len(jacobians))
+            for i, J in enumerate(jacobians):
+                try:
+                    condition_numbers[i] = np.linalg.cond(J)
+                except np.linalg.LinAlgError:
+                    condition_numbers[i] = np.inf
+            return condition_numbers
diff --git a/embodichain/lab/sim/utility/workspace_analyzer/metrics/reachability_metric.py b/embodichain/lab/sim/utility/workspace_analyzer/metrics/reachability_metric.py
index f20f0e1c..39721f7c 100644
--- a/embodichain/lab/sim/utility/workspace_analyzer/metrics/reachability_metric.py
+++ b/embodichain/lab/sim/utility/workspace_analyzer/metrics/reachability_metric.py
@@ -112,7 +112,7 @@ def compute(
     def _voxelize_points(
         self, points: np.ndarray, voxel_size: float
     ) -> Dict[tuple, int]:
-        """Convert points to voxel grid.
+        """Convert points to voxel grid using vectorized operations.
 
         Args:
             points: Point cloud, shape (N, 3).
@@ -124,14 +124,14 @@ def _voxelize_points(
         # Convert points to voxel indices
         voxel_indices = np.floor(points / voxel_size).astype(int)
 
-        # Count points in each voxel
-        voxel_grid = {}
-        for idx in voxel_indices:
-            key = tuple(idx)
-            voxel_grid[key] = voxel_grid.get(key, 0) + 1
+        # Use np.unique for vectorized counting
+        unique_indices, counts = np.unique(voxel_indices, axis=0, return_counts=True)
 
-        # Filter by minimum points threshold
+        # Filter by minimum points threshold and build dict
         min_points = self.config.min_points_per_voxel
-        voxel_grid = {k: v for k, v in voxel_grid.items() if v >= min_points}
+        voxel_grid = {}
+        for idx, count in zip(unique_indices, counts):
+            if count >= min_points:
+                voxel_grid[tuple(idx)] = int(count)
 
         return voxel_grid
diff --git a/embodichain/lab/sim/utility/workspace_analyzer/samplers/base_sampler.py b/embodichain/lab/sim/utility/workspace_analyzer/samplers/base_sampler.py
index 2685e5ec..30a1bf97 100644
--- a/embodichain/lab/sim/utility/workspace_analyzer/samplers/base_sampler.py
+++ b/embodichain/lab/sim/utility/workspace_analyzer/samplers/base_sampler.py
@@ -21,7 +21,6 @@
 
 from embodichain.utils import logger
 
-
 __all__ = [
     "ISampler",
     "BaseSampler",
diff --git a/embodichain/lab/sim/utility/workspace_analyzer/samplers/halton_sampler.py b/embodichain/lab/sim/utility/workspace_analyzer/samplers/halton_sampler.py
index 01b005f8..c00c991a 100644
--- a/embodichain/lab/sim/utility/workspace_analyzer/samplers/halton_sampler.py
+++ b/embodichain/lab/sim/utility/workspace_analyzer/samplers/halton_sampler.py
@@ -176,7 +176,7 @@ def __init__(
         self.bases = bases
         self.skip = skip
 
-    def sample(
+    def _sample_from_bounds(
         self, bounds: torch.Tensor | np.ndarray, num_samples: int
     ) -> torch.Tensor:
         """Generate Halton sequence samples within the given bounds.
@@ -190,13 +190,6 @@ def sample(
 
         Raises:
             ValueError: If bounds are invalid or num_samples is non-positive.
-
-        Examples:
-            >>> sampler = HaltonSampler(skip=100)
-            >>> bounds = torch.tensor([[-1.0, 1.0], [-1.0, 1.0]], dtype=torch.float32)
-            >>> samples = sampler.sample(bounds, num_samples=100)
-            >>> samples.shape
-            torch.Size([100, 2])
         """
         bounds = self._validate_bounds(bounds)
 
@@ -220,14 +213,8 @@ def sample(
                 )
             bases = self.bases[:n_dims]
 
-        # Generate Halton sequence
-        samples_unit = np.zeros((num_samples, n_dims), dtype=np.float32)
-
-        for dim in range(n_dims):
-            base = bases[dim]
-            for i in range(num_samples):
-                index = i + self.skip + 1  # Start from 1, apply skip
-                samples_unit[i, dim] = self._halton_number(index, base)
+        # Generate Halton sequence with vectorized van der Corput
+        samples_unit = self._generate_halton_vectorized(num_samples, n_dims, bases)
 
         # Convert to tensor and scale to bounds
         samples_unit_tensor = self._to_tensor(samples_unit)
@@ -238,30 +225,53 @@ def sample(
 
         return samples
 
-    @staticmethod
-    def _halton_number(index: int, base: int) -> float:
-        """Compute a single Halton number.
+    def _generate_halton_vectorized(
+        self, num_samples: int, n_dims: int, bases: list[int]
+    ) -> np.ndarray:
+        """Generate Halton sequence using vectorized van der Corput computation.
+
+        Args:
+            num_samples: Number of samples to generate.
+            n_dims: Number of dimensions.
+            bases: Prime bases for each dimension.
+
+        Returns:
+            Array of shape (num_samples, n_dims) with values in [0, 1].
+        """
+        indices = np.arange(1, num_samples + 1) + self.skip  # (num_samples,)
+        samples = np.zeros((num_samples, n_dims), dtype=np.float32)
+
+        for dim in range(n_dims):
+            samples[:, dim] = self._van_der_corput_vectorized(indices, bases[dim])
 
-        The Halton sequence is generated by reversing the base-n representation
-        of the index.
+        return samples
+
+    @staticmethod
+    def _van_der_corput_vectorized(indices: np.ndarray, base: int) -> np.ndarray:
+        """Compute van der Corput sequence for multiple indices at once.
 
         Args:
-            index: Sequence index (starting from 1).
+            indices: Array of sequence indices.
             base: Prime base for this dimension.
 
         Returns:
-            Halton number in [0, 1].
+            Array of van der Corput values in [0, 1].
         """
-        result = 0.0
-        f = 1.0 / base
-        i = index
+        # Determine maximum number of digits needed
+        max_idx = int(indices.max())
+        n_digits = int(np.ceil(np.log(max_idx + 1) / np.log(base))) + 1
+
+        result = np.zeros(len(indices), dtype=np.float64)
+        i_vals = indices.astype(np.float64).copy()
+        current_f = 1.0 / base
 
-        while i > 0:
-            result += f * (i % base)
-            i //= base
-            f /= base
+        for _ in range(n_digits):
+            remainders = i_vals % base
+            result += current_f * remainders
+            i_vals = np.floor(i_vals / base)
+            current_f /= base
 
-        return result
+        return result.astype(np.float32)
 
     def get_strategy_name(self) -> str:
         """Get the name of the sampling strategy.
diff --git a/embodichain/lab/sim/utility/workspace_analyzer/samplers/iniform_sampler.py b/embodichain/lab/sim/utility/workspace_analyzer/samplers/iniform_sampler.py
index 8f536817..1db2ce4b 100644
--- a/embodichain/lab/sim/utility/workspace_analyzer/samplers/iniform_sampler.py
+++ b/embodichain/lab/sim/utility/workspace_analyzer/samplers/iniform_sampler.py
@@ -75,8 +75,8 @@ def _sample_from_bounds(
             bounds: Tensor/Array of shape (n_dims, 2) containing [lower, upper] bounds for each dimension.
             num_samples: Total number of samples to generate. This is used to calculate
                 samples_per_dim if not explicitly provided during initialization.
-                Note: The actual number of samples may differ slightly from this value
-                to maintain a uniform grid.
+                Note: The actual number of samples (samples_per_dim^n_dims) will not
+                exceed this value, but may be less to maintain a uniform grid.
 
         Returns:
             Tensor of shape (actual_num_samples, n_dims) containing the sampled points.
@@ -99,7 +99,8 @@ def _sample_from_bounds(
         # Calculate samples per dimension if not provided
         if self.samples_per_dim is None:
             # Compute samples_per_dim to approximate the desired num_samples
-            samples_per_dim = max(2, int(np.ceil(num_samples ** (1.0 / n_dims))))
+            # Use floor to ensure actual grid size never exceeds num_samples
+            samples_per_dim = max(2, int(num_samples ** (1.0 / n_dims)))
         else:
             samples_per_dim = self.samples_per_dim
 
diff --git a/embodichain/lab/sim/utility/workspace_analyzer/visualizers/base_visualizer.py b/embodichain/lab/sim/utility/workspace_analyzer/visualizers/base_visualizer.py
index 42541098..4c27bc94 100644
--- a/embodichain/lab/sim/utility/workspace_analyzer/visualizers/base_visualizer.py
+++ b/embodichain/lab/sim/utility/workspace_analyzer/visualizers/base_visualizer.py
@@ -40,7 +40,6 @@
     VisualizationConfig,
 )
 
-
 __all__ = [
     "IVisualizer",
     "BaseVisualizer",
diff --git a/embodichain/lab/sim/utility/workspace_analyzer/visualizers/sphere_visualizer.py b/embodichain/lab/sim/utility/workspace_analyzer/visualizers/sphere_visualizer.py
index 401cedbc..08bb3c2c 100644
--- a/embodichain/lab/sim/utility/workspace_analyzer/visualizers/sphere_visualizer.py
+++ b/embodichain/lab/sim/utility/workspace_analyzer/visualizers/sphere_visualizer.py
@@ -33,7 +33,6 @@
 
 from embodichain.utils import logger
 
-
 __all__ = ["SphereVisualizer"]
 
 
diff --git a/embodichain/lab/sim/utility/workspace_analyzer/visualizers/voxel_visualizer.py b/embodichain/lab/sim/utility/workspace_analyzer/visualizers/voxel_visualizer.py
index 47b46fd4..1cfc0647 100644
--- a/embodichain/lab/sim/utility/workspace_analyzer/visualizers/voxel_visualizer.py
+++ b/embodichain/lab/sim/utility/workspace_analyzer/visualizers/voxel_visualizer.py
@@ -33,7 +33,6 @@
 
 from embodichain.utils import logger
 
-
 __all__ = ["VoxelVisualizer"]
 
 
diff --git a/embodichain/lab/sim/utility/workspace_analyzer/workspace_analyzer.py b/embodichain/lab/sim/utility/workspace_analyzer/workspace_analyzer.py
index 38937ea7..ef523c49 100644
--- a/embodichain/lab/sim/utility/workspace_analyzer/workspace_analyzer.py
+++ b/embodichain/lab/sim/utility/workspace_analyzer/workspace_analyzer.py
@@ -302,6 +302,7 @@ def _create_sampler(self) -> BaseSampler:
         return factory.create_sampler(
             strategy=self.config.sampling.strategy,
             seed=self.config.sampling.seed,
+            device=self.device,
         )
 
     # Note: Geometric constraint creation methods temporarily removed
@@ -893,6 +894,9 @@ def compute_workspace_points(
     ) -> Tuple[torch.Tensor, torch.Tensor]:
         """Compute end-effector positions for given joint configurations.
 
+        Uses batched FK computation via ``robot.compute_batch_fk`` for
+        significant speedup on large sample counts.
+
         Args:
             joint_configs: Joint configurations, shape (num_samples, num_joints).
             batch_size: Batch size for FK computation. If None, uses config value.
@@ -903,56 +907,66 @@ def compute_workspace_points(
                 - valid_configs: Valid joint configurations, shape (num_valid, num_joints)
         """
         num_samples = len(joint_configs)
+        batch_size = batch_size or self.config.sampling.batch_size
+        # Cap batch size to total samples
+        batch_size = min(batch_size, num_samples)
+
+        logger.log_info(
+            f"Computing FK for {num_samples} samples (batch_size={batch_size})..."
+        )
 
+        # Pre-allocate lists for results
         workspace_points_list = []
         valid_configs_list = []
-
-        logger.log_info(f"Computing FK for {num_samples} samples...")
-
-        # Track valid points for progress bar
         total_valid = 0
 
-        # Robot expects one configuration at a time (batch_size from robot environments, not samples)
-        # Process each configuration individually
         pbar = self._create_optimized_tqdm(
-            range(num_samples),
-            desc="Forward Kinematics",
-            unit="cfg",
+            range(0, num_samples, batch_size),
+            desc="Forward Kinematics (batched)",
+            unit="batch",
             color="cyan",
             emoji="🤖",
         )
-        for i in pbar:
-            qpos = joint_configs[i : i + 1]  # Keep batch dimension
+
+        for batch_start in pbar:
+            batch_end = min(batch_start + batch_size, num_samples)
+
+            # Reshape to (n_envs=1, batch_size, num_joints) for compute_batch_fk
+            qpos_batch = joint_configs[batch_start:batch_end].unsqueeze(0)
 
             try:
-                # Compute forward kinematics
-                pose = self.robot.compute_fk(
-                    qpos=qpos,
+                # Batched FK: (1, batch, num_joints) -> (1, batch, 4, 4)
+                poses = self.robot.compute_batch_fk(
+                    qpos=qpos_batch,
                     name=self.control_part_name,
                     to_matrix=True,
                 )
 
-                # Extract position (x, y, z)
-                position = pose[:, :3, 3]  # Shape: (1, 3)
+                # Extract positions: (1, batch, 4, 4) -> (batch, 3)
+                positions = poses[0, :, :3, 3]
 
-                # Filter by constraints (bounds + collision check)
-                valid_bounds = self.constraint_checker.check_bounds(position)
-                valid_collision = self.constraint_checker.check_collision(position)
-                valid_mask = valid_bounds & valid_collision
+                # Vectorized constraint check for entire batch
+                valid_mask = self.constraint_checker.check_constraints(positions)
 
-                # Store valid results
                 if valid_mask.any():
-                    workspace_points_list.append(position[valid_mask])
-                    valid_configs_list.append(qpos[valid_mask])
-                    total_valid += 1
+                    workspace_points_list.append(positions[valid_mask])
+                    valid_configs_list.append(
+                        joint_configs[batch_start:batch_end][valid_mask]
+                    )
+                    total_valid += valid_mask.sum().item()
 
-                # Update progress bar with intelligent statistics
                 self._update_progress_with_stats(
-                    pbar, i, total_valid, metric_name="valid", show_rate=True
+                    pbar,
+                    batch_end - 1,
+                    total_valid,
+                    metric_name="valid",
+                    show_rate=True,
                 )
 
             except Exception as e:
-                logger.log_warning(f"FK computation failed for sample {i}: {e}")
+                logger.log_warning(
+                    f"FK computation failed for batch [{batch_start}:{batch_end}]: {e}"
+                )
                 continue
 
         # Concatenate all results
@@ -963,19 +977,19 @@ def compute_workspace_points(
             workspace_points = torch.empty((0, 3), device=self.device)
             valid_configs = torch.empty((0, self.num_joints), device=self.device)
 
-        # Performance summary for FK computation
-        pbar.close()  # Ensure progress bar is closed
-        success_rate = len(workspace_points) / num_samples * 100
+        pbar.close()
+        success_rate = (
+            len(workspace_points) / num_samples * 100 if num_samples > 0 else 0
+        )
 
-        # Performance indicator based on success rate
         if success_rate >= 90:
-            perf_icon = "🏆"  # Trophy for excellent performance
+            perf_icon = "🏆"
         elif success_rate >= 75:
-            perf_icon = "✅"  # Check mark for good performance
+            perf_icon = "✅"
         elif success_rate >= 50:
-            perf_icon = "🟡"  # Yellow circle for moderate performance
+            perf_icon = "🟡"
         else:
-            perf_icon = "⚠️"  # Warning for low performance
+            perf_icon = "⚠️"
 
         logger.log_info(
             f"{perf_icon} FK Results: {len(workspace_points)}/{num_samples} valid points "
@@ -987,7 +1001,13 @@ def compute_workspace_points(
     def compute_reachability(
         self, cartesian_points: torch.Tensor, batch_size: int | None = None
     ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-        """Compute reachability for Cartesian points using IK.
+        """Compute reachability for Cartesian points using batched IK.
+
+        All ``ik_samples_per_point`` random seeds for a batch of points are
+        merged into the batch dimension and resolved with a **single**
+        ``robot.compute_batch_ik`` call (shape ``(1, n_valid * K, 4, 4)``).
+        This avoids the Python loop overhead and lets the solver process all
+        seeds in one vectorised pass.
 
         Args:
             cartesian_points: Cartesian positions, shape (num_samples, 3).
@@ -1003,208 +1023,125 @@ def compute_reachability(
         """
         num_samples = len(cartesian_points)
         ik_samples_per_point = self.config.ik_samples_per_point
+        batch_size = batch_size or self.config.sampling.batch_size
+        batch_size = min(batch_size, num_samples)
 
-        # Pre-filter Cartesian points by workspace constraints
-        # This eliminates points that are outside bounds or in collision zones
-        valid_cartesian_mask = self.constraint_checker.check_bounds(
+        # Pre-filter by workspace constraints (vectorized)
+        valid_cartesian_mask = self.constraint_checker.check_constraints(
             cartesian_points
-        ) & self.constraint_checker.check_collision(cartesian_points)
+        )
 
         logger.log_info(
             f"Pre-filtered Cartesian points: {valid_cartesian_mask.sum()}/{num_samples} "
             f"points pass workspace constraints ({(valid_cartesian_mask.sum()/num_samples*100):.1f}%)"
         )
 
-        # Store results for all points (including invalid ones for consistent indexing)
+        # Get reference end-effector pose for IK target orientation
+        current_ee_pose = self._get_reference_pose()
+
+        # Initialize result arrays
         all_success_rates = torch.zeros(num_samples, device=self.device)
         reachable_points_list = []
         best_configs_list = []
+        total_reachable = 0
 
-        logger.log_info(
-            f"Computing IK for {num_samples} Cartesian samples "
-            f"({ik_samples_per_point} seeds per point)..."
-        )
-
-        # Create a random sampler for generating IK seeds (avoid UniformSampler issues)
+        # Prepare random seeds for all attempts
         from embodichain.lab.sim.utility.workspace_analyzer.samplers import (
             RandomSampler,
         )
 
-        random_sampler = RandomSampler(seed=self.config.sampling.seed)
-
-        # Get reference end-effector pose for IK target orientation
-        # Priority: use reference_pose if provided, otherwise compute from current joint configuration
-        if (
-            hasattr(self.config, "reference_pose")
-            and self.config.reference_pose is not None
-        ):
-            # Use provided reference pose (should be 4x4 transformation matrix)
-            reference_pose = self.config.reference_pose
-            if isinstance(reference_pose, np.ndarray):
-                reference_pose = torch.from_numpy(reference_pose).to(self.device)
-            if reference_pose.dim() == 2:  # Shape: (4, 4) -> (1, 4, 4)
-                reference_pose = reference_pose.unsqueeze(0)
-            current_ee_pose = reference_pose  # Shape: (1, 4, 4)
-            logger.log_info("Using provided reference pose for IK target orientation")
-        else:
-            # Fallback: compute current end-effector pose from joint configuration
-            try:
-                # Using first environment (index 0) for qpos retrieval
-                current_qpos = self.robot.get_qpos()[0][
-                    self.robot.get_joint_ids(self.control_part_name)
-                ]
-                current_ee_pose = self.robot.compute_fk(
-                    name=self.control_part_name,
-                    qpos=current_qpos.unsqueeze(0),
-                    to_matrix=True,
-                )  # Shape: (1, 4, 4)
-                logger.log_info(
-                    "Computing reference pose from current robot configuration"
-                )
-            except Exception as e:
-                logger.log_warning(f"Failed to compute current robot pose: {e}")
-                # Create identity pose as fallback
-                current_ee_pose = torch.eye(4, device=self.device).unsqueeze(0)
-                current_ee_pose[0, :3, 3] = torch.tensor(
-                    [0.5, 0.0, 1.0], device=self.device
-                )  # Default position
-                logger.log_info("Using default identity pose as fallback")
-
-            # Print current joint configuration and computed pose
-            pose_np = current_ee_pose[0].cpu().numpy()
-            position = pose_np[:3, 3]
-            rotation_matrix = pose_np[:3, :3]
-
-            # Convert rotation matrix to Euler angles
-            import scipy.spatial.transform as spt
-
-            euler_angles = spt.Rotation.from_matrix(rotation_matrix).as_euler(
-                "xyz", degrees=True
-            )
-
-        # Print detailed reference pose information
-        pose_np = current_ee_pose[0].cpu().numpy()
-        position = pose_np[:3, 3]
-        rotation_matrix = pose_np[:3, :3]
-
-        # Convert rotation matrix to Euler angles (ZYX convention)
-        import scipy.spatial.transform as spt
-
-        euler_angles = spt.Rotation.from_matrix(rotation_matrix).as_euler(
-            "xyz", degrees=True
+        random_sampler = RandomSampler(
+            seed=self.config.sampling.seed, device=self.device
         )
 
-        # Format matrix with proper indentation
-        matrix_lines = np.array2string(pose_np, precision=4, suppress_small=True).split(
-            "\n"
-        )
-        matrix_str = "\n".join(f"\t   {line}" for line in matrix_lines)
         logger.log_info(
-            f"🎯 Using provided reference pose for IK target orientation:\n"
-            f"\t Position: [{position[0]:.4f}, {position[1]:.4f}, {position[2]:.4f}] m\n"
-            f"\t Rotation (XYZ Euler): [{euler_angles[0]:.2f}°, {euler_angles[1]:.2f}°, {euler_angles[2]:.2f}°]\n"
-            f"\t Matrix:\n{matrix_str}"
+            f"Computing IK for {num_samples} Cartesian samples "
+            f"(batch_size={batch_size}, {ik_samples_per_point} seeds per point)..."
         )
 
-        # Track statistics for progress bar
-        total_reachable = 0
-
-        # Process each point individually (robot expects batch_size from environments, not samples)
         pbar = self._create_optimized_tqdm(
-            range(num_samples),
-            desc="Inverse Kinematics",
-            unit="pt",
+            range(0, num_samples, batch_size),
+            desc="Inverse Kinematics (batched)",
+            unit="batch",
             color="magenta",
             emoji="🎯",
         )
 
-        for i in pbar:
-            position = cartesian_points[i]  # Shape: (3,)
-
-            # Skip points that don't satisfy workspace constraints
-            if not valid_cartesian_mask[i]:
-                # Mark as unreachable due to constraint violation
-                all_success_rates[i] = 0.0
-                # Update progress bar
-                reachability_rate = total_reachable / (i + 1) * 100
-                if reachability_rate >= 70:
-                    reach_color = "\033[32m"  # Green for high reachability
-                elif reachability_rate >= 40:
-                    reach_color = "\033[33m"  # Yellow for medium reachability
-                else:
-                    reach_color = "\033[31m"  # Red for low reachability
-                pbar.set_postfix_str(
-                    f"🎯 Reachable: {total_reachable}/{i+1} | {reach_color}{reachability_rate:.1f}%\033[0m rate (❌ constraint)"
-                )
+        for batch_start in pbar:
+            batch_end = min(batch_start + batch_size, num_samples)
+            batch_valid_mask = valid_cartesian_mask[batch_start:batch_end]
+            n_valid = batch_valid_mask.sum().item()
+
+            if n_valid == 0:
                 continue
 
-            # Create target pose: use current orientation, replace position with sampled position
-            pose = current_ee_pose.clone()
-            pose[0, :3, 3] = position
-
-            # Try multiple random seeds for this point
-            success_count = 0
-            best_qpos = None
-
-            logger.set_log_level("ERROR")  # Suppress warnings during IK attempts
-            for seed_idx in range(ik_samples_per_point):
-                # Generate random joint seed using RandomSampler
-                random_seed = random_sampler.sample(
-                    bounds=self.qpos_limits, num_samples=1
-                )  # Shape: (1, num_joints)
-
-                try:
-                    # Compute IK
-                    ret, qpos = self.robot.compute_ik(
-                        pose=pose,
-                        joint_seed=random_seed,
-                        name=self.control_part_name,
-                    )
+            # Get valid positions (n_valid, 3)
+            valid_positions = cartesian_points[batch_start:batch_end][batch_valid_mask]
 
-                    # Count successes
-                    if ret is not None and ret[0]:
-                        success_count += 1
-                        # Store first successful configuration
-                        if best_qpos is None:
-                            best_qpos = qpos[0]  # Extract from batch dimension
+            # Build target poses for all seeds in one shot.
+            # Each position is repeated ik_samples_per_point times so that a single
+            # compute_batch_ik call covers all (n_valid * K) targets at once.
+            # Shape: (1, n_valid * K, 4, 4)
+            base_pose = current_ee_pose.unsqueeze(1).expand(1, n_valid, 4, 4).clone()
+            base_pose[0, :, :3, 3] = valid_positions
+            target_poses = base_pose.repeat_interleave(ik_samples_per_point, dim=1)
 
-                except Exception as e:
-                    logger.log_warning(
-                        f"IK computation failed for sample {i}, seed {seed_idx}: {e}"
-                    )
-                    continue
-            logger.set_log_level("INFO")  # Restore log level
-
-            # Calculate success rate for this point
-            success_rate = success_count / ik_samples_per_point
-            all_success_rates[i] = success_rate
-
-            # Filter by success threshold for reachable points
-            if success_rate and best_qpos is not None:
-                reachable_points_list.append(position.unsqueeze(0))  # Add batch dim
-                best_configs_list.append(best_qpos.unsqueeze(0))  # Add batch dim
-                total_reachable += 1
-
-            # Update progress bar with reachability statistics
-            reachability_rate = total_reachable / (i + 1) * 100
-            # Use color coding for the reachability rate
-            if reachability_rate >= 70:
-                reach_color = "\033[32m"  # Green for high reachability
-            elif reachability_rate >= 40:
-                reach_color = "\033[33m"  # Yellow for medium reachability
-            else:
-                reach_color = "\033[31m"  # Red for low reachability
+            # Generate all random seeds at once: (1, n_valid * K, num_joints)
+            all_seeds = random_sampler.sample(
+                bounds=self.qpos_limits, num_samples=n_valid * ik_samples_per_point
+            ).unsqueeze(0)
 
-            # Add success rate indicator for this specific point
-            if success_rate:
-                point_status = "✅ IK"
-            elif success_rate > 0:
-                point_status = f"🟡 IK({success_rate:.1f})"
-            else:
-                point_status = "❌ IK"
+            try:
+                logger.set_log_level("ERROR")
+                success, qpos = self.robot.compute_batch_ik(
+                    pose=target_poses,
+                    joint_seed=all_seeds,
+                    name=self.control_part_name,
+                )
+                logger.set_log_level("INFO")
+
+                # Reshape results from flat batch to (n_valid, K)
+                success_2d = success[0].reshape(n_valid, ik_samples_per_point)
+                qpos_3d = qpos[0].reshape(
+                    n_valid, ik_samples_per_point, self.num_joints
+                )
+
+                # Success rate: fraction of seeds that solved IK for each point
+                success_rates_batch = success_2d.float().mean(dim=1)  # (n_valid,)
+
+                # Pick the joint config from the first successful seed per point
+                any_success = success_2d.any(dim=1)  # (n_valid,)
+                first_success_idx = success_2d.float().argmax(dim=1)  # (n_valid,)
+                best_qpos = qpos_3d[
+                    torch.arange(n_valid, device=self.device), first_success_idx
+                ]  # (n_valid, num_joints)
+
+            except Exception as e:
+                logger.set_log_level("INFO")
+                logger.log_warning(
+                    f"IK computation failed for batch [{batch_start}:{batch_end}]: {e}"
+                )
+                success_rates_batch = torch.zeros(n_valid, device=self.device)
+                any_success = torch.zeros(n_valid, dtype=torch.bool, device=self.device)
+                best_qpos = torch.zeros(n_valid, self.num_joints, device=self.device)
+
+            # Map results back to original (pre-filter) indices
+            valid_local_indices = batch_valid_mask.nonzero(as_tuple=True)[0]
+            global_indices = batch_start + valid_local_indices
+            all_success_rates[global_indices] = success_rates_batch
+
+            # Collect reachable points
+            if any_success.any():
+                reachable_points_list.append(valid_positions[any_success])
+                best_configs_list.append(best_qpos[any_success])
+                total_reachable += any_success.sum().item()
 
-            pbar.set_postfix_str(
-                f"🎯 Reachable: {total_reachable}/{i+1} | {reach_color}{reachability_rate:.1f}%\033[0m rate | {point_status}"
+            self._update_progress_with_stats(
+                pbar,
+                batch_end - 1,
+                total_reachable,
+                metric_name="reachable",
+                show_rate=True,
             )
 
         # Concatenate reachable results
@@ -1215,24 +1152,23 @@ def compute_reachability(
             reachable_points = torch.empty((0, 3), device=self.device)
             best_configs = torch.empty((0, self.num_joints), device=self.device)
 
-        # Create reachability mask
         reachability_mask = all_success_rates > 0
 
-        # Performance summary for IK computation
-        pbar.close()  # Ensure progress bar is closed
-        reachability = len(reachable_points) / num_samples * 100
+        pbar.close()
+        reachability = (
+            len(reachable_points) / num_samples * 100 if num_samples > 0 else 0
+        )
 
-        # Reachability performance indicator
         if reachability >= 80:
-            reach_icon = "🏆"  # Trophy for high reachability
+            reach_icon = "🏆"
         elif reachability >= 60:
-            reach_icon = "🚀"  # Rocket for good reachability
+            reach_icon = "🚀"
         elif reachability >= 40:
-            reach_icon = "🟡"  # Yellow for moderate reachability
+            reach_icon = "🟡"
         elif reachability >= 20:
-            reach_icon = "🟠"  # Orange for low reachability
+            reach_icon = "🟠"
         else:
-            reach_icon = "⚠️"  # Warning for very low reachability
+            reach_icon = "⚠️"
 
         logger.log_info(
             f"{reach_icon} IK Results: {len(reachable_points)}/{num_samples} reachable points "
@@ -1247,6 +1183,42 @@ def compute_reachability(
             best_configs,
         )
 
+    def _get_reference_pose(self) -> torch.Tensor:
+        """Get reference end-effector pose for IK target orientation.
+
+        Returns:
+            Reference pose tensor of shape (1, 4, 4).
+        """
+        if (
+            hasattr(self.config, "reference_pose")
+            and self.config.reference_pose is not None
+        ):
+            reference_pose = self.config.reference_pose
+            if isinstance(reference_pose, np.ndarray):
+                reference_pose = torch.from_numpy(reference_pose).to(self.device)
+            if reference_pose.dim() == 2:
+                reference_pose = reference_pose.unsqueeze(0)
+            logger.log_info("Using provided reference pose for IK target orientation")
+            return reference_pose
+
+        try:
+            current_qpos = self.robot.get_qpos()[0][
+                self.robot.get_joint_ids(self.control_part_name)
+            ]
+            current_ee_pose = self.robot.compute_fk(
+                name=self.control_part_name,
+                qpos=current_qpos.unsqueeze(0),
+                to_matrix=True,
+            )
+            logger.log_info("Computing reference pose from current robot configuration")
+            return current_ee_pose
+        except Exception as e:
+            logger.log_warning(f"Failed to compute current robot pose: {e}")
+            default_pose = torch.eye(4, device=self.device).unsqueeze(0)
+            default_pose[0, :3, 3] = torch.tensor([0.5, 0.0, 1.0], device=self.device)
+            logger.log_info("Using default identity pose as fallback")
+            return default_pose
+
     def analyze(
         self,
         num_samples: int | None = None,
diff --git a/embodichain/toolkits/graspkit/pg_grasp/antipodal_generator.py b/embodichain/toolkits/graspkit/pg_grasp/antipodal_generator.py
index f6389ff8..0e61c628 100644
--- a/embodichain/toolkits/graspkit/pg_grasp/antipodal_generator.py
+++ b/embodichain/toolkits/graspkit/pg_grasp/antipodal_generator.py
@@ -73,12 +73,20 @@ class GraspGeneratorCfg:
     number of sampled surface points, ray perturbation angle, and gripper jaw
     distance limits. See :class:`AntipodalSamplerCfg` for details."""
 
-    max_deviation_angle: float = np.pi / 12
+    max_deviation_angle: float = np.pi / 6
     """Maximum allowed angle (in radians) between the specified approach
     direction and the axis connecting an antipodal point pair. Pairs that
     deviate more than this threshold from perpendicular to the approach are
     discarded during grasp pose computation."""
 
+    is_partial_annotate: bool = False
+    """When ``True``, the annotator allows selecting a partial region of the 
+    mesh for grasp sampling. If ``False``, the entire mesh is used."""
+
+    is_filter_ground_collision: bool = True
+    """Whether to filter out grasp poses that would cause the gripper to 
+    collide."""
+
 
 class GraspGenerator:
     """Antipodal grasp-pose generator for parallel-jaw grippers.
@@ -236,7 +244,12 @@ def annotate(self) -> torch.Tensor:
             torch.Tensor: A tensor of shape (N, 2, 3) representing N antipodal point pairs.
                 Each pair consists of a hit point and its corresponding surface point.
         """
-
+        if self.cfg.is_partial_annotate == False:
+            hit_point_pairs = self._generate_hit_point_pairs(
+                self.vertices, self.triangles
+            )
+            self._cache_hit_point_pairs(hit_point_pairs)
+            return self._hit_point_pairs
         logger.log_info(
             f"[Viser] *****Annotate grasp region in http://localhost:{self.cfg.viser_port}"
         )
@@ -343,7 +356,7 @@ def _(event: viser.ScenePointerEvent) -> None:
                         f"[Selection] Selected {sel_vertex_indices.size} vertices and {sel_face_indices.size} faces."
                     )
 
-                    hit_point_pairs = self._antipodal_sampler.sample(
+                    hit_point_pairs = self._generate_hit_point_pairs(
                         torch.tensor(sel_vertices, device=self.device),
                         torch.tensor(sel_faces, device=self.device),
                     )
@@ -378,13 +391,24 @@ def _(_evt: viser.GuiEvent) -> None:
         while True:
             if return_flag:
                 if hit_point_pairs is not None:
-                    self._hit_point_pairs = hit_point_pairs
-                    cache_path = self._get_cache_dir(self.vertices, self.triangles)
-                    self._save_cache(cache_path, hit_point_pairs)
+                    self._cache_hit_point_pairs(hit_point_pairs)
                 break
             time.sleep(0.5)
         return self._hit_point_pairs
 
+    def _generate_hit_point_pairs(
+        self, vertices: torch.Tensor, triangles: torch.Tensor
+    ) -> torch.Tensor:
+        return self._antipodal_sampler.sample(
+            vertices=vertices,
+            faces=triangles,
+        )
+
+    def _cache_hit_point_pairs(self, hit_point_pairs: torch.Tensor):
+        self._hit_point_pairs = hit_point_pairs
+        cache_path = self._get_cache_dir(self.vertices, self.triangles)
+        self._save_cache(cache_path, hit_point_pairs)
+
     def _get_cache_dir(self, vertices: torch.Tensor, triangles: torch.Tensor):
         vert_bytes = vertices.to("cpu").numpy().tobytes()
         face_bytes = triangles.to("cpu").numpy().tobytes()
@@ -583,7 +607,7 @@ def get_grasp_poses(
         approach_direction: torch.Tensor,
         visualize_collision: bool = False,
         visualize_pose: bool = False,
-    ) -> tuple[torch.Tensor, torch.Tensor]:
+    ) -> tuple[bool, torch.Tensor, float]:
         """Get grasp pose given approach direction.
 
         Uses the antipodal point pairs stored in ``self._hit_point_pairs``
@@ -603,19 +627,20 @@ def get_grasp_poses(
                 after computation.
 
         Returns:
-            A tuple ``(best_grasp_pose, best_open_length)`` where
-            ``best_grasp_pose`` is a ``(4, 4)`` homogeneous matrix and
-            ``best_open_length`` is a scalar.
+            is_success (bool): Whether a valid grasp pose is found.
+            best_grasp_pose (torch.Tensor): If a valid grasp pose is found, a tensor of shape (4, 4) representing the homogeneous transformation matrix of the best grasp pose in the world frame. Otherwise, an identity matrix.
+            best_open_length (float): If a valid grasp pose is found, a scalar representing the optimal gripper opening length. Otherwise, a zero tensor.
 
         Raises:
             RuntimeError: If :meth:`generate` or :meth:`annotate` has not
                 been called yet.
         """
         if self._hit_point_pairs is None:
-            raise RuntimeError(
+            logger.log_warning(
                 "No antipodal point pairs available. "
                 "Call generate() or annotate() first."
             )
+            return False, torch.eye(4, device=self.device), 0.0
         origin_points = self._hit_point_pairs[:, 0, :]
         hit_points = self._hit_point_pairs[:, 1, :]
         origin_points_ = self._apply_transform(origin_points, object_pose)
@@ -632,6 +657,10 @@ def get_grasp_poses(
         valid_mask = (
             positive_angle - torch.pi / 2
         ).abs() <= self.cfg.max_deviation_angle
+        if valid_mask.sum() == 0:
+            logger.log_warning("No valid antipodal pairs after angle filtering.")
+            return False, torch.eye(4, device=self.device), 0.0
+
         valid_grasp_x = grasp_x[valid_mask]
         valid_centers = centers[valid_mask]
 
@@ -647,9 +676,13 @@ def get_grasp_poses(
             object_pose,
             valid_grasp_poses,
             valid_open_lengths,
+            is_filter_ground_collision=self.cfg.is_filter_ground_collision,
             is_visual=visualize_collision,
             collision_threshold=0.0,
         )
+        if is_colliding.logical_not().sum() == 0:
+            logger.log_warning("No valid antipodal pairs after angle filtering.")
+            return False, torch.eye(4, device=self.device), 0.0
         # get best grasp pose
         valid_grasp_poses = valid_grasp_poses[~is_colliding]
         valid_open_lengths = valid_open_lengths[~is_colliding]
@@ -674,7 +707,7 @@ def get_grasp_poses(
                 grasp_pose=best_grasp_pose,
                 open_length=best_open_length.item(),
             )
-        return best_grasp_pose, best_open_length
+        return True, best_grasp_pose, best_open_length
 
     @staticmethod
     def _grasp_pose_from_approach_direction(
diff --git a/embodichain/toolkits/graspkit/pg_grasp/collision_checker.py b/embodichain/toolkits/graspkit/pg_grasp/collision_checker.py
index fcbfb850..f3b09014 100644
--- a/embodichain/toolkits/graspkit/pg_grasp/collision_checker.py
+++ b/embodichain/toolkits/graspkit/pg_grasp/collision_checker.py
@@ -192,7 +192,10 @@ def query_batch_points(
             collision_threshold: Collision threshold in meters. A point is considered colliding if its signed distance to the hull interior is <= this threshold. This allows for a margin of error in collision checking, where a small positive threshold can be used to consider points near the surface as colliding, and a small negative threshold can be used to allow for slight penetration without considering it a collision.
             is_visual: Whether to visualize the collision checking results for debugging purposes. If set to True, the code will generate visualizations of the query points colored by their collision status (e.g., red for colliding points and green for non-colliding points) along with the original mesh. This can help in understanding and verifying the collision checking process, especially during development and testing.
         Returns:
-            is_pose_collide: [B, ] boolean tensor indicating whether each point cloud in the
+            is_point_collide: [B, n_point] boolean tensor indicating whether a point cloud is collided.
+            point_signed_distance: [B, n_point] of float. Signed distance from the point cloud to the object surface.
+                Negative means the point cloud is penetrating into the object,
+                positive means the point cloud is outside the object.
         """
         n_batch = batch_points.shape[0]
         point_signed_distance, is_point_collide = (
@@ -204,31 +207,7 @@ def query_batch_points(
                 collision_threshold=collision_threshold,
             )
         )
-        is_pose_collide = is_point_collide.any(dim=-1)  # [B]
-        pose_surface_distance = point_signed_distance.min(dim=-1).values  # [B]
-        if is_visual:
-            # visualize result
-            frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1)
-            for i in range(n_batch):
-                query_points_o3d = o3d.geometry.PointCloud()
-                query_points_np = batch_points[i].cpu().numpy()
-                query_points_o3d.points = o3d.utility.Vector3dVector(query_points_np)
-                query_points_color = np.zeros_like(query_points_np)
-                query_points_color[is_point_collide[i].cpu().numpy()] = [
-                    1.0,
-                    0,
-                    0,
-                ]  # red for colliding points
-                query_points_color[~is_point_collide[i].cpu().numpy()] = [
-                    0,
-                    1.0,
-                    0,
-                ]  # green for non-colliding points
-                query_points_o3d.colors = o3d.utility.Vector3dVector(query_points_color)
-                o3d.visualization.draw_geometries(
-                    [self.mesh, query_points_o3d, frame], mesh_show_back_face=True
-                )
-        return is_pose_collide, pose_surface_distance
+        return is_point_collide, point_signed_distance
 
     def query(
         self,
diff --git a/embodichain/toolkits/graspkit/pg_grasp/gripper_collision_checker.py b/embodichain/toolkits/graspkit/pg_grasp/gripper_collision_checker.py
index 5f02176c..b4d77c43 100644
--- a/embodichain/toolkits/graspkit/pg_grasp/gripper_collision_checker.py
+++ b/embodichain/toolkits/graspkit/pg_grasp/gripper_collision_checker.py
@@ -17,7 +17,8 @@
 from __future__ import annotations
 
 import torch
-
+import open3d as o3d
+import numpy as np
 from typing import Sequence
 
 from embodichain.utils import configclass
@@ -93,6 +94,7 @@ def __init__(
             base_mesh_faces=object_mesh_faces,
             max_decomposition_hulls=cfg.max_decomposition_hulls,
         )
+        self.obj_mesh_verts = object_mesh_verts
         self.device = object_mesh_verts.device
         self.cfg = cfg
         self._init_pc_template()
@@ -152,24 +154,89 @@ def _get_gripper_pc(
         gripper_pc = torch.cat([root_pc, left_pc, right_pc], dim=1)
         return gripper_pc
 
+    def get_ground_height(self, obj_pose: torch.Tensor) -> float:
+        obj_r = obj_pose[:3, :3]
+        obj_t = obj_pose[:3, 3]
+        # obj_verts_world = (obj_r @ self.obj_mesh_verts.T).T + obj_t
+        obj_verts_world = self.obj_mesh_verts @ obj_r.T + obj_t
+        min_z = obj_verts_world[:, 2].min().item()
+        return min_z
+
     def query(
         self,
         obj_pose: torch.Tensor,
         grasp_poses: torch.Tensor,
         open_lengths: torch.Tensor,
         collision_threshold: float = 0.0,
+        is_filter_ground_collision: bool = True,
         is_visual: bool = False,
     ) -> torch.Tensor:
+        """query the collision status of the gripper with the object.
+        The gripper is represented as a point cloud generated from the grasp poses and
+        open lengths, and the collision status is determined by checking the distance
+        between the gripper points and the object mesh.
+
+        Args:
+            obj_pose (torch.Tensor): [4, 4] of float. The homogeneous transformation matrix of the object pose in the world frame.
+            grasp_poses (torch.Tensor): [B, 4, 4] of float. The homogeneous transformation matrices of the gripper root frame for B grasp poses.
+            open_lengths (torch.Tensor): [B, ] of float. The opening lengths of the gripper fingers for B grasp poses.
+            collision_threshold (float, optional): Collision distance threshold. Defaults to 0.0.
+            is_visual (bool, optional): whether to visualize collision result. Defaults to False.
+
+        Returns:
+            torch.Tensor: [B, ] boolean tensor indicating whether a grasp pose is collided.
+        """
         inv_obj_pose = obj_pose.clone()
         inv_obj_pose[:3, :3] = obj_pose[:3, :3].T
         inv_obj_pose[:3, 3] = -obj_pose[:3, 3] @ obj_pose[:3, :3]
         inv_obj_poses = inv_obj_pose[None, :, :].repeat(grasp_poses.shape[0], 1, 1)
         grasp_relative_pose = torch.bmm(inv_obj_poses, grasp_poses)
-        gripper_pc = self._get_gripper_pc(grasp_relative_pose, open_lengths)
-        return self._checker.query_batch_points(
-            gripper_pc, collision_threshold=collision_threshold, is_visual=is_visual
+        gripper_pc_obj = self._get_gripper_pc(grasp_relative_pose, open_lengths)
+        is_obj_gripper_collided, obj_gripper_dis = self._checker.query_batch_points(
+            gripper_pc_obj, collision_threshold=collision_threshold, is_visual=is_visual
         )
 
+        if is_filter_ground_collision:
+            gripper_pc_world = self._get_gripper_pc(grasp_poses, open_lengths)
+            ground_height = self.get_ground_height(obj_pose)
+            gripper_ground_dis = gripper_pc_world[:, :, 2] - ground_height
+            is_gripper_ground_collided = gripper_ground_dis < collision_threshold
+
+            is_gripper_collided = torch.logical_or(
+                is_obj_gripper_collided, is_gripper_ground_collided
+            )
+            gripper_dis = torch.min(obj_gripper_dis, gripper_ground_dis)
+        else:
+            is_gripper_collided = is_obj_gripper_collided
+            gripper_dis = obj_gripper_dis
+
+        if is_visual:
+            n_batch = grasp_poses.shape[0]
+            # visualize all collision result
+            frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1)
+            for i in range(n_batch):
+                query_points_o3d = o3d.geometry.PointCloud()
+                query_points_np = gripper_pc_obj[i].cpu().numpy()
+                query_points_o3d.points = o3d.utility.Vector3dVector(query_points_np)
+                query_points_color = np.zeros_like(query_points_np)
+                query_points_color[is_gripper_collided[i].cpu().numpy()] = [
+                    1.0,
+                    0,
+                    0,
+                ]  # red for colliding points
+                query_points_color[~is_gripper_collided[i].cpu().numpy()] = [
+                    0,
+                    1.0,
+                    0,
+                ]  # green for non-colliding points
+                query_points_o3d.colors = o3d.utility.Vector3dVector(query_points_color)
+                o3d.visualization.draw_geometries(
+                    [self._checker.mesh, query_points_o3d, frame],
+                    mesh_show_back_face=True,
+                )
+
+        return is_obj_gripper_collided.any(dim=1), obj_gripper_dis.min(dim=1).values
+
 
 def box_surface_grid(
     size: Sequence[float] | torch.Tensor,
diff --git a/embodichain/toolkits/urdf_assembly/component.py b/embodichain/toolkits/urdf_assembly/component.py
index 211ecf18..ae027224 100644
--- a/embodichain/toolkits/urdf_assembly/component.py
+++ b/embodichain/toolkits/urdf_assembly/component.py
@@ -25,7 +25,7 @@
     URDFAssemblyLogger,
 )
 from embodichain.toolkits.urdf_assembly.mesh import URDFMeshManager
-
+from embodichain.toolkits.urdf_assembly.name_normalizer import NameNormalizer
 
 __all__ = ["ComponentRegistry", "URDFComponent", "URDFComponentManager"]
 
@@ -83,12 +83,40 @@ def __post_init__(self):
 
 
 class URDFComponentManager:
-    """Responsible for loading, renaming, and processing meshes for a single component."""
+    """Responsible for loading, renaming, and processing meshes for a single component.
+
+    This manager normalizes link and joint names according to a configurable
+    case policy so that the overall assembly naming scheme can be controlled
+    centrally (e.g. all links lowercase, all joints uppercase).
+    """
+
+    def __init__(
+        self,
+        mesh_manager: URDFMeshManager,
+        name_case: dict[str, str] | None = None,
+    ):
+        """Create a component manager.
+
+        Args:
+            mesh_manager (URDFMeshManager): Mesh manager used for copying and
+                rewriting mesh references.
+            name_case (dict[str, str] | None): Optional mapping controlling
+                how joint and link names are normalized. Supported keys are
+                ``"joint"`` and ``"link"`` with values ``"upper``,
+                ``"lower"`` or ``"none"``. When omitted, joints are
+                uppercased and links are lowercased (the previous default
+                behavior).
+        """
 
-    def __init__(self, mesh_manager: URDFMeshManager):
         self.mesh_manager = mesh_manager
         self.logger = URDFAssemblyLogger.get_logger("component_manager")
 
+        self.name_normalizer = NameNormalizer(name_case)
+
+    def _apply_case(self, kind: str, name: str | None) -> str | None:
+        """Normalize a name using the NameNormalizer."""
+        return self.name_normalizer.normalize(kind, name)
+
     def process_component(
         self,
         comp: str,
@@ -119,12 +147,12 @@ def process_component(
 
             # Safe way to get link and joint names, handling None values
             global_link_names = {
-                link.get("name").lower()
+                self._apply_case("link", link.get("name"))
                 for link in links
                 if link.get("name") is not None
             }
             global_joint_names = {
-                joint.get("name").upper()
+                self._apply_case("joint", joint.get("name"))
                 for joint in joints
                 if joint.get("name") is not None
             }
@@ -143,15 +171,19 @@ def process_component(
 
                 # Generate unique name
                 if prefix:
-                    new_name = self._generate_unique_name(
-                        orig_name, prefix, global_link_names
-                    ).lower()
+                    new_name = self._apply_case(
+                        "link",
+                        self._generate_unique_name(
+                            orig_name, prefix, global_link_names
+                        ),
+                    )
                 else:
                     # For components without prefix, ensure names are unique
-                    if orig_name.lower() in global_link_names:
-                        new_name = f"{comp}_{orig_name}".lower()
+                    normalized_orig = self._apply_case("link", orig_name)
+                    if normalized_orig in global_link_names:
+                        new_name = self._apply_case("link", f"{comp}_{orig_name}")
                     else:
-                        new_name = orig_name.lower()
+                        new_name = normalized_orig
 
                 global_link_names.add(new_name)
 
@@ -160,7 +192,7 @@ def process_component(
                     base_points[comp] = new_name
                     first_link_flag = False
 
-                # Update link name mapping and set link name to lowercase
+                # Update link name mapping and set link name according to policy
                 name_mapping[(comp, orig_name)] = new_name
                 link.set("name", new_name)
                 links.append(link)
@@ -176,9 +208,12 @@ def process_component(
                 if orig_joint_name is None:
                     continue
 
-                new_joint_name = self._generate_unique_name(
-                    orig_joint_name, prefix, global_joint_names
-                ).upper()
+                new_joint_name = self._apply_case(
+                    "joint",
+                    self._generate_unique_name(
+                        orig_joint_name, prefix, global_joint_names
+                    ),
+                )
                 global_joint_names.add(new_joint_name)
 
                 # Build the complete mapping table
@@ -192,16 +227,16 @@ def process_component(
                 # Set the new joint name
                 joint.set("name", new_joint_name)
 
-                # Update parent and child links to lowercase - with None checks
+                # Update parent and child links with case normalization - with None checks
                 parent_elem = joint.find("parent")
                 child_elem = joint.find("child")
 
                 if parent_elem is not None:
                     parent = parent_elem.get("link")
                     if parent is not None:
-                        new_parent_name = name_mapping.get(
-                            (comp, parent), parent
-                        ).lower()
+                        new_parent_name = self._apply_case(
+                            "link", name_mapping.get((comp, parent), parent)
+                        )
                         parent_elem.set("link", new_parent_name)
                     else:
                         self.logger.warning(
@@ -211,7 +246,9 @@ def process_component(
                 if child_elem is not None:
                     child = child_elem.get("link")
                     if child is not None:
-                        new_child_name = name_mapping.get((comp, child), child).lower()
+                        new_child_name = self._apply_case(
+                            "link", name_mapping.get((comp, child), child)
+                        )
                         child_elem.set("link", new_child_name)
                     else:
                         self.logger.warning(
@@ -270,10 +307,14 @@ def _generate_unique_name(
         if orig_name is None:
             orig_name = "unnamed"
 
+        # For uniqueness checks we always operate on a normalized form that is
+        # consistent with the link case policy. This keeps collisions and
+        # generated names aligned with how names are written back to the URDF.
+        base_name = orig_name
         if prefix and not orig_name.lower().startswith(prefix.lower()):
-            new_name = f"{prefix}{orig_name}".lower()
-        else:
-            new_name = orig_name.lower()
+            base_name = f"{prefix}{orig_name}"
+
+        new_name = base_name
 
         # Ensure the new name is unique
         if new_name in existing_names:
diff --git a/embodichain/toolkits/urdf_assembly/connection.py b/embodichain/toolkits/urdf_assembly/connection.py
index 4dad94a1..7309118c 100644
--- a/embodichain/toolkits/urdf_assembly/connection.py
+++ b/embodichain/toolkits/urdf_assembly/connection.py
@@ -14,30 +14,259 @@
 # limitations under the License.
 # ----------------------------------------------------------------------------
 
+from __future__ import annotations
+
 import xml.etree.ElementTree as ET
+from typing import Any
 
 from scipy.spatial.transform import Rotation as R
 
-from embodichain.toolkits.urdf_assembly.logging_utils import (
-    URDFAssemblyLogger,
-)
+from embodichain.toolkits.urdf_assembly.logging_utils import URDFAssemblyLogger
+from embodichain.toolkits.urdf_assembly.name_normalizer import NameNormalizer
 
 __all__ = ["URDFConnectionManager"]
 
 
 class URDFConnectionManager:
-    r"""
-    Responsible for managing connection rules between components and sensor attachments.
-    """
+    r"""Responsible for managing connection rules between components and sensor attachments."""
+
+    _DEFAULT_ORIGIN = {"xyz": "0 0 0", "rpy": "0 0 0"}
 
-    def __init__(self, base_link_name: str):
-        r"""Initialize the URDFConnectionManager.
+    def __init__(self, base_link_name: str, name_case: dict[str, str] | None = None):
+        """Initialize the URDFConnectionManager.
 
         Args:
-            base_link_name (str): The name of the base link to which the chassis or other components may be attached.
+            base_link_name: The name of the base link to which the chassis or other
+                components may be attached.
+            name_case: Optional mapping controlling how joint and link names are
+                normalized. Supported keys are ``"joint"`` and ``"link"`` with
+                values ``"upper"``, ``"lower"`` or ``"none"``.
+
+                When omitted, joints are uppercased and links are lowercased (the
+                previous default behavior).
         """
         self.base_link_name = base_link_name
         self.logger = URDFAssemblyLogger.get_logger("connection_manager")
+        self.name_normalizer = NameNormalizer(name_case)
+
+    def _apply_case(self, kind: str, name: str | None) -> str | None:
+        """Normalize a name using the NameNormalizer."""
+        return self.name_normalizer.normalize(kind, name)
+
+    @staticmethod
+    def _get_attr(obj: Any, key: str, default: Any = None) -> Any:
+        """Read attribute from object or key from dict."""
+        if obj is None:
+            return default
+        if isinstance(obj, dict):
+            return obj.get(key, default)
+        return getattr(obj, key, default)
+
+    @staticmethod
+    def _format_scalar(value: Any) -> str:
+        """Format scalar values for URDF attribute strings."""
+        try:
+            f = float(value)
+        except Exception:
+            return "0"
+
+        # Keep strings stable and compact (avoid long repr / numpy scalars).
+        s = f"{f:.6f}".rstrip("0").rstrip(".")
+        return s if s else "0"
+
+    def _format_vec3(self, vec3: Any) -> str:
+        """Format a 3D vector as URDF 'x y z' string."""
+        try:
+            x, y, z = vec3[0], vec3[1], vec3[2]
+        except Exception:
+            return "0 0 0"
+        return f"{self._format_scalar(x)} {self._format_scalar(y)} {self._format_scalar(z)}"
+
+    def _origin_kwargs_from_transform(self, transform: Any | None) -> dict[str, str]:
+        """Convert a 4x4 transform matrix to URDF origin attributes."""
+        if transform is None:
+            return dict(self._DEFAULT_ORIGIN)
+
+        try:
+            xyz = transform[:3, 3]
+            rotation = R.from_matrix(transform[:3, :3])
+            rpy = rotation.as_euler("xyz")
+        except Exception as exc:
+            self.logger.warning(f"Invalid transform, fallback to identity: {exc}")
+            return dict(self._DEFAULT_ORIGIN)
+
+        return {"xyz": self._format_vec3(xyz), "rpy": self._format_vec3(rpy)}
+
+    @staticmethod
+    def _make_unique(base: str, existing: set[str]) -> str:
+        """Make a unique name by appending suffixes when needed."""
+        if base not in existing:
+            return base
+        idx = 1
+        while f"{base}_{idx}" in existing:
+            idx += 1
+        return f"{base}_{idx}"
+
+    def _collect_existing_joint_names(self, joints: list) -> set[str]:
+        names: set[str] = set()
+        for joint in joints:
+            if not hasattr(joint, "get"):
+                continue
+            raw = joint.get("name")
+            if not raw:
+                continue
+            normalized = self._apply_case("joint", raw)
+            if normalized:
+                names.add(normalized)
+        return names
+
+    def _append_fixed_joint(
+        self,
+        joints: list,
+        existing_joint_names: set[str],
+        joint_name: str,
+        parent_link: str,
+        child_link: str,
+        origin_kwargs: dict[str, str] | None = None,
+    ) -> None:
+        """Append a fixed joint if it doesn't already exist."""
+        normalized_joint_name = self._apply_case("joint", joint_name)
+        if not normalized_joint_name:
+            self.logger.error(f"Empty joint name for joint_name={joint_name!r}")
+            return
+
+        if normalized_joint_name in existing_joint_names:
+            self.logger.warning(f"Duplicate joint: {normalized_joint_name}")
+            return
+
+        joint = ET.Element("joint", name=normalized_joint_name, type="fixed")
+        ET.SubElement(joint, "origin", **(origin_kwargs or dict(self._DEFAULT_ORIGIN)))
+        ET.SubElement(joint, "parent", link=parent_link)
+        ET.SubElement(joint, "child", link=child_link)
+
+        joints.append(joint)
+        existing_joint_names.add(normalized_joint_name)
+
+    def _normalize_link_or_none(self, link_name: str | None) -> str | None:
+        if not link_name:
+            return None
+        return self._apply_case("link", link_name)
+
+    def _connect_chassis_to_base(
+        self,
+        joints: list,
+        base_points: dict,
+        existing_joint_names: set[str],
+        chassis_component: str,
+    ) -> bool:
+        if chassis_component not in base_points:
+            return False
+
+        chassis_first_link = self._normalize_link_or_none(
+            base_points.get(chassis_component)
+        )
+        if not chassis_first_link:
+            self.logger.error("Invalid chassis base link (None)")
+            return True
+
+        self._append_fixed_joint(
+            joints=joints,
+            existing_joint_names=existing_joint_names,
+            joint_name=f"BASE_LINK_TO_{chassis_component}_CONNECTOR",
+            parent_link=self.base_link_name,
+            child_link=chassis_first_link,
+        )
+        self.logger.info(
+            f"[{chassis_component.capitalize()}] connected to [base_link] via ({chassis_first_link})"
+        )
+        return True
+
+    def _connect_orphan_components_to_base(
+        self,
+        joints: list,
+        base_points: dict,
+        connection_rules: list,
+        component_transforms: dict,
+        existing_joint_names: set[str],
+    ) -> None:
+        # Find components that don't have parents in connection_rules
+        components_with_parents = {child for parent, child in connection_rules}
+        orphan_components = [
+            comp for comp in base_points.keys() if comp not in components_with_parents
+        ]
+
+        for comp in orphan_components:
+            comp_first_link = self._normalize_link_or_none(base_points.get(comp))
+            if not comp_first_link:
+                self.logger.error(f"Invalid base link for component [{comp}]")
+                continue
+
+            origin_kwargs = self._origin_kwargs_from_transform(
+                component_transforms.get(comp)
+            )
+            if comp in component_transforms:
+                self.logger.info(
+                    f"Applied transform to base connection {comp}: {origin_kwargs}"
+                )
+
+            self._append_fixed_joint(
+                joints=joints,
+                existing_joint_names=existing_joint_names,
+                joint_name=f"BASE_TO_{comp}_CONNECTOR",
+                parent_link=self.base_link_name,
+                child_link=comp_first_link,
+                origin_kwargs=origin_kwargs,
+            )
+
+            self.logger.info(
+                f"[{comp.capitalize()}] connected to [base_link] via ({comp_first_link})"
+            )
+
+    def _connect_component_pair(
+        self,
+        joints: list,
+        base_points: dict,
+        parent_attach_points: dict,
+        parent: str,
+        child: str,
+        component_transforms: dict,
+        existing_joint_names: set[str],
+    ) -> None:
+        if parent not in parent_attach_points or child not in base_points:
+            self.logger.error(f"Invalid connection rule: {parent} -> {child}")
+            return
+
+        parent_connect_link = self._normalize_link_or_none(
+            parent_attach_points.get(parent)
+        )
+        child_connect_link = self._normalize_link_or_none(base_points.get(child))
+
+        if not parent_connect_link or not child_connect_link:
+            self.logger.error(
+                f"Invalid link in connection: {parent} ({parent_connect_link}) -> {child} ({child_connect_link})"
+            )
+            return
+
+        self.logger.info(
+            f"Connecting [{parent}]-({parent_connect_link}) to [{child}]-({child_connect_link})"
+        )
+
+        origin_kwargs = self._origin_kwargs_from_transform(
+            component_transforms.get(child)
+        )
+        if child in component_transforms:
+            self.logger.info(
+                f"Applied transform to connection {parent} -> {child}: {origin_kwargs}"
+            )
+
+        self._append_fixed_joint(
+            joints=joints,
+            existing_joint_names=existing_joint_names,
+            joint_name=self._apply_case("joint", f"{parent}_TO_{child}_CONNECTOR"),
+            parent_link=parent_connect_link,
+            child_link=child_connect_link,
+            origin_kwargs=origin_kwargs,
+        )
 
     def add_connections(
         self,
@@ -45,168 +274,195 @@ def add_connections(
         base_points: dict,
         parent_attach_points: dict,
         connection_rules: list,
-        component_transforms: dict = None,
-    ):
+        component_transforms: dict | None = None,
+    ) -> None:
         r"""Add connection joints between robot components according to the specified rules.
 
         Args:
-            joints (list): A list to collect joint elements.
-            base_points (dict): A mapping from component names to their child connection link names.
-            parent_attach_points (dict): A mapping from component names to their parent connection link names.
-            connection_rules (list): A list of (parent, child) tuples specifying connection relationships.
-            component_transforms (dict): Optional mapping from component names to their transform matrices.
+            joints: A list to collect joint elements.
+            base_points: Mapping from component names to their child connection link names.
+            parent_attach_points: Mapping from component names to their parent connection link names.
+            connection_rules: A list of (parent, child) tuples specifying connection relationships.
+            component_transforms: Optional mapping from component names to their 4x4 transform matrices.
         """
         chassis_component = "chassis"
         component_transforms = component_transforms or {}
 
-        existing_joint_names = {
-            joint.get("name") for joint in joints if hasattr(joint, "get")
-        }
+        existing_joint_names = self._collect_existing_joint_names(joints)
 
         # chassis is always attached to base_link (no transform applied to this connection)
-        if chassis_component in base_points:
-            chassis_first_link = base_points[chassis_component]
-            joint_name = f"BASE_LINK_TO_{chassis_component.upper()}_CONNECTOR"
-            if joint_name not in existing_joint_names:
-                joint = ET.Element("joint", name=joint_name, type="fixed")
-                ET.SubElement(joint, "origin", xyz="0 0 0", rpy="0 0 0")
-                ET.SubElement(joint, "parent", link=self.base_link_name)
-                ET.SubElement(joint, "child", link=chassis_first_link)
-                joints.append(joint)
-                existing_joint_names.add(joint_name)
-                self.logger.info(
-                    f"[{chassis_component.capitalize()}] connected to [base_link] via ({chassis_first_link})"
-                )
-        else:
+        if not self._connect_chassis_to_base(
+            joints=joints,
+            base_points=base_points,
+            existing_joint_names=existing_joint_names,
+            chassis_component=chassis_component,
+        ):
             # If no chassis, connect components directly to base_link with their transforms
             self.logger.info(
                 "No chassis found, connecting components directly to base_link"
             )
-
-            # Find components that don't have parents in connection_rules
-            components_with_parents = {child for parent, child in connection_rules}
-            orphan_components = [
-                comp
-                for comp in base_points.keys()
-                if comp not in components_with_parents
-            ]
-
-            for comp in orphan_components:
-                comp_first_link = base_points[comp]
-                joint_name = f"BASE_TO_{comp.upper()}_CONNECTOR"
-
-                if joint_name not in existing_joint_names:
-                    joint = ET.Element("joint", name=joint_name, type="fixed")
-
-                    # Apply transform to this specific connection if the component has one
-                    if comp in component_transforms:
-                        transform = component_transforms[comp]
-                        xyz = transform[:3, 3]  # Extract translation
-                        rotation = R.from_matrix(transform[:3, :3])
-                        rpy = rotation.as_euler("xyz")
-
-                        ET.SubElement(
-                            joint,
-                            "origin",
-                            xyz=f"{xyz[0]} {xyz[1]} {xyz[2]}",
-                            rpy=f"{rpy[0]} {rpy[1]} {rpy[2]}",
-                        )
-                        self.logger.info(
-                            f"Applied transform to base connection {comp}: xyz={xyz}, rpy={rpy}"
-                        )
-                    else:
-                        ET.SubElement(joint, "origin", xyz="0 0 0", rpy="0 0 0")
-
-                    ET.SubElement(joint, "parent", link=self.base_link_name)
-                    ET.SubElement(joint, "child", link=comp_first_link)
-                    joints.append(joint)
-                    existing_joint_names.add(joint_name)
-
-                    self.logger.info(
-                        f"[{comp.capitalize()}] connected to [base_link] via ({comp_first_link})"
-                    )
+            self._connect_orphan_components_to_base(
+                joints=joints,
+                base_points=base_points,
+                connection_rules=connection_rules,
+                component_transforms=component_transforms,
+                existing_joint_names=existing_joint_names,
+            )
 
         # Process other connection relationships
         for parent, child in connection_rules:
-            if parent in parent_attach_points and child in base_points:
-                parent_connect_link = parent_attach_points[parent].lower()
-                child_connect_link = base_points[child].lower()
+            self._connect_component_pair(
+                joints=joints,
+                base_points=base_points,
+                parent_attach_points=parent_attach_points,
+                parent=parent,
+                child=child,
+                component_transforms=component_transforms,
+                existing_joint_names=existing_joint_names,
+            )
 
-                self.logger.info(
-                    f"Connecting [{parent}]-({parent_connect_link}) to [{child}]-({child_connect_link})"
-                )
+    def add_sensor_attachments(
+        self, links: list, joints: list, attach_dict: dict, base_points: dict
+    ) -> None:
+        r"""Attach sensors by adding their URDF links/joints and creating a fixed connector.
 
-                # Create a unique joint name
-                base_joint_name = f"{parent.upper()}_TO_{child.upper()}_CONNECTOR"
-                if base_joint_name not in existing_joint_names:
-                    joint = ET.Element("joint", name=base_joint_name, type="fixed")
-
-                    # Apply transform to this specific connection if the child component has one
-                    if child in component_transforms:
-                        transform = component_transforms[child]
-                        xyz = transform[:3, 3]  # Extract translation
-                        rotation = R.from_matrix(transform[:3, :3])
-                        rpy = rotation.as_euler("xyz")
-
-                        ET.SubElement(
-                            joint,
-                            "origin",
-                            xyz=f"{xyz[0]} {xyz[1]} {xyz[2]}",
-                            rpy=f"{rpy[0]} {rpy[1]} {rpy[2]}",
-                        )
-                        self.logger.info(
-                            f"Applied transform to connection {parent} -> {child}: xyz={xyz}, rpy={rpy}"
-                        )
-                    else:
-                        ET.SubElement(joint, "origin", xyz="0 0 0", rpy="0 0 0")
-
-                    ET.SubElement(joint, "parent", link=parent_connect_link)
-                    ET.SubElement(joint, "child", link=child_connect_link)
-                    joints.append(joint)
-                    existing_joint_names.add(base_joint_name)
-                else:
-                    self.logger.warning(
-                        f"Duplicate connection rule: {parent} -> {child}"
-                    )
-            else:
-                self.logger.error(f"Invalid connection rule: {parent} -> {child}")
+        .. attention::
+            This is a legacy helper kept for backward compatibility. Newer code paths
+            use :class:`URDFSensorManager`.
+
+        Args:
+            links: Global list to collect sensor link elements.
+            joints: Global list to collect sensor joint elements.
+            attach_dict: Mapping from sensor names to attachment configs.
+            base_points: Mapping from component names to their base link names.
+        """
+        existing_link_names = {
+            self._apply_case("link", link.get("name"))
+            for link in links
+            if hasattr(link, "get") and link.get("name")
+        }
+        existing_link_names.discard(None)
+
+        existing_joint_names = self._collect_existing_joint_names(joints)
 
-    def add_sensor_attachments(
-        self, joints: list, attach_dict: dict, base_points: dict
-    ):
-        r"""Attach sensors to the robot by creating fixed joints."""
         for sensor_name, attach in attach_dict.items():
-            sensor_urdf = ET.parse(attach.sensor_urdf).getroot()
+            sensor_urdf_path = self._get_attr(attach, "sensor_urdf")
+            if not sensor_urdf_path:
+                self.logger.error(f"Sensor [{sensor_name}] has no sensor_urdf")
+                continue
 
-            # Add sensor links and joints to the main lists
+            try:
+                sensor_urdf = ET.parse(sensor_urdf_path).getroot()
+            except Exception as exc:
+                self.logger.error(
+                    f"Failed to parse sensor URDF for [{sensor_name}]: {exc}"
+                )
+                continue
+
+            link_name_map: dict[str, str] = {}
+            processed_link_names: list[str] = []
+
+            # Add sensor links to the links list (ensure lowercase + uniqueness)
             for link in sensor_urdf.findall("link"):
-                # Ensure sensor link names are lowercase
-                link.set("name", link.get("name").lower())
-                joints.append(link)  # This should be added to links list instead
+                raw_name = link.get("name")
+                if not raw_name:
+                    continue
+
+                normalized_raw = self._apply_case("link", raw_name)
+                if not normalized_raw:
+                    continue
+
+                base_name = normalized_raw
+                sensor_suffix = str(sensor_name).lower()
+                if sensor_suffix and sensor_suffix not in base_name:
+                    base_name = f"{base_name}_{sensor_suffix}"
+
+                unique_name = self._make_unique(base_name, existing_link_names)
+                link.set("name", unique_name)
+
+                link_name_map[normalized_raw] = unique_name
+                processed_link_names.append(unique_name)
+                existing_link_names.add(unique_name)
+                links.append(link)
 
+            # Add sensor joints to the joints list (ensure uppercase + update link references)
             for joint in sensor_urdf.findall("joint"):
-                # Ensure sensor joint names are uppercase and link references are lowercase
-                joint.set("name", joint.get("name").upper())
+                raw_joint_name = joint.get("name") or "sensor_joint"
+
+                normalized_joint_name = self._apply_case(
+                    "joint", f"{sensor_name}_{raw_joint_name}"
+                )
+                if not normalized_joint_name:
+                    continue
+
+                normalized_joint_name = self._make_unique(
+                    normalized_joint_name, existing_joint_names
+                )
+                joint.set("name", normalized_joint_name)
+
                 parent_elem = joint.find("parent")
                 child_elem = joint.find("child")
+
                 if parent_elem is not None:
-                    parent_elem.set("link", parent_elem.get("link").lower())
+                    raw_parent = parent_elem.get("link")
+                    normalized_parent = self._apply_case("link", raw_parent)
+                    if normalized_parent and normalized_parent in link_name_map:
+                        parent_elem.set("link", link_name_map[normalized_parent])
+                    elif normalized_parent:
+                        parent_elem.set("link", normalized_parent)
+
                 if child_elem is not None:
-                    child_elem.set("link", child_elem.get("link").lower())
+                    raw_child = child_elem.get("link")
+                    normalized_child = self._apply_case("link", raw_child)
+                    if normalized_child and normalized_child in link_name_map:
+                        child_elem.set("link", link_name_map[normalized_child])
+                    elif normalized_child:
+                        child_elem.set("link", normalized_child)
+
                 joints.append(joint)
+                existing_joint_names.add(normalized_joint_name)
+
+            if not processed_link_names:
+                self.logger.error(f"Sensor [{sensor_name}] has no <link> elements")
+                continue
 
-            parent_link = base_points.get(
-                attach.parent_component, attach.parent_component
-            ).lower()  # Ensure lowercase
+            # Determine parent link: prefer explicit parent_link if provided.
+            parent_component = self._get_attr(attach, "parent_component")
+            raw_parent_link = self._get_attr(attach, "parent_link")
+            if raw_parent_link:
+                parent_link = self._apply_case("link", raw_parent_link)
+            else:
+                parent_link = self._apply_case(
+                    "link",
+                    base_points.get(parent_component, parent_component),
+                )
 
-            # Create connection joint with uppercase name
-            joint_name = (
-                f"{attach.parent_component.upper()}_TO_{sensor_name.upper()}_CONNECTOR"
+            if not parent_link:
+                self.logger.error(
+                    f"Invalid parent link for sensor [{sensor_name}] on component [{parent_component}]"
+                )
+                continue
+
+            # Create connector joint (apply transform if provided by attachment).
+            origin_kwargs = self._origin_kwargs_from_transform(
+                self._get_attr(attach, "transform")
             )
-            joint = ET.Element("joint", name=joint_name, type="fixed")
-            ET.SubElement(joint, "origin", xyz="0 0 0", rpy="0 0 0")
-            ET.SubElement(joint, "parent", link=parent_link)
-            ET.SubElement(
-                joint, "child", link=sensor_urdf.find("link").get("name").lower()
+
+            connector_joint_name = self._make_unique(
+                self._apply_case(
+                    "joint", f"{parent_component}_TO_{sensor_name}_CONNECTOR"
+                )
+                or self._apply_case(
+                    "joint", f"{parent_component}_TO_{sensor_name}_CONNECTOR".upper()
+                ),
+                existing_joint_names,
+            )
+
+            self._append_fixed_joint(
+                joints=joints,
+                existing_joint_names=existing_joint_names,
+                joint_name=connector_joint_name,
+                parent_link=parent_link,
+                child_link=processed_link_names[0],
+                origin_kwargs=origin_kwargs,
             )
-            joints.append(joint)
diff --git a/embodichain/toolkits/urdf_assembly/file_writer.py b/embodichain/toolkits/urdf_assembly/file_writer.py
index 4ddcd3fe..f1898f58 100644
--- a/embodichain/toolkits/urdf_assembly/file_writer.py
+++ b/embodichain/toolkits/urdf_assembly/file_writer.py
@@ -127,7 +127,7 @@ def generate_header(
         now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 
         # Calculate proper spacing for centered content
-        header_width = 80
+        header_width = 120
         separator_line = "<!--" + "=" * (header_width - 8) + "-->"
 
         def center_comment(text: str) -> str:
diff --git a/embodichain/toolkits/urdf_assembly/name_normalizer.py b/embodichain/toolkits/urdf_assembly/name_normalizer.py
new file mode 100644
index 00000000..ffd9ee16
--- /dev/null
+++ b/embodichain/toolkits/urdf_assembly/name_normalizer.py
@@ -0,0 +1,77 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+
+class NameNormalizer:
+    """Handles name normalization for different entity types."""
+
+    VALID_KEYS = {"joint", "link"}
+    VALID_MODES = {"upper", "lower", "none"}
+
+    def __init__(self, default_case: dict[str, str] | None = None):
+        """Initialize the NameNormalizer with default cases.
+
+        Args:
+            default_case (dict[str, str] | None): Default normalization modes for "joint" and "link".
+        """
+        self._name_case = {
+            "joint": "upper",
+            "link": "lower",
+        }
+        if default_case:
+            for key, mode in default_case.items():
+                if key in self.VALID_KEYS and mode in self.VALID_MODES:
+                    self._name_case[key] = mode
+                else:
+                    raise ValueError(
+                        f"Invalid default_case entry {key}={mode}. "
+                        f"Allowed keys: {self.VALID_KEYS}, allowed modes: {self.VALID_MODES}."
+                    )
+
+    def set_case(self, key: str, mode: str):
+        """Set the normalization mode for a specific key.
+
+        Args:
+            key (str): The entity type ("joint" or "link").
+            mode (str): The normalization mode ("upper", "lower", "none").
+        """
+        if key in self.VALID_KEYS and mode in self.VALID_MODES:
+            self._name_case[key] = mode
+        else:
+            raise ValueError(
+                f"Invalid key or mode: {key}={mode}. "
+                f"Allowed keys: {self.VALID_KEYS}, allowed modes: {self.VALID_MODES}."
+            )
+
+    def normalize(self, kind: str, name: str | None) -> str | None:
+        """Normalize a name according to the configured case policy.
+
+        Args:
+            kind (str): One of "joint" or "link".
+            name (str | None): The original name.
+
+        Returns:
+            str | None: The normalized name, or the original value if kind is unknown or mode is "none".
+        """
+        if name is None:
+            return None
+
+        mode = self._name_case.get(kind, "none")
+        if mode == "lower":
+            return name.lower()
+        if mode == "upper":
+            return name.upper()
+        return name
diff --git a/embodichain/toolkits/urdf_assembly/signature.py b/embodichain/toolkits/urdf_assembly/signature.py
index 3ebbd73a..27a56521 100644
--- a/embodichain/toolkits/urdf_assembly/signature.py
+++ b/embodichain/toolkits/urdf_assembly/signature.py
@@ -62,6 +62,12 @@ def calculate_assembly_signature(self, urdf_dict: dict, output_path: str) -> str
         signature_data = {
             "output_filename": os.path.basename(output_path),
             "components": {},
+            # Optional metadata that can affect the assembly even if the
+            # component URDF files themselves do not change. For example,
+            # the processing order and name prefixes for each component,
+            # and the global casing policy for links/joints.
+            "component_order_and_prefix": [],
+            "name_case": {},
         }
 
         def to_serializable(obj):
@@ -85,8 +91,20 @@ def to_serializable(obj):
             else:
                 return obj
 
-        # Process each component
+        # Process each entry passed in from the assembly manager. Most entries
+        # are components (with URDF files), but some may be metadata such as
+        # the component_order_and_prefix or name_case used during assembly.
         for comp_type, comp_obj in urdf_dict.items():
+            # Special key reserved for component order/prefix metadata
+            if comp_type == "__component_order_and_prefix__":
+                signature_data["component_order_and_prefix"] = to_serializable(comp_obj)
+                continue
+
+            # Special key reserved for global name_case policy (link/joint casing)
+            if comp_type == "__name_case__":
+                signature_data["name_case"] = to_serializable(comp_obj)
+                continue
+
             if comp_obj is None:
                 continue
 
diff --git a/embodichain/toolkits/urdf_assembly/urdf_assembly_manager.py b/embodichain/toolkits/urdf_assembly/urdf_assembly_manager.py
index 9739faa9..4d9fb7b6 100644
--- a/embodichain/toolkits/urdf_assembly/urdf_assembly_manager.py
+++ b/embodichain/toolkits/urdf_assembly/urdf_assembly_manager.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 # ----------------------------------------------------------------------------
 
+import copy
 import os
 import time
 import logging
@@ -128,6 +129,15 @@ def __init__(
     ):
         self.logger = setup_urdf_logging()
 
+        # Global name normalization strategy for this assembly. By default,
+        # this preserves the legacy behavior: link names are lowercase and
+        # joint names are uppercase. The same mapping is passed down to
+        # managers that deal with naming so that the policy stays consistent.
+        self._name_case: dict[str, str] = {
+            "joint": "upper",
+            "link": "lower",
+        }
+
         # Use registries for components and sensors
         self.component_registry = component_registry or ComponentRegistry()
         self.sensor_registry = sensor_registry or SensorRegistry()
@@ -137,13 +147,13 @@ def __init__(
 
         # Initialize managers for components and sensors
         self.component_manager = component_manager or URDFComponentManager(
-            self.mesh_manager
+            self.mesh_manager, name_case=self._name_case
         )
         self.sensor_manager = sensor_manager or URDFSensorManager(self.mesh_manager)
 
         # Processing order for components with their name prefixes
         # Tuple format: (component_name, prefix)
-        self.component_order = [
+        self._component_order_and_prefix = [
             ("chassis", None),
             ("legs", None),
             ("torso", None),
@@ -205,6 +215,150 @@ def __init__(
         # Initialize signature manager instead of cache manager
         self.signature_manager = URDFAssemblySignatureManager()
 
+    @property
+    def name_case(self):
+        """Get the current name case policy for joints and links.
+
+        Returns:
+            dict[str, str]: A dictionary mapping 'joint' and 'link' to their respective case modes.
+        """
+        return self._name_case
+
+    @name_case.setter
+    def name_case(self, new_name_case: dict[str, str]):
+        """Set a new name case policy for joints and links.
+
+        This method updates the name case policy and propagates it to the component and sensor managers.
+
+        Args:
+            new_name_case (dict[str, str]): A dictionary mapping 'joint' and 'link' to their desired case modes (e.g., 'upper', 'lower', 'none').
+        """
+        if not isinstance(new_name_case, dict):
+            raise ValueError(
+                "name_case must be a dictionary mapping 'joint' and 'link' to case modes."
+            )
+        if "joint" not in new_name_case or "link" not in new_name_case:
+            raise ValueError("name_case must contain keys 'joint' and 'link'.")
+
+        self._name_case = new_name_case
+
+    def _apply_case(self, kind: str, name: str | None) -> str | None:
+        """Normalize a name according to the assembly-wide case policy.
+
+        This helper mirrors the behavior of the managers' own case helpers so
+        that any name sets computed here (e.g. for sensors) stay consistent
+        with how names are written into the URDF.
+
+        Args:
+            kind (str): One of ``"joint"`` or ``"link"``.
+            name (str | None): The original name.
+
+        Returns:
+            str | None: The normalized name, or the original value if the
+            kind is unknown or its mode is ``"none"``.
+        """
+
+        if name is None:
+            return None
+
+        mode = self._name_case.get(kind, "none")
+        if mode == "lower":
+            return name.lower()
+        if mode == "upper":
+            return name.upper()
+        return name
+
+    @property
+    def component_order_and_prefix(self):
+        """Get the internal component order with their name prefixes.
+
+        Note:
+            This exposes the internal list of ``(component_name, prefix)`` pairs
+            used when assembling URDFs. In most user code it is recommended to
+            use :attr:`component_prefix` instead, which focuses on configuring
+            prefixes rather than ordering.
+
+        Returns:
+            list[tuple[str, str | None]]: A list of tuples specifying component
+            names and their prefixes.
+        """
+        return self._component_order_and_prefix
+
+    @component_order_and_prefix.setter
+    def component_order_and_prefix(self, new_order):
+        """Set the internal component prefix configuration.
+        Args:
+            new_order: Value assigned directly to the internal
+                ``_component_order_and_prefix`` attribute, typically a list of
+                ``(component_name, prefix)`` tuples.
+        Note:
+            This setter performs no validation or patch-style merging; it
+            stores ``new_order`` as provided.
+        """
+        self._component_order_and_prefix = new_order
+
+    @property
+    def component_prefix(self):
+        """Configure name prefixes per component type.
+
+        This is a user-facing alias over :attr:`component_order_and_prefix`.
+
+        Semantics:
+            This setter is **patch-only**: it updates prefixes for components that
+            already exist in the current internal order and does **not** allow
+            introducing new component names.
+
+        Returns:
+            list[tuple[str, str | None]]: The internal list of
+            ``(component_name, prefix)`` pairs.
+        """
+
+        return self.component_order_and_prefix
+
+    @component_prefix.setter
+    def component_prefix(self, new_prefixes):
+        if not isinstance(new_prefixes, list) or not all(
+            isinstance(item, tuple) and len(item) == 2 for item in new_prefixes
+        ):
+            raise ValueError(
+                "component_prefix must be a list of (component_name, prefix) tuples."
+            )
+
+        # Treat new_prefixes as a patch on top of the existing/default order:
+        #  - For components already present in self._component_order_and_prefix, update their prefix.
+        #  - Preserve components that are not mentioned, keeping their relative order.
+        #
+        # Note: New/unknown component names are rejected to keep the assembly order
+        # controlled internally.
+
+        # Allowed components are exactly those already present in the default order.
+        existing_components = {comp for comp, _ in self._component_order_and_prefix}
+
+        # Build override map from the incoming list, but only for existing components.
+        override_map = {}
+        for comp, prefix in new_prefixes:
+            if not isinstance(comp, str):
+                raise ValueError("component name in component_prefix must be a string.")
+            if comp not in existing_components:
+                raise ValueError(
+                    f"component_prefix cannot introduce new component '{comp}'. "
+                    f"Allowed components: {sorted(existing_components)}"
+                )
+            override_map[comp] = prefix
+
+        merged_order: list[tuple[str, str | None]] = []
+
+        # First, walk the existing order and apply overrides where available.
+        # The relative order of components is kept internal and usually does
+        # not need to be changed by users.
+        for comp, prefix in self._component_order_and_prefix:
+            if comp in override_map:
+                merged_order.append((comp, override_map.pop(comp)))
+            else:
+                merged_order.append((comp, prefix))
+
+        self._component_order_and_prefix = merged_order
+
     def add_component(
         self,
         component_type: str,
@@ -536,6 +690,40 @@ def _find_end_link(
                 break  # No further links found in the chain
         return current_link
 
+    def _log_names_once(
+        self,
+        kind: str,
+        elems: list[ET.Element],
+        *,
+        max_items: int = 300,
+        max_chars: int = 8000,
+    ) -> None:
+        """Log element names in a single line (truncated)."""
+        names: list[str] = []
+        for e in elems:
+            n = e.get("name")
+            if n:
+                names.append(n)
+
+        total = len(names)
+        shown_names = names[:max_items]
+        text = ", ".join(shown_names)
+
+        truncated_items = max(0, total - len(shown_names))
+        truncated_chars = 0
+        if len(text) > max_chars:
+            text = text[:max_chars] + "..."
+            truncated_chars = 1
+
+        suffix_parts: list[str] = []
+        if truncated_items:
+            suffix_parts.append(f"truncated_items={truncated_items}")
+        if truncated_chars:
+            suffix_parts.append("truncated_chars=1")
+        suffix = f" ({', '.join(suffix_parts)})" if suffix_parts else ""
+
+        self.logger.info(f"[merge_urdfs] {kind}: count={total} names=[{text}]{suffix}")
+
     @performance_monitor
     def merge_urdfs(
         self,
@@ -563,6 +751,16 @@ def merge_urdfs(
         ]
         self.logger.info(f"🔧 Preparing to merge components: {available_components}")
 
+        order_items = " ".join(
+            f"[{comp}]({prefix})" for comp, prefix in self.component_order_and_prefix
+        )
+        self.logger.info(f"[component_order_and_prefix] {order_items}")
+
+        case_keys = [k for k in ("joint", "link") if k in self.name_case]
+        case_keys += [k for k in sorted(self.name_case) if k not in case_keys]
+        case_items = " ".join(f"[{k}]({self.name_case[k]})" for k in case_keys)
+        self.logger.info(f"[name_case] {case_items}")
+
         for comp in available_components:
             comp_obj = self.component_registry.get(comp)
             self.logger.info(f"  [{comp}]: {comp_obj.urdf_path}")
@@ -572,9 +770,21 @@ def merge_urdfs(
                 self.logger.debug(f"    Transform: applied")
 
         if use_signature_check:
-            # Calculate current assembly signature
+            # Calculate current assembly signature. In addition to the component
+            # registry contents, include the current component_order_and_prefix
+            # so that changes to name prefixes also invalidate the cache.
+            component_info = self.component_registry.all().copy()
+            component_info["__component_order_and_prefix__"] = list(
+                self.component_order_and_prefix
+            )
+            # Also include the assembly-wide name_case policy so that
+            # renaming rules (e.g. link/joint casing) participate in the
+            # signature. This ensures that changing naming strategy forces
+            # a rebuild.
+            component_info["__name_case__"] = dict(self._name_case)
+
             assembly_signature = self.signature_manager.calculate_assembly_signature(
-                self.component_registry.all(), output_path
+                component_info, output_path
             )
 
             self.logger.info(f"Current assembly signature: [{assembly_signature}]")
@@ -606,6 +816,46 @@ def merge_urdfs(
         robot_name = os.path.splitext(os.path.basename(output_path))[0]
         merged_urdf = ET.Element("robot", name=robot_name)
 
+        # Global <material> definitions live directly under <robot> and are not part
+        # of links/joints. To avoid polluting the merged URDF, we only merge global
+        # materials that are actually referenced by merged links' visuals.
+        materials: list[ET.Element] = []
+        material_names: set[str] = set()
+        material_sources: list[tuple[ET.Element, str]] = []
+
+        def _register_material_source(root: ET.Element, source: str) -> None:
+            material_sources.append((root, source))
+
+        def _merge_material_if_defined(mat_name: str) -> bool:
+            """Merge a global <material name=...> definition from known sources.
+
+            Only merges if the material is referenced and if a source URDF actually
+            defines it at the <robot> root. This prevents bringing in unused
+            materials from component URDFs.
+            """
+            if not mat_name or mat_name in material_names:
+                return False
+
+            matches: list[tuple[ET.Element, str]] = []
+            for root, source in material_sources:
+                for mat in root.findall("material"):
+                    if mat.get("name") == mat_name:
+                        matches.append((mat, source))
+
+            if not matches:
+                return False
+
+            if len(matches) > 1:
+                self.logger.debug(
+                    f"Material '{mat_name}' defined in multiple URDF sources; using the first: {matches[0][1]}"
+                )
+
+            mat, source = matches[0]
+            materials.append(copy.deepcopy(mat))
+            material_names.add(mat_name)
+            self.logger.debug(f"Merged referenced material '{mat_name}' from {source}")
+            return True
+
         # 2. Create single base link for the entire robot
         base_link = ET.Element("link", name=self.base_link_name)
         # Store links and joints separately for proper ordering
@@ -622,8 +872,12 @@ def merge_urdfs(
         ensure_directory_exists(output_dir, self.logger)
         mesh_manager = URDFMeshManager(output_dir)
         mesh_manager.ensure_dirs()
-        component_manager = URDFComponentManager(mesh_manager)
-        connection_manager = URDFConnectionManager(self.base_link_name)
+        component_manager = URDFComponentManager(
+            mesh_manager, name_case=self._name_case
+        )
+        connection_manager = URDFConnectionManager(
+            self.base_link_name, name_case=self._name_case
+        )
 
         # Initialize sensor manager with mesh_manager
         sensor_manager = URDFSensorManager(mesh_manager)
@@ -647,7 +901,7 @@ def merge_urdfs(
             if comp_obj and comp_obj.transform is not None:
                 component_transforms[comp] = comp_obj.transform
 
-        for comp, prefix in self.component_order:
+        for comp, prefix in self.component_order_and_prefix:
             comp_obj = self.component_registry.get(comp)
             if not comp_obj:
                 continue
@@ -658,6 +912,7 @@ def merge_urdfs(
 
             # Parse component URDF to analyze its structure
             urdf_root = ET.parse(comp_obj.urdf_path).getroot()
+            _register_material_source(urdf_root, str(comp_obj.urdf_path))
 
             # Determine parent component and attachment point for current component
             parent_component = None
@@ -747,16 +1002,32 @@ def merge_urdfs(
             component_transforms,
         )
 
-        # Track existing names for sensor processing
+        # Track existing names for sensor processing. Use the same case policy
+        # as the rest of the assembly so that collision checks are consistent
+        # with how names are written.
         existing_link_names = {
-            link.get("name").lower() for link in links if link.get("name")
+            self._apply_case("link", link.get("name"))
+            for link in links
+            if link.get("name")
         }
         existing_joint_names = {
-            joint.get("name").upper() for joint in joints if joint.get("name")
+            self._apply_case("joint", joint.get("name"))
+            for joint in joints
+            if joint.get("name")
         }
 
         # 5. Process sensor attachments using the new sensor manager
         for sensor_name, sensor_attach in self.sensor_registry.all().items():
+            # Register sensor URDF as a material source (do not merge materials eagerly).
+            try:
+                sensor_root = ET.parse(sensor_attach.sensor_urdf).getroot()
+            except Exception as exc:
+                self.logger.debug(
+                    f"Failed to parse sensor URDF for material sourcing ({sensor_attach.sensor_urdf}): {exc}"
+                )
+            else:
+                _register_material_source(sensor_root, str(sensor_attach.sensor_urdf))
+
             sensor_manager.attach_sensor(
                 sensor_name=sensor_name,
                 sensor_source=sensor_attach.sensor_urdf,
@@ -769,9 +1040,40 @@ def merge_urdfs(
             links, joints, base_points, existing_link_names, existing_joint_names
         )
 
-        # 6. Add all links and joints to merged URDF in proper order
+        # 6. Merge only the global materials that are actually referenced by merged links.
+        # If a link references <material name="X"/> but no source URDF defines a global
+        # <material name="X"> under <robot>, we warn but do not inject guessed fallbacks.
+        referenced_materials: set[str] = set()
+        for link in links:
+            for mat in link.findall(".//visual/material"):
+                mat_name = mat.get("name")
+                if not mat_name:
+                    continue
+                # A material with children is already defined inline.
+                if list(mat):
+                    continue
+                referenced_materials.add(mat_name)
+
+        missing_materials: list[str] = []
+        for mat_name in sorted(referenced_materials):
+            if mat_name in material_names:
+                continue
+            if not _merge_material_if_defined(mat_name):
+                missing_materials.append(mat_name)
+
+        for mat_name in missing_materials:
+            self.logger.warning(
+                f"Material '{mat_name}' referenced but not defined in any source URDF"
+            )
+
+        # Add global materials, then links/joints to merged URDF in proper order
+        for mat in materials:
+            merged_urdf.append(mat)
+
+        self._log_names_once("links", links)
         for link in links:
             merged_urdf.append(link)
+        self._log_names_once("joints", joints)
         for joint in joints:
             merged_urdf.append(joint)
 
diff --git a/embodichain/utils/__init__.py b/embodichain/utils/__init__.py
index 6285965f..b77db093 100644
--- a/embodichain/utils/__init__.py
+++ b/embodichain/utils/__init__.py
@@ -16,7 +16,6 @@
 
 from .configclass import configclass, is_configclass
 
-
 GLOBAL_SEED = 1024
 
 
diff --git a/embodichain/utils/configclass.py b/embodichain/utils/configclass.py
index c9f22ca5..7ca2671a 100644
--- a/embodichain/utils/configclass.py
+++ b/embodichain/utils/configclass.py
@@ -20,7 +20,6 @@
 from typing import Any, ClassVar
 from .string import callable_to_string, string_to_callable
 
-
 _CONFIGCLASS_METHODS = ["to_dict", "replace", "copy", "validate"]
 """List of class methods added at runtime to dataclass."""
 
diff --git a/embodichain/utils/math.py b/embodichain/utils/math.py
index caaa39d2..fbbe75f6 100644
--- a/embodichain/utils/math.py
+++ b/embodichain/utils/math.py
@@ -1219,9 +1219,9 @@ def transform_points_mat(
     Returns:
         transformed: [B, P, 3] transformed point cloud for each pose.
     """
-    R = poses[:, :3, :3]  # [B, 3, 3]
+    r = poses[:, :3, :3]  # [B, 3, 3]
     t = poses[:, :3, 3]  # [B, 3]
-    transformed = torch.einsum("bij, pj -> bpi", R, points) + t.unsqueeze(1)
+    transformed = torch.einsum("bij, pj -> bpi", r, points) + t.unsqueeze(1)
     return transformed
 
 
diff --git a/embodichain/utils/warp/kinematics/opw_solver.py b/embodichain/utils/warp/kinematics/opw_solver.py
index 1f1cf459..877324d1 100644
--- a/embodichain/utils/warp/kinematics/opw_solver.py
+++ b/embodichain/utils/warp/kinematics/opw_solver.py
@@ -18,7 +18,6 @@
 import numpy as np
 from typing import Tuple
 
-
 wp_vec48f = wp.types.vector(length=48, dtype=float)
 wp_vec6f = wp.types.vector(length=6, dtype=float)
 
@@ -30,6 +29,23 @@ def normalize_to_pi(angle: float) -> float:
     return wp.atan2(wp.sin(angle), wp.cos(angle))
 
 
+@wp.func
+def normalize_in_limit(angle: float, lower: float, upper: float) -> float:
+    two_pi = 2.0 * wp.pi
+    k = wp.ceil((lower - angle) / two_pi)
+    result = angle + k * two_pi
+    return result
+
+
+@wp.func
+def is_within_limit(
+    angle: float, lower: float, upper: float, safe_margin: float
+) -> bool:
+    if angle < lower + safe_margin or angle > upper - safe_margin:
+        return False
+    return True
+
+
 @wp.func
 def safe_acos(x: float) -> float:
     return wp.acos(wp.clamp(x, -1.0, 1.0))
@@ -219,6 +235,9 @@ def opw_ik_kernel(
     params: OPWparam,
     offsets: wp.array(dtype=float),
     sign_corrections: wp.array(dtype=float),
+    lower_limits: wp_vec6f,
+    upper_limits: wp_vec6f,
+    safe_margin: float,
     qpos: wp.array(dtype=float),
     ik_valid: wp.array(dtype=int),
 ):
@@ -433,8 +452,10 @@ def opw_ik_kernel(
 
         for k in range(DOF):
             idx = j * DOF + k
-            qpos[qpos_start + k] = normalize_to_pi(
-                (theta[idx] + offsets[k]) * sign_corrections[k]
+            qpos[qpos_start + k] = normalize_in_limit(
+                (theta[idx] + offsets[k]) * sign_corrections[k],
+                lower=lower_limits[k],
+                upper=upper_limits[k],
             )
 
         # filter invalid solutions
@@ -449,42 +470,46 @@ def opw_ik_kernel(
         )
         t_err, r_err = get_transform_err(check_ee_pose, ee_pose)
         # mark invalid solutions (cannot pass ik check)
+        ik_valid[i * N_SOL + j] = 1
+        for k in range(DOF):
+            if not is_within_limit(
+                qpos[qpos_start + k],
+                lower_limits[k],
+                upper_limits[k],
+                safe_margin=safe_margin,
+            ):
+                ik_valid[i * N_SOL + j] = 0
+                break
         if t_err > 1e-2 or r_err > 1e-1:
             ik_valid[i * N_SOL + j] = 0
-        else:
-            ik_valid[i * N_SOL + j] = 1
 
 
 @wp.kernel
-def opw_best_ik_kernel(
-    full_ik_result: wp.array(dtype=float),
-    full_ik_valid: wp.array(dtype=int),
-    qpos_seed: wp.array(dtype=float),
+def opw_ik_select_kernel(
+    full_ik_result: wp.array(dtype=float, ndim=3),  # [n_sample, N_SOL, DOF]
+    full_ik_valid: wp.array(dtype=int, ndim=2),  # [n_sample, N_SOL]
+    qpos_seed: wp.array(dtype=float, ndim=2),  # [n_sample, DOF]
     joint_weights: wp_vec6f,
-    best_ik_result: wp.array(dtype=float),
-    best_ik_valid: wp.array(dtype=int),
+    best_ik_result: wp.array(dtype=float, ndim=2),  # [n_sample, DOF]
+    best_ik_valid: wp.array(dtype=int, ndim=1),  # [n_sample, ]
 ):
-    i = wp.tid()
-    DOF = 6
-    N_SOL = 8
-
+    i = wp.tid()  # index for sample
     best_weighted_dis = float(1e10)
     best_ids = int(-1)
+    DOF = 6
+    N_SOL = 8
     for j in range(N_SOL):
-        is_full_valid = full_ik_valid[i * N_SOL + j]
+        is_full_valid = full_ik_valid[i, j]
         if is_full_valid == 0:
             # invalid ik result
             continue
         weighted_dis = 0.0
         for t in range(DOF):
             weighted_dis += (
-                (full_ik_result[i * N_SOL * DOF + j * DOF + t] - qpos_seed[i * DOF + t])
-                * joint_weights[0]
-                * (
-                    full_ik_result[i * N_SOL * DOF + j * DOF + t]
-                    - qpos_seed[i * DOF + t]
-                )
-                * joint_weights[0]
+                (full_ik_result[i, j, t] - qpos_seed[i, t])
+                * joint_weights[t]
+                * (full_ik_result[i, j, t] - qpos_seed[i, t])
+                * joint_weights[t]
             )
         if weighted_dis < best_weighted_dis:
             best_weighted_dis = weighted_dis
@@ -493,9 +518,7 @@ def opw_best_ik_kernel(
         # found best solution
         best_ik_valid[i] = 1
         for k in range(DOF):
-            best_ik_result[i * DOF + k] = full_ik_result[
-                i * N_SOL * DOF + best_ids * DOF + k
-            ]
+            best_ik_result[i, k] = full_ik_result[i, best_ids, k]
     else:
         # no valid solution
         best_ik_valid[i] = 0
diff --git a/examples/agents/datasets/online_dataset_demo.py b/examples/agents/datasets/online_dataset_demo.py
index 84429a24..3bd07d3b 100644
--- a/examples/agents/datasets/online_dataset_demo.py
+++ b/examples/agents/datasets/online_dataset_demo.py
@@ -28,7 +28,7 @@
 
 Usage::
 
-    python examples/agents/datasets/online_dataset_demo.py 
+    python examples/agents/datasets/online_dataset_demo.py
 """
 
 from __future__ import annotations
@@ -76,7 +76,7 @@ def _build_engine(args: argparse.Namespace) -> OnlineDataEngine:
     gym_config = load_json(config_path)
 
     gym_config["headless"] = True
-    gym_config["enable_rt"] = True
+    gym_config.setdefault("renderer", True)
     gym_config["gpu_id"] = 0
     gym_config["device"] = args.device
     cfg = OnlineDataEngineCfg(
diff --git a/examples/sim/demo/grasp_cup_to_caffe.py b/examples/sim/demo/grasp_cup_to_caffe.py
index c2c69ab6..c59526ed 100644
--- a/examples/sim/demo/grasp_cup_to_caffe.py
+++ b/examples/sim/demo/grasp_cup_to_caffe.py
@@ -28,6 +28,7 @@
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.objects import Robot, RigidObject
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     LightCfg,
     JointDrivePropertiesCfg,
     RigidObjectCfg,
@@ -38,7 +39,7 @@
 from embodichain.lab.sim.shapes import MeshCfg
 from embodichain.data import get_data_path
 from embodichain.utils import logger
-
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 from embodichain.lab.sim.robots.dexforce_w1.cfg import DexforceW1Cfg
 
 
@@ -52,19 +53,7 @@ def parse_arguments():
     parser = argparse.ArgumentParser(
         description="Create and simulate a robot in SimulationManager"
     )
-    parser.add_argument(
-        "--num_envs", type=int, default=9, help="Number of parallel environments"
-    )
-    parser.add_argument(
-        "--enable_rt", action="store_true", help="Enable ray tracing rendering"
-    )
-    parser.add_argument("--headless", action="store_true", help="Enable headless mode")
-    parser.add_argument(
-        "--device",
-        type=str,
-        default="cpu",
-        help="device to run the environment on, e.g., 'cpu' or 'cuda'",
-    )
+    add_env_launcher_args_to_parser(parser)
     return parser.parse_args()
 
 
@@ -81,23 +70,13 @@ def initialize_simulation(args) -> SimulationManager:
     config = SimulationManagerCfg(
         headless=True,
         sim_device=args.device,
-        enable_rt=args.enable_rt,
+        render_cfg=RenderCfg(renderer=args.renderer),
         physics_dt=1.0 / 100.0,
         num_envs=args.num_envs,
         arena_space=2.5,
     )
     sim = SimulationManager(config)
 
-    if args.enable_rt:
-        light = sim.add_light(
-            cfg=LightCfg(
-                uid="main_light",
-                color=(0.6, 0.6, 0.6),
-                intensity=30.0,
-                init_pos=(1.0, 0, 3.0),
-            )
-        )
-
     return sim
 
 
@@ -440,6 +419,7 @@ def main():
     table = create_table(sim)
     caffe = create_caffe(sim)
     cup = create_cup(sim)
+    sim.update(step=1)
 
     # apply random perturbation
     apply_random_xy_perturbation(cup, max_perturbation=0.05)
diff --git a/examples/sim/demo/pick_up_cloth.py b/examples/sim/demo/pick_up_cloth.py
index 36d1c243..d6f8e3fa 100644
--- a/examples/sim/demo/pick_up_cloth.py
+++ b/examples/sim/demo/pick_up_cloth.py
@@ -35,6 +35,7 @@
 from embodichain.data import get_data_path
 from embodichain.utils import logger
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     JointDrivePropertiesCfg,
     RobotCfg,
     RigidObjectCfg,
@@ -47,51 +48,7 @@
 import os
 from embodichain.lab.sim.shapes import MeshCfg, CubeCfg
 import tempfile
-
-
-def parse_arguments():
-    """
-    Parse command-line arguments to configure the simulation.
-
-    Returns:
-        argparse.Namespace: Parsed arguments including number of environments, device, and rendering options.
-    """
-    parser = argparse.ArgumentParser(
-        description="Create and simulate a robot in SimulationManager"
-    )
-    parser.add_argument(
-        "--enable_rt", action="store_true", help="Enable ray tracing rendering"
-    )
-    parser.add_argument(
-        "--num_envs", type=int, default=1, help="Number of parallel environments"
-    )
-    return parser.parse_args()
-
-
-def initialize_simulation(args):
-    """
-    Initialize the simulation environment based on the provided arguments.
-
-    Args:
-        args (argparse.Namespace): Parsed command-line arguments.
-
-    Returns:
-        SimulationManager: Configured simulation manager instance.
-    """
-    config = SimulationManagerCfg(
-        headless=True,
-        sim_device="cuda",
-        enable_rt=args.enable_rt,
-        physics_dt=1.0 / 100.0,
-        num_envs=args.num_envs,
-    )
-    sim = SimulationManager(config)
-
-    light = sim.add_light(
-        cfg=LightCfg(uid="main_light", intensity=50.0, init_pos=(0, 0, 2.0))
-    )
-
-    return sim
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 
 
 def create_robot(sim: SimulationManager, position=[0.0, 0.0, 0.0]):
@@ -148,18 +105,18 @@ def create_padding_box(sim: SimulationManager):
     padding_box_cfg = RigidObjectCfg(
         uid="padding_box",
         shape=CubeCfg(
-            size=[0.01, 0.04, 0.03],
+            size=[0.02, 0.07, 0.05],
         ),
         attrs=RigidBodyAttributesCfg(
             mass=1.0,
-            static_friction=0.95,
-            dynamic_friction=0.9,
+            static_friction=0.01,
+            dynamic_friction=0.00,
             restitution=0.01,
             min_position_iters=32,
             min_velocity_iters=8,
         ),
         body_type="kinematic",
-        init_pos=[0.5, 0.0, 0.01],
+        init_pos=[0.5, 0.0, 0.026],
         init_rot=[0.0, 0.0, 0.0],
     )
     padding_box = sim.add_rigid_object(cfg=padding_box_cfg)
@@ -219,7 +176,7 @@ def create_cloth(sim: SimulationManager):
                 mass=0.01,
                 youngs=1e10,
                 poissons=0.4,
-                thickness=0.04,
+                thickness=0.06,
                 bending_stiffness=0.01,
                 bending_damping=0.1,
                 dynamic_friction=0.95,
@@ -283,8 +240,26 @@ def main():
     This function initializes the simulation, creates the robot and other objects,
     and performs the press softbody task.
     """
-    args = parse_arguments()
-    sim = initialize_simulation(args)
+    parser = argparse.ArgumentParser(
+        description="Create a simulation scene with SimulationManager"
+    )
+    add_env_launcher_args_to_parser(parser)
+    args = parser.parse_args()
+    # Configure the simulation
+    sim_cfg = SimulationManagerCfg(
+        width=1920,
+        height=1080,
+        num_envs=args.num_envs,
+        headless=True,
+        physics_dt=1.0 / 100.0,  # Physics timestep (100 Hz)
+        sim_device="cuda",
+        render_cfg=RenderCfg(
+            renderer=args.renderer
+        ),  # Enable ray tracing for better visuals
+    )
+
+    # Create the simulation instance
+    sim = SimulationManager(sim_cfg)
 
     robot = create_robot(sim)
     cloth = create_cloth(sim)
@@ -312,8 +287,7 @@ def main():
     n_waypoint = grab_traj.shape[1]
     for i in range(n_waypoint):
         robot.set_qpos(grab_traj[:, i, :])
-        sim.update(step=4)
-        time.sleep(1e-2)
+        sim.update(step=3)
     input("Press Enter to exit the simulation...")
 
 
diff --git a/examples/sim/demo/press_softbody.py b/examples/sim/demo/press_softbody.py
index 25e1640d..f5fada63 100644
--- a/examples/sim/demo/press_softbody.py
+++ b/examples/sim/demo/press_softbody.py
@@ -34,6 +34,7 @@
 from embodichain.data import get_data_path
 from embodichain.utils import logger
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     RobotCfg,
     LightCfg,
     SoftObjectCfg,
@@ -41,6 +42,7 @@
     SoftbodyPhysicalAttributesCfg,
     URDFCfg,
 )
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 from embodichain.lab.sim.shapes import MeshCfg
 
 
@@ -54,12 +56,7 @@ def parse_arguments():
     parser = argparse.ArgumentParser(
         description="Create and simulate a robot in SimulationManager"
     )
-    parser.add_argument(
-        "--enable_rt", action="store_true", help="Enable ray tracing rendering"
-    )
-    parser.add_argument(
-        "--num_envs", type=int, default=9, help="Number of parallel environments"
-    )
+    add_env_launcher_args_to_parser(parser)
     return parser.parse_args()
 
 
@@ -76,16 +73,12 @@ def initialize_simulation(args):
     config = SimulationManagerCfg(
         headless=True,
         sim_device="cuda",
-        enable_rt=args.enable_rt,
+        render_cfg=RenderCfg(renderer=args.renderer),
         physics_dt=1.0 / 100.0,
         num_envs=args.num_envs,
     )
     sim = SimulationManager(config)
 
-    light = sim.add_light(
-        cfg=LightCfg(uid="main_light", intensity=50.0, init_pos=(0, 0, 2.0))
-    )
-
     return sim
 
 
diff --git a/examples/sim/demo/scoop_ice.py b/examples/sim/demo/scoop_ice.py
index 00e05d77..b80e8707 100644
--- a/examples/sim/demo/scoop_ice.py
+++ b/examples/sim/demo/scoop_ice.py
@@ -29,6 +29,7 @@
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.objects import Robot, RigidObject, RigidObjectGroup
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     JointDrivePropertiesCfg,
     RobotCfg,
     URDFCfg,
@@ -44,9 +45,10 @@
 from embodichain.lab.sim.solvers import PytorchSolverCfg
 from embodichain.data import get_data_path
 from embodichain.utils import logger
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 
 
-def initialize_simulation():
+def initialize_simulation(args):
     """
     Initialize the simulation environment based on the provided arguments.
 
@@ -58,14 +60,13 @@ def initialize_simulation():
     """
     config = SimulationManagerCfg(
         headless=True,
-        sim_device="cpu",
-        enable_rt=True,
+        render_cfg=RenderCfg(renderer=args.renderer),
         physics_dt=1.0 / 100.0,
     )
     sim = SimulationManager(config)
 
     light = sim.add_light(
-        cfg=LightCfg(uid="main_light", intensity=50.0, init_pos=(0, 0, 2.0))
+        cfg=LightCfg(uid="main_light", intensity=10.0, init_pos=(0, 0, 2.0))
     )
 
     return sim
@@ -308,7 +309,7 @@ def create_ice_cubes(sim: SimulationManager):
         cfg=VisualMaterialCfg(
             base_color=[1.0, 1.0, 1.0, 1.0],
             ior=1.31,
-            roughness=0.05,
+            roughness=0.2,
             material_type="BSDF",
         )
     )
@@ -529,13 +530,17 @@ def scoop_ice(sim: SimulationManager, robot: Robot, scoop: RigidObject):
 
 
 def main():
+    parser = argparse.ArgumentParser(description="Scoop ice task simulation")
+    add_env_launcher_args_to_parser(parser)
+    args = parser.parse_args()
+
     """
     Main function to demonstrate robot simulation.
 
     This function initializes the simulation, creates the robot and other objects,
     and performs the scoop ice task.
     """
-    sim = initialize_simulation()
+    sim = initialize_simulation(args)
 
     # Create simulation objects
     robot = create_robot(sim)
diff --git a/examples/sim/gizmo/gizmo_camera.py b/examples/sim/gizmo/gizmo_camera.py
index 4cb9071b..296c3be4 100644
--- a/examples/sim/gizmo/gizmo_camera.py
+++ b/examples/sim/gizmo/gizmo_camera.py
@@ -28,9 +28,10 @@
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.sensors import Camera, CameraCfg
-from embodichain.lab.sim.cfg import RigidObjectCfg, RigidBodyAttributesCfg
+from embodichain.lab.sim.cfg import RigidObjectCfg, RigidBodyAttributesCfg, RenderCfg
 from embodichain.lab.sim.shapes import CubeCfg
 from embodichain.utils import logger
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 
 
 def main():
@@ -40,20 +41,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Create and simulate a camera with gizmo in SimulationManager"
     )
-    parser.add_argument(
-        "--device",
-        type=str,
-        default="cpu",
-        choices=["cpu", "cuda"],
-        help="Device to run simulation on",
-    )
-    parser.add_argument("--headless", action="store_true", help="Run in headless mode")
-    parser.add_argument(
-        "--enable_rt",
-        action="store_true",
-        default=False,
-        help="Enable ray tracing for better visuals",
-    )
+    add_env_launcher_args_to_parser(parser)
     args = parser.parse_args()
 
     # Configure the simulation
@@ -62,7 +50,7 @@ def main():
         height=1080,
         physics_dt=1.0 / 100.0,
         sim_device=args.device,
-        enable_rt=args.enable_rt,
+        render_cfg=RenderCfg(renderer=args.renderer),
     )
 
     # Create simulation context
diff --git a/examples/sim/gizmo/gizmo_object.py b/examples/sim/gizmo/gizmo_object.py
index 06066e06..b0931f24 100644
--- a/examples/sim/gizmo/gizmo_object.py
+++ b/examples/sim/gizmo/gizmo_object.py
@@ -23,9 +23,9 @@
 import time
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
-from embodichain.lab.sim.cfg import RigidBodyAttributesCfg
+from embodichain.lab.sim.cfg import RigidBodyAttributesCfg, RenderCfg
 from embodichain.lab.sim.shapes import CubeCfg
-
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 from embodichain.lab.sim.objects import RigidObject, RigidObjectCfg
 from embodichain.utils import logger
 
@@ -37,22 +37,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Create a simulation scene with SimulationManager"
     )
-    parser.add_argument(
-        "--headless",
-        action="store_true",
-        default=False,
-        help="Run simulation in headless mode",
-    )
-    parser.add_argument(
-        "--device", type=str, default="cpu", help="Simulation device (cuda or cpu)"
-    )
-    parser.add_argument(
-        "--enable_rt",
-        action="store_true",
-        default=False,
-        help="Enable ray tracing for better visuals",
-    )
-
+    add_env_launcher_args_to_parser(parser)
     args = parser.parse_args()
 
     # Configure the simulation
@@ -62,7 +47,9 @@ def main():
         headless=args.headless,
         physics_dt=1.0 / 100.0,  # Physics timestep (100 Hz)
         sim_device=args.device,
-        enable_rt=args.enable_rt,  # Enable ray tracing for better visuals
+        render_cfg=RenderCfg(
+            renderer=args.renderer
+        ),  # Enable ray tracing for better visuals
     )
 
     # Create the simulation instance
diff --git a/examples/sim/gizmo/gizmo_robot.py b/examples/sim/gizmo/gizmo_robot.py
index c6ccf473..40f0d0c1 100644
--- a/examples/sim/gizmo/gizmo_robot.py
+++ b/examples/sim/gizmo/gizmo_robot.py
@@ -24,11 +24,12 @@
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     RobotCfg,
     URDFCfg,
     JointDrivePropertiesCfg,
 )
-
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 from embodichain.lab.sim.solvers import PinkSolverCfg
 from embodichain.data import get_data_path
 from embodichain.utils import logger
@@ -41,15 +42,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Create a simulation scene with SimulationManager"
     )
-    parser.add_argument(
-        "--device", type=str, default="cpu", help="Simulation device (cuda or cpu)"
-    )
-    parser.add_argument(
-        "--enable_rt",
-        action="store_true",
-        default=False,
-        help="Enable ray tracing for better visuals",
-    )
+    add_env_launcher_args_to_parser(parser)
     args = parser.parse_args()
 
     # Configure the simulation
@@ -58,7 +51,7 @@ def main():
         height=1080,
         physics_dt=1.0 / 100.0,
         sim_device=args.device,
-        enable_rt=args.enable_rt,
+        render_cfg=RenderCfg(renderer=args.renderer),
     )
 
     sim = SimulationManager(sim_cfg)
diff --git a/examples/sim/gizmo/gizmo_scene.py b/examples/sim/gizmo/gizmo_scene.py
index 15144487..a37e6eb8 100644
--- a/examples/sim/gizmo/gizmo_scene.py
+++ b/examples/sim/gizmo/gizmo_scene.py
@@ -30,12 +30,14 @@
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     RobotCfg,
     URDFCfg,
     JointDrivePropertiesCfg,
     RigidObjectCfg,
     RigidBodyAttributesCfg,
 )
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 from embodichain.lab.sim.shapes import CubeCfg
 from embodichain.lab.sim.sensors import CameraCfg
 from embodichain.lab.sim.solvers import PinkSolverCfg
@@ -49,24 +51,17 @@ def main():
     parser = argparse.ArgumentParser(
         description="Create a simulation scene with SimulationManager"
     )
-    parser.add_argument(
-        "--device", type=str, default="cpu", help="Simulation device (cuda or cpu)"
-    )
-    parser.add_argument(
-        "--enable_rt",
-        action="store_true",
-        default=False,
-        help="Enable ray tracing for better visuals",
-    )
+    add_env_launcher_args_to_parser(parser)
     args = parser.parse_args()
 
     # Configure the simulation
     sim_cfg = SimulationManagerCfg(
         width=1920,
         height=1080,
+        headless=args.headless,
         physics_dt=1.0 / 100.0,
         sim_device=args.device,
-        enable_rt=args.enable_rt,
+        render_cfg=RenderCfg(renderer=args.renderer),
     )
 
     sim = SimulationManager(sim_cfg)
diff --git a/examples/sim/gizmo/gizmo_w1.py b/examples/sim/gizmo/gizmo_w1.py
index 7eacab29..09779c84 100644
--- a/examples/sim/gizmo/gizmo_w1.py
+++ b/examples/sim/gizmo/gizmo_w1.py
@@ -24,11 +24,12 @@
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     RobotCfg,
     URDFCfg,
     JointDrivePropertiesCfg,
 )
-
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 from embodichain.lab.sim.solvers import PinkSolverCfg
 from embodichain.data import get_data_path
 from embodichain.utils import logger
@@ -41,24 +42,17 @@ def main():
     parser = argparse.ArgumentParser(
         description="Create a simulation scene with SimulationManager"
     )
-    parser.add_argument(
-        "--device", type=str, default="cpu", help="Simulation device (cuda or cpu)"
-    )
-    parser.add_argument(
-        "--enable_rt",
-        action="store_true",
-        default=False,
-        help="Enable ray tracing for better visuals",
-    )
+    add_env_launcher_args_to_parser(parser)
     args = parser.parse_args()
 
     # Configure the simulation
     sim_cfg = SimulationManagerCfg(
         width=1920,
         height=1080,
+        headless=args.headless,
         physics_dt=1.0 / 100.0,
         sim_device=args.device,
-        enable_rt=args.enable_rt,
+        render_cfg=RenderCfg(renderer=args.renderer),
     )
 
     sim = SimulationManager(sim_cfg)
diff --git a/examples/sim/scene/scene_demo.py b/examples/sim/scene/scene_demo.py
index 711145c8..1c08af6a 100644
--- a/examples/sim/scene/scene_demo.py
+++ b/examples/sim/scene/scene_demo.py
@@ -24,11 +24,18 @@
 import math
 import embodichain.utils.logger as logger
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
-from embodichain.lab.sim.cfg import RigidBodyAttributesCfg, LightCfg, RobotCfg, URDFCfg
+from embodichain.lab.sim.cfg import (
+    RenderCfg,
+    RigidBodyAttributesCfg,
+    LightCfg,
+    RobotCfg,
+    URDFCfg,
+)
 from embodichain.lab.sim.shapes import MeshCfg
 from embodichain.lab.sim.objects import RigidObject, RigidObjectCfg, Robot
 from embodichain.data.assets.scene_assets import SceneData
 from embodichain.data.constants import EMBODICHAIN_DEFAULT_DATA_ROOT
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 
 
 def resolve_asset_path(scene_name: str) -> str:
@@ -91,18 +98,7 @@ def main():
         choices=["kitchen", "factory", "office", "local"],
         help="Choose which scene to load",
     )
-    parser.add_argument(
-        "--num_envs", type=int, default=1, help="Number of parallel environments"
-    )
-    parser.add_argument(
-        "--device", type=str, default="cpu", help="Simulation device (cuda or cpu)"
-    )
-    parser.add_argument(
-        "--disable_rt",
-        action="store_true",
-        default=False,
-        help="Disable ray tracing for better visuals",
-    )
+    add_env_launcher_args_to_parser(parser)
     args = parser.parse_args()
 
     logger.log_info(f"Initializing scene '{args.scene}'")
@@ -121,7 +117,7 @@ def main():
         headless=True,
         physics_dt=1.0 / 100.0,
         sim_device=args.device,
-        enable_rt=not args.disable_rt,
+        render_cfg=RenderCfg(renderer=args.renderer),
         num_envs=args.num_envs,
         arena_space=10.0,
     )
@@ -130,7 +126,7 @@ def main():
     num_lights = 8
     radius = 5
     height = 8
-    intensity = 200
+    intensity = 50
     lights = []
 
     for i in range(num_lights):
diff --git a/examples/sim/sensors/batch_camera.py b/examples/sim/sensors/batch_camera.py
index 7e46b44d..b6eb4824 100644
--- a/examples/sim/sensors/batch_camera.py
+++ b/examples/sim/sensors/batch_camera.py
@@ -19,7 +19,7 @@
 import matplotlib.pyplot as plt
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
-from embodichain.lab.sim.cfg import RigidObjectCfg, LightCfg
+from embodichain.lab.sim.cfg import RenderCfg, RigidObjectCfg, LightCfg
 from embodichain.lab.sim.shapes import MeshCfg
 from embodichain.lab.sim.objects import RigidObject, Light
 from embodichain.lab.sim.sensors import (
@@ -28,6 +28,7 @@
     CameraCfg,
     StereoCameraCfg,
 )
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 from embodichain.data import get_data_path
 
 
@@ -37,7 +38,7 @@ def main(args):
         sim_device=args.device,
         num_envs=args.num_envs,
         arena_space=2,
-        enable_rt=args.enable_rt,
+        render_cfg=RenderCfg(renderer=args.renderer),
     )
     sim = SimulationManager(config)
 
@@ -48,9 +49,6 @@ def main(args):
             init_pos=(0, 0, 0.2),
         )
     )
-    light: Light = sim.add_light(
-        cfg=LightCfg(light_type="point", init_pos=(0, 0, 2), intensity=50)
-    )
 
     if sim.is_use_gpu_physics:
         sim.init_gpu_physics()
@@ -98,7 +96,7 @@ def main(args):
     # plot rgba into a grid of images
     grid_x = np.ceil(np.sqrt(args.num_envs)).astype(int)
     grid_y = np.ceil(args.num_envs / grid_x).astype(int)
-    fig, axs = plt.subplots(grid_x, grid_y, figsize=(12, 6))
+    fig, axs = plt.subplots(grid_x, grid_y, figsize=(12, 6), squeeze=False)
     axs = axs.flatten()
     for i in range(args.num_envs):
 
@@ -120,22 +118,7 @@ def main(args):
     import argparse
 
     parser = argparse.ArgumentParser(description="Run the batch robot simulation.")
-    parser.add_argument(
-        "--num_envs", type=int, default=4, help="Number of environments to simulate."
-    )
-    parser.add_argument(
-        "--device",
-        type=str,
-        default="cpu",
-        choices=["cpu", "cuda"],
-        help="Device to run the simulation on.",
-    )
-    parser.add_argument(
-        "--headless", action="store_true", help="Run the simulation in headless mode."
-    )
-    parser.add_argument(
-        "--enable_rt", action="store_true", help="Enable ray tracing rendering."
-    )
+    add_env_launcher_args_to_parser(parser)
     parser.add_argument(
         "--sensor_type",
         type=str,
diff --git a/examples/sim/sensors/create_contact_sensor.py b/examples/sim/sensors/create_contact_sensor.py
index 3a1c933a..17c26caf 100644
--- a/examples/sim/sensors/create_contact_sensor.py
+++ b/examples/sim/sensors/create_contact_sensor.py
@@ -25,6 +25,7 @@
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     RigidBodyAttributesCfg,
 )
 from embodichain.lab.sim.sensors import (
@@ -34,6 +35,7 @@
 from embodichain.lab.sim.shapes import CubeCfg
 from embodichain.lab.sim.objects import RigidObject, RigidObjectCfg, Robot, RobotCfg
 from embodichain.data import get_data_path
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 
 
 def create_cube(
@@ -177,24 +179,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Create a simulation scene with SimulationManager"
     )
-    parser.add_argument(
-        "--headless",
-        action="store_true",
-        default=False,
-        help="Run simulation in headless mode",
-    )
-    parser.add_argument(
-        "--num_envs", type=int, default=64, help="Number of parallel environments"
-    )
-    parser.add_argument(
-        "--device", type=str, default="cpu", help="Simulation device (cuda or cpu)"
-    )
-    parser.add_argument(
-        "--enable_rt",
-        action="store_true",
-        default=False,
-        help="Enable ray tracing for better visuals",
-    )
+    add_env_launcher_args_to_parser(parser)
     args = parser.parse_args()
 
     # Configure the simulation
@@ -202,10 +187,12 @@ def main():
         width=1920,
         height=1080,
         num_envs=args.num_envs,
-        headless=args.headless,
+        headless=True,
         physics_dt=1.0 / 100.0,  # Physics timestep (100 Hz)
         sim_device=args.device,
-        enable_rt=args.enable_rt,  # Enable ray tracing for better visuals
+        render_cfg=RenderCfg(
+            renderer=args.renderer
+        ),  # Enable ray tracing for better visuals
     )
 
     # Create the simulation instance
diff --git a/examples/sim/utility/workspace_analyzer/analyze_cartesian_workspace.py b/examples/sim/utility/workspace_analyzer/analyze_cartesian_workspace.py
index 0871b6ad..8d2b5b9c 100644
--- a/examples/sim/utility/workspace_analyzer/analyze_cartesian_workspace.py
+++ b/examples/sim/utility/workspace_analyzer/analyze_cartesian_workspace.py
@@ -20,7 +20,7 @@
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.robots import DexforceW1Cfg
-from embodichain.lab.sim.cfg import MarkerCfg
+from embodichain.lab.sim.cfg import MarkerCfg, RenderCfg
 from embodichain.lab.sim.utility.workspace_analyzer.workspace_analyzer import (
     WorkspaceAnalyzer,
     WorkspaceAnalyzerConfig,
@@ -36,10 +36,12 @@
     torch.set_printoptions(precision=5, sci_mode=False)
 
     config = SimulationManagerCfg(
-        headless=False, sim_device="cpu", width=1080, height=1080
+        headless=False,
+        sim_device="cuda",
+        width=1080,
+        height=1080,
     )
     sim = SimulationManager(config)
-    sim.set_manual_update(False)
 
     cfg = DexforceW1Cfg.from_dict(
         {"uid": "dexforce_w1", "version": "v021", "arm_kind": "industrial"}
@@ -48,7 +50,11 @@
     print("DexforceW1 robot added to the simulation.")
 
     # Set left arm joint positions (mirrored)
-    left_qpos = torch.tensor([0, -np.pi / 4, 0.0, -np.pi / 2, -np.pi / 4, 0.0, 0.0])
+    left_qpos = torch.tensor(
+        [0, -np.pi / 4, 0.0, -np.pi / 2, -np.pi / 4, 0.0, 0.0],
+        dtype=torch.float32,
+        device=robot.device,
+    )
     right_qpos = -left_qpos
     robot.set_qpos(
         qpos=left_qpos,
@@ -87,7 +93,7 @@
     wa_cartesian = WorkspaceAnalyzer(
         robot=robot, config=cartesian_config, sim_manager=sim
     )
-    results_cartesian = wa_cartesian.analyze(num_samples=1000, visualize=True)
+    results_cartesian = wa_cartesian.analyze(num_samples=50000, visualize=True)
     print(f"\nCartesian Space Results:")
     print(
         f"  Reachable points: {results_cartesian['num_reachable']} / {results_cartesian['num_samples']}"
diff --git a/examples/sim/utility/workspace_analyzer/analyze_joint_workspace.py b/examples/sim/utility/workspace_analyzer/analyze_joint_workspace.py
index 6ba8ad4c..5c658fa9 100644
--- a/examples/sim/utility/workspace_analyzer/analyze_joint_workspace.py
+++ b/examples/sim/utility/workspace_analyzer/analyze_joint_workspace.py
@@ -20,7 +20,6 @@
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.robots import DexforceW1Cfg
-
 from embodichain.lab.sim.utility.workspace_analyzer.workspace_analyzer import (
     WorkspaceAnalyzer,
 )
@@ -43,7 +42,7 @@
     print("Example: Joint Space Analysis")
 
     wa_joint = WorkspaceAnalyzer(robot=robot, sim_manager=sim_manager)
-    results_joint = wa_joint.analyze(num_samples=3000, visualize=True)
+    results_joint = wa_joint.analyze(num_samples=30000, visualize=True)
 
     print(f"\nJoint Space Results:")
     print(
diff --git a/examples/sim/utility/workspace_analyzer/analyze_plane_workspace.py b/examples/sim/utility/workspace_analyzer/analyze_plane_workspace.py
index 957b3535..8bd1b4ce 100644
--- a/examples/sim/utility/workspace_analyzer/analyze_plane_workspace.py
+++ b/examples/sim/utility/workspace_analyzer/analyze_plane_workspace.py
@@ -25,19 +25,21 @@
     WorkspaceAnalyzerConfig,
     AnalysisMode,
 )
-from embodichain.lab.sim.cfg import MarkerCfg
+from embodichain.lab.sim.cfg import MarkerCfg, RenderCfg
 from embodichain.lab.sim.utility.workspace_analyzer.configs.visualization_config import (
     VisualizationConfig,
 )
 
-
 if __name__ == "__main__":
     # Example usage
     np.set_printoptions(precision=5, suppress=True)
     torch.set_printoptions(precision=5, sci_mode=False)
 
     config = SimulationManagerCfg(
-        headless=False, sim_device="cpu", width=1080, height=1080
+        headless=False,
+        sim_device="cpu",
+        width=1080,
+        height=1080,
     )
     sim = SimulationManager(config)
     sim.set_manual_update(False)
diff --git a/pyproject.toml b/pyproject.toml
index 25b15290..de1e5deb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,7 @@ dynamic = ["version"]
 # Core install dependencies (kept from requirements.txt). Some VCS links are
 # specified using PEP 508 direct references where present.
 dependencies = [
-  "dexsim_engine==0.3.11",
+  "dexsim_engine==0.4.1",
   "setuptools>=78.1.1",
   "gymnasium>=0.29.1",
   "langchain",
@@ -36,29 +36,34 @@ dependencies = [
   "pin-pink",
   "casadi",
   "qpsolvers[osqp]==4.8.1",
-  "pytorch_kinematics==0.7.6",
+  "pytorch_kinematics==0.10.0",
   "polars==1.31.0",
   "PyYAML>=6.0",
-  "accelerate>=1.10.0",
   "wandb>=0.21.0",
   "tensorboard>=2.20.0",
-  "transformers>=4.53.0",
-  "diffusers>=0.32.1",
-  "deepspeed>=0.16.2",
   "ortools",
   "prettytable",
-  "black==24.3.0",
+  "black==26.3.1",
   "fvcore",
   "h5py",
   "tensordict",
-  "viser==1.0.21"
+  "viser==1.0.21",
+  "lerobot>=0.4.4"
 ]
 
 [project.optional-dependencies]
-lerobot = [
-  "lerobot==0.4.4"
+gensim = [
+  "bpy",
+  "pyrender==0.1.45"
 ]
 
+[tool.uv.sources]
+bpy = { index = "blender" }
+
+[[tool.uv.index]]
+name = "blender"
+url = "https://download.blender.org/pypi/"
+
 [tool.setuptools.dynamic]
 version = { file = ["VERSION"] }
 
diff --git a/scripts/benchmark/__init__.py b/scripts/benchmark/__init__.py
new file mode 100644
index 00000000..dd650e90
--- /dev/null
+++ b/scripts/benchmark/__init__.py
@@ -0,0 +1,15 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
diff --git a/scripts/benchmark/__main__.py b/scripts/benchmark/__main__.py
new file mode 100644
index 00000000..ee9eac0a
--- /dev/null
+++ b/scripts/benchmark/__main__.py
@@ -0,0 +1,103 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Unified CLI entry point for ``python -m scripts.benchmark``.
+
+Usage examples::
+
+    python -m scripts.benchmark rl --tasks push_cube --algorithms ppo --suite default
+    python -m scripts.benchmark rl --rebuild-report-only
+    python -m scripts.benchmark robotics-kinematic-solver -s pytorch
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+
+
+def _run_robotics_kinematic_solver_cli(args: argparse.Namespace) -> None:
+    """Run robotics kinematic solver benchmark with forwarded CLI args."""
+    from scripts.benchmark.robotics.kinematic_solver.run_benchmark import (
+        run_all_benchmarks,
+    )
+
+    run_all_benchmarks(selected_solvers=args.solvers)
+
+
+def _run_rl_cli(_: argparse.Namespace) -> None:
+    """Run RL benchmark CLI entrypoint."""
+    from scripts.benchmark.rl.run_benchmark import main as rl_main
+
+    rl_main()
+
+
+def main() -> None:
+    """Dispatch to the appropriate benchmark sub-command CLI."""
+    parser = argparse.ArgumentParser(
+        prog="scripts.benchmark",
+        description="EmbodiChain benchmark command-line interface.",
+    )
+    subparsers = parser.add_subparsers(dest="command")
+
+    # -- rl ------------------------------------------------------------------
+    rl_parser = subparsers.add_parser(
+        "rl",
+        help="Run RL benchmark: train, evaluate, aggregate, and report results.",
+    )
+    rl_parser.set_defaults(func=_run_rl_cli)
+
+    # -- robotics-kinematic-solver -------------------------------------------
+    robotics_ks_parser = subparsers.add_parser(
+        "robotics-kinematic-solver",
+        help="Benchmark the OPW kinematic solver (FK/IK accuracy and speed).",
+    )
+    robotics_ks_parser.add_argument(
+        "--solvers",
+        "-s",
+        nargs="+",
+        choices=("opw", "pytorch", "all"),
+        default=["all"],
+        help="Solvers to benchmark. Use one or more of: opw, pytorch, all.",
+    )
+    robotics_ks_parser.set_defaults(func=_run_robotics_kinematic_solver_cli)
+
+    # -- Parse ---------------------------------------------------------------
+    # If no sub-command is given, print help and exit.
+    if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"):
+        parser.print_help()
+        sys.exit(0)
+
+    # Determine which sub-command was selected, then reconstruct argv so
+    # that each sub-command's entry point can call ``parse_args()`` normally.
+    known, _ = parser.parse_known_args()
+
+    if hasattr(known, "func"):
+        # Rewrite sys.argv so the sub-command's argparse sees only its own args.
+        subcommand_argv = [f"scripts.benchmark {sys.argv[1]}"] + sys.argv[2:]
+        original_argv = sys.argv
+        sys.argv = subcommand_argv
+        try:
+            known.func(known)
+        finally:
+            sys.argv = original_argv
+    else:
+        parser.print_help()
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/benchmark/opw_solver.py b/scripts/benchmark/opw_solver.py
deleted file mode 100644
index c248eaba..00000000
--- a/scripts/benchmark/opw_solver.py
+++ /dev/null
@@ -1,155 +0,0 @@
-# ----------------------------------------------------------------------------
-# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ----------------------------------------------------------------------------
-
-import torch
-import numpy as np
-import warp as wp
-from scipy.spatial.transform import Rotation
-from embodichain.lab.sim.solvers.opw_solver import OPWSolver, OPWSolverCfg
-from typing import Tuple, List
-import time
-
-
-def get_pose_err(matrix_a: np.ndarray, matrix_b: np.ndarray) -> Tuple[float, float]:
-    t_err = np.linalg.norm(matrix_a[:3, 3] - matrix_b[:3, 3])
-    relative_rot = matrix_a[:3, :3].T @ matrix_b[:3, :3]
-    cos_angle = (np.trace(relative_rot) - 1) / 2.0
-    cos_angle = np.clip(cos_angle, -1.0, 1.0)
-    r_err = np.arccos(cos_angle)
-    return t_err, r_err
-
-
-def get_poses_err(
-    matrix_a_list: List[np.ndarray], matrix_b_list: List[np.ndarray]
-) -> Tuple[float, float]:
-    t_errs = []
-    r_errs = []
-    for mat_a, mat_b in zip(matrix_a_list, matrix_b_list):
-        t_err, r_err = get_pose_err(mat_a, mat_b)
-        t_errs.append(t_err)
-        r_errs.append(r_err)
-    return np.mean(t_errs), np.mean(r_errs)
-
-
-def check_opw_solver(solver_warp, solver_py_opw, n_samples=1000):
-    DOF = 6
-    qpos_np = np.random.uniform(low=-np.pi, high=np.pi, size=(n_samples, DOF)).astype(
-        float
-    )
-    qpos = torch.tensor(qpos_np, device=torch.device("cuda"), dtype=torch.float32)
-    xpos = solver_warp.get_fk(qpos)
-    qpos_seed = torch.tensor(
-        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
-        device=torch.device("cuda"),
-        dtype=torch.float32,
-    )
-
-    warp_ik_start_time = time.time()
-    warp_ik_success, warp_ik_qpos = solver_warp.get_ik(
-        xpos,
-        qpos_seed=qpos_seed,
-        initial_guess=qpos,
-        # return_all_solutions=True,
-    )
-    warp_cost_time = time.time() - warp_ik_start_time
-
-    # TODO: debug code
-    # warp_ik_success_np = warp_ik_success.cpu().numpy()
-    # warp_ik_failure_indices = np.where(warp_ik_success_np == False)[0]
-    # if len(warp_ik_failure_indices) > 0:
-    #     failure_qpos = qpos_np[warp_ik_failure_indices]
-    #     failure_xpos = xpos.cpu().numpy()[warp_ik_failure_indices]
-    #     print("=====warp_ik_failure_qpos:\n", repr(failure_qpos))
-    #     print("=====warp_ik_failure_xpos:\n", repr(failure_xpos))
-
-    #     print("=====xpos:\n", repr(xpos.cpu().numpy()))
-    #     print("=====warp_ik_qpos:\n", repr(warp_ik_qpos.cpu().numpy()))
-    #     print("=====warp_ik_success:\n", repr(warp_ik_success.cpu().numpy()))
-
-    check_xpos = solver_warp.get_fk(warp_ik_qpos)
-    warp_t_mean_err, warp_r_mean_err = get_poses_err(
-        [x.cpu().numpy() for x in xpos],
-        [x.cpu().numpy() for x in check_xpos],
-    )
-
-    py_opw_ik_start_time = time.time()
-    py_opw_ik_success, py_opw_ik_qpos = solver_py_opw.get_ik(
-        xpos, qpos_seed=qpos_seed, initial_guess=qpos
-    )
-    py_opw_cost_time = time.time() - py_opw_ik_start_time
-
-    check_xpos = solver_warp.get_fk(py_opw_ik_qpos.to(torch.device("cuda")))
-    py_opw_t_mean_err, py_opw_r_mean_err = get_poses_err(
-        [x.cpu().numpy() for x in xpos],
-        [x.cpu().numpy() for x in check_xpos],
-    )
-
-    return (
-        warp_cost_time,
-        warp_t_mean_err,
-        warp_r_mean_err,
-        py_opw_cost_time,
-        py_opw_t_mean_err,
-        py_opw_r_mean_err,
-    )
-
-
-def benchmark_opw_solver():
-    cfg = OPWSolverCfg()
-    cfg.a1 = 400.333
-    cfg.a2 = -251.449
-    cfg.b = 0.0
-    cfg.c1 = 830
-    cfg.c2 = 1177.556
-    cfg.c3 = 1443.593
-    cfg.c4 = 230
-    cfg.offsets = (
-        0.0,
-        82.21350356417211 * np.pi / 180.0,
-        -167.21710113148163 * np.pi / 180.0,
-        0.0,
-        0.0,
-        0.0,
-    )
-    cfg.flip_axes = (True, False, True, True, False, True)
-    cfg.has_parallelogram = False
-
-    # TODO: ignore pk_serial_chain for OPW
-    solver_warp = cfg.init_solver(device=torch.device("cuda"), pk_serial_chain="")
-    solver_py_opw = cfg.init_solver(device=torch.device("cpu"), pk_serial_chain="")
-    n_samples = [100, 1000, 10000, 100000]
-    # n_samples = [100]
-    for n_sample in n_samples:
-        # check_opw_solver(solver_warp, solver_py_opw, device=device, n_samples=n_sample)
-        (
-            warp_cost_time,
-            warp_t_mean_err,
-            warp_r_mean_err,
-            py_opw_cost_time,
-            py_opw_t_mean_err,
-            py_opw_r_mean_err,
-        ) = check_opw_solver(solver_warp, solver_py_opw, n_samples=n_sample)
-        print(f"===warp OPW Solver FK/IK test over {n_sample} samples:")
-        print(f"  Warp IK time: {warp_cost_time * 1000:.6f} ms")
-        print(f"Translation mean error: {warp_t_mean_err*1000:.6f} mm")
-        print(f"Rotation mean error: {warp_r_mean_err*180/np.pi:.6f} degrees")
-        print(f"===Py OPW IK time: {py_opw_cost_time * 1000:.6f} ms")
-        print(f"Translation mean error: {py_opw_t_mean_err*1000:.6f} mm")
-        print(f"Rotation mean error: {py_opw_r_mean_err*180/np.pi:.6f} degrees")
-
-
-if __name__ == "__main__":
-    benchmark_opw_solver()
diff --git a/scripts/benchmark/rl/__init__.py b/scripts/benchmark/rl/__init__.py
new file mode 100644
index 00000000..b142c88c
--- /dev/null
+++ b/scripts/benchmark/rl/__init__.py
@@ -0,0 +1,21 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+from .runner import BenchmarkRunner
+
+__all__ = ["BenchmarkRunner"]
diff --git a/scripts/benchmark/rl/algorithms/__init__.py b/scripts/benchmark/rl/algorithms/__init__.py
new file mode 100644
index 00000000..dd650e90
--- /dev/null
+++ b/scripts/benchmark/rl/algorithms/__init__.py
@@ -0,0 +1,15 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
diff --git a/scripts/benchmark/rl/algorithms/grpo.yaml b/scripts/benchmark/rl/algorithms/grpo.yaml
new file mode 100644
index 00000000..e33c673b
--- /dev/null
+++ b/scripts/benchmark/rl/algorithms/grpo.yaml
@@ -0,0 +1,24 @@
+name: grpo
+config:
+  policy:
+    name: actor_only
+    actor:
+      type: mlp
+      network_cfg:
+        hidden_sizes: [256, 256]
+        activation: relu
+  algorithm:
+    name: grpo
+    cfg:
+      learning_rate: 0.0001
+      n_epochs: 10
+      batch_size: 8192
+      gamma: 0.99
+      clip_coef: 0.2
+      ent_coef: 0.01
+      kl_coef: 0.0
+      group_size: 4
+      eps: 1.0e-8
+      reset_every_rollout: true
+      truncate_at_first_done: true
+      max_grad_norm: 0.5
diff --git a/scripts/benchmark/rl/algorithms/ppo.yaml b/scripts/benchmark/rl/algorithms/ppo.yaml
new file mode 100644
index 00000000..361c9386
--- /dev/null
+++ b/scripts/benchmark/rl/algorithms/ppo.yaml
@@ -0,0 +1,26 @@
+name: ppo
+config:
+  policy:
+    name: actor_critic
+    actor:
+      type: mlp
+      network_cfg:
+        hidden_sizes: [256, 256]
+        activation: relu
+    critic:
+      type: mlp
+      network_cfg:
+        hidden_sizes: [256, 256]
+        activation: relu
+  algorithm:
+    name: ppo
+    cfg:
+      learning_rate: 0.0001
+      n_epochs: 10
+      batch_size: 8192
+      gamma: 0.99
+      gae_lambda: 0.95
+      clip_coef: 0.2
+      ent_coef: 0.01
+      vf_coef: 0.5
+      max_grad_norm: 0.5
diff --git a/scripts/benchmark/rl/config.py b/scripts/benchmark/rl/config.py
new file mode 100644
index 00000000..da5131d3
--- /dev/null
+++ b/scripts/benchmark/rl/config.py
@@ -0,0 +1,70 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+from copy import deepcopy
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+BENCHMARK_ROOT = Path(__file__).resolve().parent
+
+
+def load_yaml(path: str | Path) -> dict[str, Any]:
+    """Load a YAML file into a dictionary."""
+    with Path(path).open("r", encoding="utf-8") as file:
+        data = yaml.safe_load(file) or {}
+    if not isinstance(data, dict):
+        raise TypeError(f"Expected mapping in YAML file {path}, got {type(data)!r}.")
+    return data
+
+
+def deep_update(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
+    """Recursively merge `override` into `base` and return a new mapping."""
+    merged = deepcopy(base)
+    for key, value in override.items():
+        if key in merged and isinstance(merged[key], dict) and isinstance(value, dict):
+            merged[key] = deep_update(merged[key], value)
+        else:
+            merged[key] = deepcopy(value)
+    return merged
+
+
+def load_task_spec(name: str) -> dict[str, Any]:
+    """Load a benchmark task specification by name."""
+    return load_yaml(BENCHMARK_ROOT / "tasks" / f"{name}.yaml")
+
+
+def load_algorithm_spec(name: str) -> dict[str, Any]:
+    """Load a benchmark algorithm specification by name."""
+    return load_yaml(BENCHMARK_ROOT / "algorithms" / f"{name}.yaml")
+
+
+def load_suite_spec(name: str = "default") -> dict[str, Any]:
+    """Load a benchmark suite specification by name."""
+    return load_yaml(BENCHMARK_ROOT / "suites" / f"{name}.yaml")
+
+
+__all__ = [
+    "BENCHMARK_ROOT",
+    "deep_update",
+    "load_algorithm_spec",
+    "load_suite_spec",
+    "load_task_spec",
+    "load_yaml",
+]
diff --git a/scripts/benchmark/rl/metrics.py b/scripts/benchmark/rl/metrics.py
new file mode 100644
index 00000000..f1ce9185
--- /dev/null
+++ b/scripts/benchmark/rl/metrics.py
@@ -0,0 +1,253 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+from collections import defaultdict
+from math import isnan
+from statistics import mean, pstdev
+from typing import Any
+
+
+def _iter_valid_threshold_points(
+    eval_history: list[dict[str, float]],
+    metric_key: str,
+):
+    """Yield `(step, metric)` pairs with valid numeric values."""
+    for item in eval_history:
+        metric_value = item.get(metric_key)
+        step_value = item.get("global_step")
+        if metric_value is None or step_value is None:
+            continue
+        if not isinstance(metric_value, (int, float)) or not isinstance(
+            step_value, (int, float)
+        ):
+            continue
+        if isnan(metric_value):
+            continue
+        yield int(step_value), float(metric_value)
+
+
+def compute_final_metric_stable(
+    eval_history: list[dict[str, float]],
+    metric_key: str,
+    window_size: int = 3,
+) -> float | None:
+    """Return the mean of the last `window_size` valid metric values."""
+    valid_values = [
+        metric_value
+        for _, metric_value in _iter_valid_threshold_points(eval_history, metric_key)
+    ]
+    if not valid_values:
+        return None
+    effective_window = max(1, window_size)
+    return mean(valid_values[-effective_window:])
+
+
+def compute_steps_to_threshold_first_hit(
+    eval_history: list[dict[str, float]],
+    metric_key: str,
+    threshold: float,
+) -> int | None:
+    """Return the first step where `metric_key` reaches `threshold`."""
+    for step_value, metric_value in _iter_valid_threshold_points(
+        eval_history, metric_key
+    ):
+        if metric_value >= threshold:
+            return step_value
+    return None
+
+
+def compute_steps_to_threshold_sustained(
+    eval_history: list[dict[str, float]],
+    metric_key: str,
+    threshold: float,
+    sustain_count: int = 3,
+) -> int | None:
+    """Return the first step where the threshold is met for `sustain_count` evals."""
+    if sustain_count <= 1:
+        return compute_steps_to_threshold_first_hit(eval_history, metric_key, threshold)
+
+    consecutive_hits = 0
+    first_step_in_window: int | None = None
+    for step_value, metric_value in _iter_valid_threshold_points(
+        eval_history, metric_key
+    ):
+        if metric_value >= threshold:
+            consecutive_hits += 1
+            if first_step_in_window is None:
+                first_step_in_window = step_value
+            if consecutive_hits >= sustain_count:
+                return first_step_in_window
+        else:
+            consecutive_hits = 0
+            first_step_in_window = None
+    return None
+
+
+def aggregate_runs(run_results: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Aggregate run results by task and algorithm."""
+    grouped: dict[tuple[str, str], list[dict[str, Any]]] = defaultdict(list)
+    for result in run_results:
+        grouped[(result["task"], result["algorithm"])].append(result)
+
+    summaries: list[dict[str, Any]] = []
+    for (task, algorithm), runs in sorted(grouped.items()):
+        summary: dict[str, Any] = {
+            "task": task,
+            "algorithm": algorithm,
+            "num_runs": len(runs),
+        }
+        scalar_keys = {
+            "final_reward",
+            "final_success_rate",
+            "final_success_rate_stable",
+            "final_episode_length",
+            "training_fps",
+            "environment_fps",
+            "peak_gpu_memory_mb",
+        }
+        for key in scalar_keys:
+            values = [
+                float(run[key])
+                for run in runs
+                if isinstance(run.get(key), (int, float)) and not isnan(run[key])
+            ]
+            if values:
+                summary[f"{key}_mean"] = mean(values)
+                summary[f"{key}_std"] = pstdev(values) if len(values) > 1 else 0.0
+        step_keys = {
+            "steps_to_success_threshold",
+            "steps_to_success_threshold_first_hit",
+        }
+        for step_key in step_keys:
+            steps = [
+                int(run[step_key]) for run in runs if isinstance(run.get(step_key), int)
+            ]
+            if steps:
+                summary[f"{step_key}_mean"] = mean(steps)
+                summary[f"{step_key}_std"] = pstdev(steps) if len(steps) > 1 else 0.0
+        summaries.append(summary)
+
+    return summaries
+
+
+def _valid_float(value: Any) -> float | None:
+    if isinstance(value, (int, float)) and not isnan(float(value)):
+        return float(value)
+    return None
+
+
+def build_leaderboard(
+    aggregate_results: list[dict[str, Any]],
+    run_results: list[dict[str, Any]] | None = None,
+) -> list[dict[str, Any]]:
+    """Build leaderboard entries from aggregated benchmark summaries."""
+    grouped_summary: dict[str, list[dict[str, Any]]] = defaultdict(list)
+    for item in aggregate_results:
+        grouped_summary[item["algorithm"]].append(item)
+
+    grouped_runs: dict[str, list[dict[str, Any]]] = defaultdict(list)
+    for item in run_results or []:
+        grouped_runs[item["algorithm"]].append(item)
+
+    leaderboard: list[dict[str, Any]] = []
+    for algorithm, items in grouped_summary.items():
+        stable_success_values = [
+            float(item["final_success_rate_stable_mean"])
+            for item in items
+            if isinstance(item.get("final_success_rate_stable_mean"), (int, float))
+            and not isnan(item["final_success_rate_stable_mean"])
+        ]
+        success_values = [
+            float(item["final_success_rate_mean"])
+            for item in items
+            if isinstance(item.get("final_success_rate_mean"), (int, float))
+            and not isnan(item["final_success_rate_mean"])
+        ]
+        reward_values = [
+            float(item["final_reward_mean"])
+            for item in items
+            if isinstance(item.get("final_reward_mean"), (int, float))
+            and not isnan(item["final_reward_mean"])
+        ]
+        score = mean(stable_success_values) if stable_success_values else float("nan")
+        steps_values = [
+            float(item["steps_to_success_threshold_mean"])
+            for item in items
+            if isinstance(item.get("steps_to_success_threshold_mean"), (int, float))
+            and not isnan(item["steps_to_success_threshold_mean"])
+        ]
+        run_success_values = [
+            float(run["final_success_rate"])
+            for run in grouped_runs.get(algorithm, [])
+            if _valid_float(run.get("final_success_rate")) is not None
+        ]
+        task_scores = {
+            item["task"]: float(item["final_success_rate_stable_mean"])
+            for item in items
+            if _valid_float(item.get("final_success_rate_stable_mean")) is not None
+        }
+        raw_task_scores = {
+            item["task"]: float(item["final_success_rate_mean"])
+            for item in items
+            if _valid_float(item.get("final_success_rate_mean")) is not None
+        }
+        leaderboard.append(
+            {
+                "algorithm": algorithm,
+                "score": score,
+                "steps_to_success_threshold": (
+                    mean(steps_values) if steps_values else float("nan")
+                ),
+                "success_rate_std": (
+                    pstdev(run_success_values) if len(run_success_values) > 1 else 0.0
+                ),
+                "avg_success_rate": (
+                    mean(success_values) if success_values else float("nan")
+                ),
+                "avg_success_rate_stable": score,
+                "avg_final_reward": (
+                    mean(reward_values) if reward_values else float("nan")
+                ),
+                "tasks_covered": len(items),
+                "tasks": task_scores,
+                "tasks_raw": raw_task_scores,
+            }
+        )
+
+    leaderboard.sort(
+        key=lambda item: (
+            (
+                -(item["score"])
+                if isinstance(item["score"], float) and not isnan(item["score"])
+                else float("inf")
+            ),
+            item["algorithm"],
+        )
+    )
+    for index, item in enumerate(leaderboard, start=1):
+        item["rank"] = index
+    return leaderboard
+
+
+__all__ = [
+    "aggregate_runs",
+    "build_leaderboard",
+    "compute_final_metric_stable",
+    "compute_steps_to_threshold_first_hit",
+    "compute_steps_to_threshold_sustained",
+]
diff --git a/scripts/benchmark/rl/plots.py b/scripts/benchmark/rl/plots.py
new file mode 100644
index 00000000..e84f6964
--- /dev/null
+++ b/scripts/benchmark/rl/plots.py
@@ -0,0 +1,211 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+from collections import defaultdict
+from math import isnan
+from pathlib import Path
+from statistics import mean
+from typing import Any
+
+COLORS = ["#1768ac", "#f26419", "#2a9134", "#c44536", "#6a4c93", "#1982c4"]
+
+
+def _svg_header(width: int, height: int) -> list[str]:
+    return [
+        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">',
+        '<rect width="100%" height="100%" fill="#fffdf8" />',
+    ]
+
+
+def _line_chart_svg(
+    title: str,
+    series: dict[str, list[tuple[float, float]]],
+    width: int = 900,
+    height: int = 420,
+) -> str:
+    margin_left = 70
+    margin_right = 20
+    margin_top = 40
+    margin_bottom = 50
+    plot_width = width - margin_left - margin_right
+    plot_height = height - margin_top - margin_bottom
+
+    all_points = [point for points in series.values() for point in points]
+    xs = [point[0] for point in all_points] or [0.0, 1.0]
+    ys = [point[1] for point in all_points if not isnan(point[1])] or [0.0, 1.0]
+    x_min, x_max = min(xs), max(xs)
+    y_min, y_max = min(ys), max(ys)
+    if x_min == x_max:
+        x_max = x_min + 1.0
+    if y_min == y_max:
+        y_max = y_min + 1.0
+
+    def tx(x: float) -> float:
+        return margin_left + (x - x_min) / (x_max - x_min) * plot_width
+
+    def ty(y: float) -> float:
+        return margin_top + plot_height - (y - y_min) / (y_max - y_min) * plot_height
+
+    lines = _svg_header(width, height)
+    lines.extend(
+        [
+            f'<text x="{margin_left}" y="24" font-size="20" font-family="Arial" fill="#222">{title}</text>',
+            f'<line x1="{margin_left}" y1="{margin_top}" x2="{margin_left}" y2="{margin_top + plot_height}" stroke="#333" stroke-width="1.5" />',
+            f'<line x1="{margin_left}" y1="{margin_top + plot_height}" x2="{margin_left + plot_width}" y2="{margin_top + plot_height}" stroke="#333" stroke-width="1.5" />',
+        ]
+    )
+    for idx in range(5):
+        y_val = y_min + (y_max - y_min) * idx / 4.0
+        y_pos = ty(y_val)
+        lines.append(
+            f'<line x1="{margin_left}" y1="{y_pos:.2f}" x2="{margin_left + plot_width}" y2="{y_pos:.2f}" stroke="#e8e1d6" stroke-width="1" />'
+        )
+        lines.append(
+            f'<text x="10" y="{y_pos + 4:.2f}" font-size="12" font-family="Arial" fill="#555">{y_val:.3f}</text>'
+        )
+
+    for idx, (label, points) in enumerate(sorted(series.items())):
+        color = COLORS[idx % len(COLORS)]
+        polyline_points = " ".join(
+            f"{tx(x):.2f},{ty(y):.2f}" for x, y in points if not isnan(y)
+        )
+        lines.append(
+            f'<polyline fill="none" stroke="{color}" stroke-width="3" points="{polyline_points}" />'
+        )
+        legend_y = margin_top + 18 * idx
+        lines.append(
+            f'<line x1="{width - 180}" y1="{legend_y}" x2="{width - 150}" y2="{legend_y}" stroke="{color}" stroke-width="3" />'
+        )
+        lines.append(
+            f'<text x="{width - 140}" y="{legend_y + 4}" font-size="12" font-family="Arial" fill="#333">{label}</text>'
+        )
+
+    lines.append("</svg>")
+    return "\n".join(lines)
+
+
+def _bar_chart_svg(
+    title: str,
+    items: list[tuple[str, float]],
+    width: int = 900,
+    height: int = 420,
+) -> str:
+    margin_left = 80
+    margin_right = 20
+    margin_top = 40
+    margin_bottom = 80
+    plot_width = width - margin_left - margin_right
+    plot_height = height - margin_top - margin_bottom
+    values = [value for _, value in items if not isnan(value)] or [1.0]
+    value_max = max(values)
+    if value_max <= 0:
+        value_max = 1.0
+
+    lines = _svg_header(width, height)
+    lines.append(
+        f'<text x="{margin_left}" y="24" font-size="20" font-family="Arial" fill="#222">{title}</text>'
+    )
+    bar_width = plot_width / max(len(items), 1)
+    for idx, (label, value) in enumerate(items):
+        color = COLORS[idx % len(COLORS)]
+        bar_height = 0.0 if isnan(value) else (value / value_max) * plot_height
+        x = margin_left + idx * bar_width + 10
+        y = margin_top + plot_height - bar_height
+        lines.append(
+            f'<rect x="{x:.2f}" y="{y:.2f}" width="{max(bar_width - 20, 10):.2f}" height="{bar_height:.2f}" fill="{color}" rx="4" />'
+        )
+        lines.append(
+            f'<text x="{x + max(bar_width - 20, 10) / 2:.2f}" y="{margin_top + plot_height + 18}" text-anchor="middle" font-size="12" font-family="Arial" fill="#333">{label}</text>'
+        )
+        lines.append(
+            f'<text x="{x + max(bar_width - 20, 10) / 2:.2f}" y="{y - 8:.2f}" text-anchor="middle" font-size="12" font-family="Arial" fill="#333">{value:.3f}</text>'
+        )
+    lines.append("</svg>")
+    return "\n".join(lines)
+
+
+def build_plot_artifacts(
+    run_results: list[dict[str, Any]],
+    leaderboard: list[dict[str, Any]],
+    output_dir: str | Path,
+) -> dict[str, str]:
+    """Generate SVG plot artifacts and return named paths."""
+    output = Path(output_dir)
+    output.mkdir(parents=True, exist_ok=True)
+    artifacts: dict[str, str] = {}
+
+    grouped_histories: dict[tuple[str, str], dict[float, list[float]]] = defaultdict(
+        lambda: defaultdict(list)
+    )
+    grouped_rewards: dict[tuple[str, str], dict[float, list[float]]] = defaultdict(
+        lambda: defaultdict(list)
+    )
+    for result in run_results:
+        key = (result["task"], result["algorithm"])
+        for item in result.get("eval_history", []):
+            step = item.get("global_step")
+            success = item.get("eval/success_rate")
+            reward = item.get("eval/avg_reward")
+            if isinstance(step, (int, float)) and isinstance(success, (int, float)):
+                grouped_histories[key][float(step)].append(float(success))
+            if isinstance(step, (int, float)) and isinstance(reward, (int, float)):
+                grouped_rewards[key][float(step)].append(float(reward))
+
+    tasks = sorted({result["task"] for result in run_results})
+    for task in tasks:
+        success_series = {}
+        reward_series = {}
+        for task_name, algorithm in sorted(grouped_histories.keys()):
+            if task_name != task:
+                continue
+            success_series[algorithm] = sorted(
+                (step, mean(values))
+                for step, values in grouped_histories[(task_name, algorithm)].items()
+            )
+            reward_series[algorithm] = sorted(
+                (step, mean(values))
+                for step, values in grouped_rewards[(task_name, algorithm)].items()
+            )
+        if success_series:
+            path = output / f"{task}_success_rate.svg"
+            path.write_text(
+                _line_chart_svg(f"{task} Success Rate", success_series),
+                encoding="utf-8",
+            )
+            artifacts[f"{task}_success_rate"] = str(path)
+        if reward_series:
+            path = output / f"{task}_reward.svg"
+            path.write_text(
+                _line_chart_svg(f"{task} Evaluation Reward", reward_series),
+                encoding="utf-8",
+            )
+            artifacts[f"{task}_reward"] = str(path)
+
+    leaderboard_path = output / "leaderboard_score.svg"
+    leaderboard_path.write_text(
+        _bar_chart_svg(
+            "Leaderboard Score",
+            [(item["algorithm"], float(item["score"])) for item in leaderboard],
+        ),
+        encoding="utf-8",
+    )
+    artifacts["leaderboard_score"] = str(leaderboard_path)
+    return artifacts
+
+
+__all__ = ["build_plot_artifacts"]
diff --git a/scripts/benchmark/rl/reporting.py b/scripts/benchmark/rl/reporting.py
new file mode 100644
index 00000000..635123df
--- /dev/null
+++ b/scripts/benchmark/rl/reporting.py
@@ -0,0 +1,292 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import math
+from collections import defaultdict
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+
+def _fmt(value: Any, digits: int = 3) -> str:
+    if isinstance(value, float):
+        return f"{value:.{digits}f}"
+    return str(value)
+
+
+def _safe_divide(numerator: float, denominator: float) -> float:
+    if denominator <= 0:
+        return float("nan")
+    return numerator / denominator
+
+
+def _sortable_success_rate(item: dict[str, Any]) -> float:
+    value = float(item.get("avg_success_rate", float("nan")))
+    if math.isnan(value):
+        return float("-inf")
+    return value
+
+
+def _build_report_leaderboard_rows(
+    leaderboard: list[dict[str, Any]],
+    aggregate_results: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+    """Build complete leaderboard rows and sort by overall success rate."""
+    by_algorithm: dict[str, dict[str, Any]] = {}
+    for item in leaderboard:
+        algorithm = str(item.get("algorithm", ""))
+        if not algorithm:
+            continue
+        by_algorithm[algorithm] = dict(item)
+
+    grouped_aggregate: dict[str, list[dict[str, Any]]] = defaultdict(list)
+    for item in aggregate_results:
+        algorithm = str(item.get("algorithm", ""))
+        if not algorithm:
+            continue
+        grouped_aggregate[algorithm].append(item)
+
+    for algorithm, items in grouped_aggregate.items():
+        if algorithm in by_algorithm:
+            continue
+
+        success_values = [
+            float(entry["final_success_rate_mean"])
+            for entry in items
+            if isinstance(entry.get("final_success_rate_mean"), (int, float))
+            and not math.isnan(float(entry["final_success_rate_mean"]))
+        ]
+        stable_success_values = [
+            float(entry["final_success_rate_stable_mean"])
+            for entry in items
+            if isinstance(entry.get("final_success_rate_stable_mean"), (int, float))
+            and not math.isnan(float(entry["final_success_rate_stable_mean"]))
+        ]
+        by_algorithm[algorithm] = {
+            "algorithm": algorithm,
+            "avg_success_rate": (
+                sum(success_values) / len(success_values)
+                if success_values
+                else float("nan")
+            ),
+            "avg_success_rate_stable": (
+                sum(stable_success_values) / len(stable_success_values)
+                if stable_success_values
+                else float("nan")
+            ),
+            "score": (
+                sum(stable_success_values) / len(stable_success_values)
+                if stable_success_values
+                else float("nan")
+            ),
+            "tasks_covered": len(items),
+        }
+
+    return sorted(
+        by_algorithm.values(),
+        key=lambda item: (
+            -_sortable_success_rate(item),
+            str(item.get("algorithm", "")),
+        ),
+    )
+
+
+def generate_markdown_report(
+    run_results: list[dict[str, Any]],
+    aggregate_results: list[dict[str, Any]],
+    leaderboard: list[dict[str, Any]],
+    plot_artifacts: dict[str, str],
+    protocol: dict[str, Any] | None,
+    output_path: str | Path,
+) -> Path:
+    """Write a benchmark markdown report with exactly three tables."""
+    output = Path(output_path)
+    output.parent.mkdir(parents=True, exist_ok=True)
+
+    ordered_runs = sorted(
+        run_results,
+        key=lambda item: (
+            str(item.get("task", "")),
+            str(item.get("algorithm", "")),
+            int(item.get("seed", 0)),
+        ),
+    )
+
+    lines = [
+        "# RL Benchmark Report",
+        "",
+        f"Generated at: {datetime.now().isoformat(timespec='seconds')}",
+        "",
+        "## Benchmark Overview",
+        "",
+    ]
+    if protocol:
+        lines.extend(
+            [
+                f"- device: `{protocol.get('device')}`",
+                f"- headless: `{protocol.get('headless')}`",
+                f"- iterations: `{protocol.get('iterations')}`",
+                f"- buffer_size: `{protocol.get('buffer_size')}`",
+                f"- num_envs: `{protocol.get('num_envs')}`",
+                f"- num_eval_envs: `{protocol.get('num_eval_envs')}`",
+                f"- evaluation_interval: `{protocol.get('evaluation_interval')}`",
+                f"- evaluation_episodes: `{protocol.get('evaluation_episodes')}`",
+                f"- threshold_sustain_count: `{protocol.get('threshold_sustain_count', 3)}`",
+                f"- final_eval_window: `{protocol.get('final_eval_window', 3)}`",
+                "",
+            ]
+        )
+    lines.extend(
+        [
+            "## Time & Memory",
+            "",
+            "| task | algorithm | seed | cost_time_ms | cpu_delta_mb | gpu_delta_mb | peak_gpu_mb | training_fps | env_fps |",
+            "| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |",
+        ]
+    )
+    for result in ordered_runs:
+        train_steps = float(result.get("train_steps", float("nan")))
+        training_fps = float(result.get("training_fps", float("nan")))
+        cost_time_ms = _safe_divide(train_steps, training_fps) * 1000.0
+        lines.append(
+            "| {task} | {algorithm} | {seed} | {cost_time_ms} | {cpu_delta} | {gpu_delta} | {peak_gpu} | {train_fps} | {env_fps} |".format(
+                task=result["task"],
+                algorithm=result["algorithm"],
+                seed=result["seed"],
+                cost_time_ms=_fmt(cost_time_ms),
+                cpu_delta=_fmt(result.get("cpu_delta_mb", "n/a")),
+                gpu_delta=_fmt(result.get("gpu_delta_mb", "n/a")),
+                peak_gpu=_fmt(result.get("peak_gpu_memory_mb", float("nan"))),
+                train_fps=_fmt(result.get("training_fps", float("nan"))),
+                env_fps=_fmt(result.get("environment_fps", float("nan")), digits=2),
+            )
+        )
+
+    lines.extend(
+        [
+            "",
+            "## Success & Other Metrics",
+            "",
+            "| task | algorithm | seed | success_rate | stable_success_rate | steps_to_threshold | first_hit | final_reward | final_episode_length |",
+            "| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |",
+        ]
+    )
+    for result in ordered_runs:
+        lines.append(
+            "| {task} | {algorithm} | {seed} | {success} | {stable_success} | {steps} | {first_hit} | {reward} | {episode_len} |".format(
+                task=result["task"],
+                algorithm=result["algorithm"],
+                seed=result["seed"],
+                success=_fmt(result.get("final_success_rate", float("nan"))),
+                stable_success=_fmt(
+                    result.get("final_success_rate_stable", float("nan"))
+                ),
+                steps=_fmt(result.get("steps_to_success_threshold", float("nan"))),
+                first_hit=_fmt(
+                    result.get("steps_to_success_threshold_first_hit", float("nan"))
+                ),
+                reward=_fmt(result.get("final_reward", float("nan"))),
+                episode_len=_fmt(result.get("final_episode_length", float("nan"))),
+            )
+        )
+
+    leaderboard_by_success = _build_report_leaderboard_rows(
+        leaderboard=leaderboard,
+        aggregate_results=aggregate_results,
+    )
+    lines.extend(
+        [
+            "",
+            "## Leaderboard",
+            "",
+            "| rank | algorithm | overall_success_rate | stable_success_rate | score | tasks_covered |",
+            "| ---: | --- | ---: | ---: | ---: | ---: |",
+        ]
+    )
+    for rank, item in enumerate(leaderboard_by_success, start=1):
+        lines.append(
+            "| {rank} | {algorithm} | {success} | {stable_success} | {score} | {tasks} |".format(
+                rank=rank,
+                algorithm=item.get("algorithm", "n/a"),
+                success=_fmt(item.get("avg_success_rate", float("nan"))),
+                stable_success=_fmt(item.get("avg_success_rate_stable", float("nan"))),
+                score=_fmt(item.get("score", float("nan"))),
+                tasks=item.get("tasks_covered", 0),
+            )
+        )
+
+    lines.extend(["", "## Notes", ""])
+    if leaderboard_by_success:
+        top = leaderboard_by_success[0]
+        lines.append(
+            "- Top algorithm by overall success rate: "
+            f"`{top.get('algorithm', 'n/a')}` "
+            f"(success_rate={_fmt(top.get('avg_success_rate', float('nan')))})."
+        )
+    if aggregate_results:
+        lines.append(f"- Aggregate summaries available: `{len(aggregate_results)}`.")
+
+    if plot_artifacts:
+        lines.extend(["", "## Plots", ""])
+    for plot_name, plot_path in sorted(plot_artifacts.items()):
+        relative = Path(plot_path).relative_to(output.parent)
+        lines.append(f"- {plot_name}: ![{plot_name}]({relative.as_posix()})")
+
+    output.write_text("\n".join(lines) + "\n", encoding="utf-8")
+    return output
+
+
+def generate_leaderboard_markdown(
+    leaderboard: list[dict[str, Any]],
+    output_path: str | Path,
+) -> Path:
+    """Write a dedicated leaderboard markdown artifact sorted by success rate."""
+    output = Path(output_path)
+    output.parent.mkdir(parents=True, exist_ok=True)
+    leaderboard_by_success = sorted(
+        leaderboard,
+        key=lambda item: (
+            -_sortable_success_rate(item),
+            str(item.get("algorithm", "")),
+        ),
+    )
+    lines = [
+        "# Benchmark Leaderboard",
+        "",
+        "| Rank | Algorithm | Score | Steps To Threshold (Sustained) | Success Rate Std | Avg Success Rate | Avg Stable Success Rate | Avg Final Reward | Tasks |",
+        "| ---: | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |",
+    ]
+    for rank, item in enumerate(leaderboard_by_success, start=1):
+        lines.append(
+            "| {rank} | {algorithm} | {score} | {steps} | {std} | {success} | {stable_success} | {reward} | {tasks} |".format(
+                rank=rank,
+                algorithm=item["algorithm"],
+                score=_fmt(item.get("score", float("nan"))),
+                steps=_fmt(item.get("steps_to_success_threshold", float("nan"))),
+                std=_fmt(item.get("success_rate_std", float("nan"))),
+                success=_fmt(item.get("avg_success_rate", float("nan"))),
+                stable_success=_fmt(item.get("avg_success_rate_stable", float("nan"))),
+                reward=_fmt(item.get("avg_final_reward", float("nan"))),
+                tasks=item.get("tasks_covered", 0),
+            )
+        )
+    output.write_text("\n".join(lines) + "\n", encoding="utf-8")
+    return output
+
+
+__all__ = ["generate_leaderboard_markdown", "generate_markdown_report"]
diff --git a/scripts/benchmark/rl/run_benchmark.py b/scripts/benchmark/rl/run_benchmark.py
new file mode 100644
index 00000000..bd85e12f
--- /dev/null
+++ b/scripts/benchmark/rl/run_benchmark.py
@@ -0,0 +1,106 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Run RL benchmark training/evaluation and generate one markdown report.
+
+Run: python -m scripts.benchmark.rl.run_benchmark
+"""
+
+from __future__ import annotations
+
+import argparse
+
+from .runner import BenchmarkRunner
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse CLI arguments for full benchmark execution."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--tasks", nargs="*", default=None)
+    parser.add_argument("--algorithms", nargs="*", default=None)
+    parser.add_argument("--seeds", nargs="*", type=int, default=None)
+    parser.add_argument("--suite", type=str, default="default")
+    parser.add_argument(
+        "--output-root", type=str, default="scripts/benchmark/rl/reports"
+    )
+    parser.add_argument("--device", type=str, default=None)
+    parser.add_argument("--iterations", type=int, default=None)
+    parser.add_argument("--buffer-size", type=int, default=None)
+    parser.add_argument("--evaluation-interval", type=int, default=None)
+    parser.add_argument("--evaluation-episodes", type=int, default=None)
+    parser.add_argument("--num-envs", type=int, default=None)
+    parser.add_argument("--num-eval-envs", type=int, default=None)
+    parser.add_argument("--headless", action="store_true")
+    parser.add_argument("--skip-existing", action="store_true")
+    parser.add_argument("--rebuild-report-only", action="store_true")
+    return parser.parse_args()
+
+
+def main() -> None:
+    """Train, evaluate, aggregate, and report benchmark results."""
+    args = parse_args()
+    overrides = {
+        key: value
+        for key, value in {
+            "device": args.device,
+            "iterations": args.iterations,
+            "buffer_size": args.buffer_size,
+            "evaluation_interval": args.evaluation_interval,
+            "evaluation_episodes": args.evaluation_episodes,
+            "num_envs": args.num_envs,
+            "num_eval_envs": args.num_eval_envs,
+            "headless": args.headless if args.headless else None,
+        }.items()
+        if value is not None
+    }
+    runner = BenchmarkRunner(
+        tasks=args.tasks,
+        algorithms=args.algorithms,
+        seeds=args.seeds,
+        suite=args.suite,
+        output_root=args.output_root,
+        overrides=overrides,
+    )
+
+    if args.rebuild_report_only:
+        run_results = runner.collect_existing_run_results()
+        if not run_results:
+            training_runs = runner.collect_existing_training_runs()
+            if training_runs:
+                run_results = runner.run_evaluation(training_runs)
+            else:
+                raise SystemExit(
+                    "No compatible existing benchmark results were found for the requested jobs under "
+                    f"{runner.output_root / 'runs'}. "
+                    "Run once without --rebuild-report-only to generate artifacts, "
+                    "or pass --output-root to the directory containing existing runs."
+                )
+    else:
+        existing_results = (
+            runner.collect_existing_run_results() if args.skip_existing else []
+        )
+        training_runs = runner.run_training(skip_existing=args.skip_existing)
+        new_results = runner.run_evaluation(training_runs)
+        run_results = runner.merge_run_results(existing_results, new_results)
+
+    aggregate_result = runner.aggregate_results(run_results)
+    leaderboard = runner.update_leaderboard(aggregate_result, run_results)
+    report_path = runner.generate_report(run_results, aggregate_result, leaderboard)
+    print(f"Markdown report saved: {report_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/benchmark/rl/runner.py b/scripts/benchmark/rl/runner.py
new file mode 100644
index 00000000..84dcda87
--- /dev/null
+++ b/scripts/benchmark/rl/runner.py
@@ -0,0 +1,415 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import json
+from copy import deepcopy
+from pathlib import Path
+from typing import Any
+
+from .config import deep_update, load_algorithm_spec, load_suite_spec, load_task_spec
+from .metrics import (
+    aggregate_runs,
+    build_leaderboard,
+    compute_final_metric_stable,
+    compute_steps_to_threshold_first_hit,
+    compute_steps_to_threshold_sustained,
+)
+from .plots import build_plot_artifacts
+from .reporting import generate_leaderboard_markdown, generate_markdown_report
+from .runtime import dump_json, evaluate_checkpoint, train_with_config
+
+
+class BenchmarkRunner:
+    """Coordinate benchmark training, evaluation, aggregation, and reporting."""
+
+    def __init__(
+        self,
+        tasks: list[str] | None = None,
+        algorithms: list[str] | None = None,
+        seeds: list[int] | None = None,
+        suite: str = "default",
+        output_root: str | Path = "benchmark/reports",
+        overrides: dict[str, Any] | None = None,
+    ) -> None:
+        suite_spec = load_suite_spec(suite)
+        self.suite = suite
+        self.tasks = tasks or list(suite_spec["tasks"])
+        self.algorithms = algorithms or list(suite_spec["algorithms"])
+        self.seeds = seeds or list(suite_spec["seeds"])
+        self.protocol = deepcopy(suite_spec.get("protocol", {}))
+        if overrides:
+            self.protocol = deep_update(self.protocol, overrides)
+        self.output_root = Path(output_root)
+
+    def build_run_config(
+        self,
+        task_name: str,
+        algorithm_name: str,
+        seed: int,
+    ) -> dict[str, Any]:
+        task_spec = load_task_spec(task_name)
+        algorithm_spec = load_algorithm_spec(algorithm_name)
+
+        cfg = deep_update(task_spec["base_config"], algorithm_spec["config"])
+        cfg["trainer"]["exp_name"] = f"{task_name}_{algorithm_name}_seed{seed}"
+        cfg["trainer"]["seed"] = seed
+        train_eval_enabled = bool(task_spec.get("train_eval_enabled", True))
+        cfg["trainer"]["enable_eval"] = train_eval_enabled
+        if train_eval_enabled:
+            cfg["trainer"]["eval_freq"] = int(self.protocol["evaluation_interval"])
+            cfg["trainer"]["num_eval_episodes"] = int(
+                self.protocol["evaluation_episodes"]
+            )
+        cfg["trainer"]["iterations"] = int(self.protocol["iterations"])
+        cfg["trainer"]["buffer_size"] = int(self.protocol["buffer_size"])
+        cfg["trainer"]["num_envs"] = int(self.protocol["num_envs"])
+        cfg["trainer"]["num_eval_envs"] = int(self.protocol["num_eval_envs"])
+        cfg["trainer"]["device"] = str(self.protocol["device"])
+        cfg["trainer"]["headless"] = bool(self.protocol["headless"])
+        cfg["trainer"]["save_freq"] = int(self.protocol["save_interval"])
+        cfg["trainer"]["use_wandb"] = False
+        return cfg
+
+    def _iter_jobs(self) -> list[tuple[str, str, int]]:
+        jobs = []
+        for task_name in self.tasks:
+            for algorithm_name in self.algorithms:
+                for seed in self.seeds:
+                    jobs.append((task_name, algorithm_name, seed))
+        return jobs
+
+    def _run_dir(self, task_name: str, algorithm_name: str, seed: int) -> Path:
+        return self.output_root / "runs" / task_name / algorithm_name / f"seed_{seed}"
+
+    @staticmethod
+    def _job_key(
+        task_name: str, algorithm_name: str, seed: int
+    ) -> tuple[str, str, int]:
+        return (task_name, algorithm_name, int(seed))
+
+    @staticmethod
+    def _load_json_artifact(path: str | Path) -> dict[str, Any] | None:
+        artifact_path = Path(path)
+        if not artifact_path.exists():
+            return None
+        data = json.loads(artifact_path.read_text(encoding="utf-8"))
+        if not isinstance(data, dict):
+            raise TypeError(
+                f"Expected JSON object at {artifact_path}, got {type(data)!r}."
+            )
+        return data
+
+    @staticmethod
+    def _record_matches_job(
+        record: dict[str, Any],
+        task_name: str,
+        algorithm_name: str,
+        seed: int,
+    ) -> bool:
+        return (
+            record.get("task") == task_name
+            and record.get("algorithm") == algorithm_name
+            and int(record.get("seed", -1)) == int(seed)
+        )
+
+    @staticmethod
+    def _protocol_from_run_config(run_config: dict[str, Any]) -> dict[str, Any]:
+        trainer = run_config.get("trainer", {})
+        return {
+            "device": trainer.get("device"),
+            "headless": trainer.get("headless"),
+            "iterations": trainer.get("iterations"),
+            "buffer_size": trainer.get("buffer_size"),
+            "num_envs": trainer.get("num_envs"),
+            "num_eval_envs": trainer.get("num_eval_envs"),
+            "evaluation_interval": trainer.get("eval_freq"),
+            "evaluation_episodes": trainer.get("num_eval_episodes"),
+        }
+
+    def _expected_protocol_for_job(
+        self,
+        task_name: str,
+        algorithm_name: str,
+        seed: int,
+    ) -> dict[str, Any]:
+        return self._protocol_from_run_config(
+            self.build_run_config(task_name, algorithm_name, seed)
+        )
+
+    def _artifact_is_compatible(
+        self,
+        artifact: dict[str, Any],
+        task_name: str,
+        algorithm_name: str,
+        seed: int,
+        run_dir: Path,
+    ) -> bool:
+        artifact_protocol = artifact.get("protocol")
+        if isinstance(artifact_protocol, dict):
+            return artifact_protocol == self.protocol
+        run_config = self._load_json_artifact(run_dir / "run_config.json")
+        if run_config is None:
+            return False
+        return self._protocol_from_run_config(
+            run_config
+        ) == self._expected_protocol_for_job(task_name, algorithm_name, seed)
+
+    def _load_existing_training_record(
+        self,
+        task_name: str,
+        algorithm_name: str,
+        seed: int,
+    ) -> dict[str, Any] | None:
+        run_dir = self._run_dir(task_name, algorithm_name, seed)
+        record = self._load_json_artifact(run_dir / "train_result.json")
+        if record is None:
+            return None
+        if not self._record_matches_job(record, task_name, algorithm_name, seed):
+            return None
+        if not self._artifact_is_compatible(
+            record, task_name, algorithm_name, seed, run_dir
+        ):
+            return None
+        checkpoint_path = record.get("checkpoint_path")
+        if not checkpoint_path or not Path(checkpoint_path).exists():
+            return None
+        return record
+
+    def collect_existing_run_results(self) -> list[dict[str, Any]]:
+        """Load compatible existing result artifacts for the requested jobs."""
+        results: list[dict[str, Any]] = []
+        for task_name, algorithm_name, seed in self._iter_jobs():
+            run_dir = self._run_dir(task_name, algorithm_name, seed)
+            record = self._load_json_artifact(run_dir / "result.json")
+            if record is None:
+                continue
+            if not self._record_matches_job(record, task_name, algorithm_name, seed):
+                continue
+            if not self._artifact_is_compatible(
+                record, task_name, algorithm_name, seed, run_dir
+            ):
+                continue
+            results.append(record)
+        return results
+
+    def collect_existing_training_runs(self) -> list[dict[str, Any]]:
+        """Load compatible existing training artifacts for the requested jobs."""
+        records: list[dict[str, Any]] = []
+        for task_name, algorithm_name, seed in self._iter_jobs():
+            record = self._load_existing_training_record(
+                task_name, algorithm_name, seed
+            )
+            if record is not None:
+                records.append(record)
+        return records
+
+    def merge_run_results(
+        self,
+        *result_sets: list[dict[str, Any]],
+    ) -> list[dict[str, Any]]:
+        """Merge multiple run result lists, preferring later duplicates."""
+        merged: dict[tuple[str, str, int], dict[str, Any]] = {}
+        for result_set in result_sets:
+            for record in result_set:
+                key = self._job_key(
+                    str(record["task"]),
+                    str(record["algorithm"]),
+                    int(record["seed"]),
+                )
+                merged[key] = record
+        return [
+            merged[key]
+            for key in sorted(
+                merged.keys(), key=lambda item: (item[0], item[1], item[2])
+            )
+        ]
+
+    def run_training(self, skip_existing: bool = False) -> list[dict[str, Any]]:
+        """Run benchmark training and store per-run training artifacts."""
+        training_runs: list[dict[str, Any]] = []
+        existing_result_keys = set()
+        if skip_existing:
+            existing_result_keys = {
+                self._job_key(item["task"], item["algorithm"], item["seed"])
+                for item in self.collect_existing_run_results()
+            }
+        for task_name, algorithm_name, seed in self._iter_jobs():
+            run_dir = self._run_dir(task_name, algorithm_name, seed)
+            if (
+                skip_existing
+                and self._job_key(task_name, algorithm_name, seed)
+                in existing_result_keys
+            ):
+                continue
+            if skip_existing:
+                existing_training = self._load_existing_training_record(
+                    task_name, algorithm_name, seed
+                )
+                if existing_training is not None:
+                    training_runs.append(existing_training)
+                    continue
+
+            task_spec = load_task_spec(task_name)
+            run_config = self.build_run_config(task_name, algorithm_name, seed)
+            dump_json(run_config, run_dir / "run_config.json")
+            train_summary = train_with_config(run_config, run_dir)
+            training_record = {
+                "task": task_name,
+                "env_id": task_spec["env_id"],
+                "algorithm": algorithm_name,
+                "seed": seed,
+                "suite": self.suite,
+                "protocol": deepcopy(self.protocol),
+                "train_steps": int(train_summary["global_step"]),
+                "training_fps": train_summary["training_fps"],
+                "peak_gpu_memory_mb": train_summary["peak_gpu_memory_mb"],
+                "checkpoint_path": train_summary["checkpoint_path"],
+                "output_dir": train_summary["output_dir"],
+                "eval_history": train_summary.get("eval_history", []),
+                "train_history": train_summary.get("train_history", []),
+            }
+            dump_json(training_record, run_dir / "train_result.json")
+            training_runs.append(training_record)
+        return training_runs
+
+    def run_evaluation(
+        self, training_runs: list[dict[str, Any]]
+    ) -> list[dict[str, Any]]:
+        """Evaluate trained checkpoints and write final per-run benchmark results."""
+        results: list[dict[str, Any]] = []
+        for training_record in training_runs:
+            task_name = training_record["task"]
+            algorithm_name = training_record["algorithm"]
+            seed = training_record["seed"]
+            task_spec = load_task_spec(task_name)
+            run_dir = Path(training_record["output_dir"])
+            run_config = self.build_run_config(task_name, algorithm_name, seed)
+            dump_json(run_config, run_dir / "run_config.json")
+            eval_summary = evaluate_checkpoint(
+                cfg_json=run_config,
+                checkpoint_path=training_record["checkpoint_path"],
+                num_episodes=int(self.protocol["evaluation_episodes"]),
+                num_envs=int(self.protocol["num_eval_envs"]),
+            )
+            result = {
+                "task": task_name,
+                "env_id": task_spec["env_id"],
+                "algorithm": algorithm_name,
+                "seed": seed,
+                "suite": self.suite,
+                "protocol": deepcopy(self.protocol),
+                "train_steps": training_record["train_steps"],
+                "final_reward": eval_summary["avg_reward"],
+                "final_success_rate": eval_summary["success_rate"],
+                "final_episode_length": eval_summary["avg_episode_length"],
+                "training_fps": training_record["training_fps"],
+                "environment_fps": eval_summary["environment_fps"],
+                "peak_gpu_memory_mb": training_record["peak_gpu_memory_mb"],
+                "checkpoint_path": training_record["checkpoint_path"],
+                "output_dir": training_record["output_dir"],
+                "eval_history": training_record.get("eval_history", []),
+                "train_history": training_record.get("train_history", []),
+            }
+            threshold = task_spec.get("success_threshold", 0.8)
+            sustain_count = int(self.protocol.get("threshold_sustain_count", 3))
+            stable_eval_window = int(self.protocol.get("final_eval_window", 3))
+            result["final_success_rate_stable"] = compute_final_metric_stable(
+                training_record.get("eval_history", []),
+                metric_key="eval/success_rate",
+                window_size=stable_eval_window,
+            )
+            result["steps_to_success_threshold_first_hit"] = (
+                compute_steps_to_threshold_first_hit(
+                    training_record.get("eval_history", []),
+                    metric_key="eval/success_rate",
+                    threshold=float(threshold),
+                )
+            )
+            result["steps_to_success_threshold"] = compute_steps_to_threshold_sustained(
+                training_record.get("eval_history", []),
+                metric_key="eval/success_rate",
+                threshold=float(threshold),
+                sustain_count=sustain_count,
+            )
+            result["final_metrics"] = eval_summary["metrics"]
+            dump_json(result, run_dir / "result.json")
+            results.append(result)
+        return results
+
+    def aggregate_results(
+        self, run_results: list[dict[str, Any]]
+    ) -> list[dict[str, Any]]:
+        """Aggregate multiple seeds into task-algorithm summaries."""
+        return aggregate_runs(run_results)
+
+    def update_leaderboard(
+        self,
+        aggregate_result: list[dict[str, Any]],
+        run_results: list[dict[str, Any]],
+    ) -> list[dict[str, Any]]:
+        """Build and persist leaderboard artifacts."""
+        leaderboard = build_leaderboard(aggregate_result, run_results=run_results)
+        leaderboard_dir = self.output_root / "leaderboard"
+        dump_json({"leaderboard": leaderboard}, leaderboard_dir / "leaderboard.json")
+        generate_leaderboard_markdown(
+            leaderboard=leaderboard,
+            output_path=leaderboard_dir / "leaderboard.md",
+        )
+        return leaderboard
+
+    def generate_report(
+        self,
+        run_results: list[dict[str, Any]],
+        aggregate_result: list[dict[str, Any]],
+        leaderboard: list[dict[str, Any]] | None = None,
+    ) -> Path:
+        """Create a markdown benchmark report and result json files."""
+        leaderboard = leaderboard or self.update_leaderboard(
+            aggregate_result, run_results
+        )
+        plot_artifacts = build_plot_artifacts(
+            run_results=run_results,
+            leaderboard=leaderboard,
+            output_dir=self.output_root / "plots",
+        )
+        dump_json({"runs": run_results}, self.output_root / "benchmark_runs.json")
+        dump_json(
+            {"aggregate": aggregate_result},
+            self.output_root / "benchmark_summary.json",
+        )
+        dump_json(
+            {
+                "suite": self.suite,
+                "tasks": self.tasks,
+                "algorithms": self.algorithms,
+                "seeds": self.seeds,
+                "protocol": self.protocol,
+            },
+            self.output_root / "benchmark_protocol.json",
+        )
+        return generate_markdown_report(
+            run_results=run_results,
+            aggregate_results=aggregate_result,
+            leaderboard=leaderboard,
+            plot_artifacts=plot_artifacts,
+            protocol=self.protocol,
+            output_path=self.output_root / "benchmark_report.md",
+        )
+
+
+__all__ = ["BenchmarkRunner"]
diff --git a/scripts/benchmark/rl/runtime.py b/scripts/benchmark/rl/runtime.py
new file mode 100644
index 00000000..666880f9
--- /dev/null
+++ b/scripts/benchmark/rl/runtime.py
@@ -0,0 +1,441 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import json
+import time
+from copy import deepcopy
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import torch
+from tensordict import TensorDict
+from torch.utils.tensorboard import SummaryWriter
+
+from embodichain.agents.rl.algo import build_algo
+from embodichain.agents.rl.models import build_mlp_from_cfg, build_policy
+from embodichain.agents.rl.utils import dict_to_tensordict, flatten_dict_observation
+from embodichain.agents.rl.utils.trainer import Trainer
+from embodichain.lab.gym.envs.managers.cfg import EventCfg
+from embodichain.lab.gym.envs.tasks.rl import build_env
+from embodichain.lab.gym.utils.gym_utils import DEFAULT_MANAGER_MODULES, config_to_cfg
+from embodichain.lab.sim import SimulationManagerCfg
+from embodichain.utils.module_utils import find_function_from_modules
+from embodichain.utils.utility import load_json
+
+EVENT_MODULES = [
+    "embodichain.lab.gym.envs.managers.randomization",
+    "embodichain.lab.gym.envs.managers.record",
+    "embodichain.lab.gym.envs.managers.events",
+]
+
+
+def resolve_device(device_str: str) -> torch.device:
+    """Resolve a runtime device string into a validated torch device."""
+    device = torch.device(device_str)
+    if device.type == "cuda":
+        if not torch.cuda.is_available():
+            raise ValueError("CUDA requested but no CUDA device is available.")
+        index = (
+            device.index if device.index is not None else torch.cuda.current_device()
+        )
+        if index < 0 or index >= torch.cuda.device_count():
+            raise ValueError(f"CUDA device index {index} is out of range.")
+        torch.cuda.set_device(index)
+        return torch.device(f"cuda:{index}")
+    if device.type != "cpu":
+        raise ValueError(f"Unsupported device type: {device.type}")
+    return device
+
+
+def set_random_seed(seed: int, device: torch.device) -> None:
+    """Set deterministic random seeds for numpy and torch."""
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.backends.cudnn.deterministic = True
+    if device.type == "cuda":
+        torch.cuda.manual_seed_all(seed)
+        torch.cuda.reset_peak_memory_stats(device)
+
+
+def _parse_event_cfg(events_dict: dict[str, Any]) -> dict[str, EventCfg]:
+    parsed: dict[str, EventCfg] = {}
+    for event_name, event_info in events_dict.items():
+        event_func = find_function_from_modules(
+            event_info["func"], EVENT_MODULES, raise_if_not_found=True
+        )
+        parsed[event_name] = EventCfg(
+            func=event_func,
+            mode=event_info.get("mode", "interval"),
+            params=event_info.get("params", {}),
+            interval_step=event_info.get("interval_step", 1),
+        )
+    return parsed
+
+
+def _build_env_cfg(
+    gym_config_path: str,
+    num_envs: int | None,
+    headless: bool,
+    device: torch.device,
+    gpu_id: int,
+):
+    gym_config_data = load_json(gym_config_path)
+    gym_env_cfg = config_to_cfg(
+        gym_config_data, manager_modules=DEFAULT_MANAGER_MODULES
+    )
+    if num_envs is not None:
+        gym_env_cfg.num_envs = int(num_envs)
+    if gym_env_cfg.sim_cfg is None:
+        gym_env_cfg.sim_cfg = SimulationManagerCfg()
+    gym_env_cfg.seed = getattr(gym_env_cfg, "seed", None)
+    gym_env_cfg.sim_cfg.headless = headless
+    gym_env_cfg.sim_cfg.gpu_id = gpu_id
+    gym_env_cfg.sim_cfg.sim_device = device
+    return gym_config_data, gym_env_cfg
+
+
+def _allocate_eval_rollout_buffer(env, policy, device: torch.device) -> TensorDict:
+    """Allocate a small RL-style rollout buffer for evaluation-only environments."""
+    rollout_len = 2
+    return TensorDict(
+        {
+            "obs": torch.zeros(
+                env.num_envs,
+                rollout_len + 1,
+                policy.obs_dim,
+                dtype=torch.float32,
+                device=device,
+            ),
+            "action": torch.zeros(
+                env.num_envs,
+                rollout_len + 1,
+                policy.action_dim,
+                dtype=torch.float32,
+                device=device,
+            ),
+            "sample_log_prob": torch.zeros(
+                env.num_envs,
+                rollout_len + 1,
+                dtype=torch.float32,
+                device=device,
+            ),
+            "value": torch.zeros(
+                env.num_envs,
+                rollout_len + 1,
+                dtype=torch.float32,
+                device=device,
+            ),
+            "reward": torch.zeros(
+                env.num_envs,
+                rollout_len + 1,
+                dtype=torch.float32,
+                device=device,
+            ),
+            "done": torch.zeros(
+                env.num_envs,
+                rollout_len + 1,
+                dtype=torch.bool,
+                device=device,
+            ),
+            "terminated": torch.zeros(
+                env.num_envs,
+                rollout_len + 1,
+                dtype=torch.bool,
+                device=device,
+            ),
+            "truncated": torch.zeros(
+                env.num_envs,
+                rollout_len + 1,
+                dtype=torch.bool,
+                device=device,
+            ),
+        },
+        batch_size=[env.num_envs, rollout_len + 1],
+        device=device,
+    )
+
+
+def _compact_eval_rollout_buffer(env, rollout_buffer: TensorDict) -> None:
+    """Keep only the previous transition needed by rollout-dependent eval rewards."""
+    if getattr(env, "current_rollout_step", 0) < 2:
+        return
+    for key in ("action", "reward", "done", "terminated", "truncated"):
+        rollout_buffer[key][:, 0].copy_(rollout_buffer[key][:, 1])
+        rollout_buffer[key][:, 1:].zero_()
+    env.current_rollout_step = 1
+
+
+def build_policy_from_env(policy_block: dict[str, Any], env, device: torch.device):
+    """Build a policy using the current environment spaces."""
+    sample_obs, _ = env.reset()
+    sample_obs_td = dict_to_tensordict(sample_obs, device)
+    obs_dim = flatten_dict_observation(sample_obs_td).shape[-1]
+    flat_obs_space = env.flattened_observation_space
+    env_action_dim = env.action_space.shape[-1]
+
+    policy_name = policy_block["name"].lower()
+    if policy_name == "actor_critic":
+        actor = build_mlp_from_cfg(policy_block["actor"], obs_dim, env_action_dim)
+        critic = build_mlp_from_cfg(policy_block["critic"], obs_dim, 1)
+        return build_policy(
+            policy_block,
+            flat_obs_space,
+            env.action_space,
+            device,
+            actor=actor,
+            critic=critic,
+        )
+    if policy_name == "actor_only":
+        actor = build_mlp_from_cfg(policy_block["actor"], obs_dim, env_action_dim)
+        return build_policy(
+            policy_block,
+            flat_obs_space,
+            env.action_space,
+            device,
+            actor=actor,
+        )
+    return build_policy(policy_block, flat_obs_space, env.action_space, device)
+
+
+def train_with_config(
+    cfg_json: dict[str, Any],
+    output_dir: str | Path,
+) -> dict[str, Any]:
+    """Train an RL configuration and return a structured summary."""
+    trainer_cfg = deepcopy(cfg_json["trainer"])
+    policy_block = deepcopy(cfg_json["policy"])
+    algo_block = deepcopy(cfg_json["algorithm"])
+
+    device = resolve_device(trainer_cfg.get("device", "cpu"))
+    seed = int(trainer_cfg.get("seed", 1))
+    set_random_seed(seed, device)
+
+    output_root = Path(output_dir)
+    log_dir = output_root / "logs"
+    checkpoint_dir = output_root / "checkpoints"
+    log_dir.mkdir(parents=True, exist_ok=True)
+    checkpoint_dir.mkdir(parents=True, exist_ok=True)
+
+    gym_config_data, gym_env_cfg = _build_env_cfg(
+        gym_config_path=trainer_cfg["gym_config"],
+        num_envs=trainer_cfg.get("num_envs"),
+        headless=bool(trainer_cfg.get("headless", True)),
+        device=device,
+        gpu_id=int(trainer_cfg.get("gpu_id", 0)),
+    )
+    env = None
+    eval_env = None
+    writer = SummaryWriter(str(log_dir))
+    try:
+        env = build_env(gym_config_data["id"], base_env_cfg=gym_env_cfg)
+
+        enable_eval = bool(trainer_cfg.get("enable_eval", True))
+        if enable_eval:
+            eval_gym_env_cfg = deepcopy(gym_env_cfg)
+            eval_gym_env_cfg.num_envs = int(
+                trainer_cfg.get("num_eval_envs", min(4, gym_env_cfg.num_envs))
+            )
+            eval_gym_env_cfg.sim_cfg.headless = True
+            eval_env = build_env(gym_config_data["id"], base_env_cfg=eval_gym_env_cfg)
+
+        policy = build_policy_from_env(policy_block, env, device)
+        algo = build_algo(algo_block["name"], algo_block["cfg"], policy, device)
+
+        events_dict = trainer_cfg.get("events", {})
+        trainer = Trainer(
+            policy=policy,
+            env=env,
+            algorithm=algo,
+            buffer_size=int(trainer_cfg.get("buffer_size", 2048)),
+            batch_size=int(algo_block["cfg"]["batch_size"]),
+            writer=writer,
+            eval_freq=int(trainer_cfg.get("eval_freq", 0)) if enable_eval else 0,
+            save_freq=int(trainer_cfg.get("save_freq", 0)) or 10**18,
+            checkpoint_dir=str(checkpoint_dir),
+            exp_name=str(trainer_cfg.get("exp_name", "benchmark_run")),
+            use_wandb=False,
+            eval_env=eval_env,
+            event_cfg=_parse_event_cfg(events_dict.get("train", {})),
+            eval_event_cfg=(
+                _parse_event_cfg(events_dict.get("eval", {})) if enable_eval else {}
+            ),
+            num_eval_episodes=int(trainer_cfg.get("num_eval_episodes", 5)),
+        )
+
+        total_steps = (
+            int(trainer_cfg.get("iterations", 1))
+            * int(trainer_cfg.get("buffer_size", 2048))
+            * int(env.num_envs)
+        )
+        start_time = time.perf_counter()
+        summary = trainer.train(total_steps)
+        wall_time = time.perf_counter() - start_time
+        checkpoint_path = trainer.save_checkpoint()
+    finally:
+        writer.close()
+        if eval_env is not None:
+            eval_env.close()
+        if env is not None:
+            env.close()
+
+    peak_gpu_memory_mb = 0.0
+    if device.type == "cuda":
+        peak_gpu_memory_mb = torch.cuda.max_memory_allocated(device=device) / (
+            1024.0 * 1024.0
+        )
+
+    summary.update(
+        {
+            "checkpoint_path": checkpoint_path,
+            "output_dir": str(output_root),
+            "wall_time_sec": float(wall_time),
+            "training_fps": float(total_steps / max(wall_time, 1e-6)),
+            "peak_gpu_memory_mb": float(peak_gpu_memory_mb),
+        }
+    )
+    return summary
+
+
+def evaluate_checkpoint(
+    cfg_json: dict[str, Any],
+    checkpoint_path: str | Path,
+    num_episodes: int,
+    num_envs: int | None = None,
+) -> dict[str, Any]:
+    """Evaluate a checkpoint deterministically and collect task metrics."""
+    trainer_cfg = deepcopy(cfg_json["trainer"])
+    policy_block = deepcopy(cfg_json["policy"])
+
+    device = resolve_device(trainer_cfg.get("device", "cpu"))
+    gym_config_data, gym_env_cfg = _build_env_cfg(
+        gym_config_path=trainer_cfg["gym_config"],
+        num_envs=num_envs if num_envs is not None else trainer_cfg.get("num_eval_envs"),
+        headless=True,
+        device=device,
+        gpu_id=int(trainer_cfg.get("gpu_id", 0)),
+    )
+    env = None
+    try:
+        env = build_env(gym_config_data["id"], base_env_cfg=gym_env_cfg)
+        policy = build_policy_from_env(policy_block, env, device)
+        eval_rollout_buffer = None
+        if hasattr(env, "set_rollout_buffer"):
+            eval_rollout_buffer = _allocate_eval_rollout_buffer(env, policy, device)
+
+        checkpoint = torch.load(checkpoint_path, map_location=device)
+        policy.load_state_dict(checkpoint["policy"])
+        policy.eval()
+
+        target_episodes = int(num_episodes)
+        completed = 0
+        cumulative_reward = torch.zeros(
+            env.num_envs, dtype=torch.float32, device=device
+        )
+        step_count = torch.zeros(env.num_envs, dtype=torch.int32, device=device)
+
+        returns: list[float] = []
+        lengths: list[int] = []
+        successes: list[float] = []
+        metric_values: dict[str, list[float]] = {}
+        env_step_count = 0
+        env_step_time = 0.0
+
+        if eval_rollout_buffer is not None:
+            env.set_rollout_buffer(eval_rollout_buffer)
+        obs, _ = env.reset()
+        while completed < target_episodes:
+            flat_obs = flatten_dict_observation(obs)
+            action_td = TensorDict(
+                {"obs": flat_obs},
+                batch_size=[env.num_envs],
+                device=device,
+            )
+            action_td = policy.get_action(action_td, deterministic=True)
+            action_manager = getattr(env, "action_manager", None)
+            if action_manager is None:
+                action_in = action_td["action"]
+            else:
+                action_in = action_manager.convert_policy_action_to_env_action(
+                    action_td["action"]
+                )
+
+            if eval_rollout_buffer is not None:
+                _compact_eval_rollout_buffer(env, eval_rollout_buffer)
+                eval_rollout_buffer["action"][:, env.current_rollout_step].copy_(
+                    action_td["action"]
+                )
+            step_start = time.perf_counter()
+            obs, reward, terminated, truncated, info = env.step(action_in)
+            env_step_time += time.perf_counter() - step_start
+            env_step_count += env.num_envs
+
+            done = terminated | truncated
+            cumulative_reward += reward.float()
+            step_count += 1
+
+            newly_done = done.nonzero(as_tuple=False).squeeze(-1)
+            for env_id in newly_done.tolist():
+                if completed >= target_episodes:
+                    break
+                returns.append(float(cumulative_reward[env_id].item()))
+                lengths.append(int(step_count[env_id].item()))
+                if "success" in info:
+                    successes.append(float(info["success"][env_id].item()))
+                if "metrics" in info:
+                    for key, value in info["metrics"].items():
+                        metric_values.setdefault(key, []).append(
+                            float(value[env_id].item())
+                        )
+                cumulative_reward[env_id] = 0.0
+                step_count[env_id] = 0
+                completed += 1
+    finally:
+        if env is not None:
+            env.close()
+
+    return {
+        "num_episodes": completed,
+        "avg_reward": float(np.mean(returns)) if returns else float("nan"),
+        "avg_episode_length": float(np.mean(lengths)) if lengths else float("nan"),
+        "success_rate": float(np.mean(successes)) if successes else float("nan"),
+        "environment_fps": float(env_step_count / max(env_step_time, 1e-6)),
+        "metrics": {
+            key: float(np.mean(values))
+            for key, values in metric_values.items()
+            if values
+        },
+    }
+
+
+def dump_json(data: dict[str, Any], path: str | Path) -> Path:
+    """Write a JSON artifact to disk."""
+    output = Path(path)
+    output.parent.mkdir(parents=True, exist_ok=True)
+    output.write_text(json.dumps(data, indent=2), encoding="utf-8")
+    return output
+
+
+__all__ = [
+    "build_policy_from_env",
+    "dump_json",
+    "evaluate_checkpoint",
+    "resolve_device",
+    "set_random_seed",
+    "train_with_config",
+]
diff --git a/scripts/benchmark/rl/suites/__init__.py b/scripts/benchmark/rl/suites/__init__.py
new file mode 100644
index 00000000..dd650e90
--- /dev/null
+++ b/scripts/benchmark/rl/suites/__init__.py
@@ -0,0 +1,15 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
diff --git a/scripts/benchmark/rl/suites/default.yaml b/scripts/benchmark/rl/suites/default.yaml
new file mode 100644
index 00000000..34476006
--- /dev/null
+++ b/scripts/benchmark/rl/suites/default.yaml
@@ -0,0 +1,21 @@
+tasks:
+  - cart_pole
+  - push_cube
+algorithms:
+  - ppo
+  - grpo
+seeds:
+  - 0
+  - 1
+protocol:
+  device: cuda:0
+  headless: true
+  iterations: 200
+  buffer_size: 1024
+  num_envs: 64
+  num_eval_envs: 16
+  evaluation_interval: 200
+  evaluation_episodes: 20
+  threshold_sustain_count: 3
+  final_eval_window: 3
+  save_interval: 200
diff --git a/scripts/benchmark/rl/suites/smoke.yaml b/scripts/benchmark/rl/suites/smoke.yaml
new file mode 100644
index 00000000..4bb1e67f
--- /dev/null
+++ b/scripts/benchmark/rl/suites/smoke.yaml
@@ -0,0 +1,20 @@
+tasks:
+  - cart_pole
+  - push_cube
+algorithms:
+  - ppo
+  - grpo
+seeds:
+  - 0
+protocol:
+  device: cpu
+  headless: true
+  iterations: 10
+  buffer_size: 128
+  num_envs: 32
+  num_eval_envs: 8
+  evaluation_interval: 2
+  evaluation_episodes: 10
+  threshold_sustain_count: 3
+  final_eval_window: 3
+  save_interval: 1000
diff --git a/scripts/benchmark/rl/tasks/__init__.py b/scripts/benchmark/rl/tasks/__init__.py
new file mode 100644
index 00000000..dd650e90
--- /dev/null
+++ b/scripts/benchmark/rl/tasks/__init__.py
@@ -0,0 +1,15 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
diff --git a/scripts/benchmark/rl/tasks/cart_pole.yaml b/scripts/benchmark/rl/tasks/cart_pole.yaml
new file mode 100644
index 00000000..8b90a61f
--- /dev/null
+++ b/scripts/benchmark/rl/tasks/cart_pole.yaml
@@ -0,0 +1,20 @@
+name: cart_pole
+env_id: CartPoleRL
+success_threshold: 0.8
+base_config:
+  trainer:
+    gym_config: configs/agents/rl/basic/cart_pole/gym_config.json
+    exp_name: cart_pole
+    device: cpu
+    headless: true
+    gpu_id: 0
+    num_envs: 64
+    iterations: 200
+    buffer_size: 1024
+    enable_eval: true
+    num_eval_envs: 8
+    num_eval_episodes: 10
+    eval_freq: 200
+    save_freq: 200
+    use_wandb: false
+    events: {}
diff --git a/scripts/benchmark/rl/tasks/push_cube.yaml b/scripts/benchmark/rl/tasks/push_cube.yaml
new file mode 100644
index 00000000..3f524685
--- /dev/null
+++ b/scripts/benchmark/rl/tasks/push_cube.yaml
@@ -0,0 +1,21 @@
+name: push_cube
+env_id: PushCubeRL
+success_threshold: 0.6
+train_eval_enabled: false
+base_config:
+  trainer:
+    gym_config: configs/agents/rl/push_cube/gym_config.json
+    exp_name: push_cube
+    device: cpu
+    headless: true
+    gpu_id: 0
+    num_envs: 64
+    iterations: 200
+    buffer_size: 1024
+    enable_eval: true
+    num_eval_envs: 8
+    num_eval_episodes: 10
+    eval_freq: 200
+    save_freq: 200
+    use_wandb: false
+    events: {}
diff --git a/scripts/benchmark/robotics/kinematic_solver/run_benchmark.py b/scripts/benchmark/robotics/kinematic_solver/run_benchmark.py
new file mode 100644
index 00000000..5f4451ae
--- /dev/null
+++ b/scripts/benchmark/robotics/kinematic_solver/run_benchmark.py
@@ -0,0 +1,722 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Unified benchmark for OPW and Pytorch kinematic solvers.
+
+Measures IK wall-clock latency, pose accuracy, success rate, and memory usage
+across OPW (Warp CUDA vs CPU) and Pytorch solver (CPU vs optional CUDA).
+Run: python -m scripts.benchmark.robotics.kinematic_solver.run_benchmark
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import time
+from datetime import datetime
+from pathlib import Path
+
+import numpy as np
+import psutil
+import torch
+
+from embodichain.data import get_data_path
+from embodichain.lab.sim.solvers.opw_solver import OPWSolverCfg
+from embodichain.lab.sim.solvers.pytorch_solver import PytorchSolver, PytorchSolverCfg
+
+OPW_LOWER_LIMITS = [-2.618, 0.0, -2.967, -1.745, -1.22, -2.0944]
+OPW_UPPER_LIMITS = [2.618, 3.14159, 0.0, 1.745, 1.22, 2.0944]
+
+# TODO: Easy to failed if use full joint range, consider adding a margin to avoid sampling near the joint limits.
+# PYTORCH_LOWER_LIMITS = [-6.2832, -6.2832, -3.1416, -6.2832, -6.2832, -6.2832]
+# PYTORCH_UPPER_LIMITS = [6.2832, 6.2832, 3.1416, 6.2832, 6.2832, 6.2832]
+PYTORCH_LOWER_LIMITS = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+PYTORCH_UPPER_LIMITS = [2.5, 2.5, 2.5, 2.5, 2.5, 2.5]
+
+SAMPLE_SIZES = [100, 1000, 10000]
+SUPPORTED_SOLVERS = ("opw", "pytorch")
+
+
+def _parse_args() -> argparse.Namespace:
+    """Parse command line arguments for selecting benchmark solvers."""
+    parser = argparse.ArgumentParser(
+        description="Run kinematic solver benchmarks for selected solver backends."
+    )
+    parser.add_argument(
+        "--solvers",
+        "-s",
+        nargs="+",
+        choices=(*SUPPORTED_SOLVERS, "all"),
+        default=["all"],
+        help=(
+            "Solvers to benchmark. Use one or more of: opw, pytorch, all. "
+            "Default: all"
+        ),
+    )
+    return parser.parse_args()
+
+
+def _normalize_selected_solvers(selected_solvers: list[str] | None) -> set[str]:
+    """Normalize selected solver names to a canonical set."""
+    if not selected_solvers or "all" in selected_solvers:
+        return set(SUPPORTED_SOLVERS)
+    return {solver for solver in selected_solvers if solver in SUPPORTED_SOLVERS}
+
+
+def _sync_cuda() -> None:
+    """Synchronize CUDA stream when available."""
+    if torch.cuda.is_available():
+        torch.cuda.synchronize()
+
+
+def _reset_peak_gpu_memory() -> None:
+    """Reset PyTorch peak GPU memory stats when CUDA is available."""
+    if torch.cuda.is_available():
+        torch.cuda.reset_peak_memory_stats()
+
+
+def _peak_gpu_memory_mb() -> float:
+    """Return peak GPU memory allocated by PyTorch in MB."""
+    if not torch.cuda.is_available():
+        return 0.0
+    return torch.cuda.max_memory_allocated() / 1024**2
+
+
+def _memory_snapshot() -> dict[str, float]:
+    """Return current process memory usage snapshot in MB."""
+    process = psutil.Process(os.getpid())
+    cpu_mb = process.memory_info().rss / 1024**2
+    gpu_mb = (
+        torch.cuda.memory_allocated() / 1024**2 if torch.cuda.is_available() else 0.0
+    )
+    return {"cpu_mb": cpu_mb, "gpu_mb": gpu_mb}
+
+
+def _format_markdown_table(rows: list[dict[str, object]]) -> list[str]:
+    """Format rows into a markdown table."""
+    if not rows:
+        return ["No data."]
+
+    headers = list(rows[0].keys())
+    lines = [
+        "| " + " | ".join(headers) + " |",
+        "| " + " | ".join(["---"] * len(headers)) + " |",
+    ]
+    for row in rows:
+        lines.append("| " + " | ".join(str(row[h]) for h in headers) + " |")
+    return lines
+
+
+def _build_leaderboard_rows(
+    metric_rows: list[dict[str, object]],
+) -> list[dict[str, object]]:
+    """Aggregate and rank algorithms by overall success rate."""
+    aggregate: dict[str, dict[str, float]] = {}
+    for row in metric_rows:
+        impl = str(row["impl"])
+        if impl not in aggregate:
+            aggregate[impl] = {
+                "success_sum": 0.0,
+                "t_err_sum": 0.0,
+                "r_err_sum": 0.0,
+                "count": 0.0,
+            }
+
+        aggregate[impl]["success_sum"] += float(row["success_rate"])
+        aggregate[impl]["t_err_sum"] += float(row["translation_err_mm"])
+        aggregate[impl]["r_err_sum"] += float(row["rotation_err_deg"])
+        aggregate[impl]["count"] += 1.0
+
+    ranked = sorted(
+        aggregate.items(),
+        key=lambda item: item[1]["success_sum"] / max(item[1]["count"], 1.0),
+        reverse=True,
+    )
+
+    leaderboard_rows: list[dict[str, object]] = []
+    for rank, (algorithm, stats) in enumerate(ranked, start=1):
+        count = max(stats["count"], 1.0)
+        leaderboard_rows.append(
+            {
+                "rank": rank,
+                "algorithm": algorithm,
+                "overall_success_rate": f"{stats['success_sum'] / count:.2%}",
+                "avg_translation_err_mm": f"{stats['t_err_sum'] / count:.6f}",
+                "avg_rotation_err_deg": f"{stats['r_err_sum'] / count:.6f}",
+            }
+        )
+    return leaderboard_rows
+
+
+def _write_markdown_report(
+    benchmark_name: str,
+    perf_rows: list[dict[str, object]],
+    metric_rows: list[dict[str, object]],
+    leaderboard_rows: list[dict[str, object]],
+    notes: list[str] | None = None,
+) -> Path:
+    """Write benchmark results to a markdown report with three tables."""
+    output_dir = Path("outputs/benchmarks")
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    report_path = output_dir / f"{benchmark_name}_{timestamp}.md"
+
+    lines: list[str] = [
+        f"# {benchmark_name} Benchmark Report",
+        "",
+        f"Generated at: {datetime.now().isoformat(timespec='seconds')}",
+        "",
+        "## Time & Memory",
+        "",
+    ]
+    lines.extend(_format_markdown_table(perf_rows))
+    lines.extend(["", "## Success & Other Metrics", ""])
+    lines.extend(_format_markdown_table(metric_rows))
+
+    lines.extend(["", "## Leaderboard", ""])
+    lines.extend(_format_markdown_table(leaderboard_rows))
+
+    if notes:
+        lines.extend(["", "## Notes", ""])
+        lines.extend([f"- {note}" for note in notes])
+
+    report_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
+    return report_path
+
+
+def get_pose_err(
+    matrix_a: np.ndarray | torch.Tensor,
+    matrix_b: np.ndarray | torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Return translation and rotation errors between paired poses.
+
+    Supports either a single 4x4 pose or a batch with shape (N, 4, 4).
+    """
+    tensor_a = torch.as_tensor(matrix_a, dtype=torch.float64)
+    tensor_b = torch.as_tensor(matrix_b, dtype=torch.float64, device=tensor_a.device)
+
+    if tensor_a.ndim == 2:
+        tensor_a = tensor_a.unsqueeze(0)
+    if tensor_b.ndim == 2:
+        tensor_b = tensor_b.unsqueeze(0)
+
+    t_err = torch.linalg.norm(tensor_a[:, :3, 3] - tensor_b[:, :3, 3], dim=-1)
+
+    relative_rot = torch.matmul(
+        tensor_a[:, :3, :3].transpose(-1, -2),
+        tensor_b[:, :3, :3],
+    )
+    trace = torch.diagonal(relative_rot, dim1=-2, dim2=-1).sum(dim=-1)
+    cos_angle = torch.clamp((trace - 1.0) / 2.0, min=-1.0, max=1.0)
+    r_err = torch.arccos(cos_angle)
+    return t_err, r_err
+
+
+def _timed_ik_call(
+    solver, xpos: torch.Tensor, qpos_seed: torch.Tensor, initial_guess: torch.Tensor
+) -> tuple[float, dict[str, float], float, torch.Tensor, torch.Tensor]:
+    """Run a timed IK call and return elapsed seconds, memory deltas, and outputs."""
+    _reset_peak_gpu_memory()
+    mem_before = _memory_snapshot()
+    _sync_cuda()
+
+    start = time.perf_counter()
+    ik_success, ik_qpos = solver.get_ik(
+        xpos,
+        qpos_seed=qpos_seed,
+        initial_guess=initial_guess,
+    )
+    _sync_cuda()
+    elapsed = time.perf_counter() - start
+
+    mem_after = _memory_snapshot()
+    deltas = {
+        "cpu_mb": mem_after["cpu_mb"] - mem_before["cpu_mb"],
+        "gpu_mb": mem_after["gpu_mb"] - mem_before["gpu_mb"],
+    }
+    return elapsed, deltas, _peak_gpu_memory_mb(), ik_success, ik_qpos
+
+
+def _init_pytorch_solver(device: torch.device) -> PytorchSolver:
+    """Initialize Pytorch kinematic solver on the target device."""
+    solver_cfg = PytorchSolverCfg(
+        urdf_path=get_data_path("UniversalRobots/UR10/UR10.urdf"),
+        end_link_name="ee_link",
+        root_link_name="base_link",
+        joint_names=["J1", "J2", "J3", "J4", "J5", "J6"],
+        user_qpos_limits=[PYTORCH_LOWER_LIMITS, PYTORCH_UPPER_LIMITS],
+    )
+    return PytorchSolver(solver_cfg, device=device)
+
+
+def _sample_qpos(
+    n_samples: int,
+    lower_limits: list[float],
+    upper_limits: list[float],
+    margin: float,
+    device: torch.device,
+    dtype: torch.dtype,
+) -> torch.Tensor:
+    """Sample joint positions with margin from lower/upper limits."""
+    qpos_np = np.random.uniform(
+        low=np.array(lower_limits) + margin,
+        high=np.array(upper_limits) - margin,
+        size=(n_samples, 6),
+    ).astype(float)
+    return torch.tensor(qpos_np, device=device, dtype=dtype)
+
+
+def _timed_pytorch_ik_call(
+    solver: PytorchSolver,
+    fk_xpos: torch.Tensor,
+    qpos_seed: torch.Tensor,
+) -> tuple[float, dict[str, float], float, torch.Tensor, torch.Tensor]:
+    """Run a timed Pytorch IK call and return elapsed/memory/outputs."""
+    _reset_peak_gpu_memory()
+    mem_before = _memory_snapshot()
+    _sync_cuda()
+
+    start = time.perf_counter()
+    for i in range(3):
+        if i == 1:  # skip first run to avoid initialization overhead
+            start = time.perf_counter()
+        ik_success, ik_qpos = solver.get_ik(
+            fk_xpos,
+            joint_seed=qpos_seed,
+            return_all_solutions=False,
+        )
+    _sync_cuda()
+    elapsed = time.perf_counter() - start
+    elapsed /= 2.0
+
+    mem_after = _memory_snapshot()
+    deltas = {
+        "cpu_mb": mem_after["cpu_mb"] - mem_before["cpu_mb"],
+        "gpu_mb": mem_after["gpu_mb"] - mem_before["gpu_mb"],
+    }
+    return elapsed, deltas, _peak_gpu_memory_mb(), ik_success, ik_qpos[:, 0, :]
+
+
+def check_opw_solver(
+    solver_warp, solver_py_opw, n_samples: int = 1000
+) -> dict[str, float]:
+    """Run Warp and CPU OPW IK/FK checks and return timing, memory, and accuracy."""
+    dof = 6
+    qpos_np = np.random.uniform(
+        low=np.array(OPW_LOWER_LIMITS)
+        + 5.1 / 180.0 * np.pi,  # add a margin to avoid sampling near the joint limits
+        high=np.array(OPW_UPPER_LIMITS) + -5.1 / 180.0 * np.pi,
+        size=(n_samples, dof),
+    ).astype(float)
+
+    qpos_cuda = torch.tensor(qpos_np, device=torch.device("cuda"), dtype=torch.float32)
+    xpos_cuda = solver_warp.get_fk(qpos_cuda)
+    qpos_seed = torch.tensor(
+        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+        device=torch.device("cuda"),
+        dtype=torch.float32,
+    )
+
+    (
+        warp_elapsed,
+        warp_mem,
+        warp_peak_gpu,
+        warp_ik_success,
+        warp_ik_qpos,
+    ) = _timed_ik_call(
+        solver=solver_warp,
+        xpos=xpos_cuda,
+        qpos_seed=qpos_seed,
+        initial_guess=qpos_cuda,
+    )
+
+    check_xpos = solver_warp.get_fk(warp_ik_qpos)
+    warp_t_err, warp_r_err = get_pose_err(xpos_cuda, check_xpos)
+    warp_t_mean_err, warp_r_mean_err = (
+        warp_t_err.mean().item(),
+        warp_r_err.mean().item(),
+    )
+
+    xpos_cpu = xpos_cuda.to(torch.device("cpu"))
+    qpos_seed_cpu = qpos_seed.to(torch.device("cpu"))
+    qpos_cpu = qpos_cuda.to(torch.device("cpu"))
+
+    (
+        cpu_elapsed,
+        cpu_mem,
+        cpu_peak_gpu,
+        py_opw_ik_success,
+        py_opw_ik_qpos,
+    ) = _timed_ik_call(
+        solver=solver_py_opw,
+        xpos=xpos_cpu,
+        qpos_seed=qpos_seed_cpu,
+        initial_guess=qpos_cpu,
+    )
+
+    check_xpos = solver_warp.get_fk(py_opw_ik_qpos.to(torch.device("cuda")))
+    py_opw_t_err, py_opw_r_err = get_pose_err(xpos_cpu, check_xpos)
+    py_opw_t_mean_err, py_opw_r_mean_err = (
+        py_opw_t_err.mean().item(),
+        py_opw_r_err.mean().item(),
+    )
+
+    warp_success_rate = float(warp_ik_success.float().mean().item())
+    cpu_success_rate = float(py_opw_ik_success.float().mean().item())
+
+    return {
+        "warp_ms": warp_elapsed * 1000.0,
+        "warp_t_err_mm": warp_t_mean_err * 1000.0,
+        "warp_r_err_deg": warp_r_mean_err * 180.0 / np.pi,
+        "warp_success_rate": warp_success_rate,
+        "warp_cpu_delta_mb": warp_mem["cpu_mb"],
+        "warp_gpu_delta_mb": warp_mem["gpu_mb"],
+        "warp_peak_gpu_mb": warp_peak_gpu,
+        "cpu_ms": cpu_elapsed * 1000.0,
+        "cpu_t_err_mm": py_opw_t_mean_err * 1000.0,
+        "cpu_r_err_deg": py_opw_r_mean_err * 180.0 / np.pi,
+        "cpu_success_rate": cpu_success_rate,
+        "cpu_cpu_delta_mb": cpu_mem["cpu_mb"],
+        "cpu_gpu_delta_mb": cpu_mem["gpu_mb"],
+        "cpu_peak_gpu_mb": cpu_peak_gpu,
+    }
+
+
+def benchmark_pytorch_solver() -> (
+    tuple[list[dict[str, object]], list[dict[str, object]]]
+):
+    """Benchmark Pytorch solver for CPU and optional CUDA implementations."""
+    perf_rows: list[dict[str, object]] = []
+    metric_rows: list[dict[str, object]] = []
+
+    cpu_solver = _init_pytorch_solver(device=torch.device("cpu"))
+    has_cuda = torch.cuda.is_available()
+    cuda_solver = (
+        _init_pytorch_solver(device=torch.device("cuda")) if has_cuda else None
+    )
+
+    print("\n=== Pytorch Kinematic Benchmark ===")
+    if not has_cuda:
+        print("  CUDA unavailable; CUDA benchmark is skipped.")
+
+    for n_sample in SAMPLE_SIZES:
+        print(f"**** Test over {n_sample} samples:")
+
+        qpos_cpu = _sample_qpos(
+            n_samples=n_sample,
+            lower_limits=PYTORCH_LOWER_LIMITS,
+            upper_limits=PYTORCH_UPPER_LIMITS,
+            margin=1e-1,
+            device=torch.device("cpu"),
+            dtype=torch.float64,
+        )
+        fk_xpos_cpu = cpu_solver.get_fk(qpos_cpu)
+        (
+            cpu_elapsed,
+            cpu_mem,
+            cpu_peak_gpu,
+            cpu_success,
+            cpu_ik_qpos,
+        ) = _timed_pytorch_ik_call(cpu_solver, fk_xpos_cpu, qpos_cpu)
+        check_xpos_cpu = cpu_solver.get_fk(cpu_ik_qpos)
+        cpu_t_err, cpu_r_err = get_pose_err(fk_xpos_cpu, check_xpos_cpu)
+
+        cpu_result = {
+            "cost_time_ms": cpu_elapsed * 1000.0,
+            "cpu_delta_mb": cpu_mem["cpu_mb"],
+            "gpu_delta_mb": cpu_mem["gpu_mb"],
+            "peak_gpu_mb": cpu_peak_gpu,
+            "success_rate": float(cpu_success.float().mean().item()),
+            "translation_err_mm": cpu_t_err.mean().item() * 1000.0,
+            "rotation_err_deg": cpu_r_err.mean().item() * 180.0 / np.pi,
+        }
+
+        perf_rows.append(
+            {
+                "sample_size": n_sample,
+                "impl": "pytorch_cpu",
+                "component": "pytorch_ik",
+                "cost_time_ms": f"{cpu_result['cost_time_ms']:.6f}",
+                "cpu_delta_mb": f"{cpu_result['cpu_delta_mb']:.6f}",
+                "gpu_delta_mb": f"{cpu_result['gpu_delta_mb']:.6f}",
+                "peak_gpu_mb": f"{cpu_result['peak_gpu_mb']:.6f}",
+            }
+        )
+        metric_rows.append(
+            {
+                "sample_size": n_sample,
+                "impl": "pytorch_cpu",
+                "component": "pytorch_ik",
+                "success_rate": f"{cpu_result['success_rate']:.6f}",
+                "translation_err_mm": f"{cpu_result['translation_err_mm']:.6f}",
+                "rotation_err_deg": f"{cpu_result['rotation_err_deg']:.6f}",
+            }
+        )
+
+        print(f"===Pytorch CPU IK time:  {cpu_result['cost_time_ms']:.6f} ms")
+        print(f"   Translation mean error: {cpu_result['translation_err_mm']:.6f} mm")
+        print(
+            f"   Rotation mean error:    {cpu_result['rotation_err_deg']:.6f} degrees"
+        )
+        print(f"   Success rate:           {cpu_result['success_rate'] * 100.0:.2f}%")
+        print(
+            "   "
+            f"CPU Δ={cpu_result['cpu_delta_mb']:+.1f} MB  "
+            f"GPU Δ={cpu_result['gpu_delta_mb']:+.1f} MB  "
+            f"peak GPU={cpu_result['peak_gpu_mb']:.1f} MB"
+        )
+
+        if has_cuda and cuda_solver is not None:
+            qpos_cuda = qpos_cpu.to(torch.device("cuda"))
+            fk_xpos_cuda = cuda_solver.get_fk(qpos_cuda)
+            (
+                cuda_elapsed,
+                cuda_mem,
+                cuda_peak_gpu,
+                cuda_success,
+                cuda_ik_qpos,
+            ) = _timed_pytorch_ik_call(cuda_solver, fk_xpos_cuda, qpos_cuda)
+            check_xpos_cuda = cuda_solver.get_fk(cuda_ik_qpos)
+            cuda_t_err, cuda_r_err = get_pose_err(fk_xpos_cuda, check_xpos_cuda)
+
+            cuda_result = {
+                "cost_time_ms": cuda_elapsed * 1000.0,
+                "cpu_delta_mb": cuda_mem["cpu_mb"],
+                "gpu_delta_mb": cuda_mem["gpu_mb"],
+                "peak_gpu_mb": cuda_peak_gpu,
+                "success_rate": float(cuda_success.float().mean().item()),
+                "translation_err_mm": cuda_t_err.mean().item() * 1000.0,
+                "rotation_err_deg": cuda_r_err.mean().item() * 180.0 / np.pi,
+            }
+
+            perf_rows.append(
+                {
+                    "sample_size": n_sample,
+                    "impl": "pytorch_cuda",
+                    "component": "pytorch_ik",
+                    "cost_time_ms": f"{cuda_result['cost_time_ms']:.6f}",
+                    "cpu_delta_mb": f"{cuda_result['cpu_delta_mb']:.6f}",
+                    "gpu_delta_mb": f"{cuda_result['gpu_delta_mb']:.6f}",
+                    "peak_gpu_mb": f"{cuda_result['peak_gpu_mb']:.6f}",
+                }
+            )
+            metric_rows.append(
+                {
+                    "sample_size": n_sample,
+                    "impl": "pytorch_cuda",
+                    "component": "pytorch_ik",
+                    "success_rate": f"{cuda_result['success_rate']:.6f}",
+                    "translation_err_mm": f"{cuda_result['translation_err_mm']:.6f}",
+                    "rotation_err_deg": f"{cuda_result['rotation_err_deg']:.6f}",
+                }
+            )
+
+            print(f"===Pytorch CUDA IK time: {cuda_result['cost_time_ms']:.6f} ms")
+            print(
+                f"   Translation mean error: {cuda_result['translation_err_mm']:.6f} mm"
+            )
+            print(
+                f"   Rotation mean error:    {cuda_result['rotation_err_deg']:.6f} degrees"
+            )
+            print(
+                f"   Success rate:           {cuda_result['success_rate'] * 100.0:.2f}%"
+            )
+            print(
+                "   "
+                f"CPU Δ={cuda_result['cpu_delta_mb']:+.1f} MB  "
+                f"GPU Δ={cuda_result['gpu_delta_mb']:+.1f} MB  "
+                f"peak GPU={cuda_result['peak_gpu_mb']:.1f} MB"
+            )
+
+    return perf_rows, metric_rows
+
+
+def benchmark_opw_solver() -> tuple[list[dict[str, object]], list[dict[str, object]]]:
+    """Benchmark OPW solver for multiple sample sizes."""
+    if not torch.cuda.is_available():
+        print("\n=== OPW Solver Benchmark ===")
+        print("  Skipped -- requires CUDA for Warp implementation comparison.")
+        return [], [
+            {
+                "sample_size": "N/A",
+                "impl": "opw_solver",
+                "component": "opw_ik",
+                "success_rate": "N/A",
+                "other_metrics": "skipped: requires CUDA for Warp comparison",
+            }
+        ]
+
+    cfg = OPWSolverCfg(
+        joint_names=("J1", "J2", "J3", "J4", "J5", "J6"),
+        user_qpos_limits=(OPW_LOWER_LIMITS, OPW_UPPER_LIMITS),
+    )
+    cfg.a1 = 400.333
+    cfg.a2 = -251.449
+    cfg.b = 0.0
+    cfg.c1 = 830
+    cfg.c2 = 1177.556
+    cfg.c3 = 1443.593
+    cfg.c4 = 230
+    cfg.offsets = (
+        0.0,
+        82.21350356417211 * np.pi / 180.0,
+        -167.21710113148163 * np.pi / 180.0,
+        0.0,
+        0.0,
+        0.0,
+    )
+    cfg.flip_axes = (True, False, True, True, False, True)
+    cfg.has_parallelogram = False
+
+    solver_warp = cfg.init_solver(device=torch.device("cuda"), pk_serial_chain="")
+    solver_py_opw = cfg.init_solver(device=torch.device("cpu"), pk_serial_chain="")
+
+    print("\n=== OPW Solver Benchmark ===")
+    perf_rows: list[dict[str, object]] = []
+    metric_rows: list[dict[str, object]] = []
+
+    for n_sample in SAMPLE_SIZES:
+        result = check_opw_solver(solver_warp, solver_py_opw, n_samples=n_sample)
+        print(f"**** Test over {n_sample} samples:")
+        print(f"===Warp CUDA IK time: {result['warp_ms']:.6f} ms")
+        print(f"   Translation mean error: {result['warp_t_err_mm']:.6f} mm")
+        print(f"   Rotation mean error:    {result['warp_r_err_deg']:.6f} degrees")
+        print(f"   Success rate:           {result['warp_success_rate'] * 100.0:.2f}%")
+        print(
+            "   "
+            f"CPU Δ={result['warp_cpu_delta_mb']:+.1f} MB  "
+            f"GPU Δ={result['warp_gpu_delta_mb']:+.1f} MB  "
+            f"peak GPU={result['warp_peak_gpu_mb']:.1f} MB"
+        )
+        print(f"===CPU OPW IK time:  {result['cpu_ms']:.6f} ms")
+        print(f"   Translation mean error: {result['cpu_t_err_mm']:.6f} mm")
+        print(f"   Rotation mean error:    {result['cpu_r_err_deg']:.6f} degrees")
+        print(f"   Success rate:           {result['cpu_success_rate'] * 100.0:.2f}%")
+        print(
+            "   "
+            f"CPU Δ={result['cpu_cpu_delta_mb']:+.1f} MB  "
+            f"GPU Δ={result['cpu_gpu_delta_mb']:+.1f} MB  "
+            f"peak GPU={result['cpu_peak_gpu_mb']:.1f} MB"
+        )
+
+        perf_rows.append(
+            {
+                "sample_size": n_sample,
+                "impl": "opw_cuda",
+                "component": "opw_ik",
+                "cost_time_ms": f"{result['warp_ms']:.6f}",
+                "cpu_delta_mb": f"{result['warp_cpu_delta_mb']:.6f}",
+                "gpu_delta_mb": f"{result['warp_gpu_delta_mb']:.6f}",
+                "peak_gpu_mb": f"{result['warp_peak_gpu_mb']:.6f}",
+            }
+        )
+        perf_rows.append(
+            {
+                "sample_size": n_sample,
+                "impl": "opw_cpu",
+                "component": "opw_ik",
+                "cost_time_ms": f"{result['cpu_ms']:.6f}",
+                "cpu_delta_mb": f"{result['cpu_cpu_delta_mb']:.6f}",
+                "gpu_delta_mb": f"{result['cpu_gpu_delta_mb']:.6f}",
+                "peak_gpu_mb": f"{result['cpu_peak_gpu_mb']:.6f}",
+            }
+        )
+        metric_rows.append(
+            {
+                "sample_size": n_sample,
+                "impl": "opw_cuda",
+                "component": "opw_ik",
+                "success_rate": f"{result['warp_success_rate']:.6f}",
+                "translation_err_mm": f"{result['warp_t_err_mm']:.6f}",
+                "rotation_err_deg": f"{result['warp_r_err_deg']:.6f}",
+            }
+        )
+        metric_rows.append(
+            {
+                "sample_size": n_sample,
+                "impl": "opw_cpu",
+                "component": "opw_ik",
+                "success_rate": f"{result['cpu_success_rate']:.6f}",
+                "translation_err_mm": f"{result['cpu_t_err_mm']:.6f}",
+                "rotation_err_deg": f"{result['cpu_r_err_deg']:.6f}",
+            }
+        )
+
+    return perf_rows, metric_rows
+
+
+def run_all_benchmarks(selected_solvers: list[str] | None = None) -> None:
+    """Run unified OPW + Pytorch kinematic solver benchmarks."""
+    solvers_to_run = _normalize_selected_solvers(selected_solvers)
+
+    print("=" * 60)
+    print("Kinematic Solver Performance Benchmarks")
+    print("=" * 60)
+
+    print("\nSelected solvers:", ", ".join(sorted(solvers_to_run)))
+
+    print("\nConfiguration differences:")
+    print(
+        "- OPW solver: analytic OPW parameters via OPWSolverCfg with "
+        "opw-specific joint limits."
+    )
+    print("- Pytorch solver: UR10 URDF-based PytorchSolver with " "UR10 joint limits.")
+
+    perf_rows: list[dict[str, object]] = []
+    metric_rows: list[dict[str, object]] = []
+
+    if "opw" in solvers_to_run:
+        opw_perf_rows, opw_metric_rows = benchmark_opw_solver()
+        perf_rows.extend(opw_perf_rows)
+        metric_rows.extend(opw_metric_rows)
+
+    if "pytorch" in solvers_to_run:
+        pytorch_perf_rows, pytorch_metric_rows = benchmark_pytorch_solver()
+        perf_rows.extend(pytorch_perf_rows)
+        metric_rows.extend(pytorch_metric_rows)
+
+    leaderboard_rows = _build_leaderboard_rows(metric_rows)
+
+    benchmark_name = "kinematic_solver"
+
+    print("\n" + "=" * 60)
+    print("Benchmarks complete.")
+    print("=" * 60)
+
+    report_path = _write_markdown_report(
+        benchmark_name=benchmark_name,
+        perf_rows=perf_rows,
+        metric_rows=metric_rows,
+        leaderboard_rows=leaderboard_rows,
+        notes=[
+            "CPU/GPU memory fields are deltas measured around timed calls.",
+            "This report contains exactly three tables: Time & Memory, Success & Other Metrics, and Leaderboard.",
+        ]
+        + (
+            [
+                "OPW and Pytorch solvers use different initialization paths and different lower/upper joint limits."
+            ]
+            if solvers_to_run == set(SUPPORTED_SOLVERS)
+            else []
+        ),
+    )
+    print(f"Markdown report saved: {report_path}")
+
+
+if __name__ == "__main__":
+    args = _parse_args()
+    run_all_benchmarks(selected_solvers=args.solvers)
diff --git a/scripts/benchmark/workspace_analyzer/benchmark_workspace_analyzer.py b/scripts/benchmark/workspace_analyzer/benchmark_workspace_analyzer.py
new file mode 100644
index 00000000..67185059
--- /dev/null
+++ b/scripts/benchmark/workspace_analyzer/benchmark_workspace_analyzer.py
@@ -0,0 +1,488 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+"""Benchmark script for workspace analyzer performance optimizations.
+
+Measures each optimization independently across multiple sample sizes.
+Run: python -m scripts.benchmark.workspace_analyzer.benchmark_workspace_analyzer
+"""
+
+import os
+import time
+from datetime import datetime
+from pathlib import Path
+
+import numpy as np
+import psutil
+import torch
+
+SAMPLE_SIZES_SMALL = [100, 1000, 10000, 50000]
+SAMPLE_SIZES_MEDIUM = [1000, 10000, 100000, 500000]
+
+
+def _sync_cuda() -> None:
+    """Synchronize CUDA stream when available."""
+    if torch.cuda.is_available():
+        torch.cuda.synchronize()
+
+
+def _reset_peak_gpu_memory() -> None:
+    """Reset PyTorch peak GPU memory stats when CUDA is available."""
+    if torch.cuda.is_available():
+        torch.cuda.reset_peak_memory_stats()
+
+
+def _peak_gpu_memory_mb() -> float:
+    """Return peak GPU memory allocated by PyTorch in MB."""
+    if not torch.cuda.is_available():
+        return 0.0
+    return torch.cuda.max_memory_allocated() / 1024**2
+
+
+def _memory_snapshot() -> dict[str, float]:
+    """Return current process memory usage snapshot in MB."""
+    process = psutil.Process(os.getpid())
+    cpu_mb = process.memory_info().rss / 1024**2
+    gpu_mb = (
+        torch.cuda.memory_allocated() / 1024**2 if torch.cuda.is_available() else 0.0
+    )
+    return {"cpu_mb": cpu_mb, "gpu_mb": gpu_mb}
+
+
+def _time_call(callable_fn) -> tuple[float, dict[str, float], float, object]:
+    """Time a callable and return elapsed seconds, memory deltas, and result."""
+    _reset_peak_gpu_memory()
+    before = _memory_snapshot()
+    _sync_cuda()
+
+    start = time.perf_counter()
+    result = callable_fn()
+    _sync_cuda()
+    elapsed = time.perf_counter() - start
+
+    after = _memory_snapshot()
+    deltas = {
+        "cpu_mb": after["cpu_mb"] - before["cpu_mb"],
+        "gpu_mb": after["gpu_mb"] - before["gpu_mb"],
+    }
+    return elapsed, deltas, _peak_gpu_memory_mb(), result
+
+
+def _format_perf_line(
+    n: int,
+    elapsed_s: float,
+    memory_delta: dict[str, float],
+    peak_gpu_mb: float,
+    extra_info: str,
+) -> str:
+    """Format one benchmark output line with aligned fields."""
+    return (
+        f"  n={n:>7d}: {elapsed_s * 1000:>10.2f} ms | "
+        f"CPU Δ={memory_delta['cpu_mb']:+.1f} MB  "
+        f"GPU Δ={memory_delta['gpu_mb']:+.1f} MB  "
+        f"peak GPU={peak_gpu_mb:.1f} MB" + (f" | {extra_info}" if extra_info else "")
+    )
+
+
+def _format_markdown_table(rows: list[dict[str, object]]) -> list[str]:
+    """Format rows into a markdown table."""
+    if not rows:
+        return ["No data."]
+
+    headers = list(rows[0].keys())
+    lines = [
+        "| " + " | ".join(headers) + " |",
+        "| " + " | ".join(["---"] * len(headers)) + " |",
+    ]
+    for row in rows:
+        lines.append("| " + " | ".join(str(row[h]) for h in headers) + " |")
+    return lines
+
+
+def _write_markdown_report(
+    benchmark_name: str,
+    perf_rows: list[dict[str, object]],
+    metric_rows: list[dict[str, object]],
+    notes: list[str] | None = None,
+) -> Path:
+    """Write benchmark results to a markdown report with two tables."""
+    output_dir = Path("outputs/benchmarks")
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    report_path = output_dir / f"{benchmark_name}_{timestamp}.md"
+
+    lines: list[str] = [
+        f"# {benchmark_name} Benchmark Report",
+        "",
+        f"Generated at: {datetime.now().isoformat(timespec='seconds')}",
+        "",
+        "## Time & Memory",
+        "",
+    ]
+    lines.extend(_format_markdown_table(perf_rows))
+    lines.extend(["", "## Success & Other Metrics", ""])
+    lines.extend(_format_markdown_table(metric_rows))
+
+    if notes:
+        lines.extend(["", "## Notes", ""])
+        lines.extend([f"- {note}" for note in notes])
+
+    report_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
+    return report_path
+
+
+def benchmark_halton_sampler() -> (
+    tuple[list[dict[str, object]], list[dict[str, object]]]
+):
+    """Benchmark Halton sampler: vectorized vs loop-based."""
+    from embodichain.lab.sim.utility.workspace_analyzer.samplers.halton_sampler import (
+        HaltonSampler,
+    )
+
+    sampler = HaltonSampler(seed=42)
+    bounds = torch.tensor(
+        [
+            [-3.14, 3.14],
+            [-3.14, 3.14],
+            [-3.14, 3.14],
+            [-3.14, 3.14],
+            [-3.14, 3.14],
+            [-3.14, 3.14],
+        ],
+        dtype=torch.float32,
+    )
+
+    print("\n=== Halton Sampler Benchmark ===")
+    perf_rows: list[dict[str, object]] = []
+    metric_rows: list[dict[str, object]] = []
+
+    for n in [100, 1000, 10000, 100000]:
+        elapsed, mem_delta, peak_gpu, samples = _time_call(
+            lambda: sampler.sample(num_samples=n, bounds=bounds)
+        )
+        elapsed_ms = elapsed * 1000.0
+        print(
+            _format_perf_line(
+                n=n,
+                elapsed_s=elapsed,
+                memory_delta=mem_delta,
+                peak_gpu_mb=peak_gpu,
+                extra_info=f"shape={tuple(samples.shape)}",
+            )
+        )
+
+        perf_rows.append(
+            {
+                "sample_size": n,
+                "impl": "workspace_analyzer",
+                "component": "halton_sampler",
+                "cost_time_ms": f"{elapsed_ms:.6f}",
+                "cpu_delta_mb": f"{mem_delta['cpu_mb']:.6f}",
+                "gpu_delta_mb": f"{mem_delta['gpu_mb']:.6f}",
+                "peak_gpu_mb": f"{peak_gpu:.6f}",
+            }
+        )
+        metric_rows.append(
+            {
+                "sample_size": n,
+                "impl": "workspace_analyzer",
+                "component": "halton_sampler",
+                "success_rate": "N/A",
+                "other_metrics": f"shape={tuple(samples.shape)}",
+            }
+        )
+
+    return perf_rows, metric_rows
+
+
+def benchmark_density_metric() -> (
+    tuple[list[dict[str, object]], list[dict[str, object]]]
+):
+    """Benchmark density metric: KDTree vs brute-force."""
+    from embodichain.lab.sim.utility.workspace_analyzer.metrics.density_metric import (
+        DensityMetric,
+    )
+    from embodichain.lab.sim.utility.workspace_analyzer.configs.metric_config import (
+        DensityConfig,
+    )
+
+    config = DensityConfig(radius=0.05, compute_distribution=False)
+    metric = DensityMetric(config)
+
+    print("\n=== Density Metric Benchmark ===")
+    perf_rows: list[dict[str, object]] = []
+    metric_rows: list[dict[str, object]] = []
+
+    for n in SAMPLE_SIZES_SMALL:
+        points = np.random.randn(n, 3).astype(np.float32) * 0.5
+
+        elapsed, mem_delta, peak_gpu, result = _time_call(
+            lambda: metric.compute(points)
+        )
+        elapsed_ms = elapsed * 1000.0
+        print(
+            _format_perf_line(
+                n=n,
+                elapsed_s=elapsed,
+                memory_delta=mem_delta,
+                peak_gpu_mb=peak_gpu,
+                extra_info=f"mean_density={result['mean_density']:.2f}",
+            )
+        )
+
+        perf_rows.append(
+            {
+                "sample_size": n,
+                "impl": "workspace_analyzer",
+                "component": "density_metric",
+                "cost_time_ms": f"{elapsed_ms:.6f}",
+                "cpu_delta_mb": f"{mem_delta['cpu_mb']:.6f}",
+                "gpu_delta_mb": f"{mem_delta['gpu_mb']:.6f}",
+                "peak_gpu_mb": f"{peak_gpu:.6f}",
+            }
+        )
+        metric_rows.append(
+            {
+                "sample_size": n,
+                "impl": "workspace_analyzer",
+                "component": "density_metric",
+                "success_rate": "N/A",
+                "other_metrics": f"mean_density={result['mean_density']:.6f}",
+            }
+        )
+
+    return perf_rows, metric_rows
+
+
+def benchmark_voxelization() -> tuple[list[dict[str, object]], list[dict[str, object]]]:
+    """Benchmark voxelization: np.unique vs dict-based."""
+    from embodichain.lab.sim.utility.workspace_analyzer.metrics.reachability_metric import (
+        ReachabilityMetric,
+    )
+    from embodichain.lab.sim.utility.workspace_analyzer.configs.metric_config import (
+        ReachabilityConfig,
+    )
+
+    config = ReachabilityConfig(voxel_size=0.01, compute_coverage=True)
+    metric = ReachabilityMetric(config)
+
+    print("\n=== Voxelization Benchmark ===")
+    perf_rows: list[dict[str, object]] = []
+    metric_rows: list[dict[str, object]] = []
+
+    for n in SAMPLE_SIZES_MEDIUM:
+        points = np.random.randn(n, 3).astype(np.float32) * 0.5
+
+        elapsed, mem_delta, peak_gpu, result = _time_call(
+            lambda: metric.compute(points)
+        )
+        elapsed_ms = elapsed * 1000.0
+        print(
+            _format_perf_line(
+                n=n,
+                elapsed_s=elapsed,
+                memory_delta=mem_delta,
+                peak_gpu_mb=peak_gpu,
+                extra_info=(
+                    f"volume={result['volume']:.4f}, " f"voxels={result['num_voxels']}"
+                ),
+            )
+        )
+
+        perf_rows.append(
+            {
+                "sample_size": n,
+                "impl": "workspace_analyzer",
+                "component": "voxelization",
+                "cost_time_ms": f"{elapsed_ms:.6f}",
+                "cpu_delta_mb": f"{mem_delta['cpu_mb']:.6f}",
+                "gpu_delta_mb": f"{mem_delta['gpu_mb']:.6f}",
+                "peak_gpu_mb": f"{peak_gpu:.6f}",
+            }
+        )
+        metric_rows.append(
+            {
+                "sample_size": n,
+                "impl": "workspace_analyzer",
+                "component": "voxelization",
+                "success_rate": "N/A",
+                "other_metrics": (
+                    f"volume={result['volume']:.6f}, num_voxels={result['num_voxels']}"
+                ),
+            }
+        )
+
+    return perf_rows, metric_rows
+
+
+def benchmark_manipulability() -> (
+    tuple[list[dict[str, object]], list[dict[str, object]]]
+):
+    """Benchmark manipulability: batch vs per-sample."""
+    from embodichain.lab.sim.utility.workspace_analyzer.metrics.manipulability_metric import (
+        ManipulabilityMetric,
+    )
+    from embodichain.lab.sim.utility.workspace_analyzer.configs.metric_config import (
+        ManipulabilityConfig,
+    )
+
+    config = ManipulabilityConfig(compute_isotropy=True)
+    metric = ManipulabilityMetric(config)
+
+    print("\n=== Manipulability Metric Benchmark ===")
+    perf_rows: list[dict[str, object]] = []
+    metric_rows: list[dict[str, object]] = []
+
+    for n in SAMPLE_SIZES_SMALL:
+        points = np.random.randn(n, 3).astype(np.float32) * 0.5
+        jacobians = np.random.randn(n, 6, 6).astype(np.float32) * 0.1
+
+        elapsed, mem_delta, peak_gpu, result = _time_call(
+            lambda: metric.compute(points, jacobians=jacobians)
+        )
+        elapsed_ms = elapsed * 1000.0
+        print(
+            _format_perf_line(
+                n=n,
+                elapsed_s=elapsed,
+                memory_delta=mem_delta,
+                peak_gpu_mb=peak_gpu,
+                extra_info=f"mean_manip={result['mean_manipulability']:.6f}",
+            )
+        )
+
+        perf_rows.append(
+            {
+                "sample_size": n,
+                "impl": "workspace_analyzer",
+                "component": "manipulability_metric",
+                "cost_time_ms": f"{elapsed_ms:.6f}",
+                "cpu_delta_mb": f"{mem_delta['cpu_mb']:.6f}",
+                "gpu_delta_mb": f"{mem_delta['gpu_mb']:.6f}",
+                "peak_gpu_mb": f"{peak_gpu:.6f}",
+            }
+        )
+        metric_rows.append(
+            {
+                "sample_size": n,
+                "impl": "workspace_analyzer",
+                "component": "manipulability_metric",
+                "success_rate": "N/A",
+                "other_metrics": (
+                    f"mean_manipulability={result['mean_manipulability']:.6f}"
+                ),
+            }
+        )
+
+    return perf_rows, metric_rows
+
+
+def benchmark_batch_fk() -> tuple[list[dict[str, object]], list[dict[str, object]]]:
+    """Benchmark batch FK vs sequential FK (requires GPU robot setup).
+
+    This benchmark requires a running simulation with a robot.
+    It is skipped if no simulation is available.
+    """
+    print("\n=== Batch FK Benchmark (requires robot/simulation) ===")
+    print("  Skipped -- requires live SimulationManager and Robot.")
+    print("  To run manually, integrate with your robot setup:")
+    print("    analyzer.compute_workspace_points(joint_configs, batch_size=512)")
+    return [], [
+        {
+            "sample_size": "N/A",
+            "impl": "workspace_analyzer",
+            "component": "batch_fk",
+            "success_rate": "N/A",
+            "other_metrics": "skipped: requires live SimulationManager and Robot",
+        }
+    ]
+
+
+def benchmark_batch_ik() -> tuple[list[dict[str, object]], list[dict[str, object]]]:
+    """Benchmark batch IK vs sequential IK (requires GPU robot setup).
+
+    This benchmark requires a running simulation with a robot.
+    It is skipped if no simulation is available.
+    """
+    print("\n=== Batch IK Benchmark (requires robot/simulation) ===")
+    print("  Skipped -- requires live SimulationManager and Robot.")
+    print("  To run manually, integrate with your robot setup:")
+    print("    analyzer.compute_reachability(cartesian_points, batch_size=512)")
+    return [], [
+        {
+            "sample_size": "N/A",
+            "impl": "workspace_analyzer",
+            "component": "batch_ik",
+            "success_rate": "N/A",
+            "other_metrics": "skipped: requires live SimulationManager and Robot",
+        }
+    ]
+
+
+def run_all_benchmarks() -> None:
+    """Run all benchmarks and print summary."""
+    print("=" * 60)
+    print("Workspace Analyzer Performance Benchmarks")
+    print("=" * 60)
+
+    perf_rows: list[dict[str, object]] = []
+    metric_rows: list[dict[str, object]] = []
+
+    perf_part, metric_part = benchmark_halton_sampler()
+    perf_rows.extend(perf_part)
+    metric_rows.extend(metric_part)
+
+    perf_part, metric_part = benchmark_density_metric()
+    perf_rows.extend(perf_part)
+    metric_rows.extend(metric_part)
+
+    perf_part, metric_part = benchmark_voxelization()
+    perf_rows.extend(perf_part)
+    metric_rows.extend(metric_part)
+
+    perf_part, metric_part = benchmark_manipulability()
+    perf_rows.extend(perf_part)
+    metric_rows.extend(metric_part)
+
+    perf_part, metric_part = benchmark_batch_fk()
+    perf_rows.extend(perf_part)
+    metric_rows.extend(metric_part)
+
+    perf_part, metric_part = benchmark_batch_ik()
+    perf_rows.extend(perf_part)
+    metric_rows.extend(metric_part)
+
+    print("\n" + "=" * 60)
+    print("Benchmarks complete.")
+    print("=" * 60)
+
+    report_path = _write_markdown_report(
+        benchmark_name="workspace_analyzer",
+        perf_rows=perf_rows,
+        metric_rows=metric_rows,
+        notes=[
+            "CPU/GPU memory fields are deltas measured around timed calls.",
+            "This report contains exactly two tables: Time & Memory, and Success & Other Metrics.",
+        ],
+    )
+    print(f"Markdown report saved: {report_path}")
+
+
+if __name__ == "__main__":
+    run_all_benchmarks()
diff --git a/scripts/tutorials/grasp/grasp_generator.py b/scripts/tutorials/grasp/grasp_generator.py
index bab09c03..1bfdeda6 100644
--- a/scripts/tutorials/grasp/grasp_generator.py
+++ b/scripts/tutorials/grasp/grasp_generator.py
@@ -30,8 +30,10 @@
 from embodichain.lab.sim.shapes import MeshCfg
 from embodichain.lab.sim.solvers import PytorchSolverCfg
 from embodichain.data import get_data_path
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 from embodichain.utils import logger
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     JointDrivePropertiesCfg,
     RobotCfg,
     LightCfg,
@@ -59,19 +61,7 @@ def parse_arguments():
     parser = argparse.ArgumentParser(
         description="Create and simulate a robot in SimulationManager"
     )
-    parser.add_argument(
-        "--num_envs", type=int, default=1, help="Number of parallel environments"
-    )
-    parser.add_argument(
-        "--enable_rt", action="store_true", help="Enable ray tracing rendering"
-    )
-    parser.add_argument("--headless", action="store_true", help="Enable headless mode")
-    parser.add_argument(
-        "--device",
-        type=str,
-        default="cpu",
-        help="device to run the environment on, e.g., 'cpu' or 'cuda'",
-    )
+    add_env_launcher_args_to_parser(parser)
     return parser.parse_args()
 
 
@@ -88,21 +78,20 @@ def initialize_simulation(args) -> SimulationManager:
     config = SimulationManagerCfg(
         headless=True,
         sim_device=args.device,
-        enable_rt=args.enable_rt,
+        render_cfg=RenderCfg(renderer=args.renderer),
         physics_dt=1.0 / 100.0,
         arena_space=2.5,
     )
     sim = SimulationManager(config)
 
-    if args.enable_rt:
-        light = sim.add_light(
-            cfg=LightCfg(
-                uid="main_light",
-                color=(0.6, 0.6, 0.6),
-                intensity=30.0,
-                init_pos=(1.0, 0, 3.0),
-            )
+    light = sim.add_light(
+        cfg=LightCfg(
+            uid="main_light",
+            color=(0.6, 0.6, 0.6),
+            intensity=30.0,
+            init_pos=(1.0, 0, 3.0),
         )
+    )
 
     return sim
 
@@ -238,6 +227,8 @@ def get_grasp_traj(sim: SimulationManager, robot: Robot, grasp_xpos: torch.Tenso
         antipodal_sampler_cfg=AntipodalSamplerCfg(
             n_sample=20000, max_length=0.088, min_length=0.003
         ),
+        is_partial_annotate=True,
+        is_filter_ground_collision=True,
     )
     sim.open_window()
 
@@ -271,11 +262,23 @@ def get_grasp_traj(sim: SimulationManager, robot: Robot, grasp_xpos: torch.Tenso
     )
     obj_poses = mug.get_local_pose(to_matrix=True)
     grasp_xpos_list = []
-    for obj_pose in obj_poses:
-        grasp_pose, _ = grasp_generator.get_grasp_poses(
-            obj_pose, approach_direction, visualize_pose=False
+
+    rest_xpos = robot.compute_fk(
+        qpos=robot.get_qpos("arm"), name="arm", to_matrix=True
+    )[0]
+    for i, obj_pose in enumerate(obj_poses):
+        is_success, grasp_pose, open_length = grasp_generator.get_grasp_poses(
+            obj_pose,
+            approach_direction,
+            visualize_collision=False,
+            visualize_pose=False,
         )
-        grasp_xpos_list.append(grasp_pose.unsqueeze(0))
+        if is_success:
+            grasp_xpos_list.append(grasp_pose.unsqueeze(0))
+        else:
+            logger.log_warning(f"No valid grasp pose found for {i}-th object.")
+            grasp_xpos_list.append(rest_xpos.unsqueeze(0))
+
     grasp_xpos = torch.cat(grasp_xpos_list, dim=0)
     cost_time = time.time() - start_time
     logger.log_info(f"Get grasp pose cost time: {cost_time:.2f} seconds")
diff --git a/scripts/tutorials/gym/modular_env.py b/scripts/tutorials/gym/modular_env.py
index 9c8bfd66..4bfbb5b3 100644
--- a/scripts/tutorials/gym/modular_env.py
+++ b/scripts/tutorials/gym/modular_env.py
@@ -33,6 +33,7 @@
 from embodichain.lab.sim.sensors import StereoCameraCfg, SensorCfg
 from embodichain.lab.sim.shapes import MeshCfg
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     LightCfg,
     ArticulationCfg,
     RobotCfg,
@@ -78,7 +79,7 @@ class ExampleEventCfg:
             ),
             "position_range": [[-0.5, -0.5, 2], [0.5, 0.5, 2]],
             "color_range": [[0.6, 0.6, 0.6], [1, 1, 1]],
-            "intensity_range": [50.0, 100.0],
+            "intensity_range": [10.0, 30.0],
         },
     )
 
@@ -144,7 +145,7 @@ class ExampleCfg(EmbodiedEnvCfg):
                 uid="point",
                 light_type="point",
                 color=(1.0, 1.0, 1.0),
-                intensity=50.0,
+                intensity=20.0,
                 init_pos=(0, 0, 2),
             )
         ]
@@ -209,12 +210,20 @@ def __init__(self, cfg: EmbodiedEnvCfg, **kwargs):
     import argparse
 
     from embodichain.lab.sim import SimulationManagerCfg
+    from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 
     parser = argparse.ArgumentParser()
-    parser.add_argument("--enable_rt", action="store_true", help="Enable ray tracing")
+    add_env_launcher_args_to_parser(parser)
     args = parser.parse_args()
 
-    env_cfg = ExampleCfg(sim_cfg=SimulationManagerCfg(enable_rt=args.enable_rt))
+    env_cfg = ExampleCfg(
+        sim_cfg=SimulationManagerCfg(
+            render_cfg=RenderCfg(renderer=args.renderer),
+            headless=args.headless,
+            sim_device=args.device,
+            num_envs=args.num_envs,
+        )
+    )
 
     # Create the Gym environment
     env = gym.make("ModularEnv-v1", cfg=env_cfg)
diff --git a/scripts/tutorials/gym/random_reach.py b/scripts/tutorials/gym/random_reach.py
index 4aca9ab3..b55a7a8e 100644
--- a/scripts/tutorials/gym/random_reach.py
+++ b/scripts/tutorials/gym/random_reach.py
@@ -24,6 +24,7 @@
 from embodichain.lab.sim.shapes import CubeCfg
 from embodichain.lab.sim.objects import RigidObject, Robot
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     RobotCfg,
     RigidObjectCfg,
     RigidBodyAttributesCfg,
@@ -43,11 +44,15 @@ def __init__(
         num_envs=1,
         headless=False,
         device="cpu",
+        renderer="hybrid",
         **kwargs,
     ):
         env_cfg = EnvCfg(
             sim_cfg=SimulationManagerCfg(
-                headless=headless, arena_space=2.0, sim_device=device
+                headless=headless,
+                arena_space=2.0,
+                sim_device=device,
+                render_cfg=RenderCfg(renderer=renderer),
             ),
             num_envs=num_envs,
         )
@@ -112,19 +117,12 @@ def _extend_obs(self, obs: EnvObs, **kwargs) -> EnvObs:
     import argparse
     import time
 
+    from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
+
     parser = argparse.ArgumentParser(
         description="Demo for running a random reach environment."
     )
-    parser.add_argument(
-        "--num_envs", type=int, default=1, help="number of environments to run"
-    )
-    parser.add_argument(
-        "--device",
-        type=str,
-        default="cpu",
-        help="device to run the environment on, e.g., 'cpu' or 'cuda'",
-    )
-    parser.add_argument("--headless", action="store_true", help="run in headless mode")
+    add_env_launcher_args_to_parser(parser)
     args = parser.parse_args()
 
     env = gym.make(
@@ -132,6 +130,7 @@ def _extend_obs(self, obs: EnvObs, **kwargs) -> EnvObs:
         num_envs=args.num_envs,
         headless=args.headless,
         device=args.device,
+        renderer=args.renderer,
     )
 
     for episode in range(10):
diff --git a/scripts/tutorials/sim/atomic_actions.py b/scripts/tutorials/sim/atomic_actions.py
new file mode 100644
index 00000000..02b4bded
--- /dev/null
+++ b/scripts/tutorials/sim/atomic_actions.py
@@ -0,0 +1,348 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""
+Tutorial: Atomic Actions for Robot Motion Generation
+=====================================================
+
+This script shows how to use the atomic action system to plan and execute
+a pick-and-place task with a robot arm.
+
+Key concepts covered:
+  1. Setting up a MotionGenerator and AtomicActionEngine
+  2. Describing what to pick using ObjectSemantics and AntipodalAffordance
+  3. Running a pick → place → move sequence with execute_static()
+
+Run with:
+    python atomic_actions.py [--num_envs N] [--renderer hybrid|fast-rt|rt]
+"""
+
+import argparse
+import numpy as np
+import time
+import torch
+
+from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
+from embodichain.lab.sim.objects import Robot, RigidObject
+from embodichain.lab.sim.shapes import MeshCfg
+from embodichain.lab.sim.solvers import PytorchSolverCfg
+from embodichain.data import get_data_path
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
+from embodichain.lab.sim.cfg import (
+    JointDrivePropertiesCfg,
+    RenderCfg,
+    RobotCfg,
+    RigidObjectCfg,
+    RigidBodyAttributesCfg,
+    LightCfg,
+    URDFCfg,
+)
+from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg
+from embodichain.toolkits.graspkit.pg_grasp.gripper_collision_checker import (
+    GripperCollisionCfg,
+)
+from embodichain.toolkits.graspkit.pg_grasp.antipodal_generator import (
+    GraspGenerator,
+    GraspGeneratorCfg,
+    AntipodalSamplerCfg,
+)
+
+# Import everything from the public atomic_actions API
+from embodichain.lab.sim.atomic_actions import (
+    AtomicActionEngine,
+    ObjectSemantics,
+    AntipodalAffordance,
+    PickUpActionCfg,
+    PlaceActionCfg,
+    MoveActionCfg,
+)
+
+
+def parse_arguments():
+    """
+    Parse command-line arguments to configure the simulation.
+
+    Returns:
+        argparse.Namespace: Parsed arguments including number of environments, device, and rendering options.
+    """
+    parser = argparse.ArgumentParser(
+        description="Create and simulate a robot in SimulationManager"
+    )
+    add_env_launcher_args_to_parser(parser)
+    return parser.parse_args()
+
+
+def initialize_simulation(args):
+    """
+    Initialize the simulation environment based on the provided arguments.
+
+    Args:
+        args (argparse.Namespace): Parsed command-line arguments.
+
+    Returns:
+        SimulationManager: Configured simulation manager instance.
+    """
+    sim_cfg = SimulationManagerCfg(
+        width=1920,
+        height=1080,
+        headless=True,
+        sim_device="cuda",
+        physics_dt=1.0 / 100.0,
+        num_envs=args.num_envs,
+        render_cfg=RenderCfg(renderer=args.renderer),
+    )
+    sim = SimulationManager(sim_cfg)
+
+    light = sim.add_light(
+        cfg=LightCfg(uid="main_light", intensity=50.0, init_pos=(0, 0, 2.0))
+    )
+
+    return sim
+
+
+def create_robot(sim: SimulationManager, position=[0.0, 0.0, 0.0]):
+    """
+    Create and configure a robot with an arm and a dexterous hand in the simulation.
+
+    Args:
+        sim (SimulationManager): The simulation manager instance.
+
+    Returns:
+        Robot: The configured robot instance added to the simulation.
+    """
+    # Retrieve URDF paths for the robot arm and hand
+    ur10_urdf_path = get_data_path("UniversalRobots/UR10/UR10.urdf")
+    gripper_urdf_path = get_data_path("DH_PGC_140_50_M/DH_PGC_140_50_M.urdf")
+    # Configure the robot with its components and control properties
+    cfg = RobotCfg(
+        uid="UR10",
+        urdf_cfg=URDFCfg(
+            components=[
+                {"component_type": "arm", "urdf_path": ur10_urdf_path},
+                {"component_type": "hand", "urdf_path": gripper_urdf_path},
+            ]
+        ),
+        drive_pros=JointDrivePropertiesCfg(
+            stiffness={"JOINT[0-9]": 1e4, "FINGER[1-2]": 1e2},
+            damping={"JOINT[0-9]": 1e3, "FINGER[1-2]": 1e1},
+            max_effort={"JOINT[0-9]": 1e5, "FINGER[1-2]": 1e3},
+            drive_type="force",
+        ),
+        control_parts={
+            "arm": ["JOINT[0-9]"],
+            "hand": ["FINGER[1-2]"],
+        },
+        solver_cfg={
+            "arm": PytorchSolverCfg(
+                end_link_name="ee_link",
+                root_link_name="base_link",
+                tcp=[
+                    [0.0, 1.0, 0.0, 0.0],
+                    [-1.0, 0.0, 0.0, 0.0],
+                    [0.0, 0.0, 1.0, 0.12],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+            )
+        },
+        init_qpos=[0.0, -np.pi / 2, -np.pi / 2, np.pi / 2, -np.pi / 2, 0.0, 0.0, 0.0],
+        init_pos=position,
+    )
+    return sim.add_robot(cfg=cfg)
+
+
+def create_mug(sim: SimulationManager) -> RigidObject:
+    mug_cfg = RigidObjectCfg(
+        uid="mug",
+        shape=MeshCfg(
+            fpath=get_data_path("CoffeeCup/cup.ply"),
+        ),
+        attrs=RigidBodyAttributesCfg(
+            mass=0.01,
+            dynamic_friction=0.97,
+            static_friction=0.99,
+        ),
+        max_convex_hull_num=16,
+        init_pos=[0.55, 0.0, 0.01],
+        init_rot=[0.0, 0.0, -90],
+        body_scale=(4, 4, 4),
+    )
+    mug = sim.add_rigid_object(cfg=mug_cfg)
+    return mug
+
+
+def main():
+    """Pick up a mug and place it at a new location using atomic actions."""
+    args = parse_arguments()
+
+    # ------------------------------------------------------------------ #
+    # Step 1: Set up simulation, robot, and object                        #
+    # ------------------------------------------------------------------ #
+    sim: SimulationManager = initialize_simulation(args)
+    robot = create_robot(sim)
+    mug = create_mug(sim)
+
+    # ------------------------------------------------------------------ #
+    # Step 2: Create a MotionGenerator for the robot                      #
+    # MotionGenerator handles trajectory planning (IK + TOPPRA smoothing) #
+    # ------------------------------------------------------------------ #
+    motion_gen = MotionGenerator(
+        cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
+    )
+
+    # ------------------------------------------------------------------ #
+    # Step 3: Configure the three atomic actions                          #
+    #                                                                     #
+    #  PickUpAction  — approach → close gripper → lift                   #
+    #  PlaceAction   — lower → open gripper → retract                    #
+    #  MoveAction    — free-space move to a target EEF pose               #
+    # ------------------------------------------------------------------ #
+    # Gripper joint values for this robot (DH_PGC_140):
+    #   open  = [0.00, 0.00]   (fully open)
+    #   close = [0.025, 0.025] (grasping width)
+    hand_open = torch.tensor([0.00, 0.00], dtype=torch.float32, device=sim.device)
+    hand_close = torch.tensor([0.025, 0.025], dtype=torch.float32, device=sim.device)
+
+    pickup_cfg = PickUpActionCfg(
+        control_part="arm",
+        hand_control_part="hand",
+        hand_open_qpos=hand_open,
+        hand_close_qpos=hand_close,
+        # Approach the object from directly above (negative world-Z)
+        approach_direction=torch.tensor(
+            [0.0, 0.0, -1.0], dtype=torch.float32, device=sim.device
+        ),
+        pre_grasp_distance=0.15,  # hover 15 cm above before descending
+        lift_height=0.15,  # lift 15 cm after grasping
+    )
+
+    place_cfg = PlaceActionCfg(
+        control_part="arm",
+        hand_control_part="hand",
+        hand_open_qpos=hand_open,
+        hand_close_qpos=hand_close,
+        lift_height=0.15,
+    )
+
+    move_cfg = MoveActionCfg(
+        control_part="arm",
+    )
+
+    # ------------------------------------------------------------------ #
+    # Step 4: Build the AtomicActionEngine                                #
+    #                                                                     #
+    # actions_cfg_list defines the ORDER of actions that execute_static() #
+    # will run. Each entry is matched positionally to target_list.        #
+    # ------------------------------------------------------------------ #
+    atomic_engine = AtomicActionEngine(
+        motion_generator=motion_gen,
+        actions_cfg_list=[pickup_cfg, place_cfg, move_cfg],
+    )
+
+    sim.init_gpu_physics()
+    if not args.headless:
+        sim.open_window()
+
+    # ------------------------------------------------------------------ #
+    # Step 5: Describe the mug with ObjectSemantics                       #
+    #                                                                     #
+    # ObjectSemantics bundles together:                                   #
+    #   - geometry (mesh vertices/triangles for grasp annotation)         #
+    #   - affordance (how to grasp the object — here antipodal grasps)   #
+    #   - entity reference (so the action can read the live object pose)  #
+    # ------------------------------------------------------------------ #
+    mug_grasp_affordance = AntipodalAffordance(
+        object_label="mug",
+        force_reannotate=False,
+        custom_config={
+            "gripper_collision_cfg": GripperCollisionCfg(
+                max_open_length=0.088, finger_length=0.078, point_sample_dense=0.012
+            ),
+            "generator_cfg": GraspGeneratorCfg(
+                viser_port=11801,
+                antipodal_sampler_cfg=AntipodalSamplerCfg(
+                    n_sample=20000, max_length=0.088, min_length=0.003
+                ),
+            ),
+        },
+    )
+    mug_semantics = ObjectSemantics(
+        label="mug",
+        geometry={
+            "mesh_vertices": mug.get_vertices(env_ids=[0], scale=True)[0],
+            "mesh_triangles": mug.get_triangles(env_ids=[0])[0],
+        },
+        affordance=mug_grasp_affordance,
+        entity=mug,  # needed so PickUpAction can read the mug's live pose
+    )
+
+    # ------------------------------------------------------------------ #
+    # Step 6: Define target poses for place and final rest                #
+    #                                                                     #
+    # Poses are 4×4 homogeneous transforms (rotation | translation).     #
+    # For PickUpAction the target is mug_semantics — the action computes  #
+    # the grasp pose automatically from the affordance.                   #
+    # ------------------------------------------------------------------ #
+    # Place the mug 20 cm to the left and 40 cm forward from its pickup pose
+    place_xpos = torch.tensor(
+        [
+            [-0.0539, -0.9985, -0.0022, 0.2489],
+            [-0.9977, 0.0540, -0.0401, 0.3970],
+            [0.0401, 0.0000, -0.9992, 0.2400],
+            [0.0000, 0.0000, 0.0000, 1.0000],
+        ],
+        dtype=torch.float32,
+        device=sim.device,
+    )
+
+    # Move the arm to a safe resting pose after placing
+    rest_xpos = torch.tensor(
+        [
+            [-0.0539, -0.9985, -0.0022, 0.5000],
+            [-0.9977, 0.0540, -0.0401, 0.0000],
+            [0.0401, 0.0000, -0.9992, 0.5000],
+            [0.0000, 0.0000, 0.0000, 1.0000],
+        ],
+        dtype=torch.float32,
+        device=sim.device,
+    )
+
+    # ------------------------------------------------------------------ #
+    # Step 7: Plan and execute the full sequence                          #
+    #                                                                     #
+    # execute_static() plans all three actions in order and returns a     #
+    # single concatenated joint trajectory (n_envs, n_waypoints, dof).   #
+    # We then replay it frame-by-frame in the simulator.                 #
+    # ------------------------------------------------------------------ #
+    print("Planning pick → place → move trajectory...")
+    is_success, traj = atomic_engine.execute_static(
+        target_list=[mug_semantics, place_xpos, rest_xpos]
+    )
+
+    if not is_success:
+        print("Planning failed. Check that the target poses are reachable.")
+        return
+
+    print(f"Success! Replaying {traj.shape[1]} waypoints...")
+    for i in range(traj.shape[1]):
+        robot.set_qpos(traj[:, i])
+        sim.update(step=4)
+        time.sleep(1e-2)
+
+    input("Press Enter to exit...")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/tutorials/sim/create_cloth.py b/scripts/tutorials/sim/create_cloth.py
index b81f2bf6..1f0d883c 100644
--- a/scripts/tutorials/sim/create_cloth.py
+++ b/scripts/tutorials/sim/create_cloth.py
@@ -27,7 +27,9 @@
 import open3d as o3d
 from dexsim.utility.path import get_resources_data_path
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     RigidObjectCfg,
     RigidBodyAttributesCfg,
     ClothObjectCfg,
@@ -78,21 +80,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Create a simulation scene with SimulationManager"
     )
-    parser.add_argument(
-        "--headless",
-        action="store_true",
-        default=False,
-        help="Run simulation in headless mode",
-    )
-    parser.add_argument(
-        "--num_envs", type=int, default=1, help="Number of parallel environments"
-    )
-    parser.add_argument(
-        "--enable_rt",
-        action="store_true",
-        default=False,
-        help="Enable ray tracing for better visuals",
-    )
+    add_env_launcher_args_to_parser(parser)
     args = parser.parse_args()
 
     # Configure the simulation
@@ -100,11 +88,10 @@ def main():
         width=1920,
         height=1080,
         headless=True,
+        num_envs=args.num_envs,
         physics_dt=1.0 / 100.0,  # Physics timestep (100 Hz)
         sim_device="cuda",  # soft simulation only supports cuda device
-        enable_rt=args.enable_rt,  # Enable ray tracing for better visuals
-        num_envs=args.num_envs,  # Number of parallel environments
-        arena_space=2.0,
+        render_cfg=RenderCfg(renderer=args.renderer),
     )
 
     # Create the simulation instance
@@ -128,7 +115,7 @@ def main():
             init_rot=[0, 0, 0],
             physical_attr=ClothPhysicalAttributesCfg(
                 mass=0.01,
-                youngs=1e10,
+                youngs=1e9,
                 poissons=0.4,
                 thickness=0.04,
                 bending_stiffness=0.01,
diff --git a/scripts/tutorials/sim/create_rigid_object_group.py b/scripts/tutorials/sim/create_rigid_object_group.py
index 1b734015..d681dc91 100644
--- a/scripts/tutorials/sim/create_rigid_object_group.py
+++ b/scripts/tutorials/sim/create_rigid_object_group.py
@@ -22,7 +22,8 @@
 import time
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
-from embodichain.lab.sim.cfg import RigidBodyAttributesCfg
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
+from embodichain.lab.sim.cfg import RigidBodyAttributesCfg, RenderCfg
 from embodichain.lab.sim.shapes import CubeCfg
 from embodichain.lab.sim.objects import (
     RigidObjectGroup,
@@ -38,24 +39,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Create a simulation scene with SimulationManager"
     )
-    parser.add_argument(
-        "--headless",
-        action="store_true",
-        default=False,
-        help="Run simulation in headless mode",
-    )
-    parser.add_argument(
-        "--num_envs", type=int, default=1, help="Number of parallel environments"
-    )
-    parser.add_argument(
-        "--device", type=str, default="cpu", help="Simulation device (cuda or cpu)"
-    )
-    parser.add_argument(
-        "--enable_rt",
-        action="store_true",
-        default=False,
-        help="Enable ray tracing for better visuals",
-    )
+    add_env_launcher_args_to_parser(parser)
     args = parser.parse_args()
 
     # Configure the simulation
@@ -65,7 +49,9 @@ def main():
         headless=True,
         physics_dt=1.0 / 100.0,  # Physics timestep (100 Hz)
         sim_device=args.device,
-        enable_rt=args.enable_rt,  # Enable ray tracing for better visuals
+        render_cfg=RenderCfg(
+            renderer=args.renderer
+        ),  # Enable ray tracing for better visuals
         num_envs=args.num_envs,
         arena_space=3.0,
     )
diff --git a/scripts/tutorials/sim/create_robot.py b/scripts/tutorials/sim/create_robot.py
index 614abb7b..3fe3f9fd 100644
--- a/scripts/tutorials/sim/create_robot.py
+++ b/scripts/tutorials/sim/create_robot.py
@@ -31,11 +31,13 @@
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.objects import Robot
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     JointDrivePropertiesCfg,
     RobotCfg,
     URDFCfg,
 )
 from embodichain.data import get_data_path
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 
 
 def main():
@@ -45,20 +47,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Create and simulate a robot in SimulationManager"
     )
-    parser.add_argument(
-        "--num_envs", type=int, default=4, help="Number of environments to simulate"
-    )
-    parser.add_argument(
-        "--device",
-        type=str,
-        default="cpu",
-        choices=["cpu", "cuda"],
-        help="Device to run simulation on",
-    )
-    parser.add_argument("--headless", action="store_true", help="Run in headless mode")
-    parser.add_argument(
-        "--enable_rt", action="store_true", help="Enable ray tracing rendering"
-    )
+    add_env_launcher_args_to_parser(parser)
     args = parser.parse_args()
 
     # Initialize simulation
@@ -67,7 +56,7 @@ def main():
         headless=True,
         sim_device=args.device,
         arena_space=3.0,
-        enable_rt=args.enable_rt,
+        render_cfg=RenderCfg(renderer=args.renderer),
         physics_dt=1.0 / 100.0,
         num_envs=args.num_envs,
     )
diff --git a/scripts/tutorials/sim/create_scene.py b/scripts/tutorials/sim/create_scene.py
index 4f440ca1..b7507f7b 100644
--- a/scripts/tutorials/sim/create_scene.py
+++ b/scripts/tutorials/sim/create_scene.py
@@ -23,10 +23,13 @@
 import time
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
-from embodichain.lab.sim.cfg import RigidBodyAttributesCfg
+from embodichain.lab.sim.cfg import RigidBodyAttributesCfg, RenderCfg
 from embodichain.lab.sim.shapes import CubeCfg, MeshCfg
 from embodichain.lab.sim.objects import RigidObject, RigidObjectCfg
-from dexsim.utility.path import get_resources_data_path
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
+from embodichain.data import get_data_path
+
+DEFAULT_CAPTURE_PATH = "./outputs/window_capture/create_scene.png"
 
 
 def main():
@@ -36,23 +39,12 @@ def main():
     parser = argparse.ArgumentParser(
         description="Create a simulation scene with SimulationManager"
     )
+    add_env_launcher_args_to_parser(parser)
     parser.add_argument(
-        "--headless",
-        action="store_true",
-        default=False,
-        help="Run simulation in headless mode",
-    )
-    parser.add_argument(
-        "--num_envs", type=int, default=1, help="Number of parallel environments"
-    )
-    parser.add_argument(
-        "--device", type=str, default="cpu", help="Simulation device (cuda or cpu)"
-    )
-    parser.add_argument(
-        "--enable_rt",
-        action="store_true",
-        default=False,
-        help="Enable ray tracing for better visuals",
+        "--max-steps",
+        type=int,
+        default=10000,
+        help="Optional number of simulation steps to run before exiting.",
     )
     args = parser.parse_args()
 
@@ -63,7 +55,9 @@ def main():
         headless=True,
         physics_dt=1.0 / 100.0,  # Physics timestep (100 Hz)
         sim_device=args.device,
-        enable_rt=args.enable_rt,  # Enable ray tracing for better visuals
+        render_cfg=RenderCfg(
+            renderer=args.renderer,
+        ),
         num_envs=args.num_envs,
         arena_space=3.0,
     )
@@ -71,7 +65,7 @@ def main():
     # Create the simulation instance
     sim = SimulationManager(sim_cfg)
 
-    # Add objects to the scene
+    # Add cube object to the scene
     cube: RigidObject = sim.add_rigid_object(
         cfg=RigidObjectCfg(
             uid="cube",
@@ -83,27 +77,63 @@ def main():
                 static_friction=0.5,
                 restitution=0.1,
             ),
-            init_pos=[0.0, 0.0, 1.0],
+            init_pos=[0, 0.0, 1.0],
+        )
+    )
+
+    # Add chair object to the scene
+    path = get_data_path("Chair/chair.glb")
+    chair: RigidObject = sim.add_rigid_object(
+        cfg=RigidObjectCfg(
+            uid="chair",
+            shape=MeshCfg(fpath=path),
+            body_type="dynamic",
+            attrs=RigidBodyAttributesCfg(
+                mass=3.0,
+            ),
+            body_scale=[0.5, 0.5, 0.5],
+            init_pos=[0.0, 0.0, 0.2],
+            init_rot=[90.0, 0.0, 0.0],
         )
     )
 
     print("[INFO]: Scene setup complete!")
     print(f"[INFO]: Running simulation with {args.num_envs} environment(s)")
     print("[INFO]: Press Ctrl+C to stop the simulation")
+    if not args.headless:
+        print(
+            "[INFO]: Press 'p' in the viewer to capture a frame "
+            "(saved under ./outputs/window_capture/)"
+        )
 
     # Open window when the scene has been set up
     if not args.headless:
         sim.open_window()
 
     # Run the simulation
-    run_simulation(sim)
+    run_simulation(
+        sim,
+        capture_window=args.headless,
+        capture_path=DEFAULT_CAPTURE_PATH,
+        max_steps=args.max_steps,
+    )
 
 
-def run_simulation(sim: SimulationManager):
+def run_simulation(
+    sim: SimulationManager,
+    capture_window: bool = False,
+    capture_path: str = DEFAULT_CAPTURE_PATH,
+    max_steps: int | None = None,
+) -> None:
     """Run the simulation loop.
 
     Args:
         sim: The SimulationManager instance to run
+        capture_window: Whether to capture a single frame with the hidden window
+            capture camera. Enabled by default when ``--headless`` is passed.
+            When a viewer window is open, press ``p`` to capture asynchronously instead.
+        capture_path: Path where the captured image is saved.
+        max_steps: Optional number of steps to run before exiting.
     """
 
     # Initialize GPU physics if using CUDA
@@ -111,6 +141,10 @@ def run_simulation(sim: SimulationManager):
         sim.init_gpu_physics()
 
     step_count = 0
+    capture_done = False
+    capture_step = 10
+    if max_steps is not None:
+        capture_step = max(1, min(capture_step, max_steps))
 
     try:
         last_time = time.time()
@@ -133,6 +167,20 @@ def run_simulation(sim: SimulationManager):
                 last_time = current_time
                 last_step = step_count
 
+            if capture_window and not capture_done and step_count >= capture_step:
+                frame = sim.capture_window(save_path=capture_path)
+                if frame is None:
+                    raise RuntimeError("Window capture failed to produce a frame.")
+                print(
+                    f"[INFO]: Captured window frame at {capture_path} "
+                    f"with shape {frame.shape}"
+                )
+                capture_done = True
+
+            if max_steps is not None and step_count >= max_steps:
+                print(f"[INFO]: Reached max steps: {max_steps}")
+                break
+
     except KeyboardInterrupt:
         print("\n[INFO]: Stopping simulation...")
     finally:
diff --git a/scripts/tutorials/sim/create_sensor.py b/scripts/tutorials/sim/create_sensor.py
index f4279090..39534d32 100644
--- a/scripts/tutorials/sim/create_sensor.py
+++ b/scripts/tutorials/sim/create_sensor.py
@@ -29,9 +29,11 @@
 from scipy.spatial.transform import Rotation as R
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 from embodichain.lab.sim.sensors import Camera, CameraCfg
 from embodichain.lab.sim.objects import Robot
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     JointDrivePropertiesCfg,
     RobotCfg,
     URDFCfg,
@@ -73,20 +75,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Create and simulate a robot in SimulationManager"
     )
-    parser.add_argument(
-        "--num_envs", type=int, default=1, help="Number of environments to simulate"
-    )
-    parser.add_argument(
-        "--device",
-        type=str,
-        default="cpu",
-        choices=["cpu", "cuda"],
-        help="Device to run simulation on",
-    )
-    parser.add_argument("--headless", action="store_true", help="Run in headless mode")
-    parser.add_argument(
-        "--enable_rt", action="store_true", help="Enable ray tracing rendering"
-    )
+    add_env_launcher_args_to_parser(parser)
     parser.add_argument(
         "--attach_sensor",
         action="store_true",
@@ -100,7 +89,7 @@ def main():
         headless=True,
         sim_device=args.device,
         arena_space=3.0,
-        enable_rt=args.enable_rt,
+        render_cfg=RenderCfg(renderer=args.renderer),
         physics_dt=1.0 / 100.0,
         num_envs=args.num_envs,
     )
diff --git a/scripts/tutorials/sim/create_softbody.py b/scripts/tutorials/sim/create_softbody.py
index 087f35ec..3b8973ef 100644
--- a/scripts/tutorials/sim/create_softbody.py
+++ b/scripts/tutorials/sim/create_softbody.py
@@ -23,7 +23,9 @@
 import time
 from dexsim.utility.path import get_resources_data_path
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     SoftbodyVoxelAttributesCfg,
     SoftbodyPhysicalAttributesCfg,
 )
@@ -41,21 +43,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Create a simulation scene with SimulationManager"
     )
-    parser.add_argument(
-        "--headless",
-        action="store_true",
-        default=False,
-        help="Run simulation in headless mode",
-    )
-    parser.add_argument(
-        "--num_envs", type=int, default=4, help="Number of parallel environments"
-    )
-    parser.add_argument(
-        "--enable_rt",
-        action="store_true",
-        default=False,
-        help="Enable ray tracing for better visuals",
-    )
+    add_env_launcher_args_to_parser(parser)
     args = parser.parse_args()
 
     # Configure the simulation
@@ -63,9 +51,12 @@ def main():
         width=1920,
         height=1080,
         headless=True,
+        num_envs=args.num_envs,
         physics_dt=1.0 / 100.0,  # Physics timestep (100 Hz)
         sim_device="cuda",  # soft simulation only supports cuda device
-        enable_rt=args.enable_rt,  # Enable ray tracing for better visuals
+        render_cfg=RenderCfg(
+            renderer=args.renderer
+        ),  # Enable ray tracing for better visuals
     )
 
     # Create the simulation instance
diff --git a/scripts/tutorials/sim/export_usd.py b/scripts/tutorials/sim/export_usd.py
index 90e81691..c6cb91c7 100644
--- a/scripts/tutorials/sim/export_usd.py
+++ b/scripts/tutorials/sim/export_usd.py
@@ -15,14 +15,16 @@
 # ----------------------------------------------------------------------------
 
 """
-This script demonstrates how to export a simulation scene to a usd file using the SimulationManager. 
+This script demonstrates how to export a simulation scene to a usd file using the SimulationManager.
 """
 
 import argparse
 import numpy as np
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 from embodichain.lab.sim.objects import Robot, RigidObject
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     LightCfg,
     JointDrivePropertiesCfg,
     RigidObjectCfg,
@@ -46,17 +48,7 @@ def parse_arguments():
     parser = argparse.ArgumentParser(
         description="Create and simulate a robot in SimulationManager"
     )
-
-    parser.add_argument(
-        "--enable_rt", action="store_true", help="Enable ray tracing rendering"
-    )
-    parser.add_argument("--headless", action="store_true", help="Enable headless mode")
-    parser.add_argument(
-        "--device",
-        type=str,
-        default="cpu",
-        help="device to run the environment on, e.g., 'cpu' or 'cuda'",
-    )
+    add_env_launcher_args_to_parser(parser)
     return parser.parse_args()
 
 
@@ -73,22 +65,21 @@ def initialize_simulation(args) -> SimulationManager:
     config = SimulationManagerCfg(
         headless=True,
         sim_device=args.device,
-        enable_rt=args.enable_rt,
+        render_cfg=RenderCfg(renderer=args.renderer),
         physics_dt=1.0 / 100.0,
         num_envs=1,
         arena_space=2.5,
     )
     sim = SimulationManager(config)
 
-    if args.enable_rt:
-        light = sim.add_light(
-            cfg=LightCfg(
-                uid="main_light",
-                color=(0.6, 0.6, 0.6),
-                intensity=30.0,
-                init_pos=(1.0, 0, 3.0),
-            )
+    light = sim.add_light(
+        cfg=LightCfg(
+            uid="main_light",
+            color=(0.6, 0.6, 0.6),
+            intensity=30.0,
+            init_pos=(1.0, 0, 3.0),
         )
+    )
 
     return sim
 
diff --git a/scripts/tutorials/sim/gizmo_robot.py b/scripts/tutorials/sim/gizmo_robot.py
index 1f314549..6d6613f9 100644
--- a/scripts/tutorials/sim/gizmo_robot.py
+++ b/scripts/tutorials/sim/gizmo_robot.py
@@ -23,7 +23,9 @@
 import argparse
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     RobotCfg,
     URDFCfg,
     JointDrivePropertiesCfg,
@@ -41,18 +43,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Create a simulation scene with SimulationManager"
     )
-    parser.add_argument(
-        "--num_envs", type=int, default=1, help="Number of parallel environments"
-    )
-    parser.add_argument(
-        "--device", type=str, default="cpu", help="Simulation device (cuda or cpu)"
-    )
-    parser.add_argument(
-        "--enable_rt",
-        action="store_true",
-        default=False,
-        help="Enable ray tracing for better visuals",
-    )
+    add_env_launcher_args_to_parser(parser)
     args = parser.parse_args()
 
     # Configure the simulation
@@ -61,7 +52,7 @@ def main():
         height=1080,
         physics_dt=1.0 / 100.0,
         sim_device=args.device,
-        enable_rt=args.enable_rt,
+        render_cfg=RenderCfg(renderer=args.renderer),
     )
 
     sim = SimulationManager(sim_cfg)
diff --git a/scripts/tutorials/sim/import_usd.py b/scripts/tutorials/sim/import_usd.py
index 59dfac62..ada74edf 100644
--- a/scripts/tutorials/sim/import_usd.py
+++ b/scripts/tutorials/sim/import_usd.py
@@ -24,13 +24,14 @@
 import time
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
-from embodichain.lab.sim.cfg import RigidBodyAttributesCfg
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
+from embodichain.lab.sim.cfg import RigidBodyAttributesCfg, RenderCfg
 from embodichain.lab.sim.shapes import CubeCfg, MeshCfg
 from embodichain.lab.sim.objects import (
     RigidObject,
     RigidObjectCfg,
-    ArticulationCfg,
-    Articulation,
+    RobotCfg,
+    Robot,
 )
 from embodichain.data import get_data_path
 
@@ -42,15 +43,7 @@ def main():
     parser = argparse.ArgumentParser(
         description="Create a simulation scene with SimulationManager"
     )
-    parser.add_argument(
-        "--headless",
-        action="store_true",
-        default=False,
-        help="Run simulation in headless mode",
-    )
-    parser.add_argument(
-        "--device", type=str, default="cpu", help="Simulation device (cuda or cpu)"
-    )
+    add_env_launcher_args_to_parser(parser)
     args = parser.parse_args()
 
     # Configure the simulation
@@ -60,7 +53,9 @@ def main():
         headless=True,
         physics_dt=1.0 / 100.0,  # Physics timestep (100 Hz)
         sim_device=args.device,
-        enable_rt=True,  # Enable ray tracing for better visuals
+        render_cfg=RenderCfg(
+            renderer=args.renderer,
+        ),  # Enable ray tracing for better visuals
         num_envs=1,
         arena_space=3.0,
     )
@@ -98,12 +93,12 @@ def main():
     # Add objects to the scene
     h1_path = get_data_path("UnitreeH1Usd/H1_usd/h1.usd")
     print(f"Loading USD file from: {h1_path}")
-    h1: Articulation = sim.add_articulation(
-        cfg=ArticulationCfg(
+    h1: Robot = sim.add_robot(
+        cfg=RobotCfg(
             uid="h1",
             fpath=h1_path,
             build_pk_chain=False,
-            init_pos=[-0.2, -0.2, 1.0],
+            init_pos=[-0.2, -0.2, 1.05],
             use_usd_properties=False,
         )
     )
diff --git a/skills/add-atomic-action/SKILL.md b/skills/add-atomic-action/SKILL.md
new file mode 100644
index 00000000..9ae574a5
--- /dev/null
+++ b/skills/add-atomic-action/SKILL.md
@@ -0,0 +1,197 @@
+---
+name: add-atomic-action
+description: Use when adding a new observation, event, reward, action, dataset, or randomization functor to an EmbodiChain environment
+---
+
+# Add Atomic Action
+
+Scaffold a new atomic action following EmbodiChain's `ActionCfg` / `AtomicAction` pattern.
+
+## When to Use
+
+- User asks to add a new motion primitive (push, wipe, insert, hand-over, …)
+- User says "add a new atomic action", "create a custom action", "implement a push action"
+- User wants to extend `AtomicActionEngine` with a behaviour not covered by the built-ins
+
+## Key Files
+
+| Purpose | Path |
+|---------|------|
+| Base classes (`ActionCfg`, `AtomicAction`, `ObjectSemantics`) | `embodichain/lab/sim/atomic_actions/core.py` |
+| Built-in actions (reference implementations) | `embodichain/lab/sim/atomic_actions/actions.py` |
+| Engine + global registry (`register_action`) | `embodichain/lab/sim/atomic_actions/engine.py` |
+| Public API exports | `embodichain/lab/sim/atomic_actions/__init__.py` |
+| Reference docs | `docs/source/overview/sim/atomic_actions.md` |
+
+## Steps
+
+### 1. Define the config
+
+Add a `@configclass`-decorated class that extends `ActionCfg` (or `MoveActionCfg` /
+`GraspActionCfg` if the new action reuses arm/gripper fields).
+
+Place it in `embodichain/lab/sim/atomic_actions/actions.py` alongside the existing configs,
+or in a new file if the action is large.
+
+```python
+from embodichain.utils import configclass
+from embodichain.lab.sim.atomic_actions.core import ActionCfg   # or MoveActionCfg
+
+@configclass
+class PushActionCfg(ActionCfg):
+    name: str = "push"                # must match the registry key
+    push_distance: float = 0.05       # metres to push forward
+    push_speed: int = 30              # waypoints for the push phase
+    control_part: str = "arm"         # robot segment to control
+```
+
+**Rules:**
+- `name` must be unique and match the string passed to `register_action`.
+- Inherit from `GraspActionCfg` when the action needs hand open/close fields.
+- All fields must have defaults — configs are instantiated without arguments in tests.
+
+### 2. Implement the action class
+
+Subclass `AtomicAction` and implement the two abstract methods.
+
+```python
+import torch
+from typing import Optional, Union
+from embodichain.lab.sim.atomic_actions.core import AtomicAction, ObjectSemantics
+
+class PushAction(AtomicAction):
+    """Push an object forward by a fixed distance."""
+
+    def __init__(self, motion_generator, cfg: PushActionCfg | None = None):
+        super().__init__(motion_generator, cfg=cfg or PushActionCfg())
+        self.arm_joint_ids = self.robot.get_joint_ids(name=self.cfg.control_part)
+
+    # ------------------------------------------------------------------
+    def execute(
+        self,
+        target: Union[torch.Tensor, ObjectSemantics],
+        start_qpos: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> tuple[bool, torch.Tensor, list]:
+        """Plan the push motion and return a joint trajectory.
+
+        Args:
+            target: EEF pose tensor (n_envs, 4, 4) or ObjectSemantics.
+            start_qpos: Starting joint positions (n_envs, dof). Uses current
+                robot state when None.
+
+        Returns:
+            Tuple of (is_success, trajectory, joint_ids) where
+            trajectory has shape (n_envs, n_waypoints, len(joint_ids)).
+        """
+        # 1. Resolve target pose
+        # 2. Plan trajectory with self.motion_generator
+        # 3. Return result
+        return is_success, trajectory, self.arm_joint_ids
+
+    # ------------------------------------------------------------------
+    def validate(
+        self,
+        target: Union[torch.Tensor, ObjectSemantics],
+        start_qpos: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> bool:
+        """Fast feasibility check — no trajectory generated.
+
+        Returns:
+            True if the action can be attempted.
+        """
+        return True  # add IK reachability check here if needed
+```
+
+**Rules:**
+- `execute()` must always return `(is_success: bool, trajectory: Tensor, joint_ids: list)`.
+- `trajectory` shape: `(n_envs, n_waypoints, len(joint_ids))`.
+- `joint_ids` tells the engine which DOF columns the trajectory covers.
+- `validate()` must be cheap — no motion planning allowed.
+- Call `super().__init__()` — it sets `self.robot`, `self.motion_generator`, and `self.cfg`.
+
+### 3. Register the action
+
+Register the new class so `AtomicActionEngine` can discover it by name.
+
+**Option A — register at module load (built-ins style)**
+
+In `embodichain/lab/sim/atomic_actions/engine.py`, add to the `_builtin_action_map` dict:
+
+```python
+_builtin_action_map: dict[str, type[AtomicAction]] = {
+    "move":   MoveAction,
+    "pickup": PickUpAction,
+    "place":  PlaceAction,
+    "push":   PushAction,   # ← add here
+}
+```
+
+**Option B — register at runtime (custom / plugin style)**
+
+```python
+from embodichain.lab.sim.atomic_actions import register_action
+register_action("push", PushAction)
+```
+
+### 4. Export from the public API
+
+Add config and action class to `embodichain/lab/sim/atomic_actions/__init__.py`:
+
+```python
+from .actions import PushAction, PushActionCfg
+
+__all__ = [
+    ...,
+    "PushAction",
+    "PushActionCfg",
+]
+```
+
+### 5. Update the supported actions table
+
+Add a row to the table in `docs/source/overview/sim/atomic_actions.md` under
+"Supported Actions":
+
+```markdown
+| `PushAction` | `PushActionCfg` | `Tensor (4,4)` — contact pose | Approach → push forward |
+```
+
+### 6. Write a test
+
+Add a test in `tests/sim/atomic_actions/` (append to an existing file or create a new one):
+
+```python
+def test_push_action_cfg_defaults():
+    cfg = PushActionCfg()
+    assert cfg.name == "push"
+    assert cfg.push_distance == 0.05
+
+def test_push_action_validate(mock_motion_generator):
+    action = PushAction(mock_motion_generator)
+    assert action.validate(target=torch.eye(4)) is True
+```
+
+## Common Mistakes
+
+| Mistake | Fix |
+|---------|-----|
+| `name` in config doesn't match registry key | Keep `cfg.name` identical to the string in `register_action("push", ...)` |
+| Returning `trajectory` without `joint_ids` | Always return the 3-tuple `(bool, Tensor, list)` |
+| `trajectory` shape `(n_envs, dof, n_waypoints)` | Correct shape is `(n_envs, n_waypoints, dof)` |
+| Doing motion planning inside `validate()` | `validate()` must be fast — IK check only |
+| Not calling `super().__init__()` | Required to set `self.robot`, `self.motion_generator`, `self.cfg` |
+| Inheriting `MoveActionCfg` instead of `ActionCfg` | Use `MoveActionCfg` only when the action reuses arm-control fields; otherwise use `ActionCfg` |
+| Forgetting to export from `__init__.py` | Users import from the public API — missing exports cause `ImportError` |
+
+## Quick Reference
+
+| Step | Action |
+|------|--------|
+| 1 | Define `@configclass` config extending `ActionCfg` with `name` field |
+| 2 | Subclass `AtomicAction`, implement `execute()` and `validate()` |
+| 3 | Register: add to `_builtin_action_map` or call `register_action()` |
+| 4 | Export from `__init__.py` |
+| 5 | Add row to supported-actions table in overview docs |
+| 6 | Write tests for config defaults and `validate()` |
diff --git a/skills/add-functor/SKILL.md b/skills/add-functor/SKILL.md
new file mode 100644
index 00000000..6133d435
--- /dev/null
+++ b/skills/add-functor/SKILL.md
@@ -0,0 +1,156 @@
+---
+name: add-functor
+description: Use when adding a new observation, event, reward, action, dataset, or randomization functor to an EmbodiChain environment
+---
+
+# Add Functor
+
+Scaffold a new functor following EmbodiChain's Functor/FunctorCfg pattern.
+
+## When to Use
+
+- User asks to add an observation term, reward function, event handler, action term, dataset functor, or randomizer
+- User says "add a reward", "new observation", "create a randomizer", "add event functor"
+- Any new function needs to be registered in a manager config
+
+## Determine Functor Type
+
+| Functor Type | Config Class | Module File | Manager | Signature |
+|-------------|-------------|-------------|---------|-----------|
+| Observation | `ObservationCfg` (extends `FunctorCfg`) | `managers/observations.py` | `ObservationManager` | `(env, obs, entity_cfg, ...) -> Tensor` |
+| Reward | `RewardCfg` (extends `FunctorCfg`) | `managers/rewards.py` | `RewardManager` | `(env, obs, action, info, ...) -> Tensor` |
+| Event | `EventCfg` (extends `FunctorCfg`) | `managers/events.py` | `EventManager` | `(env, env_ids, ...) -> None` |
+| Action | `ActionTermCfg` (extends `FunctorCfg`) | `managers/actions.py` | `ActionManager` | Varies |
+| Dataset | `DatasetFunctorCfg` (extends `FunctorCfg`) | `managers/datasets.py` | `DatasetManager` | `(env, ...) -> dict` |
+| Randomization | `EventCfg` (randomizations ARE events) | `managers/randomization/<type>.py` | `EventManager` | `(env, env_ids, entity_cfg, ...) -> None` |
+
+## Two Functor Styles
+
+### Function-style (Preferred for Simple Functors)
+
+A plain function with the right signature. Registered via `FunctorCfg(func=my_function, params={...})`.
+
+```python
+def my_reward(
+    env: EmbodiedEnv,
+    obs: dict,
+    action: EnvAction,
+    info: dict,
+    my_param: float = 1.0,       # params become keyword args
+) -> torch.Tensor:
+    """Short one-line summary.
+
+    Longer description if needed.
+
+    Args:
+        env: The environment instance.
+        obs: The observation dictionary.
+        action: The action taken.
+        info: The info dictionary.
+        my_param: Description of this parameter.
+
+    Returns:
+        Reward tensor of shape (num_envs,).
+    """
+    # implementation
+    return result
+```
+
+### Class-style (Required When Functor Has State)
+
+A class inheriting `Functor`, with `__init__(cfg, env)` and `__call__(env, ...)`. Registered via `FunctorCfg(func=MyClass, params={...})`.
+
+```python
+class my_randomizer(Functor):
+    """One-line summary."""
+
+    def __init__(self, cfg: FunctorCfg, env: EmbodiedEnv):
+        super().__init__(cfg, env)
+        # Extract params and initialize state
+        self.entity_cfg: SceneEntityCfg = cfg.params["entity_cfg"]
+
+    def __call__(self, env: EmbodiedEnv, env_ids: torch.Tensor, **kwargs):
+        """Apply the randomization.
+
+        Args:
+            env: The environment instance.
+            env_ids: Target environment IDs.
+        """
+        # implementation
+```
+
+## Steps
+
+### 1. Identify Functor Type and Style
+
+Ask the user:
+1. **Which manager?** (observation / reward / event / action / dataset / randomization)
+2. **Function or class style?** (function for stateless, class for stateful)
+3. **What does it do?** (brief description for naming + docstring)
+
+### 2. Choose the Right Module File
+
+Place the functor in the existing module for its type:
+
+| Type | File |
+|------|------|
+| Observation | `embodichain/lab/gym/envs/managers/observations.py` |
+| Reward | `embodichain/lab/gym/envs/managers/rewards.py` |
+| Event | `embodichain/lab/gym/envs/managers/events.py` |
+| Action | `embodichain/lab/gym/envs/managers/actions.py` |
+| Dataset | `embodichain/lab/gym/envs/managers/datasets.py` |
+| Physics randomization | `embodichain/lab/gym/envs/managers/randomization/physics.py` |
+| Visual randomization | `embodichain/lab/gym/envs/managers/randomization/visual.py` |
+| Spatial randomization | `embodichain/lab/gym/envs/managers/randomization/spatial.py` |
+| Geometry randomization | `embodichain/lab/gym/envs/managers/randomization/geometry.py` |
+
+### 3. Write the Functor
+
+Follow the template for function-style or class-style (see above).
+
+Key rules:
+- First argument is always `env: EmbodiedEnv` (use `TYPE_CHECKING` guard for the import)
+- Use `from __future__ import annotations` at the top
+- Use `SceneEntityCfg` for entity references, not raw strings
+- For observation functors: add `shape` key to `FunctorCfg.extra` dict
+- For randomization functors: second arg is `env_ids: torch.Tensor | list[int]`
+- For reward functors: return shape must be `(num_envs,)`
+
+### 4. Update `__all__`
+
+Add the new functor to the module's `__all__` list. If no `__all__` exists, create one.
+
+### 5. Write a Test
+
+Place at `tests/gym/envs/managers/test_<functor_type>.py` (append to existing file if present).
+
+For functors that don't need a live simulation, use mock objects (`MockEnv`, `MockSim`, etc.) following the pattern in `tests/gym/envs/managers/test_reward_functors.py`.
+
+### 6. Run `black`
+
+```bash
+black embodichain/lab/gym/envs/managers/<module>.py
+black tests/gym/envs/managers/test_<functor_type>.py
+```
+
+## Common Mistakes
+
+| Mistake | Fix |
+|---------|-----|
+| Wrong first argument signature | Observation: `(env, obs, ...)`, Reward: `(env, obs, action, info, ...)`, Event/Randomization: `(env, env_ids, ...)` |
+| Importing `EmbodiedEnv` at module level | Use `TYPE_CHECKING` guard to avoid circular imports |
+| Forgetting `SceneEntityCfg` for entity refs | Always use `SceneEntityCfg(uid="...")` not bare strings |
+| Returning wrong tensor shape | Rewards must return `(num_envs,)`, observations must match declared shape |
+| Missing `from __future__ import annotations` | Required in every file |
+| Class-style functor not calling `super().__init__` | Always call `super().__init__(cfg, env)` |
+| Adding randomizer as standalone | Randomizations ARE events — they go in `randomization/` but use `EventCfg` |
+
+## Quick Reference
+
+| Step | Action |
+|------|--------|
+| 1 | Identify manager type + function vs class style |
+| 2 | Write functor in the correct module file |
+| 3 | Update `__all__` in that module |
+| 4 | Write test with mocks (no sim needed for most) |
+| 5 | Run `black` on changed files |
diff --git a/skills/add-task-env/SKILL.md b/skills/add-task-env/SKILL.md
new file mode 100644
index 00000000..b6092cfc
--- /dev/null
+++ b/skills/add-task-env/SKILL.md
@@ -0,0 +1,107 @@
+---
+name: add-task-env
+description: Use when creating a new task environment for EmbodiChain, including expert demonstration tasks, RL tasks or any EmbodiedEnv subclass
+---
+
+# Add Task Environment
+
+Scaffold a new task environment following EmbodiChain's conventions and patterns.
+
+## When to Use
+
+- User asks to create a new task or environment
+- User says "add a task", "new env", "create environment for X"
+
+## Steps
+
+### 1. Determine Task Category
+
+Ask the user:
+
+- **Category**: `tableware`, `rl`, or `special` (maps to `embodichain/lab/gym/envs/tasks/<category>/`)
+- **Task name** (snake_case, e.g. `pick_place`)
+- **Gym ID** (e.g. `PickPlace-v1`)
+- **Task type**: RL task (needs reward functors) or expert demonstration task (needs `create_demo_action_list`)
+
+### 2. Create the Task File
+
+Place at `embodichain/lab/gym/envs/tasks/<category>/<name>.py`.
+
+Template:
+
+```python
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import torch
+from typing import Dict, Any, Tuple
+
+from embodichain.lab.gym.utils.registration import register_env
+from embodichain.lab.gym.envs import EmbodiedEnv, EmbodiedEnvCfg
+from embodichain.lab.sim.types import EnvObs
+
+__all__ = ["<CamelCaseName>Env"]
+
+
+@register_env("<GymId>")
+class <CamelCaseName>Env(EmbodiedEnv):
+    """<One-line description of the task>.
+
+    <Longer description of what the task involves and its reward structure.>
+    """
+
+    def __init__(self, cfg: EmbodiedEnvCfg = None, **kwargs):
+        if cfg is None:
+            cfg = EmbodiedEnvCfg()
+        super().__init__(cfg, **kwargs)
+
+    # Expert demo tasks: implement `create_demo_action_list`.
+    # RL tasks: implement `check_truncated`, `get_reward`, `compute_task_state`.
+```
+
+### 3. Update Exports
+
+Add to `embodichain/lab/gym/envs/tasks/__init__.py`:
+
+```python
+from embodichain.lab.gym.envs.tasks.<category>.<name> import <CamelCaseName>Env
+```
+
+Add `"<CamelCaseName>Env"` to the `__all__` list.
+
+### 4. Create Test Stub
+
+Place at `tests/gym/envs/tasks/test_<name>.py`.
+
+### 5. Format
+
+```bash
+black embodichain/lab/gym/envs/tasks/<category>/<name>.py
+black tests/gym/envs/tasks/test_<name>.py
+```
+
+## Checklist
+
+- [ ] File has Apache 2.0 header
+- [ ] Uses `from __future__ import annotations`
+- [ ] `@register_env` decorator with unique gym ID
+- [ ] `__all__` defined in the task module
+- [ ] Default `cfg = EmbodiedEnvCfg()` in `__init__`
+- [ ] Import and `__all__` added to `tasks/__init__.py`
+- [ ] Test stub created
+- [ ] `black` run on both files
diff --git a/skills/add-test/SKILL.md b/skills/add-test/SKILL.md
new file mode 100644
index 00000000..d780154c
--- /dev/null
+++ b/skills/add-test/SKILL.md
@@ -0,0 +1,246 @@
+---
+name: add-test
+description: Use when writing tests for EmbodiChain modules, including observation functors, reward functors, solvers, sensors, environments, or any Python module
+---
+
+# Add Test
+
+Write tests following EmbodiChain's conventions and patterns.
+
+## When to Use
+
+- User asks to "add a test", "write tests for X", "test this module"
+- A new public module or function needs test coverage
+- PR checklist requires tests
+
+## Test File Location
+
+Tests mirror the source tree under `tests/`:
+
+```
+embodichain/lab/sim/solvers/pytorch_solver.py  →  tests/sim/solvers/test_pytorch_solver.py
+embodichain/lab/gym/envs/managers/rewards.py    →  tests/gym/envs/managers/test_reward_functors.py
+embodichain/toolkits/graspkit/pg_grasp/foo.py   →  tests/toolkits/test_pg_grasp.py
+embodichain/lab/gym/envs/tasks/rl/push_cube.py  →  tests/gym/envs/tasks/test_push_cube.py
+```
+
+Rules:
+- File name: `test_<module>.py`
+- Directory path mirrors `embodichain/` structure under `tests/`
+- Create `__init__.py` files in new `tests/` subdirectories if needed
+
+## Two Test Styles
+
+### pytest Style — For Pure-Python Logic (No Sim)
+
+Use when: testing functors, utility functions, pure math, config validation — anything that doesn't need a `SimulationManager`.
+
+```python
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# ...
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import pytest
+import torch
+
+from embodichain.my_module import my_function
+
+
+def test_expected_output():
+    result = my_function(input_value)
+    assert result == expected_value
+
+
+def test_edge_case():
+    result = my_function(edge_input)
+    assert result is not None
+```
+
+### Class Style — For Sim-Dependent or Ordered Tests
+
+Use when: tests need `SimulationManager`, GPU setup, or must run in a specific order. Share state via `setup_method`/`teardown_method`.
+
+```python
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# ...
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import pytest
+import torch
+
+from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
+
+
+class TestMySimComponent:
+    def setup_method(self):
+        config = SimulationManagerCfg(headless=True, sim_device="cpu")
+        self.sim = SimulationManager(config)
+        # ... setup ...
+
+    def teardown_method(self):
+        self.sim.destroy()
+
+    def test_basic_behavior(self):
+        result = self.sim.do_something()
+        assert result == expected_result
+
+    def test_raises_on_bad_input(self):
+        with pytest.raises(ValueError):
+            self.sim.do_something(bad_input)
+```
+
+## Mocking Patterns for Functor Tests
+
+Most functor tests don't need a live simulation. Use mock objects following the pattern in `tests/gym/envs/managers/test_reward_functors.py`:
+
+```python
+from unittest.mock import MagicMock, Mock
+
+
+class MockSim:
+    """Mock simulation for functor tests."""
+
+    def __init__(self, num_envs: int = 4):
+        self.num_envs = num_envs
+        self.device = torch.device("cpu")
+        self._rigid_objects: dict = {}
+
+    def get_rigid_object(self, uid: str):
+        return self._rigid_objects.get(uid)
+
+    def add_rigid_object(self, obj):
+        self._rigid_objects[obj.uid] = obj
+
+
+class MockEnv:
+    """Mock environment for functor tests."""
+
+    def __init__(self, num_envs: int = 4):
+        self.num_envs = num_envs
+        self.device = torch.device("cpu")
+        self.sim = MockSim(num_envs)
+```
+
+Key points for mock objects:
+- Set `num_envs` and `device` attributes (functors use these)
+- Mock only the sim methods the functor actually calls
+- Use `MagicMock(uid="...")` for `SceneEntityCfg` parameters
+
+## Steps
+
+### 1. Identify What to Test
+
+Ask the user:
+1. **Which module/function?** — determines file path
+2. **Does it need a live simulation?** — determines test style
+3. **Key behaviors to verify** — happy path, edge cases, error cases
+
+### 2. Determine Test File Path
+
+Map the source path to test path:
+
+```
+embodichain/<subpath>/<module>.py  →  tests/<subpath>/test_<module>.py
+```
+
+Check if the test file already exists — append new test classes/functions if so.
+
+### 3. Choose Test Style
+
+```dot
+digraph test_style {
+    rankdir=LR;
+    "Needs SimulationManager?" -> "Class style" [label="yes"];
+    "Needs SimulationManager?" -> "pytest style" [label="no"];
+    "Tests share state/order?" -> "Class style" [label="yes"];
+    "Tests share state/order?" -> "pytest style" [label="no"];
+}
+```
+
+### 4. Write the Test
+
+Use the appropriate template (pytest or class style above).
+
+Rules:
+- **Apache 2.0 header** — required on every test file
+- **`from __future__ import annotations`** — after header, before imports
+- **No magic numbers** — define expected values as named constants or comment their origin
+- **Test function names** — `test_<scenario>` (descriptive, not just `test_foo`)
+- **One assertion concept per test** — don't bundle unrelated checks
+
+### 5. Add `if __name__ == "__main__"` Block
+
+Include this for tests that support optional visual/interactive debugging:
+
+```python
+if __name__ == "__main__":
+    # For visual debugging: set is_visual=True when calling env methods
+    test_obj = TestMyComponent()
+    test_obj.setup_method()
+    # ... manually run test logic ...
+```
+
+### 6. Run the Test
+
+```bash
+# Single file
+pytest tests/<subpath>/test_<module>.py -v
+
+# Single test function
+pytest tests/<subpath>/test_<module>.py::test_expected_output -v
+
+# Single test class method
+pytest tests/<subpath>/test_<module>.py::TestMyClass::test_basic_behavior -v
+```
+
+### 7. Run `black`
+
+```bash
+black tests/<subpath>/test_<module>.py
+```
+
+## Conventions Summary
+
+| Convention | Rule |
+|-----------|------|
+| File header | Apache 2.0 copyright block (same 15 lines as source) |
+| File naming | `test_<module>.py` |
+| Function naming | `test_<scenario>` |
+| `from __future__` | Required after header |
+| Magic numbers | Define as named constants with explanatory comments |
+| Simulation tests | Initialize/teardown in `setup_method`/`teardown_method` |
+| Pure-logic tests | Use mock objects, no real sim |
+| `SceneEntityCfg` | Use `MagicMock(uid="...")` in tests |
+| Assertions | `assert`, `pytest.approx`, `torch.allclose`, `pytest.raises` |
+| Entry block | `if __name__ == "__main__"` for visual debugging support |
+
+## Common Mistakes
+
+| Mistake | Fix |
+|---------|-----|
+| Missing Apache header on test file | Copy the 15-line copyright block |
+| Using real `SimulationManager` for functor tests | Use `MockEnv`/`MockSim` — much faster, no GPU needed |
+| Hardcoded numbers without explanation | Define as `EXPECTED_DISTANCE = 0.5  # cube at origin, target at (0.5, 0, 0)` |
+| Testing multiple concepts in one function | Split into separate `test_<scenario>` functions |
+| Forgetting `teardown_method` | Always call `self.sim.destroy()` in teardown |
+| Not running `black` on test file | CI checks all files including tests |
+
+## Quick Reference
+
+| Action | Command |
+|--------|---------|
+| Run all tests | `pytest tests/` |
+| Run single file | `pytest tests/<path>/test_<name>.py -v` |
+| Run single test | `pytest tests/<path>::test_<name> -v` |
+| Run with print output | `pytest -s tests/<path>/test_<name>.py` |
+| Format | `black tests/<path>/test_<name>.py` |
diff --git a/skills/benchmark/SKILL.md b/skills/benchmark/SKILL.md
new file mode 100644
index 00000000..e95ffe05
--- /dev/null
+++ b/skills/benchmark/SKILL.md
@@ -0,0 +1,479 @@
+---
+name: benchmark
+description: Write benchmark scripts for EmbodiChain modules following project conventions
+---
+
+# EmbodiChain Benchmark Script Writer
+
+This skill guides you through writing well-structured benchmark scripts for EmbodiChain modules, covering performance measurement of solvers, samplers, metrics, and other computationally intensive components.
+
+## Usage
+
+Invoke this skill when:
+- A user asks to write or extend a benchmark script for any EmbodiChain module
+- Comparing CPU vs GPU implementations (e.g., Warp CUDA vs pure-Python)
+- Measuring throughput of samplers, metrics, FK/IK solvers, or data pipelines
+- The file path contains `scripts/benchmark/` or the word "benchmark" appears in the request
+
+## Key Conventions
+
+### File Location
+
+Place benchmark scripts under:
+
+```
+scripts/benchmark/<domain>/<benchmark_name>.py
+```
+
+Examples:
+- `scripts/benchmark/robotics/kinematic_solver/opw_solver.py`
+- `scripts/benchmark/workspace_analyzer/benchmark_workspace_analyzer.py`
+
+### File Header
+
+Every benchmark file **must** begin with the Apache 2.0 copyright header followed by a module-level docstring:
+
+```python
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""One-line summary of what this benchmark measures.
+
+Longer description of the optimizations or comparisons being evaluated.
+Run: python -m scripts.benchmark.<domain>.<benchmark_name>
+"""
+```
+
+---
+
+## Steps
+
+### 1. Identify What to Benchmark
+
+Ask yourself:
+- **What implementations are being compared?** (e.g., Warp CUDA vs. CPU, vectorized vs. loop-based)
+- **What is the primary metric?** (wall-clock time, mean error, throughput)
+- **What sample sizes cover realistic usage?** Typically: `[100, 1000, 10000, 100000]`
+
+### 2. Structure the Script
+
+Use one helper function per concern, then a single orchestrator:
+
+```
+benchmark_<component_a>()   # e.g., benchmark_halton_sampler()
+benchmark_<component_b>()   # e.g., benchmark_density_metric()
+...
+run_all_benchmarks()        # calls all of the above + prints header/footer
+```
+
+### 3. Write Individual Benchmark Functions
+
+Each benchmark function follows this pattern:
+
+```python
+def benchmark_<name>():
+    """One-line description of what is being measured."""
+    from embodichain.<module.path> import SomeClass, SomeCfg
+
+    # --- Setup (not timed) ---
+    cfg = SomeCfg(...)
+    obj = cfg.init_solver(...)  # or SomeClass(cfg)
+
+    print("\n=== <Name> Benchmark ===")
+    for n in [100, 1000, 10000, 100000]:
+        # Prepare inputs (not timed)
+        inputs = ...
+
+        # --- Timed block ---
+        start = time.perf_counter()
+        result = obj.compute(inputs)       # or obj.get_ik(...) etc.
+        elapsed = time.perf_counter() - start
+
+        print(f"  n={n:>7d}: {elapsed*1000:>10.2f} ms (...)")
+```
+
+Key rules:
+- Use `time.perf_counter()` for high-resolution wall-clock timing, **not** `time.time()`.
+- Only time the core computation — exclude setup, data preparation, and print statements.
+- Print results in milliseconds (`elapsed * 1000`) with consistent column alignment using `>` format specs.
+
+> **Exception**: When benchmarking GPU (Warp/CUDA) code alongside a CPU baseline, it is acceptable to use `time.time()` for coarser comparison timing, as seen in `opw_solver.py`. Prefer `time.perf_counter()` for CPU-only benchmarks.
+
+### 4. Comparing Two Implementations
+
+When the benchmark compares two backends (e.g., Warp CUDA vs. Python OPW):
+
+```python
+def check_<name>(solver_a, solver_b, n_samples=1000):
+    """Run both solvers and return timing + accuracy metrics."""
+    # shared input generation
+    qpos = ...
+
+    # --- Solver A (e.g., Warp CUDA) ---
+    start = time.time()
+    success_a, result_a = solver_a.get_ik(xpos, ...)
+    time_a = time.time() - start
+    t_err_a, r_err_a = get_poses_err(...)
+
+    # --- Solver B (e.g., CPU) ---
+    start = time.time()
+    success_b, result_b = solver_b.get_ik(xpos, ...)
+    time_b = time.time() - start
+    t_err_b, r_err_b = get_poses_err(...)
+
+    return time_a, t_err_a, r_err_a, time_b, t_err_b, r_err_b
+
+
+def benchmark_<name>():
+    cfg = ...
+    solver_a = cfg.init_solver(device=torch.device("cuda"), ...)
+    solver_b = cfg.init_solver(device=torch.device("cpu"),  ...)
+
+    for n in [100, 1000, 10000, 100000]:
+        time_a, t_err_a, r_err_a, time_b, t_err_b, r_err_b = check_<name>(
+            solver_a, solver_b, n_samples=n
+        )
+        print(f"**** Test over {n} samples:")
+        print(f"===Impl A time:  {time_a * 1000:.6f} ms")
+        print(f"   Translation mean error: {t_err_a * 1000:.6f} mm")
+        print(f"   Rotation mean error:    {r_err_a * 180 / np.pi:.6f} degrees")
+        print(f"===Impl B time:  {time_b * 1000:.6f} ms")
+        ...
+```
+
+### 5. Report Accuracy Alongside Speed
+
+For FK/IK solvers, always verify correctness by running FK on the IK output and measuring pose error:
+
+```python
+def get_pose_err(matrix_a: np.ndarray, matrix_b: np.ndarray) -> tuple[float, float]:
+    """Return (translation_error_m, rotation_error_rad)."""
+    t_err = np.linalg.norm(matrix_a[:3, 3] - matrix_b[:3, 3])
+    relative_rot = matrix_a[:3, :3].T @ matrix_b[:3, :3]
+    cos_angle = np.clip((np.trace(relative_rot) - 1) / 2.0, -1.0, 1.0)
+    r_err = np.arccos(cos_angle)
+    return t_err, r_err
+
+
+def get_poses_err(
+    matrix_a_list: list[np.ndarray], matrix_b_list: list[np.ndarray]
+) -> tuple[float, float]:
+    t_errs, r_errs = [], []
+    for a, b in zip(matrix_a_list, matrix_b_list):
+        t, r = get_pose_err(a, b)
+        t_errs.append(t)
+        r_errs.append(r)
+    return float(np.mean(t_errs)), float(np.mean(r_errs))
+```
+
+### 6. Handle Benchmarks That Require External Resources
+
+If a benchmark requires a live simulation, robot, or GPU device that may not be available, **skip gracefully** rather than raising an error:
+
+```python
+def benchmark_batch_fk():
+    """Benchmark batch FK (requires GPU robot setup)."""
+    print("\n=== Batch FK Benchmark (requires robot/simulation) ===")
+    print("  Skipped -- requires live SimulationManager and Robot.")
+    print("  To run manually, integrate with your robot setup:")
+    print("    analyzer.compute_workspace_points(joint_configs, batch_size=512)")
+```
+
+### 7. Write the Orchestrator
+
+```python
+def run_all_benchmarks():
+    """Run all benchmarks and print summary."""
+    print("=" * 60)
+    print("<Module Name> Performance Benchmarks")
+    print("=" * 60)
+
+    benchmark_component_a()
+    benchmark_component_b()
+    # ...
+
+    print("\n" + "=" * 60)
+    print("Benchmarks complete.")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    run_all_benchmarks()
+```
+
+### 8. Save Results to One Markdown Report (Required)
+
+Every benchmark script must write its final results to **one Markdown file** after execution.
+
+- Output directory recommendation: `outputs/benchmarks/`
+- File naming recommendation: `<benchmark_name>_<YYYYMMDD_HHMMSS>.md`
+- Requirement: output **exactly three Markdown tables** in the report
+    1. `Time & Memory` table (cost time + memory columns)
+    2. `Success & Other Metrics` table (success rate + quality/accuracy/extra metrics)
+    3. `Leaderboard` table (algorithm ranking by overall success rate, descending)
+- `Leaderboard` coverage rule: include **all algorithms evaluated in the current benchmark scope**. If a provided leaderboard artifact is incomplete, backfill missing algorithms from aggregate summaries before rendering.
+
+Use this pattern:
+
+```python
+from datetime import datetime
+from pathlib import Path
+
+
+def write_markdown_report(
+    benchmark_name: str,
+    perf_rows: list[dict[str, object]],
+    metric_rows: list[dict[str, object]],
+    leaderboard_rows: list[dict[str, object]],
+    notes: list[str] | None = None,
+) -> Path:
+    """Write benchmark results into a single markdown report file."""
+    output_dir = Path("outputs/benchmarks")
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+    report_path = output_dir / f"{benchmark_name}_{ts}.md"
+
+    lines: list[str] = [
+        f"# {benchmark_name} Benchmark Report",
+        "",
+        f"Generated at: {datetime.now().isoformat(timespec='seconds')}",
+        "",
+        "## Time & Memory",
+        "",
+    ]
+
+    if perf_rows:
+        perf_headers = list(perf_rows[0].keys())
+        lines.append("| " + " | ".join(perf_headers) + " |")
+        lines.append("| " + " | ".join(["---"] * len(perf_headers)) + " |")
+        for row in perf_rows:
+            lines.append("| " + " | ".join(str(row[h]) for h in perf_headers) + " |")
+    else:
+        lines.append("No time/memory rows were produced.")
+
+    lines.extend(["", "## Success & Other Metrics", ""])
+
+    if metric_rows:
+        metric_headers = list(metric_rows[0].keys())
+        lines.append("| " + " | ".join(metric_headers) + " |")
+        lines.append("| " + " | ".join(["---"] * len(metric_headers)) + " |")
+        for row in metric_rows:
+            lines.append(
+                "| " + " | ".join(str(row[h]) for h in metric_headers) + " |"
+            )
+    else:
+        lines.append("No success/metric rows were produced.")
+
+    lines.extend(["", "## Leaderboard", ""])
+
+    if leaderboard_rows:
+        leaderboard_headers = list(leaderboard_rows[0].keys())
+        lines.append("| " + " | ".join(leaderboard_headers) + " |")
+        lines.append("| " + " | ".join(["---"] * len(leaderboard_headers)) + " |")
+        for row in leaderboard_rows:
+            lines.append(
+                "| " + " | ".join(str(row[h]) for h in leaderboard_headers) + " |"
+            )
+    else:
+        lines.append("No leaderboard rows were produced.")
+
+    if notes:
+        lines.extend(["", "## Notes", ""])
+        lines.extend([f"- {note}" for note in notes])
+
+    report_path.write_text("\\n".join(lines) + "\\n", encoding="utf-8")
+    return report_path
+```
+
+And call it at the end of `run_all_benchmarks()`:
+
+```python
+def run_all_benchmarks() -> None:
+    perf_rows: list[dict[str, object]] = []
+    metric_rows: list[dict[str, object]] = []
+
+    perf_part, metric_part = benchmark_halton_sampler()
+    perf_rows.extend(perf_part)
+    metric_rows.extend(metric_part)
+    perf_part, metric_part = benchmark_density_metric()
+    perf_rows.extend(perf_part)
+    metric_rows.extend(metric_part)
+    # ...
+
+    leaderboard_rows = build_leaderboard_rows(metric_rows)
+    # `build_leaderboard_rows` should aggregate per algorithm and sort by
+    # overall success rate in descending order.
+
+    report_path = write_markdown_report(
+        benchmark_name="workspace_analyzer",
+        perf_rows=perf_rows,
+        metric_rows=metric_rows,
+        leaderboard_rows=leaderboard_rows,
+        notes=["CPU/GPU memory fields are deltas measured around timed calls."],
+    )
+    print(f"Markdown report saved: {report_path}")
+```
+
+---
+
+## Output Format Reference
+
+| Scenario | Print format |
+|----------|-------------|
+| Single implementation, many sizes | `n={n:>7d}: {elapsed*1000:>10.2f} ms \| CPU Δ={...:+.1f} MB  GPU Δ={...:+.1f} MB  peak GPU={...:.1f} MB` |
+| Two implementations compared | `===<Impl> time: {ms:.6f} ms` then error & memory lines indented 3 spaces |
+| Markdown report path | `Markdown report saved: outputs/benchmarks/<name>_<timestamp>.md` |
+| Markdown table 1 (Time & Memory) | `| sample_size | impl | cost_time_ms | cpu_delta_mb | gpu_delta_mb | peak_gpu_mb |` |
+| Markdown table 2 (Success & Metrics) | `| sample_size | impl | success_rate | translation_err_mm | rotation_err_deg | ... |` |
+| Markdown table 3 (Leaderboard) | `| rank | algorithm | overall_success_rate | ... |` (sorted by `overall_success_rate` descending) |
+| Section header | `\n=== <Name> Benchmark ===` |
+| Top-level separator | `"=" * 60` |
+
+---
+
+## Measuring Memory Usage
+
+Always measure **both GPU VRAM and CPU RAM** alongside wall-clock time. Use the helpers below.
+
+### GPU VRAM (via PyTorch CUDA)
+
+```python
+import torch
+
+def get_gpu_memory_mb() -> float:
+    """Return current GPU VRAM allocated by PyTorch in MB."""
+    if torch.cuda.is_available():
+        return torch.cuda.memory_allocated() / 1024 ** 2
+    return 0.0
+
+# Usage pattern inside a benchmark loop:
+torch.cuda.reset_peak_memory_stats()          # reset peak counter before timed block
+mem_before = get_gpu_memory_mb()
+
+start = time.perf_counter()
+result = obj.compute(inputs)
+elapsed = time.perf_counter() - start
+
+mem_after = get_gpu_memory_mb()
+peak_vram = torch.cuda.max_memory_allocated() / 1024 ** 2  # peak during timed block
+
+print(
+    f"  n={n:>7d}: {elapsed*1000:>10.2f} ms | "
+    f"VRAM delta={mem_after - mem_before:+.1f} MB  peak={peak_vram:.1f} MB"
+)
+```
+
+### CPU RAM (via `psutil`)
+
+```python
+import psutil, os
+
+def get_cpu_memory_mb() -> float:
+    """Return current process RSS (resident set size) in MB."""
+    process = psutil.Process(os.getpid())
+    return process.memory_info().rss / 1024 ** 2
+
+# Usage pattern:
+mem_before = get_cpu_memory_mb()
+
+start = time.perf_counter()
+result = obj.compute(inputs)
+elapsed = time.perf_counter() - start
+
+mem_after = get_cpu_memory_mb()
+
+print(
+    f"  n={n:>7d}: {elapsed*1000:>10.2f} ms | "
+    f"RAM delta={mem_after - mem_before:+.1f} MB"
+)
+```
+
+### Combined Helper (recommended)
+
+For benchmarks that use both CPU and GPU, combine into a single snapshot:
+
+```python
+import os, psutil, torch
+
+def memory_snapshot() -> dict:
+    """Return a dict with current CPU RSS and GPU allocated memory in MB."""
+    process = psutil.Process(os.getpid())
+    cpu_mb = process.memory_info().rss / 1024 ** 2
+    gpu_mb = torch.cuda.memory_allocated() / 1024 ** 2 if torch.cuda.is_available() else 0.0
+    return {"cpu_mb": cpu_mb, "gpu_mb": gpu_mb}
+
+# Usage:
+torch.cuda.reset_peak_memory_stats()
+before = memory_snapshot()
+
+start = time.perf_counter()
+result = obj.compute(inputs)
+elapsed = time.perf_counter() - start
+
+after = memory_snapshot()
+peak_gpu = torch.cuda.max_memory_allocated() / 1024 ** 2
+
+print(
+    f"  n={n:>7d}: {elapsed*1000:>10.2f} ms | "
+    f"CPU Δ={after['cpu_mb'] - before['cpu_mb']:+.1f} MB  "
+    f"GPU Δ={after['gpu_mb'] - before['gpu_mb']:+.1f} MB  peak GPU={peak_gpu:.1f} MB"
+)
+```
+
+> Add `psutil` to the project's dev-dependencies if not already present (`pip install psutil`).
+
+---
+
+## Common Imports
+
+```python
+import os
+import time
+import psutil
+import numpy as np
+import torch
+import warp as wp                          # only when GPU kernels are benchmarked
+from scipy.spatial.transform import Rotation  # only when needed
+from typing import Tuple, List             # or use built-in generics (Python ≥ 3.10)
+```
+
+---
+
+## Quick Checklist
+
+Before finishing a benchmark script:
+
+- [ ] Apache 2.0 copyright header is present
+- [ ] Module-level docstring with `Run:` line
+- [ ] Each function has a one-line docstring
+- [ ] Setup code is **outside** the timed block
+- [ ] Timing uses `time.perf_counter()` (or `time.time()` when comparing GPU/CPU coarsely)
+- [ ] CPU RAM measured with `psutil` (delta MB before/after timed block)
+- [ ] GPU VRAM measured with `torch.cuda.memory_allocated()` + `torch.cuda.max_memory_allocated()` (delta + peak)
+- [ ] `torch.cuda.reset_peak_memory_stats()` called before each timed block
+- [ ] Accuracy metrics reported alongside timing (for solver benchmarks)
+- [ ] Graceful skip for benchmarks that need unavailable hardware
+- [ ] `run_all_benchmarks()` orchestrator with formatted separators
+- [ ] Results are written to exactly one Markdown report file per run
+- [ ] Report contains exactly three Markdown tables: `Time & Memory`, `Success & Other Metrics`, and `Leaderboard`
+- [ ] `Time & Memory` table includes `cost_time_ms`, `cpu_delta_mb`, `gpu_delta_mb`, `peak_gpu_mb`
+- [ ] `Success & Other Metrics` table includes `success_rate` and domain-specific quality metrics
+- [ ] `Leaderboard` table ranks algorithms by overall success rate in descending order
+- [ ] `Leaderboard` table includes all benchmarked algorithms (missing entries are backfilled from aggregate summaries if needed)
+- [ ] Console log includes final report path
+- [ ] `if __name__ == "__main__":` entry point
+- [ ] `black .` formatting applied
diff --git a/.claude/skills/pr/SKILL.md b/skills/pr/SKILL.md
similarity index 83%
rename from .claude/skills/pr/SKILL.md
rename to skills/pr/SKILL.md
index 59c3d3b6..e31b1628 100644
--- a/.claude/skills/pr/SKILL.md
+++ b/skills/pr/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: pr
-description: Create a pull request for EmbodiChain following the project's PR template and conventions
+description: Create a pull request for EmbodiChain following the project's PR template and conventions, including selecting proper GitHub repository labels
 ---
 
 # EmbodiChain Pull Request Creator
@@ -99,6 +99,36 @@ Use the gh CLI with the proper PR template:
 gh pr create --title "<PR Title>" --body "<PR Body>"
 ```
 
+### 9. Select and Apply Labels
+
+After creating the PR, select proper labels from the repository label list and apply them.
+
+First, list available labels:
+
+```bash
+gh label list
+```
+
+Then choose labels based on change type and scope. Typical mapping:
+
+- Bug fix: `bug`
+- Enhancement: `enhancement`
+- New feature: `feature`
+- Documentation update: `docs`
+- Affected area labels when available (for example): `physics`, `robot`, `agent`, `dataset`, `dexsim`
+
+Apply labels to the PR:
+
+```bash
+gh pr edit <pr-number> --add-label "bug" --add-label "env"
+```
+
+If needed, remove incorrect labels:
+
+```bash
+gh pr edit <pr-number> --remove-label "<label-name>"
+```
+
 ## PR Template
 
 Use this template for the PR body:
@@ -161,6 +191,8 @@ Fixes #<issue-number>
 | `git checkout -b branch-name` | Create branch |
 | `git push -u origin branch` | Push to remote |
 | `gh pr create` | Create PR |
+| `gh label list` | List repository labels |
+| `gh pr edit <pr-number> --add-label ...` | Apply labels to PR |
 
 ## Notes
 
diff --git a/skills/pre-commit-check/SKILL.md b/skills/pre-commit-check/SKILL.md
new file mode 100644
index 00000000..41ec4d3d
--- /dev/null
+++ b/skills/pre-commit-check/SKILL.md
@@ -0,0 +1,158 @@
+---
+name: pre-commit-check
+description: Use before committing or creating a PR for EmbodiChain to verify code style, headers, annotations, exports, and docstrings pass CI checks
+---
+
+# Pre-Commit Check
+
+Run all local checks that the CI pipeline enforces, catching issues before pushing.
+
+## When to Use
+
+- Before creating a commit or PR
+- User says "check my changes", "pre-commit", "verify before commit", "ready to push"
+- After making any code changes to `.py` files
+
+## Steps
+
+### 1. Identify Changed Files
+
+```bash
+git diff --name-only HEAD
+git diff --name-only --cached
+git status --short
+```
+
+Collect all changed/added `.py` files.
+
+### 2. Run Black Formatting Check
+
+This is the **first CI gate** and will cause immediate failure:
+
+```bash
+black --check --diff --color ./
+```
+
+If it fails, run `black .` and review the formatting changes.
+
+### 3. Check Apache 2.0 Copyright Header
+
+Every `.py` file must begin with the 15-line copyright block. For each changed/new `.py` file, verify the first line is:
+
+```
+# ----------------------------------------------------------------------------
+```
+
+The full header template:
+
+```python
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+```
+
+### 4. Check `from __future__ import annotations`
+
+Every `.py` file must have this import (after the header, before other imports). This enables `A | B` syntax and forward references.
+
+### 5. Check `__all__` in Public Modules
+
+For any new or modified module under `embodichain/`, verify it defines `__all__` listing all public symbols. Example:
+
+```python
+__all__ = ["MyClass", "my_function"]
+```
+
+Skip this check for `__init__.py` files that only re-export via `from . import *`.
+
+### 6. Check Docstrings on Public APIs
+
+For any new public function, class, or method:
+- Must have a Google-style docstring
+- Must include `Args:` section if it takes parameters
+- Must include `Returns:` section if it returns a value
+- Use `.. attention::` or `.. tip::` directives for non-obvious behavior
+
+### 7. Check Type Annotations
+
+For any new public API:
+- All parameters must have type hints
+- Return type must be annotated
+- Use `A | B` over `Union[A, B]`
+- Use `TYPE_CHECKING` guard for imports that would cause circular dependencies
+
+### 8. Check `@configclass` Usage
+
+For any new configuration class:
+- Must use `@configclass` decorator (not bare `@dataclass`)
+- Must use `from dataclasses import MISSING` for required fields
+- Import from `embodichain.utils import configclass`
+
+### 9. Check Test Coverage
+
+For any new public module or function:
+- A corresponding test must exist at `tests/<subpackage>/test_<module>.py`
+- Test file must also have the Apache 2.0 header
+- Report if tests are missing
+
+### 10. Summary Report
+
+Output a pass/fail summary:
+
+```
+Pre-Commit Check Results
+========================
+[PASS] Black formatting
+[PASS] Apache 2.0 headers (5/5 files)
+[FAIL] from __future__ import annotations — missing in: foo.py
+[PASS] __all__ exports
+[PASS] Docstrings on public APIs
+[PASS] Type annotations
+[PASS] @configclass usage
+[WARN] Missing tests for: bar.py
+
+Fix the above issues before committing.
+```
+
+## What CI Checks
+
+The project's CI pipeline (`.github/workflows/main.yml`) runs:
+
+1. **lint** job: `black --check --diff --color ./`
+2. **test** job: `pytest tests`
+3. **build** job: Sphinx docs build
+
+This skill covers items 1 and 2 locally. Docs build is heavier and typically only needed for documentation changes.
+
+## Common Mistakes
+
+| Mistake | Fix |
+|---------|-----|
+| Running `black` on only one file | Run `black .` on the whole project — CI checks everything |
+| Forgetting test Apache header | Test files also need the 15-line copyright block |
+| Using `Union[A, B]` | Use `A \| B` (with `from __future__ import annotations`) |
+| Using bare `@dataclass` | Use `@configclass` from `embodichain.utils` |
+| Missing `__all__` in new module | Add `__all__` with all public symbols |
+
+## Quick Reference
+
+| Check | Command/Method |
+|-------|---------------|
+| Black formatting | `black --check --diff --color ./` |
+| Auto-fix formatting | `black .` |
+| Header check | Verify first line is `# ---...---` |
+| `__future__` import | Grep for `from __future__ import annotations` |
+| `__all__` export | Grep for `__all__` in module |
+| Run tests | `pytest tests/<path>` |
diff --git a/tests/agents/test_online_data.py b/tests/agents/test_online_data.py
index fb358b81..10c3c13e 100644
--- a/tests/agents/test_online_data.py
+++ b/tests/agents/test_online_data.py
@@ -111,14 +111,14 @@ def _make_fake_engine(
     engine.buffer_size = buffer_size
     engine.device = shared_buffer.device
 
-    # Interprocess primitives — use mp objects so the locking logic works.
+    # Interprocess primitives — use the same mp context consistently to avoid
     engine._mp_ctx = mp.get_context("spawn")
-    engine._lock_index = mp.Array("i", [lock_start, lock_end])
-    engine._fill_signal = mp.Event()
-    engine._init_signal = mp.Event()
+    engine._lock_index = engine._mp_ctx.Array("i", [lock_start, lock_end])
+    engine._fill_signal = engine._mp_ctx.Event()
+    engine._init_signal = engine._mp_ctx.Event()
     engine._init_signal.set()  # mark as initialised
-    engine._close_signal = mp.Event()
-    engine._sample_count = mp.Value("i", 0)
+    engine._close_signal = engine._mp_ctx.Event()
+    engine._sample_count = engine._mp_ctx.Value("i", 0)
 
     engine.start()
 
diff --git a/tests/agents/test_shared_rollout.py b/tests/agents/test_shared_rollout.py
index 37dd34fa..4701540f 100644
--- a/tests/agents/test_shared_rollout.py
+++ b/tests/agents/test_shared_rollout.py
@@ -21,6 +21,7 @@
 import torch
 from tensordict import TensorDict
 
+from embodichain.lab.sim.cfg import RenderCfg
 from embodichain.agents.rl.buffer import RolloutBuffer
 from embodichain.agents.rl.collector import SyncCollector
 from embodichain.agents.rl.utils import flatten_dict_observation
@@ -186,7 +187,7 @@ def test_embodied_env_writes_next_fields_into_external_rollout():
     env_cfg.sim_cfg = SimulationManagerCfg(
         headless=True,
         sim_device=torch.device("cpu"),
-        enable_rt=False,
+        render_cfg=RenderCfg(renderer="hybrid"),
         gpu_id=0,
     )
 
diff --git a/tests/benchmark/test_leaderboard.py b/tests/benchmark/test_leaderboard.py
new file mode 100644
index 00000000..4412d746
--- /dev/null
+++ b/tests/benchmark/test_leaderboard.py
@@ -0,0 +1,72 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+from benchmark.rl.metrics import build_leaderboard
+
+
+def test_build_leaderboard_ranks_higher_success_first():
+    aggregate_results = [
+        {
+            "algorithm": "ppo",
+            "task": "cart_pole",
+            "final_success_rate_mean": 0.8,
+            "final_success_rate_stable_mean": 0.7,
+            "final_reward_mean": 10.0,
+            "steps_to_success_threshold_mean": 100.0,
+        },
+        {
+            "algorithm": "ppo",
+            "task": "push_cube",
+            "final_success_rate_mean": 0.6,
+            "final_success_rate_stable_mean": 0.5,
+            "final_reward_mean": 5.0,
+            "steps_to_success_threshold_mean": 200.0,
+        },
+        {
+            "algorithm": "grpo",
+            "task": "cart_pole",
+            "final_success_rate_mean": 0.7,
+            "final_success_rate_stable_mean": 0.8,
+            "final_reward_mean": 8.0,
+            "steps_to_success_threshold_mean": 150.0,
+        },
+        {
+            "algorithm": "grpo",
+            "task": "push_cube",
+            "final_success_rate_mean": 0.5,
+            "final_success_rate_stable_mean": 0.7,
+            "final_reward_mean": 4.0,
+            "steps_to_success_threshold_mean": 250.0,
+        },
+    ]
+    run_results = [
+        {"algorithm": "ppo", "final_success_rate": 0.8},
+        {"algorithm": "ppo", "final_success_rate": 0.6},
+        {"algorithm": "grpo", "final_success_rate": 0.7},
+        {"algorithm": "grpo", "final_success_rate": 0.5},
+    ]
+
+    leaderboard = build_leaderboard(aggregate_results, run_results=run_results)
+
+    assert leaderboard[0]["algorithm"] == "grpo"
+    assert leaderboard[0]["rank"] == 1
+    assert "avg_success_rate_stable" in leaderboard[0]
+    assert "steps_to_success_threshold" in leaderboard[0]
+    assert "success_rate_std" in leaderboard[0]
+    assert "tasks" in leaderboard[0]
+    assert leaderboard[1]["algorithm"] == "ppo"
diff --git a/tests/benchmark/test_metrics.py b/tests/benchmark/test_metrics.py
new file mode 100644
index 00000000..2d4d163b
--- /dev/null
+++ b/tests/benchmark/test_metrics.py
@@ -0,0 +1,108 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+from benchmark.rl.metrics import (
+    aggregate_runs,
+    compute_final_metric_stable,
+    compute_steps_to_threshold_first_hit,
+    compute_steps_to_threshold_sustained,
+)
+
+
+def test_compute_steps_to_threshold_first_hit_returns_first_matching_step():
+    eval_history = [
+        {"global_step": 128.0, "eval/success_rate": 0.2},
+        {"global_step": 256.0, "eval/success_rate": 0.75},
+        {"global_step": 384.0, "eval/success_rate": 0.81},
+    ]
+
+    assert (
+        compute_steps_to_threshold_first_hit(eval_history, "eval/success_rate", 0.8)
+        == 384
+    )
+
+
+def test_compute_steps_to_threshold_sustained_requires_consecutive_hits():
+    eval_history = [
+        {"global_step": 100.0, "eval/success_rate": 0.81},
+        {"global_step": 200.0, "eval/success_rate": 0.70},
+        {"global_step": 300.0, "eval/success_rate": 0.82},
+        {"global_step": 400.0, "eval/success_rate": 0.84},
+        {"global_step": 500.0, "eval/success_rate": 0.83},
+    ]
+
+    assert (
+        compute_steps_to_threshold_sustained(
+            eval_history, "eval/success_rate", 0.8, sustain_count=3
+        )
+        == 300
+    )
+
+
+def test_compute_final_metric_stable_uses_last_window():
+    eval_history = [
+        {"global_step": 100.0, "eval/success_rate": 0.2},
+        {"global_step": 200.0, "eval/success_rate": 0.4},
+        {"global_step": 300.0, "eval/success_rate": 0.6},
+        {"global_step": 400.0, "eval/success_rate": 0.8},
+    ]
+
+    assert compute_final_metric_stable(eval_history, "eval/success_rate", 2) == 0.7
+
+
+def test_aggregate_runs_groups_by_task_and_algorithm():
+    run_results = [
+        {
+            "task": "cart_pole",
+            "algorithm": "ppo",
+            "seed": 0,
+            "final_reward": 1.0,
+            "final_success_rate": 0.9,
+            "final_success_rate_stable": 0.85,
+            "final_episode_length": 50.0,
+            "training_fps": 100.0,
+            "environment_fps": 500.0,
+            "peak_gpu_memory_mb": 0.0,
+            "steps_to_success_threshold": 1000,
+            "steps_to_success_threshold_first_hit": 800,
+        },
+        {
+            "task": "cart_pole",
+            "algorithm": "ppo",
+            "seed": 1,
+            "final_reward": 3.0,
+            "final_success_rate": 0.7,
+            "final_success_rate_stable": 0.75,
+            "final_episode_length": 40.0,
+            "training_fps": 200.0,
+            "environment_fps": 700.0,
+            "peak_gpu_memory_mb": 0.0,
+            "steps_to_success_threshold": 2000,
+            "steps_to_success_threshold_first_hit": 1200,
+        },
+    ]
+
+    summaries = aggregate_runs(run_results)
+
+    assert len(summaries) == 1
+    assert summaries[0]["task"] == "cart_pole"
+    assert summaries[0]["algorithm"] == "ppo"
+    assert summaries[0]["final_reward_mean"] == 2.0
+    assert summaries[0]["final_success_rate_stable_mean"] == 0.8
+    assert summaries[0]["steps_to_success_threshold_mean"] == 1500
+    assert summaries[0]["steps_to_success_threshold_first_hit_mean"] == 1000
diff --git a/tests/benchmark/test_plots.py b/tests/benchmark/test_plots.py
new file mode 100644
index 00000000..484da225
--- /dev/null
+++ b/tests/benchmark/test_plots.py
@@ -0,0 +1,67 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+from benchmark.rl.plots import build_plot_artifacts
+
+
+def test_build_plot_artifacts_writes_svg_files(tmp_path):
+    run_results = [
+        {
+            "task": "cart_pole",
+            "algorithm": "ppo",
+            "eval_history": [
+                {
+                    "global_step": 100.0,
+                    "eval/success_rate": 0.2,
+                    "eval/avg_reward": 1.0,
+                },
+                {
+                    "global_step": 200.0,
+                    "eval/success_rate": 0.8,
+                    "eval/avg_reward": 2.0,
+                },
+            ],
+        },
+        {
+            "task": "cart_pole",
+            "algorithm": "grpo",
+            "eval_history": [
+                {
+                    "global_step": 100.0,
+                    "eval/success_rate": 0.1,
+                    "eval/avg_reward": 0.5,
+                },
+                {
+                    "global_step": 200.0,
+                    "eval/success_rate": 0.6,
+                    "eval/avg_reward": 1.5,
+                },
+            ],
+        },
+    ]
+    leaderboard = [
+        {"algorithm": "ppo", "score": 0.8},
+        {"algorithm": "grpo", "score": 0.6},
+    ]
+
+    artifacts = build_plot_artifacts(run_results, leaderboard, tmp_path)
+
+    assert "cart_pole_success_rate" in artifacts
+    assert "leaderboard_score" in artifacts
+    for path in artifacts.values():
+        assert path.endswith(".svg")
diff --git a/tests/benchmark/test_reporting.py b/tests/benchmark/test_reporting.py
new file mode 100644
index 00000000..55784b11
--- /dev/null
+++ b/tests/benchmark/test_reporting.py
@@ -0,0 +1,97 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+from benchmark.rl.reporting import generate_markdown_report
+
+
+def test_generate_markdown_report_writes_expected_sections(tmp_path):
+    run_results = [
+        {
+            "task": "cart_pole",
+            "algorithm": "ppo",
+            "seed": 0,
+            "final_reward": 1.5,
+            "final_success_rate": 0.8,
+            "final_success_rate_stable": 0.75,
+            "steps_to_success_threshold": 256,
+            "steps_to_success_threshold_first_hit": 128,
+            "checkpoint_path": "outputs/checkpoint.pt",
+        }
+    ]
+    aggregate_results = [
+        {
+            "task": "cart_pole",
+            "algorithm": "ppo",
+            "num_runs": 1,
+            "final_reward_mean": 1.5,
+            "final_success_rate_mean": 0.8,
+            "final_success_rate_stable_mean": 0.75,
+            "final_success_rate_std": 0.1,
+            "training_fps_mean": 100.0,
+            "environment_fps_mean": 500.0,
+            "peak_gpu_memory_mb_mean": 0.0,
+            "steps_to_success_threshold_mean": 256.0,
+            "steps_to_success_threshold_first_hit_mean": 128.0,
+        },
+        {
+            "task": "cart_pole",
+            "algorithm": "grpo",
+            "num_runs": 1,
+            "final_reward_mean": 1.7,
+            "final_success_rate_mean": 0.85,
+            "final_success_rate_stable_mean": 0.8,
+            "final_success_rate_std": 0.05,
+            "training_fps_mean": 90.0,
+            "environment_fps_mean": 480.0,
+            "peak_gpu_memory_mb_mean": 0.0,
+            "steps_to_success_threshold_mean": 200.0,
+            "steps_to_success_threshold_first_hit_mean": 160.0,
+        },
+    ]
+    leaderboard = [
+        {
+            "rank": 1,
+            "algorithm": "ppo",
+            "score": 0.8,
+            "steps_to_success_threshold": 256.0,
+            "success_rate_std": 0.1,
+            "avg_success_rate": 0.8,
+            "avg_success_rate_stable": 0.75,
+            "avg_final_reward": 1.5,
+            "tasks_covered": 1,
+        }
+    ]
+    plot_artifacts = {"cart_pole_success_rate": str(tmp_path / "plot.svg")}
+    (tmp_path / "plot.svg").write_text("<svg></svg>", encoding="utf-8")
+
+    output_path = tmp_path / "benchmark_report.md"
+    generate_markdown_report(
+        run_results,
+        aggregate_results,
+        leaderboard,
+        plot_artifacts,
+        {"device": "cpu", "iterations": 10},
+        output_path,
+    )
+    report = output_path.read_text(encoding="utf-8")
+    assert "RL Benchmark Report" in report
+    assert "Benchmark Overview" in report
+    assert "Leaderboard" in report
+    assert "Plots" in report
+    assert "cart_pole" in report
+    assert "grpo" in report
diff --git a/tests/common.py b/tests/common.py
index 962d9f2d..bbbdc8fc 100644
--- a/tests/common.py
+++ b/tests/common.py
@@ -17,7 +17,6 @@
 from unittest import TestLoader
 from fnmatch import fnmatchcase
 
-
 __all__ = ["UnittestMetaclass", "OrderedTestLoader"]
 
 
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..d0824fd0
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,86 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+import os
+import pytest
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--renderer",
+        action="store",
+        default="hybrid",
+        help="Specify the renderer backend: hybrid, or fast-rt",
+    )
+
+
+def pytest_configure(config):
+    renderer = config.getoption("--renderer")
+    if renderer:
+        if renderer not in ["hybrid", "fast-rt"]:
+            pytest.exit(
+                f"Invalid renderer: {renderer}. Must be one of 'hybrid', 'fast-rt'"
+            )
+
+        # Override the global default renderer in the simulation config
+        from embodichain.lab.sim import cfg
+
+        cfg.DEFAULT_RENDERER = renderer
+
+        # PREVENT IMPLICIT INITIALIZATION BY EXPLICITLY INITIALIZING DEXSIM HERE
+        import dexsim
+        import dexsim.types
+
+        # Map string to dexsim configuration types
+        renderer_map = {
+            "hybrid": dexsim.types.Renderer.HYBRID,
+            "fast-rt": dexsim.types.Renderer.FASTRT,
+        }
+        backend_map = {
+            "hybrid": dexsim.types.Backend.VULKAN,
+            "fast-rt": dexsim.types.Backend.VULKAN,
+        }
+
+        if dexsim.get_world_num() == 0:
+            sim_config = dexsim.WorldConfig()
+            sim_config.renderer = renderer_map.get(
+                renderer, dexsim.types.Renderer.HYBRID
+            )
+            sim_config.backend = backend_map.get(renderer, dexsim.types.Backend.VULKAN)
+            sim_config.open_windows = False
+            # This triggers initialization with the correct properties immediately.
+            dexsim.init_sim_engine(sim_config)
+
+
+@pytest.fixture(autouse=True, scope="function")
+def wait_scene_destruction_after_test():
+    """Ensure C++ engine scenes are fully destructed globally after each test exits."""
+    yield
+
+    # [Improvement - delayed destruction]: top-level dequeue and traceback cleanup.
+    # Pytest retains Tracebacks on failure; breaking the exception stack ensures
+    # that local variables of temporary objects on the stack can be garbage collected.
+    import sys
+    import gc
+
+    sys.last_traceback = None
+    sys.last_value = None
+    sys.last_type = None
+
+    # [Core fix]: drain the cleanup queue to consume SimManager and related objects
+    from embodichain.lab.sim.sim_manager import SimulationManager
+
+    SimulationManager.flush_cleanup_queue()
diff --git a/tests/docs/__init__.py b/tests/docs/__init__.py
new file mode 100644
index 00000000..dd650e90
--- /dev/null
+++ b/tests/docs/__init__.py
@@ -0,0 +1,15 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
diff --git a/tests/docs/conftest.py b/tests/docs/conftest.py
new file mode 100644
index 00000000..d0a9f91f
--- /dev/null
+++ b/tests/docs/conftest.py
@@ -0,0 +1,39 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import importlib.util
+import sys
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parents[2]
+_SCRIPT = _REPO_ROOT / "docs" / "scripts" / "merge_published_site.py"
+
+
+def _load_merge_module():
+    spec = importlib.util.spec_from_file_location("merge_published_site", _SCRIPT)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"Cannot load {_SCRIPT}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules["merge_published_site"] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+_merge = _load_merge_module()
+load_versions_manifest = _merge.load_versions_manifest
+merge_published_site = _merge.merge_published_site
diff --git a/tests/docs/test_merge_published_site.py b/tests/docs/test_merge_published_site.py
new file mode 100644
index 00000000..e80369fc
--- /dev/null
+++ b/tests/docs/test_merge_published_site.py
@@ -0,0 +1,154 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Tests for multi-version docs merge (CI GitHub Pages)."""
+
+from __future__ import annotations
+
+import json
+import shutil
+from pathlib import Path
+
+import pytest
+
+from .conftest import load_versions_manifest, merge_published_site
+
+
+def _write_published_site(root: Path, versions: list[str], latest: str) -> None:
+    root.mkdir(parents=True, exist_ok=True)
+    manifest = {
+        "latest": latest,
+        "versions": [
+            {
+                "name": v,
+                "url": f"./{v}/index.html",
+                "type": "tag" if v.startswith("v") else "branch",
+            }
+            for v in versions
+        ],
+    }
+    (root / "versions.json").write_text(json.dumps(manifest), encoding="utf-8")
+    for v in versions:
+        d = root / v
+        d.mkdir(parents=True, exist_ok=True)
+        (d / "index.html").write_text(f"<html>{v} published</html>", encoding="utf-8")
+
+
+@pytest.fixture
+def published_site(tmp_path: Path) -> Path:
+    published = tmp_path / "published"
+    _write_published_site(published, ["v0.1.0", "v0.2.0", "main"], latest="v0.2.0")
+    return published
+
+
+@pytest.fixture
+def build_dir(tmp_path: Path) -> Path:
+    build = tmp_path / "build" / "html"
+    build.mkdir(parents=True)
+    (build / "main").mkdir()
+    (build / "main" / "index.html").write_text(
+        "<html>stale main from cache</html>", encoding="utf-8"
+    )
+    return build
+
+
+def test_load_manifest_from_local(published_site: Path) -> None:
+    manifest = load_versions_manifest(published_root=published_site)
+    assert manifest is not None
+    assert manifest["latest"] == "v0.2.0"
+    assert len(manifest["versions"]) == 3
+
+
+def test_merge_fills_missing_tags_from_published(
+    build_dir: Path, published_site: Path
+) -> None:
+    """Simulates main push: cache only has main/, live site has release tags."""
+    merged = merge_published_site(
+        build_dir,
+        published_root=published_site,
+        skip_versions=frozenset({"main"}),
+    )
+    assert merged == ["v0.1.0", "v0.2.0"]
+    assert (
+        (build_dir / "v0.1.0" / "index.html")
+        .read_text(encoding="utf-8")
+        .startswith("<html>v0.1.0")
+    )
+    assert (build_dir / "v0.2.0").is_dir()
+    assert (build_dir / "main" / "index.html").read_text(encoding="utf-8") == (
+        "<html>stale main from cache</html>"
+    )
+
+
+def test_merge_does_not_overwrite_existing_version(
+    build_dir: Path, published_site: Path
+) -> None:
+    (build_dir / "v0.2.0").mkdir()
+    (build_dir / "v0.2.0" / "index.html").write_text(
+        "<html>v0.2.0 local cache</html>", encoding="utf-8"
+    )
+    merged = merge_published_site(
+        build_dir,
+        published_root=published_site,
+        skip_versions=frozenset({"main"}),
+    )
+    assert merged == ["v0.1.0"]
+    assert "local cache" in (build_dir / "v0.2.0" / "index.html").read_text(
+        encoding="utf-8"
+    )
+
+
+def test_merge_skip_version_for_fresh_tag_build(
+    build_dir: Path, published_site: Path
+) -> None:
+    """Simulates tag push: do not pull the tag being built from published."""
+    merged = merge_published_site(
+        build_dir,
+        published_root=published_site,
+        skip_versions=frozenset({"v0.3.0"}),
+    )
+    assert "v0.3.0" not in merged
+    assert (build_dir / "v0.1.0").is_dir()
+
+
+def test_main_push_after_tag_preserves_releases(
+    build_dir: Path, published_site: Path, tmp_path: Path
+) -> None:
+    """End-to-end: stale cache + published site (post-tag) + rebuild main/."""
+    _write_published_site(
+        published_site,
+        ["v0.1.0", "v0.2.0", "v0.3.0", "main"],
+        latest="v0.3.0",
+    )
+    (published_site / "v0.3.0" / "index.html").write_text(
+        "<html>v0.3.0 published</html>", encoding="utf-8"
+    )
+
+    merge_published_site(
+        build_dir,
+        published_root=published_site,
+        skip_versions=frozenset({"main"}),
+    )
+
+    shutil.rmtree(build_dir / "main")
+    (build_dir / "main").mkdir()
+    (build_dir / "main" / "index.html").write_text(
+        "<html>main rebuilt</html>", encoding="utf-8"
+    )
+
+    for name in ("v0.1.0", "v0.2.0", "v0.3.0"):
+        assert (build_dir / name).is_dir(), f"missing {name} after main push simulation"
+    assert "rebuilt" in (build_dir / "main" / "index.html").read_text(encoding="utf-8")
diff --git a/tests/gen_sim/simready_pipeline/test_config.py b/tests/gen_sim/simready_pipeline/test_config.py
new file mode 100644
index 00000000..9e0d885f
--- /dev/null
+++ b/tests/gen_sim/simready_pipeline/test_config.py
@@ -0,0 +1,116 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+REPO_ROOT = Path(__file__).resolve().parents[3]
+CONFIG_PATH = (
+    REPO_ROOT
+    / "embodichain"
+    / "gen_sim"
+    / "simready_pipeline"
+    / "configs"
+    / "gen_config.json"
+)
+ALLOWED_SCENE_MESH_STRATEGIES = {"first", "concatenate"}
+
+
+@pytest.fixture(scope="module")
+def gen_config() -> dict[str, Any]:
+    with CONFIG_PATH.open("r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+def test_gen_config_uses_mesh_processing_schema(gen_config: dict[str, Any]) -> None:
+    assert "ingest" in gen_config
+    assert "mesh_processing" in gen_config
+    assert "llm" in gen_config
+
+
+def test_mesh_processing_declares_expected_stages(
+    gen_config: dict[str, Any],
+) -> None:
+    mesh_processing = gen_config["mesh_processing"]
+
+    assert "trimesh_ingest" in mesh_processing
+    assert "blender_remesh_bake" in mesh_processing
+    assert "blender_cleanup_decimate" in mesh_processing
+    assert "simready_finalize" in mesh_processing
+
+
+def test_ingest_config_declares_canonical_mesh_formats(
+    gen_config: dict[str, Any],
+) -> None:
+    ingest_config = gen_config["ingest"]
+    parseable_mesh_formats = ingest_config["parseable_mesh_formats"]
+
+    assert ingest_config["canonical_asset_name"].endswith(".obj")
+    assert isinstance(parseable_mesh_formats, list)
+    assert parseable_mesh_formats
+    assert all(fmt.startswith(".") for fmt in parseable_mesh_formats)
+
+
+def test_trimesh_ingest_config_values_are_valid(
+    gen_config: dict[str, Any],
+) -> None:
+    trimesh_config = gen_config["mesh_processing"]["trimesh_ingest"]
+    export_config = trimesh_config["export"]
+
+    assert trimesh_config["scene_mesh_strategy"] in ALLOWED_SCENE_MESH_STRATEGIES
+    assert trimesh_config["mtl_name"].endswith(".mtl")
+    assert isinstance(trimesh_config["visual"]["default_face_color"], list)
+    assert isinstance(trimesh_config["visual"]["pbr_base_color_only"], bool)
+    assert isinstance(export_config["include_normals"], bool)
+    assert isinstance(export_config["include_color"], bool)
+    assert isinstance(export_config["include_texture"], bool)
+    assert isinstance(export_config["write_texture"], bool)
+
+
+def test_blender_mesh_processing_values_are_valid(
+    gen_config: dict[str, Any],
+) -> None:
+    mesh_processing = gen_config["mesh_processing"]
+    remesh_bake = mesh_processing["blender_remesh_bake"]
+    cleanup_decimate = mesh_processing["blender_cleanup_decimate"]
+
+    assert remesh_bake["remesh"]["voxel_size"] > 0.0
+    assert remesh_bake["remesh"]["min_voxel_size_ratio"] > 0.0
+    assert 0.0 < remesh_bake["decimate"]["ratio"] <= 1.0
+    assert remesh_bake["bake"]["texture_size"] > 0
+    assert isinstance(cleanup_decimate["enabled"], bool)
+    assert cleanup_decimate["cleanup"]["merge_dist"] > 0.0
+    assert isinstance(cleanup_decimate["cleanup"]["remove_non_manifold"], bool)
+    assert isinstance(cleanup_decimate["cleanup"]["triangulate"], bool)
+    assert 0.0 < cleanup_decimate["simplify"]["ratio"] <= 1.0
+    assert cleanup_decimate["simplify"]["weld_distance"] > 0.0
+    assert isinstance(cleanup_decimate["simplify"]["collapse_triangulate"], bool)
+
+
+def test_simready_finalize_config_values_are_valid(
+    gen_config: dict[str, Any],
+) -> None:
+    render_resolution = gen_config["mesh_processing"]["simready_finalize"][
+        "render_resolution"
+    ]
+
+    assert isinstance(render_resolution, int)
+    assert render_resolution > 0
diff --git a/tests/gen_sim/simready_pipeline/test_trimesh_ingest.py b/tests/gen_sim/simready_pipeline/test_trimesh_ingest.py
new file mode 100644
index 00000000..7c20c677
--- /dev/null
+++ b/tests/gen_sim/simready_pipeline/test_trimesh_ingest.py
@@ -0,0 +1,153 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import importlib
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+trimesh = pytest.importorskip("trimesh")
+
+BOX_VERTEX_COUNT = 8
+CONCATENATED_BOX_VERTEX_COUNT = BOX_VERTEX_COUNT * 2
+DEFAULT_VISUAL_RESULT: dict[str, Any] = {
+    "visual_category": "None",
+    "material_kind": None,
+    "material": {"textures": {}},
+    "uv_present": False,
+    "texture_count_total": 0,
+}
+
+
+def _import_ingest_utils():
+    return importlib.import_module(
+        "embodichain.gen_sim.simready_pipeline.utils.ingest_utils"
+    )
+
+
+def _write_box_obj(path: Path) -> None:
+    mesh = trimesh.creation.box(extents=(1.0, 1.0, 1.0))
+    mesh.export(path)
+
+
+def test_load_one_trimesh_uses_first_scene_geometry(monkeypatch) -> None:
+    ingest_utils = _import_ingest_utils()
+    first_box = trimesh.creation.box(extents=(1.0, 1.0, 1.0))
+    second_box = trimesh.creation.box(extents=(1.0, 1.0, 1.0))
+    scene = trimesh.Scene({"first": first_box, "second": second_box})
+    monkeypatch.setattr(ingest_utils.trimesh, "load_mesh", lambda _: scene)
+
+    mesh = ingest_utils.load_one_trimesh("unused.obj", scene_mesh_strategy="first")
+
+    assert len(mesh.vertices) == BOX_VERTEX_COUNT
+
+
+def test_load_one_trimesh_concatenates_scene_geometry(monkeypatch) -> None:
+    ingest_utils = _import_ingest_utils()
+    first_box = trimesh.creation.box(extents=(1.0, 1.0, 1.0))
+    second_box = trimesh.creation.box(extents=(1.0, 1.0, 1.0))
+    scene = trimesh.Scene({"first": first_box, "second": second_box})
+    monkeypatch.setattr(ingest_utils.trimesh, "load_mesh", lambda _: scene)
+
+    mesh = ingest_utils.load_one_trimesh(
+        "unused.obj", scene_mesh_strategy="concatenate"
+    )
+
+    assert len(mesh.vertices) == CONCATENATED_BOX_VERTEX_COUNT
+
+
+def test_trimesh_parse_ingest_writes_canonical_obj(
+    tmp_path: Path,
+    monkeypatch,
+) -> None:
+    ingest_utils = _import_ingest_utils()
+    source_file = tmp_path / "source.obj"
+    asset_source = tmp_path / "asset_source"
+    _write_box_obj(source_file)
+    monkeypatch.setattr(
+        ingest_utils,
+        "classify_visual",
+        lambda _: DEFAULT_VISUAL_RESULT,
+    )
+
+    result = ingest_utils.trimesh_parse_ingest(
+        source_file=source_file,
+        asset_source=asset_source,
+        obj_name="asset.obj",
+        config={
+            "visual": {"default_face_color": [128, 128, 128, 255]},
+            "export": {
+                "include_normals": True,
+                "include_color": True,
+                "include_texture": True,
+                "write_texture": False,
+            },
+        },
+    )
+
+    assert (asset_source / "asset.obj").is_file()
+    assert result["visual_ingest"] == "no visual"
+    assert result["visual_source"]["visual_category"] == "None"
+    assert result["visual_source"]["uv_present"] is False
+    assert result["visual_source"]["textures"] == {}
+
+
+def test_trimesh_parse_ingest_passes_export_config(
+    tmp_path: Path,
+    monkeypatch,
+) -> None:
+    ingest_utils = _import_ingest_utils()
+    source_file = tmp_path / "source.obj"
+    asset_source = tmp_path / "asset_source"
+    captured_export_kwargs: dict[str, Any] = {}
+    _write_box_obj(source_file)
+    monkeypatch.setattr(
+        ingest_utils,
+        "classify_visual",
+        lambda _: DEFAULT_VISUAL_RESULT,
+    )
+
+    def fake_export_obj(mesh, **kwargs):
+        captured_export_kwargs.update(kwargs)
+        return "o asset\n", {}
+
+    monkeypatch.setattr(
+        ingest_utils.trimesh.exchange.obj, "export_obj", fake_export_obj
+    )
+
+    ingest_utils.trimesh_parse_ingest(
+        source_file=source_file,
+        asset_source=asset_source,
+        obj_name="asset.obj",
+        config={
+            "mtl_name": "custom_asset.mtl",
+            "export": {
+                "include_normals": False,
+                "include_color": False,
+                "include_texture": False,
+                "write_texture": True,
+            },
+        },
+    )
+
+    assert captured_export_kwargs["mtl_name"] == "custom_asset.mtl"
+    assert captured_export_kwargs["include_normals"] is False
+    assert captured_export_kwargs["include_color"] is False
+    assert captured_export_kwargs["include_texture"] is False
+    assert captured_export_kwargs["write_texture"] is True
diff --git a/tests/gym/envs/managers/test_dataset_functors.py b/tests/gym/envs/managers/test_dataset_functors.py
index d18010fc..1acd54b6 100644
--- a/tests/gym/envs/managers/test_dataset_functors.py
+++ b/tests/gym/envs/managers/test_dataset_functors.py
@@ -22,7 +22,6 @@
 
 from unittest.mock import MagicMock, Mock, patch
 
-
 # Skip all tests if LeRobot is not available
 try:
     from embodichain.lab.gym.envs.managers.datasets import (
diff --git a/tests/gym/envs/managers/test_event_functors.py b/tests/gym/envs/managers/test_event_functors.py
index e7e206de..981e44ab 100644
--- a/tests/gym/envs/managers/test_event_functors.py
+++ b/tests/gym/envs/managers/test_event_functors.py
@@ -283,6 +283,13 @@ def get_rigid_object_group(self, uid: str):
     def update(self, step: int = 1):
         pass
 
+    def set_indirect_lighting(self, path: str) -> None:
+        self._last_indirect_lighting = path
+
+    def set_emission_light(self, color=None, intensity=None) -> None:
+        self._last_emission_color = color
+        self._last_emission_intensity = intensity
+
 
 class MockEnv:
     """Mock environment for event functor tests."""
@@ -324,6 +331,10 @@ def __init__(self, num_envs: int = 4, num_joints: int = 6):
 from embodichain.lab.gym.envs.managers.randomization.spatial import (
     randomize_articulation_root_pose,
 )
+from embodichain.lab.gym.envs.managers.randomization.visual import (
+    randomize_indirect_lighting,
+)
+from embodichain.lab.gym.envs.managers import FunctorCfg
 
 
 class TestResolveUids:
@@ -815,3 +826,187 @@ def test_handles_nonexistent_link_pattern(self):
                 mass_range=(0.5, 2.0),
                 link_names="nonexistent_link",
             )
+
+
+class TestRandomizeIndirectLighting:
+    """Tests for the randomize_indirect_lighting functor."""
+
+    def _make_cfg(self, params: dict) -> FunctorCfg:
+        cfg = FunctorCfg(func=randomize_indirect_lighting)
+        cfg.params = params
+        return cfg
+
+    # ------------------------------------------------------------------
+    # Init validation
+    # ------------------------------------------------------------------
+
+    def test_raises_when_no_params(self, tmp_path):
+        """Raises ValueError when neither HDR path nor emissive params given."""
+        env = MockEnv()
+        cfg = self._make_cfg({})
+        with pytest.raises(ValueError, match="provide either"):
+            randomize_indirect_lighting(cfg, env)
+
+    def test_raises_when_both_hdr_and_emissive(self, tmp_path):
+        """Raises ValueError when HDR path and emissive params are both set."""
+        hdr_dir = tmp_path / "hdr"
+        hdr_dir.mkdir()
+        (hdr_dir / "a.hdr").write_text("")
+        env = MockEnv()
+        cfg = self._make_cfg(
+            {
+                "path": str(hdr_dir),
+                "emissive_color_range": [[0.5, 0.5, 0.5], [1.0, 1.0, 1.0]],
+            }
+        )
+        with pytest.raises(ValueError, match="mutually exclusive"):
+            randomize_indirect_lighting(cfg, env)
+
+    # ------------------------------------------------------------------
+    # HDR mode
+    # ------------------------------------------------------------------
+
+    def test_hdr_mode_calls_set_indirect_lighting(self, tmp_path):
+        """HDR mode calls set_indirect_lighting with one of the .hdr files."""
+        hdr_dir = tmp_path / "hdr"
+        hdr_dir.mkdir()
+        files = ["sky1.hdr", "sky2.hdr", "sky3.hdr"]
+        for f in files:
+            (hdr_dir / f).write_text("")
+        env = MockEnv()
+        cfg = self._make_cfg({"path": str(hdr_dir)})
+        functor = randomize_indirect_lighting(cfg, env)
+
+        functor(env, None)
+
+        chosen = env.sim._last_indirect_lighting
+        assert chosen.endswith(".hdr")
+        assert any(chosen.endswith(f) for f in files)
+
+    def test_hdr_mode_does_not_call_set_emission_light(self, tmp_path):
+        """HDR mode must not touch emissive light."""
+        hdr_dir = tmp_path / "hdr"
+        hdr_dir.mkdir()
+        (hdr_dir / "sky.hdr").write_text("")
+        env = MockEnv()
+        cfg = self._make_cfg({"path": str(hdr_dir)})
+        functor = randomize_indirect_lighting(cfg, env)
+
+        # Ensure attribute not set by HDR call
+        env.sim._last_emission_color = "sentinel"
+        env.sim._last_emission_intensity = "sentinel"
+
+        functor(env, None)
+
+        assert env.sim._last_emission_color == "sentinel"
+        assert env.sim._last_emission_intensity == "sentinel"
+
+    def test_hdr_mode_noop_when_no_hdr_files(self, tmp_path):
+        """HDR mode is a no-op (no crash) when the folder has no .hdr files."""
+        hdr_dir = tmp_path / "empty"
+        hdr_dir.mkdir()
+        env = MockEnv()
+        cfg = self._make_cfg({"path": str(hdr_dir)})
+        functor = randomize_indirect_lighting(cfg, env)
+
+        functor(env, None)  # must not raise
+
+        assert not hasattr(env.sim, "_last_indirect_lighting")
+
+    def test_hdr_mode_selects_from_available_files(self, tmp_path):
+        """HDR mode always selects a file from the provided folder over many calls."""
+        hdr_dir = tmp_path / "hdr"
+        hdr_dir.mkdir()
+        names = [f"env{i}.hdr" for i in range(5)]
+        for n in names:
+            (hdr_dir / n).write_text("")
+        env = MockEnv()
+        cfg = self._make_cfg({"path": str(hdr_dir)})
+        functor = randomize_indirect_lighting(cfg, env)
+
+        chosen_set = set()
+        for _ in range(50):
+            functor(env, None)
+            chosen_set.add(env.sim._last_indirect_lighting)
+
+        # All chosen paths must be valid HDR files from the folder
+        valid_paths = {str(hdr_dir / n) for n in names}
+        assert chosen_set.issubset(valid_paths)
+
+    # ------------------------------------------------------------------
+    # Emissive mode
+    # ------------------------------------------------------------------
+
+    def test_emissive_color_mode_calls_set_emission_light(self):
+        """Emissive mode calls set_emission_light with color in range."""
+        env = MockEnv()
+        cfg = self._make_cfg(
+            {"emissive_color_range": [[0.2, 0.3, 0.4], [0.6, 0.7, 0.8]]}
+        )
+        functor = randomize_indirect_lighting(cfg, env)
+
+        functor(env, None)
+
+        color = env.sim._last_emission_color
+        assert color is not None
+        assert len(color) == 3
+        assert 0.2 <= color[0] <= 0.6
+        assert 0.3 <= color[1] <= 0.7
+        assert 0.4 <= color[2] <= 0.8
+        assert env.sim._last_emission_intensity is None
+
+    def test_emissive_intensity_mode_calls_set_emission_light(self):
+        """Emissive mode calls set_emission_light with intensity in range."""
+        env = MockEnv()
+        cfg = self._make_cfg({"emissive_intensity_range": [50.0, 150.0]})
+        functor = randomize_indirect_lighting(cfg, env)
+
+        functor(env, None)
+
+        assert env.sim._last_emission_color is None
+        intensity = env.sim._last_emission_intensity
+        assert intensity is not None
+        assert 50.0 <= intensity <= 150.0
+
+    def test_emissive_color_and_intensity_together(self):
+        """Both color and intensity can be set together in emissive mode."""
+        env = MockEnv()
+        cfg = self._make_cfg(
+            {
+                "emissive_color_range": [[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]],
+                "emissive_intensity_range": [80.0, 120.0],
+            }
+        )
+        functor = randomize_indirect_lighting(cfg, env)
+
+        functor(env, None)
+
+        color = env.sim._last_emission_color
+        intensity = env.sim._last_emission_intensity
+        assert color is not None and len(color) == 3
+        assert all(0.0 <= c <= 1.0 for c in color)
+        assert 80.0 <= intensity <= 120.0
+
+    def test_emissive_mode_does_not_call_set_indirect_lighting(self):
+        """Emissive mode must not touch indirect lighting (HDR)."""
+        env = MockEnv()
+        cfg = self._make_cfg({"emissive_intensity_range": [100.0, 100.0]})
+        functor = randomize_indirect_lighting(cfg, env)
+
+        functor(env, None)
+
+        assert not hasattr(env.sim, "_last_indirect_lighting")
+
+    def test_emissive_values_vary_across_calls(self):
+        """Emissive intensity is sampled fresh on each call (not fixed)."""
+        env = MockEnv()
+        cfg = self._make_cfg({"emissive_intensity_range": [0.0, 1000.0]})
+        functor = randomize_indirect_lighting(cfg, env)
+
+        intensities = set()
+        for _ in range(20):
+            functor(env, None)
+            intensities.add(round(env.sim._last_emission_intensity, 4))
+
+        # Over 20 draws from [0, 1000] all values being identical is astronomically unlikely
+        assert len(intensities) > 1
diff --git a/tests/gym/envs/test_base_env.py b/tests/gym/envs/test_base_env.py
index fbf3c0de..27767bef 100644
--- a/tests/gym/envs/test_base_env.py
+++ b/tests/gym/envs/test_base_env.py
@@ -116,15 +116,18 @@ def _extend_obs(self, obs, **kwargs):
 class BaseEnvTest:
     """Shared test logic for CPU and CUDA."""
 
-    def setup_simulation(self, sim_device):
-        self.env = gym.make(
+    @classmethod
+    def setup_simulation_hook(cls, sim_device):
+        if hasattr(cls, "env"):
+            return
+        cls.env = gym.make(
             "RandomReach-v1",
             num_envs=NUM_ENVS,
             headless=True,
             device=sim_device,
         )
-        self.device = self.env.get_wrapper_attr("device")
-        self.num_envs = self.env.get_wrapper_attr("num_envs")
+        cls.device = cls.env.get_wrapper_attr("device")
+        cls.num_envs = cls.env.get_wrapper_attr("num_envs")
 
     def test_env_rollout(self):
         """Test environment rollout."""
@@ -168,19 +171,39 @@ def test_env_rollout(self):
         assert obs.get("robot") is not None, "Expected 'robot' in the obs dict"
 
     def teardown_method(self):
+        pass
+
+    @classmethod
+    def teardown_class(cls):
         """Clean up resources after each test method."""
-        self.env.close()
+        if hasattr(cls, "env") and cls.env is not None:
+            cls.env.close()
+        import embodichain.lab.sim as om
+
+        om.SimulationManager.flush_cleanup_queue()
+        import gc
+
+        gc.collect()
 
 
+# @pytest.mark.skip(reason="Skipping tests temporarily")
 class TestBaseEnvCPU(BaseEnvTest):
     def setup_method(self):
-        self.setup_simulation("cpu")
+        pass
 
+    @classmethod
+    def setup_class(cls):
+        cls.setup_simulation("cpu")
 
-@pytest.mark.skip(reason="Skipping CUDA tests temporarily")
+
+# @pytest.mark.skip(reason="Skipping tests temporarily")
 class TestBaseEnvCUDA(BaseEnvTest):
     def setup_method(self):
-        self.setup_simulation("cuda")
+        pass
+
+    @classmethod
+    def setup_class(cls):
+        cls.setup_simulation("cuda")
 
 
 if __name__ == "__main__":
@@ -189,3 +212,21 @@ def setup_method(self):
     test_cpu.setup_method()
     test_cpu.test_env_rollout()
     test_cpu.teardown_method()
+
+# Patch BaseEnvTest
+import sys
+
+
+def new_setup_simulation(cls, sim_device):
+    print(">>> ENTERING setup_simulation", file=sys.stderr)
+    if hasattr(cls, "env"):
+        return
+    cls.env = gym.make(
+        "RandomReach-v1", num_envs=NUM_ENVS, headless=True, device=sim_device
+    )
+    cls.device = cls.env.get_wrapper_attr("device")
+    cls.num_envs = cls.env.get_wrapper_attr("num_envs")
+    print(">>> EXITING setup_simulation", file=sys.stderr)
+
+
+BaseEnvTest.setup_simulation = classmethod(new_setup_simulation)
diff --git a/tests/gym/envs/test_embodied_env.py b/tests/gym/envs/test_embodied_env.py
index feebdeda..9539381e 100644
--- a/tests/gym/envs/test_embodied_env.py
+++ b/tests/gym/envs/test_embodied_env.py
@@ -20,6 +20,7 @@
 import numpy as np
 import gymnasium as gym
 
+from embodichain.lab.sim.cfg import RenderCfg
 from embodichain.lab.gym.envs import EmbodiedEnvCfg
 from embodichain.lab.sim.objects import RigidObject, Robot
 from embodichain.lab.gym.utils.gym_utils import config_to_cfg, DEFAULT_MANAGER_MODULES
@@ -27,7 +28,7 @@
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.data import get_data_path
 
-NUM_ENVS = 10
+NUM_ENVS = 2
 
 urdf_path = get_data_path("UniversalRobots/UR5/UR5.urdf")
 METADATA = {
@@ -119,13 +120,14 @@
 class EmbodiedEnvTest:
     """Shared test logic for CPU and CUDA."""
 
-    def setup_simulation(self, sim_device, enable_rt):
+    def setup_simulation(self, sim_device):
         cfg: EmbodiedEnvCfg = config_to_cfg(
             METADATA, manager_modules=DEFAULT_MANAGER_MODULES
         )
         cfg.num_envs = NUM_ENVS
         cfg.sim_cfg = SimulationManagerCfg(
-            headless=True, sim_device=sim_device, enable_rt=enable_rt
+            headless=True,
+            sim_device=sim_device,
         )
 
         self.env = gym.make(id=METADATA["id"], cfg=cfg)
@@ -159,22 +161,23 @@ def test_env_rollout(self):
 
     def teardown_method(self):
         """Clean up resources after each test method."""
-        self.env.close()
+        if hasattr(self, "env") and self.env is not None:
+            self.env.close()
+        import embodichain.lab.sim as om
 
+        om.SimulationManager.flush_cleanup_queue()
+        import gc
 
-@pytest.mark.skip(reason="Skipping CUDA tests temporarily")
-class TestCPU(EmbodiedEnvTest):
-    def setup_method(self):
-        self.setup_simulation("cpu", enable_rt=False)
+        gc.collect()
 
 
-@pytest.mark.skip(reason="Skipping CUDA tests temporarily")
-class TestCPURT(EmbodiedEnvTest):
+# @pytest.mark.skip(reason="Skipping tests temporarily")
+class TestCPU(EmbodiedEnvTest):
     def setup_method(self):
-        self.setup_simulation("cpu", enable_rt=True)
+        self.setup_simulation("cpu")
 
 
-@pytest.mark.skip(reason="Skipping CUDA tests temporarily")
+# @pytest.mark.skip(reason="Skipping tests temporarily")
 class TestCUDA(EmbodiedEnvTest):
     def setup_method(self):
-        self.setup_simulation("cuda", enable_rt=False)
+        self.setup_simulation("cuda")
diff --git a/tests/sim/atomic_actions/__init__.py b/tests/sim/atomic_actions/__init__.py
new file mode 100644
index 00000000..0671165d
--- /dev/null
+++ b/tests/sim/atomic_actions/__init__.py
@@ -0,0 +1,17 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Tests for atomic actions module."""
diff --git a/tests/sim/atomic_actions/test_actions.py b/tests/sim/atomic_actions/test_actions.py
new file mode 100644
index 00000000..ba7324cc
--- /dev/null
+++ b/tests/sim/atomic_actions/test_actions.py
@@ -0,0 +1,304 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Tests for atomic action implementations (MoveAction, PickUpAction, PlaceAction)."""
+
+from __future__ import annotations
+
+import pytest
+import torch
+from unittest.mock import MagicMock, Mock
+
+from embodichain.lab.sim.atomic_actions.core import (
+    ActionCfg,
+    Affordance,
+    ObjectSemantics,
+)
+from embodichain.lab.sim.atomic_actions.actions import (
+    MoveAction,
+    MoveActionCfg,
+    PickUpAction,
+    PickUpActionCfg,
+    PlaceAction,
+    PlaceActionCfg,
+)
+
+# ---------------------------------------------------------------------------
+# Mock Helpers
+# ---------------------------------------------------------------------------
+
+NUM_ENVS = 2  # number of parallel environments used in tests
+ARM_DOF = 6  # typical arm joint count
+HAND_DOF = 2  # typical hand joint count
+TOTAL_DOF = ARM_DOF + HAND_DOF
+
+
+def _make_mock_robot(
+    num_envs: int = NUM_ENVS,
+    arm_dof: int = ARM_DOF,
+    hand_dof: int = HAND_DOF,
+) -> Mock:
+    """Create a mock Robot with arm and hand control parts."""
+    robot = Mock()
+    robot.device = torch.device("cpu")
+    robot.dof = arm_dof + hand_dof
+
+    def get_qpos(name=None):
+        if name == "arm":
+            return torch.zeros(num_envs, arm_dof)
+        if name == "hand":
+            return torch.zeros(num_envs, hand_dof)
+        # Full qpos
+        return torch.zeros(num_envs, arm_dof + hand_dof)
+
+    robot.get_qpos = get_qpos
+
+    def get_joint_ids(name=None):
+        if name == "arm":
+            return list(range(arm_dof))
+        if name == "hand":
+            return list(range(arm_dof, arm_dof + hand_dof))
+        return list(range(arm_dof + hand_dof))
+
+    robot.get_joint_ids = get_joint_ids
+
+    # compute_ik: return success and identity-like qpos
+    def compute_ik(pose=None, qpos_seed=None, name=None, joint_seed=None):
+        seed = joint_seed if joint_seed is not None else qpos_seed
+        if seed is None:
+            seed = torch.zeros(num_envs, arm_dof)
+        success = torch.ones(num_envs, dtype=torch.bool)
+        return success, seed.clone()
+
+    robot.compute_ik = compute_ik
+
+    # compute_fk: return identity-like poses
+    def compute_fk(qpos=None, name=None, to_matrix=True):
+        n = qpos.shape[0] if qpos is not None else num_envs
+        poses = torch.eye(4).unsqueeze(0).repeat(n, 1, 1)
+        return poses
+
+    robot.compute_fk = compute_fk
+
+    return robot
+
+
+def _make_mock_motion_generator(robot: Mock | None = None) -> Mock:
+    """Create a mock MotionGenerator."""
+    mg = Mock()
+    mg.robot = robot or _make_mock_robot()
+    mg.device = mg.robot.device
+    return mg
+
+
+# ---------------------------------------------------------------------------
+# MoveAction
+# ---------------------------------------------------------------------------
+
+
+class TestMoveActionHelpers:
+    """Tests for MoveAction helper methods that don't need simulation."""
+
+    def setup_method(self):
+        self.robot = _make_mock_robot()
+        self.mg = _make_mock_motion_generator(self.robot)
+        self.cfg = MoveActionCfg(sample_interval=50)
+        self.action = MoveAction(self.mg, cfg=self.cfg)
+
+    def test_init_sets_attributes(self):
+        assert self.action.n_envs == NUM_ENVS
+        assert self.action.dof == ARM_DOF
+        assert self.action.device == torch.device("cpu")
+
+    def test_resolve_pose_target_from_4x4(self):
+        target = torch.eye(4)
+        is_success, result = self.action._resolve_pose_target(
+            target, action_name="TestAction"
+        )
+        assert is_success is True
+        assert result.shape == (NUM_ENVS, 4, 4)
+        # Single pose should be repeated for all envs
+        for i in range(NUM_ENVS):
+            assert torch.equal(result[i], torch.eye(4))
+
+    def test_resolve_pose_target_from_batched(self):
+        target = torch.eye(4).unsqueeze(0).repeat(NUM_ENVS, 1, 1)
+        target[:, 2, 3] = 0.5  # offset z for each env
+        is_success, result = self.action._resolve_pose_target(
+            target, action_name="TestAction"
+        )
+        assert is_success is True
+        assert result.shape == (NUM_ENVS, 4, 4)
+        for i in range(NUM_ENVS):
+            assert result[i, 2, 3].item() == pytest.approx(0.5)
+
+    def test_resolve_start_qpos_defaults_to_current(self):
+        result = self.action._resolve_start_qpos(None)
+        assert result.shape == (NUM_ENVS, ARM_DOF)
+
+    def test_resolve_start_qpos_broadcasts_single(self):
+        single = torch.ones(ARM_DOF)
+        result = self.action._resolve_start_qpos(single)
+        assert result.shape == (NUM_ENVS, ARM_DOF)
+        for i in range(NUM_ENVS):
+            assert torch.equal(result[i], single)
+
+    def test_compute_three_phase_waypoints_sums_to_sample_interval(self):
+        hand_interp_steps = 5
+        first, second, third = self.action._compute_three_phase_waypoints(
+            hand_interp_steps,
+            first_phase_name="approach",
+            third_phase_name="lift",
+        )
+        assert first + second + third == self.cfg.sample_interval
+        assert first >= 2
+        assert third >= 2
+
+    def test_interpolate_hand_qpos_shape(self):
+        n_waypoints = 10
+        start = torch.zeros(HAND_DOF)
+        end = torch.ones(HAND_DOF)
+        result = self.action._interpolate_hand_qpos(start, end, n_waypoints)
+        assert result.shape == (n_waypoints, HAND_DOF)
+        # First and last should match endpoints
+        assert torch.allclose(result[0], start)
+        assert torch.allclose(result[-1], end)
+
+    def test_interpolate_hand_qpos_linear(self):
+        """Verify linear interpolation between two hand configs."""
+        n_waypoints = 3
+        start = torch.tensor([0.0, 0.0])
+        end = torch.tensor([1.0, 1.0])
+        result = self.action._interpolate_hand_qpos(start, end, n_waypoints)
+        expected_mid = torch.tensor([0.5, 0.5])
+        assert torch.allclose(result[1], expected_mid, atol=1e-6)
+
+
+# ---------------------------------------------------------------------------
+# PickUpAction
+# ---------------------------------------------------------------------------
+
+
+class TestPickUpActionInit:
+    """Tests for PickUpAction initialization and config validation."""
+
+    def setup_method(self):
+        self.robot = _make_mock_robot()
+        self.mg = _make_mock_motion_generator(self.robot)
+
+    def _make_cfg(self, **overrides):
+        defaults = dict(
+            hand_open_qpos=torch.tensor([0.0, 0.0]),
+            hand_close_qpos=torch.tensor([0.025, 0.025]),
+            control_part="arm",
+            hand_control_part="hand",
+            pre_grasp_distance=0.15,
+            lift_height=0.15,
+            approach_direction=torch.tensor([0.0, 0.0, -1.0]),
+        )
+        defaults.update(overrides)
+        return PickUpActionCfg(**defaults)
+
+    def test_init_sets_hand_joint_ids(self):
+        cfg = self._make_cfg()
+        action = PickUpAction(self.mg, cfg=cfg)
+        assert action.hand_joint_ids == list(range(ARM_DOF, ARM_DOF + HAND_DOF))
+        assert action.joint_ids == list(range(ARM_DOF)) + list(
+            range(ARM_DOF, ARM_DOF + HAND_DOF)
+        )
+        assert action.dof == TOTAL_DOF
+
+
+# ---------------------------------------------------------------------------
+# PlaceAction
+# ---------------------------------------------------------------------------
+
+
+class TestPlaceActionInit:
+    """Tests for PlaceAction initialization."""
+
+    def setup_method(self):
+        self.robot = _make_mock_robot()
+        self.mg = _make_mock_motion_generator(self.robot)
+
+    def _make_cfg(self, **overrides):
+        defaults = dict(
+            hand_open_qpos=torch.tensor([0.0, 0.0]),
+            hand_close_qpos=torch.tensor([0.025, 0.025]),
+            control_part="arm",
+            hand_control_part="hand",
+            lift_height=0.15,
+        )
+        defaults.update(overrides)
+        return PlaceActionCfg(**defaults)
+
+    def test_init_sets_hand_joint_ids(self):
+        cfg = self._make_cfg()
+        action = PlaceAction(self.mg, cfg=cfg)
+        assert action.hand_joint_ids == list(range(ARM_DOF, ARM_DOF + HAND_DOF))
+        assert action.dof == TOTAL_DOF
+
+
+# ---------------------------------------------------------------------------
+# AtomicAction._apply_offset
+# ---------------------------------------------------------------------------
+
+
+class TestAtomicActionApplyOffset:
+    """Tests for the shared _apply_offset method inherited from AtomicAction."""
+
+    def setup_method(self):
+        self.robot = _make_mock_robot()
+        self.mg = _make_mock_motion_generator(self.robot)
+        self.cfg = MoveActionCfg()
+        self.action = MoveAction(self.mg, cfg=self.cfg)
+
+    def test_apply_offset_batched(self):
+        # [N, 4, 4] poses, [N, 3] offsets
+        poses = torch.eye(4).unsqueeze(0).repeat(3, 1, 1)
+        offsets = torch.tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
+        result = self.action._apply_offset(poses, offsets)
+        assert result.shape == (3, 4, 4)
+        assert result[0, :3, 3].tolist() == pytest.approx([1.0, 0.0, 0.0])
+        assert result[1, :3, 3].tolist() == pytest.approx([0.0, 1.0, 0.0])
+        assert result[2, :3, 3].tolist() == pytest.approx([0.0, 0.0, 1.0])
+
+    def test_apply_offset_broadcasts_single_offset(self):
+        # [N, 4, 4] poses, [3] single offset broadcast to all
+        poses = torch.eye(4).unsqueeze(0).repeat(2, 1, 1)
+        offset = torch.tensor([0.1, 0.2, 0.3])
+        result = self.action._apply_offset(poses, offset)
+        assert result.shape == (2, 4, 4)
+        for i in range(2):
+            assert result[i, :3, 3].tolist() == pytest.approx([0.1, 0.2, 0.3])
+
+    def test_apply_offset_preserves_rotation(self):
+        """Offset only affects translation; rotation part stays unchanged."""
+        poses = torch.eye(4).unsqueeze(0).repeat(1, 1, 1)
+        # Set a non-trivial rotation
+        poses[0, 0, 1] = -1.0
+        poses[0, 1, 0] = 1.0
+        offset = torch.tensor([1.0, 2.0, 3.0])
+        result = self.action._apply_offset(poses, offset)
+        # Rotation block should be unchanged
+        assert torch.equal(result[0, :3, :3], poses[0, :3, :3])
+
+
+if __name__ == "__main__":
+    # For visual debugging
+    test = TestMoveActionHelpers()
+    test.setup_method()
+    test.test_compute_three_phase_waypoints_sums_to_sample_interval()
diff --git a/tests/sim/atomic_actions/test_core.py b/tests/sim/atomic_actions/test_core.py
new file mode 100644
index 00000000..7cebaa7b
--- /dev/null
+++ b/tests/sim/atomic_actions/test_core.py
@@ -0,0 +1,171 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Tests for atomic action core module (Affordance, InteractionPoints, ObjectSemantics, ActionCfg)."""
+
+from __future__ import annotations
+
+import pytest
+import torch
+
+from embodichain.lab.sim.atomic_actions.core import (
+    ActionCfg,
+    Affordance,
+    InteractionPoints,
+    ObjectSemantics,
+)
+
+# ---------------------------------------------------------------------------
+# Affordance
+# ---------------------------------------------------------------------------
+
+
+class TestAffordance:
+    """Tests for the Affordance base dataclass."""
+
+    def test_default_values(self):
+        aff = Affordance()
+        assert aff.object_label == ""
+        assert aff.geometry == {}
+        assert aff.custom_config == {}
+
+    def test_mesh_vertices_returns_tensor(self):
+        vertices = torch.randn(10, 3)
+        aff = Affordance(geometry={"mesh_vertices": vertices})
+        assert torch.equal(aff.mesh_vertices, vertices)
+
+    def test_mesh_vertices_returns_none_when_missing(self):
+        aff = Affordance()
+        assert aff.mesh_vertices is None
+
+    def test_mesh_vertices_raises_on_wrong_type(self):
+        aff = Affordance(geometry={"mesh_vertices": [1, 2, 3]})
+        with pytest.raises(TypeError, match="must be a torch.Tensor"):
+            _ = aff.mesh_vertices
+
+    def test_mesh_triangles_returns_tensor(self):
+        triangles = torch.randint(0, 10, (5, 3))
+        aff = Affordance(geometry={"mesh_triangles": triangles})
+        assert torch.equal(aff.mesh_triangles, triangles)
+
+    def test_mesh_triangles_returns_none_when_missing(self):
+        aff = Affordance()
+        assert aff.mesh_triangles is None
+
+    def test_mesh_triangles_raises_on_wrong_type(self):
+        aff = Affordance(geometry={"mesh_triangles": "bad"})
+        with pytest.raises(TypeError, match="must be a torch.Tensor"):
+            _ = aff.mesh_triangles
+
+    def test_custom_config_get_set(self):
+        aff = Affordance()
+        aff.set_custom_config("key_a", 42)
+        assert aff.get_custom_config("key_a") == 42
+        assert aff.get_custom_config("missing") is None
+        assert aff.get_custom_config("missing", "default") == "default"
+
+    def test_get_batch_size_returns_one(self):
+        # Base Affordance always returns 1
+        assert Affordance().get_batch_size() == 1
+
+
+# ---------------------------------------------------------------------------
+# InteractionPoints
+# ---------------------------------------------------------------------------
+
+
+class TestInteractionPoints:
+    """Tests for InteractionPoints affordance."""
+
+    def test_default_points_shape(self):
+        ip = InteractionPoints()
+        assert ip.points.shape == (1, 3)
+
+    def test_get_batch_size_matches_points(self):
+        points = torch.randn(5, 3)
+        ip = InteractionPoints(points=points)
+        assert ip.get_batch_size() == 5
+
+    def test_get_points_by_type_returns_matching_subset(self):
+        points = torch.tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
+        ip = InteractionPoints(points=points, point_types=["push", "poke", "push"])
+        result = ip.get_points_by_type("push")
+        assert result is not None
+        assert result.shape == (2, 3)
+        assert torch.equal(result[0], points[0])
+        assert torch.equal(result[1], points[2])
+
+    def test_get_points_by_type_returns_none_for_missing_type(self):
+        ip = InteractionPoints(points=torch.zeros(2, 3), point_types=["push", "push"])
+        assert ip.get_points_by_type("poke") is None
+
+    def test_get_approach_direction_from_normals(self):
+        normals = torch.tensor([[0.0, 0.0, 1.0], [1.0, 0.0, 0.0]])
+        ip = InteractionPoints(points=torch.zeros(2, 3), normals=normals)
+        # Approach is opposite of normal
+        assert torch.equal(ip.get_approach_direction(0), torch.tensor([0.0, 0.0, -1.0]))
+        assert torch.equal(ip.get_approach_direction(1), torch.tensor([-1.0, 0.0, 0.0]))
+
+    def test_get_approach_direction_default_without_normals(self):
+        ip = InteractionPoints(points=torch.zeros(1, 3))
+        direction = ip.get_approach_direction(0)
+        assert torch.equal(direction, torch.tensor([0.0, 0.0, 1.0]))
+
+
+# ---------------------------------------------------------------------------
+# ObjectSemantics
+# ---------------------------------------------------------------------------
+
+
+class TestObjectSemantics:
+    """Tests for ObjectSemantics dataclass."""
+
+    def test_post_init_binds_label_and_geometry(self):
+        geometry = {"bounding_box": [0.1, 0.2, 0.3]}
+        aff = Affordance()
+        sem = ObjectSemantics(
+            affordance=aff,
+            geometry=geometry,
+            label="mug",
+        )
+        assert sem.affordance.object_label == "mug"
+        assert sem.affordance.geometry is geometry
+
+    def test_default_optional_fields(self):
+        sem = ObjectSemantics(
+            affordance=Affordance(),
+            geometry={},
+        )
+        assert sem.label == "none"
+        assert sem.properties == {}
+        assert sem.entity is None
+
+
+# ---------------------------------------------------------------------------
+# ActionCfg
+# ---------------------------------------------------------------------------
+
+
+class TestActionCfg:
+    """Tests for ActionCfg defaults."""
+
+    def test_default_values(self):
+        cfg = ActionCfg()
+        assert cfg.name == "default"
+        assert cfg.control_part == "arm"
+        assert cfg.interpolation_type == "linear"
+        assert cfg.velocity_limit is None
+        assert cfg.acceleration_limit is None
diff --git a/tests/sim/atomic_actions/test_engine.py b/tests/sim/atomic_actions/test_engine.py
new file mode 100644
index 00000000..52dc034d
--- /dev/null
+++ b/tests/sim/atomic_actions/test_engine.py
@@ -0,0 +1,191 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Tests for atomic action engine (registry, SemanticAnalyzer, AtomicActionEngine)."""
+
+from __future__ import annotations
+
+import pytest
+import torch
+from unittest.mock import MagicMock, Mock
+
+from embodichain.lab.sim.atomic_actions.core import (
+    ActionCfg,
+    Affordance,
+    ObjectSemantics,
+)
+from embodichain.lab.sim.atomic_actions.engine import (
+    AtomicActionEngine,
+    SemanticAnalyzer,
+    get_registered_actions,
+    register_action,
+    unregister_action,
+)
+
+# ---------------------------------------------------------------------------
+# Global Action Registry
+# ---------------------------------------------------------------------------
+
+
+class TestGlobalRegistry:
+    """Tests for register_action / unregister_action / get_registered_actions."""
+
+    def teardown_method(self):
+        # Clean up any test registrations
+        unregister_action("_test_dummy")
+
+    def test_register_and_retrieve(self):
+        mock_cls = Mock()
+        register_action("_test_dummy", mock_cls)
+        registry = get_registered_actions()
+        assert "_test_dummy" in registry
+        assert registry["_test_dummy"] is mock_cls
+
+    def test_unregister_removes_entry(self):
+        register_action("_test_dummy", Mock())
+        unregister_action("_test_dummy")
+        assert "_test_dummy" not in get_registered_actions()
+
+    def test_unregister_nonexistent_is_noop(self):
+        # Should not raise
+        unregister_action("_nonexistent_action")
+
+    def test_get_registered_actions_returns_copy(self):
+        """Mutating the returned dict should not affect the global registry."""
+        result = get_registered_actions()
+        result["_should_not_persist"] = Mock()
+        assert "_should_not_persist" not in get_registered_actions()
+
+
+# ---------------------------------------------------------------------------
+# SemanticAnalyzer
+# ---------------------------------------------------------------------------
+
+
+class TestSemanticAnalyzer:
+    """Tests for SemanticAnalyzer."""
+
+    def setup_method(self):
+        self.analyzer = SemanticAnalyzer()
+
+    def test_analyze_returns_object_semantics(self):
+        sem = self.analyzer.analyze("mug")
+        assert isinstance(sem, ObjectSemantics)
+        assert sem.label == "mug"
+        assert isinstance(sem.affordance, Affordance)
+
+    def test_analyze_caches_by_default(self):
+        sem1 = self.analyzer.analyze("bottle")
+        sem2 = self.analyzer.analyze("bottle")
+        assert sem1 is sem2
+
+    def test_analyze_bypasses_cache_with_geometry(self):
+        sem1 = self.analyzer.analyze("bottle")
+        sem2 = self.analyzer.analyze(
+            "bottle", geometry={"bounding_box": [0.2, 0.2, 0.2]}
+        )
+        assert sem1 is not sem2
+
+    def test_analyze_no_cache(self):
+        sem1 = self.analyzer.analyze("cup", use_cache=False)
+        sem2 = self.analyzer.analyze("cup", use_cache=False)
+        assert sem1 is not sem2
+
+    def test_clear_cache(self):
+        self.analyzer.analyze("can")
+        self.analyzer.clear_cache()
+        # After clearing, a new object should be created
+        sem1 = self.analyzer.analyze("can")
+        sem2 = self.analyzer.analyze("can")
+        assert sem1 is sem2  # re-cached after clear
+
+
+# ---------------------------------------------------------------------------
+# AtomicActionEngine._resolve_target
+# ---------------------------------------------------------------------------
+
+
+class TestResolveTarget:
+    """Tests for AtomicActionEngine._resolve_target with various input types."""
+
+    def setup_method(self):
+        self.robot = Mock()
+        self.robot.device = torch.device("cpu")
+        self.robot.dof = 6
+        self.robot.get_qpos.return_value = torch.zeros(1, 6)
+        self.robot.get_joint_ids.return_value = list(range(6))
+
+        self.mg = Mock()
+        self.mg.robot = self.robot
+        self.mg.device = torch.device("cpu")
+
+        self.engine = AtomicActionEngine(self.mg, actions_cfg_list=[])
+
+    def test_tensor_passthrough(self):
+        tensor = torch.eye(4)
+        result = self.engine._resolve_target(tensor)
+        assert result is tensor
+
+    def test_object_semantics_passthrough(self):
+        sem = ObjectSemantics(affordance=Affordance(), geometry={})
+        result = self.engine._resolve_target(sem)
+        assert result is sem
+
+    def test_string_resolved_via_semantic_analyzer(self):
+        result = self.engine._resolve_target("mug")
+        assert isinstance(result, ObjectSemantics)
+        assert result.label == "mug"
+
+    def test_dict_with_pose_key(self):
+        pose = torch.eye(4)
+        result = self.engine._resolve_target({"pose": pose})
+        assert result is pose
+
+    def test_dict_with_pose_raises_on_non_tensor(self):
+        with pytest.raises(TypeError, match="must be a torch.Tensor"):
+            self.engine._resolve_target({"pose": "not_a_tensor"})
+
+    def test_dict_with_semantics_key(self):
+        sem = ObjectSemantics(affordance=Affordance(), geometry={}, label="bottle")
+        result = self.engine._resolve_target({"semantics": sem})
+        assert result is sem
+
+    def test_dict_with_semantics_raises_on_wrong_type(self):
+        with pytest.raises(TypeError, match="must be an ObjectSemantics"):
+            self.engine._resolve_target({"semantics": "wrong"})
+
+    def test_dict_with_label_uses_analyzer(self):
+        result = self.engine._resolve_target({"label": "apple"})
+        assert isinstance(result, ObjectSemantics)
+        assert result.label == "apple"
+
+    def test_dict_without_label_raises(self):
+        with pytest.raises(ValueError, match="must provide 'label'"):
+            self.engine._resolve_target({"geometry": {}})
+
+    def test_dict_with_non_string_label_raises(self):
+        with pytest.raises(TypeError, match="must be a string"):
+            self.engine._resolve_target({"label": 123})
+
+    def test_unsupported_type_raises(self):
+        with pytest.raises(TypeError, match="target must be"):
+            self.engine._resolve_target(42)
+
+
+if __name__ == "__main__":
+    test = TestSemanticAnalyzer()
+    test.setup_method()
+    test.test_analyze_returns_object_semantics()
diff --git a/tests/sim/objects/test_articulation.py b/tests/sim/objects/test_articulation.py
index 8140b775..6f2dc692 100644
--- a/tests/sim/objects/test_articulation.py
+++ b/tests/sim/objects/test_articulation.py
@@ -248,6 +248,13 @@ def test_get_joint_drive_with_joint_ids(self):
     def teardown_method(self):
         """Clean up resources after each test method."""
         self.sim.destroy()
+        import embodichain.lab.sim as om
+
+        om.SimulationManager.flush_cleanup_queue()
+        self.__dict__.clear()
+        import gc
+
+        gc.collect()
 
 
 class TestArticulationCPU(BaseArticulationTest):
@@ -255,7 +262,6 @@ def setup_method(self):
         self.setup_simulation("cpu")
 
 
-@pytest.mark.skip(reason="Skipping CUDA tests temporarily")
 class TestArticulationCUDA(BaseArticulationTest):
     def setup_method(self):
         self.setup_simulation("cuda")
diff --git a/tests/sim/objects/test_cloth_object.py b/tests/sim/objects/test_cloth_object.py
index d7182b66..afa182e5 100644
--- a/tests/sim/objects/test_cloth_object.py
+++ b/tests/sim/objects/test_cloth_object.py
@@ -68,7 +68,6 @@ def setup_simulation(self):
             headless=True,
             physics_dt=1.0 / 100.0,  # Physics timestep (100 Hz)
             sim_device="cuda",
-            enable_rt=False,  # Enable ray tracing for better visuals
             num_envs=4,
             arena_space=3.0,
         )
@@ -133,6 +132,13 @@ def test_get_current_vertex_positions(self):
     def teardown_method(self):
         """Clean up resources after each test method."""
         self.sim.destroy()
+        import embodichain.lab.sim as om
+
+        om.SimulationManager.flush_cleanup_queue()
+        self.__dict__.clear()
+        import gc
+
+        gc.collect()
 
 
 class TestSoftObjectCUDA(BaseSoftObjectTest):
diff --git a/tests/sim/objects/test_light.py b/tests/sim/objects/test_light.py
index ac3b70cc..7e9d58c4 100644
--- a/tests/sim/objects/test_light.py
+++ b/tests/sim/objects/test_light.py
@@ -152,3 +152,10 @@ def test_set_and_get_local_pose_matrix_and_vector(self):
     def teardown_method(self):
         """Clean up resources after each test method."""
         self.sim.destroy()
+        import embodichain.lab.sim as om
+
+        om.SimulationManager.flush_cleanup_queue()
+        self.__dict__.clear()
+        import gc
+
+        gc.collect()
diff --git a/tests/sim/objects/test_rigid_object.py b/tests/sim/objects/test_rigid_object.py
index 55bc73a9..5beebe26 100644
--- a/tests/sim/objects/test_rigid_object.py
+++ b/tests/sim/objects/test_rigid_object.py
@@ -29,6 +29,8 @@
 from embodichain.data import get_data_path
 from dexsim.types import ActorType
 
+from embodichain.lab.sim.cfg import RenderCfg, RigidObjectCfg
+
 DUCK_PATH = "ToyDuck/toy_duck.glb"
 TABLE_PATH = "ShopTableSimple/shop_table_simple.ply"
 CHAIR_PATH = "Chair/chair.glb"
@@ -44,7 +46,7 @@ def setup_simulation(self, sim_device):
             headless=True, sim_device=sim_device, num_envs=NUM_ARENAS
         )
         self.sim = SimulationManager(config)
-
+        self.sim.enable_physics(False)
         duck_path = get_data_path(DUCK_PATH)
         assert os.path.isfile(duck_path)
         table_path = get_data_path(TABLE_PATH)
@@ -235,6 +237,44 @@ def test_set_velocity(self):
             duck_ang_vel, ang_vel
         ), f"Angular velocity not set correctly: expected {ang_vel}, got {duck_ang_vel}"
 
+    def test_get_acceleration(self):
+        """Test that lin_acc, ang_acc, and acc return correct shapes and values."""
+
+        # Apply a force to generate non-zero acceleration
+        force = (
+            torch.tensor([10.0, 0.0, 0.0], device=self.sim.device)
+            .unsqueeze(0)
+            .repeat(NUM_ARENAS, 1)
+        )
+        self.duck.add_force_torque(force=force)
+        self.sim.update(0.01)
+
+        # Read back accelerations
+        duck_lin_acc = self.duck.body_data.lin_acc
+        duck_ang_acc = self.duck.body_data.ang_acc
+        duck_acc = self.duck.body_data.acc
+
+        assert duck_lin_acc.shape == (
+            NUM_ARENAS,
+            3,
+        ), f"Linear acceleration shape mismatch: expected ({NUM_ARENAS}, 3), got {duck_lin_acc.shape}"
+        assert duck_ang_acc.shape == (
+            NUM_ARENAS,
+            3,
+        ), f"Angular acceleration shape mismatch: expected ({NUM_ARENAS}, 3), got {duck_ang_acc.shape}"
+        assert duck_acc.shape == (
+            NUM_ARENAS,
+            6,
+        ), f"Concatenated acceleration shape mismatch: expected ({NUM_ARENAS}, 6), got {duck_acc.shape}"
+
+        # Verify concatenated acceleration matches individual components
+        assert torch.allclose(
+            duck_acc[:, :3], duck_lin_acc
+        ), "First 3 columns of acc should match lin_acc"
+        assert torch.allclose(
+            duck_acc[:, 3:], duck_ang_acc
+        ), "Last 3 columns of acc should match ang_acc"
+
     def test_set_visual_material(self):
         """Test that set_material correctly assigns the material to the duck."""
 
@@ -541,6 +581,13 @@ def test_misc_properties(self):
     def teardown_method(self):
         """Clean up resources after each test method."""
         self.sim.destroy()
+        import embodichain.lab.sim as om
+
+        om.SimulationManager.flush_cleanup_queue()
+        self.__dict__.clear()
+        import gc
+
+        gc.collect()
 
 
 class TestRigidObjectCPU(BaseRigidObjectTest):
@@ -548,7 +595,6 @@ def setup_method(self):
         self.setup_simulation("cpu")
 
 
-@pytest.mark.skip(reason="Skipping CUDA tests temporarily")
 class TestRigidObjectCUDA(BaseRigidObjectTest):
     def setup_method(self):
         self.setup_simulation("cuda")
diff --git a/tests/sim/objects/test_rigid_object_group.py b/tests/sim/objects/test_rigid_object_group.py
index b6802743..896f5ad3 100644
--- a/tests/sim/objects/test_rigid_object_group.py
+++ b/tests/sim/objects/test_rigid_object_group.py
@@ -119,6 +119,13 @@ def test_set_visible(self):
     def teardown_method(self):
         """Clean up resources after each test method."""
         self.sim.destroy()
+        import embodichain.lab.sim as om
+
+        om.SimulationManager.flush_cleanup_queue()
+        self.__dict__.clear()
+        import gc
+
+        gc.collect()
 
 
 class TestRigidObjectGroupCPU(BaseRigidObjectGroupTest):
@@ -126,7 +133,6 @@ def setup_method(self):
         self.setup_simulation("cpu")
 
 
-# TODO: Fix CUDA tests issue.
 @pytest.mark.skip(reason="Skipping CUDA tests temporarily")
 class TestRigidObjectGroupCUDA(BaseRigidObjectGroupTest):
     def setup_method(self):
diff --git a/tests/sim/objects/test_robot.py b/tests/sim/objects/test_robot.py
index 784aeaee..83b1414d 100644
--- a/tests/sim/objects/test_robot.py
+++ b/tests/sim/objects/test_robot.py
@@ -24,7 +24,6 @@
 from embodichain.lab.sim.robots.dexforce_w1 import DexforceW1Cfg
 from embodichain.data import get_data_path
 
-
 # Define control parts
 CONTROL_PARTS = {
     "left_arm": [
@@ -50,10 +49,13 @@
 
 # Base test class for CPU and CUDA
 class BaseRobotTest:
-    def setup_simulation(self, sim_device):
+    @classmethod
+    def setup_simulation(cls, sim_device):
+        if hasattr(cls, "sim"):
+            return
         # Set up simulation with specified device (CPU or CUDA)
         config = SimulationManagerCfg(headless=True, sim_device=sim_device, num_envs=10)
-        self.sim = SimulationManager(config)
+        cls.sim = SimulationManager(config)
 
         cfg = DexforceW1Cfg.from_dict(
             {
@@ -63,11 +65,11 @@ def setup_simulation(self, sim_device):
             }
         )
 
-        self.robot: Robot = self.sim.add_robot(cfg=cfg)
+        cls.robot: Robot = cls.sim.add_robot(cfg=cfg)
 
         # Initialize GPU physics if needed
-        if sim_device == "cuda" and getattr(self.sim, "is_use_gpu_physics", False):
-            self.sim.init_gpu_physics()
+        if sim_device == "cuda" and getattr(cls.sim, "is_use_gpu_physics", False):
+            cls.sim.init_gpu_physics()
 
     def test_get_joint_ids(self):
         left_joint_ids = self.robot.get_joint_ids("left_arm")
@@ -139,6 +141,7 @@ def test_compute_fk(self):
                 ],
             ],
             dtype=torch.float32,
+            device=self.sim.device,
         ).unsqueeze_(0)
 
         assert torch.allclose(
@@ -287,8 +290,20 @@ def test_robot_cfg_merge(self):
         ), "Solver config merge failed."
 
     def teardown_method(self):
-        """Clean up resources after each test method."""
-        self.sim.destroy()
+        pass
+
+    @classmethod
+    def teardown_class(cls):
+        """Clean up resources after each test class."""
+        if hasattr(cls, "sim"):
+            cls.sim.destroy()
+            import embodichain.lab.sim as om
+
+            om.SimulationManager.flush_cleanup_queue()
+            del cls.sim
+            import gc
+
+            gc.collect()
 
     def test_set_physical_visible(self):
         self.robot.set_physical_visible(
@@ -311,7 +326,6 @@ def setup_method(self):
         self.setup_simulation("cpu")
 
 
-@pytest.mark.skip(reason="Skipping CUDA tests temporarily")
 class TestRobotCUDA(BaseRobotTest):
     def setup_method(self):
         self.setup_simulation("cuda")
@@ -319,6 +333,6 @@ def setup_method(self):
 
 if __name__ == "__main__":
     # Run tests directly
-    test_cpu = TestRobotCPU()
+    test_cpu = TestRobotCUDA()
     test_cpu.setup_method()
-    test_cpu.test_fk("left_arm")
+    test_cpu.test_compute_jacobian()
diff --git a/tests/sim/objects/test_soft_object.py b/tests/sim/objects/test_soft_object.py
index b3955d88..06b3c1dc 100644
--- a/tests/sim/objects/test_soft_object.py
+++ b/tests/sim/objects/test_soft_object.py
@@ -18,6 +18,7 @@
 from dexsim.utility.path import get_resources_data_path
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     SoftbodyVoxelAttributesCfg,
     SoftbodyPhysicalAttributesCfg,
 )
@@ -39,7 +40,6 @@ def setup_simulation(self):
             headless=True,
             physics_dt=1.0 / 100.0,  # Physics timestep (100 Hz)
             sim_device="cuda",
-            enable_rt=False,  # Enable ray tracing for better visuals
             num_envs=4,
             arena_space=3.0,
         )
@@ -91,6 +91,13 @@ def test_remove(self):
     def teardown_method(self):
         """Clean up resources after each test method."""
         self.sim.destroy()
+        import embodichain.lab.sim as om
+
+        om.SimulationManager.flush_cleanup_queue()
+        self.__dict__.clear()
+        import gc
+
+        gc.collect()
 
 
 class TestSoftObjectCUDA(BaseSoftObjectTest):
diff --git a/tests/sim/objects/test_usd.py b/tests/sim/objects/test_usd.py
index 350c9daf..a5558a39 100644
--- a/tests/sim/objects/test_usd.py
+++ b/tests/sim/objects/test_usd.py
@@ -23,6 +23,7 @@
 )
 from embodichain.lab.sim.objects import Articulation, RigidObject
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     ArticulationCfg,
     RigidObjectCfg,
     JointDrivePropertiesCfg,
@@ -39,7 +40,9 @@ class BaseUsdTest:
 
     def setup_simulation(self, sim_device):
         config = SimulationManagerCfg(
-            headless=True, sim_device=sim_device, num_envs=NUM_ARENAS, enable_rt=False
+            headless=True,
+            sim_device=sim_device,
+            num_envs=NUM_ARENAS,
         )
         self.sim = SimulationManager(config)
 
@@ -166,8 +169,16 @@ def export_usd(self):
     def teardown_method(self):
         """Clean up resources after each test method."""
         self.sim.destroy()
+        import embodichain.lab.sim as om
 
+        om.SimulationManager.flush_cleanup_queue()
+        self.__dict__.clear()
+        import gc
 
+        gc.collect()
+
+
+@pytest.mark.skip(reason="Skipping CUDA tests temporarily")
 class TestUsdCPU(BaseUsdTest):
     def setup_method(self):
         self.setup_simulation("cpu")
diff --git a/tests/sim/planners/test_motion_generator.py b/tests/sim/planners/test_motion_generator.py
index 511189d6..300d191b 100644
--- a/tests/sim/planners/test_motion_generator.py
+++ b/tests/sim/planners/test_motion_generator.py
@@ -33,6 +33,7 @@
     MoveType,
     MovePart,
 )
+from embodichain.lab.sim.cfg import RenderCfg
 
 
 def to_numpy(tensor):
@@ -45,8 +46,10 @@ def to_numpy(tensor):
 
 
 class BaseTestMotionGenerator(object):
-    @classmethod
-    def setup_class(cls):
+    def setup_simulation(self):
+        cls = type(self)
+        if hasattr(cls, "robot_sim"):
+            return
         cls.config = SimulationManagerCfg(headless=True, sim_device="cpu")
         cls.robot_sim = SimulationManager(cls.config)
         cls.robot_sim.set_manual_update(False)
@@ -157,11 +160,15 @@ def _execute_trajectory(self, qpos_list, forward=True, delay=0.01):
 
     @classmethod
     def teardown_class(cls):
-        try:
+        if hasattr(cls, "robot_sim"):
             cls.robot_sim.destroy()
-            print("robot_sim destroyed successfully")
-        except Exception as e:
-            print(f"Error during robot_sim.destroy(): {e}")
+            import embodichain.lab.sim as om
+
+            om.SimulationManager.flush_cleanup_queue()
+            del cls.robot_sim
+            import gc
+
+            gc.collect()
 
     def _execute_forward_trajectory(self, robot, qpos_list, delay=0.1):
         """Helper method to execute trajectory"""
@@ -183,6 +190,12 @@ def _execute_backward_trajectory(self, robot, qpos_list, delay=0.1):
 class TestMotionGenerator(BaseTestMotionGenerator):
     """Test suite for MotionGenerator trajectory generation"""
 
+    def setup_method(self):
+        self.setup_simulation()
+
+    def teardown_method(self):
+        pass
+
     @pytest.mark.parametrize("is_linear", [True, False])
     def test_create_trajectory_with_xpos(self, is_linear):
         """Test trajectory generation with cartesian positions"""
diff --git a/tests/sim/planners/test_toppra_planner.py b/tests/sim/planners/test_toppra_planner.py
index d46f7e12..604581df 100644
--- a/tests/sim/planners/test_toppra_planner.py
+++ b/tests/sim/planners/test_toppra_planner.py
@@ -17,11 +17,14 @@
 from embodichain.lab.sim.planners.utils import PlanState, TrajectorySampleMethod
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.robots import CobotMagicCfg
+from embodichain.lab.sim.cfg import RenderCfg
 
 
 class TestToppraPlanner:
-    @classmethod
-    def setup_class(cls):
+    def setup_simulation(self):
+        cls = type(self)
+        if hasattr(cls, "sim"):
+            return
         cls.sim_config = SimulationManagerCfg(headless=True, sim_device="cpu")
         cls.sim = SimulationManager(cls.sim_config)
 
@@ -32,16 +35,28 @@ def setup_class(cls):
         }
         cls.robot = cls.sim.add_robot(cfg=CobotMagicCfg.from_dict(cfg_dict))
 
-    @classmethod
-    def teardown_class(cls):
-        cls.sim.destroy()
-
     def setup_method(self):
+        self.setup_simulation()
         cfg = ToppraPlannerCfg(
             robot_uid="CobotMagic_toppra",
         )
         self.planner = ToppraPlanner(cfg=cfg)
 
+    def teardown_method(self):
+        pass
+
+    @classmethod
+    def teardown_class(cls):
+        if hasattr(cls, "sim"):
+            cls.sim.destroy()
+            import embodichain.lab.sim as om
+
+            om.SimulationManager.flush_cleanup_queue()
+            del cls.sim
+            import gc
+
+            gc.collect()
+
     def test_initialization(self):
         assert self.planner.device == torch.device("cpu")
 
diff --git a/tests/sim/sensors/test_camera.py b/tests/sim/sensors/test_camera.py
index 0a70d35a..d95f0c4f 100644
--- a/tests/sim/sensors/test_camera.py
+++ b/tests/sim/sensors/test_camera.py
@@ -23,19 +23,21 @@
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.sensors import Camera, SensorCfg, CameraCfg
 from embodichain.lab.sim.objects import Articulation
-from embodichain.lab.sim.cfg import ArticulationCfg
+from embodichain.lab.sim.cfg import ArticulationCfg, RenderCfg
 from embodichain.data import get_data_path
 
-
 NUM_ENVS = 4
 ART_PATH = "SlidingBoxDrawer/SlidingBoxDrawer.urdf"
 
 
 class CameraTest:
-    def setup_simulation(self, sim_device, enable_rt):
+    def setup_simulation(self, sim_device, renderer="hybrid"):
         # Setup SimulationManager
         config = SimulationManagerCfg(
-            headless=True, sim_device=sim_device, enable_rt=enable_rt, num_envs=NUM_ENVS
+            headless=True,
+            sim_device=sim_device,
+            render_cfg=RenderCfg(renderer=renderer),
+            num_envs=NUM_ENVS,
         )
         self.sim = SimulationManager(config)
         # Create batch of cameras
@@ -137,30 +139,46 @@ def test_set_intrinsics(self):
 
     def teardown_method(self):
         """Clean up resources after each test method."""
-        self.sim.destroy()
+        if (
+            hasattr(self, "camera")
+            and getattr(self.camera, "uid", None) is not None
+            and hasattr(self, "sim")
+        ):
+            self.sim.remove_asset(self.camera.uid)
+        if hasattr(self, "sim"):
+            self.sim.destroy()
+        import embodichain.lab.sim as om
+
+        om.SimulationManager.flush_cleanup_queue()
+        import gc
 
+        gc.collect()
 
-class TestCameraRaster(CameraTest):
+
+class TestCameraHybrid(CameraTest):
     def setup_method(self):
-        self.setup_simulation("cpu", enable_rt=False)
+
+        self.setup_simulation("cpu", renderer="hybrid")
 
 
-class TestCameraRaster(CameraTest):
+class TestCameraHybridCUDA(CameraTest):
     def setup_method(self):
-        self.setup_simulation("cuda", enable_rt=False)
+
+        self.setup_simulation("cuda", renderer="hybrid")
 
 
 class TestCameraFastRT(CameraTest):
     def setup_method(self):
-        self.setup_simulation("cpu", enable_rt=True)
+        self.setup_simulation("cpu", renderer="fast-rt")
 
 
-class TestCameraFastRT(CameraTest):
+class TestCameraFastRTCUDA(CameraTest):
     def setup_method(self):
-        self.setup_simulation("cuda", enable_rt=True)
+
+        self.setup_simulation("cuda", renderer="fast-rt")
 
 
 if __name__ == "__main__":
-    test = CameraTest()
-    test.setup_simulation("cpu", enable_rt=False)
+    test = TestCameraFastRT()
+    test.setup_method()
     test.test_attach_to_parent()
diff --git a/tests/sim/sensors/test_contact.py b/tests/sim/sensors/test_contact.py
index 07ad6c9a..aa38fc22 100644
--- a/tests/sim/sensors/test_contact.py
+++ b/tests/sim/sensors/test_contact.py
@@ -23,6 +23,7 @@
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.cfg import (
+    RenderCfg,
     RigidBodyAttributesCfg,
 )
 from embodichain.lab.sim.sensors import (
@@ -38,7 +39,7 @@
 
 
 class ContactTest:
-    def setup_simulation(self, sim_device, enable_rt):
+    def setup_simulation(self, sim_device, renderer="hybrid"):
         sim_cfg = SimulationManagerCfg(
             width=1920,
             height=1080,
@@ -46,7 +47,7 @@ def setup_simulation(self, sim_device, enable_rt):
             headless=True,
             physics_dt=1.0 / 100.0,  # Physics timestep (100 Hz)
             sim_device=sim_device,
-            enable_rt=enable_rt,  # Enable ray tracing for better visuals
+            render_cfg=RenderCfg(renderer=renderer),
         )
 
         # Create the simulation instance
@@ -63,9 +64,9 @@ def setup_simulation(self, sim_device, enable_rt):
         contact_filter_art_cfg.link_name_list = ["finger1_link", "finger2_link"]
         contact_filter_cfg.articulation_cfg_list = [contact_filter_art_cfg]
         contact_filter_cfg.filter_need_both_actor = True
-        self.contact_sensor = self.sim.add_sensor(sensor_cfg=contact_filter_cfg)
 
         self.to_grasp_pose(cube2)
+        self.contact_sensor = self.sim.add_sensor(sensor_cfg=contact_filter_cfg)
 
     def create_cube(self, uid: str, position: list = (0.0, 0.0, 0)) -> RigidObject:
         """create cube
@@ -78,7 +79,7 @@ def create_cube(self, uid: str, position: list = (0.0, 0.0, 0)) -> RigidObject:
         Returns:
             RigidObject: rigid object
         """
-        cube_size = (0.025, 0.025, 0.025)
+        cube_size = (0.05, 0.05, 0.05)
         cube: RigidObject = self.sim.add_rigid_object(
             cfg=RigidObjectCfg(
                 uid=uid,
@@ -175,12 +176,14 @@ def to_grasp_pose(self, cube: RigidObject):
         approach_xpos = target_xpos.clone()
         approach_xpos[:, 2, 3] += 0.1
 
-        is_success, approach_qpos = self.robot.compute_ik(
+        is_success_approach, approach_qpos = self.robot.compute_ik(
             pose=approach_xpos, joint_seed=rest_arm_qpos, name="arm"
         )
-        is_success, target_qpos = self.robot.compute_ik(
+        print(f"Approach IK success: {is_success_approach}")
+        is_success_target, target_qpos = self.robot.compute_ik(
             pose=target_xpos, joint_seed=approach_qpos, name="arm"
         )
+        print(f"Target IK success: {is_success_target}")
         self.robot.set_qpos(approach_qpos, joint_ids=arm_ids)
         self.sim.update(step=40)
 
@@ -192,11 +195,22 @@ def to_grasp_pose(self, cube: RigidObject):
             .repeat(self.sim.num_envs, 1)
         )
         self.robot.set_qpos(hand_close_qpos, joint_ids=gripper_ids)
-        self.sim.update(step=20)
+        self.sim.update(step=200)
+
+        finger1_pose = self.robot.get_link_pose("finger1_link")
+        finger2_pose = self.robot.get_link_pose("finger2_link")
+        cube_pose = cube.get_local_pose()
+        print(f"Finger 1 pose: {finger1_pose[0][:3]}")
+        print(f"Finger 2 pose: {finger2_pose[0][:3]}")
+        print(f"Cube pose at end of grasp: {cube_pose[0][:3]}")
 
     def test_fetch_contact(self):
-        self.sim.update(step=1)
-        self.contact_sensor.update()
+        # In a test suite, run multiple steps until contact is actually detected
+        for i in range(50):
+            self.sim.update(step=20)
+            self.contact_sensor.update()
+            if getattr(self.contact_sensor, "total_current_contacts", 0) > 0:
+                break
         contact_report = self.contact_sensor.get_data()
 
         # Check that contact data has correct shape (num_envs, max_contacts_per_env, ...)
@@ -230,7 +244,13 @@ def test_fetch_contact(self):
         finger1_user_ids = (
             self.sim.get_robot("UR10_PGI").get_user_ids("finger1_link").reshape(-1)
         )
-        filter_user_ids = torch.cat([cube2_user_ids, finger1_user_ids])
+        filter_user_ids = torch.cat(
+            [
+                cube2_user_ids,
+                self.sim.get_robot("UR10_PGI").get_user_ids("finger1_link").reshape(-1),
+                self.sim.get_robot("UR10_PGI").get_user_ids("finger2_link").reshape(-1),
+            ]
+        )
         filter_contact_report = self.contact_sensor.filter_by_user_ids(filter_user_ids)
         n_filtered_contact = filter_contact_report["position"].shape[0]
         assert n_filtered_contact > 0, "No contact detected between gripper and cube."
@@ -241,27 +261,46 @@ def test_fetch_contact(self):
 
     def teardown_method(self):
         """Clean up resources after each test method."""
-        self.sim.destroy()
+        if (
+            hasattr(self, "contact_sensor")
+            and getattr(self.contact_sensor, "uid", None) is not None
+            and hasattr(self, "sim")
+        ):
+            self.sim.remove_asset(self.contact_sensor.uid)
+        if hasattr(self, "sim"):
+            self.sim.destroy()
+        import embodichain.lab.sim as om
+
+        om.SimulationManager.flush_cleanup_queue()
+        import gc
 
+        gc.collect()
 
-class TestContactRaster(ContactTest):
+
+class TestContactHybrid(ContactTest):
     def setup_method(self):
-        self.setup_simulation("cpu", enable_rt=False)
+
+        self.setup_simulation("cpu", renderer="hybrid")
 
 
-class TestContactRasterCuda(ContactTest):
+@pytest.mark.skip(reason="Skipping CUDA tests temporarily")
+class TestContactHybridCuda(ContactTest):
     def setup_method(self):
-        self.setup_simulation("cuda", enable_rt=False)
+
+        self.setup_simulation("cuda", renderer="hybrid")
 
 
 class TestContactFastRT(ContactTest):
     def setup_method(self):
-        self.setup_simulation("cpu", enable_rt=True)
 
+        self.setup_simulation("cpu", renderer="fast-rt")
 
-class TestContactFastRTCuda(ContactTest):
+
+@pytest.mark.skip(reason="Skipping CUDA tests temporarily")
+class TestContactFastRTCUDA(ContactTest):
     def setup_method(self):
-        self.setup_simulation("cuda", enable_rt=True)
+
+        self.setup_simulation("cuda", renderer="fast-rt")
 
 
 def test_contact_sensor_from_dict():
@@ -295,6 +334,6 @@ def test_contact_sensor_from_dict():
 
 
 if __name__ == "__main__":
-    test = ContactTest()
-    test.setup_simulation("cuda", enable_rt=True)
+    test = TestContactHybridCuda()
+    test.setup_simulation("cuda", renderer="hybrid")
     test.test_fetch_contact()
diff --git a/tests/sim/sensors/test_stereo.py b/tests/sim/sensors/test_stereo.py
index d74b9f77..58c5caed 100644
--- a/tests/sim/sensors/test_stereo.py
+++ b/tests/sim/sensors/test_stereo.py
@@ -16,18 +16,22 @@
 
 import pytest
 import torch
+
+from embodichain.lab.sim.cfg import RenderCfg
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.sensors import StereoCamera, SensorCfg
 
-
 NUM_ENVS = 4
 
 
 class StereoCameraTest:
-    def setup_simulation(self, sim_device, enable_rt):
+    def setup_simulation(self, sim_device, renderer="hybrid"):
         # Setup SimulationManager
         config = SimulationManagerCfg(
-            headless=True, sim_device=sim_device, enable_rt=enable_rt, num_envs=NUM_ENVS
+            headless=True,
+            sim_device=sim_device,
+            num_envs=NUM_ENVS,
+            render_cfg=RenderCfg(renderer=renderer),
         )
         self.sim = SimulationManager(config)
         # Create batch of cameras
@@ -138,24 +142,41 @@ def test_set_intrinsics(self):
 
     def teardown_method(self):
         """Clean up resources after each test method."""
-        self.sim.destroy()
+        if (
+            hasattr(self, "camera")
+            and getattr(self.camera, "uid", None) is not None
+            and hasattr(self, "sim")
+        ):
+            self.sim.remove_asset(self.camera.uid)
+        if hasattr(self, "sim"):
+            self.sim.destroy()
+        import embodichain.lab.sim as om
+
+        om.SimulationManager.flush_cleanup_queue()
+        import gc
 
+        gc.collect()
 
-class TestStereoCameraRaster(StereoCameraTest):
+
+class TestStereoCameraHybrid(StereoCameraTest):
     def setup_method(self):
-        self.setup_simulation("cpu", enable_rt=False)
+
+        self.setup_simulation("cpu", renderer="hybrid")
 
 
-class TestStereoCameraRaster(StereoCameraTest):
+class TestStereoCameraHybridCUDA(StereoCameraTest):
     def setup_method(self):
-        self.setup_simulation("cuda", enable_rt=False)
+
+        self.setup_simulation("cuda", renderer="hybrid")
 
 
 class TestStereoCameraFastRT(StereoCameraTest):
     def setup_method(self):
-        self.setup_simulation("cpu", enable_rt=True)
 
+        self.setup_simulation("cpu", renderer="fast-rt")
 
-class TestStereoCameraFastRT(StereoCameraTest):
+
+class TestStereoCameraFastRTCUDA(StereoCameraTest):
     def setup_method(self):
-        self.setup_simulation("cuda", enable_rt=True)
+
+        self.setup_simulation("cuda", renderer="fast-rt")
diff --git a/tests/sim/solvers/test_differential_solver.py b/tests/sim/solvers/test_differential_solver.py
index ace1c5d1..0e22a567 100644
--- a/tests/sim/solvers/test_differential_solver.py
+++ b/tests/sim/solvers/test_differential_solver.py
@@ -21,7 +21,7 @@
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.objects import Robot
-from embodichain.lab.sim.cfg import RobotCfg
+from embodichain.lab.sim.cfg import RobotCfg, RenderCfg
 from embodichain.data import get_data_path
 
 
diff --git a/tests/sim/solvers/test_opw_solver.py b/tests/sim/solvers/test_opw_solver.py
index fe04f4b4..7dae255d 100644
--- a/tests/sim/solvers/test_opw_solver.py
+++ b/tests/sim/solvers/test_opw_solver.py
@@ -21,6 +21,7 @@
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.objects import Robot
 from embodichain.lab.sim.robots import CobotMagicCfg
+from embodichain.lab.sim.cfg import RenderCfg
 
 
 def grid_sample_qpos_from_limits(
@@ -28,6 +29,7 @@ def grid_sample_qpos_from_limits(
     steps_per_joint: int = 4,
     device=None,
     max_samples: int = 4096,
+    safe_margin: float = 5 / 180 * np.pi,  # 5 degrees in radians
 ) -> torch.Tensor:
     """Generate grid samples for qpos from qpos_limits.
 
@@ -44,8 +46,8 @@ def grid_sample_qpos_from_limits(
         device = qpos_limits.device
 
     limits = qpos_limits.squeeze(0) if qpos_limits.dim() == 3 else qpos_limits
-    lows = limits[:, 0].to(device)
-    highs = limits[:, 1].to(device)
+    lows = limits[:, 0].to(device) + safe_margin * 1.01
+    highs = limits[:, 1].to(device) - safe_margin * 1.01
 
     # create per-joint linspaces
     grids = [
@@ -97,13 +99,26 @@ def setup_simulation(self, sim_device):
                     "class_type": "OPWSolver",
                     "end_link_name": "left_link6",
                     "root_link_name": "left_arm_base",
-                    "tcp": [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0.143], [0, 0, 0, 1]],
+                    "tcp": [
+                        [0, 0, -1, 0],
+                        [0, 1, 0, 0],
+                        [1, 0, 0, 0.143],
+                        [0, 0, 0, 1],
+                    ],
+                    "qpos_limits": [
+                        [-2.618, 0.0, -2.967, -1.745, -1.22, -2.0944],
+                        [2.618, 3.14159, 0.0, 1.745, 1.22, 2.0944],
+                    ],
                 },
                 "right_arm": {
                     "class_type": "OPWSolver",
                     "end_link_name": "right_link6",
                     "root_link_name": "right_arm_base",
                     "tcp": [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0.143], [0, 0, 0, 1]],
+                    "qpos_limits": [
+                        [-2.618, 0.0, -2.967, -1.745, -1.22, -2.0944],
+                        [2.618, 3.14159, 0.0, 1.745, 1.22, 2.0944],
+                    ],
                 },
             },
         }
@@ -165,7 +180,7 @@ def test_ik(self, arm_name: str):
             device=self.robot.device,
         )
         res, ik_qpos = self.robot.compute_ik(
-            pose=invalid_pose, joint_seed=ik_qpos, name=arm_name
+            pose=invalid_pose, joint_seed=ik_qpos[:, 0, :], name=arm_name
         )
         dof = ik_qpos.shape[-1]
         assert res[0] == False
@@ -181,7 +196,6 @@ def setup_method(self):
         self.setup_simulation("cpu")
 
 
-@pytest.mark.skip(reason="Skipping CUDA tests temporarily")
 class TestOPWSolverCUDA(BaseSolverTest):
     def setup_method(self):
         self.setup_simulation("cuda")
diff --git a/tests/sim/solvers/test_pink_solver.py b/tests/sim/solvers/test_pink_solver.py
index a8fda5fd..d5589fde 100644
--- a/tests/sim/solvers/test_pink_solver.py
+++ b/tests/sim/solvers/test_pink_solver.py
@@ -21,7 +21,7 @@
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.objects import Robot
-from embodichain.lab.sim.cfg import RobotCfg
+from embodichain.lab.sim.cfg import RobotCfg, RenderCfg
 from embodichain.data import get_data_path
 
 
diff --git a/tests/sim/solvers/test_pinocchio_solver.py b/tests/sim/solvers/test_pinocchio_solver.py
index 34c91c47..698cb1f9 100644
--- a/tests/sim/solvers/test_pinocchio_solver.py
+++ b/tests/sim/solvers/test_pinocchio_solver.py
@@ -21,7 +21,7 @@
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.objects import Robot
-from embodichain.lab.sim.cfg import RobotCfg
+from embodichain.lab.sim.cfg import RobotCfg, RenderCfg
 from embodichain.data import get_data_path
 
 
diff --git a/tests/sim/solvers/test_pytorch_solver.py b/tests/sim/solvers/test_pytorch_solver.py
index 5339c130..64bafee8 100644
--- a/tests/sim/solvers/test_pytorch_solver.py
+++ b/tests/sim/solvers/test_pytorch_solver.py
@@ -21,8 +21,48 @@
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.objects import Robot
-from embodichain.lab.sim.cfg import RobotCfg
+from embodichain.lab.sim.cfg import RobotCfg, RenderCfg
 from embodichain.data import get_data_path
+from embodichain.utils.utility import reset_all_seeds
+
+
+def grid_sample_qpos_from_limits(
+    qpos_limits: torch.Tensor,
+    steps_per_joint: int = 4,
+    device=None,
+    max_samples: int = 4096,
+) -> torch.Tensor:
+    """Generate grid samples for qpos from qpos_limits.
+
+    Args:
+        qpos_limits: tensor of shape (1, n, 2) or (n, 2) where each row is [low, high].
+        steps_per_joint: number of values per joint (defaults to 2: low and high).
+        device: torch device to place the samples on.
+        max_samples: cap the number of returned samples (take first N if grid is larger).
+
+    Returns:
+        Tensor of shape (N, n) where N <= max_samples.
+    """
+    if device is None:
+        device = qpos_limits.device
+
+    limits = qpos_limits.squeeze(0) if qpos_limits.dim() == 3 else qpos_limits
+    lows = limits[:, 0].to(device) + 1e-2
+    highs = limits[:, 1].to(device) - 1e-2
+
+    # create per-joint linspaces
+    grids = [
+        torch.linspace(l.item(), h.item(), steps_per_joint, device=device)
+        for l, h in zip(lows, highs)
+    ]
+
+    # meshgrid and stack
+    mesh = torch.meshgrid(*grids, indexing="ij")
+    stacked = torch.stack([m.reshape(-1) for m in mesh], dim=1)
+
+    if stacked.shape[0] > max_samples:
+        return stacked[:max_samples]
+    return stacked
 
 
 # Base test class for CPU and CUDA
@@ -50,11 +90,13 @@ def setup_simulation(self, solver_type: str):
                     "end_link_name": "left_ee",
                     "root_link_name": "left_arm_base",
                     "ik_nearest_weight": [1.0, 1.0, 1.0, 0.9, 0.9, 0.1, 0.1],
+                    "num_samples": 30,
                 },
                 "right_arm": {
                     "class_type": solver_type,
                     "end_link_name": "right_ee",
                     "root_link_name": "right_arm_base",
+                    "num_samples": 30,
                 },
             },
         }
@@ -66,27 +108,46 @@ def setup_simulation(self, solver_type: str):
 
     @pytest.mark.parametrize("arm_name", ["left_arm", "right_arm"])
     def test_ik(self, arm_name: str):
-        # Test inverse kinematics (IK) with a 1x4x4 homogeneous matrix pose and a joint_seed
+        reset_all_seeds(0)
+        qpos_limit = torch.tensor(
+            [
+                [0.2, 0.8],
+                [0.2, 0.8],
+                [0.2, 0.8],
+                [0.2, 0.8],
+                [0.2, 0.8],
+                [0.2, 0.8],
+                [0.2, 0.8],
+            ]
+        )
+        # generate a small grid of qpos samples from the joint limits (low/high)
+        sample_qpos = grid_sample_qpos_from_limits(
+            qpos_limit, steps_per_joint=3, device=self.robot.device, max_samples=200
+        )
+        sample_qpos = sample_qpos[None, :, :]
 
-        qpos_fk = torch.tensor(
-            [[0.0, 0.0, 0.0, -np.pi / 4, 0.0, 0.0, 0.0]], dtype=torch.float32
+        fk_xpos = self.robot.compute_batch_fk(
+            qpos=sample_qpos, name=arm_name, to_matrix=True
+        )
+        fk_xpos_xyzquat = self.robot.compute_batch_fk(
+            qpos=sample_qpos, name=arm_name, to_matrix=False
         )
 
-        fk_xpos = self.robot.compute_fk(qpos=qpos_fk, name=arm_name, to_matrix=True)
+        res, ik_qpos = self.robot.compute_batch_ik(
+            pose=fk_xpos, joint_seed=sample_qpos, name=arm_name
+        )
 
-        res, ik_qpos = self.robot.compute_ik(pose=fk_xpos, name=arm_name)
+        res, ik_qpos_xyzquat = self.robot.compute_batch_ik(
+            pose=fk_xpos_xyzquat, joint_seed=sample_qpos, name=arm_name
+        )
 
-        if ik_qpos.dim() == 3:
-            ik_xpos = self.robot.compute_fk(
-                qpos=ik_qpos[0][0], name=arm_name, to_matrix=True
-            )
-        else:
-            ik_xpos = self.robot.compute_fk(qpos=ik_qpos, name=arm_name, to_matrix=True)
+        ik_xpos = self.robot.compute_batch_fk(
+            qpos=ik_qpos_xyzquat, name=arm_name, to_matrix=True
+        )
 
         assert torch.allclose(
-            fk_xpos, ik_xpos, atol=1e-2, rtol=1e-2
-        ), f"FK and IK results do not match for {arm_name}"
-
+            fk_xpos, ik_xpos, atol=5e-3, rtol=5e-3
+        ), f"FK and IK xpos do not match for {arm_name}"
         # test for failed xpos
         invalid_pose = torch.tensor(
             [
@@ -101,10 +162,10 @@ def test_ik(self, arm_name: str):
             device=self.robot.device,
         )
         res, ik_qpos = self.robot.compute_ik(
-            pose=invalid_pose, joint_seed=ik_qpos, name=arm_name
+            pose=invalid_pose, joint_seed=ik_qpos[:, 0, :], name=arm_name
         )
         dof = ik_qpos.shape[-1]
-        assert res[0] == False
+        assert res[0].item() == False
         assert ik_qpos.shape == (1, dof)
 
     def teardown_method(self):
diff --git a/tests/sim/solvers/test_srs_solver.py b/tests/sim/solvers/test_srs_solver.py
index a4a375ed..cfd970e0 100644
--- a/tests/sim/solvers/test_srs_solver.py
+++ b/tests/sim/solvers/test_srs_solver.py
@@ -21,7 +21,7 @@
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.objects import Robot
-from embodichain.lab.sim.cfg import RobotCfg
+from embodichain.lab.sim.cfg import RobotCfg, RenderCfg
 from embodichain.data import get_data_path
 
 from embodichain.lab.sim.solvers.srs_solver import SRSSolver, SRSSolverCfg
@@ -73,7 +73,7 @@ def setup_solver(self, solver_type: str, device: str = "cpu"):
             )
             cfg.urdf_path = urdf
             cfg.dh_params = arm_params.dh_params
-            cfg.qpos_limits = arm_params.qpos_limits
+            cfg.user_qpos_limits = arm_params.qpos_limits
             cfg.T_e_oe = arm_params.T_e_oe
             cfg.T_b_ob = arm_params.T_b_ob
             cfg.link_lengths = arm_params.link_lengths
@@ -289,7 +289,6 @@ def setup_method(self):
         self.setup_simulation(solver_type="SRSSolver", device="cpu")
 
 
-@pytest.mark.skip(reason="Skipping CUDA tests temporarily")
 class TestSRSCUDARobotSolver(BaseRobotSolverTest):
     def setup_method(self):
         self.setup_simulation(solver_type="SRSSolver", device="cuda")
diff --git a/tests/toolkits/test_batch_convex_collision.py b/tests/toolkits/test_batch_convex_collision.py
index 4bf852c8..291e15e1 100644
--- a/tests/toolkits/test_batch_convex_collision.py
+++ b/tests/toolkits/test_batch_convex_collision.py
@@ -60,9 +60,11 @@ def batch_convex_collision_query(device=torch.device("cuda")):
     obj_faces = torch.tensor(obj_mesh.faces, dtype=torch.int32, device=device)
     test_pc = transform_points_mat(obj_verts, poses)
 
-    is_pose_collide, pose_surface_distance = collision_checker.query_batch_points(
+    is_point_collide, point_surface_distance = collision_checker.query_batch_points(
         test_pc, collision_threshold=0.003, is_visual=False
     )
+    is_pose_collide = is_point_collide.any(dim=1)
+    pose_surface_distance = point_surface_distance.min(dim=1).values
     assert is_pose_collide.sum().item() == 1
     assert abs(pose_surface_distance.max().item() - 0.8492) < 1e-2