From f29967f6eee5015fb9ed82c489dbbb60aa5b580a Mon Sep 17 00:00:00 2001
From: "fangyaozheng@bytedance.com" <fangyaozheng@bytedance.com>
Date: Mon, 15 Jun 2026 19:39:42 +0800
Subject: [PATCH 1/2] feat(harness): surface skill-load errors + per-invoke
 max_llm_calls

Skill loading no longer silently skips failures (which left the agent with no
skills while the model was told it had them). build_skill_toolset now fails fast,
raising a SkillLoadError that names the skill and the reason:
- a base skill that fails aborts server startup (deploy surfaces the bad config);
- a one-time (per-invoke) override skill that fails is caught in /harness/invoke
  and its reason is returned to the caller (HTTP 200, in the response output).

Add max_llm_calls, threaded into the runner's RunConfig:
- a harness default (harness.yaml -> MAX_LLM_CALLS env -> HarnessConfig), and
- a per-invocation override via run_agent_request.max_llm_calls.
CLI: 'veadk harness add --max-llm-calls' and 'veadk harness invoke --max-llm-calls'.

Verified locally: a bad skill returns its error (e.g. ADK's 'description must be
at most 1024 characters'); a valid skill (route-weaver) loads and the model uses
it; per-call max_llm_calls values of 2 and 7 and a harness default of 50 all show
up in the run config.
---
 veadk/cli/cli_harness.py         | 23 +++++++++++++++++++-
 veadk/cloud/harness_app/app.py   | 36 +++++++++++++++++++++++++++-----
 veadk/cloud/harness_app/types.py |  8 +++++++
 veadk/cloud/harness_app/utils.py | 29 +++++++++++++++++--------
 4 files changed, 81 insertions(+), 15 deletions(-)

diff --git a/veadk/cli/cli_harness.py b/veadk/cli/cli_harness.py
index 29c793bf..d08aee67 100644
--- a/veadk/cli/cli_harness.py
+++ b/veadk/cli/cli_harness.py
@@ -384,6 +384,13 @@ def _override_options(func):
 @click.option(
     "--short-term-memory-type", default=None, help="Short-term memory backend."
 )
+@click.option(
+    "--max-llm-calls",
+    "max_llm_calls",
+    type=int,
+    default=None,
+    help="Default max LLM calls per run (overridable per invocation).",
+)
 @_connection_options
 @click.option(
     "--path",
@@ -395,6 +402,7 @@ def add(
     knowledgebase_type: str | None,
     long_term_memory_type: str | None,
     short_term_memory_type: str | None,
+    max_llm_calls: int | None,
     path: str,
     model_name: str | None,
     tools: str | None,
@@ -415,6 +423,8 @@ def add(
 
     if harness_name is not None:
         data["harness_name"] = harness_name
+    if max_llm_calls is not None:
+        data["max_llm_calls"] = max_llm_calls
     if model_name is not None:
         model = data.get("model")
         if not isinstance(model, dict):
@@ -812,6 +822,13 @@ def _create_runtime_with_harness_tag(self, request):
     default="cli-session",
     help="Session id for the call.",
 )
+@click.option(
+    "--max-llm-calls",
+    "max_llm_calls",
+    type=int,
+    default=None,
+    help="Override max LLM calls for this call (falls back to the harness default).",
+)
 @click.option(
     "--url",
     default=None,
@@ -836,6 +853,7 @@ def invoke(
     message_opt,
     user_id,
     session_id,
+    max_llm_calls,
     url,
     key,
     path,
@@ -864,10 +882,13 @@ def invoke(
             "or pass --url/--key."
         )
 
+    run_agent_request: dict = {"user_id": user_id, "session_id": session_id}
+    if max_llm_calls is not None:
+        run_agent_request["max_llm_calls"] = max_llm_calls
     body: dict = {
         "prompt": message,
         "harness_name": harness_name,
-        "run_agent_request": {"user_id": user_id, "session_id": session_id},
+        "run_agent_request": run_agent_request,
     }
     override = {name: value for name, value in overrides.items() if value is not None}
     if override:
diff --git a/veadk/cloud/harness_app/app.py b/veadk/cloud/harness_app/app.py
index 1c936bb9..69228d27 100644
--- a/veadk/cloud/harness_app/app.py
+++ b/veadk/cloud/harness_app/app.py
@@ -29,6 +29,7 @@
 from pathlib import Path
 
 from fastapi import FastAPI
+from google.adk.agents import RunConfig
 
 from veadk import Agent
 from veadk.cloud.harness_app.agent import agent, short_term_memory
@@ -36,7 +37,7 @@
     InvokeHarnessRequest,
     InvokeHarnessResponse,
 )
-from veadk.cloud.harness_app.utils import spawn_harness_agent
+from veadk.cloud.harness_app.utils import SkillLoadError, spawn_harness_agent
 from veadk.memory.short_term_memory import ShortTermMemory
 from veadk.runner import Runner
 from veadk.utils.logger import get_logger
@@ -44,6 +45,9 @@
 logger = get_logger(__name__)
 
 HARNESS_NAME = os.getenv("HARNESS_NAME", "default")
+# Default max LLM calls per run, baked into the runtime from harness.yaml
+# (overridable per invocation via run_agent_request.max_llm_calls).
+DEFAULT_MAX_LLM_CALLS = int(os.getenv("MAX_LLM_CALLS", "100"))
 
 
 class HarnessApp:
@@ -52,11 +56,13 @@ def __init__(
         agent: Agent,
         short_term_memory: ShortTermMemory,
         harness_name: str = "default",
+        max_llm_calls: int = 100,
     ):
         self.app = FastAPI()
         self.agent = agent
         self.short_term_memory = short_term_memory
         self.harness_name = harness_name
+        self.max_llm_calls = max_llm_calls
         self.runner = Runner(
             agent=agent,
             short_term_memory=short_term_memory,
@@ -70,6 +76,12 @@ def mount(self):
         async def invoke_harness(
             request: InvokeHarnessRequest,
         ) -> InvokeHarnessResponse:
+            # max LLM calls: per-call override, else the harness default.
+            max_llm_calls = (
+                request.run_agent_request.max_llm_calls or self.max_llm_calls
+            )
+            run_config = RunConfig(max_llm_calls=max_llm_calls)
+
             if request.harness is not None:
                 logger.info(f"Applying once-time harness override: {request.harness}")
                 # The override clones the base agent and may download incremental
@@ -77,9 +89,19 @@ async def invoke_harness(
                 # the agent runs, so the dir is removed (and the one-off agent +
                 # runner dropped) only after the run finishes.
                 with tempfile.TemporaryDirectory(prefix="harness_invoke_") as work_dir:
-                    agent = spawn_harness_agent(
-                        self.agent, request.harness, download_dir=Path(work_dir)
-                    )
+                    try:
+                        agent = spawn_harness_agent(
+                            self.agent, request.harness, download_dir=Path(work_dir)
+                        )
+                    except SkillLoadError as e:
+                        # A once-time skill failed to load; return the reason to
+                        # the caller instead of running with a wrong skill set.
+                        logger.error(f"Once-time skill load failed: {e}")
+                        return InvokeHarnessResponse(
+                            harness_name=self.harness_name,
+                            overwrite=True,
+                            output=str(e),
+                        )
                     runner = Runner(
                         agent=agent,
                         short_term_memory=self.short_term_memory,
@@ -89,12 +111,14 @@ async def invoke_harness(
                         messages=[request.prompt],
                         user_id=request.run_agent_request.user_id,
                         session_id=request.run_agent_request.session_id,
+                        run_config=run_config,
                     )
             else:
                 output = await self.runner.run(
                     messages=[request.prompt],
                     user_id=request.run_agent_request.user_id,
                     session_id=request.run_agent_request.session_id,
+                    run_config=run_config,
                 )
 
             return InvokeHarnessResponse(
@@ -109,7 +133,9 @@ def serve(self, host: str = "0.0.0.0", port: int = 8000) -> None:
         uvicorn.run(self.app, host=host, port=port)
 
 
-harness_app = HarnessApp(agent, short_term_memory, HARNESS_NAME)
+harness_app = HarnessApp(
+    agent, short_term_memory, HARNESS_NAME, max_llm_calls=DEFAULT_MAX_LLM_CALLS
+)
 app = harness_app.app
 
 
diff --git a/veadk/cloud/harness_app/types.py b/veadk/cloud/harness_app/types.py
index 7a874265..5d671ef3 100644
--- a/veadk/cloud/harness_app/types.py
+++ b/veadk/cloud/harness_app/types.py
@@ -74,11 +74,19 @@ class HarnessConfig(HarnessOverrides):
     longterm_memory_type: str = Field(default="")
     shortterm_memory_type: str = Field(default="local")
     runtime: Literal["adk", "codex"] = Field(default="adk")
+    max_llm_calls: int = Field(
+        default=100,
+        description="Default max LLM calls per run; overridable per invocation.",
+    )
 
 
 class RunAgentRequest(BaseModel):
     user_id: str
     session_id: str
+    max_llm_calls: int | None = Field(
+        default=None,
+        description="Override max LLM calls for this single call (falls back to the harness default).",
+    )
 
 
 class InvokeHarnessRequest(BaseModel):
diff --git a/veadk/cloud/harness_app/utils.py b/veadk/cloud/harness_app/utils.py
index 52388d7b..ab32ee06 100644
--- a/veadk/cloud/harness_app/utils.py
+++ b/veadk/cloud/harness_app/utils.py
@@ -51,6 +51,7 @@
     "HarnessOverrides",
     "split_csv",
     "build_skill_toolset",
+    "SkillLoadError",
     "config_from_env",
     "init_harness_agent",
     "spawn_harness_agent",
@@ -75,6 +76,7 @@
     "knowledgebase_type": "KNOWLEDGEBASE_TYPE",
     "longterm_memory_type": "LONG_TERM_MEMORY_TYPE",
     "shortterm_memory_type": "SHORT_TERM_MEMORY_TYPE",
+    "max_llm_calls": "MAX_LLM_CALLS",
 }
 
 
@@ -143,6 +145,14 @@ def _download_and_extract_skill(skill: str, dest_dir: Path) -> Path:
     return skill_dir
 
 
+class SkillLoadError(RuntimeError):
+    """A skill failed to download or load (e.g. a malformed ``SKILL.md``).
+
+    Raised instead of silently skipping so the failure surfaces — at the server
+    startup for a base skill, or in the invoke response for a per-call override.
+    """
+
+
 def build_skill_toolset(
     skills: list[str], download_dir: Path | None = None
 ) -> SkillToolset | None:
@@ -152,13 +162,18 @@ def build_skill_toolset(
     and loaded via ``load_skill_from_dir``. The directory is **not** cleaned up
     here: a skill's scripts/assets are read from disk while the agent runs, so
     the caller owns the directory's lifetime (the base agent keeps its skills for
-    the server's lifetime; a per-invoke override cleans up after the run). Skills
-    that fail to download or load (e.g. a malformed ``SKILL.md``) are skipped with
-    a warning so the rest still load.
+    the server's lifetime; a per-invoke override cleans up after the run).
+
+    Fast-fail: if *any* skill fails to download or load (e.g. a ``SKILL.md`` whose
+    description exceeds ADK's limit), a :class:`SkillLoadError` is raised naming
+    the skill and the reason — the whole call is aborted rather than running with
+    a partial skill set.
 
     Returns:
-        A :class:`SkillToolset` of the loaded skills, or ``None`` if none loaded.
+        A :class:`SkillToolset` of the loaded skills, or ``None`` for no skills.
     """
+    if not skills:
+        return None
     if download_dir is None:
         download_dir = Path(tempfile.mkdtemp(prefix="harness_skills_"))
     loaded_skills = []
@@ -168,11 +183,7 @@ def build_skill_toolset(
                 load_skill_from_dir(_download_and_extract_skill(skill, download_dir))
             )
         except Exception as e:
-            logger.warning(f"Skipping skill '{skill}': {e}")
-
-    if not loaded_skills:
-        logger.warning("No skills loaded successfully; skipping skill toolset.")
-        return None
+            raise SkillLoadError(f"Skill '{skill}' failed to load: {e}") from e
     return SkillToolset(skills=loaded_skills)
 
 

From ddb9a3e6a4efb6be696254b4f01288a1cc8dd929 Mon Sep 17 00:00:00 2001
From: "fangyaozheng@bytedance.com" <fangyaozheng@bytedance.com>
Date: Mon, 15 Jun 2026 19:56:38 +0800
Subject: [PATCH 2/2] fix(harness): drop the hardcoded max_llm_calls default;
 fix contract tests

max_llm_calls is now optional everywhere (HarnessConfig / HarnessApp): when
neither the harness default nor the per-call override is set, the runner uses
ADK RunConfig's own default (500) instead of a forced 100.

Update the contract tests to include the new max_llm_calls field on
HarnessConfig and RunAgentRequest.
---
 tests/cloud/test_harness_app_contract.py |  7 ++++++-
 veadk/cloud/harness_app/app.py           | 19 +++++++++++++------
 veadk/cloud/harness_app/types.py         |  8 ++++----
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/tests/cloud/test_harness_app_contract.py b/tests/cloud/test_harness_app_contract.py
index c7cedd87..62d4bb04 100644
--- a/tests/cloud/test_harness_app_contract.py
+++ b/tests/cloud/test_harness_app_contract.py
@@ -83,6 +83,7 @@ def test_adds_creation_time_fields(self):
             "knowledgebase_type",
             "longterm_memory_type",
             "shortterm_memory_type",
+            "max_llm_calls",
         }
 
     def test_component_defaults(self):
@@ -103,7 +104,11 @@ def test_app_name_populated_via_name_alias(self):
 
 class TestRequestResponseSchemas:
     def test_run_agent_request_fields(self):
-        assert set(_fields(RunAgentRequest)) == {"user_id", "session_id"}
+        assert set(_fields(RunAgentRequest)) == {
+            "user_id",
+            "session_id",
+            "max_llm_calls",
+        }
 
     def test_invoke_request_fields(self):
         assert set(_fields(InvokeHarnessRequest)) == {
diff --git a/veadk/cloud/harness_app/app.py b/veadk/cloud/harness_app/app.py
index 69228d27..36245624 100644
--- a/veadk/cloud/harness_app/app.py
+++ b/veadk/cloud/harness_app/app.py
@@ -45,9 +45,11 @@
 logger = get_logger(__name__)
 
 HARNESS_NAME = os.getenv("HARNESS_NAME", "default")
-# Default max LLM calls per run, baked into the runtime from harness.yaml
-# (overridable per invocation via run_agent_request.max_llm_calls).
-DEFAULT_MAX_LLM_CALLS = int(os.getenv("MAX_LLM_CALLS", "100"))
+# Optional harness default max LLM calls per run, from harness.yaml (overridable
+# per invocation). Unset -> falls through to ADK RunConfig's own default.
+DEFAULT_MAX_LLM_CALLS = (
+    int(os.environ["MAX_LLM_CALLS"]) if os.environ.get("MAX_LLM_CALLS") else None
+)
 
 
 class HarnessApp:
@@ -56,7 +58,7 @@ def __init__(
         agent: Agent,
         short_term_memory: ShortTermMemory,
         harness_name: str = "default",
-        max_llm_calls: int = 100,
+        max_llm_calls: int | None = None,
     ):
         self.app = FastAPI()
         self.agent = agent
@@ -76,11 +78,16 @@ def mount(self):
         async def invoke_harness(
             request: InvokeHarnessRequest,
         ) -> InvokeHarnessResponse:
-            # max LLM calls: per-call override, else the harness default.
+            # max LLM calls: per-call override, else the harness default; if
+            # neither is set, fall through to ADK RunConfig's own default.
             max_llm_calls = (
                 request.run_agent_request.max_llm_calls or self.max_llm_calls
             )
-            run_config = RunConfig(max_llm_calls=max_llm_calls)
+            run_config = (
+                RunConfig(max_llm_calls=max_llm_calls)
+                if max_llm_calls is not None
+                else RunConfig()
+            )
 
             if request.harness is not None:
                 logger.info(f"Applying once-time harness override: {request.harness}")
diff --git a/veadk/cloud/harness_app/types.py b/veadk/cloud/harness_app/types.py
index 5d671ef3..18a4d749 100644
--- a/veadk/cloud/harness_app/types.py
+++ b/veadk/cloud/harness_app/types.py
@@ -74,9 +74,9 @@ class HarnessConfig(HarnessOverrides):
     longterm_memory_type: str = Field(default="")
     shortterm_memory_type: str = Field(default="local")
     runtime: Literal["adk", "codex"] = Field(default="adk")
-    max_llm_calls: int = Field(
-        default=100,
-        description="Default max LLM calls per run; overridable per invocation.",
+    max_llm_calls: int | None = Field(
+        default=None,
+        description="Default max LLM calls per run; unset follows ADK RunConfig's default. Overridable per invocation.",
     )
 
 
@@ -85,7 +85,7 @@ class RunAgentRequest(BaseModel):
     session_id: str
     max_llm_calls: int | None = Field(
         default=None,
-        description="Override max LLM calls for this single call (falls back to the harness default).",
+        description="Override max LLM calls for this single call (falls back to the harness default, then ADK's).",
     )