From f29967f6eee5015fb9ed82c489dbbb60aa5b580a Mon Sep 17 00:00:00 2001 From: "fangyaozheng@bytedance.com" Date: Mon, 15 Jun 2026 19:39:42 +0800 Subject: [PATCH 1/2] feat(harness): surface skill-load errors + per-invoke max_llm_calls Skill loading no longer silently skips failures (which left the agent with no skills while the model was told it had them). build_skill_toolset now fast-fails with a SkillLoadError naming the skill and reason: - a base skill that fails aborts server startup (deploy surfaces the bad config); - a once-time override skill that fails is caught in /harness/invoke and its reason is returned to the caller (HTTP 200, in the response output). Add max_llm_calls, threaded into the runner's RunConfig: - a harness default (harness.yaml -> MAX_LLM_CALLS env -> HarnessConfig), and - a per-invocation override via run_agent_request.max_llm_calls. CLI: 'veadk harness add --max-llm-calls' and 'veadk harness invoke --max-llm-calls'. Verified locally: bad skill returns its error (e.g. ADK's 'description must be at most 1024 characters'); a valid skill (route-weaver) loads and the model uses it; max_llm_calls=2/7 per call and the 50 default all show up in the run config. --- veadk/cli/cli_harness.py | 23 +++++++++++++++++++- veadk/cloud/harness_app/app.py | 36 +++++++++++++++++++++++++++----- veadk/cloud/harness_app/types.py | 8 +++++++ veadk/cloud/harness_app/utils.py | 29 +++++++++++++++++-------- 4 files changed, 81 insertions(+), 15 deletions(-) diff --git a/veadk/cli/cli_harness.py b/veadk/cli/cli_harness.py index 29c793bf..d08aee67 100644 --- a/veadk/cli/cli_harness.py +++ b/veadk/cli/cli_harness.py @@ -384,6 +384,13 @@ def _override_options(func): @click.option( "--short-term-memory-type", default=None, help="Short-term memory backend." ) +@click.option( + "--max-llm-calls", + "max_llm_calls", + type=int, + default=None, + help="Default max LLM calls per run (overridable per invocation).", +) @_connection_options @click.option( "--path", @@ -395,6 +402,7 @@ def add( knowledgebase_type: str | None, long_term_memory_type: str | None, short_term_memory_type: str | None, + max_llm_calls: int | None, path: str, model_name: str | None, tools: str | None, @@ -415,6 +423,8 @@ def add( if harness_name is not None: data["harness_name"] = harness_name + if max_llm_calls is not None: + data["max_llm_calls"] = max_llm_calls if model_name is not None: model = data.get("model") if not isinstance(model, dict): @@ -812,6 +822,13 @@ def _create_runtime_with_harness_tag(self, request): default="cli-session", help="Session id for the call.", ) +@click.option( + "--max-llm-calls", + "max_llm_calls", + type=int, + default=None, + help="Override max LLM calls for this call (falls back to the harness default).", +) @click.option( "--url", default=None, @@ -836,6 +853,7 @@ def invoke( message_opt, user_id, session_id, + max_llm_calls, url, key, path, @@ -864,10 +882,13 @@ def invoke( "or pass --url/--key." ) + run_agent_request: dict = {"user_id": user_id, "session_id": session_id} + if max_llm_calls is not None: + run_agent_request["max_llm_calls"] = max_llm_calls body: dict = { "prompt": message, "harness_name": harness_name, - "run_agent_request": {"user_id": user_id, "session_id": session_id}, + "run_agent_request": run_agent_request, } override = {name: value for name, value in overrides.items() if value is not None} if override: diff --git a/veadk/cloud/harness_app/app.py b/veadk/cloud/harness_app/app.py index 1c936bb9..69228d27 100644 --- a/veadk/cloud/harness_app/app.py +++ b/veadk/cloud/harness_app/app.py @@ -29,6 +29,7 @@ from pathlib import Path from fastapi import FastAPI +from google.adk.agents import RunConfig from veadk import Agent from veadk.cloud.harness_app.agent import agent, short_term_memory @@ -36,7 +37,7 @@ InvokeHarnessRequest, InvokeHarnessResponse, ) -from veadk.cloud.harness_app.utils import spawn_harness_agent +from veadk.cloud.harness_app.utils import SkillLoadError, spawn_harness_agent from veadk.memory.short_term_memory import ShortTermMemory from veadk.runner import Runner from veadk.utils.logger import get_logger @@ -44,6 +45,9 @@ logger = get_logger(__name__) HARNESS_NAME = os.getenv("HARNESS_NAME", "default") +# Default max LLM calls per run, baked into the runtime from harness.yaml +# (overridable per invocation via run_agent_request.max_llm_calls). +DEFAULT_MAX_LLM_CALLS = int(os.getenv("MAX_LLM_CALLS", "100")) class HarnessApp: @@ -52,11 +56,13 @@ def __init__( agent: Agent, short_term_memory: ShortTermMemory, harness_name: str = "default", + max_llm_calls: int = 100, ): self.app = FastAPI() self.agent = agent self.short_term_memory = short_term_memory self.harness_name = harness_name + self.max_llm_calls = max_llm_calls self.runner = Runner( agent=agent, short_term_memory=short_term_memory, @@ -70,6 +76,12 @@ def mount(self): async def invoke_harness( request: InvokeHarnessRequest, ) -> InvokeHarnessResponse: + # max LLM calls: per-call override, else the harness default. + max_llm_calls = ( + request.run_agent_request.max_llm_calls or self.max_llm_calls + ) + run_config = RunConfig(max_llm_calls=max_llm_calls) + if request.harness is not None: logger.info(f"Applying once-time harness override: {request.harness}") # The override clones the base agent and may download incremental @@ -77,9 +89,19 @@ async def invoke_harness( # the agent runs, so the dir is removed (and the one-off agent + # runner dropped) only after the run finishes. with tempfile.TemporaryDirectory(prefix="harness_invoke_") as work_dir: - agent = spawn_harness_agent( - self.agent, request.harness, download_dir=Path(work_dir) - ) + try: + agent = spawn_harness_agent( + self.agent, request.harness, download_dir=Path(work_dir) + ) + except SkillLoadError as e: + # A once-time skill failed to load; return the reason to + # the caller instead of running with a wrong skill set. + logger.error(f"Once-time skill load failed: {e}") + return InvokeHarnessResponse( + harness_name=self.harness_name, + overwrite=True, + output=str(e), + ) runner = Runner( agent=agent, short_term_memory=self.short_term_memory, @@ -89,12 +111,14 @@ async def invoke_harness( messages=[request.prompt], user_id=request.run_agent_request.user_id, session_id=request.run_agent_request.session_id, + run_config=run_config, ) else: output = await self.runner.run( messages=[request.prompt], user_id=request.run_agent_request.user_id, session_id=request.run_agent_request.session_id, + run_config=run_config, ) return InvokeHarnessResponse( @@ -109,7 +133,9 @@ def serve(self, host: str = "0.0.0.0", port: int = 8000) -> None: uvicorn.run(self.app, host=host, port=port) -harness_app = HarnessApp(agent, short_term_memory, HARNESS_NAME) +harness_app = HarnessApp( + agent, short_term_memory, HARNESS_NAME, max_llm_calls=DEFAULT_MAX_LLM_CALLS +) app = harness_app.app diff --git a/veadk/cloud/harness_app/types.py b/veadk/cloud/harness_app/types.py index 7a874265..5d671ef3 100644 --- a/veadk/cloud/harness_app/types.py +++ b/veadk/cloud/harness_app/types.py @@ -74,11 +74,19 @@ class HarnessConfig(HarnessOverrides): longterm_memory_type: str = Field(default="") shortterm_memory_type: str = Field(default="local") runtime: Literal["adk", "codex"] = Field(default="adk") + max_llm_calls: int = Field( + default=100, + description="Default max LLM calls per run; overridable per invocation.", + ) class RunAgentRequest(BaseModel): user_id: str session_id: str + max_llm_calls: int | None = Field( + default=None, + description="Override max LLM calls for this single call (falls back to the harness default).", + ) class InvokeHarnessRequest(BaseModel): diff --git a/veadk/cloud/harness_app/utils.py b/veadk/cloud/harness_app/utils.py index 52388d7b..ab32ee06 100644 --- a/veadk/cloud/harness_app/utils.py +++ b/veadk/cloud/harness_app/utils.py @@ -51,6 +51,7 @@ "HarnessOverrides", "split_csv", "build_skill_toolset", + "SkillLoadError", "config_from_env", "init_harness_agent", "spawn_harness_agent", @@ -75,6 +76,7 @@ "knowledgebase_type": "KNOWLEDGEBASE_TYPE", "longterm_memory_type": "LONG_TERM_MEMORY_TYPE", "shortterm_memory_type": "SHORT_TERM_MEMORY_TYPE", + "max_llm_calls": "MAX_LLM_CALLS", } @@ -143,6 +145,14 @@ def _download_and_extract_skill(skill: str, dest_dir: Path) -> Path: return skill_dir +class SkillLoadError(RuntimeError): + """A skill failed to download or load (e.g. a malformed ``SKILL.md``). + + Raised instead of silently skipping so the failure surfaces — at the server + startup for a base skill, or in the invoke response for a per-call override. + """ + + def build_skill_toolset( skills: list[str], download_dir: Path | None = None ) -> SkillToolset | None: @@ -152,13 +162,18 @@ def build_skill_toolset( and loaded via ``load_skill_from_dir``. The directory is **not** cleaned up here: a skill's scripts/assets are read from disk while the agent runs, so the caller owns the directory's lifetime (the base agent keeps its skills for - the server's lifetime; a per-invoke override cleans up after the run). Skills - that fail to download or load (e.g. a malformed ``SKILL.md``) are skipped with - a warning so the rest still load. + the server's lifetime; a per-invoke override cleans up after the run). + + Fast-fail: if *any* skill fails to download or load (e.g. a ``SKILL.md`` whose + description exceeds ADK's limit), a :class:`SkillLoadError` is raised naming + the skill and the reason — the whole call is aborted rather than running with + a partial skill set. Returns: - A :class:`SkillToolset` of the loaded skills, or ``None`` if none loaded. + A :class:`SkillToolset` of the loaded skills, or ``None`` for no skills. """ + if not skills: + return None if download_dir is None: download_dir = Path(tempfile.mkdtemp(prefix="harness_skills_")) loaded_skills = [] @@ -168,11 +183,7 @@ def build_skill_toolset( load_skill_from_dir(_download_and_extract_skill(skill, download_dir)) ) except Exception as e: - logger.warning(f"Skipping skill '{skill}': {e}") - - if not loaded_skills: - logger.warning("No skills loaded successfully; skipping skill toolset.") - return None + raise SkillLoadError(f"Skill '{skill}' failed to load: {e}") from e return SkillToolset(skills=loaded_skills) From ddb9a3e6a4efb6be696254b4f01288a1cc8dd929 Mon Sep 17 00:00:00 2001 From: "fangyaozheng@bytedance.com" Date: Mon, 15 Jun 2026 19:56:38 +0800 Subject: [PATCH 2/2] fix(harness): drop the hardcoded max_llm_calls default; fix contract tests max_llm_calls is now optional everywhere (HarnessConfig / HarnessApp): when neither the harness default nor the per-call override is set, the runner uses ADK RunConfig's own default (500) instead of a forced 100. Update the contract tests to include the new max_llm_calls field on HarnessConfig and RunAgentRequest. --- tests/cloud/test_harness_app_contract.py | 7 ++++++- veadk/cloud/harness_app/app.py | 19 +++++++++++++------ veadk/cloud/harness_app/types.py | 8 ++++---- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/tests/cloud/test_harness_app_contract.py b/tests/cloud/test_harness_app_contract.py index c7cedd87..62d4bb04 100644 --- a/tests/cloud/test_harness_app_contract.py +++ b/tests/cloud/test_harness_app_contract.py @@ -83,6 +83,7 @@ def test_adds_creation_time_fields(self): "knowledgebase_type", "longterm_memory_type", "shortterm_memory_type", + "max_llm_calls", } def test_component_defaults(self): @@ -103,7 +104,11 @@ def test_app_name_populated_via_name_alias(self): class TestRequestResponseSchemas: def test_run_agent_request_fields(self): - assert set(_fields(RunAgentRequest)) == {"user_id", "session_id"} + assert set(_fields(RunAgentRequest)) == { + "user_id", + "session_id", + "max_llm_calls", + } def test_invoke_request_fields(self): assert set(_fields(InvokeHarnessRequest)) == { diff --git a/veadk/cloud/harness_app/app.py b/veadk/cloud/harness_app/app.py index 69228d27..36245624 100644 --- a/veadk/cloud/harness_app/app.py +++ b/veadk/cloud/harness_app/app.py @@ -45,9 +45,11 @@ logger = get_logger(__name__) HARNESS_NAME = os.getenv("HARNESS_NAME", "default") -# Default max LLM calls per run, baked into the runtime from harness.yaml -# (overridable per invocation via run_agent_request.max_llm_calls). -DEFAULT_MAX_LLM_CALLS = int(os.getenv("MAX_LLM_CALLS", "100")) +# Optional harness default max LLM calls per run, from harness.yaml (overridable +# per invocation). Unset -> falls through to ADK RunConfig's own default. +DEFAULT_MAX_LLM_CALLS = ( + int(os.environ["MAX_LLM_CALLS"]) if os.environ.get("MAX_LLM_CALLS") else None +) class HarnessApp: @@ -56,7 +58,7 @@ def __init__( agent: Agent, short_term_memory: ShortTermMemory, harness_name: str = "default", - max_llm_calls: int = 100, + max_llm_calls: int | None = None, ): self.app = FastAPI() self.agent = agent @@ -76,11 +78,16 @@ def mount(self): async def invoke_harness( request: InvokeHarnessRequest, ) -> InvokeHarnessResponse: - # max LLM calls: per-call override, else the harness default. + # max LLM calls: per-call override, else the harness default; if + # neither is set, fall through to ADK RunConfig's own default. max_llm_calls = ( request.run_agent_request.max_llm_calls or self.max_llm_calls ) - run_config = RunConfig(max_llm_calls=max_llm_calls) + run_config = ( + RunConfig(max_llm_calls=max_llm_calls) + if max_llm_calls is not None + else RunConfig() + ) if request.harness is not None: logger.info(f"Applying once-time harness override: {request.harness}") diff --git a/veadk/cloud/harness_app/types.py b/veadk/cloud/harness_app/types.py index 5d671ef3..18a4d749 100644 --- a/veadk/cloud/harness_app/types.py +++ b/veadk/cloud/harness_app/types.py @@ -74,9 +74,9 @@ class HarnessConfig(HarnessOverrides): longterm_memory_type: str = Field(default="") shortterm_memory_type: str = Field(default="local") runtime: Literal["adk", "codex"] = Field(default="adk") - max_llm_calls: int = Field( - default=100, - description="Default max LLM calls per run; overridable per invocation.", + max_llm_calls: int | None = Field( + default=None, + description="Default max LLM calls per run; unset follows ADK RunConfig's default. Overridable per invocation.", ) @@ -85,7 +85,7 @@ class RunAgentRequest(BaseModel): session_id: str max_llm_calls: int | None = Field( default=None, - description="Override max LLM calls for this single call (falls back to the harness default).", + description="Override max LLM calls for this single call (falls back to the harness default, then ADK's).", )