diff --git a/tests/cloud/test_harness_app_contract.py b/tests/cloud/test_harness_app_contract.py
index c7cedd87..62d4bb04 100644
--- a/tests/cloud/test_harness_app_contract.py
+++ b/tests/cloud/test_harness_app_contract.py
@@ -83,6 +83,7 @@ def test_adds_creation_time_fields(self):
             "knowledgebase_type",
             "longterm_memory_type",
             "shortterm_memory_type",
+            "max_llm_calls",
         }
 
     def test_component_defaults(self):
@@ -103,7 +104,11 @@ def test_app_name_populated_via_name_alias(self):
 
 class TestRequestResponseSchemas:
     def test_run_agent_request_fields(self):
-        assert set(_fields(RunAgentRequest)) == {"user_id", "session_id"}
+        assert set(_fields(RunAgentRequest)) == {
+            "user_id",
+            "session_id",
+            "max_llm_calls",
+        }
 
     def test_invoke_request_fields(self):
         assert set(_fields(InvokeHarnessRequest)) == {
diff --git a/veadk/cli/cli_harness.py b/veadk/cli/cli_harness.py
index 29c793bf..d08aee67 100644
--- a/veadk/cli/cli_harness.py
+++ b/veadk/cli/cli_harness.py
@@ -384,6 +384,13 @@ def _override_options(func):
 @click.option(
     "--short-term-memory-type", default=None, help="Short-term memory backend."
 )
+@click.option(
+    "--max-llm-calls",
+    "max_llm_calls",
+    type=int,
+    default=None,
+    help="Default max LLM calls per run (overridable per invocation).",
+)
 @_connection_options
 @click.option(
     "--path",
@@ -395,6 +402,7 @@ def add(
     knowledgebase_type: str | None,
     long_term_memory_type: str | None,
     short_term_memory_type: str | None,
+    max_llm_calls: int | None,
     path: str,
     model_name: str | None,
     tools: str | None,
@@ -415,6 +423,8 @@ def add(
 
     if harness_name is not None:
         data["harness_name"] = harness_name
+    if max_llm_calls is not None:
+        data["max_llm_calls"] = max_llm_calls
     if model_name is not None:
         model = data.get("model")
         if not isinstance(model, dict):
@@ -812,6 +822,13 @@ def _create_runtime_with_harness_tag(self, request):
     default="cli-session",
     help="Session id for the call.",
 )
+@click.option(
+    "--max-llm-calls",
+    "max_llm_calls",
+    type=int,
+    default=None,
+    help="Override max LLM calls for this call (falls back to the harness default).",
+)
 @click.option(
     "--url",
     default=None,
@@ -836,6 +853,7 @@ def invoke(
     message_opt,
     user_id,
     session_id,
+    max_llm_calls,
     url,
     key,
     path,
@@ -864,10 +882,13 @@ def invoke(
             "or pass --url/--key."
         )
 
+    run_agent_request: dict = {"user_id": user_id, "session_id": session_id}
+    if max_llm_calls is not None:
+        run_agent_request["max_llm_calls"] = max_llm_calls
     body: dict = {
         "prompt": message,
         "harness_name": harness_name,
-        "run_agent_request": {"user_id": user_id, "session_id": session_id},
+        "run_agent_request": run_agent_request,
     }
     override = {name: value for name, value in overrides.items() if value is not None}
     if override:
diff --git a/veadk/cloud/harness_app/app.py b/veadk/cloud/harness_app/app.py
index 1c936bb9..36245624 100644
--- a/veadk/cloud/harness_app/app.py
+++ b/veadk/cloud/harness_app/app.py
@@ -29,6 +29,7 @@
 from pathlib import Path
 
 from fastapi import FastAPI
+from google.adk.agents import RunConfig
 
 from veadk import Agent
 from veadk.cloud.harness_app.agent import agent, short_term_memory
@@ -36,7 +37,7 @@
     InvokeHarnessRequest,
     InvokeHarnessResponse,
 )
-from veadk.cloud.harness_app.utils import spawn_harness_agent
+from veadk.cloud.harness_app.utils import SkillLoadError, spawn_harness_agent
 from veadk.memory.short_term_memory import ShortTermMemory
 from veadk.runner import Runner
 from veadk.utils.logger import get_logger
@@ -44,6 +45,11 @@
 logger = get_logger(__name__)
 
 HARNESS_NAME = os.getenv("HARNESS_NAME", "default")
+# Optional harness default max LLM calls per run, from harness.yaml (overridable
+# per invocation). Unset -> falls through to ADK RunConfig's own default.
+DEFAULT_MAX_LLM_CALLS = (
+    int(os.environ["MAX_LLM_CALLS"]) if os.environ.get("MAX_LLM_CALLS") else None
+)
 
 
 class HarnessApp:
@@ -52,11 +58,13 @@ def __init__(
         agent: Agent,
         short_term_memory: ShortTermMemory,
         harness_name: str = "default",
+        max_llm_calls: int | None = None,
     ):
         self.app = FastAPI()
         self.agent = agent
         self.short_term_memory = short_term_memory
         self.harness_name = harness_name
+        self.max_llm_calls = max_llm_calls
         self.runner = Runner(
             agent=agent,
             short_term_memory=short_term_memory,
@@ -70,6 +78,17 @@ def mount(self):
         async def invoke_harness(
             request: InvokeHarnessRequest,
         ) -> InvokeHarnessResponse:
+            # max LLM calls: per-call override (None check, so 0 is kept), else
+            # the harness default; if neither, ADK RunConfig's own default.
+            max_llm_calls = request.run_agent_request.max_llm_calls
+            if max_llm_calls is None:
+                max_llm_calls = self.max_llm_calls
+            run_config = (
+                RunConfig(max_llm_calls=max_llm_calls)
+                if max_llm_calls is not None
+                else RunConfig()
+            )
+
             if request.harness is not None:
                 logger.info(f"Applying once-time harness override: {request.harness}")
                 # The override clones the base agent and may download incremental
@@ -77,9 +96,19 @@ async def invoke_harness(
                 # the agent runs, so the dir is removed (and the one-off agent +
                 # runner dropped) only after the run finishes.
                 with tempfile.TemporaryDirectory(prefix="harness_invoke_") as work_dir:
-                    agent = spawn_harness_agent(
-                        self.agent, request.harness, download_dir=Path(work_dir)
-                    )
+                    try:
+                        agent = spawn_harness_agent(
+                            self.agent, request.harness, download_dir=Path(work_dir)
+                        )
+                    except SkillLoadError as e:
+                        # A once-time skill failed to load; return the reason to
+                        # the caller instead of running with a wrong skill set.
+                        logger.error(f"Once-time skill load failed: {e}")
+                        return InvokeHarnessResponse(
+                            harness_name=self.harness_name,
+                            overwrite=True,
+                            output=str(e),
+                        )
                     runner = Runner(
                         agent=agent,
                         short_term_memory=self.short_term_memory,
@@ -89,12 +118,14 @@ async def invoke_harness(
                         messages=[request.prompt],
                         user_id=request.run_agent_request.user_id,
                         session_id=request.run_agent_request.session_id,
+                        run_config=run_config,
                     )
             else:
                 output = await self.runner.run(
                     messages=[request.prompt],
                     user_id=request.run_agent_request.user_id,
                     session_id=request.run_agent_request.session_id,
+                    run_config=run_config,
                 )
 
             return InvokeHarnessResponse(
@@ -109,7 +140,9 @@ def serve(self, host: str = "0.0.0.0", port: int = 8000) -> None:
         uvicorn.run(self.app, host=host, port=port)
 
 
-harness_app = HarnessApp(agent, short_term_memory, HARNESS_NAME)
+harness_app = HarnessApp(
+    agent, short_term_memory, HARNESS_NAME, max_llm_calls=DEFAULT_MAX_LLM_CALLS
+)
 app = harness_app.app
 
 
diff --git a/veadk/cloud/harness_app/types.py b/veadk/cloud/harness_app/types.py
index 7a874265..18a4d749 100644
--- a/veadk/cloud/harness_app/types.py
+++ b/veadk/cloud/harness_app/types.py
@@ -74,11 +74,19 @@ class HarnessConfig(HarnessOverrides):
     longterm_memory_type: str = Field(default="")
     shortterm_memory_type: str = Field(default="local")
     runtime: Literal["adk", "codex"] = Field(default="adk")
+    max_llm_calls: int | None = Field(
+        default=None,
+        description="Default max LLM calls per run; unset follows ADK RunConfig's default. Overridable per invocation.",
+    )
 
 
 class RunAgentRequest(BaseModel):
     user_id: str
     session_id: str
+    max_llm_calls: int | None = Field(
+        default=None,
+        description="Override max LLM calls for this single call (falls back to the harness default, then ADK's).",
+    )
 
 
 class InvokeHarnessRequest(BaseModel):
diff --git a/veadk/cloud/harness_app/utils.py b/veadk/cloud/harness_app/utils.py
index 52388d7b..ab32ee06 100644
--- a/veadk/cloud/harness_app/utils.py
+++ b/veadk/cloud/harness_app/utils.py
@@ -51,6 +51,7 @@
     "HarnessOverrides",
     "split_csv",
     "build_skill_toolset",
+    "SkillLoadError",
     "config_from_env",
     "init_harness_agent",
     "spawn_harness_agent",
@@ -75,6 +76,7 @@
     "knowledgebase_type": "KNOWLEDGEBASE_TYPE",
     "longterm_memory_type": "LONG_TERM_MEMORY_TYPE",
     "shortterm_memory_type": "SHORT_TERM_MEMORY_TYPE",
+    "max_llm_calls": "MAX_LLM_CALLS",
 }
 
 
@@ -143,6 +145,14 @@ def _download_and_extract_skill(skill: str, dest_dir: Path) -> Path:
     return skill_dir
 
 
+class SkillLoadError(RuntimeError):
+    """A skill failed to download or load (e.g. a malformed ``SKILL.md``).
+
+    Raised instead of silently skipping so the failure surfaces — at the server
+    startup for a base skill, or in the invoke response for a per-call override.
+    """
+
+
 def build_skill_toolset(
     skills: list[str], download_dir: Path | None = None
 ) -> SkillToolset | None:
@@ -152,13 +162,18 @@ def build_skill_toolset(
     and loaded via ``load_skill_from_dir``. The directory is **not** cleaned up here:
     a skill's scripts/assets are read from disk while the agent runs, so the caller
     owns the directory's lifetime (the base agent keeps its skills for
-    the server's lifetime; a per-invoke override cleans up after the run). Skills
-    that fail to download or load (e.g. a malformed ``SKILL.md``) are skipped with
-    a warning so the rest still load.
+    the server's lifetime; a per-invoke override cleans up after the run).
+
+    Fast-fail: if *any* skill fails to download or load (e.g. a ``SKILL.md`` whose
+    description exceeds ADK's limit), a :class:`SkillLoadError` is raised naming
+    the skill and the reason — the whole call is aborted rather than running with
+    a partial skill set.
 
     Returns:
-        A :class:`SkillToolset` of the loaded skills, or ``None`` if none loaded.
+        A :class:`SkillToolset` of the loaded skills, or ``None`` for no skills.
     """
+    if not skills:
+        return None
     if download_dir is None:
         download_dir = Path(tempfile.mkdtemp(prefix="harness_skills_"))
     loaded_skills = []
@@ -168,11 +183,7 @@ def build_skill_toolset(
                 load_skill_from_dir(_download_and_extract_skill(skill, download_dir))
             )
         except Exception as e:
-            logger.warning(f"Skipping skill '{skill}': {e}")
-
-    if not loaded_skills:
-        logger.warning("No skills loaded successfully; skipping skill toolset.")
-        return None
+            raise SkillLoadError(f"Skill '{skill}' failed to load: {e}") from e
 
     return SkillToolset(skills=loaded_skills)
 