Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion tests/cloud/test_harness_app_contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def test_adds_creation_time_fields(self):
"knowledgebase_type",
"longterm_memory_type",
"shortterm_memory_type",
"max_llm_calls",
}

def test_component_defaults(self):
Expand All @@ -103,7 +104,11 @@ def test_app_name_populated_via_name_alias(self):

class TestRequestResponseSchemas:
def test_run_agent_request_fields(self):
assert set(_fields(RunAgentRequest)) == {"user_id", "session_id"}
assert set(_fields(RunAgentRequest)) == {
"user_id",
"session_id",
"max_llm_calls",
}

def test_invoke_request_fields(self):
assert set(_fields(InvokeHarnessRequest)) == {
Expand Down
23 changes: 22 additions & 1 deletion veadk/cli/cli_harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,13 @@ def _override_options(func):
@click.option(
"--short-term-memory-type", default=None, help="Short-term memory backend."
)
@click.option(
"--max-llm-calls",
"max_llm_calls",
type=int,
default=None,
help="Default max LLM calls per run (overridable per invocation).",
)
@_connection_options
@click.option(
"--path",
Expand All @@ -395,6 +402,7 @@ def add(
knowledgebase_type: str | None,
long_term_memory_type: str | None,
short_term_memory_type: str | None,
max_llm_calls: int | None,
path: str,
model_name: str | None,
tools: str | None,
Expand All @@ -415,6 +423,8 @@ def add(

if harness_name is not None:
data["harness_name"] = harness_name
if max_llm_calls is not None:
data["max_llm_calls"] = max_llm_calls
if model_name is not None:
model = data.get("model")
if not isinstance(model, dict):
Expand Down Expand Up @@ -812,6 +822,13 @@ def _create_runtime_with_harness_tag(self, request):
default="cli-session",
help="Session id for the call.",
)
@click.option(
"--max-llm-calls",
"max_llm_calls",
type=int,
default=None,
help="Override max LLM calls for this call (falls back to the harness default).",
)
@click.option(
"--url",
default=None,
Expand All @@ -836,6 +853,7 @@ def invoke(
message_opt,
user_id,
session_id,
max_llm_calls,
url,
key,
path,
Expand Down Expand Up @@ -864,10 +882,13 @@ def invoke(
"or pass --url/--key."
)

run_agent_request: dict = {"user_id": user_id, "session_id": session_id}
if max_llm_calls is not None:
run_agent_request["max_llm_calls"] = max_llm_calls
body: dict = {
"prompt": message,
"harness_name": harness_name,
"run_agent_request": {"user_id": user_id, "session_id": session_id},
"run_agent_request": run_agent_request,
}
override = {name: value for name, value in overrides.items() if value is not None}
if override:
Expand Down
43 changes: 38 additions & 5 deletions veadk/cloud/harness_app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,27 @@
from pathlib import Path

from fastapi import FastAPI
from google.adk.agents import RunConfig

from veadk import Agent
from veadk.cloud.harness_app.agent import agent, short_term_memory
from veadk.cloud.harness_app.types import (
InvokeHarnessRequest,
InvokeHarnessResponse,
)
from veadk.cloud.harness_app.utils import spawn_harness_agent
from veadk.cloud.harness_app.utils import SkillLoadError, spawn_harness_agent
from veadk.memory.short_term_memory import ShortTermMemory
from veadk.runner import Runner
from veadk.utils.logger import get_logger

logger = get_logger(__name__)

HARNESS_NAME = os.getenv("HARNESS_NAME", "default")
# Optional harness default max LLM calls per run, from harness.yaml (overridable
# per invocation). Unset -> falls through to ADK RunConfig's own default.
DEFAULT_MAX_LLM_CALLS = (
int(os.environ["MAX_LLM_CALLS"]) if os.environ.get("MAX_LLM_CALLS") else None
)


class HarnessApp:
Expand All @@ -52,11 +58,13 @@ def __init__(
agent: Agent,
short_term_memory: ShortTermMemory,
harness_name: str = "default",
max_llm_calls: int | None = None,
):
self.app = FastAPI()
self.agent = agent
self.short_term_memory = short_term_memory
self.harness_name = harness_name
self.max_llm_calls = max_llm_calls
self.runner = Runner(
agent=agent,
short_term_memory=short_term_memory,
Expand All @@ -70,16 +78,37 @@ def mount(self):
async def invoke_harness(
request: InvokeHarnessRequest,
) -> InvokeHarnessResponse:
# max LLM calls: per-call override, else the harness default; if
# neither is set, fall through to ADK RunConfig's own default.
# Resolve with an explicit None check rather than `or`: an explicit
# per-call value of 0 is a real setting (ADK documents <= 0 as "no
# limit") and must not be silently replaced by the harness default.
max_llm_calls = request.run_agent_request.max_llm_calls
if max_llm_calls is None:
    max_llm_calls = self.max_llm_calls
run_config = (
    RunConfig(max_llm_calls=max_llm_calls)
    if max_llm_calls is not None
    else RunConfig()
)

if request.harness is not None:
logger.info(f"Applying once-time harness override: {request.harness}")
# The override clones the base agent and may download incremental
# skills into a temp dir; the skill files are read from disk while
# the agent runs, so the dir is removed (and the one-off agent +
# runner dropped) only after the run finishes.
with tempfile.TemporaryDirectory(prefix="harness_invoke_") as work_dir:
agent = spawn_harness_agent(
self.agent, request.harness, download_dir=Path(work_dir)
)
try:
agent = spawn_harness_agent(
self.agent, request.harness, download_dir=Path(work_dir)
)
except SkillLoadError as e:
# A once-time skill failed to load; return the reason to
# the caller instead of running with a wrong skill set.
logger.error(f"Once-time skill load failed: {e}")
return InvokeHarnessResponse(
harness_name=self.harness_name,
overwrite=True,
output=str(e),
)
runner = Runner(
agent=agent,
short_term_memory=self.short_term_memory,
Expand All @@ -89,12 +118,14 @@ async def invoke_harness(
messages=[request.prompt],
user_id=request.run_agent_request.user_id,
session_id=request.run_agent_request.session_id,
run_config=run_config,
)
else:
output = await self.runner.run(
messages=[request.prompt],
user_id=request.run_agent_request.user_id,
session_id=request.run_agent_request.session_id,
run_config=run_config,
)

return InvokeHarnessResponse(
Expand All @@ -109,7 +140,9 @@ def serve(self, host: str = "0.0.0.0", port: int = 8000) -> None:
uvicorn.run(self.app, host=host, port=port)


harness_app = HarnessApp(agent, short_term_memory, HARNESS_NAME)
harness_app = HarnessApp(
agent, short_term_memory, HARNESS_NAME, max_llm_calls=DEFAULT_MAX_LLM_CALLS
)
app = harness_app.app


Expand Down
8 changes: 8 additions & 0 deletions veadk/cloud/harness_app/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,19 @@ class HarnessConfig(HarnessOverrides):
longterm_memory_type: str = Field(default="")
shortterm_memory_type: str = Field(default="local")
runtime: Literal["adk", "codex"] = Field(default="adk")
max_llm_calls: int | None = Field(
default=None,
description="Default max LLM calls per run; unset follows ADK RunConfig's default. Overridable per invocation.",
)


# Per-call run parameters carried inside an invoke request.
class RunAgentRequest(BaseModel):
    # Required identifiers (no defaults); presumably the calling user and the
    # conversation session the run is attached to — confirm against callers.
    user_id: str
    session_id: str
    # Optional per-call cap on LLM calls. None (the default) means the caller
    # supplied no override for this call.
    max_llm_calls: int | None = Field(
        default=None,
        description="Override max LLM calls for this single call (falls back to the harness default, then ADK's).",
    )


class InvokeHarnessRequest(BaseModel):
Expand Down
29 changes: 20 additions & 9 deletions veadk/cloud/harness_app/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
"HarnessOverrides",
"split_csv",
"build_skill_toolset",
"SkillLoadError",
"config_from_env",
"init_harness_agent",
"spawn_harness_agent",
Expand All @@ -75,6 +76,7 @@
"knowledgebase_type": "KNOWLEDGEBASE_TYPE",
"longterm_memory_type": "LONG_TERM_MEMORY_TYPE",
"shortterm_memory_type": "SHORT_TERM_MEMORY_TYPE",
"max_llm_calls": "MAX_LLM_CALLS",
}


Expand Down Expand Up @@ -143,6 +145,14 @@ def _download_and_extract_skill(skill: str, dest_dir: Path) -> Path:
return skill_dir


class SkillLoadError(RuntimeError):
    """Signal that a skill could not be downloaded or loaded.

    A malformed ``SKILL.md`` is one example. The error is raised rather than
    the skill being skipped quietly, so the failure is visible: at server
    startup for a base skill, or in the invoke response for a per-call
    override.
    """


def build_skill_toolset(
skills: list[str], download_dir: Path | None = None
) -> SkillToolset | None:
Expand All @@ -152,13 +162,18 @@ def build_skill_toolset(
and loaded via ``load_skill_from_dir``. The directory is **not** cleaned up
here: a skill's scripts/assets are read from disk while the agent runs, so
the caller owns the directory's lifetime (the base agent keeps its skills for
the server's lifetime; a per-invoke override cleans up after the run). Skills
that fail to download or load (e.g. a malformed ``SKILL.md``) are skipped with
a warning so the rest still load.
the server's lifetime; a per-invoke override cleans up after the run).

Fast-fail: if *any* skill fails to download or load (e.g. a ``SKILL.md`` whose
description exceeds ADK's limit), a :class:`SkillLoadError` is raised naming
the skill and the reason — the whole call is aborted rather than running with
a partial skill set.

Returns:
A :class:`SkillToolset` of the loaded skills, or ``None`` if none loaded.
A :class:`SkillToolset` of the loaded skills, or ``None`` for no skills.
"""
if not skills:
return None
if download_dir is None:
download_dir = Path(tempfile.mkdtemp(prefix="harness_skills_"))
loaded_skills = []
Expand All @@ -168,11 +183,7 @@ def build_skill_toolset(
load_skill_from_dir(_download_and_extract_skill(skill, download_dir))
)
except Exception as e:
logger.warning(f"Skipping skill '{skill}': {e}")

if not loaded_skills:
logger.warning("No skills loaded successfully; skipping skill toolset.")
return None
raise SkillLoadError(f"Skill '{skill}' failed to load: {e}") from e
return SkillToolset(skills=loaded_skills)


Expand Down
Loading