From 65edb2fcde6a55130c7daa8d276ae2fdb7898388 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Sun, 31 May 2026 21:36:45 +0200 Subject: [PATCH 01/30] fix(api): add embedding-auth problem type and exception (#329) --- app/core/exceptions.py | 30 ++++++++++++++++++++++++++++++ app/core/problem_details.py | 1 + 2 files changed, 31 insertions(+) diff --git a/app/core/exceptions.py b/app/core/exceptions.py index fd5e2b36..19cff085 100644 --- a/app/core/exceptions.py +++ b/app/core/exceptions.py @@ -223,6 +223,36 @@ def __init__( ) +class EmbeddingProviderAuthError(ForecastLabError): + """502 — the embedding provider rejected the configured credentials. + + Raised when the RAG embedding provider returns an authentication/ + authorization failure (HTTP 401/403 — an invalid, placeholder, or + unauthorized API key) rather than a transient connection/server failure. + Keeps the public ``/rag`` status at 502 (an upstream/gateway failure from + the caller's perspective) but emits a *machine-readable* ``EMBEDDING_AUTH`` + problem ``type``/``code`` so consumers — notably the showcase demo + pipeline — can classify it and SKIP the knowledge phase gracefully instead + of hard-failing (issue #329). Disambiguated from a generic embedding 502 + (bare ``{"detail": ...}``) via the ``type`` URI in the problem+json body, + mirroring the :class:`UnprocessableEntityError` 422 precedent. 
+ """ + + error_type_uri: str = ERROR_TYPES["EMBEDDING_AUTH"] + + def __init__( + self, + message: str = "Embedding provider rejected the configured credentials", + details: dict[str, Any] | None = None, + ) -> None: + super().__init__( + message=message, + code="EMBEDDING_AUTH", + status_code=502, + details=details, + ) + + # ============================================================================= # Exception Handlers (RFC 7807) # ============================================================================= diff --git a/app/core/problem_details.py b/app/core/problem_details.py index 7de6e462..c789b922 100644 --- a/app/core/problem_details.py +++ b/app/core/problem_details.py @@ -36,6 +36,7 @@ "BAD_REQUEST": f"{ERROR_TYPE_BASE}/bad-request", "SERVICE_UNAVAILABLE": f"{ERROR_TYPE_BASE}/service-unavailable", "GATEWAY_TIMEOUT": f"{ERROR_TYPE_BASE}/gateway-timeout", + "EMBEDDING_AUTH": f"{ERROR_TYPE_BASE}/embedding-auth", } From 1f93cf065165ac1279d0a3c66dbfc7c2dfe438ca Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Sun, 31 May 2026 21:36:45 +0200 Subject: [PATCH 02/30] fix(rag): classify embedding provider auth failures (#329) --- app/features/rag/embeddings.py | 47 +++++++++++++++++- app/features/rag/routes.py | 31 +++++++++++- app/features/rag/tests/test_embeddings.py | 58 +++++++++++++++++++++++ app/features/rag/tests/test_routes.py | 40 +++++++++++++++- 4 files changed, 172 insertions(+), 4 deletions(-) diff --git a/app/features/rag/embeddings.py b/app/features/rag/embeddings.py index cffa1b1d..4f5cc5bc 100644 --- a/app/features/rag/embeddings.py +++ b/app/features/rag/embeddings.py @@ -16,7 +16,12 @@ import httpx import structlog import tiktoken -from openai import AsyncOpenAI, RateLimitError +from openai import ( + AsyncOpenAI, + AuthenticationError, + PermissionDeniedError, + RateLimitError, +) from app.core.config import get_settings @@ -32,6 +37,21 @@ class EmbeddingError(Exception): pass +class EmbeddingAuthError(EmbeddingError): + """Embedding provider 
rejected the credentials (HTTP 401/403). + + A *distinct* subclass of :class:`EmbeddingError` so callers can tell an + authentication/authorization failure (invalid or placeholder API key) apart + from a transient connection/server failure. The RAG routes map this to a + machine-readable ``EMBEDDING_AUTH`` problem so the showcase demo pipeline + can SKIP the knowledge phase gracefully instead of hard-failing on a bad + key (issue #329). Carries no secret material — only the provider's own + error string, which never contains the key value. + """ + + pass + + class EmbeddingProvider(ABC): """Abstract base class for embedding providers. @@ -293,6 +313,18 @@ async def _embed_batch( await asyncio.sleep(wait_time) continue + except (AuthenticationError, PermissionDeniedError) as e: + # Invalid / placeholder / unauthorized key (401/403). Not + # retryable — surface as a distinct auth failure so callers can + # classify it (issue #329). Log the type only, never the key. + logger.warning( + "rag.embedding_auth_error", + error_type=type(e).__name__, + batch_size=len(texts), + provider="openai", + ) + raise EmbeddingAuthError(f"OpenAI rejected the embedding credentials: {e}") from e + except Exception as e: last_error = e logger.error( @@ -418,6 +450,19 @@ async def embed_texts( f"Ollama model '{self.settings.ollama_embedding_model}' not found. " f"Run: ollama pull {self.settings.ollama_embedding_model}" ) from e + if e.response.status_code in (401, 403): + # Auth rejected (e.g. Ollama behind an authenticating proxy + # with a bad/placeholder credential). Not retryable — + # surface as a distinct auth failure (issue #329). 
+ logger.warning( + "rag.embedding_auth_error", + error_type=type(e).__name__, + status_code=e.response.status_code, + provider="ollama", + ) + raise EmbeddingAuthError( + f"Ollama embedding endpoint rejected the credentials: {e}" + ) from e if e.response.status_code >= 500 and attempt < max_retries: # Server error - retry wait_time = retry_delay * (2**attempt) diff --git a/app/features/rag/routes.py b/app/features/rag/routes.py index e4474fb2..4585c5e6 100644 --- a/app/features/rag/routes.py +++ b/app/features/rag/routes.py @@ -6,9 +6,9 @@ from app.core.config import get_settings from app.core.database import get_db -from app.core.exceptions import DatabaseError +from app.core.exceptions import DatabaseError, EmbeddingProviderAuthError from app.core.logging import get_logger -from app.features.rag.embeddings import EmbeddingError +from app.features.rag.embeddings import EmbeddingAuthError, EmbeddingError from app.features.rag.schemas import ( DeleteResponse, IndexProjectDocsRequest, @@ -110,6 +110,15 @@ async def index_document( detail=str(e), ) from e + except EmbeddingAuthError as e: + logger.warning( + "rag.index_request_auth_failed", + error_type=type(e).__name__, + ) + raise EmbeddingProviderAuthError( + message=f"Embedding provider rejected the credentials: {e}", + ) from e + except EmbeddingError as e: logger.error( "rag.index_request_failed", @@ -195,6 +204,15 @@ async def index_project_docs( return response + except EmbeddingAuthError as e: + logger.warning( + "rag.index_project_docs_request_auth_failed", + error_type=type(e).__name__, + ) + raise EmbeddingProviderAuthError( + message=f"Embedding provider rejected the credentials: {e}", + ) from e + except EmbeddingError as e: logger.error( "rag.index_project_docs_request_failed", @@ -298,6 +316,15 @@ async def retrieve( return response + except EmbeddingAuthError as e: + logger.warning( + "rag.retrieve_request_auth_failed", + error_type=type(e).__name__, + ) + raise EmbeddingProviderAuthError( + 
message=f"Embedding provider rejected the credentials: {e}", + ) from e + except EmbeddingError as e: logger.error( "rag.retrieve_request_failed", diff --git a/app/features/rag/tests/test_embeddings.py b/app/features/rag/tests/test_embeddings.py index 2eb59b70..2783a3af 100644 --- a/app/features/rag/tests/test_embeddings.py +++ b/app/features/rag/tests/test_embeddings.py @@ -4,8 +4,10 @@ import httpx import pytest +from openai import AuthenticationError from app.features.rag.embeddings import ( + EmbeddingAuthError, EmbeddingError, EmbeddingProvider, EmbeddingService, @@ -153,6 +155,36 @@ async def test_embed_texts_batching(self): assert len(result) == 4 assert mock_client.embeddings.create.call_count == 2 + @pytest.mark.asyncio + async def test_embed_texts_invalid_key_raises_auth_error(self): + """#329 — a 401 from OpenAI surfaces as EmbeddingAuthError, not retried.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.openai_api_key = "sk-placeholder-invalid" + mock_settings.return_value.rag_embedding_model = "text-embedding-3-small" + mock_settings.return_value.rag_embedding_dimension = 1536 + mock_settings.return_value.rag_embedding_batch_size = 100 + + provider = OpenAIEmbeddingProvider() + + auth_error = AuthenticationError( + "Incorrect API key provided", + response=httpx.Response( + 401, + request=httpx.Request("POST", "https://api.openai.com/v1/embeddings"), + ), + body=None, + ) + mock_client = MagicMock() + mock_client.embeddings.create = AsyncMock(side_effect=auth_error) + provider._client = mock_client + + with pytest.raises(EmbeddingAuthError) as exc_info: + await provider.embed_texts(["text"]) + # Subclass of EmbeddingError so existing callers still catch it. + assert isinstance(exc_info.value, EmbeddingError) + # Not retried: a single create() call, no backoff loop. 
+ assert mock_client.embeddings.create.call_count == 1 + @pytest.mark.asyncio async def test_embed_query_returns_single_embedding(self): """Test embed_query returns single embedding.""" @@ -327,6 +359,32 @@ async def test_embed_texts_model_not_found(self): assert "not found" in str(exc_info.value).lower() assert "ollama pull" in str(exc_info.value) + @pytest.mark.asyncio + async def test_embed_texts_auth_rejected_raises_auth_error(self): + """#329 — a 401/403 from the Ollama endpoint surfaces as EmbeddingAuthError.""" + with patch("app.features.rag.embeddings.get_settings") as mock_settings: + mock_settings.return_value.ollama_base_url = "http://localhost:11434" + mock_settings.return_value.ollama_embedding_model = "nomic-embed-text" + mock_settings.return_value.rag_embedding_dimension = 768 + + provider = OllamaEmbeddingProvider() + + mock_response = MagicMock() + mock_response.status_code = 401 + error = httpx.HTTPStatusError( + "Unauthorized", + request=MagicMock(), + response=mock_response, + ) + mock_client = MagicMock(spec=httpx.AsyncClient) + mock_client.post = AsyncMock(side_effect=error) + provider._client = mock_client + + with pytest.raises(EmbeddingAuthError) as exc_info: + await provider.embed_texts(["test"]) + assert isinstance(exc_info.value, EmbeddingError) + assert "rejected the credentials" in str(exc_info.value) + @pytest.mark.asyncio async def test_embed_texts_connection_error(self): """Test error handling when Ollama not reachable.""" diff --git a/app/features/rag/tests/test_routes.py b/app/features/rag/tests/test_routes.py index f898a9f1..f6efacd2 100644 --- a/app/features/rag/tests/test_routes.py +++ b/app/features/rag/tests/test_routes.py @@ -13,7 +13,7 @@ import pytest from httpx import AsyncClient -from app.features.rag.embeddings import EmbeddingError, EmbeddingService +from app.features.rag.embeddings import EmbeddingAuthError, EmbeddingError, EmbeddingService from app.features.rag.service import RAGService # 
============================================================================= @@ -565,3 +565,41 @@ async def test_embedding_failure_returns_502(self, client: AsyncClient, tmp_path response = await client.post("/rag/index/project-docs", json={}) assert response.status_code == 502 + + @pytest.mark.asyncio + async def test_embedding_auth_failure_returns_502_with_marker( + self, client: AsyncClient, tmp_path + ): + """#329 — an embedding auth failure stays 502 but carries the + + machine-readable EMBEDDING_AUTH problem marker so the demo pipeline can + classify it (vs a generic embedding 502) without brittle text matching. + """ + (tmp_path / "docs").mkdir() + (tmp_path / "docs" / "auth-doc.md").write_text( + "# Delta\n\nDelta content.", encoding="utf-8" + ) + mock_service = MagicMock(spec=EmbeddingService) + mock_service.embed_texts = AsyncMock( + side_effect=EmbeddingAuthError("OpenAI rejected the embedding credentials") + ) + + with ( + patch( + "app.features.rag.routes.RAGService", + partial(RAGService, base_dir=str(tmp_path)), + ), + patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ), + ): + response = await client.post("/rag/index/project-docs", json={}) + + # Status stays 502 (public contract stable); body is RFC 7807 with a + # stable type/code an automated consumer can branch on. 
+ assert response.status_code == 502 + body = response.json() + assert body["code"] == "EMBEDDING_AUTH" + assert body["type"].endswith("/embedding-auth") + assert body["status"] == 502 From 6fc5501ee4a4d529291bcb528031dd09de8bc467 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Sun, 31 May 2026 21:36:45 +0200 Subject: [PATCH 03/30] fix(api): skip showcase knowledge phase on embedding auth failure (#329) --- app/features/demo/pipeline.py | 74 +++++++++++++++++----- app/features/demo/tests/test_pipeline.py | 79 ++++++++++++++++++++++++ tests/test_e2e_demo.py | 40 +++++++----- 3 files changed, 160 insertions(+), 33 deletions(-) diff --git a/app/features/demo/pipeline.py b/app/features/demo/pipeline.py index c56ae925..a826117c 100644 --- a/app/features/demo/pipeline.py +++ b/app/features/demo/pipeline.py @@ -389,6 +389,28 @@ async def _embedding_provider_reachable(client: _Client) -> tuple[bool, str]: return (False, provider) +# PRP-42 (#329) — the RFC 7807 ``code`` the RAG routes stamp on an +# embedding-provider auth failure (401/403). The probe only checks key +# *presence*, so a placeholder/invalid key passes the probe but the indexing +# call then 502s with this code; the knowledge steps classify it and SKIP +# gracefully instead of hard-failing. Mirrors EmbeddingProviderAuthError in +# app/core/exceptions.py (memory anchor: [[rag-runtime-config-and-corpus-state]]). +_EMBEDDING_AUTH_CODE = "EMBEDDING_AUTH" + + +def _is_embedding_auth_error(exc: _StepError) -> bool: + """True when a _StepError is the embedding-provider auth 502 (#329). + + Classifies on the machine-readable RFC 7807 ``code`` / ``type`` from the + problem+json body — never on brittle ``detail`` text matching. 
+ """ + problem = exc.problem + if problem.get("code") == _EMBEDDING_AUTH_CODE: + return True + type_uri = problem.get("type") + return isinstance(type_uri, str) and type_uri.endswith("/embedding-auth") + + def _select_winner( backtest_results: dict[str, dict[str, float]], ) -> tuple[str, float] | None: @@ -1382,17 +1404,27 @@ async def step_rag_index_subset(ctx: DemoContext, client: _Client) -> StepResult if ctx.embedding_unreachable: return ("skip", "embedding provider unreachable", {}) - body = await client.request( - "rag_index_subset", - "POST", - "/rag/index/project-docs", - json_body={ - "include_docs": True, - "include_prps": False, - "include_root": False, - "path_prefix": "docs/user-guide", - }, - ) + try: + body = await client.request( + "rag_index_subset", + "POST", + "/rag/index/project-docs", + json_body={ + "include_docs": True, + "include_prps": False, + "include_root": False, + "path_prefix": "docs/user-guide", + }, + ) + except _StepError as exc: + # PRP-42 (#329) — the probe only checks key *presence*; a placeholder / + # invalid key passes it but the index call 502s with EMBEDDING_AUTH. + # Treat it like an unreachable provider: SKIP (not FAIL) and mark the + # context so the retrieve probe skips too, without a second 401 round-trip. 
+ if _is_embedding_auth_error(exc): + ctx.embedding_unreachable = True + return ("skip", "embedding provider rejected credentials", {}) + raise results = body.get("results") or [] total_chunks = int(body.get("total_chunks", 0)) failed = int(body.get("failed", 0)) @@ -1428,12 +1460,20 @@ async def step_rag_retrieve_probe(ctx: DemoContext, client: _Client) -> StepResu if ctx.embedding_unreachable: return ("skip", "embedding provider unreachable", {}) - body = await client.request( - "rag_retrieve_probe", - "POST", - "/rag/retrieve", - json_body={"query": "How do I run the demo pipeline?", "top_k": 3}, - ) + try: + body = await client.request( + "rag_retrieve_probe", + "POST", + "/rag/retrieve", + json_body={"query": "How do I run the demo pipeline?", "top_k": 3}, + ) + except _StepError as exc: + # PRP-42 (#329) — same auth-classified graceful skip as the index step, + # in case retrieve is reached with a freshly-rejecting key. + if _is_embedding_auth_error(exc): + ctx.embedding_unreachable = True + return ("skip", "embedding provider rejected credentials", {}) + raise results = body.get("results") or [] if not results: return ( diff --git a/app/features/demo/tests/test_pipeline.py b/app/features/demo/tests/test_pipeline.py index 6e9fd7ea..5be6bffb 100644 --- a/app/features/demo/tests/test_pipeline.py +++ b/app/features/demo/tests/test_pipeline.py @@ -1468,6 +1468,60 @@ async def test_rag_index_subset_skips_when_provider_unreachable(): assert client.calls == [] +async def test_rag_index_subset_skips_on_embedding_auth_502(): + """#329 — an EMBEDDING_AUTH 502 (invalid/placeholder key) SKIPs, not FAILs. + + The probe only checks key presence, so a bad key reaches the index call and + 502s with the machine-readable EMBEDDING_AUTH marker. The step classifies it + and skips, and marks the context so the retrieve probe skips too. 
+ """ + ctx = _make_showcase_ctx() + assert ctx.embedding_unreachable is False + client = _RecordingClient( + None, + errors={ + ("POST", "/rag/index/project-docs"): pipeline._StepError( + "rag_index_subset", + 502, + { + "type": "/errors/embedding-auth", + "title": "Embedding Auth", + "status": 502, + "code": "EMBEDDING_AUTH", + "detail": "Embedding provider rejected the credentials", + }, + ), + }, + ) + status, detail, _ = await pipeline.step_rag_index_subset(ctx, _as_client(client)) + assert status == "skip" + assert "rejected credentials" in detail + # The call WAS attempted (unlike the unreachable case)... + assert len(client.calls) == 1 + # ...and the context is now marked so the retrieve probe skips too. + assert ctx.embedding_unreachable is True + + +async def test_rag_index_subset_reraises_non_auth_502(): + """#329 — a non-auth 502 (e.g. connection failure) still propagates as FAIL.""" + import pytest + + ctx = _make_showcase_ctx() + client = _RecordingClient( + None, + errors={ + ("POST", "/rag/index/project-docs"): pipeline._StepError( + "rag_index_subset", + 502, + {"title": "Bad Gateway", "detail": "Embedding generation failed: timeout"}, + ), + }, + ) + with pytest.raises(pipeline._StepError): + await pipeline.step_rag_index_subset(ctx, _as_client(client)) + assert ctx.embedding_unreachable is False + + async def test_rag_retrieve_probe_happy_path(): """PRP-40 — top hit + similarity score surface on PASS.""" ctx = _make_showcase_ctx() @@ -1521,6 +1575,31 @@ async def test_rag_retrieve_probe_skips_when_provider_unreachable(): assert client.calls == [] +async def test_rag_retrieve_probe_skips_on_embedding_auth_502(): + """#329 — retrieve also classifies an EMBEDDING_AUTH 502 as SKIP, not FAIL.""" + ctx = _make_showcase_ctx() + client = _RecordingClient( + None, + errors={ + ("POST", "/rag/retrieve"): pipeline._StepError( + "rag_retrieve_probe", + 502, + { + "type": "/errors/embedding-auth", + "title": "Embedding Auth", + "status": 502, + "code": 
"EMBEDDING_AUTH", + "detail": "Embedding provider rejected the credentials", + }, + ), + }, + ) + status, detail, _ = await pipeline.step_rag_retrieve_probe(ctx, _as_client(client)) + assert status == "skip" + assert "rejected credentials" in detail + assert ctx.embedding_unreachable is True + + async def test_run_pipeline_showcase_rich_runs_planning_and_knowledge(monkeypatch, tmp_path): """PRP-40 — end-to-end SHOWCASE_RICH reaches the 5 new steps + greens.""" artifact = tmp_path / "artifacts" / "models" / "model_abc123def456.joblib" diff --git a/tests/test_e2e_demo.py b/tests/test_e2e_demo.py index 31d263d4..ac3a5278 100644 --- a/tests/test_e2e_demo.py +++ b/tests/test_e2e_demo.py @@ -504,23 +504,31 @@ def test_run_demo_showcase_rich_full_epic( f"status={scenario_step['status']!r} detail={scenario_step['detail']!r}" ) - # Any OTHER failed step must be an environment-dependent knowledge-phase step - # (embedding provider unreachable / misconfigured key) -- those skip - # gracefully when the provider is absent (RUNBOOKS 20-22), but a real 401 - # surfaces as a fail locally. Not the #324 cascade. - ENV_DEPENDENT_KNOWLEDGE_STEPS = {"rag_index_subset", "rag_retrieve_probe"} + # ---- PR1 (PRP-42, #329) — knowledge phase must never hard-fail ----------- + # The embedding-provider knowledge steps now SKIP gracefully whether the + # provider is truly unreachable OR rejects an invalid/placeholder key (the + # 401/403 -> EMBEDDING_AUTH classification, RUNBOOKS 20-22). They may pass + # (provider reachable + corpus matches), skip (unreachable / bad key), or + # warn (retrieve indexed but found no hits) -- but they must NOT fail. 
+ KNOWLEDGE_STEPS = {"rag_index_subset", "rag_retrieve_probe"} + for name in KNOWLEDGE_STEPS: + step = by_name.get(name) + if step is not None: + assert step["status"] in {"pass", "skip", "warn"}, ( + f"{name} must skip/warn gracefully on an unreachable/invalid " + f"embedding key (#329), got status={step['status']!r} " + f"detail={step['detail']!r}" + ) + + # No step may hard-fail on showcase_rich now: #324 is fixed and the + # knowledge phase skips instead of 401/502-failing. Any fail is a regression. failed = [s for s in result["steps"] if s["status"] == "fail"] - for step in failed: - assert step["step_name"] in ENV_DEPENDENT_KNOWLEDGE_STEPS, ( - f"unexpected showcase_rich failure (not #324, not env-dependent): " - f"{step['step_name']!r} detail={step['detail']!r}" - ) - # With no env-dependent failures, the per-step statuses and the overall - # status must agree -- the whole pipeline reports pass. - if not failed: - assert result["overall_status"] == "pass", ( - f"no failed steps but overall_status={result['overall_status']!r}" - ) + assert not failed, "unexpected showcase_rich failure(s): " + ", ".join( + f"{s['step_name']!r} (detail={s['detail']!r})" for s in failed + ) + assert result["overall_status"] == "pass", ( + f"no failed steps but overall_status={result['overall_status']!r}" + ) @pytest.mark.integration From 582e62f815564664beba3430b20577b5d6ca44c2 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Sun, 31 May 2026 21:36:45 +0200 Subject: [PATCH 04/30] docs(docs): document showcase knowledge auth skip (#329) --- docs/_base/RUNBOOKS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/_base/RUNBOOKS.md b/docs/_base/RUNBOOKS.md index a3b5b1ba..4ba53dca 100644 --- a/docs/_base/RUNBOOKS.md +++ b/docs/_base/RUNBOOKS.md @@ -126,6 +126,7 @@ uv run python scripts/run_demo.py --seed 42 --quiet 2>&1 | tee demo.log 18. 
**`scenario_simulate_and_save` step fails with `Cannot parse artifact-key from artifact_uri` (PRP-40, `showcase_rich` only)** — FIXED in #324. The cascade had two root causes: `safer_promote_flow` (PRP-39) swapped the `demo-production` alias to a worse-WAPE run whose placeholder `artifact_uri` (`demo/safer-promote-placeholder.joblib`) the `_parse_artifact_key` regex (`r"model_([0-9a-f]+)(?:\.joblib)?$"`) could not match, and `scenario_simulate_and_save` then resolved that corrupted alias. The fix: the planning step now resolves the champion via `ctx.winning_run_id` (recorded by `register`, never touched by the swap) instead of the live alias, and `safer_promote_flow` writes a real-shape parseable `artifact_uri`. The orchestrator also runs an alias-restore safeguard (`_restore_demo_alias_after_failure`) on any mid-run failure so `demo-production` is never left on the worse-WAPE run. If you still hit this on a forked pipeline, the run's `artifact_uri` is irregular: confirm it matches one of the V1 (`demo/{model_type}-model_{KEY}.joblib`) or V2 (`artifacts/models/model_{KEY}.joblib`) shapes via `GET /registry/runs/{run_id}`, re-run the showcase (the next `register` step rewrites the artifact_uri), or extend `_ARTIFACT_KEY_RE` if a new shape is intentional. 19. **`multi_plan_compare` step shows ⚠️ with `holiday-plan save failed: ...; price-cut plan still saved` (PRP-40, `showcase_rich` only)** — the second `POST /scenarios` returned 4xx (most likely 422). The price-cut plan was still saved (partial success — R19), so the run keeps going green. Fix: read the RFC 7807 body in the detail; common causes are a horizon out of range or a malformed `holiday.dates` payload. Re-running the showcase regenerates both plans from scratch. 20. **`embedding_provider_probe` step shows ✅ but `reachable=False` (PRP-40, `showcase_rich` only)** — expected when no embedding provider is configured. 
The probe always emits PASS so the pipeline still greens; downstream `rag_index_subset` and `rag_retrieve_probe` will emit ⏭️ skip with `detail="embedding provider unreachable"`. Fix only if you want the knowledge phase to run: set `OPENAI_API_KEY` (when `RAG_EMBEDDING_PROVIDER=openai`) or start Ollama on `OLLAMA_BASE_URL` (when `RAG_EMBEDDING_PROVIDER=ollama`), then re-run. + - **Invalid / placeholder key (PRP-42, #329):** the probe only checks key *presence*, so a non-empty-but-invalid key (e.g. the `.env.example` placeholder) reports `reachable=true` and the index call then gets a provider **401/403**. As of #329 the RAG routes classify that auth failure as a machine-readable `EMBEDDING_AUTH` problem (RFC 7807 `type=/errors/embedding-auth`, `code="EMBEDDING_AUTH"`) — the public `/rag/index/project-docs` and `/rag/retrieve` endpoints still return **502**, but `rag_index_subset` / `rag_retrieve_probe` now **⏭️ skip** with `detail="embedding provider rejected credentials"` (instead of hard-failing) and the pipeline still greens. Fix only if you want the knowledge phase to run: set a *valid* key for the configured provider and re-run. 21. **`rag_index_subset` step fails with `path_prefix escapes the project root` (PRP-40, `showcase_rich` only)** — the demo step hard-codes `path_prefix="docs/user-guide"`, so a real-world hit means `RAGService._base_dir` no longer points at the repo root (e.g. a misconfigured container start). Fix: confirm the backend was started from the repo root (or that `RAGService(base_dir=...)` was constructed with the right path); rerun the showcase. The path-traversal guard is load-bearing security — never relax it. 22. **`rag_retrieve_probe` step shows ⚠️ with `no hits — corpus indexed but query did not match` (PRP-40, `showcase_rich` only)** — the 5-file corpus was indexed (the prior step PASSed) but the canned query `"How do I run the demo pipeline?"` returned zero hits. 
Common cause: the embedding-provider was switched mid-showcase and indexed chunks are now orphaned (memory anchor: `[[rag-runtime-config-and-corpus-state]]`); the pgvector column has one fixed dimension per provider. Fix: stick to one provider, or clear the RAG corpus (`DELETE /rag/sources/{id}` per source) and re-run. 23. **`agent_hitl_flow` step shows ⏭️ with `no API key matching agent_default_model provider` (PRP-41, `showcase_rich` only)** — expected when no LLM key is set for the configured `agent_default_model` provider. Pipeline still goes green. Fix only if you want the HITL phase to run: set `OPENAI_API_KEY` / `ANTHROPIC_API_KEY` / `GOOGLE_API_KEY` to match the provider prefix in `agent_default_model` (e.g. `anthropic:claude-...` → `ANTHROPIC_API_KEY`). From aa7d6927e39ef167d913270efaf591b4294c8edf Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Sun, 31 May 2026 21:44:25 +0200 Subject: [PATCH 05/30] refactor(api,rag): centralize embedding-auth marker and share route helper (#329) --- app/core/exceptions.py | 5 +++-- app/core/problem_details.py | 8 ++++++- app/features/demo/pipeline.py | 24 ++++++++++++--------- app/features/rag/routes.py | 39 ++++++++++++++++------------------- 4 files changed, 42 insertions(+), 34 deletions(-) diff --git a/app/core/exceptions.py b/app/core/exceptions.py index 19cff085..1e6279ea 100644 --- a/app/core/exceptions.py +++ b/app/core/exceptions.py @@ -10,6 +10,7 @@ from app.core.logging import get_logger from app.core.problem_details import ( + EMBEDDING_AUTH_CODE, ERROR_TYPES, ProblemDetailResponse, problem_response, @@ -238,7 +239,7 @@ class EmbeddingProviderAuthError(ForecastLabError): mirroring the :class:`UnprocessableEntityError` 422 precedent. 
""" - error_type_uri: str = ERROR_TYPES["EMBEDDING_AUTH"] + error_type_uri: str = ERROR_TYPES[EMBEDDING_AUTH_CODE] def __init__( self, @@ -247,7 +248,7 @@ def __init__( ) -> None: super().__init__( message=message, - code="EMBEDDING_AUTH", + code=EMBEDDING_AUTH_CODE, status_code=502, details=details, ) diff --git a/app/core/problem_details.py b/app/core/problem_details.py index c789b922..f8bba455 100644 --- a/app/core/problem_details.py +++ b/app/core/problem_details.py @@ -23,6 +23,12 @@ # Base URI for error types (relative URIs for portability) ERROR_TYPE_BASE = "/errors" +# Machine-readable code for an embedding-provider auth failure (#329). Single +# source of truth shared by the producer (EmbeddingProviderAuthError, which +# stamps this as the problem ``code``/``type``) and the consumer (the showcase +# demo pipeline's classifier) so the marker never drifts between the two. +EMBEDDING_AUTH_CODE = "EMBEDDING_AUTH" + ERROR_TYPES = { "NOT_FOUND": f"{ERROR_TYPE_BASE}/not-found", "VALIDATION_ERROR": f"{ERROR_TYPE_BASE}/validation", @@ -36,7 +42,7 @@ "BAD_REQUEST": f"{ERROR_TYPE_BASE}/bad-request", "SERVICE_UNAVAILABLE": f"{ERROR_TYPE_BASE}/service-unavailable", "GATEWAY_TIMEOUT": f"{ERROR_TYPE_BASE}/gateway-timeout", - "EMBEDDING_AUTH": f"{ERROR_TYPE_BASE}/embedding-auth", + EMBEDDING_AUTH_CODE: f"{ERROR_TYPE_BASE}/embedding-auth", } diff --git a/app/features/demo/pipeline.py b/app/features/demo/pipeline.py index a826117c..3eb64533 100644 --- a/app/features/demo/pipeline.py +++ b/app/features/demo/pipeline.py @@ -39,6 +39,7 @@ from app.core.config import get_settings from app.core.logging import get_logger +from app.core.problem_details import EMBEDDING_AUTH_CODE, ERROR_TYPES from app.features.demo.schemas import DemoRunRequest, StepEvent, StepStatus from app.shared.seeder.config import ScenarioPreset @@ -389,26 +390,29 @@ async def _embedding_provider_reachable(client: _Client) -> tuple[bool, str]: return (False, provider) -# PRP-42 (#329) — the RFC 7807 ``code`` 
the RAG routes stamp on an -# embedding-provider auth failure (401/403). The probe only checks key -# *presence*, so a placeholder/invalid key passes the probe but the indexing -# call then 502s with this code; the knowledge steps classify it and SKIP -# gracefully instead of hard-failing. Mirrors EmbeddingProviderAuthError in -# app/core/exceptions.py (memory anchor: [[rag-runtime-config-and-corpus-state]]). -_EMBEDDING_AUTH_CODE = "EMBEDDING_AUTH" +# PRP-42 (#329) — the RAG routes stamp an embedding-provider auth failure +# (401/403) with the machine-readable EMBEDDING_AUTH code/type. The probe only +# checks key *presence*, so a placeholder/invalid key passes the probe but the +# indexing call then 502s with this marker; the knowledge steps classify it and +# SKIP gracefully instead of hard-failing. Both the code and the type slug come +# from the single source of truth in app/core/problem_details.py (mirrors +# EmbeddingProviderAuthError; memory anchor: [[rag-runtime-config-and-corpus-state]]). +_EMBEDDING_AUTH_TYPE_SLUG = ERROR_TYPES[EMBEDDING_AUTH_CODE].rsplit("/", 1)[-1] def _is_embedding_auth_error(exc: _StepError) -> bool: """True when a _StepError is the embedding-provider auth 502 (#329). Classifies on the machine-readable RFC 7807 ``code`` / ``type`` from the - problem+json body — never on brittle ``detail`` text matching. + problem+json body — never on brittle ``detail`` text matching. The ``type`` + match is lenient (final path segment) so a fully-qualified problem URI + classifies the same as the canonical relative one. 
""" problem = exc.problem - if problem.get("code") == _EMBEDDING_AUTH_CODE: + if problem.get("code") == EMBEDDING_AUTH_CODE: return True type_uri = problem.get("type") - return isinstance(type_uri, str) and type_uri.endswith("/embedding-auth") + return isinstance(type_uri, str) and type_uri.rsplit("/", 1)[-1] == _EMBEDDING_AUTH_TYPE_SLUG def _select_winner( diff --git a/app/features/rag/routes.py b/app/features/rag/routes.py index 4585c5e6..57d42928 100644 --- a/app/features/rag/routes.py +++ b/app/features/rag/routes.py @@ -26,6 +26,21 @@ router = APIRouter(prefix="/rag", tags=["rag"]) +def _embedding_auth_failure(log_event: str, exc: EmbeddingAuthError) -> EmbeddingProviderAuthError: + """Map a low-level embedding auth failure to the marked 502 (#329). + + Shared by all three RAG routes (``index_document`` / ``index_project_docs`` + / ``retrieve``) so the warning log + RFC 7807 ``EMBEDDING_AUTH`` mapping + never drift between handlers. The caller passes a route-specific + ``log_event`` for tracing and re-raises the returned error ``from`` the + original exception. Logs the exception TYPE only, never a key value. 
+ """ + logger.warning(log_event, error_type=type(exc).__name__) + return EmbeddingProviderAuthError( + message=f"Embedding provider rejected the credentials: {exc}", + ) + + # ============================================================================= # Index Endpoint # ============================================================================= @@ -111,13 +126,7 @@ async def index_document( ) from e except EmbeddingAuthError as e: - logger.warning( - "rag.index_request_auth_failed", - error_type=type(e).__name__, - ) - raise EmbeddingProviderAuthError( - message=f"Embedding provider rejected the credentials: {e}", - ) from e + raise _embedding_auth_failure("rag.index_request_auth_failed", e) from e except EmbeddingError as e: logger.error( @@ -205,13 +214,7 @@ async def index_project_docs( return response except EmbeddingAuthError as e: - logger.warning( - "rag.index_project_docs_request_auth_failed", - error_type=type(e).__name__, - ) - raise EmbeddingProviderAuthError( - message=f"Embedding provider rejected the credentials: {e}", - ) from e + raise _embedding_auth_failure("rag.index_project_docs_request_auth_failed", e) from e except EmbeddingError as e: logger.error( @@ -317,13 +320,7 @@ async def retrieve( return response except EmbeddingAuthError as e: - logger.warning( - "rag.retrieve_request_auth_failed", - error_type=type(e).__name__, - ) - raise EmbeddingProviderAuthError( - message=f"Embedding provider rejected the credentials: {e}", - ) from e + raise _embedding_auth_failure("rag.retrieve_request_auth_failed", e) from e except EmbeddingError as e: logger.error( From cf01a94ec373ee2f4e7a95a933b7e3cea0f31b38 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Sun, 31 May 2026 21:44:25 +0200 Subject: [PATCH 06/30] test(api,rag): cover embedding-auth route mapping and type-only classification (#329) --- app/features/demo/tests/test_pipeline.py | 33 +++++++++++++ app/features/rag/tests/test_routes.py | 60 ++++++++++++++++++++++++ 2 files changed, 93 
insertions(+) diff --git a/app/features/demo/tests/test_pipeline.py b/app/features/demo/tests/test_pipeline.py index 5be6bffb..971e1dd7 100644 --- a/app/features/demo/tests/test_pipeline.py +++ b/app/features/demo/tests/test_pipeline.py @@ -1502,6 +1502,39 @@ async def test_rag_index_subset_skips_on_embedding_auth_502(): assert ctx.embedding_unreachable is True +async def test_rag_index_subset_skips_on_embedding_auth_type_only(): + """#329 — classification by `type` alone (no `code`) still SKIPs gracefully. + + The classifier accepts a problem whose `type` URI's final path segment is + `embedding-auth` even when there is no `code` field — and even when the + `type` is a fully-qualified absolute URI rather than the canonical relative + one. The step must still skip and flag the context. + """ + ctx = _make_showcase_ctx() + assert ctx.embedding_unreachable is False + client = _RecordingClient( + None, + errors={ + ("POST", "/rag/index/project-docs"): pipeline._StepError( + "rag_index_subset", + 502, + { + # No "code" key — only an absolute "type" ending in the slug. + "type": "https://errors.example.com/rag/embedding-auth", + "title": "Embedding Auth", + "status": 502, + "detail": "Embedding provider rejected the credentials", + }, + ), + }, + ) + status, detail, _ = await pipeline.step_rag_index_subset(ctx, _as_client(client)) + assert status == "skip" + assert "rejected credentials" in detail + assert len(client.calls) == 1 + assert ctx.embedding_unreachable is True + + async def test_rag_index_subset_reraises_non_auth_502(): """#329 — a non-auth 502 (e.g. 
connection failure) still propagates as FAIL.""" import pytest diff --git a/app/features/rag/tests/test_routes.py b/app/features/rag/tests/test_routes.py index f6efacd2..202b95d2 100644 --- a/app/features/rag/tests/test_routes.py +++ b/app/features/rag/tests/test_routes.py @@ -168,6 +168,37 @@ async def test_index_file_not_found(self, client: AsyncClient): ) assert response.status_code == 404 + @pytest.mark.asyncio + async def test_index_embedding_auth_failure_returns_502_with_marker(self, client: AsyncClient): + """#329 — /rag/index maps an embedding auth failure to the marked 502. + + Mirrors the /rag/index/project-docs assertion so all three RAG routes + stay aligned on the same RFC 7807 type/code. + """ + mock_service = create_mock_embedding_service() + mock_service.embed_texts = AsyncMock( + side_effect=EmbeddingAuthError("OpenAI rejected the embedding credentials") + ) + + with patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ): + response = await client.post( + "/rag/index", + json={ + "source_type": "markdown", + "source_path": "test-index-auth-001", + "content": "# Auth\n\nContent that needs embedding.", + }, + ) + + assert response.status_code == 502 + body = response.json() + assert body["code"] == "EMBEDDING_AUTH" + assert body["type"].endswith("/embedding-auth") + assert body["status"] == 502 + # ============================================================================= # Retrieve Endpoint Tests @@ -281,6 +312,35 @@ async def test_retrieve_validates_query(self, client: AsyncClient): ) assert response.status_code == 422 + @pytest.mark.asyncio + async def test_retrieve_embedding_auth_failure_returns_502_with_marker( + self, client: AsyncClient + ): + """#329 — /rag/retrieve maps an embedding auth failure to the marked 502. + + Keeps the retrieve handler aligned with the two index handlers on the + same RFC 7807 type/code. 
+ """ + mock_service = create_mock_embedding_service() + auth_error = EmbeddingAuthError("OpenAI rejected the embedding credentials") + mock_service.embed_query = AsyncMock(side_effect=auth_error) + mock_service.embed_texts = AsyncMock(side_effect=auth_error) + + with patch( + "app.features.rag.service.get_embedding_service", + return_value=mock_service, + ): + response = await client.post( + "/rag/retrieve", + json={"query": "anything", "top_k": 5, "similarity_threshold": 0.0}, + ) + + assert response.status_code == 502 + body = response.json() + assert body["code"] == "EMBEDDING_AUTH" + assert body["type"].endswith("/embedding-auth") + assert body["status"] == 502 + # ============================================================================= # Sources Endpoint Tests From 109cf9b3a7c82efb679f6b74adcf6b600d3b0840 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Sun, 31 May 2026 21:48:57 +0200 Subject: [PATCH 07/30] test(rag): type embedding-auth mocks as MagicMock to satisfy mypy (#329) --- app/features/rag/tests/test_routes.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/app/features/rag/tests/test_routes.py b/app/features/rag/tests/test_routes.py index 202b95d2..fa60ac80 100644 --- a/app/features/rag/tests/test_routes.py +++ b/app/features/rag/tests/test_routes.py @@ -175,10 +175,15 @@ async def test_index_embedding_auth_failure_returns_502_with_marker(self, client Mirrors the /rag/index/project-docs assertion so all three RAG routes stay aligned on the same RFC 7807 type/code. """ - mock_service = create_mock_embedding_service() + # MagicMock var (not the EmbeddingService-typed factory return) so mypy + # permits the method assignment — same pattern as + # test_embedding_failure_returns_502. 
+ mock_service = MagicMock(spec=EmbeddingService) mock_service.embed_texts = AsyncMock( side_effect=EmbeddingAuthError("OpenAI rejected the embedding credentials") ) + mock_service.count_tokens = MagicMock(side_effect=lambda text: len(text.split())) + mock_service.truncate_to_tokens = MagicMock(side_effect=lambda text, max_tokens: text) with patch( "app.features.rag.service.get_embedding_service", @@ -321,7 +326,10 @@ async def test_retrieve_embedding_auth_failure_returns_502_with_marker( Keeps the retrieve handler aligned with the two index handlers on the same RFC 7807 type/code. """ - mock_service = create_mock_embedding_service() + # MagicMock var (not the EmbeddingService-typed factory return) so mypy + # permits the method assignment — same pattern as + # test_embedding_failure_returns_502. + mock_service = MagicMock(spec=EmbeddingService) auth_error = EmbeddingAuthError("OpenAI rejected the embedding credentials") mock_service.embed_query = AsyncMock(side_effect=auth_error) mock_service.embed_texts = AsyncMock(side_effect=auth_error) From ff6ebbfa1836231f3a987a37f5fd086a6d14c383 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Sun, 31 May 2026 23:54:36 +0200 Subject: [PATCH 08/30] docs(docs): complete showcase dogfood screenshots (#331) --- docs/user-guide/img/agent-hitl-approve.png | Bin 0 -> 15302 bytes docs/user-guide/img/inspect-artifacts-panel.png | Bin 0 -> 67907 bytes docs/user-guide/img/kpi-strip.png | Bin 0 -> 9599 bytes docs/user-guide/img/ops-snapshot-tiles.png | Bin 0 -> 16725 bytes docs/user-guide/img/run-history-strip.png | Bin 0 -> 11215 bytes docs/user-guide/showcase-walkthrough.md | 10 +++++----- 6 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 docs/user-guide/img/agent-hitl-approve.png create mode 100644 docs/user-guide/img/inspect-artifacts-panel.png create mode 100644 docs/user-guide/img/kpi-strip.png create mode 100644 docs/user-guide/img/ops-snapshot-tiles.png create mode 100644 
docs/user-guide/img/run-history-strip.png diff --git a/docs/user-guide/img/agent-hitl-approve.png b/docs/user-guide/img/agent-hitl-approve.png new file mode 100644 index 0000000000000000000000000000000000000000..d0d4bd2a95437becf5bfba685325292606b11462 GIT binary patch literal 15302 zcmch;Rao226DZu4QbAjsVl7(SEm(`YyB2pRxU|LHosi-X++7L;cL?qd!QJ_2`~J^& ze=g2m@x-HQN(mtijuPzF#EAGukzfdQc_CF@(w zR)f4-1Kh)I>5lGvOV|8cVcv3b=n6YNqmQbyl(sgOws85+JdFUJ#>T*sC+_|<%#@Od zh?W)xA-&2|-(Z7ulxxcBi?a$uM4{g&wJ}THYKe(7N8I?!y0~2aiUdxv&(~|XH8yAY zLRsEPg8_rHG5^rp48J3NEc9-cqQEJ5Ha0N*#YO(6jMGWkmKGMHwe zpfWW*`AJQHR4nmEO-;^4f3&r+affPS8@}^4_xSB!($9FiDDf*^Dh=4e7WWT!a@$20 zh{u$4l&CBZd-1tX!S6K7WA|E8!pU@*l|e zcf?ccQ#|xKItw?zbC);}XjieMo?0>D-}J98j&Bu7H{puO-iEPNtrOp&r2i(Kc}Y{@ z*EoN6n%g*eASJ8$^!(rWwxLl^3~k&E5s|Nu7{~txJ4U9IoPluKxV{HcAt7mX#J|Ab zpVI|+o0`@>oftWv_vz6H|48`m?Cd&So^50N`}GtTN(x{T2aCly4zkbsaPLaITlpVI z4?*O0rt*mRJ^K==HT!=;^mgf5W99|`vZW&G-iwOS%s%`a?;2P6H%l3I{D!D8ZXrO4 zXUl6MtNVlY{B_`PLvn0j(PvMc`c=(DODud=4UvmpzB!i?G}mz-DJQE%B4`_F4%e1Z z5B$)NT!luowzvV=mgg;RjS+iPVAk&5oaMzszW%JuX*@mg=_GkPx+ZyZ%`M`}F5~0E zr&3Ax!QNSCmUB+qj%Y_PJx0DR`ZcB^*t+l5EP6Z?ycmV~cRSx7pX@BjF{7sq5V?;A7t)Bc;0)*yz~pbUcBl>=}3NsZ#5?{ zys6&vdMflzZ{sdh4c73lSgqbJtA)cNie#@k%;M&D-Fua|CEPb?&%r?BOb zfqtGEhar6Qgcc1_M+{TA0W8WyFyJ9~NIkmvyn3LxYVOV8}t7$f6 zc0vjrKtJBr{u?@Yk3O+9o38x7=jKQqRX{ zXPi?vi6IGBddob=Ut&&~AWw6xI;e6YWh`u;xIIz!u8}cu)_(0&M3&LdixH;!?_`Yp zDrRA)h%D#g+8lX3O7L>E>TLei#u3=;UL-Dw(U_&jr|rr=!lI~m;S|k@feY4A4yPYw z-A+E3t3Pw7J%5pT`b9!kq_Ixb!h(#+7_!JhHC(}L1;OT|e!~nzLek~*>D%U{Wo(R6 zHL36M>D@Ke5TLwx&3dlZzIb&MSn#Q!zJAH;0R!H?gyC}#PE&A1eFZ87@Gi*>-glc? 
za+@w5qr*WvH1{jhV!`VAnH*{bMEie=mJo9@U3)huK$%WktFgMqbegP^j$@q15MN-> z-EdbbqF5VM#_pJ>IbByY^RRiU&5k zP}Egt+}CM$yE!j=SDK8Y>X=}Su4UqOs(2%FPP#ck@oDZpy}W$AvD8PPGa1m8w&+=) ztzW-B94O#=4He)u?4TLuQZuSwZQgIWP*t|PYN_>jy9Lf;U>`m^W}$xE^iUyOR7j7e zsL`kx`E4|v@>fT+@kzT5X=~`ez5kZjHOs>6d)Q`dsi#FfBs0u@tj+Js+HR8C7sI${ zkBSuj;Al2&FchAq6~6v8th+hl623z@_?Y|RHO&^6{Pd?!AvgwY6zJd5SjvFcz4+%B zrHsk?U(|<7)Qub&xS1b~0|m-bY&m=A!R2dbU7?h$;F=Yz0$s`elg`>!ucg6@`-gBl z94Xi}du&0awY$J?7AF1PjWGr39yV`|qx*fSD=&rWs=et)(TUjO3GIi$^6gd%o$HJy z5f!<`fEM$5WsH$r+rC2Bz%^PCtx9dqI<G`29)fu>E7l`XJP>qoeR^hsWKvlo0gmP1Uot7rd#-f9h$Z&7k{#J7J@?`60 zR;`C-KGM3%7C%bABo0jAaZedp)zK|knO_Z?suvzwV3me&-uK;*HGcfAbRM!PqDq;y z5KYsRNd~bGe=C-!an~VJFVuEBcfZ}F*B!p4U?G30zgsjc24(Qqyt>>Q&-jRFVVN zM=8a_zSXX}5%|8h_!CGxdvfT8X z^xNK0sr=#Ezkw4CvfA~!Yng{jQ&k6$qM|zGl*a_uw%9k~L{Gmkt)YQCw4lsmfNuJi zQzbW4aASOy>D;NzH1xU-uT{R53edL3BvI5jc=v+hF`Ez1s^?Hu-15jS?wgA^1-jjw z_odLz^J>RJvj=NKSz=o50JhU4e%VGeQq?O74g5DpbSYp}U~3;G(X3@o*o zYIYOMtE_Gn-kP$~35Xs7!HvR_#(9@!tjumY|oa(qf%flGOZf&OBQE?!{vj`N)FiQvLRkd`IW7Ho}* z3rzd~uv_s^rinS)I=uHBv}?@G9Q<{u+FVX==gnezEVks;(q*FDE9Ry$HrMa0o!j2_ z1&lhV=Ur|eF}bv|ZI&-Pf9!6sG{^nBH_@4IycV-X?Fg>ddbc=UbND`?*ku_`kZq1o zDQ8M|22O4?e)uuhIc;-X*e&oT*)lUduHVbXKk_c=ar}@*{VwmGY^}qKeTc!BZ^vbs zi}q%`4z(ne!p|bVjIoAMp|rQ+^sIdDa;se+pzd<9hg7RQ5MtZVm< zMKNBqOLp-~lCA1)k}OqZdPJ(;12{1Te9y}~Z=9WnEkg)2)zdm--uia(ywoE%+!@QO z!9=Jlr1KG%4IzW*@7;H=^pSh|F3^A`b#eun<{dqSo z_LMvwP7i&quOv?gF4H&2{tK3V-A|4vkfa%3{Vx|VWAkfV)XAyq?956W>4o2aG35VU&yb>@r-Ensi!9Nnkd(CMM*`qY z?%qTE2git)R@W@0!FQKENReXxghQ6(NAf7+w`*d+3NJ~p6;tMM!!{PXhQ6&AUg7SH z*s|cOCx%>=w=v}{X?Z??py;#f#>}&FOfmzeGt#we`cZ-6DW@$5(|NmLJa)yY#qUQIOR)69(75fTxQ%=RhQ9J#xF%UH&P_A6iD|$Gi|Ujz=VKZ; zE{sfvfuFoDOoUS|VJloSnS5UvRHgsuUZ$0gPJ1v-l_*AqlkPKovPSmQy&fT$aLe>Acjk#@E{*iarqmB<^#;+evqa9Hy|7wlhD!GEJ+ZsK62x!6k4 z=@8J|ZEy&)xs#9)VZJqFwR{_l)=w#1=G^_9qy?Ze>aOQM#YTeOpVl%HXuqQDUgJk{ zvLZSjm%nXth%l#IJgV#rcrbF{2O%V{?eQ0K*G|~4r9&qsjaJhpL|jjokvrg6rDdok>U3=`w?sU@m8$0o1_3*LN== z9SR|=hBZ0(fqUMt@QjQpHU$pd;Sj0!8;MIQWyL1kkH_(eDwc|Tz#VM|XM1W`nBY_! 
zDy=phUP#K%PNl?3WT-Wz=M1*w9rb+k)H)ovlgQbp@41TB63hlwvrLi7vqkz7peygx6 z_l?1*MFs&z#?p!>`HX{w3RcN9L}yCq{Qi%1;0a*LR$ zhP(C5U1GmJ4~K-d&I_j<3ZM;oaORl4I*?Fsw|X-}u|9#D+uccmLn~C{Hf_6DUw(W) zK7hK5?~6dWptjOQIHu}-{iP{>ZWXw~>RL&6CiVKlor>&2JhRjO^k8skuVGJ#DiTK$ z5fZ0nX@9dg{h~5H)nL9Diz3l*RU{q-SRD_Xp}Nei^00n0V^7qf0pv z!!vWNmw>cz$LWD=A`NDfr-ls1R`#}Ali1WKtKufim$NANFQEvHCm-0-Et*Mn7End_ z5Zl@i?D8+^JD9ATOOpHmEYc2K!^G?C5(?P1f&z3an|-1dR1;iD3rE*raze%8*2doS z9dQ|uFP>*D+N%v_*EKVn$_@Q}!q_KJ!r@vC;*%B1%FU*%aRICXgd5U%@{WO)9-4{!=mSh zjI6I9vWAFm^3O&)G!=?XuZ3CVqn(FRo9UuAw?ofAI}Lt8Lw9K(ft@UrApgIWL| zN&AH+73{NWd<`ske&c;eDLlU^cgb{s2eQG}DR^A0a$DHUG4+D@)V9jJ6AS4MNJoWd zCP#Xi%2^D;AleGCiJCoji+g4wY>kheHuU_(^%6U=+<@Fk8%Hxg*4k#aeIx0K_Zve| z8{+|B?jkHxpKAJ=fiW2CNcT|O=VWamNL=sc8{P!Qw-`*8j;pImS>sn89<4)lYVg5+ zW@?~ge*f1VGdf0EirDa=3R3D5kZr(XP@gkO=)9R*Wl1=kbQ?4o(5LpaM9h4=z7; zTV1g>JwvdhYU!eUfRQ};FTU2vSh?^GyqiiKM!nUmm+W88PL2nL zT0OH=IhNvzfy36orHv_kZd*@ixSYrfhEP?yURtRZ7YcI`Sp~8wdZQdn@P~s&x--U> z7Am#zNwx!!LLPtAGtTG0k0!*02sQ~K#=5wT{Q6Q|UkWB%yxqn(7e6{KAJZdAD2Ud# zXpJB-0=NxZjP~2_V%Ak*fE@0rRIB+K`ieXu%G!<6c{+G;&8n=8b51hPLXC98AnxWS z$uL8ryMldAMXdDkJWkbi%UxKEARY=CW)1$hO$%_zNS5s34sq8lXtN)8%NU%G)z1rB z(00viN}KhsG^xOjso2vDs2p~x3@`|9Hu3OL{-s<)y@NO06U1tsJ3f{Sp_#(U-mD$g zXavr!Qr(C>7E@$;CW9U&(O+9++w32-el%5c)-_Hg()sji*f6J-pxOR(gWqlDcw$)> z8(;xp^4dhOkbN^guEET_d*xC3Fp#loW318`9c7zK&0tQ~ zJE(?qH`Ty1VlH>P!N}OR1O{dpMf--{oG5c_fbY&GbZUPMT5czND&9yz4{7A@oDR)f50*mBz5-8>I4_jlF5seY zPggT_LP#S{L2&-(WWKD)4TOcY6`MXM=3Y~V>fu$dB2!8pp5k(f8r5Kc;S*WW~ zSL!^(C$BOk`ciZ3ha@7lUB!%vC?TtuQM6fb|N8I(XBa>!KFhCW1+R9a|3WVu2_^uY_hO;e<9*eU0ZV0mWoX zMYfC+-T0Ths!AC}iB$wM1Dlr-IQR+Ulmr`=(@C4nv^lFLEOsCL^w|f~utMO*M$#E% z9m2~1p^gDQ5l0;|W*5~Qjcq{EksBq7ifl!*L?+v+>|vWYRxif%JiVYK|yw)H;RZf5)nDkV|EVWAQuvliAdq$Ml0IhXcUl4wb?J{0g>(9Fr1>ha3RDrkO`Y;2~=b{lg6{ys#4%vzuRRD|()otX^F(^y$@{cH8~}i|_B;n2=qEaBRvB!n(}nmKra4 zR*XvO@xi!y?7_YtBMLLJph6Rz0AU2=IZB6-^`f+=TJ2kIR3RQoWF@Ft>ma; zo>JCe9sBXt9jLO^Qu~@>FssACq$px>M*=Z6B%o<~dqOf5=D~qduO+9&4aCc-*Wkos 
z&_~lGE`mUrWco(rc_D>c=@)8uG1XKwIxBlbc)Q!YzDi!ZYt1Shp40QzhWV$G_AT3r z24{i6Xz1BO+Eo7f_Nh*!NNoyNIE&W`Hm${ zK#ni#W9D|FplpucT2q1;QJq0ia~B(UqsCwg}TI>ONa74)G7o! zOGBGR0e~02tHy>kI7`DT&5lA}pZ0Z7{DY+}I}2BhuTQNI5!2_}t~8UK|K$P@0)8(* zAlXF`k_Cf`Tjr_IRTHm5>gHY{r9gk%moxEQVm`!Rep)BQ>LGP>NNKuINRc zjLMJE^3XE8VjYuowPk2&a405%0C_c2E|loO>05E^OjMnMBX@DZw&$kKZ-dJfa9@^1 zz}KzH3rb@Q6~;^O0OXM?X~?xb2!((jGPD(z3iL`$a!)l5x4Lah#2r-Q!-B|?5->kA zPB~fd)CCzjI%pMbi&kbOZoVzdh6UIzQ^mjsGk|nfyL}P_5!oS+ZJHwyO z1k=iPj2QU6DUKQ#XuW#E#fAK~{Gc)DlBCTDNStp?Jh`~#A1nt} zazgqf>y*iDVpP{bJTws12>$S%DhDm5X8KDf16gCkanRV_x}SE=sR8Vzz^lOF1G>Q> zT5H7`nz$^;(S7RHtjFN?05_0fZ4=a$QZa$Kb3M2#Q*K(%opPaiD=RYe%R)&-lNYp3 z)vcY4)BC}Py00!Nev0Ytlf)@YrGs}g&305)gYuZUi8XJqHPn#A(H@{yf#sFgROt4Wv5lH)&N04cIZ=`Y=b=^ZOpUFw z*b^{gy1UvzdyjjU7MjK-tF3iaEh6fGNi}1-`uo#A5lfn8i|XQDKrZ1Sm6(csVQIJx zn&=WMih7P!>}Mcup~m=<>|n0zxcoBFS#_m2_f^FWtlEwCua1%716!p!`}1@m64aSA z9Fx9d)7qM<#z``mTmv;~kM(czQkhW(O=h{J*O*d6B1B1s^&9D2jDq?@kwObvB_aO7 zH4Z6?X}8u4$<6b?BiwgmV|!8sZmo;s~32ar&`VxhFeZG9bA(zg6u|cE;66I_PcT<`926 zZf1ln8n5ieGY}sxL)1n=AAU_bflx|BH&paN$E3&6S?}rl{;Y=!PPes?_;ikAZ(gQe z#+bY?c&lX*IPJw$taIm8Lh@@;05}}!SY}#0k0mPWO-ztXN0!?zaKmUJSKNHLW)e|& z`7%05NsI%$OFlpIoOWT3p|8h``HGm}GU}|eitGpTS=J+;E*hGhlFSk7Dx=1ZB3^3K z{{A}E)WP%?>(O1b2ThYn(je<#^G^=;E^yfG$FzqZ4lXW?sYOVz7>UYe7et`9q*`jy zEEj_dn0|;Te~iCtqcfDM7E~vNniOtw$2`UA?5NW`c>gvx`Z^{4 zCtrCqBj{sir%kWHCHKr&i>Cc%-En+5vHON{+S7ml?>E(Yc*akt)Ux`4sXP7S9| zIju}`bJ8QOj~w5^kcTpO@=odZbl;+G=4netpha5bI`)(>&e7vj3K?7ERkzQ|q|}(H znSn1^2YO!+bWAwdX9<3THnnQ)>%N^ai+AAO^j){eDR28t z?^Jo&Yz{!qYfCJ(4duh<e~E}+R$D!0J-2ev3BaM^)e11{&e`dhb?g& zJ<_@{K83o*X_2?vX(V8_b4!U~>XvI!Y2}vTi?xg-h&)w75&-KT#V`PL*vs^!}E-bGI|qyL&KnY-eq)C z@zEAnY2Smj;&iF0iF$LL$5zQr+3UycjSKIsw|aI>QM$UDy?qwhA7jV+|0kF!naqcx z^OGL*SYBGs(ijQ^QR}>`us!BO?;!`y1lH!@1?+hP>-3pds;rZFL7@aKfq?ywgFKlx)v$MWC4X07nhGhT?2B{^6PyE$(NK>Eu&&CeL~@Iq z7WbJLm>9S&O{-8clvo-zO1ozVoS|}i2yms(@#gz6yGis!3`Stm*&1?mEOe~*T+aj0 zNgmb+*{f7kYG&#HjwG`Z7oWjQsK&V4U~swE{F|o@VUBj95fOw|vRF{@yg}rx*%Q?~ 
z@z)fZiocV1dGLvL5GS7H<&a={!QP}lW};2h&L;@?Se^|_yC?pHaL>9vtn1G4ZM~wD zoW~*=wx?FzK44wc`*qmM!l{P!?!2u!o3uwfD&oO;-Ei;x`|Is=ri!L2sa5>NAA)Fq zo+KrGr*xlGNCC&B{pw@FlPN!)en+_dQ$Lxq;z=17*q_k~_2XAAz0sKxuvScq{oq0q zJR*Q>zOh_?9A51l)J*bpW2S^3{oB?;_ABi3kpGg*_m;Z^{80{1!SGe1sy6$3@3j9O zG(8l)``^r){};WpUcbz}-GtsoK%+-KE+5}ZVi^FyFC(w8w6McuM}e{HY^J+Dbkt^T zqqd6t55s0`(Sl;;9;T^Hv2A3LNIF=c1tdYEkP-yO8ef{?NdQS;Em%jV+0cR#HJS59Pl_t|D6Ytxj#?;&76?K>#<872bL&V{6q> zdE9+a){QAUz;~}K$=E?%t7F`OP1ovr-caA>d2tXm&He&b%MR@Qd0Kac`Xsq4y%dwK z_pd}m5QjEmj>$^J1@LgxKB}N#NtAYp5XRmTOZL}$_C%74c*e+tdvYImwA|L`i=QW+ z!raBE7^xVf=0powNy$o)UC4Fo?+~kh_y!_ZMYQA~+P}BQI9&9~5$-g)jHuXvZ5hTu zaD{bpFRs>-L8n~n`r8x4TPSl-2MLMu9{V*P|1%50d3HCSsh-eSQZ&^eWVwd}2hVqh zo<>HZ)d_+T;|Z)H%{pnZcO{K$+AfyXe0SgWCl!?x7X*p%Oc#f$GD%{WfNYd&K@=!l z*K3jHjSKZ_^`!xOa%kzCXuR$_lB*_`3hVmCdY4WQE-ik4j?NV*>6x555rbVe1|aZ~ zq*0U16yMQ*@yBm(SxA|f13L#POM#+n67M1X%I^bS3w4HKae+IDG4|jz^8v>bRJ4nl z9JP2HaxVw29AQay!t~o4dN|*<3dN3X3DRJbP*C+@MyHpq*P6D4jzu8_y+b>Qpw44U zQVU0=>aqHsURO3I%#s45{@YH1*Kb|kaOUq#T%XujeBszt9hXNdU!im>ecz~TGY~`7 z2OM!bLB&?l9_lD)qN_+&QzVKc2~{6AmRGVZ+36h&f4CgU!t7P_28(b?76h$-yj%Kq z7#4I&p_u1JX#RDtBq&?5y&C5T%b{MrncG6IMamzC&|R;oPAMLvK5+Fe7uO|t``g8= zMQyA6qk|{swOLe@NY?@rHOaozhq2llD;_!M7yhC5%06`Wq{oIXyW;HU(2rv=ej`ky z^GEp~AuIWg>#;$;q%?PIxgThFzojFBg1u;etU(KVKIg~1%g}Rm9pNpZxt0W>ZqaJ0 z)0MP^0#Y~a11(=e(B85iJl7@9;H-$Rbk+TRnlQAc(9AxiBhF79E#4f?j(u{I<>)5T zwSr>Un(u~VG(Q_IYh4;qm-}%GEI=YzJ$rM-%A>sWdSnXvbr&nE1o@JFMN4@~#OL;% z0o}GWxI8+6Ttt{~sE%QatD)4#MQnr@_nhTyq;A}heR2Ti~ zo0ypE`(^CUN7Uuoq-lW!dV297TDw{MGwIb)vfu5iVU|wrlDg3t@9onsZ472PmxSqy zxjdOb4AylxiSj$d=}E39M#|3)HASC;QlivMyC{3_(G>iTJ-mWHX9>qwthxJK5XHZm zVRo_mARO}^KiYus4$5h6igsPo=@m7Sx)?j9U7UnV>_fRP|EyP9Ja1Kn<|NJ5=g>)* zg=cwWG;8Jg!NeQa5xnX4-Z?pTNUmUzpAv=kWxa3UG-{^d7Pc;2>CvVm65^V+NWOO+j2)9WPKsgV1s+*LWKQR6KZ)3G(A`2pxJYxN*R#5p!4lHFrY1=Jxw za4hAmd!*xgn`8kF@`dRt^e%axp5x37^oXC2h7-eZ^(8tvcT;JV^?G9Yul!_h%z4$i z4bHqiC90}oqD5v|d`T#0PgGWuA|9n<|6?fpILdvfQ~Sir$VsDnKyZGoI)_L=t*TCx 
zLhq@UTKsJpXbUQn+1?q?Ye*TWtcR%svg93}r2aTBaqmVMOTpON(?p{V7@j1v%L}~F zVaGDUvTMrRgw*UFz~kN}Dqt=f$waDxnh1BITdPNUxpFjVs03+F&L|D#K^5k(5}z77 zTtA-`9*>U!QEEn%@#?p)E=?xXYn~rJzL9Tra#*EQk<2$DRHBK-C~0ER@5tpkXko-D z6PF5A*444o`J`SLBIvK%A}b*wWotNp1Fn9hsnTF0Ierf;uagyaY^x~Q6oN+87FlF>2np(fE(rPH#tNV1czyr&F> zDaL%FDLZ$_tblna&!rN%=6*hUpy{Cze_~T@KkCxdh zBI~FA=JJt~&+7bixbDU_ycyC*R9z(w5sB_kSJYq%axujqQYW<}1iGPYnOD;Ayef35!+(vy_7c>Fomcpye9XLmlJyu2K+ zcHGvXve-wqv~hPI`bAZ7`!)6buy3@QN%q8>rdMN8W2==Ox*F_YK#8fUk3;jNCa0gigdhHv? z3JJDfQgoB5&L(%96NL31g$?ToW5Y0j-}DEJ&w&K@UvJ`zZLb=C!lNIMK{|S#@NWSZ zRHyRcc!|Gt4hxS-=E+F@2<-qh5~4V zs&f2oNK^{kiEwlv7GMQiptv5&h-*p2@9o>4yW)nKQ3S|d2nhFnT}TB#d)Y<*`tM8+ zl+N4Y=*()~o|LuQTWPKW4i^YFZ}I624NJcLUX&8J;0svKWBmbH0d!L@k8ldyYdcZ& zNjcE1+8qip)AQC4-XCmbl8ifZ#dnIN(M+-k8#5P}36B!|k|%@VaEEq}I-kbpzAa3t zr7gwD$sQT@GqShhyL;gkPAg17?-hzvB2l!(eLfsmH1x~C|0jL##8*^aB|l>;%cA@u z{kL8SFPdCKklwC1goh_<<3wK!<*A5MoV0+H5N?8s?L*ELZtwgagN3I$n5FfY>JNA# zh)*EFO2dk|bAhJx+BZ|QUQ|M(a$*Pdj1D~sZn_>ro@&Z*V7!6m1_734!4kKa?5dAs zWUL3(TRV16=TuWPa@Oissz`jJO1ja+_%{;am;5lDgl0CJv24W56p!4`0=#sF41tmo zm+8nz=Ey7Z%(yUq$k<{EM5#&{ImxZkE zgba~$sb0~z-~}HhUt)Mg9q;q_00RpmN6k0Qeq$&w)eY>-O63Z(^cth+UDe(Pb^iyc zxt(n54n+m+fpV)G8)0*(O0?e^`8JWM6fH7&np^urpznwV8P2f8x36*-(!=@6ub)Z}J4usq_B$9%nS>;w?$bI+Sp>2>;yd?yB~4o{`2gY1!QD)Wk}s z+5*nU+1g7Rv*>QGV&vQrGA76-+#0qQLU3@DNw=@~KO4lSXwC^P)BI<}} zxyDGM*_%aTf#p`#Zh5HN`t?(0m*DvWO{wZTt?ob!?yKxRR91ti|QVt>9%Z1H;`Sn0Dpzn;aKyB_Gaci8KvT1X-xX}*MCLP#_La|`g<6<@7b zh*U}9XTn;-lN@kL3Q_rPwG#U-pSH&X-KX|*z3xM&liByWMDOcYL}&uaozWKwzKHo| zfoG{o#a*pc)~gqvp9VZ~iw-1;g083MXt|h9i>J~fv(QA#+FVQ@{X{L!=%XE9^Qp#A z9GzgMhtHSo;$agOpKHlopU^YekfXSc%8bnW!gyXl;N0v zJYU^%`jX=8Rla?7es-0^ROV^pgcjk=ow^J#K{3A0CrnOF27)KEQdgD)$NknQaOQT4 zW3X4p_aNS8g(jz@hjQa5_RX#pv-h=rs`A&3lyIIwl$?bxF#v=SYEmVix<;a@&3 z)E9L;kKn~hs_PhP`Dq@}V=XJc6nUHb7wrA}2Yv?wx}#YPS@53UMioD=I>zgBvn$y9__Oi<61Zcu*0yNtsK`U&{sO(DwIyZRzNxzkhc`BzEBC?JzopEWB12(IadNefQ}mKl?%gLQc+n;0)yh zlSQV_=Ayl5MOKnVWlP8k4GeT9V4T(b`_wMLrMS-Gg%J@Dc;su<`dV24t@4dd(O}l7 
z4Ql4J+?dq5&`BR%f}OeDuWr9&Vt};N-_SYeDFEyl8IN~y#!Ony_a-!X-=LlDL6lK8qxm4ee2*$ybAcvE*^pO9@X!k<)7kDM%@dOThbXC)X5+VXVqJM zcIIJ=N2VYDK!=$b7sQE8ymp zv`%tUVP&J(3~a#{?C7ejaEvA-r>3VCI5!Y zw|91CW@ObK+sNWwi>^eZ|1Gl2JV_cG3jg4y9l^k-7Qd}1(g0|nB+N>#8K1DcBQ1&V z$Np~z#8ODcQ!@4)n44Xq0Vp)z$epIesR%rr&d+kyHKzxEIYurlEO5I`@vJ-i%ZNRT zOx~=CAlP8X$=&NEB-7F1qxqJ-Dom79Q5kr4CjUsq2LkEH431Tt=&d6`vn66pWNXVS znEvDb2+zzq@zGsumNZt&G5o2d@~PMo;5WoE_PDpVr`#QWCg9P~*x(LmU4!Xvvu&kBI&hD&ZxW_-}u0haxU6k(rfJ8GG3*;o*uJrfP}FAHs< z_U?~j^h*9u^d?5h&Q};<0$Q`PT66J46A29kmuG-~WR4m$gn~*+(6A{!Fr9~^CdW>{ zI@2P8#oABUXWh6#0Dwo5XU`rVIXs;lpAcUCaj_Ae`Atb~nVHtF{UISCA}d@X`0eNa E2Sn5PegFUf literal 0 HcmV?d00001 diff --git a/docs/user-guide/img/inspect-artifacts-panel.png b/docs/user-guide/img/inspect-artifacts-panel.png new file mode 100644 index 0000000000000000000000000000000000000000..f8ed83f90a57ad8ea2c9fd1f097d19156717ba51 GIT binary patch literal 67907 zcmeFZWpEuk(*_vFu^lrr#S}9$GsnyvGc$9{%*@Qp%*^&NGc)5cdwsw6=B@gwcK6>_ zZEdZ7&6#PnTHTtK=8>KgA}cKl3xx&s<;xdXaWNtJFJHcGefa`n_xE7`#4|4r+c!`6yfIj(U>J z%yv-%{Z8drrw2{lWuVs7J_)eCbWAUyOGUyta z@ZYN4vwmqMT`V^x`VE6-=DSA4J0_*=J{0Du@7IS})Ig0A^>|io{HVX;e~fo0FM;@t zVfS|q@6Sn#t!X7fpbc$~ZkJxC*j1;IvjD!oqTO}G7N-^_UfESex9Td`_snXiciuT%&sBTw`sny+p26cbCo-SQcZ%|_Shv03YOy}^h^3`v zEz61dqc_sXZjdGJ)f)M~0)GxIa%Wt&^PD~%+-xh+zNGTQ<*_gXNw&#ULixe;bg6p62`Op=`8lpTFApsQExq|5fov{SP?BUv&*% zU={yfn}J{z|Gl33%Jg6JioyQ>2LAQS|DB5>F?fyNR(DC0qgXXCv?v6oVzbFsE-lUT zn%>~iAZP4vUMHntAuBH&dk=Yf)*Lq@r2h?}i?qlzAL<)c-L#mx=jA&qZz0;r^;CFq z3)Ksh3e^tUfz4y| z4BlG1N?qrpA!a))S%xHo(>(UId}650VhsJqzg?0=)}Ow@Y_*(f zJ*!_Zcl?S@5O^7NjCkRYf8oN4G`UD1Br?^DT#p52cx=qpjL5Miz_c2mL4=lDBs>@G zeiDj6;Fm*P`QQmJga0?FWOl{y_(|jd$Zqh-ZdTe~O9n>L>Le;fK4wqPL;8`!t=njp z(NyLb>6K*T9Bn$fP7Tc4_xeX@GJSF&rjn5RTzs+HxPzB_GlQ&{P%>VDv0WNzDcBv{ zmsvwrcUXJK@mVXtYY+J2!HVlaU}lM&2bIE{$GyQ4miJ;)!K(MmCj^{2((w zI~x#Z?y0qRh~0dyh}iWKl@S4E%gI9sw**sVVtLMBy&M_$#3i>lBT}VhpCA8NqZd^K zTSIG@mkP`KO)SdbrWU^$px<$K*;D{=fT~3(qqa@k|48?Kb=kH*Rq{DO0NF4}HO7Fa 
zP48n0{Q+Aac2o|ok5W6|&176x5msxo?He&mTBnJ4IOo)!-#m8gld8+sVn5Eb4(=Nf zHprOC1j4#6{+E1RE}z(JHR?09Ny}VD=%{!=?_PuAL)r zv&B)RIcd%Bl_}Di)9!q`J4mX|CZi|3tWuan#hU5ca%Txad8aCk^$W`e*?!o&NZ$8R znj|Frh_vTP6hVK9{k9gwmV#)~Gmg!1h11IU(EP(N-YwJtPsFr|xJ#k&vWns2^~^p~ z3C@;d>*Ckp$zT{7H*g`?56BGs^o`=`P3Q*A2vf-e`PzrGUsZU|=20RZPU|Wt%o!`~ zbd`<5jWXKH``q}?(9)U9>>b9b%~$GscAJT2uD|Ow**lN9@kT=*7)dHQc#RKxfsw3^ z)GyUBaUI6T5wTTBaI_aGS6qB^h$Yv3Lzy2ZJktGT9VylJBloF9raCS=r@hd%b002zPQNv#1v?Obr_ z2h?0dflNfF(%jtYt+NJm!A*Hpt@lgCh1Xlly;H}~uu0=ZYQLvl7t(H%i9hKylsOf^>)MiHvA zP6t2EcgN7SydB17J-pbg%W~~QNxBT=T7Ao{&SC{HR_g`ypO@~&I5p7H%HSJ6EJr8L z(oDx9LiWj6IBO{`c0tEp=28MPlds$kNOBz|=yg1A?A|8EQb>V;-vY{=^pBQ2T^b2J zhlr2m$4N|49bJkNW^Q4XtjzL_i(Hr*2p4jv1?AjTx&-s-kmd()IaaeD$*PMBQcqf+ zZ)xGrA7Zu8w2k%<;c5_j=81Fx3*-a3;uZ`0q2)vbvsc>c@^C`h+xIKJp~XaBcf&wG z-R3hJ8MllYFj6ySc_m#M<&Ei@up}&a5kpkFKz?8GXI*FTLYu+w24SV>Kf&Yjmht|U zVDjza{rQiBgE>V)8h$#pkX{ag$|E_JA_aYBK>wn5FejtwEE4M!pXgMlxq_B6PPnR` z@3dZHj*YhLiC}++ZGi||)aVqijx-ZC3>Qn#(;F-+t5jm(Be5|~C9<|;*Ca4+F8@$W zl!!XX8~0`4muZ-JAtx3G=mL|5*^VJW7gtf><*yEp4zwI`#~V zicsDMi2{gbt&`hM(#Ef;?&2%Qzt8X+O$q3wCsn&>Ud9{tM67yJ7&?3Vrb=_+n3D_- zB|-I1My1NugTc@$H9QYI#K5~~c$<{zGSoaPG*{+uS)GB4A59odR&Peh-;KSJCk6lG z^go9uY98z)~(6D@1D8MsX@V^ zx9N|z6BI${!V-&JAx4$C;RLBy%TS=ZI3ndQd9_c?_I04t%Q1*kwPv&_hXwr#SF#k>Qj2Qd{vd1W>4eqJush3m9gI<`f zX8^}a49HXY#z*BO+$YWEgS(sj%Y2sJR$hD>PMErG!$UBe!OOd?eeB`3G%?y#cZ(Jr##LEgr7y1AY%MC^3Qr1wDynMzboWD;kCy21v!LR@oDW6RH+meY zdhxE2VqE{yq~ju{sijQG2a^DA2RD5({d+vwD3Z;wH?9XfZ3O@?8bt~x{4hoLgz z@K8S3hwZ0Tzx+&eaOqT5Y{O<@AW2b0TCR@}Nsv`~%fbx`7EkRS689O3co%qK+~)M~ zWnFOK$T%~EaV39mEWSX}8)X|Jvmr6smB-$0oQwsA@laWb3Bouj2cl5JB6dOF778eq zQccA$b;&jnC-}9|ZmRn7i=(N-PU$Qrc)zBhLFZMN|JCuELS_^=KRr`Vk1QE+;iDI| z_o_0jKs(W~gNc=R9)cQzUvchW+PuKu1?Z#*$AAhk%J63Lit&-vkBOF1Us%V4#WrhXt*{N)9s&FqPA_h=OE44f8Vqz6X9V$@5 zPMWzZ;k(B6mD;21I@5HIzcfod1})T-p@K|unC+zpw2&#Ox~F&>fm=1@IyPa@a#E!1 zBSuoru=G0_5+6G5 z_#d1TA?+W~;aoF_#HE_o@#r$=cLxVDQqAEF!DYQ(vN57JgnaByM! ztBfIHuV%ZWv2V-HR}MGc6H*+_moFa2oK;tBE%j}AB-?y@C2PK~Yx%Tr8J0qSvzH|? 
zQSn5Jds0 zcSxnacZs`;8L=&Qwg`ItLpk8EGF}oNdP;bmKn-n-$I92&}O@zqmx0tQ2)Th;d9=FI)Lmd3WX>*_(FFX+7xtX0%i1)&Lsw zjO8Q=v>f)LeJ#9NLA-22nREo41n_5?keQrCbNf8u4<_8uS|@)j1ZFbYa8$dLFlOkT z1OgpR;YF!-($$;pP_EGf>2aBRBxC;*3zta|I^WRM6jlioQa&i_#JTT5bg9BUe(Cl_ z%S-fwBP#)O;?f{(yIyPDbt-Yz^1shi%HE1FcNZWNbay}IQ!ZPJ49)03^QPV~4s;i} zA4QlmYfSDZg=?$i3Tl-HjXk^EE{Hv!&g4MTz|2+JB)e)9`uZ>V9E#7N#mK-1otkZh zRUU>3*B3kiY|ByB(oG30hY0IC6|DQB7MzQdUtlpO==WsrPQc)vQSPFXE6=K*hsqTQ z;5riN9ag<|?4O*=WHApI3prC@-rev@Zv(6JlU5x_`kG6^9ttzoIm?WH#hUG~*a!Nx z6!_dl2=({XYjbwWJa%~xXJ!_>R~S3iLe37ZTQ*nB=457s@8@>Zw0Rm!^)6=o%G#^B zGGk+9k7es4pnw@AFiiO*FNDwD1S_?d+@LYy8r`nom`H%YcR<9BhmkV0)uYEr($f&Ki z>=}`)hdaLqNMrB3q-YBeJ;UMrN3z@GE36{(&t}`JzHS$*#VVM_m;9O4zj#agO}0c@mtM&^ zK1$F|P!>(E^II}Y|Y-uRE@_5WQxMMmZWz85>eah`cqPX4WWFIVl zJd7Ru+PX#eV{-$;ae@{UN4x6lq2a9h)p@8l{-yiX)V}gfeAOdDs9OLf^VwO|!8PQU zFMJab{6qxp9yXU=(m`kFmY({Ig#|AbGS1KxZZyM8+?2B+4p@-fkPHb{1{Q@?!gww< zjVF0AE#V1eYp~NtKRV;EGlP8v)-V8!q{ocozlJc(5P4bc<)cuYn#nGuftp4b(@`H%-FO0FslNqg%dp6S z_WCwAe|_n`kDMw=Rnp_0LKt8!(Jj+#k9!?z})% zJ)ij9)!7;y4MoIDSq>ek7VzGp2whM8rmU7rDr`vm63x9N5QSxBt4<8VWG2>}AnbVl zu-+Br*=OmQ{VHSmm4Kl(YquN2szB3_A|#+dcuo0j$j^G`SW{topWk)--Fm7KqqTVg zwuWx*r{fit|MWY4ZRhgdfV-@YtgB2eoG8=XuZnO1pIeyrWJN4Mg-`}eNwIgM3e=~e zvX=d@ibA<5HD{KNA^6LeZWGjxPTmH3iIlTb64>5^<*m_fOm)qrhA@8nM{U0=%PiIk zBhunxQ32Qad&wO=(ZDyMEH z1X#-J`GjpDA_=0{$#@Aq9zdBcq>5U4B(=sH+^ePsrNX$KeR&lYY;+c&3jB{T(k zjTsR(ejBi_j@1%#w_RBUHO5S&7hJxqo`#sqJD+!{((=b(F9_#e-|{Zk0s`mMWANP> z%Cff!)Vy>1sNlV)M&2&2V-I%L^pBs?)pHt4&T(jUtVQKVnqGiS&B7M(@Wa8E`GH=_ z7C|yS2Q{_zW~7s0d$>$Fk0ugw9W<-wQMn9hTfv641khZ**On%P#L!W{W`=Bkr{8)e^PlTIsANb!fwUIi4Qb^({<;qFMVYp zw*55>hcE7?si>@gZKjSNUvZ9r(Xi1ewG1BO=Pm2WLvl(g?HK28cDRS!ec-jqnqbgz z0i+oc60y4=LTMp}jwweM4PYOQWV&2^GYxoji}0Qu5p?Q6sVqrM_NLr6aMIql%rr<( zxi`2J-nf9;_0l^Yu!FQYFf7cVHFf^JakObic;>5n6bV!VgsrOId78ZLqPnZJoYcdU zaM>UQ7)1gbIA?<}sQ3ZsUm5h7i%T;Q zRi))~P|!#M>wJ%N>#+OtsLekbvb1KFDKxvZa;=qjkfomPRAGGjuo*yfg8XVBzg;U%vdWkmW`u~}JyhEvPYwpM&UZgBG{4jxyG*#73IeqYx_ 
zN&6<}CMQkAty2#E9M)}q*EwT)^oyNzl*FLu9&@TR|SHMtz1|+T zQNbWZs31I*$o_8(4PiNt+H%Tf40vUv%Fa+wSGoiVK|wOg>RTR1VkaEox#7?xz=00v z6j5f^nQKh2S~5o`ArfH@5Q1i#BYUYRqePYU1jE4={zn1fyLEjXh+q%R9I;RV!Jpfu#<0@qJ1w!n$ zpVvP(z$X#&72*$PG$1`uyC^Ly6d*8<_l?Lp`$423&9JJj zYv*R&i4t1ge&g7a9QOi*Ag8dH_}1a>oo$4)Y>m_bCH?i`NNY7a8mOi|!A=@=8G5$V zG&74+Mcty49LsAyQ(BBbLHi-(^197Z5uvf{Cb&{3B%hU)zWelEIYm9<46V02n`?); zt!>1O%5dE=cD?ztp7V1^FEnkJ@~z3+GG*NL0APDWaMSN*YpzR-IC&fzu9i^=HSvk= zHfxkwT<_(C0hPmqmQ6q+*3_7b4GuJKy+vy>y?Fzz0jc$osPK`+2RB~ArRoae-QT~6 z(Jf*0R?UV`lIqzQgHyBoHRLB}n{1f}OdSQ()DaH{sJRextam%eu0R#QTk_u}wB;1i zFy+XHh%mr3`|tGcGi4+J3=ZnmX0UP3f8wxWTwaS~+7maG({K^3hcn;ar3-5k0y6av z-zyR#5wl!IPedwi8t#|I1dfOYc*XlASeKMN9X__}&W&zud41oLS@KzX25V#}9B1sc zFxP&vMH{tJ=%p0Yo32j|By5cK`g$!&>Qd|WjO=mkA-1w6)0k3}v>S_^vX1*?^8LhS$T{TR}W8nim-)ntLVVQ64;hxj*2oV>s~upJ_gr z9!w#hpyRn_74po|IAAwHTDDaG0v1xFet?N(_fz zX&&-f&{k4gFRWVTq$hD~v5jdlSs11E`lXnVg5y}J-fJfyL6dG|udTVcwRyCw7ax*CsRiB+pw1k3bxY=}MiXfa zs7+a>>gU|r(A>dJJB5I*Mi45T2#58X7D^nDo|Mc>`rC3&LfmvTq&OMWRpbG`e63R{ zEDgn*@L^A=455)=gHv2#sMmGkjL&|jSXyZ4G{xHCG^ymeAOwb1NIG*Phs90I7v`GJ z6I$NU~2fhLeVaPoW=|H*jEGcdITNo?f|HLFDK=sC{c$Chi5Kt zZs~}MRaJ58mZjJ$b)8&{jH7TF-BIQMCM>GoxjvxNV2w$yAn2NS*}_j2bz3Qtv?nKZ z(fMdM=o|ebSFF9r<+mQcqd@Bc1PHc3#1%Cy4F?0<`S*uJv}36!)1>u{O~>w7%``_k z3aca`M!kdznhRQ4r)d2r;g}b1GUl5NN0DEset)rmKT_!Nq*=cXz85=r3MhHdlKx~k zsWiMAT!(wcRXQ%pd+?@wBJS5%!CffZyCjDS)SRQdCl-bNOg%6Y3hv?nRjj_~xk>Zm z?xykHb~i0!Yj4rON|3(!u|6LM&MM{L$9eVy;)k!+D$v~Pkj*;YwbFw0hm{uX`r&Y6 zFMeu9fgEmBGCJNyqNdvS)0;zlfF_{MmWdUg;S%Qyr>tvp=hAIsXb=gx?My>>_U|m7 z_3pPuhC#DNAkNF8BkcHE$67hDRDttL*H%tPX?C)4Kb$&UN{%3Ny{%eJx4dTT)HY@0 zO8p`_icQ2Ck4>H7(UBVa#68P78Fg_I9WHLYP2D!0t;vp&bWHT59TQQB7$rHR-XjDp z&^~3{O>ioF-(*eOwuS#Q+9&Pc{dyWF4?G}(?n%zLAx!ml59#{E+brJ=M}EYF>r!i> za;?tH?A&j2F&29GRV28&I>UPUsZ^%#ADQqj&u7xF9ZrL{YXf!@w(f{cXX_nERa+y?jA6lKm_Qk1P!^77ERGynT z)l*N!6tz6p+G1w^;GjKrhUu?%13Y6O%%>rm`X|89OU2>FoGS*fn;ad|$9tK@P+bk0 zSJ9zUTh3EqX0JyJFH6GXv`a!$Y3)-RaNBu*GdehGJKx=<^x@1VJofJs%rXt_OfAz? 
zdg7U(?iz{LMaID+0ytnEzUo*T$d>trYEu{=$Pp2ukBcfOlz!{_R@d^`Z3MRTSr{$a!>n!UvPG0a) z@nj@-9hRt};Iz(q8=DL{S4LOo1e#>+2mF?#IuUFIT!YcvtY}5A0W{-_V<@f68LAOup9*-b#d?tnNWpa*a7>C7rM( ztx<=DkBUs<2ae0vDy)?nZ&xQ}JOWza^4-!C3~EFedSC3A7IPSI4l5lPEvU)6Z7WZH zq?toYtVVbywTdJAoL?Hw@*?(A@3SWacMMPA4s|iLwVbkk#5=#loUsL|-;_}yPe;c& zkp1UHoSDVqPjLM;XSR}7cEM9##kJ*Jc?fR}#i6cIda98M4YcOiyuE&0a8lzA&VLEa zY|r7xvmTfeOqhW8YI8iROXdQ(PI0JRa8Yt6Io)lnITJ+^H-;%aKQg1`IoQvDvqs?f z2GJ^mv1cVNq8P}dq`msXG*&FO@xcpTErip>nX~?$^u%di!Xe`4i3HT?HzZ)@`!SrA z;H}^2-d6VB#eKaRuAZeNIvR6svG3sn$D5U$c338K8ow2-N~*G6zIeHz*%TnjI?=-5 z(k%W>fq#|=d#rI~ha=Xo+A#~yN>@o+S>DLjw9iuwY{+;Z$C!&u+y#DZqqWdl*%VA?7+e2p&G(nY@aG(g{h zJFCRpY;3yp05bpb3y~>9IW=bOw4&#I2Jr;^v`b?NSImfd;2@JcDwga^hZ{)9wqE&C zr&m5g%#} zBy#<8I(n67c+S9MjTU7prkwcKd;|1R=+L27>Bxgvm^G?v^dNbL9MR zp<&eQOu^pyYt}ni$fU(ckt>TQRU&MAdDmkt0u5QsfN;;{xLY>NbnqNkclIO$F406< zF=DN}Yb}*TA;O$S2sVW>GR)xKpkO}6w+p)YUO}Lx-}sKQl7YD5c>$}3|4k<@gDH%f zWSlh2mh&6MakLna<{&9cX5_sjzf=4NHB7_VOOr;wZ=o{N#=3eGSAO5AJ&FXf1Ym_Q z-h_Uv3d97cnh(Q7D?L83UVxp`AQZ$q!1Byj@gj)amFV-RT6<|5id7F4qM+tns?*oh-tpvc>j@}N*M}} zj}@G)dt(KEQ8T$;?*D^+TI0gj7Kcb_rD^LJ;PVHh%Bw0PTInumOBd6&YTy|!6~N_t zJBiRl`%#v-p-Q~B(LvjId22M|vulv8crK2DI|kp&bNg2DD&Quv_ds=9K%grz6tg#Y zw=D6Q(c!&LsEkf(d{2xDjdUyeJy_@ofkz|q;NEPi_A&?8TY04%Fc3uI^7KuSikn7z zriM&%dc@+C2OMIl(J?_Ok?mwegT-$2NT$U>MSFCL0XG-}6O@p2wRoI1d3|4LNO>sh{Or35^(eL zBOfm@YJ2$!4aFoLHXGsyz~-yNo=Mr&KS#)KvdjgPP4|M70j|`FuoARhK@jy|jhVGW zT*&5IRrGaQB_8KsjHEbP+D?_lA3{ulku2hiTaJU@cCvCFf-ju%ZJp}St028Lh2=34 zR5wCJ{F`TZ5*I0lo6XZ#765D9qV@L|=gLU)n%Dcq{S2v=u=8tPm%$k32AA31mv;QF z`~}h5CNe{;_Nb->cWg%saN!Rp?Xf4lZN!v~YL_d8sJ2Sj#NgUu=M$byRml<^AAE{TK?`h0Qe)GDhWmqCKW5$8U8SPHHRJOGoM+ar z`f7-AE?(G)`&|lO$?(@J4kYT2m?k=*OFEwoQgh&1Rptplc1Y$bqu-ld!^@tLo~rt7 z;w%Vr?f|TdOq7|arCbct4+*)LoGGybGjc)R%H*Ql2^ME{aZe>-uC1c?M5~Xjv2?LEn4Z`UX$FJIGsNA}vTe#cyCC41Z z#HOA0@JVzI6(B^ShAcU$%cl+VX#4X{bU36~W(pNJ_v(^2Q!0(K=cI6RKNYnYU?wn_ z;^nWzfdE0Otp&(p=`fED+>nqvOKSKwmSsu>q8vRRq#c1`fNbB1aXf1ynl`{1|E%I7soDeHpRk;3fnLpej6uoQyK|8TSi7CWv8f1 
zf64B+mv)slvO?4m$fR3d<*y^Y77{>gYLFTp*ws&IYAtPy!dEmR*_&tOee59H57H;m zKhvawotfodsL?{)@`sYar*379oi`11fMhMYaxza7qFP``+c)rFWo6k%1Nv0A5MHWE zG&K4JDO5f6sVmo|PkaNDBfTws>=pH}nh7Y+GRu!jFk{rjxp#TOWT&obtIkITDCQ^J z5Y03>vRWP*7$u4QY`x29E-(!~h4@-oZ$&^QKGY`5j3xwFSR`7f9wl!9img?)rj*+G zwf=Ww8jfxAEk57Upm^P@ty-B34f-@xUVqkFGH{hRk!fFis<`DRElcO0BI`@D74JLI z{dv;U)#fxF8Zq$>4~Qc1dzlc-^b7F{CU8H8uKePs7*&9}g3Xx$f85#DRY4YWO$7ox z>OvC_f}MG3+2eWXP4LONWGZwg{Ij!xdR8dIlVcLtWO(B16?c$6%^F218Epv}Ek&gj z8a%snJ*<}6uIp3D$fKA|<+y}&WrC6~$y)AOR;V{AmU}bpG0lZ!t{D)khsnW~-Xl6I zb-F{ZAha;-WNWd(sb1jzaMrp3i+x4h-8(Rgp~G2-*irs#%p9=vCN>IH0>_9mH4Y*; z9i$dAWHP;YT|Rv=mo$Sn#JeGqnGv^}>J0trWQ%8V9j zSUnJi8uF56rW*Z2*@3A;J=4$K*zD%pD*FPlv;;tg8F+D3L)<)pvS`# zaFFV|)K940#pvZwL`GeuxzaJS;g9W}f6dXHR_;u3tlrjqWFNO1N4vP@?oaC}XQd9J zvLQn+mWaRlTfSvnq`9=d7ES2#d=D2`DH7g(!ALM;qQF6{W~ z?g%y#b1b#8&=2}h3I~WDX%v`;3<0YZVk@LV4EE?Dv_*=u3+_Ke%O^hkjmOubPbk~*RO%qyWRCk`?fy9vQXdIj zdVEH-E5{x2^{+6N+l&b5M;T%#J~35gGdsOK2qEVe4E@7>Em|NBurSV2!)R|bd5Ac* zMzXs!o1nAT*<^5K9M-7}E=p#S9_v+ij%OGd=dB}@=!%|nw3BbZ{#0bbKfb;icEXZa zF!A<91AY0S1U7o}YONtAH9E7+dJG=^N-EW;g|J`k86C-QUra{asy6d9o7YC2yzEcx z{n=7@V=`21opTouJO)FWhXyRDj}-+8@^tIqa!xLLCmcr2{;j+#B4W)|`GJ{nG)caL zA9po7!pD?+dBz=ZRUHUtZtL=0u*sUjQRTi~weFBZDRZN%KsK%6@pmv0*4L4Y*WgD3 zQ>1WuryE#DFV8LVR(`Y8-|jQmALyKremt6 z>OU<2TE3H}JXS?b&zsZBKl-{Of3X1Ys@VRl$PK)OK&DNH+XrKW?v&nX3ZUqI(VTwm@hrg^QIRvkgyivyMTY7u~ z3K0=gY}h)s%1%EP@HA2ql?5Y^sms91dZ&+%lBeRXl^VToIW&rpN zQW*>1fGH~C49?Int~P`$2e)h%5vd09i*oW5-hb!db$@V#DI$VG%L! 
z0%>gC`<-T%NF z!&@1D;>;(G*aNWc=nxg@!!miRF^w^vvcoSM$a})AkqvQ5k|)owz>sC`NuM0ag3Z~& ziq{`8KKjFh52}QSUfuhCW4SFf=+DN=>9oFb65!IW!)W<^^+4!rcV`!1v+f$_ba14y z02hD?bA<6zc^Io2ZtqF zK$v8F-O;`L+{TE@0qgmvaywGy#p3uMV?%Z_k&nDVKk-aY3^3UKQcGRZ#hPmkZz2PC1IVym@&D~GkiIZ5WF zPqCL-sZvKi{8s+jeQ-Q}_>C4HfH9KMTinmI(<237V_aC%0wWG&qPVMXN4)JH-f()?(F+p+2u<4Tft@uVQgH$Rz>m1W zS5%N}aN?+s?gFPG)!uz>I!3^3o@w5;-xc@?WG{BO6~gv!l+Pb|OA3(IXGx)fF|4vr z@Oa!VOS-0df4<1-&qr{r+pauw^ouWSadC$H)0%qv_8X zh&EC)S=Gn+M%E``Nfs^evj+PAs?i5^$s+>b>sTdZV#y}>qKyaBI=PP4Bb(^|DT@2Q zLZ# z;QwEWViiYyJR)nG!fMm7=>;9js{Jy#K4>J2dg-h%IAv#Ux4aX-S0Bb z=PNBZ@YmMfHS+l3(3}$?GZ9&o_RT|o6X=eF@aQ{NJrTO*GjNq{@V&F`^Dsm(o%g1I zxp`8<$9AcffRFChA^!95y`?!0`|bo99+eU*YDaEa-OuAJnuUcuu<4kh=D%$2MNM8sb8LUy2s+%kqB@>Vt|+j1tG{@s-JW|e+Q0Udq+TSWCE%oz z(s~zAIj`>VJ~35+kYhW^W1#^1d32R_^Hv?#^*pE6b57(%9s9}YJ`Ip;zez7zy#M@! zt+g)Q8+!{j|2^dwkeO=rVFMtuOZ)kG_v{eb{7FG@oY=58o6cXXPVVw*VW+(vqowI3 zGv`I|Y!g3ln3H!qw-ay~4%}tUv}#$1#>Q-9WT*`1t(}0w+4IY*>AO9-_;}dNh*X@{ zAj&)J|Gm+Zb@`#}>I9|$>T~F4!9KF)e^Z?r=eef?qI(#m_4O_)WpYqGSJ7TI!||yr zDXxigHmDvKwvAcs-Vx{>X;Vb-B{RHu?{VnTlHD<X>n*FuQZ`Eb(D_>Dp{!d2SCZ^6^r6l7ib@8zP69Vq7=q!f`q6KhnTOMS zxQCqtJd|%Z*R)n~b@f&=YHmq$jJmLuzu_i?NdtJ zlzOjKMjMSgrOxcTy_Ig6s_%%jjQ!xwYqeFmW_BjA$JUzK0TSYPju%PP%TF&}R3F_BbZFJn*8|rF)0VNgZmzJu!$*((-RAJ+c66jCz#p%<#mi-<^?uy2BBLepHAggM`# z1c;@a)ER&m?61|7qq7N)xZSLVg5FrGE)y~HW!1e6oqcSSQoY*7@VqzVgiFSoyDZsn zir#Ad9BVyHIu|>^R70K1XbAHDkR#{Prapi1c**)GA$yZKs-<4$BJQjm z&=&wcdMplx=sLOX<>^ASc=-118GdZ9ms*E^Dg3{SETb`IicQg@ayojLIFYuyJ2v z^RM*qU0Gc~jK@lKFYnX!R&~^S(cSOxjm##CA#qoyXv+!x%aY{jr~Wc{a)RUPg)!)BdU=6y%`VkA>#<-2)Rpx6y{4uD z>C_`v+s$E?^&+aa#IsQyHfT;|T8YDZOGDU4R}?DKm|kCMawHyg7UnQ-(}88cu$|XaDzep`-xalu>t#YlgN(Du-06jQp*a&m72_9~0=_ArlBj(+{*FGDmNhQrlH~ ziUg-xj?avAfVvPM(p>o{;^IBf*9@^6AO!&VVNeI`;Fm?p5Aiv$JB;uq4V&BFnr_TG zxL8N*cT#DmM$DK&aa&?nG3Bkz)OZPyQ5VCs`UqaJG1G&Bb7d9w4d3F5Eyg)ji6cms zo6DBlk9Zh=yMjS1dZVZXn&*#PE{lm8Vg~@qsvOXoVpH2aGPB1TJxo1@iw>L52D(O} zWQy)dcY|*n$*h~!yAf=^ea+-QFBgZp(ZWqpn<2vzMQ7F$kAS{`4!)?2`A&QJ 
zH0`~9-77*6Ad>rWB9}~4Hzqgp**j!QUfJnn)|W^G5NV_H z*6BW@9#JvFCj^D6I;^U(IPxwK?%=3(GVKiy{3Ixl(au)q=p@NV@)-i=cPYTy zjIw5JvTV{42Fw!XN9Ng3azVpX{lx+p^qRVw?#5m(4usX#S~Ese{MjZ3BDjaXfX|Yj zoW*M?gbsEml{+8j&(IqEJld5s`sJZ_mFPWva&Mpff&Cbn{pvEB$E%5fqMQGvI zr~aG?k6!tf<2)4{i^K0as~bvUm6E9SuJk9>97-~u)0B+T`U2%TlqM+{VZJ9p82udy zd+V)Af&FV>TT~oD*OgfH2X7Q2wu&Qf>?PgpZ1&aIL~yvjl!~6~z1ktHrq^3C{K`r=)k;I9VVFt|_Bc}k7f;G#m7W#ONuwhc= zBEmfe;rMGK{z&_Ba5y!rP*>st0aSs)d}O|2l7)nH5hazzdgqhHaJuWK@{&a+tf(BlDOQuV+wdD{-|Zx3-^VXuM8ewmL<;wm zG@P6=Ed?|(i)7H5)W`LG#b{RDPA3n}+eDOc$77T%H-zff|SunhCcp%!gD{BBobW61Q^{#{JXtnXu(`>ra@-;pr5V~zjEJX zX);;gkMdN-&Q>R{_<5y3-k)l{z3_p8@$o6B^Vimn;*F+Bjs=Bp^?<|7M*SG8byn&} zmnI?k5lpwn{gtbr86;CU1!Gp)LnCOI zMM!jxlv1vGE2w5CHH|DSJ$Bep5c7M|vt%V|wF@9t%EmuX|3smV{xgiwdZS$_1;s>kPSOe?1GnARe-3Jmy>_eY>{+Afq`D2q(EmIHgvv0_rKPlhVqS`#AuIc46oB}8Q53x%X#RSHJbvs$Mq6n9pPvSxFC7I|-kNorj4u))4ALTQ}dLwSE8 zXz0L7{}8_(-7GY~v0=PqJ(|Ir)XD03QV`I2&RBxNWh$)B1`!GToWoJ=!{$R;z&R1 zKJII~{%EUsGrulktM}_zas{6p9=-U9xM^x6To>j@g+CjILykQWIc;lZM{k^VLt)Xe z0L2r&0Vci8IOfL1_J_$}&?)!)Lsig4TA-ke&v#D(c1X#|kfeq|RB;6+b*3LvKrutP ziAmh8aa(O2nN&8Vp#q!!zQ1w&NY62 zM;%_n;-5cybZ!gkLRLKi2d4?;xdQQ670=H`*;mPcz4BxFu+X;c81cT5!0A2`h29u& zxm(ZMhRB(|vtrCIG3l8+4V(d6YP<+s$9#G4codtRZQ6E7%ty}*GND_n??K4zsq-*F zpC874{E5W;98`*=AvkkSRqARg3(h)PIRW)8Jzw9m>GMFTJ$cMw?h!wlYgiE)x)I!a z)m>$=U#Jt;&m^0-gc(ba*ivlPP#ndO>gHA|iCAuogV}rSU2KG*w7T`fY9!|4P>}MU zu9BU2z9neNcMXnpl9fo0m)nrQuYNw4T^K=c|0|i;Tqg@RWhPID6vATFisK4UyX%{* zX_+6OR84+J?}C~X2VkHP64&*3;ULss11w_71i-g52p1eX`MmQXeP;JCazdCW8t`~c zJy@2pZPs>)!J{MT?3c5)HajlJ+ynM*EkiWNY4~bGJrvX4FNC)utbzAxD>2bYR<7aM zYlA0im%Y5Sc*@JT!j77Nuhd9rD~pWN%24O?gP?-%6nZ$vs1FaLx?K4YTC34x_4qPlQz(@KnnDl*Z;g9i1pgsCmp7Pfb6{bCqFx zfSWT2NWZ1@rSCH}iGIropG1fJc4aQBD0>VMa;_ z17(#4w?2n)|8e1~H}0mCRT@?~ukHlxQTbg&HdY0+qyE{;u=H=29#tSaJy1zIb|*ZA)?jP|$B`-esUL?E3ng z^@M#)Nxc7Ie<_iFGa~(G^op+4j+NI8^fek~(mUj#4JvE#p{yac9Z+;~;vCQq&*xP` zlWVE&Hk$6V!?my2ry?nV%gC;uE#!Khb&;49oe;Q=u{C!HL6tgDR=)bK960im0O0L% z;1{pd%$GMoi<4908DAj!7D=Uzy2Um!AiI&qs~s;zs8vvH 
zCLap<93pA>%0TD-cY^FMaXnYe zMP`S>@i%n7?Am)F%B#LqY9{>A$BXG8p{!Ty)#TLaF{04OaH)UzBhGx3OvjEvVW|OE zgqx^pi}`VJR;^Oha$@y-@j2Nn+$g%VB|+Fnn2;RMcJ{F$&DU-?vqD^Do0WtRgf>f5 z$#Vor<_gUYd^z4@N)`l^eF=N9Jj{8TXZtb7g&@4ZAuIrClcTXng1EYls+*1~OxV%q zoa8GVrNW^*!2^st1aeCV84uK2Qn38(>5vC+T=3|RW$3Vi+|G*|U&<>jwJ%J(p`ifJ+@ied!p|AJ2k^Y5ecqG|PVK9JFtfhK#+6P4_q8

=+8&s zSW5Tcz7HZ(A3klHD9;eXFl@S{q)r%2eApAK2>iM!N#^b_ow#^gr_i?PeWlpz8555@ zCoCj&ZWmyxgH0mE9$hZ_J`041+Zt-*2X4Z6Z=JJNf^zY(5!s;AI+XZlq>OGY-Dc{_bm?BGME$@XiDG_Roq-fzH`%%aDmx1bL{ckD_$w1f@aH;m zR$j+i+C<`}xc5_JC*e`HSaXM3P+@p@%M+^w#nh_^pW4s5)8>_WTP8%7Ai|xHgD+lY z@R+=`O{!}@4Mr+|{74HB^SPg|NI*cfsaU#;zWCU{hK8q{Pc;eDO_k$8i~P(Q2-e=} zU3a1IXD3c|BxCQH%a)LOWw2^*O~p#YK{OnwjU$V+)^+!xV#G4w8vL;K_*9Ds0U=>A zcQ=ta#q9s?!=G=OQmClL@2UNSo*!x$Q^X@CFHNYefiWYmDNjdW#yOf`V+id3%8r5g zHJxcXR6!dFD&GkZy#5PE$wb5I5hDTGKmW>SrcSK=1n!Fhb3qyhQL{%|-D|I6#i!O- zJ%_ptHH{9;NX~d1NZ;k};S0?znNP}hM@Ybg5d*EeW-k~eZ_|{o=aM|}-6Asnq;|{J@B^79#0da8C|Iq>x=h*k*@rObp)yLo#TMb`%`m-^6 zscpmVqQSxnZU z^!$10b9dK4b()bUPOOP}1&);rm{l8gr*{1z3}wu_WsR}w8f?1gl=>>FXmXU(X{Q$< z@X=kt)SEGrm*=!AS(Zql{5yYje`#Q%gMA(HXKJv`(DEn&Cq|^_R8tM z+3s~Xb1ha*8RRyX*P^u{DJ@XfnO}RJs5z#=v^{Z$U}Asnu3eqC`HEkA@#M&zXTLZQ zx?Bk!RFt>dOYx#fXQfPJ;3c)YaAFha$gm~NIB@*ieTiZGlXiS~W#`bLXQszd?d2>hcVPUJRb^yczlm=M;7;QqG)oE=@72;>JN9$+Tyx=)ir`}UWw{= z`wAuoW5b84!N(D6bX^(W!CT?=T!TV&xs%Gy$Qxxu1KotF#&MXY-4C{!VH2_I?HQdtj%8)DU zUDv9+VJ^Vrr=n{PoB445yawSRg^qNdas`kyr*kt5=(#f3OUbQ)Hcd89wI2o_F;MC?A^r(Pc z-S@QU)G{|%!#~sHrFd5NZRYvAsA}7_w0>uN^_Kh_)-V>ZE)AEV=CP24PMP~<(mP?K zyCK65zq7TJHx=wyaQVG6_`3aDb(rn_KJl13=Q{nhvc-v5`Fs+K0G&dGp$Cj?Kb3a) zUnR!BFo-bkqTkgbFQ}bh6@0w@avzq0eTx1uI zW@~y*M!}h%DgM0XY#{jhO6F@tDIDy%GI2EfCcb@hq1s3KVwr{39dWwFfWbVAi;_^W zv%!SvB&U===9h2`VrO6Iz}Jpwo{{W$Ty9OwvD?7QUPAVHF;#LE_ctAmL14L%Hb9Ih zr!|E_;fDs zD#>crMb<9?NspUs(||i=vNM(B)7=#_ABK2|IG>e-d)ALU?3T9=U7{+f*~!}};;iiM z$@553T+A2OA-Lu^<`lM7IE8VZ?x$#Pjug*n;X=qF@h}4(?T(b`VYDhrBD%>+^+%nl#E<7V)A!C`6FSnM zQ)QT5yQQPia62*a;rJdUPQkmeRibK%Mm#621Ze6}X+E@6FTZeFZ?-_Gb!j6Gv*{=i zHn;xPrDnc!$<9SM*BthQRjY#{2Ze(v*9XI!!*-?MC!$)uP<=+nGr*yo^t1nSUE8Pj z=gr8o0+$L$n}oS9u()M8o;}tVNNg4KKp=^V2koaOrq!FjFf+_*3#&g()kepwbbt-reph>_p z?XN-tVWzsQUX^Q|(7aOnzYr9Bl?Qa_GXe8kBt|Rl>-$lgt!LA-$Y0`ntvUY8Mm!Z>NTF=4g$sSPATmp8eh37GqZ|T-Y>wSg#@@T^=Dq+Tg2IGI-s@CJ zljufll?}X%&6`txic_SdQDIh0u;7t$=0vShW;4Hr4RjmQIS2`lxx1OnsjM53$Blw* 
z1Q_I3SsPKoIB4NKlYH(m9;Oygb`g@?^32**Pn+{l9oZoww$-;3HiZ{+ z$WlSLp7~(I_|7VEyoOQp$}Js}j%o?pJJaQrs!EMwGs#Z9(Y5%i%h3j;&I)^CP?#Zp zX3|gL6IoEE)(`Tkb1nC)?za2!)vkKmNRC|FQ#^eKzumdL=;k!YVjXrYJ&)mUuo_Li zT#BKjN?YznL@zEVTv4KejvBzk?*p}*A8VR~m>s$2%Mpd8n5myaT&35fLwJ|XQ62P~ z9f?`Ki(iYyqTO>DjMi>hn{8YV^HVR-#%60hu`C=2c0`IqZT zz}jp?$;$KCrY>Uhe$+wREAnGu{SR;i6N6PMKRR#G$LcjTy)AErdzsnv`VxQb_%$g} zRg)uun)GAQ6*1`*FIb-yl!%3YGfMEM4a%;iM7r(N+~F?A%aR*-;_)2rMoRA5X4Aow z=cCb$h8xa2ZToFSzjNBI_xs_n2nV?i9+DT&Q`1o;!x8J1s=EZG3 z8F_rZ3F0C9x@j47U;H~+%SO@E*PU|=Me!LOcjOAq;B9JwNk@Af+;M!*9dr=n>q z*P-^k6HYyD&zCK%imT^>`*vEyH2+U#JEDLdQ)t;M{MGB>7PzpS`7>$y&TT)^t9Ph{ zr=@24O4>pVc&6bFx=Qbzh4-%n493@I>dG}gRpw(qh`T4Jd3u5)&k1LG{gL>d1;sf3M7{JwzQZIq8LV73B0ijNo0h&j z{u-Z7C8ghb8!9TqB7Lm=+pRa^E20U@@R>y(_9vsWR`VKC8yDifm(IiTyyWXoGX{-* z(GS1O@~`H`tBH-z9ewXFP(LxiA@$27`Ys22a0`@Q)3)C`ga7=?V^W`<)6Hg3;HEU` zo?PXT#j>Lp-;d=&tO7;mleJr#cm!Q6xmz@ptkWU5>*ez}i{5S9W0xTICY<6r4OkPamBk=|b zi}FU5+YPvKL;Qxqdq_6cB^m$Nq=34L@+z%pF6fsdU|`!i7jAEkh`1N?GBnyv<1NjK z&!?-^Uu3GD?EHl3`5(twQA0^l{i=J-_pw=;g*o&7uGQpe>~+cPi?ZEzR>kf6(3hvy zq=f1XkDol5w-3LEuu3VPf?Q5>HZ~rkNr%6_?AvnQEDGK*3sPyqBjIIHJy*rJq6>X#nmCPBZjO)f0>y zR&&Qai`%HmH&%ac)vb7MXpw3JnT~iZB7rPLkUQ2_AuaZYpjqa&n-3;$mvP6bi`G#6{AhD)at%9*GA)mU8)=lM%Bx9JqfI|vSPhmbb^cag4Ha)CNulxXNJj7j z>12_lioRP5^vy-RerzvC7Z+xCu@|*6{8$j!<8TM3xifG8A2zo&ZSaJUD!)0@#nEgw zAi!YYv_l8WASz(!LN&^52-fW!S1w$KdD>+gJHcLNoB6zi)-VSi^l5I=g z;98USf_rA2v`OTCQu$1_^HRb?e?Q;$GCm^2%n1f`f#M1?C5ux5+v`qOUv!TQ%OgYV zdq?telke}5(p$a!O_SSb*~CgOZu%525qAyZVy>#G8{Q?2-y{dUJrD=QUa zJ@lrQ-(s?21%-0-z`ip-yPlzlZN;s#IZC5m3=dQB#O0l+&$fhmZg{`X4V>vcYi z3W!YoaTuyM^m02-rH>>>-yK{McZf$j`jHpWtd^dY3^g4L+7<@ZY-2JTkT9`9oK!^? 
zfwQ-0#p%;48zNSxAlvx#Tq}P<6@D*#mvV{Ue7D?1hiKKjJ%xNEjAXEhceSDP@X*-y z(IF%K!`3>nwmh~%JBFe_dr>DZKCvHt5xsYMEQyihj{*t-U6I=xqS!eTK3mJN+MY^^ zJ8M%zrd91=*Pk1(ca11zZS)Lu?;xX_@_?~g2$I{IPFa|4;5D+`>CntT>;t(g_dq@} zjViq>38<2j?x+Sdl+Q+I^k2v$mJ}rBBJv)jmvWLeaN0EHLitO@iDcQr%R}iO{c|`# zt9LJk4+M#2z`5tr{px`@lxWm_UrX6ozhJIF$T2(wj zW9U3%O7eF-6?MD{8WwcW$eFVdCQXs{3!j}X)|*R-+~3$Io1Rj7zAK!=7jM6*ZZ|uc zts2DicoLe(M-$$(exL)H5fgmquUghvarsOpAq^doXRVCtupHesLs`8ilI;@+)Lf&T zE(f7INQnX0cHg+dvtf|-+rl}Lq8j2&@y2P5S_VRpx``tCsQbE2+YiPQbUDZBRv^W= z&usbPOflar{pC5oNJgQn{q&d=@4Ehu$vjBzF7iw5X>**#(ntK?*0p)5CrZshjTpkc zTgUWB9b&8!EnD2r{2FK9sXV4gn<-`gFb=Qz_3Uz%FC766%at3j_WSa894-F=B%O=! zm4=;Th9Ag_DKkxQG6~+5R|C&|smNDar%@>pj>)%2)J*|!OvmXN_kRrK_ORyG#2ypq z!?9qiLS@VOnwCZWl)52=t4An3IVnprqTp7i@v745Z#x|%X5)Z#3$EKU+pOXm3B{_n z+{;L)5Tw#}`C~Cz3B5Jdt!|^_z!{s&g+`Glgtd)u{co&S;f=0IH{!FTgXUZS{&70 z9}8JzdN7HLPpxS)RH3X@aZ@kEXoF(p9ptz&FPwIu0~k6xKhY z^YO&rz0nqE`YfU=ya!2L&6(RI^McBHzv!>MbE`2V*1U@5@R8zcbpy^-|2X&V5KA+z z%h0SCsn+=hw#DT>ujkLH$=spY^LwJ@pr#g^e%i z+e7en$S%RG3sxO}i@m6=Xb2do2Lp3IT4>Vg;%^z7H#licRei}hip@~X+O0o6t^h$` z9yxJb=3kvp*P|EiqmqlR+Yf{$!$&6wwL7WjG`X%jcxGV2m{)X)_3}Z)rz}Z7z z&oExnY((=s52JRxO!b=l7Rg+!acunBhes$55i68Nyp~?Pfv!d5L|+yXNZEhdqIH z3?i#-Q1kP26}j?xWJ_#LLol8E8h6%oh2T^T-x%TP9SL_=t{}nk)V5)GKo|{mg*H-L zV&Dr@`?@Gg*}^|;k=rS1VW71_dO}%A45&2N)7Q|bT2o^<$IiLS1QB8r+^A-Uqzio{ zLsFn+antvaQtD=a2P=!UTH?`Z6q+{pT5^2lsiKWTXwj`k$ z#29sXorRo!(@0wt)pL+|3c{4_WSd~rYX>V|Ior-s4z-2BtfrQ>ShYR-tl~nk;bC!+ z<-DA_u+%mHdsGwi_BKdgOu}IBg{@cId^?|fFI?wSt|7`6hrI#b9eoaKDlBKeiWLYX z^-xGbMtWJV@;4zjv*HT8T7rFp#8=(E+tR6pX4tW^4<3{lq{xW!ndo>{ZVkk}x{4Ar z&2bfS=|8(*1&Xg?#?I58C1yWn$hziAltA23Gg0I@P}%~)V;Q?rpjuojpruadscMy# zPepFJAYP1wvW`=wRl;b!&E!mRek|0a>ZlPS@#>W9-5;@Dtx0WJx?SsuS+ui76^BCz}JQQ%2Ez2J!Md^oA z7Q)|0Y1Q4wig#R&vvmm>ye-TQA*;H5hXa-8n(c1wkDqK{u=0W^vkJm{kKy48xvJ{y z_!H29zlpjhN0&8AWF|yh0+-u}@P#Z8KKMCVkDFKcws$GVbim8{P zx`LBKCR$KXKqmLOpzws@LNCQBlXOy9osvz4Cx{OAsA=Q~Fo$DxeSM{w9By2g$ZQz@L230j(%SU}MmNQ#B9!4m z6oJ5ZUvqh!*adWxx*kh?{hd}&$##H3^#AOA_wU4{hxG 
z%E{AKO${Z1&A#j_L@@`?E`O2hbV_EY8+98#NAuC@)x&lJQ9%CPF|C-9;!|7w8rA#I zHij89oX%6m)D;;GUY*`^$+5A!rSF~`{sivVM<2l)iGwsj1*H@SIlVq_HMs>kD!#z#ifsM6gyCxECi@h8Cw+m21?_&{6(9ry)6ceY48SYo;$z z4co`fJ;CRQk0ZKo+CN(H_7F%47dix95sy-ck9uJe=<^8)a)jik!ng@;PYKNlsgHDw zoXS?U?FKgODU`%uL8X;liC6V(^c)_m(8^$UhWCk#ZZB93ajZ8!_1icTd5+F=OkojhMM)aKIiETD7j^4scZFYYfD4fZif)CsyRzQk+or$E z?ylK59b$vXb;@B)+y27Ap={-?PS2e6cY4f-!_;gTzI49+tY$kRQ3R<--WIBMoWYSv zMeh86}%sXbx+?#JJj5mx@sI z1_zNeFI4XWF{n6xO=U;#Q4*pbp>wgF13hDLrzLGr73C!inLyd8DMhqG0Ek4wLli&| zm9nI1)GYhc1Lw4h4KAK!Tn%8o#9XX>Gfrp}BG*K?A8J2@ zCeI4WH<%Yd!wwL)B5Bz5IUJOGksacnb2)I+AUb@jD@>cW08-O-DEgzg6Rh_9gu1lo zna+HMA({M&8K+a;*V}UAE|0|3-Sa_be{K)s@`VluFa22$7!c%gMM?D?xy1MRxh#OS z1f2=;$~}441Fw|=H;rMEc-8c&xU8mj91*2!XzwhnnfOP1gNw+gk4#QC%ZCTp98X5$ z8#jGZU6!3*LU^C_jq}E{xMDf_UBSSkDHto^TZWH49h9dJH#m$R|4z8}!>}nV4PbwZ zy>Su;?YV?z*TuSfufqnlz zRl`}lh})H==T%TLbW3dGY8DEsCd)V4BUUD&u4ISVzYVThd_LEH}IV}!Ab^yl$tE=LgmTt z31%Age6Ra?{1_EmUQAE_4i0lxk~V#>O^2zNk{y570?cTq-DUo2i}m)~sQlNYJaLX}=gZlhw6tX}4pJl<3>aFa2(JKe|-B+%tk z275tli!WNwPY+4kiIHAid<_QXtduAGol12zu)A=fv!)2Y3BeVYe_yto_P9IoUrK8y zL;g5fc=U}8lc_EuU$Tl{y`dcblE@Q)kiVvj399^dVI;MY1>N1T<$;Y0iid@~YGyNUXcyI$it+vQQ--JU8ONVm9u^fz)fpYuK3%6X-IK@8uS z5yINh)fMj=-N?H!Z682iOtVYcbD_PZ|EqB1AM3J-?h^RP&1UB4IxuooYPx{do>`W2 zE$*_yo!JQKYejh{fQ9BzNW&=BP~XOKv&ks>{1!Zj-PO>`*;A2RYg{Kc)jX5x`MkF~ zZZQ3Mcb-#l9kSc{A1wfKKK`=qL>*`5Vgx~gSZReSK4 zMpAt5zP`+eEu2bXfW{VR&Yo&rB$ZEHI#uzk&;*>)pY1Adj>J+?TW1n~{H|X?R%fqa z+igQHi5$)Ey~fhxeXuW{rP*|U>TpT>u(=#^k61%^PJ?eo5ebfrS-~7SMgawD!_i-} z?!xk;`LAaHPIb^Xd(c)|ZKkYTZuz9u zZ1_h01g^Y_<<2L+^u5ey)n=JCrtbnQ1K$EJm@aqec(y)5_NEd`YdwV52#+dC(Jz z3dAJBR?%!Y@s1jzCvP{1v;ZDPeKpX^lY?vKtkEf9)?mSm* zg^PaXN3!Qy#MFu#**9>J!rb)s7(<@l@zYLLrv^1Yr&recR({Q>YV|ag6n{g-0_P9>(XuUsVrIULh4E!n+v;Fx zh@D&AVK9=lj$LdPo1+I?HspMzc_M4Si>PavSRzGyLVG=jiz<&&D>GT=+gtt5!Q4cX z=Qu<3tJ!+o5cBjY!d7HqTi1eZd&RykhW52hBOz>6Yb!Oo0}P~npo+2Q#%0$zz-0W)PFy%9PwacK zudt_Tx8ZBl4-_eQH2Y)8;rsOMhypI0FC2@PRRw6}ym_)$Nb6kd7#}Zs1`r{O31!&T 
z@b@8w8}P*?A%j+@&Vdgqzq5Ur`?u?1WJwf=B#$@(waY@MC^d^zt)$>6$tuzr4_pO# zI?G$WifpxIDuy*yZ;8-$zWMg-JA^Vgkysq#Z@0Vi9B~{(4FKROMA^`lraXd|C9Py) zU;Es2${4BPg+NtpU>6}DqD!QYpgmA4H7F9n@s~m>Wh<`)zeUkF6$$*27*b%O)(wF) zYwvGQ@pVwImyX)h2-QK#QIR~ya5x0yIrSpBp%s`1BXiVmU03}VE5UegZt~ByiVgV3`GT`1ZYW1D`8v@`)&;a_01e=Tcl4^;ZoG5qmfB zzNq>oB@kpdU!g@*JPsaFcqUu1f>N)B0a?6Cxwp7uv zIse9Y0twgOrdnlAQO);|E`;4y13hdN``<=~^|bTfL(!}Lc*9TZg;05}tw&mCWl{{T7M6&lh?47$f6z!nvLF7OFuBjNUnvvDZxU6}wc z!cjeW^S3YvY!f7uFBM+E@aO=|f=AG`_{Z4q5JN8%)vZHdc>=tXxyPNU?_FL}ScF@X zu+6d!bZM(Ir2alxA1)e{7`bsU(7Ew11Re+|v}+KO4}y2_$(<-*%P=UwJKk_ z`??yn5KQ>#UoTqLe5ZdTH}T=U9lvgL-}3qzHC8&U`0p z$RP2kXEX}@@7KWle7nFdSiNW0P3$kf3S<*^FKtYX9Zc)GNzH6cm1hzrKI6sPSRd;T z5nizssv-xGD;QL{5^uM>J%5$caAhs`TCl*x+A4qPZ2xOodKlum8u%S8F7Wl%c7`P^tXoQ1Of1~V4g6$L>h=yyLj{O=D6hcMfV2hYJlA>KfKS#rfBM0y zl|!3xRcwwzSSFM`=e(NB*}zfVs;OWca}|{r@2zF@$x=g^xzX^OaQBcBK9_LSQ4b+t z>FbK?<+r1Zm3o)rbboMiX>)}-hwoOsp@OsUh-}CA=fq1#zQyWKegd%`5#>b`luWes zyH&`SSbFSRbVbEW9$fX_iX023e~!)1?%Hif8w`Nep|Tjqy$bj`X{Ow-7`V3rn;gx2 z-F_w#iDxBhfx?@-5&$|OQBRkPup~CLORV*8hb_LlzW5S4B3(3xgcmR=T$1d+jTTaz z*rb?4$WOae6K)q^>R)RRx=CF1d0NZ9UpEYpMgE|54Z}jt z_MHwmXc2t<9EE$)bQLU(XHFjzkgds5HNvr4x+8w6LH(<}FOTgGyBBHn!?zE*hJcdZ zOkpx-@OI-Zg4pNB>8cy8mPr&pKLwq6vBn(i$kKk@)`T`3KQ$!j%l`)N{I3?nJo7L| zxm(-7dU6${Ln$v^EKu`Ew*lo4G08u9h1OakjMHU;WT2d4WfsY?H z$h#QihFf{V;gFPnNFF9Nbv=dt@*rV6A;dKg$SOz3s4-joX}!IdmzR&JIxkh|BJ1dn zUFCcc4BD$L9G9d{U8;>()b$alp>qaHy*gdF2^z!(m>%xzzI4(Gt%VNwpE`D{$;@HC zW!x?LeWpA2C!r78@k~S!O>cl(*^Ri8CF0jyxcLy-;ONQklw5Y_e^q-wQwz#x1Cb1m zavOIV>qxA(G1roh_TpBBLZ@M&4KGrf$B^Z!troV?pF8bH$hH}`rpLCOty0wd(&yK- z7yfT=7m3*4#9Q)evg;_{MTR*+Oa2$7RKdPyv%=+N8Pz~oAKmvuZ9sNiY*C4l+eP96)GUY8XQkw!#liJH7b^hxw$_W@s z56wVAaz>7PRt9uw%jN!=!I^PhbnEq&`08tQ_D3(Z{z5R1k6r0mG_^h4;t@b1JYsVq zf>;IXZAp;H@d_EvF2-ka>_hy{S#!Aw`WQDIqUdnh@118pH2i~ z40SvjfW2y*?;bBH-2-tP@k(Yk{?{wNLtd~3oS)h(vlmQV6M!uH|FR$AN1ZKNJX>Z> z-+Rs&UV4hgD)Wt_buyOosBu+Q0 za_Y8gDm#orv&UicdbogA4Cz~ym|X70`f9?fG5n`aU**JAuh+&24cj!2Rs8W>$BUAm 
zsa<;>Xgw1JIX%l41>B`bzZFt&Q}1SX4_S-xqc~BjV6fB<^I`vAh8AL75n{dEy`R5C zCPM@I4yBzBHiU0?ZppA*A+rYR=PK`F_vVzD@|JFxm{NcUKY?zIM>^!FoAWtlPsS@6 z_bG<@V9W7aPu-rLhv+$NUd^c{n8VXxw)w zQuqPUXR3~<-BEK2|4T^*zt9--mayLpgyaD&kAHfuP#+vLxWYB$70%8~CyxF|(?8|| z8fuxiE6KgauxzGlc5I*CHUQ_&=a|PKz9p^!!f9hoLQa+;=f>90`&{pVCDXtSbGk>L-Y{pN zPho%NX0ed+J@T7x`St5QB?+t&f}K>+dkXBD>mfg+%v$Gj_$5aAkBplZa^fVk^u47qIB@!{wI=IQ7 zw4u|k{wHz8)H(?z;n-VN3|pb;LL}7Sa@_}=?}tmDIjR+QX^wWuI5=UFKnI20YG`2b zF_8~g-V&IR9U04xFpFuw{N5kne|(_LsVjx=O{ zSJ_8GzRmlj@0v?ncFa@INyM#hNu-$8@aWL$vU_!}dCm!gel8Ro78sY^EknB&(bp`h z!gwHhn(0xJE3FPb_Gls!@+PE?6KFc>(kVQ~)FP(0rm3Uf;6&N1&1qVHvLMgXdkIEd zyofJ&{|gTItI9!;#NMD2wuZE1tfACEn1feQ$s`I|E=;0bs)O>Mq^s!5y7V>RS`{-k zG1jZ@{vRDRFzHd2RH1(e7-S@V54#e(KyMW*k9!3}B9Y1_tMv{jU#u}-R6!cO*?MyF zl$*a>X6#+bhg)Iu77J1pv*4bZEuh9a*n*wim&?z$w~=ZgVbzH{@%B2lg`{xU)2Q_f z&QH2}%*1*FqCZCuYHD5yktx)*FvnocYU|Yi*&*#LKd6QUubc5f7N+42lL#MZ)A*F0?1oQ@P{ zk#XEnxZX>iZu^H)R>SASV?#SQ}W+(Q|ak0XA4>VMSc)wrek!TPXGkf=v|PC zfj0OmvfUv4fjU~OEU`Zazv%LiZ^|6M8hPpn^i{=mfm#TKD^rGuSCs6-DG!IjT>&qZ zwr(YbcjiY!!=9k}C<=**^NZT%Ia1)1#X(Fdo#Ye?z&Tz}2X><4)gO@{I04Dvb*B(a z>w`h_oL99fAT>=Ubyg<{0GPxZgH@u=T7|51@h+|XVUi4WG;bA9(*9YylG_2HdBE-J zwV+k)I07Qy$5x900uzKyPk%Nv2WT|m2n+G0&nV=Yrf$#Z^V*JKVON?ggsq5?%akDW zaQcRo`MTsnDZxhd3QG&@+;X~TintCPoKWQTlDLdxeyIGFlir&-iyW$aa*y-Wa&j6!@{zv_WGi#pa z*p_#WzT4qa{9TGSk;!B^4`F>rB^+isZA7kT5P`NS)o2jF2p@*3DrI@EP^wu?WyN6E zcu_Ol)0(bK9CBpIjkws?b6Ed>NPFk#N}9HfH#3(+S^Pkx}>L7(iX6> zvCxyA?x+#sk8e$>xv^1_Hi&rD7j`ixR^XacDkyILDc4Cc&GCR$5hKk&m^i}kMD{q-Vl!wFDJX~9a@I{wl^^Z^&0phUL=W!6cPmS)dZD0GIG?qMWG5qays{9 z=PGp*z#M8*VyL`RH$8b%m(|mgJN>c)=T4FKBy}moL+Lk(ya0H;pd#K{%d2ChCJ;Qt zuEh5(BHiyb9}|#PR;~jxL=jMM#j(4s2ohfo`bF;DxWmhVL1~E)`OHiax8Moq{B;Q~ zRCjn=m~NP;ng$k20U;IsGLj=mKu%6HUV(grvq&0kTkPe>r~FFLAS0K-0v1tU*k$6& zW|YYdhScqsz==JCm~D~e=l*6jqUz#u_Oc+eoN(_}lWp_}W((e5lcRn9QuFlnbV$l5 zicwT$x1vA8sL;>$oa=s;COX~nPmHGzQ<9Y?D4z5o5IHYWO)@q#(6QhF?AEQ%>nT?_ zg3ei^G?J{_`M4)|SMw80MHYuTmP~cuJD&}aew)OlB?><=xGpu%5|OhY81C 
zEjMP|0J{W_gS@>MkAo^c>Vu4u${Dq)i&*^=AjZs1P2AxTC9GTko@+~yHmBAIYIE#e zph#Diy#z=oDEv&@@pg^Oob)PdR2S9?&lb$RLN-9ZkLRs(Pvue+KC8 zmC1Lj5UTt8s?QO?LngC&U%}8wDIGGuBeNumgD7z1<8Y4(*ZDPKSvlII>f2rLeYlaL z#3P~1NA)up0f&JeE_dKoU12lGAGl8iWTP@TIS&vrOts**{B(RwX39Mej*^oC=_Rzc$h>$3OX(=>AU>s#9X+F9ff(X&_S!=EGIz2ZbHnGNKw?9jMp4rc~N>XYml{0-%}ZSspU zX)pC08*xX7^@OtMxiqyujRf)rOm^uzL$BH6p_G<_#`)p1i3=W!$g2qZF(gEw_(uQO z2oD-AmDq?gvIrxEs*L37p{%Q>jz`?)6_C0=nY%t$54^(Ans?w}$en<1 zgsn=m2?~y$k!(Gd>ICuTTm9{6e1j zd5MMxy8y?iFz)F4=Tcw~)!qoZfBm>xQweRCL!qPX3Oa^q#`m(EdaZw5J<(y!BsZSgsKp%J<=MnnR)3oYft2$TV!> z2X0EkR**@7V``x^0}G;SC%?dY7F-u*KP!=Rx3SSsrUk*?t!dD>jT9lbi$l>CyM6UF z(_`3hrsoHvTXowq?4x4H9*J3H8Y*pLXUi{~BmNdT*c#1# zltYDY=8w;JUPF>DjSngq_fRq&s>+C8aGwrYmRg{1Y-l31Fu@w($yB@_jb?i*+KXy{ zG8emop&+ObbLUe2*8SPB)KHg(=6?rd(^gpY;yOI!m*1snMM7Ls$sO1Zw-DZa z;;==J3Vfv3``!KR0{U9KHPM9bFVs+l&X!_30U?%2Up@p*N3m|qAw#JsDD3_A5HQ(? z-TFHDGc2mHbe98g)d{XmcZ=DhOyuf@M~nwrirO$OVdp3hI#|)((s+G=bD3w0&u2kz zCp_Et`1<_QYbH{n0dWa6am;#cQ$nJzWX$NI3Jhwtr~dYLXUVdBjiNW0-f=?p)UuH5R8Vl{YT1P8u9@ z`>bxk?QeO9jgwm3(2lP030xR&x*s-3uf4-}=4oB(#nU@sV17QRj+tDMP64N-i9deW z^8QeW=J7F6K|n>P%sQq}pc7&Ak0~N*@;&jkK&@wROPa0EPUpc`cJ*IwsZ-Ju5D@H( z=JQSh=JXNM9bHf4#KiaLYDd$TcZ=GV7bVgJrUtj5t>dwhCR`~k zOW`B3n}AFCv1(G>wVJ~oOWDCs~5XU z3akpw!y=%`*g4$`uR&I}(fIHTKXJ7bUhrbhCrX+=JN zGyuSZT3<>tUr{Rdu z>*8>52q*+g>PLzR{l$)X_(%LtVSm2N8+95@@pueYjQmu5<#>-M-+57Ac<^B7S0!$0 z310aC(~Yro^7o|pP*r8y$aB=hzzup?=33kHOjw9BBo3Sg#9iD$@A9VaT@^#(moOFR z>G~QfrABN(pj}KmWsIr}I55vAL`te&JF{E-ij4cx$)w0BoF<9t(@pbz#9qn;r|Ya! 
zerm>QSWAxzNDEBkyC>z|!drN71PRT}yOfq#q9Y-kma!7uWr}X$!%>&$yH3Xsc{(eE zg85vG6x_f`4-Ka)AIY@VZBdJBt9<7yqdxLbQ@04EF|{5n&0mb+Ni_s@?ckkOyeA6@ z=9b&0Q@9qD!U0dBWx@&((aX=%F|ZEL^r3S~yp#Jo77+3a%s7P-vT3xBuB)1D2!M6y z0S8%SvkZM4>DPamRd5T}TwQ++6quWGiGD4=jRnL-$wg1FEmGRi&wUU-lv4YTeo$e+ z3Ic))cCP*tneBQ+u;SAl&{}Qdu6^T!YCW)@IY+lNKmRZSa2wpQWwgZL8w?kt80YP( z6) zWzg=HNZTprpFHD34B9%nd0_!j`oqtJg_?F$Z}S$n{6L{VI0AQ!FH}rLdNx)MIVsEi z<)v57arZF2EiCNK+~yU-oY-Sc(ywcS2J_zEJ%uSuS_&SaD5k4nU)+#eq+N%++L$Ds z+C|m;L+?*Fk?G8dwyE2Bd&M22#5--Q#guye5$`?=(Az+)J#xEde&Rz&V^URqZiTQ2 zU7}WFI)QGjO`kCv$RNn!Cc_e5Fl4Q?Vu)6;D-AQ8EZu=$VG$gGWs-S+=c-0A?Ruek z=?ELHTiwgsb+-Ihj!?!)8ey8=06nU>)@)3_KmvfN%gw$IQhFw`7Pr+3LoeA77sB_#7Z)j z1H&Qve&}3!rx0}UhE(Oa;O9#5{_Lvu81C-;0QYC@2~bWSh)#LoY;}$a9eV#KrbI&Z zKds=srhqXDfjHfNZX})t`1*x{Wk-7OxAhnlxHX1F;bGxyxT z_4Y-&U0in+-Y}0OaEh=}T?VP>hXhAhO7bc(TJVa>r^M5(jL;g@6IY4pX#{cSl;t+o z0)p$<%3mG|oS>jEW8omYhv8*FC@jHrYT@w@TRld%F(TFJBjC$}9%+F(L$kXmc5r=p;MF znrv`%cIPM1F?ydY%|E)~vJ{YU0Sfg(Sxf(Qu5_~;sbYoM@bE`0`fDANty;edoxLbT zNc}vDZP@<#v)c*{t%;(GP?sgb_RKxNv#poQiGOtc0ed+lTwm$I`Bq^_4R-kFPn)Sy zMOkgjuMV*Q`JH9VEn6{+1$e?aVAF*qp1NEm4@E{O>bqN9i;}&KfgmN{Hb*1AT4@a) z>wc%2_S<#g;0_3Eq+N|_S*bqYS77bYd)ij;Nth{($2qW_KQ)0_vNcXuQQrvI!E?I& zM$Q8PrLzIX!8e3vq4?1Id2!kVDacMGdIJ33U$R( z;)o<`RMTltDlpr1F1SnyMDff{+TPwxxlG8436@^i)amU4VGj~bqF-=k(KJmD*utT7 zQ)r-?$yZi}MyK1xGr z$0dWl?&;4o?(-UskX-zjq?Rbfl-un3!HfMnZ{J!ww|_-g^a(U1f61&A0EaCWww`HP2K; zOEP_OD+;K!e{k|c#?^`w<-!Eb_mmRnhO_Q6dy9-!a57 z37aTmD~Oy2;{e*eWFlV{$~0;TMm>(@W>BZ2XMZWJE{KUn1Z@TYTK9KxivJNz3q+Ws zZ}yX{oT`{WRqZIIAA!MR1fcO&p)1n6l?Vm+tME0@#;*1dCY8lKu|fA>PZOc9sGG3t z9;_LY;?PkKl@Kg7eyi(GUQWq5P8HjRpGhB!zAHT~x=UIDaZ`0wx-MK8_Z2)Eh(KBM zp;P?hyq9)eUnzhH8R!3g@7f_DPulw7JKn%>8d@7!9W_?)eSq0W-&Sw*z(W?W)Gd=> z!j1HA`1QZ22?R$U>h3Z$(NLOZFkz>Y*&9_NEQy)L=^fSULG_6)U?DqJe$|pZ`ZCfOsuy3K@xPM1-`jWrb6mm~)rdAHYFD~HXVxFJ-sJ6n^c3LF z?B_pn$$xzOACvE2l`{X8C=Sj2#SVa?h~j+w0&;Q=B=(H#haz7FB<01y{sl9c>}iRJ zD$Ob5wKzc{1YkqX(sZezum4YM$n5tyqL5yMFP$Sf{8Upo+t5JWn5|}jzBmMhhY_L! 
za7KcmxmsV)M1ki-BIXBOy#Ob-a)u>2obj&)3`M7-@Bc)yV7fRx&}AEURlN0$ic~a5 zMF2UWzb;mwlFNGc2m`^*#YCA>C z{MsOZ#!&sMF=2SE$Zz(0%bw_%p?ar^_oJrHZx@Zi2Ui)M?{{?zmpos*FyO@Bofc_a zE(>$Er5Jj={2}caB>?mo^MBK0t}o-C$^4kD7WuLH2wH`}7UtHy`FV z^yNQ*8O+f?XIH>={r5EmJPH50)?R9V*dzS;sSfD30m7fflk3K!SH38|v2mp6&-Y*;8Q|yv&rP`&d8(~zoJ91aqp8}dOI|17yD$H|*r!Fl zEcXDTk3r1BSFi{i&eKXpy6-@RRmVc<=d9-LXUey)tB*I(6jK| zKEWK8Un03WM>?l{*Pkoj(lgBuO8}!U0mPRDZSB9-K)wyxQH(Mlk2ez>ju-0R?}2E+ zHDVV&94;n4-Ym%=`2j~j{_6-GKSx{un`hk|9s_d{4u}>ORP(?hh z{86>uKA4G|2yoBv`|z2Zz`^Q^0-8Myp#J=$*|(SHKZ&#%X4c8)r}2K@h|3sy8rA=I zRp{9P&sXq@S^x(KtBU_M``iEilfIV!5t0C<)BLwp+h614|Mwv4LWKJb&)MH0eRX$? zg8MYN0R(?N>r+tknhCbPxngSj^w{uU;G6h2fUodII}IY~Cdeph1JCS%g0 z{p+rmJQKl}^!o8^T1{&J{x`XYUpf?E-$CE5^4q=RMRPSl&~R+%$zXq{40xOorZV<$ z;pe*$;wUL72~(z_8z-vm9utPj4Q%iYJrTA}+5W2+A8orA^NmR9Ke(1oat;YRHmEPhX<`&{Ti(&1 zJU&joLSnwt@(2iLc=`**ZJHeJg13dw+iyUVp$jvtVqFs)!xaQq&$q8B4xPX(H}J5} zEY9s4OI+75<6cx#dsb#w){s!L@9S~HVu)+Tl3YgyMtW`Fxh-}k%#}5vgWdOQn<8Xe z$H$oEOVuD-6ZGiPD&N4z=~%D~4NZG_f5!#vGYL1pxpBf|i4P_DaDPv3E6~fU&3u)R zxJ^P`GCH!g&d56VUK^xr&Mo+Ua8T;0t~Bo2AIno=qoq)Nap4p(Z#wS4G-569fIk(h zX;Zybgw-3;Q9U7QxFeb^;h(~Uwq}JJ6T&!H_O_F*PkQR5h$`DN7XucdX(Cg?IX`5% z8|)m$-{{v$pO=l{VgRjxVSCQ8|Fuqo$1~@|f`W=oTP7@W$WFVb+Wdn0jqzPK&8t|t zAP|#nNqymzt04PW)NonC9`AiDu{YB=6aKK2t;Nt#U>A08qP;>u?TtA6o;n@`8IYyq z%ZY^d6U&i3V#K;sDAvUcNO|ka^496c{16*mFWa(a3!D6Y#{7c-wlVu?`*EdXPElxr zrMJT#0<#g;`d;Jm%@eX_!o4AJYH6@<4_beDy>#TG&CO(N!|3ZR2?g8xm8-gxCYfp0 zk4;_uzheR4ZpmMpDT_p_2h9exk>)6?Gmvr>C_Tr( zp{lML##exqHh&%7aj@91C2LblJ7&UgFs(j5Isd@g;$AHKm?KA?o2*IM7I(J|FTlp( zYW{zR)+irB+L*+~nZ`EoWl!kkW%PVR-#dww5LQnuILC|ydc5hsR5!C5Tz)&M*Vm%y zEiAnQ@N3#RN!K#}Mc8>D+6uS&^e~HH%4u`rD8Zh;tcrO+`F00pp`3BMV)`MUj(bye zurgoHVa32dhx3xO>qvH#dTTU+r8PPmDHYYNdC`HC2CcqLZ*9|AB$QopwCUox&`=iC zZiV{ciAxzN%^N#jZ=>c(Z&A~Mc)rS%a@xE*Vk>CqC_a!BwyxE9zZg~yI*~a*n6h#G zQ2CMi+CWPOqm(0ANRyA%7+y!ESS@`)xUj6_A<^~)=V&~nY< z(Ad($)bfs`hyp6xN7)r338fTQBm$E*7dlF#RdrocR2^jdm+F*H?KCaGK_Y0DXWj#i zUBf3)lL)%~<_QKP&O;aPByH{}TiuG@w_&^-p#Ck%}Oez8XHLUE)4gvD< 
z!dA39?Gd@8#9>3&qQq@#2ieEHGSURQrD>l#6mU4xu3tIeBfd602D}L`r!mS9j36@b z>=UZ0GqrYG33n4D`G2;;0+*YdJPQReTAY)>6gCLN%RbR`CuTie#$6VCQ=Mh?Z6w3OV zemkJn=34mt!8q=&Kgh>XY{p#^HLxyEmvxgzJg3(egS{?@^j>X=6FzZ;;~J#OkhV7^ z0Mg@j{ak90`%GmXOT?2B%-Nq*ri0cf}mk!hDrgaxnllu^^_btR^E~VBYa| zRfK#0d`AoReD6Y2M`@is45pVZ*Hnfa8G!ZYls+ z_Z}}&3}61T$t&L0{D*Z?V92(+q34-GG`v%=G)v(q5^!vAMJlSCO9bx(lzH?3UjSiV zs<*8^pCfx+4G9NW0Qt}TR#8w9V73aCnYv67wOLtXeoRc`@wJ!rY56&Y8&z9QiI8dr zmItArR3@CSp}d9co`(scQ72V~m^2kUSuqqi9}6l|Vs9wWlgu?(ar z4Em7Dq!t@qGoO{+1z^Q#aa29vJ%##ngb2>Ze5tc;MGYO;61|-wYm=d%jqcFBKXJQ?$ zf$d=_#o>1I)gme$vb%?RR<|qeN|{+a4-|#E7!ey6z2T>#5zJS@ z;RC2*@(B?k6DV$jz zoln6b=T;3;mlfnuBI&qaCg2M34#E-|BF(=iE02hdrsvbbCbY^7bf7Cw_H>az>P*2C zjor7()bU9F>FNkf>gCLp3$C)h8ejIvsx9I&$!WxB{y zd21u1<+Ab^dK1l zu%C|}z3pGT$E*|$1cVM8=V5Pj#w`qziHPr0%>ue~6x&)NWidDxo#6D-a`u@>IJcj- zTC6@ODKXuCGl%*Q1bMfQO$(UKvZ|#lbi<7Le7br4uX4qy~-|Jx}39SWCL z?I-X1T#7g|6oZ(v7(-66E-tJQtR4T6VgX;B^$DuQAd%FhuW`83UPmyQCKzeE2j_Y7 zZ9B;yFr^e@7q`ql${WT2vuv+`b`9!~&oWLfUA&6;Mb%1((gMR9Q%ySY_sya!wT)Zb=vYQ}WF z0Q<5paLZyi6Yj+R%Jw>$G13&)+vkIC)(Zcj z=-kT~_ys-Ag0w^+t(Fd|3AdHoM7+fkm))cYW0hS9PjPXL!E#W?W3jt7y&qH@Ohx(k zP3Nk|HAHu?xm~98oR|etyws`rX(d@hX45FIx4OAS)wX`S45V(gZ@HX37JWpu0H+)K z;e~V<@;qMe9jqSb*GnJhdW00v$Oi2KWZN-ZUi$h30JEeT%`HG+vI{~V?9ev`p5Gi@ zHC9LyrLi%(Vo`8l4mfYWzr#oAN1#|hyL18UNS`F9Q9UfDp;jCEGZxuvNXyxa&UsVz zG4Qtf1_wjh4(2W)5acKWOI{Tn-pbVPM1M6lDN7PnbhQe$%Ue$p^oM3jax{3bRxG&3 zHXgFRBvJ*pIx}3@tWU%pWi-zS&NUU-JhEPVG1xBA0AO}#x>K$OEN?X=i)Vh7onj~9w3>T32X%|&Ps`A~4J^(XZ{m!7!Lb0z zbW%!&6ZnQcbpmctVXLAxT9a4SDIa_e@h=96OX+8MW#3bc2D^10ZjCekRpUaG_LBw+ zH9p3+S-)l9hF>=}9q0-~yQvV@cGl3Za_qX&`fr3*Ss?9AG1Tr#7mLm0GRyV2r6TxW zho$;7-2xNa1Ef7}G*^>qS~ju+El6_jJx}?Z>9{wuD75qB2luv&^1iJpTd=zwYwV5L zZdnAQCVLt{Utz>uS^DXHl|9ng02dgz1b7<`-wW2+JK{mIEj_uxBdoGI)&Wd9T06b$M*)$PD-ieX*;sq<#H#cDQ8|r?}8`N@%m0ZT6YFlN)Dbp{> zm7E>7$S%>9Nil99%4MWYQgxN~8RZcI$mFQ{wxF&XEVQ0%4J{g2o56!J*6bf{F9yuA zOW&y%tm(45&@zv+c>%_uL}fOZ5~|fYFXQ$eYwqf8JvNcL9iHV1*s_fxvHLk0AFfXa 
z8MHf5n7MWlVz(nmv7f~HlxJ*d+A_AjQiNmAb1)R(6gH-(XikezvqhZj^)u`*QrOLc z-@v-pQq{va(%=BdHN{e@TIPOc>Ba|)_z0N@+p6>eP>q&HTE5!Uop=7r@?ZprgV{+6${$-Kp%1(%U;bvSUdZ^w+I zH21A4jm}u(^8n~WZ-`J9DH(_U;3e+{({o4iBg8@ zVmn>q080x9=kD~Aaw>(%yPw)PqEvv+nVpL!-cdPhSG?PWKl-_@jl~)#Zikw@!zsQ89<5NH?-_I<06>%cS;zfp!GOo3Z>qi0Hr|&FZJeY7VEK<`} zkd~+?mwc(PwVYIo6rE=BEuXKdPm&TFA9t1(7$RDg`@70U(4?X`@9F;LA{CH0mu|oz zunJdc@zvdcgX=JERmCw3&_ZIh#!E0{PL38NleK9la!gcl*_;Up!EdCcyXVDoBIpLH z{2dEm8tsWqn5$#)sAF_pmW=7gcMq;vYkXV|U>(Q(`Xul}-uz(rrX?n=i+rg@r5HiL zD?js>?Clu|y3wF~U3M~$2*l31P8S?aq{;I+H$#x_K=;rzUU@7n5@#0`OC4=zT&fu< zMwg&K{)O-9Z4*-LG9S_z;NM8@{bBhkPMToFGrnJivMz+o)>isME>BE_)8_LCWx_G! z{S`t;{j$F;UCc(alklPPwIp#7sq0~JtT(pGsb$kyzOLEZ8n zJyFaKL={w%$&`rB0JJ1R0*thJ2C6}Dji~QrrvjiALYO|TdjX#{Y}1dKXQBp)A?u9*%`=?ibs#0kw>QVjw@0T{X>rUV+#Aj^7iMj9+(;8tYrKkswhp_khOxtY9fYy$SRM<6IBO4ezaa5|Vrh04 z)a!ePJS_=!P>^_IeA_{f_)|tk%s5UgAO*sPd!EYQN>s#tXVX*L2>=Q~$=EVkU;dD_ z(3)tCkP1|=5G%Citk6d?TWhj7SHls)#sFNNU1qNvHsh+*r{5hw!YBO zG=yu^G19$Tz7G9Ii`fcsk@XZ^0gYaNS&K@ixY%J*Y6XDHQ8kq2BpQ9Gksn#7J0sQL zQ3tP|MCZ_a`uL3eGP~@12fQJJR4fX-&S7k)T7ukW!j1_!u0be7UmoBp`Ss$tmWndR z;BJHJnM(W!lXuvnx4K}y#RN%A34_M3QbE&ygnT4UjBuD2r9Hdro)w7zzE&E!HFEj| z-4nJLbjCd#8f5M_yJq0c*#bwiCl_EO?tZ&I&WArkc|W`G3t&R9MLp)_H{czK<`|R6 zUYS;a_>r&nO#%SOm>vpjQjR-*^{S>YenQlrS&;yb{b~n_9JUKYMiLYF6v=4; zImXv6YzW}wyn#a&Zav*h0BlIqixas>(AvUh(Zi_I_Wa6mBTi_{nF=N3S4;4U$7qSW z6=mdAf>$1q{L_ZKf<);m89K4aU@Nq#64prt#`5-{(~uA$x0@Mbk;M+Uuf!|nww>}* zLII|lZ+w0PCc6M@cdU4`^-8;Sj(QTLs^Pe+uX2R%CGV~F_@?h$Y>Gqt+^^B}y2T9K z8z+x$MWFF*8LSK6eyAY~bz6Q9Evg1e;r7$-LMt_$@03fX%4?bqit4_mWYWF0k!!g; z-%ZeX?BUDznq+Oe?c9`5#9w#Ab*(gW$& zyFxu2QL61E(40GzEe%^yiA}|bem;^Q#pSu{q8n%^+1vtBo`$M`X(smK!9DfNM?F`h z?T^1AHNF&@4ub|U)(LcxkkB+VvlmztbkKeXCoW7vd}DC56|r8D#792TPJq>glhDrg zTI>rt<`w9?wKs^Jm1pN6wO#2_9pKK%=QfNZeAo!o{S?-vB_iR9*7a#K%m4dFV)tV; zt7QcyWIltyqt>DkX8QYB_raoo`&r$>^hB!CYi{i!g`B1Tg|At9TAkGMJ1E<8tQWFn z8hA$%0%yf-C9mO6&A<3B)ROQta97{tG$Gtng%^NfF`y0v8IDkEFcFm} 
zsH(GamgU}K`Io>U55CZZf{SiqC0#`6i^0dw*W(3`b?@~O;`WpUK0qRq~vZK${r$TNB4dp3hNMEqHQM6ajcp3m!){>fpS8XuuKnWyTdw0UmNFW&tA3aAT4FhyZ4EO zO@>G$NF;rG`J-9XmusVFoqLWeF--Cvqd@lUE@wAFMi#pLA`csqQ8SUB* zB0|wA4_5TkDZ<_c?=WIW3W@3AXBe#5fv3nJ*Fq7)S?+cpx)Akz^NNR|lcB0cFC6t* zG7Ci72wH3ISFZ@s1?*3Bq$GC9`}2uOic#~W(a!~;Sej~6oWR50z1Oraa{JmEZfE^2 zX%X{15eu)gOIH0p2k)g4Gr*X4$e=u=4-ktdS2jaYzrL?-Il2KGW~@f4Hg=s{Sd%nz zPZA02s)o2-{qQ=@dMy8}+Dv%NxPcA@5%!D{@=L5KVS(m~$FO^%X%UeWx(L_R-}Y>E z(NgZQw}b}XM?WhUnb$U?@h;{1jeH`M(f?YP$7m8WxLb6s&Sxu>aV?&1QR1W8L0 z+x)4hlnd&X;$(L}7iH6XlIAd_#&z*p?E&^v`A4%xa*9^rqXcXx+F{oq7zCUU7%FvJ|43>s%=de z$vl05hKy}uCO|OcFEC1B8CeDx3a1p+t`DQ+^f_8yxXUW zXc@ZHdY2o5Rk|e`;wy#l$Tr)CfMJSMP}?nk*dWy7Sp5Og_!;F>&9liDVrPjBer;zt zs}lvraSI2dQ`EN&R!N<*SCW&Lx%Z22PN*}8a**PFp+zV@54p+XPmIi?J>e7e=(ha#(iJ7q$8>|>?_4~FgPqqE;RJHgcQ;b|N!EWzI-8!?wy)5ow!aPJ zwtcEeD$LJT5J@{mOm;YyKccLLUhQe;uKemq!BtugF9{v8N*BhN`UQO+zIbs{X5`gF z+2Qth7qu6AIG$^FsKwz}LWcY~korS={G zMbJ6L=F@gMcq(e_lU4uuDXwz2a^wiWMMb;n(q9Ih_)mGwJJMI0w-v%~>83aU-c4XL zeOJwOcX!LxDNh@p+~a=9L^`-96aQ!Wz8aR_!Zgm4zXfDos8Y8luvwUL@TSq95NyP4 zYAP<_lNoKxL2AdmTz-L4{HX*X)Z=S+L&eUoWB%Iu7Jx(2>E2z-n5&ORavu;2k00ITc0w~rhK$k~9UYgE7;GCW*EZ?YJc<=qNU~#$m zj)$$D^K)AzzYc!SmrM}| zv;#=v{y4xZ3Lw$>W2L&to(7;aeg@+F^tYbn8$w0fz1nlqOmIdlil!it7by^}=pQmV zG@A3I&Y>OB#EFQaGXcO4rDObk&~NE-1s|}Y%5BdUQ^|sJ1$Sys0^T>dZ4N9xpyOXp&Ysh0_`T~s+3qQXGajzlWbV^H zotiis?Z3UQWIX34K0ef5V!vFULmNZ(sG${dh-&U{RU+=WLlg=l7x34o(HQghBcqEwEBVI7PTOw~<6GCK4QOe*2tsICH6gk_l(p8{W{l~xX#JcO03r+r8G zP=Z5F(&$q|t}^R&R#mw**P|L@A(&KZ_p3inoCxPpf!n4X3X`OzFm?*OC2gj4^7MoJ zS=chE+c?43%sBn6M_NU3wzQ|3k%;h{moNa`L9}l}VaZZwI-L|RRnx}vqvL&c=ClZ< zo~XI*28(2;XwAaq^C>yzaad89x&lZ4@cZ3?6|j3;pa<7JkFEIAY-M1RzA(Xr@>eLc zR~CU&4KwkWkD!)s6zdtDdeRa~2dR%WuK8cfJvulzX#!>lbQc>ZRI6RmG8odP+t@xG zhybjtQmavxi6Q_9bBseHytmrd(X7G78xUm^L6VvM3fL8*4=gW3Q8!X1J68wyiV4Ov z?}o=LotU{#tvsqH6cHLaU0Z`hZ~9x1h0d}sglw?zLz6pPCCTy$FBdc-HZG>=XXRlQ zP>mBXLU)o@hmN(fD!`r&&N0oEO9%ES9IaV-iP*ho56iVs zWub+;I(i6uDK1M~+qSroQ0RnE8{uG|^^7b_OC;seN&en4-%bHNTYv2PxN49;Q63g< 
z$LoHiNv)YJX|#VFsP!!^3dg5Di&FgNhpav2ifqE8W;Zpt$B*6JOlgy{!QfC@m)qPv zkhxSYcw&@=hNbGTJ!y`8tKmewlz?Pp|nJr5HE=d}(nMbp~96i6i zx)|#wRTimda?*C_&U12KJBS7|_9&}=RY6U}Kzb1g)QYC1{onQt5T4JQucpQT$e zt|fNg-HH%X@qWz)_Nr}!bVv8bvcVV~;KN>4m;8?TrZQX)hn+D2A4>PL>J4>FAF)Tt zxrHEPjiub4w7KG3lN-%`-D8i&&`wL0BFM8PE;2F7N7|QpDvu-1rItlgL@K$`tVBHH zS*fGshqSvw45ll6L*+g9bc3C0E$iT?Jpm!11Wtie={E#(W{>vyIE9aY(pOk&xXeF( zEe^vdRFfy^oihN4yFv@!mweRWGxChV%)Zv6YxYc?VVehTo$rm(kAU>5((uiuJ31ENkM+*PnErXXFWfS zESpxl20RAJuJ}Hrx%E!$s3UB7#Z1(6?Av@z4j5US;MEHN(_^OosgZa%ImH$S!dl+8 zfm9R#7cw77WgKrbuGS>hu(`6>9v}`Ci~|7mPFA0&86K8G-J8;$BFa|h*-K)V5e+%l zol&_`XUotOqo$14O;1hRUoj-$dWRuWH(XkP9h+p7Jn1yU;xfF47@K&Z8b&T%O|TT(IX;E*hg<{->D~ca?Hg zWi|Z)GC>jL=>FtteYsst^gT(X0X1x#fPVq%Y!&BG+(1LV$iIA{^b#69jkpdqhy7IM z3y5&3OZ)k&4X7G2fedZ(b#+2U%q4Yo`7U%TFNBzz?H;IZDR0pv0sC&qes9&`P;#rl zqobifO=$L~B2f&5(l3GQ=_X~MQGht2IzSdbV8^fn5mJAfdLwnx!moB6$O)cN-yx=) zUry&qoqzzpPyc>gll`Jr=$LqwLJ zh{?-w8hREi1w5t)8xh=v)J5sS&713XAe_w*Yj|8X%APksWaj*4bCUWLdxNELzdTk# zyBu@5Tf&_#^!2e4MIwi(d%@yt?6AN=j;{}aFDS=?3B7y=7F^Kd!z)iAk`sDH!uV9a;+`nxbQ^ruT*5j6+`N8E#1 zZX(R3QPt;nu={D3zK@CZHZP{Ay^w5e3S>z>(%$d7M3~@FFqM+e64eLd<5=4kn~3HC zVGF<+5m#-nP6gId1%@o+kB9X~4z&P7yoe(P>rl53&qNo!pnmMl5$HuLlX>_rgVx7s{^McE)o#1u|KR2T^&2Zs9 z51QnfuB>HF-3^^`dScueWvN>gFbf2ein>c--m;`Cl<86QS@0EK3wMyZ?U6~rtd(8G z@=tQAX1kkmaXjU+l}3EcbBZLjMBLS^!!!*mx?J;1Rnz^UZiiOLHaX)9?3-Ldw~P9! 
zO&d)X;%L%AG6+f5&c}PYcM>POw{<_=*hIcLl!f~vDBRrH6gS7(MVkH{1{=@mSt9EUx%6(V2OKMXb zY`Dm{YuShGW~FB6*%`gD7?xSFslWx*Oy{PkQtHo#&+_KRSz!t3FQQLJ5isT&cRgb- zP-<7j=D6`0Be?GUv@+&H6YlCLaZabVmQt4SSub_{*+RU_+%r+sCC#*Z2~*fDs*TUp z)|OkAo|=gz#3wY$_6Wh?B`!akxd>cShD|5gdq{}`ZoMu1nIg_zR?Z*~*z<1N5Ayx% z=2@Klljgl_t=t#;Q@vwe=({l``)~HUjv`2UWNns;OGZ<3TWLKst?i`q)v(OmlaHt> zyMJyBvB}6iMhr|Z&JSZUn=Em>*g}g{vA@I=f3#cLCyuyX@d;YCZVd`M5A}|D-zdBI z9r<&eWD_uizy6%F{Lr~sxhho>uc3VdF1s@qa1)ztJ)s(lm><$q6hvmZXc{EfhFNP# z)t=8|Sn1or^JJp=}KcX!v|?v~($;0^(TJA=y%5+Y(q?ylO`*t{wLcbdpb6K z>pr^Y#VU@LP1laY>alsfYUXH&h5a>J`~^SsbASHhR%XqLkQN5_M2ggT4?9lltI)hY zMe{P~tv)L|T#NuvPk#VYs+$upkr0(E88{vRLT+Bs*^H0*a8+)kQTKThPlMHDMi99i z$|RZLo%j442lPMr8qrlAX&wtGoh_{QdC<^NyE|w31|jJ^UAp(*wTuc5$*yx~8gj1J z#$*rSCQX0uZb|4nZ!Gz#&?if@hx2a95crf4-t#d+?Us_;p4;t$&pFz+{zmZWB869F z8Jnp$$2yV(P|#MrC;0Tqf9LI;@EW}ygXf}2zy}_e;|#%ZaVB9|3jf%eH`zmd`~m)x z=axP+Pr3CJ`~nucXE*JSH_PoE`_HwI2pCWGP(+@4ahvOO1^+7c(r0UJCjGl%rFU&y zL*i+T5DUCj=iLoNaHlSEq@`= zMrqECfYY%jGuPS`IVYN!99DEw=B7gLt90%q1RI~(d`iAqEiFesp=L;MLJ@kkhfn{j zOMz$;=aV%nU@N4B0&>$+gtg$78WVHu$&6BnEVh(Qu8cMgKySRqu`3#gaL8Tx>@G1$#Hoc- zKVqKo>O~E3ZY{pRPAUekMoWwqkSZyV2(cgtml^&g-7|=#5}yb0AObJC8Y1){lcQh; z7Q=G{c>dTG?=WGAcT3;=W;JslNmH+w-ePfyoZ}fn&CX;X##+&#w@hhRtImQICavkP z_+6BQqoUHPk?)eWc`nDd0bE<1GbQdEGG660=UsI=8StomH)443oRSO*jG6FwVq8yH zaCf#YBRt@0=Qpxp?K!8P+_%R1}Cl8wFy=0W7gX25Nz~#~eWD2t~Cx)_i@9X^a zPzh$NP3J7NdW$lsJ9f3p6rpfq!766R;%k@vDrXb1zReS)BFOwa$0x3}O}^=CIXKPI z$o2X(-j`AV&jv83exZM30mG#bh9*G!%bd?i;+PEE9-v!DF zx&*@EF-1DM8KO>Ke%3`jIgN$h75=6l&Qi=Zj~T9Z!gh*HE`sVo3N}{J2vyHhC9<1*(EHAoEQi<+Nfk_HEY)hOk~d>CCgA zX-K(R=X6whDr=5NcC-3Xz0MtFD)Kqaq-`Ic%`jgTN!nC zEBRQyjX5mt`Tz|XnFXr)cY^gq?Ok)XQ}K(_H&~QO`k<-6*LTbPbAuIadJnsOA!pql z9>AGjeum*n3M&tozT}a6L?=xLt&eDUPUjv*R+WT;pHHqlW8Z$e{s7>!UJ*7-xwO3+ zsr`)JSuc7+erz;26`4|6PSPfMVC#3XzkXuUImce^Wmap@V!=#PbqpWYC*Fl>@a(Qm)${(YV;fN9nNF^mIrU?v27}`gK3CvFUUTWs^xjv|6BkP_x)2& z_u+uFe~m7|@ea*Gt;{Ej3lDFZ9>@U(0pvOXtp-#6Y$SUK-XjG4&J#CER>&fI(+ znI@YIg;^uCAi2fzmGQ*y7 
zyk=+C;^!_&dZiXV!{$!Yn4Y$>%EIhX{-BWZ-br_bf+1>jMfZg7>iJ_{%Fu?0gPCZX ziY5jr{#4h-qjK6{)mxMO zz@KUPeVc`z<3EB(0|x_wWSa~7V}eaU5U;hd49-4qLJ-+Lu zr>3!2v*WL$1-_N2fk!^I8*DuOxY#CH6|v2QeB|W_DXVR65YLfhdRAnxU&Ve*N^FOh z_s^pf6ruBjdwh-|Dvug@UU#_VkE%y=4YHmzl>GX#Dxl`Wm2W1-ruBp2^@(zT=!GU` zE|M2EXzb+Y(FWDKJFu(Yp))S8vJsd-|A-%^T0;F;B!bunG_;6b85sKWB zt2HuI+3!_ncJ7<4AN%=i4~DpA^lDA=-qMcQtgRMFdflllkd18M=_ixS<)YR=oX53i zKiJaWPy*~m>4MZ&K5P^0(>xLz5(S>jGA=ijS1Upu*RQp|EIbb3mf_U9sO`qz^4q;Z z6vRX?TIAm3kQwD>Ex z;HysuOfAq%OGf6-vQ47Z5;S*VnVzs9j-d#%CUFDtvgCS*SZ5l+=3j)^L;$7NC+c4u zVK2uwXFmx##2Gvg69OJX-diwzE9jHqR%G{oIYxMIe}o$X&tdo>E2Cx1>WlEQCT067 zZ9@#$3?p4@#~j`sF+o;8rhRhL*^}aSfx9^kxwj^cN~4U6V~bbAe?^CA*ytb zw7jzXY$?!z0me5QIH5d<-$9lj}M^WClrdr*4EUB=eX>G}E7h{Mv>WzA=THBsF(KEzze$Wsjw6)LY&Q6>d+GdRUIRZt-mlNy$Qz9OKKr z1Jto)t?aCC6G9dqZ%A;aNMNI^{T1FH^-f(THz0Zj(Dms@M!jL^TXXc~G+)%HO(G52 zwyG?vz3mHlPuBQ~Hl|BwLgdk+%E2xIht4f7K_)pz(*6FezKnAh zB?7U@;y!&A2U?TX%t^Y`}sjW|sq^k`aJxlUlf zP%|%Y!uNbUvzb?e-A?(fGzK@34ajScmXtvbMBf7Jy;J(0K56GGa^(XX81*gsYRT&U z`q#|C5Pw6FuWWEmVcs9E5oZLHd_0p!yfbuGpgi4NHgrI@63**!DM?4PZ=V9hF0fTO2XOEMfpfX7k{-6oc z*3)|;y=Zfgu6(Nn1bxA`3s>u*;JrZD%2)X%1%cC-Yz@b8dyc>&CVAfF0ih>f-5>61m*Q|aScE(e5Glv4aj=RrVhQcJ+LMv)Q_ ztF{n(OHtV|%4;B<8!O<@(XKxsD{I6fvoOwSX;>~A<$9}xtwjUD@*W53-6~Fk<9-8N)N)c?G*|f1% zr9t;C<&D%l&r?6f#RXb`r@G$Ed+BKI}JMNxJ6L=Jl;Nut6 z?76_YOV8BV`aUBwtE(N#6oP;ghJM3y_$IC&e7*HGu}rW3y_)F|qo-VOU*YjmmPljz zd-IA{%^5`sx~5F@kY`z7M))+n6F;SyvvHX*{mUXdnj-H)Z13UE!0^=WOP%wy>*WfO zk0m^qnw=sKy!8wVx(52`F4b^?vzV*0jFN&+uLxz|0N3^C3*(*NeUp|9GVU;PruOTx zbJASqx|j$g)#IOUqLNeuh{tyvz9vX1ZgXEf@u`n12{pB{F`*`FRhHw{h{k7ItcagY zsMJm_YTN4u+TS8$#kuFdJv;t|48gL;Fcs78#u5;ZNn$CSVY%9h`}L?ou3m z+PeuJ#cgjs4{$yHcG13w*VbK217`9c|y*lLo2LeEpvT?n#dmV4vN>UO0Fu#PuK`ET#B0P zme|QvZgvpz^}gMkHiClbJ|>dXhxJoH$Kr*RX1A-a1cEX`IvW&z!F{k6FlRPFwBz@Y!Typ$Z27bOD6GgI?eQwUT=0d zK8-DKsF8)$N04_$(~uBYIh&C364>%`$iU)?^qPkXx+2J_p34m&fT!`&0sY~At^WCD zYHnywV=-vvmtJKyAD-Iz4pC=s0N+RMEA16=!<~B0;nOtAEb`?waRV#7<(Zea#jSJ8 z=UKNJ9B-Q|#9!P|WL`0t 
z<3-o=yc{EPC{j4+#6L3>#nmcn%4d8+Z^KLGtuW0|GSZ(BF0KfJw$$wY0Kj*D`F8K#+3WvpeUY! z9pMi>R_OK1nob2p02?Xp?ft#h5}O_LEc|h}txpu5SQHbNi+T$XhtNdj3qsLl?T$*a zQ^*F|mm~t7^pJIbSLD+zzjXY9e?2HGBl-Tmy8y{4%UbzNI4|kTbcH9V0CsSxJ=3y$ zW_zmzD|IT=dGZ^#udFw%%e}?aN7-l@Q}o z?7Mu{{u@A2)TnZ$MI@yh!Ts45l7-YL?ZDV17p=5XaK~3A7olRYTIMtiCmNiF;Jn?} z-=I2_RLPYiRD)74NKcA0ni5}#9`V3huz>#N!i0g#`kp>sL>9uM`^rgMI@s@Xk95k^ z0n)@rv7fTeYTC`&4^NTk?x##4;<%E(_EGgur}vWh;D6M<&8Lr|HtCc%XO!f(E(cju z`}~saj?IFp0ao6{Y#AVb<|dbG&{_8xVr#1scYN~##>oFOnBBA4z!qgs=#gr@M@P3Y z1*c#styNU;4&nBfo(6R7h4D?}!9;0ouxgU=q|oDG=YWn)S!B43)N&n_YDl_+K7E_U z@?xZz)4Vv?PjIS94*y-E**9kCDGHBy|wD2|S ze?G94XAvQLY!QRLV^gITTYW*r(|f3KbZ)wUika1|VPV;}(`X)fYi97+4@JSGvTF_s zuk>dlz6%z8>^t=>@rEVGwnkph1}3oD#e!-fHFcE-^n-}9{5|0E z9dkkr#;$Go{?;yMgeYZx+5*;tpmsrAZcQ|4qrwmxacn6Tb;gpT7E3ps;MMN!=lbTp ziTv`8UR4@qr^e0lO2Dnb7}3HdVqS?R{b8#qX?CjcU^P}ftXFtNWl5yY9n=eGgwqcd zQaQ-R*}d($aMG-;h0 z*eS-ig3*=HBT8T$-)r|=0(d%s;-;W>Zom{zU06eyxsMv=)^-3<^KbfDf4Q=J?;Jkp z-0K-V>ujsY=;w#F1buqWS`Fm7YpLHls0~$lN-IA+o9OdihRV@V(!LYLa=s5XZSsQD z0KY^YdB+{%;fR&5x`@ia!vay~ESY(2K#5E^DQeSsVxk5Q zhoa^O*VhB33Q^b%HIr!N>1P&P5l~m9FRYVfI~0#)k(MYnPI06q6EcW8bGk0g$1~tS~tE7+*(l6UZ>r?=4H# zw*Q8RFKn}}j9kk6DY6I3;f#zT-lBlnFElmHG;W)-wqMsHt4rnv_&DrH^hbq2iAv+u%RJL8R+~F|LPs?n? 
zow+I5^67%he4K~)$UB=cfgLN$HCUE0f}l;?Z6lsiOeSeZqqPE{3?fEbtY{1?bC>CD z#8c+ad`-`%+)cv!7Mc5ACc#vSC7fbxQc|FFI> z17MC1=!d_V>oYpw<(|L3Ev~rgJcUw`o3GJEUXg~8nBzwu;9M+m;(TL59rZKhel%Da z>^2e%h8tTXNoOssn9p*sciIUyX(5+Ip)&*`@wKnexsI8o6+lQLk1gRUk~tMR@8gX_ zQG3%I<U*KAZ#C*Egqc!+bvEn=qtUKWW+x9K_X^+SrOTDuoP$P|)bf@J zM8Pr4tI|k3+r-)D+ZD)yCKbWG;4;PLn)laZ_SN7{+-A+!9n$bFBR75B=e;YLs)al~ zNBqZfo>834dr`O7GsMf+$!~W!&^VGv+rn#J+bjDY|AG~K9UHyynB$Xz;y~_~ zF2i^9DSWM<7+f%TXkB)USM2s)Q8i1>9x8bmR0+rK`49g|nK@Ur z7IKKSvne<0;pfSZpOZ}gU=BlCi@(6g^jFM!?Lxz1PGtXOyGo1rMP<2%&i|wbKs(yu zNDU&-4fX{}r8066y7cNa1>GzOy*AIxuLZqM@1{2ei&T6SE;FXPS6U|J+4P{TH|$je zwZ81~OID3kHOvlS#^?{Pw@GKd12ZZGX=qc<^Gi8&>klks?}e(sVp9J_N2$;dRbT}V zaL7#BA6?w^mb*KxtG%}dp>lf5E9)mqH2OYjWU671>t-ewZ(y;LiNagFsu{J|O{kPt z)TYBO_hj(1Ycaamz%cQ14#ZZ#O)aCnXEN;_$u5X_-fqVSx}xV50gP@TBh^XYQu%}- z41IBh?`&xBT7c;e2i}25^S=zf(_JFg015DR`)xFU7+*9$zMohVZ=%?Pnm;Z4YS; zIWqhCiSReyu_^_(twM)5_b@{ghKDsGp(t`Pm$yo6ui7IEB6t%dNV>mSHu;KSNZ#E` zN*}0%2jF~xcKgHjgLadZPosh_GF5E141aQiJ-9VN$zj6LXH-|0q`03dJf>H;bLDHq zsowxD7Nu_Aa{w`s?p~ z!|}Hn&_~Q1R^Uc9G1^omj25ee{7r8a5|($k&7C}ri3dKXQ0gZDF_*2IS7nUSqP4@Tll)gM%=!5iz9)b@1( zuW{C6b=TNDYTV{jHcHG#j7(>+|Ds-C`@&b`k*y0!MOk%~mPnCbb#gzoNH}ukB}oUe zBs;Gper}G8r6!_}e~hw{SWI=Noda;eHh#}75|7I={_Gy(l)ZIZcg*k6R*IBvw^O~3 z)FPz1Kifu;$*bauMNEn}W+<2OB7wx>|L~FOIdsaF;B9U# zm5Dvyabb^$TP==_S2+z8brmI+U{6l;TRzUg*aU$#2lQKv&)c8HQ;wOz2_lrHF*Fu8 z#$gkK$lD0FU-1ze-|g!DS|`j8#~)!m=MW*{zUN}Wd5Fts*iICtcT7M3O4<73)bHnd zfQCpjnHQ44F0zg|g4 zk%$F__FIqijv^WhO8PK{aN&sr4ISLQrNp+$2G*S@cYr8c?Vs;t!{{>zy_L{ zyTXJ!PZ}~avg`W1GG7ZVmrp+hI4t4@JNA#ZypK6N=oc{=tUkQ>1$}vLdWkPpAbNa^ zeYDstC zE>?p|T+epq7?@GqiQGeG$nMKEc3BEeRS*i)8~l^sX^8(Em`Css$>m?Aupm`UwzK($Q;JE2rkB($4RVzK)V} z1;3>?*eYQusYNtcl3HVkB5ZOX`h(y;Y67GN2v!HDB1Qb@-iE~U=cSlh7G7RJ18 z%P}n0Vj!UE7b&j9e+^n>KZHzn%jt=F26DCl3y*T7uvpHh2Jtct2q%xZ6RAx<{>Tr@ z3H>m`p`!dMPEX`rO0EVJq6WtE*|f%DRU};Km({i~g*pHD(6+`^QPtxG|*E%di?Dw!O_3gp1$KpwwJi?-FC z@}q0F?cC3!YaH}nxJbwp9J~Q$la$%J!pDt~Gz?NzbXFk7X!<$*s7gNC*ihf-ut8hXT3%&k`Qw$M~m{vy3X`YmUou#|aZtJ$<+E) 
z6_nP9r#A@Wtrl2H>_h>GZ)sW5nzS9CkPTLiOpQA(Uu?Qh;E^59gw29mH#R6a`F9!HL?WjCS?#x|q_ZRunt!{drsW?8SbR56?W=qQTOX~h?lhh27Q{2)~;W9F(PQG9@ zJ@3d(VCJ0b)EjEY07S(T3L0r-+1ch2s`iz2IjpC_aW_Qt161D*)uM`O+@z!Df*E-+ zo22(IGErkH#y#in@B`$5w zQ41RB*UE_c{!Y2<&V@FkVmYV#Xc_j2*d$E$m&j|bytRN|Xca*dRGCHp2DO`ahkUN5 z6T9VnLA`4w9D^hED&7SFbu{LUOKnqgF+7S-`9sfXM1&e_NJtCt8&nm#cj`5F4`|m%n;$Nn%bb;-H zqDJ1iVlv*JetsBsFgFJeFpOH@xG6u}xPcpsfGaCBEW+gsVi^D6i!lxfCR)LX&>qwD zhyz0PlZ-gs;YNQ$x2`FD_n7VSH~egfZEU?95@cb`Y5AWp@UxM%Tq>h>ba`g=T``@a zMAO=oQ}=Vr$cV}vtV1s<=P8|4v9G+ux83f~*+j}x`8q|>&V0N0U;Y5(?E{RF0AdK{ z^rYF*R)wrQh7hUW+`pmU&VoiL^lK8>rtR35*)3Y)PHXu?Pq@mwU-~YI3wSo3Fx=q1 zI3RX8%Lu1j@-h>Xx216Tw~_p`w8C?@5vw^k>w0C>MRqTCk-xg3iqf*F>#HinxS7e(FEOu2 zNG*I9a1vOSihXEAw+Sil;Jp|^nu{3NB*xClZ+GjR-y1(?8m1+;S=UuzZcR!|j}GlG zIT>Xa0cuz69;T1(5qc-o@i^U9NchC7%|lGwWoCMYbFLrQrmG`T^ACB+zVTlfBoPS` zYDh*t%!QWF4?ft3og&mS|cp%uNS`1AzLhX3&tevS>cBAj*#Q%@-cI5rKJ$I^ zxiIW#9ep>u8L^eprT{>)<1gL(l7I5%(-+bc#jm=1aSn8WOvrdP6V9o+erbrg?4vs! z;;P%ks|aj@NN`*E3LX?MbxZ9p``3^b>Un(h;uEd*qH?^yHTZkHwKD)r1lO5P2q`^T zy+so9pw5-CCWK^?rUjTC5K9VJJ4mX4Rv~u4=IOv(SBj?LAngN$Xki4HA*v(X3BiX0 zn(V2X>B9Y#uHct2Fhb!U#>&AdKu(00HnCWR-siK&x0W40(DI*h?rWUlP|3AM+#5OF zZ(lmkK&&rfR*AjM8qvqkXPtgi(ShPaEfI8FV<1{6+lmgMA>+`mf7#nQe!j}Bo4)H2 z>^Z+@ok4^4ye?MS)yK3+roT&Mz2tuwdK(uIIr;16G-L>FEY0#F6*|Dho(HiWy;W67 zQw$zd@$09as}i(SL{`_;x{ozWC+Ns-$ne-*t%vilad21g>+7!qPY9yHPgzvdy5!=l z1|J_TS0`RNM@g2ZjOIMGPW+}D5VDUSSOjGQjt7A^CC3imB#-?N&N1tt0h%@DmtjQ3 zJnc_J8Rv@a{lZYd9#%xNa=HCbLV7de>*yWbMX2|}(qety1o3NL>k+bc$R^71Y?*(H zWtp|_J5i~6n@JAK>vm0|Uc*u)>Yu*A$z0IzZLeU%#NaO*Sp$XO+>(q`|5<9;)Eb{i zh+2C6gkYpYe!KDe5A7)r>E{lZuh` z{hO;z$@Qh9zN=Z7_l{=`i{bd6*gmyH-quP4)FVYGB;Ud1&UiX|wbSP#f zVIRMiW_XDRb3^*}FSDRjETj_uL8nJtcmw?DFKE3kL>MqDQ=ggt@vg8yK>EixXb2hO zk8#k78^>Ri4%{9cE;*S7iJehA;}ijAvb>xf`1>;{Rx2YpE{QR3*@I_ROHWD1eBV;qWNOs_+f%xzsG@Mv zb8KFLNxwk-6$oqwc6qOf!G75JeRDQK#~$0ds^LOOwm#zue|q5IShe79?xXhI>3)OV zT)`WPvz+Gg+@{Kc!LDSwgc|oE`mW;7cMBm&_0t-7m`Sm1RZ;&oQUW-Z?scZ1c>?1uQvkbleUI#rrRPB6^*&pQ4gACi!qdo)AHGOu~Kl+Z?jp}t1l_qE=k=O3Uf 
zjBu|`t0cIBh=a{oBt*caY|*Cv!e#ychGm9Em1nwgq#ovOT+%Uwta;jj-qdR!z-2|J zD{y9(B4w!+$ZJMww*|Xu6GEIEifoc!QJB$LIWXM1)_u*YL-7P&UKp&aSIm`~19a#8 z&Y42~5>KZZByqlJe!0V%K&k(qa&hTErZq|*KR*{>7*2!Sklz{ftM;wuuO7VLuMQwv zAQN?XoZxymdR&&jq=V*gQF}3xFX35XG=&oq4+v?>@-an_LZL35T_c79$>5_A2YIjq zzkN~UnivY~3)^t6Z&7@tmG3n6s_Wbq4l{$jGeQ@9mge__tGPQKVO3X2%`g% zyx6LoE9dvGJR#vUmwnmcD-u-^_ZNy`-?|uI(^Q!U_FdW?^@CX(W`f@aicW#0o(*pf z#+Iuv#QTcOvYTsi^Bbxm++UxmWk@8-&5t(@HZ=^HpDxx#ywW9pp)mNRSwW!>?gArEAM(xd3eHyN}7d=D+i)hHq=fI{v$W~OW>vzrrM{n&_ zK4!!6jpeFzCO9$I2C<>2ZAhI~v3-LHQ)fv>It&aFm$aC$nuG5JEpp!>i2FimN;wFZ zd9{WY^UIHboW+=yHibGdTET9}A+l?6fg>@|BD0dgV()&;Pe=?Oe1PkIcQ?ZSj-dG^ zR4%o6xTUuaTx{;v>^#^h&J)AHz@z>3zFxT$#t=iAf{d|qJt}gI@klQx$wWeyOQlpc z5)F|$dj|5Ec7uD==H5eyY((Yh-XIybwu_x@;JXQAVC;SjE8{f)BXutXZ4xfgDY*a;z0s3yu@^RjE`e0<@ zNz+aEd;1ZHid(}2HL3&x3?J1;XjlXtux!LI{zH`V@S)JKD)@jE-0rjBVP4q!7Ba7mAq;y*g^3=M)0~ zW`+717p^7=e}l}r)xN1Wzw-QJ=&TYho12UEUe25ji%LPEP%S;oiWTF{Oz*wm*BI|o z^QVfEb1S?TD@iEtmqo5#x?uh#s6c;nTfBAis%$#;d}N4i6*>}F*30a~@$w2?y*!`& z9VjEY{^jtJTDypzaeQCIb@pMo)Mj0b;ReGU7G{feXYG%_9;>BtjNKI8nH>o~X#=f7 zJ#_XHJ~J%Lr)Ct`KLf!hZU#n97eo8438q}`hX5<6U*C|-zg`2spGi5)UtI^9{-pil zuRPE+OfBqx=sW&vq#u3rJ(N*L3N0WKCY4uePnY6d@#1S}_`(g1Paa-!6AzjG4peY` zud+6@lUJFIo7<@|VO+wtZQ^T#afVpPeS)|VYXGUsZ^MGTP@##wz|o}yKGjbH7Ji~C zNS~U!WK|a5I>0~+(S!RdD(d(X9+`Gn9!qCY2_+%j1{@KH!3>dHWp!+>J{bAG`ahpN z4TQj=_cj4Z_vr!Y1uNb|$$1=~N|RAXppic+GzZgv)%Qwm&L`gTTaV_?ZR91RE^%0k zcEQKnF%8~hj>8-szuF@^pPzlg5F8_T8Z^XZ(hGkU zh8co=cX?zvqL9*^|BWCzeF{D&j*Ls-)EXv42h1PSp|^!%i4Xf zNem+UWI^rJVeOA7utvCVMR3>Nd;^=Eo;fbnx-p#u9iUl||6DTi4cRiJziIX#QsJ3Iir}salK(3=%nFWEfP82$w1Wj> zpx`oI_&%cj;Ks9N9{XG2_mmHohwk5e(&6%|2f}uAjxJz}>`O8j#Ao0arp6cE5 z^5f=$LwnaVWqVE=dHe04VcR;h_KQsFk%l$Da_4%3K)&CGhVK?B`RE;>rfB*iPRa#})KT@z$nc)fU3I3n! 
zQ8ZocMChjA!R?XHZ|eWhJyYZ{2D{uM^|X^{FC=~=_)G)?^Zbmzpy3KV`^(=k1RRVA WVjD?@BW%s@`bdi_h*gRh1^zG3BR38J literal 0 HcmV?d00001 diff --git a/docs/user-guide/img/kpi-strip.png b/docs/user-guide/img/kpi-strip.png new file mode 100644 index 0000000000000000000000000000000000000000..abb73afd2be7daaa168ed21f349a1368cd9c4f07 GIT binary patch literal 9599 zcmd6NXH-*7*LDB_1re3rZ$+eoG($(EgLE)R7o-FTMVgcVf)b>PAV}|>fbWALB6Cg~~pgf%~ubEAQRE`?mgP#%nO-QCFNRW1A#smBP#ad(64f2oDN?bH>{{Y@`(T*lEJkO9>(jjN zLUzxFz}Jx7jGs!s%9J&|yk`Y{`nR^WR5JorfGrr80Fg~|=+FB`HPzMEw^ao@^Gj$e zii*^#jXIRqJ11-jPVUt=H24-yPdI!=Ak@z^mUmVb7e@m^v;1!d-=r$@=;soldw|+Uc2T|MRnY zL<8A&j(^0qcoIy8{T~f!e6Fb2wnm8}OohyZ%!^tsX93(DgqDH1MuE^qmLvsMZ0yA5*@tj zZ+DCRcgd>?n&^JS8?z3a8UdprlqwP0!a34fnk2Ev=-x#Ba@t57EU{IR=oe} z%Mq1jF0n8r=imGaN%F1<+p9Y!3kr#|nzjeuEhUPQfmr)R^tVg=#~W87ihsXMQ}cd1 zzbhf5py$}c!z|BN6;mn|CZO8tk3Qn0f0?hanKh(Np?7;r^>NaMyMJH+#UO~@LD_@B zb(Snv@sI!5cfIvjY^?j5q&pMu(1{Zinbn4&z>j7qMH{P;N-GP@J<{=^{R-eK(UstX zE*Ti$@dgsOy6Q!i_gY8t3&^-ES+N9NMf#tul>o#OYvsFcX*arqk7?&k|7?Z2(+rRr z4L|i-KM_5mO?DL*{Ik!?^JF}!y)_u3D!28f`L-wQslVsIe+I9U*>s>JsS6ledEAN+tH{*l+n99F*cWci4|Y5zYlZiIm^u;Qa;K0Yp2&`&7K{b6PJfcnZ{(Y3P{~9ywpc zf5AH}!1Bm$#$5UomTA|yB;R~*v(uh^c6=uY^VUC-ua`F6>@iU2$;1Lnjh9hdPcu<< z+swq~RVQvFxX76|TA|UXfcIU2!(Z28Mbe|0ve#cORjEXDw7MPpkk7rX6L5>4-^}?G zQ#zWHNTPUav*d0NCZot77Y>va=M;3fAXSNBkn($#TMiH}F_*f?r`iNHndNG$rO>`c zR=wnr`)Ylu(z(36DnKqP-+qAAf7rPRuGkK`OJ7*J_wRJmx||If3+dBb(TA_a0p5|? 
z9c_s#F}p&b=kY5})9G%ibq=Xzh*ouXpB+3Bn(?7|=p1p=rWkkN7O)q*{Iqqay+#|R zo4H7uvjKEiXd_Y?Xr5*Q2Wrf8)ZaSf>`Ym(vE)o8f($v0`%nAEVjc*jX3+nEbYB<~WAMva z;bU6{;RAe?qqIQ#h5m7@XeX0^ zM_dxUUp>3;+>;gCX4u9;r{-orzRK6bAya#!BiXldJa^#=Ck_=SM5hQ7@VElsN(Eg8(+j8i_Fq2 zQgt?Y?+bL~38>vDyWsKzX~{R(JTchI{e``=cUZ>DNtstZr|+tD^Jdh|J4$caZ^?X3 z8&9OV$W^qlW#QpMaGk(7p848(nFiK;ceU&u zG$ZEhRodSDQNbOz=)qV~sbT6E!`wR^3o@`ZAXbUa5S5T9-aIto9^G5``r34P;~?io zk$VO`mjzU^MhZId{p2&b)w{uS>L85PHjAj}{d?nxlhTp<$ja2Ap+TRz`3{CEP*}jP ztt9LQbX`MEnw_ltp`8y$F8-x%UZK_T)yE?E5RsM3Acwh2eH$pwc!D zZMQWAB}7@}>i1`$D$(fcZxRdFXkkz*h=wUEbXR8*V|K~+Zf8|IXfdUddfd3B zTcdeYy?G}~jKX)eKE8sl#Sv1qOc=+zj5eTMk@z#jE?e-u32MUr+#X{5_MES;ey z;oeSpzC)Fts}(~Pb7Ww{l~74$05-iCuVDW?+xQ^xvkrRf@!_)4a{Frkz;&ANN?=Q3 zTHA)rD&Kri#xlyq?bJ=C|9Xm69sG8Ped55HU;g0pqXd6Y?yj;(%5rl<^}@<0zg!-zq&EH-w;2<)@f$_E=<%nXzdQcW zf5?X3D6nbD6`|QSjWfjbL7CUm?YRW1-$h47(B_sXpRO*(VKxIEDQvSHDXMP~vMm_Q zt{6%k(QOuNh95ie@2B8KOT`1TD)YaC*gM4pk@}_?N0Xn^>=?v&m-22$o%+Lz>vn1; za+?iQ4!&;Y1oX!o_*AP**v$xkMX~*IO?@jYbvSlF$n!Zzghaa<~bek<-~)u zd@WYBX=Cb|i=@*cv2{P#;0syhbe%sA9}9cYhdxs`qY;ey78b0`MVIWrE6_FQ6qx*F z*r)a@kG=l}UwekPgKfU7)cBO-v6Bh9D#WskPk>xa^Hd>RLXdq4XGk+rwaTYPCyTp#LIgR(FXGZI9!sp5ns;@{m_)E!pqTu6 zvzDC}5C}o#p%<0XPaTUNlkN3;eRgqm>aBc`CVG%jgGaLDxpB{kZpmNm3nE?#n{lJI zXADOW69j&EbbRFP>QSyupor7Tr{ydw><(nz^Jyx;AGj@f@kxX8HE;C|Zr!t>w(5($ z$wE?SUd87&^{9Z-=ENJH6Ug<-*)1Z~pb)3(7R$Sa4%kThGSM)sUgA{UHsHhl2AH(Q zM&+iqmWixbk3oD~v!8ae?DCDkDJd(Ph1zPC#0=Fmk^7m=yW8f)H{a{^DtvN?SM>LwefQ+JM#bs{@Ox4vvLB2bt)XCXN;@NaDc0Rbg znDzLLhhD0Hz2&RL!q$9im7y=WhnmF%LQB?p8wQdaz3zR}RO!)Z$^)jmQo^kTdW=g= zdb)2+!&3JSLK>R`+80$gN&MUfZp?@jQ#Xt_>H5XXJ~e%NKLT!@Dq6BRDHPlTiM`+Fwth`1ZB|>Q6yg2p=GT;v$rkh zGwpig&PaA+<~tn-B@E!YxuFhf;Yz!1PJfwlV2Y{7F4P9p);SkyIcQ7}ge(Oc=2smi z+{(C9LZXLiuj z5r%IJ`%%!Aw-bQ$XPOvblYIhO>DP*UC8_CCAKzX z{BS!#bVc4QNWl47Y1xXZTGZ*Y1zr8MAKY$|b3q^PEG-_&KEt)~Z`QV3yfC%jjrqX3 z#!?t_qm+xQjFvcp%iEgJsU}b85}x==zde$;Z%R(?*~T}JJyk(;6w?~~x!<|NuP2kY zZbQ$Ba#t;sW&U_dcKR_0e|9o5B5NDcaAEVU=4LsPN+ 
z*xrnJv+T@8;9RqJ)?S18pc>R!SBJvScBhX>ZnhCy126u{Ey6jqGF&?)t7?~0CBR4_ zMyF9}t~uxc)u1wS5k{vd3eKSd_4C#@^EOaK@X`=BbT=0{^bG|C z@44MO+u76e6F#2V!Vz~1^e$)d@X6mkQ{;dr$6fA3YjXcI*UWt|_}^SWMO^-n)*4#V z^gUiXw|`=5c7G=vB&@KMkpWKWo}eK@0GC-yt{9zXqx{;A*Wl&+3{nYpY$u;+}}R&ddlm0@SB96o=35_6aOKFUG} z2)?L+Bc6lB-m%n`rQB*+_SfAR*|Szz>Hpz0(_zN-LwGq4m#@cS zYhB$+Ck0bZoSw}#RoLrhzU=SQS&G}EJPt692ZGx4W*7J=Fw^lnzLaU4m=QRB#DejK znvX>f1AlCd z$76}gFHxZAH%0w)ntMJrl3oS}B*7 zf-$iM=PD0R*arD|;QXSLaTwIv(QSZph_TcIo`o0g>)9-vH}k`aXPAWh+R`n=eSQEI z-aJ3^ORIZ+h;^eIVn9@96`O2Id;58PO>M!Fiq*NE0O~T&pPPM@QZj3walbV|5c_?f zy*q>dnf6?|gzNA=9yaDh-#z3nBsM;zSG}+dML{HiTWvHh5X&`dZt^&o8AkI%H~EaF z7&=;P#6Ej97*l8sYsnWyGuIrH4|Tq<+kd{&f|Nd1HT3uI(Sbk$7OvKR8O@S-$sed& zO(pBU;CS53M_A+xo^H7{Wa>g%JpyY^&HjjUz9Y=aW2@2U2&o5h->Bxd`ki9WyVH7Dw$8GrscPv>e4VNdn~9LmY*6Phe5>TQSmBNGuaQt($N@WN42Z^dOWbXF zrQ{}u?xez}YZkaA?JbR=pWgRI+PmF=nghpSrR9oX{aIZ#qRF-_vJd2Auc)cfk(lbdv*!BT zcP_R0m+LZBw+mzFW#_Z2^`sLs5(4T?ox_%2MKcy|uzP&iR4rd*S$B*O=<}nDHj8KU z$|27x&fG1`EFB14yHPu)mXT#fNxjqcZ;EeOxLb? 
zA3mIzH-d(h`y0oTR9O>>JeHQS_ItXa5n-h~#9G6>O&NjiWs5XEn=DO1h8EpmjLp=T zI&paIrMQu?^3Y30>Kz%-s=2eBwEo3R?S{;n5R7&|$*jae#)w^zv`e;1x-syh_|XS; z#OQNo;iQ`Ooc5>2ej8~_URjusp~0fLWF{Nc|5`}-$eEZMwSJVLw{FE2GdS~g4d?@fo>7dcgid#Ck2yr`tm><* z6u1NmZZvd+r<4j)cPwY9v_Qr08+|+OFl~~M(SK$x5fRXpc3eIhP9PuPTvjF!91&3x zk(<}>!Wgr-F?k>C48CJr_86E^()8pXiA3ezrndYO{G(jQ<7JXfQ=z_}?1=CU&pWt6 zErP3rSteDdCKf0Eg@;TS)dFo=teh9&^6me{FcR=Ps}IOzCmYASRZo26W7ziUcX`0D zNdC#cK5Y>a^ZZtP6VS>0f1#N5q~O3v)5zw|Jy~CE1!I`5eft^fBVOJWejLamJIsjr z_`y+w5iyg$w_lo9^)UJb8xjIHxfQV6&Hl+2Uw)WjAbLG0$59;jeUex|_YLL8B^Flv zQie>;d|K036yf|XBkwJETE;;YDIEHRfbyZL=JCbn7e1U(+jZOl``-g?H`<$Z-m-10ZnK@l*lwogPL= z4OPSKOJ()%wLiROk%E zLoHVmJX6TDj+>XiQ{wklYBmh$q};pePXN}lgZw6x8Lyy_xm!$-v|t$ zO&;=y4TDX#A4d8%2#-ml#+_58cJIq=kb=y$Z&>>M*^`08jq#;)KO*Vkn__S(B1Vld zix)tzb2ir6TY|o8_t`$sHV!h_7xM5|XfZC)*F&iby%i2&Ezs9zgUKk69=UUA(Om$i z^2$Qf#vUm0e4?=u1!9#9%z~pwJ*VP`Ip8zLVK$DJwSOq{@@uB<+Pm)6(2XDreVUEkxE9s(c{;Nrox|^=MOCH5 z#g`p)>7(xS)2uu4$dVlf*_Y-kQW0z;H-dz(^7(?>A1kUXF3-b9knxG>#*ob)4KNr? zd+JeRbzv?4>4`mqDa_iTRMdAZhn+P%nqCZw4eoY&UwDL%Rf~By`S>e+)bt0k?q7E# z9Jdq9rt&gU`{@>qG3~qK99q5xbCa)tkB>g>khCX&XyNBEP4tg(6E<=MlpcusU|*p9 z;TWBr)^+F_rySW)Y(WR}ZNTbpUrKveTlnK*zp-_yz|P)~d1fRYRlVOWga^ZZaW0{2LARi?wn1E6BPDEe9ah~Q}q8E;TJ8-}xu6C=yn820x zb4H8od4neh$|h#**NYKpC6Zg^3%^J;0_%3F;_^7lWOUk>-sS@FLyIThSZn2Qt>K0l z#x9O}F~0Z2u;v7HG2G1nz_d{MahGP2h^NykgE>AY|8is9o&}va&&SAngvTf9qibZV z%$N{cH9o%SIuxA4mR#HpC0ZDtS6*kFY`HjxH`>+_6B)>)=I&Z7kv`qCdDbnd5JDP2 zkW}u@oTD9XYh)F*E!3lpO*`&J18pt>X01c6Wv$GcM088bx(}>M|6qLJ-e&aZv|E24*%v+iYAk~tWg}q?+7EKJ(ST|7dFF9U3fKs zJ5d%Ge`u%dW|}Gi`)vQudIzn&xRy806AAE?hbMi!CFJ-@2PupBTiqxfsO8W2k(L=~ z+gb0GYx+}L_~&`=FYrmjmU@k8P0YlC~e4iqBfmGRK=V8Q!>1?(N_tnDk< zPTMJ?v<>?Au6egir? 
z*CD&Zs1MOmoaD`=&8R+b;oy{Cb<=U-ngW%^GCyU1I_$Si}l8i@EAIg&& zczNW8CVkfo9Gjv-1x?t(-5TE5osGt!T)@W+pTxlq+S;Wg%(s$Y`@;B5T&WDFqmv*b z#p!3bhK--FMR_G{uIzF27xDEGOKo^Sn?5DPm^0AD{18v~$u^;1&G$hjwl8fHwT*B( z*ZI-&AevA-m!v%%I4XyDOq02@f3UtZDh0`oNjBWFAqO# zb;j5VqC#)oI1)`i(bn92lG7cinR42YIqAN)&SA%JC|G)(re#rtwUF7kzS4(ZI) z^Kb`J8R7NgK*5vC%j2X$3)jhE`=+CQf*M#t>#Y9%hx~O&f_bAq zabD_B!Yhn6ua9tk`Y&D3m9D?DQweajRs0La{tf0fG&VGbYl4WswW*vZGd?COW`;Y?wuBVjS4Hw(BaVuvq(qEC zS4@C^Lh%z5QBT?#zHIQ9U196Uo1&Am_x~4So`OiJegL>NIBx0Y?Oi1{hKv+aXZqiu z{&LxCH#ae$C|OXu@C5K`9F2W1o#)UHG{7&ZTmt9BO?s!$oGVkU`v1FhSc`=Rr$}%m?5Rd=Tcj^0Jqjs-06_g6Fbgj z=t;-T$%IXyQx7@+( z3JNCMCu%&mz8$lq!EQ21IA<@TL$XKYBDHKfmOP17cqx zDjMD%OoWt~xZ=e;JB+f(C_6GJ8Bb^B@%9D+K_(^qmk`IUa|E7pqj z6;bEEe4Sb0K)fEbucjg~Az`HiWRfeFpjSEvp!4&5(0z-cY?bvt_eYcySwY6i(7)byGw9bd~plz?(XjHKN6DnzJJ`h z_0@N(W_RmsPw&k1oSyDyo>_k>2_b}cSnr^qpb&(=@ykF#y>5K^T>bXt)2mKQnLHHK z2Pk3wFLDk^d-DjNkvH$(9KGox2{Zbo>i3P`kxC0%bxWl7qhjTXsTEpw#(9yfVkr&8 zq(G^tGJ3`%(TwIm(`cO1n_#RFb;27gMTPdWo&qMEmEgzgh}CuX7yMyR85#aA zyt*tkbE4MQ>(+~0gY%^-s;a_?1K7#-(Uie5l!k^TIg+`GDYZW8!fFOx(FCll zax^Kf#%_3LL)zx`tfVp%HGnmx7Ie^0#Hk`o70bAI?R!Ml zt_%CO3W=B)j&|#&vgJF`f;a=g-DNdWddBVH3T{@7eDXO5>U{ZF`fiUL10vSnW`lx) zS8lDrQ7i>^pJT~qX5XW*XZN`$s3b0mO{W`=FzUjuS}Jk@LZ6j4e_-kJ062M#ukSJg z#e>8Oh>W?GecD#|(5qb6ek7wIywimE*WlRiy>{R$f~TVxKOOkN)`MTz*wgeSzwk6S(*OZ8G5rzEL>I9L<)e- zkD8)9|M-mb6Q3Kb`HXGgS}CD>Y4hr)sbTMKU|gNp0P*v0XdNB2mWGHKF)=e26jJ3G zS1)uLiJ9NER#%D*RIG&D?cA~a?Uvw*TT*U^7ZbDT?LhE*m&@tUU27tnx^OU=WpUKD z1vjhPbUMVtNqhl%p z3;s?61yc)d{$725ZMtIE+k7QwGMoIX?cRO&_YO0yPCrjEw&4nRdCm(w;86oo7u1ht zmJ6N-NbB>#S6}(sn(%AgL}hqg)p3@XMZ4ETqa#;$Q5i%sc`f-t#_ zOAesBK?NI)fa7v-*SDacd0l;MG_C}~ncdBm%RLFj6Y>E2H>bw+H;0vQ6R98Xc=XzD zwKLfQ3N{CE)bK3{SIY-kCGlQv?nRR za~dP_yDJpOL7hTkMoq~Ypd*ofXhyA_m5RA{xBk6kwf@w=deVXS6?D8d7iHktf?6)g z{R)%bozI<|-ObU4c5a|PE;ibS#|7n&TsAYOa0+G*)I8P(?J2B;648K`vQIopwV+AG zLuk=X05Aei_M!gvw&#KZyEtX0atE3h(aRFn z`(}8Xqv^0(fFrOdI*Eeoyz713XNkCdYJuBxV8$cAl@hvmb#c7-z$kl3i4tF@sdg2L 
z4>w42X#>HdKzP&;{xi5s1j7m&ZHzxkOwqp;-L)-5$qWiG)DaPG9*wRb>qm>J$r<2t zrvN!QtvBCTsT!!NbGVV;F7I_OrfwPOGw_*hn}0#DM}9ECm@TuZF6ftSvUeOFYb&g* zT>n)GD2s3puS)}l#Zu7VCR5LrGtbV<4`)EcGzv)CtnUKGPsyehm1m9HB|DoGo%+>} zqSAUlb9RqRrv&tkqEmq`nW8s&?v!{=TU%>3BO%orX3pD{+_h?oj$A7ZuaVAH0oR0SGAF1Boaur?Ol}g4)S|XqJ&!S%~4)TiGy6yH>%*loWx|&+6 z@NX*b^GR?TR{>u*BOAN9i?AE*L|r;9#idUM=(^{ck~!TEQmL*wS*cxDHBS~|!8JGg z>8p48ZzK%6?C6kP9yP8WaOTah)_qyM>W?+Gr#3gO0T(_sXpF@=mjc1f_TxVW(X0K* z+VoN8t7FiPReAo*@oX;^+~+9ay(C zwh;0@M)#~NES1pu_IsJpL)YErMrQaA z1U*Ii3rPbLccWHdY5Y_ig@jn_hI6B0jjlq#e5dA~*CC;S{n63uxHKNEWKCs`(-!Ld zc9M-B@L1wcN0-Nb31%3Z>EUhoUActsMglgu9}{Pts!W*rS~u+Yek2Dz;-d}V*aM=E z)%hkGnWp8*vCvBl+7J8T7*A&1KYa*FW~ZB4l&7{vaB%eIw>-Za55a#wm6`3`z3!tVDx5D-)>~&u0&gS2&wXWwNe~@5&JvOhC7_=v$t0S*U<<} zx!9w-2^_3j(~L?{3@RKY;}o4=DrUvSa>0R_vWvL@Az=OM{LA-NgrkZoc&&!2M68~h5{Y~`okyfr(e4D|Qg!Zcs{6ZbYsFq7bH6E%j&a%PaAAHHiy^%Bg;ZOJJVJg{6 zhyB|d11H3RoY*0do4y6&n#q8;Cf5oJq1>O;6r}r3hq&|kw9a`y%&fGQ=N zd%eh>nm21C#^P~R%g5BiUPXu)*0|X6(lzru-lVyF&hd?Vvt=cJGmB>lY>eIfk48X`TQH|_eCW3 zMf>TBiv}dPBV-I{!a3k~ZCz^BF0M~lGi1R-^Dv2ZeQI?>1xvl?=#7wF&76^@$Adma z{CneNQ&_Af=Hk1?RVZl@@`@MZ3RRnxM; zvFp>~srU3HEF1kiGB}WWj9WHochGwa0ukZv5IA*U!VJq{xGQ-v>C^u_Ys3 za2$Pg&=3&ptoQ3qfYm(e2gc9#)12j$O;YaAU9xBcfvXd|IAxdylJJ7MU+x0oE*$oG z?m`Y*yX1n|>pWsp;**aCQE4}xqvK|&JG!d1Mq4G4+S5YFgPc=-R%KSW0!fN{ap>IN zWKnQUgR>9LT}>7^Tf3nw#NUetB?hl4WMC zVq!&(*>lWbzgg z{BdPaFq;~gnVG?z1JF5to559kU|NXv?H?AfX>9#0xfJfT^;hKnGw8Wn-`zfOJU8z% z?+4uH5IhavFXXw#5aQqU0&1%>6AxFu{dPg4>ENfD=)tS~%CRHG#XuS<WSLHi6>!wxW`q;;ko3KY5G%F%umA5@0Q$zH}k*Vv&AXEul1IR$%gi8a!537id=8*yCZ!-6Isms-~3mQVz6t%%wBT&B-)QZp4|IwjRmP zrEZo!1$kfV2d0$p)yuaz?nKN3pPWfZXVrP#8dA{)M@NC$6I=nhFewxIV zf9X>^YQ0Oo>ZMusz#T}aLPv1T$^YA=U(`i=i-o@TdS%-ULlZ?hT-JaXyj1rfF}`qj z7oEZf&d$;i_AdmDEN0p1hkQHeLZjvHqPaRtM-8B%Aqa&vVOX(!@b&5I0duY?>SaQ-g$>(frVFEW^-50@v}l-pBva-`j(kA1SbhpeX?fiRcN+l+1RC;N zBz-G8EA+*h&e49OtgWhXk`-_%fSVg8QQF7l#&YWe>1C_kJo-{AJ2is}Rx`Leip=;R zY&$Xx*Z!_&pw<3(BrU`~hvji-q3vg(HR_~OQfN~hA<>}o^h91m)#4BWWy0aX?)L7C 
zDj=ET6r%W%l;^DFVJ2@EB2i|n%6ak?pFhXZOK`x35J)A$*h_P}wH;IjlhUwR;eGnG zKHmPPVfXnk{kO@eZbw-~p_yLD0dVU?AV^l%}RjTg!@`kYA?uF|lh>(L)|5(^qujl5HZ+~lKPjsB{ z$7e3%^^2Zd@mLokRv6IUTpl&G`n{sK`i9sV#qk@v!%8a+R_Qrgmi!f0hBsw6uUZ#W zjkCJu&~LXVdZK4%IX+zQTM4+?D1fb(Ck=4@xvJe$?`xSC8X)GSH7;bvOBJXRdyL9+ z3sGB{EBmKym6*b}Wks>Yvl~QGo9aD^(>n=96^bL7x}%Qe8$vz-a?_pm?Sj4Cwaj=Y zURPsS+oE+7{@;|X6fpIL!*AejC;9c|K&m0<6~WkOkuvP1;T$Rzh-KEGa-y8m{H^V( zbv=OxbcXc?0p?i%frk&^Qe;#<9ygIw;jj_625uY43vSsX<{yTaF% zN!ja|ZwP_huQq8_SGM#zNty4#Z)&;rwRV2i1lkpqfV()5Z0H$U+<>{4`xFI`qcik5 zirz_}-DCr&zt!>k0c{zEJRam*W4>tntr#hMT0m->>$C`&`d3Evv+)q-&Yrq{k6qkG z-KAdsVHI5YY4)!!34;$*#?(Tj*BY;vuK9dGyfUl^cPa?NL3cgsCo4XJ zicQF+B(N=9hq#c|;Kap{Kr121flTkd`@ZTz(o`&;O(zCfbyk$Zq}Uy|Qui{;oaVH4 zQF%gGVDBl$XlUzWptEn@zxX{jJG#g-i26UFv?E&Cva3#ePmyZLoSU8&lIlNTEn7XCmGsh2py7$|S9;iRHS9$p1-Wj&mFTLQ{e-U%M3%tl%!Bjp<&yKqev$8E;o_!u(;( z8KKwCOv~RMB%*xeaHkDk&8dR~?>3Mh1PXA+%17o+FzU*%YGSl+epJb7blsE}WmUeR zQ<+SGt(=q{2`IW#^tswWuJHePq>Ahojm6eSafO=6pf&)Ez@ zpXo_L?#^i%prw_sSSJVDUzX>Oppu-7P*y9odP2h4;91Gh#iI1yqd{fUBvZ zk9e|7M&}K;TtXIfswPmoKQapUiX4;e$usixH(YKbe^#?duo{X=tMMrUOf?F~iwRhG zAh<^e2R85wY6@#MmsWVx0{1(O{r4k7f_8+hZooiwU=E>T)2n~Gw@aE9PCF* zEapnd4lL*tB-&^#D2OZy%TGgeqzy?_%D!p85Iyr?O@7=dn^y5?*$qSS zLKj?m9JQ6Ic~6rlM4JSLxd%;%T#c(VZ8a2b($}`uyCaT*Pv=FQwo53NWI=f%lPO~m z7^AEJDZ4qmL!vA=BlQE7Ak|vC-Jb_<=8cOlUD#2OQ>R|3BGxVh-)AV@VA*qO@RQkf z&OfCXBDJ=AYcs)+?6N3k5PmUVsCg6m93;bhN7;IGOt`^ziP-+T;jW%=bj)bQg^1JI zLa#>pE}yN{uqNef6UBw>aBWHRaPqFsXVj_NE>GDX_E%2vz}_PJ?>i?fyEN@HJG0sQ zcE!8SAj8zN*_dh9>c*L^B9z;m5|Q7&(FcWyqm@!|jY55Sg)}5ULYOLfgTtpnKl~zF zpomZ7=hCe=4D?Q|$jC`P2WsgS5Bn$^X$HzkJ>^V6D%ww<6AzOWMyqQaf3R$_2(9AT z@X}Y4o)gn*-B>z!qdNW{W%VNSwtLz{oiJdK4LUzivhLI6WmS2llzLBWflHAS3%GW+XnB znfHI0pT#-bsYDsp{=K`kL@KNEIx5~!R$nnnWj;jLSVKN?yldIa;Bsujhq9L|QeC}R z9>966L#r-rm#JaT*WTwUbn1M;UwINH0$dk%jvPRho19eG)X&cDT03__GbGTlb3xs& zTk6@%>>QM1Hgn-&Dd2|>HyCGPTc zFA5U``b+i}$W18r00vWTYCj9hwHr$GoKV^hR%aJhdt~puZV+;s7gqSz(Xv`*+LJ&P z58GX%g5G51s#zXF6CsBCwJbtMVx!A+oLSL&AW5uW53LhYQl4W%*IfqAt8{WYcyGo% 
z2MyVu>ZTPh^As9Dk(*TOmGtLL8Q07$=oaS$u?T@PDl4t0hq;HhMJ|OG?$sni4fo9k zBMdhfWycja+;LN*vbQe1w`gj93@bb&Ss`c*-MZ3yw=cW*w&G}n#h5L4sT~4Mrxx?=>}b$~Rx?<2x?i{mURD{)-gjnESfs)Y z0=sL+SJq^OhY$HFK>a9gaP!&Q%2*^9i8Rr!NTn=atJqTg3EU7GQbi)*ZIUonqOp&^ zI3Sh3Mo&$PPcamknaHnYqhK+%zm34XSMa{sc|QrR>G_8RIAi0M*y8do8|_>mXl}m` zLaC;Ce@gaKm{v3(gm;MLz%sRl4!Y)IIHV)CPn|gQo=fLzt@!}$AXs&0R&$YEgjyZ# zOR@x8d^L}TJzJ7FC9gU=x7--U}8M8u&{lJPbfvBBgkEQk40trqXv+QEG+?S*Dt<9Mwf%4e7wWG)+~rv-Q`6==ow0QXj2VD2j3@L z4v7#Ipch`GTTF4-&MmAin>J-ogo6&?x0RJU%f~eX+8kHPN@iYJm2y55K`f7sQ#TY- zG7i59=xk5)k|po*80xbL&*9b)35!#6ry#%@eWM_<-!T{Yo#bm7F&qcI-Kvi)KPGMbCKy2@6TE>3aFM7gnHD~ta!ittJLv;AW)4MT=5?i){z!_T0ryb-Ya zr~}@Y@Y>$Cj}FZh0ln?-bVy2?Yw>sF#vMsSAVjG`c<-O9N`B?yu%6ePi#sFy;me;K z8rN1Z%U5jNxQrEjt*c?Ci+KZaIz~N}5bwew0XrCy0mftc2Ic z0p#T?o}7x5ij-5Quf4faV={kt0qFcSl-VaDag(uMpUvgWx;--9>xyq?{gvW(m~#f( z`40`!e27)mK|$Yqy8Co}_k|t)yDNV4YZC?Cz539*`p=L41^z77I2YWVx05rUJa?}{;e@ko~cq%1`ltW=+ zVse7{$|;Bt3K6H=stU*8i(mU&Q@4llfC&X1)9J^Y_X2Q;aFl34|4Q$?e4$2%RUy$S zQE&#h1sD|u#bsumxo5I7HvUe2F*Dt$<@3K*{(hg+ZolYwo1tqX3O8DP7=Vb40dFv> z=~7hdeLb9wPOd^vo~uMp`>)(9nBfO~;)s!_!1a3hQtMlrU5vhbzyhAYut)lrb`Pt6 zWxG!2R_pb!^muONlGN+wus9oXc~3}{kaUHXkyNRc+P`uGp5no1F}5BHRspjoLSZ1K zuqR%VZtY8>&|qNw_joi?K@bH$;3g)hTAt|8Gl_{`Sh17-Z$cebK2Ox6-=g0yW=UJx&sM2 zukrmwB#An|?mY)}pu9gAZXtkM%NN-)TJsio9xs_;*L|Rki0Nebx|8F*@{B3F@)<_A z5iKStD%tmu3H_MJ>&tAoU>Gyr$@S3Opd&43x9COSQR^Z zV@*M~yqRC!rYk2|FEr+7%%Y~r5cfIbRYmyS$~|Hvt#_`^(~A0qr=5vO>f{9c>q%jJ z`Y|oW^|u{01@>k7Le$t}=53fDjE8&Su4Hs;@*g=7n@B_rMZM(iEBm~zRt@PW_UrG^ z>5^Jpcq%8|B-kA8-~s7cK^3u^P@(lVaQ>=;0UizOu-)i>?n-N1ZW>iRDA8`H^I0XV zuhDR(gzm^jw{J%ZNVKuhWHGId^DwX;Ou1_LQ zV5>wtP*!(pD!kt+&O}5JB|8}jNh1H~+i;noegQ-s7;k#Je;$<^XJ(86c;o8)UT)Tx zgn@&nAP$1Jxe1h4*r{M*a$w5X!2JSZf~R!sX#ws3)Mvm>cG%fvj{3INz)?tfvUyXTZx&4WcB7KR3A5X#iFcdUOEeWMOAmEIQMcLy1_k-ZwNT4;I2~Ks0VwC zZ=&V~j>?{Hzz7W5mgxX^Evv;5&sB*&32eV1jF2>Rra9T?u&{NUAdxoOnxW5UqF^}= zj9lR9ovY>1kahUNbJ|`6Cy-zGb1Sn`Un70A=Gv9`9j>-t$#hS&O(AE62MMF!s#XYbOIP)1>qEpmF4ZL76q(qXT`0Y-I+VC# 
z1}(nQzF3VeyLu)3uBn8E?&MvlQ0GC;M9~=qdy14%xyve}KMh5(Ng1EH*TWb(PAHed zwClD{5iRIqxXlPUd<$kb54_M=Xk4piceWYkt$oV{j7wfln5CMF9cl8TJu?gIjPMv` z=(gpdT3Fbb3bQ@?EC$=_-<-_nE8qHAc?#C=?&WA%j)7{(hwW)Xn|82{!D0dp4w2PQ zxk)VZ^UG06x}G(=NCPpu%8!mW&`#kQPhsL9W(jak2!PMk(_>W_nwBRxoc0)hh7l}& zT=dOWVME`MrW9$0elWdpZ8NZq#%RZ(REj!&hYYOa)~k(xtduKaO^ZfZ$%S3I{)&wU z86K4+LIR6wL)u;;UOcM7A}y7cm2oOY$DSbe`o-71WeUB}8f&nTt7hQ!txbV12e$mKVMbClt+>|dvCKf(Q7j6w zE|`(?%UPLU(TSxCDC@|Dy~XJfZ*o#-*znrO0tc#s0}JNDZ#$jJQ7H6q_Cg{l@G(CN z;Nm?mG^EfQ5s5Tn6M!n-$6fb5TNQCw#Bn@0@c8ger#<}AHPhK$&c&C!7=CMy{d`*54g2_RdnlhzBA<|KW+A`q<>IMRikJ^K6#2!!%rtuD0BGlj^!a*=0b{* z14iz#9B*4u+?6^nQpZ_dO6Or4iSSn<>ZvL*NWyD=u%xSYF;scWO9?brIVF8>Kbm6CnHjKSPX_LWPYWOyoM=W|}7tL!M}E zHt005D_z1{*y_dRE7K>OjkXj##rrV|MLD%5c~3K(qHg#uF?yo>*EwBAT?)2dt;TTM zhP8#bx8^C#%i&?Iqc-G?q9l6J6r|Jc15aH-MSCEZ;wH(VKNU!|(!8;byE`!SA!FC8 z@6a1U*rc59irbRa;6`Zo$bLJ$%*oY0cjA@%3iTCcFbxiNy_4BeW#87;S1TgiUuM^K zw1YliIJ?6IjvZQr1xkdjhn>-`hR$S2-zA?#41t8ZKIN%X&}8W~xG%{6K8DqF8cbgr z%?Dw=d0At~1%}R|E!J+Nh&x08ehp3_^>oy+#CVwtW}USY0(FNO8AsP1&aCJ@ja&W( zpNL)=qviGtJvugUB1N3+JY>B0pbhB!3GP|}V&2=z_e#ia7TM5QWYR&T2M|ekL7HL- z-&(3YL}r2!o0TtPXNGB8YQ;O`V1ta`K>F= zUQJB0U;?T3$=eH7sln9^PbjFN)s}s>h@Q?WsT(MlsZ{cqDWS^)W&W#5bvHztbQnhL1pe3?Cs8<#TH_hbg@EZ zPeDP&lb&dHA@%&FKwDG9iee$Qgi|%&JzQ`>>A&iLkB)sG3q8S_prD?Bbp9|+IlpS> zb_%(fq7T>V4)FOZg#2}t4zNgM$8!qtxydg>t}<^dOr$w|&1Yc|=0zavu5U-NXZu6F z!yq)~wJ;qSf77LR!jz+9NO{wjgXHz8b*i3BUHjLaTM{!H2S(C?^SITT)blUN{2?9n z$Vj|%18#+}vAG$OI~i0Z-d-&N{w@Y$h6(zRto*S%D;{%+8+)H?9;YV{x%Z=lVtYhj zW=%HS6cL5$t^Q$Eig6Zoq=2BiYGLBOWvVw7S#Vni$_+!GzDSsm*qF~`CTwqu(8{V3 z7`q3ut*nlu-K05jL#6ikdw{{Rzcz>kEu3Ae4&V#S4@*RdXAnWCqID>fno>%IJ)md`ByP8Z3C%mS z;PDjde}CavqB)Vj>nN|5GwcEZx0(okoCGpRlsR>NcvAJWr=n<)BnIWfMWY_>NX)Te zZ6*I|aKUC|&M|Vh`{@Wna~jtYooOxY?beU3yvv`fv-84TS+y3rSOZ;b$$SaE;CHeC1@&J9g*O?iQt8zWTfu2L`$8O{;SY}X z*%43}jP3Ro-;#@j7Vd1Ev@Zn^UeIF%Lm#)`4NR2V8L~p=53_Wzsr2d;`*9A}%y`{4 zQgxcBEQq0C_>V`y%6Joa1ezM{Fla$9C?}wnlsePX84L$~ST7$|=#4K>)-Q8di>|wN 
zw{9Pv^0G80EN`EqRM+cBMbfREs8)NM^$$?5qzP_2RS%Q*Z;+D~0)SEYZo7ktji^3e z?o42rgk9Lwj2?+xW|BvFu{T7OELMDa8A3_eyN-L<`d5jehge$nE5;{868EekdI>$yT9 z--GTK+|6TeI=XjuRJme281cy@lb<*|2y)pfohEu=qao%x5utA1w63H|VP4@X_`VJ< z%T;T|JFgBiuKrkE>&N!Q&l5RIdg(ddRk8d*=Hy;zkau63_o0T@H7LbRi~MycT_C3| z`08v?!?J-{%DvsX?P1l7xyi-;s)_f0b9S`J#4OdR1G;Ea>%$(gg{#}y-L>w=rb-eh zA4F7B`VY9iUV>RNU)g?s+76WzUA!vx`C1Y1?#}2HOrdn%@P}YQw(IB;biwXOq6v#z z$df@|{+KdNQnFNiJ*HZ^v4Uy3m_*bUtbuXX>38ZeZPCaq9z-fc5AV`xrHeb9_bLd6 zazZMtbtg<~VAICW=PlP)g{g#78{V;!{k)3F!%WTT#|!KPGUmlp1+GTwHOwTUH}J~q@u5V6 z9Vt8jXR}pz#C4mb)|4-jy!7yYNNxc=Nl4Jcme%ff#f4T;fr*U)l_-=Ag5i&PZt<2L zt9IwTAa<)H76mU37|%57nmE~v!anfO2l4gxL;Y>A4xJ`ClfP#=Upv-2{?7CkwR$bZ zm($l8XW@sho75i%?KOWzvp0vu*TFO)yB+NKjKW&6n-9J=o4!9`)C85ncn&EY9agr4 z^V+G&?QP9?3tbPofeu6)P^qTNI1|dbZ}1CZD6R$~V}&e~Z=KDYT0W6mKa`rZ@V=Wp z9J^mV*MIylc(Xa&Bn}~h@_G9_G2mOKpl6ZL%!r^mzOPnZJu;#pLwy}RFX=EplP|2< zJw4oWe87Fe5?)96Rb;$xt@YNeLB*J6 zN#)ehHPj~q`qhvBQf}>wI*Sch){7nBEXWG{Q6Rc|*6zFi!edx`}V%{0vzR!!w^Ww68Knck)m4Ek#cX#{5yF+T77@6teGPd

e4`BCS=GlB^7V{?2y7%}mA(7r3mB|vxF?wuG&CRLFNK=(nk`CdPOVDZ- z8=GTVce?yuJfWIj8Eg%Q8_U!wWR4ugR<`>x(A`$8>rRmi6SRbaw)TAM!hw@6_(Ei zZaMpwR1!v94VEB&&7NC=~wDcrFQBjduz`f{z zz@JV+X<^`X2WpT5Ak+aX)UX8I?!0$jDXbivHmDLO8kV}r_+>%Vlsbp3n7n|molhJ+ zcufw0LN2sg@)qr^3eaIAAU5BJlb4m3>uwx&<>Ix9yCEq#Ny-SOsHUrJHk>A_ z5)p6g(Q#wzq8`e}P1w`PhC98P*>wNdx5@{pVqI59?zl=jj=r!Lo`^g{Cd|q9lBpc` z?GN`%MSl-6!mLVjPm~^PdL{dI-^CGaYd+e-KKlCY!`5JBP;lKz$oaJP#Fg_tY<7iY z$rv&;aW7H0U#%SSvK7y!KBN*I^3W`z-`p$?QrWhycP=>IA7)85VZ}NTnrB^Y?)S3d z!~htIPtGpd)1^0kCEq@p_GxM)pH`ILu8F7K>-%`NCRLwly7%91;%*9dL`w23+m9AE zZ2)SlR@(gA{+*M=2ofqAm-eM`3o#}dt7Hn;EWBGj+T~qSptU8%LWsO$gYY#A(hh&H3JtO(-*O z^g~E*U!|zmqL)M>S}pgoT6&p{-uuOt7KpA{dihwSEE%0VlK${@Mt-XO!0AZx_s;v$ z#UcsXA{olvsH~dkw(D&&oC96+yV=Eufwd0SMVHfPMGb=INeZ(e4Ybv>VZ#{#%);J3 z+)caly&J@xy*&1r#^)^sK5A+jSvc3*RuBMCZzsH3vp646Pd%!4lcF&9b>~^}-L8fW zQ(YmBryBx+8pO{keGJJpCc_UI3G`+k1~qKExbK4NX#gfsSpxF7+tQO0xj3|TO8|t} z0mx7C)rhj&HJmq+1Qh+zi#*jpLA|sL!pyk2YJE8CXD5IGCP2lt-4x(<0e_3VX3DCm z$!eQOU*%%Ajg5G0JXF)py1QG>-S-z5806mUBg5H@CySGC0RT)o*p^Yl$DXQuGWKhsTNyC6|PT9Ev%_#LvreMA9Ej|xI^26BEBkd@@sBtq_tC)%mLD*oE zOj*5aaA;Cm zkGoJXoj6T0IK$lQ-14~_^l?FH4#0}@7$j?n@$u>7&%19=?wDUl zR#q1Kg{0OO^h`lQCaq>%-iLtRZ36VlI3~+XoX?+le`c&H?c6p_NXaUcUIsx`t!~AH{D%e($KDFv7@DU83Vb^WRBo~ zKf0ly=EmkmObkqPOnNK#aOC7%qyOFKXJsz$6B7qIRAuNg{M%Rrr3Z$H*|~^qjj`v- zNk5wd3d&rD*{F{KFlwkylI$6fFY?#k&EoR!P*V-fZ&Y(~II!_X1Ks3UCXoN?=i9+% zqA4okf(yx`U{I!wU*xHf$eejxZTedoihyLjl20@x&)=UyH={t)uyqxk7e}!5FNF^8P)*@nUo_T?gK!!ynnEvN4r#5wN*E1@6+>OeoB92J z0s0S;Dv1dR?i9D@`;qS9lQHZn>}F2pm1EE_13QXB(w-ERH(xcGS6B%GX8ck%vyV5#(_q_dMAsk!-WmBzeM@dV3A-+qjN z0bXtCb90tHw(x(=4ju~9|D_#|P^EQRjUy|I42XmPRx36;AYXCfAMbddL?0j1Ch+{9 eKEV9$pU5AZqX#rY4;IdA6koxRuEx%S%EUf24juBt%#fc60a0Ridzce0uU1b1hzzJLC6 z`|9__lP|vs2>vB_FZ)K@H)9h`9H{JbiQjW6v-#Qw8EA{^6(Rz@M}A?}P|+vVWr3^8F&+8z0(CI+JA0D?K_1rAYR-M zZZ_l)L{HD#sSSADn4yaIxjd7;oV#^g$8qag?zIy?WxB@$;2$Y{%$}yaSE(|Xx?S$h z8pU;`Yc&L|Ek(LPcX=E_l8?~$-^;bI5^HqL(Kpt63cB7Mf^FJVjSZ9=HxEx)A0!3~ 
zi;El1ozJS-{;QuXiG+MwYmH?oDB#bSyniG}8=2;}arau0zz0NxvK5|9@#>)~6+K zIj`v{2pnkj_^4R^7AgbYp!PbX14tSB@#AjSr+6tv?y9f#3R!jDd-Gcc);9}S6C=uL zw^kBVe^=OkbBtZ2!C^BL)wv~{Y_iPd9Fn_#pm6}M0DXfhh}0yZ+8}QsJxjpT)>gA4U2D_ z-rx+0YKymYaw##6@1?XfWCeS%%b}k%<{kpw5#%Uof$B$P*Ht2tN2{ z2RA>TZ!~qjEFiwzOfm9w9QMiOYetHZ!8gHeAWIueBiXJbxoPs5tggeI&u7{!z=ge7 z`HrTh!RHU8=@{6RYBVBajg8+yjb7Tjt?Q+e6NXAtNLU`GHlXe}I;_@L2yN7i|q2j-Il0GbhJnoc`4%+9-yEZO@ zGiyad5hr+gXEBJ5x?P$aU1XsTW-Q+u4D&m86jBs2wJ2Jg=#whs^e@Y-9RHjQ`2vJJWJdn^SYbOcn2 zdcR1VV%MMN>B$cM`GS*Cl5dkR{Xs(PtrAeY)5elju=HTCP^`CTVdAYuUv0Vq+1KDLgYZbl!foHzJe8y9u5- z(Nv!2Y23Oex3zDZRZAx|I4xRVKQ3WCtdLv*Swj}YKPRZT~yh#;du z0w$}I1s=AsES+Jtle|22-#(<;3jA^WYXgG__8P>_KHE(63B#HOL^`avz*o~S^h*vtdxgk2@bVRKzdELa|PB3*ToYJI?V|H@WAVEPJ6>Er9{JYeY^)Fb6v zbnau*_C;{demvh{`lol=bk(#HLx_r^dh7WF z@6q}s;#*l##gLP62**ZrCufD_2+#!|pg8s=@vK#d#lt=}NsD(-55n6w#?;ICxV5oA z3!o$KlnAY`>yeCSlld@LIP@u9-ZX6r{&LI`ScR<>%C%9LI#+p{zMjN1mL63-**z*C z<~IFtbRCcZuw$egGUC=&j2tzYuT+lgwxn7QulI7Qvp-xLWc7ADJ+iovXKOnq9l5#F zemYbmPe(m*B@NTOY>54IU%xmUFKg;RUu}IduTh2d*wBKX?v%E1a_`2z?F>s_iFbYC zjOuHzpmQiRvO!N8<#W1sAs)LHaN=v~FSi$&E?9oH^tmF}AxDtq0;}k_?|Z(4s{L(f ztQZ_o_^l7;M>&W}Q=-w*X}rjuuCal&+x`%rJWQ)a+3PF`6Z9lHH;{@qHl3*>I|`JN zN~bDaP;G3-Ho&?iFa8@l4JaI5ef2gN7dkU-67*XP%VzNWwGnOywGhpK<0&O7Q8D18>+HR8_3f>dFf&;`spjX>SV$C|C;wtx9{= zFj?h7Z5>c$Kh&m_x5_x$t*q~P(n|tB>FYm;u~|%^+*w8tn55YJc~z8AYdrq!QbiKT z5~bVZNRrKe+VQ?qSTO96Ni9!1K5^-faKF_0y9}H>5q~X$2#^Vy+xUQsXSGVH% z{delW8>(XUxJ@fwjO07e2zwn^Fy4Tl&5Xjs8l4TWoC8_Yh5K}T5MftmR{o+k9#SZ*NQ*(urhpWTV^&K>8(fu}4B>4;BK zH;sM_U=tKrA=Cbgj%tGb--e28+yZ)fdYc7PBKk*}`bUQ=+;33IiR2Y>_tdH#g)U5< zyot`s3#lQkrc@{Gn0INC+$kD4&-oN!oLCZB8Gn5u2+lJT5|X;p3MLHQ7q!b=@(7ub zHQ&;T{a&N+u5;nJS%!qZ#Mg4ab`rUCMqzpiU&VrLv!y@)-@V*FqcAgh=$s&=!rPt; zS5#E3dM!zyo>Nrac`PS=IjGNNpU-*=UfOV;?lLI+Ka^d@LY!(dcmXq_(ky|o-Z}(? 
zbZdbh-Dnzh_U2ty-_L_IrQJoESHffFnQzK#Viet8Ty}F5!&pO}U)Ul>b!u;*EJ4{q zp?jj2ibjwi5v0IsX7qLx!)d@7GR}G8nfLd8v00AAWSp4Py^+_`r^PPxXm+boD;C`s-h;4u_PM za)Eclzf9hrmtsrb#q2cka>Ck`Y=~~azB<6NaWA9gXQ$Iq z(#v(nMH-s%ypT3^=RVfW!0vO6;eG{evpIY#DTzE$`A&K|WXNn!dW#Z1 z|GAb*8s5ybDSwS@L0m#fiHTEq1}oT59Fv#r}Y$jAsI8bt|bW0%aGRhuHbe} ztZ7I`GGj|H+VG@ORJl7oH$Yk1w$wMX4vQ*#dMo;N>zS%yaF4H5;pYo-vCy{C0r+_< z)j%oJlf%}Ih~&On@xnH5V3+WJ|E-6$pB*kM?U-4-wfqP(&71A|;ZWI^j5v#&;s?Tg zKlr-lef-YI+n8m7%xx9;_hp6pQ-yEaB{{@ zdMnQ@f4T+CA*y#8?5Ek!yM6n9>ENU0vbx%y1Sx^#wPtWi+r_|7ZYd7`<$gN%jcC3I zr1k$53utUd+v2R>tfU4L75i2h?ars%DZ&jGCm+xRKTI51iN>FVD|w_>O3hr_FE%va zZ9Mf{RPc}*X#S9{?=QvTH86tz`XG%4b(*zc0}OIt(si=}^%919rg;AGMI}TN4PpNf zuku^=u-!alKGrKXd&z>GVOv9yXG%`@wIhqv?{+}d#DjUJj^3I6yXMxvu`H0DOurWq zfB?yN{M;JpaM6r4X*^yWxc8%0siRcP(612i6Mw-+l&*%yc2@u~J*Z`8w?{J(eX70W zGZN^_LrJ>;>F8seFSg357UAP+kFGkcWuV@Y@U(T)ZmA&Zjh)if{^>B8;>BF%4)C8s zINA(`i5Du*2nfu@?YY=qEi?X_gvI8cfg2G*;mSR zjJ`k8Hy*n2c4fO;BCB!b_eu5$v1yP+aC@i0mZ7WV3SZuN(3WakfX<^DBByaqk3wm> zY$~5DhY*D``0A6*jI%6W8`bWUGV{sPc(5u}cQbct_sW2t>ibD4;Q83f9YzKL$%g2- zW{>kEoi%HRqcIPKpv!_?KOoNZ{ylv%*rb$=)2BmSVXn&6D7_UXKI6loDtdsR8PHkW zZ#77)1HW6rf>0y7Iw`;~7Ki@q@bnAQ$_RxGRY?V9Oo|^q*|oSp7xo5z zvRk|Fp+>{YxvU*dMng_LAq(Ck^Dm7b@MZ30w@IN8@YDewAl)7@HvfFQzt{4j9Po|>d*ZF`uxmu(^>Gf)2T4K9n zIpkG@d;b}NBBP7EU%cu&VC+PCbfz~Y*{kxS;rk`Z(O^%b0JoEOnSnkL3hHz$7E?bO zEKIk{U+o?LTiqX*^@23gdA3NluR3tUKPA~p{P9{AQ?dT zbskn(>4f1)@rWM?;-%n)va3oa>WH$)sTNU>m&lFH#CG(r3EKPn20-`2Dn$+Ca?ReA z@H8^5N$0I#SberJqgzQr_EMLZzie2a2XywYt^_YVbMAXL$->;x)-3}wsQ#8--4}CE zmQuSrJD+S`CogxjDzKP%1*sq)?hdMBAvtje1?4L-<5Dsj@{7BQz$0M<+gN1aAN%GjiAXFlh$9z4l zKax#e7~gKMJ%g~K)4ni&#&IXULmE55$o-1aL8^SEW}+l4+%N^Jnx0Tt*V9h<(4}Bk zHZ#hGbO|BmH-T^%WtvF@+i$^syy4+hW1r|XjUplYhb%fspeRj}KCBR>R3Z!%IZPEA zyE~LFJh#%VSWm|tpKbH%;(&jJDN3E3}q=*oJ``IiFWmg>H)&oHxO zygaH1IfDX&v;R=3lAV@!g;yA)%(1}d5L%*2j^6cFMh!BT$@4wn(DEZ*ox{&(ye|u2 zMoEn4lZ13*fx9zZAN%r=#lF_vY8QrU_<;csvHv%G>9n_Q;m~F*xKEMnaw?4*FxK~v zxVXzQ+r--`eNK}5zn4s7yIm`~&?6)0gbH>y17O>MV7*V1c$cK#a;Xyn)x&95A 
zJ>k6ou`!2Izt@afy+X?Gr)!0Z#?QVfOcIWIG$ule7=hcrNgb9+H`t|5^rUF9k(7-; z>3u6*e0OD@4#7&Ho4OxF8`rjCQEvE|3a@?s50Y+{e$#QQzEB;Ljg5^r>Z*eN-OC5eN1jBe8s$ z@bW2aBzB*Bk^m}lDiHq$Z6Hv~v(Tr}|%AeTR+Y&o^O%5rm)9;Y9n6 z>``mo_ozcFePGVtLIP=N2ewSSc0n4P;bhYP;R%;$Pgcj*xNldrn+Cg|TFW;j4A_8qJD>?!LfVN5OMB()!)+9wSHrP$cdO3y#O`P1OHse+qXge@j=UA=KJT8>x z0H)r~#Lib~fvC9r5&U0@bok>Rz_w)+I=!%Qax88Igr!T!$!0R+0#$v9@{0QyuqvJI{eaX|dk)%ST8nnnD!r=zv< zwixQp5QnG;Q#U8_vZMWunOiF{F&ifXU&}iy!W0%cxz7`aKO-}Ya!$hiUXkqXc~JXx zPuxQk<3hIZM}0j}$H&vj77DMx#l<$;zT>fCXf! zCX&{;B*je9(6`q@dV9DW_G;r8m(Qv|9^h$9|Hp6kfC0QZfAt!7wMZ%*!~oph-3=$~ zsl+xUM8C-0hiLrZZuGolFu(6JX6-Dg?R6_XAbjo{Uk*^rxVNi(k0GH78c-SR%?v*A ziHX^!UoF^wuqD}0Ir<&sq}lM+@~sLc&EV0Ha%vVBU?~43U}pdJ+pEWc#Z38vpC0J* zBu`u^y`J~NaEMfh`IO0f@j{C!>B*DCD3b^hJ>GU73bvz+CO0WAk)MHbW}P+8CVDG? z{nY+%yiKVi9!Js(03E14)S-A)Z3AU?VH&lSF9iBi;%P$3o2r?7DF_7dnC)#0&FG-S zV_C(__|f&`)+2L+-i`nYx^ev^vAjaYE|VeQz}ZO&i|w2RDty<`8&X00ECn%n$o>(i zM&mV{BBNVG$M;Y~5L?Sdywd$)q5}Zk`{%(1m8F#1Dyulc{(HX11Y323c&*-_B}}a1 zUaWDW&;HMB@dH1dbN@lKFZ)7U3*|*Sn-(<-I$OJ_#sQTJ>C&8FEh%~jfp52>N3{6z zgMTnj9<6VuW@Ti!93C8Mb8g$~Z7jt!7oPXUpSmlo#V7YG={_`mTJ7bzlK+#lGo34= zsZwxDwqslL(H9*ps)7^>A0Oz>!iW!jzsL`by?0Y0uXYRjbsXDZ@Pjl~{ey|ri7$p5 zf;oqg0boozXQna2W;gQbVQ9N9Klh`g-xk!zt2JvMOBrld0)vi%&sKu4$8AWi`1OY^ z!Ty(Ug*!&=$ESsrQ{Dq#F-^|^$o>6T{{oME7(3tjPGCQ#$Z#dETt8|I`g)1-v|}`W zrDD*DX3On^;XJGS@fB-$B0AC?OlogI_q?c`Jib!K2AW38U6RHsGugda$wXog5Cv+c zzu{D)RsPabscg`)*PyLG3@6#sBALKB4F2ZARSAwL9Bd*Dd8QB;LYqVPH<7y!)|`|e z2(q@qs=*O1;G?H?0hku3(4FG7+4E#PuA?ksgY6-gX*_3061OH*V69~|i(*G_iM5XG zc9QTiFu}f86=zW&L+iyI6F8g>uIo2Ba4g}B*SSOo_=9Gu z#5=jJBItTqdl73cC8bo3SwhzFrJyFpwIDyb2(>dFMjKt1gv0j+nZ`bUhMyzB?s6Rw zjIF#|H6xP1V4-($JpOL}DA&#j0ITG>u;s<~U@SGe5MpXPNBK@{2UX932voEP10Mu>8L>c9i0_WYyhsR=q%z7rz4T<w9Sai zmZzCB>k(6Qbl8XBWFibz-b`W<9FqbhH7dP#Ccqt0F#C%wE0*c7`*Z5l%JXeWJFhdX zWBE+Qy)jdIka*x$M(PyxbLZ2O(%lCR>!$mc(`mYBrgBFk8@l`5zLvk!1lBv#HaMD| zq`ho5x3xXh(a?fp_n$UiJ#=0DzURwXv`U7_?UmAMbTS*kTP%cvg4U7v`h#&Pgw>M6 z*mU3=&ci`znebU=mRpx3|6UeeG?2>+?TRTLkuZ=UrFxMC-Z;E+U6l9azhL$+8{;Y= 
zk~5Nz)#j^tMg!pW#Bl z=JXYd22p-b1j;hh(J`K)qJ09pt2)`hhwF=T${lQ#-Y7Gf2{i9bZ9K` z>hf*Gx>ncJeq*51EfYOn<;x51%l0O9nbD`AK0ER|lSMU);rgjA??|Ef2lyAL({_h{ zSD&XJZ}V9s*{w&V{CSVf$8ef2ZibEuE0V4w_Q>;019L29W#y>HgT`yDWSmrNMLfMf z@AaQRmiTl1s~BHeC_ZFzi?s_0xOm|;g!Gj;8Y^0iJVV&9~>}a-i%;~5XMWBXYer+ zt!2`0x)PG`=-;@9Bf2{$eHbdHi*;(bMEqB$YQ>t%7iRs0gZGaz&(q9C`%>6tn%)r6 zi1|ULM~!JX&d<9y>jnuz zwgXXupyb9sno!^S>usHDss2i{UN45tdj^E{W!uqO$`f{{Z>J`7i5iWlhHStz$hmJ8 z;XdVZ?2S^e)nQJtRQ1GKz;U6i0Q3{zQ!z6_dQJW&J39TwzPPkhEwtRWsi=eL6P#7D zmCNB_&Zrgx=JNGQzj&TTkd)umIF8FJDk`3Vu|bV==+=g$Qb8F)=w86a?-x1tfaa!X z-iK)xwC97tBoDPWd{o!*lP1k`S{<_0$g~PAP5#8))q%PO!NKFRfI^$&@&yI2BV@4D zipz+WL?_IUY5}%d9kY5I(&*UR#$| zd-6(j%9w9;UayfdD3TIcS&NYx?i=auUR$n2{U_8TXJ7dl2gsHVdMhCW;aod7?n-F# z9_m3l@P7BwsWOu>UEA*5t-P8K#45scG35jrvaE`Eayj_)lW_-3dz9<^!$5W|n-vjK zaCXgLF43dt4)Ze~o8k?Yevl@o@eJGx?BrtYOZ;XS)aa^mKGJa7cX9r zd$qZIgy&b4&2!PQ*b^SoTvZc5-bZ-Lc3z z>L&w{XVX4k_GiT4@$ux8H)%yR_%5!M)zy>i4i&A=CGm@(42`Aq;OsJ`J6^^U8I!~i zqmUb(1^$jz%)kqY^6_v#-*eoqerP*wY@A`d!~_BmD7cbX9+ zMSruLUOSa+t^!U+v{sBvOnBH#s?N^%biJ3n8rpI`LsYKlqXE^JtHij9|G2MbPf-19 zF0O|;{{lRsnE0)#ZU||}e8)n?9(pC{%rSOFO#vG*Z9#$8qO;Dm#B%QfcJn3-^_H$F zs+>`1Oi8=p|EQ}0wb94b^;GI~l8|oh^EcNT1g@ZlRwh~p!CajEV|4}whCn(=Uok!2 zo4T5wBKpSWl@P&=Zlm9PpRedR$yt-|sGBvb+0-s;`c&wA2c8c3k%qCB2Q+8)wS^0K zG)SzMP*4jeaxUQhvP;qM{7r>Qlq2_bpQEY{8LI70GlVEz1)VWTo0n9Xyc}gUL!~zz zo@ZVWKv#@fT~UJ2t(j_3`8}t5+TBAfElPT(Hv+B!_P(Uoj_xYYjq3k%sbcgXdIlxM z-fKGj%7uQ7x=QX7tE&^COov{tB*Di3*$z!TY8wS=8!*#f*6m|}LWd?bHJgnB+usv0 zYr=8az0NKwD&B4LKc)O}M=$Ry9@fp<&DHgM$nh_~IqlUhZID6F00J4)GlW9)VB7fK zp&NRYV7pGM*1y`{i@|COIh3Tuu)$u2zu0cL9snm_fkpYmL7qVY#F7ghrX_l{dqdsbE&b5#tGr3Jyqhs?-%eGv`p2sTpm$%B zMrnz{#z3yox7>(TTW?O^}`Vl7Y<^ zm*B|xdAU;sBQ9=k&KKO=<%a!9arphc?;bY>O0b~A&>UW{R}4LPP0&&=H-Sxy}Wfr6xCv7{6%%$1t8GhO+aaA8tOVCE^QH1 z_~%SigbE6Y^XtVmBBCOqqB^9}+J_h)c@H@e_ywd7!k?U^bIt`fRO;zje4c8!L@p|K zvt;TFF)?IUQC%($rThxZb)zBhSa*)OQGSo7J!kfV(=j|D0RbLALXf$6C4T=Jci$n< XKL0+fr}n!4x`Fp{s +![Agents (HITL) phase — the agent_hitl_flow step card. 
In this local run the agent answered without calling save_scenario, so the step skipped by design; the one-click Approve button renders only when the agent triggers the gated save_scenario tool.](./img/agent-hitl-approve.png) ### Phase: Ops snapshot @@ -168,7 +168,7 @@ endpoints are 200-safe on an empty DB, so the step always reports `pass` unless every endpoint fails (then `warn`). The Inspect button deep-links to `/ops`. - +![Ops snapshot step card — the five-tile KPI grid (stale aliases, retraining queue, total runs, total aliases, degrading-health grains) rendered after a green showcase_rich run.](./img/ops-snapshot-tiles.png) ### Cross-cutting polish @@ -198,9 +198,9 @@ Five chrome-level additions wrap the page: phase. Prior behavior pinned the open panel to the running/fallback phase. - - - +![Showcase KPI strip — five tiles populated after a completed run: runs registered, aliases live, batch items completed, scenario plans saved, RAG chunks indexed.](./img/kpi-strip.png) +![Inspect-Artifacts panel — the grid of deep-link cards into every dashboard page populated by the run.](./img/inspect-artifacts-panel.png) +![Run history strip — the most recent runs with scenario, status, wall-clock, and a one-click Replay button.](./img/run-history-strip.png) ## Performance budgets From ec921e366b801c44e7782936366a7bc74359cee7 Mon Sep 17 00:00:00 2001 From: Gabor Szabo <168316277+w7-mgfcode@users.noreply.github.com> Date: Sun, 31 May 2026 23:56:38 +0200 Subject: [PATCH 09/30] Update docs/user-guide/showcase-walkthrough.md Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com> --- docs/user-guide/showcase-walkthrough.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/showcase-walkthrough.md b/docs/user-guide/showcase-walkthrough.md index a848533e..0f63837e 100644 --- a/docs/user-guide/showcase-walkthrough.md +++ b/docs/user-guide/showcase-walkthrough.md @@ -156,7 +156,7 @@ run. 
The Inspect button deep-links to `/chat` where the approved tool call is visible in the transcript. See [Agents and RAG Guide](./agents-and-rag-guide.md) for the approval gate. -![Agents (HITL) phase — the agent_hitl_flow step card. In this local run the agent answered without calling save_scenario, so the step skipped by design; the one-click Approve button renders only when the agent triggers the gated save_scenario tool.](./img/agent-hitl-approve.png) +![Agents (HITL) phase — the agent_hitl_flow step card. In this local run the agent answered without calling save_scenario, so the step is skipped by design; the one-click Approve button renders only when the agent triggers the gated save_scenario tool.](./img/agent-hitl-approve.png) ### Phase: Ops snapshot From d832b70a433aac3ce126f7f1a393bb2f7c58d4cc Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 00:51:04 +0200 Subject: [PATCH 10/30] fix(agents): persist pending_action for gated tool calls (#336) --- app/features/agents/agents/experiment.py | 22 +++ app/features/agents/deps.py | 33 +++++ app/features/agents/service.py | 158 ++++++++++++++-------- app/features/agents/tests/test_service.py | 147 ++++++++++++++++++++ 4 files changed, 302 insertions(+), 58 deletions(-) diff --git a/app/features/agents/agents/experiment.py b/app/features/agents/agents/experiment.py index d9bad5bd..1b0139f6 100644 --- a/app/features/agents/agents/experiment.py +++ b/app/features/agents/agents/experiment.py @@ -322,6 +322,13 @@ async def tool_create_alias( # Check if approval is required if requires_approval("create_alias"): + # Record a machine-readable approval request so the service layer + # can persist pending_action + emit approval_required (#336). 
+ ctx.deps.set_pending_action( + "create_alias", + {"alias_name": alias_name, "run_id": run_id, "description": description}, + f"Create alias '{alias_name}' pointing at run {run_id}", + ) return { "status": "approval_required", "action": "create_alias", @@ -366,6 +373,13 @@ async def tool_archive_run( # Check if approval is required if requires_approval("archive_run"): + # Record a machine-readable approval request so the service layer + # can persist pending_action + emit approval_required (#336). + ctx.deps.set_pending_action( + "archive_run", + {"run_id": run_id}, + f"Archive run {run_id}", + ) return { "status": "approval_required", "action": "archive_run", @@ -466,6 +480,14 @@ async def tool_save_scenario( # Check if approval is required — mirrors tool_create_alias exactly. if requires_approval("save_scenario"): + # Record a machine-readable approval request so the service layer + # can persist pending_action + emit approval_required (#336). The + # arguments dict is exactly what _execute_pending_action replays. + ctx.deps.set_pending_action( + "save_scenario", + arguments, + f"Save scenario plan '{name}' for store {store_id} / product {product_id}", + ) return { "status": "approval_required", "action": "save_scenario", diff --git a/app/features/agents/deps.py b/app/features/agents/deps.py index 23bcf1f8..3344ad67 100644 --- a/app/features/agents/deps.py +++ b/app/features/agents/deps.py @@ -7,6 +7,7 @@ from __future__ import annotations from dataclasses import dataclass, field +from typing import Any from sqlalchemy.ext.asyncio import AsyncSession @@ -23,14 +24,46 @@ class AgentDeps: session_id: Current agent session ID. request_id: Optional request correlation ID for logging. tool_call_count: Counter for tool calls in this run. + pending_action: Machine-readable HITL approval request recorded by a + gated tool when it short-circuits without persisting (#336). 
The + service layer reads this after the agent run to flip the session to + ``awaiting_approval`` and emit the ``approval_required`` event, + instead of relying on the model echoing the request into its + structured output. """ db: AsyncSession session_id: str request_id: str | None = None tool_call_count: int = field(default=0) + pending_action: dict[str, Any] | None = field(default=None) def increment_tool_calls(self) -> int: """Increment and return the tool call count.""" self.tool_call_count += 1 return self.tool_call_count + + def set_pending_action( + self, + action_type: str, + arguments: dict[str, Any], + description: str, + ) -> None: + """Record that a gated tool call needs human approval (HITL). + + Called by approval-gated tools (e.g. ``save_scenario``, ``create_alias``, + ``archive_run``) instead of persisting their effect. The ``arguments`` + dict must carry everything ``AgentService._execute_pending_action`` needs + to run the action once a human approves it. + + Args: + action_type: The gated action name (``create_alias`` / ``archive_run`` + / ``save_scenario``). + arguments: Arguments to replay when the action is approved. + description: Human-readable summary shown on the approval card. + """ + self.pending_action = { + "action_type": action_type, + "arguments": arguments, + "description": description, + } diff --git a/app/features/agents/service.py b/app/features/agents/service.py index 1b3c4644..cdc83882 100644 --- a/app/features/agents/service.py +++ b/app/features/agents/service.py @@ -314,9 +314,26 @@ async def chat( # NOTE: PydanticAI v1.48.0 uses result.output (not result.data) result_data: Any = result.output - # Check for pending_action in result data (primary trigger) + # Primary trigger (#336): a gated tool recorded a machine-readable + # approval request on deps. 
Deterministic — does not rely on the model + # echoing the request into its structured output (ExperimentReport has + # no pending_action field, so the legacy triggers below never fired). + if deps.pending_action: + pending_approval = True + pending_action = self._record_pending_action( + session, + action_type=str(deps.pending_action.get("action_type", "unknown")), + arguments=deps.pending_action.get("arguments") or {}, + description=str( + deps.pending_action.get("description") + or f"Agent requested approval for " + f"{deps.pending_action.get('action_type', 'unknown')}" + ), + now=now, + ) + # Legacy trigger: structured output carried a pending_action field. # The agent tools should return a pending_action dict with action_type and arguments - if hasattr(result_data, "pending_action") and result_data.pending_action: + elif hasattr(result_data, "pending_action") and result_data.pending_action: pending_approval = True pending_action_data = result_data.pending_action # Extract action details - support both dict and object with attributes @@ -335,33 +352,19 @@ async def chat( f"Agent requested approval for {action_type}", ) - session.pending_action = { - "action_id": uuid.uuid4().hex[:16], - "action_type": action_type, - "description": description, - "arguments": arguments, - "created_at": now.isoformat(), - "expires_at": ( - now + timedelta(minutes=self.settings.agent_approval_timeout_minutes) - ).isoformat(), - } - session.status = SessionStatus.AWAITING_APPROVAL.value - pending_action = self._format_pending_action(session.pending_action) + pending_action = self._record_pending_action( + session, action_type, arguments, description, now + ) # Fallback: check approval_required flag (legacy trigger) elif hasattr(result_data, "approval_required") and result_data.approval_required: pending_approval = True - session.pending_action = { - "action_id": uuid.uuid4().hex[:16], - "action_type": "unknown", - "description": "Agent requested approval for an action", - 
"arguments": {}, - "created_at": now.isoformat(), - "expires_at": ( - now + timedelta(minutes=self.settings.agent_approval_timeout_minutes) - ).isoformat(), - } - session.status = SessionStatus.AWAITING_APPROVAL.value - pending_action = self._format_pending_action(session.pending_action) + pending_action = self._record_pending_action( + session, + "unknown", + {}, + "Agent requested approval for an action", + now, + ) # Update session usage = result.usage() @@ -502,8 +505,28 @@ async def stream_chat( pending_approval = False stream_now = datetime.now(UTC) - # Check for pending_action in result data (primary trigger) - if hasattr(final_result, "pending_action") and final_result.pending_action: + # Primary trigger (#336): a gated tool recorded a + # machine-readable approval request on deps. Deterministic + # — the experiment agent's ExperimentReport output has no + # pending_action field, so the legacy triggers below never + # fired and the approval_required event was never emitted. + if deps.pending_action: + pending_approval = True + pending_action = self._record_pending_action( + session, + action_type=str(deps.pending_action.get("action_type", "unknown")), + arguments=deps.pending_action.get("arguments") or {}, + description=str( + deps.pending_action.get("description") + or "Agent requested approval for " + f"{deps.pending_action.get('action_type', 'unknown')}" + ), + now=stream_now, + ) + # Legacy trigger: structured output carried pending_action. 
+ elif ( + hasattr(final_result, "pending_action") and final_result.pending_action + ): pending_approval = True pending_action_data = final_result.pending_action # Extract action details - support both dict and object with attributes @@ -522,42 +545,22 @@ async def stream_chat( f"Agent requested approval for {action_type}", ) - session.pending_action = { - "action_id": uuid.uuid4().hex[:16], - "action_type": action_type, - "description": description, - "arguments": arguments, - "created_at": stream_now.isoformat(), - "expires_at": ( - stream_now - + timedelta( - minutes=self.settings.agent_approval_timeout_minutes - ) - ).isoformat(), - } - session.status = SessionStatus.AWAITING_APPROVAL.value - pending_action = self._format_pending_action(session.pending_action) + pending_action = self._record_pending_action( + session, action_type, arguments, description, stream_now + ) # Fallback: check approval_required flag (legacy trigger) elif ( hasattr(final_result, "approval_required") and final_result.approval_required ): pending_approval = True - session.pending_action = { - "action_id": uuid.uuid4().hex[:16], - "action_type": "unknown", - "description": "Agent requested approval for an action", - "arguments": {}, - "created_at": stream_now.isoformat(), - "expires_at": ( - stream_now - + timedelta( - minutes=self.settings.agent_approval_timeout_minutes - ) - ).isoformat(), - } - session.status = SessionStatus.AWAITING_APPROVAL.value - pending_action = self._format_pending_action(session.pending_action) + pending_action = self._record_pending_action( + session, + "unknown", + {}, + "Agent requested approval for an action", + stream_now, + ) await db.flush() @@ -825,6 +828,45 @@ def _deserialize_messages( ) return [] + def _record_pending_action( + self, + session: AgentSession, + action_type: str, + arguments: dict[str, Any], + description: str, + now: datetime, + ) -> PendingAction | None: + """Persist a HITL approval request on the session and format it. 
+ + Builds the canonical ``session.pending_action`` dict (fresh action_id + + expiry), flips the session to ``awaiting_approval``, and returns the + ``PendingAction`` schema for the response / stream event. Shared by the + deterministic deps-based trigger (#336) and the legacy structured-output + triggers so all three paths persist an identical shape. + + Args: + session: The agent session to mutate. + action_type: Gated action name. + arguments: Arguments to replay on approval. + description: Human-readable approval-card summary. + now: Timestamp used for created_at / expires_at. + + Returns: + The formatted PendingAction, or None if formatting fails. + """ + session.pending_action = { + "action_id": uuid.uuid4().hex[:16], + "action_type": action_type, + "description": description, + "arguments": arguments, + "created_at": now.isoformat(), + "expires_at": ( + now + timedelta(minutes=self.settings.agent_approval_timeout_minutes) + ).isoformat(), + } + session.status = SessionStatus.AWAITING_APPROVAL.value + return self._format_pending_action(session.pending_action) + def _format_pending_action( self, pending: dict[str, Any] | None, diff --git a/app/features/agents/tests/test_service.py b/app/features/agents/tests/test_service.py index 08064495..47b90c31 100644 --- a/app/features/agents/tests/test_service.py +++ b/app/features/agents/tests/test_service.py @@ -777,3 +777,150 @@ def test_increment_tool_calls(self, mock_db_session: AsyncMock) -> None: assert deps.tool_call_count == 1 deps.increment_tool_calls() assert deps.tool_call_count == 2 + + def test_set_pending_action_records_request(self, mock_db_session: AsyncMock) -> None: + """set_pending_action should record a machine-readable HITL request (#336).""" + deps = AgentDeps(db=mock_db_session, session_id="test-123") + assert deps.pending_action is None + + deps.set_pending_action( + "save_scenario", + {"name": "p", "run_id": "r", "store_id": 1, "product_id": 2}, + "Save scenario plan 'p'", + ) + + assert 
deps.pending_action is not None + assert deps.pending_action["action_type"] == "save_scenario" + assert deps.pending_action["arguments"]["run_id"] == "r" + assert deps.pending_action["description"] == "Save scenario plan 'p'" + + +class TestAgentServiceDepsApproval: + """Regression tests for #336 — gated tools propagate approval via deps. + + The experiment agent's structured output (ExperimentReport) carries no + pending_action/approval_required field, so a gated tool call (e.g. + save_scenario) used to leave the session ``active`` with no pending action + and no ``approval_required`` event. These assert the deterministic + deps-based path: tool -> deps.pending_action -> awaiting_approval -> + approval_required. + """ + + @staticmethod + def _save_scenario_pending(deps: AgentDeps) -> None: + """Simulate the gated save_scenario tool short-circuiting for approval.""" + deps.set_pending_action( + "save_scenario", + { + "name": "plan-a", + "run_id": "702c7ce74e9848d3b11f124a71bf7b50", + "store_id": 111, + "product_id": 339, + "horizon": 14, + "assumptions": {}, + "source": "agent", + "agent_session_id": deps.session_id, + }, + "Save scenario plan 'plan-a' for store 111 / product 339", + ) + + @pytest.mark.asyncio + async def test_chat_persists_pending_action_from_deps( + self, + sample_active_session: AgentSession, + sample_experiment_report: ExperimentReport, + ) -> None: + """chat() must persist deps.pending_action even when the output lacks one.""" + service = AgentService() + mock_db = AsyncMock() + mock_result = MagicMock() + mock_result.scalar_one_or_none.return_value = sample_active_session + mock_db.execute.return_value = mock_result + + def _run(message: str, *, deps: AgentDeps, message_history: Any) -> MagicMock: + # A gated tool fired during the run and recorded the approval request. 
+ self._save_scenario_pending(deps) + res = MagicMock() + res.output = sample_experiment_report # no pending_action field + usage = MagicMock() + usage.total_tokens = 7 + res.usage.return_value = usage + res.all_messages.return_value = [] + return res + + mock_agent = MagicMock() + mock_agent.run = AsyncMock(side_effect=_run) + + with patch.object(service, "_get_agent", return_value=mock_agent): + response = await service.chat( + db=mock_db, + session_id=sample_active_session.session_id, + message="Save a what-if scenario plan for run 702c...", + ) + + assert response.pending_approval is True + assert response.pending_action is not None + assert response.pending_action.action_type == "save_scenario" + assert response.pending_action.arguments["run_id"] == "702c7ce74e9848d3b11f124a71bf7b50" + assert sample_active_session.status == SessionStatus.AWAITING_APPROVAL.value + assert sample_active_session.pending_action is not None + assert sample_active_session.pending_action["action_type"] == "save_scenario" + + @pytest.mark.asyncio + async def test_stream_chat_emits_approval_required_from_deps( + self, + sample_active_session: AgentSession, + sample_experiment_report: ExperimentReport, + ) -> None: + """stream_chat() must emit approval_required from deps.pending_action.""" + service = AgentService() + mock_db = AsyncMock() + mock_result = MagicMock() + mock_result.scalar_one_or_none.return_value = sample_active_session + mock_db.execute.return_value = mock_result + + report = sample_experiment_report + + class _StubStream: + async def __aenter__(self) -> MagicMock: + stream = MagicMock() + + async def _stream_text() -> AsyncIterator[str]: + # Structured-output agents cannot stream text deltas; mirror + # that by yielding nothing. 
+ return + yield # pragma: no cover + + stream.stream_text = _stream_text + stream.get_output = AsyncMock(return_value=report) + usage = MagicMock() + usage.total_tokens = 9 + stream.usage.return_value = usage + stream.all_messages.return_value = [] + return stream + + async def __aexit__(self, *exc: object) -> bool: + return False + + def _run_stream(message: str, *, deps: AgentDeps, message_history: Any) -> _StubStream: + self._save_scenario_pending(deps) + return _StubStream() + + mock_agent = MagicMock() + mock_agent.run_stream = MagicMock(side_effect=_run_stream) + + with patch.object(service, "_get_agent", return_value=mock_agent): + events = [ + event + async for event in service.stream_chat( + db=mock_db, + session_id=sample_active_session.session_id, + message="Save a what-if scenario plan for run 702c...", + ) + ] + + approval_events = [e for e in events if e.event_type == "approval_required"] + assert len(approval_events) == 1 + assert approval_events[0].data["action"].action_type == "save_scenario" + assert sample_active_session.status == SessionStatus.AWAITING_APPROVAL.value + assert sample_active_session.pending_action is not None From 5af7e26f2af9cb0d4e923b07eb288feb2f8567ae Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 02:10:22 +0200 Subject: [PATCH 11/30] fix(api): honor feature_frame_version >= 3 in ops + registry (#338) --- app/features/ops/service.py | 5 ++- app/features/ops/tests/test_service.py | 43 +++++++++++++++++---- app/features/registry/service.py | 5 ++- app/features/registry/tests/test_service.py | 15 +++++-- 4 files changed, 55 insertions(+), 13 deletions(-) diff --git a/app/features/ops/service.py b/app/features/ops/service.py index 43c59318..4ec3af78 100644 --- a/app/features/ops/service.py +++ b/app/features/ops/service.py @@ -154,7 +154,10 @@ def _run_feature_frame_version(run: ModelRun) -> int: """ info = run.runtime_info or {} value = info.get("feature_frame_version") - if isinstance(value, int) and value in 
(1, 2): + # Honor any positive int V (feature_frame_version is an opaque incrementing + # integer per docs/_base/DOMAIN_MODEL.md). bool is excluded because it + # subclasses int. Missing / invalid value -> V=1 back-compat (#338). + if isinstance(value, int) and not isinstance(value, bool) and value >= 1: return value return 1 diff --git a/app/features/ops/tests/test_service.py b/app/features/ops/tests/test_service.py index fcfb61a7..8a4c107c 100644 --- a/app/features/ops/tests/test_service.py +++ b/app/features/ops/tests/test_service.py @@ -188,14 +188,23 @@ def test_run_feature_frame_version_reads_runtime_info() -> None: assert _run_feature_frame_version(_make_run(run_id="b")) == 1 -def test_run_feature_frame_version_rejects_unsupported_value() -> None: - """Unknown int (e.g. 3) or non-int values fall back to V=1 (defensive).""" - legacy_explicit_v3 = _make_run(run_id="bad-int") - legacy_explicit_v3.runtime_info = {"feature_frame_version": 3} - legacy_str = _make_run(run_id="bad-str") - legacy_str.runtime_info = {"feature_frame_version": "2"} - assert _run_feature_frame_version(legacy_explicit_v3) == 1 - assert _run_feature_frame_version(legacy_str) == 1 +def test_run_feature_frame_version_honors_any_positive_int() -> None: + """Any positive int V is honored (e.g. 3); non-int / non-positive / bool -> V=1. + + Regression for #338: feature_frame_version is an opaque incrementing integer + (docs/_base/DOMAIN_MODEL.md), so V>=3 must NOT be clamped to 1 — the showcase + stale_alias_trigger step registers a V=3 run to fire the + feature_frame_version_mismatch verdict. + """ + v3 = _make_run(run_id="v3") + v3.runtime_info = {"feature_frame_version": 3} + assert _run_feature_frame_version(v3) == 3 + + # Non-int / non-positive / bool all fall back to V=1. 
+ for bad in ("2", 0, -1, True): + run = _make_run(run_id=f"bad-{bad!r}") + run.runtime_info = {"feature_frame_version": bad} + assert _run_feature_frame_version(run) == 1 def test_alias_staleness_legacy_run_treated_as_v1_no_spurious_mismatch() -> None: @@ -241,6 +250,24 @@ def test_alias_staleness_v_mismatch_wins_over_newer_run() -> None: assert comparable_v == 2 +def test_alias_staleness_v1_alias_v3_latest_reports_mismatch() -> None: + """A V1 alias with a newer V3 comparable reports MISMATCH, not NEWER (#338). + + Mirrors the showcase stale_alias_trigger scenario: the demo-production alias + points at a V1 run while the grain's newest run is V=3. Before #338 the V=3 + latest was clamped to V=1, so this fell through to NEWER_SUCCESS_RUN. + """ + older = datetime(2026, 1, 1, tzinfo=UTC) + newer = datetime(2026, 5, 1, tzinfo=UTC) + run = _make_run(run_id="v1-alias", created_at=older, feature_frame_version=1) + latest = _make_run(run_id="v3-latest", created_at=newer, feature_frame_version=3) + is_stale, reason, alias_v, comparable_v = _alias_staleness(run, {(1, 1): latest}) + assert is_stale is True + assert reason == StaleReason.FEATURE_FRAME_VERSION_MISMATCH.value + assert alias_v == 1 + assert comparable_v == 3 + + def test_alias_staleness_same_v_newer_run_uses_newer_reason() -> None: """V matches but the comparable is newer → NEWER_SUCCESS_RUN reason.""" older = datetime(2026, 1, 1, tzinfo=UTC) diff --git a/app/features/registry/service.py b/app/features/registry/service.py index 503c45d2..37e8a3fe 100644 --- a/app/features/registry/service.py +++ b/app/features/registry/service.py @@ -649,7 +649,10 @@ def _extract_feature_frame_version( if not runtime_info_extras: return 1 value = runtime_info_extras.get("feature_frame_version") - if isinstance(value, int) and value in (1, 2): + # Honor any positive int V (feature_frame_version is an opaque + # incrementing integer per docs/_base/DOMAIN_MODEL.md). bool is excluded + # because it subclasses int. 
Missing / invalid value -> V=1 (#338). + if isinstance(value, int) and not isinstance(value, bool) and value >= 1: return value return 1 diff --git a/app/features/registry/tests/test_service.py b/app/features/registry/tests/test_service.py index abd2a2ce..1014d7bc 100644 --- a/app/features/registry/tests/test_service.py +++ b/app/features/registry/tests/test_service.py @@ -165,10 +165,19 @@ def test_extract_feature_frame_version_explicit_v2(self) -> None: """Explicit feature_frame_version=2 round-trips.""" assert RegistryService._extract_feature_frame_version({"feature_frame_version": 2}) == 2 - def test_extract_feature_frame_version_rejects_unsupported_value(self) -> None: - """Unknown int (e.g. 3) and non-int (e.g. '2') fall back to V1.""" - assert RegistryService._extract_feature_frame_version({"feature_frame_version": 3}) == 1 + def test_extract_feature_frame_version_honors_any_positive_int(self) -> None: + """Any positive int V is honored (e.g. 3); non-int / non-positive / bool -> V1. + + Regression for #338: feature_frame_version is an opaque incrementing + integer, so V>=3 must not be clamped to 1 (the showcase + stale_alias_trigger step registers a V=3 run). + """ + assert RegistryService._extract_feature_frame_version({"feature_frame_version": 3}) == 3 + assert RegistryService._extract_feature_frame_version({"feature_frame_version": 7}) == 7 + # Non-int / non-positive / bool all fall back to V1. 
assert RegistryService._extract_feature_frame_version({"feature_frame_version": "2"}) == 1 + assert RegistryService._extract_feature_frame_version({"feature_frame_version": 0}) == 1 + assert RegistryService._extract_feature_frame_version({"feature_frame_version": True}) == 1 class TestRegistryServiceConfigDiff: From 985ef8e6e0453e52d0e22d5a3f3517489677477c Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 02:22:55 +0200 Subject: [PATCH 12/30] fix(api): treat ollama agent provider as key-present in showcase (#340) --- app/features/demo/pipeline.py | 8 +++++- app/features/demo/tests/test_pipeline.py | 35 ++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/app/features/demo/pipeline.py b/app/features/demo/pipeline.py index 3eb64533..041d5361 100644 --- a/app/features/demo/pipeline.py +++ b/app/features/demo/pipeline.py @@ -280,15 +280,21 @@ def _model_config_payload(model_type: str) -> dict[str, Any]: def _llm_key_present() -> bool: - """Return True when the configured agent model's provider API key is set. + """Return True when the configured agent model's provider can be used. Matches the provider prefix of ``agent_default_model`` so the agent step skips gracefully when its provider is unreachable. Logs key PRESENCE only, never the value (port of run_demo.py:317-335; see security-patterns.md). + + The local ``ollama`` provider needs no API key (#340), so it always returns + True — the agent step still degrades gracefully if Ollama is unreachable + (the chat round-trip fails and the step skips via its error path). 
""" settings = get_settings() model = settings.agent_default_model provider = model.split(":", 1)[0] if ":" in model else "" + if provider == "ollama": + return True if provider == "anthropic": return bool(settings.anthropic_api_key) if provider == "openai": diff --git a/app/features/demo/tests/test_pipeline.py b/app/features/demo/tests/test_pipeline.py index 971e1dd7..5f73a8c8 100644 --- a/app/features/demo/tests/test_pipeline.py +++ b/app/features/demo/tests/test_pipeline.py @@ -1801,6 +1801,41 @@ async def request( return _HitlClient(event_sink=intermediate), intermediate +def test_llm_key_present_ollama_needs_no_key(monkeypatch): + """#340 — the local ollama provider needs no API key, so the gate is True. + + Without this, a local-Ollama stack (agent_default_model=ollama:*) makes the + showcase agent_hitl_flow / agent steps skip with "no API key matching + agent_default_model provider" even though Ollama is reachable. + """ + monkeypatch.setattr( + pipeline, + "get_settings", + lambda: SimpleNamespace( + agent_default_model="ollama:qwen3:8b", + anthropic_api_key="", + openai_api_key="", + google_api_key="", + ), + ) + assert pipeline._llm_key_present() is True + + +def test_llm_key_present_cloud_still_requires_key(monkeypatch): + """Regression guard for #340 — a cloud provider still requires its key.""" + monkeypatch.setattr( + pipeline, + "get_settings", + lambda: SimpleNamespace( + agent_default_model="openai:gpt-4.1-mini", + anthropic_api_key="", + openai_api_key="", + google_api_key="", + ), + ) + assert pipeline._llm_key_present() is False + + async def test_agent_hitl_flow_happy_path(monkeypatch, tmp_path): """PRP-41 — full HITL round-trip: chat -> intermediate -> approve -> pass.""" monkeypatch.setattr( From 535946c1deb338273256d186bd2419a3c5970224 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 02:40:24 +0200 Subject: [PATCH 13/30] fix(agents): non-streaming fallback for ollama agent chat (#342) --- app/features/agents/service.py | 288 
++++++++++++---------- app/features/agents/tests/test_service.py | 126 ++++++++++ 2 files changed, 285 insertions(+), 129 deletions(-) diff --git a/app/features/agents/service.py b/app/features/agents/service.py index cdc83882..20625e01 100644 --- a/app/features/agents/service.py +++ b/app/features/agents/service.py @@ -459,153 +459,183 @@ async def stream_chat( agent_type=session.agent_type, ) - # Stream the response + # Stream the response. Ollama's OpenAI-compat endpoint rejects + # PydanticAI's streamed request with 400 "invalid message content type: + # " (#342), while the non-streaming run() path works — so fall back + # to run() for the ollama provider and emit the result as a single + # text_delta plus the usual approval/complete events. Cloud providers + # keep the true token-streaming path. + default_model = self.settings.agent_default_model + provider = default_model.split(":", 1)[0] if ":" in default_model else "" + stream_supported = provider != "ollama" try: with _sequential_tool_execution(): async with asyncio.timeout(self.settings.agent_timeout_seconds): - async with agent.run_stream( - message, - deps=deps, - message_history=message_history, - ) as result: - try: - async for text in result.stream_text(): - yield StreamEvent( - event_type="text_delta", - data={"delta": text}, - timestamp=datetime.now(UTC), + final_result: Any + usage: Any + all_messages: list[ModelMessage] + + if stream_supported: + async with agent.run_stream( + message, + deps=deps, + message_history=message_history, + ) as result: + try: + async for text in result.stream_text(): + yield StreamEvent( + event_type="text_delta", + data={"delta": text}, + timestamp=datetime.now(UTC), + ) + except Exception as e: + # Structured output agents (output_type=...) cannot + # stream raw text deltas. Skip delta streaming and + # only emit the final complete event. 
+ logger.info( + "agents.stream_chat_text_delta_unavailable", + session_id=session_id, + error=str(e), + error_type=type(e).__name__, ) - except Exception as e: - # Structured output agents (output_type=...) cannot stream raw text deltas. - # In that case we skip delta streaming and only emit the final complete event. - logger.info( - "agents.stream_chat_text_delta_unavailable", - session_id=session_id, - error=str(e), - error_type=type(e).__name__, + # NOTE: PydanticAI exposes get_output() on StreamedRunResult. + final_result = await result.get_output() + usage = result.usage() + all_messages = result.all_messages() + else: + # #342 — non-streaming fallback for the ollama provider. + run_result = await agent.run( + message, + deps=deps, + message_history=message_history, + ) + final_result = run_result.output + usage = run_result.usage() + all_messages = run_result.all_messages() + + # Update session (shared by both paths) + session.message_history = self._serialize_messages(all_messages) + session.total_tokens_used += usage.total_tokens or 0 + session.tool_calls_count += deps.tool_call_count + session.last_activity = datetime.now(UTC) + session.expires_at = session.last_activity + timedelta( + minutes=self.settings.agent_session_ttl_minutes + ) + + await db.flush() + + # Check for pending approval actions (mirror chat() logic) + pending_action = None + pending_approval = False + stream_now = datetime.now(UTC) + + # Primary trigger (#336): a gated tool recorded a + # machine-readable approval request on deps. Deterministic + # — the experiment agent's ExperimentReport output has no + # pending_action field, so the legacy triggers below never + # fired and the approval_required event was never emitted. 
+ if deps.pending_action: + pending_approval = True + pending_action = self._record_pending_action( + session, + action_type=str(deps.pending_action.get("action_type", "unknown")), + arguments=deps.pending_action.get("arguments") or {}, + description=str( + deps.pending_action.get("description") + or "Agent requested approval for " + f"{deps.pending_action.get('action_type', 'unknown')}" + ), + now=stream_now, + ) + # Legacy trigger: structured output carried pending_action. + elif hasattr(final_result, "pending_action") and final_result.pending_action: + pending_approval = True + pending_action_data = final_result.pending_action + # Extract action details - support both dict and object with attributes + if isinstance(pending_action_data, dict): + action_type = pending_action_data.get("action_type", "unknown") + arguments = pending_action_data.get("arguments", {}) + description = pending_action_data.get( + "description", f"Agent requested approval for {action_type}" + ) + else: + action_type = getattr(pending_action_data, "action_type", "unknown") + arguments = getattr(pending_action_data, "arguments", {}) + description = getattr( + pending_action_data, + "description", + f"Agent requested approval for {action_type}", ) - # Get final result and update session - # NOTE: PydanticAI v1.48 exposes get_output() on StreamedRunResult. 
- final_result: Any = await result.get_output() - usage = result.usage() - - session.message_history = self._serialize_messages(result.all_messages()) - session.total_tokens_used += usage.total_tokens or 0 - session.tool_calls_count += deps.tool_call_count - session.last_activity = datetime.now(UTC) - session.expires_at = session.last_activity + timedelta( - minutes=self.settings.agent_session_ttl_minutes + pending_action = self._record_pending_action( + session, action_type, arguments, description, stream_now + ) + # Fallback: check approval_required flag (legacy trigger) + elif ( + hasattr(final_result, "approval_required") + and final_result.approval_required + ): + pending_approval = True + pending_action = self._record_pending_action( + session, + "unknown", + {}, + "Agent requested approval for an action", + stream_now, ) - await db.flush() - - # Check for pending approval actions (mirror chat() logic) - pending_action = None - pending_approval = False - stream_now = datetime.now(UTC) - - # Primary trigger (#336): a gated tool recorded a - # machine-readable approval request on deps. Deterministic - # — the experiment agent's ExperimentReport output has no - # pending_action field, so the legacy triggers below never - # fired and the approval_required event was never emitted. - if deps.pending_action: - pending_approval = True - pending_action = self._record_pending_action( - session, - action_type=str(deps.pending_action.get("action_type", "unknown")), - arguments=deps.pending_action.get("arguments") or {}, - description=str( - deps.pending_action.get("description") - or "Agent requested approval for " - f"{deps.pending_action.get('action_type', 'unknown')}" - ), - now=stream_now, - ) - # Legacy trigger: structured output carried pending_action. 
- elif ( - hasattr(final_result, "pending_action") and final_result.pending_action - ): - pending_approval = True - pending_action_data = final_result.pending_action - # Extract action details - support both dict and object with attributes - if isinstance(pending_action_data, dict): - action_type = pending_action_data.get("action_type", "unknown") - arguments = pending_action_data.get("arguments", {}) - description = pending_action_data.get( - "description", f"Agent requested approval for {action_type}" - ) - else: - action_type = getattr(pending_action_data, "action_type", "unknown") - arguments = getattr(pending_action_data, "arguments", {}) - description = getattr( - pending_action_data, - "description", - f"Agent requested approval for {action_type}", - ) + await db.flush() - pending_action = self._record_pending_action( - session, action_type, arguments, description, stream_now - ) - # Fallback: check approval_required flag (legacy trigger) + # Build the response text (shared by both paths). + response_message: str = "No response generated." 
+ if final_result: + if hasattr(final_result, "answer") and final_result.answer: + response_message = str(final_result.answer) + elif hasattr(final_result, "summary") and final_result.summary: + response_message = str(final_result.summary) elif ( - hasattr(final_result, "approval_required") - and final_result.approval_required + hasattr(final_result, "recommendations") + and final_result.recommendations ): - pending_approval = True - pending_action = self._record_pending_action( - session, - "unknown", - {}, - "Agent requested approval for an action", - stream_now, - ) - - await db.flush() - - # If approval is required, emit approval_required event - if pending_approval and pending_action: - yield StreamEvent( - event_type="approval_required", - data={ - "action": pending_action, - "message": "Human approval required before proceeding.", - }, - timestamp=stream_now, - ) - - # Yield completion event - response_message: str = "No response generated." - if final_result: - if hasattr(final_result, "answer") and final_result.answer: - response_message = str(final_result.answer) - elif hasattr(final_result, "summary") and final_result.summary: - response_message = str(final_result.summary) - elif ( - hasattr(final_result, "recommendations") - and final_result.recommendations - ): - recommendations = final_result.recommendations - if isinstance(recommendations, list) and recommendations: - response_message = "\n".join( - str(item) for item in recommendations - ) - else: - response_message = str(final_result) + recommendations = final_result.recommendations + if isinstance(recommendations, list) and recommendations: + response_message = "\n".join(str(item) for item in recommendations) else: response_message = str(final_result) + else: + response_message = str(final_result) + # #342 — the ollama (non-streaming) path produced no token + # deltas; emit the full text once so the FE renders the reply. + # Cloud streaming behavior is unchanged. 
+ if not stream_supported and response_message != "No response generated.": yield StreamEvent( - event_type="complete", + event_type="text_delta", + data={"delta": response_message}, + timestamp=datetime.now(UTC), + ) + + # If approval is required, emit approval_required event + if pending_approval and pending_action: + yield StreamEvent( + event_type="approval_required", data={ - "message": response_message, - "tokens_used": usage.total_tokens or 0, - "tool_calls_count": deps.tool_call_count, - "pending_approval": pending_approval, + "action": pending_action, + "message": "Human approval required before proceeding.", }, - timestamp=datetime.now(UTC), + timestamp=stream_now, ) + + yield StreamEvent( + event_type="complete", + data={ + "message": response_message, + "tokens_used": usage.total_tokens or 0, + "tool_calls_count": deps.tool_call_count, + "pending_approval": pending_approval, + }, + timestamp=datetime.now(UTC), + ) except TimeoutError as e: raise TimeoutError( f"Agent response timed out after {self.settings.agent_timeout_seconds} seconds" diff --git a/app/features/agents/tests/test_service.py b/app/features/agents/tests/test_service.py index 47b90c31..74709be2 100644 --- a/app/features/agents/tests/test_service.py +++ b/app/features/agents/tests/test_service.py @@ -386,6 +386,7 @@ class TestAgentServiceStreamChat: async def test_stream_chat_model_misbehavior_yields_error_event( self, sample_active_session: AgentSession, + monkeypatch: pytest.MonkeyPatch, ) -> None: """A misbehaving model should yield a recoverable `error` event, not crash. @@ -394,6 +395,9 @@ async def test_stream_chat_model_misbehavior_yields_error_event( raw exception string to the client. """ service = AgentService() + # Pin a streaming-capable (cloud) provider so this exercises the + # run_stream path regardless of the local .env (#342). 
+ monkeypatch.setattr(service.settings, "agent_default_model", "anthropic:claude-test") mock_db = AsyncMock() mock_result = MagicMock() @@ -434,6 +438,7 @@ async def __aexit__(self, *exc: object) -> bool: async def test_stream_chat_runs_tools_sequentially( self, sample_active_session: AgentSession, + monkeypatch: pytest.MonkeyPatch, ) -> None: """stream_chat() must also run the agent under sequential tool execution. @@ -442,6 +447,9 @@ async def test_stream_chat_runs_tools_sequentially( concurrent-session bug from issue #172. """ service = AgentService() + # Pin a streaming-capable (cloud) provider so this exercises the + # run_stream path regardless of the local .env (#342). + monkeypatch.setattr(service.settings, "agent_default_model", "anthropic:claude-test") mock_db = AsyncMock() mock_result = MagicMock() @@ -484,6 +492,120 @@ async def __aexit__(self, *exc: object) -> bool: mock_mode.assert_called_once_with("sequential") + @pytest.mark.asyncio + async def test_stream_chat_ollama_uses_nonstreaming_path( + self, + sample_active_session: AgentSession, + sample_experiment_report: ExperimentReport, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """#342 — an ollama agent uses agent.run() (not run_stream). + + Ollama's OpenAI-compat endpoint rejects PydanticAI's streamed request + with 400 "invalid message content type: ". The service must fall + back to the non-streaming run() path and still emit text_delta + + approval_required (from deps.pending_action, #336) + complete. + """ + service = AgentService() + monkeypatch.setattr(service.settings, "agent_default_model", "ollama:qwen3:8b") + mock_db = AsyncMock() + mock_result = MagicMock() + mock_result.scalar_one_or_none.return_value = sample_active_session + mock_db.execute.return_value = mock_result + + def _run(message: str, *, deps: AgentDeps, message_history: Any) -> MagicMock: + # A gated tool fired during the run and recorded an approval request. 
+ deps.set_pending_action( + "save_scenario", + {"name": "p", "run_id": "r", "store_id": 1, "product_id": 2}, + "Save scenario plan 'p'", + ) + res = MagicMock() + res.output = sample_experiment_report # has a non-empty summary + usage = MagicMock() + usage.total_tokens = 11 + res.usage.return_value = usage + res.all_messages.return_value = [] + return res + + mock_agent = MagicMock() + mock_agent.run = AsyncMock(side_effect=_run) + mock_agent.run_stream = MagicMock( + side_effect=AssertionError("run_stream must not be called for the ollama provider") + ) + + with patch.object(service, "_get_agent", return_value=mock_agent): + events = [ + event + async for event in service.stream_chat( + db=mock_db, + session_id=sample_active_session.session_id, + message="Save a what-if scenario plan", + ) + ] + + types = [e.event_type for e in events] + assert "text_delta" in types # full reply emitted as one delta + assert "approval_required" in types + assert types[-1] == "complete" + approval = next(e for e in events if e.event_type == "approval_required") + assert approval.data["action"].action_type == "save_scenario" + mock_agent.run.assert_awaited_once() + mock_agent.run_stream.assert_not_called() + assert sample_active_session.status == SessionStatus.AWAITING_APPROVAL.value + + @pytest.mark.asyncio + async def test_stream_chat_cloud_keeps_streaming_path( + self, + sample_active_session: AgentSession, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Regression guard for #342 — a cloud provider keeps the run_stream path.""" + service = AgentService() + monkeypatch.setattr(service.settings, "agent_default_model", "anthropic:claude-test") + mock_db = AsyncMock() + mock_result = MagicMock() + mock_result.scalar_one_or_none.return_value = sample_active_session + mock_db.execute.return_value = mock_result + + class _StubStream: + async def __aenter__(self) -> MagicMock: + stream = MagicMock() + + async def _stream_text() -> AsyncIterator[str]: + yield "hello" + + 
stream.stream_text = _stream_text + stream.get_output = AsyncMock(return_value=None) + usage = MagicMock() + usage.total_tokens = 1 + stream.usage.return_value = usage + stream.all_messages.return_value = [] + return stream + + async def __aexit__(self, *exc: object) -> bool: + return False + + mock_agent = MagicMock() + mock_agent.run_stream = MagicMock(return_value=_StubStream()) + mock_agent.run = AsyncMock( + side_effect=AssertionError("run must not be called for a cloud provider") + ) + + with patch.object(service, "_get_agent", return_value=mock_agent): + events = [ + event + async for event in service.stream_chat( + db=mock_db, + session_id=sample_active_session.session_id, + message="hello", + ) + ] + + mock_agent.run_stream.assert_called_once() + mock_agent.run.assert_not_called() + assert any(e.event_type == "complete" for e in events) + class TestAgentServiceApproval: """Tests for approval workflow.""" @@ -871,9 +993,13 @@ async def test_stream_chat_emits_approval_required_from_deps( self, sample_active_session: AgentSession, sample_experiment_report: ExperimentReport, + monkeypatch: pytest.MonkeyPatch, ) -> None: """stream_chat() must emit approval_required from deps.pending_action.""" service = AgentService() + # Pin a streaming-capable (cloud) provider so this exercises the + # run_stream path regardless of the local .env (#342). + monkeypatch.setattr(service.settings, "agent_default_model", "anthropic:claude-test") mock_db = AsyncMock() mock_result = MagicMock() mock_result.scalar_one_or_none.return_value = sample_active_session From 5fff33421b1442b026e1cd34fe118f467ca001b8 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 03:21:37 +0200 Subject: [PATCH 14/30] fix(agents): sanitize null content for ollama chat requests (#344) Ollama's OpenAI-compatible /v1/chat/completions rejects any message whose content is JSON null and carries no tool_calls (400 'invalid message content type: <nil>').
A weak local model emits that shape for an empty assistant turn and PydanticAI replays it on retry, so every retry 400s and the run dies with FallbackExceptionGroup. Inject a sanitizing httpx transport into the OllamaProvider client that coerces outgoing content:null to content:"". --- app/features/agents/agents/base.py | 81 ++++++++++++++- app/features/agents/tests/test_base.py | 135 +++++++++++++++++++++++++ 2 files changed, 214 insertions(+), 2 deletions(-) diff --git a/app/features/agents/agents/base.py b/app/features/agents/agents/base.py index f9f4e1a0..90c433b6 100644 --- a/app/features/agents/agents/base.py +++ b/app/features/agents/agents/base.py @@ -7,10 +7,12 @@ import functools import inspect +import json import os from collections.abc import Awaitable, Callable -from typing import Any +from typing import Any, cast +import httpx import structlog from pydantic_ai import ModelRetry from pydantic_ai.models import Model @@ -62,6 +64,71 @@ async def wrapper(*args: P.args, **kwargs: P.kwargs) -> ToolReturnT: return wrapper +def _coerce_null_message_content(body: bytes) -> bytes | None: + """Coerce ``messages[*].content: null`` -> ``""`` in a chat-request body. + + Ollama's OpenAI-compatible ``/v1/chat/completions`` rejects any message + whose ``content`` is JSON ``null`` and which carries no ``tool_calls`` with + ``400 invalid message content type: `` — stricter than the real OpenAI + API, which tolerates it. A weak local model can emit a degenerate empty + assistant turn (no text, no tool call); PydanticAI serialises it as + ``content: null`` and then *replays* that message on its validation-retry, + so every retry 400s and the whole run dies with a ``FallbackExceptionGroup``. + Coercing ``null`` -> ``""`` keeps the message OpenAI-spec-valid and lets the + retry loop proceed. + + Args: + body: The raw outgoing request body bytes. 
+ + Returns: + Re-serialised body bytes when a null ``content`` was rewritten, or + ``None`` when nothing changed (the common case) so the caller can + forward the original request untouched. + """ + try: + parsed = json.loads(body) + except (ValueError, TypeError): + return None + if not isinstance(parsed, dict): + return None + payload = cast("dict[str, Any]", parsed) + messages = payload.get("messages") + if not isinstance(messages, list): + return None + message_list: list[Any] = messages + changed = False + for message in message_list: + if isinstance(message, dict) and "content" in message and message["content"] is None: + message["content"] = "" + changed = True + if not changed: + return None + return json.dumps(payload).encode("utf-8") + + +class _OllamaNullContentTransport(httpx.AsyncHTTPTransport): + """httpx transport that null-content-sanitises outgoing Ollama requests. + + See :func:`_coerce_null_message_content` for the Ollama-compat defect this + works around. Applied to the ``OllamaProvider``'s HTTP client so the fix + covers both the streaming and non-streaming agent paths. + """ + + async def handle_async_request(self, request: httpx.Request) -> httpx.Response: + sanitized = _coerce_null_message_content(request.content) + if sanitized is not None: + headers = dict(request.headers) + headers.pop("content-length", None) # httpx recomputes from the new body + request = httpx.Request( + request.method, + request.url, + headers=headers, + content=sanitized, + extensions=request.extensions, + ) + return await super().handle_async_request(request) + + def build_agent_model(identifier: str) -> str | Model: """Build the PydanticAI ``model`` argument for an agent identifier. @@ -85,7 +152,17 @@ def build_agent_model(identifier: str) -> str | Model: model_name = identifier.split(":", 1)[1] # CRITICAL: Ollama's OpenAI-compatible base ends in /v1. 
base_url = settings.ollama_base_url.rstrip("/") + "/v1" - return OpenAIChatModel(model_name, provider=OllamaProvider(base_url=base_url)) + # The null-content sanitiser lives on the HTTP client (see + # _OllamaNullContentTransport). A generous read timeout is required because + # local generation on an 8B model routinely exceeds httpx's 5s default. + http_client = httpx.AsyncClient( + transport=_OllamaNullContentTransport(), + timeout=httpx.Timeout(600.0, connect=10.0), + ) + return OpenAIChatModel( + model_name, + provider=OllamaProvider(base_url=base_url, http_client=http_client), + ) def reset_agent_caches() -> None: diff --git a/app/features/agents/tests/test_base.py b/app/features/agents/tests/test_base.py index ddfcd02b..c7d8d29a 100644 --- a/app/features/agents/tests/test_base.py +++ b/app/features/agents/tests/test_base.py @@ -1,10 +1,12 @@ """Unit tests for agent base helpers (Ollama-aware model factory).""" +import json import re from collections.abc import Iterator from typing import Any, cast from unittest.mock import AsyncMock +import httpx import pytest from pydantic_ai import ModelRetry from pydantic_ai.messages import ModelMessage, ModelResponse, TextPart @@ -15,6 +17,8 @@ from app.core.config import get_settings from app.features.agents.agents.base import ( TOOL_USAGE_INSTRUCTIONS, + _coerce_null_message_content, + _OllamaNullContentTransport, build_agent_model, build_agent_model_with_fallback, get_agent_retries, @@ -322,3 +326,134 @@ def respond(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse: assert captured["output_tools"] == [] assert isinstance(result.output, RAGAnswer) assert result.output.confidence == "high" + + +class TestOllamaNullContentSanitizer: + """The Ollama HTTP client must convert ``content: null`` -> ``""`` (#344). + + Ollama's OpenAI-compatible ``/v1/chat/completions`` rejects any message + whose ``content`` is JSON ``null`` and carries no ``tool_calls`` with + ``400 invalid message content type: ``. 
PydanticAI emits that shape for + a degenerate empty assistant turn and then replays it on retry, so without + this coercion every retry 400s and the run dies with ``FallbackExceptionGroup``. + """ + + def test_coerce_rewrites_null_content_to_empty_string(self) -> None: + body = json.dumps( + { + "model": "qwen3:8b", + "messages": [ + {"role": "user", "content": "hi"}, + {"role": "assistant", "content": None}, + ], + } + ).encode("utf-8") + + out = _coerce_null_message_content(body) + + assert out is not None + payload = json.loads(out) + assert payload["messages"][1]["content"] == "" + # Untouched fields survive the round-trip. + assert payload["messages"][0]["content"] == "hi" + assert payload["model"] == "qwen3:8b" + + def test_coerce_rewrites_null_content_even_with_tool_calls(self) -> None: + body = json.dumps( + { + "messages": [ + { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "c1", + "type": "function", + "function": {"name": "x", "arguments": "{}"}, + } + ], + } + ] + } + ).encode("utf-8") + + out = _coerce_null_message_content(body) + + assert out is not None + payload = json.loads(out) + assert payload["messages"][0]["content"] == "" + assert payload["messages"][0]["tool_calls"][0]["id"] == "c1" + + def test_coerce_is_noop_when_no_null_content(self) -> None: + body = json.dumps({"messages": [{"role": "user", "content": "hi"}]}).encode("utf-8") + + assert _coerce_null_message_content(body) is None + + def test_coerce_ignores_missing_content_key(self) -> None: + # A message with no ``content`` key at all must not be rewritten — only + # an explicit JSON null is the Ollama-rejected shape. 
+ body = json.dumps({"messages": [{"role": "assistant", "tool_calls": []}]}).encode("utf-8") + + assert _coerce_null_message_content(body) is None + + def test_coerce_handles_non_json_body(self) -> None: + assert _coerce_null_message_content(b"not json at all") is None + + def test_coerce_handles_non_dict_payload(self) -> None: + assert _coerce_null_message_content(b"[1, 2, 3]") is None + + @pytest.mark.asyncio + async def test_transport_sanitizes_outgoing_request( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """The transport rewrites the body and fixes Content-Length before send.""" + captured: dict[str, bytes] = {} + + async def fake_send( + _self: httpx.AsyncHTTPTransport, request: httpx.Request + ) -> httpx.Response: + captured["body"] = request.content + captured["content_length"] = request.headers["content-length"].encode() + return httpx.Response(200, json={"ok": True}) + + monkeypatch.setattr(httpx.AsyncHTTPTransport, "handle_async_request", fake_send) + + transport = _OllamaNullContentTransport() + body = json.dumps({"messages": [{"role": "assistant", "content": None}]}).encode("utf-8") + request = httpx.Request("POST", "http://ollama/v1/chat/completions", content=body) + + await transport.handle_async_request(request) + + sent = json.loads(captured["body"]) + assert sent["messages"][0]["content"] == "" + # Content-Length must match the rewritten body, not the original. 
+ assert int(captured["content_length"]) == len(captured["body"]) + + @pytest.mark.asyncio + async def test_transport_passthrough_when_nothing_to_sanitize( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + captured: dict[str, bytes] = {} + + async def fake_send( + _self: httpx.AsyncHTTPTransport, request: httpx.Request + ) -> httpx.Response: + captured["body"] = request.content + return httpx.Response(200, json={"ok": True}) + + monkeypatch.setattr(httpx.AsyncHTTPTransport, "handle_async_request", fake_send) + + transport = _OllamaNullContentTransport() + body = json.dumps({"messages": [{"role": "user", "content": "hi"}]}).encode("utf-8") + request = httpx.Request("POST", "http://ollama/v1/chat/completions", content=body) + + await transport.handle_async_request(request) + + # Forwarded unchanged. + assert json.loads(captured["body"])["messages"][0]["content"] == "hi" + + def test_build_agent_model_returns_openai_chat_model_for_ollama(self) -> None: + # The Ollama branch must hand back a configured OpenAIChatModel (whose + # HTTP client carries the sanitizing transport), not the bare identifier. + model = build_agent_model("ollama:qwen3:8b") + assert isinstance(model, OpenAIChatModel) From cc72f89e1fbdf686964635bdb8eddc4810dae9e8 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 03:21:48 +0200 Subject: [PATCH 15/30] fix(agents): preserve pending approval when ollama model misbehaves (#344) A gated tool (create_alias/archive_run/save_scenario) records deps.pending_action the moment it fires, but does not halt the run. A weak model can ramble past the gate and exhaust its retry budget, so agent.run raises UnexpectedModelBehavior before returning and the post-run approval-surfacing path never runs. 
The captured approval is valid, so surface it: chat() and stream_chat()'s UnexpectedModelBehavior handlers now check deps.pending_action first and emit the approval (ChatResponse pending_approval / approval_required event) before falling back to the generic error. --- app/features/agents/service.py | 87 ++++++++++++++++++++- app/features/agents/tests/test_service.py | 93 +++++++++++++++++++++++ 2 files changed, 178 insertions(+), 2 deletions(-) diff --git a/app/features/agents/service.py b/app/features/agents/service.py index 20625e01..8009ea08 100644 --- a/app/features/agents/service.py +++ b/app/features/agents/service.py @@ -290,8 +290,23 @@ async def chat( error=str(e), error_type=type(e).__name__, ) - session.last_activity = datetime.now(UTC) + misbehavior_now = datetime.now(UTC) + session.last_activity = misbehavior_now + # A gated tool may have fired (and recorded a valid approval request) + # before the model misbehaved — surface the Approve card rather than + # discarding it behind the generic error (#344). + salvaged = self._salvage_pending_action(session, deps, misbehavior_now) await db.flush() + if salvaged is not None: + return ChatResponse( + session_id=session_id, + message=( + "I've prepared an action that needs your approval before " + "I can proceed. Please review the pending request." + ), + pending_approval=True, + pending_action=salvaged, + ) return ChatResponse( session_id=session_id, message=( @@ -650,6 +665,35 @@ async def stream_chat( error=str(e), error_type=type(e).__name__, ) + misbehavior_now = datetime.now(UTC) + session.last_activity = misbehavior_now + # A gated tool may have fired (and recorded a valid approval request) + # before the model misbehaved — surface the Approve card rather than + # discarding it behind the generic error (#344). 
+ salvaged = self._salvage_pending_action(session, deps, misbehavior_now) + await db.flush() + if salvaged is not None: + yield StreamEvent( + event_type="approval_required", + data={ + "action": salvaged, + "message": "Human approval required before proceeding.", + }, + timestamp=misbehavior_now, + ) + yield StreamEvent( + event_type="complete", + data={ + "message": ( + "I've prepared an action that needs your approval before I can proceed." + ), + "tokens_used": 0, + "tool_calls_count": deps.tool_call_count, + "pending_approval": True, + }, + timestamp=misbehavior_now, + ) + return yield StreamEvent( event_type="error", data={ @@ -660,7 +704,7 @@ async def stream_chat( "error_type": "model_behavior_error", "recoverable": True, }, - timestamp=datetime.now(UTC), + timestamp=misbehavior_now, ) return @@ -858,6 +902,45 @@ def _deserialize_messages( ) return [] + def _salvage_pending_action( + self, + session: AgentSession, + deps: AgentDeps, + now: datetime, + ) -> PendingAction | None: + """Persist a gated tool's approval request captured before a misbehaving run. + + A gated tool sets ``deps.pending_action`` the moment it fires (#336), but + it does not halt the run. A weak model can ramble past the gate and + exhaust its retry budget, so ``agent.run()`` raises + ``UnexpectedModelBehavior`` BEFORE returning and the normal post-run + approval-surfacing path never executes. The gate did fire and the + captured arguments are valid, so surface the approval card instead of + discarding it behind a generic error (issue #344). + + Args: + session: The agent session to mutate. + deps: The agent deps that a gated tool may have written to. + now: Timestamp for created_at / expires_at. + + Returns: + The formatted :class:`PendingAction` when a gated tool recorded a + request, else ``None`` (the genuine "invalid tool call" case). 
+ """ + if not deps.pending_action: + return None + action_type = str(deps.pending_action.get("action_type", "unknown")) + return self._record_pending_action( + session, + action_type=action_type, + arguments=deps.pending_action.get("arguments") or {}, + description=str( + deps.pending_action.get("description") + or f"Agent requested approval for {action_type}" + ), + now=now, + ) + def _record_pending_action( self, session: AgentSession, diff --git a/app/features/agents/tests/test_service.py b/app/features/agents/tests/test_service.py index 74709be2..888260ec 100644 --- a/app/features/agents/tests/test_service.py +++ b/app/features/agents/tests/test_service.py @@ -434,6 +434,99 @@ async def __aexit__(self, *exc: object) -> bool: assert events[0].data["error_type"] == "model_behavior_error" assert "exceeded max retries" not in events[0].data["error"] + @pytest.mark.asyncio + async def test_chat_surfaces_pending_action_on_model_misbehavior( + self, + sample_active_session: AgentSession, + ) -> None: + """A gated tool that fired before the model misbehaved must surface the + Approve card, not the generic error (#344). + + A gated tool records ``deps.pending_action`` the moment it fires, but a + weak model can ramble past the gate and exhaust its retry budget, so + ``agent.run`` raises ``UnexpectedModelBehavior`` before returning. The + captured approval is valid and must not be discarded. 
+ """ + service = AgentService() + mock_db = AsyncMock() + mock_result = MagicMock() + mock_result.scalar_one_or_none.return_value = sample_active_session + mock_db.execute.return_value = mock_result + + def _fire_gate_then_misbehave(*_args: Any, **kwargs: Any) -> None: + deps: AgentDeps = kwargs["deps"] + deps.set_pending_action( + "create_alias", + {"alias_name": "champion", "run_id": "1" * 32}, + "Create alias champion", + ) + raise UnexpectedModelBehavior("Exceeded maximum output retries (3)") + + mock_agent = MagicMock() + mock_agent.run = AsyncMock(side_effect=_fire_gate_then_misbehave) + + with patch.object(service, "_get_agent", return_value=mock_agent): + response = await service.chat( + db=mock_db, + session_id=sample_active_session.session_id, + message="Create alias champion. Call tool_create_alias now.", + ) + + assert response.pending_approval is True + assert response.pending_action is not None + assert response.pending_action.action_type == "create_alias" + assert response.pending_action.arguments["alias_name"] == "champion" + assert "invalid tool call" not in response.message + # Session flipped so POST /approve can find the action. + assert sample_active_session.status == SessionStatus.AWAITING_APPROVAL.value + assert sample_active_session.pending_action is not None + + @pytest.mark.asyncio + async def test_stream_chat_surfaces_approval_on_model_misbehavior( + self, + sample_active_session: AgentSession, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """The streaming path must emit ``approval_required`` (not ``error``) + when a gated tool fired before the model misbehaved (#344).""" + service = AgentService() + # Pin ollama so stream_chat uses the non-streaming run() path (#342) — + # the real-world scenario where this surfaced. 
+ monkeypatch.setattr(service.settings, "agent_default_model", "ollama:qwen3:8b") + mock_db = AsyncMock() + mock_result = MagicMock() + mock_result.scalar_one_or_none.return_value = sample_active_session + mock_db.execute.return_value = mock_result + + def _fire_gate_then_misbehave(*_args: Any, **kwargs: Any) -> None: + deps: AgentDeps = kwargs["deps"] + deps.set_pending_action( + "create_alias", + {"alias_name": "champion", "run_id": "1" * 32}, + "Create alias champion", + ) + raise UnexpectedModelBehavior("Exceeded maximum output retries (3)") + + mock_agent = MagicMock() + mock_agent.run = AsyncMock(side_effect=_fire_gate_then_misbehave) + + with patch.object(service, "_get_agent", return_value=mock_agent): + events = [ + event + async for event in service.stream_chat( + db=mock_db, + session_id=sample_active_session.session_id, + message="Create alias champion. Call tool_create_alias now.", + ) + ] + + event_types = [event.event_type for event in events] + assert "approval_required" in event_types + assert "error" not in event_types + approval = next(e for e in events if e.event_type == "approval_required") + assert approval.data["action"].action_type == "create_alias" + assert sample_active_session.status == SessionStatus.AWAITING_APPROVAL.value + @pytest.mark.asyncio async def test_stream_chat_runs_tools_sequentially( self, From c4b532e19bd4dd269d07548685b51a9a73484402 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 03:41:28 +0200 Subject: [PATCH 16/30] fix(ui): surface approved-action execution report for all outcomes (#346) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chat Approve/Reject handlers awaited POST /approve but discarded the ApprovalResponse and only cleared the card, so a click produced no visible result — and a failed action execution (e.g. create_alias 'Run not found', returned as status=rejected + result.error) was silent. 
Capture the response and append a one-line report for every outcome (executed / approved-but-failed / rejected / expired) via a pure, unit-tested formatApprovalReport helper. Closes #346. --- frontend/src/lib/approval-report.test.ts | 67 ++++++++++++++++++++++++ frontend/src/lib/approval-report.ts | 44 ++++++++++++++++ frontend/src/pages/chat.tsx | 49 ++++++++++------- frontend/src/types/api.ts | 9 ++++ 4 files changed, 150 insertions(+), 19 deletions(-) create mode 100644 frontend/src/lib/approval-report.test.ts create mode 100644 frontend/src/lib/approval-report.ts diff --git a/frontend/src/lib/approval-report.test.ts b/frontend/src/lib/approval-report.test.ts new file mode 100644 index 00000000..f94f04d9 --- /dev/null +++ b/frontend/src/lib/approval-report.test.ts @@ -0,0 +1,67 @@ +import { describe, it, expect } from 'vitest' +import { formatApprovalReport } from './approval-report' +import type { ApprovalResponse } from '@/types/api' + +describe('formatApprovalReport', () => { + it('reports a successful execution', () => { + const res: ApprovalResponse = { + action_id: 'a1', + approved: true, + status: 'executed', + result: { alias_name: 'champion' }, + } + const msg = formatApprovalReport('create_alias', res) + expect(msg).toContain('✅') + expect(msg).toContain('create_alias') + expect(msg).toContain('executed successfully') + }) + + it('reports an approved-but-failed execution with the error cause', () => { + // The backend marks a failed execution `rejected` with the cause in result.error. 
+ const res: ApprovalResponse = { + action_id: 'a2', + approved: true, + status: 'rejected', + result: { error: 'Run not found: 3c5d', error_type: 'ValueError' }, + } + const msg = formatApprovalReport('create_alias', res) + expect(msg).toContain('❌') + expect(msg).toContain('could not be executed') + expect(msg).toContain('Run not found: 3c5d') + }) + + it('reports an operator rejection (no execution)', () => { + const res: ApprovalResponse = { + action_id: 'a3', + approved: false, + status: 'rejected', + result: null, + } + const msg = formatApprovalReport('archive_run', res) + expect(msg).toContain('🚫') + expect(msg).toContain('Rejected') + expect(msg).toContain('No action was taken') + }) + + it('reports an expired approval', () => { + const res: ApprovalResponse = { + action_id: 'a4', + approved: true, + status: 'expired', + result: null, + } + const msg = formatApprovalReport('save_scenario', res) + expect(msg).toContain('⏰') + expect(msg).toContain('expired') + }) + + it('does not throw on a non-object result', () => { + const res: ApprovalResponse = { + action_id: 'a5', + approved: true, + status: 'executed', + result: 'ok', + } + expect(() => formatApprovalReport('create_alias', res)).not.toThrow() + }) +}) diff --git a/frontend/src/lib/approval-report.ts b/frontend/src/lib/approval-report.ts new file mode 100644 index 00000000..f041157e --- /dev/null +++ b/frontend/src/lib/approval-report.ts @@ -0,0 +1,44 @@ +import type { ApprovalResponse } from '@/types/api' + +/** + * Build a human-readable chat report for an approved/rejected agent action. + * + * The backend's `POST /approve` returns an {@link ApprovalResponse} for every + * outcome, but the chat UI previously discarded it — so a click produced no + * visible result ("nothing returned"). This formats a one-line report for ALL + * outcomes so the operator always sees what happened: + * + * - `executed` → the action ran successfully. 
+ * - approved but `rejected` + error → the action was approved but execution + * failed (the backend marks a failed execution `rejected` and puts the cause + * in `result.error`). + * - `rejected` (not approved) → the operator rejected the action. + * - `expired` → the approval window lapsed before it ran. + * + * @param actionLabel - The gated action name (e.g. `create_alias`). + * @param res - The approval response from the backend. + * @returns A markdown-ish one-line report for the chat transcript. + */ +export function formatApprovalReport(actionLabel: string, res: ApprovalResponse): string { + const result = + res.result && typeof res.result === 'object' + ? (res.result as Record) + : undefined + const errorDetail = + result && 'error' in result ? String(result.error) : undefined + + if (res.status === 'executed') { + return `✅ Approved — \`${actionLabel}\` executed successfully.` + } + if (res.approved && errorDetail) { + return `❌ Approved, but \`${actionLabel}\` could not be executed: ${errorDetail}` + } + if (!res.approved) { + return `🚫 Rejected \`${actionLabel}\`. No action was taken.` + } + if (res.status === 'expired') { + return `⏰ The \`${actionLabel}\` approval expired before it could run.` + } + // Defensive fallback: approved, not executed, no error detail. 
+ return `\`${actionLabel}\` finished with status: ${res.status}.` +} diff --git a/frontend/src/pages/chat.tsx b/frontend/src/pages/chat.tsx index cc22a9d5..6bbaaeb6 100644 --- a/frontend/src/pages/chat.tsx +++ b/frontend/src/pages/chat.tsx @@ -16,8 +16,15 @@ import { SelectValue, } from '@/components/ui/select' import { api } from '@/lib/api' +import { formatApprovalReport } from '@/lib/approval-report' import { WS_URL, ROUTES } from '@/lib/constants' -import type { ChatMessage as ChatMessageType, AgentStreamEvent, AgentType, AgentSession } from '@/types/api' +import type { + ChatMessage as ChatMessageType, + AgentStreamEvent, + AgentType, + AgentSession, + ApprovalResponse, +} from '@/types/api' export default function ChatPage() { const [sessionId, setSessionId] = useState(null) @@ -142,38 +149,42 @@ export default function ChatPage() { send({ session_id: sessionId, message: content }) } - const handleApprove = async () => { - if (!sessionId || !pendingAction?.actionId) return - setIsApproving(true) - try { - await api(`/agents/sessions/${sessionId}/approve`, { - method: 'POST', - body: { action_id: pendingAction.actionId, approved: true }, - }) - setPendingAction(null) - } catch (error) { - console.error('Failed to approve:', error) - } finally { - setIsApproving(false) - } + const appendAssistantMessage = (content: string) => { + setMessages((prev) => [ + ...prev, + { role: 'assistant', content, timestamp: new Date().toISOString() }, + ]) } - const handleReject = async () => { + // Approve or reject a pending action, then ALWAYS surface the execution + // report — for every outcome (executed / failed / rejected / expired). The + // handlers previously discarded the /approve response, so a click left the + // user with no feedback ("nothing returned"). 
+ const decideAction = async (approved: boolean) => { if (!sessionId || !pendingAction?.actionId) return + const actionLabel = pendingAction.action setIsApproving(true) try { - await api(`/agents/sessions/${sessionId}/approve`, { + const res = await api(`/agents/sessions/${sessionId}/approve`, { method: 'POST', - body: { action_id: pendingAction.actionId, approved: false }, + body: { action_id: pendingAction.actionId, approved }, }) setPendingAction(null) + appendAssistantMessage(formatApprovalReport(actionLabel, res)) } catch (error) { - console.error('Failed to reject:', error) + console.error(approved ? 'Failed to approve:' : 'Failed to reject:', error) + setPendingAction(null) + const verb = approved ? 'approve' : 'reject' + const detail = error instanceof Error ? error.message : 'request failed' + appendAssistantMessage(`Error: could not ${verb} \`${actionLabel}\` — ${detail}`) } finally { setIsApproving(false) } } + const handleApprove = () => decideAction(true) + const handleReject = () => decideAction(false) + const handleNewSession = () => { setSessionId(null) setMessages([]) diff --git a/frontend/src/types/api.ts b/frontend/src/types/api.ts index 3c62f684..df2289f4 100644 --- a/frontend/src/types/api.ts +++ b/frontend/src/types/api.ts @@ -624,6 +624,15 @@ export interface ChatMessage { timestamp: string } +/** Response from POST /agents/sessions/{id}/approve (mirrors backend ApprovalResponse). */ +export interface ApprovalResponse { + action_id: string + approved: boolean + /** Execution result on success, or `{ error, error_type }` when execution failed. 
*/ + result?: unknown + status: 'executed' | 'rejected' | 'expired' +} + export interface ToolCall { tool_name: string arguments: Record From ba6da8294156ac9f02cc220bb801dc6c58a57932 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 04:09:47 +0200 Subject: [PATCH 17/30] fix(agents): constrain read-only experiment queries (#347) --- app/features/agents/agents/base.py | 46 +++++ app/features/agents/agents/experiment.py | 3 + .../agents/tests/test_read_only_guard.py | 172 ++++++++++++++++++ 3 files changed, 221 insertions(+) create mode 100644 app/features/agents/tests/test_read_only_guard.py diff --git a/app/features/agents/agents/base.py b/app/features/agents/agents/base.py index 90c433b6..d098b567 100644 --- a/app/features/agents/agents/base.py +++ b/app/features/agents/agents/base.py @@ -371,3 +371,49 @@ def requires_approval(action_name: str) -> bool: - Never bypass safety checks or approval requirements - Log all significant decisions and their reasoning """ + +# Generalized read-only intent guard. Embedded in the experiment-agent prompt to +# stop a read-only question (list/rank/summarize/compare/report) from derailing +# into a scenario / write / experiment tool — especially on an output-format +# validation retry, where a weak local model tends to start a brand-new action +# instead of just reformatting the data it already fetched (issue #347). Every +# `tool_*` name referenced here is registered on the experiment agent, so the +# `test_prompts_only_reference_registered_tool_names` invariant still holds. +READ_ONLY_INTENT_GUARD = """ +READ-ONLY INTENT GUARD (apply this before every turn): +Many requests are READ-ONLY — the user wants you to look something up and report +it, not to change anything. Treat a request as READ-ONLY when it asks you to list, +show, rank, summarize, compare, or report. Examples that are ALWAYS read-only +unless the user explicitly asks to change something: +- listing or ranking stores or products (e.g. 
"top products") +- sales, revenue, or units-sold summaries +- forecast summaries, or which products have the highest forecasted demand +- model runs and metric comparisons, including WAPE, MAE, or RMSE +- registry aliases and deployment status +- backtest metrics +- RAG / document / knowledge questions + +For a READ-ONLY request you MUST: +- Use ONLY read-only tools: tool_list_runs, tool_get_run, tool_compare_runs, + tool_compare_backtest_results. +- NEVER call tool_propose_scenario, tool_save_scenario, tool_create_alias, + tool_archive_run, or tool_run_backtest. Those create, save, promote, archive, + run, or plan something — they are NOT allowed for a read-only question. +- Call a mutating / planning / experiment tool ONLY when the user EXPLICITLY asks + to create, save, promote, archive, run a backtest, or run an experiment. +- Answer directly in the ExperimentReport `summary` field, grounded in tool output. + +OUTPUT-FORMAT RETRIES: +- If your previous reply failed schema validation (e.g. "summary: Field required"), + DO NOT call any new tool. Only reformat the data you already obtained into a + valid ExperimentReport with a concise `summary`. A validation retry is a + formatting fix, never a reason to start a new action. + +WHEN A TOOL IS MISSING OR THE REQUEST IS AMBIGUOUS: +- If a ranking is ambiguous (e.g. "top products"), ask a clarifying question such + as: "Top by revenue, units sold, forecasted demand, or model error?" — do not guess. +- If no read-only tool exists for the requested metric, say plainly that this agent + does not have a tool for that metric. Do NOT invent data. +- NEVER invent or guess a store_id, product_id, or run_id. Use only IDs returned by + a tool or explicitly supplied by the user. 
+""" diff --git a/app/features/agents/agents/experiment.py b/app/features/agents/agents/experiment.py index 1b0139f6..d58b53de 100644 --- a/app/features/agents/agents/experiment.py +++ b/app/features/agents/agents/experiment.py @@ -16,6 +16,7 @@ from pydantic_ai import Agent, PromptedOutput, RunContext from app.features.agents.agents.base import ( + READ_ONLY_INTENT_GUARD, SAFETY_INSTRUCTIONS, SYSTEM_PROMPT_HEADER, TOOL_USAGE_INSTRUCTIONS, @@ -68,6 +69,8 @@ would like to experiment on. Do NOT call any tools until you have a specific objective (a store and product plus a date range, or an explicit request). +{READ_ONLY_INTENT_GUARD} + {TOOL_USAGE_INSTRUCTIONS} {SAFETY_INSTRUCTIONS} diff --git a/app/features/agents/tests/test_read_only_guard.py b/app/features/agents/tests/test_read_only_guard.py new file mode 100644 index 00000000..c59e232d --- /dev/null +++ b/app/features/agents/tests/test_read_only_guard.py @@ -0,0 +1,172 @@ +"""Deterministic tests for the experiment-agent read-only intent guard (#347). + +The guard stops a read-only question ("list the runs and tell me the lowest +WAPE", "top products", "current deployment alias") from derailing into a +scenario / write / experiment tool — especially on an output-format validation +retry, where a weak local model tends to start a brand-new action instead of +reformatting the data it already fetched. + +These tests are deterministic and require **no live model call**: they assert +that the guard text exists, names the right tools, and governs each named +read-only intent, and that the guard is actually delivered to the model in the +system prompt. 
+""" + +from __future__ import annotations + +from collections.abc import Iterator +from unittest.mock import AsyncMock + +import pytest +from pydantic_ai.messages import ModelMessage, ModelResponse, TextPart +from pydantic_ai.models.function import AgentInfo, FunctionModel + +from app.core.config import get_settings +from app.features.agents.agents.base import READ_ONLY_INTENT_GUARD +from app.features.agents.agents.experiment import ( + EXPERIMENT_SYSTEM_PROMPT, + create_experiment_agent, +) +from app.features.agents.deps import AgentDeps + +# Tools that must NEVER be called for a read-only intent. +PROHIBITED_TOOLS = ( + "tool_propose_scenario", + "tool_save_scenario", + "tool_create_alias", + "tool_archive_run", + "tool_run_backtest", +) + +# Read-only tools the guard steers the model toward. +ALLOWED_READ_TOOLS = ( + "tool_list_runs", + "tool_get_run", + "tool_compare_runs", +) + + +@pytest.fixture(autouse=True) +def _reset_settings() -> Iterator[None]: + """Reset the settings cache so model mutations do not leak across tests.""" + get_settings.cache_clear() + yield + get_settings.cache_clear() + + +def test_guard_is_embedded_in_experiment_prompt() -> None: + """The experiment system prompt embeds the read-only intent guard.""" + assert READ_ONLY_INTENT_GUARD.strip() in EXPERIMENT_SYSTEM_PROMPT + assert "READ-ONLY INTENT GUARD" in EXPERIMENT_SYSTEM_PROMPT + + +@pytest.mark.parametrize("tool_name", PROHIBITED_TOOLS) +def test_guard_names_prohibited_tools(tool_name: str) -> None: + """The guard explicitly forbids each scenario/write/experiment tool.""" + assert tool_name in READ_ONLY_INTENT_GUARD + # And the prohibition is unambiguous. 
+ assert "NEVER call" in READ_ONLY_INTENT_GUARD + + +@pytest.mark.parametrize("tool_name", ALLOWED_READ_TOOLS) +def test_guard_names_allowed_read_tools(tool_name: str) -> None: + """The guard points the model at the read-only tools to use instead.""" + assert tool_name in READ_ONLY_INTENT_GUARD + assert "Use ONLY read-only tools" in READ_ONLY_INTENT_GUARD + + +def test_guard_forbids_new_tools_on_validation_retry() -> None: + """On an output-format retry the model must reformat, not call new tools.""" + guard = READ_ONLY_INTENT_GUARD + assert "OUTPUT-FORMAT RETRIES" in guard + assert "DO NOT call any new tool" in guard + assert "reformat" in guard + # The exact validation-error string that triggered the original derail. + assert "summary: Field required" in guard + + +def test_guard_requires_clarification_for_ambiguous_top_products() -> None: + """An ambiguous "top products" ranking gets a clarifying question, not a guess.""" + guard = READ_ONLY_INTENT_GUARD + assert "top products" in guard + assert "Top by revenue, units sold, forecasted demand, or model error?" in guard + + +def test_guard_prohibits_invented_ids() -> None: + """The guard forbids inventing store_id / product_id / run_id values.""" + guard = READ_ONLY_INTENT_GUARD + assert "NEVER invent" in guard + for token in ("store_id", "product_id", "run_id"): + assert token in guard + + +def test_guard_states_limitation_when_no_tool_exists() -> None: + """The guard tells the model to state a missing-tool limitation, not fabricate.""" + guard = READ_ONLY_INTENT_GUARD + assert "does not have a tool for that metric" in guard + assert "Do NOT invent data" in guard + + +# Each example read-only prompt and the guard substring that proves the guard +# governs that intent (so the named query class can never silently lose +# coverage). Deterministic — no model is invoked. 
+@pytest.mark.parametrize( + ("prompt", "covered_intent"), + [ + ( + "List the most recent model runs and tell me which has the lowest WAPE.", + "WAPE", + ), + ("List the top products.", "top products"), + ( + "Which products have the highest forecasted demand?", + "highest forecasted demand", + ), + ("Show the current deployment alias.", "registry aliases and deployment status"), + ("Summarize total revenue and units sold.", "units-sold summaries"), + ("Show the backtest metrics for this grain.", "backtest metrics"), + ], +) +def test_read_only_intents_are_covered_by_guard(prompt: str, covered_intent: str) -> None: + """Every named read-only intent is enumerated in the guard's read-only list. + + This is the routing contract: each of these prompts is a read-only request, + and the guard's read-only example list names its intent — so the model is + told to answer it with read tools only and never with a scenario/write tool. + """ + assert covered_intent in READ_ONLY_INTENT_GUARD + + +def test_guard_is_delivered_in_system_prompt_to_model() -> None: + """The guard actually reaches the model in the delivered system prompt. + + Builds the real experiment agent against a stub FunctionModel (no live + call), captures the system prompt the framework sends, and asserts the guard + is present. Regression for #347 — a guard that never reaches the model is + worthless. + """ + settings = get_settings() + settings.agent_default_model = "ollama:llama3.1" + agent = create_experiment_agent() + + captured: dict[str, str] = {} + + def respond(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse: + for message in messages: + for part in getattr(message, "parts", []): + if getattr(part, "part_kind", None) == "system-prompt": + captured["system_prompt"] = part.content + # End the run with a PromptedOutput-parseable text reply. 
+ return ModelResponse(parts=[TextPart(content='{"summary": "noop"}')]) + + agent.run_sync( + "List the most recent model runs and tell me which has the lowest WAPE.", + model=FunctionModel(respond), + deps=AgentDeps(db=AsyncMock(), session_id="test-read-only-guard"), + ) + + system_prompt = captured.get("system_prompt", "") + assert "READ-ONLY INTENT GUARD" in system_prompt + assert "DO NOT call any new tool" in system_prompt + for tool_name in PROHIBITED_TOOLS: + assert tool_name in system_prompt From 0e05ec204de795311f4d5aac5ffa698fa7d10243 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 04:09:53 +0200 Subject: [PATCH 18/30] fix(agents): validate scenario proposal entity ids (#347) --- app/features/scenarios/agent_tools.py | 59 ++++++++- app/features/scenarios/tests/conftest.py | 23 ++++ .../scenarios/tests/test_agent_tools.py | 119 ++++++++++++++++-- 3 files changed, 190 insertions(+), 11 deletions(-) diff --git a/app/features/scenarios/agent_tools.py b/app/features/scenarios/agent_tools.py index ab99c17c..b987afca 100644 --- a/app/features/scenarios/agent_tools.py +++ b/app/features/scenarios/agent_tools.py @@ -26,7 +26,7 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from app.features.data_platform.models import SalesDaily +from app.features.data_platform.models import Product, SalesDaily, Store from app.features.scenarios.models import SCENARIO_SOURCE_AGENT from app.features.scenarios.schemas import ( CreateScenarioRequest, @@ -52,6 +52,27 @@ AGENT_SAVE_APPROVED_BY = "operator" +async def _grain_exists(db: AsyncSession, store_id: int, product_id: int) -> tuple[bool, bool]: + """Report whether the store and product dimension rows exist. + + Read-only existence probe against the ``store`` / ``product`` dimension + tables. Used to reject a proposal for a grain that does not exist (issue + #347 — a weak model derailed into proposing a what-if for a hallucinated + ``store_id=123`` / ``product_id=456``). 
+ + Args: + db: Database session. + store_id: Candidate store id. + product_id: Candidate product id. + + Returns: + ``(store_exists, product_exists)``. + """ + store_exists = await db.scalar(select(Store.id).where(Store.id == store_id)) is not None + product_exists = await db.scalar(select(Product.id).where(Product.id == product_id)) is not None + return store_exists, product_exists + + async def propose_scenario( db: AsyncSession, store_id: int, @@ -79,6 +100,12 @@ async def propose_scenario( the candidate ``assumptions`` (JSON-mode dump so dates are ISO strings, ready to pass straight back into ``save_scenario``), and a plain-language ``recommendation``. + + When the ``(store_id, product_id)`` grain does not exist, returns a + non-persistable validation error instead — ``{"valid": False, + "persistable": False, "error": ..., "missing": [...]}`` — so a + hallucinated grain (e.g. store 123 / product 456) never yields a normal + proposal (issue #347). """ logger.info( "agents.scenario_tool.propose_scenario_called", @@ -87,6 +114,36 @@ async def propose_scenario( horizon=horizon, ) + # Reject a grain that does not exist before drafting anything. A weak model + # can hallucinate placeholder ids (store 123 / product 456); a proposal for a + # non-existent grain is meaningless and must never look like a normal, + # save-able candidate (issue #347). This is read-only and persists nothing. 
+ store_exists, product_exists = await _grain_exists(db, store_id, product_id) + if not (store_exists and product_exists): + missing: list[str] = [] + if not store_exists: + missing.append(f"store_id={store_id}") + if not product_exists: + missing.append(f"product_id={product_id}") + logger.info( + "agents.scenario_tool.propose_scenario_rejected_unknown_grain", + store_id=store_id, + product_id=product_id, + missing=missing, + ) + return { + "valid": False, + "persistable": False, + "store_id": store_id, + "product_id": product_id, + "missing": missing, + "error": ( + f"Cannot propose a scenario: {' and '.join(missing)} " + "do not exist. Use a real store/product pair (look one up with a " + "read-only tool) — do not invent identifiers." + ), + } + # Read the most recent unit price for a grounded recommendation. Read-only. latest_price = await db.scalar( select(SalesDaily.unit_price) diff --git a/app/features/scenarios/tests/conftest.py b/app/features/scenarios/tests/conftest.py index c5ebc731..0ef3ca06 100644 --- a/app/features/scenarios/tests/conftest.py +++ b/app/features/scenarios/tests/conftest.py @@ -66,6 +66,29 @@ async def db_session() -> AsyncGenerator[AsyncSession, None]: await engine.dispose() +@pytest.fixture +async def existing_grain(db_session: AsyncSession) -> AsyncGenerator[tuple[int, int], None]: + """Insert the Store + Product dimension rows for the test grain; clean up after. + + ``propose_scenario`` now rejects a grain whose store/product do not exist + (#347). ``TEST_STORE_ID`` / ``TEST_PRODUCT_ID`` are deliberately high IDs no + seeder uses, so a read-only proposal for them needs the dimension rows seeded + explicitly. Removed on teardown so the grain stays absent for the + rejection-path tests. 
+ """ + from app.features.data_platform.models import Product, Store + + db_session.add(Store(id=TEST_STORE_ID, code=f"S{TEST_STORE_ID}", name="Test Store")) + db_session.add(Product(id=TEST_PRODUCT_ID, sku=f"SKU{TEST_PRODUCT_ID}", name="Test Product")) + await db_session.commit() + try: + yield (TEST_STORE_ID, TEST_PRODUCT_ID) + finally: + await db_session.execute(delete(Product).where(Product.id == TEST_PRODUCT_ID)) + await db_session.execute(delete(Store).where(Store.id == TEST_STORE_ID)) + await db_session.commit() + + @pytest.fixture async def client(db_session: AsyncSession) -> AsyncGenerator[AsyncClient, None]: """Create a test client with the database dependency overridden.""" diff --git a/app/features/scenarios/tests/test_agent_tools.py b/app/features/scenarios/tests/test_agent_tools.py index 5f1ba739..95400c92 100644 --- a/app/features/scenarios/tests/test_agent_tools.py +++ b/app/features/scenarios/tests/test_agent_tools.py @@ -14,6 +14,7 @@ import uuid from datetime import UTC, datetime, timedelta +from unittest.mock import AsyncMock import pytest from sqlalchemy import delete, func, select @@ -35,18 +36,85 @@ def test_save_scenario_requires_approval() -> None: assert requires_approval("save_scenario") is True +class TestProposeScenarioEntityValidation: + """propose_scenario rejects a grain whose store/product do not exist (#347). + + Unit-level (mocked DB): the entity-existence probe (`_grain_exists`) issues + `db.scalar(...)` for the store id, then the product id, then — only on the + valid path — the latest unit price. Driving `db.scalar` with a `side_effect` + list exercises every branch with no database. These run in the fast + ``not integration`` gate. 
+ """ + + @staticmethod + def _mock_db(scalar_returns: list[object]) -> AsyncMock: + """An AsyncSession-shaped mock whose `scalar` yields the given values.""" + db = AsyncMock(spec=AsyncSession) + db.scalar = AsyncMock(side_effect=scalar_returns) + return db + + async def test_existing_grain_returns_a_proposal(self) -> None: + """A real store/product pair yields a normal, save-able proposal.""" + db = self._mock_db([5, 8, 12.5]) # store id, product id, latest price + + result = await propose_scenario( + db, store_id=5, product_id=8, horizon=14, objective="grow demand" + ) + + assert result.get("valid") is not False + assert "assumptions" in result + assert "recommendation" in result + ScenarioAssumptions.model_validate(result["assumptions"]) + + async def test_nonexistent_grain_is_rejected(self) -> None: + """A grain with no store and no product row is rejected, not proposed.""" + db = self._mock_db([None, None]) + + result = await propose_scenario( + db, store_id=77001, product_id=77002, horizon=14, objective="x" + ) + + assert result["valid"] is False + assert result["persistable"] is False + assert "assumptions" not in result + assert "store_id=77001" in result["missing"] + assert "product_id=77002" in result["missing"] + + async def test_hallucinated_123_456_is_rejected(self) -> None: + """The exact hallucinated store 123 / product 456 case never proposes.""" + db = self._mock_db([None, None]) + + result = await propose_scenario(db, store_id=123, product_id=456, horizon=14, objective="") + + assert result["valid"] is False + assert "assumptions" not in result + # Read-only rejection — nothing is written. 
+ db.add.assert_not_called() + db.commit.assert_not_awaited() + + async def test_missing_product_only_is_rejected(self) -> None: + """A real store but a missing product is still rejected.""" + db = self._mock_db([5, None]) + + result = await propose_scenario(db, store_id=5, product_id=999999, horizon=7, objective="") + + assert result["valid"] is False + assert result["missing"] == ["product_id=999999"] + + @pytest.mark.integration class TestProposeScenario: - """propose_scenario drafts a candidate and persists nothing.""" + """propose_scenario drafts a candidate (for a real grain) and persists nothing.""" async def test_returns_valid_assumptions_and_recommendation( - self, db_session: AsyncSession + self, db_session: AsyncSession, existing_grain: tuple[int, int] ) -> None: """A default objective yields a valid price-cut candidate.""" + store_id, product_id = existing_grain result = await propose_scenario( db_session, - store_id=TEST_STORE_ID, - product_id=TEST_PRODUCT_ID, + store_id=store_id, + product_id=product_id, horizon=14, objective="grow demand for the summer range", ) @@ -58,12 +126,15 @@ async def test_returns_valid_assumptions_and_recommendation( assert isinstance(result["recommendation"], str) assert result["recommendation"] - async def test_promotion_keyword_proposes_a_promotion(self, db_session: AsyncSession) -> None: + async def test_promotion_keyword_proposes_a_promotion( + self, db_session: AsyncSession, existing_grain: tuple[int, int] + ) -> None: """An objective mentioning a promotion steers the candidate accordingly.""" + store_id, product_id = existing_grain result = await propose_scenario( db_session, - store_id=TEST_STORE_ID, - product_id=TEST_PRODUCT_ID, + store_id=store_id, + product_id=product_id, horizon=7, objective="run a promotion next week", ) @@ -72,12 +143,15 @@ async def test_promotion_keyword_proposes_a_promotion(self, db_session: AsyncSes assert assumptions.promotion is not None assert assumptions.price is None - async def 
test_persists_no_row(self, db_session: AsyncSession) -> None: + async def test_persists_no_row( + self, db_session: AsyncSession, existing_grain: tuple[int, int] + ) -> None: """propose_scenario is read-only — it never writes a scenario_plan row.""" + store_id, product_id = existing_grain await propose_scenario( db_session, - store_id=TEST_STORE_ID, - product_id=TEST_PRODUCT_ID, + store_id=store_id, + product_id=product_id, horizon=10, objective="test", ) @@ -85,6 +159,31 @@ async def test_persists_no_row(self, db_session: AsyncSession) -> None: assert count == 0 +@pytest.mark.integration +class TestProposeScenarioRejectsUnknownGrain: + """propose_scenario rejects a non-existent grain against a real DB (#347).""" + + async def test_rejects_nonexistent_grain_and_persists_nothing( + self, db_session: AsyncSession + ) -> None: + """A grain with no dimension rows is rejected and writes no plan.""" + # IDs far above any seeded range — guaranteed absent. + result = await propose_scenario( + db_session, + store_id=9_999_001, + product_id=9_999_002, + horizon=14, + objective="grow demand", + ) + + assert result["valid"] is False + assert result["persistable"] is False + assert "assumptions" not in result + + count = await db_session.scalar(select(func.count()).select_from(ScenarioPlan)) + assert count == 0 + + @pytest.mark.integration class TestSaveScenario: """save_scenario persists a plan stamped with agent provenance.""" From bcb80dee4e1c2eb8bc39d92f64bbc08537090a1e Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 04:58:03 +0200 Subject: [PATCH 19/30] fix(agents): stop read-only tool-call loop in experiment guard (#349) --- app/features/agents/agents/base.py | 10 ++++++++ .../agents/tests/test_read_only_guard.py | 23 +++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/app/features/agents/agents/base.py b/app/features/agents/agents/base.py index d098b567..c69b944e 100644 --- a/app/features/agents/agents/base.py +++ 
b/app/features/agents/agents/base.py @@ -403,6 +403,16 @@ def requires_approval(action_name: str) -> bool: to create, save, promote, archive, run a backtest, or run an experiment. - Answer directly in the ExperimentReport `summary` field, grounded in tool output. +FINISH IN ONE PASS — do not loop: +- Call each read-only tool AT MOST ONCE per question. +- The MOMENT a read tool returns, STOP calling tools and write your + ExperimentReport `summary` from what it returned — you already have the answer. +- NEVER call a tool again that has already returned. Re-running the same tool + (e.g. tool_list_runs twice) is the most common failure: it burns the retry + budget until the run is killed. Use the data you already received. +- If a read tool returns an EMPTY result, say so in the `summary` (e.g. "No model + runs found.") — do NOT retry the tool hoping for different data. + OUTPUT-FORMAT RETRIES: - If your previous reply failed schema validation (e.g. "summary: Field required"), DO NOT call any new tool. Only reformat the data you already obtained into a diff --git a/app/features/agents/tests/test_read_only_guard.py b/app/features/agents/tests/test_read_only_guard.py index c59e232d..6c1a3f4c 100644 --- a/app/features/agents/tests/test_read_only_guard.py +++ b/app/features/agents/tests/test_read_only_guard.py @@ -85,6 +85,29 @@ def test_guard_forbids_new_tools_on_validation_retry() -> None: assert "summary: Field required" in guard +def test_guard_forbids_tool_call_loops() -> None: + """The guard tells the model to finish in one pass and never re-call a tool (#349). + + Regression for the observed failure where a weak 8B model called + ``tool_list_runs`` four times in a row — even though it already had the data — + and blew the output-retry budget (``Exceeded maximum output retries (3)``). 
+ """ + guard = READ_ONLY_INTENT_GUARD + assert "FINISH IN ONE PASS" in guard + assert "AT MOST ONCE" in guard + assert "NEVER call a tool again that has already returned" in guard + assert "STOP calling tools" in guard + + +def test_guard_handles_empty_tool_result() -> None: + """An empty read result is reported in the summary, not retried (#349).""" + guard = READ_ONLY_INTENT_GUARD + assert "EMPTY result" in guard + assert "do NOT retry the tool" in guard + # The wrapped example phrase, newlines/indent collapsed. + assert "No model runs found." in " ".join(guard.split()) + + def test_guard_requires_clarification_for_ambiguous_top_products() -> None: """An ambiguous "top products" ranking gets a clarifying question, not a guess.""" guard = READ_ONLY_INTENT_GUARD From 57cc8948f1572313161c45191f7ccd9e91198111 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 05:16:11 +0200 Subject: [PATCH 20/30] fix(agents): salvage plain-text answer when structured output fails (#351) --- app/features/agents/agents/base.py | 36 +++++- app/features/agents/service.py | 134 ++++++++++++++++++++-- app/features/agents/tests/test_service.py | 86 ++++++++++++++ 3 files changed, 243 insertions(+), 13 deletions(-) diff --git a/app/features/agents/agents/base.py b/app/features/agents/agents/base.py index c69b944e..4ccd86b3 100644 --- a/app/features/agents/agents/base.py +++ b/app/features/agents/agents/base.py @@ -14,7 +14,7 @@ import httpx import structlog -from pydantic_ai import ModelRetry +from pydantic_ai import Agent, ModelRetry from pydantic_ai.models import Model from pydantic_ai.models.fallback import FallbackModel from pydantic_ai.models.openai import OpenAIChatModel @@ -248,6 +248,40 @@ def build_agent_model_with_fallback() -> Model | str: return FallbackModel(primary, fallback) +FINALIZER_SYSTEM_PROMPT = """You are a concise analyst for ForecastLabAI. +Answer the user's question using ONLY the provided tool data. 
Be specific and brief +(2-4 sentences, plain text — no JSON, no preamble). +- If the user asked for a ranking (lowest/highest WAPE, MAE, RMSE, …), name the + specific run/item and its value, and ignore entries whose metric is missing. +- If the data is empty, say so plainly. +- Never invent values, run ids, or entities that are not present in the data. +""" + + +def build_finalizer_agent() -> Agent[None, str]: + """Build a tool-less, plain-text agent that salvages an answer from tool data. + + Weak local models (e.g. ``ollama:llama3.1:8b``) reliably call tools and obtain + the data, but cannot wrap the result in the primary agent's structured + ``PromptedOutput`` schema — they echo the raw tool output and exhaust the + output-retry budget (issue #351). This finalizer takes the data already + obtained and answers in plain text, which weak models *can* do. It has NO + tools (cannot loop) and ``output_type=str`` (cannot fail schema validation), + so it degrades gracefully. Cloud models never need it — it only runs on the + primary agent's misbehavior path. + + Returns: + A configured plain-text :class:`Agent`, primary+fallback model wrapped. + """ + model = build_agent_model_with_fallback() + return Agent( + model=model, + output_type=str, + system_prompt=FINALIZER_SYSTEM_PROMPT, + **get_model_settings(), + ) + + def get_agent_retries() -> int: """Get the configured retry budget for agent tool calls and output validation. 
diff --git a/app/features/agents/service.py b/app/features/agents/service.py index 8009ea08..75468c1a 100644 --- a/app/features/agents/service.py +++ b/app/features/agents/service.py @@ -13,6 +13,7 @@ from __future__ import annotations import asyncio +import json import uuid from collections.abc import AsyncIterator from contextlib import AbstractContextManager @@ -20,9 +21,9 @@ from typing import Any, Literal, cast import structlog -from pydantic_ai import Agent +from pydantic_ai import Agent, capture_run_messages from pydantic_ai.exceptions import UnexpectedModelBehavior -from pydantic_ai.messages import ModelMessage, ModelMessagesTypeAdapter +from pydantic_ai.messages import ModelMessage, ModelMessagesTypeAdapter, ToolReturnPart from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession @@ -40,6 +41,10 @@ logger = structlog.get_logger() +# Cap on the tool-data JSON fed to the plain-text finalizer (#351). Large enough +# for a runs list, small enough to stay well within the model's context budget. +_FINALIZER_MAX_CHARS = 6000 + class SessionNotFoundError(ValueError): """Session not found in the database.""" @@ -266,16 +271,20 @@ async def chat( history_length=len(message_history), ) + # Always bound for the misbehavior handler, even if the run raises before + # capture_run_messages() populates it. 
+ captured_messages: list[ModelMessage] = [] try: - with _sequential_tool_execution(): - result = await asyncio.wait_for( - agent.run( - message, - deps=deps, - message_history=message_history, - ), - timeout=self.settings.agent_timeout_seconds, - ) + with capture_run_messages() as captured_messages: + with _sequential_tool_execution(): + result = await asyncio.wait_for( + agent.run( + message, + deps=deps, + message_history=message_history, + ), + timeout=self.settings.agent_timeout_seconds, + ) except TimeoutError as e: raise TimeoutError( f"Agent response timed out after {self.settings.agent_timeout_seconds} seconds" @@ -307,6 +316,13 @@ async def chat( pending_approval=True, pending_action=salvaged, ) + # A weak local model often calls tools and obtains the data, then + # fails to wrap it in the structured output schema (#351). Salvage a + # plain-text answer from the tool data already captured this run. + answer = await self._salvage_plaintext_answer(message, captured_messages) + if answer is not None: + logger.info("agents.chat_finalizer_salvage", session_id=session_id) + return ChatResponse(session_id=session_id, message=answer) return ChatResponse( session_id=session_id, message=( @@ -483,8 +499,10 @@ async def stream_chat( default_model = self.settings.agent_default_model provider = default_model.split(":", 1)[0] if ":" in default_model else "" stream_supported = provider != "ollama" + # Always bound for the misbehavior handler (see chat()). + captured_messages: list[ModelMessage] = [] try: - with _sequential_tool_execution(): + with capture_run_messages() as captured_messages, _sequential_tool_execution(): async with asyncio.timeout(self.settings.agent_timeout_seconds): final_result: Any usage: Any @@ -694,6 +712,29 @@ async def stream_chat( timestamp=misbehavior_now, ) return + # A weak local model often calls tools and obtains the data, then + # fails to wrap it in the structured output schema (#351). 
Salvage a + # plain-text answer from the tool data already captured this run and + # emit it as a normal reply rather than an error. + answer = await self._salvage_plaintext_answer(message, captured_messages) + if answer is not None: + logger.info("agents.stream_chat_finalizer_salvage", session_id=session_id) + yield StreamEvent( + event_type="text_delta", + data={"delta": answer}, + timestamp=misbehavior_now, + ) + yield StreamEvent( + event_type="complete", + data={ + "message": answer, + "tokens_used": 0, + "tool_calls_count": deps.tool_call_count, + "pending_approval": False, + }, + timestamp=misbehavior_now, + ) + return yield StreamEvent( event_type="error", data={ @@ -941,6 +982,75 @@ def _salvage_pending_action( now=now, ) + @staticmethod + def _extract_tool_payloads(captured: list[ModelMessage]) -> list[dict[str, Any]]: + """Pull every tool return out of a captured run's message trace. + + Used by :meth:`_salvage_plaintext_answer` to recover the data a weak + model fetched before it failed structured-output validation (#351). + + Args: + captured: Messages captured via ``capture_run_messages`` (may be empty + when the run failed before any tool returned). + + Returns: + One ``{"tool", "result"}`` dict per ``ToolReturnPart``, in order. + """ + payloads: list[dict[str, Any]] = [] + for message in captured: + for part in getattr(message, "parts", []): + if isinstance(part, ToolReturnPart): + payloads.append({"tool": part.tool_name, "result": part.content}) + return payloads + + async def _salvage_plaintext_answer( + self, + message: str, + captured: list[ModelMessage], + ) -> str | None: + """Answer in plain text from tool data when structured output failed (#351). + + A weak local model (e.g. ``ollama:llama3.1:8b``) reliably calls the read + tool and gets the data, but echoes the raw tool result instead of the + primary agent's ``PromptedOutput`` schema, exhausting the output-retry + budget. 
The data was obtained, though — so hand it to a tool-less, + ``str``-output finalizer that answers the user's question directly. The + finalizer cannot loop (no tools) or fail schema validation (plain text). + + Args: + message: The original user message. + captured: Messages captured from the failed run. + + Returns: + The finalizer's plain-text answer, or ``None`` when no tool data was + obtained or the finalizer itself errors (caller falls back to the + generic recoverable error). + """ + payloads = self._extract_tool_payloads(captured) + if not payloads: + return None + try: + from app.features.agents.agents.base import build_finalizer_agent + + data = json.dumps(payloads, default=str)[:_FINALIZER_MAX_CHARS] + prompt = ( + f"User question:\n{message}\n\n" + f"Data retrieved from tools (JSON):\n{data}\n\n" + "Answer the user's question concisely from this data." + ) + finalizer = build_finalizer_agent() + result = await asyncio.wait_for( + finalizer.run(prompt), + timeout=self.settings.agent_timeout_seconds, + ) + text = str(result.output).strip() + return text or None + except Exception: + # Best-effort: a finalizer failure must never replace the original + # recoverable error with a crash. 
+ logger.warning("agents.finalizer_fallback_failed", exc_info=True) + return None + def _record_pending_action( self, session: AgentSession, diff --git a/app/features/agents/tests/test_service.py b/app/features/agents/tests/test_service.py index 888260ec..bd4e51b8 100644 --- a/app/features/agents/tests/test_service.py +++ b/app/features/agents/tests/test_service.py @@ -14,6 +14,7 @@ ModelRequest, ModelResponse, TextPart, + ToolReturnPart, UserPromptPart, ) @@ -335,6 +336,49 @@ async def test_chat_model_misbehavior_returns_friendly_message( assert "invalid tool call" in response.message assert "exceeded max retries" not in response.message + @pytest.mark.asyncio + async def test_chat_finalizer_salvages_answer_on_misbehavior( + self, + sample_active_session: AgentSession, + ) -> None: + """When tools fetched data but structured output failed, salvage a reply (#351). + + A weak local model calls the read tool and gets the data, then can't wrap + it in the ExperimentReport schema and exhausts the output-retry budget. + The service then asks a tool-less finalizer to answer in plain text — the + user gets the answer instead of the generic "invalid tool call" error. + """ + service = AgentService() + mock_db = AsyncMock() + + mock_result = MagicMock() + mock_result.scalar_one_or_none.return_value = sample_active_session + mock_db.execute.return_value = mock_result + + mock_agent = MagicMock() + mock_agent.run = AsyncMock( + side_effect=UnexpectedModelBehavior("Exceeded maximum output retries (3)") + ) + + salvaged_answer = "The lowest WAPE is the naive run 2fad611b (18.93)." 
+ with ( + patch.object(service, "_get_agent", return_value=mock_agent), + patch.object( + service, + "_salvage_plaintext_answer", + AsyncMock(return_value=salvaged_answer), + ), + ): + response = await service.chat( + db=mock_db, + session_id=sample_active_session.session_id, + message="List the most recent model runs and tell me which has the lowest WAPE.", + ) + + assert response.message == salvaged_answer + assert response.pending_approval is False + assert "invalid tool call" not in response.message + @pytest.mark.asyncio async def test_chat_runs_tools_sequentially( self, @@ -1143,3 +1187,45 @@ def _run_stream(message: str, *, deps: AgentDeps, message_history: Any) -> _Stub assert approval_events[0].data["action"].action_type == "save_scenario" assert sample_active_session.status == SessionStatus.AWAITING_APPROVAL.value assert sample_active_session.pending_action is not None + + +class TestFinalizerSalvage: + """The plain-text finalizer fallback used on structured-output failure (#351).""" + + def test_extract_tool_payloads_pulls_tool_returns(self) -> None: + """Tool returns are extracted from a captured run trace, in order.""" + captured: list[ModelMessage] = [ + ModelRequest(parts=[UserPromptPart(content="List runs")]), + ModelResponse(parts=[TextPart(content="{}")]), + ModelRequest( + parts=[ + ToolReturnPart( + tool_name="tool_list_runs", + content={"runs": [{"run_id": "abc", "wape": 18.93}]}, + tool_call_id="call-1", + ) + ] + ), + ] + + payloads = AgentService._extract_tool_payloads(captured) + + assert payloads == [ + {"tool": "tool_list_runs", "result": {"runs": [{"run_id": "abc", "wape": 18.93}]}} + ] + + def test_extract_tool_payloads_empty_when_no_tool_returns(self) -> None: + """No tool returns (model failed before any tool ran) yields an empty list.""" + captured: list[ModelMessage] = [ + ModelRequest(parts=[UserPromptPart(content="List runs")]), + ModelResponse(parts=[TextPart(content='{"runs": []}')]), + ] + + assert 
AgentService._extract_tool_payloads(captured) == [] + + @pytest.mark.asyncio + async def test_salvage_returns_none_without_tool_data(self) -> None: + """With no captured tool data, salvage returns None (caller emits the error).""" + service = AgentService() + result = await service._salvage_plaintext_answer("any question", []) + assert result is None From ae25be87baf5a7dc499d6a3015f186e3b8c63407 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 05:24:01 +0200 Subject: [PATCH 21/30] fix(agents): compact tool data for finalizer to fix metric ranking (#351) --- app/features/agents/service.py | 61 +++++++++++++++++++++-- app/features/agents/tests/test_service.py | 44 ++++++++++++++++ 2 files changed, 100 insertions(+), 5 deletions(-) diff --git a/app/features/agents/service.py b/app/features/agents/service.py index 75468c1a..6372fd9c 100644 --- a/app/features/agents/service.py +++ b/app/features/agents/service.py @@ -41,9 +41,30 @@ logger = structlog.get_logger() -# Cap on the tool-data JSON fed to the plain-text finalizer (#351). Large enough -# for a runs list, small enough to stay well within the model's context budget. -_FINALIZER_MAX_CHARS = 6000 +# Cap on the tool-data JSON fed to the plain-text finalizer (#351). With the +# verbose keys below stripped, a full runs page fits comfortably; the cap is a +# context-budget backstop for pathological payloads. +_FINALIZER_MAX_CHARS = 8000 + +# Verbose, decision-irrelevant keys stripped from tool results before they are +# handed to the finalizer (#351). Dropping these keeps every run's identity + +# metrics (e.g. WAPE) inside the budget, so a ranking question sees ALL runs +# instead of just the first one or two — the bug where the finalizer reported +# 99.0 as "lowest" while the true minimum (18.93) had been truncated away. 
+_FINALIZER_DROP_KEYS = frozenset( + { + "model_config", + "model_config_data", + "feature_config", + "runtime_info", + "agent_context", + "config_hash", + "artifact_hash", + "artifact_uri", + "artifact_size_bytes", + "error_message", + } +) class SessionNotFoundError(ValueError): @@ -1003,6 +1024,31 @@ def _extract_tool_payloads(captured: list[ModelMessage]) -> list[dict[str, Any]] payloads.append({"tool": part.tool_name, "result": part.content}) return payloads + @classmethod + def _compact_for_finalizer(cls, obj: object) -> object: + """Recursively strip verbose, decision-irrelevant keys from tool data (#351). + + Keeps each entry's identity + metrics while dropping bulky nested config + / runtime blobs, so a full result set fits in the finalizer's budget and + a ranking sees every entry. Pure/serialisation-only — no I/O. + + Args: + obj: Any JSON-ish value extracted from a tool return. + + Returns: + The same structure with :data:`_FINALIZER_DROP_KEYS` removed at every + dict level. + """ + if isinstance(obj, dict): + return { + k: cls._compact_for_finalizer(v) + for k, v in obj.items() + if k not in _FINALIZER_DROP_KEYS + } + if isinstance(obj, list): + return [cls._compact_for_finalizer(v) for v in obj] + return obj + async def _salvage_plaintext_answer( self, message: str, @@ -1032,11 +1078,16 @@ async def _salvage_plaintext_answer( try: from app.features.agents.agents.base import build_finalizer_agent - data = json.dumps(payloads, default=str)[:_FINALIZER_MAX_CHARS] + compact = self._compact_for_finalizer(payloads) + data = json.dumps(compact, default=str)[:_FINALIZER_MAX_CHARS] prompt = ( f"User question:\n{message}\n\n" f"Data retrieved from tools (JSON):\n{data}\n\n" - "Answer the user's question concisely from this data." + "Answer the user's question concisely from this data. If the " + "question asks for the lowest/highest of a metric (e.g. 
WAPE), " + "compare that metric across ALL entries that have it, ignore " + "entries where it is missing/null, and report the true " + "minimum/maximum with its value." ) finalizer = build_finalizer_agent() result = await asyncio.wait_for( diff --git a/app/features/agents/tests/test_service.py b/app/features/agents/tests/test_service.py index bd4e51b8..09413aa6 100644 --- a/app/features/agents/tests/test_service.py +++ b/app/features/agents/tests/test_service.py @@ -1229,3 +1229,47 @@ async def test_salvage_returns_none_without_tool_data(self) -> None: service = AgentService() result = await service._salvage_plaintext_answer("any question", []) assert result is None + + def test_compact_for_finalizer_strips_verbose_keys_keeps_metrics(self) -> None: + """Compaction drops bulky config/runtime blobs but keeps identity + metrics (#351). + + Regression for the finalizer reporting 99.0 as "lowest WAPE" when the + true minimum (18.93) had been truncated out of the oversized payload. + """ + raw = [ + { + "tool": "tool_list_runs", + "result": { + "runs": [ + { + "run_id": "a", + "model_type": "seasonal_naive", + "metrics": {"wape": 99.0}, + "model_config_data": {"x": "y" * 500}, + "runtime_info": {"python": "3.12"}, + "artifact_uri": "demo/seasonal-model_a.joblib", + }, + { + "run_id": "b", + "model_type": "naive", + "metrics": {"wape": 18.93}, + "feature_config": {"lots": "of stuff"}, + }, + ] + }, + } + ] + + compact = AgentService._compact_for_finalizer(raw) + runs = compact[0]["result"]["runs"] + + # Identity + metrics survive for BOTH runs (so a ranking sees 18.93). + assert runs[0]["run_id"] == "a" + assert runs[0]["metrics"] == {"wape": 99.0} + assert runs[1]["run_id"] == "b" + assert runs[1]["metrics"] == {"wape": 18.93} + # Verbose blobs are gone. 
+ assert "model_config_data" not in runs[0] + assert "runtime_info" not in runs[0] + assert "artifact_uri" not in runs[0] + assert "feature_config" not in runs[1] From 645047e3cafe9926bef5dba8c0c6d6aa8ba0108f Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 06:29:42 +0200 Subject: [PATCH 22/30] feat(api,db): add forecast champion selector backend (#353) --- alembic/env.py | 1 + ...b667d321603c_create_model_selection_run.py | 129 ++++ app/features/model_selection/__init__.py | 9 + app/features/model_selection/explanations.py | 97 +++ app/features/model_selection/models.py | 93 +++ app/features/model_selection/ranking.py | 283 +++++++++ app/features/model_selection/routes.py | 174 ++++++ app/features/model_selection/schemas.py | 303 ++++++++++ app/features/model_selection/service.py | 568 ++++++++++++++++++ .../model_selection/tests/__init__.py | 1 + .../model_selection/tests/conftest.py | 301 ++++++++++ .../tests/test_explanations.py | 44 ++ .../model_selection/tests/test_models.py | 41 ++ .../model_selection/tests/test_ranking.py | 205 +++++++ .../model_selection/tests/test_routes.py | 180 ++++++ .../tests/test_routes_integration.py | 138 +++++ .../model_selection/tests/test_schemas.py | 81 +++ .../model_selection/tests/test_service.py | 222 +++++++ app/main.py | 2 + 19 files changed, 2872 insertions(+) create mode 100644 alembic/versions/b667d321603c_create_model_selection_run.py create mode 100644 app/features/model_selection/__init__.py create mode 100644 app/features/model_selection/explanations.py create mode 100644 app/features/model_selection/models.py create mode 100644 app/features/model_selection/ranking.py create mode 100644 app/features/model_selection/routes.py create mode 100644 app/features/model_selection/schemas.py create mode 100644 app/features/model_selection/service.py create mode 100644 app/features/model_selection/tests/__init__.py create mode 100644 app/features/model_selection/tests/conftest.py create mode 100644 
app/features/model_selection/tests/test_explanations.py create mode 100644 app/features/model_selection/tests/test_models.py create mode 100644 app/features/model_selection/tests/test_ranking.py create mode 100644 app/features/model_selection/tests/test_routes.py create mode 100644 app/features/model_selection/tests/test_routes_integration.py create mode 100644 app/features/model_selection/tests/test_schemas.py create mode 100644 app/features/model_selection/tests/test_service.py diff --git a/alembic/env.py b/alembic/env.py index 4ce8f0e1..2cadd971 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -18,6 +18,7 @@ from app.features.data_platform import models as data_platform_models # noqa: F401 from app.features.explainability import models as explainability_models # noqa: F401 from app.features.jobs import models as jobs_models # noqa: F401 +from app.features.model_selection import models as model_selection_models # noqa: F401 from app.features.rag import models as rag_models # noqa: F401 from app.features.registry import models as registry_models # noqa: F401 from app.features.scenarios import models as scenarios_models # noqa: F401 diff --git a/alembic/versions/b667d321603c_create_model_selection_run.py b/alembic/versions/b667d321603c_create_model_selection_run.py new file mode 100644 index 00000000..e3dcaa2a --- /dev/null +++ b/alembic/versions/b667d321603c_create_model_selection_run.py @@ -0,0 +1,129 @@ +"""create_model_selection_run + +Revision ID: b667d321603c +Revises: c1d2e3f40512 +Create Date: 2026-06-01 05:58:51.986105 + +Creates the ``model_selection_run`` table for the Forecast Champion Selector +backend (issue #353). One row per ``POST /model-selection/run`` — an auditable +record of which candidate models competed for a (store, product) pair, over +which window/policy, and which model won. 
+ +JSONB snapshot columns mirror the ``batch_job`` precedent +(``c1d2e3f40512_create_batch_tables``): every flexible payload (candidate +configs, policy, availability, ranking, per-candidate results incl. fold chart +data, winner metrics, forecast summary, business summary) is JSONB so the +eventual UI PRP can add keys without a schema migration. ``candidate_results`` +holds the full per-candidate detail (incl. fold actuals/predictions) so a +``GET`` rebuilds the same ``chart_data`` payload the originating ``/run`` +returned — without it the chart's fold-stability and actual-vs-predicted +overlays could not be reconstructed. +""" + +from collections.abc import Sequence + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "b667d321603c" +down_revision: str | None = "c1d2e3f40512" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Apply migration.""" + op.create_table( + "model_selection_run", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("selection_id", sa.String(length=32), nullable=False), + sa.Column("store_id", sa.Integer(), nullable=False), + sa.Column("product_id", sa.Integer(), nullable=False), + sa.Column("start_date", sa.Date(), nullable=False), + sa.Column("end_date", sa.Date(), nullable=False), + sa.Column("forecast_horizon", sa.Integer(), nullable=False), + sa.Column("ranking_metric", sa.String(length=20), nullable=False), + sa.Column("status", sa.String(length=20), nullable=False), + sa.Column("candidate_models", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column("policy_snapshot", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column("availability_snapshot", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("ranking_result", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + 
sa.Column("candidate_results", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("chart_data", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("winner_model_type", sa.String(length=40), nullable=True), + sa.Column("winner_metrics", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("final_model_path", sa.String(length=512), nullable=True), + sa.Column("forecast_result", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("business_summary", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("error_message", sa.String(length=2000), nullable=True), + sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.CheckConstraint( + "status IN ('pending', 'running', 'completed', 'partial', 'failed')", + name="ck_model_selection_run_valid_status", + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index( + op.f("ix_model_selection_run_selection_id"), + "model_selection_run", + ["selection_id"], + unique=True, + ) + op.create_index( + op.f("ix_model_selection_run_store_id"), + "model_selection_run", + ["store_id"], + unique=False, + ) + op.create_index( + op.f("ix_model_selection_run_product_id"), + "model_selection_run", + ["product_id"], + unique=False, + ) + op.create_index( + op.f("ix_model_selection_run_status"), + "model_selection_run", + ["status"], + unique=False, + ) + op.create_index( + "ix_model_selection_run_store_product_created", + "model_selection_run", + ["store_id", "product_id", "created_at"], + unique=False, + ) + op.create_index( + "ix_model_selection_run_status_created", + "model_selection_run", + ["status", "created_at"], + unique=False, + ) + + +def downgrade() -> None: + """Revert migration.""" + 
op.drop_index("ix_model_selection_run_status_created", table_name="model_selection_run") + op.drop_index( + "ix_model_selection_run_store_product_created", table_name="model_selection_run" + ) + op.drop_index(op.f("ix_model_selection_run_status"), table_name="model_selection_run") + op.drop_index(op.f("ix_model_selection_run_product_id"), table_name="model_selection_run") + op.drop_index(op.f("ix_model_selection_run_store_id"), table_name="model_selection_run") + op.drop_index(op.f("ix_model_selection_run_selection_id"), table_name="model_selection_run") + op.drop_table("model_selection_run") diff --git a/app/features/model_selection/__init__.py b/app/features/model_selection/__init__.py new file mode 100644 index 00000000..01931b63 --- /dev/null +++ b/app/features/model_selection/__init__.py @@ -0,0 +1,9 @@ +"""Forecast Champion Selector — backend vertical slice (issue #353). + +Validates a (store, product) pair's data availability, runs comparable +backtests for a set of candidate forecasting models, deterministically ranks +them, selects a champion with a recommendation confidence, persists an +auditable selection run, and optionally trains/predicts with the winner. + +Backend-only by design — the UI is a deliberate follow-up PRP. +""" diff --git a/app/features/model_selection/explanations.py b/app/features/model_selection/explanations.py new file mode 100644 index 00000000..907b974a --- /dev/null +++ b/app/features/model_selection/explanations.py @@ -0,0 +1,97 @@ +"""Deterministic business-explanation layer for the champion selector (#353). + +Pure functions — NO LLM, NO external call. Translates the numeric ranking + +availability into short, deterministic English a business user can read. The +output dict is persisted into ``model_selection_run.business_summary`` and +echoed on the response. 
+""" + +from __future__ import annotations + +from typing import Any + +from app.features.model_selection.schemas import PairAvailabilityResponse, RankingResult + + +def _metric_phrase(metrics: dict[str, float] | None) -> str: + """One-line plain-English metric summary for a ranked model.""" + if not metrics: + return "no metrics available" + return ( + f"WAPE {metrics['wape']:.1f}%, sMAPE {metrics['smape']:.1f}, " + f"MAE {metrics['mae']:.2f}, bias {metrics['bias']:.2f}" + ) + + +def explain_winner( + ranking: RankingResult, + availability: PairAvailabilityResponse | None, +) -> dict[str, Any]: + """Build the deterministic ``business_summary`` payload. + + Always returns a dict; when there is no winner the summary explains why no + model could be recommended. + """ + caveats = [ + "Backtest accuracy reflects historical fit, not a guarantee of future performance.", + "Metrics measure correlation with past demand, not causation.", + ] + + if availability is not None: + data_notes = [ + f"Observed {availability.observed_days} of " + f"{availability.expected_calendar_days} calendar days " + f"({availability.coverage_ratio:.0%} coverage).", + f"Average daily demand {availability.average_daily_demand:.2f}.", + ] + data_notes.extend(availability.warnings) + else: + data_notes = ["No availability snapshot was computed."] + + if ranking.winner is None: + return { + "headline": "No model could be recommended for this pair.", + "winner": None, + "recommendation_confidence": ranking.confidence, + "confidence_reasons": ranking.reasons, + "comparison": None, + "data_notes": data_notes, + "caveats": caveats, + } + + winner = ranking.winner + headline = f"Recommended model: {winner.model_type} ({ranking.confidence} confidence)." 
+ + included = [e for e in ranking.entries if e.included] + runner_up = included[1] if len(included) > 1 else None + if runner_up is not None and runner_up.metrics and winner.metrics: + runner_wape = runner_up.metrics["wape"] + if runner_wape > 0: + lead = (runner_wape - winner.metrics["wape"]) / runner_wape + lead_text = f"{lead:.1%} lower WAPE than the runner-up ({runner_up.model_type})" + else: + lead_text = f"a comparable WAPE to the runner-up ({runner_up.model_type})" + comparison: dict[str, Any] = { + "runner_up_model_type": runner_up.model_type, + "runner_up_summary": _metric_phrase(runner_up.metrics), + "lead_text": lead_text, + } + else: + comparison = { + "runner_up_model_type": None, + "runner_up_summary": None, + "lead_text": "no runner-up was available for comparison", + } + + return { + "headline": headline, + "winner": { + "model_type": winner.model_type, + "summary": _metric_phrase(winner.metrics), + }, + "recommendation_confidence": ranking.confidence, + "confidence_reasons": ranking.reasons, + "comparison": comparison, + "data_notes": data_notes, + "caveats": caveats, + } diff --git a/app/features/model_selection/models.py b/app/features/model_selection/models.py new file mode 100644 index 00000000..ce7c6e20 --- /dev/null +++ b/app/features/model_selection/models.py @@ -0,0 +1,93 @@ +"""ORM models for the Forecast Champion Selector slice (issue #353). + +One table — ``model_selection_run`` — records one ``POST /model-selection/run`` +invocation as an auditable artifact. Mirrors ``app/features/batch/models.py`` +for shape: ``TimestampMixin`` + ``Base``, a string status column with an +allow-list ``CheckConstraint`` in ``__table_args__``, and JSONB columns for the +flexible audit snapshots (candidate configs, policy, availability, ranking, +per-candidate results, chart data, winner metrics, forecast summary, business +summary). 
+""" + +from __future__ import annotations + +import datetime as _dt +from enum import Enum +from typing import Any + +from sqlalchemy import CheckConstraint, Date, DateTime, Index, Integer, String +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column + +from app.core.database import Base +from app.shared.models import TimestampMixin + + +class ModelSelectionStatus(str, Enum): + """Lifecycle states of a selection run. + + Transitions: + - PENDING -> RUNNING -> {COMPLETED, PARTIAL, FAILED} + - PARTIAL fires when >=1 candidate succeeded AND >=1 candidate failed. + - FAILED fires when availability is unusable (fail-fast) OR every + candidate's backtest errored (no valid winner). + """ + + PENDING = "pending" + RUNNING = "running" + COMPLETED = "completed" + PARTIAL = "partial" + FAILED = "failed" + + +class ModelSelectionRun(TimestampMixin, Base): + """A single champion-selection run over one (store, product) pair. + + ``candidate_results`` carries the full per-candidate detail (incl. fold + actuals/predictions) so a ``GET`` rebuilds the same ``chart_data`` payload + the originating ``/run`` returned. ``chart_data`` caches the computed + chart-ready payload so the read path needs no recomputation. 
+ """ + + __tablename__ = "model_selection_run" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + selection_id: Mapped[str] = mapped_column(String(32), unique=True, index=True) + store_id: Mapped[int] = mapped_column(Integer, index=True) + product_id: Mapped[int] = mapped_column(Integer, index=True) + start_date: Mapped[_dt.date] = mapped_column(Date) + end_date: Mapped[_dt.date] = mapped_column(Date) + forecast_horizon: Mapped[int] = mapped_column(Integer) + ranking_metric: Mapped[str] = mapped_column(String(20)) + status: Mapped[str] = mapped_column( + String(20), default=ModelSelectionStatus.PENDING.value, index=True + ) + candidate_models: Mapped[list[dict[str, Any]]] = mapped_column(JSONB, nullable=False) + policy_snapshot: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False) + availability_snapshot: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + ranking_result: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + candidate_results: Mapped[list[dict[str, Any]] | None] = mapped_column(JSONB, nullable=True) + chart_data: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + winner_model_type: Mapped[str | None] = mapped_column(String(40), nullable=True) + winner_metrics: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + final_model_path: Mapped[str | None] = mapped_column(String(512), nullable=True) + forecast_result: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + business_summary: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + error_message: Mapped[str | None] = mapped_column(String(2000), nullable=True) + completed_at: Mapped[_dt.datetime | None] = mapped_column( + DateTime(timezone=True), nullable=True + ) + + __table_args__ = ( + CheckConstraint( + "status IN ('pending', 'running', 'completed', 'partial', 'failed')", + name="ck_model_selection_run_valid_status", + ), + Index( + 
"ix_model_selection_run_store_product_created", + "store_id", + "product_id", + "created_at", + ), + Index("ix_model_selection_run_status_created", "status", "created_at"), + ) diff --git a/app/features/model_selection/ranking.py b/app/features/model_selection/ranking.py new file mode 100644 index 00000000..ecca7587 --- /dev/null +++ b/app/features/model_selection/ranking.py @@ -0,0 +1,283 @@ +"""Pure ranking + confidence logic for the champion selector (issue #353). + +No DB, no I/O — every function here is deterministic and unit-tested directly. +The ranking key and confidence policy implement the PRP's LOCKED decision #6 +(deterministic tie-break chain) and the relative-improvement confidence model. +""" + +from __future__ import annotations + +import math +from dataclasses import dataclass + +from app.features.model_selection.schemas import ( + CandidateResult, + ChartData, + ConfidenceLevel, + FoldChart, + ModelRankEntry, + RankingPolicy, + RankingResult, +) + +# Below this relative WAPE lead over second place, the winner is a near-tie and +# confidence is capped at LOW (the lead is not meaningful). +NEAR_TIE_EPSILON = 0.02 + +# The metric keys that MUST be finite for a candidate to be rankable. rmse is +# carried for the contract but not required (it never enters the sort key). 
+_REQUIRED_FINITE = ("wape", "smape", "mae", "bias") + + +@dataclass(frozen=True) +class NormalizedMetrics: + """The five backtest metrics plus the derived sample size, all floats.""" + + wape: float + smape: float + mae: float + rmse: float + bias: float + sample_size: int + + def as_dict(self) -> dict[str, float]: + """Stable 6-key dict embedded in ``ModelRankEntry.metrics``.""" + return { + "wape": self.wape, + "smape": self.smape, + "mae": self.mae, + "rmse": self.rmse, + "bias": self.bias, + "sample_size": float(self.sample_size), + } + + +def _is_finite(value: float) -> bool: + return not (math.isnan(value) or math.isinf(value)) + + +def normalize_metrics( + aggregated_metrics: dict[str, float] | None, + sample_size: int, +) -> NormalizedMetrics | None: + """Coerce a raw 5-key backtest metric dict into ``NormalizedMetrics``. + + Returns ``None`` (candidate is unrankable) when the dict is missing/empty or + when any of the sort-key metrics (wape, smape, mae, bias) is NaN/inf — e.g. + a WAPE of ``inf`` from an all-zero actual window. + """ + if not aggregated_metrics: + return None + + def _g(key: str) -> float: + raw = aggregated_metrics.get(key) + return float(raw) if raw is not None else math.nan + + metrics = NormalizedMetrics( + wape=_g("wape"), + smape=_g("smape"), + mae=_g("mae"), + rmse=_g("rmse"), + bias=_g("bias"), + sample_size=sample_size, + ) + if not all(_is_finite(getattr(metrics, name)) for name in _REQUIRED_FINITE): + return None + return metrics + + +def _primary_value(metrics: NormalizedMetrics, ranking_metric: str) -> float: + """Value of the primary ranking metric (``bias`` ranks by magnitude).""" + if ranking_metric == "bias": + return abs(metrics.bias) + return float(getattr(metrics, ranking_metric)) + + +def _sort_key( + metrics: NormalizedMetrics, model_type: str, ranking_metric: str +) -> tuple[float, float, float, float, str]: + """Deterministic sort key (LOCKED #6). 
+ + Primary = the chosen ranking metric, then the fixed tie-break chain + ``wape -> smape -> abs(bias) -> mae -> model_type`` with the primary metric + removed from the chain so it is never duplicated. + """ + chain: list[tuple[str, float]] = [ + ("wape", metrics.wape), + ("smape", metrics.smape), + ("bias", abs(metrics.bias)), + ("mae", metrics.mae), + ] + key: list[float] = [_primary_value(metrics, ranking_metric)] + key.extend(value for name, value in chain if name != ranking_metric) + return (key[0], key[1], key[2], key[3], model_type) + + +def rank_candidates( + results: list[CandidateResult], + policy: RankingPolicy, + ranking_metric: str = "wape", + availability_status: str | None = None, +) -> RankingResult: + """Rank completed candidates and pick a deterministic winner. + + Failed/filtered candidates are never hidden — they appear as excluded + ``ModelRankEntry`` rows (``rank=None``) after the ranked winners. + """ + valid: list[tuple[CandidateResult, NormalizedMetrics]] = [] + excluded: list[ModelRankEntry] = [] + + for result in results: + if result.failed: + excluded.append(_excluded_entry(result, result.error or "candidate backtest failed")) + continue + metrics = normalize_metrics(result.aggregated_metrics, result.sample_size) + if metrics is None: + excluded.append(_excluded_entry(result, "missing or non-finite primary metric")) + continue + if metrics.sample_size < policy.minimum_sample_size: + excluded.append( + _excluded_entry( + result, + f"sample_size {metrics.sample_size} below minimum {policy.minimum_sample_size}", + ) + ) + continue + valid.append((result, metrics)) + + if not valid: + return RankingResult( + winner=None, + entries=excluded, + confidence="low", + reasons=["No candidate produced a valid backtest."], + ) + + ordered = sorted(valid, key=lambda pair: _sort_key(pair[1], pair[0].model_type, ranking_metric)) + ranked_entries = [ + ModelRankEntry( + rank=index + 1, + model_type=result.model_type, + params=result.params, + 
included=True, + metrics=metrics.as_dict(), + ) + for index, (result, metrics) in enumerate(ordered) + ] + + confidence, reasons = _confidence(ordered, policy, availability_status) + + return RankingResult( + winner=ranked_entries[0], + entries=ranked_entries + excluded, + confidence=confidence, + reasons=reasons, + ) + + +def _excluded_entry(result: CandidateResult, reason: str) -> ModelRankEntry: + return ModelRankEntry( + rank=None, + model_type=result.model_type, + params=result.params, + included=False, + exclusion_reason=reason, + metrics=None, + ) + + +def _confidence( + ordered: list[tuple[CandidateResult, NormalizedMetrics]], + policy: RankingPolicy, + availability_status: str | None, +) -> tuple[ConfidenceLevel, list[str]]: + """Derive the recommendation confidence from the ranked candidates. + + Order of checks: a single valid candidate, limited availability, or an + over-threshold winner bias all cap confidence at LOW; a clear WAPE lead with + acceptable bias is HIGH; everything in between is MEDIUM. + """ + reasons: list[str] = [] + winner_metrics = ordered[0][1] + + if len(ordered) == 1: + reasons.append("Only one candidate produced a valid backtest.") + return "low", reasons + + second_metrics = ordered[1][1] + if second_metrics.wape > 0: + rel_improvement = (second_metrics.wape - winner_metrics.wape) / second_metrics.wape + else: + rel_improvement = 0.0 + + bias_ok = abs(winner_metrics.bias) <= policy.max_acceptable_abs_bias + + if availability_status == "limited": + reasons.append("Data availability is limited; treat the recommendation cautiously.") + return "low", reasons + if not bias_ok: + reasons.append( + f"Winner bias {winner_metrics.bias:.3f} exceeds the acceptable bound " + f"{policy.max_acceptable_abs_bias:.3f}." 
+ ) + return "low", reasons + if rel_improvement < NEAR_TIE_EPSILON: + reasons.append(f"Winner WAPE lead over second place is {rel_improvement:.1%} — a near tie.") + return "low", reasons + if rel_improvement >= policy.high_confidence_rel_improvement: + reasons.append( + f"Winner WAPE beats second place by {rel_improvement:.1%} " + f"(>= {policy.high_confidence_rel_improvement:.0%})." + ) + return "high", reasons + + reasons.append( + f"Winner leads second place by {rel_improvement:.1%}, below the " + f"{policy.high_confidence_rel_improvement:.0%} high-confidence threshold." + ) + return "medium", reasons + + +def _fold_wape(actuals: list[float], predictions: list[float]) -> float: + """WAPE (%) for one fold; 0.0 when the actual window sums to zero.""" + denominator = sum(abs(a) for a in actuals) + if denominator == 0: + return 0.0 + numerator = sum(abs(a - p) for a, p in zip(actuals, predictions, strict=False)) + return numerator / denominator * 100.0 + + +def build_chart_data(results: list[CandidateResult], ranking: RankingResult) -> ChartData: + """Assemble the chart-ready comparison payload from candidate results. + + Keyed by ``model_type``; when a candidate list repeats a model_type the last + occurrence wins (acceptable for v1 — duplicate model_types are uncommon). 
+ """ + by_type: dict[str, CandidateResult] = {r.model_type: r for r in results} + wape_by_model: dict[str, float] = {} + bias_by_model: dict[str, float] = {} + fold_stability: dict[str, list[float]] = {} + + for entry in ranking.entries: + if not entry.included or entry.metrics is None: + continue + wape_by_model[entry.model_type] = entry.metrics["wape"] + bias_by_model[entry.model_type] = entry.metrics["bias"] + result = by_type.get(entry.model_type) + if result is not None: + fold_stability[entry.model_type] = [ + _fold_wape(fold.actuals, fold.predictions) for fold in result.folds + ] + + winner_folds: list[FoldChart] = [] + if ranking.winner is not None: + winner_result = by_type.get(ranking.winner.model_type) + if winner_result is not None: + winner_folds = winner_result.folds + + return ChartData( + wape_by_model=wape_by_model, + bias_by_model=bias_by_model, + fold_stability=fold_stability, + winner_actual_vs_predicted=winner_folds, + ) diff --git a/app/features/model_selection/routes.py b/app/features/model_selection/routes.py new file mode 100644 index 00000000..f989aac0 --- /dev/null +++ b/app/features/model_selection/routes.py @@ -0,0 +1,174 @@ +"""FastAPI routes for the Forecast Champion Selector slice (issue #353). + +Endpoints (all under ``/model-selection``): +- GET /availability — pair data-availability assessment +- POST /run — run candidate comparison + ranking (200) +- GET /{selection_id} — fetch a persisted selection run +- GET /{selection_id}/ranking — fetch just the ranking block +- POST /{selection_id}/train-winner — train the winning model +- POST /{selection_id}/predict — forecast with the trained winner + +Error mapping mirrors ``app/features/backtesting/routes.py``: ``ValueError`` → +``BadRequestError`` (RFC 7807 400), ``SQLAlchemyError`` → ``DatabaseError`` (500). +``NotFoundError`` / ``BadRequestError`` raised inside the service are +``ForecastLabError`` subclasses and bubble straight to the global handler. 
+""" + +from __future__ import annotations + +from fastapi import APIRouter, Depends, Query, status +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.database import get_db +from app.core.exceptions import BadRequestError, DatabaseError +from app.core.logging import get_logger +from app.features.model_selection.schemas import ( + ModelSelectionRunRequest, + ModelSelectionRunResponse, + PairAvailabilityResponse, + PredictWinnerResponse, + RankingResult, + TrainWinnerResponse, +) +from app.features.model_selection.service import ModelSelectionService + +logger = get_logger(__name__) + +router = APIRouter(prefix="/model-selection", tags=["model-selection"]) + + +@router.get( + "/availability", + response_model=PairAvailabilityResponse, + status_code=status.HTTP_200_OK, + summary="Assess data availability for a (store, product) pair", +) +async def get_availability( + store_id: int = Query(..., ge=1, description="Store ID"), + product_id: int = Query(..., ge=1, description="Product ID"), + forecast_horizon: int = Query(14, ge=1, le=90, description="Forecast horizon in days"), + db: AsyncSession = Depends(get_db), +) -> PairAvailabilityResponse: + """Return coverage, demand, promotion, and a recommended split config.""" + service = ModelSelectionService() + try: + return await service.get_availability(db, store_id, product_id, forecast_horizon) + except ValueError as exc: + raise BadRequestError(message=str(exc)) from exc + except SQLAlchemyError as exc: + raise DatabaseError( + message="Failed to assess availability", details={"error": str(exc)} + ) from exc + + +@router.post( + "/run", + response_model=ModelSelectionRunResponse, + status_code=status.HTTP_200_OK, + summary="Run candidate model comparison and select a champion", +) +async def run_selection( + request: ModelSelectionRunRequest, + db: AsyncSession = Depends(get_db), +) -> ModelSelectionRunResponse: + """Validate availability, backtest candidates, 
rank, and persist the run.""" + logger.info( + "model_selection.request_received", + store_id=request.store_id, + product_id=request.product_id, + n_candidates=len(request.candidate_models), + ranking_metric=request.ranking_metric, + ) + service = ModelSelectionService() + try: + return await service.run_selection(db, request) + except ValueError as exc: + raise BadRequestError(message=str(exc)) from exc + except SQLAlchemyError as exc: + raise DatabaseError( + message="Failed to run model selection", details={"error": str(exc)} + ) from exc + + +@router.get( + "/{selection_id}", + response_model=ModelSelectionRunResponse, + status_code=status.HTTP_200_OK, + summary="Fetch a persisted selection run", +) +async def get_selection( + selection_id: str, + db: AsyncSession = Depends(get_db), +) -> ModelSelectionRunResponse: + """Return the full persisted selection run by id (404 when missing).""" + service = ModelSelectionService() + try: + return await service.get_selection(db, selection_id) + except SQLAlchemyError as exc: + raise DatabaseError( + message="Failed to fetch selection run", details={"error": str(exc)} + ) from exc + + +@router.get( + "/{selection_id}/ranking", + response_model=RankingResult, + status_code=status.HTTP_200_OK, + summary="Fetch the ranking block for a selection run", +) +async def get_ranking( + selection_id: str, + db: AsyncSession = Depends(get_db), +) -> RankingResult: + """Return just the ranking (winner, entries, confidence, reasons).""" + service = ModelSelectionService() + try: + return await service.get_ranking(db, selection_id) + except SQLAlchemyError as exc: + raise DatabaseError(message="Failed to fetch ranking", details={"error": str(exc)}) from exc + + +@router.post( + "/{selection_id}/train-winner", + response_model=TrainWinnerResponse, + status_code=status.HTTP_200_OK, + summary="Train the winning model for a selection run", +) +async def train_winner( + selection_id: str, + db: AsyncSession = Depends(get_db), +) -> 
TrainWinnerResponse: + """Train the champion and store its model bundle path.""" + service = ModelSelectionService() + try: + return await service.train_winner(db, selection_id) + except ValueError as exc: + raise BadRequestError(message=str(exc)) from exc + except SQLAlchemyError as exc: + raise DatabaseError( + message="Failed to train winning model", details={"error": str(exc)} + ) from exc + + +@router.post( + "/{selection_id}/predict", + response_model=PredictWinnerResponse, + status_code=status.HTTP_200_OK, + summary="Forecast with the trained winning model", +) +async def predict_winner( + selection_id: str, + db: AsyncSession = Depends(get_db), +) -> PredictWinnerResponse: + """Generate a horizon forecast from the trained champion bundle.""" + service = ModelSelectionService() + try: + forecast = await service.predict_winner(db, selection_id) + return PredictWinnerResponse(selection_id=selection_id, forecast=forecast) + except ValueError as exc: + raise BadRequestError(message=str(exc)) from exc + except SQLAlchemyError as exc: + raise DatabaseError( + message="Failed to forecast with winning model", details={"error": str(exc)} + ) from exc diff --git a/app/features/model_selection/schemas.py b/app/features/model_selection/schemas.py new file mode 100644 index 00000000..9fc10d37 --- /dev/null +++ b/app/features/model_selection/schemas.py @@ -0,0 +1,303 @@ +"""Pydantic v2 schemas for the Forecast Champion Selector slice (issue #353). + +Request bodies use ``ConfigDict(strict=True)`` per +``docs/_base/SECURITY.md`` § "Pydantic v2 strict mode on FastAPI request +bodies"; the only JSON-non-native fields (``SelectionWindow.start_date`` / +``end_date``) carry ``Field(strict=False, ...)`` so the strict-mode policy +linter (``app/core/tests/test_strict_mode_policy.py``) stays green and ISO-date +JSON strings are accepted on the ``validate_python`` path. 
+ +Enum-like string fields use ``Literal[...]`` (NOT a ``str``-``Enum``) because +strict mode refuses to coerce a JSON string into a str-enum instance — the same +reason ``app/features/batch/schemas.py`` uses literals. + +Response/intermediate models are plain ``BaseModel`` (outputs need no strict +coercion). They form the stable backend contract the eventual UI consumes. + +``SplitConfig`` is reused directly from the backtesting slice (a schema type +with no import cycle back to this slice) to avoid configuration drift. +""" + +from __future__ import annotations + +from datetime import date, datetime +from typing import Any, Literal + +from pydantic import BaseModel, ConfigDict, Field, model_validator + +from app.features.backtesting.schemas import SplitConfig + +# Valid forecasting model_type values — the full ``ModelConfig`` union +# (``app/features/forecasting/schemas.py``). ``lightgbm``/``xgboost`` are opt-in +# extras and may degrade to a failed candidate at runtime when the extra is +# absent (handled in the service, not rejected here). 
ModelType = Literal[
    "naive",
    "seasonal_naive",
    "moving_average",
    "weighted_moving_average",
    "seasonal_average",
    "trend_regression_baseline",
    "random_forest",
    "lightgbm",
    "xgboost",
    "regression",
    "prophet_like",
]

# ``rmse`` is deliberately absent: it is reported but cannot be ranked on.
RankingMetric = Literal["wape", "smape", "mae", "bias"]
SelectionStatusLiteral = Literal["pending", "running", "completed", "partial", "failed"]
ConfidenceLevel = Literal["high", "medium", "low"]
AvailabilityStatus = Literal["ready", "limited", "unusable"]


# =============================================================================
# Request models (strict mode)
# =============================================================================


class SelectionWindow(BaseModel):
    """Inclusive date window the candidate backtests run over."""

    model_config = ConfigDict(strict=True)

    # ``strict=False`` on the two date fields only, so ISO date strings from
    # JSON are accepted while the rest of the model stays strict.
    start_date: date = Field(strict=False, description="Window start (inclusive), YYYY-MM-DD")
    end_date: date = Field(strict=False, description="Window end (inclusive), YYYY-MM-DD")

    @model_validator(mode="after")
    def _check_order(self) -> SelectionWindow:
        """Reject an inverted/zero-length window (surfaced as RFC 7807 422)."""
        # ``<=`` also rejects start_date == end_date.
        if self.end_date <= self.start_date:
            raise ValueError("end_date must be after start_date")
        return self


class CandidateModelConfig(BaseModel):
    """One candidate forecasting model to evaluate.

    ``params`` are the FLAT model-specific parameters (e.g.
    ``{"season_length": 7}``). They are flattened into the forecasting
    ``ModelConfig`` union at the service boundary; unknown params surface as a
    failed candidate with a reason rather than a request rejection.
    """

    model_config = ConfigDict(strict=True)

    model_type: ModelType
    params: dict[str, Any] = Field(default_factory=dict)


class RankingPolicy(BaseModel):
    """Tunable thresholds for ranking filters + confidence.

    ``max_acceptable_abs_bias`` is an ABSOLUTE bias bound in demand units and is
    therefore series-scale dependent; it defaults high enough to be effectively
    disabled so confidence is driven primarily by the relative WAPE lead, the
    valid-candidate count, and the sample size. Set a series-appropriate value
    to enable the bias guard.
    """

    model_config = ConfigDict(strict=True)

    # Default 0 means no candidate is filtered out for sample size.
    minimum_sample_size: int = Field(
        default=0, ge=0, description="Drop candidates whose backtest sample is below this"
    )
    # A fraction in [0, 1], not a percentage.
    high_confidence_rel_improvement: float = Field(
        default=0.10,
        ge=0.0,
        le=1.0,
        description="Relative WAPE lead over 2nd place required for HIGH confidence",
    )
    max_acceptable_abs_bias: float = Field(
        default=1_000_000_000.0,
        ge=0.0,
        description="Absolute winner-bias bound (demand units); high default = guard disabled",
    )


class ModelSelectionRunRequest(BaseModel):
    """``POST /model-selection/run`` request body."""

    model_config = ConfigDict(strict=True)

    store_id: int = Field(..., ge=1, description="Store ID")
    product_id: int = Field(..., ge=1, description="Product ID")
    selection_window: SelectionWindow
    forecast_horizon: int = Field(..., ge=1, le=90, description="Forecast horizon in days")
    ranking_metric: RankingMetric = "wape"
    split_config: SplitConfig = Field(default_factory=SplitConfig)
    # Between 1 and 10 candidates per run.
    candidate_models: list[CandidateModelConfig] = Field(min_length=1, max_length=10)
    feature_frame_version: int = Field(default=1, ge=1, le=2)
    feature_groups: list[str] | None = Field(default=None)
    ranking_policy: RankingPolicy = Field(default_factory=RankingPolicy)
    auto_train_winner: bool = Field(default=False)
    auto_predict: bool = Field(default=False)

    @model_validator(mode="after")
    def _check_consistency(self) -> ModelSelectionRunRequest:
        """Enforce LOCKED decisions #5 and #7 plus V1/feature-group consistency."""
        # The backtest horizon and the forecast horizon must be the same number.
        if self.split_config.horizon != self.forecast_horizon:
            raise ValueError(
                f"split_config.horizon ({self.split_config.horizon}) must equal "
                f"forecast_horizon ({self.forecast_horizon})"
            )
        # Predicting requires a trained winner.
        if self.auto_predict and not self.auto_train_winner:
            raise ValueError("auto_predict requires auto_train_winner=True")
        # Any non-None value (even an empty list) is rejected for V1 frames.
        if self.feature_frame_version == 1 and self.feature_groups is not None:
            raise ValueError(
                "feature_groups is only valid when feature_frame_version=2; "
                "omit it for V1 selection."
            )
        return self


class AvailabilityQuery(BaseModel):
    """Validated query params for ``GET /model-selection/availability``."""

    model_config = ConfigDict(strict=True)

    store_id: int = Field(..., ge=1)
    product_id: int = Field(..., ge=1)
    forecast_horizon: int = Field(default=14, ge=1, le=90)


# =============================================================================
# Intermediate models (service-internal; embedded in JSONB snapshots)
# =============================================================================


class FoldChart(BaseModel):
    """Per-fold chart points for one candidate."""

    fold_index: int
    # The three lists are presumably parallel (same length, one entry per
    # day) — this model does not enforce it.
    dates: list[date]
    actuals: list[float]
    predictions: list[float]


class CandidateResult(BaseModel):
    """One candidate's full backtest outcome (success or failure).

    ``params`` are carried through unchanged so the winning model can be rebuilt
    from the persisted record without re-deriving them.
    """

    # Plain ``str`` here (not ``ModelType``): outputs are not re-validated
    # against the literal set.
    model_type: str
    params: dict[str, Any]
    failed: bool
    error: str | None = None
    aggregated_metrics: dict[str, float] | None = None
    sample_size: int = 0
    config_hash: str | None = None
    folds: list[FoldChart] = Field(default_factory=list)


class ModelRankEntry(BaseModel):
    """One row in the ranking table — a ranked winner/runner-up or an excluded
    (failed/filtered) candidate. Excluded entries keep ``rank=None``."""

    rank: int | None
    model_type: str
    params: dict[str, Any]
    included: bool
    exclusion_reason: str | None = None
    metrics: dict[str, float] | None = None


class RankingResult(BaseModel):
    """Deterministic ranking outcome — persisted into ``ranking_result``."""

    # ``None`` when no candidate produced a rankable result.
    winner: ModelRankEntry | None
    entries: list[ModelRankEntry]
    confidence: ConfidenceLevel
    reasons: list[str]


class WinnerSummary(BaseModel):
    """The champion — flattened for the response top level."""

    model_type: str
    params: dict[str, Any]
    metrics: dict[str, float]
    rank: int


class ChartData(BaseModel):
    """Chart-ready comparison payload (a Success-Criteria deliverable)."""

    # All three dicts are keyed by ``model_type``.
    wape_by_model: dict[str, float]
    bias_by_model: dict[str, float]
    fold_stability: dict[str, list[float]]
    winner_actual_vs_predicted: list[FoldChart]


# =============================================================================
# Response models
# =============================================================================


class PairAvailabilityResponse(BaseModel):
    """``GET /model-selection/availability`` response."""

    store_id: int
    product_id: int
    first_sales_date: date | None
    last_sales_date: date | None
    observed_days: int
    expected_calendar_days: int
    coverage_ratio: float
    missing_days: int
    zero_sale_days: int
    # ``None`` means the promotion count could not be derived.
    promotion_days: int | None
    average_daily_demand: float
    status: AvailabilityStatus
    recommended_split_config: SplitConfig
    warnings: list[str] = Field(default_factory=list)


class ForecastSummary(BaseModel):
    """Forecast output rolled up for the response."""

    points: list[dict[str, Any]]
    total_demand: float
    average_demand: float
    horizon: int


class ModelSelectionRunResponse(BaseModel):
    """``POST /model-selection/run`` and ``GET /model-selection/{id}`` contract."""

    selection_id: str
    store_id: int
    product_id: int
    status: SelectionStatusLiteral
    selection_window: SelectionWindow
    forecast_horizon: int
    # Typed as plain ``str`` on the way out, unlike the request's ``RankingMetric``.
    ranking_metric: str
    availability: PairAvailabilityResponse | None
    ranking: list[ModelRankEntry]
    winner: WinnerSummary | None
    recommendation_confidence: ConfidenceLevel | None
    confidence_reasons: list[str]
    chart_data: ChartData | None
    final_model: dict[str, Any] | None
    forecast: ForecastSummary | None
    business_summary: dict[str, Any] | None
    error_message: str | None
    created_at: datetime
    completed_at: datetime | None


class TrainWinnerResponse(BaseModel):
    """``POST /model-selection/{id}/train-winner`` response."""

    selection_id: str
    model_type: str
    model_path: str


class PredictWinnerResponse(BaseModel):
    """``POST /model-selection/{id}/predict`` response."""

    selection_id: str
    forecast: ForecastSummary
+""" + +from __future__ import annotations + +import uuid +from datetime import UTC, datetime +from typing import TYPE_CHECKING + +from sqlalchemy import and_, func, or_, select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.exceptions import BadRequestError, NotFoundError +from app.core.logging import get_logger +from app.features.backtesting.schemas import SplitConfig +from app.features.data_platform.models import Product, Promotion, SalesDaily, Store +from app.features.model_selection.explanations import explain_winner +from app.features.model_selection.models import ModelSelectionRun, ModelSelectionStatus +from app.features.model_selection.ranking import build_chart_data, rank_candidates +from app.features.model_selection.schemas import ( + AvailabilityStatus, + CandidateModelConfig, + CandidateResult, + ChartData, + FoldChart, + ForecastSummary, + ModelSelectionRunRequest, + ModelSelectionRunResponse, + PairAvailabilityResponse, + RankingResult, + SelectionWindow, + TrainWinnerResponse, + WinnerSummary, +) + +if TYPE_CHECKING: + from app.features.backtesting.schemas import BacktestResponse + from app.features.forecasting.schemas import PredictResponse + +logger = get_logger(__name__) + +# Availability policy constants (module-level; not operator-configurable in v1). 
+MIN_COVERAGE_RATIO = 0.8 +DEFAULT_MIN_TRAIN_SIZE = 30 +MAX_RECOMMENDED_SPLITS = 5 + +_TERMINAL_WITH_WINNER = frozenset( + {ModelSelectionStatus.COMPLETED.value, ModelSelectionStatus.PARTIAL.value} +) + + +class ModelSelectionService: + """Stateless orchestrator — a fresh ``db`` session per method.""" + + # ------------------------------------------------------------------------- + # Availability + # ------------------------------------------------------------------------- + + async def get_availability( + self, + db: AsyncSession, + store_id: int, + product_id: int, + forecast_horizon: int, + split_config: SplitConfig | None = None, + ) -> PairAvailabilityResponse: + """Assess whether a (store, product) pair has enough history to model.""" + store = await db.get(Store, store_id) + if store is None: + raise NotFoundError(message=f"Store {store_id} not found") + product = await db.get(Product, product_id) + if product is None: + raise NotFoundError(message=f"Product {product_id} not found") + + n_splits = split_config.n_splits if split_config else MAX_RECOMMENDED_SPLITS + min_train = split_config.min_train_size if split_config else DEFAULT_MIN_TRAIN_SIZE + + agg = ( + await db.execute( + select( + func.min(SalesDaily.date), + func.max(SalesDaily.date), + func.count(func.distinct(SalesDaily.date)), + func.avg(SalesDaily.quantity), + func.count().filter(SalesDaily.quantity == 0), + ).where( + SalesDaily.store_id == store_id, + SalesDaily.product_id == product_id, + ) + ) + ).one() + first_date, last_date, observed_raw, avg_qty, zero_raw = agg + observed_days = int(observed_raw or 0) + zero_sale_days = int(zero_raw or 0) + average_daily_demand = float(avg_qty) if avg_qty is not None else 0.0 + + warnings: list[str] = [] + + if first_date is None or last_date is None or observed_days == 0: + expected_calendar_days = 0 + coverage_ratio = 0.0 + missing_days = 0 + promotion_days: int | None = 0 + else: + expected_calendar_days = (last_date - first_date).days + 1 + 
coverage_ratio = ( + observed_days / expected_calendar_days if expected_calendar_days > 0 else 0.0 + ) + missing_days = max(0, expected_calendar_days - observed_days) + promotion_days = await self._count_promotion_days(db, store_id, product_id, warnings) + + ready_threshold = min_train + forecast_horizon * n_splits + limited_threshold = min_train + forecast_horizon + status: AvailabilityStatus + if observed_days >= ready_threshold and coverage_ratio >= MIN_COVERAGE_RATIO: + status = "ready" + elif observed_days >= limited_threshold: + status = "limited" + else: + status = "unusable" + + if coverage_ratio and coverage_ratio < MIN_COVERAGE_RATIO and status != "unusable": + warnings.append( + f"Coverage {coverage_ratio:.0%} is below the {MIN_COVERAGE_RATIO:.0%} " + "ready threshold." + ) + + feasible_splits = (observed_days - min_train) // max(forecast_horizon, 1) + recommended_splits = min(20, max(2, min(MAX_RECOMMENDED_SPLITS, feasible_splits))) + recommended_split_config = SplitConfig( + strategy="expanding", + n_splits=recommended_splits, + min_train_size=min_train, + gap=0, + horizon=forecast_horizon, + ) + + return PairAvailabilityResponse( + store_id=store_id, + product_id=product_id, + first_sales_date=first_date, + last_sales_date=last_date, + observed_days=observed_days, + expected_calendar_days=expected_calendar_days, + coverage_ratio=coverage_ratio, + missing_days=missing_days, + zero_sale_days=zero_sale_days, + promotion_days=promotion_days, + average_daily_demand=average_daily_demand, + status=status, + recommended_split_config=recommended_split_config, + warnings=warnings, + ) + + async def _count_promotion_days( + self, + db: AsyncSession, + store_id: int, + product_id: int, + warnings: list[str], + ) -> int | None: + """Count distinct sales dates inside any promotion for the pair. + + Includes chain-wide promos (``promotion.store_id IS NULL``). 
Returns + ``None`` + a warning on any error (an acceptable fallback per the + Success Criteria) — never sums ``(end-start)`` which would double-count + overlapping ranges. + """ + try: + count = await db.scalar( + select(func.count(func.distinct(SalesDaily.date))) + .select_from(SalesDaily) + .join( + Promotion, + and_( + Promotion.product_id == SalesDaily.product_id, + or_( + Promotion.store_id == SalesDaily.store_id, + Promotion.store_id.is_(None), + ), + SalesDaily.date >= Promotion.start_date, + SalesDaily.date <= Promotion.end_date, + ), + ) + .where( + SalesDaily.store_id == store_id, + SalesDaily.product_id == product_id, + ) + ) + return int(count or 0) + except Exception as exc: # promotion_days is best-effort; degrade gracefully + warnings.append(f"promotion_days could not be derived: {exc}") + return None + + # ------------------------------------------------------------------------- + # Orchestration + # ------------------------------------------------------------------------- + + async def run_selection( + self, db: AsyncSession, request: ModelSelectionRunRequest + ) -> ModelSelectionRunResponse: + """Run the full champion-selection workflow and persist the audit row.""" + from pydantic import TypeAdapter # lazy + + from app.features.backtesting.schemas import BacktestConfig # lazy + from app.features.backtesting.service import BacktestingService # lazy + from app.features.forecasting.schemas import ModelConfig # lazy + + adapter: TypeAdapter[object] = TypeAdapter(ModelConfig) + + row = ModelSelectionRun( + selection_id=uuid.uuid4().hex, + status=ModelSelectionStatus.RUNNING.value, + store_id=request.store_id, + product_id=request.product_id, + start_date=request.selection_window.start_date, + end_date=request.selection_window.end_date, + forecast_horizon=request.forecast_horizon, + ranking_metric=request.ranking_metric, + candidate_models=[c.model_dump() for c in request.candidate_models], + 
policy_snapshot=request.ranking_policy.model_dump(mode="json"), + ) + db.add(row) + await db.flush() + logger.info( + "model_selection.run_received", + selection_id=row.selection_id, + store_id=request.store_id, + product_id=request.product_id, + n_candidates=len(request.candidate_models), + ) + + availability = await self.get_availability( + db, + request.store_id, + request.product_id, + request.forecast_horizon, + request.split_config, + ) + row.availability_snapshot = availability.model_dump(mode="json") + logger.info( + "model_selection.availability_checked", + selection_id=row.selection_id, + status=availability.status, + observed_days=availability.observed_days, + ) + + if availability.status == "unusable": # LOCKED #2 — fail fast (400) + message = "Insufficient data for model selection (availability unusable)." + row.status = ModelSelectionStatus.FAILED.value + row.error_message = message + await db.flush() + logger.warning( + "model_selection.run_failed", + selection_id=row.selection_id, + reason="unusable_availability", + ) + raise BadRequestError(message=message) + + results: list[CandidateResult] = [] + backtesting_service = BacktestingService() + for candidate in request.candidate_models: + try: + cfg = adapter.validate_python( + {"model_type": candidate.model_type, **candidate.params} + ) + backtest = await backtesting_service.run_backtest( + db, + request.store_id, + request.product_id, + request.selection_window.start_date, + request.selection_window.end_date, + BacktestConfig( + split_config=request.split_config, + model_config_main=cfg, # type: ignore[arg-type] + include_baselines=False, + store_fold_details=True, + ), + ) + results.append(self._shape_candidate(candidate, backtest)) + logger.info( + "model_selection.candidate_completed", + selection_id=row.selection_id, + model_type=candidate.model_type, + ) + except Exception as exc: # never hide a failed candidate + results.append(self._shape_failed_candidate(candidate, exc)) + logger.warning( + 
"model_selection.candidate_failed", + selection_id=row.selection_id, + model_type=candidate.model_type, + error=str(exc), + ) + + row.candidate_results = [r.model_dump(mode="json") for r in results] + ranking = rank_candidates( + results, request.ranking_policy, request.ranking_metric, availability.status + ) + row.ranking_result = ranking.model_dump(mode="json") + + if ranking.winner is None: # LOCKED #3 — persist failed, return 200 + row.status = ModelSelectionStatus.FAILED.value + row.error_message = "No candidate produced a valid backtest." + row.business_summary = explain_winner(ranking, availability) + row.completed_at = datetime.now(UTC) + await db.flush() + await db.refresh(row) + logger.warning( + "model_selection.run_failed", + selection_id=row.selection_id, + reason="no_valid_winner", + ) + return self._response(row, ranking) + + winner_cfg = adapter.validate_python( + {"model_type": ranking.winner.model_type, **ranking.winner.params} + ) + + if request.auto_train_winner: + from app.features.forecasting.service import ForecastingService # lazy + + train = await ForecastingService().train_model( + db, + request.store_id, + request.product_id, + request.selection_window.start_date, + request.selection_window.end_date, + winner_cfg, # type: ignore[arg-type] + feature_frame_version=request.feature_frame_version, + feature_groups=request.feature_groups, + ) + row.final_model_path = train.model_path + + forecast_warning: str | None = None + if request.auto_predict and row.final_model_path: + from app.features.forecasting.service import ForecastingService # lazy + + try: + prediction = await ForecastingService().predict( + request.store_id, + request.product_id, + request.forecast_horizon, + row.final_model_path, + ) + row.forecast_result = self._forecast_summary( + prediction, request.forecast_horizon + ).model_dump(mode="json") + except Exception as exc: # e.g. 
async def train_winner(self, db: AsyncSession, selection_id: str) -> TrainWinnerResponse:
    """Train the winning model of a persisted selection run (V1 contract).

    Loads the run and its stored ranking, rebuilds the winner's model config
    from ``model_type`` + ``params``, trains it over the run's stored window
    and records the resulting model path on the row.

    Raises:
        BadRequestError: When the stored ranking has no winner.
    """
    from pydantic import TypeAdapter  # lazy

    from app.features.forecasting.schemas import ModelConfig  # lazy
    from app.features.forecasting.service import ForecastingService  # lazy

    row = await self._load(db, selection_id)
    winner = self._load_ranking(row).winner
    if winner is None:
        raise BadRequestError(message="Selection has no winning model to train.")

    # Params are spread after model_type, exactly as stored on the winner entry.
    config_payload = {"model_type": winner.model_type, **winner.params}
    adapter: TypeAdapter[object] = TypeAdapter(ModelConfig)
    winner_config = adapter.validate_python(config_payload)

    forecasting = ForecastingService()
    train = await forecasting.train_model(
        db,
        row.store_id,
        row.product_id,
        row.start_date,
        row.end_date,
        winner_config,  # type: ignore[arg-type]
    )

    trained_path = train.model_path
    row.final_model_path = trained_path
    await db.flush()
    logger.info(
        "model_selection.winner_trained",
        selection_id=row.selection_id,
        model_type=winner.model_type,
    )
    return TrainWinnerResponse(
        selection_id=row.selection_id,
        model_type=winner.model_type,
        model_path=trained_path,
    )
def _forecast_summary(self, prediction: PredictResponse, horizon: int) -> ForecastSummary:
    """Condense a prediction into a ``ForecastSummary``.

    Each forecast point is dumped in JSON mode, the point forecasts are summed
    into the total demand, and the average is that total divided by the number
    of points (``0.0`` when there are none). ``horizon`` is passed through
    unchanged.
    """
    forecasts = prediction.forecasts
    serialized_points = [item.model_dump(mode="json") for item in forecasts]
    total_demand = float(sum(item.forecast for item in forecasts))
    if forecasts:
        average_demand = total_demand / len(forecasts)
    else:
        average_demand = 0.0
    return ForecastSummary(
        points=serialized_points,
        total_demand=total_demand,
        average_demand=average_demand,
        horizon=horizon,
    )
def _response(
    self, row: ModelSelectionRun, ranking: RankingResult
) -> ModelSelectionRunResponse:
    """Map a persisted run row plus its ranking onto the API response.

    Falsy snapshot columns (availability, chart data, forecast, model path)
    surface as ``None``. A winner block is only emitted when the ranking has a
    winner *and* the row status is in ``_TERMINAL_WITH_WINNER``; the
    confidence is only reported when the ranking carries entries or a winner.
    """
    availability: PairAvailabilityResponse | None = None
    if row.availability_snapshot:
        availability = PairAvailabilityResponse.model_validate(row.availability_snapshot)

    chart_data: ChartData | None = None
    if row.chart_data:
        chart_data = ChartData.model_validate(row.chart_data)

    forecast: ForecastSummary | None = None
    if row.forecast_result:
        forecast = ForecastSummary.model_validate(row.forecast_result)

    winner: WinnerSummary | None = None
    top = ranking.winner
    if top is not None and row.status in _TERMINAL_WITH_WINNER:
        winner = WinnerSummary(
            model_type=top.model_type,
            params=top.params,
            metrics=top.metrics or {},
            rank=1,
        )

    has_ranking = ranking.entries or ranking.winner
    confidence = ranking.confidence if has_ranking else None

    final_model = None
    if row.final_model_path:
        final_model = {"model_path": row.final_model_path}

    window = SelectionWindow(start_date=row.start_date, end_date=row.end_date)
    return ModelSelectionRunResponse(
        selection_id=row.selection_id,
        store_id=row.store_id,
        product_id=row.product_id,
        status=row.status,  # type: ignore[arg-type]
        selection_window=window,
        forecast_horizon=row.forecast_horizon,
        ranking_metric=row.ranking_metric,
        availability=availability,
        ranking=ranking.entries,
        winner=winner,
        recommendation_confidence=confidence,
        confidence_reasons=ranking.reasons,
        chart_data=chart_data,
        final_model=final_model,
        forecast=forecast,
        business_summary=row.business_summary,
        error_message=row.error_message,
        created_at=row.created_at,
        completed_at=row.completed_at,
    )
def make_candidate_result(
    model_type: str,
    *,
    wape: float = 20.0,
    smape: float = 15.0,
    mae: float = 5.0,
    rmse: float = 6.0,
    bias: float = 0.5,
    sample_size: int = 28,
    n_folds: int = 2,
    points_per_fold: int = 14,
    params: dict[str, Any] | None = None,
    failed: bool = False,
    error: str | None = None,
    aggregated_metrics: dict[str, float] | None = None,
) -> CandidateResult:
    """Build a ``CandidateResult`` for ranking/chart unit tests.

    With ``failed=True`` the result carries no metrics, no folds and a zero
    sample size (the error text defaults to ``"boom"``). Otherwise it carries
    ``n_folds`` synthetic folds of ``points_per_fold`` consecutive days
    starting at ``TEST_START``, and either the supplied ``aggregated_metrics``
    or a dict assembled from the individual metric arguments.
    """
    candidate_params = params or {}

    if failed:
        return CandidateResult(
            model_type=model_type,
            params=candidate_params,
            failed=True,
            error=error or "boom",
            aggregated_metrics=None,
            sample_size=0,
            folds=[],
        )

    offsets = range(points_per_fold)
    fold_charts: list[FoldChart] = []
    for i in range(n_folds):
        fold_charts.append(
            FoldChart(
                fold_index=i,
                dates=[TEST_START + timedelta(days=i * points_per_fold + j) for j in offsets],
                actuals=[10.0 + j for j in offsets],
                predictions=[10.5 + j for j in offsets],
            )
        )

    # A falsy override (None or an empty dict) falls back to the scalar arguments.
    if aggregated_metrics:
        metrics = aggregated_metrics
    else:
        metrics = {
            "mae": mae,
            "rmse": rmse,
            "smape": smape,
            "wape": wape,
            "bias": bias,
        }

    return CandidateResult(
        model_type=model_type,
        params=candidate_params,
        failed=False,
        aggregated_metrics=metrics,
        sample_size=sample_size,
        config_hash="cafef00d",
        folds=fold_charts,
    )
@pytest.fixture
async def db_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield an async session; wipe model_selection + test data on teardown.

    Teardown removes the selection runs of every registered seeded store, the
    seeded sales rows (recognised by their ``3.33`` unit price) and the
    ``TMSEL-``-prefixed products and stores, then commits. The engine is
    disposed even when the cleanup itself fails.
    """
    settings = get_settings()
    engine = create_async_engine(settings.database_url, echo=False)
    session_maker = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

    try:
        async with session_maker() as session:
            try:
                yield session
            finally:
                # A test that provoked a database error (or left flushed but
                # uncommitted work behind) leaves the session in a state where
                # further statements are rejected. Roll back first so the
                # cleanup below always runs on a clean transaction instead of
                # raising and leaking the seeded rows.
                await session.rollback()

                store_ids = _registered_store_ids()
                if store_ids:
                    await session.execute(
                        delete(ModelSelectionRun).where(ModelSelectionRun.store_id.in_(store_ids))
                    )
                # Sales rows go before the products/stores they belong to.
                await session.execute(
                    delete(SalesDaily).where(SalesDaily.unit_price == Decimal("3.33"))
                )
                await session.execute(delete(Product).where(Product.sku.like("TMSEL-%")))
                await session.execute(delete(Store).where(Store.code.like("TMSEL-%")))
                await session.commit()
    finally:
        # Release pooled connections even if the cleanup above raised.
        await engine.dispose()
+ for i in range(n_days): + d = TEST_START + timedelta(days=i) + await db.merge( + Calendar( + date=d, + day_of_week=d.weekday(), + month=d.month, + quarter=(d.month - 1) // 3 + 1, + year=d.year, + is_holiday=False, + ) + ) + await db.commit() + + for i in range(n_days): + qty = int(weekly[i % 7]) + db.add( + SalesDaily( + date=TEST_START + timedelta(days=i), + store_id=store.id, + product_id=product.id, + quantity=qty, + unit_price=Decimal("3.33"), + total_amount=Decimal("3.33") * qty, + ) + ) + await db.commit() + return { + "store_id": store.id, + "product_id": product.id, + "start_date": TEST_START.isoformat(), + "end_date": (TEST_START + timedelta(days=n_days - 1)).isoformat(), + "n_days": n_days, + } + + +@pytest.fixture +async def ready_pair(db_session: AsyncSession) -> dict[str, Any]: + """A 120-day pair — ``ready`` for horizon=14, n_splits=5 (threshold 100).""" + return await _seed_pair(db_session, 120) + + +@pytest.fixture +async def limited_pair(db_session: AsyncSession) -> dict[str, Any]: + """A 50-day pair — ``limited`` (>= 44, < 100).""" + return await _seed_pair(db_session, 50) diff --git a/app/features/model_selection/tests/test_explanations.py b/app/features/model_selection/tests/test_explanations.py new file mode 100644 index 00000000..040b8aa3 --- /dev/null +++ b/app/features/model_selection/tests/test_explanations.py @@ -0,0 +1,44 @@ +"""Unit tests for the deterministic business-explanation layer.""" + +from __future__ import annotations + +from app.features.model_selection.explanations import explain_winner +from app.features.model_selection.ranking import rank_candidates +from app.features.model_selection.schemas import RankingPolicy +from app.features.model_selection.tests.conftest import make_availability, make_candidate_result + + +def test_explain_winner_produces_deterministic_summary() -> None: + results = [ + make_candidate_result("winner", wape=10.0), + make_candidate_result("second", wape=20.0), + ] + ranking = rank_candidates(results, 
RankingPolicy(), "wape", availability_status="ready") + summary = explain_winner(ranking, make_availability(status="ready")) + + assert "winner" in summary["headline"] + assert summary["winner"]["model_type"] == "winner" + assert summary["recommendation_confidence"] == ranking.confidence + assert summary["confidence_reasons"] == ranking.reasons + assert summary["comparison"]["runner_up_model_type"] == "second" + assert any("coverage" in note.lower() for note in summary["data_notes"]) + assert summary["caveats"] + + +def test_explain_winner_is_deterministic() -> None: + """Same input → byte-identical output (no LLM, no randomness).""" + results = [ + make_candidate_result("winner", wape=10.0), + make_candidate_result("second", wape=20.0), + ] + ranking = rank_candidates(results, RankingPolicy(), "wape", availability_status="ready") + availability = make_availability(status="ready") + assert explain_winner(ranking, availability) == explain_winner(ranking, availability) + + +def test_explain_winner_handles_no_winner() -> None: + results = [make_candidate_result("x", failed=True, error="boom")] + ranking = rank_candidates(results, RankingPolicy(), "wape") + summary = explain_winner(ranking, make_availability(status="limited")) + assert summary["winner"] is None + assert "No model" in summary["headline"] diff --git a/app/features/model_selection/tests/test_models.py b/app/features/model_selection/tests/test_models.py new file mode 100644 index 00000000..4f69d9e9 --- /dev/null +++ b/app/features/model_selection/tests/test_models.py @@ -0,0 +1,41 @@ +"""Tests for the ModelSelectionRun ORM model + status enum. + +The status CHECK-constraint enforcement is exercised in the integration suite +(it requires the real Postgres CHECK); here we cover the enum values and the +in-Python ORM construction. 
+""" + +from __future__ import annotations + +from datetime import date + +from app.features.model_selection.models import ModelSelectionRun, ModelSelectionStatus + + +def test_status_enum_values() -> None: + assert {s.value for s in ModelSelectionStatus} == { + "pending", + "running", + "completed", + "partial", + "failed", + } + + +def test_model_selection_run_construction_defaults() -> None: + row = ModelSelectionRun( + selection_id="abc123", + store_id=1, + product_id=2, + start_date=date(2026, 1, 1), + end_date=date(2026, 5, 31), + forecast_horizon=14, + ranking_metric="wape", + status=ModelSelectionStatus.RUNNING.value, + candidate_models=[{"model_type": "naive", "params": {}}], + policy_snapshot={"minimum_sample_size": 0}, + ) + assert row.selection_id == "abc123" + assert row.status == "running" + assert row.winner_model_type is None + assert row.final_model_path is None diff --git a/app/features/model_selection/tests/test_ranking.py b/app/features/model_selection/tests/test_ranking.py new file mode 100644 index 00000000..3c01b25a --- /dev/null +++ b/app/features/model_selection/tests/test_ranking.py @@ -0,0 +1,205 @@ +"""Unit tests for the pure ranking + chart logic.""" + +from __future__ import annotations + +import math + +from app.features.model_selection.ranking import ( + build_chart_data, + normalize_metrics, + rank_candidates, +) +from app.features.model_selection.schemas import RankingPolicy +from app.features.model_selection.tests.conftest import make_candidate_result + + +def test_rank_candidates_wape_smape_abs_bias_mae_tie_break() -> None: + """Default sort key is (wape, smape, abs(bias), mae, model_type) (LOCKED #6).""" + # Same wape; B wins on smape; C loses smape but would win mae (irrelevant). 
+ results = [ + make_candidate_result("a_model", wape=10.0, smape=20.0, bias=1.0, mae=9.0), + make_candidate_result("b_model", wape=10.0, smape=15.0, bias=5.0, mae=8.0), + make_candidate_result("c_model", wape=10.0, smape=18.0, bias=0.0, mae=1.0), + ] + ranking = rank_candidates(results, RankingPolicy(), "wape") + order = [e.model_type for e in ranking.entries if e.included] + assert order == ["b_model", "c_model", "a_model"] + assert ranking.winner is not None + assert ranking.winner.model_type == "b_model" + assert ranking.winner.rank == 1 + + +def test_rank_candidates_model_type_breaks_full_tie() -> None: + """Identical metrics fall back to model_type alphabetical for determinism.""" + results = [ + make_candidate_result("zeta", wape=5.0, smape=5.0, bias=0.0, mae=1.0), + make_candidate_result("alpha", wape=5.0, smape=5.0, bias=0.0, mae=1.0), + ] + ranking = rank_candidates(results, RankingPolicy(), "wape") + assert ranking.winner is not None + assert ranking.winner.model_type == "alpha" + + +def test_rank_candidates_non_default_metric_puts_it_first() -> None: + """ranking_metric='mae' ranks by mae first.""" + results = [ + make_candidate_result("high_wape_low_mae", wape=50.0, mae=1.0), + make_candidate_result("low_wape_high_mae", wape=5.0, mae=99.0), + ] + ranking = rank_candidates(results, RankingPolicy(), "mae") + assert ranking.winner is not None + assert ranking.winner.model_type == "high_wape_low_mae" + + +def test_rank_candidates_excludes_missing_or_nan_metrics() -> None: + """A NaN/None primary metric drops the candidate to an excluded entry.""" + good = make_candidate_result("good", wape=10.0) + nan_metrics = make_candidate_result("nan_model", wape=float("nan")) + no_metrics = make_candidate_result("no_metrics", failed=False) + no_metrics.aggregated_metrics = None + ranking = rank_candidates([good, nan_metrics, no_metrics], RankingPolicy(), "wape") + + assert ranking.winner is not None + assert ranking.winner.model_type == "good" + excluded = 
{e.model_type: e for e in ranking.entries if not e.included} + assert set(excluded) == {"nan_model", "no_metrics"} + assert excluded["nan_model"].rank is None + assert excluded["nan_model"].exclusion_reason is not None + + +def test_rank_candidates_normalizes_five_metric_keys_including_rmse() -> None: + """normalize_metrics carries all five keys incl. rmse; entries echo them.""" + metrics = normalize_metrics( + {"mae": 1.0, "rmse": 2.0, "smape": 3.0, "wape": 4.0, "bias": 5.0}, sample_size=20 + ) + assert metrics is not None + assert metrics.rmse == 2.0 + as_dict = metrics.as_dict() + assert set(as_dict) == {"wape", "smape", "mae", "rmse", "bias", "sample_size"} + + ranking = rank_candidates([make_candidate_result("m", rmse=7.5)], RankingPolicy(), "wape") + assert ranking.entries[0].metrics is not None + assert ranking.entries[0].metrics["rmse"] == 7.5 + + +def test_normalize_metrics_rejects_inf_wape() -> None: + """An inf WAPE (all-zero actuals) is unrankable.""" + assert ( + normalize_metrics( + {"mae": 1.0, "rmse": 2.0, "smape": 3.0, "wape": math.inf, "bias": 0.0}, 10 + ) + is None + ) + + +def test_rank_candidates_excludes_below_minimum_sample_size() -> None: + """A candidate below the policy sample floor is excluded.""" + results = [ + make_candidate_result("ok", wape=10.0, sample_size=40), + make_candidate_result("tiny", wape=1.0, sample_size=5), + ] + ranking = rank_candidates(results, RankingPolicy(minimum_sample_size=30), "wape") + assert ranking.winner is not None + assert ranking.winner.model_type == "ok" + excluded = [e for e in ranking.entries if not e.included] + assert excluded[0].model_type == "tiny" + + +def test_confidence_high_when_winner_beats_second_by_10_percent() -> None: + """A >=10% relative WAPE lead with acceptable bias yields HIGH confidence.""" + results = [ + make_candidate_result("winner", wape=10.0, bias=0.1), + make_candidate_result("second", wape=20.0, bias=0.1), + ] + ranking = rank_candidates(results, RankingPolicy(), "wape", 
availability_status="ready") + assert ranking.winner is not None + assert ranking.winner.model_type == "winner" + assert ranking.confidence == "high" + + +def test_confidence_low_for_single_valid_candidate() -> None: + ranking = rank_candidates([make_candidate_result("solo", wape=10.0)], RankingPolicy(), "wape") + assert ranking.confidence == "low" + + +def test_confidence_low_for_near_tie() -> None: + """A sub-epsilon lead is a near tie → LOW.""" + results = [ + make_candidate_result("a", wape=10.0), + make_candidate_result("b", wape=10.05), + ] + ranking = rank_candidates(results, RankingPolicy(), "wape", availability_status="ready") + assert ranking.confidence == "low" + + +def test_confidence_medium_when_lead_below_high_threshold() -> None: + """A 5% lead (between epsilon and 10%) is MEDIUM.""" + results = [ + make_candidate_result("a", wape=9.5), + make_candidate_result("b", wape=10.0), + ] + ranking = rank_candidates(results, RankingPolicy(), "wape", availability_status="ready") + assert ranking.confidence == "medium" + + +def test_confidence_low_when_availability_limited() -> None: + """Limited availability caps confidence at LOW even with a clear lead.""" + results = [ + make_candidate_result("winner", wape=10.0), + make_candidate_result("second", wape=20.0), + ] + ranking = rank_candidates(results, RankingPolicy(), "wape", availability_status="limited") + assert ranking.confidence == "low" + + +def test_confidence_low_when_bias_over_threshold() -> None: + """A winner bias above the policy bound caps confidence at LOW.""" + results = [ + make_candidate_result("winner", wape=10.0, bias=50.0), + make_candidate_result("second", wape=20.0, bias=0.0), + ] + ranking = rank_candidates( + results, RankingPolicy(max_acceptable_abs_bias=1.0), "wape", availability_status="ready" + ) + assert ranking.confidence == "low" + + +def test_all_failed_candidates_yield_no_winner() -> None: + results = [ + make_candidate_result("x", failed=True, error="train error"), + 
make_candidate_result("y", failed=True, error="value error"), + ] + ranking = rank_candidates(results, RankingPolicy(), "wape") + assert ranking.winner is None + assert ranking.confidence == "low" + assert all(not e.included for e in ranking.entries) + + +def test_winner_entry_carries_params_for_rebuild() -> None: + """The winner entry preserves the original candidate params.""" + results = [ + make_candidate_result("seasonal_naive", wape=10.0, params={"season_length": 7}), + make_candidate_result("naive", wape=20.0, params={}), + ] + ranking = rank_candidates(results, RankingPolicy(), "wape") + assert ranking.winner is not None + assert ranking.winner.model_type == "seasonal_naive" + assert ranking.winner.params == {"season_length": 7} + + +def test_chart_data_has_wape_bias_fold_stability_and_winner_actual_vs_predicted() -> None: + """build_chart_data populates all four chart series.""" + results = [ + make_candidate_result("winner", wape=10.0, n_folds=3), + make_candidate_result("second", wape=20.0, n_folds=3), + ] + ranking = rank_candidates(results, RankingPolicy(), "wape") + chart = build_chart_data(results, ranking) + + assert set(chart.wape_by_model) == {"winner", "second"} + assert chart.wape_by_model["winner"] == 10.0 + assert set(chart.bias_by_model) == {"winner", "second"} + assert len(chart.fold_stability["winner"]) == 3 + assert all(isinstance(v, float) for v in chart.fold_stability["winner"]) + assert len(chart.winner_actual_vs_predicted) == 3 + assert chart.winner_actual_vs_predicted[0].actuals diff --git a/app/features/model_selection/tests/test_routes.py b/app/features/model_selection/tests/test_routes.py new file mode 100644 index 00000000..7cfb35f5 --- /dev/null +++ b/app/features/model_selection/tests/test_routes.py @@ -0,0 +1,180 @@ +"""Unit route tests — service methods mocked, exercised over the HTTP boundary. + +``get_db`` is overridden with a mock session; the service is patched at the +class level so the routes are tested in isolation. 
Error paths assert the +RFC 7807 problem-detail shape. +""" + +from __future__ import annotations + +from collections.abc import AsyncGenerator +from contextlib import asynccontextmanager +from datetime import UTC, date, datetime +from typing import Any +from unittest.mock import AsyncMock + +import pytest +from httpx import ASGITransport, AsyncClient + +from app.core.database import get_db +from app.core.exceptions import BadRequestError, NotFoundError +from app.features.model_selection.schemas import ( + ModelRankEntry, + ModelSelectionRunResponse, + SelectionWindow, + WinnerSummary, +) +from app.features.model_selection.service import ModelSelectionService +from app.main import app + + +@asynccontextmanager +async def _client() -> AsyncGenerator[AsyncClient, None]: + async def override_get_db() -> AsyncGenerator[AsyncMock, None]: + yield AsyncMock() + + app.dependency_overrides[get_db] = override_get_db + try: + async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as ac: + yield ac + finally: + app.dependency_overrides.pop(get_db, None) + + +def _assert_problem_detail(body: dict[str, Any], expected_status: int) -> None: + for key in ("type", "title", "status", "detail"): + assert key in body, f"missing RFC 7807 field: {key}" + assert body["status"] == expected_status + + +def _run_response() -> ModelSelectionRunResponse: + metrics = { + "wape": 10.0, + "smape": 8.0, + "mae": 4.0, + "rmse": 5.0, + "bias": 0.1, + "sample_size": 28.0, + } + return ModelSelectionRunResponse( + selection_id="sel123", + store_id=1, + product_id=1, + status="completed", + selection_window=SelectionWindow(start_date=date(2026, 1, 1), end_date=date(2026, 5, 31)), + forecast_horizon=14, + ranking_metric="wape", + availability=None, + ranking=[ + ModelRankEntry(rank=1, model_type="naive", params={}, included=True, metrics=metrics) + ], + winner=WinnerSummary(model_type="naive", params={}, metrics=metrics, rank=1), + recommendation_confidence="high", + 
confidence_reasons=["clear lead"], + chart_data=None, + final_model=None, + forecast=None, + business_summary=None, + error_message=None, + created_at=datetime.now(UTC), + completed_at=None, + ) + + +def _valid_run_body(**overrides: Any) -> dict[str, Any]: + body: dict[str, Any] = { + "store_id": 1, + "product_id": 1, + "selection_window": {"start_date": "2026-01-01", "end_date": "2026-05-31"}, + "forecast_horizon": 14, + "split_config": { + "strategy": "expanding", + "n_splits": 5, + "min_train_size": 30, + "gap": 0, + "horizon": 14, + }, + "candidate_models": [{"model_type": "naive", "params": {}}], + } + body.update(overrides) + return body + + +async def test_run_returns_200(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + ModelSelectionService, "run_selection", AsyncMock(return_value=_run_response()) + ) + async with _client() as ac: + response = await ac.post("/model-selection/run", json=_valid_run_body()) + assert response.status_code == 200 + body = response.json() + assert body["selection_id"] == "sel123" + assert body["recommendation_confidence"] == "high" + assert "confidence" not in body + + +async def test_run_validation_error_returns_problem_json() -> None: + """auto_predict without auto_train_winner is rejected by the validator (422).""" + async with _client() as ac: + response = await ac.post( + "/model-selection/run", + json=_valid_run_body(auto_predict=True, auto_train_winner=False), + ) + assert response.status_code == 422 + _assert_problem_detail(response.json(), 422) + + +async def test_routes_return_problem_json_on_bad_request( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + ModelSelectionService, + "run_selection", + AsyncMock(side_effect=BadRequestError(message="availability unusable")), + ) + async with _client() as ac: + response = await ac.post("/model-selection/run", json=_valid_run_body()) + assert response.status_code == 400 + _assert_problem_detail(response.json(), 400) + + +async def 
test_get_selection_not_found_returns_problem_json( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + ModelSelectionService, + "get_selection", + AsyncMock(side_effect=NotFoundError(message="Selection run missing not found")), + ) + async with _client() as ac: + response = await ac.get("/model-selection/missing") + assert response.status_code == 404 + _assert_problem_detail(response.json(), 404) + + +async def test_availability_returns_200(monkeypatch: pytest.MonkeyPatch) -> None: + from app.features.model_selection.tests.conftest import make_availability + + monkeypatch.setattr( + ModelSelectionService, + "get_availability", + AsyncMock(return_value=make_availability(status="ready")), + ) + async with _client() as ac: + response = await ac.get( + "/model-selection/availability", + params={"store_id": 1, "product_id": 1, "forecast_horizon": 14}, + ) + assert response.status_code == 200 + assert response.json()["status"] == "ready" + + +async def test_availability_rejects_bad_query() -> None: + """store_id < 1 fails Query validation → 422 problem+json.""" + async with _client() as ac: + response = await ac.get( + "/model-selection/availability", + params={"store_id": 0, "product_id": 1}, + ) + assert response.status_code == 422 + _assert_problem_detail(response.json(), 422) diff --git a/app/features/model_selection/tests/test_routes_integration.py b/app/features/model_selection/tests/test_routes_integration.py new file mode 100644 index 00000000..a6440f71 --- /dev/null +++ b/app/features/model_selection/tests/test_routes_integration.py @@ -0,0 +1,138 @@ +"""Integration tests for the model_selection slice against real Postgres. + +Marked ``@pytest.mark.integration`` — require ``docker compose up -d`` + an +applied ``alembic upgrade head``. 
+""" + +from __future__ import annotations + +from typing import Any + +import pytest +from httpx import AsyncClient +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession + +pytestmark = pytest.mark.integration + + +def _run_body( + pair: dict[str, Any], extra_candidates: list[dict[str, Any]] | None = None +) -> dict[str, Any]: + candidates = [ + {"model_type": "naive", "params": {}}, + {"model_type": "seasonal_naive", "params": {"season_length": 7}}, + {"model_type": "moving_average", "params": {"window_size": 7}}, + ] + if extra_candidates: + candidates.extend(extra_candidates) + return { + "store_id": pair["store_id"], + "product_id": pair["product_id"], + "selection_window": {"start_date": pair["start_date"], "end_date": pair["end_date"]}, + "forecast_horizon": 14, + "ranking_metric": "wape", + "split_config": { + "strategy": "expanding", + "n_splits": 5, + "min_train_size": 30, + "gap": 0, + "horizon": 14, + }, + "candidate_models": candidates, + "auto_train_winner": False, + "auto_predict": False, + } + + +async def test_table_has_named_indexes(db_session: AsyncSession) -> None: + rows = await db_session.execute( + text("SELECT indexname FROM pg_indexes WHERE tablename = 'model_selection_run'") + ) + names = {row[0] for row in rows} + assert "ix_model_selection_run_selection_id" in names + assert "ix_model_selection_run_store_product_created" in names + assert "ix_model_selection_run_status_created" in names + + +async def test_availability_ready_pair(client: AsyncClient, ready_pair: dict[str, Any]) -> None: + response = await client.get( + "/model-selection/availability", + params={ + "store_id": ready_pair["store_id"], + "product_id": ready_pair["product_id"], + "forecast_horizon": 14, + }, + ) + assert response.status_code == 200 + body = response.json() + assert body["status"] == "ready" + assert body["observed_days"] == ready_pair["n_days"] + assert body["recommended_split_config"]["horizon"] == 14 + + +async def 
test_availability_limited_pair(client: AsyncClient, limited_pair: dict[str, Any]) -> None: + response = await client.get( + "/model-selection/availability", + params={ + "store_id": limited_pair["store_id"], + "product_id": limited_pair["product_id"], + "forecast_horizon": 14, + }, + ) + assert response.status_code == 200 + assert response.json()["status"] == "limited" + + +async def test_availability_unknown_pair_returns_404(client: AsyncClient) -> None: + response = await client.get( + "/model-selection/availability", + params={"store_id": 999999, "product_id": 999999, "forecast_horizon": 14}, + ) + assert response.status_code == 404 + assert response.json()["status"] == 404 + + +async def test_run_persists_and_get_returns_same( + client: AsyncClient, ready_pair: dict[str, Any] +) -> None: + run = await client.post("/model-selection/run", json=_run_body(ready_pair)) + assert run.status_code == 200 + body = run.json() + assert body["status"] in {"completed", "partial"} + assert body["winner"] is not None + assert body["recommendation_confidence"] in {"high", "medium", "low"} + assert body["chart_data"] is not None + assert body["ranking"] + selection_id = body["selection_id"] + + fetched = await client.get(f"/model-selection/{selection_id}") + assert fetched.status_code == 200 + assert fetched.json()["selection_id"] == selection_id + + ranking = await client.get(f"/model-selection/{selection_id}/ranking") + assert ranking.status_code == 200 + assert ranking.json()["winner"] is not None + + +async def test_run_partial_with_bad_candidate( + client: AsyncClient, ready_pair: dict[str, Any] +) -> None: + """An invalid candidate param surfaces as a failed entry, not a 500.""" + body = _run_body( + ready_pair, + extra_candidates=[{"model_type": "moving_average", "params": {"window_size": 0}}], + ) + response = await client.post("/model-selection/run", json=body) + assert response.status_code == 200 + payload = response.json() + assert payload["status"] == "partial" + 
excluded = [e for e in payload["ranking"] if not e["included"]] + assert excluded + assert payload["winner"] is not None + + +async def test_get_missing_selection_returns_404(client: AsyncClient) -> None: + response = await client.get("/model-selection/does-not-exist") + assert response.status_code == 404 + assert response.json()["status"] == 404 diff --git a/app/features/model_selection/tests/test_schemas.py b/app/features/model_selection/tests/test_schemas.py new file mode 100644 index 00000000..3d34c510 --- /dev/null +++ b/app/features/model_selection/tests/test_schemas.py @@ -0,0 +1,81 @@ +"""Unit tests for model_selection request schemas (strict mode + validators).""" + +from __future__ import annotations + +import pytest +from pydantic import ValidationError + +from app.features.model_selection.schemas import ( + ModelSelectionRunRequest, + SelectionWindow, +) + + +def _base_request_dict(**overrides: object) -> dict[str, object]: + payload: dict[str, object] = { + "store_id": 1, + "product_id": 1, + "selection_window": {"start_date": "2026-01-01", "end_date": "2026-05-31"}, + "forecast_horizon": 14, + "split_config": { + "strategy": "expanding", + "n_splits": 5, + "min_train_size": 30, + "gap": 0, + "horizon": 14, + }, + "candidate_models": [{"model_type": "naive", "params": {}}], + } + payload.update(overrides) + return payload + + +def test_schema_accepts_iso_dates_under_strict_model() -> None: + """ISO-string dates validate through the strict ``validate_python`` path.""" + window = SelectionWindow.model_validate({"start_date": "2026-01-01", "end_date": "2026-02-01"}) + assert window.start_date.isoformat() == "2026-01-01" + + request = ModelSelectionRunRequest.model_validate(_base_request_dict()) + assert request.selection_window.end_date.isoformat() == "2026-05-31" + + +def test_schema_rejects_auto_predict_without_train_winner() -> None: + """LOCKED #7 — auto_predict requires auto_train_winner.""" + with pytest.raises(ValidationError, match="auto_predict 
requires auto_train_winner"): + ModelSelectionRunRequest.model_validate( + _base_request_dict(auto_predict=True, auto_train_winner=False) + ) + + +def test_schema_rejects_horizon_mismatch_between_split_and_forecast() -> None: + """LOCKED #5 — split_config.horizon must equal forecast_horizon.""" + bad = _base_request_dict(forecast_horizon=14) + bad["split_config"] = { + "strategy": "expanding", + "n_splits": 5, + "min_train_size": 30, + "gap": 0, + "horizon": 7, + } + with pytest.raises(ValidationError, match="must equal"): + ModelSelectionRunRequest.model_validate(bad) + + +def test_schema_rejects_feature_groups_with_v1() -> None: + """V1 must not carry feature_groups (mirrors forecasting TrainRequest).""" + with pytest.raises(ValidationError, match="feature_groups is only valid"): + ModelSelectionRunRequest.model_validate( + _base_request_dict(feature_frame_version=1, feature_groups=["calendar"]) + ) + + +def test_selection_window_rejects_inverted_range() -> None: + """An end <= start window is rejected.""" + with pytest.raises(ValidationError, match="after start_date"): + SelectionWindow.model_validate({"start_date": "2026-02-01", "end_date": "2026-01-01"}) + + +def test_candidate_models_min_length_enforced() -> None: + """At least one candidate is required.""" + with pytest.raises(ValidationError): + ModelSelectionRunRequest.model_validate(_base_request_dict(candidate_models=[])) diff --git a/app/features/model_selection/tests/test_service.py b/app/features/model_selection/tests/test_service.py new file mode 100644 index 00000000..7d3da5f1 --- /dev/null +++ b/app/features/model_selection/tests/test_service.py @@ -0,0 +1,222 @@ +"""Unit tests for ModelSelectionService orchestration (mocked sibling services).""" + +from __future__ import annotations + +from datetime import date, timedelta +from types import SimpleNamespace +from typing import Any +from unittest.mock import AsyncMock +from uuid import uuid4 + +import pytest +from pydantic import TypeAdapter + +from 
app.core.exceptions import BadRequestError, NotFoundError +from app.features.forecasting.schemas import ModelConfig +from app.features.model_selection.schemas import ModelSelectionRunRequest +from app.features.model_selection.service import ModelSelectionService +from app.features.model_selection.tests.conftest import ( + make_availability, + make_backtest_response, + make_mock_db, +) + + +def _request(**overrides: Any) -> ModelSelectionRunRequest: + payload: dict[str, Any] = { + "store_id": 1, + "product_id": 1, + "selection_window": {"start_date": "2026-01-01", "end_date": "2026-05-31"}, + "forecast_horizon": 14, + "split_config": { + "strategy": "expanding", + "n_splits": 5, + "min_train_size": 30, + "gap": 0, + "horizon": 14, + }, + "candidate_models": [{"model_type": "naive", "params": {}}], + } + payload.update(overrides) + return ModelSelectionRunRequest.model_validate(payload) + + +def _patch_backtester( + monkeypatch: pytest.MonkeyPatch, *, side_effect: list[Any] +) -> SimpleNamespace: + instance = SimpleNamespace(run_backtest=AsyncMock(side_effect=side_effect)) + monkeypatch.setattr("app.features.backtesting.service.BacktestingService", lambda: instance) + return instance + + +def _patch_availability(monkeypatch: pytest.MonkeyPatch, status: str) -> None: + monkeypatch.setattr( + ModelSelectionService, + "get_availability", + AsyncMock(return_value=make_availability(status=status)), + ) + + +# ----------------------------------------------------------------------------- +# Flattening +# ----------------------------------------------------------------------------- + + +def test_build_model_config_flattens_params() -> None: + """The service's flatten-then-validate builds a typed ModelConfig.""" + adapter: TypeAdapter[Any] = TypeAdapter(ModelConfig) + cfg = adapter.validate_python({"model_type": "seasonal_naive", "season_length": 7}) + assert cfg.model_type == "seasonal_naive" + assert cfg.season_length == 7 + + +# 
----------------------------------------------------------------------------- +# Availability thresholds +# ----------------------------------------------------------------------------- + + +def _availability_db(observed: int) -> AsyncMock: + """Mock DB returning a contiguous `observed`-day aggregate for one pair.""" + first = date(2024, 1, 1) if observed else None + last = date(2024, 1, 1) + timedelta(days=observed - 1) if observed else None + db = AsyncMock() + db.get = AsyncMock(return_value=SimpleNamespace(id=1)) + result = AsyncMock() + result.one = lambda: (first, last, observed, 12.0, 0) + db.execute = AsyncMock(return_value=result) + db.scalar = AsyncMock(return_value=0) + return db + + +@pytest.mark.parametrize( + ("observed", "expected"), + [(120, "ready"), (50, "limited"), (20, "unusable")], +) +async def test_availability_ready_limited_unusable_thresholds(observed: int, expected: str) -> None: + service = ModelSelectionService() + db = _availability_db(observed) + availability = await service.get_availability(db, 1, 1, forecast_horizon=14) + assert availability.status == expected + + +async def test_availability_missing_store_raises_not_found() -> None: + service = ModelSelectionService() + db = AsyncMock() + db.get = AsyncMock(return_value=None) + with pytest.raises(NotFoundError): + await service.get_availability(db, 999, 1, forecast_horizon=14) + + +# ----------------------------------------------------------------------------- +# Orchestration +# ----------------------------------------------------------------------------- + + +async def test_run_selection_partial_success_chooses_valid_winner( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_availability(monkeypatch, "ready") + _patch_backtester( + monkeypatch, + side_effect=[make_backtest_response(wape=10.0), ValueError("insufficient data")], + ) + request = _request( + candidate_models=[ + {"model_type": "naive", "params": {}}, + {"model_type": "seasonal_naive", "params": {"season_length": 
7}}, + ] + ) + response = await ModelSelectionService().run_selection(make_mock_db(), request) + + assert response.status == "partial" + assert response.winner is not None + assert response.winner.model_type == "naive" + failed = [e for e in response.ranking if not e.included] + assert [e.model_type for e in failed] == ["seasonal_naive"] + assert failed[0].exclusion_reason is not None + + +async def test_run_selection_all_candidates_fail_returns_failed_status_not_500( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """LOCKED #3 — every candidate failing persists FAILED and returns (no raise).""" + _patch_availability(monkeypatch, "ready") + _patch_backtester(monkeypatch, side_effect=[ValueError("boom-1"), ValueError("boom-2")]) + request = _request( + candidate_models=[ + {"model_type": "naive", "params": {}}, + {"model_type": "seasonal_naive", "params": {"season_length": 7}}, + ] + ) + response = await ModelSelectionService().run_selection(make_mock_db(), request) + + assert response.status == "failed" + assert response.winner is None + assert response.selection_id + assert all(not e.included for e in response.ranking) + + +async def test_run_selection_unusable_availability_raises_bad_request( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """LOCKED #2 — unusable availability fails fast with 400.""" + _patch_availability(monkeypatch, "unusable") + with pytest.raises(BadRequestError): + await ModelSelectionService().run_selection(make_mock_db(), _request()) + + +async def test_run_selection_auto_train_passes_feature_frame_version_and_groups( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_availability(monkeypatch, "ready") + _patch_backtester(monkeypatch, side_effect=[make_backtest_response(wape=10.0)]) + train_mock = AsyncMock( + return_value=SimpleNamespace(model_path="artifacts/models/model_abc.joblib") + ) + monkeypatch.setattr( + "app.features.forecasting.service.ForecastingService", + lambda: SimpleNamespace(train_model=train_mock), + ) + request = 
_request( + feature_frame_version=2, + feature_groups=["calendar"], + auto_train_winner=True, + auto_predict=False, + ) + response = await ModelSelectionService().run_selection(make_mock_db(), request) + + assert response.final_model == {"model_path": "artifacts/models/model_abc.joblib"} + train_mock.assert_awaited_once() + assert train_mock.await_args is not None + assert train_mock.await_args.kwargs["feature_frame_version"] == 2 + assert train_mock.await_args.kwargs["feature_groups"] == ["calendar"] + + +async def test_response_uses_recommendation_confidence_key( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """The response carries ``recommendation_confidence`` (not ``confidence``).""" + _patch_availability(monkeypatch, "ready") + _patch_backtester( + monkeypatch, + side_effect=[make_backtest_response(wape=10.0), make_backtest_response(wape=20.0)], + ) + request = _request( + candidate_models=[ + {"model_type": "naive", "params": {}}, + {"model_type": "seasonal_naive", "params": {"season_length": 7}}, + ] + ) + response = await ModelSelectionService().run_selection(make_mock_db(), request) + dumped = response.model_dump() + assert "recommendation_confidence" in dumped + assert "confidence" not in dumped + assert response.recommendation_confidence in {"high", "medium", "low"} + assert response.chart_data is not None + + +async def test_get_selection_missing_raises_not_found() -> None: + db = AsyncMock() + db.scalar = AsyncMock(return_value=None) + with pytest.raises(NotFoundError): + await ModelSelectionService().get_selection(db, uuid4().hex) diff --git a/app/main.py b/app/main.py index eb4f5145..1533ce50 100644 --- a/app/main.py +++ b/app/main.py @@ -26,6 +26,7 @@ from app.features.forecasting.routes import router as forecasting_router from app.features.ingest.routes import router as ingest_router from app.features.jobs.routes import router as jobs_router +from app.features.model_selection.routes import router as model_selection_router from 
app.features.ops.routes import router as ops_router from app.features.rag.routes import router as rag_router from app.features.registry.routes import router as registry_router @@ -145,6 +146,7 @@ def create_app() -> FastAPI: app.include_router(forecasting_router) app.include_router(explainability_router) app.include_router(backtesting_router) + app.include_router(model_selection_router) app.include_router(registry_router) app.include_router(rag_router) app.include_router(scenarios_router) From 0d1c119f17a472465f7fb30294056f3d8227c7da Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 06:34:54 +0200 Subject: [PATCH 23/30] fix(agents): cast finalizer compact result to fix mypy index error (#355) --- app/features/agents/tests/test_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/features/agents/tests/test_service.py b/app/features/agents/tests/test_service.py index 09413aa6..759e0284 100644 --- a/app/features/agents/tests/test_service.py +++ b/app/features/agents/tests/test_service.py @@ -3,7 +3,7 @@ import json from collections.abc import AsyncIterator from datetime import UTC, datetime, timedelta -from typing import Any +from typing import Any, cast from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -1260,7 +1260,7 @@ def test_compact_for_finalizer_strips_verbose_keys_keeps_metrics(self) -> None: } ] - compact = AgentService._compact_for_finalizer(raw) + compact = cast(list[dict[str, Any]], AgentService._compact_for_finalizer(raw)) runs = compact[0]["result"]["runs"] # Identity + metrics survive for BOTH runs (so a ranking sees 18.93). 
From b719184c8e41874b2336c28bfcbdb0bab907b336 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 06:34:59 +0200 Subject: [PATCH 24/30] docs: add forecast champion selector backend PRP and research (#353) --- ...cast-champion-selector-backend-research.md | 222 ++++ PRPs/forecast-champion-selector-backend.md | 970 ++++++++++++++++++ 2 files changed, 1192 insertions(+) create mode 100644 PRPs/ai_docs/forecast-champion-selector-backend-research.md create mode 100644 PRPs/forecast-champion-selector-backend.md diff --git a/PRPs/ai_docs/forecast-champion-selector-backend-research.md b/PRPs/ai_docs/forecast-champion-selector-backend-research.md new file mode 100644 index 00000000..2d37603b --- /dev/null +++ b/PRPs/ai_docs/forecast-champion-selector-backend-research.md @@ -0,0 +1,222 @@ +# Forecast Champion Selector Backend Research + +Date: 2026-06-01 + +This note captures external-library and runtime facts used by +`PRPs/forecast-champion-selector-backend.md`. It is intentionally narrow: +only claims that affect backend implementation are recorded here. + +## Official Documentation References + +- FastAPI APIRouter / multi-file apps: + https://fastapi.tiangolo.com/tutorial/bigger-applications/ + - Reason: the new `app/features/model_selection/routes.py` must follow the + existing `APIRouter(prefix=..., tags=...)` slice pattern and be wired in + `app/main.py`. + +- Pydantic v2 strict mode and field-level overrides: + https://pydantic.dev/docs/validation/latest/concepts/strict_mode/ + - Reason: ForecastLabAI request schemas use `ConfigDict(strict=True)`, but + JSON request bodies still need date/datetime/UUID/Decimal fields to accept + JSON-native strings via `Field(strict=False, ...)`. 
+ +- SQLAlchemy 2.0 PostgreSQL JSONB: + https://docs.sqlalchemy.org/en/20/dialects/postgresql.html#json-types + - Reason: `model_selection_run` should store immutable request/response + snapshots (`candidate_models`, `ranking_result`, `winner_metrics`, + `forecast_result`, `business_summary`) as PostgreSQL JSONB. + +- Alembic `Operations.create_index`: + https://alembic.sqlalchemy.org/en/latest/ops.html#alembic.operations.Operations.create_index + - Reason: the migration should use explicit named indexes; any partial or + JSONB index must use Alembic operations rather than raw SQL. + +- scikit-learn `TimeSeriesSplit`: + https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.TimeSeriesSplit.html + - Reason: the selector's recommended split defaults mirror the project's + own `SplitConfig` semantics and should not assume unsupported parameters. + +## Runtime Verification Commands + +Run from repository root on 2026-06-01. + +```bash +uv run python -c "import inspect; from sqlalchemy import select, table, column; import sqlalchemy; stmt=select(column('id')).select_from(table('t')).with_for_update(skip_locked=True); print('sqlalchemy', sqlalchemy.__version__); print('with_for_update_has_skip_locked', 'skip_locked' in str(inspect.signature(select(column('id')).with_for_update))); print(stmt)" +``` + +Observed: + +```text +sqlalchemy 2.0.46 +with_for_update_has_skip_locked True +SELECT id +FROM t FOR UPDATE +``` + +Note: generic SQL compilation does not render PostgreSQL-specific +`SKIP LOCKED`; use PostgreSQL dialect compilation in tests when asserting +that string. 
+ +```bash +uv run python -c "from datetime import date; import pydantic; from pydantic import BaseModel, ConfigDict, Field; M=type('M',(BaseModel,),{'__annotations__':{'d':date},'model_config':ConfigDict(strict=True),'d':Field(strict=False)}); print('pydantic', pydantic.__version__); print(M.model_validate({'d':'2026-06-01'}).d.isoformat())" +``` + +Observed: + +```text +pydantic 2.12.5 +2026-06-01 +``` + +```bash +uv run python -c "import inspect, sklearn; from sklearn.model_selection import TimeSeriesSplit; print('sklearn', sklearn.__version__); print(inspect.signature(TimeSeriesSplit)); t=TimeSeriesSplit(n_splits=3, test_size=2, gap=1); print(t)" +``` + +Observed: + +```text +sklearn 1.8.0 +(n_splits=5, *, max_train_size=None, test_size=None, gap=0) +TimeSeriesSplit(gap=1, max_train_size=None, n_splits=3, test_size=2) +``` + +```bash +uv run python -c "import inspect, fastapi; from fastapi import APIRouter, BackgroundTasks; print('fastapi', fastapi.__version__); print('APIRouter_prefix_param', 'prefix' in inspect.signature(APIRouter).parameters); print('BackgroundTasks_add_task', inspect.signature(BackgroundTasks.add_task))" +``` + +Observed: + +```text +fastapi 0.128.0 +APIRouter_prefix_param True +BackgroundTasks_add_task (self, func: ..., *args: P.args, **kwargs: P.kwargs) -> None +``` + +```bash +uv run python -c "import inspect, alembic; from alembic.operations import Operations; print('alembic', alembic.__version__); print(inspect.signature(Operations.create_index))" +``` + +Observed: + +```text +alembic 1.18.4 +(self, index_name, table_name, columns, *, schema=None, unique=False, if_not_exists=None, **kw) -> None +``` + +## Implementation Consequences + +- Use `Literal[...]` request fields for JSON string enums under + `ConfigDict(strict=True)`; convert to ORM enums at service boundaries. +- Use `Field(strict=False, ...)` on every request-body date/datetime/UUID/ + Decimal field, or `app/core/tests/test_strict_mode_policy.py` can fail. 
+- Persist selector decisions in JSONB snapshots because registry metrics are + free-form JSONB and metric key names differ across layers. +- Do not assume a batch backtest item contains fold-level chart data. Batch + metrics are intentionally pinned to `{wape, smape, mae, bias, sample_size}`. +- If an implementation compiles SQL for PostgreSQL-specific clauses, compile + with the PostgreSQL dialect rather than relying on generic SQL strings. + +## Verified Internal Service Contracts (read from source 2026-06-01) + +These are the in-repo signatures the selector orchestrates. They were the prior +draft's #1 residual risk; recorded here so they survive and can be re-verified on +refactor. Re-verify with `grep -n "async def run_backtest\|async def train_model\|async def predict" app/features/backtesting/service.py app/features/forecasting/service.py`. + +### BacktestingService — `app/features/backtesting/service.py:213` + +```python +# __init__(self) -> None — takes NO db; instantiate as BacktestingService() +async def run_backtest( + self, db: AsyncSession, store_id: int, product_id: int, + start_date: date, end_date: date, config: BacktestConfig, +) -> BacktestResponse +``` + +`BacktestConfig` (`backtesting/schemas.py:81`, `frozen=True, extra="forbid"`): +`split_config: SplitConfig`, `model_config_main: Annotated[ModelConfig, Field(discriminator="model_type")]`, +`include_baselines: bool = True`, `store_fold_details: bool = True`. + +`SplitConfig` (`:24`): `strategy: Literal["expanding","sliding"]="expanding"`, +`n_splits: int=5 (ge=2,le=20)`, `min_train_size: int=30 (ge=7)`, `gap: int=0 (ge=0,le=30)`, +`horizon: int=14 (ge=1,le=90)`; validator `horizon > gap`. + +### BacktestResponse — `backtesting/schemas.py:257` + +`main_model_results: ModelBacktestResult`, `baseline_results: list[ModelBacktestResult] | None`, +plus `backtest_id, store_id, product_id, config_hash, split_config, comparison_summary, +duration_ms, leakage_check_passed`. 
+ +`ModelBacktestResult` (`:180`): `model_type, config_hash, fold_results: list[FoldResult], +aggregated_metrics: dict[str,float], metric_std: dict[str,float], +bucketed_aggregated_metrics: dict|None, feature_aware: bool, exogenous_policy`. + +`FoldResult` (`:147`): `fold_index, split, dates: list[date], actuals: list[float], +predictions: list[float], metrics: dict[str,float], horizon_bucket_metrics`. + +**Metric keys (CORRECTION to the prior draft):** `aggregated_metrics` has **five** keys — +`{"mae", "rmse", "smape", "wape", "bias"}` (`backtesting/metrics.py:347`; PRP-36 added `rmse`). +`metric_std` keys are suffixed `"{name}_stability"` (a coefficient of variation, not a raw std). +`sample_size` is NOT in `aggregated_metrics` — derive from fold actuals length or n_folds. +Fold chart data path: `main_model_results.fold_results[i].{dates,actuals,predictions}` — populated +only when `config.store_fold_details=True`. + +### ForecastingService — `app/features/forecasting/service.py` + +```python +# __init__(self) -> None +async def train_model( # :247 + self, db: AsyncSession, store_id: int, product_id: int, + train_start_date: date, train_end_date: date, config: ModelConfig, + *, feature_frame_version: int = 1, feature_groups: list[str] | None = None, +) -> TrainResponse # TrainResponse.model_path is the artifact path + +async def predict( # :402 — NO db arg + self, store_id: int, product_id: int, horizon: int, model_path: str, +) -> PredictResponse # PredictResponse.forecasts: list[ForecastPoint] +``` + +`predict()` rejects feature-aware models (`service.py:491`) — feature-aware winners must route +through `/scenarios/simulate`; catch and warn rather than 500. + +### ModelConfig union — `forecasting/schemas.py:417` + +Plain PEP 604 union (`NaiveModelConfig | SeasonalNaiveModelConfig | … | ProphetLikeModelConfig`), +discriminated by each member's `model_type` Literal. 
Members are **flat** (`SeasonalNaiveModelConfig` +has `model_type` + `season_length`, NOT a nested `params`). No module-level `TypeAdapter`/helper. +Build from `{"model_type": ..., "params": {...}}` by FLATTENING: + +```python +from pydantic import TypeAdapter +from app.features.forecasting.schemas import ModelConfig +TypeAdapter(ModelConfig).validate_python({"model_type": c.model_type, **c.params}) +``` + +Members are `frozen=True, extra="forbid"` → bad params raise `ValidationError` (treat as a failed +candidate). `model_type` values: `naive, seasonal_naive, moving_average, weighted_moving_average, +seasonal_average, trend_regression_baseline, random_forest, lightgbm, xgboost, regression, +prophet_like` (`lightgbm`/`xgboost` are opt-in extras → may `ImportError`). + +### Data-platform ORM column names — `data_platform/models.py` + +`Store` (`:40`): `id` (int PK), `code` (business key — NOT `store_code`). `Product` (`:68`): `id`, +`sku`, `launch_date: date|None`. `SalesDaily` (`:172`): `date` (FK calendar.date), `store_id`, +`product_id`, `quantity` (Integer, CHECK ≥0), `unit_price`, `total_amount`; grain unique +`(date, store_id, product_id)`. `Promotion` (`:274`): `product_id` NOT NULL, `store_id` NULLABLE +(NULL = chain-wide, applies to all stores), date RANGE `[start_date, end_date]`, +`kind ∈ {pct_off,bogo,bundle,markdown}`. + +### Cross-cutting patterns + +- Exceptions (`app/core/exceptions.py`): `BadRequestError`(400), `NotFoundError`(404), + `DatabaseError`(500), `ConflictError`(409), `UnprocessableEntityError`(422); each + `(message=..., details=None)`. Routes map `ValueError→BadRequestError`, + `SQLAlchemyError→DatabaseError` (mirror `backtesting/routes.py:60`). +- `validate_date_range` is slice-local in `analytics/routes.py:36` (raises `BadRequestError`, + inverted-range + 730-day-max) — NOT importable cross-slice; reimplement locally. 
+- `TimestampMixin` (`app/shared/models.py`): `created_at`/`updated_at`, `server_default func.now()`, + `updated_at onupdate func.now()`. Mix in first: `class X(TimestampMixin, Base)`. +- JSONB import differs: migration `from sqlalchemy.dialects import postgresql` → + `postgresql.JSONB(astext_type=sa.Text())`; ORM `from sqlalchemy.dialects.postgresql import JSONB`. +- `app/main.py` wires routers as `from app.features.{slice}.routes import router as {slice}_router` + + `app.include_router({slice}_router)` (NO prefix at include; the router carries it). +- Current alembic head observed: `c1d2e3f40512` (`create_batch_tables`). diff --git a/PRPs/forecast-champion-selector-backend.md b/PRPs/forecast-champion-selector-backend.md new file mode 100644 index 00000000..651fc009 --- /dev/null +++ b/PRPs/forecast-champion-selector-backend.md @@ -0,0 +1,970 @@ +name: "Forecast Champion Selector Backend" +description: | + Backend foundation for an interactive Forecast Champion Selector. Adds a + first-class `model_selection` vertical slice that validates a store/product + pair, recommends/selects backtest settings, runs candidate model comparison, + ranks results by WAPE/sMAPE/bias/MAE, persists an auditable selection record, + and optionally trains/predicts with the winning model. This PRP deliberately + scopes UI work out; it creates the stable backend contract the UI can consume. + +**Created:** 2026-06-01 · **Refined:** 2026-06-01 (signatures verified against live code) +**Current repo base observed:** `dev` at `1b4c3f3` (`Merge pull request #352 ...fix/agents-finalizer-fallback`) +**Current alembic head observed:** `c1d2e3f40512` (`create_batch_tables`) — verify with `uv run alembic heads` at implementation time and chain to whatever head exists THEN. +**Working-tree caveat observed:** `docker-compose.lan.yml` is an untracked local dogfood override; do not commit it. +**Tracking issue:** create before implementation, suggested title `feat(api): add forecast champion selector backend`.
+**Suggested branch:** `feat/forecast-champion-selector-backend` (off `dev`, per `.claude/rules/branch-naming.md`). +**Commit scope:** `api` (cross-feature backend wiring + new slice + `app/main.py`) and `db` (migration). Every commit references the tracking issue. + +--- + +## VALIDATE — Missing Backend Surface Check + +The lower-level primitives exist; the business workflow does not. + +### Reusable backend primitives already present (verified) + +- `POST /backtesting/run` → single store/product/model backtest with fold metrics, + aggregated metrics, optional baselines, bucketed horizon metrics, leakage status. + `app/features/backtesting/routes.py:24` (router), `:60` (handler). + **Service entry point is `BacktestingService().run_backtest(db, store_id, product_id, start_date, end_date, config)`** — see verified signature below. +- `POST /forecasting/train` → trains one model; supports `feature_frame_version` (1|2) and + `feature_groups`. `app/features/forecasting/routes.py:25`. Service: + `ForecastingService().train_model(db, store_id, product_id, train_start_date, train_end_date, config, *, feature_frame_version=1, feature_groups=None) -> TrainResponse`. +- `POST /forecasting/predict` → predicts from a saved bundle. Service: + `ForecastingService().predict(store_id, product_id, horizon, model_path) -> PredictResponse` + (**no db arg** — loads bundle from disk; rejects feature-aware models, `service.py:491`). +- `POST /batch/forecasting` fan-out exists but pins metrics to five keys and does **not** + expose fold-level chart data — NOT suitable for this slice's chart payload. +- `GET /dimensions/stores`, `GET /dimensions/products` provide dimension metadata. +- `app/features/ops/service.py` is the canonical read-only cross-slice ORM aggregation precedent. 
+ +### Backend pieces missing for the full feature + +- No `app/features/model_selection/` slice; no `POST /model-selection/run`; no persisted + `model_selection_run` table; no orchestration of pair-validation → candidate backtests → + ranking → optional final train → optional predict; no pair-availability endpoint; no + backend ranking/confidence policy; no deterministic business explanation layer; no + chart-ready comparison payload. +- Batch/Job model allow-lists are narrower than forecasting's full `ModelConfig` union, and + job/batch training does not pass `feature_frame_version`/`feature_groups`. **Therefore this + slice calls the direct backtesting/forecasting services**, not batch/jobs. + +--- + +## BRAINSTORM / RERANK — Chosen Scope + +Chosen: **Option A — Backend foundation only** (new `model_selection` slice: pair +availability, candidate comparison, ranking/confidence, persisted audit, optional +train/predict, chart-ready payload). It covers every backend gap the eventual UI needs, +reuses mature primitives, creates a stable testable contract, and avoids frontend coupling. + +Non-goals (out of scope for this PRP): + +- No React page / shadcn UI / frontend routing. +- No agent tool, no `agent_require_approval` entry, no agent mutation surface. +- No alias auto-promotion (the selector may *recommend* a winner; alias mutation is a future + approval-gated PRP). +- No batch model-zoo retrofit. Use direct services for the single selected pair. + +--- + +## Goal + +**Feature Goal:** A backend-only Forecast Champion Selector vertical slice that, given one +store/product pair + window + horizon + candidate models, validates data availability, runs +comparable backtests for every candidate, deterministically ranks completed candidates, +computes a recommendation confidence with reasons, persists an auditable selection run, and +returns chart-ready comparison data plus optional final-model training and forecast output. 
+ +**Deliverable:** `app/features/model_selection/` slice (`models.py`, `schemas.py`, +`ranking.py`, `explanations.py`, `service.py`, `routes.py`, `tests/`) + one Alembic migration +creating `model_selection_run`, wired in `app/main.py`. + +**Success Definition:** `POST /model-selection/run` with the default five candidates against +a seeded pair returns HTTP 200 with a persisted `selection_id`, a non-empty deterministic +`ranking`, a `winner`, a `recommendation_confidence`, and a `chart_data` payload; the row is +retrievable by `GET /model-selection/{selection_id}`; all validation gates pass. + +## Why + +- Business users want to ask "which model should I use for this store/product?" without + manually coordinating `/backtesting/run`, `/forecasting/train`, `/forecasting/predict`. +- The UI needs **one stable backend contract** rather than re-implementing ranking in TypeScript. +- A persisted selection run makes the model choice auditable: which models competed, which + window, which policy, and why the winner won. +- Keeps the single-host architecture intact — no queue, no cloud SDK, no new service. 
+ +## What + +### New endpoints (all under `APIRouter(prefix="/model-selection", tags=["model-selection"])`) + +```http +GET /model-selection/availability?store_id=...&product_id=...&forecast_horizon=14 +POST /model-selection/run +GET /model-selection/{selection_id} +GET /model-selection/{selection_id}/ranking +POST /model-selection/{selection_id}/train-winner +POST /model-selection/{selection_id}/predict +``` + +### Core request shape (`POST /model-selection/run`) + +```json +{ + "store_id": 1, + "product_id": 1, + "selection_window": { "start_date": "2026-01-01", "end_date": "2026-05-31" }, + "forecast_horizon": 14, + "ranking_metric": "wape", + "split_config": { "strategy": "expanding", "n_splits": 5, "min_train_size": 30, "gap": 0, "horizon": 14 }, + "candidate_models": [ + {"model_type": "naive", "params": {}}, + {"model_type": "seasonal_naive", "params": {"season_length": 7}}, + {"model_type": "moving_average", "params": {"window_size": 7}}, + {"model_type": "regression", "params": {}}, + {"model_type": "prophet_like", "params": {}} + ], + "feature_frame_version": 1, + "feature_groups": null, + "auto_train_winner": false, + "auto_predict": false +} +``` + +### LOCKED decisions (these remove every "choose one and test" ambiguity in the prior draft) + +1. **HTTP status codes:** `POST /model-selection/run` → **200** (synchronous, returns the + full result, mirrors `/backtesting/run` which is `status.HTTP_200_OK`). All GETs → 200. + `train-winner` / `predict` → 200. (201 is *not* used; the row is an audit side-effect, the + response is the computed result. Tests lock 200.) +2. **Availability gate:** if `availability.status == "unusable"`, **fail fast** — persist the + row as `status="failed"` with `error_message`, then raise `BadRequestError` (RFC 7807 **400**). + Nothing is ranked. +3. 
**All-candidates-fail (availability OK but every backtest errored):** **do NOT raise.** + Persist `status="failed"`, `ranking_result` with the failed entries, `winner=null`, and + return **200** with the failed-status response. Rationale: the run was validly attempted and + is an auditable outcome, not a client error. (Distinguish from #2: #2 is "we never started".) +4. **Per-candidate backtest config:** `BacktestConfig(split_config=req.split_config, + model_config_main={flattened_candidate_config}, include_baselines=False, store_fold_details=True)`. + `include_baselines=False` because each candidate is itself a `model_config_main` run — we do + not want N redundant baseline runs. `store_fold_details=True` so fold chart data is populated. +5. **`split_config.horizon` MUST equal `forecast_horizon`** (model-validator on the request). + The window dates from `selection_window` become `run_backtest`'s `start_date`/`end_date`. +6. **Ranking determinism:** primary = `ranking_metric` (default `"wape"`), then the fixed + tie-break chain `wape → smape → abs(bias) → mae → model_type`. With the default, the sort key + is exactly `(wape, smape, abs(bias), mae, model_type)` (success-criteria order). A non-default + `ranking_metric` puts that metric first, remaining chain follows excluding the duplicate. +7. **`auto_predict=True` requires `auto_train_winner=True`** (request model-validator) — predict + needs a freshly trained `final_model.model_path` from this run. + +### Success Criteria + +- [ ] `app/features/model_selection/` slice exists and is wired in `app/main.py`. +- [ ] `POST /model-selection/run` with the default five candidates returns a persisted + `status="completed"` (or `"partial"`) selection with `winner`, `ranking`, confidence, and `chart_data`.
+- [ ] `GET /model-selection/availability` returns: `first_sales_date`, `last_sales_date`, + `observed_days`, `expected_calendar_days`, `coverage_ratio`, `missing_days`, + `zero_sale_days`, `promotion_days` (or `null` + warning), `average_daily_demand`, + `status` ∈ `{ready, limited, unusable}`, and `recommended_split_config`. +- [ ] Ranking is deterministic per LOCKED decision #6. +- [ ] Partial success supported (LOCKED #3): failed candidates appear in `ranking` with error + detail and are excluded from winner selection; a valid candidate still wins. +- [ ] `auto_train_winner=True` stores `final_model.model_path` via the **direct** + `ForecastingService.train_model`, preserving `feature_frame_version` + `feature_groups`. +- [ ] `auto_predict=True` (with train) returns forecast points + total/average demand summary. +- [ ] New migration creates `model_selection_run` with JSONB snapshots and named indexes; + `downgrade` drops indexes then table cleanly. +- [ ] `app/core/tests/test_strict_mode_policy.py` stays green for all new strict request schemas. +- [ ] No agent tools / `agent_require_approval` entries; no frontend files; no cloud SDK. + +## All Needed Context + +### Documentation & References + +```yaml +# PRP conventions +- file: PRPs/templates/prp_base.md + why: Base template (Goal/Context/Blueprint/Validation). NOTE — the user referenced a + "PRPs/prp-readme.md.md"; it does NOT exist (`find PRPs -iname '*readme*'` empty on 2026-06-01). +- file: PRPs/PRP-33-batch-runner-mvp.md + why: Strongest backend vertical-slice precedent — migration assertions, strict-mode gotchas, + route/test detail. Mirror its structure. +- file: PRPs/PRP-28-forecast-explainability-driver-attribution.md + why: Read/composition-slice precedent consuming existing contracts; deterministic explanation layer. 
+- docfile: PRPs/ai_docs/forecast-champion-selector-backend-research.md + why: External-lib + runtime verification (FastAPI APIRouter, Pydantic strict, JSONB, Alembic + create_index, sklearn TimeSeriesSplit). Versions: pydantic 2.12.5, sqlalchemy 2.0.46, + sklearn 1.8.0, fastapi 0.128.0, alembic 1.18.4. + +# Verified service contracts to reuse (DO NOT re-derive — exact signatures below in Gotchas) +- file: app/features/backtesting/service.py + why: BacktestingService().run_backtest(db, store_id, product_id, start_date, end_date, config). :213 +- file: app/features/backtesting/schemas.py + why: SplitConfig :24, BacktestConfig :81, BacktestResponse :257, ModelBacktestResult :180, + FoldResult :147. aggregated_metrics keys = {mae,rmse,smape,wape,bias}. +- file: app/features/backtesting/routes.py + why: EXACT route error-mapping pattern to mirror (try/except ValueError->BadRequestError, + SQLAlchemyError->DatabaseError; service instantiated as BacktestingService()). :60-140 +- file: app/features/forecasting/service.py + why: ForecastingService().train_model :247 (db first; feature_frame_version/feature_groups + keyword-only after *), predict :402 (NO db). Lazy cross-slice import precedent :55-61, :967. +- file: app/features/forecasting/schemas.py + why: ModelConfig union :417-429 (flat members, model_type discriminator, NO module-level helper); + TrainResponse.model_path :540; PredictResponse.forecasts :605; ForecastPoint :574. +- file: app/features/data_platform/models.py + why: Store :40 (business key `code`, not store_code), Product :68 (`sku`, `launch_date`), + SalesDaily :172 (date/store_id/product_id/quantity/unit_price/total_amount), Promotion :274. +- file: app/features/ops/service.py + why: Read-only cross-slice ORM aggregation precedent — module-scope ORM-model imports, stateless + service, db: AsyncSession per method, func.min/max/count/sum + group_by style. :225, :456. 
+- file: app/features/analytics/routes.py + why: validate_date_range :36 (raises BadRequestError, inverted-range + 730-day-max). CANNOT be + cross-slice imported — reimplement the two checks locally raising BadRequestError. +- file: app/core/exceptions.py + why: BadRequestError(400) :152, NotFoundError(404) :64, DatabaseError(500) :108, + ConflictError(409) :130, UnprocessableEntityError(422) :174. Each: (message=..., details=None). +- file: app/core/problem_details.py + why: RFC 7807 envelope; never raise bare HTTPException with raw strings. +- file: app/core/config.py + why: get_settings() cached singleton :225; Settings(BaseSettings) :62; add a plain typed attr + with literal default; env var = UPPER_SNAKE of the field name. +- file: app/core/database.py + why: Base (ORM declarative base) + get_db dependency used by routes/tests. +- file: app/shared/models.py + why: TimestampMixin (created_at/updated_at, server_default func.now(), updated_at onupdate). Mix in first. +- file: app/main.py + why: Router wiring — `from app.features.{slice}.routes import router as {slice}_router` (:18-26), + `app.include_router({slice}_router)` with NO prefix at include (:137-155), inside create_app(). +- file: app/core/tests/test_strict_mode_policy.py + why: AST policy — scans app/features/*/schemas.py; any ConfigDict(strict=True) model field typed + date/datetime/time/UUID/Decimal (anywhere in the annotation) MUST carry Field(strict=False, ...). + +# Migration / test patterns +- file: alembic/versions/c1d2e3f40512_create_batch_tables.py + why: JSONB via `from sqlalchemy.dialects import postgresql` -> postgresql.JSONB(astext_type=sa.Text()); + named CheckConstraint; op.create_index (op.f for single-col, explicit name for composite); + sa.DateTime(timezone=True) server_default sa.text("now()"); downgrade drops indexes THEN table.
+- file: app/features/batch/models.py + why: ORM JSONB via `from sqlalchemy.dialects.postgresql import JSONB` (bare); Mapped[]+mapped_column; + status as String + default=Enum.PENDING.value + CheckConstraint in __table_args__; TimestampMixin. +- file: app/features/batch/schemas.py + why: Strict request pattern — ConfigDict(strict=True), Literal[...] for JSON enums, Field(strict=False) + on date fields (:132-133), @model_validator cross-field checks. +- file: app/features/explainability/tests/test_routes.py + why: ASGITransport + AsyncClient + app.dependency_overrides[get_db]; RFC 7807 4-key body assert; async tests. +- file: app/features/explainability/tests/conftest.py + why: Integration fixture — real engine from get_settings().database_url, prefix-scoped teardown in finally. + +# External official docs (verified in research doc) +- url: https://fastapi.tiangolo.com/tutorial/bigger-applications/ + why: APIRouter prefix/tags multi-file pattern. +- url: https://pydantic.dev/docs/validation/latest/concepts/strict_mode/ + why: strict mode + field-level Field(strict=False) override (runtime-verified, pydantic 2.12.5). +- url: https://docs.sqlalchemy.org/en/20/dialects/postgresql.html#json-types + why: JSONB column type for audit snapshots. +- url: https://alembic.sqlalchemy.org/en/latest/ops.html#alembic.operations.Operations.create_index + why: create_index signature (alembic 1.18.4: index_name, table_name, columns, *, unique, **kw). +- url: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.TimeSeriesSplit.html + why: split semantics (sklearn 1.8.0 signature: n_splits, *, max_train_size, test_size, gap). 
+``` + +### Current Codebase Tree (relevant slices) + +```bash +app/features/ +├── analytics/ # KPI/drilldown/timeseries; validate_date_range lives in routes.py (slice-local) +├── backtesting/ # single-pair single-model backtesting; fold/chart data via store_fold_details +├── batch/ # batch fan-out; pinned 5-key metrics; NO fold chart data +├── data_platform/ # shared ORM: Store, Product, SalesDaily, Promotion, InventorySnapshotDaily, ... +├── dimensions/ # store/product discovery +├── forecasting/ # direct train/predict; full ModelConfig union +├── jobs/ # train/predict/backtest job orchestration +├── ops/ # read-only cross-slice ORM aggregation precedent (OpsService) +└── registry/ # model runs, aliases, compare, artifact verify +alembic/versions/ # current head: c1d2e3f40512 (create_batch_tables) +``` + +### Desired Codebase Tree + +```bash +app/features/model_selection/ +├── __init__.py +├── models.py # ModelSelectionRun ORM + ModelSelectionStatus enum +├── schemas.py # strict request models + response models +├── ranking.py # PURE: normalize metrics, filter, rank, confidence +├── explanations.py # PURE: deterministic business summary + confidence_reasons +├── service.py # ModelSelectionService: availability + orchestration (lazy cross-slice imports) +├── routes.py # APIRouter(prefix="/model-selection") +└── tests/ + ├── __init__.py + ├── conftest.py + ├── test_models.py + ├── test_schemas.py + ├── test_ranking.py + ├── test_explanations.py + ├── test_service.py + ├── test_routes.py + └── test_routes_integration.py +alembic/versions/_create_model_selection_run.py +``` + +### Known Gotchas & VERIFIED Library/Internal Contracts + +```python +# ── VERIFIED INTERNAL SIGNATURES (exact, read 2026-06-01) ───────────────────── +# BacktestingService.__init__(self) -> None # takes NO db; instantiate as BacktestingService() +# await BacktestingService().run_backtest( +# db, store_id, product_id, start_date, end_date, config: BacktestConfig +# ) -> BacktestResponse # 
service.py:213 ; db is FIRST arg +# +# ForecastingService.__init__(self) -> None +# await ForecastingService().train_model( +# db, store_id, product_id, train_start_date, train_end_date, config: ModelConfig, +# *, feature_frame_version: int = 1, feature_groups: list[str] | None = None +# ) -> TrainResponse # service.py:247 ; .model_path is the artifact path +# await ForecastingService().predict( +# store_id, product_id, horizon, model_path # NO db arg — loads bundle from disk +# ) -> PredictResponse # service.py:402 ; .forecasts: list[ForecastPoint] +# # ForecastPoint: {date, forecast, lower_bound?, upper_bound?} +# GOTCHA: predict() REJECTS feature-aware models (service.py:491). For a feature-aware winner, +# auto_predict may raise; catch and surface a warning rather than failing the whole run. + +# ── METRIC KEYS — CORRECTED (draft was incomplete) ──────────────────────────── +# BacktestResponse.main_model_results.aggregated_metrics has FIVE keys: +# {"mae", "rmse", "smape", "wape", "bias"} # metrics.py:347 — draft MISSED "rmse" +# metric_std keys are SUFFIXED "{name}_stability" (a coefficient of variation, NOT raw std). +# sample_size is NOT in aggregated_metrics — derive it from fold actuals length +# (sum of len(fold.actuals) across fold_results) or n_folds; normalize in ranking.py. +# Fold chart data path: BacktestResponse.main_model_results.fold_results[i].{dates, actuals, predictions} +# populated ONLY when config.store_fold_details=True (LOCKED #4 sets it True). +# bucketed_aggregated_metrics lives on each ModelBacktestResult (optional, may be None). + +# ── ModelConfig CONSTRUCTION — members are FLAT, no nested "params" ──────────── +# The request uses {"model_type": "seasonal_naive", "params": {"season_length": 7}} but the +# ModelConfig members are FLAT (SeasonalNaiveModelConfig has model_type + season_length at top +# level). There is NO module-level TypeAdapter/helper. 
Build at the service boundary by FLATTENING: +# from pydantic import TypeAdapter +# from app.features.forecasting.schemas import ModelConfig +# _MODEL_CONFIG_ADAPTER = TypeAdapter(ModelConfig) +# cfg = _MODEL_CONFIG_ADAPTER.validate_python({"model_type": c.model_type, **c.params}) +# Members are frozen + extra="forbid", so unknown params raise a ValidationError (good — surfaces +# bad candidate params as a failed candidate with a reason). Do this import LAZILY in-method. +# Valid model_type values (full union, forecasting/schemas.py:417): naive, seasonal_naive, +# moving_average, weighted_moving_average, seasonal_average, trend_regression_baseline, +# random_forest, lightgbm, xgboost, regression, prophet_like. +# (lightgbm/xgboost are opt-in extras — may ImportError at runtime; treat as a failed candidate.) + +# ── CROSS-SLICE IMPORT RULE ─────────────────────────────────────────────────── +# Vertical-slice rule: app/features/X must not import app/features/Y at MODULE scope when it +# would close an alembic cold-boot cycle. model_selection is a NEW leaf (nothing imports it), but +# to match the BatchService/forecasting precedent and stay safe, import the SERVICE CLASSES +# (BacktestingService, ForecastingService) and the ModelConfig TypeAdapter LAZILY inside the +# methods that use them. Read ORM models (Store/Product/SalesDaily/Promotion) at module scope — +# that mirrors OpsService and is the sanctioned read-only ORM surface. + +# ── validate_date_range IS NOT IMPORTABLE ───────────────────────────────────── +# It lives in app/features/analytics/routes.py (slice-local). Reimplement the two checks locally +# (inverted range; max-span) raising app.core.exceptions.BadRequestError, OR rely on schema +# validators. Do NOT import across the slice boundary. +# NOTE: analytics' max-span is settings.analytics_max_date_range_days (configurable, ~730), not a +# hardcoded constant — pick your own local bound (or reuse the setting) when reimplementing. 
+ +# ── STRICT-MODE POLICY (app/core/tests/test_strict_mode_policy.py) ──────────── +# Every request model with model_config = ConfigDict(strict=True) MUST add Field(strict=False, ...) +# to EVERY field typed date|datetime|time|UUID|Decimal (incl. inside Optional/Annotated/list/dict). +# Use Literal[...] for JSON enum strings (NOT a str-Enum — strict won't coerce). The AST walker does +# NOT follow inheritance, so set ConfigDict(strict=True) on each concrete request model directly. + +# ── ORM / MIGRATION QUIRKS ──────────────────────────────────────────────────── +# JSONB import DIFFERS by layer: +# migration: from sqlalchemy.dialects import postgresql -> postgresql.JSONB(astext_type=sa.Text()) +# ORM: from sqlalchemy.dialects.postgresql import JSONB -> mapped_column(JSONB) +# Status enum enforced via CheckConstraint("status IN (...)", name="ck_...") in BOTH migration and +# ORM __table_args__; ORM column is String(N) with default=ModelSelectionStatus.PENDING.value. +# created_at/updated_at come from TimestampMixin (app/shared/models.py) — declare class as +# `class ModelSelectionRun(TimestampMixin, Base)` (mixin FIRST). Declare completed_at explicitly. +# Migration down_revision: chain to the CURRENT head at implementation time (observed c1d2e3f40512); +# run `uv run alembic heads` to confirm — do NOT hardcode this PRP's observed value blindly. + +# ── DATA-PLATFORM COLUMN NAMES (availability aggregation) ───────────────────── +# Store.id (int PK), Store.code (business key). Product.id, Product.sku, Product.launch_date (date|None). +# SalesDaily: .date (Date FK calendar.date), .store_id, .product_id, .quantity (Integer, CHECK >=0), +# .unit_price (Numeric), .total_amount (Numeric). Grain unique (date, store_id, product_id). +# => For ONE pair: count(distinct date) == count(*); zero_sale_days = count where quantity == 0. 
+# Promotion: per-product (product_id NOT NULL), store_id NULLABLE (NULL = CHAIN-WIDE, applies to all +# stores), date RANGE [start_date, end_date], kind in {pct_off,bogo,bundle,markdown}. To count +# promotion_days for (store, product) within the window, JOIN promotion to the pair's sales dates +# ON sd.date BETWEEN p.start_date AND p.end_date AND p.product_id=? AND (p.store_id=? OR p.store_id IS NULL), +# then COUNT(DISTINCT sd.date). If this proves complex/edge-casey, return promotion_days=None with a +# warning string (acceptable per Success Criteria) — do NOT sum (end-start) per row (double-counts overlaps). + +# ── RUNTIME-VERIFIED LIBRARY FACTS (research doc) ───────────────────────────── +# Pydantic 2.12.5 accepts Field(strict=False) date string under a strict model. sklearn 1.8.0 +# TimeSeriesSplit(n_splits, *, max_train_size, test_size, gap). FastAPI 0.128.0 APIRouter(prefix=...). +# Alembic 1.18.4 Operations.create_index(index_name, table_name, columns, *, unique, **kw). +``` + +## Implementation Blueprint + +### Data Models and Schemas + +`app/features/model_selection/models.py`: + +```python +from datetime import date, datetime +from enum import Enum +from typing import Any + +from sqlalchemy import CheckConstraint, Date, DateTime, Index, Integer, String +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column + +from app.core.database import Base +from app.shared.models import TimestampMixin + + +class ModelSelectionStatus(str, Enum): + PENDING = "pending" + RUNNING = "running" + COMPLETED = "completed" + PARTIAL = "partial" + FAILED = "failed" + + +class ModelSelectionRun(TimestampMixin, Base): # TimestampMixin FIRST → created_at/updated_at + __tablename__ = "model_selection_run" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + selection_id: Mapped[str] = mapped_column(String(32), unique=True, index=True) + store_id: Mapped[int] = mapped_column(Integer, index=True) + product_id: Mapped[int] = 
mapped_column(Integer, index=True) + start_date: Mapped[date] = mapped_column(Date) + end_date: Mapped[date] = mapped_column(Date) + forecast_horizon: Mapped[int] = mapped_column(Integer) + ranking_metric: Mapped[str] = mapped_column(String(20)) + status: Mapped[str] = mapped_column(String(20), default=ModelSelectionStatus.PENDING.value, index=True) + candidate_models: Mapped[list[dict[str, Any]]] = mapped_column(JSONB) + policy_snapshot: Mapped[dict[str, Any]] = mapped_column(JSONB) + availability_snapshot: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + ranking_result: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + winner_model_type: Mapped[str | None] = mapped_column(String(40), nullable=True) + winner_metrics: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + final_model_path: Mapped[str | None] = mapped_column(String(512), nullable=True) + forecast_result: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + business_summary: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + error_message: Mapped[str | None] = mapped_column(String(2000), nullable=True) + completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) + + __table_args__ = ( + CheckConstraint( + "status IN ('pending','running','completed','partial','failed')", + name="ck_model_selection_run_valid_status", + ), + Index("ix_model_selection_run_store_product_created", "store_id", "product_id", "created_at"), + Index("ix_model_selection_run_status_created", "status", "created_at"), + ) +``` + +`app/features/model_selection/schemas.py` — strict request models + response models: + +- `SelectionWindow(start_date, end_date)` — `ConfigDict(strict=True)`, both dates `Field(strict=False, ...)`. +- `CandidateModelConfig(model_type: Literal[<11 model_types>], params: dict[str, Any] = {})`. 
+- `RankingPolicy(minimum_sample_size: int = 0, high_confidence_rel_improvement: float = 0.10, + max_acceptable_abs_bias: float = ...)` — defaults; snapshotted into `policy_snapshot`. +- `ModelSelectionRunRequest` — `ConfigDict(strict=True)`; fields: `store_id`, `product_id`, + `selection_window`, `forecast_horizon` (int, ge=1, le=90), `ranking_metric: Literal["wape","smape","mae","bias"]="wape"`, + `split_config: SplitConfig` (reuse backtesting's — see NOTE), `candidate_models: list[CandidateModelConfig]` (min_length=1, max_length=10), + `feature_frame_version: int = 1` (ge=1, le=2), `feature_groups: list[str] | None = None`, + `ranking_policy: RankingPolicy = Field(default_factory=RankingPolicy)`, + `auto_train_winner: bool = False`, `auto_predict: bool = False`. + - `@model_validator(mode="after")`: `split_config.horizon == forecast_horizon` (LOCKED #5); + `auto_predict implies auto_train_winner` (LOCKED #7). + - NOTE on `split_config`: `backtesting.schemas.SplitConfig` is `frozen=True, extra="forbid"` + (NOT strict). Either (a) reuse it directly (a lazy import is unnecessary for a schema type — + it's safe at module scope since backtesting.schemas has no cycle back to model_selection), or + (b) define a local `SplitSettings` mirror. **Prefer reusing `SplitConfig`** to avoid drift; it + already validates n_splits/min_train_size/gap/horizon. Since it is not strict-mode, its `date`-free + fields don't trip the strict-mode linter. +**Response + intermediate models (plain `BaseModel` — outputs don't need `strict=True`).
These +fields ARE the stable contract the UI consumes; specify them exactly, do not improvise.** + +```python +# ── intermediate (service-internal, also embedded in JSONB) ─────────────────── +class CandidateResult(BaseModel): # what shape_candidate()/shape_failed_candidate() return + model_type: str + params: dict[str, Any] # ORIGINAL candidate params — REQUIRED so the winner can be rebuilt (pseudocode L667) + failed: bool + error: str | None = None # reason when failed=True + aggregated_metrics: dict[str, float] | None = None # raw 5-key dict from backtest (mae,rmse,smape,wape,bias) or None + sample_size: int = 0 # RULE: sum(len(fold.actuals)) across main_model_results.fold_results + config_hash: str | None = None + folds: list[FoldChart] = [] # per-fold chart points (empty when failed) + +class FoldChart(BaseModel): + fold_index: int + dates: list[date] + actuals: list[float] + predictions: list[float] + +class ModelRankEntry(BaseModel): # one row in the ranking table (valid OR excluded) + rank: int | None # 1-based; None when excluded/failed + model_type: str + params: dict[str, Any] # carried through (see CandidateResult.params) + included: bool # False = failed or filtered out + exclusion_reason: str | None = None + metrics: dict[str, float] | None = None # normalized {wape,smape,mae,rmse,bias,sample_size} + +class RankingResult(BaseModel): # Pydantic (model_dump'd into ranking_result JSONB, L663) + winner: ModelRankEntry | None + entries: list[ModelRankEntry] # ALL candidates, ranked-then-failed, never hidden + confidence: Literal["high", "medium", "low"] + reasons: list[str] + +class WinnerSummary(BaseModel): + model_type: str + params: dict[str, Any] + metrics: dict[str, float] # normalized winner metrics + rank: int # always 1 + +class ChartData(BaseModel): # chart-ready comparison payload (Success Criteria deliverable) + wape_by_model: dict[str, float] # {model_type: wape} → WAPE bar chart + bias_by_model: dict[str, float] # {model_type: bias} → bias 
chart + fold_stability: dict[str, list[float]] # {model_type: per-fold wape} → stability lines + winner_actual_vs_predicted: list[FoldChart] # the WINNER's folds only → actual-vs-predicted overlay + +class PairAvailabilityResponse(BaseModel): + store_id: int + product_id: int + first_sales_date: date | None + last_sales_date: date | None + observed_days: int + expected_calendar_days: int + coverage_ratio: float + missing_days: int + zero_sale_days: int + promotion_days: int | None # None + a warning when not safely derivable + average_daily_demand: float # CAST float(...) — func.avg over Integer quantity returns Decimal + status: Literal["ready", "limited", "unusable"] + recommended_split_config: SplitConfig # reuse backtesting.schemas.SplitConfig + warnings: list[str] = [] + +class ForecastSummary(BaseModel): + points: list[dict[str, Any]] # ForecastPoint.model_dump(mode="json") list + total_demand: float + average_demand: float + horizon: int + +class ModelSelectionRunResponse(BaseModel): # THE /run + /{id} contract + selection_id: str + store_id: int + product_id: int + status: Literal["pending", "running", "completed", "partial", "failed"] + selection_window: SelectionWindow + forecast_horizon: int + ranking_metric: str + availability: PairAvailabilityResponse | None + ranking: list[ModelRankEntry] # == RankingResult.entries + winner: WinnerSummary | None + recommendation_confidence: Literal["high", "medium", "low"] | None # CANONICAL KEY (maps from RankingResult.confidence) + confidence_reasons: list[str] # == RankingResult.reasons + chart_data: ChartData | None + final_model: dict[str, Any] | None # {"model_path": ...} when auto_train_winner + forecast: ForecastSummary | None # when auto_predict + business_summary: dict[str, Any] | None + error_message: str | None + created_at: datetime + completed_at: datetime | None + +class TrainWinnerResponse(BaseModel): + selection_id: str + model_type: str + model_path: str + +class PredictWinnerResponse(BaseModel): + 
selection_id: str + forecast: ForecastSummary +``` + +> **NAMING (resolves the only internal-consistency nit):** the response key is +> **`recommendation_confidence`** (Success Criteria + manual probe + Goal all use it). +> `RankingResult.confidence` is the service-internal field; `_response()` maps +> `RankingResult.confidence → ModelSelectionRunResponse.recommendation_confidence` and +> `RankingResult.reasons → confidence_reasons`. Tests assert the response key +> `recommendation_confidence`. + +> **`self._response(row, ranking)` helper:** pure mapping `ModelSelectionRun` ORM row + +> `RankingResult` → `ModelSelectionRunResponse` (rehydrate `availability_snapshot`/`ranking_result`/ +> `business_summary`/`forecast_result` JSONB back into the response models; build `chart_data` from +> the per-candidate `CandidateResult.folds` + normalized metrics; map the confidence keys per above). + +### Implementation Tasks (dependency-ordered) + +```yaml +Task 1 — Migration + ORM: + RUN: uv run alembic heads # confirm current head (observed c1d2e3f40512) + CREATE alembic/versions/{revision}_create_model_selection_run.py: + - down_revision = "{current_head}" + - MIRROR alembic/versions/c1d2e3f40512_create_batch_tables.py exactly: + - from sqlalchemy.dialects import postgresql -> postgresql.JSONB(astext_type=sa.Text()) + - sa.DateTime(timezone=True), server_default=sa.text("now()") for created_at/updated_at + - CheckConstraint name="ck_model_selection_run_valid_status" + - op.create_index(op.f("ix_model_selection_run_selection_id"), ..., unique=True) + - op.create_index("ix_model_selection_run_store_product_created", ..., ["store_id","product_id","created_at"]) + - op.create_index("ix_model_selection_run_status_created", ..., ["status","created_at"]) + - downgrade(): drop indexes (reverse order) THEN op.drop_table("model_selection_run") + CREATE app/features/model_selection/models.py: # as blueprint above; mirror batch/models.py + +Task 2 — Schemas: + CREATE app/features/model_selection/schemas.py: + -
all REQUEST models ConfigDict(strict=True); date fields Field(strict=False, ...) + - Literal[...] for model_type + ranking_metric (NOT str-Enum) + - candidate_models min_length=1 max_length=10 (or settings.model_selection_max_candidates) + - @model_validator: horizon match (LOCKED #5) + auto_predict implies auto_train_winner (LOCKED #7) + - reuse backtesting.schemas.SplitConfig (module-scope import OK; no cycle) + +Task 3 — Ranking pure logic: + CREATE app/features/model_selection/ranking.py: + - NormalizedMetrics dataclass {wape, smape, mae, rmse, bias, sample_size} + - normalize_metrics(aggregated_metrics, sample_size) -> NormalizedMetrics | None + (None when the primary metric is missing OR NaN — use math.isnan guard; np.nan can appear, + metrics.py:381; keys are mae/rmse/smape/wape/bias) + - input: list[CandidateResult] (Task-2 schema). Each entry CARRIES model_type + params through to + ModelRankEntry/WinnerSummary so the winner can be rebuilt (pseudocode L667 reads winner.params). 
+ - filter: not failed AND numeric primary metric AND sample_size >= policy.minimum_sample_size + - rank key (default ranking_metric="wape"): (wape, smape, abs(bias), mae, model_type) [LOCKED #6] + - confidence (PIN the rel-improvement formula — denominator is the SECOND-place value): + rel_improvement = (second.wape - winner.wape) / second.wape # guard second.wape == 0 → treat as 0.0 + HIGH : >=2 valid AND rel_improvement >= policy.high_confidence_rel_improvement (default 0.10) + AND abs(winner.bias) <= policy.max_acceptable_abs_bias AND winner.sample_size sufficient + MEDIUM: a valid winner exists but HIGH not met (narrow lead OR mild warnings) and >=2 valid + LOW : exactly one valid candidate, OR availability "limited", OR abs(bias) over threshold, + OR rel_improvement < some near-tie epsilon (document the epsilon as a module constant) + - emit human-readable reasons[] strings explaining the chosen level (consumed as confidence_reasons) + - return RankingResult(winner, entries[ALL ranked-then-failed, never hidden], confidence, reasons) + +Task 4 — Business explanation pure logic: + CREATE app/features/model_selection/explanations.py: + - explain_winner(ranking, availability) -> business_summary dict + confidence_reasons + warnings + - translate WAPE/sMAPE/MAE/bias into short deterministic English; NO LLM, NO external call + +Task 5 — Pair availability: + CREATE ModelSelectionService.get_availability(db, store_id, product_id, forecast_horizon, split_config?) 
-> PairAvailabilityResponse: + - verify Store and Product exist (NotFoundError if absent) via data_platform ORM (module-scope import OK) + - aggregate SalesDaily for the pair (SQLAlchemy 2.0 async, mirror OpsService style): + select(func.min(SalesDaily.date), func.max(SalesDaily.date), + func.count(func.distinct(SalesDaily.date)), func.sum(SalesDaily.quantity), + func.avg(SalesDaily.quantity), + func.count().filter(SalesDaily.quantity == 0)) # FILTER aggregate; valid async idiom + .where(SalesDaily.store_id == store_id, SalesDaily.product_id == product_id) + # CAST: func.avg over Integer quantity returns Decimal; wrap average_daily_demand in float(...). + # func.count().filter(...) is a Postgres FILTER aggregate (not shown in OpsService, but supported); + # alternatively a second scalar count with .where(quantity == 0). One round-trip is fine. + - expected_calendar_days = (max_date - min_date).days + 1 + - coverage_ratio = observed_days / expected_calendar_days (guard div-by-zero / no rows) + - missing_days = expected_calendar_days - observed_days + - promotion_days: JOIN promotion ON date BETWEEN start/end AND product_id match AND + (store_id == X OR store_id IS NULL); COUNT(DISTINCT date). On any doubt → None + warning. 
+ - status (LOCKED thresholds): + ready if observed_days >= min_train_size + horizon*n_splits AND coverage_ratio >= 0.8 + limited if observed_days >= min_train_size + horizon + unusable otherwise + - recommended_split_config: expanding, n_splits=min(5, feasible), min_train_size=30 (or adjusted), + gap=0, horizon=forecast_horizon + - NO rows for the pair -> status="unusable" with zeros/None and a warning + +Task 6 — Orchestration: + CREATE ModelSelectionService.run_selection(db, request) -> ModelSelectionRunResponse: + - persist ModelSelectionRun(selection_id=uuid4().hex, status="running", snapshots); flush + - availability = get_availability(...); persist snapshot + - if availability.status == "unusable": status="failed", error_message, flush, raise BadRequestError [LOCKED #2] + - for each candidate (LAZY import services + ModelConfig adapter): + try: cfg = flatten+validate ModelConfig; bt = await BacktestingService().run_backtest( + db, store_id, product_id, window.start, window.end, + BacktestConfig(split_config=req.split_config, model_config_main=cfg, + include_baselines=False, store_fold_details=True)) + collect aggregated_metrics, sample_size, fold dates/actuals/predictions for chart + except Exception as exc: append failed entry with reason=str(exc) [never hide — Anti-Patterns] + - ranking = rank_candidates(results, req.ranking_policy, req.ranking_metric) + - if ranking.winner is None: status="failed", persist ranking_result, flush, RETURN 200 response [LOCKED #3] + - if req.auto_train_winner: + train = await ForecastingService().train_model(db, store_id, product_id, window.start, window.end, + winner_cfg, feature_frame_version=req.feature_frame_version, feature_groups=req.feature_groups) + row.final_model_path = train.model_path + - if req.auto_predict: # requires auto_train_winner (validated) + try: pred = await ForecastingService().predict(store_id, product_id, req.forecast_horizon, row.final_model_path) + row.forecast_result = pred.model_dump(mode="json") 
+ except Exception: warning, leave forecast_result None + - business_summary = explain_winner(ranking, availability) + - status = "partial" if any candidate failed else "completed"; completed_at = datetime.now(UTC) + - persist all JSONB via model_dump(mode="json"); flush + refresh; return self._response(row, ranking) + ADD methods: get_selection(db, selection_id)->row|NotFoundError ; get_ranking ; train_winner ; predict_winner + +Task 7 — Routes: + CREATE app/features/model_selection/routes.py: + - router = APIRouter(prefix="/model-selection", tags=["model-selection"]) + - GET /availability ; POST /run (200) ; GET /{selection_id} ; GET /{selection_id}/ranking ; + POST /{selection_id}/train-winner ; POST /{selection_id}/predict + - MIRROR backtesting/routes.py error mapping EXACTLY: + service instantiated locally; try/except ValueError->BadRequestError(str(e)), + SQLAlchemyError->DatabaseError("...", details={"error": str(e)}); NotFoundError from service bubbles. + - structured logger.info events (see Integration Points) + MODIFY app/main.py: + - `from app.features.model_selection.routes import router as model_selection_router` (alpha order with siblings) + - `app.include_router(model_selection_router)` inside create_app(), near backtesting/forecasting (NO prefix arg) + +Task 8 — Tests (see Validation Loop for required names): + CREATE app/features/model_selection/tests/{conftest,test_models,test_schemas,test_ranking, + test_explanations,test_service,test_routes,test_routes_integration}.py + - unit route tests: ASGITransport + app.dependency_overrides[get_db]=AsyncMock; 4-key RFC7807 assert + - service tests: mock BacktestingService/ForecastingService (patch the lazy import targets) for + happy/partial/all-fail/auto-train/auto-predict paths + - integration tests (@pytest.mark.integration): real engine, prefix-scoped teardown in finally +``` + +### Pseudocode (CRITICAL details only) + +```python +# ranking.py — deterministic, pure +def rank_candidates(results, policy,
ranking_metric="wape"): + valid, failed = [], [] + for r in results: + m = normalize_metrics(r.aggregated_metrics, r.sample_size) # keys: mae,rmse,smape,wape,bias + if m is None or m.sample_size < policy.minimum_sample_size: + failed.append(r.as_failed("missing/NaN primary metric or sample_size below minimum")) + continue + valid.append((r, m)) + if not valid: + return RankingResult(winner=None, entries=failed, confidence="low", reasons=["no valid candidate"]) + primary = lambda m: getattr(m, ranking_metric) if ranking_metric != "bias" else abs(m.bias) + ordered = sorted(valid, key=lambda p: (primary(p[1]), p[1].smape, abs(p[1].bias), p[1].mae, p[0].model_type)) + winner = ordered[0] + return build_ranking_result(ordered, failed, policy) # computes confidence vs 2nd place +``` + +```python +# service.py — orchestration (exact verified service calls) +async def run_selection(self, db, req): + from pydantic import TypeAdapter # lazy + from app.features.backtesting.schemas import BacktestConfig # lazy + from app.features.backtesting.service import BacktestingService # lazy + from app.features.forecasting.schemas import ModelConfig # lazy + from app.features.forecasting.service import ForecastingService # lazy + adapter = TypeAdapter(ModelConfig) + + row = ModelSelectionRun(selection_id=uuid.uuid4().hex, status="running", + store_id=req.store_id, product_id=req.product_id, + start_date=req.selection_window.start_date, end_date=req.selection_window.end_date, + forecast_horizon=req.forecast_horizon, ranking_metric=req.ranking_metric, + candidate_models=[c.model_dump() for c in req.candidate_models], + policy_snapshot=req.ranking_policy.model_dump(mode="json")) + db.add(row); await db.flush() + + availability = await self.get_availability(db, req.store_id, req.product_id, req.forecast_horizon, req.split_config) + row.availability_snapshot = availability.model_dump(mode="json") + if availability.status == "unusable": + row.status = "failed"; row.error_message = 
"Insufficient data for model selection" + await db.flush(); raise BadRequestError(message=row.error_message) # LOCKED #2 + + results = [] + for c in req.candidate_models: + try: + cfg = adapter.validate_python({"model_type": c.model_type, **c.params}) # FLATTEN + bt = await BacktestingService().run_backtest( + db, req.store_id, req.product_id, + req.selection_window.start_date, req.selection_window.end_date, + BacktestConfig(split_config=req.split_config, model_config_main=cfg, + include_baselines=False, store_fold_details=True)) # LOCKED #4 + results.append(shape_candidate(c, bt)) + except Exception as exc: + results.append(shape_failed_candidate(c, exc)) + + ranking = rank_candidates(results, req.ranking_policy, req.ranking_metric) + row.ranking_result = ranking.model_dump(mode="json") + if ranking.winner is None: + row.status = "failed"; await db.flush(); return self._response(row, ranking) # LOCKED #3 (HTTP 200) + + winner_cfg = adapter.validate_python({"model_type": ranking.winner.model_type, **ranking.winner.params}) + if req.auto_train_winner: + train = await ForecastingService().train_model( + db, req.store_id, req.product_id, req.selection_window.start_date, req.selection_window.end_date, + winner_cfg, feature_frame_version=req.feature_frame_version, feature_groups=req.feature_groups) + row.final_model_path = train.model_path + if req.auto_predict and row.final_model_path: + try: + pred = await ForecastingService().predict(req.store_id, req.product_id, req.forecast_horizon, row.final_model_path) + row.forecast_result = pred.model_dump(mode="json") + except Exception as exc: # e.g. 
feature-aware reject (forecasting service.py:491) + row.forecast_result = None # surface a warning in business_summary + + row.winner_model_type = ranking.winner.model_type + row.winner_metrics = ranking.winner.metrics + row.business_summary = explain_winner(ranking, availability) + row.status = "partial" if any(r.failed for r in results) else "completed" + row.completed_at = datetime.now(UTC) + await db.flush(); await db.refresh(row) + return self._response(row, ranking) +``` + +### Integration Points + +```yaml +DATABASE: + - migration: add `model_selection_run` (JSONB snapshots: candidate_models, policy_snapshot, + availability_snapshot, ranking_result, winner_metrics, forecast_result, business_summary) + - indexes: ix_model_selection_run_selection_id (unique), ix_model_selection_run_store_product_created, + ix_model_selection_run_status_created +ROUTES: + - app/main.py: import + app.include_router(model_selection_router) (router carries its own prefix) +CONFIG (optional — only if used; then ADD to .env.example with UPPER_SNAKE + a comment, and a test): + - model_selection_max_candidates: int = 10 + - model_selection_min_coverage_ratio: float = 0.8 + - model_selection_default_min_train_size: int = 30 +OBSERVABILITY (structlog events, mirror ops/backtesting naming): + - model_selection.run_received / .availability_checked / .candidate_completed / + .candidate_failed / .run_completed / .run_failed +``` + +## Validation Loop + +### Level 1 — Focused syntax & policy + +```bash +uv run ruff check app/features/model_selection app/main.py alembic/versions +uv run ruff format --check app/features/model_selection app/main.py alembic/versions +uv run mypy app/features/model_selection app/main.py +uv run pyright app/features/model_selection app/main.py +uv run pytest app/core/tests/test_strict_mode_policy.py -v +``` + +### Level 2 — Focused unit tests + +```bash +uv run pytest app/features/model_selection/tests -v -m "not integration" +``` + +Required test names: + +- 
`test_schema_accepts_iso_dates_under_strict_model` (JSON path: `Model.model_validate({"start_date":"2026-01-01",...})`) +- `test_schema_rejects_auto_predict_without_train_winner` +- `test_schema_rejects_horizon_mismatch_between_split_and_forecast` +- `test_rank_candidates_wape_smape_abs_bias_mae_tie_break` +- `test_rank_candidates_excludes_missing_or_nan_metrics` +- `test_rank_candidates_normalizes_five_metric_keys_including_rmse` +- `test_confidence_high_when_winner_beats_second_by_10_percent` +- `test_availability_ready_limited_unusable_thresholds` +- `test_build_model_config_flattens_params` (e.g. seasonal_naive + {"season_length":7}) +- `test_run_selection_partial_success_chooses_valid_winner` +- `test_run_selection_all_candidates_fail_returns_failed_status_not_500` (LOCKED #3) +- `test_run_selection_unusable_availability_raises_bad_request` (LOCKED #2) +- `test_run_selection_auto_train_passes_feature_frame_version_and_groups` +- `test_routes_return_problem_json_on_bad_request` (4-key RFC 7807 body) +- `test_response_uses_recommendation_confidence_key` (NOT `confidence`; maps from `RankingResult.confidence`) +- `test_winner_entry_carries_params_for_rebuild` (`ModelRankEntry.params` / `WinnerSummary.params` preserved) +- `test_chart_data_has_wape_bias_fold_stability_and_winner_actual_vs_predicted` + +### Level 3 — Migration & integration + +```bash +docker compose up -d +uv run alembic upgrade head +uv run pytest app/features/model_selection/tests -v -m integration +uv run alembic downgrade -1 && uv run alembic upgrade head # downgrade/upgrade round-trips cleanly +``` + +Integration expectations: + +- `model_selection_run` exists with the three named indexes. +- `POST /model-selection/run` persists a row; `GET /model-selection/{selection_id}` returns the same id. +- Availability detects an inserted pair with enough history (`ready`) and a too-short pair (`limited`/`unusable`). 
+- Partial failure persists the failed candidate reason and still ranks a valid winner. + +### Level 4 — Full backend gates (must be green before PR) + +```bash +uv run ruff check . && uv run ruff format --check . +uv run mypy app/ && uv run pyright app/ +uv run pytest -v -m "not integration" +uv run pytest -v -m integration +``` + +> Known-local-noise: mypy/pyright report pre-existing `lightgbm`/`xgboost` optional-dep import +> errors in `forecasting/`+`registry/` (untouched here; CI installs the extras). Do not "fix" them. + +### Manual API probe (seeded DB; discover real store/product ids + date window first — IDs are +not guaranteed 1-based, see memory `seeder-does-not-reset-id-sequences`) + +```bash +uv run uvicorn app.main:app --port 8123 & +curl -s "http://localhost:8123/model-selection/availability?store_id=5&product_id=8&forecast_horizon=14" | python3 -m json.tool +curl -s -X POST http://localhost:8123/model-selection/run -H "Content-Type: application/json" -d '{ + "store_id": 5, "product_id": 8, + "selection_window": {"start_date": "2026-01-01", "end_date": "2026-05-31"}, + "forecast_horizon": 14, + "split_config": {"strategy":"expanding","n_splits":5,"min_train_size":30,"gap":0,"horizon":14}, + "candidate_models": [ + {"model_type":"naive","params":{}}, + {"model_type":"seasonal_naive","params":{"season_length":7}}, + {"model_type":"moving_average","params":{"window_size":7}}, + {"model_type":"regression","params":{}}, + {"model_type":"prophet_like","params":{}} + ], + "auto_train_winner": false, "auto_predict": false +}' | python3 -m json.tool +``` + +Expected: HTTP 200; response carries `selection_id`, non-empty `ranking`, `winner.model_type`, +`recommendation_confidence`, `chart_data`. + +## Final Validation Checklist + +- [ ] New slice follows `app/features/model_selection/{models,schemas,service,routes,tests}.py`. +- [ ] Router wired in `app/main.py` (import alias + `include_router`, no prefix at include).
+- [ ] Migration `down_revision` chains to the live head; downgrade drops indexes then table. +- [ ] Request schemas use `ConfigDict(strict=True)` + `Field(strict=False)` for every date field; strict-mode test green. +- [ ] All 4xx responses use project exceptions (`BadRequestError`/`NotFoundError`/`DatabaseError`) → RFC 7807. +- [ ] Ranking + explanation logic is pure and unit-tested; normalizer handles all five metric keys incl. `rmse`. +- [ ] Availability covered for ready/limited/unusable + no-rows. +- [ ] `auto_train_winner` uses direct `ForecastingService.train_model` (db first, feature args keyword-only). +- [ ] `auto_predict` handles feature-aware-reject gracefully (warning, not 500). +- [ ] LOCKED decisions #1–#7 are implemented and tested. +- [ ] No frontend files, no agent mutation surface, no managed-cloud SDK. +- [ ] All four Level-4 gates pass; `gh issue view {issue_number}` confirms the referenced issue is open. + +## Anti-Patterns to Avoid + +- Don't implement the React UI; don't rank models in TypeScript — backend owns ranking/confidence. +- Don't use batch item metrics for fold-level chart data (batch has none) — use direct `BacktestingService` with `store_fold_details=True`. +- Don't import sibling feature *services* at module scope — lazy in-method (matches forecasting/BatchService precedent). ORM *models* at module scope is fine (OpsService precedent). +- Don't import `validate_date_range` from analytics — reimplement locally. +- Don't pass the candidate `params` as a nested dict to `ModelConfig` — FLATTEN (`{"model_type":..., **params}`). +- Don't assume four metric keys — there are five (`rmse` included); normalize, never index a raw shape blindly. +- Don't sum `(end_date - start_date)` for promotion days (double-counts overlaps; ignores chain-wide `store_id IS NULL`). +- Don't mutate aliases automatically; don't add an agent tool. +- Don't hide failed candidates — include them with `reason`. +- Don't use an LLM for explanations — deterministic text only.
+- Don't raise on all-candidates-fail (LOCKED #3 → persist failed + return 200); DO raise on unusable availability (LOCKED #2 → 400). +- Don't build SQL with string concatenation; don't weaken strict-mode or leakage tests. + +## Confidence Score + +**9.5/10** for one-pass backend implementation success. The prior draft self-rated 8/10 with +"service signatures must be rechecked at implementation time" as the top risk — that risk is now +**retired**: every `run_backtest` / `train_model` / `predict` signature, the corrected five-key +metric shape, the `ModelConfig` flattening, the strict-mode rule, the migration/JSONB/exception +patterns, and seven previously-ambiguous decisions are verified and locked here. An independent +quality-gate pass confirmed every cited signature/line-number/field-name against live source +("tried to break the cited signatures and could not") and its findings — the full response/ +intermediate contract (`CandidateResult`, `ModelRankEntry`, `RankingResult`, `WinnerSummary`, +`ChartData`, `ModelSelectionRunResponse`, …), the `recommendation_confidence` naming, the +`winner.params` carry-through, the `_response` mapping, and the rel-improvement denominator — are +now specified inline. + +Residual risks: + +- Per-candidate backtest runtime: five models × a multi-fold backtest is synchronous in-process. + On a slow host the `/run` request can be slow (acceptable for a single pair; mirrors + `/backtesting/run`). If it becomes a problem, a future PRP can move it behind the jobs slice. +- `promotion_days` derivation has real edge cases (chain-wide promos, overlapping ranges); the + PRP explicitly permits `null + warning` as a correct fallback. +- `lightgbm`/`xgboost` candidates can `ImportError` when extras are absent — they degrade to a + failed candidate with a reason (verified path), not a 500. 
From 5a49fe323be919e7ae255fbc5cad83680e8715d8 Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 08:49:57 +0200 Subject: [PATCH 25/30] fix(ui): avoid setState-in-effect in RunHistoryStrip to unblock lint (#357) --- .../src/components/demo/RunHistoryStrip.tsx | 41 +++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/frontend/src/components/demo/RunHistoryStrip.tsx b/frontend/src/components/demo/RunHistoryStrip.tsx index 5605879c..fce287ba 100644 --- a/frontend/src/components/demo/RunHistoryStrip.tsx +++ b/frontend/src/components/demo/RunHistoryStrip.tsx @@ -63,26 +63,35 @@ export function RunHistoryStrip({ onReplay, summary, scenario }: RunHistoryStrip const [items, setItems] = useState(() => loadHistory()) const [lastSummary, setLastSummary] = useState(null) - useEffect(() => { - if (!summary || summary === lastSummary) return - // Persist exactly once per pipeline_complete summary (R18). - const entry: RunHistoryItem = { - id: crypto.randomUUID(), - runId: summary.winningRunId, - timestamp: new Date().toISOString(), - scenario, - status: summary.overallStatus, - wallClockS: summary.wallClockS, - } - const next = [entry, ...items].slice(0, HISTORY_CAP) - setItems(next) - saveHistory(next) + // Append exactly once per pipeline_complete summary (R18). Done DURING render + // (the React "storing information from previous renders" pattern) rather than + // in an effect — calling setState synchronously inside an effect body causes + // cascading renders and is flagged by react-hooks/set-state-in-effect. 
+ if (summary && summary !== lastSummary) { setLastSummary(summary) - }, [summary, lastSummary, items, scenario]) + setItems((prev) => + [ + { + id: crypto.randomUUID(), + runId: summary.winningRunId, + timestamp: new Date().toISOString(), + scenario, + status: summary.overallStatus, + wallClockS: summary.wallClockS, + }, + ...prev, + ].slice(0, HISTORY_CAP), + ) + } + + // Persist the history to localStorage whenever it changes — syncing React + // state to an external system is the sanctioned use of an effect. + useEffect(() => { + saveHistory(items) + }, [items]) const clear = useCallback(() => { setItems([]) - saveHistory([]) }, []) if (items.length === 0) return null From f1bb4edb6bad824ce766060a5a6bece1fcc19c7a Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 09:01:33 +0200 Subject: [PATCH 26/30] feat(ui): add forecast champion selector foundation (#356) --- app/features/model_selection/capabilities.py | 157 ++++++++++ app/features/model_selection/routes.py | 17 + app/features/model_selection/schemas.py | 26 ++ app/features/model_selection/service.py | 14 + .../tests/test_capabilities.py | 102 ++++++ .../model_selection/tests/test_routes.py | 50 +++ frontend/src/App.tsx | 9 + .../availability-panel.test.tsx | 76 +++++ .../champion-selector/availability-panel.tsx | 146 +++++++++ .../backtest-settings-form.test.tsx | 120 +++++++ .../backtest-settings-form.tsx | 206 ++++++++++++ .../candidate-model-picker.test.tsx | 99 ++++++ .../candidate-model-picker.tsx | 129 ++++++++ .../src/components/champion-selector/copy.ts | 20 ++ .../champion-selector/run-request.test.ts | 63 ++++ .../champion-selector/run-request.ts | 50 +++ .../searchable-entity-select.test.tsx | 78 +++++ .../searchable-entity-select.tsx | 144 +++++++++ .../champion-selector/split-config.ts | 24 ++ frontend/src/hooks/index.ts | 1 + .../src/hooks/use-model-selection.test.ts | 126 ++++++++ frontend/src/hooks/use-model-selection.ts | 57 ++++ frontend/src/lib/constants.ts | 2 + 
.../src/pages/visualize/champion.test.tsx | 118 +++++++ frontend/src/pages/visualize/champion.tsx | 294 ++++++++++++++++++ frontend/src/types/api.ts | 158 ++++++++++ 26 files changed, 2286 insertions(+) create mode 100644 app/features/model_selection/capabilities.py create mode 100644 app/features/model_selection/tests/test_capabilities.py create mode 100644 frontend/src/components/champion-selector/availability-panel.test.tsx create mode 100644 frontend/src/components/champion-selector/availability-panel.tsx create mode 100644 frontend/src/components/champion-selector/backtest-settings-form.test.tsx create mode 100644 frontend/src/components/champion-selector/backtest-settings-form.tsx create mode 100644 frontend/src/components/champion-selector/candidate-model-picker.test.tsx create mode 100644 frontend/src/components/champion-selector/candidate-model-picker.tsx create mode 100644 frontend/src/components/champion-selector/copy.ts create mode 100644 frontend/src/components/champion-selector/run-request.test.ts create mode 100644 frontend/src/components/champion-selector/run-request.ts create mode 100644 frontend/src/components/champion-selector/searchable-entity-select.test.tsx create mode 100644 frontend/src/components/champion-selector/searchable-entity-select.tsx create mode 100644 frontend/src/components/champion-selector/split-config.ts create mode 100644 frontend/src/hooks/use-model-selection.test.ts create mode 100644 frontend/src/hooks/use-model-selection.ts create mode 100644 frontend/src/pages/visualize/champion.test.tsx create mode 100644 frontend/src/pages/visualize/champion.tsx diff --git a/app/features/model_selection/capabilities.py b/app/features/model_selection/capabilities.py new file mode 100644 index 00000000..5c513496 --- /dev/null +++ b/app/features/model_selection/capabilities.py @@ -0,0 +1,157 @@ +"""Pure model-capability catalog for the champion selector (issue #356, Slice A). 
+ +No DB, no I/O — :func:`build_model_catalog` is deterministic and unit-tested +directly (mirrors ``ranking.py`` / ``explanations.py``). It surfaces the +forecasting model union as a frontend-consumable catalog so the React +``MODEL_FAMILY_MAP`` / labels never drift from the Python authority. + +Capability provenance (BACKEND-OWNED, verified 2026-06-01): +- ``family`` — ``forecasting.feature_metadata.model_family_for`` (lazy + cross-slice import inside the builder, per the slice's import discipline). +- ``feature_aware`` — the set whose forecasters set ``requires_features=True`` + (RandomForest/Regression/LightGBM/XGBoost/ProphetLike), i.e. exactly the set + ``ForecastingService.predict()`` rejects (``forecasting/service.py``). +- ``requires_extra`` — ``lightgbm``/``xgboost`` (opt-in extras that may + ``ImportError`` when the extra is not installed). +- ``supports_auto_predict`` — ``not feature_aware`` (feature-aware winners + forecast through ``POST /scenarios/simulate``, not the plain predict path). +- ``default_params`` — the FLAT model-tuning defaults pinned from the live + ``forecasting.schemas.ModelConfig`` members (the internal ``schema_version`` + and ``feature_config_hash`` meta fields are intentionally omitted). +""" + +from __future__ import annotations + +from dataclasses import dataclass, field + +from app.features.model_selection.schemas import ( + CandidateModelInfo, + ModelCatalogResponse, +) + +# Models gated behind the matching opt-in extra (may be absent at runtime). +_REQUIRES_EXTRA: frozenset[str] = frozenset({"lightgbm", "xgboost"}) + +# Feature-aware models — their forecasters set ``requires_features=True`` and +# ``ForecastingService.predict()`` rejects them (they need an exogenous feature +# frame). Verified against ``forecasting/models.py`` requires_features flags. 
+_FEATURE_AWARE: frozenset[str] = frozenset( + {"regression", "prophet_like", "lightgbm", "xgboost", "random_forest"} +) + +# The default candidate set the backend ``POST /run`` contract documents — the +# UI pre-selects exactly these. +DEFAULT_CANDIDATE_MODEL_TYPES: list[str] = [ + "naive", + "seasonal_naive", + "moving_average", + "regression", + "prophet_like", +] + + +@dataclass(frozen=True) +class _CatalogEntry: + """Slice-local presentation metadata for one model_type.""" + + label: str + description: str + default_params: dict[str, object] = field(default_factory=lambda: {}) + + +# Ordered map: model_type → presentation metadata. The KEYS must equal the +# ``ModelType`` Literal in ``schemas.py`` exactly (asserted in +# ``test_capabilities.py``). ``default_params`` are the flat model-tuning +# defaults from the forecasting ``ModelConfig`` members (schema_version / +# feature_config_hash meta fields omitted), pinned 2026-06-01. +_CATALOG: dict[str, _CatalogEntry] = { + "naive": _CatalogEntry( + label="Naive", + description="Repeats the last observed value.", + ), + "seasonal_naive": _CatalogEntry( + label="Seasonal Naive", + description="Repeats the value from one season ago.", + default_params={"season_length": 7}, + ), + "moving_average": _CatalogEntry( + label="Moving Average", + description="Averages the last N observed values.", + default_params={"window_size": 7}, + ), + "weighted_moving_average": _CatalogEntry( + label="Weighted Moving Average", + description="Recency-weighted average of the last N values.", + default_params={"window_size": 7, "weight_strategy": "linear", "decay": 0.7}, + ), + "seasonal_average": _CatalogEntry( + label="Seasonal Average", + description="Averages the same season-position across recent cycles.", + default_params={"season_length": 7, "lookback_cycles": 4, "trim_outliers": False}, + ), + "trend_regression_baseline": _CatalogEntry( + label="Trend Regression Baseline", + description="Ridge trend with optional day-of-week / 
month terms.", + default_params={"alpha": 1.0, "include_dow": True, "include_month": True}, + ), + "random_forest": _CatalogEntry( + label="Random Forest", + description="Feature-aware random-forest regressor over lag/calendar features.", + default_params={"n_estimators": 100, "max_depth": 10, "min_samples_leaf": 2}, + ), + "lightgbm": _CatalogEntry( + label="LightGBM", + description="Gradient-boosted trees (opt-in extra) over engineered features.", + default_params={"n_estimators": 100, "max_depth": 6, "learning_rate": 0.1}, + ), + "xgboost": _CatalogEntry( + label="XGBoost", + description="Extreme gradient boosting (opt-in extra) over engineered features.", + default_params={"n_estimators": 100, "max_depth": 6, "learning_rate": 0.1}, + ), + "regression": _CatalogEntry( + label="Gradient Boosting Regression", + description="Histogram gradient-boosting over lag, calendar, and exogenous features.", + default_params={"max_iter": 200, "learning_rate": 0.05, "max_depth": 6}, + ), + "prophet_like": _CatalogEntry( + label="Prophet-like Additive", + description="Additive trend/seasonality Ridge over engineered features.", + default_params={"alpha": 1.0}, + ), +} + + +def build_model_catalog() -> ModelCatalogResponse: + """Build the backend-owned candidate-model catalog (pure, no I/O). + + Iterates the slice-local ``_CATALOG`` in declaration order, deriving each + entry's ``family`` from the forecasting authority and its capability flags + from the module-level sets. Returns the full catalog plus the documented + default candidate set. + """ + # Lazy cross-slice import (mirror service.py) — avoids closing an alembic + # cold-boot import cycle through the forecasting slice. 
+ from app.features.forecasting.feature_metadata import model_family_for + + models: list[CandidateModelInfo] = [] + for model_type, meta in _CATALOG.items(): + feature_aware = model_type in _FEATURE_AWARE + models.append( + CandidateModelInfo( + model_type=model_type, + label=meta.label, + # ``ModelFamily`` is a ``str, Enum`` whose ``.value`` is already + # typed as the ``baseline|tree|additive`` literal the schema wants. + family=model_family_for(model_type).value, + feature_aware=feature_aware, + requires_extra=model_type in _REQUIRES_EXTRA, + default_params=dict(meta.default_params), + supports_auto_predict=not feature_aware, + description=meta.description, + ) + ) + return ModelCatalogResponse( + models=models, + default_candidate_model_types=list(DEFAULT_CANDIDATE_MODEL_TYPES), + ) diff --git a/app/features/model_selection/routes.py b/app/features/model_selection/routes.py index f989aac0..f4f833c7 100644 --- a/app/features/model_selection/routes.py +++ b/app/features/model_selection/routes.py @@ -24,6 +24,7 @@ from app.core.exceptions import BadRequestError, DatabaseError from app.core.logging import get_logger from app.features.model_selection.schemas import ( + ModelCatalogResponse, ModelSelectionRunRequest, ModelSelectionRunResponse, PairAvailabilityResponse, @@ -62,6 +63,22 @@ async def get_availability( ) from exc +@router.get( + "/models", + response_model=ModelCatalogResponse, + status_code=status.HTTP_200_OK, + summary="List the backend-owned candidate-model capability catalog", +) +async def get_model_catalog() -> ModelCatalogResponse: + """Return the static candidate-model catalog (no DB, no query params). + + Declared BEFORE ``GET /{selection_id}`` so Starlette matches the literal + ``/models`` path and does not capture it as ``selection_id="models"``. 
+ """ + service = ModelSelectionService() + return service.get_model_catalog() + + @router.post( "/run", response_model=ModelSelectionRunResponse, diff --git a/app/features/model_selection/schemas.py b/app/features/model_selection/schemas.py index 9fc10d37..d3bc45dd 100644 --- a/app/features/model_selection/schemas.py +++ b/app/features/model_selection/schemas.py @@ -288,6 +288,32 @@ class ModelSelectionRunResponse(BaseModel): completed_at: datetime | None +class CandidateModelInfo(BaseModel): + """One selectable forecasting model in the capability catalog. + + Output-only (plain ``BaseModel`` — no strict coercion needed). The + capability flags are BACKEND-OWNED: they derive from the forecasting + authority (``model_family_for`` + each forecaster's ``requires_features``) + so the frontend never re-derives families/feature-awareness in TypeScript. + """ + + model_type: str + label: str + family: Literal["baseline", "tree", "additive"] + feature_aware: bool + requires_extra: bool # lightgbm/xgboost — opt-in extra may be absent at runtime + default_params: dict[str, Any] + supports_auto_predict: bool # False for feature-aware models (predict() rejects them) + description: str + + +class ModelCatalogResponse(BaseModel): + """``GET /model-selection/models`` — backend-owned candidate catalog.""" + + models: list[CandidateModelInfo] + default_candidate_model_types: list[str] + + class TrainWinnerResponse(BaseModel): """``POST /model-selection/{id}/train-winner`` response.""" diff --git a/app/features/model_selection/service.py b/app/features/model_selection/service.py index ff7111e8..b8536068 100644 --- a/app/features/model_selection/service.py +++ b/app/features/model_selection/service.py @@ -26,6 +26,7 @@ from app.core.logging import get_logger from app.features.backtesting.schemas import SplitConfig from app.features.data_platform.models import Product, Promotion, SalesDaily, Store +from app.features.model_selection.capabilities import build_model_catalog from 
app.features.model_selection.explanations import explain_winner from app.features.model_selection.models import ModelSelectionRun, ModelSelectionStatus from app.features.model_selection.ranking import build_chart_data, rank_candidates @@ -36,6 +37,7 @@ ChartData, FoldChart, ForecastSummary, + ModelCatalogResponse, ModelSelectionRunRequest, ModelSelectionRunResponse, PairAvailabilityResponse, @@ -64,6 +66,18 @@ class ModelSelectionService: """Stateless orchestrator — a fresh ``db`` session per method.""" + # ------------------------------------------------------------------------- + # Capability catalog + # ------------------------------------------------------------------------- + + def get_model_catalog(self) -> ModelCatalogResponse: + """Return the backend-owned candidate-model catalog (static, no I/O). + + Thin pass-through to the pure :func:`capabilities.build_model_catalog`; + kept on the service for symmetry with ``get_availability`` / ``run``. + """ + return build_model_catalog() + # ------------------------------------------------------------------------- # Availability # ------------------------------------------------------------------------- diff --git a/app/features/model_selection/tests/test_capabilities.py b/app/features/model_selection/tests/test_capabilities.py new file mode 100644 index 00000000..3ff73804 --- /dev/null +++ b/app/features/model_selection/tests/test_capabilities.py @@ -0,0 +1,102 @@ +"""Unit tests for the pure model-capability catalog (issue #356, Slice A). + +No DB, no I/O — exercises ``build_model_catalog`` directly, mirroring +``test_ranking.py``. These pin the BACKEND-OWNED capability contract the +frontend consumes read-only. 
+""" + +from __future__ import annotations + +import typing + +from app.features.model_selection.capabilities import ( + DEFAULT_CANDIDATE_MODEL_TYPES, + build_model_catalog, +) +from app.features.model_selection.schemas import ModelType + +_EXPECTED_MODEL_TYPES = set(typing.get_args(ModelType)) + + +def test_catalog_model_types_match_literal() -> None: + """The catalog covers EXACTLY the ``ModelType`` Literal — no drift.""" + catalog = build_model_catalog() + catalog_types = {m.model_type for m in catalog.models} + assert catalog_types == _EXPECTED_MODEL_TYPES + # 11 models, no duplicates. + assert len(catalog.models) == len(_EXPECTED_MODEL_TYPES) == 11 + + +def test_catalog_families_are_valid_literals() -> None: + """Every family is one of the three lowercase literals from forecasting.""" + catalog = build_model_catalog() + for model in catalog.models: + assert model.family in {"baseline", "tree", "additive"} + + +def test_requires_extra_flags_lightgbm_xgboost_only() -> None: + """Only the opt-in extras (lightgbm/xgboost) carry requires_extra=True.""" + catalog = build_model_catalog() + extras = {m.model_type for m in catalog.models if m.requires_extra} + assert extras == {"lightgbm", "xgboost"} + + +def test_feature_aware_set_matches_predict_reject_set() -> None: + """feature_aware == the forecasters with requires_features=True.""" + catalog = build_model_catalog() + feature_aware = {m.model_type for m in catalog.models if m.feature_aware} + assert feature_aware == { + "regression", + "prophet_like", + "lightgbm", + "xgboost", + "random_forest", + } + + +def test_feature_aware_models_do_not_support_auto_predict() -> None: + """supports_auto_predict is the strict negation of feature_aware.""" + catalog = build_model_catalog() + for model in catalog.models: + assert model.supports_auto_predict == (not model.feature_aware) + + +def test_default_candidate_model_types_are_the_default_five() -> None: + """The pre-selected defaults match the backend /run contract 
example.""" + catalog = build_model_catalog() + assert catalog.default_candidate_model_types == [ + "naive", + "seasonal_naive", + "moving_average", + "regression", + "prophet_like", + ] + # The exported constant and the response agree. + assert DEFAULT_CANDIDATE_MODEL_TYPES == catalog.default_candidate_model_types + # Every default is a real catalog entry. + catalog_types = {m.model_type for m in catalog.models} + assert set(catalog.default_candidate_model_types) <= catalog_types + + +def test_default_params_match_forecasting_defaults() -> None: + """default_params are pinned to the live forecasting ModelConfig defaults.""" + by_type = {m.model_type: m.default_params for m in build_model_catalog().models} + assert by_type["naive"] == {} + assert by_type["seasonal_naive"] == {"season_length": 7} + assert by_type["moving_average"] == {"window_size": 7} + assert by_type["regression"] == { + "max_iter": 200, + "learning_rate": 0.05, + "max_depth": 6, + } + # No internal/meta fields leak into the catalog. 
+ for params in by_type.values(): + assert "schema_version" not in params + assert "feature_config_hash" not in params + + +def test_labels_and_descriptions_are_non_empty() -> None: + """Each entry carries human-facing label + description copy.""" + for model in build_model_catalog().models: + assert model.label.strip() + assert model.description.strip() diff --git a/app/features/model_selection/tests/test_routes.py b/app/features/model_selection/tests/test_routes.py index 7cfb35f5..2effbc62 100644 --- a/app/features/model_selection/tests/test_routes.py +++ b/app/features/model_selection/tests/test_routes.py @@ -178,3 +178,53 @@ async def test_availability_rejects_bad_query() -> None: ) assert response.status_code == 422 _assert_problem_detail(response.json(), 422) + + +async def test_get_models_returns_catalog_200() -> None: + """GET /model-selection/models returns the static catalog (no mock needed).""" + async with _client() as ac: + response = await ac.get("/model-selection/models") + assert response.status_code == 200 + body = response.json() + assert isinstance(body["models"], list) + assert len(body["models"]) == 11 + # Each entry carries the backend-owned capability contract. + first = body["models"][0] + for key in ( + "model_type", + "label", + "family", + "feature_aware", + "requires_extra", + "default_params", + "supports_auto_predict", + "description", + ): + assert key in first, f"missing catalog field: {key}" + assert body["default_candidate_model_types"] == [ + "naive", + "seasonal_naive", + "moving_average", + "regression", + "prophet_like", + ] + + +async def test_models_route_not_captured_by_selection_id( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Literal /models must NOT be matched as GET /{selection_id}. + + If route ordering regressed, the request would hit ``get_selection`` (here + forced to 404) instead of the catalog handler. We assert the catalog shape + comes back, proving the literal-before-path-param ordering holds. 
+ """ + monkeypatch.setattr( + ModelSelectionService, + "get_selection", + AsyncMock(side_effect=NotFoundError(message="selection run models not found")), + ) + async with _client() as ac: + response = await ac.get("/model-selection/models") + assert response.status_code == 200 + assert "models" in response.json() diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 1ef34bf1..2dc4042f 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -26,6 +26,7 @@ const BacktestPage = lazy(() => import('@/pages/visualize/backtest')) const DemandPlannerPage = lazy(() => import('@/pages/visualize/demand')) const WhatIfPlannerPage = lazy(() => import('@/pages/visualize/planner')) const BatchRunnerPage = lazy(() => import('@/pages/visualize/batch')) +const ChampionSelectorPage = lazy(() => import('@/pages/visualize/champion')) const ChatPage = lazy(() => import('@/pages/chat')) const KnowledgePage = lazy(() => import('@/pages/knowledge')) const GuidePage = lazy(() => import('@/pages/guide')) @@ -186,6 +187,14 @@ function App() { } /> + }> + + + } + /> = {}): PairAvailability { + return { + store_id: 7, + product_id: 12, + first_sales_date: '2026-01-01', + last_sales_date: '2026-05-31', + observed_days: 150, + expected_calendar_days: 151, + coverage_ratio: 0.99, + missing_days: 1, + zero_sale_days: 4, + promotion_days: 3, + average_daily_demand: 9.2, + status: 'ready', + recommended_split_config: { + strategy: 'expanding', + n_splits: 5, + min_train_size: 30, + gap: 0, + horizon: 14, + }, + warnings: [], + ...overrides, + } +} + +describe('AvailabilityPanel', () => { + it('renders status badge + metric tiles for a ready pair', () => { + render( + , + ) + expect(screen.getByTestId('availability-panel')).toBeTruthy() + expect(screen.getByTestId('availability-status-badge').textContent).toContain('Ready') + expect(screen.getByText('Observed days')).toBeTruthy() + expect(screen.getByText('Avg daily demand')).toBeTruthy() + }) + + it('renders the not-enough-data 
empty state for an unusable pair', () => { + render( + , + ) + expect(screen.queryByTestId('availability-panel')).toBeNull() + expect(screen.getByText('Not enough data to model this pair')).toBeTruthy() + }) + + it('renders an em dash when promotion_days is null', () => { + render( + , + ) + expect(screen.getByText('—')).toBeTruthy() + }) + + it('shows a loading state while assessing', () => { + render() + expect(screen.getByText('Assessing data availability…')).toBeTruthy() + }) +}) diff --git a/frontend/src/components/champion-selector/availability-panel.tsx b/frontend/src/components/champion-selector/availability-panel.tsx new file mode 100644 index 00000000..3dfa7370 --- /dev/null +++ b/frontend/src/components/champion-selector/availability-panel.tsx @@ -0,0 +1,146 @@ +import { AlertTriangle, DatabaseZap } from 'lucide-react' +import { EmptyState } from '@/components/common/error-display' +import { LoadingState } from '@/components/common/loading-state' +import { Badge } from '@/components/ui/badge' +import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' +import { formatNumber, formatPercent } from '@/lib/api' +import type { AvailabilityStatus, PairAvailability } from '@/types/api' + +interface AvailabilityPanelProps { + availability?: PairAvailability + isLoading: boolean + isError: boolean +} + +const STATUS_VARIANT: Record< + AvailabilityStatus, + 'default' | 'secondary' | 'destructive' +> = { + ready: 'default', + limited: 'secondary', + unusable: 'destructive', +} + +const STATUS_LABEL: Record = { + ready: 'Ready', + limited: 'Limited', + unusable: 'Unusable', +} + +function Metric({ label, value }: { label: string; value: string }) { + return ( +

+ ) +} + +/** + * Renders the (store, product) data-availability triage for the Champion + * Selector. Slice A surfaces the backend assessment only — no run, no charts. + */ +export function AvailabilityPanel({ + availability, + isLoading, + isError, +}: AvailabilityPanelProps) { + if (isLoading) { + return + } + + if (isError) { + return ( + } + /> + ) + } + + if (!availability) { + return ( + } + /> + ) + } + + // Not-enough-data state: an unusable pair or one with zero observed history. + if (availability.status === 'unusable' || availability.observed_days === 0) { + return ( + } + /> + ) + } + + const split = availability.recommended_split_config + + return ( + + +
+ Data availability + + {STATUS_LABEL[availability.status]} + +
+
+ +
+ + + + + +
+ +
+

+ Recommended split +

+

+ {split.strategy} · {split.n_splits} splits · min train{' '} + {split.min_train_size}d · gap {split.gap}d · horizon {split.horizon}d +

+
+ + {availability.warnings.length > 0 && ( +
    + {availability.warnings.map((warning, index) => ( +
  • + + {warning} +
  • + ))} +
+ )} +
+
+ ) +} diff --git a/frontend/src/components/champion-selector/backtest-settings-form.test.tsx b/frontend/src/components/champion-selector/backtest-settings-form.test.tsx new file mode 100644 index 00000000..b9df7a2b --- /dev/null +++ b/frontend/src/components/champion-selector/backtest-settings-form.test.tsx @@ -0,0 +1,120 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { BacktestSettingsForm } from './backtest-settings-form' +import { splitConfigErrors } from './split-config' +import type { SplitConfig } from '@/types/api' + +// Radix Collapsible/Select need a couple of layout APIs jsdom lacks. +beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) + if (!Element.prototype.hasPointerCapture) { + Element.prototype.hasPointerCapture = () => false + } + if (!Element.prototype.scrollIntoView) { + Element.prototype.scrollIntoView = () => {} + } +}) + +afterEach(cleanup) + +const VALID: SplitConfig = { + strategy: 'expanding', + n_splits: 5, + min_train_size: 30, + gap: 0, + horizon: 14, +} + +describe('splitConfigErrors', () => { + it('accepts a valid config', () => { + expect(splitConfigErrors(VALID)).toEqual([]) + }) + + it('flags out-of-range n_splits and gap >= horizon', () => { + const errors = splitConfigErrors({ ...VALID, n_splits: 1, gap: 14 }) + expect(errors.some((e) => e.includes('Splits'))).toBe(true) + expect(errors.some((e) => e.includes('Gap must be smaller'))).toBe(true) + }) +}) + +describe('BacktestSettingsForm', () => { + it('reveals the advanced split inputs when toggled', () => { + render( + {}} + onRankingMetricChange={() => {}} + />, + ) + // Hidden until the collapsible opens. 
+ expect(screen.queryByTestId('settings-n-splits')).toBeNull() + fireEvent.click(screen.getByTestId('advanced-toggle')) + expect(screen.getByTestId('settings-n-splits')).toBeTruthy() + expect(screen.getByTestId('settings-gap')).toBeTruthy() + }) + + it('renders validation errors for an invalid config', () => { + render( + {}} + onRankingMetricChange={() => {}} + />, + ) + expect(screen.getByTestId('settings-errors')).toBeTruthy() + expect(screen.getByText(/Splits must be between 2 and 20/)).toBeTruthy() + }) + + it('"Use recommended split" emits the recommended config (horizon synced)', () => { + const onChange = vi.fn() + const recommended: SplitConfig = { + strategy: 'sliding', + n_splits: 8, + min_train_size: 45, + gap: 1, + horizon: 7, // intentionally different — must be overridden to forecastHorizon + } + render( + {}} + recommended={recommended} + />, + ) + fireEvent.click(screen.getByTestId('use-recommended-split')) + expect(onChange).toHaveBeenCalledWith({ + strategy: 'sliding', + n_splits: 8, + min_train_size: 45, + gap: 1, + horizon: 14, // synced to forecastHorizon + }) + }) + + it('keeps the horizon input read-only and equal to the forecast horizon', () => { + render( + {}} + onRankingMetricChange={() => {}} + />, + ) + const horizon = screen.getByTestId('settings-horizon') as HTMLInputElement + expect(horizon.value).toBe('21') + expect(horizon.readOnly).toBe(true) + }) +}) diff --git a/frontend/src/components/champion-selector/backtest-settings-form.tsx b/frontend/src/components/champion-selector/backtest-settings-form.tsx new file mode 100644 index 00000000..fdaca7f3 --- /dev/null +++ b/frontend/src/components/champion-selector/backtest-settings-form.tsx @@ -0,0 +1,206 @@ +import { useState } from 'react' +import { ChevronDown, Settings2, Wand2 } from 'lucide-react' +import { Button } from '@/components/ui/button' +import { + Collapsible, + CollapsibleContent, + CollapsibleTrigger, +} from '@/components/ui/collapsible' +import { Input } from 
'@/components/ui/input' +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select' +import { cn } from '@/lib/utils' +import type { RankingMetric, SplitConfig, SplitStrategy } from '@/types/api' +import { BIAS_EXPLANATION, RANKING_TIE_BREAK } from './copy' +import { splitConfigErrors } from './split-config' + +interface BacktestSettingsFormProps { + value: SplitConfig + rankingMetric: RankingMetric + forecastHorizon: number + onChange: (next: SplitConfig) => void + onRankingMetricChange: (metric: RankingMetric) => void + recommended?: SplitConfig +} + +const RANKING_METRICS: { value: RankingMetric; label: string }[] = [ + { value: 'wape', label: 'WAPE (default)' }, + { value: 'smape', label: 'sMAPE' }, + { value: 'mae', label: 'MAE' }, + { value: 'bias', label: 'Bias' }, +] + +function Field({ + label, + children, + hint, +}: { + label: string + children: React.ReactNode + hint?: string +}) { + return ( +
+ {label} + {children} + {hint &&

{hint}

} +
+ ) +} + +/** + * Simple/advanced backtest-settings form. The horizon is DERIVED from + * `forecastHorizon` (kept equal so the assembled run request is always valid) + * and shown read-only. The advanced toggle reveals the split-CV knobs. + */ +export function BacktestSettingsForm({ + value, + rankingMetric, + forecastHorizon, + onChange, + onRankingMetricChange, + recommended, +}: BacktestSettingsFormProps) { + const [advancedOpen, setAdvancedOpen] = useState(false) + const errors = splitConfigErrors(value) + + function patch(partial: Partial) { + onChange({ ...value, ...partial, horizon: forecastHorizon }) + } + + return ( +
+
+ + + + + + +
+ + {recommended && ( + + )} + + + + + + +
+ + + + + + patch({ n_splits: Number(event.target.value) || 0 }) + } + /> + + + + patch({ min_train_size: Number(event.target.value) || 0 }) + } + /> + + + + patch({ gap: Number(event.target.value) || 0 }) + } + /> + +
+
+
+ + {errors.length > 0 && ( +
    + {errors.map((error) => ( +
  • + {error} +
  • + ))} +
+ )} +
+ ) +} diff --git a/frontend/src/components/champion-selector/candidate-model-picker.test.tsx b/frontend/src/components/champion-selector/candidate-model-picker.test.tsx new file mode 100644 index 00000000..8c7d171d --- /dev/null +++ b/frontend/src/components/champion-selector/candidate-model-picker.test.tsx @@ -0,0 +1,99 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { CandidateModelPicker, MAX_CANDIDATES } from './candidate-model-picker' +import type { CandidateModelInfo, ModelCatalogResponse } from '@/types/api' + +afterEach(cleanup) + +function model( + model_type: string, + overrides: Partial = {}, +): CandidateModelInfo { + return { + model_type, + label: model_type, + family: 'baseline', + feature_aware: false, + requires_extra: false, + default_params: {}, + supports_auto_predict: true, + description: `desc ${model_type}`, + ...overrides, + } +} + +const CATALOG: ModelCatalogResponse = { + models: [ + model('naive'), + model('regression', { family: 'tree', feature_aware: true }), + model('lightgbm', { family: 'tree', feature_aware: true, requires_extra: true }), + model('xgboost', { family: 'tree', feature_aware: true, requires_extra: true }), + ], + default_candidate_model_types: ['naive', 'regression'], +} + +describe('CandidateModelPicker', () => { + it('toggling a model calls onChange with the new selection', () => { + const onChange = vi.fn() + render( + , + ) + fireEvent.click(screen.getByTestId('candidate-checkbox-regression')) + expect(onChange).toHaveBeenCalledWith(['naive', 'regression']) + }) + + it('deselects an already-selected model', () => { + const onChange = vi.fn() + render( + , + ) + fireEvent.click(screen.getByTestId('candidate-checkbox-naive')) + expect(onChange).toHaveBeenCalledWith(['regression']) + }) + + it('flags opt-in-extra models with an "extra" badge', () => { + render( + {}} + isLoading={false} + />, + ) + 
expect(screen.getByTestId('candidate-extra-badge-lightgbm')).toBeTruthy() + expect(screen.getByTestId('candidate-extra-badge-xgboost')).toBeTruthy() + // A baseline model carries no extra badge. + expect(screen.queryByTestId('candidate-extra-badge-naive')).toBeNull() + }) + + it('caps the selection at MAX_CANDIDATES and disables unselected models', () => { + const many = Array.from({ length: MAX_CANDIDATES }, (_, i) => `m${i}`) + const onChange = vi.fn() + const bigCatalog: ModelCatalogResponse = { + models: [...many.map((m) => model(m)), model('extra_model')], + default_candidate_model_types: [], + } + render( + , + ) + expect(screen.getByTestId('candidate-cap-badge')).toBeTruthy() + // Clicking an unselected model at the cap must NOT add it. + fireEvent.click(screen.getByTestId('candidate-checkbox-extra_model')) + expect(onChange).not.toHaveBeenCalled() + }) +}) diff --git a/frontend/src/components/champion-selector/candidate-model-picker.tsx b/frontend/src/components/champion-selector/candidate-model-picker.tsx new file mode 100644 index 00000000..6a3b4366 --- /dev/null +++ b/frontend/src/components/champion-selector/candidate-model-picker.tsx @@ -0,0 +1,129 @@ +import { LoadingState } from '@/components/common/loading-state' +import { Badge } from '@/components/ui/badge' +import { Checkbox } from '@/components/ui/checkbox' +import { cn } from '@/lib/utils' +import type { CandidateModelInfo, ModelCatalogResponse, ModelFamily } from '@/types/api' + +/** Backend caps `candidate_models` at 10 (ModelSelectionRunRequest.max_length). 
*/ +export const MAX_CANDIDATES = 10 + +interface CandidateModelPickerProps { + catalog?: ModelCatalogResponse + selected: string[] + onChange: (types: string[]) => void + isLoading: boolean +} + +const FAMILY_ORDER: ModelFamily[] = ['baseline', 'additive', 'tree'] +const FAMILY_LABEL: Record = { + baseline: 'Baseline', + additive: 'Additive', + tree: 'Tree-based', +} + +/** + * Candidate-model multi-select fed by the BACKEND catalog (never the hardcoded + * `model-type-utils`). Mirrors the batch-matrix-picker conventions: a checkbox + * per model grouped by family, opt-in-extra + feature-aware badges, and a + * selection cap of 10. + */ +export function CandidateModelPicker({ + catalog, + selected, + onChange, + isLoading, +}: CandidateModelPickerProps) { + if (isLoading) { + return + } + if (!catalog || catalog.models.length === 0) { + return ( +

No models available.

+ ) + } + + const selectedSet = new Set(selected) + const atCap = selected.length >= MAX_CANDIDATES + + function toggle(modelType: string) { + if (selectedSet.has(modelType)) { + onChange(selected.filter((type) => type !== modelType)) + } else if (!atCap) { + onChange([...selected, modelType]) + } + } + + const byFamily = new Map() + for (const model of catalog.models) { + const list = byFamily.get(model.family) ?? [] + list.push(model) + byFamily.set(model.family, list) + } + + return ( +
+
+ + {selected.length} of {MAX_CANDIDATES} selected + + {atCap && ( + + Max {MAX_CANDIDATES} reached + + )} +
+ + {FAMILY_ORDER.filter((family) => byFamily.has(family)).map((family) => ( +
+

+ {FAMILY_LABEL[family]} +

+
+ {(byFamily.get(family) ?? []).map((model) => { + const isSelected = selectedSet.has(model.model_type) + const disabled = !isSelected && atCap + return ( + + ) + })} +
+
+ ))} +
+ ) +} diff --git a/frontend/src/components/champion-selector/copy.ts b/frontend/src/components/champion-selector/copy.ts new file mode 100644 index 00000000..bafbfd53 --- /dev/null +++ b/frontend/src/components/champion-selector/copy.ts @@ -0,0 +1,20 @@ +/** + * Shared, LOCKED copy for the Champion Selector workflow (Slices A/B/C). + * + * Kept in a `.ts` (not `.tsx`) module so the `react-refresh/only-export-components` + * lint rule never trips on these non-component exports. Slices B and C import + * the SAME constants so the bias wording / tie-break explanation never drift. + */ + +/** LOCKED #7 — the canonical bias explanation reused everywhere bias is shown. */ +export const BIAS_EXPLANATION = + 'Positive bias means the model under-forecasts (risk of stockouts); ' + + 'negative bias means it over-forecasts (risk of overstock).' + +/** LOCKED #8 — the deterministic ranking tie-break chain. */ +export const RANKING_TIE_BREAK = + 'Ranked by WAPE, then sMAPE, then |bias|, then MAE.' + +/** Copy for the disabled Slice-A "Run comparison" CTA. */ +export const RUN_COMPARISON_PENDING = + 'Model comparison runs in the next update.' 
diff --git a/frontend/src/components/champion-selector/run-request.test.ts b/frontend/src/components/champion-selector/run-request.test.ts new file mode 100644 index 00000000..59f4ad0e --- /dev/null +++ b/frontend/src/components/champion-selector/run-request.test.ts @@ -0,0 +1,63 @@ +import { describe, expect, it } from 'vitest' +import { assembleRunRequest } from './run-request' +import type { SplitConfig } from '@/types/api' + +const SPLIT: SplitConfig = { + strategy: 'expanding', + n_splits: 5, + min_train_size: 30, + gap: 0, + horizon: 7, // intentionally stale — must be overridden to forecastHorizon +} + +describe('assembleRunRequest', () => { + it('pins auto_train_winner and auto_predict to false (Slice A invariant)', () => { + const req = assembleRunRequest({ + storeId: 7, + productId: 12, + startDate: '2026-01-01', + endDate: '2026-05-31', + forecastHorizon: 14, + rankingMetric: 'wape', + splitConfig: SPLIT, + selectedModels: ['naive', 'regression'], + }) + expect(req.auto_train_winner).toBe(false) + expect(req.auto_predict).toBe(false) + }) + + it('forces split_config.horizon === forecast_horizon', () => { + const req = assembleRunRequest({ + storeId: 1, + productId: 2, + startDate: '2026-01-01', + endDate: '2026-03-31', + forecastHorizon: 21, + rankingMetric: 'wape', + splitConfig: SPLIT, + selectedModels: ['naive'], + }) + expect(req.forecast_horizon).toBe(21) + expect(req.split_config.horizon).toBe(21) + }) + + it('maps selected model types into flat candidate configs and stays V1', () => { + const req = assembleRunRequest({ + storeId: 1, + productId: 2, + startDate: '2026-01-01', + endDate: '2026-03-31', + forecastHorizon: 14, + rankingMetric: 'smape', + splitConfig: SPLIT, + selectedModels: ['naive', 'seasonal_naive'], + }) + expect(req.candidate_models).toEqual([ + { model_type: 'naive', params: {} }, + { model_type: 'seasonal_naive', params: {} }, + ]) + expect(req.feature_frame_version).toBe(1) + expect(req.feature_groups).toBeNull() + 
expect(req.ranking_metric).toBe('smape') + }) +}) diff --git a/frontend/src/components/champion-selector/run-request.ts b/frontend/src/components/champion-selector/run-request.ts new file mode 100644 index 00000000..253da365 --- /dev/null +++ b/frontend/src/components/champion-selector/run-request.ts @@ -0,0 +1,50 @@ +import type { + ModelSelectionRunRequest, + RankingMetric, + SplitConfig, +} from '@/types/api' + +export interface AssembleRunRequestInput { + storeId: number + productId: number + startDate: string // YYYY-MM-DD + endDate: string // YYYY-MM-DD + forecastHorizon: number + rankingMetric: RankingMetric + splitConfig: SplitConfig + selectedModels: string[] +} + +/** + * Assemble the typed `ModelSelectionRunRequest` from the Champion Selector + * form state. Pure + side-effect-free so it can be unit-tested. + * + * Slice A pins `auto_train_winner` and `auto_predict` to `false`: the async run + * path (Slice B) treats both as NO-OPS, and Slice C owns explicit + * train/predict. `split_config.horizon` is forced equal to `forecast_horizon` + * (the backend `ModelSelectionRunRequest` validator requires it). The request + * is assembled but NOT sent in Slice A — the "Run comparison" CTA is disabled. 
+ */ +export function assembleRunRequest( + input: AssembleRunRequestInput, +): ModelSelectionRunRequest { + return { + store_id: input.storeId, + product_id: input.productId, + selection_window: { + start_date: input.startDate, + end_date: input.endDate, + }, + forecast_horizon: input.forecastHorizon, + ranking_metric: input.rankingMetric, + split_config: { ...input.splitConfig, horizon: input.forecastHorizon }, + candidate_models: input.selectedModels.map((model_type) => ({ + model_type, + params: {}, + })), + feature_frame_version: 1, + feature_groups: null, + auto_train_winner: false, + auto_predict: false, + } +} diff --git a/frontend/src/components/champion-selector/searchable-entity-select.test.tsx b/frontend/src/components/champion-selector/searchable-entity-select.test.tsx new file mode 100644 index 00000000..99b476a7 --- /dev/null +++ b/frontend/src/components/champion-selector/searchable-entity-select.test.tsx @@ -0,0 +1,78 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { SearchableEntitySelect, type SearchableEntityItem } from './searchable-entity-select' + +// Radix Popover positions its content with Popper, which needs ResizeObserver +// + a couple of layout APIs jsdom lacks. Polyfill them locally (the repo has no +// vitest setup file) so the popover can open in the test environment. 
+beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) + if (!Element.prototype.hasPointerCapture) { + Element.prototype.hasPointerCapture = () => false + } + if (!Element.prototype.scrollIntoView) { + Element.prototype.scrollIntoView = () => {} + } +}) + +afterEach(cleanup) + +const ITEMS: SearchableEntityItem[] = [ + { id: 7, primary: 'S001 · Downtown', secondary: 'North' }, + { id: 12, primary: 'S002 · Airport', secondary: 'West' }, + { id: 99, primary: 'S003 · Suburb', secondary: 'East' }, +] + +describe('SearchableEntitySelect', () => { + it('shows the placeholder when nothing is selected', () => { + render( + {}} + placeholder="Pick a store…" + />, + ) + expect(screen.getByText('Pick a store…')).toBeTruthy() + }) + + it('filters the list client-side and selects an option on click', () => { + const onChange = vi.fn() + render( + , + ) + fireEvent.click(screen.getByTestId('searchable-entity-select')) + + // All three options visible before filtering. + expect(screen.getByTestId('searchable-entity-select-option-7')).toBeTruthy() + expect(screen.getByTestId('searchable-entity-select-option-12')).toBeTruthy() + expect(screen.getByTestId('searchable-entity-select-option-99')).toBeTruthy() + + // Filter narrows to the Airport row (matches the primary text). 
+ fireEvent.change(screen.getByTestId('searchable-entity-select-filter'), { + target: { value: 'airport' }, + }) + expect(screen.queryByTestId('searchable-entity-select-option-7')).toBeNull() + expect(screen.getByTestId('searchable-entity-select-option-12')).toBeTruthy() + + fireEvent.click(screen.getByTestId('searchable-entity-select-option-12')) + expect(onChange).toHaveBeenCalledWith(12) + }) + + it('filters on the secondary descriptor too', () => { + render( + {}} />, + ) + fireEvent.click(screen.getByTestId('searchable-entity-select')) + fireEvent.change(screen.getByTestId('searchable-entity-select-filter'), { + target: { value: 'east' }, + }) + expect(screen.getByTestId('searchable-entity-select-option-99')).toBeTruthy() + expect(screen.queryByTestId('searchable-entity-select-option-7')).toBeNull() + }) +}) diff --git a/frontend/src/components/champion-selector/searchable-entity-select.tsx b/frontend/src/components/champion-selector/searchable-entity-select.tsx new file mode 100644 index 00000000..f4dcf51b --- /dev/null +++ b/frontend/src/components/champion-selector/searchable-entity-select.tsx @@ -0,0 +1,144 @@ +import { useState } from 'react' +import { Check, ChevronsUpDown, Search } from 'lucide-react' +import { cn } from '@/lib/utils' +import { Button } from '@/components/ui/button' +import { Input } from '@/components/ui/input' +import { + Popover, + PopoverContent, + PopoverTrigger, +} from '@/components/ui/popover' + +export interface SearchableEntityItem { + id: number + primary: string + secondary?: string +} + +interface SearchableEntitySelectProps { + items: SearchableEntityItem[] + value: number | null + onChange: (id: number) => void + placeholder?: string + loading?: boolean + emptyLabel?: string + /** Forwarded to the trigger button + filter input for scoped test queries. */ + testId?: string +} + +/** + * A combobox built from existing primitives (Popover + Input + a filtered + * ` + + +
+ + setFilter(event.target.value)} + placeholder="Filter…" + data-testid={`${testId}-filter`} + className="h-8 border-0 px-0 shadow-none focus-visible:ring-0" + /> +
+
+ {filtered.length === 0 ? ( +

+ {emptyLabel} +

+ ) : ( + filtered.map((item) => ( + + )) + )} +
+
+ + ) +} diff --git a/frontend/src/components/champion-selector/split-config.ts b/frontend/src/components/champion-selector/split-config.ts new file mode 100644 index 00000000..ecc98f35 --- /dev/null +++ b/frontend/src/components/champion-selector/split-config.ts @@ -0,0 +1,24 @@ +import type { SplitConfig } from '@/types/api' + +/** + * Inline-validate a `SplitConfig` against the backend SplitConfig bounds + * (`app/features/backtesting/schemas.py`). Kept in a `.ts` module (not the + * form `.tsx`) so the `react-refresh/only-export-components` lint rule stays + * happy. Returns a list of human-facing error strings (empty = valid). + */ +export function splitConfigErrors(config: SplitConfig): string[] { + const errors: string[] = [] + if (config.n_splits < 2 || config.n_splits > 20) { + errors.push('Splits must be between 2 and 20.') + } + if (config.min_train_size < 7) { + errors.push('Minimum train size must be at least 7 days.') + } + if (config.gap < 0 || config.gap > 30) { + errors.push('Gap must be between 0 and 30 days.') + } + if (config.gap >= config.horizon) { + errors.push('Gap must be smaller than the horizon.') + } + return errors +} diff --git a/frontend/src/hooks/index.ts b/frontend/src/hooks/index.ts index 1c47074d..eebde40d 100644 --- a/frontend/src/hooks/index.ts +++ b/frontend/src/hooks/index.ts @@ -7,6 +7,7 @@ export * from './use-inventory' export * from './use-lifecycle-curve' export * from './use-runs' export * from './use-jobs' +export * from './use-model-selection' export * from './use-ops' export * from './use-scenarios' export * from './use-rag-sources' diff --git a/frontend/src/hooks/use-model-selection.test.ts b/frontend/src/hooks/use-model-selection.test.ts new file mode 100644 index 00000000..a1187321 --- /dev/null +++ b/frontend/src/hooks/use-model-selection.test.ts @@ -0,0 +1,126 @@ +/** + * Unit tests for the model-selection query hooks (Champion Selector, Slice A). 
+ * + * Stubs `fetch` to assert the catalog + availability GET URLs and the + * availability `enabled` gating. No real backend is exercised. + */ +import { QueryClient, QueryClientProvider } from '@tanstack/react-query' +import { renderHook, waitFor } from '@testing-library/react' +import { afterEach, describe, expect, it, vi } from 'vitest' +import { createElement, type ReactNode } from 'react' + +import { useModelCatalog, usePairAvailability } from './use-model-selection' +import type { ModelCatalogResponse, PairAvailability } from '@/types/api' + +function makeWrapper(client: QueryClient) { + return function Wrapper({ children }: { children: ReactNode }) { + return createElement(QueryClientProvider, { client }, children) + } +} + +function makeClient() { + return new QueryClient({ defaultOptions: { queries: { retry: false } } }) +} + +const CATALOG: ModelCatalogResponse = { + models: [ + { + model_type: 'naive', + label: 'Naive', + family: 'baseline', + feature_aware: false, + requires_extra: false, + default_params: {}, + supports_auto_predict: true, + description: 'Repeats the last observed value.', + }, + ], + default_candidate_model_types: ['naive', 'seasonal_naive', 'moving_average'], +} + +const AVAILABILITY: PairAvailability = { + store_id: 7, + product_id: 12, + first_sales_date: '2026-01-01', + last_sales_date: '2026-05-31', + observed_days: 150, + expected_calendar_days: 151, + coverage_ratio: 0.99, + missing_days: 1, + zero_sale_days: 4, + promotion_days: 3, + average_daily_demand: 9.2, + status: 'ready', + recommended_split_config: { + strategy: 'expanding', + n_splits: 5, + min_train_size: 30, + gap: 0, + horizon: 14, + }, + warnings: [], +} + +afterEach(() => { + vi.unstubAllGlobals() +}) + +describe('useModelCatalog', () => { + it('GETs /model-selection/models and returns the parsed catalog', async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response(JSON.stringify(CATALOG), { + status: 200, + headers: { 'content-type': 
'application/json' }, + }), + ) + vi.stubGlobal('fetch', fetchMock) + + const { result } = renderHook(() => useModelCatalog(), { + wrapper: makeWrapper(makeClient()), + }) + + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + expect(fetchMock).toHaveBeenCalledTimes(1) + expect(fetchMock.mock.calls[0]![0]).toContain('/model-selection/models') + expect(result.current.data?.models[0]?.model_type).toBe('naive') + }) +}) + +describe('usePairAvailability', () => { + it('GETs /model-selection/availability with the three query params', async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response(JSON.stringify(AVAILABILITY), { + status: 200, + headers: { 'content-type': 'application/json' }, + }), + ) + vi.stubGlobal('fetch', fetchMock) + + const { result } = renderHook( + () => usePairAvailability({ storeId: 7, productId: 12, forecastHorizon: 14 }), + { wrapper: makeWrapper(makeClient()) }, + ) + + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + const url = String(fetchMock.mock.calls[0]![0]) + expect(url).toContain('/model-selection/availability') + expect(url).toContain('store_id=7') + expect(url).toContain('product_id=12') + expect(url).toContain('forecast_horizon=14') + expect(result.current.data?.status).toBe('ready') + }) + + it('does NOT fetch while the pair is incomplete (enabled gating)', async () => { + const fetchMock = vi.fn() + vi.stubGlobal('fetch', fetchMock) + + renderHook( + () => usePairAvailability({ storeId: null, productId: 12, forecastHorizon: 14 }), + { wrapper: makeWrapper(makeClient()) }, + ) + + // Give TanStack a tick; the disabled query must never call fetch. 
+ await new Promise((resolve) => setTimeout(resolve, 20)) + expect(fetchMock).not.toHaveBeenCalled() + }) +}) diff --git a/frontend/src/hooks/use-model-selection.ts b/frontend/src/hooks/use-model-selection.ts new file mode 100644 index 00000000..726f8072 --- /dev/null +++ b/frontend/src/hooks/use-model-selection.ts @@ -0,0 +1,57 @@ +import { useQuery } from '@tanstack/react-query' +import { api } from '@/lib/api' +import type { ModelCatalogResponse, PairAvailability } from '@/types/api' + +/** + * Model-selection query hooks (Champion Selector, Slice A). + * + * Read-only: the catalog and pair-availability GETs. The run mutation, + * progress, and results hooks are owned by Slice B; train/predict by Slice C. + */ + +/** + * Fetch the backend-owned candidate-model capability catalog. + * + * The catalog is static, so it is cached aggressively (no refetch churn). + */ +export function useModelCatalog() { + return useQuery({ + queryKey: ['model-selection', 'models'], + queryFn: () => api('/model-selection/models'), + staleTime: 1000 * 60 * 60, // 1h — the catalog rarely changes within a session + }) +} + +interface UsePairAvailabilityParams { + storeId: number | null + productId: number | null + forecastHorizon: number + enabled?: boolean +} + +/** + * Assess data availability for a (store, product) pair at a given horizon. + * + * Gated like `useStore`: only fires once a real pair is chosen. `storeId` / + * `productId` are nullable so the page can pass its raw selection state without + * coercing un-selected values to a bogus `0`/`1`. 
+ */ +export function usePairAvailability({ + storeId, + productId, + forecastHorizon, + enabled = true, +}: UsePairAvailabilityParams) { + return useQuery({ + queryKey: ['model-selection', 'availability', storeId, productId, forecastHorizon], + queryFn: () => + api('/model-selection/availability', { + params: { + store_id: storeId, + product_id: productId, + forecast_horizon: forecastHorizon, + }, + }), + enabled: enabled && !!storeId && storeId > 0 && !!productId && productId > 0, + }) +} diff --git a/frontend/src/lib/constants.ts b/frontend/src/lib/constants.ts index 6a6de39f..95cb28b8 100644 --- a/frontend/src/lib/constants.ts +++ b/frontend/src/lib/constants.ts @@ -25,6 +25,7 @@ export const ROUTES = { DEMAND: '/visualize/demand', PLANNER: '/visualize/planner', BATCH: '/visualize/batch', + CHAMPION: '/visualize/champion', }, KNOWLEDGE: '/knowledge', CHAT: '/chat', @@ -55,6 +56,7 @@ export const NAV_ITEMS = [ { label: 'Forecast', href: ROUTES.VISUALIZE.FORECAST }, { label: 'Backtest Results', href: ROUTES.VISUALIZE.BACKTEST }, { label: 'Batch Runner', href: ROUTES.VISUALIZE.BATCH }, + { label: 'Champion Selector', href: ROUTES.VISUALIZE.CHAMPION }, ], }, { label: 'Knowledge', href: ROUTES.KNOWLEDGE }, diff --git a/frontend/src/pages/visualize/champion.test.tsx b/frontend/src/pages/visualize/champion.test.tsx new file mode 100644 index 00000000..123d4862 --- /dev/null +++ b/frontend/src/pages/visualize/champion.test.tsx @@ -0,0 +1,118 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' +import { cleanup, render, screen, waitFor } from '@testing-library/react' +import type { ModelCatalogResponse } from '@/types/api' + +// Radix primitives need a couple of layout APIs jsdom lacks. 
+beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) + if (!Element.prototype.hasPointerCapture) { + Element.prototype.hasPointerCapture = () => false + } + if (!Element.prototype.scrollIntoView) { + Element.prototype.scrollIntoView = () => {} + } +}) + +const CATALOG: ModelCatalogResponse = { + models: [ + { + model_type: 'naive', + label: 'Naive', + family: 'baseline', + feature_aware: false, + requires_extra: false, + default_params: {}, + supports_auto_predict: true, + description: 'Repeats the last observed value.', + }, + { + model_type: 'regression', + label: 'Gradient Boosting Regression', + family: 'tree', + feature_aware: true, + requires_extra: false, + default_params: {}, + supports_auto_predict: false, + description: 'Histogram gradient boosting.', + }, + ], + default_candidate_model_types: ['naive', 'regression'], +} + +vi.mock('@/hooks/use-stores', () => ({ + useStores: () => ({ + data: { stores: [{ id: 7, code: 'S001', name: 'Downtown', region: 'North', store_type: 'flagship' }] }, + isLoading: false, + }), +})) +vi.mock('@/hooks/use-products', () => ({ + useProducts: () => ({ + data: { products: [{ id: 12, sku: 'SKU1', name: 'Widget', category: 'tools' }] }, + isLoading: false, + }), +})) +vi.mock('@/hooks/use-model-selection', () => ({ + useModelCatalog: () => ({ + data: CATALOG, + isLoading: false, + isError: false, + error: null, + refetch: () => {}, + }), + usePairAvailability: () => ({ + data: undefined, + isLoading: false, + isError: false, + }), +})) + +import ChampionSelectorPage from './champion' + +afterEach(cleanup) + +describe('ChampionSelectorPage', () => { + it('renders the selection shell', () => { + render() + expect(screen.getByText('Champion Selector')).toBeTruthy() + expect(screen.getByText('1 · Pick a store & product')).toBeTruthy() + expect(screen.getByText('2 · Data availability')).toBeTruthy() + expect(screen.getByText('3 · 
Candidate models')).toBeTruthy() + expect(screen.getByText('4 · Backtest settings')).toBeTruthy() + }) + + it('drives candidate cards from the backend catalog', () => { + render() + expect(screen.getByTestId('candidate-model-naive')).toBeTruthy() + expect(screen.getByTestId('candidate-model-regression')).toBeTruthy() + }) + + it('pre-selects the catalog default candidate models', async () => { + render() + // The seeding effect selects the default two models. + await waitFor(() => + expect(screen.getByText('2 of 10 selected')).toBeTruthy(), + ) + }) + + it('renders the availability empty state until a pair is chosen', () => { + render() + expect(screen.getByText('Pick a store and product')).toBeTruthy() + }) + + it('keeps the Run comparison CTA disabled and issues no POST', () => { + const fetchMock = vi.fn() + vi.stubGlobal('fetch', fetchMock) + render() + const cta = screen.getByTestId('run-comparison-cta') as HTMLButtonElement + expect(cta.disabled).toBe(true) + // The page itself issues no network calls (the hooks are mocked); in + // particular it never POSTs to /model-selection/run. 
+ expect(fetchMock).not.toHaveBeenCalled() + vi.unstubAllGlobals() + }) +}) diff --git a/frontend/src/pages/visualize/champion.tsx b/frontend/src/pages/visualize/champion.tsx new file mode 100644 index 00000000..d3e3106f --- /dev/null +++ b/frontend/src/pages/visualize/champion.tsx @@ -0,0 +1,294 @@ +import { useMemo, useState } from 'react' +import { format } from 'date-fns' +import { DateRange } from 'react-day-picker' +import { Trophy } from 'lucide-react' +import { useStores } from '@/hooks/use-stores' +import { useProducts } from '@/hooks/use-products' +import { useModelCatalog, usePairAvailability } from '@/hooks/use-model-selection' +import { DateRangePicker } from '@/components/common/date-range-picker' +import { ErrorDisplay } from '@/components/common/error-display' +import { AvailabilityPanel } from '@/components/champion-selector/availability-panel' +import { BacktestSettingsForm } from '@/components/champion-selector/backtest-settings-form' +import { splitConfigErrors } from '@/components/champion-selector/split-config' +import { CandidateModelPicker } from '@/components/champion-selector/candidate-model-picker' +import { SearchableEntitySelect } from '@/components/champion-selector/searchable-entity-select' +import { RUN_COMPARISON_PENDING } from '@/components/champion-selector/copy' +import { assembleRunRequest } from '@/components/champion-selector/run-request' +import { Button } from '@/components/ui/button' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Input } from '@/components/ui/input' +import type { + ModelSelectionRunRequest, + SplitConfig, +} from '@/types/api' + +const DEFAULT_HORIZON = 14 + +const DEFAULT_SPLIT: SplitConfig = { + strategy: 'expanding', + n_splits: 5, + min_train_size: 30, + gap: 0, + horizon: DEFAULT_HORIZON, +} + +/** + * Forecast Champion Selector — Slice A. + * + * Configuration + availability triage only. 
It assembles a typed + * `ModelSelectionRunRequest` in component state and surfaces a DISABLED + * "Run comparison" CTA — the comparison RUN itself (and all results/training) + * lands in Slices B/C. This page calls only the two read GETs (catalog + + * availability); it never POSTs. + */ +export default function ChampionSelectorPage() { + const [storeId, setStoreId] = useState(null) + const [productId, setProductId] = useState(null) + const [dateRange, setDateRange] = useState() + const [forecastHorizon, setForecastHorizon] = useState(DEFAULT_HORIZON) + const [splitConfig, setSplitConfig] = useState(DEFAULT_SPLIT) + const [rankingMetric, setRankingMetric] = useState< + ModelSelectionRunRequest['ranking_metric'] + >('wape') + // `null` means "the user hasn't edited the selection yet" — fall back to the + // catalog's default candidate set (derived below, no effect needed). + const [editedModels, setEditedModels] = useState(null) + + // /dimensions/{stores,products} both cap page_size at 100 (client-filtered). + const storesQuery = useStores({ page: 1, pageSize: 100 }) + const productsQuery = useProducts({ page: 1, pageSize: 100 }) + const catalogQuery = useModelCatalog() + + const validPair = !!storeId && !!productId + const availabilityQuery = usePairAvailability({ + storeId, + productId, + forecastHorizon, + enabled: validPair, + }) + + // Pre-select the backend default candidate set until the user edits it — + // derived during render rather than seeded via an effect. + const selectedModels = + editedModels ?? catalogQuery.data?.default_candidate_model_types ?? [] + + // split_config.horizon must equal forecast_horizon (the backend validator). + // Force it during render so no effect is needed to keep them in sync. + const effectiveSplit: SplitConfig = useMemo( + () => ({ ...splitConfig, horizon: forecastHorizon }), + [splitConfig, forecastHorizon], + ) + + const storeItems = useMemo( + () => + (storesQuery.data?.stores ?? 
[]).map((store) => ({ + id: store.id, + primary: `${store.code} · ${store.name}`, + secondary: [store.region, store.store_type].filter(Boolean).join(' · '), + })), + [storesQuery.data], + ) + const productItems = useMemo( + () => + (productsQuery.data?.products ?? []).map((product) => ({ + id: product.id, + primary: `${product.sku} · ${product.name}`, + secondary: product.category ?? undefined, + })), + [productsQuery.data], + ) + + const formReady = + validPair && + !!dateRange?.from && + !!dateRange?.to && + forecastHorizon >= 1 && + forecastHorizon <= 90 && + selectedModels.length >= 1 && + splitConfigErrors(effectiveSplit).length === 0 + + // The assembled request — typed but NOT sent in Slice A (the CTA is disabled). + // `auto_train_winner`/`auto_predict` are pinned false by `assembleRunRequest`. + // Built defensively so it is valid the moment Slice B wires the mutation. + const runRequest: ModelSelectionRunRequest | null = + formReady && dateRange?.from && dateRange?.to + ? assembleRunRequest({ + storeId: storeId!, + productId: productId!, + startDate: format(dateRange.from, 'yyyy-MM-dd'), + endDate: format(dateRange.to, 'yyyy-MM-dd'), + forecastHorizon, + rankingMetric, + splitConfig: effectiveSplit, + selectedModels, + }) + : null + + return ( +
+
+

+ + Champion Selector +

+

+ Configure a store, product, time period, horizon and candidate models, + and check whether the pair has enough history to model. Running the + comparison arrives in a later update. +

+
+ + {/* Selection */} + + + 1 · Pick a store & product + + Search by code/SKU or name. The availability check runs automatically + once a valid pair and horizon are chosen. + + + +
+
+ Store + +
+
+ Product + +
+
+ Time period + +
+
+ + Forecast horizon (days) + + + setForecastHorizon(Number(event.target.value) || 0) + } + /> +
+
+
+
+ + {/* Availability */} + + + 2 · Data availability + + Whether this pair has enough observed history for a reliable + comparison, plus the recommended split. + + + + + + + + {/* Candidate models */} + + + 3 · Candidate models + + Pick the models to compare (up to 10). The default five are + pre-selected; opt-in extras are flagged. + + + + {catalogQuery.isError ? ( + catalogQuery.refetch()} + /> + ) : ( + + )} + + + + {/* Backtest settings */} + + + 4 · Backtest settings + + The ranking metric and cross-validation split. Start with the + recommended split or fine-tune under Advanced. + + + + + + + + {/* Run CTA (disabled until Slice B) */} + + +
+ {formReady + ? `Ready to compare ${selectedModels.length} model${ + selectedModels.length === 1 ? '' : 's' + }. ${RUN_COMPARISON_PENDING}` + : 'Pick a store, product, time period, horizon and at least one model to continue.'} +
+ +
+
+ + {/* Dev-only assurance that a valid request is assembled (not sent). */} + {runRequest && ( +

+ {JSON.stringify(runRequest)} +

+ )} +
+ ) +} diff --git a/frontend/src/types/api.ts b/frontend/src/types/api.ts index df2289f4..d6e0584f 100644 --- a/frontend/src/types/api.ts +++ b/frontend/src/types/api.ts @@ -1188,3 +1188,161 @@ export interface ForecastExplanation { as_of_date: string // ISO date generated_at: string // ISO datetime } + +// ============================================================================= +// Model Selection (Champion Selector) — backend slice app/features/model_selection +// ============================================================================= +// +// The FULL workflow contract is declared here so Slices B/C add BEHAVIOR, not +// type definitions. Slice A CONSUMES only `ModelCatalogResponse`, +// `PairAvailability`, and `SplitConfig` (read-only). Everything tagged +// DECLARED-FOR-LATER is wired by Slice B (async run + results) and Slice C +// (train / predict / business summary / override / promotion). + +export type ModelSelectionStatus = + | 'pending' + | 'running' + | 'completed' + | 'partial' + | 'failed' +export type RankingMetric = 'wape' | 'smape' | 'mae' | 'bias' +export type AvailabilityStatus = 'ready' | 'limited' | 'unusable' +// `ConfidenceLevel` ('high' | 'medium' | 'low') is reused from the +// Explainability section above — the backend uses the same enum. + +// Backtest split config — mirrors `app/features/backtesting/schemas.py` +// `SplitConfig` EXACTLY (bounds enforced client-side so the assembled run +// request is always valid for Slice B). 
+export type SplitStrategy = 'expanding' | 'sliding' +export interface SplitConfig { + strategy: SplitStrategy // def 'expanding' + n_splits: number // 2..20, def 5 + min_train_size: number // >= 7, def 30 + gap: number // 0..30, def 0 + horizon: number // 1..90, def 14; must be > gap; kept === forecast_horizon +} + +// --- CONSUMED in Slice A --------------------------------------------------- + +export interface CandidateModelInfo { + model_type: string + label: string + family: ModelFamily + feature_aware: boolean + /** lightgbm/xgboost — opt-in extra may be absent at runtime. */ + requires_extra: boolean + default_params: Record + /** false for feature-aware models (the predict path rejects them). */ + supports_auto_predict: boolean + description: string +} + +export interface ModelCatalogResponse { + models: CandidateModelInfo[] + default_candidate_model_types: string[] +} + +export interface PairAvailability { + store_id: number + product_id: number + first_sales_date: string | null + last_sales_date: string | null + observed_days: number + expected_calendar_days: number + coverage_ratio: number + missing_days: number + zero_sale_days: number + promotion_days: number | null + average_daily_demand: number + status: AvailabilityStatus + recommended_split_config: SplitConfig + warnings: string[] +} + +// --- DECLARED-FOR-LATER (Slices B/C wire behavior on these) ---------------- + +export interface SelectionWindow { + start_date: string // ISO date (inclusive) + end_date: string // ISO date (inclusive) +} + +export interface CandidateModelConfig { + model_type: string + params: Record +} + +export interface RankingPolicy { + minimum_sample_size: number + high_confidence_rel_improvement: number + max_acceptable_abs_bias: number +} + +export interface ModelSelectionRunRequest { + store_id: number + product_id: number + selection_window: SelectionWindow + forecast_horizon: number + ranking_metric: RankingMetric + split_config: SplitConfig + candidate_models: 
CandidateModelConfig[] + feature_frame_version: number // 1 | 2 (Slice A always 1) + feature_groups: string[] | null // only valid when feature_frame_version === 2 + ranking_policy?: RankingPolicy + // Slice A sets BOTH false. The async run path (Slice B `POST /runs`) treats + // them as NO-OPS, and Slice C owns explicit train/predict — so these two + // fields stay false throughout the UI flow and are never surfaced as toggles. + auto_train_winner: boolean + auto_predict: boolean +} + +export interface ModelRankEntry { + rank: number | null + model_type: string + params: Record + included: boolean + exclusion_reason: string | null + metrics: Record | null +} + +export interface WinnerSummary { + model_type: string + params: Record + metrics: Record + rank: number +} + +export interface ModelSelectionChartData { + wape_by_model: Record + bias_by_model: Record + fold_stability: Record + winner_actual_vs_predicted: unknown[] +} + +export interface ModelSelectionForecastSummary { + points: Record[] + total_demand: number + average_demand: number + horizon: number +} + +export interface ModelSelectionRunResponse { + selection_id: string + store_id: number + product_id: number + status: ModelSelectionStatus + selection_window: SelectionWindow + forecast_horizon: number + ranking_metric: string + availability: PairAvailability | null + ranking: ModelRankEntry[] + winner: WinnerSummary | null + recommendation_confidence: ConfidenceLevel | null + confidence_reasons: string[] + chart_data: ModelSelectionChartData | null + final_model: Record | null + forecast: ModelSelectionForecastSummary | null + business_summary: Record | null + error_message: string | null + created_at: string // ISO datetime + completed_at: string | null +} From cf2cb47809cadbcd76fb6539e58bf39bcc551cdf Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 09:47:18 +0200 Subject: [PATCH 27/30] =?UTF-8?q?feat(api,db):=20forecast=20champion=20sel?= 
=?UTF-8?q?ector=20slice=20B=20=E2=80=94=20async=20comparison=20&=20result?= =?UTF-8?q?s=20(#360)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env.example | 10 + ..._model_selection_candidate_and_progress.py | 185 +++++++ app/core/config.py | 11 + app/core/tests/test_config.py | 9 + app/features/model_selection/models.py | 87 +++- app/features/model_selection/routes.py | 75 ++- app/features/model_selection/runner.py | 312 +++++++++++ app/features/model_selection/schemas.py | 53 +- app/features/model_selection/service.py | 492 +++++++++++++++++- .../tests/test_async_routes.py | 180 +++++++ .../model_selection/tests/test_models.py | 41 +- .../tests/test_routes_integration.py | 134 +++++ .../model_selection/tests/test_runner.py | 238 +++++++++ .../model_selection/tests/test_schemas.py | 82 +++ .../model_selection/tests/test_service.py | 147 +++++- .../results/cancel-run-dialog.test.tsx | 33 ++ .../results/cancel-run-dialog.tsx | 62 +++ .../results/comparison-charts.test.tsx | 36 ++ .../results/comparison-charts.tsx | 105 ++++ .../champion-selector/results/constants.ts | 17 + .../results/model-detail-drawer.test.tsx | 43 ++ .../results/model-detail-drawer.tsx | 79 +++ .../results/ranking-table.test.tsx | 50 ++ .../results/ranking-table.tsx | 90 ++++ .../results/run-progress-panel.test.tsx | 57 ++ .../results/run-progress-panel.tsx | 87 ++++ .../results/winner-card.test.tsx | 40 ++ .../champion-selector/results/winner-card.tsx | 100 ++++ .../src/hooks/use-model-selection.test.ts | 153 +++++- frontend/src/hooks/use-model-selection.ts | 73 ++- .../src/pages/visualize/champion.test.tsx | 4 + frontend/src/pages/visualize/champion.tsx | 128 ++++- frontend/src/types/api.ts | 38 ++ 33 files changed, 3206 insertions(+), 45 deletions(-) create mode 100644 alembic/versions/d3e4f5a6b7c8_add_model_selection_candidate_and_progress.py create mode 100644 app/features/model_selection/runner.py create mode 100644 
app/features/model_selection/tests/test_async_routes.py create mode 100644 app/features/model_selection/tests/test_runner.py create mode 100644 frontend/src/components/champion-selector/results/cancel-run-dialog.test.tsx create mode 100644 frontend/src/components/champion-selector/results/cancel-run-dialog.tsx create mode 100644 frontend/src/components/champion-selector/results/comparison-charts.test.tsx create mode 100644 frontend/src/components/champion-selector/results/comparison-charts.tsx create mode 100644 frontend/src/components/champion-selector/results/constants.ts create mode 100644 frontend/src/components/champion-selector/results/model-detail-drawer.test.tsx create mode 100644 frontend/src/components/champion-selector/results/model-detail-drawer.tsx create mode 100644 frontend/src/components/champion-selector/results/ranking-table.test.tsx create mode 100644 frontend/src/components/champion-selector/results/ranking-table.tsx create mode 100644 frontend/src/components/champion-selector/results/run-progress-panel.test.tsx create mode 100644 frontend/src/components/champion-selector/results/run-progress-panel.tsx create mode 100644 frontend/src/components/champion-selector/results/winner-card.test.tsx create mode 100644 frontend/src/components/champion-selector/results/winner-card.tsx diff --git a/.env.example b/.env.example index 7d49f5b9..38ef75b4 100644 --- a/.env.example +++ b/.env.example @@ -126,5 +126,15 @@ BATCH_GLOBAL_MAX_PARALLEL=4 # mid-call, so a long fit can stall the drain. BATCH_CANCEL_DRAIN_TIMEOUT_SECONDS=30 +# Model selection (champion selector) async runner (Slice B) +# Hard upper bound on concurrent candidate backtests across all active selection +# runs on this host. Effective parallelism per run is min(this, candidates). +# Set to 1 for sequential execution. Requires uvicorn restart to apply. 
+MODEL_SELECTION_GLOBAL_MAX_PARALLEL=4 +# Max seconds DELETE /model-selection/{id} waits for in-flight candidates to +# drain before returning RFC 7807 504. sklearn / LightGBM fits are uncancellable +# mid-call, so a long fit can stall the drain. +MODEL_SELECTION_CANCEL_DRAIN_TIMEOUT_SECONDS=30 + # Frontend (Vite) VITE_API_BASE_URL=http://localhost:8123 diff --git a/alembic/versions/d3e4f5a6b7c8_add_model_selection_candidate_and_progress.py b/alembic/versions/d3e4f5a6b7c8_add_model_selection_candidate_and_progress.py new file mode 100644 index 00000000..c510c5ef --- /dev/null +++ b/alembic/versions/d3e4f5a6b7c8_add_model_selection_candidate_and_progress.py @@ -0,0 +1,185 @@ +"""add model_selection_candidate and async progress columns + +Revision ID: d3e4f5a6b7c8 +Revises: b667d321603c +Create Date: 2026-06-01 09:30:00.000000 + +Slice B of the Forecast Champion Selector (issue #360). Converts the selection +run into a DB-backed async LRO: + +- creates ``model_selection_candidate`` (one row per candidate, FK CASCADE to + ``model_selection_run.selection_id``) carrying per-candidate status, result + JSONB, error, and timing — the live-progress + audit surface; +- adds ``started_at`` + the four final count columns to ``model_selection_run``; +- widens the run status CheckConstraint to include ``'cancelled'`` (forward-only + drop + recreate of the named constraint). + +Mirrors ``c1d2e3f40512_create_batch_tables`` for JSONB / index / FK style. +""" + +from collections.abc import Sequence + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "d3e4f5a6b7c8" +down_revision: str | None = "b667d321603c" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + +_OLD_RUN_STATUS = "status IN ('pending', 'running', 'completed', 'partial', 'failed')" +_NEW_RUN_STATUS = ( + "status IN ('pending', 'running', 'completed', 'partial', 'failed', 'cancelled')" +) + + +def upgrade() -> None: + """Apply migration.""" + # ------------------------------------------------------------------ + # 1. Widen the run status CheckConstraint to include 'cancelled'. + # ------------------------------------------------------------------ + op.drop_constraint( + "ck_model_selection_run_valid_status", + "model_selection_run", + type_="check", + ) + op.create_check_constraint( + "ck_model_selection_run_valid_status", + "model_selection_run", + _NEW_RUN_STATUS, + ) + + # ------------------------------------------------------------------ + # 2. Additive progress columns on the parent run. + # ------------------------------------------------------------------ + op.add_column( + "model_selection_run", + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + ) + op.add_column( + "model_selection_run", + sa.Column("total_candidates", sa.Integer(), nullable=False, server_default="0"), + ) + op.add_column( + "model_selection_run", + sa.Column( + "completed_candidates", sa.Integer(), nullable=False, server_default="0" + ), + ) + op.add_column( + "model_selection_run", + sa.Column("failed_candidates", sa.Integer(), nullable=False, server_default="0"), + ) + op.add_column( + "model_selection_run", + sa.Column( + "cancelled_candidates", sa.Integer(), nullable=False, server_default="0" + ), + ) + + # ------------------------------------------------------------------ + # 3. Per-candidate execution child table (FK CASCADE on selection_id). 
+ # ------------------------------------------------------------------ + op.create_table( + "model_selection_candidate", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("candidate_id", sa.String(length=32), nullable=False), + sa.Column("selection_id", sa.String(length=32), nullable=False), + sa.Column("ordinal", sa.Integer(), nullable=False), + sa.Column("model_type", sa.String(length=40), nullable=False), + sa.Column("params", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.Column("status", sa.String(length=20), nullable=False), + sa.Column("result", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("error_message", sa.String(length=2000), nullable=True), + sa.Column("error_type", sa.String(length=100), nullable=True), + sa.Column("started_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("duration_ms", sa.Integer(), nullable=True), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.CheckConstraint( + "status IN ('pending', 'running', 'completed', 'failed', 'cancelled')", + name="ck_model_selection_candidate_valid_status", + ), + sa.ForeignKeyConstraint( + ["selection_id"], + ["model_selection_run.selection_id"], + ondelete="CASCADE", + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index( + op.f("ix_model_selection_candidate_candidate_id"), + "model_selection_candidate", + ["candidate_id"], + unique=True, + ) + op.create_index( + op.f("ix_model_selection_candidate_selection_id"), + "model_selection_candidate", + ["selection_id"], + unique=False, + ) + op.create_index( + op.f("ix_model_selection_candidate_status"), + "model_selection_candidate", + ["status"], + unique=False, + ) + op.create_index( + "ix_model_selection_candidate_selection_status", 
+        "model_selection_candidate",
+        ["selection_id", "status"],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    """Revert migration.
+
+    Drops the candidate table and the progress columns, then narrows the run
+    status constraint back to the pre-``'cancelled'`` set. Runs already stored
+    as ``'cancelled'`` are rewritten to ``'failed'`` first: PostgreSQL validates
+    existing rows when a CHECK constraint is added, so leaving them in place
+    would abort the downgrade.
+    """
+    op.drop_index(
+        "ix_model_selection_candidate_selection_status",
+        table_name="model_selection_candidate",
+    )
+    op.drop_index(
+        op.f("ix_model_selection_candidate_status"),
+        table_name="model_selection_candidate",
+    )
+    op.drop_index(
+        op.f("ix_model_selection_candidate_selection_id"),
+        table_name="model_selection_candidate",
+    )
+    op.drop_index(
+        op.f("ix_model_selection_candidate_candidate_id"),
+        table_name="model_selection_candidate",
+    )
+    op.drop_table("model_selection_candidate")
+
+    op.drop_column("model_selection_run", "cancelled_candidates")
+    op.drop_column("model_selection_run", "failed_candidates")
+    op.drop_column("model_selection_run", "completed_candidates")
+    op.drop_column("model_selection_run", "total_candidates")
+    op.drop_column("model_selection_run", "started_at")
+
+    op.drop_constraint(
+        "ck_model_selection_run_valid_status",
+        "model_selection_run",
+        type_="check",
+    )
+    # 'cancelled' does not exist in the old status set; fold those runs into
+    # 'failed' (the closest pre-Slice-B terminal state) so the narrower
+    # constraint below can be validated against the existing rows.
+    op.execute(
+        "UPDATE model_selection_run SET status = 'failed' WHERE status = 'cancelled'"
+    )
+    op.create_check_constraint(
+        "ck_model_selection_run_valid_status",
+        "model_selection_run",
+        _OLD_RUN_STATUS,
+    )
diff --git a/app/core/config.py b/app/core/config.py
index 09a30cfc..e2d76a85 100644
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -134,6 +134,17 @@ class Settings(BaseSettings):
     # are uncancellable mid-call, so a long fit can stall the drain.
     batch_cancel_drain_timeout_seconds: int = 30
 
+    # Model selection (champion selector) async runner (Slice B) — mirrors the
+    # batch runner. Hard upper bound on concurrent candidate backtests across
+    # all active selection runs on this host; sized for the same Postgres pool
+    # (pool_size=5, max_overflow=10). Setting this to 1 makes the runner
+    # sequential. Env override: MODEL_SELECTION_GLOBAL_MAX_PARALLEL=8 (restart).
+ model_selection_global_max_parallel: int = 4 + # Max seconds DELETE /model-selection/{id} waits for in-flight candidates to + # settle before returning RFC 7807 504. In-flight sklearn/LightGBM fits are + # uncancellable mid-call, so a long fit can stall the drain. + model_selection_cancel_drain_timeout_seconds: int = 30 + # RAG Embedding Configuration rag_embedding_provider: Literal["openai", "ollama"] = "openai" openai_api_key: str = "" diff --git a/app/core/tests/test_config.py b/app/core/tests/test_config.py index 0dc96733..496c29bb 100644 --- a/app/core/tests/test_config.py +++ b/app/core/tests/test_config.py @@ -23,6 +23,15 @@ def test_settings_has_defaults(monkeypatch): assert settings.api_port == 8123 +def test_model_selection_runner_defaults(monkeypatch): + """Slice B async-runner settings default to the batch-mirrored values.""" + monkeypatch.delenv("MODEL_SELECTION_GLOBAL_MAX_PARALLEL", raising=False) + monkeypatch.delenv("MODEL_SELECTION_CANCEL_DRAIN_TIMEOUT_SECONDS", raising=False) + settings = Settings(_env_file=None) + assert settings.model_selection_global_max_parallel == 4 + assert settings.model_selection_cancel_drain_timeout_seconds == 30 + + def test_settings_is_development_property(): """is_development should return True for development env.""" settings = Settings(app_env="development") diff --git a/app/features/model_selection/models.py b/app/features/model_selection/models.py index ce7c6e20..a39d5763 100644 --- a/app/features/model_selection/models.py +++ b/app/features/model_selection/models.py @@ -15,7 +15,7 @@ from enum import Enum from typing import Any -from sqlalchemy import CheckConstraint, Date, DateTime, Index, Integer, String +from sqlalchemy import CheckConstraint, Date, DateTime, ForeignKey, Index, Integer, String from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import Mapped, mapped_column @@ -27,10 +27,12 @@ class ModelSelectionStatus(str, Enum): """Lifecycle states of a selection run. 
Transitions: - - PENDING -> RUNNING -> {COMPLETED, PARTIAL, FAILED} - - PARTIAL fires when >=1 candidate succeeded AND >=1 candidate failed. + - PENDING -> RUNNING -> {COMPLETED, PARTIAL, FAILED, CANCELLED} + - PARTIAL fires when >=1 candidate succeeded AND >=1 candidate failed/cancelled. - FAILED fires when availability is unusable (fail-fast) OR every candidate's backtest errored (no valid winner). + - CANCELLED (Slice B) fires when a cancel drained before any candidate + reached a non-cancelled terminal state. """ PENDING = "pending" @@ -38,6 +40,29 @@ class ModelSelectionStatus(str, Enum): COMPLETED = "completed" PARTIAL = "partial" FAILED = "failed" + CANCELLED = "cancelled" + + +# Statuses a selection run cannot transition out of — the DELETE-route 409 set +# (Slice B). Mirrors ``batch.models.TERMINAL_BATCH_STATES``. +TERMINAL_SELECTION_STATES: frozenset[str] = frozenset( + { + ModelSelectionStatus.COMPLETED.value, + ModelSelectionStatus.PARTIAL.value, + ModelSelectionStatus.FAILED.value, + ModelSelectionStatus.CANCELLED.value, + } +) + + +class CandidateStatus(str, Enum): + """Per-candidate execution states inside an async selection run (Slice B).""" + + PENDING = "pending" + RUNNING = "running" + COMPLETED = "completed" + FAILED = "failed" + CANCELLED = "cancelled" class ModelSelectionRun(TimestampMixin, Base): @@ -74,13 +99,21 @@ class ModelSelectionRun(TimestampMixin, Base): forecast_result: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) business_summary: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) error_message: Mapped[str | None] = mapped_column(String(2000), nullable=True) + # Slice B (async) — set when the run starts executing; the four count + # columns cache the FINAL per-status candidate tally written once at settle + # (live progress is derived from a GROUP BY over the child rows). 
+ started_at: Mapped[_dt.datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) + total_candidates: Mapped[int] = mapped_column(Integer, default=0, server_default="0") + completed_candidates: Mapped[int] = mapped_column(Integer, default=0, server_default="0") + failed_candidates: Mapped[int] = mapped_column(Integer, default=0, server_default="0") + cancelled_candidates: Mapped[int] = mapped_column(Integer, default=0, server_default="0") completed_at: Mapped[_dt.datetime | None] = mapped_column( DateTime(timezone=True), nullable=True ) __table_args__ = ( CheckConstraint( - "status IN ('pending', 'running', 'completed', 'partial', 'failed')", + "status IN ('pending', 'running', 'completed', 'partial', 'failed', 'cancelled')", name="ck_model_selection_run_valid_status", ), Index( @@ -91,3 +124,49 @@ class ModelSelectionRun(TimestampMixin, Base): ), Index("ix_model_selection_run_status_created", "status", "created_at"), ) + + +class ModelSelectionCandidate(TimestampMixin, Base): + """One candidate's async execution record inside a selection run (Slice B). + + Concurrent candidate tasks each write their OWN row in their OWN session — + no shared-row write race. ``result`` carries the full ``CandidateResult`` + JSONB (incl. folds) on success; failed/cancelled candidates keep their row + so they stay visible in the results UI. Mirrors ``batch.BatchJobItem``. 
+ """ + + __tablename__ = "model_selection_candidate" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + candidate_id: Mapped[str] = mapped_column(String(32), unique=True, index=True) + selection_id: Mapped[str] = mapped_column( + String(32), + ForeignKey("model_selection_run.selection_id", ondelete="CASCADE"), + index=True, + ) + ordinal: Mapped[int] = mapped_column(Integer) # submit order — stable display + model_type: Mapped[str] = mapped_column(String(40)) + params: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False) + status: Mapped[str] = mapped_column( + String(20), default=CandidateStatus.PENDING.value, index=True + ) + result: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + error_message: Mapped[str | None] = mapped_column(String(2000), nullable=True) + error_type: Mapped[str | None] = mapped_column(String(100), nullable=True) + started_at: Mapped[_dt.datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) + completed_at: Mapped[_dt.datetime | None] = mapped_column( + DateTime(timezone=True), nullable=True + ) + duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True) + + __table_args__ = ( + CheckConstraint( + "status IN ('pending', 'running', 'completed', 'failed', 'cancelled')", + name="ck_model_selection_candidate_valid_status", + ), + Index( + "ix_model_selection_candidate_selection_status", + "selection_id", + "status", + ), + ) diff --git a/app/features/model_selection/routes.py b/app/features/model_selection/routes.py index f4f833c7..7597464e 100644 --- a/app/features/model_selection/routes.py +++ b/app/features/model_selection/routes.py @@ -16,7 +16,7 @@ from __future__ import annotations -from fastapi import APIRouter, Depends, Query, status +from fastapi import APIRouter, Depends, Query, Response, status from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession @@ -30,6 +30,7 @@ PairAvailabilityResponse, PredictWinnerResponse, 
RankingResult, + SubmitRunResponse, TrainWinnerResponse, ) from app.features.model_selection.service import ModelSelectionService @@ -79,6 +80,43 @@ async def get_model_catalog() -> ModelCatalogResponse: return service.get_model_catalog() +@router.post( + "/runs", + response_model=SubmitRunResponse, + status_code=status.HTTP_202_ACCEPTED, + summary="Submit an async candidate comparison (fire-and-forget LRO)", +) +async def submit_run( + request: ModelSelectionRunRequest, + response: Response, + db: AsyncSession = Depends(get_db), +) -> SubmitRunResponse: + """Submit an async selection run — returns 202 with monitor/cancel pointers. + + The candidate backtests run in a detached task; poll + ``GET /model-selection/{selection_id}`` for live progress, terminal ranking, + and the winner. + """ + logger.info( + "model_selection.runs_request_received", + store_id=request.store_id, + product_id=request.product_id, + n_candidates=len(request.candidate_models), + ) + service = ModelSelectionService() + try: + result = await service.submit_run(db, request) + response.headers["Location"] = result.monitor_url + response.headers["Retry-After"] = "2" + return result + except ValueError as exc: + raise BadRequestError(message=str(exc)) from exc + except SQLAlchemyError as exc: + raise DatabaseError( + message="Failed to submit selection run", details={"error": str(exc)} + ) from exc + + @router.post( "/run", response_model=ModelSelectionRunResponse, @@ -128,6 +166,41 @@ async def get_selection( ) from exc +@router.delete( + "/{selection_id}", + response_model=ModelSelectionRunResponse, + status_code=status.HTTP_200_OK, + summary="Cancel an in-flight selection run (cooperative drain)", + description=( + "Cooperatively cancel an async selection run (Slice B). Pending " + "candidates skip; running candidates observe ``asyncio.CancelledError`` " + "at the next safe yield — sklearn / LightGBM fits are uncancellable " + "mid-call, so an in-flight fit may finish first. 
Returns:\n\n" + "- ``200`` settled run on a clean drain\n" + "- ``404`` RFC 7807 if the run does not exist\n" + "- ``409`` RFC 7807 if the run is already terminal\n" + "- ``504`` RFC 7807 if the drain exceeds " + "``Settings.model_selection_cancel_drain_timeout_seconds``" + ), +) +async def cancel_run( + selection_id: str, + db: AsyncSession = Depends(get_db), +) -> ModelSelectionRunResponse: + """Cancel an in-flight selection run and return its settled record. + + ``NotFoundError`` (404) / ``ConflictError`` (409) / ``GatewayTimeoutError`` + (504) raised in-service bubble to the global RFC 7807 handler. + """ + service = ModelSelectionService() + try: + return await service.cancel_run(db, selection_id) + except SQLAlchemyError as exc: + raise DatabaseError( + message="Failed to cancel selection run", details={"error": str(exc)} + ) from exc + + @router.get( "/{selection_id}/ranking", response_model=RankingResult, diff --git a/app/features/model_selection/runner.py b/app/features/model_selection/runner.py new file mode 100644 index 00000000..7320ea03 --- /dev/null +++ b/app/features/model_selection/runner.py @@ -0,0 +1,312 @@ +"""Bounded-concurrency candidate runner for the champion selector (Slice B). + +A slice-local mirror of ``app/features/batch/runner.py``: one +:class:`asyncio.Semaphore` inside an :class:`asyncio.TaskGroup` fans out one +task per ``model_selection_candidate``; each child opens its own +``AsyncSession`` and observes a cooperative :class:`asyncio.Event` so +``DELETE /model-selection/{selection_id}`` cancels what hasn't started and +gracefully drains what has. + +The asyncio mechanics (the three cancel mechanisms, the +``except* asyncio.CancelledError`` PEP-654 catch shape, the per-task cancel + +cooperative event) are documented in +``PRPs/ai_docs/asyncio-taskgroup-cancellation.md``. 
+
+Cross-slice rule: this module imports from ``app.features.model_selection.models``
+(same slice) and ``app.core.*`` only — it does NOT import the batch runner
+(vertical-slice rule). The per-child ``execute_candidate`` callable supplied by
+``ModelSelectionService`` is the seam that keeps the heavy backtest work out of
+this module.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from collections.abc import Awaitable, Callable
+from dataclasses import dataclass, field
+from datetime import UTC, datetime
+from typing import TYPE_CHECKING
+
+from sqlalchemy import select, update
+
+from app.core.logging import get_logger
+from app.features.model_selection.models import (
+    CandidateStatus,
+    ModelSelectionCandidate,
+)
+
+if TYPE_CHECKING:
+    from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+
+logger = get_logger(__name__)
+
+
+@dataclass
+class CancelHandle:
+    """Cancel signal + Task refs + completion event for an in-flight selection.
+
+    Created by :func:`register_selection` (or, if that was never called, by
+    :func:`run_selection_candidates`), looked up by :func:`cancel_selection`,
+    removed from :data:`_ACTIVE_SELECTIONS` and signalled by the runner's caller
+    via :func:`mark_completed` *after* the parent's settle has committed — so
+    ``DELETE`` never observes the parent mid-settle.
+    """
+
+    cancel_event: asyncio.Event = field(default_factory=asyncio.Event)
+    completed_event: asyncio.Event = field(default_factory=asyncio.Event)
+    tasks: list[asyncio.Task[None]] = field(default_factory=list)
+
+
+# Module-level registry — single-process scope (matches the single-host vision).
+_ACTIVE_SELECTIONS: dict[str, CancelHandle] = {}
+
+
+def register_selection(selection_id: str) -> CancelHandle:
+    """Eagerly create (or reuse) the cancel handle for a selection.
+
+    Called by the service the moment ``POST /runs`` commits — BEFORE the
+    detached worker starts — so a ``DELETE`` arriving in the gap between the 202
+    response and the worker's first ``run_selection_candidates`` call still
+    finds a handle (and is not misreported as "already settled"). The worker's
+    ``setdefault`` reuses this same handle.
+    """
+    return _ACTIVE_SELECTIONS.setdefault(selection_id, CancelHandle())
+
+
+async def run_selection_candidates(
+    *,
+    selection_id: str,
+    candidate_ids: list[str],
+    max_parallel: int,
+    global_max_parallel: int,
+    session_maker: async_sessionmaker[AsyncSession],
+    execute_candidate: Callable[[str], Awaitable[None]],
+) -> int:
+    """Execute one selection's candidates through a bounded TaskGroup.
+
+    Args:
+        selection_id: ``model_selection_run.selection_id`` — registry key + log
+            correlator.
+        candidate_ids: ``model_selection_candidate.candidate_id`` values, in
+            submit order.
+        max_parallel: per-run cap (Slice B passes the global setting — there is
+            no per-run field).
+        global_max_parallel: host-wide cap from
+            :attr:`Settings.model_selection_global_max_parallel`.
+        session_maker: shared ``async_sessionmaker``; each child opens one
+            ``AsyncSession`` from it for the state-transition writes the runner
+            emits. The caller-supplied ``execute_candidate`` opens its OWN
+            session from the same maker.
+        execute_candidate: one-arg coroutine; runs one candidate's backtest +
+            persists its result/failure in its own session.
+
+    Returns:
+        ``effective = max(1, min(max_parallel, global_max_parallel))`` — the
+        semaphore width actually used.
+
+    Notes:
+        - Caller MUST call :func:`mark_completed` after the parent settle
+          commits (even on the exception path).
+        - Cancellation does NOT propagate out: ``except* asyncio.CancelledError``
+          absorbs the ``ExceptionGroup`` so the caller can settle the parent.
+    """
+    # Clamp to >= 1: ``Semaphore(0)`` would park every child forever and a
+    # negative value raises ``ValueError``, so a misconfigured cap degrades to
+    # sequential execution instead of hanging or crashing the run.
+    effective = max(1, min(max_parallel, global_max_parallel))
+    sem = asyncio.Semaphore(effective)
+    handle = _ACTIVE_SELECTIONS.setdefault(selection_id, CancelHandle())
+
+    logger.info(
+        "model_selection.runner_start",
+        selection_id=selection_id,
+        total_candidates=len(candidate_ids),
+        max_parallel=max_parallel,
+        effective_max_parallel=effective,
+    )
+
+    async def _child(candidate_id: str) -> None:
+        # One ``AsyncSession`` per child for the runner's own state writes.
+        async with session_maker() as session:
+            # FAST-CANCEL before the semaphore acquire — skips not-yet-started
+            # work cleanly (sync check; no await window).
+            if handle.cancel_event.is_set():
+                await _mark_cancelled_skipped(session, candidate_id)
+                return
+
+            acquired = False
+            try:
+                async with sem:
+                    acquired = True
+                    # Re-check after acquire — a sibling may have signalled
+                    # cancel while we waited on the semaphore.
+                    if handle.cancel_event.is_set():
+                        await _mark_cancelled_skipped(session, candidate_id)
+                        return
+                    try:
+                        await execute_candidate(candidate_id)
+                    except asyncio.CancelledError:
+                        # Persist the cancelled terminal state before re-raising
+                        # so the TaskGroup absorbs the cancel.
+                        await _mark_cancelled_running(session, candidate_id)
+                        raise
+                    except Exception:
+                        # Defensive: ``execute_candidate`` should persist its own
+                        # failure; if it didn't, mark FAILED so settle aggregates
+                        # correctly. Do NOT re-raise — that would tear down siblings.
+                        logger.exception(
+                            "model_selection.runner_unexpected_child_error",
+                            selection_id=selection_id,
+                            candidate_id=candidate_id,
+                        )
+                        await _mark_failed_unexpected(session, candidate_id)
+            except asyncio.CancelledError:
+                if not acquired:
+                    await _mark_cancelled_skipped(session, candidate_id)
+                raise
+
+    try:
+        async with asyncio.TaskGroup() as tg:
+            for cid in candidate_ids:
+                task = tg.create_task(_child(cid), name=f"model_selection:{selection_id}:{cid}")
+                handle.tasks.append(task)
+    except* asyncio.CancelledError:
+        # Clean ``task.cancel()`` calls are absorbed here; the per-child blocks
+        # already wrote the terminal state. The caller settles the parent.
+        logger.info(
+            "model_selection.runner_cancelled_exception_group",
+            selection_id=selection_id,
+        )
+
+    logger.info(
+        "model_selection.runner_complete",
+        selection_id=selection_id,
+        cancel_requested=handle.cancel_event.is_set(),
+    )
+    return effective
+
+
+def cancel_selection(selection_id: str) -> bool:
+    """Signal cooperative cancel for an in-flight selection.
+
+    Sets ``cancel_event`` (skips pending children) and ``task.cancel()`` on
+    every tracked child (interrupts running children at the next yield).
+
+    Returns:
+        ``True`` if the selection was registered; ``False`` if no handle exists
+        (race: the selection settled before cancel).
+    """
+    handle = _ACTIVE_SELECTIONS.get(selection_id)
+    if handle is None:
+        return False
+    handle.cancel_event.set()
+    cancelled_count = 0
+    for task in handle.tasks:
+        if not task.done():
+            task.cancel()
+            cancelled_count += 1
+    logger.info(
+        "model_selection.cancel_requested",
+        selection_id=selection_id,
+        n_tasks_tracked=len(handle.tasks),
+        n_tasks_cancelled=cancelled_count,
+    )
+    return True
+
+
+async def await_drain(selection_id: str, timeout_seconds: float) -> bool:
+    """Block until the selection's parent settle commits, or timeout elapses.
+
+    Returns:
+        ``True`` on clean drain (or if never registered); ``False`` on timeout.
+    """
+    handle = _ACTIVE_SELECTIONS.get(selection_id)
+    if handle is None:
+        return True
+    try:
+        await asyncio.wait_for(handle.completed_event.wait(), timeout=timeout_seconds)
+        return True
+    except TimeoutError:
+        # asyncio.wait_for raises the built-in TimeoutError since Python 3.11.
+        logger.warning(
+            "model_selection.cancel_drain_timeout",
+            selection_id=selection_id,
+            timeout_seconds=timeout_seconds,
+        )
+        return False
+
+
+def mark_completed(selection_id: str) -> None:
+    """Signal that the selection's parent settle has committed.
+
+    Must be called after ``_settle`` commits (including the failure path) so any
+    concurrent ``DELETE`` drain unblocks. Idempotent: a missing handle is a no-op.
+    """
+    handle = _ACTIVE_SELECTIONS.pop(selection_id, None)
+    if handle is None:
+        return
+    handle.completed_event.set()
+
+
+# --------------------------------------------------------------------- helpers
+# Each helper accepts an already-open ``AsyncSession`` (one per child) and
+# commits its single UPDATE. They never raise on a missing row (a deleted-parent
+# race is survivable — log + move on).
+
+
+async def _mark_cancelled_skipped(session: AsyncSession, candidate_id: str) -> None:
+    """Mark a not-yet-started candidate as cancelled (pending → cancelled)."""
+    now = datetime.now(UTC)
+    await session.execute(
+        update(ModelSelectionCandidate)
+        .where(ModelSelectionCandidate.candidate_id == candidate_id)
+        .values(status=CandidateStatus.CANCELLED.value, completed_at=now)
+    )
+    await session.commit()
+
+
+async def _mark_cancelled_running(session: AsyncSession, candidate_id: str) -> None:
+    """Mark a running candidate as cancelled (running → cancelled)."""
+    now = datetime.now(UTC)
+    row = (
+        await session.execute(
+            select(ModelSelectionCandidate.started_at).where(
+                ModelSelectionCandidate.candidate_id == candidate_id
+            )
+        )
+    ).first()
+    started_at = row[0] if row is not None else None
+    duration_ms = int((now - started_at).total_seconds() * 1000) if started_at is not None else None
+    await session.execute(
+        update(ModelSelectionCandidate)
+        .where(ModelSelectionCandidate.candidate_id == candidate_id)
+        .values(
+            status=CandidateStatus.CANCELLED.value,
+            completed_at=now,
+            duration_ms=duration_ms,
+        )
+    )
+    await session.commit()
+
+
+async def _mark_failed_unexpected(session: AsyncSession, candidate_id: str) -> None:
+    """Defensive: mark a candidate ``failed`` when ``execute_candidate`` raised."""
+    now = datetime.now(UTC)
+    await session.execute(
+        update(ModelSelectionCandidate)
+        .where(ModelSelectionCandidate.candidate_id == candidate_id)
+        .values(
+            status=CandidateStatus.FAILED.value,
+            completed_at=now,
+            error_message="Runner caught unexpected exception (see structlog)",
+            error_type="UnexpectedRunnerError",
+        )
+    )
+    await session.commit()
+
+
+# Public surface of the runner. ``register_selection`` is listed because the
+# service calls it at submit time; ``_ACTIVE_SELECTIONS`` stays exported for
+# backward compatibility with existing star-imports.
+__all__ = [
+    "_ACTIVE_SELECTIONS",
+    "CancelHandle",
+    "await_drain",
+    "cancel_selection",
+    "mark_completed",
+    "register_selection",
+    "run_selection_candidates",
+]
diff --git a/app/features/model_selection/schemas.py b/app/features/model_selection/schemas.py index d3bc45dd..050d3ead 100644 --- 
a/app/features/model_selection/schemas.py +++ b/app/features/model_selection/schemas.py @@ -46,7 +46,10 @@ ] RankingMetric = Literal["wape", "smape", "mae", "bias"] -SelectionStatusLiteral = Literal["pending", "running", "completed", "partial", "failed"] +SelectionStatusLiteral = Literal[ + "pending", "running", "completed", "partial", "failed", "cancelled" +] +CandidateStatusLiteral = Literal["pending", "running", "completed", "failed", "cancelled"] ConfidenceLevel = Literal["high", "medium", "low"] AvailabilityStatus = Literal["ready", "limited", "unusable"] @@ -264,8 +267,40 @@ class ForecastSummary(BaseModel): horizon: int +class CandidateProgress(BaseModel): + """One candidate's live execution state (Slice B async run). + + Output-only. Empty list on a legacy synchronous ``/run`` row (no children). + """ + + candidate_id: str + ordinal: int + model_type: str + status: CandidateStatusLiteral + error: str | None = None + started_at: datetime | None = None + completed_at: datetime | None = None + duration_ms: int | None = None + + +class SelectionProgress(BaseModel): + """Per-status candidate counts for an async selection run (Slice B).""" + + total: int + pending: int + running: int + completed: int + failed: int + cancelled: int + + class ModelSelectionRunResponse(BaseModel): - """``POST /model-selection/run`` and ``GET /model-selection/{id}`` contract.""" + """``POST /model-selection/run`` and ``GET /model-selection/{id}`` contract. + + Slice B adds ``started_at`` / ``progress`` / ``candidate_progress`` as + ADDITIVE fields with safe defaults — a legacy synchronous ``/run`` row has + ``progress=None`` and ``candidate_progress=[]``. 
+ """ selection_id: str store_id: int @@ -285,7 +320,21 @@ class ModelSelectionRunResponse(BaseModel): business_summary: dict[str, Any] | None error_message: str | None created_at: datetime + started_at: datetime | None = None completed_at: datetime | None + progress: SelectionProgress | None = None + candidate_progress: list[CandidateProgress] = Field(default_factory=list) + + +class SubmitRunResponse(ModelSelectionRunResponse): + """``POST /model-selection/runs`` 202 response — an additive superset. + + Carries the LRO status-monitor pointers (the frontend drives the UI from + these body fields, not the ``Location``/``Retry-After`` headers). + """ + + monitor_url: str + cancel_url: str class CandidateModelInfo(BaseModel): diff --git a/app/features/model_selection/service.py b/app/features/model_selection/service.py index b8536068..743e647e 100644 --- a/app/features/model_selection/service.py +++ b/app/features/model_selection/service.py @@ -15,24 +15,41 @@ from __future__ import annotations +import asyncio import uuid +from collections.abc import Sequence from datetime import UTC, datetime from typing import TYPE_CHECKING from sqlalchemy import and_, func, or_, select -from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker -from app.core.exceptions import BadRequestError, NotFoundError +from app.core.config import get_settings +from app.core.database import get_session_maker +from app.core.exceptions import ( + BadRequestError, + ConflictError, + GatewayTimeoutError, + NotFoundError, +) from app.core.logging import get_logger from app.features.backtesting.schemas import SplitConfig from app.features.data_platform.models import Product, Promotion, SalesDaily, Store +from app.features.model_selection import runner from app.features.model_selection.capabilities import build_model_catalog from app.features.model_selection.explanations import explain_winner -from app.features.model_selection.models import 
ModelSelectionRun, ModelSelectionStatus +from app.features.model_selection.models import ( + TERMINAL_SELECTION_STATES, + CandidateStatus, + ModelSelectionCandidate, + ModelSelectionRun, + ModelSelectionStatus, +) from app.features.model_selection.ranking import build_chart_data, rank_candidates from app.features.model_selection.schemas import ( AvailabilityStatus, CandidateModelConfig, + CandidateProgress, CandidateResult, ChartData, FoldChart, @@ -42,7 +59,9 @@ ModelSelectionRunResponse, PairAvailabilityResponse, RankingResult, + SelectionProgress, SelectionWindow, + SubmitRunResponse, TrainWinnerResponse, WinnerSummary, ) @@ -53,6 +72,11 @@ logger = get_logger(__name__) +# Strong refs to detached background workers — asyncio holds only a WEAK ref to +# a bare ``create_task`` result, so without this set a worker can be GC'd +# mid-run (https://docs.python.org/3.12/library/asyncio-task.html#asyncio.create_task). +_BACKGROUND_TASKS: set[asyncio.Task[None]] = set() + # Availability policy constants (module-level; not operator-configurable in v1). MIN_COVERAGE_RATIO = 0.8 DEFAULT_MIN_TRAIN_SIZE = 30 @@ -402,14 +426,471 @@ async def run_selection( ) return self._response(row, ranking) + # ------------------------------------------------------------------------- + # Async orchestration (Slice B) — fire-and-forget LRO + # ------------------------------------------------------------------------- + + async def submit_run( + self, db: AsyncSession, request: ModelSelectionRunRequest + ) -> SubmitRunResponse: + """Submit an async selection run: insert parent + children, detach worker. + + Returns 202-shaped ``SubmitRunResponse`` (status=running) IMMEDIATELY — + the candidate backtests run in a detached :func:`asyncio.create_task` + that uses its OWN sessions (never this request ``db``). 
+ """ + availability = await self.get_availability( + db, + request.store_id, + request.product_id, + request.forecast_horizon, + request.split_config, + ) + + selection_id = uuid.uuid4().hex + now = datetime.now(UTC) + row = ModelSelectionRun( + selection_id=selection_id, + status=ModelSelectionStatus.RUNNING.value, + store_id=request.store_id, + product_id=request.product_id, + start_date=request.selection_window.start_date, + end_date=request.selection_window.end_date, + forecast_horizon=request.forecast_horizon, + ranking_metric=request.ranking_metric, + candidate_models=[c.model_dump() for c in request.candidate_models], + policy_snapshot=request.ranking_policy.model_dump(mode="json"), + availability_snapshot=availability.model_dump(mode="json"), + started_at=now, + total_candidates=len(request.candidate_models), + ) + db.add(row) + # Flush the parent INSERT before the children — there is no ORM + # ``relationship`` and the FK targets the non-PK ``selection_id``, so the + # unit-of-work would not otherwise order parent-before-child. + await db.flush() + + # Fail fast on unusable availability (LOCKED #2 parity with the sync path) + # — persist a failed parent (no children, no worker) and raise 400. + if availability.status == "unusable": + message = "Insufficient data for model selection (availability unusable)." 
+ row.status = ModelSelectionStatus.FAILED.value + row.error_message = message + row.completed_at = now + await db.commit() + logger.warning( + "model_selection.run_failed", + selection_id=selection_id, + reason="unusable_availability", + ) + raise BadRequestError(message=message) + + candidates: list[ModelSelectionCandidate] = [] + for ordinal, candidate in enumerate(request.candidate_models): + cand = ModelSelectionCandidate( + candidate_id=uuid.uuid4().hex, + selection_id=selection_id, + ordinal=ordinal, + model_type=candidate.model_type, + params=candidate.params, + status=CandidateStatus.PENDING.value, + ) + db.add(cand) + candidates.append(cand) + await db.commit() + await db.refresh(row) # populate server-default created_at for the 202 body + + logger.info( + "model_selection.run_submitted", + selection_id=selection_id, + store_id=request.store_id, + product_id=request.product_id, + n_candidates=len(candidates), + ) + + # Eagerly register the cancel handle so a DELETE arriving before the + # detached worker starts still finds it (avoids a false "already settled" + # 409). The worker's setdefault reuses this same handle. + runner.register_selection(selection_id) + + # Detach the worker — hold a strong ref so it cannot be GC'd mid-run. 
+ task = asyncio.create_task( + self._run_in_background(selection_id, request), + name=f"model_selection_worker:{selection_id}", + ) + _BACKGROUND_TASKS.add(task) + task.add_done_callback(_BACKGROUND_TASKS.discard) + + candidate_progress = [ + CandidateProgress( + candidate_id=c.candidate_id, + ordinal=c.ordinal, + model_type=c.model_type, + status="pending", + ) + for c in candidates + ] + progress = SelectionProgress( + total=len(candidates), + pending=len(candidates), + running=0, + completed=0, + failed=0, + cancelled=0, + ) + return SubmitRunResponse( + selection_id=selection_id, + store_id=request.store_id, + product_id=request.product_id, + status="running", + selection_window=request.selection_window, + forecast_horizon=request.forecast_horizon, + ranking_metric=request.ranking_metric, + availability=availability, + ranking=[], + winner=None, + recommendation_confidence=None, + confidence_reasons=[], + chart_data=None, + final_model=None, + forecast=None, + business_summary=None, + error_message=None, + created_at=row.created_at, + started_at=now, + completed_at=None, + progress=progress, + candidate_progress=candidate_progress, + monitor_url=f"/model-selection/{selection_id}", + cancel_url=f"/model-selection/{selection_id}", + ) + + async def _run_in_background( + self, selection_id: str, request: ModelSelectionRunRequest + ) -> None: + """Detached worker — runs candidate backtests, then settles the parent. + + Uses ONLY sessions from ``get_session_maker()`` (the request session is + long gone). Never raises out — settles the parent to its observed state. 
+ """ + session_maker = get_session_maker() + settings = get_settings() + + async def _exec(candidate_id: str) -> None: + from pydantic import TypeAdapter # lazy + + from app.features.backtesting.schemas import BacktestConfig # lazy + from app.features.backtesting.service import BacktestingService # lazy + from app.features.forecasting.schemas import ModelConfig # lazy + + async with session_maker() as session: + cand = await session.scalar( + select(ModelSelectionCandidate).where( + ModelSelectionCandidate.candidate_id == candidate_id + ) + ) + if cand is None: # deleted-parent race — survivable + return + started = datetime.now(UTC) + cand.status = CandidateStatus.RUNNING.value + cand.started_at = started + await session.commit() + logger.info( + "model_selection.candidate_started", + selection_id=selection_id, + model_type=cand.model_type, + ) + try: + adapter: TypeAdapter[object] = TypeAdapter(ModelConfig) + cfg = adapter.validate_python({"model_type": cand.model_type, **cand.params}) + backtest = await BacktestingService().run_backtest( + session, + request.store_id, + request.product_id, + request.selection_window.start_date, + request.selection_window.end_date, + BacktestConfig( + split_config=request.split_config, + model_config_main=cfg, # type: ignore[arg-type] + include_baselines=False, + store_fold_details=True, + ), + ) + result = self._shape_candidate( + CandidateModelConfig.model_validate( + {"model_type": cand.model_type, "params": cand.params} + ), + backtest, + ) + cand.result = result.model_dump(mode="json") + cand.status = CandidateStatus.COMPLETED.value + logger.info( + "model_selection.candidate_completed", + selection_id=selection_id, + model_type=cand.model_type, + ) + except Exception as exc: # never hide a failed candidate + cand.status = CandidateStatus.FAILED.value + cand.error_message = str(exc)[:2000] + cand.error_type = type(exc).__name__ + logger.warning( + "model_selection.candidate_failed", + selection_id=selection_id, + 
model_type=cand.model_type, + error=str(exc), + ) + finished = datetime.now(UTC) + cand.completed_at = finished + cand.duration_ms = int((finished - started).total_seconds() * 1000) + await session.commit() + + try: + candidate_ids = await self._candidate_ids(session_maker, selection_id) + await runner.run_selection_candidates( + selection_id=selection_id, + candidate_ids=candidate_ids, + max_parallel=settings.model_selection_global_max_parallel, + global_max_parallel=settings.model_selection_global_max_parallel, + session_maker=session_maker, + execute_candidate=_exec, + ) + finally: + # Always settle + unblock any DELETE drain, even if loading the + # candidate ids or the runner itself raised unexpectedly. + await self._settle(selection_id, request, session_maker) + runner.mark_completed(selection_id) + + async def _candidate_ids( + self, session_maker: async_sessionmaker[AsyncSession], selection_id: str + ) -> list[str]: + """Load this run's candidate ids in submit (ordinal) order.""" + async with session_maker() as session: + rows = ( + await session.execute( + select(ModelSelectionCandidate.candidate_id) + .where(ModelSelectionCandidate.selection_id == selection_id) + .order_by(ModelSelectionCandidate.ordinal) + ) + ).all() + return [r[0] for r in rows] + + async def _settle( + self, + selection_id: str, + request: ModelSelectionRunRequest, + session_maker: async_sessionmaker[AsyncSession], + ) -> None: + """Aggregate terminal children → ranking/chart/business + final status. + + REUSES the pure ``rank_candidates`` / ``build_chart_data`` / + ``explain_winner`` so the terminal GET output is byte-compatible with + the synchronous ``/run`` path (LOCKED #7). 
+ """ + async with session_maker() as session: + row = await session.scalar( + select(ModelSelectionRun).where(ModelSelectionRun.selection_id == selection_id) + ) + if row is None: # deleted-parent race + return + children = ( + ( + await session.execute( + select(ModelSelectionCandidate) + .where(ModelSelectionCandidate.selection_id == selection_id) + .order_by(ModelSelectionCandidate.ordinal) + ) + ) + .scalars() + .all() + ) + + results: list[CandidateResult] = [] + for child in children: + if child.status == CandidateStatus.COMPLETED.value and child.result: + results.append(CandidateResult.model_validate(child.result)) + elif child.status == CandidateStatus.CANCELLED.value: + results.append( + CandidateResult( + model_type=child.model_type, + params=child.params, + failed=True, + error="cancelled", + aggregated_metrics=None, + sample_size=0, + folds=[], + ) + ) + else: # failed (or any non-completed leftover) + results.append( + CandidateResult( + model_type=child.model_type, + params=child.params, + failed=True, + error=child.error_message or "candidate failed", + aggregated_metrics=None, + sample_size=0, + folds=[], + ) + ) + + availability = ( + PairAvailabilityResponse.model_validate(row.availability_snapshot) + if row.availability_snapshot + else None + ) + availability_status: AvailabilityStatus = ( + availability.status if availability is not None else "ready" + ) + ranking = rank_candidates( + results, request.ranking_policy, row.ranking_metric, availability_status + ) + row.candidate_results = [r.model_dump(mode="json") for r in results] + row.ranking_result = ranking.model_dump(mode="json") + if ranking.winner is not None: + row.winner_model_type = ranking.winner.model_type + row.winner_metrics = ranking.winner.metrics + row.chart_data = build_chart_data(results, ranking).model_dump(mode="json") + if availability is not None: + row.business_summary = explain_winner(ranking, availability) + + counts = self._status_counts(children) + 
row.completed_candidates = counts["completed"] + row.failed_candidates = counts["failed"] + row.cancelled_candidates = counts["cancelled"] + row.status = self._terminal_status(counts).value + row.completed_at = datetime.now(UTC) + await session.commit() + logger.info( + "model_selection.run_settled", + selection_id=selection_id, + status=row.status, + winner=row.winner_model_type, + ) + + async def cancel_run(self, db: AsyncSession, selection_id: str) -> ModelSelectionRunResponse: + """Cooperatively cancel + drain an in-flight selection run.""" + row = await self._load(db, selection_id) + if row.status in TERMINAL_SELECTION_STATES: + raise ConflictError( + message=f"Selection run already terminal: {row.status}", + details={"selection_id": selection_id, "status": row.status}, + ) + logger.info("model_selection.run_cancel_requested", selection_id=selection_id) + fired = runner.cancel_selection(selection_id) + if not fired: + # Race: the worker settled between our load and the cancel. + raise ConflictError( + message="Selection run settled before cancel could fire", + details={"selection_id": selection_id}, + ) + settings = get_settings() + drained = await runner.await_drain( + selection_id, + timeout_seconds=float(settings.model_selection_cancel_drain_timeout_seconds), + ) + if not drained: + raise GatewayTimeoutError( + message=( + f"Drain exceeded {settings.model_selection_cancel_drain_timeout_seconds}s; " + "in-flight sklearn / LightGBM fits are uncancellable mid-call — " + "retry once the fit completes." + ), + details={"selection_id": selection_id}, + ) + # Re-load through a fresh read so the settled state is visible. 
+ await db.commit() + refreshed = await self._load(db, selection_id) + logger.info( + "model_selection.run_cancel_drained", + selection_id=selection_id, + status=refreshed.status, + ) + response = self._response(refreshed, self._load_ranking(refreshed)) + await self._attach_progress(db, selection_id, response) + return response + + @staticmethod + def _status_counts(children: Sequence[ModelSelectionCandidate]) -> dict[str, int]: + """Tally child statuses into the five count buckets.""" + counts = {"pending": 0, "running": 0, "completed": 0, "failed": 0, "cancelled": 0} + for child in children: + counts[child.status] = counts.get(child.status, 0) + 1 + return counts + + @staticmethod + def _terminal_status(counts: dict[str, int]) -> ModelSelectionStatus: + """Terminal-status rule at settle (mirror ``batch.service._settle``).""" + completed = counts.get("completed", 0) + failed = counts.get("failed", 0) + cancelled = counts.get("cancelled", 0) + if cancelled > 0 and completed == 0 and failed == 0: + return ModelSelectionStatus.CANCELLED + if completed > 0 and failed == 0 and cancelled == 0: + return ModelSelectionStatus.COMPLETED + if failed > 0 and completed == 0 and cancelled == 0: + return ModelSelectionStatus.FAILED + if completed > 0 or failed > 0: + return ModelSelectionStatus.PARTIAL + return ModelSelectionStatus.FAILED + + async def _attach_progress( + self, db: AsyncSession, selection_id: str, response: ModelSelectionRunResponse + ) -> None: + """Attach live ``progress`` + ``candidate_progress`` to a response. + + A legacy synchronous ``/run`` row has no children → ``progress`` stays + ``None`` and ``candidate_progress`` stays ``[]``. 
+ """ + children = ( + ( + await db.execute( + select(ModelSelectionCandidate) + .where(ModelSelectionCandidate.selection_id == selection_id) + .order_by(ModelSelectionCandidate.ordinal) + ) + ) + .scalars() + .all() + ) + if not children: + return + counts = self._status_counts(children) + response.progress = SelectionProgress( + total=len(children), + pending=counts["pending"], + running=counts["running"], + completed=counts["completed"], + failed=counts["failed"], + cancelled=counts["cancelled"], + ) + response.candidate_progress = [ + CandidateProgress( + candidate_id=child.candidate_id, + ordinal=child.ordinal, + model_type=child.model_type, + status=child.status, # type: ignore[arg-type] + error=child.error_message, + started_at=child.started_at, + completed_at=child.completed_at, + duration_ms=child.duration_ms, + ) + for child in children + ] + # ------------------------------------------------------------------------- # Read / re-run helpers # ------------------------------------------------------------------------- async def get_selection(self, db: AsyncSession, selection_id: str) -> ModelSelectionRunResponse: - """Return a persisted selection run by id (404 when missing).""" + """Return a persisted selection run by id (404 when missing). + + Attaches live async progress (Slice B) when the run has child rows; a + legacy synchronous ``/run`` row has none and reads as before. 
+ """ row = await self._load(db, selection_id) - return self._response(row, self._load_ranking(row)) + response = self._response(row, self._load_ranking(row)) + await self._attach_progress(db, selection_id, response) + return response async def get_ranking(self, db: AsyncSession, selection_id: str) -> RankingResult: """Return just the ranking block for a selection run.""" @@ -578,5 +1059,6 @@ def _response( business_summary=row.business_summary, error_message=row.error_message, created_at=row.created_at, + started_at=row.started_at, completed_at=row.completed_at, ) diff --git a/app/features/model_selection/tests/test_async_routes.py b/app/features/model_selection/tests/test_async_routes.py new file mode 100644 index 00000000..6d0f3532 --- /dev/null +++ b/app/features/model_selection/tests/test_async_routes.py @@ -0,0 +1,180 @@ +"""Unit route tests for the Slice B async endpoints (service mocked). + +Mirrors ``test_routes.py``: ``get_db`` overridden with a mock session, the +service patched at the class level. Asserts the 202 shape + headers and the +DELETE 404/409 mapping over the HTTP boundary. 
+""" + +from __future__ import annotations + +from collections.abc import AsyncGenerator +from contextlib import asynccontextmanager +from datetime import UTC, datetime +from typing import Any +from unittest.mock import AsyncMock + +import pytest +from httpx import ASGITransport, AsyncClient + +from app.core.database import get_db +from app.core.exceptions import ConflictError, NotFoundError +from app.features.model_selection.schemas import ( + CandidateProgress, + ModelSelectionRunResponse, + SelectionProgress, + SelectionWindow, + SubmitRunResponse, +) +from app.features.model_selection.service import ModelSelectionService +from app.main import app + + +@asynccontextmanager +async def _client() -> AsyncGenerator[AsyncClient, None]: + async def override_get_db() -> AsyncGenerator[AsyncMock, None]: + yield AsyncMock() + + app.dependency_overrides[get_db] = override_get_db + try: + async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as ac: + yield ac + finally: + app.dependency_overrides.pop(get_db, None) + + +def _assert_problem_detail(body: dict[str, Any], expected_status: int) -> None: + for key in ("type", "title", "status", "detail"): + assert key in body, f"missing RFC 7807 field: {key}" + assert body["status"] == expected_status + + +def _valid_run_body(**overrides: Any) -> dict[str, Any]: + body: dict[str, Any] = { + "store_id": 5, + "product_id": 8, + "selection_window": {"start_date": "2026-01-01", "end_date": "2026-05-31"}, + "forecast_horizon": 14, + "split_config": { + "strategy": "expanding", + "n_splits": 5, + "min_train_size": 30, + "gap": 0, + "horizon": 14, + }, + "candidate_models": [ + {"model_type": "naive", "params": {}}, + {"model_type": "seasonal_naive", "params": {"season_length": 7}}, + ], + } + body.update(overrides) + return body + + +def _running_submit_response(selection_id: str = "sel_async") -> SubmitRunResponse: + return SubmitRunResponse( + selection_id=selection_id, + store_id=5, + product_id=8, + 
status="running", + selection_window=SelectionWindow(start_date="2026-01-01", end_date="2026-05-31"), # type: ignore[arg-type] + forecast_horizon=14, + ranking_metric="wape", + availability=None, + ranking=[], + winner=None, + recommendation_confidence=None, + confidence_reasons=[], + chart_data=None, + final_model=None, + forecast=None, + business_summary=None, + error_message=None, + created_at=datetime.now(UTC), + started_at=datetime.now(UTC), + completed_at=None, + progress=SelectionProgress( + total=2, pending=2, running=0, completed=0, failed=0, cancelled=0 + ), + candidate_progress=[ + CandidateProgress(candidate_id="c0", ordinal=0, model_type="naive", status="pending"), + CandidateProgress( + candidate_id="c1", ordinal=1, model_type="seasonal_naive", status="pending" + ), + ], + monitor_url=f"/model-selection/{selection_id}", + cancel_url=f"/model-selection/{selection_id}", + ) + + +async def test_submit_runs_returns_202_with_headers_and_running_body( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + ModelSelectionService, + "submit_run", + AsyncMock(return_value=_running_submit_response()), + ) + async with _client() as ac: + response = await ac.post("/model-selection/runs", json=_valid_run_body()) + assert response.status_code == 202 + body = response.json() + assert body["status"] == "running" + assert body["monitor_url"] == "/model-selection/sel_async" + assert body["cancel_url"] == "/model-selection/sel_async" + assert body["progress"]["pending"] == 2 + assert len(body["candidate_progress"]) == 2 + # LRO status-monitor headers. 
+ assert response.headers.get("location") == "/model-selection/sel_async" + assert response.headers.get("retry-after") == "2" + + +async def test_submit_runs_validation_error_returns_problem_json() -> None: + """A horizon mismatch is rejected by the request validator (422).""" + bad = _valid_run_body(forecast_horizon=14) + bad["split_config"] = { + "strategy": "expanding", + "n_splits": 5, + "min_train_size": 30, + "gap": 0, + "horizon": 7, + } + async with _client() as ac: + response = await ac.post("/model-selection/runs", json=bad) + assert response.status_code == 422 + _assert_problem_detail(response.json(), 422) + + +async def test_delete_run_404_when_missing(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + ModelSelectionService, + "cancel_run", + AsyncMock(side_effect=NotFoundError(message="Selection run missing not found")), + ) + async with _client() as ac: + response = await ac.delete("/model-selection/missing") + assert response.status_code == 404 + _assert_problem_detail(response.json(), 404) + + +async def test_delete_run_409_when_terminal(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + ModelSelectionService, + "cancel_run", + AsyncMock(side_effect=ConflictError(message="Selection run already terminal: completed")), + ) + async with _client() as ac: + response = await ac.delete("/model-selection/sel_done") + assert response.status_code == 409 + _assert_problem_detail(response.json(), 409) + + +async def test_delete_run_returns_settled_200(monkeypatch: pytest.MonkeyPatch) -> None: + settled = _running_submit_response("sel_cancel") + settled_resp = ModelSelectionRunResponse.model_validate( + {**settled.model_dump(), "status": "cancelled"} + ) + monkeypatch.setattr(ModelSelectionService, "cancel_run", AsyncMock(return_value=settled_resp)) + async with _client() as ac: + response = await ac.delete("/model-selection/sel_cancel") + assert response.status_code == 200 + assert response.json()["status"] == "cancelled" diff 
--git a/app/features/model_selection/tests/test_models.py b/app/features/model_selection/tests/test_models.py index 4f69d9e9..7264aec9 100644 --- a/app/features/model_selection/tests/test_models.py +++ b/app/features/model_selection/tests/test_models.py @@ -9,7 +9,13 @@ from datetime import date -from app.features.model_selection.models import ModelSelectionRun, ModelSelectionStatus +from app.features.model_selection.models import ( + TERMINAL_SELECTION_STATES, + CandidateStatus, + ModelSelectionCandidate, + ModelSelectionRun, + ModelSelectionStatus, +) def test_status_enum_values() -> None: @@ -19,9 +25,26 @@ def test_status_enum_values() -> None: "completed", "partial", "failed", + "cancelled", } +def test_candidate_status_enum_values() -> None: + assert {s.value for s in CandidateStatus} == { + "pending", + "running", + "completed", + "failed", + "cancelled", + } + + +def test_terminal_selection_states() -> None: + assert TERMINAL_SELECTION_STATES == {"completed", "partial", "failed", "cancelled"} + assert "running" not in TERMINAL_SELECTION_STATES + assert "pending" not in TERMINAL_SELECTION_STATES + + def test_model_selection_run_construction_defaults() -> None: row = ModelSelectionRun( selection_id="abc123", @@ -39,3 +62,19 @@ def test_model_selection_run_construction_defaults() -> None: assert row.status == "running" assert row.winner_model_type is None assert row.final_model_path is None + + +def test_model_selection_candidate_construction() -> None: + cand = ModelSelectionCandidate( + candidate_id="cand1", + selection_id="abc123", + ordinal=0, + model_type="naive", + params={}, + status=CandidateStatus.PENDING.value, + ) + assert cand.candidate_id == "cand1" + assert cand.selection_id == "abc123" + assert cand.status == "pending" + assert cand.result is None + assert cand.error_message is None diff --git a/app/features/model_selection/tests/test_routes_integration.py b/app/features/model_selection/tests/test_routes_integration.py index a6440f71..b74b98c2 
100644 --- a/app/features/model_selection/tests/test_routes_integration.py +++ b/app/features/model_selection/tests/test_routes_integration.py @@ -6,6 +6,7 @@ from __future__ import annotations +import asyncio from typing import Any import pytest @@ -15,6 +16,23 @@ pytestmark = pytest.mark.integration +_TERMINAL = {"completed", "partial", "failed", "cancelled"} + + +async def _poll_until_terminal( + client: AsyncClient, selection_id: str, *, attempts: int = 60, delay: float = 0.5 +) -> dict[str, Any]: + """Poll GET /{id} until the run reaches a terminal status (or attempts run out).""" + body: dict[str, Any] = {} + for _ in range(attempts): + response = await client.get(f"/model-selection/{selection_id}") + assert response.status_code == 200 + body = response.json() + if body["status"] in _TERMINAL: + return body + await asyncio.sleep(delay) + raise AssertionError(f"run {selection_id} did not settle: last status {body.get('status')}") + def _run_body( pair: dict[str, Any], extra_candidates: list[dict[str, Any]] | None = None @@ -136,3 +154,119 @@ async def test_get_missing_selection_returns_404(client: AsyncClient) -> None: response = await client.get("/model-selection/does-not-exist") assert response.status_code == 404 assert response.json()["status"] == 404 + + +# --------------------------------------------------------------------- Slice B + + +async def test_async_runs_submits_202_and_polls_to_terminal_with_winner( + client: AsyncClient, ready_pair: dict[str, Any] +) -> None: + """POST /runs returns 202 running immediately; polling settles with a winner.""" + submit = await client.post("/model-selection/runs", json=_run_body(ready_pair)) + assert submit.status_code == 202 + body = submit.json() + assert body["status"] == "running" + selection_id = body["selection_id"] + assert body["monitor_url"] == f"/model-selection/{selection_id}" + assert body["cancel_url"] == f"/model-selection/{selection_id}" + assert body["progress"]["total"] == 3 + assert 
submit.headers.get("location") == f"/model-selection/{selection_id}" + assert submit.headers.get("retry-after") == "2" + + terminal = await _poll_until_terminal(client, selection_id) + assert terminal["status"] in {"completed", "partial"} + assert terminal["winner"] is not None + assert terminal["chart_data"] is not None + assert terminal["ranking"] + assert terminal["progress"]["total"] == 3 + # Terminal GET output is byte-compatible with the sync /run shape. + assert terminal["recommendation_confidence"] in {"high", "medium", "low"} + + +async def test_async_runs_failed_candidate_stays_visible( + client: AsyncClient, ready_pair: dict[str, Any] +) -> None: + """An invalid candidate surfaces as a failed/excluded entry, not a 500.""" + body = _run_body( + ready_pair, + extra_candidates=[{"model_type": "moving_average", "params": {"window_size": 0}}], + ) + submit = await client.post("/model-selection/runs", json=body) + assert submit.status_code == 202 + selection_id = submit.json()["selection_id"] + + terminal = await _poll_until_terminal(client, selection_id) + assert terminal["status"] == "partial" + excluded = [e for e in terminal["ranking"] if not e["included"]] + assert excluded + assert terminal["winner"] is not None + # The failed candidate is visible in candidate_progress too. + failed = [c for c in terminal["candidate_progress"] if c["status"] == "failed"] + assert failed + + +async def test_cancel_leaves_no_candidate_running( + client: AsyncClient, ready_pair: dict[str, Any], db_session: AsyncSession +) -> None: + """DELETE cooperatively cancels + drains — no candidate left 'running'.""" + submit = await client.post("/model-selection/runs", json=_run_body(ready_pair)) + assert submit.status_code == 202 + selection_id = submit.json()["selection_id"] + + # Cancel almost immediately. 
Fast baseline fits are uncancellable mid-call + # and may settle the whole run before the DELETE arrives — an HONEST race: + # 200 = the cancel fired and drained; + # 409 = the run had already settled (so nothing was left to cancel). + # Either way the LOAD-BEARING invariant below must hold. + cancel = await client.delete(f"/model-selection/{selection_id}") + assert cancel.status_code in {200, 409} + + # Ensure the run is terminal before asserting the invariant (covers the 200 + # path where the worker just settled, and the 409 already-settled path). + await _poll_until_terminal(client, selection_id) + + # The load-bearing invariant: after the drain, no candidate row is 'running'. + rows = await db_session.execute( + text( + "SELECT count(*) FROM model_selection_candidate " + "WHERE selection_id = :sid AND status = 'running'" + ), + {"sid": selection_id}, + ) + assert rows.scalar() == 0 + + +async def test_cancel_terminal_run_returns_409( + client: AsyncClient, ready_pair: dict[str, Any] +) -> None: + """Cancelling an already-settled run returns 409.""" + submit = await client.post("/model-selection/runs", json=_run_body(ready_pair)) + selection_id = submit.json()["selection_id"] + await _poll_until_terminal(client, selection_id) + + cancel = await client.delete(f"/model-selection/{selection_id}") + assert cancel.status_code == 409 + assert cancel.json()["status"] == 409 + + +async def test_candidate_table_has_named_indexes(db_session: AsyncSession) -> None: + rows = await db_session.execute( + text("SELECT indexname FROM pg_indexes WHERE tablename = 'model_selection_candidate'") + ) + names = {row[0] for row in rows} + assert "ix_model_selection_candidate_candidate_id" in names + assert "ix_model_selection_candidate_selection_status" in names + + +async def test_legacy_sync_run_has_no_progress_children( + client: AsyncClient, ready_pair: dict[str, Any] +) -> None: + """A legacy synchronous /run row carries no async progress.""" + run = await 
client.post("/model-selection/run", json=_run_body(ready_pair)) + assert run.status_code == 200 + selection_id = run.json()["selection_id"] + fetched = await client.get(f"/model-selection/{selection_id}") + body = fetched.json() + assert body["progress"] is None + assert body["candidate_progress"] == [] diff --git a/app/features/model_selection/tests/test_runner.py b/app/features/model_selection/tests/test_runner.py new file mode 100644 index 00000000..9421d303 --- /dev/null +++ b/app/features/model_selection/tests/test_runner.py @@ -0,0 +1,238 @@ +"""Unit tests for the Slice B bounded-concurrency candidate runner. + +The runner's DB helpers are monkeypatched to awaitable no-ops so the asyncio +orchestration is exercised without docker-compose. The DB invariants (no +candidate left ``running`` after a cancel drain) are covered in the integration +suite. Mirrors ``app/features/batch/tests/test_runner.py``. +""" + +from __future__ import annotations + +import asyncio +from contextlib import asynccontextmanager +from typing import Any, cast +from unittest.mock import AsyncMock + +import pytest + +from app.features.model_selection import runner + + +@pytest.fixture(autouse=True) +def _clear_registry() -> Any: + runner._ACTIVE_SELECTIONS.clear() + yield + runner._ACTIVE_SELECTIONS.clear() + + +@pytest.fixture +def patch_db_helpers(monkeypatch: pytest.MonkeyPatch) -> dict[str, list[Any]]: + """Replace runner DB helpers with awaitable no-ops + a call tracker.""" + calls: dict[str, list[Any]] = { + "mark_cancelled_skipped": [], + "mark_cancelled_running": [], + "mark_failed_unexpected": [], + } + + async def _mark_cancelled_skipped(_session: Any, candidate_id: str) -> None: + calls["mark_cancelled_skipped"].append(candidate_id) + + async def _mark_cancelled_running(_session: Any, candidate_id: str) -> None: + calls["mark_cancelled_running"].append(candidate_id) + + async def _mark_failed_unexpected(_session: Any, candidate_id: str) -> None: + 
calls["mark_failed_unexpected"].append(candidate_id) + + monkeypatch.setattr(runner, "_mark_cancelled_skipped", _mark_cancelled_skipped) + monkeypatch.setattr(runner, "_mark_cancelled_running", _mark_cancelled_running) + monkeypatch.setattr(runner, "_mark_failed_unexpected", _mark_failed_unexpected) + return calls + + +def _fake_session_maker() -> Any: + @asynccontextmanager + async def _ctx() -> Any: + yield AsyncMock() + + def _maker() -> Any: + return _ctx() + + return cast(Any, _maker) + + +# ---------------------------------------------------------------- semaphore + + +async def test_runner_semaphore_caps_concurrency( + patch_db_helpers: dict[str, list[Any]], +) -> None: + """5 candidates with max_parallel=2 — observed concurrent peak == 2.""" + in_flight = 0 + peak = 0 + + async def child(_cid: str) -> None: + nonlocal in_flight, peak + in_flight += 1 + peak = max(peak, in_flight) + try: + await asyncio.sleep(0.02) + finally: + in_flight -= 1 + + effective = await runner.run_selection_candidates( + selection_id="s_sem", + candidate_ids=[f"c{i}" for i in range(5)], + max_parallel=2, + global_max_parallel=10, + session_maker=_fake_session_maker(), + execute_candidate=child, + ) + runner.mark_completed("s_sem") + assert effective == 2 + assert peak == 2, f"observed peak {peak}, expected exactly 2" + + +async def test_runner_global_cap_clamps_max_parallel( + patch_db_helpers: dict[str, list[Any]], +) -> None: + """max_parallel=32 clamped by global_max_parallel=1 → sequential (peak 1).""" + in_flight = 0 + peak = 0 + + async def child(_cid: str) -> None: + nonlocal in_flight, peak + in_flight += 1 + peak = max(peak, in_flight) + try: + await asyncio.sleep(0.01) + finally: + in_flight -= 1 + + effective = await runner.run_selection_candidates( + selection_id="s_seq", + candidate_ids=[f"c{i}" for i in range(4)], + max_parallel=32, + global_max_parallel=1, + session_maker=_fake_session_maker(), + execute_candidate=child, + ) + runner.mark_completed("s_seq") + assert 
effective == 1 + assert peak == 1, f"global cap of 1 must serialize; observed peak {peak}" + + +# ---------------------------------------------------- per-child failure isolation + + +async def test_runner_child_failure_does_not_abort_siblings( + patch_db_helpers: dict[str, list[Any]], +) -> None: + completed: list[str] = [] + + async def child(cid: str) -> None: + if cid == "c2": + raise RuntimeError("synthetic failure") + await asyncio.sleep(0.01) + completed.append(cid) + + await runner.run_selection_candidates( + selection_id="s_fail", + candidate_ids=[f"c{i}" for i in range(5)], + max_parallel=5, + global_max_parallel=10, + session_maker=_fake_session_maker(), + execute_candidate=child, + ) + runner.mark_completed("s_fail") + assert sorted(completed) == ["c0", "c1", "c3", "c4"] + assert patch_db_helpers["mark_failed_unexpected"] == ["c2"] + + +# --------------------------------------------------------------- cancel paths + + +async def test_runner_cancel_before_start_skips( + patch_db_helpers: dict[str, list[Any]], +) -> None: + """max_parallel=1, 3 candidates. 
Cancel after c0 starts → c1/c2 skip.""" + started: list[str] = [] + + async def child(cid: str) -> None: + started.append(cid) + await asyncio.sleep(0.5) + + task = asyncio.create_task( + runner.run_selection_candidates( + selection_id="s_pending", + candidate_ids=["c0", "c1", "c2"], + max_parallel=1, + global_max_parallel=10, + session_maker=_fake_session_maker(), + execute_candidate=child, + ) + ) + await asyncio.sleep(0.05) + fired = runner.cancel_selection("s_pending") + await task + runner.mark_completed("s_pending") + + assert fired is True + assert patch_db_helpers["mark_cancelled_running"] == ["c0"] + assert set(patch_db_helpers["mark_cancelled_skipped"]) == {"c1", "c2"} + assert started == ["c0"] + + +async def test_runner_cancel_mid_flight_marks_cancelled( + patch_db_helpers: dict[str, list[Any]], +) -> None: + cancelled_in_child: list[str] = [] + + async def child(cid: str) -> None: + try: + await asyncio.sleep(1.0) + except asyncio.CancelledError: + cancelled_in_child.append(cid) + raise + + task = asyncio.create_task( + runner.run_selection_candidates( + selection_id="s_running", + candidate_ids=["c0"], + max_parallel=1, + global_max_parallel=10, + session_maker=_fake_session_maker(), + execute_candidate=child, + ) + ) + await asyncio.sleep(0.05) + runner.cancel_selection("s_running") + await task + runner.mark_completed("s_running") + assert cancelled_in_child == ["c0"] + assert patch_db_helpers["mark_cancelled_running"] == ["c0"] + + +# ------------------------------------------------------------- registry hygiene + + +async def test_mark_completed_unblocks_await_drain() -> None: + runner._ACTIVE_SELECTIONS["sx"] = runner.CancelHandle() + drain_task = asyncio.create_task(runner.await_drain("sx", timeout_seconds=1.0)) + await asyncio.sleep(0.01) + runner.mark_completed("sx") + drained = await drain_task + assert drained is True + assert "sx" not in runner._ACTIVE_SELECTIONS + + +async def test_cancel_selection_returns_false_when_unregistered() -> 
None: + assert runner.cancel_selection("nope") is False + + +async def test_await_drain_returns_true_when_unregistered() -> None: + assert await runner.await_drain("nope", timeout_seconds=0.0) is True + + +async def test_await_drain_times_out_on_stuck_handle() -> None: + runner._ACTIVE_SELECTIONS["s_stuck"] = runner.CancelHandle() + assert await runner.await_drain("s_stuck", timeout_seconds=0.05) is False diff --git a/app/features/model_selection/tests/test_schemas.py b/app/features/model_selection/tests/test_schemas.py index 3d34c510..87fb093d 100644 --- a/app/features/model_selection/tests/test_schemas.py +++ b/app/features/model_selection/tests/test_schemas.py @@ -2,12 +2,18 @@ from __future__ import annotations +from datetime import UTC, datetime + import pytest from pydantic import ValidationError from app.features.model_selection.schemas import ( + CandidateProgress, ModelSelectionRunRequest, + ModelSelectionRunResponse, + SelectionProgress, SelectionWindow, + SubmitRunResponse, ) @@ -79,3 +85,79 @@ def test_candidate_models_min_length_enforced() -> None: """At least one candidate is required.""" with pytest.raises(ValidationError): ModelSelectionRunRequest.model_validate(_base_request_dict(candidate_models=[])) + + +# --------------------------------------------------------------------- Slice B + + +def _base_response_dict(**overrides: object) -> dict[str, object]: + payload: dict[str, object] = { + "selection_id": "sel1", + "store_id": 1, + "product_id": 2, + "status": "running", + "selection_window": {"start_date": "2026-01-01", "end_date": "2026-05-31"}, + "forecast_horizon": 14, + "ranking_metric": "wape", + "availability": None, + "ranking": [], + "winner": None, + "recommendation_confidence": None, + "confidence_reasons": [], + "chart_data": None, + "final_model": None, + "forecast": None, + "business_summary": None, + "error_message": None, + "created_at": datetime(2026, 6, 1, 12, 0, 0, tzinfo=UTC), + "completed_at": None, + } + 
payload.update(overrides) + return payload + + +def test_response_progress_fields_default_safely() -> None: + """Legacy sync-run rows validate without progress fields (additive defaults).""" + resp = ModelSelectionRunResponse.model_validate(_base_response_dict()) + assert resp.started_at is None + assert resp.progress is None + assert resp.candidate_progress == [] + + +def test_status_literal_accepts_cancelled() -> None: + """The 'cancelled' status (Slice B) is accepted by the response literal.""" + resp = ModelSelectionRunResponse.model_validate(_base_response_dict(status="cancelled")) + assert resp.status == "cancelled" + + +def test_selection_and_candidate_progress_models() -> None: + progress = SelectionProgress(total=5, pending=3, running=1, completed=1, failed=0, cancelled=0) + assert progress.total == 5 + cand = CandidateProgress(candidate_id="c1", ordinal=0, model_type="naive", status="running") + assert cand.status == "running" + assert cand.error is None + + +def test_submit_run_response_carries_monitor_and_cancel_urls() -> None: + submit = SubmitRunResponse.model_validate( + _base_response_dict( + monitor_url="/model-selection/sel1", + cancel_url="/model-selection/sel1", + progress={ + "total": 1, + "pending": 1, + "running": 0, + "completed": 0, + "failed": 0, + "cancelled": 0, + }, + candidate_progress=[ + {"candidate_id": "c1", "ordinal": 0, "model_type": "naive", "status": "pending"} + ], + ) + ) + assert submit.monitor_url == "/model-selection/sel1" + assert submit.cancel_url == "/model-selection/sel1" + assert submit.progress is not None + assert submit.progress.pending == 1 + assert submit.candidate_progress[0].model_type == "naive" diff --git a/app/features/model_selection/tests/test_service.py b/app/features/model_selection/tests/test_service.py index 7d3da5f1..2fd3002e 100644 --- a/app/features/model_selection/tests/test_service.py +++ b/app/features/model_selection/tests/test_service.py @@ -5,7 +5,7 @@ from datetime import date, timedelta from 
types import SimpleNamespace from typing import Any -from unittest.mock import AsyncMock +from unittest.mock import AsyncMock, MagicMock from uuid import uuid4 import pytest @@ -218,5 +218,150 @@ async def test_response_uses_recommendation_confidence_key( async def test_get_selection_missing_raises_not_found() -> None: db = AsyncMock() db.scalar = AsyncMock(return_value=None) + db.execute = AsyncMock() with pytest.raises(NotFoundError): await ModelSelectionService().get_selection(db, uuid4().hex) + + +# ----------------------------------------------------------------------------- +# Slice B — async submit / settle / cancel (worker mocked or DB-free units) +# ----------------------------------------------------------------------------- + +from datetime import UTC, datetime # noqa: E402 + +from app.core.exceptions import ConflictError # noqa: E402 +from app.features.model_selection import runner as _runner # noqa: E402 +from app.features.model_selection.models import ( # noqa: E402 + ModelSelectionCandidate, + ModelSelectionRun, + ModelSelectionStatus, +) + + +def _submit_mock_db() -> AsyncMock: + """Mock ``AsyncSession`` whose ``refresh`` stamps ``created_at`` on the run.""" + db = AsyncMock() + added: list[Any] = [] + + def _add(obj: Any) -> None: + added.append(obj) + + async def _refresh(obj: Any) -> None: + if isinstance(obj, ModelSelectionRun) and obj.created_at is None: + obj.created_at = datetime.now(UTC) + + db.add = MagicMock(side_effect=_add) + db.commit = AsyncMock() + db.refresh = AsyncMock(side_effect=_refresh) + db._added = added # expose for assertions + return db + + +async def test_submit_run_inserts_running_parent_and_pending_candidates( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_availability(monkeypatch, "ready") + # Stub the detached worker so create_task schedules a harmless no-op. 
+ monkeypatch.setattr(ModelSelectionService, "_run_in_background", AsyncMock()) + + request = _request( + candidate_models=[ + {"model_type": "naive", "params": {}}, + {"model_type": "seasonal_naive", "params": {"season_length": 7}}, + ] + ) + db = _submit_mock_db() + response = await ModelSelectionService().submit_run(db, request) + + assert response.status == "running" + assert response.monitor_url == f"/model-selection/{response.selection_id}" + assert response.cancel_url == f"/model-selection/{response.selection_id}" + assert response.progress is not None + assert response.progress.total == 2 + assert response.progress.pending == 2 + assert len(response.candidate_progress) == 2 + assert {c.status for c in response.candidate_progress} == {"pending"} + + parents = [o for o in db._added if isinstance(o, ModelSelectionRun)] + children = [o for o in db._added if isinstance(o, ModelSelectionCandidate)] + assert len(parents) == 1 + assert parents[0].status == ModelSelectionStatus.RUNNING.value + assert parents[0].started_at is not None + assert parents[0].total_candidates == 2 + assert len(children) == 2 + assert {c.status for c in children} == {"pending"} + assert [c.ordinal for c in children] == [0, 1] + + +async def test_submit_run_unusable_availability_raises_400( + monkeypatch: pytest.MonkeyPatch, +) -> None: + _patch_availability(monkeypatch, "unusable") + monkeypatch.setattr(ModelSelectionService, "_run_in_background", AsyncMock()) + db = _submit_mock_db() + with pytest.raises(BadRequestError): + await ModelSelectionService().submit_run(db, _request()) + # The parent was persisted as failed; no children were inserted. 
+ parents = [o for o in db._added if isinstance(o, ModelSelectionRun)] + children = [o for o in db._added if isinstance(o, ModelSelectionCandidate)] + assert parents[0].status == ModelSelectionStatus.FAILED.value + assert children == [] + + +def test_terminal_status_rule() -> None: + svc = ModelSelectionService() + f = svc._terminal_status + assert f({"completed": 3, "failed": 0, "cancelled": 0}) is ModelSelectionStatus.COMPLETED + assert f({"completed": 0, "failed": 3, "cancelled": 0}) is ModelSelectionStatus.FAILED + assert f({"completed": 0, "failed": 0, "cancelled": 3}) is ModelSelectionStatus.CANCELLED + assert f({"completed": 2, "failed": 1, "cancelled": 0}) is ModelSelectionStatus.PARTIAL + assert f({"completed": 1, "failed": 0, "cancelled": 1}) is ModelSelectionStatus.PARTIAL + + +async def test_cancel_run_404_when_missing() -> None: + db = AsyncMock() + db.scalar = AsyncMock(return_value=None) + with pytest.raises(NotFoundError): + await ModelSelectionService().cancel_run(db, uuid4().hex) + + +async def test_cancel_run_409_when_terminal() -> None: + row = ModelSelectionRun( + selection_id="sel_terminal", + status=ModelSelectionStatus.COMPLETED.value, + store_id=1, + product_id=1, + start_date=date(2026, 1, 1), + end_date=date(2026, 5, 31), + forecast_horizon=14, + ranking_metric="wape", + candidate_models=[], + policy_snapshot={}, + ) + db = AsyncMock() + db.scalar = AsyncMock(return_value=row) + with pytest.raises(ConflictError): + await ModelSelectionService().cancel_run(db, "sel_terminal") + + +async def test_cancel_run_409_when_settle_races_cancel( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """If the worker settled (no handle) between load and cancel → 409.""" + row = ModelSelectionRun( + selection_id="sel_race", + status=ModelSelectionStatus.RUNNING.value, + store_id=1, + product_id=1, + start_date=date(2026, 1, 1), + end_date=date(2026, 5, 31), + forecast_horizon=14, + ranking_metric="wape", + candidate_models=[], + policy_snapshot={}, + ) + db 
= AsyncMock() + db.scalar = AsyncMock(return_value=row) + monkeypatch.setattr(_runner, "cancel_selection", lambda _sid: False) + with pytest.raises(ConflictError): + await ModelSelectionService().cancel_run(db, "sel_race") diff --git a/frontend/src/components/champion-selector/results/cancel-run-dialog.test.tsx b/frontend/src/components/champion-selector/results/cancel-run-dialog.test.tsx new file mode 100644 index 00000000..c5d53231 --- /dev/null +++ b/frontend/src/components/champion-selector/results/cancel-run-dialog.test.tsx @@ -0,0 +1,33 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { CancelRunDialog } from './cancel-run-dialog' + +beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) + if (!Element.prototype.hasPointerCapture) { + Element.prototype.hasPointerCapture = () => false + } +}) + +afterEach(cleanup) + +describe('CancelRunDialog', () => { + it('confirms cancellation via the AlertDialog', () => { + const onConfirm = vi.fn() + render() + fireEvent.click(screen.getByTestId('cancel-run-trigger')) + fireEvent.click(screen.getByTestId('cancel-run-confirm')) + expect(onConfirm).toHaveBeenCalledTimes(1) + }) + + it('disables the trigger while cancelling', () => { + render( {}} isCancelling />) + const trigger = screen.getByTestId('cancel-run-trigger') as HTMLButtonElement + expect(trigger.disabled).toBe(true) + }) +}) diff --git a/frontend/src/components/champion-selector/results/cancel-run-dialog.tsx b/frontend/src/components/champion-selector/results/cancel-run-dialog.tsx new file mode 100644 index 00000000..d85c08ca --- /dev/null +++ b/frontend/src/components/champion-selector/results/cancel-run-dialog.tsx @@ -0,0 +1,62 @@ +import { Loader2, X } from 'lucide-react' +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + 
AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, + AlertDialogTrigger, +} from '@/components/ui/alert-dialog' +import { Button } from '@/components/ui/button' + +interface CancelRunDialogProps { + onConfirm: () => void + isCancelling?: boolean + disabled?: boolean +} + +/** + * Cancel-run confirmation (Slice B). Mirrors the batch cancel dialog and reuses + * the honest pending-skip / running-yield copy. + */ +export function CancelRunDialog({ onConfirm, isCancelling, disabled }: CancelRunDialogProps) { + return ( + + + + + + + Cancel this comparison? + + Candidates that haven't started will be skipped. A candidate + already mid-fit stops at the next safe point — sklearn / LightGBM + fits are uncancellable mid-call, so an in-flight fit may finish + first. Results from candidates that already completed are kept. + + + + Keep running + + Cancel run + + + + + ) +} diff --git a/frontend/src/components/champion-selector/results/comparison-charts.test.tsx b/frontend/src/components/champion-selector/results/comparison-charts.test.tsx new file mode 100644 index 00000000..d1ea60bf --- /dev/null +++ b/frontend/src/components/champion-selector/results/comparison-charts.test.tsx @@ -0,0 +1,36 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { ComparisonCharts } from './comparison-charts' +import type { ModelSelectionChartData } from '@/types/api' + +// Recharts' ResponsiveContainer needs ResizeObserver in jsdom. 
+beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) +}) + +afterEach(cleanup) + +const chartData: ModelSelectionChartData = { + wape_by_model: { regression: 10, naive: 14 }, + bias_by_model: { regression: -0.2, naive: 0.5 }, + fold_stability: { regression: [10, 11] }, + winner_actual_vs_predicted: [ + { dates: ['2026-01-01', '2026-01-02'], actuals: [10, 12], predictions: [9.5, 12.5] }, + ], +} + +describe('ComparisonCharts', () => { + it('renders WAPE + bias bars from chart_data', () => { + render() + expect(screen.getByTestId('comparison-charts')).toBeTruthy() + expect(screen.getByTestId('metric-bars-wape-by-model')).toBeTruthy() + expect(screen.getByTestId('metric-bars-bias-by-model')).toBeTruthy() + // Winner is starred in the bar list. + expect(screen.getAllByText('★ regression').length).toBeGreaterThan(0) + }) +}) diff --git a/frontend/src/components/champion-selector/results/comparison-charts.tsx b/frontend/src/components/champion-selector/results/comparison-charts.tsx new file mode 100644 index 00000000..5e192a22 --- /dev/null +++ b/frontend/src/components/champion-selector/results/comparison-charts.tsx @@ -0,0 +1,105 @@ +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { MultiSeriesChart } from '@/components/charts/multi-series-chart' +import { BIAS_EXPLANATION } from '@/components/champion-selector/copy' +import type { ModelSelectionChartData } from '@/types/api' + +interface ComparisonChartsProps { + chartData: ModelSelectionChartData + winnerModelType?: string +} + +/** One labelled horizontal bar (CSS — deterministic, no chart lib needed). 
*/ +function MetricBars({ + title, + byModel, + winnerModelType, + signed = false, +}: { + title: string + byModel: Record + winnerModelType?: string + signed?: boolean +}) { + const entries = Object.entries(byModel) + const max = Math.max(1, ...entries.map(([, v]) => Math.abs(v))) + return ( +
+

{title}

+ {entries.map(([model, value]) => ( +
+ + {model === winnerModelType ? `★ ${model}` : model} + +
+
+
+ {value.toFixed(2)} +
+ ))} +
+ ) +} + +/** + * Comparison charts (Slice B): WAPE-by-model + bias-by-model bars, and the + * winner's actual-vs-predicted overlay. Reads the backend `chart_data` payload. + */ +export function ComparisonCharts({ chartData, winnerModelType }: ComparisonChartsProps) { + // Build actual-vs-predicted rows for the winner from the fold chart points. + const avpRows: Record[] = [] + for (const fold of chartData.winner_actual_vs_predicted as Array<{ + dates?: string[] + actuals?: number[] + predictions?: number[] + }>) { + const dates = fold.dates ?? [] + const actuals = fold.actuals ?? [] + const predictions = fold.predictions ?? [] + for (let i = 0; i < dates.length; i++) { + avpRows.push({ + date: dates[i] ?? String(i), + actual: actuals[i] ?? 0, + predicted: predictions[i] ?? 0, + }) + } + } + + return ( + + + Comparison + {BIAS_EXPLANATION} + + +
+ + +
+ {avpRows.length > 0 && ( + + )} +
+
+ ) +} diff --git a/frontend/src/components/champion-selector/results/constants.ts b/frontend/src/components/champion-selector/results/constants.ts new file mode 100644 index 00000000..41aa3bb2 --- /dev/null +++ b/frontend/src/components/champion-selector/results/constants.ts @@ -0,0 +1,17 @@ +import type { ModelSelectionStatus } from '@/types/api' + +/** + * Terminal selection-run statuses (Slice B). Polling stops once a run reaches + * one of these. Kept in a `.ts` module so the + * `react-refresh/only-export-components` lint rule never trips. + */ +export const TERMINAL_SELECTION_STATES: ReadonlySet = new Set([ + 'completed', + 'partial', + 'failed', + 'cancelled', +]) + +export function isTerminalSelectionStatus(status: ModelSelectionStatus): boolean { + return TERMINAL_SELECTION_STATES.has(status) +} diff --git a/frontend/src/components/champion-selector/results/model-detail-drawer.test.tsx b/frontend/src/components/champion-selector/results/model-detail-drawer.test.tsx new file mode 100644 index 00000000..83d90d1b --- /dev/null +++ b/frontend/src/components/champion-selector/results/model-detail-drawer.test.tsx @@ -0,0 +1,43 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { ModelDetailDrawer } from './model-detail-drawer' +import type { ModelRankEntry } from '@/types/api' + +// Radix Dialog (Sheet) needs these layout APIs in jsdom. 
+beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) + if (!Element.prototype.hasPointerCapture) { + Element.prototype.hasPointerCapture = () => false + } +}) + +afterEach(cleanup) + +const entry: ModelRankEntry = { + rank: 1, + model_type: 'regression', + params: { max_depth: 6 }, + included: true, + exclusion_reason: null, + metrics: { wape: 10, smape: 8, mae: 4, rmse: 5, bias: 0.1 }, +} + +describe('ModelDetailDrawer', () => { + it('renders the candidate metrics + params when open', () => { + render( {}} />) + const drawer = screen.getByTestId('model-detail-drawer') + expect(drawer.textContent).toContain('regression') + expect(drawer.textContent).toContain('WAPE') + expect(drawer.textContent).toContain('max_depth') + }) + + it('renders nothing meaningful when closed', () => { + render( {}} />) + expect(screen.queryByTestId('model-detail-drawer')).toBeNull() + }) +}) diff --git a/frontend/src/components/champion-selector/results/model-detail-drawer.tsx b/frontend/src/components/champion-selector/results/model-detail-drawer.tsx new file mode 100644 index 00000000..f7ac0148 --- /dev/null +++ b/frontend/src/components/champion-selector/results/model-detail-drawer.tsx @@ -0,0 +1,79 @@ +import { + Sheet, + SheetContent, + SheetDescription, + SheetHeader, + SheetTitle, +} from '@/components/ui/sheet' +import { Badge } from '@/components/ui/badge' +import type { ModelRankEntry } from '@/types/api' + +interface ModelDetailDrawerProps { + entry: ModelRankEntry | null + open: boolean + onOpenChange: (open: boolean) => void +} + +function fmt(value: number | undefined): string { + if (typeof value !== 'number' || !Number.isFinite(value)) return '—' + return value.toFixed(3) +} + +const METRIC_KEYS: { key: string; label: string }[] = [ + { key: 'wape', label: 'WAPE' }, + { key: 'smape', label: 'sMAPE' }, + { key: 'mae', label: 'MAE' }, + { key: 'rmse', label: 'RMSE' }, + { 
key: 'bias', label: 'Bias' }, +] + +/** + * Per-model detail drawer (Slice B). Opens from a ranking-row click; shows one + * candidate's metrics, params, and exclusion reason (read-only). + */ +export function ModelDetailDrawer({ entry, open, onOpenChange }: ModelDetailDrawerProps) { + return ( + + + {entry && ( + <> + + + {entry.model_type} + {!entry.included && ( + {entry.exclusion_reason ?? 'excluded'} + )} + + + {entry.rank !== null ? `Ranked #${entry.rank}` : 'Not ranked'} + + +
+
+

Metrics

+ + + {METRIC_KEYS.map((m) => ( + + + + + ))} + +
{m.label} + {fmt(entry.metrics?.[m.key])} +
+
+
+

Parameters

+
+                  {JSON.stringify(entry.params, null, 2)}
+                
+
+
+ + )} +
+
+ ) +} diff --git a/frontend/src/components/champion-selector/results/ranking-table.test.tsx b/frontend/src/components/champion-selector/results/ranking-table.test.tsx new file mode 100644 index 00000000..9943ff6b --- /dev/null +++ b/frontend/src/components/champion-selector/results/ranking-table.test.tsx @@ -0,0 +1,50 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { RankingTable } from './ranking-table' +import type { ModelRankEntry } from '@/types/api' + +afterEach(cleanup) + +const ranking: ModelRankEntry[] = [ + { + rank: 1, + model_type: 'regression', + params: {}, + included: true, + exclusion_reason: null, + metrics: { wape: 10, smape: 8, mae: 4, bias: 0.1 }, + }, + { + rank: 2, + model_type: 'naive', + params: {}, + included: true, + exclusion_reason: null, + metrics: { wape: 14, smape: 12, mae: 6, bias: 0.5 }, + }, + { + rank: null, + model_type: 'moving_average', + params: { window_size: 0 }, + included: false, + exclusion_reason: 'failed', + metrics: null, + }, +] + +describe('RankingTable', () => { + it('renders a row per entry; excluded rows show their reason', () => { + render( {}} />) + expect(screen.getByTestId('ranking-row-regression')).toBeTruthy() + expect(screen.getByTestId('ranking-row-naive')).toBeTruthy() + const excluded = screen.getByTestId('ranking-row-moving_average') + expect(excluded.textContent).toContain('failed') + }) + + it('calls onSelectModel with the clicked entry', () => { + const onSelect = vi.fn() + render() + fireEvent.click(screen.getByTestId('ranking-row-naive')) + expect(onSelect).toHaveBeenCalledWith(ranking[1]) + }) +}) diff --git a/frontend/src/components/champion-selector/results/ranking-table.tsx b/frontend/src/components/champion-selector/results/ranking-table.tsx new file mode 100644 index 00000000..a8c0515a --- /dev/null +++ b/frontend/src/components/champion-selector/results/ranking-table.tsx @@ -0,0 +1,90 @@ 
+import { Trophy } from 'lucide-react' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Badge } from '@/components/ui/badge' +import { cn } from '@/lib/utils' +import { RANKING_TIE_BREAK } from '@/components/champion-selector/copy' +import type { ModelRankEntry } from '@/types/api' + +interface RankingTableProps { + ranking: ModelRankEntry[] + onSelectModel: (entry: ModelRankEntry) => void +} + +function fmt(value: number | undefined): string { + if (typeof value !== 'number' || !Number.isFinite(value)) return '—' + return value.toFixed(2) +} + +/** + * Candidate ranking table (Slice B). Winner row highlighted; excluded + * (failed/cancelled/filtered) rows show their reason and stay visible. Clicking + * a row opens the model-detail drawer. + */ +export function RankingTable({ ranking, onSelectModel }: RankingTableProps) { + return ( + + + Ranking + {RANKING_TIE_BREAK} + + + + + + + + + + + + + + + {ranking.map((entry) => ( + onSelectModel(entry)} + className={cn( + 'cursor-pointer border-t hover:bg-accent/50', + entry.rank === 1 && 'bg-primary/5 font-medium', + !entry.included && 'text-muted-foreground', + )} + > + + + + + + + + ))} + +
RankModelWAPEsMAPEMAEBias
+ {entry.rank === 1 ? ( + + 1 + + ) : ( + (entry.rank ?? '—') + )} + + {entry.model_type} + {!entry.included && ( + + {entry.exclusion_reason ?? 'excluded'} + + )} + + {fmt(entry.metrics?.['wape'])} + + {fmt(entry.metrics?.['smape'])} + + {fmt(entry.metrics?.['mae'])} + + {fmt(entry.metrics?.['bias'])} +
+
+
+ ) +} diff --git a/frontend/src/components/champion-selector/results/run-progress-panel.test.tsx b/frontend/src/components/champion-selector/results/run-progress-panel.test.tsx new file mode 100644 index 00000000..13c4ef54 --- /dev/null +++ b/frontend/src/components/champion-selector/results/run-progress-panel.test.tsx @@ -0,0 +1,57 @@ +import { afterEach, describe, expect, it } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { RunProgressPanel } from './run-progress-panel' +import type { CandidateProgress, SelectionProgress } from '@/types/api' + +afterEach(cleanup) + +const progress: SelectionProgress = { + total: 3, + pending: 1, + running: 1, + completed: 1, + failed: 0, + cancelled: 0, +} + +function cand(model_type: string, status: CandidateProgress['status']): CandidateProgress { + return { + candidate_id: `id-${model_type}`, + ordinal: 0, + model_type, + status, + error: status === 'failed' ? 'boom' : null, + started_at: null, + completed_at: null, + duration_ms: status === 'completed' ? 
1500 : null, + } +} + +describe('RunProgressPanel', () => { + it('renders status badge, counts, and a per-candidate row', () => { + render( + , + ) + expect(screen.getByTestId('run-status-badge').textContent).toContain('running') + expect(screen.getByText('Total')).toBeTruthy() + expect(screen.getByTestId('candidate-row-naive')).toBeTruthy() + expect(screen.getByTestId('candidate-row-regression')).toBeTruthy() + }) + + it('keeps a failed candidate visible with its error', () => { + render( + , + ) + const row = screen.getByTestId('candidate-row-xgboost') + expect(row.textContent).toContain('failed') + expect(row.textContent).toContain('boom') + }) +}) diff --git a/frontend/src/components/champion-selector/results/run-progress-panel.tsx b/frontend/src/components/champion-selector/results/run-progress-panel.tsx new file mode 100644 index 00000000..4c5699a3 --- /dev/null +++ b/frontend/src/components/champion-selector/results/run-progress-panel.tsx @@ -0,0 +1,87 @@ +import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' +import { StatusBadge } from '@/components/common/status-badge' +import { getStatusVariant } from '@/lib/status-utils' +import type { + CandidateProgress, + ModelSelectionStatus, + SelectionProgress, +} from '@/types/api' + +interface RunProgressPanelProps { + status: ModelSelectionStatus + progress: SelectionProgress | null + candidates: CandidateProgress[] +} + +function Count({ label, value }: { label: string; value: number }) { + return ( +
+

{label}

+

{value}

+
+ ) +} + +/** + * Live async-run progress (Slice B): the run status, per-status counts, and a + * per-candidate table. Failed/cancelled candidates stay visible. + */ +export function RunProgressPanel({ status, progress, candidates }: RunProgressPanelProps) { + return ( + + +
+ Comparison progress + + {status} + +
+
+ + {progress && ( +
+ + + + + + +
+ )} + {candidates.length > 0 && ( + + + + + + + + + + {candidates.map((c) => ( + + + + + + ))} + +
ModelStatusDuration
{c.model_type} + + {c.status} + + {c.error && ( + {c.error} + )} + + {c.duration_ms === null ? '—' : `${(c.duration_ms / 1000).toFixed(1)}s`} +
+ )} +
+
+ ) +} diff --git a/frontend/src/components/champion-selector/results/winner-card.test.tsx b/frontend/src/components/champion-selector/results/winner-card.test.tsx new file mode 100644 index 00000000..54054253 --- /dev/null +++ b/frontend/src/components/champion-selector/results/winner-card.test.tsx @@ -0,0 +1,40 @@ +import { afterEach, describe, expect, it } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { WinnerCard } from './winner-card' +import type { WinnerSummary } from '@/types/api' + +afterEach(cleanup) + +const winner: WinnerSummary = { + model_type: 'regression', + params: {}, + metrics: { wape: 10, smape: 8, mae: 4, bias: 0.1 }, + rank: 1, +} + +describe('WinnerCard', () => { + it('renders the winner, confidence, metrics, and bias copy', () => { + render() + expect(screen.getByTestId('winner-card').textContent).toContain('regression') + expect(screen.getByTestId('winner-confidence-badge').textContent).toContain('high') + expect(screen.getByText('clear lead')).toBeTruthy() + expect(screen.getByText(/Positive bias means the model under-forecasts/)).toBeTruthy() + }) + + it('renders a no-winner state when winner is null', () => { + render() + expect(screen.getByText('No champion selected')).toBeTruthy() + }) + + it('surfaces the deterministic business_summary headline read-only', () => { + render( + , + ) + expect(screen.getByText('regression wins by 28% WAPE')).toBeTruthy() + }) +}) diff --git a/frontend/src/components/champion-selector/results/winner-card.tsx b/frontend/src/components/champion-selector/results/winner-card.tsx new file mode 100644 index 00000000..c5fa0b8a --- /dev/null +++ b/frontend/src/components/champion-selector/results/winner-card.tsx @@ -0,0 +1,100 @@ +import { Trophy } from 'lucide-react' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Badge } from '@/components/ui/badge' +import { StatusBadge } from 
'@/components/common/status-badge' +import { BIAS_EXPLANATION } from '@/components/champion-selector/copy' +import type { ConfidenceLevel, WinnerSummary } from '@/types/api' + +interface WinnerCardProps { + winner: WinnerSummary | null + confidence: ConfidenceLevel | null + reasons: string[] + /** The deterministic backend `business_summary` (read-only; Slice C extends). */ + businessSummary?: Record | null +} + +const CONFIDENCE_VARIANT: Record = { + high: 'success', + medium: 'info', + low: 'warning', +} + +function Metric({ label, value }: { label: string; value: number | undefined }) { + return ( +
+

{label}

+

+ {typeof value === 'number' && Number.isFinite(value) ? value.toFixed(2) : '—'} +

+
+ ) +} + +/** + * Winner summary card (Slice B). Null-safe — renders a "no winner" state for a + * failed/cancelled run. Renders the deterministic `business_summary` headline + * READ-ONLY (Slice C adds the decision-layer interpretation on top). + */ +export function WinnerCard({ winner, confidence, reasons, businessSummary }: WinnerCardProps) { + if (winner === null) { + return ( + + + No champion selected + + No candidate produced a valid backtest. Review the failed candidates + below or adjust the selection. + + + + ) + } + + const headline = + typeof businessSummary?.['headline'] === 'string' + ? (businessSummary['headline'] as string) + : null + + return ( + + +
+ + + {winner.model_type} + + {confidence && ( + + {confidence} confidence + + )} +
+ {headline && {headline}} +
+ +
+ + + + +
+ {reasons.length > 0 && ( +
+ {reasons.map((reason, i) => ( +
+ + why + + {reason} +
+ ))} +
+ )} +

{BIAS_EXPLANATION}

+
+
+ ) +} diff --git a/frontend/src/hooks/use-model-selection.test.ts b/frontend/src/hooks/use-model-selection.test.ts index a1187321..4209a072 100644 --- a/frontend/src/hooks/use-model-selection.test.ts +++ b/frontend/src/hooks/use-model-selection.test.ts @@ -5,12 +5,23 @@ * availability `enabled` gating. No real backend is exercised. */ import { QueryClient, QueryClientProvider } from '@tanstack/react-query' -import { renderHook, waitFor } from '@testing-library/react' +import { act, renderHook, waitFor } from '@testing-library/react' import { afterEach, describe, expect, it, vi } from 'vitest' import { createElement, type ReactNode } from 'react' -import { useModelCatalog, usePairAvailability } from './use-model-selection' -import type { ModelCatalogResponse, PairAvailability } from '@/types/api' +import { + useCancelSelectionRun, + useModelCatalog, + usePairAvailability, + useSelectionRun, + useSubmitSelectionRun, +} from './use-model-selection' +import type { + ModelCatalogResponse, + ModelSelectionRunRequest, + PairAvailability, + SubmitRunResponse, +} from '@/types/api' function makeWrapper(client: QueryClient) { return function Wrapper({ children }: { children: ReactNode }) { @@ -124,3 +135,139 @@ describe('usePairAvailability', () => { expect(fetchMock).not.toHaveBeenCalled() }) }) + +// --------------------------------------------------------------------- Slice B + +const SUBMIT_RESPONSE: SubmitRunResponse = { + selection_id: 'sel_b', + store_id: 7, + product_id: 12, + status: 'running', + selection_window: { start_date: '2026-01-01', end_date: '2026-05-31' }, + forecast_horizon: 14, + ranking_metric: 'wape', + availability: null, + ranking: [], + winner: null, + recommendation_confidence: null, + confidence_reasons: [], + chart_data: null, + final_model: null, + forecast: null, + business_summary: null, + error_message: null, + created_at: '2026-06-01T12:00:00Z', + started_at: '2026-06-01T12:00:00Z', + completed_at: null, + progress: { total: 1, pending: 1, 
running: 0, completed: 0, failed: 0, cancelled: 0 }, + candidate_progress: [ + { + candidate_id: 'c0', + ordinal: 0, + model_type: 'naive', + status: 'pending', + error: null, + started_at: null, + completed_at: null, + duration_ms: null, + }, + ], + monitor_url: '/model-selection/sel_b', + cancel_url: '/model-selection/sel_b', +} + +const RUN_REQUEST: ModelSelectionRunRequest = { + store_id: 7, + product_id: 12, + selection_window: { start_date: '2026-01-01', end_date: '2026-05-31' }, + forecast_horizon: 14, + ranking_metric: 'wape', + split_config: { + strategy: 'expanding', + n_splits: 5, + min_train_size: 30, + gap: 0, + horizon: 14, + }, + candidate_models: [{ model_type: 'naive', params: {} }], + feature_frame_version: 1, + feature_groups: null, + auto_train_winner: false, + auto_predict: false, +} + +describe('useSubmitSelectionRun', () => { + it('POSTs to /model-selection/runs and seeds the poll cache', async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response(JSON.stringify(SUBMIT_RESPONSE), { + status: 202, + headers: { 'content-type': 'application/json' }, + }), + ) + vi.stubGlobal('fetch', fetchMock) + const client = makeClient() + const { result } = renderHook(() => useSubmitSelectionRun(), { + wrapper: makeWrapper(client), + }) + await act(async () => { + result.current.mutate(RUN_REQUEST) + }) + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + const call = fetchMock.mock.calls[0]! + expect(String(call[0])).toContain('/model-selection/runs') + expect((call[1] as RequestInit).method).toBe('POST') + // The poll cache is seeded so useSelectionRun starts warm. 
+ expect( + client.getQueryData(['model-selection', 'run', 'sel_b']), + ).toEqual(SUBMIT_RESPONSE) + }) +}) + +describe('useSelectionRun', () => { + it('GETs /model-selection/{id} when given a selection id', async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response(JSON.stringify({ ...SUBMIT_RESPONSE, status: 'completed' }), { + status: 200, + headers: { 'content-type': 'application/json' }, + }), + ) + vi.stubGlobal('fetch', fetchMock) + const { result } = renderHook(() => useSelectionRun('sel_b'), { + wrapper: makeWrapper(makeClient()), + }) + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + expect(String(fetchMock.mock.calls[0]![0])).toContain('/model-selection/sel_b') + expect(result.current.data?.status).toBe('completed') + }) + + it('does NOT fetch without a selection id (enabled gating)', async () => { + const fetchMock = vi.fn() + vi.stubGlobal('fetch', fetchMock) + renderHook(() => useSelectionRun(null), { wrapper: makeWrapper(makeClient()) }) + await new Promise((resolve) => setTimeout(resolve, 20)) + expect(fetchMock).not.toHaveBeenCalled() + }) +}) + +describe('useCancelSelectionRun', () => { + it('DELETEs /model-selection/{id}', async () => { + const cancelled = { ...SUBMIT_RESPONSE, status: 'cancelled' as const } + const fetchMock = vi.fn().mockResolvedValue( + new Response(JSON.stringify(cancelled), { + status: 200, + headers: { 'content-type': 'application/json' }, + }), + ) + vi.stubGlobal('fetch', fetchMock) + const { result } = renderHook(() => useCancelSelectionRun(), { + wrapper: makeWrapper(makeClient()), + }) + await act(async () => { + result.current.mutate('sel_b') + }) + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + const call = fetchMock.mock.calls[0]! 
+ expect(String(call[0])).toContain('/model-selection/sel_b') + expect((call[1] as RequestInit).method).toBe('DELETE') + }) +}) diff --git a/frontend/src/hooks/use-model-selection.ts b/frontend/src/hooks/use-model-selection.ts index 726f8072..2cf7286f 100644 --- a/frontend/src/hooks/use-model-selection.ts +++ b/frontend/src/hooks/use-model-selection.ts @@ -1,12 +1,19 @@ -import { useQuery } from '@tanstack/react-query' +import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query' import { api } from '@/lib/api' -import type { ModelCatalogResponse, PairAvailability } from '@/types/api' +import { isTerminalSelectionStatus } from '@/components/champion-selector/results/constants' +import type { + ModelCatalogResponse, + ModelSelectionRunRequest, + ModelSelectionRunResponse, + PairAvailability, + SubmitRunResponse, +} from '@/types/api' /** - * Model-selection query hooks (Champion Selector, Slice A). + * Model-selection query hooks (Champion Selector). * - * Read-only: the catalog and pair-availability GETs. The run mutation, - * progress, and results hooks are owned by Slice B; train/predict by Slice C. + * Slice A: catalog + availability GETs. Slice B: async submit / poll / cancel. + * Train/predict/promotion are owned by Slice C. */ /** @@ -55,3 +62,59 @@ export function usePairAvailability({ enabled: enabled && !!storeId && storeId > 0 && !!productId && productId > 0, }) } + +/** + * Submit an async selection run (Slice B). `POST /model-selection/runs` returns + * 202 immediately; we seed the poll cache so `useSelectionRun` starts warm. + */ +export function useSubmitSelectionRun() { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (request: ModelSelectionRunRequest) => + api('/model-selection/runs', { + method: 'POST', + body: request, + }), + onSuccess: (data) => { + queryClient.setQueryData(['model-selection', 'run', data.selection_id], data) + }, + }) +} + +/** + * Poll one selection run. 
Refetches every 2s while pending/running, then stops + * once the run reaches a terminal status. Gated on a real selection id. + */ +export function useSelectionRun(selectionId: string | null, enabled = true) { + return useQuery({ + queryKey: ['model-selection', 'run', selectionId], + queryFn: () => + api(`/model-selection/${selectionId}`), + enabled: enabled && !!selectionId, + refetchInterval: (query) => { + const status = query.state.data?.status + return status && isTerminalSelectionStatus(status) ? false : 2000 + }, + }) +} + +/** + * Cancel an in-flight selection run (Slice B). `DELETE /model-selection/{id}` — + * 200 settled / 404 / 409 terminal / 504 drain timeout. Seeds + invalidates the + * poll query on success. + */ +export function useCancelSelectionRun() { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (selectionId: string) => + api(`/model-selection/${selectionId}`, { + method: 'DELETE', + }), + onSuccess: (data) => { + queryClient.setQueryData(['model-selection', 'run', data.selection_id], data) + void queryClient.invalidateQueries({ + queryKey: ['model-selection', 'run', data.selection_id], + }) + }, + }) +} diff --git a/frontend/src/pages/visualize/champion.test.tsx b/frontend/src/pages/visualize/champion.test.tsx index 123d4862..2ae297ca 100644 --- a/frontend/src/pages/visualize/champion.test.tsx +++ b/frontend/src/pages/visualize/champion.test.tsx @@ -69,6 +69,10 @@ vi.mock('@/hooks/use-model-selection', () => ({ isLoading: false, isError: false, }), + // Slice B — inert async hooks (no run in progress for the shell test). 
+ useSubmitSelectionRun: () => ({ mutate: vi.fn(), isPending: false }), + useCancelSelectionRun: () => ({ mutate: vi.fn(), isPending: false }), + useSelectionRun: () => ({ data: undefined, isLoading: false, isError: false }), })) import ChampionSelectorPage from './champion' diff --git a/frontend/src/pages/visualize/champion.tsx b/frontend/src/pages/visualize/champion.tsx index d3e3106f..6157148e 100644 --- a/frontend/src/pages/visualize/champion.tsx +++ b/frontend/src/pages/visualize/champion.tsx @@ -1,10 +1,16 @@ import { useMemo, useState } from 'react' import { format } from 'date-fns' import { DateRange } from 'react-day-picker' -import { Trophy } from 'lucide-react' +import { Loader2, Trophy } from 'lucide-react' import { useStores } from '@/hooks/use-stores' import { useProducts } from '@/hooks/use-products' -import { useModelCatalog, usePairAvailability } from '@/hooks/use-model-selection' +import { + useCancelSelectionRun, + useModelCatalog, + usePairAvailability, + useSelectionRun, + useSubmitSelectionRun, +} from '@/hooks/use-model-selection' import { DateRangePicker } from '@/components/common/date-range-picker' import { ErrorDisplay } from '@/components/common/error-display' import { AvailabilityPanel } from '@/components/champion-selector/availability-panel' @@ -12,12 +18,20 @@ import { BacktestSettingsForm } from '@/components/champion-selector/backtest-se import { splitConfigErrors } from '@/components/champion-selector/split-config' import { CandidateModelPicker } from '@/components/champion-selector/candidate-model-picker' import { SearchableEntitySelect } from '@/components/champion-selector/searchable-entity-select' -import { RUN_COMPARISON_PENDING } from '@/components/champion-selector/copy' import { assembleRunRequest } from '@/components/champion-selector/run-request' +import { RunProgressPanel } from '@/components/champion-selector/results/run-progress-panel' +import { RankingTable } from 
'@/components/champion-selector/results/ranking-table' +import { WinnerCard } from '@/components/champion-selector/results/winner-card' +import { ComparisonCharts } from '@/components/champion-selector/results/comparison-charts' +import { ModelDetailDrawer } from '@/components/champion-selector/results/model-detail-drawer' +import { CancelRunDialog } from '@/components/champion-selector/results/cancel-run-dialog' +import { isTerminalSelectionStatus } from '@/components/champion-selector/results/constants' import { Button } from '@/components/ui/button' import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' import { Input } from '@/components/ui/input' +import { getErrorMessage } from '@/lib/api' import type { + ModelRankEntry, ModelSelectionRunRequest, SplitConfig, } from '@/types/api' @@ -54,6 +68,12 @@ export default function ChampionSelectorPage() { // catalog's default candidate set (derived below, no effect needed). const [editedModels, setEditedModels] = useState(null) + // Slice B — the in-flight/terminal async run + the detail-drawer selection. + const [selectionId, setSelectionId] = useState(null) + const [submitError, setSubmitError] = useState(null) + const [drawerEntry, setDrawerEntry] = useState(null) + const [drawerOpen, setDrawerOpen] = useState(false) + // /dimensions/{stores,products} both cap page_size at 100 (client-filtered). const storesQuery = useStores({ page: 1, pageSize: 100 }) const productsQuery = useProducts({ page: 1, pageSize: 100 }) @@ -107,9 +127,8 @@ export default function ChampionSelectorPage() { selectedModels.length >= 1 && splitConfigErrors(effectiveSplit).length === 0 - // The assembled request — typed but NOT sent in Slice A (the CTA is disabled). - // `auto_train_winner`/`auto_predict` are pinned false by `assembleRunRequest`. - // Built defensively so it is valid the moment Slice B wires the mutation. 
+ // The assembled request — `auto_train_winner`/`auto_predict` pinned false by + // `assembleRunRequest` (no-ops in the async path; Slice C owns train/predict). const runRequest: ModelSelectionRunRequest | null = formReady && dateRange?.from && dateRange?.to ? assembleRunRequest({ @@ -124,6 +143,28 @@ export default function ChampionSelectorPage() { }) : null + // Slice B — async submit → poll → cancel. + const submitRun = useSubmitSelectionRun() + const cancelRun = useCancelSelectionRun() + const runQuery = useSelectionRun(selectionId) + const run = runQuery.data + const isRunning = !!run && !isTerminalSelectionStatus(run.status) + const isTerminal = !!run && isTerminalSelectionStatus(run.status) + + function handleRunComparison() { + if (!runRequest) return + setSubmitError(null) + submitRun.mutate(runRequest, { + onSuccess: (data) => setSelectionId(data.selection_id), + onError: (err) => setSubmitError(getErrorMessage(err)), + }) + } + + function handleSelectModel(entry: ModelRankEntry) { + setDrawerEntry(entry) + setDrawerOpen(true) + } + return (
@@ -260,34 +301,75 @@ export default function ChampionSelectorPage() { - {/* Run CTA (disabled until Slice B) */} + {/* Run CTA (Slice B — submit the async comparison) */}
{formReady ? `Ready to compare ${selectedModels.length} model${ selectedModels.length === 1 ? '' : 's' - }. ${RUN_COMPARISON_PENDING}` + }.` : 'Pick a store, product, time period, horizon and at least one model to continue.'} + {submitError && ( + {submitError} + )} +
+
+ {isRunning && ( + selectionId && cancelRun.mutate(selectionId)} + isCancelling={cancelRun.isPending} + /> + )} +
-
- {/* Dev-only assurance that a valid request is assembled (not sent). */} - {runRequest && ( -

- {JSON.stringify(runRequest)} -

+ {/* Live progress + results (Slice B) */} + {run && ( + + )} + + {isTerminal && run && ( + <> + + {run.chart_data && ( + + )} + {run.ranking.length > 0 && ( + + )} + + )}
) diff --git a/frontend/src/types/api.ts b/frontend/src/types/api.ts index d6e0584f..63ebe3f4 100644 --- a/frontend/src/types/api.ts +++ b/frontend/src/types/api.ts @@ -1205,6 +1205,13 @@ export type ModelSelectionStatus = | 'completed' | 'partial' | 'failed' + | 'cancelled' // Slice B — async cancel terminal state +export type CandidateStatus = + | 'pending' + | 'running' + | 'completed' + | 'failed' + | 'cancelled' export type RankingMetric = 'wape' | 'smape' | 'mae' | 'bias' export type AvailabilityStatus = 'ready' | 'limited' | 'unusable' // `ConfidenceLevel` ('high' | 'medium' | 'low') is reused from the @@ -1325,6 +1332,27 @@ export interface ModelSelectionForecastSummary { horizon: number } +// Slice B — live async progress on a selection run. +export interface CandidateProgress { + candidate_id: string + ordinal: number + model_type: string + status: CandidateStatus + error: string | null + started_at: string | null + completed_at: string | null + duration_ms: number | null +} + +export interface SelectionProgress { + total: number + pending: number + running: number + completed: number + failed: number + cancelled: number +} + export interface ModelSelectionRunResponse { selection_id: string store_id: number @@ -1344,5 +1372,15 @@ export interface ModelSelectionRunResponse { business_summary: Record | null error_message: string | null created_at: string // ISO datetime + // Slice B — additive async fields (null/empty on a legacy sync `/run` row). + started_at?: string | null completed_at: string | null + progress?: SelectionProgress | null + candidate_progress?: CandidateProgress[] +} + +// Slice B — 202 response from `POST /model-selection/runs` (additive superset). 
+export interface SubmitRunResponse extends ModelSelectionRunResponse { + monitor_url: string + cancel_url: string } From 45b7a7043232c57ce7af37ac9055b6a19377b98a Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Mon, 1 Jun 2026 09:50:16 +0200 Subject: [PATCH 28/30] docs: add forecast champion selector slice A/B/C PRPs (#360) --- ...n-selector-slice-a-selection-capability.md | 716 +++++++++++ ...lector-slice-b-async-comparison-results.md | 1010 +++++++++++++++ ...-c-forecast-decision-operationalization.md | 1107 +++++++++++++++++ 3 files changed, 2833 insertions(+) create mode 100644 PRPs/forecast-champion-selector-slice-a-selection-capability.md create mode 100644 PRPs/forecast-champion-selector-slice-b-async-comparison-results.md create mode 100644 PRPs/forecast-champion-selector-slice-c-forecast-decision-operationalization.md diff --git a/PRPs/forecast-champion-selector-slice-a-selection-capability.md b/PRPs/forecast-champion-selector-slice-a-selection-capability.md new file mode 100644 index 00000000..f43c0371 --- /dev/null +++ b/PRPs/forecast-champion-selector-slice-a-selection-capability.md @@ -0,0 +1,716 @@ +name: "Forecast Champion Selector — Slice A: Selection & Capability Foundation" +description: | + First usable frontend/backend surface for the Forecast Champion Selector. Adds + one backend-owned model-capability catalog endpoint to the existing + `model_selection` slice, then builds the React selection shell — searchable + store/product selectors, pair validation, live data-availability assessment, + a simple/advanced backtest-settings form, and a candidate-model picker — under + a new `/visualize/champion` page. Slice A deliberately STOPS before running the + comparison: it does NOT call `POST /model-selection/run`, render ranking/chart + results, train, predict, or promote. Those are Slice B (async run + results) + and Slice C (train/predict/business summary/override/promotion). 
+ +**Created:** 2026-06-01 · **Slice:** A of 3 (A → B → C) +**Current repo base observed:** `dev` @ `6c3f8d4` (Merge PR #354 — model_selection backend merged) +**Backend foundation (source of truth):** `PRPs/forecast-champion-selector-backend.md` (issue #353, MERGED) + +the live slice `app/features/model_selection/` (schemas/service/routes/ranking/explanations verified 2026-06-01). +**Working-tree caveat:** `docker-compose.lan.yml` is an untracked local dogfood override; do NOT commit it. +**Tracking issue:** create before implementation, suggested title `feat(api,ui): forecast champion selector slice A — selection & capability`. +**Suggested branch:** `feat/champion-selector-slice-a` (off `dev`, per `.claude/rules/branch-naming.md`). +**Commit scope:** `api` (new catalog endpoint + slice schemas/service/routes) and `ui` (frontend page/components/hooks/types). +No migration in Slice A — no schema change. Every commit references the tracking issue. + +--- + +## Goal + +**Feature Goal:** Ship the first interactive Forecast Champion Selector surface — a `/visualize/champion` +React page that lets a user choose a **Store → Product → Time Period → Forecast Horizon → Model Types → +Backtest Settings**, see whether the chosen pair has enough history to model (live availability assessment), +and pick candidate models from a **backend-owned** capability catalog — backed by exactly one new backend +endpoint (`GET /model-selection/models`). The page is genuinely usable for *configuration + availability +triage* even though the comparison **run** itself lands in Slice B. + +**Deliverable:** +- **Backend:** `GET /model-selection/models` → `ModelCatalogResponse` (capability catalog), implemented via a new + pure module `app/features/model_selection/capabilities.py`, response schemas added to the slice's + `schemas.py`, a thin `ModelSelectionService.get_model_catalog()` delegate, and the route wired in the slice's + existing `routes.py`. 
No migration, no new mutation surface, no agent tool. +- **Frontend:** a lazy-loaded `pages/visualize/champion.tsx` page (route `ROUTES.VISUALIZE.CHAMPION`, + nav entry under **Visualize**), a `components/champion-selector/` component family (searchable store/product + selects, availability panel, backtest-settings form, candidate-model picker), a `hooks/use-model-selection.ts` + query-hook module (catalog + availability reads), and a `types/api.ts` "Model Selection" section that declares + the FULL workflow contract (so Slices B/C inherit, not redefine, the types). + +**Success Definition:** +1. `GET /model-selection/models` returns HTTP 200 with a non-empty `models` array — each entry carrying + `model_type`, `label`, `family ∈ {baseline,tree,additive}`, `feature_aware`, `requires_extra`, + `default_params`, `supports_auto_predict`, `description` — plus a `default_candidate_model_types` list. +2. The `/visualize/champion` page renders: a searchable store select, a searchable product select (each with a + secondary line — store `code · name`, product `sku · category`), a date-range picker, a horizon input, a + candidate-model picker fed by `GET /model-selection/models`, and a simple/advanced backtest-settings form. +3. Selecting a valid `(store, product, horizon)` triggers `GET /model-selection/availability` and renders a + `ready | limited | unusable` status block with coverage/observed-days/zero-sale/promotion/avg-demand and the + recommended split config; an unusable/empty pair shows a clear not-enough-data state. +4. The "Run comparison" primary CTA is present but **disabled** with explanatory copy (Slice B turns it on). +5. All Slice A validation gates pass (backend Level-1..4 + frontend `tsc`/`lint`/`test`). + +## Why + +- Business users want to ask "which model should I use for this store/product?" through a UI, not curl. 
Slice A + gives them the **configuration + triage** half of that workflow immediately, and a stable shell Slice B/C bolt + onto with minimal churn. +- The capability catalog must be **backend-owned** (coordination contract): the model union, families, opt-in + extras, and feature-aware flags live in Python (`app/features/forecasting/`), and shipping them over an API + prevents the TypeScript `MODEL_FAMILY_MAP`/`MODEL_TYPE_LABELS` from drifting out of sync as new models land. +- Declaring the full TS contract now (consumed read-only in A) means Slices B and C add behavior, not type + definitions — cleaner slice boundaries, fewer merge conflicts. +- Preserves the single-host architecture: one new read-only GET, no queue, no new dependency, no cloud SDK. + +## What + +### New backend endpoint (added to the existing slice router `APIRouter(prefix="/model-selection")`) + +```http +GET /model-selection/models +``` + +Response `ModelCatalogResponse`: + +```json +{ + "models": [ + { + "model_type": "naive", + "label": "Naive", + "family": "baseline", + "feature_aware": false, + "requires_extra": false, + "default_params": {}, + "supports_auto_predict": true, + "description": "Repeats the last observed value." + }, + { + "model_type": "seasonal_naive", + "label": "Seasonal Naive", + "family": "baseline", + "feature_aware": false, + "requires_extra": false, + "default_params": { "season_length": 7 }, + "supports_auto_predict": true, + "description": "Repeats the value from one season ago." + } + // ... one entry per forecasting ModelConfig member (11 total) + ], + "default_candidate_model_types": ["naive", "seasonal_naive", "moving_average", "regression", "prophet_like"] +} +``` + +### LOCKED Slice-A decisions (remove every "choose-one" ambiguity) + +1. **Exactly one new backend endpoint:** `GET /model-selection/models`. 
It is **declared in `routes.py` + BEFORE the `GET /{selection_id}` route** (literal path must precede the path-param route, mirroring the + existing `/availability` route at `routes.py:41` which sits before `/{selection_id}` at `:94`). Status 200. + No request body, no query params. +2. **Catalog is backend-owned and derived, not hand-duplicated.** `family` comes from the forecasting + authority `app.features.forecasting.feature_metadata.model_family_for(model_type)` (imported LAZILY inside + the builder, per the slice's cross-slice discipline) mapped to the lowercase literal + (`ModelFamily.BASELINE → "baseline"`, etc.). `model_type` iteration order + `default_params` + `label` + + `description` come from a slice-local ordered map in `capabilities.py` whose keys are asserted (in a test) to + exactly equal the `ModelType` Literal in `app/features/model_selection/schemas.py`. +3. **`requires_extra`** = `model_type in {"lightgbm", "xgboost"}` (opt-in extras that may `ImportError`). + **`feature_aware`** = `model_type in {"regression", "prophet_like", "lightgbm", "xgboost", "random_forest"}` + (the set the forecasting `predict()` rejects — see Known Gotchas to verify against `forecasting/service.py`). + **`supports_auto_predict`** = `not feature_aware` (feature-aware winners cannot auto-predict — backend + `predict()` rejects them; this flag lets Slice C grey-out the auto-predict toggle). +4. **`default_candidate_model_types`** = `["naive", "seasonal_naive", "moving_average", "regression", "prophet_like"]` + — the exact default five from the backend PRP's `POST /run` example, so the UI pre-selects the same set the + contract documents. +5. **No `model_selection_run` write in Slice A.** The page consumes `GET /models` and `GET /availability` only. + It assembles a typed `ModelSelectionRunRequest` in component state and exposes it through a **disabled** + "Run comparison" CTA; Slice B wires the `POST /run` mutation + results. 
Slice A MUST NOT call `POST /run`, + `/{id}`, `/{id}/ranking`, `/{id}/train-winner`, or `/{id}/predict`. +6. **Searchable selects use existing primitives only** (no new npm dependency). Stores/products are fetched at + `pageSize: 100` (the dimensions cap) and filtered **client-side** inside a `Popover` + text `Input` + + scrollable button list. (If the catalog ever exceeds 100, swap to the server-side `search` param the + `useStores`/`useProducts` hooks already support — out of scope here.) +7. **Bias-explanation copy (locked, reused by B/C):** wherever bias is explained in help text/tooltips, use + exactly — *"Positive bias means the model under-forecasts (risk of stockouts); negative bias means it + over-forecasts (risk of overstock)."* Export it as a shared constant so B/C reuse the same wording. +8. **WAPE is the default ranking metric**; the advanced form's ranking-metric select offers `wape` (default), + `smape`, `mae`, `bias`, with help text stating the tie-break chain *WAPE → sMAPE → |bias| → MAE* and the + bias copy from #7. + +### Success Criteria + +- [ ] `GET /model-selection/models` returns 200 with `models` (11 entries) + `default_candidate_model_types`. +- [ ] `capabilities.build_model_catalog()` is pure (no DB/IO) and its `model_type` set equals the slice + `ModelType` Literal (asserted by a test). +- [ ] `/model-selection/models` is matched correctly (NOT captured by `/{selection_id}`) — route-order test green. +- [ ] `/visualize/champion` route + Visualize nav entry render the page; lazy-loaded like its siblings. +- [ ] Searchable store + product selects filter client-side and show the secondary descriptor line. +- [ ] Pair validation: the form's primary CTA stays disabled until a store, product, valid date window, and + horizon are all chosen; the date window + horizon respect backend bounds. 
+- [ ] Availability auto-fetches for a valid pair and renders `ready/limited/unusable` + metrics + recommended + split config; an empty/unusable pair renders a not-enough-data `EmptyState`. +- [ ] The candidate-model picker is fed by `GET /model-selection/models`; opt-in-extra models are visibly + flagged; the default five are pre-selected. +- [ ] The simple/advanced settings form mirrors `SplitConfig` bounds and keeps `split_config.horizon === + forecast_horizon` (matching the backend request validator). +- [ ] The "Run comparison" CTA is present but disabled with copy indicating it arrives next. +- [ ] No `POST /model-selection/run` (or any mutation) is called; no chart/ranking results UI; no train/predict/ + promotion UI; no agent tool; no migration; no new npm dependency. +- [ ] `app/core/tests/test_strict_mode_policy.py` stays green (no new strict request model with date fields). +- [ ] All backend Level-1..4 gates + frontend `pnpm tsc --noEmit && pnpm lint && pnpm test --run` pass. + +## All Needed Context + +### Documentation & References + +```yaml +# Slice / contract source of truth +- file: PRPs/forecast-champion-selector-backend.md + why: The merged backend foundation. LOCKED decisions #1-#7, the full /run + /{id} contract, the + availability semantics (ready/limited/unusable thresholds), and the default-five candidate list. + Slice A consumes this contract read-only; do not re-derive ranking/confidence in TS. +- file: PRPs/ai_docs/forecast-champion-selector-backend-research.md + why: External-lib + runtime facts (FastAPI APIRouter, Pydantic strict mode, sklearn TimeSeriesSplit). +- file: PRPs/templates/prp_base.md + why: Base PRP template structure. NOTE — the referenced "PRPs/prp-readme.md.md" does NOT exist + (`find PRPs -iname '*readme*'` empty on 2026-06-01); the backend PRP records the same finding. 
+ +# Live backend slice to read (the contract the UI consumes) +- file: app/features/model_selection/schemas.py + why: ModelType Literal (:34, the 11 model_types), RankingMetric (:48), AvailabilityStatus (:51), + ConfidenceLevel (:50), PairAvailabilityResponse (:239), ModelSelectionRunRequest (:118), + ModelSelectionRunResponse (:267), ModelRankEntry (:195), WinnerSummary (:216), ChartData (:225). + ADD the new ModelCatalogResponse + CandidateModelInfo here (plain BaseModel — outputs need no strict). +- file: app/features/model_selection/routes.py + why: APIRouter(prefix="/model-selection") (:38); the literal `/availability` (:41) precedes `/{selection_id}` + (:94) — MIRROR that ordering for the new `/models` route. Error mapping: ValueError→BadRequestError, + SQLAlchemyError→DatabaseError. +- file: app/features/model_selection/service.py + why: Stateless service pattern; lazy cross-slice imports inside methods (:215-219). ADD + get_model_catalog() delegating to capabilities.build_model_catalog() (no DB needed; keep signature + db-free or accept db and ignore — prefer db-free since the catalog is static). +- file: app/features/model_selection/ranking.py + why: PURE-module precedent (no DB/IO, unit-tested directly). MIRROR this style for capabilities.py. +- file: app/features/model_selection/explanations.py + why: Second pure-module precedent (deterministic text). Same import/style conventions. +- file: app/features/model_selection/tests/test_routes.py + why: Route-test pattern (ASGITransport + AsyncClient + dependency_overrides[get_db]); ADD a /models 200 + test + a route-ordering test (GET /model-selection/models is NOT treated as selection_id="models"). +- file: app/features/model_selection/tests/test_ranking.py + why: Pure-unit test pattern to MIRROR for tests/test_capabilities.py. 
+ +# Backend authority for model family / union (catalog source) +- file: app/features/forecasting/feature_metadata.py + why: model_family_for(model_type) -> ModelFamily (:57) and _MODEL_FAMILY_MAP (:42). The catalog `family` + field derives from here. ModelFamily enum is BASELINE/TREE/ADDITIVE (lowercase .value). +- file: app/features/forecasting/schemas.py + why: ModelConfig union (the 11 flat members + their default params). Use to VERIFY default_params per model + (see Known Gotchas verification one-liner). ModelFamily enum lives here too (imported by feature_metadata). +- file: app/features/backtesting/schemas.py + why: SplitConfig (:24) — strategy Literal["expanding","sliding"] (def "expanding"), n_splits 2-20 (def 5), + min_train_size >=7 (def 30), gap 0-30 (def 0), horizon 1-90 (def 14), field_validator horizon>gap (:65). + The TS SplitConfig type + advanced form bounds mirror this exactly. + +# Frontend examples to MIRROR (verified 2026-06-01) +- file: frontend/src/pages/visualize/backtest.tsx + why: Canonical analytical page: Card sections, store/product Select fed by useStores/useProducts + ({page:1,pageSize:100}), DateRangePicker, numeric Inputs, a `formReady` gate, EmptyState/LoadingState, + getErrorMessage. Slice A's champion page mirrors this density (minus the results/charts). +- file: frontend/src/components/forecast-intelligence/model-type-select.tsx + why: shadcn Select-based model picker convention + data-testid pattern. The Slice-A candidate picker mirrors + the labelling style but sources options from GET /model-selection/models (NOT the hardcoded util). +- file: frontend/src/components/forecast-intelligence/model-type-utils.ts + why: The EXISTING hardcoded MODEL_FAMILY_MAP / MODEL_TYPE_LABELS used by OTHER pages. DO NOT refactor or + delete it in Slice A — other pages depend on it; the champion page just doesn't use it. 
+- file: frontend/src/components/forecast-intelligence/batch-matrix-picker.tsx + why: Multi-select-of-models pattern (checkbox list, max-rows cap, data-testid scheme, Badge for state). + The candidate-model picker mirrors this (checkbox per model, opt-in-extra Badge), but rows = model_types + from the catalog, no feature-frame matrix (that's B/C). +- file: frontend/src/components/forecast-intelligence/batch-matrix-picker.test.tsx + why: Component test convention — render + fireEvent + expect(onChange).toHaveBeenCalledWith; afterEach(cleanup). +- file: frontend/src/hooks/use-stores.ts + why: useStores({page,pageSize,...,search,enabled}) query-hook shape + keyed query + keepPreviousData. +- file: frontend/src/hooks/use-products.ts + why: useProducts(...) — identical shape; the searchable selects fetch at pageSize:100. +- file: frontend/src/hooks/use-batches.test.ts + why: Hook test convention — vi.fn() fetch mock via vi.stubGlobal('fetch',...), QueryClient wrapper, + renderHook + waitFor, afterEach(vi.unstubAllGlobals()). MIRROR for use-model-selection.test.ts. +- file: frontend/src/hooks/index.ts + why: Star-export barrel; ADD `export * from './use-model-selection'`. +- file: frontend/src/lib/api.ts + why: `api(endpoint,{params})` typed fetch helper; getErrorMessage(); ApiError. All hooks call `api`. +- file: frontend/src/lib/constants.ts + why: ROUTES (VISUALIZE.* block) + NAV_ITEMS (Visualize group). ADD ROUTES.VISUALIZE.CHAMPION + + a { label:'Champion Selector', href: ROUTES.VISUALIZE.CHAMPION } nav entry under Visualize. +- file: frontend/src/App.tsx + why: Lazy-page + <Suspense fallback={...}> pattern. ADD the + champion route mirroring the BATCH/PLANNER entries. +- file: frontend/src/types/api.ts + why: Section-commented type file. ModelFamily (:177 = 'baseline'|'tree'|'additive'), ProblemDetail (:652), + Store/StoreListResponse (:10/:21), Product/ProductListResponse (:25/:37). ADD a new + "// === Model Selection (Champion Selector) ===" section near the Registry block. 
+- file: frontend/src/components/common/error-display.tsx + why: EmptyState({title,description,action?,icon?}) — used for the not-enough-data state. +- file: frontend/src/components/common/loading-state.tsx + why: LoadingState({message}) — used while availability/catalog load. +- file: frontend/src/components/common/date-range-picker.tsx + why: DateRangePicker({value:DateRange|undefined,onChange}) — the time-period selector. +- file: frontend/src/components/ui/{select,popover,input,card,button,badge,checkbox,table}.tsx + why: Available shadcn primitives. NOTE: there is NO command/combobox/cmdk primitive — build the searchable + select from Popover + Input + a filtered button list (LOCKED #6). +- file: frontend/src/components/layout/top-nav.tsx + why: Renders NAV_ITEMS (grouped via NavigationMenu). No edit needed beyond the constants.ts NAV_ITEMS entry. +- file: frontend/vitest.config.ts + why: jsdom env; include 'src/**/*.test.{ts,tsx}'; `@`→./src alias. No setup file. `pnpm test --run` runs once. + +# External official docs (with reasoning) +- url: https://fastapi.tiangolo.com/tutorial/bigger-applications/#include-an-apirouter-with-a-custom-prefix-tags-responses-and-dependencies + why: APIRouter route-registration + the literal-before-path-param ordering rule that LOCKED #1 depends on. +- url: https://www.ibm.com/design/language/ # (progressive disclosure principle) + why: Simple/advanced settings split — show the recommended split config by default, reveal n_splits/min_train/ + gap/strategy under an "Advanced" toggle so novice users aren't overwhelmed. NOTE: the originally-cited + IBM technical-content URL 404s; use the IBM Design language site / Nielsen Norman + (https://www.nngroup.com/articles/progressive-disclosure/) as the canonical reference instead. 
+- url: https://help.tableau.com/current/pro/desktop/en-us/dashboards_best_practices.htm + why: Analytical dashboard layout — lead with the question (which model?), group related controls, keep the + availability triage adjacent to the selection. Informs the Card grouping of the champion page. +- url: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.TimeSeriesSplit.html + why: The split semantics behind SplitConfig (expanding window, n_splits, gap, horizon) — so the advanced + form's help text describes folds correctly. +- url: https://tanstack.com/query/latest/docs/framework/react/guides/queries + why: useQuery enabled-gating (only fetch availability once a valid pair exists) + queryKey conventions. +``` + +### Current Codebase Tree (relevant) + +```bash +app/features/model_selection/ # MERGED backend slice (issue #353) +├── __init__.py +├── models.py # ModelSelectionRun ORM (NOT touched in Slice A) +├── schemas.py # request/response contract ← ADD catalog response models +├── ranking.py # pure ranking (precedent for capabilities.py) +├── explanations.py # pure explanations (precedent) +├── service.py # ModelSelectionService ← ADD get_model_catalog() +├── routes.py # APIRouter(/model-selection) ← ADD GET /models (before /{selection_id}) +└── tests/ # ← ADD test_capabilities.py; extend test_routes.py +app/features/forecasting/feature_metadata.py # model_family_for() — catalog family authority +frontend/src/ +├── App.tsx # ← ADD lazy champion route +├── lib/{api,constants}.ts # ← constants: ROUTES.VISUALIZE.CHAMPION + NAV_ITEMS entry +├── types/api.ts # ← ADD "Model Selection" section +├── hooks/{use-stores,use-products,index}.ts # ← index: export use-model-selection +├── pages/visualize/{backtest,batch,...}.tsx # page-density precedent +└── components/ + ├── common/{error-display,loading-state,date-range-picker}.tsx + ├── ui/{select,popover,input,card,button,badge,checkbox,table}.tsx + └── 
forecast-intelligence/{model-type-select,batch-matrix-picker}.tsx # picker precedents +``` + +### Desired Codebase Tree (Slice A additions) + +```bash +# Backend +app/features/model_selection/capabilities.py # NEW: pure build_model_catalog() +app/features/model_selection/schemas.py # MODIFIED: + CandidateModelInfo, ModelCatalogResponse +app/features/model_selection/service.py # MODIFIED: + get_model_catalog() +app/features/model_selection/routes.py # MODIFIED: + GET /models (before /{selection_id}) +app/features/model_selection/tests/test_capabilities.py # NEW: pure catalog unit tests +app/features/model_selection/tests/test_routes.py # MODIFIED: + /models route + ordering tests + +# Frontend +frontend/src/lib/constants.ts # MODIFIED: ROUTES.VISUALIZE.CHAMPION + NAV_ITEMS entry +frontend/src/App.tsx # MODIFIED: lazy ChampionSelectorPage route +frontend/src/types/api.ts # MODIFIED: Model Selection section (full contract) +frontend/src/hooks/use-model-selection.ts # NEW: useModelCatalog + usePairAvailability +frontend/src/hooks/use-model-selection.test.ts # NEW +frontend/src/hooks/index.ts # MODIFIED: + export +frontend/src/pages/visualize/champion.tsx # NEW: the page shell +frontend/src/components/champion-selector/searchable-entity-select.tsx # NEW (generic combobox) +frontend/src/components/champion-selector/searchable-entity-select.test.tsx # NEW +frontend/src/components/champion-selector/availability-panel.tsx # NEW +frontend/src/components/champion-selector/availability-panel.test.tsx # NEW +frontend/src/components/champion-selector/backtest-settings-form.tsx # NEW +frontend/src/components/champion-selector/backtest-settings-form.test.tsx # NEW +frontend/src/components/champion-selector/candidate-model-picker.tsx # NEW +frontend/src/components/champion-selector/candidate-model-picker.test.tsx # NEW +frontend/src/components/champion-selector/copy.ts # NEW: BIAS_EXPLANATION const (LOCKED #7) +``` + +### Known Gotchas & VERIFIED Contracts + +```python +# ── ROUTE 
ORDERING (LOCKED #1) ──────────────────────────────────────────────── +# Starlette matches routes in DECLARATION ORDER. The literal `GET /models` MUST be declared BEFORE +# `GET /{selection_id}` or a request to /model-selection/models is captured as selection_id="models" +# and 404s in the service. The existing `/availability` route (routes.py:41) already sits before +# `/{selection_id}` (:94) — place `/models` immediately after `/availability`. + +# ── CATALOG default_params — VERIFY before hardcoding ───────────────────────── +# default_params per model must match the forecasting ModelConfig member defaults. Verify with: +# uv run python -c " +# from pydantic import TypeAdapter +# from app.features.forecasting.schemas import ModelConfig +# a=TypeAdapter(ModelConfig) +# for mt in ['naive','seasonal_naive','moving_average','weighted_moving_average','seasonal_average', +# 'trend_regression_baseline','regression','prophet_like','random_forest','lightgbm','xgboost']: +# try: +# m=a.validate_python({'model_type':mt}); d=m.model_dump(); d.pop('model_type',None) +# print(mt, d) +# except Exception as e: +# print(mt, 'NEEDS-PARAMS:', e)" +# Use the printed defaults as `default_params` in capabilities.py. If a member REQUIRES a param (validation +# error with only model_type), supply the contract default (seasonal_naive→{'season_length':7}, +# moving_average→{'window_size':7}) — match the backend PRP /run example. Pin these in test_capabilities.py. + +# ── feature_aware / requires_extra — VERIFY against forecasting predict() reject ── +# LOCKED #3 sets feature_aware = {regression, prophet_like, lightgbm, xgboost, random_forest}. Confirm this +# equals the set ForecastingService.predict() rejects (the backend PRP cites forecasting/service.py:491 +# "rejects feature-aware models"). If the live reject-set differs, the live code wins — update the +# capabilities set and the test to match, and note the discrepancy in the PR description. 
+ +# ── family literal mapping ──────────────────────────────────────────────────── +# model_family_for(mt) returns a ModelFamily enum; serialize via `.value` → "baseline"|"tree"|"additive" +# which already matches the frontend ModelFamily TS union (types/api.ts:177). Import model_family_for +# LAZILY inside build_model_catalog() (mirror service.py lazy cross-slice imports). + +# ── NO new strict request model ─────────────────────────────────────────────── +# GET /models has no body and no query params → no ConfigDict(strict=True) model, no date fields → the +# strict-mode policy linter is unaffected. Do NOT add an AvailabilityQuery-style model for /models. + +# ── catalog is static/pure ───────────────────────────────────────────────────── +# build_model_catalog() takes no args and does no I/O — it is unit-testable like ranking.py. get_model_catalog() +# on the service is a thin pass-through (no db round-trip needed); keep it sync-pure or trivially async. +``` + +```typescript +// ── FRONTEND ──────────────────────────────────────────────────────────────── +// NO combobox/cmdk primitive exists (only select/popover/input/dialog under components/ui). Build the +// searchable select from <Popover> + <Input> (filter box) + a scrollable list of <button> elements + + + )} + + +
+ ) +} diff --git a/frontend/src/components/champion-selector/decision/forecast-chart.test.tsx b/frontend/src/components/champion-selector/decision/forecast-chart.test.tsx new file mode 100644 index 00000000..c28c6726 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/forecast-chart.test.tsx @@ -0,0 +1,33 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { ForecastChart } from './forecast-chart' +import type { ModelSelectionForecastSummary } from '@/types/api' + +// Recharts' ResponsiveContainer needs ResizeObserver in jsdom. +beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) +}) + +afterEach(cleanup) + +const forecast: ModelSelectionForecastSummary = { + points: [ + { date: '2026-06-01', forecast: 10, lower_bound: 8, upper_bound: 12 }, + { date: '2026-06-02', forecast: 14, lower_bound: 11, upper_bound: 17 }, + ], + total_demand: 24, + average_demand: 12, + horizon: 2, +} + +describe('ForecastChart', () => { + it('renders the chart container from forecast points', () => { + render() + expect(screen.getByTestId('forecast-chart')).toBeTruthy() + }) +}) diff --git a/frontend/src/components/champion-selector/decision/forecast-chart.tsx b/frontend/src/components/champion-selector/decision/forecast-chart.tsx new file mode 100644 index 00000000..fccd54b8 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/forecast-chart.tsx @@ -0,0 +1,43 @@ +import { TimeSeriesChart } from '@/components/charts/time-series-chart' +import type { ModelSelectionForecastSummary } from '@/types/api' + +interface ForecastChartProps { + forecast: ModelSelectionForecastSummary +} + +interface ChartRow { + date: string + forecast: number + lower?: number + upper?: number +} + +/** Slice C — the horizon forecast curve (optional interval band). 
*/ +export function ForecastChart({ forecast }: ForecastChartProps) { + const rows: ChartRow[] = forecast.points.map((point) => { + const lower = point['lower_bound'] + const upper = point['upper_bound'] + return { + date: String(point['date'] ?? ''), + forecast: Number(point['forecast'] ?? 0), + lower: typeof lower === 'number' ? lower : undefined, + upper: typeof upper === 'number' ? upper : undefined, + } + }) + const hasInterval = rows.some((row) => row.lower !== undefined && row.upper !== undefined) + + return ( +
+ +
+ ) +} diff --git a/frontend/src/components/champion-selector/decision/forecast-summary-card.test.tsx b/frontend/src/components/champion-selector/decision/forecast-summary-card.test.tsx new file mode 100644 index 00000000..d9e57324 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/forecast-summary-card.test.tsx @@ -0,0 +1,37 @@ +import { afterEach, describe, expect, it } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { ForecastSummaryCard } from './forecast-summary-card' +import type { ModelSelectionForecastSummary } from '@/types/api' + +afterEach(cleanup) + +const forecast: ModelSelectionForecastSummary = { + points: [], + total_demand: 140, + average_demand: 10, + horizon: 14, + peak_date: '2026-06-02', + peak_demand: 25, + low_date: '2026-06-03', + low_demand: 5, +} + +describe('ForecastSummaryCard', () => { + it('renders total, peak, and low tiles', () => { + render() + const text = screen.getByTestId('forecast-summary-card').textContent ?? '' + expect(text).toContain('140.0') + expect(text).toContain('25.0') + expect(text).toContain('2026-06-02') + expect(text).toContain('14d') + }) + + it('renders an em-dash for null peak/low', () => { + render( + , + ) + expect(screen.getByTestId('forecast-summary-card').textContent).toContain('—') + }) +}) diff --git a/frontend/src/components/champion-selector/decision/forecast-summary-card.tsx b/frontend/src/components/champion-selector/decision/forecast-summary-card.tsx new file mode 100644 index 00000000..dddab510 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/forecast-summary-card.tsx @@ -0,0 +1,48 @@ +import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' +import type { ModelSelectionForecastSummary } from '@/types/api' + +interface ForecastSummaryCardProps { + forecast: ModelSelectionForecastSummary +} + +function Tile({ label, value, sub }: { label: string; value: string; sub?: string }) { + return ( +
+

{label}

+

{value}

+ {sub &&

{sub}

} +
+ ) +} + +function num(value: number | null | undefined): string { + return typeof value === 'number' && Number.isFinite(value) ? value.toFixed(1) : '—' +} + +/** Slice C — total / average / peak / low / horizon KPI tiles (null-safe). */ +export function ForecastSummaryCard({ forecast }: ForecastSummaryCardProps) { + return ( + + + Forecast summary + + +
+ + + + + +
+
+
+ ) +} diff --git a/frontend/src/components/champion-selector/decision/promote-champion-dialog.test.tsx b/frontend/src/components/champion-selector/decision/promote-champion-dialog.test.tsx new file mode 100644 index 00000000..c7dbe718 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/promote-champion-dialog.test.tsx @@ -0,0 +1,72 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { PromoteChampionDialog } from './promote-champion-dialog' + +afterEach(cleanup) + +function renderDialog(overrides: Partial[0]> = {}) { + const props = { + open: true, + onOpenChange: vi.fn(), + isOverride: false, + isPromoting: false, + promoteError: null, + promotedAlias: null, + onConfirm: vi.fn(), + ...overrides, + } + render() + return props +} + +describe('PromoteChampionDialog', () => { + it('keeps confirm disabled until alias + approver are valid', () => { + renderDialog() + expect(screen.getByTestId('promote-confirm-action').hasAttribute('disabled')).toBe(true) + fireEvent.change(screen.getByTestId('promote-alias-input'), { + target: { value: 'champion-x' }, + }) + fireEvent.change(screen.getByTestId('promote-approver-input'), { + target: { value: 'gabor' }, + }) + expect(screen.getByTestId('promote-confirm-action').hasAttribute('disabled')).toBe(false) + }) + + it('flags an invalid alias name', () => { + renderDialog() + fireEvent.change(screen.getByTestId('promote-alias-input'), { + target: { value: 'Bad Alias' }, + }) + expect(screen.getByTestId('promote-alias-error')).toBeTruthy() + }) + + it('requires the ack checkbox for a non-recommended (override) model', () => { + renderDialog({ isOverride: true }) + fireEvent.change(screen.getByTestId('promote-alias-input'), { + target: { value: 'champion-x' }, + }) + fireEvent.change(screen.getByTestId('promote-approver-input'), { + target: { value: 'gabor' }, + }) + // still disabled until the ack is checked + 
expect(screen.getByTestId('promote-confirm-action').hasAttribute('disabled')).toBe(true) + fireEvent.click(screen.getByTestId('promote-ack-checkbox')) + expect(screen.getByTestId('promote-confirm-action').hasAttribute('disabled')).toBe(false) + }) + + it('calls onConfirm with the promote body', () => { + const props = renderDialog() + fireEvent.change(screen.getByTestId('promote-alias-input'), { + target: { value: 'champion-x' }, + }) + fireEvent.change(screen.getByTestId('promote-approver-input'), { + target: { value: 'gabor' }, + }) + fireEvent.click(screen.getByTestId('promote-confirm-action')) + expect(props.onConfirm).toHaveBeenCalledWith({ + alias_name: 'champion-x', + approved_by: 'gabor', + acknowledge_non_recommended: false, + }) + }) +}) diff --git a/frontend/src/components/champion-selector/decision/promote-champion-dialog.tsx b/frontend/src/components/champion-selector/decision/promote-champion-dialog.tsx new file mode 100644 index 00000000..79e7e486 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/promote-champion-dialog.tsx @@ -0,0 +1,163 @@ +import { useState } from 'react' +import { CheckCircle2, ShieldAlert } from 'lucide-react' +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from '@/components/ui/alert-dialog' +import { Checkbox } from '@/components/ui/checkbox' +import { Input } from '@/components/ui/input' +import type { PromoteRequest } from '@/types/api' +import { PROMOTE_AUDIT_NOTE } from './constants' + +const ALIAS_RE = /^[a-z0-9][a-z0-9\-_]*$/ + +interface PromoteChampionDialogProps { + open: boolean + onOpenChange: (open: boolean) => void + /** True when a non-recommended model was trained (requires explicit ack). */ + isOverride: boolean + defaultAliasName?: string + isPromoting: boolean + /** Error message from the last promote attempt (null on success/idle). 
*/ + promoteError: string | null + /** The alias name on a successful promotion (null until promoted). */ + promotedAlias: string | null + onConfirm: (body: PromoteRequest) => void +} + +/** + * Slice C — the approval-gated promote dialog. Requires an approver and a valid + * alias name; a non-recommended model additionally requires the ack checkbox. + * Mirrors `forecast-intelligence/promote-confirmation-dialog.tsx`, but calls the + * model_selection `promote` flow (compare and promote stay separate). + */ +export function PromoteChampionDialog({ + open, + onOpenChange, + isOverride, + defaultAliasName = '', + isPromoting, + promoteError, + promotedAlias, + onConfirm, +}: PromoteChampionDialogProps) { + const [aliasName, setAliasName] = useState(defaultAliasName) + const [approvedBy, setApprovedBy] = useState('') + const [ack, setAck] = useState(false) + + const aliasValid = ALIAS_RE.test(aliasName.trim()) + const canConfirm = + aliasValid && + approvedBy.trim().length > 0 && + (!isOverride || ack) && + !isPromoting + + function handleConfirm() { + if (!canConfirm) return + onConfirm({ + alias_name: aliasName.trim(), + approved_by: approvedBy.trim(), + acknowledge_non_recommended: isOverride ? ack : false, + }) + } + + return ( + { + if (!next) setAck(false) + onOpenChange(next) + }} + > + + + Promote champion to a registry alias + {PROMOTE_AUDIT_NOTE} + + +
+
+ + setAliasName(event.target.value)} + placeholder="e.g. champion-store5-prod8" + autoComplete="off" + data-testid="promote-alias-input" + /> + {aliasName.length > 0 && !aliasValid && ( +

+ Lowercase letters, digits, hyphens and underscores only (must start + with a letter or digit). +

+ )} +
+ +
+ + setApprovedBy(event.target.value)} + placeholder="your name" + autoComplete="off" + data-testid="promote-approver-input" + /> +
+ + {isOverride && ( + + )} + + {promoteError && ( +

+ {promoteError} +

+ )} + + {promotedAlias && ( +
+ + Promoted to alias {promotedAlias}. +
+ )} +
+ + + Close + + {isPromoting ? 'Promoting…' : 'Promote'} + + +
+
+ ) +} diff --git a/frontend/src/components/champion-selector/decision/safety-stock-panel.test.tsx b/frontend/src/components/champion-selector/decision/safety-stock-panel.test.tsx new file mode 100644 index 00000000..a5a27e9f --- /dev/null +++ b/frontend/src/components/champion-selector/decision/safety-stock-panel.test.tsx @@ -0,0 +1,54 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { SafetyStockPanel } from './safety-stock-panel' +import type { ForecastDecision } from '@/types/api' + +afterEach(cleanup) + +const decision: ForecastDecision = { + method: 'heuristic', + lead_time_days: 7, + service_level: 0.95, + z_value: 1.6449, + sigma_daily_demand: 1.41, + expected_demand_over_lead_time: 70, + safety_stock: 6.13, + reorder_point: 76.13, + bias_risk_text: 'bias text', + caveats: ['heuristic'], +} + +function renderPanel(overrides: Partial[0]> = {}) { + const props = { + decision, + leadTimeDays: 7, + serviceLevel: 0.95, + isRecomputing: false, + onLeadTimeChange: vi.fn(), + onServiceLevelChange: vi.fn(), + onRecompute: vi.fn(), + ...overrides, + } + render() + return props +} + +describe('SafetyStockPanel', () => { + it('renders the labeled heuristic header and stats', () => { + renderPanel() + const text = screen.getByTestId('safety-stock-panel').textContent ?? 
'' + expect(text).toContain('Safety stock (heuristic)') + expect(text).toContain('1.6449') + expect(text).toContain('6.1') + }) + + it('fires onLeadTimeChange and onRecompute', () => { + const props = renderPanel() + fireEvent.change(screen.getByTestId('safety-stock-lead-time'), { + target: { value: '14' }, + }) + expect(props.onLeadTimeChange).toHaveBeenCalledWith(14) + fireEvent.click(screen.getByTestId('safety-stock-recompute')) + expect(props.onRecompute).toHaveBeenCalledOnce() + }) +}) diff --git a/frontend/src/components/champion-selector/decision/safety-stock-panel.tsx b/frontend/src/components/champion-selector/decision/safety-stock-panel.tsx new file mode 100644 index 00000000..11f1b43e --- /dev/null +++ b/frontend/src/components/champion-selector/decision/safety-stock-panel.tsx @@ -0,0 +1,115 @@ +import { Loader2, RefreshCw } from 'lucide-react' +import { Button } from '@/components/ui/button' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Input } from '@/components/ui/input' +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select' +import type { ForecastDecision } from '@/types/api' +import { SAFETY_STOCK_HEADER, SERVICE_LEVEL_OPTIONS } from './constants' + +interface SafetyStockPanelProps { + decision: ForecastDecision | null + leadTimeDays: number + serviceLevel: number + isRecomputing: boolean + onLeadTimeChange: (value: number) => void + onServiceLevelChange: (value: number) => void + onRecompute: () => void +} + +function Stat({ label, value }: { label: string; value: string }) { + return ( +
+

{label}

+

{value}

+
+ ) +} + +/** + * Slice C — the CLEARLY-LABELED safety-stock heuristic. Lead time + service + * level inputs recompute the forecast decision. Never influences ranking. + */ +export function SafetyStockPanel({ + decision, + leadTimeDays, + serviceLevel, + isRecomputing, + onLeadTimeChange, + onServiceLevelChange, + onRecompute, +}: SafetyStockPanelProps) { + return ( + + + {SAFETY_STOCK_HEADER} + + A deterministic reorder heuristic (demand variability only, constant lead + time). Adjust the inputs and recompute. + + + +
+
+ Lead time (days) + onLeadTimeChange(Number(event.target.value) || 0)} + className="w-32" + data-testid="safety-stock-lead-time" + /> +
+
+ Service level + +
+ +
+ + {decision && ( +
+ + + + +
+ )} +
+
+ ) +} diff --git a/frontend/src/components/champion-selector/decision/train-forecast-actions.test.tsx b/frontend/src/components/champion-selector/decision/train-forecast-actions.test.tsx new file mode 100644 index 00000000..3ff27f99 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/train-forecast-actions.test.tsx @@ -0,0 +1,48 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { TrainForecastActions } from './train-forecast-actions' + +afterEach(cleanup) + +describe('TrainForecastActions', () => { + it('shows the blocked state for a feature-aware winner', () => { + render( + {}} + />, + ) + expect(screen.getByTestId('forecast-blocked-state').textContent).toContain( + 'What-If Planner', + ) + expect(screen.queryByTestId('forecast-button')).toBeNull() + }) + + it('fires onForecast when the trained forecast button is clicked', () => { + const onForecast = vi.fn() + render( + , + ) + fireEvent.click(screen.getByTestId('forecast-button')) + expect(onForecast).toHaveBeenCalledOnce() + }) + + it('disables the forecast button until a model is trained', () => { + render( + {}} + />, + ) + expect(screen.getByTestId('forecast-button').hasAttribute('disabled')).toBe(true) + }) +}) diff --git a/frontend/src/components/champion-selector/decision/train-forecast-actions.tsx b/frontend/src/components/champion-selector/decision/train-forecast-actions.tsx new file mode 100644 index 00000000..0ba12605 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/train-forecast-actions.tsx @@ -0,0 +1,54 @@ +import { Loader2, LineChart, Ban } from 'lucide-react' +import { Button } from '@/components/ui/button' +import { FEATURE_AWARE_BLOCKED_COPY } from './constants' + +interface TrainForecastActionsProps { + /** From the Slice A catalog (`supports_auto_predict = not feature_aware`). 
*/ + supportsAutoPredict: boolean + /** True once a model bundle has been trained for the selection. */ + trained: boolean + isPredicting: boolean + onForecast: () => void +} + +/** + * Slice C — the Forecast action + the capability-limited blocked state. + * + * A feature-aware winner cannot auto-predict (LOCKED #5): instead of faking a + * forecast we surface the limitation and route the user to the What-If Planner. + */ +export function TrainForecastActions({ + supportsAutoPredict, + trained, + isPredicting, + onForecast, +}: TrainForecastActionsProps) { + if (!supportsAutoPredict) { + return ( +
+ + {FEATURE_AWARE_BLOCKED_COPY} +
+ ) + } + + return ( + + ) +} diff --git a/frontend/src/components/champion-selector/decision/winner-decision-panel.test.tsx b/frontend/src/components/champion-selector/decision/winner-decision-panel.test.tsx new file mode 100644 index 00000000..5aca0b38 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/winner-decision-panel.test.tsx @@ -0,0 +1,48 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { WinnerDecisionPanel } from './winner-decision-panel' +import type { TrainWinnerResponse } from '@/types/api' + +afterEach(cleanup) + +describe('WinnerDecisionPanel', () => { + it('trains the recommended winner without a confirm dialog', () => { + const onTrain = vi.fn() + render( + , + ) + expect(screen.getByTestId('decision-train-button').textContent).toContain( + 'Train recommended', + ) + fireEvent.click(screen.getByTestId('decision-train-button')) + expect(onTrain).toHaveBeenCalledWith('naive', null) + }) + + it('renders the override warning from a train result', () => { + const trainResult: TrainWinnerResponse = { + selection_id: 's', + model_type: 'seasonal_naive', + model_path: 'p', + is_override: true, + override_warning: 'You trained seasonal_naive instead of naive.', + } + render( + {}} + />, + ) + expect(screen.getByTestId('decision-override-warning').textContent).toContain( + 'seasonal_naive', + ) + }) +}) diff --git a/frontend/src/components/champion-selector/decision/winner-decision-panel.tsx b/frontend/src/components/champion-selector/decision/winner-decision-panel.tsx new file mode 100644 index 00000000..5b0d58d5 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/winner-decision-panel.tsx @@ -0,0 +1,158 @@ +import { useState } from 'react' +import { Loader2, Trophy, TriangleAlert } from 'lucide-react' +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + 
AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from '@/components/ui/alert-dialog' +import { Button } from '@/components/ui/button' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Input } from '@/components/ui/input' +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select' +import type { TrainWinnerResponse } from '@/types/api' + +interface WinnerDecisionPanelProps { + winnerModelType: string + /** Every candidate offered in the run (winner + runners-up + failed). */ + candidateModelTypes: string[] + isTraining: boolean + trainResult: TrainWinnerResponse | null + /** Train the chosen model — the page routes winner vs. override. */ + onTrain: (modelType: string, overrideReason: string | null) => void +} + +/** + * Slice C — accept the recommended winner OR override to another candidate. + * + * Picking a non-winner opens a confirm dialog (explicit warning + an optional + * reason) before training. Presentational — the page owns the train mutations. + */ +export function WinnerDecisionPanel({ + winnerModelType, + candidateModelTypes, + isTraining, + trainResult, + onTrain, +}: WinnerDecisionPanelProps) { + const [selected, setSelected] = useState(winnerModelType) + const [overrideReason, setOverrideReason] = useState('') + const [confirmOpen, setConfirmOpen] = useState(false) + + const isOverride = selected !== winnerModelType + + function handleTrainClick() { + if (isOverride) { + setConfirmOpen(true) + return + } + onTrain(selected, null) + } + + function handleConfirmOverride() { + onTrain(selected, overrideReason.trim() || null) + setConfirmOpen(false) + } + + return ( + + + 5 · Decide & train + + Train the recommended champion, or override to another candidate. The + recommended model is {winnerModelType}. + + + +
+
+ Model to train + +
+ +
+ + {trainResult?.override_warning && ( +
+ + {trainResult.override_warning} +
+ )} + + {trainResult && !trainResult.override_warning && ( +

+ Trained {trainResult.model_type}. +

+ )} +
+ + + + + Train a non-recommended model? + + You picked {selected} instead of the + recommended {winnerModelType}. This is an + override and is recorded on the run. + + +
+ Reason (optional) + setOverrideReason(event.target.value)} + placeholder="e.g. domain seasonality outweighs the WAPE lead" + data-testid="override-reason-input" + /> +
+ + Cancel + + Train override + + +
+
+
+ ) +} diff --git a/frontend/src/hooks/use-model-selection.test.ts b/frontend/src/hooks/use-model-selection.test.ts index 4209a072..5074351b 100644 --- a/frontend/src/hooks/use-model-selection.test.ts +++ b/frontend/src/hooks/use-model-selection.test.ts @@ -13,8 +13,12 @@ import { useCancelSelectionRun, useModelCatalog, usePairAvailability, + usePredictWinner, + usePromoteChampion, useSelectionRun, useSubmitSelectionRun, + useTrainSelected, + useTrainWinner, } from './use-model-selection' import type { ModelCatalogResponse, @@ -271,3 +275,90 @@ describe('useCancelSelectionRun', () => { expect((call[1] as RequestInit).method).toBe('DELETE') }) }) + +// --------------------------------------------------------------- Slice C hooks + +function jsonResponse(body: unknown) { + return new Response(JSON.stringify(body), { + status: 200, + headers: { 'content-type': 'application/json' }, + }) +} + +describe('useTrainWinner', () => { + it('POSTs /train-winner (no body) and invalidates the run query', async () => { + const fetchMock = vi.fn().mockResolvedValue( + jsonResponse({ selection_id: 'sel_c', model_type: 'naive', model_path: 'p', is_override: false, override_warning: null }), + ) + vi.stubGlobal('fetch', fetchMock) + const { result } = renderHook(() => useTrainWinner('sel_c'), { + wrapper: makeWrapper(makeClient()), + }) + await act(async () => { + result.current.mutate() + }) + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + const call = fetchMock.mock.calls[0]! 
+ expect(String(call[0])).toContain('/model-selection/sel_c/train-winner') + expect((call[1] as RequestInit).method).toBe('POST') + }) +}) + +describe('useTrainSelected', () => { + it('POSTs /train-selected with the override body', async () => { + const fetchMock = vi.fn().mockResolvedValue( + jsonResponse({ selection_id: 'sel_c', model_type: 'seasonal_naive', model_path: 'p', is_override: true, override_warning: 'w' }), + ) + vi.stubGlobal('fetch', fetchMock) + const { result } = renderHook(() => useTrainSelected('sel_c'), { + wrapper: makeWrapper(makeClient()), + }) + await act(async () => { + result.current.mutate({ model_type: 'seasonal_naive', override_reason: 'domain' }) + }) + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + const call = fetchMock.mock.calls[0]! + expect(String(call[0])).toContain('/model-selection/sel_c/train-selected') + expect((call[1] as RequestInit).method).toBe('POST') + expect(String((call[1] as RequestInit).body)).toContain('seasonal_naive') + }) +}) + +describe('usePredictWinner', () => { + it('POSTs /predict with the decision params body', async () => { + const fetchMock = vi.fn().mockResolvedValue( + jsonResponse({ selection_id: 'sel_c', forecast: { points: [], total_demand: 0, average_demand: 0, horizon: 14 }, decision: null }), + ) + vi.stubGlobal('fetch', fetchMock) + const { result } = renderHook(() => usePredictWinner('sel_c'), { + wrapper: makeWrapper(makeClient()), + }) + await act(async () => { + result.current.mutate({ lead_time_days: 7, service_level: 0.95 }) + }) + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + const call = fetchMock.mock.calls[0]! 
+ expect(String(call[0])).toContain('/model-selection/sel_c/predict') + expect((call[1] as RequestInit).method).toBe('POST') + }) +}) + +describe('usePromoteChampion', () => { + it('POSTs /promote with the promote body', async () => { + const fetchMock = vi.fn().mockResolvedValue( + jsonResponse({ selection_id: 'sel_c', alias_name: 'champion-x', run_id: 'r', run_status: 'success', model_type: 'naive', is_override: false, promoted_at: '2026-06-01T00:00:00Z' }), + ) + vi.stubGlobal('fetch', fetchMock) + const { result } = renderHook(() => usePromoteChampion('sel_c'), { + wrapper: makeWrapper(makeClient()), + }) + await act(async () => { + result.current.mutate({ alias_name: 'champion-x', approved_by: 'gabor' }) + }) + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + const call = fetchMock.mock.calls[0]! + expect(String(call[0])).toContain('/model-selection/sel_c/promote') + expect((call[1] as RequestInit).method).toBe('POST') + expect(String((call[1] as RequestInit).body)).toContain('champion-x') + }) +}) diff --git a/frontend/src/hooks/use-model-selection.ts b/frontend/src/hooks/use-model-selection.ts index 2cf7286f..bc861a1b 100644 --- a/frontend/src/hooks/use-model-selection.ts +++ b/frontend/src/hooks/use-model-selection.ts @@ -2,18 +2,24 @@ import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query' import { api } from '@/lib/api' import { isTerminalSelectionStatus } from '@/components/champion-selector/results/constants' import type { + ForecastDecisionParams, ModelCatalogResponse, ModelSelectionRunRequest, ModelSelectionRunResponse, PairAvailability, + PredictWinnerResponse, + PromoteRequest, + PromoteResponse, SubmitRunResponse, + TrainSelectedRequest, + TrainWinnerResponse, } from '@/types/api' /** * Model-selection query hooks (Champion Selector). * * Slice A: catalog + availability GETs. Slice B: async submit / poll / cancel. - * Train/predict/promotion are owned by Slice C. 
+ * Slice C: train (winner / override) / predict (decision) / promote. */ /** @@ -118,3 +124,67 @@ export function useCancelSelectionRun() { }, }) } + +/** + * Invalidate the polled run query so a terminal run re-fetches the new + * `final_model_path` / `forecast` / promotion after a Slice C mutation. The invalidation promise is deliberately not awaited (`void`). + */ +function invalidateRun( + queryClient: ReturnType<typeof useQueryClient>, + selectionId: string, +) { + void queryClient.invalidateQueries({ + queryKey: ['model-selection', 'run', selectionId], + }) +} + +/** Train the ranked winner (`POST /{id}/train-winner`, no body); invalidates the polled run query on success. */ +export function useTrainWinner(selectionId: string) { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: () => + api(`/model-selection/${selectionId}/train-winner`, { + method: 'POST', + }), + onSuccess: () => invalidateRun(queryClient, selectionId), + }) +} + +/** Train a user-chosen candidate (`POST /{id}/train-selected`, override); sends the `TrainSelectedRequest` body and invalidates the polled run query on success. */ +export function useTrainSelected(selectionId: string) { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (body: TrainSelectedRequest) => + api(`/model-selection/${selectionId}/train-selected`, { + method: 'POST', + body, + }), + onSuccess: () => invalidateRun(queryClient, selectionId), + }) +} + +/** Forecast with the trained model + decision (`POST /{id}/predict`); sends the `ForecastDecisionParams` body and invalidates the polled run query on success. */ +export function usePredictWinner(selectionId: string) { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (body: ForecastDecisionParams) => + api(`/model-selection/${selectionId}/predict`, { + method: 'POST', + body, + }), + onSuccess: () => invalidateRun(queryClient, selectionId), + }) +} + +/** Promote the trained champion to a registry alias (`POST /{id}/promote`). 
*/ +export function usePromoteChampion(selectionId: string) { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (body: PromoteRequest) => + api(`/model-selection/${selectionId}/promote`, { + method: 'POST', + body, + }), + onSuccess: () => invalidateRun(queryClient, selectionId), + }) +} diff --git a/frontend/src/pages/visualize/champion.tsx b/frontend/src/pages/visualize/champion.tsx index 6157148e..30b624c8 100644 --- a/frontend/src/pages/visualize/champion.tsx +++ b/frontend/src/pages/visualize/champion.tsx @@ -25,6 +25,7 @@ import { WinnerCard } from '@/components/champion-selector/results/winner-card' import { ComparisonCharts } from '@/components/champion-selector/results/comparison-charts' import { ModelDetailDrawer } from '@/components/champion-selector/results/model-detail-drawer' import { CancelRunDialog } from '@/components/champion-selector/results/cancel-run-dialog' +import { DecisionSection } from '@/components/champion-selector/decision/decision-section' import { isTerminalSelectionStatus } from '@/components/champion-selector/results/constants' import { Button } from '@/components/ui/button' import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' @@ -369,6 +370,16 @@ export default function ChampionSelectorPage() { open={drawerOpen} onOpenChange={setDrawerOpen} /> + {/* Slice C — decide → train → forecast → interpret → promote. Keyed by + selectionId so a fresh run resets the decision state. */} + {selectionId && run.winner && ( + + )} )}
diff --git a/frontend/src/types/api.ts b/frontend/src/types/api.ts index 63ebe3f4..88a204e1 100644 --- a/frontend/src/types/api.ts +++ b/frontend/src/types/api.ts @@ -1330,6 +1330,11 @@ export interface ModelSelectionForecastSummary { total_demand: number average_demand: number horizon: number + // Slice C — additive peak/low day (null on legacy snapshots). + peak_date?: string | null + peak_demand?: number | null + low_date?: string | null + low_demand?: number | null } // Slice B — live async progress on a selection run. @@ -1384,3 +1389,66 @@ export interface SubmitRunResponse extends ModelSelectionRunResponse { monitor_url: string cancel_url: string } + +// Slice C — forecast decision, override, and promotion contracts. + +/** `POST /model-selection/{id}/train-selected` body (override): the candidate to train plus an optional free-text reason. */ +export interface TrainSelectedRequest { + model_type: string + override_reason?: string | null +} + +/** Optional `POST /model-selection/{id}/predict` body: lead time in days and target service level (tests send a fraction such as 0.95 — presumably always a 0..1 fraction; verify against the API). */ +export interface ForecastDecisionParams { + lead_time_days: number + service_level: number +} + +/** Deterministic, labeled inventory-decision heuristic (never feeds ranking). NOTE(review): the panel test fixture is consistent with safety_stock ~= z_value * sigma_daily_demand * sqrt(lead_time_days) and reorder_point = expected_demand_over_lead_time + safety_stock — confirm against the backend before relying on it. */ +export interface ForecastDecision { + method: 'heuristic' + lead_time_days: number + service_level: number + z_value: number + sigma_daily_demand: number + expected_demand_over_lead_time: number + safety_stock: number + reorder_point: number + bias_risk_text: string + caveats: string[] +} + +/** `POST /model-selection/{id}/train-winner` and `/train-selected` response. `override_warning` is nullable; in the hook tests it is null for the ranked winner and a string for an override (presumably tracks `is_override` — verify). */ +export interface TrainWinnerResponse { + selection_id: string + model_type: string + model_path: string + is_override: boolean + override_warning: string | null +} + +/** `POST /model-selection/{id}/predict` response (forecast + decision); `decision` may be null. */ +export interface PredictWinnerResponse { + selection_id: string + forecast: ModelSelectionForecastSummary + decision: ForecastDecision | null +} + +/** `POST /model-selection/{id}/promote` body (approval-gated). 
*/ +export interface PromoteRequest { + alias_name: string + approved_by: string + acknowledge_non_recommended?: boolean + description?: string | null +} + +/** `POST /model-selection/{id}/promote` response. */ +export interface PromoteResponse { + selection_id: string + alias_name: string + run_id: string + run_status: string + model_type: string + is_override: boolean + promoted_at: string // ISO datetime +}