diff --git a/.env.example b/.env.example index 7d49f5b9..38ef75b4 100644 --- a/.env.example +++ b/.env.example @@ -126,5 +126,15 @@ BATCH_GLOBAL_MAX_PARALLEL=4 # mid-call, so a long fit can stall the drain. BATCH_CANCEL_DRAIN_TIMEOUT_SECONDS=30 +# Model selection (champion selector) async runner (Slice B) +# Hard upper bound on concurrent candidate backtests across all active selection +# runs on this host. Effective parallelism per run is min(this, candidates). +# Set to 1 for sequential execution. Requires uvicorn restart to apply. +MODEL_SELECTION_GLOBAL_MAX_PARALLEL=4 +# Max seconds DELETE /model-selection/{id} waits for in-flight candidates to +# drain before returning RFC 7807 504. sklearn / LightGBM fits are uncancellable +# mid-call, so a long fit can stall the drain. +MODEL_SELECTION_CANCEL_DRAIN_TIMEOUT_SECONDS=30 + # Frontend (Vite) VITE_API_BASE_URL=http://localhost:8123 diff --git a/PRPs/ai_docs/forecast-champion-selector-backend-research.md b/PRPs/ai_docs/forecast-champion-selector-backend-research.md new file mode 100644 index 00000000..2d37603b --- /dev/null +++ b/PRPs/ai_docs/forecast-champion-selector-backend-research.md @@ -0,0 +1,222 @@ +# Forecast Champion Selector Backend Research + +Date: 2026-06-01 + +This note captures external-library and runtime facts used by +`PRPs/forecast-champion-selector-backend.md`. It is intentionally narrow: +only claims that affect backend implementation are recorded here. + +## Official Documentation References + +- FastAPI APIRouter / multi-file apps: + https://fastapi.tiangolo.com/tutorial/bigger-applications/ + - Reason: the new `app/features/model_selection/routes.py` must follow the + existing `APIRouter(prefix=..., tags=...)` slice pattern and be wired in + `app/main.py`. 
+ +- Pydantic v2 strict mode and field-level overrides: + https://pydantic.dev/docs/validation/latest/concepts/strict_mode/ + - Reason: ForecastLabAI request schemas use `ConfigDict(strict=True)`, but + JSON request bodies still need date/datetime/UUID/Decimal fields to accept + JSON-native strings via `Field(strict=False, ...)`. + +- SQLAlchemy 2.0 PostgreSQL JSONB: + https://docs.sqlalchemy.org/en/20/dialects/postgresql.html#json-types + - Reason: `model_selection_run` should store immutable request/response + snapshots (`candidate_models`, `ranking_result`, `winner_metrics`, + `forecast_result`, `business_summary`) as PostgreSQL JSONB. + +- Alembic `Operations.create_index`: + https://alembic.sqlalchemy.org/en/latest/ops.html#alembic.operations.Operations.create_index + - Reason: the migration should use explicit named indexes; any partial or + JSONB index must use Alembic operations rather than raw SQL. + +- scikit-learn `TimeSeriesSplit`: + https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.TimeSeriesSplit.html + - Reason: the selector's recommended split defaults mirror the project's + own `SplitConfig` semantics and should not assume unsupported parameters. + +## Runtime Verification Commands + +Run from repository root on 2026-06-01. + +```bash +uv run python -c "import inspect; from sqlalchemy import select, table, column; import sqlalchemy; stmt=select(column('id')).select_from(table('t')).with_for_update(skip_locked=True); print('sqlalchemy', sqlalchemy.__version__); print('with_for_update_has_skip_locked', 'skip_locked' in str(inspect.signature(select(column('id')).with_for_update))); print(stmt)" +``` + +Observed: + +```text +sqlalchemy 2.0.46 +with_for_update_has_skip_locked True +SELECT id +FROM t FOR UPDATE +``` + +Note: generic SQL compilation does not render PostgreSQL-specific +`SKIP LOCKED`; use PostgreSQL dialect compilation in tests when asserting +that string. 
+ +```bash +uv run python -c "from datetime import date; import pydantic; from pydantic import BaseModel, ConfigDict, Field; M=type('M',(BaseModel,),{'__annotations__':{'d':date},'model_config':ConfigDict(strict=True),'d':Field(strict=False)}); print('pydantic', pydantic.__version__); print(M.model_validate({'d':'2026-06-01'}).d.isoformat())" +``` + +Observed: + +```text +pydantic 2.12.5 +2026-06-01 +``` + +```bash +uv run python -c "import inspect, sklearn; from sklearn.model_selection import TimeSeriesSplit; print('sklearn', sklearn.__version__); print(inspect.signature(TimeSeriesSplit)); t=TimeSeriesSplit(n_splits=3, test_size=2, gap=1); print(t)" +``` + +Observed: + +```text +sklearn 1.8.0 +(n_splits=5, *, max_train_size=None, test_size=None, gap=0) +TimeSeriesSplit(gap=1, max_train_size=None, n_splits=3, test_size=2) +``` + +```bash +uv run python -c "import inspect, fastapi; from fastapi import APIRouter, BackgroundTasks; print('fastapi', fastapi.__version__); print('APIRouter_prefix_param', 'prefix' in inspect.signature(APIRouter).parameters); print('BackgroundTasks_add_task', inspect.signature(BackgroundTasks.add_task))" +``` + +Observed: + +```text +fastapi 0.128.0 +APIRouter_prefix_param True +BackgroundTasks_add_task (self, func: ..., *args: P.args, **kwargs: P.kwargs) -> None +``` + +```bash +uv run python -c "import inspect, alembic; from alembic.operations import Operations; print('alembic', alembic.__version__); print(inspect.signature(Operations.create_index))" +``` + +Observed: + +```text +alembic 1.18.4 +(self, index_name, table_name, columns, *, schema=None, unique=False, if_not_exists=None, **kw) -> None +``` + +## Implementation Consequences + +- Use `Literal[...]` request fields for JSON string enums under + `ConfigDict(strict=True)`; convert to ORM enums at service boundaries. +- Use `Field(strict=False, ...)` on every request-body date/datetime/UUID/ + Decimal field, or `app/core/tests/test_strict_mode_policy.py` can fail. 
+- Persist selector decisions in JSONB snapshots because registry metrics are + free-form JSONB and metric key names differ across layers. +- Do not assume a batch backtest item contains fold-level chart data. Batch + metrics are intentionally pinned to `{wape, smape, mae, bias, sample_size}`. +- If an implementation compiles SQL for PostgreSQL-specific clauses, compile + with the PostgreSQL dialect rather than relying on generic SQL strings. + +## Verified Internal Service Contracts (read from source 2026-06-01) + +These are the in-repo signatures the selector orchestrates. They were the prior +draft's #1 residual risk; recorded here so they survive and can be re-verified on +refactor. Re-verify with `grep -n "async def run_backtest\|async def train_model\|async def predict" app/features/backtesting/service.py app/features/forecasting/service.py`. + +### BacktestingService — `app/features/backtesting/service.py:213` + +```python +# __init__(self) -> None — takes NO db; instantiate as BacktestingService() +async def run_backtest( + self, db: AsyncSession, store_id: int, product_id: int, + start_date: date, end_date: date, config: BacktestConfig, +) -> BacktestResponse +``` + +`BacktestConfig` (`backtesting/schemas.py:81`, `frozen=True, extra="forbid"`): +`split_config: SplitConfig`, `model_config_main: Annotated[ModelConfig, Field(discriminator="model_type")]`, +`include_baselines: bool = True`, `store_fold_details: bool = True`. + +`SplitConfig` (`:24`): `strategy: Literal["expanding","sliding"]="expanding"`, +`n_splits: int=5 (ge=2,le=20)`, `min_train_size: int=30 (ge=7)`, `gap: int=0 (ge=0,le=30)`, +`horizon: int=14 (ge=1,le=90)`; validator `horizon > gap`. + +### BacktestResponse — `backtesting/schemas.py:257` + +`main_model_results: ModelBacktestResult`, `baseline_results: list[ModelBacktestResult] | None`, +plus `backtest_id, store_id, product_id, config_hash, split_config, comparison_summary, +duration_ms, leakage_check_passed`. 
+ +`ModelBacktestResult` (`:180`): `model_type, config_hash, fold_results: list[FoldResult], +aggregated_metrics: dict[str,float], metric_std: dict[str,float], +bucketed_aggregated_metrics: dict|None, feature_aware: bool, exogenous_policy`. + +`FoldResult` (`:147`): `fold_index, split, dates: list[date], actuals: list[float], +predictions: list[float], metrics: dict[str,float], horizon_bucket_metrics`. + +**Metric keys (CORRECTION to the prior draft):** `aggregated_metrics` has **five** keys — +`{"mae", "rmse", "smape", "wape", "bias"}` (`backtesting/metrics.py:347`; PRP-36 added `rmse`). +`metric_std` keys are suffixed `"{name}_stability"` (a coefficient of variation, not a raw std). +`sample_size` is NOT in `aggregated_metrics` — derive from fold actuals length or n_folds. +Fold chart data path: `main_model_results.fold_results[i].{dates,actuals,predictions}` — populated +only when `config.store_fold_details=True`. + +### ForecastingService — `app/features/forecasting/service.py` + +```python +# __init__(self) -> None +async def train_model( # :247 + self, db: AsyncSession, store_id: int, product_id: int, + train_start_date: date, train_end_date: date, config: ModelConfig, + *, feature_frame_version: int = 1, feature_groups: list[str] | None = None, +) -> TrainResponse # TrainResponse.model_path is the artifact path + +async def predict( # :402 — NO db arg + self, store_id: int, product_id: int, horizon: int, model_path: str, +) -> PredictResponse # PredictResponse.forecasts: list[ForecastPoint] +``` + +`predict()` rejects feature-aware models (`service.py:491`) — feature-aware winners must route +through `/scenarios/simulate`; catch and warn rather than 500. + +### ModelConfig union — `forecasting/schemas.py:417` + +Plain PEP 604 union (`NaiveModelConfig | SeasonalNaiveModelConfig | … | ProphetLikeModelConfig`), +discriminated by each member's `model_type` Literal. 
Members are **flat** (`SeasonalNaiveModelConfig` +has `model_type` + `season_length`, NOT a nested `params`). No module-level `TypeAdapter`/helper. +Build from `{"model_type": ..., "params": {...}}` by FLATTENING: + +```python +from pydantic import TypeAdapter +from app.features.forecasting.schemas import ModelConfig +TypeAdapter(ModelConfig).validate_python({"model_type": c.model_type, **c.params}) +``` + +Members are `frozen=True, extra="forbid"` → bad params raise `ValidationError` (treat as a failed +candidate). `model_type` values: `naive, seasonal_naive, moving_average, weighted_moving_average, +seasonal_average, trend_regression_baseline, random_forest, lightgbm, xgboost, regression, +prophet_like` (`lightgbm`/`xgboost` are opt-in extras → may `ImportError`). + +### Data-platform ORM column names — `data_platform/models.py` + +`Store` (`:40`): `id` (int PK), `code` (business key — NOT `store_code`). `Product` (`:68`): `id`, +`sku`, `launch_date: date|None`. `SalesDaily` (`:172`): `date` (FK calendar.date), `store_id`, +`product_id`, `quantity` (Integer, CHECK ≥0), `unit_price`, `total_amount`; grain unique +`(date, store_id, product_id)`. `Promotion` (`:274`): `product_id` NOT NULL, `store_id` NULLABLE +(NULL = chain-wide, applies to all stores), date RANGE `[start_date, end_date]`, +`kind ∈ {pct_off,bogo,bundle,markdown}`. + +### Cross-cutting patterns + +- Exceptions (`app/core/exceptions.py`): `BadRequestError`(400), `NotFoundError`(404), + `DatabaseError`(500), `ConflictError`(409), `UnprocessableEntityError`(422); each + `(message=..., details=None)`. Routes map `ValueError→BadRequestError`, + `SQLAlchemyError→DatabaseError` (mirror `backtesting/routes.py:60`). +- `validate_date_range` is slice-local in `analytics/routes.py:36` (raises `BadRequestError`, + inverted-range + 730-day-max) — NOT importable cross-slice; reimplement locally. 
+- `TimestampMixin` (`app/shared/models.py`): `created_at`/`updated_at`, `server_default func.now()`, + `updated_at onupdate func.now()`. Mix in first: `class X(TimestampMixin, Base)`. +- JSONB import differs: migration `from sqlalchemy.dialects import postgresql` → + `postgresql.JSONB(astext_type=sa.Text())`; ORM `from sqlalchemy.dialects.postgresql import JSONB`. +- `app/main.py` wires routers as `from app.features.{slice}.routes import router as {slice}_router` + + `app.include_router({slice}_router)` (NO prefix at include; the router carries it). +- Current alembic head observed: `c1d2e3f40512` (`create_batch_tables`). diff --git a/PRPs/forecast-champion-selector-backend.md b/PRPs/forecast-champion-selector-backend.md new file mode 100644 index 00000000..651fc009 --- /dev/null +++ b/PRPs/forecast-champion-selector-backend.md @@ -0,0 +1,970 @@ +name: "Forecast Champion Selector Backend" +description: | + Backend foundation for an interactive Forecast Champion Selector. Adds a + first-class `model_selection` vertical slice that validates a store/product + pair, recommends/selects backtest settings, runs candidate model comparison, + ranks results by WAPE/sMAPE/bias/MAE, persists an auditable selection record, + and optionally trains/predicts with the winning model. This PRP deliberately + scopes UI work out; it creates the stable backend contract the UI can consume. + +**Created:** 2026-06-01 · **Refined:** 2026-06-01 (signatures verified against live code) +**Current repo base observed:** `dev` at `1b4c3f3` (`Merge pull request #352 ...fix/agents-finalizer-fallback`) +**Current alembic head observed:** `c1d2e3f40512` (`create_batch_tables`) — verify with `uv run alembic heads` at implementation time and chain to whatever head exists THEN. +**Working-tree caveat observed:** `docker-compose.lan.yml` is an untracked local dogfood override; do not commit it. +**Tracking issue:** create before implementation, suggested title `feat(api): add forecast champion selector backend`. 
+**Suggested branch:** `feat/forecast-champion-selector-backend` (off `dev`, per `.claude/rules/branch-naming.md`). +**Commit scope:** `api` (cross-feature backend wiring + new slice + `app/main.py`) and `db` (migration). Every commit references the tracking issue. + +--- + +## VALIDATE — Missing Backend Surface Check + +The lower-level primitives exist; the business workflow does not. + +### Reusable backend primitives already present (verified) + +- `POST /backtesting/run` → single store/product/model backtest with fold metrics, + aggregated metrics, optional baselines, bucketed horizon metrics, leakage status. + `app/features/backtesting/routes.py:24` (router), `:60` (handler). + **Service entry point is `BacktestingService().run_backtest(db, store_id, product_id, start_date, end_date, config)`** — see verified signature below. +- `POST /forecasting/train` → trains one model; supports `feature_frame_version` (1|2) and + `feature_groups`. `app/features/forecasting/routes.py:25`. Service: + `ForecastingService().train_model(db, store_id, product_id, train_start_date, train_end_date, config, *, feature_frame_version=1, feature_groups=None) -> TrainResponse`. +- `POST /forecasting/predict` → predicts from a saved bundle. Service: + `ForecastingService().predict(store_id, product_id, horizon, model_path) -> PredictResponse` + (**no db arg** — loads bundle from disk; rejects feature-aware models, `service.py:491`). +- `POST /batch/forecasting` fan-out exists but pins metrics to five keys and does **not** + expose fold-level chart data — NOT suitable for this slice's chart payload. +- `GET /dimensions/stores`, `GET /dimensions/products` provide dimension metadata. +- `app/features/ops/service.py` is the canonical read-only cross-slice ORM aggregation precedent. 
+ +### Backend pieces missing for the full feature + +- No `app/features/model_selection/` slice; no `POST /model-selection/run`; no persisted + `model_selection_run` table; no orchestration of pair-validation → candidate backtests → + ranking → optional final train → optional predict; no pair-availability endpoint; no + backend ranking/confidence policy; no deterministic business explanation layer; no + chart-ready comparison payload. +- Batch/Job model allow-lists are narrower than forecasting's full `ModelConfig` union, and + job/batch training does not pass `feature_frame_version`/`feature_groups`. **Therefore this + slice calls the direct backtesting/forecasting services**, not batch/jobs. + +--- + +## BRAINSTORM / RERANK — Chosen Scope + +Chosen: **Option A — Backend foundation only** (new `model_selection` slice: pair +availability, candidate comparison, ranking/confidence, persisted audit, optional +train/predict, chart-ready payload). It covers every backend gap the eventual UI needs, +reuses mature primitives, creates a stable testable contract, and avoids frontend coupling. + +Non-goals (out of scope for this PRP): + +- No React page / shadcn UI / frontend routing. +- No agent tool, no `agent_require_approval` entry, no agent mutation surface. +- No alias auto-promotion (the selector may *recommend* a winner; alias mutation is a future + approval-gated PRP). +- No batch model-zoo retrofit. Use direct services for the single selected pair. + +--- + +## Goal + +**Feature Goal:** A backend-only Forecast Champion Selector vertical slice that, given one +store/product pair + window + horizon + candidate models, validates data availability, runs +comparable backtests for every candidate, deterministically ranks completed candidates, +computes a recommendation confidence with reasons, persists an auditable selection run, and +returns chart-ready comparison data plus optional final-model training and forecast output. 
+ +**Deliverable:** `app/features/model_selection/` slice (`models.py`, `schemas.py`, +`ranking.py`, `explanations.py`, `service.py`, `routes.py`, `tests/`) + one Alembic migration +creating `model_selection_run`, wired in `app/main.py`. + +**Success Definition:** `POST /model-selection/run` with the default five candidates against +a seeded pair returns HTTP 200 with a persisted `selection_id`, a non-empty deterministic +`ranking`, a `winner`, a `recommendation_confidence`, and a `chart_data` payload; the row is +retrievable by `GET /model-selection/{selection_id}`; all validation gates pass. + +## Why + +- Business users want to ask "which model should I use for this store/product?" without + manually coordinating `/backtesting/run`, `/forecasting/train`, `/forecasting/predict`. +- The UI needs **one stable backend contract** rather than re-implementing ranking in TypeScript. +- A persisted selection run makes the model choice auditable: which models competed, which + window, which policy, and why the winner won. +- Keeps the single-host architecture intact — no queue, no cloud SDK, no new service. 
+ +## What + +### New endpoints (all under `APIRouter(prefix="/model-selection", tags=["model-selection"])`) + +```http +GET /model-selection/availability?store_id=...&product_id=...&forecast_horizon=14 +POST /model-selection/run +GET /model-selection/{selection_id} +GET /model-selection/{selection_id}/ranking +POST /model-selection/{selection_id}/train-winner +POST /model-selection/{selection_id}/predict +``` + +### Core request shape (`POST /model-selection/run`) + +```json +{ + "store_id": 1, + "product_id": 1, + "selection_window": { "start_date": "2026-01-01", "end_date": "2026-05-31" }, + "forecast_horizon": 14, + "ranking_metric": "wape", + "split_config": { "strategy": "expanding", "n_splits": 5, "min_train_size": 30, "gap": 0, "horizon": 14 }, + "candidate_models": [ + {"model_type": "naive", "params": {}}, + {"model_type": "seasonal_naive", "params": {"season_length": 7}}, + {"model_type": "moving_average", "params": {"window_size": 7}}, + {"model_type": "regression", "params": {}}, + {"model_type": "prophet_like", "params": {}} + ], + "feature_frame_version": 1, + "feature_groups": null, + "auto_train_winner": false, + "auto_predict": false +} +``` + +### LOCKED decisions (these remove every "choose one and test" ambiguity in the prior draft) + +1. **HTTP status codes:** `POST /model-selection/run` → **200** (synchronous, returns the + full result, mirrors `/backtesting/run` which is `status.HTTP_200_OK`). All GETs → 200. + `train-winner` / `predict` → 200. (201 is *not* used; the row is an audit side-effect, the + response is the computed result. Tests lock 200.) +2. **Availability gate:** if `availability.status == "unusable"`, **fail fast** — persist the + row as `status="failed"` with `error_message`, then raise `BadRequestError` (RFC 7807 **400**). + Nothing is ranked. +3. 
**All-candidates-fail (availability OK but every backtest errored):** **do NOT raise.** + Persist `status="failed"`, `ranking_result` with the failed entries, `winner=null`, and + return **200** with the failed-status response. Rationale: the run was validly attempted and + is an auditable outcome, not a client error. (Distinguish from #2: #2 is "we never started".) +4. **Per-candidate backtest config:** `BacktestConfig(split_config=req.split_config, + model_config_main=candidate_model_config, include_baselines=False, store_fold_details=True)`. + `include_baselines=False` because each candidate is itself a `model_config_main` run — we do + not want N redundant baseline runs. `store_fold_details=True` so fold chart data is populated. +5. **`split_config.horizon` MUST equal `forecast_horizon`** (model-validator on the request). + The window dates from `selection_window` become `run_backtest`'s `start_date`/`end_date`. +6. **Ranking determinism:** primary = `ranking_metric` (default `"wape"`), then the fixed + tie-break chain `wape → smape → abs(bias) → mae → model_type`. With the default, the sort key + is exactly `(wape, smape, abs(bias), mae, model_type)` (success-criteria order). A non-default + `ranking_metric` puts that metric first, remaining chain follows excluding the duplicate. +7. **`auto_predict=True` requires `auto_train_winner=True`** (request model-validator) — predict + needs a freshly trained `final_model.model_path` from this run. + +### Success Criteria + +- [ ] `app/features/model_selection/` slice exists and is wired in `app/main.py`. +- [ ] `POST /model-selection/run` with the default five candidates returns a persisted + `status="completed"` (or `"partial"`) selection with `winner`, `ranking`, confidence, and `chart_data`. 
+- [ ] `GET /model-selection/availability` returns: `first_sales_date`, `last_sales_date`, + `observed_days`, `expected_calendar_days`, `coverage_ratio`, `missing_days`, + `zero_sale_days`, `promotion_days` (or `null` + warning), `average_daily_demand`, + `status` ∈ `{ready, limited, unusable}`, and `recommended_split_config`. +- [ ] Ranking is deterministic per LOCKED decision #6. +- [ ] Partial success supported (LOCKED #3): failed candidates appear in `ranking` with error + detail and are excluded from winner selection; a valid candidate still wins. +- [ ] `auto_train_winner=True` stores `final_model.model_path` via the **direct** + `ForecastingService.train_model`, preserving `feature_frame_version` + `feature_groups`. +- [ ] `auto_predict=True` (with train) returns forecast points + total/average demand summary. +- [ ] New migration creates `model_selection_run` with JSONB snapshots and named indexes; + `downgrade` drops indexes then table cleanly. +- [ ] `app/core/tests/test_strict_mode_policy.py` stays green for all new strict request schemas. +- [ ] No agent tools / `agent_require_approval` entries; no frontend files; no cloud SDK. + +## All Needed Context + +### Documentation & References + +```yaml +# PRP conventions +- file: PRPs/templates/prp_base.md + why: Base template (Goal/Context/Blueprint/Validation). NOTE — the user referenced a + "PRPs/prp-readme.md.md"; it does NOT exist (`find PRPs -iname '*readme*'` empty on 2026-06-01). +- file: PRPs/PRP-33-batch-runner-mvp.md + why: Strongest backend vertical-slice precedent — migration assertions, strict-mode gotchas, + route/test detail. Mirror its structure. +- file: PRPs/PRP-28-forecast-explainability-driver-attribution.md + why: Read/composition-slice precedent consuming existing contracts; deterministic explanation layer. 
+- docfile: PRPs/ai_docs/forecast-champion-selector-backend-research.md + why: External-lib + runtime verification (FastAPI APIRouter, Pydantic strict, JSONB, Alembic + create_index, sklearn TimeSeriesSplit). Versions: pydantic 2.12.5, sqlalchemy 2.0.46, + sklearn 1.8.0, fastapi 0.128.0, alembic 1.18.4. + +# Verified service contracts to reuse (DO NOT re-derive — exact signatures below in Gotchas) +- file: app/features/backtesting/service.py + why: BacktestingService().run_backtest(db, store_id, product_id, start_date, end_date, config). :213 +- file: app/features/backtesting/schemas.py + why: SplitConfig :24, BacktestConfig :81, BacktestResponse :257, ModelBacktestResult :180, + FoldResult :147. aggregated_metrics keys = {mae,rmse,smape,wape,bias}. +- file: app/features/backtesting/routes.py + why: EXACT route error-mapping pattern to mirror (try/except ValueError->BadRequestError, + SQLAlchemyError->DatabaseError; service instantiated as BacktestingService()). :60-140 +- file: app/features/forecasting/service.py + why: ForecastingService().train_model :247 (db first; feature_frame_version/feature_groups + keyword-only after *), predict :402 (NO db). Lazy cross-slice import precedent :55-61, :967. +- file: app/features/forecasting/schemas.py + why: ModelConfig union :417-429 (flat members, model_type discriminator, NO module-level helper); + TrainResponse.model_path :540; PredictResponse.forecasts :605; ForecastPoint :574. +- file: app/features/data_platform/models.py + why: Store :40 (business key `code`, not store_code), Product :68 (`sku`, `launch_date`), + SalesDaily :172 (date/store_id/product_id/quantity/unit_price/total_amount), Promotion :274. +- file: app/features/ops/service.py + why: Read-only cross-slice ORM aggregation precedent — module-scope ORM-model imports, stateless + service, db: AsyncSession per method, func.min/max/count/sum + group_by style. :225, :456. 
+- file: app/features/analytics/routes.py + why: validate_date_range :36 (raises BadRequestError, inverted-range + 730-day-max). CANNOT be + cross-slice imported — reimplement the two checks locally raising BadRequestError. +- file: app/core/exceptions.py + why: BadRequestError(400) :152, NotFoundError(404) :64, DatabaseError(500) :108, + ConflictError(409) :130, UnprocessableEntityError(422) :174. Each: (message=..., details=None). +- file: app/core/problem_details.py + why: RFC 7807 envelope; never raise bare HTTPException with raw strings. +- file: app/core/config.py + why: get_settings() cached singleton :225; Settings(BaseSettings) :62; add a plain typed attr + with literal default; env var = UPPER_SNAKE of the field name. +- file: app/core/database.py + why: Base (ORM declarative base) + get_db dependency used by routes/tests. +- file: app/shared/models.py + why: TimestampMixin (created_at/updated_at, server_default func.now(), updated_at onupdate). Mix in first. +- file: app/main.py + why: Router wiring — `from app.features.{slice}.routes import router as {slice}_router` (:18-26), + `app.include_router({slice}_router)` with NO prefix at include (:137-155), inside create_app(). +- file: app/core/tests/test_strict_mode_policy.py + why: AST policy — scans app/features/*/schemas.py; any ConfigDict(strict=True) model field typed + date/datetime/time/UUID/Decimal (anywhere in the annotation) MUST carry Field(strict=False, ...). + +# Migration / test patterns +- file: alembic/versions/c1d2e3f40512_create_batch_tables.py + why: JSONB via `from sqlalchemy.dialects import postgresql` -> postgresql.JSONB(astext_type=sa.Text()); + named CheckConstraint; op.create_index (op.f for single-col, explicit name for composite); + sa.DateTime(timezone=True) server_default sa.text("now()"); downgrade drops indexes THEN table. 
+- file: app/features/batch/models.py + why: ORM JSONB via `from sqlalchemy.dialects.postgresql import JSONB` (bare); Mapped[]+mapped_column; + status as String + default=Enum.PENDING.value + CheckConstraint in __table_args__; TimestampMixin. +- file: app/features/batch/schemas.py + why: Strict request pattern — ConfigDict(strict=True), Literal[...] for JSON enums, Field(strict=False) + on date fields (:132-133), @model_validator cross-field checks. +- file: app/features/explainability/tests/test_routes.py + why: ASGITransport + AsyncClient + app.dependency_overrides[get_db]; RFC 7807 4-key body assert; async tests. +- file: app/features/explainability/tests/conftest.py + why: Integration fixture — real engine from get_settings().database_url, prefix-scoped teardown in finally. + +# External official docs (verified in research doc) +- url: https://fastapi.tiangolo.com/tutorial/bigger-applications/ + why: APIRouter prefix/tags multi-file pattern. +- url: https://pydantic.dev/docs/validation/latest/concepts/strict_mode/ + why: strict mode + field-level Field(strict=False) override (runtime-verified, pydantic 2.12.5). +- url: https://docs.sqlalchemy.org/en/20/dialects/postgresql.html#json-types + why: JSONB column type for audit snapshots. +- url: https://alembic.sqlalchemy.org/en/latest/ops.html#alembic.operations.Operations.create_index + why: create_index signature (alembic 1.18.4: index_name, table_name, columns, *, unique, **kw). +- url: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.TimeSeriesSplit.html + why: split semantics (sklearn 1.8.0 signature: n_splits, *, max_train_size, test_size, gap). 
+``` + +### Current Codebase Tree (relevant slices) + +```bash +app/features/ +├── analytics/ # KPI/drilldown/timeseries; validate_date_range lives in routes.py (slice-local) +├── backtesting/ # single-pair single-model backtesting; fold/chart data via store_fold_details +├── batch/ # batch fan-out; pinned 5-key metrics; NO fold chart data +├── data_platform/ # shared ORM: Store, Product, SalesDaily, Promotion, InventorySnapshotDaily, ... +├── dimensions/ # store/product discovery +├── forecasting/ # direct train/predict; full ModelConfig union +├── jobs/ # train/predict/backtest job orchestration +├── ops/ # read-only cross-slice ORM aggregation precedent (OpsService) +└── registry/ # model runs, aliases, compare, artifact verify +alembic/versions/ # current head: c1d2e3f40512 (create_batch_tables) +``` + +### Desired Codebase Tree + +```bash +app/features/model_selection/ +├── __init__.py +├── models.py # ModelSelectionRun ORM + ModelSelectionStatus enum +├── schemas.py # strict request models + response models +├── ranking.py # PURE: normalize metrics, filter, rank, confidence +├── explanations.py # PURE: deterministic business summary + confidence_reasons +├── service.py # ModelSelectionService: availability + orchestration (lazy cross-slice imports) +├── routes.py # APIRouter(prefix="/model-selection") +└── tests/ + ├── __init__.py + ├── conftest.py + ├── test_models.py + ├── test_schemas.py + ├── test_ranking.py + ├── test_explanations.py + ├── test_service.py + ├── test_routes.py + └── test_routes_integration.py +alembic/versions/{revision}_create_model_selection_run.py +``` + +### Known Gotchas & VERIFIED Library/Internal Contracts + +```python +# ── VERIFIED INTERNAL SIGNATURES (exact, read 2026-06-01) ───────────────────── +# BacktestingService.__init__(self) -> None # takes NO db; instantiate as BacktestingService() +# await BacktestingService().run_backtest( +# db, store_id, product_id, start_date, end_date, config: BacktestConfig +# ) -> BacktestResponse # 
service.py:213 ; db is FIRST arg +# +# ForecastingService.__init__(self) -> None +# await ForecastingService().train_model( +# db, store_id, product_id, train_start_date, train_end_date, config: ModelConfig, +# *, feature_frame_version: int = 1, feature_groups: list[str] | None = None +# ) -> TrainResponse # service.py:247 ; .model_path is the artifact path +# await ForecastingService().predict( +# store_id, product_id, horizon, model_path # NO db arg — loads bundle from disk +# ) -> PredictResponse # service.py:402 ; .forecasts: list[ForecastPoint] +# # ForecastPoint: {date, forecast, lower_bound?, upper_bound?} +# GOTCHA: predict() REJECTS feature-aware models (service.py:491). For a feature-aware winner, +# auto_predict may raise; catch and surface a warning rather than failing the whole run. + +# ── METRIC KEYS — CORRECTED (draft was incomplete) ──────────────────────────── +# BacktestResponse.main_model_results.aggregated_metrics has FIVE keys: +# {"mae", "rmse", "smape", "wape", "bias"} # metrics.py:347 — draft MISSED "rmse" +# metric_std keys are SUFFIXED "{name}_stability" (a coefficient of variation, NOT raw std). +# sample_size is NOT in aggregated_metrics — derive it from fold actuals length +# (sum of len(fold.actuals) across fold_results) or n_folds; normalize in ranking.py. +# Fold chart data path: BacktestResponse.main_model_results.fold_results[i].{dates, actuals, predictions} +# populated ONLY when config.store_fold_details=True (LOCKED #4 sets it True). +# bucketed_aggregated_metrics lives on each ModelBacktestResult (optional, may be None). + +# ── ModelConfig CONSTRUCTION — members are FLAT, no nested "params" ──────────── +# The request uses {"model_type": "seasonal_naive", "params": {"season_length": 7}} but the +# ModelConfig members are FLAT (SeasonalNaiveModelConfig has model_type + season_length at top +# level). There is NO module-level TypeAdapter/helper. 
Build at the service boundary by FLATTENING: +# from pydantic import TypeAdapter +# from app.features.forecasting.schemas import ModelConfig +# _MODEL_CONFIG_ADAPTER = TypeAdapter(ModelConfig) +# cfg = _MODEL_CONFIG_ADAPTER.validate_python({"model_type": c.model_type, **c.params}) +# Members are frozen + extra="forbid", so unknown params raise a ValidationError (good — surfaces +# bad candidate params as a failed candidate with a reason). Do this import LAZILY in-method. +# Valid model_type values (full union, forecasting/schemas.py:417): naive, seasonal_naive, +# moving_average, weighted_moving_average, seasonal_average, trend_regression_baseline, +# random_forest, lightgbm, xgboost, regression, prophet_like. +# (lightgbm/xgboost are opt-in extras — may ImportError at runtime; treat as a failed candidate.) + +# ── CROSS-SLICE IMPORT RULE ─────────────────────────────────────────────────── +# Vertical-slice rule: app/features/X must not import app/features/Y at MODULE scope when it +# would close an alembic cold-boot cycle. model_selection is a NEW leaf (nothing imports it), but +# to match the BatchService/forecasting precedent and stay safe, import the SERVICE CLASSES +# (BacktestingService, ForecastingService) and the ModelConfig TypeAdapter LAZILY inside the +# methods that use them. Read ORM models (Store/Product/SalesDaily/Promotion) at module scope — +# that mirrors OpsService and is the sanctioned read-only ORM surface. + +# ── validate_date_range IS NOT IMPORTABLE ───────────────────────────────────── +# It lives in app/features/analytics/routes.py (slice-local). Reimplement the two checks locally +# (inverted range; max-span) raising app.core.exceptions.BadRequestError, OR rely on schema +# validators. Do NOT import across the slice boundary. +# NOTE: analytics' max-span is settings.analytics_max_date_range_days (configurable, ~730), not a +# hardcoded constant — pick your own local bound (or reuse the setting) when reimplementing. 
+ +# ── STRICT-MODE POLICY (app/core/tests/test_strict_mode_policy.py) ──────────── +# Every request model with model_config = ConfigDict(strict=True) MUST add Field(strict=False, ...) +# to EVERY field typed date|datetime|time|UUID|Decimal (incl. inside Optional/Annotated/list/dict). +# Use Literal[...] for JSON enum strings (NOT a str-Enum — strict won't coerce). The AST walker does +# NOT follow inheritance, so set ConfigDict(strict=True) on each concrete request model directly. + +# ── ORM / MIGRATION QUIRKS ──────────────────────────────────────────────────── +# JSONB import DIFFERS by layer: +# migration: from sqlalchemy.dialects import postgresql -> postgresql.JSONB(astext_type=sa.Text()) +# ORM: from sqlalchemy.dialects.postgresql import JSONB -> mapped_column(JSONB) +# Status enum enforced via CheckConstraint("status IN (...)", name="ck_...") in BOTH migration and +# ORM __table_args__; ORM column is String(N) with default=ModelSelectionStatus.PENDING.value. +# created_at/updated_at come from TimestampMixin (app/shared/models.py) — declare class as +# `class ModelSelectionRun(TimestampMixin, Base)` (mixin FIRST). Declare completed_at explicitly. +# Migration down_revision: chain to the CURRENT head at implementation time (observed c1d2e3f40512); +# run `uv run alembic heads` to confirm — do NOT hardcode this PRP's observed value blindly. + +# ── DATA-PLATFORM COLUMN NAMES (availability aggregation) ───────────────────── +# Store.id (int PK), Store.code (business key). Product.id, Product.sku, Product.launch_date (date|None). +# SalesDaily: .date (Date FK calendar.date), .store_id, .product_id, .quantity (Integer, CHECK >=0), +# .unit_price (Numeric), .total_amount (Numeric). Grain unique (date, store_id, product_id). +# => For ONE pair: count(distinct date) == count(*); zero_sale_days = count where quantity == 0. 
+# Promotion: per-product (product_id NOT NULL), store_id NULLABLE (NULL = CHAIN-WIDE, applies to all +# stores), date RANGE [start_date, end_date], kind in {pct_off,bogo,bundle,markdown}. To count +# promotion_days for (store, product) within the window, JOIN promotion to the pair's sales dates +# ON sd.date BETWEEN p.start_date AND p.end_date AND p.product_id=? AND (p.store_id=? OR p.store_id IS NULL), +# then COUNT(DISTINCT sd.date). If this proves complex/edge-casey, return promotion_days=None with a +# warning string (acceptable per Success Criteria) — do NOT sum (end-start) per row (double-counts overlaps). + +# ── RUNTIME-VERIFIED LIBRARY FACTS (research doc) ───────────────────────────── +# Pydantic 2.12.5 accepts Field(strict=False) date string under a strict model. sklearn 1.8.0 +# TimeSeriesSplit(n_splits, *, max_train_size, test_size, gap). FastAPI 0.128.0 APIRouter(prefix=...). +# Alembic 1.18.4 Operations.create_index(index_name, table_name, columns, *, unique, **kw). +``` + +## Implementation Blueprint + +### Data Models and Schemas + +`app/features/model_selection/models.py`: + +```python +from datetime import date, datetime +from enum import Enum +from typing import Any + +from sqlalchemy import CheckConstraint, Date, DateTime, Index, Integer, String +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column + +from app.core.database import Base +from app.shared.models import TimestampMixin + + +class ModelSelectionStatus(str, Enum): + PENDING = "pending" + RUNNING = "running" + COMPLETED = "completed" + PARTIAL = "partial" + FAILED = "failed" + + +class ModelSelectionRun(TimestampMixin, Base): # TimestampMixin FIRST → created_at/updated_at + __tablename__ = "model_selection_run" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + selection_id: Mapped[str] = mapped_column(String(32), unique=True, index=True) + store_id: Mapped[int] = mapped_column(Integer, index=True) + product_id: Mapped[int] = 
mapped_column(Integer, index=True) + start_date: Mapped[date] = mapped_column(Date) + end_date: Mapped[date] = mapped_column(Date) + forecast_horizon: Mapped[int] = mapped_column(Integer) + ranking_metric: Mapped[str] = mapped_column(String(20)) + status: Mapped[str] = mapped_column(String(20), default=ModelSelectionStatus.PENDING.value, index=True) + candidate_models: Mapped[list[dict[str, Any]]] = mapped_column(JSONB) + policy_snapshot: Mapped[dict[str, Any]] = mapped_column(JSONB) + availability_snapshot: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + ranking_result: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + winner_model_type: Mapped[str | None] = mapped_column(String(40), nullable=True) + winner_metrics: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + final_model_path: Mapped[str | None] = mapped_column(String(512), nullable=True) + forecast_result: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + business_summary: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True) + error_message: Mapped[str | None] = mapped_column(String(2000), nullable=True) + completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True) + + __table_args__ = ( + CheckConstraint( + "status IN ('pending','running','completed','partial','failed')", + name="ck_model_selection_run_valid_status", + ), + Index("ix_model_selection_run_store_product_created", "store_id", "product_id", "created_at"), + Index("ix_model_selection_run_status_created", "status", "created_at"), + ) +``` + +`app/features/model_selection/schemas.py` — strict request models + response models: + +- `SelectionWindow(start_date, end_date)` — `ConfigDict(strict=True)`, both dates `Field(strict=False, ...)`. +- `CandidateModelConfig(model_type: Literal[<11 model_types>], params: dict[str, Any] = {})`. 
+- `RankingPolicy(minimum_sample_size: int = 0, high_confidence_rel_improvement: float = 0.10, + max_acceptable_abs_bias: float = ...)` — defaults; snapshotted into `policy_snapshot`. +- `ModelSelectionRunRequest` — `ConfigDict(strict=True)`; fields: `store_id`, `product_id`, + `selection_window`, `forecast_horizon` (int, ge=1, le=90), `ranking_metric: Literal["wape","smape","mae","bias"]="wape"`, + `split_config: SplitConfig` (reuse backtesting's? — see NOTE), `candidate_models: list` (min_length=1, max_length=10), + `feature_frame_version: int = 1` (ge=1, le=2), `feature_groups: list[str] | None = None`, + `ranking_policy: RankingPolicy = Field(default_factory=RankingPolicy)`, + `auto_train_winner: bool = False`, `auto_predict: bool = False`. + - `@model_validator(mode="after")`: `split_config.horizon == forecast_horizon` (LOCKED #5); + `auto_predict implies auto_train_winner` (LOCKED #7). + - NOTE on `split_config`: `backtesting.schemas.SplitConfig` is `frozen=True, extra="forbid"` + (NOT strict). Either (a) reuse it directly (import lazily is unnecessary for a schema type — + it's safe at module scope since backtesting.schemas has no cycle back to model_selection), or + (b) define a local `SplitSettings` mirror. **Prefer reusing `SplitConfig`** to avoid drift; it + already validates n_splits/min_train_size/gap/horizon. Since it is not strict-mode, its `date`-free + fields don't trip the strict-mode linter. +**Response + intermediate models (plain `BaseModel` — outputs don't need `strict=True`). 
These +fields ARE the stable contract the UI consumes; specify them exactly, do not improvise.** + +```python +# ── intermediate (service-internal, also embedded in JSONB) ─────────────────── +class CandidateResult(BaseModel): # what shape_candidate()/shape_failed_candidate() return + model_type: str + params: dict[str, Any] # ORIGINAL candidate params — REQUIRED so the winner can be rebuilt (pseudocode L667) + failed: bool + error: str | None = None # reason when failed=True + aggregated_metrics: dict[str, float] | None = None # raw 5-key dict from backtest (mae,rmse,smape,wape,bias) or None + sample_size: int = 0 # RULE: sum(len(fold.actuals)) across main_model_results.fold_results + config_hash: str | None = None + folds: list[FoldChart] = [] # per-fold chart points (empty when failed). NOTE: in schemas.py define FoldChart ABOVE CandidateResult (or annotate as list["FoldChart"]) — a bare name used before its definition raises NameError at import + +class FoldChart(BaseModel): + fold_index: int + dates: list[date] + actuals: list[float] + predictions: list[float] + +class ModelRankEntry(BaseModel): # one row in the ranking table (valid OR excluded) + rank: int | None # 1-based; None when excluded/failed + model_type: str + params: dict[str, Any] # carried through (see CandidateResult.params) + included: bool # False = failed or filtered out + exclusion_reason: str | None = None + metrics: dict[str, float] | None = None # normalized {wape,smape,mae,rmse,bias,sample_size} + +class RankingResult(BaseModel): # Pydantic (model_dump'd into ranking_result JSONB, L663) + winner: ModelRankEntry | None + entries: list[ModelRankEntry] # ALL candidates, ranked-then-failed, never hidden + confidence: Literal["high", "medium", "low"] + reasons: list[str] + +class WinnerSummary(BaseModel): + model_type: str + params: dict[str, Any] + metrics: dict[str, float] # normalized winner metrics + rank: int # always 1 + +class ChartData(BaseModel): # chart-ready comparison payload (Success Criteria deliverable) + wape_by_model: dict[str, float] # {model_type: wape} → WAPE bar chart + bias_by_model: dict[str, float] # {model_type: bias} → bias 
chart + fold_stability: dict[str, list[float]] # {model_type: per-fold wape} → stability lines + winner_actual_vs_predicted: list[FoldChart] # the WINNER's folds only → actual-vs-predicted overlay + +class PairAvailabilityResponse(BaseModel): + store_id: int + product_id: int + first_sales_date: date | None + last_sales_date: date | None + observed_days: int + expected_calendar_days: int + coverage_ratio: float + missing_days: int + zero_sale_days: int + promotion_days: int | None # None + a warning when not safely derivable + average_daily_demand: float # CAST float(...) — func.avg over Integer quantity returns Decimal + status: Literal["ready", "limited", "unusable"] + recommended_split_config: SplitConfig # reuse backtesting.schemas.SplitConfig + warnings: list[str] = [] + +class ForecastSummary(BaseModel): + points: list[dict[str, Any]] # ForecastPoint.model_dump(mode="json") list + total_demand: float + average_demand: float + horizon: int + +class ModelSelectionRunResponse(BaseModel): # THE /run + /{id} contract + selection_id: str + store_id: int + product_id: int + status: Literal["pending", "running", "completed", "partial", "failed"] + selection_window: SelectionWindow + forecast_horizon: int + ranking_metric: str + availability: PairAvailabilityResponse | None + ranking: list[ModelRankEntry] # == RankingResult.entries + winner: WinnerSummary | None + recommendation_confidence: Literal["high", "medium", "low"] | None # CANONICAL KEY (maps from RankingResult.confidence) + confidence_reasons: list[str] # == RankingResult.reasons + chart_data: ChartData | None + final_model: dict[str, Any] | None # {"model_path": ...} when auto_train_winner + forecast: ForecastSummary | None # when auto_predict + business_summary: dict[str, Any] | None + error_message: str | None + created_at: datetime + completed_at: datetime | None + +class TrainWinnerResponse(BaseModel): + selection_id: str + model_type: str + model_path: str + +class PredictWinnerResponse(BaseModel): + 
selection_id: str + forecast: ForecastSummary +``` + +> **NAMING (resolves the only internal-consistency nit):** the response key is +> **`recommendation_confidence`** (Success Criteria + manual probe + Goal all use it). +> `RankingResult.confidence` is the service-internal field; `_response()` maps +> `RankingResult.confidence → ModelSelectionRunResponse.recommendation_confidence` and +> `RankingResult.reasons → confidence_reasons`. Tests assert the response key +> `recommendation_confidence`. + +> **`self._response(row, ranking)` helper:** pure mapping `ModelSelectionRun` ORM row + +> `RankingResult` → `ModelSelectionRunResponse` (rehydrate `availability_snapshot`/`ranking_result`/ +> `business_summary`/`forecast_result` JSONB back into the response models; build `chart_data` from +> the per-candidate `CandidateResult.folds` + normalized metrics; map the confidence keys per above). + +### Implementation Tasks (dependency-ordered) + +```yaml +Task 1 — Migration + ORM: + RUN: uv run alembic heads # confirm current head (observed c1d2e3f40512) + CREATE alembic/versions/<revision>_create_model_selection_run.py: + - down_revision = "<current-head-revision>" + - MIRROR alembic/versions/c1d2e3f40512_create_batch_tables.py exactly: + - from sqlalchemy.dialects import postgresql -> postgresql.JSONB(astext_type=sa.Text()) + - sa.DateTime(timezone=True), server_default=sa.text("now()") for created_at/updated_at + - CheckConstraint name="ck_model_selection_run_valid_status" + - op.create_index(op.f("ix_model_selection_run_selection_id"), ..., unique=True) + - op.create_index("ix_model_selection_run_store_product_created", ..., ["store_id","product_id","created_at"]) + - op.create_index("ix_model_selection_run_status_created", ..., ["status","created_at"]) + - downgrade(): drop indexes (reverse order) THEN op.drop_table("model_selection_run") + CREATE app/features/model_selection/models.py: # as blueprint above; mirror batch/models.py + +Task 2 — Schemas: + CREATE app/features/model_selection/schemas.py: + - 
all REQUEST models ConfigDict(strict=True); date fields Field(strict=False, ...) + - Literal[...] for model_type + ranking_metric (NOT str-Enum) + - candidate_models min_length=1 max_length=10 (or settings.model_selection_max_candidates) + - @model_validator: horizon match (LOCKED #5) + auto_predict implies auto_train_winner (LOCKED #7) + - reuse backtesting.schemas.SplitConfig (module-scope import OK; no cycle) + +Task 3 — Ranking pure logic: + CREATE app/features/model_selection/ranking.py: + - NormalizedMetrics dataclass {wape, smape, mae, rmse, bias, sample_size} + - normalize_metrics(aggregated_metrics, sample_size) -> NormalizedMetrics | None + (None when the primary metric is missing OR NaN — use math.isnan guard; np.nan can appear, + metrics.py:381; keys are mae/rmse/smape/wape/bias) + - input: list[CandidateResult] (Task-2 schema). Each entry CARRIES model_type + params through to + ModelRankEntry/WinnerSummary so the winner can be rebuilt (pseudocode L667 reads winner.params). 
+ - filter: not failed AND numeric primary metric AND sample_size >= policy.minimum_sample_size + - rank key (default ranking_metric="wape"): (wape, smape, abs(bias), mae, model_type) [LOCKED #6] + - confidence (PIN the rel-improvement formula — denominator is the SECOND-place value): + rel_improvement = (second.wape - winner.wape) / second.wape # guard second.wape == 0 → treat as 0.0 + HIGH : >=2 valid AND rel_improvement >= policy.high_confidence_rel_improvement (default 0.10) + AND abs(winner.bias) <= policy.max_acceptable_abs_bias AND winner.sample_size sufficient + MEDIUM: a valid winner exists but HIGH not met (narrow lead OR mild warnings) and >=2 valid + LOW : exactly one valid candidate, OR availability "limited", OR abs(bias) over threshold, + OR rel_improvement < some near-tie epsilon (document the epsilon as a module constant) + - emit human-readable reasons[] strings explaining the chosen level (consumed as confidence_reasons) + - return RankingResult(winner, entries[ALL ranked-then-failed, never hidden], confidence, reasons) + +Task 4 — Business explanation pure logic: + CREATE app/features/model_selection/explanations.py: + - explain_winner(ranking, availability) -> business_summary dict + confidence_reasons + warnings + - translate WAPE/sMAPE/MAE/bias into short deterministic English; NO LLM, NO external call + +Task 5 — Pair availability: + CREATE ModelSelectionService.get_availability(db, store_id, product_id, forecast_horizon, split_config?) 
-> PairAvailabilityResponse: + - verify Store and Product exist (NotFoundError if absent) via data_platform ORM (module-scope import OK) + - aggregate SalesDaily for the pair (SQLAlchemy 2.0 async, mirror OpsService style): + select(func.min(SalesDaily.date), func.max(SalesDaily.date), + func.count(func.distinct(SalesDaily.date)), func.sum(SalesDaily.quantity), + func.avg(SalesDaily.quantity), + func.count().filter(SalesDaily.quantity == 0)) # FILTER aggregate; valid async idiom + .where(SalesDaily.store_id == store_id, SalesDaily.product_id == product_id) + # CAST: func.avg over Integer quantity returns Decimal; wrap average_daily_demand in float(...). + # func.count().filter(...) is a Postgres FILTER aggregate (not shown in OpsService, but supported); + # alternatively a second scalar count with .where(quantity == 0). One round-trip is fine. + - expected_calendar_days = (max_date - min_date).days + 1 + - coverage_ratio = observed_days / expected_calendar_days (guard div-by-zero / no rows) + - missing_days = expected_calendar_days - observed_days + - promotion_days: JOIN promotion ON date BETWEEN start/end AND product_id match AND + (store_id == X OR store_id IS NULL); COUNT(DISTINCT date). On any doubt → None + warning. 
+ - status (LOCKED thresholds): + ready if observed_days >= min_train_size + horizon*n_splits AND coverage_ratio >= 0.8 + limited if observed_days >= min_train_size + horizon + unusable otherwise + - recommended_split_config: expanding, n_splits=min(5, feasible), min_train_size=30 (or adjusted), + gap=0, horizon=forecast_horizon + - NO rows for the pair -> status="unusable" with zeros/None and a warning + +Task 6 — Orchestration: + CREATE ModelSelectionService.run_selection(db, request) -> ModelSelectionRunResponse: + - persist ModelSelectionRun(selection_id=uuid4().hex, status="running", snapshots); flush + - availability = get_availability(...); persist snapshot + - if availability.status == "unusable": status="failed", error_message, flush, raise BadRequestError [LOCKED #2] + - for each candidate (LAZY import services + ModelConfig adapter): + try: cfg = flatten+validate ModelConfig; bt = await BacktestingService().run_backtest( + db, store_id, product_id, window.start, window.end, + BacktestConfig(split_config=req.split_config, model_config_main=cfg, + include_baselines=False, store_fold_details=True)) + collect aggregated_metrics, sample_size, fold dates/actuals/predictions for chart + except Exception as exc: append failed entry with reason=str(exc) [never hide — Anti-Patterns] + - ranking = rank_candidates(results, req.ranking_policy, req.ranking_metric) + - if ranking.winner is None: status="failed", persist ranking_result, flush, RETURN 200 response [LOCKED #3] + - if req.auto_train_winner: + train = await ForecastingService().train_model(db, store_id, product_id, window.start, window.end, + winner_cfg, feature_frame_version=req.feature_frame_version, feature_groups=req.feature_groups) + row.final_model_path = train.model_path + - if req.auto_predict: # requires auto_train_winner (validated) + try: pred = await ForecastingService().predict(store_id, product_id, req.forecast_horizon, row.final_model_path) + row.forecast_result = pred.model_dump(mode="json") 
+ except Exception: warning, leave forecast_result None + - business_summary = explain_winner(ranking, availability) + - status = "partial" if any candidate failed else "completed"; completed_at = datetime.now(UTC) + - persist all JSONB via model_dump(mode="json"); flush + refresh; return self._response(row, ranking) + ADD methods: get_selection(db, selection_id)->row|NotFoundError ; get_ranking ; train_winner ; predict_winner + +Task 7 — Routes: + CREATE app/features/model_selection/routes.py: + - router = APIRouter(prefix="/model-selection", tags=["model-selection"]) + - GET /availability ; POST /run (200) ; GET /{selection_id} ; GET /{selection_id}/ranking ; + POST /{selection_id}/train-winner ; POST /{selection_id}/predict + - MIRROR backtesting/routes.py error mapping EXACTLY: + service instantiated locally; try/except ValueError->BadRequestError(str(e)), + SQLAlchemyError->DatabaseError("...", details={"error": str(e)}); NotFoundError from service bubbles. + - structured logger.info events (see Integration Points) + MODIFY app/main.py: + - `from app.features.model_selection.routes import router as model_selection_router` (alpha order with siblings) + - `app.include_router(model_selection_router)` inside create_app(), near backtesting/forecasting (NO prefix arg) + +Task 8 — Tests (see Validation Loop for required names): + CREATE app/features/model_selection/tests/{conftest,test_models,test_schemas,test_ranking, + test_explanations,test_service,test_routes,test_routes_integration}.py + - unit route tests: ASGITransport + app.dependency_overrides[get_db]=AsyncMock; 4-key RFC7807 assert + - service tests: mock BacktestingService/ForecastingService (patch the lazy import targets) for + happy/partial/all-fail/auto-train/auto-predict paths + - integration tests (@pytest.mark.integration): real engine, prefix-scoped teardown in finally +``` + +### Pseudocode (CRITICAL details only) + +```python +# ranking.py — deterministic, pure +def rank_candidates(results, policy, 
ranking_metric="wape"): + valid, failed = [], [] + for r in results: + m = normalize_metrics(r.aggregated_metrics, r.sample_size) # keys: mae,rmse,smape,wape,bias + if m is None or m.sample_size < policy.minimum_sample_size: + failed.append(r.as_failed("missing/NaN primary metric or sample_size below minimum")) + continue + valid.append((r, m)) + if not valid: + return RankingResult(winner=None, entries=failed, confidence="low", reasons=["no valid candidate"]) + primary = lambda m: getattr(m, ranking_metric) if ranking_metric != "bias" else abs(m.bias) + ordered = sorted(valid, key=lambda p: (primary(p[1]), p[1].smape, abs(p[1].bias), p[1].mae, p[0].model_type)) + winner = ordered[0] + return build_ranking_result(ordered, failed, policy) # computes confidence vs 2nd place +``` + +```python +# service.py — orchestration (exact verified service calls) +async def run_selection(self, db, req): + from pydantic import TypeAdapter # lazy + from app.features.backtesting.schemas import BacktestConfig # lazy + from app.features.backtesting.service import BacktestingService # lazy + from app.features.forecasting.schemas import ModelConfig # lazy + from app.features.forecasting.service import ForecastingService # lazy + adapter = TypeAdapter(ModelConfig) + + row = ModelSelectionRun(selection_id=uuid.uuid4().hex, status="running", + store_id=req.store_id, product_id=req.product_id, + start_date=req.selection_window.start_date, end_date=req.selection_window.end_date, + forecast_horizon=req.forecast_horizon, ranking_metric=req.ranking_metric, + candidate_models=[c.model_dump() for c in req.candidate_models], + policy_snapshot=req.ranking_policy.model_dump(mode="json")) + db.add(row); await db.flush() + + availability = await self.get_availability(db, req.store_id, req.product_id, req.forecast_horizon, req.split_config) + row.availability_snapshot = availability.model_dump(mode="json") + if availability.status == "unusable": + row.status = "failed"; row.error_message = 
"Insufficient data for model selection" + await db.flush(); raise BadRequestError(message=row.error_message) # LOCKED #2 + + results = [] + for c in req.candidate_models: + try: + cfg = adapter.validate_python({"model_type": c.model_type, **c.params}) # FLATTEN + bt = await BacktestingService().run_backtest( + db, req.store_id, req.product_id, + req.selection_window.start_date, req.selection_window.end_date, + BacktestConfig(split_config=req.split_config, model_config_main=cfg, + include_baselines=False, store_fold_details=True)) # LOCKED #4 + results.append(shape_candidate(c, bt)) + except Exception as exc: + results.append(shape_failed_candidate(c, exc)) + + ranking = rank_candidates(results, req.ranking_policy, req.ranking_metric) + row.ranking_result = ranking.model_dump(mode="json") + if ranking.winner is None: + row.status = "failed"; await db.flush(); return self._response(row, ranking) # LOCKED #3 (HTTP 200) + + winner_cfg = adapter.validate_python({"model_type": ranking.winner.model_type, **ranking.winner.params}) + if req.auto_train_winner: + train = await ForecastingService().train_model( + db, req.store_id, req.product_id, req.selection_window.start_date, req.selection_window.end_date, + winner_cfg, feature_frame_version=req.feature_frame_version, feature_groups=req.feature_groups) + row.final_model_path = train.model_path + if req.auto_predict and row.final_model_path: + try: + pred = await ForecastingService().predict(req.store_id, req.product_id, req.forecast_horizon, row.final_model_path) + row.forecast_result = pred.model_dump(mode="json") + except Exception as exc: # e.g. 
feature-aware reject (forecasting service.py:491) + row.forecast_result = None # surface a warning in business_summary + + row.winner_model_type = ranking.winner.model_type + row.winner_metrics = ranking.winner.metrics + row.business_summary = explain_winner(ranking, availability) + row.status = "partial" if any(r.failed for r in results) else "completed" + row.completed_at = datetime.now(UTC) + await db.flush(); await db.refresh(row) + return self._response(row, ranking) +``` + +### Integration Points + +```yaml +DATABASE: + - migration: add `model_selection_run` (JSONB snapshots: candidate_models, policy_snapshot, + availability_snapshot, ranking_result, winner_metrics, forecast_result, business_summary) + - indexes: ix_model_selection_run_selection_id (unique), ix_model_selection_run_store_product_created, + ix_model_selection_run_status_created +ROUTES: + - app/main.py: import + app.include_router(model_selection_router) (router carries its own prefix) +CONFIG (optional — only if used; then ADD to .env.example with UPPER_SNAKE + a comment, and a test): + - model_selection_max_candidates: int = 10 + - model_selection_min_coverage_ratio: float = 0.8 + - model_selection_default_min_train_size: int = 30 +OBSERVABILITY (structlog events, mirror ops/backtesting naming): + - model_selection.run_received / .availability_checked / .candidate_completed / + .candidate_failed / .run_completed / .run_failed +``` + +## Validation Loop + +### Level 1 — Focused syntax & policy + +```bash +uv run ruff check app/features/model_selection app/main.py alembic/versions +uv run ruff format --check app/features/model_selection app/main.py alembic/versions +uv run mypy app/features/model_selection app/main.py +uv run pyright app/features/model_selection app/main.py +uv run pytest app/core/tests/test_strict_mode_policy.py -v +``` + +### Level 2 — Focused unit tests + +```bash +uv run pytest app/features/model_selection/tests -v -m "not integration" +``` + +Required test names: + +- 
`test_schema_accepts_iso_dates_under_strict_model` (JSON path: `Model.model_validate({"start_date":"2026-01-01",...})`) +- `test_schema_rejects_auto_predict_without_train_winner` +- `test_schema_rejects_horizon_mismatch_between_split_and_forecast` +- `test_rank_candidates_wape_smape_abs_bias_mae_tie_break` +- `test_rank_candidates_excludes_missing_or_nan_metrics` +- `test_rank_candidates_normalizes_five_metric_keys_including_rmse` +- `test_confidence_high_when_winner_beats_second_by_10_percent` +- `test_availability_ready_limited_unusable_thresholds` +- `test_build_model_config_flattens_params` (e.g. seasonal_naive + {"season_length":7}) +- `test_run_selection_partial_success_chooses_valid_winner` +- `test_run_selection_all_candidates_fail_returns_failed_status_not_500` (LOCKED #3) +- `test_run_selection_unusable_availability_raises_bad_request` (LOCKED #2) +- `test_run_selection_auto_train_passes_feature_frame_version_and_groups` +- `test_routes_return_problem_json_on_bad_request` (4-key RFC 7807 body) +- `test_response_uses_recommendation_confidence_key` (NOT `confidence`; maps from `RankingResult.confidence`) +- `test_winner_entry_carries_params_for_rebuild` (`ModelRankEntry.params` / `WinnerSummary.params` preserved) +- `test_chart_data_has_wape_bias_fold_stability_and_winner_actual_vs_predicted` + +### Level 3 — Migration & integration + +```bash +docker compose up -d +uv run alembic upgrade head +uv run pytest app/features/model_selection/tests -v -m integration +uv run alembic downgrade -1 && uv run alembic upgrade head # downgrade/upgrade round-trips cleanly +``` + +Integration expectations: + +- `model_selection_run` exists with the three named indexes. +- `POST /model-selection/run` persists a row; `GET /model-selection/{selection_id}` returns the same id. +- Availability detects an inserted pair with enough history (`ready`) and a too-short pair (`limited`/`unusable`). 
+- Partial failure persists the failed candidate reason and still ranks a valid winner. + +### Level 4 — Full backend gates (must be green before PR) + +```bash +uv run ruff check . && uv run ruff format --check . +uv run mypy app/ && uv run pyright app/ +uv run pytest -v -m "not integration" +uv run pytest -v -m integration +``` + +> Known-local-noise: mypy/pyright report pre-existing `lightgbm`/`xgboost` optional-dep import +> errors in `forecasting/`+`registry/` (untouched here; CI installs the extras). Do not "fix" them. + +### Manual API probe (seeded DB; discover real store/product ids + date window first — IDs are +not guaranteed 1-based, see memory `seeder-does-not-reset-id-sequences`) + +```bash +uv run uvicorn app.main:app --port 8123 & +curl -s "http://localhost:8123/model-selection/availability?store_id=5&product_id=8&forecast_horizon=14" | python3 -m json.tool +curl -s -X POST http://localhost:8123/model-selection/run -H "Content-Type: application/json" -d '{ + "store_id": 5, "product_id": 8, + "selection_window": {"start_date": "2026-01-01", "end_date": "2026-05-31"}, + "forecast_horizon": 14, + "split_config": {"strategy":"expanding","n_splits":5,"min_train_size":30,"gap":0,"horizon":14}, + "candidate_models": [ + {"model_type":"naive","params":{}}, + {"model_type":"seasonal_naive","params":{"season_length":7}}, + {"model_type":"moving_average","params":{"window_size":7}}, + {"model_type":"regression","params":{}}, + {"model_type":"prophet_like","params":{}} + ], + "auto_train_winner": false, "auto_predict": false +}' | python3 -m json.tool +``` + +Expected: HTTP 200; response carries `selection_id`, non-empty `ranking`, `winner.model_type`, +`recommendation_confidence`, `chart_data`. + +## Final Validation Checklist + +- [ ] New slice follows `app/features/<slice>/{models,schemas,service,routes,tests}.py`. +- [ ] Router wired in `app/main.py` (import alias + `include_router`, no prefix at include). 
+- [ ] Migration `down_revision` chains to the live head; downgrade drops indexes then table. +- [ ] Request schemas use `ConfigDict(strict=True)` + `Field(strict=False)` for every date field; strict-mode test green. +- [ ] All 4xx responses use project exceptions (`BadRequestError`/`NotFoundError`/`DatabaseError`) → RFC 7807. +- [ ] Ranking + explanation logic is pure and unit-tested; normalizer handles all five metric keys incl. `rmse`. +- [ ] Availability covered for ready/limited/unusable + no-rows. +- [ ] `auto_train_winner` uses direct `ForecastingService.train_model` (db first, feature args keyword-only). +- [ ] `auto_predict` handles feature-aware-reject gracefully (warning, not 500). +- [ ] LOCKED decisions #1–#7 are implemented and tested. +- [ ] No frontend files, no agent mutation surface, no managed-cloud SDK. +- [ ] All four Level-4 gates pass; `gh issue view ISSUE_NUMBER` (the tracking issue for this PRP) confirms the referenced issue is open. + +## Anti-Patterns to Avoid + +- Don't implement the React UI; don't rank models in TypeScript — backend owns ranking/confidence. +- Don't use batch item metrics for fold-level chart data (batch has none) — use direct `BacktestingService` with `store_fold_details=True`. +- Don't import sibling feature *services* at module scope — lazy in-method (matches forecasting/BatchService precedent). ORM *models* at module scope is fine (OpsService precedent). +- Don't import `validate_date_range` from analytics — reimplement locally. +- Don't pass the candidate `params` as a nested dict to `ModelConfig` — FLATTEN (`{"model_type":..., **params}`). +- Don't assume four metric keys — there are five (`rmse` included); normalize, never index a raw shape blindly. +- Don't sum `(end_date - start_date)` for promotion days (double-counts overlaps; ignores chain-wide `store_id IS NULL`). +- Don't mutate aliases automatically; don't add an agent tool. +- Don't hide failed candidates — include them with `reason`. +- Don't use an LLM for explanations — deterministic text only.
+- Don't raise on all-candidates-fail (LOCKED #3 → persist failed + return 200); DO raise on unusable availability (LOCKED #2 → 400). +- Don't build SQL with string concatenation; don't weaken strict-mode or leakage tests. + +## Confidence Score + +**9.5/10** for one-pass backend implementation success. The prior draft self-rated 8/10 with +"service signatures must be rechecked at implementation time" as the top risk — that risk is now +**retired**: every `run_backtest` / `train_model` / `predict` signature, the corrected five-key +metric shape, the `ModelConfig` flattening, the strict-mode rule, the migration/JSONB/exception +patterns, and seven previously-ambiguous decisions are verified and locked here. An independent +quality-gate pass confirmed every cited signature/line-number/field-name against live source +("tried to break the cited signatures and could not") and its findings — the full response/ +intermediate contract (`CandidateResult`, `ModelRankEntry`, `RankingResult`, `WinnerSummary`, +`ChartData`, `ModelSelectionRunResponse`, …), the `recommendation_confidence` naming, the +`winner.params` carry-through, the `_response` mapping, and the rel-improvement denominator — are +now specified inline. + +Residual risks: + +- Per-candidate backtest runtime: five models × a multi-fold backtest is synchronous in-process. + On a slow host the `/run` request can be slow (acceptable for a single pair; mirrors + `/backtesting/run`). If it becomes a problem, a future PRP can move it behind the jobs slice. +- `promotion_days` derivation has real edge cases (chain-wide promos, overlapping ranges); the + PRP explicitly permits `null + warning` as a correct fallback. +- `lightgbm`/`xgboost` candidates can `ImportError` when extras are absent — they degrade to a + failed candidate with a reason (verified path), not a 500. 
diff --git a/PRPs/forecast-champion-selector-slice-a-selection-capability.md b/PRPs/forecast-champion-selector-slice-a-selection-capability.md new file mode 100644 index 00000000..f43c0371 --- /dev/null +++ b/PRPs/forecast-champion-selector-slice-a-selection-capability.md @@ -0,0 +1,716 @@ +name: "Forecast Champion Selector — Slice A: Selection & Capability Foundation" +description: | + First usable frontend/backend surface for the Forecast Champion Selector. Adds + one backend-owned model-capability catalog endpoint to the existing + `model_selection` slice, then builds the React selection shell — searchable + store/product selectors, pair validation, live data-availability assessment, + a simple/advanced backtest-settings form, and a candidate-model picker — under + a new `/visualize/champion` page. Slice A deliberately STOPS before running the + comparison: it does NOT call `POST /model-selection/run`, render ranking/chart + results, train, predict, or promote. Those are Slice B (async run + results) + and Slice C (train/predict/business summary/override/promotion). + +**Created:** 2026-06-01 · **Slice:** A of 3 (A → B → C) +**Current repo base observed:** `dev` @ `6c3f8d4` (Merge PR #354 — model_selection backend merged) +**Backend foundation (source of truth):** `PRPs/forecast-champion-selector-backend.md` (issue #353, MERGED) + +the live slice `app/features/model_selection/` (schemas/service/routes/ranking/explanations verified 2026-06-01). +**Working-tree caveat:** `docker-compose.lan.yml` is an untracked local dogfood override; do NOT commit it. +**Tracking issue:** create before implementation, suggested title `feat(api,ui): forecast champion selector slice A — selection & capability`. +**Suggested branch:** `feat/champion-selector-slice-a` (off `dev`, per `.claude/rules/branch-naming.md`). +**Commit scope:** `api` (new catalog endpoint + slice schemas/service/routes) and `ui` (frontend page/components/hooks/types). 
+No migration in Slice A — no schema change. Every commit references the tracking issue. + +--- + +## Goal + +**Feature Goal:** Ship the first interactive Forecast Champion Selector surface — a `/visualize/champion` +React page that lets a user choose a **Store → Product → Time Period → Forecast Horizon → Model Types → +Backtest Settings**, see whether the chosen pair has enough history to model (live availability assessment), +and pick candidate models from a **backend-owned** capability catalog — backed by exactly one new backend +endpoint (`GET /model-selection/models`). The page is genuinely usable for *configuration + availability +triage* even though the comparison **run** itself lands in Slice B. + +**Deliverable:** +- **Backend:** `GET /model-selection/models` → `ModelCatalogResponse` (capability catalog), implemented via a new + pure module `app/features/model_selection/capabilities.py`, response schemas added to the slice's + `schemas.py`, a thin `ModelSelectionService.get_model_catalog()` delegate, and the route wired in the slice's + existing `routes.py`. No migration, no new mutation surface, no agent tool. +- **Frontend:** a lazy-loaded `pages/visualize/champion.tsx` page (route `ROUTES.VISUALIZE.CHAMPION`, + nav entry under **Visualize**), a `components/champion-selector/` component family (searchable store/product + selects, availability panel, backtest-settings form, candidate-model picker), a `hooks/use-model-selection.ts` + query-hook module (catalog + availability reads), and a `types/api.ts` "Model Selection" section that declares + the FULL workflow contract (so Slices B/C inherit, not redefine, the types). + +**Success Definition:** +1. `GET /model-selection/models` returns HTTP 200 with a non-empty `models` array — each entry carrying + `model_type`, `label`, `family ∈ {baseline,tree,additive}`, `feature_aware`, `requires_extra`, + `default_params`, `supports_auto_predict`, `description` — plus a `default_candidate_model_types` list. +2. 
The `/visualize/champion` page renders: a searchable store select, a searchable product select (each with a + secondary line — store `code · name`, product `sku · category`), a date-range picker, a horizon input, a + candidate-model picker fed by `GET /model-selection/models`, and a simple/advanced backtest-settings form. +3. Selecting a valid `(store, product, horizon)` triggers `GET /model-selection/availability` and renders a + `ready | limited | unusable` status block with coverage/observed-days/zero-sale/promotion/avg-demand and the + recommended split config; an unusable/empty pair shows a clear not-enough-data state. +4. The "Run comparison" primary CTA is present but **disabled** with explanatory copy (Slice B turns it on). +5. All Slice A validation gates pass (backend Level-1..4 + frontend `tsc`/`lint`/`test`). + +## Why + +- Business users want to ask "which model should I use for this store/product?" through a UI, not curl. Slice A + gives them the **configuration + triage** half of that workflow immediately, and a stable shell Slice B/C bolt + onto with minimal churn. +- The capability catalog must be **backend-owned** (coordination contract): the model union, families, opt-in + extras, and feature-aware flags live in Python (`app/features/forecasting/`), and shipping them over an API + prevents the TypeScript `MODEL_FAMILY_MAP`/`MODEL_TYPE_LABELS` from drifting out of sync as new models land. +- Declaring the full TS contract now (consumed read-only in A) means Slices B and C add behavior, not type + definitions — cleaner slice boundaries, fewer merge conflicts. +- Preserves the single-host architecture: one new read-only GET, no queue, no new dependency, no cloud SDK. 
+ +## What + +### New backend endpoint (added to the existing slice router `APIRouter(prefix="/model-selection")`) + +```http +GET /model-selection/models +``` + +Response `ModelCatalogResponse`: + +```json +{ + "models": [ + { + "model_type": "naive", + "label": "Naive", + "family": "baseline", + "feature_aware": false, + "requires_extra": false, + "default_params": {}, + "supports_auto_predict": true, + "description": "Repeats the last observed value." + }, + { + "model_type": "seasonal_naive", + "label": "Seasonal Naive", + "family": "baseline", + "feature_aware": false, + "requires_extra": false, + "default_params": { "season_length": 7 }, + "supports_auto_predict": true, + "description": "Repeats the value from one season ago." + } + // ... one entry per forecasting ModelConfig member (11 total) + ], + "default_candidate_model_types": ["naive", "seasonal_naive", "moving_average", "regression", "prophet_like"] +} +``` + +### LOCKED Slice-A decisions (remove every "choose-one" ambiguity) + +1. **Exactly one new backend endpoint:** `GET /model-selection/models`. It is **declared in `routes.py` + BEFORE the `GET /{selection_id}` route** (literal path must precede the path-param route, mirroring the + existing `/availability` route at `routes.py:41` which sits before `/{selection_id}` at `:94`). Status 200. + No request body, no query params. +2. **Catalog is backend-owned and derived, not hand-duplicated.** `family` comes from the forecasting + authority `app.features.forecasting.feature_metadata.model_family_for(model_type)` (imported LAZILY inside + the builder, per the slice's cross-slice discipline) mapped to the lowercase literal + (`ModelFamily.BASELINE → "baseline"`, etc.). `model_type` iteration order + `default_params` + `label` + + `description` come from a slice-local ordered map in `capabilities.py` whose keys are asserted (in a test) to + exactly equal the `ModelType` Literal in `app/features/model_selection/schemas.py`. +3. 
**`requires_extra`** = `model_type in {"lightgbm", "xgboost"}` (opt-in extras that may `ImportError`). + **`feature_aware`** = `model_type in {"regression", "prophet_like", "lightgbm", "xgboost", "random_forest"}` + (the set the forecasting `predict()` rejects — see Known Gotchas to verify against `forecasting/service.py`). + **`supports_auto_predict`** = `not feature_aware` (feature-aware winners cannot auto-predict — backend + `predict()` rejects them; this flag lets Slice C grey-out the auto-predict toggle). +4. **`default_candidate_model_types`** = `["naive", "seasonal_naive", "moving_average", "regression", "prophet_like"]` + — the exact default five from the backend PRP's `POST /run` example, so the UI pre-selects the same set the + contract documents. +5. **No `model_selection_run` write in Slice A.** The page consumes `GET /models` and `GET /availability` only. + It assembles a typed `ModelSelectionRunRequest` in component state and exposes it through a **disabled** + "Run comparison" CTA; Slice B wires the `POST /run` mutation + results. Slice A MUST NOT call `POST /run`, + `/{id}`, `/{id}/ranking`, `/{id}/train-winner`, or `/{id}/predict`. +6. **Searchable selects use existing primitives only** (no new npm dependency). Stores/products are fetched at + `pageSize: 100` (the dimensions cap) and filtered **client-side** inside a `Popover` + text `Input` + + scrollable button list. (If the catalog ever exceeds 100, swap to the server-side `search` param the + `useStores`/`useProducts` hooks already support — out of scope here.) +7. **Bias-explanation copy (locked, reused by B/C):** wherever bias is explained in help text/tooltips, use + exactly — *"Positive bias means the model under-forecasts (risk of stockouts); negative bias means it + over-forecasts (risk of overstock)."* Export it as a shared constant so B/C reuse the same wording. +8. 
**WAPE is the default ranking metric**; the advanced form's ranking-metric select offers `wape` (default), + `smape`, `mae`, `bias`, with help text stating the tie-break chain *WAPE → sMAPE → |bias| → MAE* and the + bias copy from #7. + +### Success Criteria + +- [ ] `GET /model-selection/models` returns 200 with `models` (11 entries) + `default_candidate_model_types`. +- [ ] `capabilities.build_model_catalog()` is pure (no DB/IO) and its `model_type` set equals the slice + `ModelType` Literal (asserted by a test). +- [ ] `/model-selection/models` is matched correctly (NOT captured by `/{selection_id}`) — route-order test green. +- [ ] `/visualize/champion` route + Visualize nav entry render the page; lazy-loaded like its siblings. +- [ ] Searchable store + product selects filter client-side and show the secondary descriptor line. +- [ ] Pair validation: the form's primary CTA stays disabled until a store, product, valid date window, and + horizon are all chosen; the date window + horizon respect backend bounds. +- [ ] Availability auto-fetches for a valid pair and renders `ready/limited/unusable` + metrics + recommended + split config; an empty/unusable pair renders a not-enough-data `EmptyState`. +- [ ] The candidate-model picker is fed by `GET /model-selection/models`; opt-in-extra models are visibly + flagged; the default five are pre-selected. +- [ ] The simple/advanced settings form mirrors `SplitConfig` bounds and keeps `split_config.horizon === + forecast_horizon` (matching the backend request validator). +- [ ] The "Run comparison" CTA is present but disabled with copy indicating it arrives next. +- [ ] No `POST /model-selection/run` (or any mutation) is called; no chart/ranking results UI; no train/predict/ + promotion UI; no agent tool; no migration; no new npm dependency. +- [ ] `app/core/tests/test_strict_mode_policy.py` stays green (no new strict request model with date fields). 
+- [ ] All backend Level-1..4 gates + frontend `pnpm tsc --noEmit && pnpm lint && pnpm test --run` pass. + +## All Needed Context + +### Documentation & References + +```yaml +# Slice / contract source of truth +- file: PRPs/forecast-champion-selector-backend.md + why: The merged backend foundation. LOCKED decisions #1-#7, the full /run + /{id} contract, the + availability semantics (ready/limited/unusable thresholds), and the default-five candidate list. + Slice A consumes this contract read-only; do not re-derive ranking/confidence in TS. +- file: PRPs/ai_docs/forecast-champion-selector-backend-research.md + why: External-lib + runtime facts (FastAPI APIRouter, Pydantic strict mode, sklearn TimeSeriesSplit). +- file: PRPs/templates/prp_base.md + why: Base PRP template structure. NOTE — the referenced "PRPs/prp-readme.md.md" does NOT exist + (`find PRPs -iname '*readme*'` empty on 2026-06-01); the backend PRP records the same finding. + +# Live backend slice to read (the contract the UI consumes) +- file: app/features/model_selection/schemas.py + why: ModelType Literal (:34, the 11 model_types), RankingMetric (:48), AvailabilityStatus (:51), + ConfidenceLevel (:50), PairAvailabilityResponse (:239), ModelSelectionRunRequest (:118), + ModelSelectionRunResponse (:267), ModelRankEntry (:195), WinnerSummary (:216), ChartData (:225). + ADD the new ModelCatalogResponse + CandidateModelInfo here (plain BaseModel — outputs need no strict). +- file: app/features/model_selection/routes.py + why: APIRouter(prefix="/model-selection") (:38); the literal `/availability` (:41) precedes `/{selection_id}` + (:94) — MIRROR that ordering for the new `/models` route. Error mapping: ValueError→BadRequestError, + SQLAlchemyError→DatabaseError. +- file: app/features/model_selection/service.py + why: Stateless service pattern; lazy cross-slice imports inside methods (:215-219). 
ADD + get_model_catalog() delegating to capabilities.build_model_catalog() (no DB needed; keep signature + db-free or accept db and ignore — prefer db-free since the catalog is static). +- file: app/features/model_selection/ranking.py + why: PURE-module precedent (no DB/IO, unit-tested directly). MIRROR this style for capabilities.py. +- file: app/features/model_selection/explanations.py + why: Second pure-module precedent (deterministic text). Same import/style conventions. +- file: app/features/model_selection/tests/test_routes.py + why: Route-test pattern (ASGITransport + AsyncClient + dependency_overrides[get_db]); ADD a /models 200 + test + a route-ordering test (GET /model-selection/models is NOT treated as selection_id="models"). +- file: app/features/model_selection/tests/test_ranking.py + why: Pure-unit test pattern to MIRROR for tests/test_capabilities.py. + +# Backend authority for model family / union (catalog source) +- file: app/features/forecasting/feature_metadata.py + why: model_family_for(model_type) -> ModelFamily (:57) and _MODEL_FAMILY_MAP (:42). The catalog `family` + field derives from here. ModelFamily enum is BASELINE/TREE/ADDITIVE (lowercase .value). +- file: app/features/forecasting/schemas.py + why: ModelConfig union (the 11 flat members + their default params). Use to VERIFY default_params per model + (see Known Gotchas verification one-liner). ModelFamily enum lives here too (imported by feature_metadata). +- file: app/features/backtesting/schemas.py + why: SplitConfig (:24) — strategy Literal["expanding","sliding"] (def "expanding"), n_splits 2-20 (def 5), + min_train_size >=7 (def 30), gap 0-30 (def 0), horizon 1-90 (def 14), field_validator horizon>gap (:65). + The TS SplitConfig type + advanced form bounds mirror this exactly. 
+ +# Frontend examples to MIRROR (verified 2026-06-01) +- file: frontend/src/pages/visualize/backtest.tsx + why: Canonical analytical page: Card sections, store/product Select fed by useStores/useProducts + ({page:1,pageSize:100}), DateRangePicker, numeric Inputs, a `formReady` gate, EmptyState/LoadingState, + getErrorMessage. Slice A's champion page mirrors this density (minus the results/charts). +- file: frontend/src/components/forecast-intelligence/model-type-select.tsx + why: shadcn Select-based model picker convention + data-testid pattern. The Slice-A candidate picker mirrors + the labelling style but sources options from GET /model-selection/models (NOT the hardcoded util). +- file: frontend/src/components/forecast-intelligence/model-type-utils.ts + why: The EXISTING hardcoded MODEL_FAMILY_MAP / MODEL_TYPE_LABELS used by OTHER pages. DO NOT refactor or + delete it in Slice A — other pages depend on it; the champion page just doesn't use it. +- file: frontend/src/components/forecast-intelligence/batch-matrix-picker.tsx + why: Multi-select-of-models pattern (checkbox list, max-rows cap, data-testid scheme, Badge for state). + The candidate-model picker mirrors this (checkbox per model, opt-in-extra Badge), but rows = model_types + from the catalog, no feature-frame matrix (that's B/C). +- file: frontend/src/components/forecast-intelligence/batch-matrix-picker.test.tsx + why: Component test convention — render + fireEvent + expect(onChange).toHaveBeenCalledWith; afterEach(cleanup). +- file: frontend/src/hooks/use-stores.ts + why: useStores({page,pageSize,...,search,enabled}) query-hook shape + keyed query + keepPreviousData. +- file: frontend/src/hooks/use-products.ts + why: useProducts(...) — identical shape; the searchable selects fetch at pageSize:100. +- file: frontend/src/hooks/use-batches.test.ts + why: Hook test convention — vi.fn() fetch mock via vi.stubGlobal('fetch',...), QueryClient wrapper, + renderHook + waitFor, afterEach(vi.unstubAllGlobals()). 
MIRROR for use-model-selection.test.ts. +- file: frontend/src/hooks/index.ts + why: Star-export barrel; ADD `export * from './use-model-selection'`. +- file: frontend/src/lib/api.ts + why: `api(endpoint,{params})` typed fetch helper; getErrorMessage(); ApiError. All hooks call `api`. +- file: frontend/src/lib/constants.ts + why: ROUTES (VISUALIZE.* block) + NAV_ITEMS (Visualize group). ADD ROUTES.VISUALIZE.CHAMPION + + a { label:'Champion Selector', href: ROUTES.VISUALIZE.CHAMPION } nav entry under Visualize. +- file: frontend/src/App.tsx + why: Lazy-page + }> pattern. ADD the + champion route mirroring the BATCH/PLANNER entries. +- file: frontend/src/types/api.ts + why: Section-commented type file. ModelFamily (:177 = 'baseline'|'tree'|'additive'), ProblemDetail (:652), + Store/StoreListResponse (:10/:21), Product/ProductListResponse (:25/:37). ADD a new + "// === Model Selection (Champion Selector) ===" section near the Registry block. +- file: frontend/src/components/common/error-display.tsx + why: EmptyState({title,description,action?,icon?}) — used for the not-enough-data state. +- file: frontend/src/components/common/loading-state.tsx + why: LoadingState({message}) — used while availability/catalog load. +- file: frontend/src/components/common/date-range-picker.tsx + why: DateRangePicker({value:DateRange|undefined,onChange}) — the time-period selector. +- file: frontend/src/components/ui/{select,popover,input,card,button,badge,checkbox,table}.tsx + why: Available shadcn primitives. NOTE: there is NO command/combobox/cmdk primitive — build the searchable + select from Popover + Input + a filtered button list (LOCKED #6). +- file: frontend/src/components/layout/top-nav.tsx + why: Renders NAV_ITEMS (grouped via NavigationMenu). No edit needed beyond the constants.ts NAV_ITEMS entry. +- file: frontend/vitest.config.ts + why: jsdom env; include 'src/**/*.test.{ts,tsx}'; `@`→./src alias. No setup file. `pnpm test --run` runs once. 
+ +# External official docs (with reasoning) +- url: https://fastapi.tiangolo.com/tutorial/bigger-applications/#include-an-apirouter-with-a-custom-prefix-tags-responses-and-dependencies + why: APIRouter route-registration + the literal-before-path-param ordering rule that LOCKED #1 depends on. +- url: https://www.ibm.com/design/language/ # (progressive disclosure principle) + why: Simple/advanced settings split — show the recommended split config by default, reveal n_splits/min_train/ + gap/strategy under an "Advanced" toggle so novice users aren't overwhelmed. NOTE: the originally-cited + IBM technical-content URL 404s; use the IBM Design language site / Nielsen Norman + (https://www.nngroup.com/articles/progressive-disclosure/) as the canonical reference instead. +- url: https://help.tableau.com/current/pro/desktop/en-us/dashboards_best_practices.htm + why: Analytical dashboard layout — lead with the question (which model?), group related controls, keep the + availability triage adjacent to the selection. Informs the Card grouping of the champion page. +- url: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.TimeSeriesSplit.html + why: The split semantics behind SplitConfig (expanding window, n_splits, gap, horizon) — so the advanced + form's help text describes folds correctly. +- url: https://tanstack.com/query/latest/docs/framework/react/guides/queries + why: useQuery enabled-gating (only fetch availability once a valid pair exists) + queryKey conventions. 
+``` + +### Current Codebase Tree (relevant) + +```bash +app/features/model_selection/ # MERGED backend slice (issue #353) +├── __init__.py +├── models.py # ModelSelectionRun ORM (NOT touched in Slice A) +├── schemas.py # request/response contract ← ADD catalog response models +├── ranking.py # pure ranking (precedent for capabilities.py) +├── explanations.py # pure explanations (precedent) +├── service.py # ModelSelectionService ← ADD get_model_catalog() +├── routes.py # APIRouter(/model-selection) ← ADD GET /models (before /{selection_id}) +└── tests/ # ← ADD test_capabilities.py; extend test_routes.py +app/features/forecasting/feature_metadata.py # model_family_for() — catalog family authority +frontend/src/ +├── App.tsx # ← ADD lazy champion route +├── lib/{api,constants}.ts # ← constants: ROUTES.VISUALIZE.CHAMPION + NAV_ITEMS entry +├── types/api.ts # ← ADD "Model Selection" section +├── hooks/{use-stores,use-products,index}.ts # ← index: export use-model-selection +├── pages/visualize/{backtest,batch,...}.tsx # page-density precedent +└── components/ + ├── common/{error-display,loading-state,date-range-picker}.tsx + ├── ui/{select,popover,input,card,button,badge,checkbox,table}.tsx + └── forecast-intelligence/{model-type-select,batch-matrix-picker}.tsx # picker precedents +``` + +### Desired Codebase Tree (Slice A additions) + +```bash +# Backend +app/features/model_selection/capabilities.py # NEW: pure build_model_catalog() +app/features/model_selection/schemas.py # MODIFIED: + CandidateModelInfo, ModelCatalogResponse +app/features/model_selection/service.py # MODIFIED: + get_model_catalog() +app/features/model_selection/routes.py # MODIFIED: + GET /models (before /{selection_id}) +app/features/model_selection/tests/test_capabilities.py # NEW: pure catalog unit tests +app/features/model_selection/tests/test_routes.py # MODIFIED: + /models route + ordering tests + +# Frontend +frontend/src/lib/constants.ts # MODIFIED: ROUTES.VISUALIZE.CHAMPION + NAV_ITEMS 
entry +frontend/src/App.tsx # MODIFIED: lazy ChampionSelectorPage route +frontend/src/types/api.ts # MODIFIED: Model Selection section (full contract) +frontend/src/hooks/use-model-selection.ts # NEW: useModelCatalog + usePairAvailability +frontend/src/hooks/use-model-selection.test.ts # NEW +frontend/src/hooks/index.ts # MODIFIED: + export +frontend/src/pages/visualize/champion.tsx # NEW: the page shell +frontend/src/components/champion-selector/searchable-entity-select.tsx # NEW (generic combobox) +frontend/src/components/champion-selector/searchable-entity-select.test.tsx # NEW +frontend/src/components/champion-selector/availability-panel.tsx # NEW +frontend/src/components/champion-selector/availability-panel.test.tsx # NEW +frontend/src/components/champion-selector/backtest-settings-form.tsx # NEW +frontend/src/components/champion-selector/backtest-settings-form.test.tsx # NEW +frontend/src/components/champion-selector/candidate-model-picker.tsx # NEW +frontend/src/components/champion-selector/candidate-model-picker.test.tsx # NEW +frontend/src/components/champion-selector/copy.ts # NEW: BIAS_EXPLANATION const (LOCKED #7) +``` + +### Known Gotchas & VERIFIED Contracts + +```python +# ── ROUTE ORDERING (LOCKED #1) ──────────────────────────────────────────────── +# Starlette matches routes in DECLARATION ORDER. The literal `GET /models` MUST be declared BEFORE +# `GET /{selection_id}` or a request to /model-selection/models is captured as selection_id="models" +# and 404s in the service. The existing `/availability` route (routes.py:41) already sits before +# `/{selection_id}` (:94) — place `/models` immediately after `/availability`. + +# ── CATALOG default_params — VERIFY before hardcoding ───────────────────────── +# default_params per model must match the forecasting ModelConfig member defaults. 
Verify with: +# uv run python -c " +# from pydantic import TypeAdapter +# from app.features.forecasting.schemas import ModelConfig +# a=TypeAdapter(ModelConfig) +# for mt in ['naive','seasonal_naive','moving_average','weighted_moving_average','seasonal_average', +# 'trend_regression_baseline','regression','prophet_like','random_forest','lightgbm','xgboost']: +# try: +# m=a.validate_python({'model_type':mt}); d=m.model_dump(); d.pop('model_type',None) +# print(mt, d) +# except Exception as e: +# print(mt, 'NEEDS-PARAMS:', e)" +# Use the printed defaults as `default_params` in capabilities.py. If a member REQUIRES a param (validation +# error with only model_type), supply the contract default (seasonal_naive→{'season_length':7}, +# moving_average→{'window_size':7}) — match the backend PRP /run example. Pin these in test_capabilities.py. + +# ── feature_aware / requires_extra — VERIFY against forecasting predict() reject ── +# LOCKED #3 sets feature_aware = {regression, prophet_like, lightgbm, xgboost, random_forest}. Confirm this +# equals the set ForecastingService.predict() rejects (the backend PRP cites forecasting/service.py:491 +# "rejects feature-aware models"). If the live reject-set differs, the live code wins — update the +# capabilities set and the test to match, and note the discrepancy in the PR description. + +# ── family literal mapping ──────────────────────────────────────────────────── +# model_family_for(mt) returns a ModelFamily enum; serialize via `.value` → "baseline"|"tree"|"additive" +# which already matches the frontend ModelFamily TS union (types/api.ts:177). Import model_family_for +# LAZILY inside build_model_catalog() (mirror service.py lazy cross-slice imports). + +# ── NO new strict request model ─────────────────────────────────────────────── +# GET /models has no body and no query params → no ConfigDict(strict=True) model, no date fields → the +# strict-mode policy linter is unaffected. 
Do NOT add an AvailabilityQuery-style model for /models. + +# ── catalog is static/pure ───────────────────────────────────────────────────── +# build_model_catalog() takes no args and does no I/O — it is unit-testable like ranking.py. get_model_catalog() +# on the service is a thin pass-through (no db round-trip needed); keep it sync-pure or trivially async. +``` + +```typescript +// ── FRONTEND ──────────────────────────────────────────────────────────────── +// NO combobox/cmdk primitive exists (only select/popover/input/dialog under components/ui). Build the +// searchable select from + (filter box) + a scrollable list of + )} + + + + + + +
+ + + + + + patch({ n_splits: Number(event.target.value) || 0 }) + } + /> + + + + patch({ min_train_size: Number(event.target.value) || 0 }) + } + /> + + + + patch({ gap: Number(event.target.value) || 0 }) + } + /> + +
+
+
+ + {errors.length > 0 && ( +
    + {errors.map((error) => ( +
  • + {error} +
  • + ))} +
+ )} + + ) +} diff --git a/frontend/src/components/champion-selector/candidate-model-picker.test.tsx b/frontend/src/components/champion-selector/candidate-model-picker.test.tsx new file mode 100644 index 00000000..8c7d171d --- /dev/null +++ b/frontend/src/components/champion-selector/candidate-model-picker.test.tsx @@ -0,0 +1,99 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { CandidateModelPicker, MAX_CANDIDATES } from './candidate-model-picker' +import type { CandidateModelInfo, ModelCatalogResponse } from '@/types/api' + +afterEach(cleanup) + +function model( + model_type: string, + overrides: Partial = {}, +): CandidateModelInfo { + return { + model_type, + label: model_type, + family: 'baseline', + feature_aware: false, + requires_extra: false, + default_params: {}, + supports_auto_predict: true, + description: `desc ${model_type}`, + ...overrides, + } +} + +const CATALOG: ModelCatalogResponse = { + models: [ + model('naive'), + model('regression', { family: 'tree', feature_aware: true }), + model('lightgbm', { family: 'tree', feature_aware: true, requires_extra: true }), + model('xgboost', { family: 'tree', feature_aware: true, requires_extra: true }), + ], + default_candidate_model_types: ['naive', 'regression'], +} + +describe('CandidateModelPicker', () => { + it('toggling a model calls onChange with the new selection', () => { + const onChange = vi.fn() + render( + , + ) + fireEvent.click(screen.getByTestId('candidate-checkbox-regression')) + expect(onChange).toHaveBeenCalledWith(['naive', 'regression']) + }) + + it('deselects an already-selected model', () => { + const onChange = vi.fn() + render( + , + ) + fireEvent.click(screen.getByTestId('candidate-checkbox-naive')) + expect(onChange).toHaveBeenCalledWith(['regression']) + }) + + it('flags opt-in-extra models with an "extra" badge', () => { + render( + {}} + isLoading={false} + />, + ) + 
expect(screen.getByTestId('candidate-extra-badge-lightgbm')).toBeTruthy() + expect(screen.getByTestId('candidate-extra-badge-xgboost')).toBeTruthy() + // A baseline model carries no extra badge. + expect(screen.queryByTestId('candidate-extra-badge-naive')).toBeNull() + }) + + it('caps the selection at MAX_CANDIDATES and disables unselected models', () => { + const many = Array.from({ length: MAX_CANDIDATES }, (_, i) => `m${i}`) + const onChange = vi.fn() + const bigCatalog: ModelCatalogResponse = { + models: [...many.map((m) => model(m)), model('extra_model')], + default_candidate_model_types: [], + } + render( + , + ) + expect(screen.getByTestId('candidate-cap-badge')).toBeTruthy() + // Clicking an unselected model at the cap must NOT add it. + fireEvent.click(screen.getByTestId('candidate-checkbox-extra_model')) + expect(onChange).not.toHaveBeenCalled() + }) +}) diff --git a/frontend/src/components/champion-selector/candidate-model-picker.tsx b/frontend/src/components/champion-selector/candidate-model-picker.tsx new file mode 100644 index 00000000..6a3b4366 --- /dev/null +++ b/frontend/src/components/champion-selector/candidate-model-picker.tsx @@ -0,0 +1,129 @@ +import { LoadingState } from '@/components/common/loading-state' +import { Badge } from '@/components/ui/badge' +import { Checkbox } from '@/components/ui/checkbox' +import { cn } from '@/lib/utils' +import type { CandidateModelInfo, ModelCatalogResponse, ModelFamily } from '@/types/api' + +/** Backend caps `candidate_models` at 10 (ModelSelectionRunRequest.max_length). 
*/ +export const MAX_CANDIDATES = 10 + +interface CandidateModelPickerProps { + catalog?: ModelCatalogResponse + selected: string[] + onChange: (types: string[]) => void + isLoading: boolean +} + +const FAMILY_ORDER: ModelFamily[] = ['baseline', 'additive', 'tree'] +const FAMILY_LABEL: Record = { + baseline: 'Baseline', + additive: 'Additive', + tree: 'Tree-based', +} + +/** + * Candidate-model multi-select fed by the BACKEND catalog (never the hardcoded + * `model-type-utils`). Mirrors the batch-matrix-picker conventions: a checkbox + * per model grouped by family, opt-in-extra + feature-aware badges, and a + * selection cap of 10. + */ +export function CandidateModelPicker({ + catalog, + selected, + onChange, + isLoading, +}: CandidateModelPickerProps) { + if (isLoading) { + return + } + if (!catalog || catalog.models.length === 0) { + return ( +

No models available.

+ ) + } + + const selectedSet = new Set(selected) + const atCap = selected.length >= MAX_CANDIDATES + + function toggle(modelType: string) { + if (selectedSet.has(modelType)) { + onChange(selected.filter((type) => type !== modelType)) + } else if (!atCap) { + onChange([...selected, modelType]) + } + } + + const byFamily = new Map() + for (const model of catalog.models) { + const list = byFamily.get(model.family) ?? [] + list.push(model) + byFamily.set(model.family, list) + } + + return ( +
+
+ + {selected.length} of {MAX_CANDIDATES} selected + + {atCap && ( + + Max {MAX_CANDIDATES} reached + + )} +
+ + {FAMILY_ORDER.filter((family) => byFamily.has(family)).map((family) => ( +
+

+ {FAMILY_LABEL[family]} +

+
+ {(byFamily.get(family) ?? []).map((model) => { + const isSelected = selectedSet.has(model.model_type) + const disabled = !isSelected && atCap + return ( + + ) + })} +
+
+ ))} +
+ ) +} diff --git a/frontend/src/components/champion-selector/copy.ts b/frontend/src/components/champion-selector/copy.ts new file mode 100644 index 00000000..bafbfd53 --- /dev/null +++ b/frontend/src/components/champion-selector/copy.ts @@ -0,0 +1,20 @@ +/** + * Shared, LOCKED copy for the Champion Selector workflow (Slices A/B/C). + * + * Kept in a `.ts` (not `.tsx`) module so the `react-refresh/only-export-components` + * lint rule never trips on these non-component exports. Slices B and C import + * the SAME constants so the bias wording / tie-break explanation never drift. + */ + +/** LOCKED #7 — the canonical bias explanation reused everywhere bias is shown. */ +export const BIAS_EXPLANATION = + 'Positive bias means the model under-forecasts (risk of stockouts); ' + + 'negative bias means it over-forecasts (risk of overstock).' + +/** LOCKED #8 — the deterministic ranking tie-break chain. */ +export const RANKING_TIE_BREAK = + 'Ranked by WAPE, then sMAPE, then |bias|, then MAE.' + +/** Copy for the disabled Slice-A "Run comparison" CTA. */ +export const RUN_COMPARISON_PENDING = + 'Model comparison runs in the next update.' 
diff --git a/frontend/src/components/champion-selector/decision/business-interpretation-panel.test.tsx b/frontend/src/components/champion-selector/decision/business-interpretation-panel.test.tsx new file mode 100644 index 00000000..27fd9c50 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/business-interpretation-panel.test.tsx @@ -0,0 +1,47 @@ +import { afterEach, describe, expect, it } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { BusinessInterpretationPanel } from './business-interpretation-panel' +import type { ForecastDecision } from '@/types/api' + +afterEach(cleanup) + +const decision: ForecastDecision = { + method: 'heuristic', + lead_time_days: 7, + service_level: 0.95, + z_value: 1.6449, + sigma_daily_demand: 1.4, + expected_demand_over_lead_time: 70, + safety_stock: 6.1, + reorder_point: 76.1, + bias_risk_text: 'Positive bias means the model under-forecasts (risk of stockouts).', + caveats: ['Safety stock is a deterministic heuristic.'], +} + +const businessSummary = { + headline: 'Recommended model: naive (high confidence).', + winner: { model_type: 'naive', summary: 'WAPE 10.0%' }, + comparison: { lead_text: '15% lower WAPE than the runner-up' }, + data_notes: ['Observed 120 of 120 calendar days.'], +} + +describe('BusinessInterpretationPanel', () => { + it('renders the headline, expected demand, and bias risk', () => { + render( + , + ) + const text = screen.getByTestId('business-interpretation-panel').textContent ?? 
'' + expect(text).toContain('Recommended model: naive') + expect(screen.getByTestId('business-expected-demand').textContent).toContain('70.0') + expect(screen.getByTestId('business-bias-risk').textContent).toContain( + 'under-forecasts', + ) + }) + + it('falls back to the bias explanation when no decision is present', () => { + render() + expect( + screen.getByText(/Positive bias means the model under-forecasts/), + ).toBeTruthy() + }) +}) diff --git a/frontend/src/components/champion-selector/decision/business-interpretation-panel.tsx b/frontend/src/components/champion-selector/decision/business-interpretation-panel.tsx new file mode 100644 index 00000000..5cb9d1d0 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/business-interpretation-panel.tsx @@ -0,0 +1,84 @@ +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { BIAS_EXPLANATION } from '@/components/champion-selector/copy' +import type { ForecastDecision } from '@/types/api' + +interface BusinessInterpretationPanelProps { + /** The deterministic backend `business_summary` (read-only). */ + businessSummary: Record | null + /** The decision heuristic (carries bias-risk text + expected demand). */ + decision: ForecastDecision | null +} + +function str(value: unknown): string | null { + return typeof value === 'string' ? value : null +} + +/** + * Slice C — business interpretation. Renders the SAME `business_summary` the + * backend computed (read-only — Slice B's winner card owns the headline) and + * ADDS the decision-layer fields (expected demand + bias risk + caveats). 
+ */ +export function BusinessInterpretationPanel({ + businessSummary, + decision, +}: BusinessInterpretationPanelProps) { + const headline = str(businessSummary?.['headline']) + const winner = businessSummary?.['winner'] as Record | null | undefined + const winnerSummary = str(winner?.['summary']) + const comparison = businessSummary?.['comparison'] as Record | null | undefined + const leadText = str(comparison?.['lead_text']) + const dataNotes = Array.isArray(businessSummary?.['data_notes']) + ? (businessSummary?.['data_notes'] as unknown[]).filter((x): x is string => typeof x === 'string') + : [] + + return ( + + + Business interpretation + {headline && {headline}} + + + {winnerSummary && ( +

+ Why it won: + {winnerSummary} + {leadText ? ` — ${leadText}.` : '.'} +

+ )} + + {decision && ( +
+

+ Expected demand over lead time: + {decision.expected_demand_over_lead_time.toFixed(1)} units ( + {decision.lead_time_days} days). +

+

+ {decision.bias_risk_text} +

+
+ )} + + {!decision && ( +

{BIAS_EXPLANATION}

+ )} + + {dataNotes.length > 0 && ( +
    + {dataNotes.map((note, i) => ( +
  • {note}
  • + ))} +
+ )} + + {decision?.caveats?.length ? ( +
    + {decision.caveats.map((caveat, i) => ( +
  • {caveat}
  • + ))} +
+ ) : null} +
+
+ ) +} diff --git a/frontend/src/components/champion-selector/decision/constants.ts b/frontend/src/components/champion-selector/decision/constants.ts new file mode 100644 index 00000000..1005d481 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/constants.ts @@ -0,0 +1,19 @@ +/** + * Non-component constants for the Slice C decision panels. Kept in a `.ts` + * module so `react-refresh/only-export-components` never trips on them. + */ + +/** Service levels the safety-stock z-table supports exactly (others snap nearest). */ +export const SERVICE_LEVEL_OPTIONS = [0.9, 0.95, 0.975, 0.99] as const + +/** Capability-limited blocked state for a feature-aware winner (LOCKED #5). */ +export const FEATURE_AWARE_BLOCKED_COPY = + 'Forecast not available for feature-aware models — use the What-If Planner ' + + '(Scenarios) to forecast through explicit assumptions.' + +/** The promotion-is-audited note shown in the promote dialog. */ +export const PROMOTE_AUDIT_NOTE = + 'Promotion is explicit and recorded — the approver and decision are saved as ' + + 'an audit record on this run. It is never automatic.' 
+ +export const SAFETY_STOCK_HEADER = 'Safety stock (heuristic)' diff --git a/frontend/src/components/champion-selector/decision/daily-forecast-table.test.tsx b/frontend/src/components/champion-selector/decision/daily-forecast-table.test.tsx new file mode 100644 index 00000000..c4d2a73b --- /dev/null +++ b/frontend/src/components/champion-selector/decision/daily-forecast-table.test.tsx @@ -0,0 +1,26 @@ +import { afterEach, describe, expect, it } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { DailyForecastTable } from './daily-forecast-table' +import type { ModelSelectionForecastSummary } from '@/types/api' + +afterEach(cleanup) + +const forecast: ModelSelectionForecastSummary = { + points: [ + { date: '2026-06-01', forecast: 10.5, lower_bound: 8, upper_bound: 12 }, + { date: '2026-06-02', forecast: 14.2, lower_bound: null, upper_bound: null }, + ], + total_demand: 24.7, + average_demand: 12.35, + horizon: 2, +} + +describe('DailyForecastTable', () => { + it('renders one row per forecast point with the forecast value', () => { + render() + const text = screen.getByTestId('daily-forecast-table').textContent ?? 
'' + expect(text).toContain('2026-06-01') + expect(text).toContain('10.50') + expect(text).toContain('14.20') + }) +}) diff --git a/frontend/src/components/champion-selector/decision/daily-forecast-table.tsx b/frontend/src/components/champion-selector/decision/daily-forecast-table.tsx new file mode 100644 index 00000000..96404c8f --- /dev/null +++ b/frontend/src/components/champion-selector/decision/daily-forecast-table.tsx @@ -0,0 +1,57 @@ +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from '@/components/ui/table' +import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' +import type { ModelSelectionForecastSummary } from '@/types/api' + +interface DailyForecastTableProps { + forecast: ModelSelectionForecastSummary +} + +function cell(value: unknown): string { + return typeof value === 'number' && Number.isFinite(value) ? value.toFixed(2) : '—' +} + +/** Slice C — the per-day forecast table (date, forecast, lower, upper). */ +export function DailyForecastTable({ forecast }: DailyForecastTableProps) { + return ( + + + Daily forecast + + + + + + Date + Forecast + Lower + Upper + + + + {forecast.points.map((point, index) => ( + + {String(point['date'] ?? '—')} + + {cell(point['forecast'])} + + + {cell(point['lower_bound'])} + + + {cell(point['upper_bound'])} + + + ))} + +
+
+
+ ) +} diff --git a/frontend/src/components/champion-selector/decision/decision-section.tsx b/frontend/src/components/champion-selector/decision/decision-section.tsx new file mode 100644 index 00000000..c1dbd95c --- /dev/null +++ b/frontend/src/components/champion-selector/decision/decision-section.tsx @@ -0,0 +1,180 @@ +import { useMemo, useState } from 'react' +import { Button } from '@/components/ui/button' +import { Card, CardContent } from '@/components/ui/card' +import { getErrorMessage } from '@/lib/api' +import { + usePredictWinner, + usePromoteChampion, + useTrainSelected, + useTrainWinner, +} from '@/hooks/use-model-selection' +import type { + ModelCatalogResponse, + ModelSelectionRunResponse, + PredictWinnerResponse, + TrainWinnerResponse, +} from '@/types/api' +import { WinnerDecisionPanel } from './winner-decision-panel' +import { TrainForecastActions } from './train-forecast-actions' +import { ForecastSummaryCard } from './forecast-summary-card' +import { ForecastChart } from './forecast-chart' +import { DailyForecastTable } from './daily-forecast-table' +import { BusinessInterpretationPanel } from './business-interpretation-panel' +import { SafetyStockPanel } from './safety-stock-panel' +import { PromoteChampionDialog } from './promote-champion-dialog' + +interface DecisionSectionProps { + selectionId: string + run: ModelSelectionRunResponse + catalog: ModelCatalogResponse | undefined +} + +/** + * Slice C — the decision section rendered below a terminal winning run. + * + * Owns the train / predict / promote mutations (so the page keeps its hooks + * unconditional). Mount it with `key={selectionId}` so a fresh run resets the + * train/forecast/promote state. + */ +export function DecisionSection({ selectionId, run, catalog }: DecisionSectionProps) { + const winnerModelType = run.winner?.model_type ?? 
null + + const [trainResult, setTrainResult] = useState(null) + const [predictResult, setPredictResult] = useState(null) + const [leadTimeDays, setLeadTimeDays] = useState(7) + const [serviceLevel, setServiceLevel] = useState(0.95) + const [promoteOpen, setPromoteOpen] = useState(false) + const [promoteError, setPromoteError] = useState(null) + const [promotedAlias, setPromotedAlias] = useState(null) + + const trainWinner = useTrainWinner(selectionId) + const trainSelected = useTrainSelected(selectionId) + const predict = usePredictWinner(selectionId) + const promote = usePromoteChampion(selectionId) + + // Every candidate the run offered (winner + runners-up + failed), de-duped. + const candidateModelTypes = useMemo(() => { + const seen = new Set() + for (const entry of run.ranking) seen.add(entry.model_type) + if (winnerModelType) seen.add(winnerModelType) + return [...seen] + }, [run.ranking, winnerModelType]) + + // Capability of the model that WILL be (or was) trained — drives the blocked + // forecast state for a feature-aware winner (LOCKED #5). + const activeModelType = trainResult?.model_type ?? winnerModelType + const supportsAutoPredict = useMemo(() => { + const info = catalog?.models.find((m) => m.model_type === activeModelType) + return info?.supports_auto_predict ?? 
true + }, [catalog, activeModelType]) + + const trained = trainResult !== null || run.final_model !== null + + if (winnerModelType === null) return null + + function handleTrain(modelType: string, overrideReason: string | null) { + setPredictResult(null) + setPromotedAlias(null) + const onSuccess = (data: TrainWinnerResponse) => setTrainResult(data) + if (modelType === winnerModelType) { + trainWinner.mutate(undefined, { onSuccess }) + } else { + trainSelected.mutate({ model_type: modelType, override_reason: overrideReason }, { onSuccess }) + } + } + + function handleForecast() { + predict.mutate( + { lead_time_days: leadTimeDays, service_level: serviceLevel }, + { onSuccess: (data) => setPredictResult(data) }, + ) + } + + function handlePromote(body: Parameters[0]) { + setPromoteError(null) + promote.mutate(body, { + onSuccess: (data) => setPromotedAlias(data.alias_name), + onError: (err) => setPromoteError(getErrorMessage(err)), + }) + } + + const forecast = predictResult?.forecast ?? null + const decision = predictResult?.decision ?? null + const isOverride = trainResult?.is_override ?? false + + return ( +
+ + + + + + {predict.isError && ( +

+ {getErrorMessage(predict.error)} +

+ )} +
+
+ + {forecast && ( + <> + + + + + + + )} + + {trained && ( + + +

+ Promote the trained champion to a registry alias (approval-gated). +

+ +
+
+ )} + + +
+ ) +} diff --git a/frontend/src/components/champion-selector/decision/forecast-chart.test.tsx b/frontend/src/components/champion-selector/decision/forecast-chart.test.tsx new file mode 100644 index 00000000..c28c6726 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/forecast-chart.test.tsx @@ -0,0 +1,33 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { ForecastChart } from './forecast-chart' +import type { ModelSelectionForecastSummary } from '@/types/api' + +// Recharts' ResponsiveContainer needs ResizeObserver in jsdom. +beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) +}) + +afterEach(cleanup) + +const forecast: ModelSelectionForecastSummary = { + points: [ + { date: '2026-06-01', forecast: 10, lower_bound: 8, upper_bound: 12 }, + { date: '2026-06-02', forecast: 14, lower_bound: 11, upper_bound: 17 }, + ], + total_demand: 24, + average_demand: 12, + horizon: 2, +} + +describe('ForecastChart', () => { + it('renders the chart container from forecast points', () => { + render() + expect(screen.getByTestId('forecast-chart')).toBeTruthy() + }) +}) diff --git a/frontend/src/components/champion-selector/decision/forecast-chart.tsx b/frontend/src/components/champion-selector/decision/forecast-chart.tsx new file mode 100644 index 00000000..fccd54b8 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/forecast-chart.tsx @@ -0,0 +1,43 @@ +import { TimeSeriesChart } from '@/components/charts/time-series-chart' +import type { ModelSelectionForecastSummary } from '@/types/api' + +interface ForecastChartProps { + forecast: ModelSelectionForecastSummary +} + +interface ChartRow { + date: string + forecast: number + lower?: number + upper?: number +} + +/** Slice C — the horizon forecast curve (optional interval band). 
*/ +export function ForecastChart({ forecast }: ForecastChartProps) { + const rows: ChartRow[] = forecast.points.map((point) => { + const lower = point['lower_bound'] + const upper = point['upper_bound'] + return { + date: String(point['date'] ?? ''), + forecast: Number(point['forecast'] ?? 0), + lower: typeof lower === 'number' ? lower : undefined, + upper: typeof upper === 'number' ? upper : undefined, + } + }) + const hasInterval = rows.some((row) => row.lower !== undefined && row.upper !== undefined) + + return ( +
+ +
+ ) +} diff --git a/frontend/src/components/champion-selector/decision/forecast-summary-card.test.tsx b/frontend/src/components/champion-selector/decision/forecast-summary-card.test.tsx new file mode 100644 index 00000000..d9e57324 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/forecast-summary-card.test.tsx @@ -0,0 +1,37 @@ +import { afterEach, describe, expect, it } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { ForecastSummaryCard } from './forecast-summary-card' +import type { ModelSelectionForecastSummary } from '@/types/api' + +afterEach(cleanup) + +const forecast: ModelSelectionForecastSummary = { + points: [], + total_demand: 140, + average_demand: 10, + horizon: 14, + peak_date: '2026-06-02', + peak_demand: 25, + low_date: '2026-06-03', + low_demand: 5, +} + +describe('ForecastSummaryCard', () => { + it('renders total, peak, and low tiles', () => { + render() + const text = screen.getByTestId('forecast-summary-card').textContent ?? '' + expect(text).toContain('140.0') + expect(text).toContain('25.0') + expect(text).toContain('2026-06-02') + expect(text).toContain('14d') + }) + + it('renders an em-dash for null peak/low', () => { + render( + , + ) + expect(screen.getByTestId('forecast-summary-card').textContent).toContain('—') + }) +}) diff --git a/frontend/src/components/champion-selector/decision/forecast-summary-card.tsx b/frontend/src/components/champion-selector/decision/forecast-summary-card.tsx new file mode 100644 index 00000000..dddab510 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/forecast-summary-card.tsx @@ -0,0 +1,48 @@ +import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' +import type { ModelSelectionForecastSummary } from '@/types/api' + +interface ForecastSummaryCardProps { + forecast: ModelSelectionForecastSummary +} + +function Tile({ label, value, sub }: { label: string; value: string; sub?: string }) { + return ( +
+

{label}

+

{value}

+ {sub &&

{sub}

} +
+ ) +} + +function num(value: number | null | undefined): string { + return typeof value === 'number' && Number.isFinite(value) ? value.toFixed(1) : '—' +} + +/** Slice C — total / average / peak / low / horizon KPI tiles (null-safe). */ +export function ForecastSummaryCard({ forecast }: ForecastSummaryCardProps) { + return ( + + + Forecast summary + + +
+ + + + + +
+
+
+ ) +} diff --git a/frontend/src/components/champion-selector/decision/promote-champion-dialog.test.tsx b/frontend/src/components/champion-selector/decision/promote-champion-dialog.test.tsx new file mode 100644 index 00000000..c7dbe718 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/promote-champion-dialog.test.tsx @@ -0,0 +1,72 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { PromoteChampionDialog } from './promote-champion-dialog' + +afterEach(cleanup) + +function renderDialog(overrides: Partial[0]> = {}) { + const props = { + open: true, + onOpenChange: vi.fn(), + isOverride: false, + isPromoting: false, + promoteError: null, + promotedAlias: null, + onConfirm: vi.fn(), + ...overrides, + } + render() + return props +} + +describe('PromoteChampionDialog', () => { + it('keeps confirm disabled until alias + approver are valid', () => { + renderDialog() + expect(screen.getByTestId('promote-confirm-action').hasAttribute('disabled')).toBe(true) + fireEvent.change(screen.getByTestId('promote-alias-input'), { + target: { value: 'champion-x' }, + }) + fireEvent.change(screen.getByTestId('promote-approver-input'), { + target: { value: 'gabor' }, + }) + expect(screen.getByTestId('promote-confirm-action').hasAttribute('disabled')).toBe(false) + }) + + it('flags an invalid alias name', () => { + renderDialog() + fireEvent.change(screen.getByTestId('promote-alias-input'), { + target: { value: 'Bad Alias' }, + }) + expect(screen.getByTestId('promote-alias-error')).toBeTruthy() + }) + + it('requires the ack checkbox for a non-recommended (override) model', () => { + renderDialog({ isOverride: true }) + fireEvent.change(screen.getByTestId('promote-alias-input'), { + target: { value: 'champion-x' }, + }) + fireEvent.change(screen.getByTestId('promote-approver-input'), { + target: { value: 'gabor' }, + }) + // still disabled until the ack is checked + 
expect(screen.getByTestId('promote-confirm-action').hasAttribute('disabled')).toBe(true) + fireEvent.click(screen.getByTestId('promote-ack-checkbox')) + expect(screen.getByTestId('promote-confirm-action').hasAttribute('disabled')).toBe(false) + }) + + it('calls onConfirm with the promote body', () => { + const props = renderDialog() + fireEvent.change(screen.getByTestId('promote-alias-input'), { + target: { value: 'champion-x' }, + }) + fireEvent.change(screen.getByTestId('promote-approver-input'), { + target: { value: 'gabor' }, + }) + fireEvent.click(screen.getByTestId('promote-confirm-action')) + expect(props.onConfirm).toHaveBeenCalledWith({ + alias_name: 'champion-x', + approved_by: 'gabor', + acknowledge_non_recommended: false, + }) + }) +}) diff --git a/frontend/src/components/champion-selector/decision/promote-champion-dialog.tsx b/frontend/src/components/champion-selector/decision/promote-champion-dialog.tsx new file mode 100644 index 00000000..79e7e486 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/promote-champion-dialog.tsx @@ -0,0 +1,163 @@ +import { useState } from 'react' +import { CheckCircle2, ShieldAlert } from 'lucide-react' +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from '@/components/ui/alert-dialog' +import { Checkbox } from '@/components/ui/checkbox' +import { Input } from '@/components/ui/input' +import type { PromoteRequest } from '@/types/api' +import { PROMOTE_AUDIT_NOTE } from './constants' + +const ALIAS_RE = /^[a-z0-9][a-z0-9\-_]*$/ + +interface PromoteChampionDialogProps { + open: boolean + onOpenChange: (open: boolean) => void + /** True when a non-recommended model was trained (requires explicit ack). */ + isOverride: boolean + defaultAliasName?: string + isPromoting: boolean + /** Error message from the last promote attempt (null on success/idle). 
*/ + promoteError: string | null + /** The alias name on a successful promotion (null until promoted). */ + promotedAlias: string | null + onConfirm: (body: PromoteRequest) => void +} + +/** + * Slice C — the approval-gated promote dialog. Requires an approver and a valid + * alias name; a non-recommended model additionally requires the ack checkbox. + * Mirrors `forecast-intelligence/promote-confirmation-dialog.tsx`, but calls the + * model_selection `promote` flow (compare and promote stay separate). + */ +export function PromoteChampionDialog({ + open, + onOpenChange, + isOverride, + defaultAliasName = '', + isPromoting, + promoteError, + promotedAlias, + onConfirm, +}: PromoteChampionDialogProps) { + const [aliasName, setAliasName] = useState(defaultAliasName) + const [approvedBy, setApprovedBy] = useState('') + const [ack, setAck] = useState(false) + + const aliasValid = ALIAS_RE.test(aliasName.trim()) + const canConfirm = + aliasValid && + approvedBy.trim().length > 0 && + (!isOverride || ack) && + !isPromoting + + function handleConfirm() { + if (!canConfirm) return + onConfirm({ + alias_name: aliasName.trim(), + approved_by: approvedBy.trim(), + acknowledge_non_recommended: isOverride ? ack : false, + }) + } + + return ( + { + if (!next) setAck(false) + onOpenChange(next) + }} + > + + + Promote champion to a registry alias + {PROMOTE_AUDIT_NOTE} + + +
+
+ + setAliasName(event.target.value)} + placeholder="e.g. champion-store5-prod8" + autoComplete="off" + data-testid="promote-alias-input" + /> + {aliasName.length > 0 && !aliasValid && ( +

+ Lowercase letters, digits, hyphens and underscores only (must start + with a letter or digit). +

+ )} +
+ +
+ + setApprovedBy(event.target.value)} + placeholder="your name" + autoComplete="off" + data-testid="promote-approver-input" + /> +
+ + {isOverride && ( + + )} + + {promoteError && ( +

+ {promoteError} +

+ )} + + {promotedAlias && ( +
+ + Promoted to alias {promotedAlias}. +
+ )} +
+ + + Close + + {isPromoting ? 'Promoting…' : 'Promote'} + + +
+
+ ) +} diff --git a/frontend/src/components/champion-selector/decision/safety-stock-panel.test.tsx b/frontend/src/components/champion-selector/decision/safety-stock-panel.test.tsx new file mode 100644 index 00000000..a5a27e9f --- /dev/null +++ b/frontend/src/components/champion-selector/decision/safety-stock-panel.test.tsx @@ -0,0 +1,54 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { SafetyStockPanel } from './safety-stock-panel' +import type { ForecastDecision } from '@/types/api' + +afterEach(cleanup) + +const decision: ForecastDecision = { + method: 'heuristic', + lead_time_days: 7, + service_level: 0.95, + z_value: 1.6449, + sigma_daily_demand: 1.41, + expected_demand_over_lead_time: 70, + safety_stock: 6.13, + reorder_point: 76.13, + bias_risk_text: 'bias text', + caveats: ['heuristic'], +} + +function renderPanel(overrides: Partial[0]> = {}) { + const props = { + decision, + leadTimeDays: 7, + serviceLevel: 0.95, + isRecomputing: false, + onLeadTimeChange: vi.fn(), + onServiceLevelChange: vi.fn(), + onRecompute: vi.fn(), + ...overrides, + } + render() + return props +} + +describe('SafetyStockPanel', () => { + it('renders the labeled heuristic header and stats', () => { + renderPanel() + const text = screen.getByTestId('safety-stock-panel').textContent ?? 
'' + expect(text).toContain('Safety stock (heuristic)') + expect(text).toContain('1.6449') + expect(text).toContain('6.1') + }) + + it('fires onLeadTimeChange and onRecompute', () => { + const props = renderPanel() + fireEvent.change(screen.getByTestId('safety-stock-lead-time'), { + target: { value: '14' }, + }) + expect(props.onLeadTimeChange).toHaveBeenCalledWith(14) + fireEvent.click(screen.getByTestId('safety-stock-recompute')) + expect(props.onRecompute).toHaveBeenCalledOnce() + }) +}) diff --git a/frontend/src/components/champion-selector/decision/safety-stock-panel.tsx b/frontend/src/components/champion-selector/decision/safety-stock-panel.tsx new file mode 100644 index 00000000..11f1b43e --- /dev/null +++ b/frontend/src/components/champion-selector/decision/safety-stock-panel.tsx @@ -0,0 +1,115 @@ +import { Loader2, RefreshCw } from 'lucide-react' +import { Button } from '@/components/ui/button' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Input } from '@/components/ui/input' +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select' +import type { ForecastDecision } from '@/types/api' +import { SAFETY_STOCK_HEADER, SERVICE_LEVEL_OPTIONS } from './constants' + +interface SafetyStockPanelProps { + decision: ForecastDecision | null + leadTimeDays: number + serviceLevel: number + isRecomputing: boolean + onLeadTimeChange: (value: number) => void + onServiceLevelChange: (value: number) => void + onRecompute: () => void +} + +function Stat({ label, value }: { label: string; value: string }) { + return ( +
+

{label}

+

{value}

+
+ ) +} + +/** + * Slice C — the CLEARLY-LABELED safety-stock heuristic. Lead time + service + * level inputs recompute the forecast decision. Never influences ranking. + */ +export function SafetyStockPanel({ + decision, + leadTimeDays, + serviceLevel, + isRecomputing, + onLeadTimeChange, + onServiceLevelChange, + onRecompute, +}: SafetyStockPanelProps) { + return ( + + + {SAFETY_STOCK_HEADER} + + A deterministic reorder heuristic (demand variability only, constant lead + time). Adjust the inputs and recompute. + + + +
+
+ Lead time (days) + onLeadTimeChange(Number(event.target.value) || 0)} + className="w-32" + data-testid="safety-stock-lead-time" + /> +
+
+ Service level + +
+ +
+ + {decision && ( +
+ + + + +
+ )} +
+
+ ) +} diff --git a/frontend/src/components/champion-selector/decision/train-forecast-actions.test.tsx b/frontend/src/components/champion-selector/decision/train-forecast-actions.test.tsx new file mode 100644 index 00000000..3ff27f99 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/train-forecast-actions.test.tsx @@ -0,0 +1,48 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { TrainForecastActions } from './train-forecast-actions' + +afterEach(cleanup) + +describe('TrainForecastActions', () => { + it('shows the blocked state for a feature-aware winner', () => { + render( + {}} + />, + ) + expect(screen.getByTestId('forecast-blocked-state').textContent).toContain( + 'What-If Planner', + ) + expect(screen.queryByTestId('forecast-button')).toBeNull() + }) + + it('fires onForecast when the trained forecast button is clicked', () => { + const onForecast = vi.fn() + render( + , + ) + fireEvent.click(screen.getByTestId('forecast-button')) + expect(onForecast).toHaveBeenCalledOnce() + }) + + it('disables the forecast button until a model is trained', () => { + render( + {}} + />, + ) + expect(screen.getByTestId('forecast-button').hasAttribute('disabled')).toBe(true) + }) +}) diff --git a/frontend/src/components/champion-selector/decision/train-forecast-actions.tsx b/frontend/src/components/champion-selector/decision/train-forecast-actions.tsx new file mode 100644 index 00000000..0ba12605 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/train-forecast-actions.tsx @@ -0,0 +1,54 @@ +import { Loader2, LineChart, Ban } from 'lucide-react' +import { Button } from '@/components/ui/button' +import { FEATURE_AWARE_BLOCKED_COPY } from './constants' + +interface TrainForecastActionsProps { + /** From the Slice A catalog (`supports_auto_predict = not feature_aware`). 
*/ + supportsAutoPredict: boolean + /** True once a model bundle has been trained for the selection. */ + trained: boolean + isPredicting: boolean + onForecast: () => void +} + +/** + * Slice C — the Forecast action + the capability-limited blocked state. + * + * A feature-aware winner cannot auto-predict (LOCKED #5): instead of faking a + * forecast we surface the limitation and route the user to the What-If Planner. + */ +export function TrainForecastActions({ + supportsAutoPredict, + trained, + isPredicting, + onForecast, +}: TrainForecastActionsProps) { + if (!supportsAutoPredict) { + return ( +
+ + {FEATURE_AWARE_BLOCKED_COPY} +
+ ) + } + + return ( + + ) +} diff --git a/frontend/src/components/champion-selector/decision/winner-decision-panel.test.tsx b/frontend/src/components/champion-selector/decision/winner-decision-panel.test.tsx new file mode 100644 index 00000000..5aca0b38 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/winner-decision-panel.test.tsx @@ -0,0 +1,48 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { WinnerDecisionPanel } from './winner-decision-panel' +import type { TrainWinnerResponse } from '@/types/api' + +afterEach(cleanup) + +describe('WinnerDecisionPanel', () => { + it('trains the recommended winner without a confirm dialog', () => { + const onTrain = vi.fn() + render( + , + ) + expect(screen.getByTestId('decision-train-button').textContent).toContain( + 'Train recommended', + ) + fireEvent.click(screen.getByTestId('decision-train-button')) + expect(onTrain).toHaveBeenCalledWith('naive', null) + }) + + it('renders the override warning from a train result', () => { + const trainResult: TrainWinnerResponse = { + selection_id: 's', + model_type: 'seasonal_naive', + model_path: 'p', + is_override: true, + override_warning: 'You trained seasonal_naive instead of naive.', + } + render( + {}} + />, + ) + expect(screen.getByTestId('decision-override-warning').textContent).toContain( + 'seasonal_naive', + ) + }) +}) diff --git a/frontend/src/components/champion-selector/decision/winner-decision-panel.tsx b/frontend/src/components/champion-selector/decision/winner-decision-panel.tsx new file mode 100644 index 00000000..5b0d58d5 --- /dev/null +++ b/frontend/src/components/champion-selector/decision/winner-decision-panel.tsx @@ -0,0 +1,158 @@ +import { useState } from 'react' +import { Loader2, Trophy, TriangleAlert } from 'lucide-react' +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + 
AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from '@/components/ui/alert-dialog' +import { Button } from '@/components/ui/button' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Input } from '@/components/ui/input' +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select' +import type { TrainWinnerResponse } from '@/types/api' + +interface WinnerDecisionPanelProps { + winnerModelType: string + /** Every candidate offered in the run (winner + runners-up + failed). */ + candidateModelTypes: string[] + isTraining: boolean + trainResult: TrainWinnerResponse | null + /** Train the chosen model — the page routes winner vs. override. */ + onTrain: (modelType: string, overrideReason: string | null) => void +} + +/** + * Slice C — accept the recommended winner OR override to another candidate. + * + * Picking a non-winner opens a confirm dialog (explicit warning + an optional + * reason) before training. Presentational — the page owns the train mutations. + */ +export function WinnerDecisionPanel({ + winnerModelType, + candidateModelTypes, + isTraining, + trainResult, + onTrain, +}: WinnerDecisionPanelProps) { + const [selected, setSelected] = useState(winnerModelType) + const [overrideReason, setOverrideReason] = useState('') + const [confirmOpen, setConfirmOpen] = useState(false) + + const isOverride = selected !== winnerModelType + + function handleTrainClick() { + if (isOverride) { + setConfirmOpen(true) + return + } + onTrain(selected, null) + } + + function handleConfirmOverride() { + onTrain(selected, overrideReason.trim() || null) + setConfirmOpen(false) + } + + return ( + + + 5 · Decide & train + + Train the recommended champion, or override to another candidate. The + recommended model is {winnerModelType}. + + + +
+
+ Model to train + +
+ +
+ + {trainResult?.override_warning && ( +
+ + {trainResult.override_warning} +
+ )} + + {trainResult && !trainResult.override_warning && ( +

+ Trained {trainResult.model_type}. +

+ )} +
+ + + + + Train a non-recommended model? + + You picked {selected} instead of the + recommended {winnerModelType}. This is an + override and is recorded on the run. + + +
+ Reason (optional) + setOverrideReason(event.target.value)} + placeholder="e.g. domain seasonality outweighs the WAPE lead" + data-testid="override-reason-input" + /> +
+ + Cancel + + Train override + + +
+
+
+ ) +} diff --git a/frontend/src/components/champion-selector/results/cancel-run-dialog.test.tsx b/frontend/src/components/champion-selector/results/cancel-run-dialog.test.tsx new file mode 100644 index 00000000..c5d53231 --- /dev/null +++ b/frontend/src/components/champion-selector/results/cancel-run-dialog.test.tsx @@ -0,0 +1,33 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { CancelRunDialog } from './cancel-run-dialog' + +beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) + if (!Element.prototype.hasPointerCapture) { + Element.prototype.hasPointerCapture = () => false + } +}) + +afterEach(cleanup) + +describe('CancelRunDialog', () => { + it('confirms cancellation via the AlertDialog', () => { + const onConfirm = vi.fn() + render() + fireEvent.click(screen.getByTestId('cancel-run-trigger')) + fireEvent.click(screen.getByTestId('cancel-run-confirm')) + expect(onConfirm).toHaveBeenCalledTimes(1) + }) + + it('disables the trigger while cancelling', () => { + render( {}} isCancelling />) + const trigger = screen.getByTestId('cancel-run-trigger') as HTMLButtonElement + expect(trigger.disabled).toBe(true) + }) +}) diff --git a/frontend/src/components/champion-selector/results/cancel-run-dialog.tsx b/frontend/src/components/champion-selector/results/cancel-run-dialog.tsx new file mode 100644 index 00000000..d85c08ca --- /dev/null +++ b/frontend/src/components/champion-selector/results/cancel-run-dialog.tsx @@ -0,0 +1,62 @@ +import { Loader2, X } from 'lucide-react' +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, + AlertDialogTrigger, +} from '@/components/ui/alert-dialog' +import { Button } from '@/components/ui/button' + 
+interface CancelRunDialogProps { + onConfirm: () => void + isCancelling?: boolean + disabled?: boolean +} + +/** + * Cancel-run confirmation (Slice B). Mirrors the batch cancel dialog and reuses + * the honest pending-skip / running-yield copy. + */ +export function CancelRunDialog({ onConfirm, isCancelling, disabled }: CancelRunDialogProps) { + return ( + + + + + + + Cancel this comparison? + + Candidates that haven't started will be skipped. A candidate + already mid-fit stops at the next safe point — sklearn / LightGBM + fits are uncancellable mid-call, so an in-flight fit may finish + first. Results from candidates that already completed are kept. + + + + Keep running + + Cancel run + + + + + ) +} diff --git a/frontend/src/components/champion-selector/results/comparison-charts.test.tsx b/frontend/src/components/champion-selector/results/comparison-charts.test.tsx new file mode 100644 index 00000000..d1ea60bf --- /dev/null +++ b/frontend/src/components/champion-selector/results/comparison-charts.test.tsx @@ -0,0 +1,36 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { ComparisonCharts } from './comparison-charts' +import type { ModelSelectionChartData } from '@/types/api' + +// Recharts' ResponsiveContainer needs ResizeObserver in jsdom. 
+beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) +}) + +afterEach(cleanup) + +const chartData: ModelSelectionChartData = { + wape_by_model: { regression: 10, naive: 14 }, + bias_by_model: { regression: -0.2, naive: 0.5 }, + fold_stability: { regression: [10, 11] }, + winner_actual_vs_predicted: [ + { dates: ['2026-01-01', '2026-01-02'], actuals: [10, 12], predictions: [9.5, 12.5] }, + ], +} + +describe('ComparisonCharts', () => { + it('renders WAPE + bias bars from chart_data', () => { + render() + expect(screen.getByTestId('comparison-charts')).toBeTruthy() + expect(screen.getByTestId('metric-bars-wape-by-model')).toBeTruthy() + expect(screen.getByTestId('metric-bars-bias-by-model')).toBeTruthy() + // Winner is starred in the bar list. + expect(screen.getAllByText('★ regression').length).toBeGreaterThan(0) + }) +}) diff --git a/frontend/src/components/champion-selector/results/comparison-charts.tsx b/frontend/src/components/champion-selector/results/comparison-charts.tsx new file mode 100644 index 00000000..5e192a22 --- /dev/null +++ b/frontend/src/components/champion-selector/results/comparison-charts.tsx @@ -0,0 +1,105 @@ +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { MultiSeriesChart } from '@/components/charts/multi-series-chart' +import { BIAS_EXPLANATION } from '@/components/champion-selector/copy' +import type { ModelSelectionChartData } from '@/types/api' + +interface ComparisonChartsProps { + chartData: ModelSelectionChartData + winnerModelType?: string +} + +/** One labelled horizontal bar (CSS — deterministic, no chart lib needed). 
*/ +function MetricBars({ + title, + byModel, + winnerModelType, + signed = false, +}: { + title: string + byModel: Record + winnerModelType?: string + signed?: boolean +}) { + const entries = Object.entries(byModel) + const max = Math.max(1, ...entries.map(([, v]) => Math.abs(v))) + return ( +
+

{title}

+ {entries.map(([model, value]) => ( +
+ + {model === winnerModelType ? `★ ${model}` : model} + +
+
+
+ {value.toFixed(2)} +
+ ))} +
+ ) +} + +/** + * Comparison charts (Slice B): WAPE-by-model + bias-by-model bars, and the + * winner's actual-vs-predicted overlay. Reads the backend `chart_data` payload. + */ +export function ComparisonCharts({ chartData, winnerModelType }: ComparisonChartsProps) { + // Build actual-vs-predicted rows for the winner from the fold chart points. + const avpRows: Record[] = [] + for (const fold of chartData.winner_actual_vs_predicted as Array<{ + dates?: string[] + actuals?: number[] + predictions?: number[] + }>) { + const dates = fold.dates ?? [] + const actuals = fold.actuals ?? [] + const predictions = fold.predictions ?? [] + for (let i = 0; i < dates.length; i++) { + avpRows.push({ + date: dates[i] ?? String(i), + actual: actuals[i] ?? 0, + predicted: predictions[i] ?? 0, + }) + } + } + + return ( + + + Comparison + {BIAS_EXPLANATION} + + +
+ + +
+ {avpRows.length > 0 && ( + + )} +
+
+ ) +} diff --git a/frontend/src/components/champion-selector/results/constants.ts b/frontend/src/components/champion-selector/results/constants.ts new file mode 100644 index 00000000..41aa3bb2 --- /dev/null +++ b/frontend/src/components/champion-selector/results/constants.ts @@ -0,0 +1,17 @@ +import type { ModelSelectionStatus } from '@/types/api' + +/** + * Terminal selection-run statuses (Slice B). Polling stops once a run reaches + * one of these. Kept in a `.ts` module so the + * `react-refresh/only-export-components` lint rule never trips. + */ +export const TERMINAL_SELECTION_STATES: ReadonlySet = new Set([ + 'completed', + 'partial', + 'failed', + 'cancelled', +]) + +export function isTerminalSelectionStatus(status: ModelSelectionStatus): boolean { + return TERMINAL_SELECTION_STATES.has(status) +} diff --git a/frontend/src/components/champion-selector/results/model-detail-drawer.test.tsx b/frontend/src/components/champion-selector/results/model-detail-drawer.test.tsx new file mode 100644 index 00000000..83d90d1b --- /dev/null +++ b/frontend/src/components/champion-selector/results/model-detail-drawer.test.tsx @@ -0,0 +1,43 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { ModelDetailDrawer } from './model-detail-drawer' +import type { ModelRankEntry } from '@/types/api' + +// Radix Dialog (Sheet) needs these layout APIs in jsdom. 
+beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) + if (!Element.prototype.hasPointerCapture) { + Element.prototype.hasPointerCapture = () => false + } +}) + +afterEach(cleanup) + +const entry: ModelRankEntry = { + rank: 1, + model_type: 'regression', + params: { max_depth: 6 }, + included: true, + exclusion_reason: null, + metrics: { wape: 10, smape: 8, mae: 4, rmse: 5, bias: 0.1 }, +} + +describe('ModelDetailDrawer', () => { + it('renders the candidate metrics + params when open', () => { + render( {}} />) + const drawer = screen.getByTestId('model-detail-drawer') + expect(drawer.textContent).toContain('regression') + expect(drawer.textContent).toContain('WAPE') + expect(drawer.textContent).toContain('max_depth') + }) + + it('renders nothing meaningful when closed', () => { + render( {}} />) + expect(screen.queryByTestId('model-detail-drawer')).toBeNull() + }) +}) diff --git a/frontend/src/components/champion-selector/results/model-detail-drawer.tsx b/frontend/src/components/champion-selector/results/model-detail-drawer.tsx new file mode 100644 index 00000000..f7ac0148 --- /dev/null +++ b/frontend/src/components/champion-selector/results/model-detail-drawer.tsx @@ -0,0 +1,79 @@ +import { + Sheet, + SheetContent, + SheetDescription, + SheetHeader, + SheetTitle, +} from '@/components/ui/sheet' +import { Badge } from '@/components/ui/badge' +import type { ModelRankEntry } from '@/types/api' + +interface ModelDetailDrawerProps { + entry: ModelRankEntry | null + open: boolean + onOpenChange: (open: boolean) => void +} + +function fmt(value: number | undefined): string { + if (typeof value !== 'number' || !Number.isFinite(value)) return '—' + return value.toFixed(3) +} + +const METRIC_KEYS: { key: string; label: string }[] = [ + { key: 'wape', label: 'WAPE' }, + { key: 'smape', label: 'sMAPE' }, + { key: 'mae', label: 'MAE' }, + { key: 'rmse', label: 'RMSE' }, + { 
key: 'bias', label: 'Bias' }, +] + +/** + * Per-model detail drawer (Slice B). Opens from a ranking-row click; shows one + * candidate's metrics, params, and exclusion reason (read-only). + */ +export function ModelDetailDrawer({ entry, open, onOpenChange }: ModelDetailDrawerProps) { + return ( + + + {entry && ( + <> + + + {entry.model_type} + {!entry.included && ( + {entry.exclusion_reason ?? 'excluded'} + )} + + + {entry.rank !== null ? `Ranked #${entry.rank}` : 'Not ranked'} + + +
+
+

Metrics

+ + + {METRIC_KEYS.map((m) => ( + + + + + ))} + +
{m.label} + {fmt(entry.metrics?.[m.key])} +
+
+
+

Parameters

+
+                  {JSON.stringify(entry.params, null, 2)}
+                
+
+
+ + )} +
+
+ ) +} diff --git a/frontend/src/components/champion-selector/results/ranking-table.test.tsx b/frontend/src/components/champion-selector/results/ranking-table.test.tsx new file mode 100644 index 00000000..9943ff6b --- /dev/null +++ b/frontend/src/components/champion-selector/results/ranking-table.test.tsx @@ -0,0 +1,50 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { RankingTable } from './ranking-table' +import type { ModelRankEntry } from '@/types/api' + +afterEach(cleanup) + +const ranking: ModelRankEntry[] = [ + { + rank: 1, + model_type: 'regression', + params: {}, + included: true, + exclusion_reason: null, + metrics: { wape: 10, smape: 8, mae: 4, bias: 0.1 }, + }, + { + rank: 2, + model_type: 'naive', + params: {}, + included: true, + exclusion_reason: null, + metrics: { wape: 14, smape: 12, mae: 6, bias: 0.5 }, + }, + { + rank: null, + model_type: 'moving_average', + params: { window_size: 0 }, + included: false, + exclusion_reason: 'failed', + metrics: null, + }, +] + +describe('RankingTable', () => { + it('renders a row per entry; excluded rows show their reason', () => { + render( {}} />) + expect(screen.getByTestId('ranking-row-regression')).toBeTruthy() + expect(screen.getByTestId('ranking-row-naive')).toBeTruthy() + const excluded = screen.getByTestId('ranking-row-moving_average') + expect(excluded.textContent).toContain('failed') + }) + + it('calls onSelectModel with the clicked entry', () => { + const onSelect = vi.fn() + render() + fireEvent.click(screen.getByTestId('ranking-row-naive')) + expect(onSelect).toHaveBeenCalledWith(ranking[1]) + }) +}) diff --git a/frontend/src/components/champion-selector/results/ranking-table.tsx b/frontend/src/components/champion-selector/results/ranking-table.tsx new file mode 100644 index 00000000..a8c0515a --- /dev/null +++ b/frontend/src/components/champion-selector/results/ranking-table.tsx @@ -0,0 +1,90 @@ 
+import { Trophy } from 'lucide-react' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Badge } from '@/components/ui/badge' +import { cn } from '@/lib/utils' +import { RANKING_TIE_BREAK } from '@/components/champion-selector/copy' +import type { ModelRankEntry } from '@/types/api' + +interface RankingTableProps { + ranking: ModelRankEntry[] + onSelectModel: (entry: ModelRankEntry) => void +} + +function fmt(value: number | undefined): string { + if (typeof value !== 'number' || !Number.isFinite(value)) return '—' + return value.toFixed(2) +} + +/** + * Candidate ranking table (Slice B). Winner row highlighted; excluded + * (failed/cancelled/filtered) rows show their reason and stay visible. Clicking + * a row opens the model-detail drawer. + */ +export function RankingTable({ ranking, onSelectModel }: RankingTableProps) { + return ( + + + Ranking + {RANKING_TIE_BREAK} + + + + + + + + + + + + + + + {ranking.map((entry) => ( + onSelectModel(entry)} + className={cn( + 'cursor-pointer border-t hover:bg-accent/50', + entry.rank === 1 && 'bg-primary/5 font-medium', + !entry.included && 'text-muted-foreground', + )} + > + + + + + + + + ))} + +
RankModelWAPEsMAPEMAEBias
+ {entry.rank === 1 ? ( + + 1 + + ) : ( + (entry.rank ?? '—') + )} + + {entry.model_type} + {!entry.included && ( + + {entry.exclusion_reason ?? 'excluded'} + + )} + + {fmt(entry.metrics?.['wape'])} + + {fmt(entry.metrics?.['smape'])} + + {fmt(entry.metrics?.['mae'])} + + {fmt(entry.metrics?.['bias'])} +
+
+
+ ) +} diff --git a/frontend/src/components/champion-selector/results/run-progress-panel.test.tsx b/frontend/src/components/champion-selector/results/run-progress-panel.test.tsx new file mode 100644 index 00000000..13c4ef54 --- /dev/null +++ b/frontend/src/components/champion-selector/results/run-progress-panel.test.tsx @@ -0,0 +1,57 @@ +import { afterEach, describe, expect, it } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { RunProgressPanel } from './run-progress-panel' +import type { CandidateProgress, SelectionProgress } from '@/types/api' + +afterEach(cleanup) + +const progress: SelectionProgress = { + total: 3, + pending: 1, + running: 1, + completed: 1, + failed: 0, + cancelled: 0, +} + +function cand(model_type: string, status: CandidateProgress['status']): CandidateProgress { + return { + candidate_id: `id-${model_type}`, + ordinal: 0, + model_type, + status, + error: status === 'failed' ? 'boom' : null, + started_at: null, + completed_at: null, + duration_ms: status === 'completed' ? 
1500 : null, + } +} + +describe('RunProgressPanel', () => { + it('renders status badge, counts, and a per-candidate row', () => { + render( + , + ) + expect(screen.getByTestId('run-status-badge').textContent).toContain('running') + expect(screen.getByText('Total')).toBeTruthy() + expect(screen.getByTestId('candidate-row-naive')).toBeTruthy() + expect(screen.getByTestId('candidate-row-regression')).toBeTruthy() + }) + + it('keeps a failed candidate visible with its error', () => { + render( + , + ) + const row = screen.getByTestId('candidate-row-xgboost') + expect(row.textContent).toContain('failed') + expect(row.textContent).toContain('boom') + }) +}) diff --git a/frontend/src/components/champion-selector/results/run-progress-panel.tsx b/frontend/src/components/champion-selector/results/run-progress-panel.tsx new file mode 100644 index 00000000..4c5699a3 --- /dev/null +++ b/frontend/src/components/champion-selector/results/run-progress-panel.tsx @@ -0,0 +1,87 @@ +import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' +import { StatusBadge } from '@/components/common/status-badge' +import { getStatusVariant } from '@/lib/status-utils' +import type { + CandidateProgress, + ModelSelectionStatus, + SelectionProgress, +} from '@/types/api' + +interface RunProgressPanelProps { + status: ModelSelectionStatus + progress: SelectionProgress | null + candidates: CandidateProgress[] +} + +function Count({ label, value }: { label: string; value: number }) { + return ( +
+

{label}

+

{value}

+
+ ) +} + +/** + * Live async-run progress (Slice B): the run status, per-status counts, and a + * per-candidate table. Failed/cancelled candidates stay visible. + */ +export function RunProgressPanel({ status, progress, candidates }: RunProgressPanelProps) { + return ( + + +
+ Comparison progress + + {status} + +
+
+ + {progress && ( +
+ + + + + + +
+ )} + {candidates.length > 0 && ( + + + + + + + + + + {candidates.map((c) => ( + + + + + + ))} + +
ModelStatusDuration
{c.model_type} + + {c.status} + + {c.error && ( + {c.error} + )} + + {c.duration_ms === null ? '—' : `${(c.duration_ms / 1000).toFixed(1)}s`} +
+ )} +
+
+ ) +} diff --git a/frontend/src/components/champion-selector/results/winner-card.test.tsx b/frontend/src/components/champion-selector/results/winner-card.test.tsx new file mode 100644 index 00000000..54054253 --- /dev/null +++ b/frontend/src/components/champion-selector/results/winner-card.test.tsx @@ -0,0 +1,40 @@ +import { afterEach, describe, expect, it } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { WinnerCard } from './winner-card' +import type { WinnerSummary } from '@/types/api' + +afterEach(cleanup) + +const winner: WinnerSummary = { + model_type: 'regression', + params: {}, + metrics: { wape: 10, smape: 8, mae: 4, bias: 0.1 }, + rank: 1, +} + +describe('WinnerCard', () => { + it('renders the winner, confidence, metrics, and bias copy', () => { + render() + expect(screen.getByTestId('winner-card').textContent).toContain('regression') + expect(screen.getByTestId('winner-confidence-badge').textContent).toContain('high') + expect(screen.getByText('clear lead')).toBeTruthy() + expect(screen.getByText(/Positive bias means the model under-forecasts/)).toBeTruthy() + }) + + it('renders a no-winner state when winner is null', () => { + render() + expect(screen.getByText('No champion selected')).toBeTruthy() + }) + + it('surfaces the deterministic business_summary headline read-only', () => { + render( + , + ) + expect(screen.getByText('regression wins by 28% WAPE')).toBeTruthy() + }) +}) diff --git a/frontend/src/components/champion-selector/results/winner-card.tsx b/frontend/src/components/champion-selector/results/winner-card.tsx new file mode 100644 index 00000000..c5fa0b8a --- /dev/null +++ b/frontend/src/components/champion-selector/results/winner-card.tsx @@ -0,0 +1,100 @@ +import { Trophy } from 'lucide-react' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Badge } from '@/components/ui/badge' +import { StatusBadge } from 
'@/components/common/status-badge' +import { BIAS_EXPLANATION } from '@/components/champion-selector/copy' +import type { ConfidenceLevel, WinnerSummary } from '@/types/api' + +interface WinnerCardProps { + winner: WinnerSummary | null + confidence: ConfidenceLevel | null + reasons: string[] + /** The deterministic backend `business_summary` (read-only; Slice C extends). */ + businessSummary?: Record | null +} + +const CONFIDENCE_VARIANT: Record = { + high: 'success', + medium: 'info', + low: 'warning', +} + +function Metric({ label, value }: { label: string; value: number | undefined }) { + return ( +
+

{label}

+

+ {typeof value === 'number' && Number.isFinite(value) ? value.toFixed(2) : '—'} +

+
+ ) +} + +/** + * Winner summary card (Slice B). Null-safe — renders a "no winner" state for a + * failed/cancelled run. Renders the deterministic `business_summary` headline + * READ-ONLY (Slice C adds the decision-layer interpretation on top). + */ +export function WinnerCard({ winner, confidence, reasons, businessSummary }: WinnerCardProps) { + if (winner === null) { + return ( + + + No champion selected + + No candidate produced a valid backtest. Review the failed candidates + below or adjust the selection. + + + + ) + } + + const headline = + typeof businessSummary?.['headline'] === 'string' + ? (businessSummary['headline'] as string) + : null + + return ( + + +
+ + + {winner.model_type} + + {confidence && ( + + {confidence} confidence + + )} +
+ {headline && {headline}} +
+ +
+ + + + +
+ {reasons.length > 0 && ( +
+ {reasons.map((reason, i) => ( +
+ + why + + {reason} +
+ ))} +
+ )} +

{BIAS_EXPLANATION}

+
+
+ ) +} diff --git a/frontend/src/components/champion-selector/run-request.test.ts b/frontend/src/components/champion-selector/run-request.test.ts new file mode 100644 index 00000000..59f4ad0e --- /dev/null +++ b/frontend/src/components/champion-selector/run-request.test.ts @@ -0,0 +1,63 @@ +import { describe, expect, it } from 'vitest' +import { assembleRunRequest } from './run-request' +import type { SplitConfig } from '@/types/api' + +const SPLIT: SplitConfig = { + strategy: 'expanding', + n_splits: 5, + min_train_size: 30, + gap: 0, + horizon: 7, // intentionally stale — must be overridden to forecastHorizon +} + +describe('assembleRunRequest', () => { + it('pins auto_train_winner and auto_predict to false (Slice A invariant)', () => { + const req = assembleRunRequest({ + storeId: 7, + productId: 12, + startDate: '2026-01-01', + endDate: '2026-05-31', + forecastHorizon: 14, + rankingMetric: 'wape', + splitConfig: SPLIT, + selectedModels: ['naive', 'regression'], + }) + expect(req.auto_train_winner).toBe(false) + expect(req.auto_predict).toBe(false) + }) + + it('forces split_config.horizon === forecast_horizon', () => { + const req = assembleRunRequest({ + storeId: 1, + productId: 2, + startDate: '2026-01-01', + endDate: '2026-03-31', + forecastHorizon: 21, + rankingMetric: 'wape', + splitConfig: SPLIT, + selectedModels: ['naive'], + }) + expect(req.forecast_horizon).toBe(21) + expect(req.split_config.horizon).toBe(21) + }) + + it('maps selected model types into flat candidate configs and stays V1', () => { + const req = assembleRunRequest({ + storeId: 1, + productId: 2, + startDate: '2026-01-01', + endDate: '2026-03-31', + forecastHorizon: 14, + rankingMetric: 'smape', + splitConfig: SPLIT, + selectedModels: ['naive', 'seasonal_naive'], + }) + expect(req.candidate_models).toEqual([ + { model_type: 'naive', params: {} }, + { model_type: 'seasonal_naive', params: {} }, + ]) + expect(req.feature_frame_version).toBe(1) + expect(req.feature_groups).toBeNull() + 
expect(req.ranking_metric).toBe('smape') + }) +}) diff --git a/frontend/src/components/champion-selector/run-request.ts b/frontend/src/components/champion-selector/run-request.ts new file mode 100644 index 00000000..253da365 --- /dev/null +++ b/frontend/src/components/champion-selector/run-request.ts @@ -0,0 +1,50 @@ +import type { + ModelSelectionRunRequest, + RankingMetric, + SplitConfig, +} from '@/types/api' + +export interface AssembleRunRequestInput { + storeId: number + productId: number + startDate: string // YYYY-MM-DD + endDate: string // YYYY-MM-DD + forecastHorizon: number + rankingMetric: RankingMetric + splitConfig: SplitConfig + selectedModels: string[] +} + +/** + * Assemble the typed `ModelSelectionRunRequest` from the Champion Selector + * form state. Pure + side-effect-free so it can be unit-tested. + * + * Slice A pins `auto_train_winner` and `auto_predict` to `false`: the async run + * path (Slice B) treats both as NO-OPS, and Slice C owns explicit + * train/predict. `split_config.horizon` is forced equal to `forecast_horizon` + * (the backend `ModelSelectionRunRequest` validator requires it). The request + * is assembled but NOT sent in Slice A — the "Run comparison" CTA is disabled. 
+ */ +export function assembleRunRequest( + input: AssembleRunRequestInput, +): ModelSelectionRunRequest { + return { + store_id: input.storeId, + product_id: input.productId, + selection_window: { + start_date: input.startDate, + end_date: input.endDate, + }, + forecast_horizon: input.forecastHorizon, + ranking_metric: input.rankingMetric, + split_config: { ...input.splitConfig, horizon: input.forecastHorizon }, + candidate_models: input.selectedModels.map((model_type) => ({ + model_type, + params: {}, + })), + feature_frame_version: 1, + feature_groups: null, + auto_train_winner: false, + auto_predict: false, + } +} diff --git a/frontend/src/components/champion-selector/searchable-entity-select.test.tsx b/frontend/src/components/champion-selector/searchable-entity-select.test.tsx new file mode 100644 index 00000000..99b476a7 --- /dev/null +++ b/frontend/src/components/champion-selector/searchable-entity-select.test.tsx @@ -0,0 +1,78 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { SearchableEntitySelect, type SearchableEntityItem } from './searchable-entity-select' + +// Radix Popover positions its content with Popper, which needs ResizeObserver +// + a couple of layout APIs jsdom lacks. Polyfill them locally (the repo has no +// vitest setup file) so the popover can open in the test environment. 
+beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) + if (!Element.prototype.hasPointerCapture) { + Element.prototype.hasPointerCapture = () => false + } + if (!Element.prototype.scrollIntoView) { + Element.prototype.scrollIntoView = () => {} + } +}) + +afterEach(cleanup) + +const ITEMS: SearchableEntityItem[] = [ + { id: 7, primary: 'S001 · Downtown', secondary: 'North' }, + { id: 12, primary: 'S002 · Airport', secondary: 'West' }, + { id: 99, primary: 'S003 · Suburb', secondary: 'East' }, +] + +describe('SearchableEntitySelect', () => { + it('shows the placeholder when nothing is selected', () => { + render( + {}} + placeholder="Pick a store…" + />, + ) + expect(screen.getByText('Pick a store…')).toBeTruthy() + }) + + it('filters the list client-side and selects an option on click', () => { + const onChange = vi.fn() + render( + , + ) + fireEvent.click(screen.getByTestId('searchable-entity-select')) + + // All three options visible before filtering. + expect(screen.getByTestId('searchable-entity-select-option-7')).toBeTruthy() + expect(screen.getByTestId('searchable-entity-select-option-12')).toBeTruthy() + expect(screen.getByTestId('searchable-entity-select-option-99')).toBeTruthy() + + // Filter narrows to the Airport row (matches the primary text). 
+ fireEvent.change(screen.getByTestId('searchable-entity-select-filter'), { + target: { value: 'airport' }, + }) + expect(screen.queryByTestId('searchable-entity-select-option-7')).toBeNull() + expect(screen.getByTestId('searchable-entity-select-option-12')).toBeTruthy() + + fireEvent.click(screen.getByTestId('searchable-entity-select-option-12')) + expect(onChange).toHaveBeenCalledWith(12) + }) + + it('filters on the secondary descriptor too', () => { + render( + {}} />, + ) + fireEvent.click(screen.getByTestId('searchable-entity-select')) + fireEvent.change(screen.getByTestId('searchable-entity-select-filter'), { + target: { value: 'east' }, + }) + expect(screen.getByTestId('searchable-entity-select-option-99')).toBeTruthy() + expect(screen.queryByTestId('searchable-entity-select-option-7')).toBeNull() + }) +}) diff --git a/frontend/src/components/champion-selector/searchable-entity-select.tsx b/frontend/src/components/champion-selector/searchable-entity-select.tsx new file mode 100644 index 00000000..f4dcf51b --- /dev/null +++ b/frontend/src/components/champion-selector/searchable-entity-select.tsx @@ -0,0 +1,144 @@ +import { useState } from 'react' +import { Check, ChevronsUpDown, Search } from 'lucide-react' +import { cn } from '@/lib/utils' +import { Button } from '@/components/ui/button' +import { Input } from '@/components/ui/input' +import { + Popover, + PopoverContent, + PopoverTrigger, +} from '@/components/ui/popover' + +export interface SearchableEntityItem { + id: number + primary: string + secondary?: string +} + +interface SearchableEntitySelectProps { + items: SearchableEntityItem[] + value: number | null + onChange: (id: number) => void + placeholder?: string + loading?: boolean + emptyLabel?: string + /** Forwarded to the trigger button + filter input for scoped test queries. */ + testId?: string +} + +/** + * A combobox built from existing primitives (Popover + Input + a filtered + * ` + + +
+ + setFilter(event.target.value)} + placeholder="Filter…" + data-testid={`${testId}-filter`} + className="h-8 border-0 px-0 shadow-none focus-visible:ring-0" + /> +
+
+ {filtered.length === 0 ? ( +

+ {emptyLabel} +

+ ) : ( + filtered.map((item) => ( + + )) + )} +
+
+ + ) +} diff --git a/frontend/src/components/champion-selector/split-config.ts b/frontend/src/components/champion-selector/split-config.ts new file mode 100644 index 00000000..ecc98f35 --- /dev/null +++ b/frontend/src/components/champion-selector/split-config.ts @@ -0,0 +1,28 @@ +import type { SplitConfig } from '@/types/api' + +/** + * Inline-validate a `SplitConfig` against the backend SplitConfig bounds + * (`app/features/backtesting/schemas.py`). Kept in a `.ts` module (not the + * form `.tsx`) so the `react-refresh/only-export-components` lint rule stays + * happy. Returns a list of human-facing error strings (empty = valid). + * + * The bound checks are written as negated "in range" tests because every + * comparison with `NaN` is false: a `NaN` field must be reported as out of + * range rather than slip through as valid. + */ +export function splitConfigErrors(config: SplitConfig): string[] { + const errors: string[] = [] + if (!(config.n_splits >= 2 && config.n_splits <= 20)) { + errors.push('Splits must be between 2 and 20.') + } + if (!(config.min_train_size >= 7)) { + errors.push('Minimum train size must be at least 7 days.') + } + if (!(config.gap >= 0 && config.gap <= 30)) { + errors.push('Gap must be between 0 and 30 days.') + } + if (config.gap >= config.horizon) { + errors.push('Gap must be smaller than the horizon.') + } + return errors +} diff --git a/frontend/src/components/demo/RunHistoryStrip.tsx b/frontend/src/components/demo/RunHistoryStrip.tsx index 5605879c..fce287ba 100644 --- a/frontend/src/components/demo/RunHistoryStrip.tsx +++ b/frontend/src/components/demo/RunHistoryStrip.tsx @@ -63,26 +63,35 @@ export function RunHistoryStrip({ onReplay, summary, scenario }: RunHistoryStrip const [items, setItems] = useState(() => loadHistory()) const [lastSummary, setLastSummary] = useState(null) - useEffect(() => { - if (!summary || summary === lastSummary) return - // Persist exactly once per pipeline_complete summary (R18). 
- const entry: RunHistoryItem = { - id: crypto.randomUUID(), - runId: summary.winningRunId, - timestamp: new Date().toISOString(), - scenario, - status: summary.overallStatus, - wallClockS: summary.wallClockS, - } - const next = [entry, ...items].slice(0, HISTORY_CAP) - setItems(next) - saveHistory(next) + // Append exactly once per pipeline_complete summary (R18). Done DURING render + // (the React "storing information from previous renders" pattern) rather than + // in an effect — calling setState synchronously inside an effect body causes + // cascading renders and is flagged by react-hooks/set-state-in-effect. + if (summary && summary !== lastSummary) { setLastSummary(summary) - }, [summary, lastSummary, items, scenario]) + setItems((prev) => + [ + { + id: crypto.randomUUID(), + runId: summary.winningRunId, + timestamp: new Date().toISOString(), + scenario, + status: summary.overallStatus, + wallClockS: summary.wallClockS, + }, + ...prev, + ].slice(0, HISTORY_CAP), + ) + } + + // Persist the history to localStorage whenever it changes — syncing React + // state to an external system is the sanctioned use of an effect. 
+ useEffect(() => { + saveHistory(items) + }, [items]) const clear = useCallback(() => { setItems([]) - saveHistory([]) }, []) if (items.length === 0) return null diff --git a/frontend/src/hooks/index.ts b/frontend/src/hooks/index.ts index 1c47074d..eebde40d 100644 --- a/frontend/src/hooks/index.ts +++ b/frontend/src/hooks/index.ts @@ -7,6 +7,7 @@ export * from './use-inventory' export * from './use-lifecycle-curve' export * from './use-runs' export * from './use-jobs' +export * from './use-model-selection' export * from './use-ops' export * from './use-scenarios' export * from './use-rag-sources' diff --git a/frontend/src/hooks/use-model-selection.test.ts b/frontend/src/hooks/use-model-selection.test.ts new file mode 100644 index 00000000..5074351b --- /dev/null +++ b/frontend/src/hooks/use-model-selection.test.ts @@ -0,0 +1,364 @@ +/** + * Unit tests for the model-selection hooks (Champion Selector, Slices A-C). + * + * Stubs `fetch` to assert the request each hook issues (URL, plus the HTTP + * method for mutations) and the query hooks' `enabled` gating. No real backend is exercised. 
+ */ +import { QueryClient, QueryClientProvider } from '@tanstack/react-query' +import { act, renderHook, waitFor } from '@testing-library/react' +import { afterEach, describe, expect, it, vi } from 'vitest' +import { createElement, type ReactNode } from 'react' + +import { + useCancelSelectionRun, + useModelCatalog, + usePairAvailability, + usePredictWinner, + usePromoteChampion, + useSelectionRun, + useSubmitSelectionRun, + useTrainSelected, + useTrainWinner, +} from './use-model-selection' +import type { + ModelCatalogResponse, + ModelSelectionRunRequest, + PairAvailability, + SubmitRunResponse, +} from '@/types/api' + +function makeWrapper(client: QueryClient) { + return function Wrapper({ children }: { children: ReactNode }) { + return createElement(QueryClientProvider, { client }, children) + } +} + +function makeClient() { + return new QueryClient({ defaultOptions: { queries: { retry: false } } }) +} + +const CATALOG: ModelCatalogResponse = { + models: [ + { + model_type: 'naive', + label: 'Naive', + family: 'baseline', + feature_aware: false, + requires_extra: false, + default_params: {}, + supports_auto_predict: true, + description: 'Repeats the last observed value.', + }, + ], + default_candidate_model_types: ['naive', 'seasonal_naive', 'moving_average'], +} + +const AVAILABILITY: PairAvailability = { + store_id: 7, + product_id: 12, + first_sales_date: '2026-01-01', + last_sales_date: '2026-05-31', + observed_days: 150, + expected_calendar_days: 151, + coverage_ratio: 0.99, + missing_days: 1, + zero_sale_days: 4, + promotion_days: 3, + average_daily_demand: 9.2, + status: 'ready', + recommended_split_config: { + strategy: 'expanding', + n_splits: 5, + min_train_size: 30, + gap: 0, + horizon: 14, + }, + warnings: [], +} + +afterEach(() => { + vi.unstubAllGlobals() +}) + +describe('useModelCatalog', () => { + it('GETs /model-selection/models and returns the parsed catalog', async () => { + const fetchMock = vi.fn().mockResolvedValue( + new 
Response(JSON.stringify(CATALOG), { + status: 200, + headers: { 'content-type': 'application/json' }, + }), + ) + vi.stubGlobal('fetch', fetchMock) + + const { result } = renderHook(() => useModelCatalog(), { + wrapper: makeWrapper(makeClient()), + }) + + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + expect(fetchMock).toHaveBeenCalledTimes(1) + expect(fetchMock.mock.calls[0]![0]).toContain('/model-selection/models') + expect(result.current.data?.models[0]?.model_type).toBe('naive') + }) +}) + +describe('usePairAvailability', () => { + it('GETs /model-selection/availability with the three query params', async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response(JSON.stringify(AVAILABILITY), { + status: 200, + headers: { 'content-type': 'application/json' }, + }), + ) + vi.stubGlobal('fetch', fetchMock) + + const { result } = renderHook( + () => usePairAvailability({ storeId: 7, productId: 12, forecastHorizon: 14 }), + { wrapper: makeWrapper(makeClient()) }, + ) + + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + const url = String(fetchMock.mock.calls[0]![0]) + expect(url).toContain('/model-selection/availability') + expect(url).toContain('store_id=7') + expect(url).toContain('product_id=12') + expect(url).toContain('forecast_horizon=14') + expect(result.current.data?.status).toBe('ready') + }) + + it('does NOT fetch while the pair is incomplete (enabled gating)', async () => { + const fetchMock = vi.fn() + vi.stubGlobal('fetch', fetchMock) + + renderHook( + () => usePairAvailability({ storeId: null, productId: 12, forecastHorizon: 14 }), + { wrapper: makeWrapper(makeClient()) }, + ) + + // Give TanStack a tick; the disabled query must never call fetch. 
+ await new Promise((resolve) => setTimeout(resolve, 20)) + expect(fetchMock).not.toHaveBeenCalled() + }) +}) + +// --------------------------------------------------------------------- Slice B + +const SUBMIT_RESPONSE: SubmitRunResponse = { + selection_id: 'sel_b', + store_id: 7, + product_id: 12, + status: 'running', + selection_window: { start_date: '2026-01-01', end_date: '2026-05-31' }, + forecast_horizon: 14, + ranking_metric: 'wape', + availability: null, + ranking: [], + winner: null, + recommendation_confidence: null, + confidence_reasons: [], + chart_data: null, + final_model: null, + forecast: null, + business_summary: null, + error_message: null, + created_at: '2026-06-01T12:00:00Z', + started_at: '2026-06-01T12:00:00Z', + completed_at: null, + progress: { total: 1, pending: 1, running: 0, completed: 0, failed: 0, cancelled: 0 }, + candidate_progress: [ + { + candidate_id: 'c0', + ordinal: 0, + model_type: 'naive', + status: 'pending', + error: null, + started_at: null, + completed_at: null, + duration_ms: null, + }, + ], + monitor_url: '/model-selection/sel_b', + cancel_url: '/model-selection/sel_b', +} + +const RUN_REQUEST: ModelSelectionRunRequest = { + store_id: 7, + product_id: 12, + selection_window: { start_date: '2026-01-01', end_date: '2026-05-31' }, + forecast_horizon: 14, + ranking_metric: 'wape', + split_config: { + strategy: 'expanding', + n_splits: 5, + min_train_size: 30, + gap: 0, + horizon: 14, + }, + candidate_models: [{ model_type: 'naive', params: {} }], + feature_frame_version: 1, + feature_groups: null, + auto_train_winner: false, + auto_predict: false, +} + +describe('useSubmitSelectionRun', () => { + it('POSTs to /model-selection/runs and seeds the poll cache', async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response(JSON.stringify(SUBMIT_RESPONSE), { + status: 202, + headers: { 'content-type': 'application/json' }, + }), + ) + vi.stubGlobal('fetch', fetchMock) + const client = makeClient() + const { result } = 
renderHook(() => useSubmitSelectionRun(), { + wrapper: makeWrapper(client), + }) + await act(async () => { + result.current.mutate(RUN_REQUEST) + }) + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + const call = fetchMock.mock.calls[0]! + expect(String(call[0])).toContain('/model-selection/runs') + expect((call[1] as RequestInit).method).toBe('POST') + // The poll cache is seeded so useSelectionRun starts warm. + expect( + client.getQueryData(['model-selection', 'run', 'sel_b']), + ).toEqual(SUBMIT_RESPONSE) + }) +}) + +describe('useSelectionRun', () => { + it('GETs /model-selection/{id} when given a selection id', async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response(JSON.stringify({ ...SUBMIT_RESPONSE, status: 'completed' }), { + status: 200, + headers: { 'content-type': 'application/json' }, + }), + ) + vi.stubGlobal('fetch', fetchMock) + const { result } = renderHook(() => useSelectionRun('sel_b'), { + wrapper: makeWrapper(makeClient()), + }) + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + expect(String(fetchMock.mock.calls[0]![0])).toContain('/model-selection/sel_b') + expect(result.current.data?.status).toBe('completed') + }) + + it('does NOT fetch without a selection id (enabled gating)', async () => { + const fetchMock = vi.fn() + vi.stubGlobal('fetch', fetchMock) + renderHook(() => useSelectionRun(null), { wrapper: makeWrapper(makeClient()) }) + await new Promise((resolve) => setTimeout(resolve, 20)) + expect(fetchMock).not.toHaveBeenCalled() + }) +}) + +describe('useCancelSelectionRun', () => { + it('DELETEs /model-selection/{id}', async () => { + const cancelled = { ...SUBMIT_RESPONSE, status: 'cancelled' as const } + const fetchMock = vi.fn().mockResolvedValue( + new Response(JSON.stringify(cancelled), { + status: 200, + headers: { 'content-type': 'application/json' }, + }), + ) + vi.stubGlobal('fetch', fetchMock) + const { result } = renderHook(() => useCancelSelectionRun(), { + wrapper: 
makeWrapper(makeClient()), + }) + await act(async () => { + result.current.mutate('sel_b') + }) + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + const call = fetchMock.mock.calls[0]! + expect(String(call[0])).toContain('/model-selection/sel_b') + expect((call[1] as RequestInit).method).toBe('DELETE') + }) +}) + +// --------------------------------------------------------------- Slice C hooks + +function jsonResponse(body: unknown) { + return new Response(JSON.stringify(body), { + status: 200, + headers: { 'content-type': 'application/json' }, + }) +} + +describe('useTrainWinner', () => { + it('POSTs /train-winner (no body) and invalidates the run query', async () => { + const fetchMock = vi.fn().mockResolvedValue( + jsonResponse({ selection_id: 'sel_c', model_type: 'naive', model_path: 'p', is_override: false, override_warning: null }), + ) + vi.stubGlobal('fetch', fetchMock) + const { result } = renderHook(() => useTrainWinner('sel_c'), { + wrapper: makeWrapper(makeClient()), + }) + await act(async () => { + result.current.mutate() + }) + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + const call = fetchMock.mock.calls[0]! + expect(String(call[0])).toContain('/model-selection/sel_c/train-winner') + expect((call[1] as RequestInit).method).toBe('POST') + }) +}) + +describe('useTrainSelected', () => { + it('POSTs /train-selected with the override body', async () => { + const fetchMock = vi.fn().mockResolvedValue( + jsonResponse({ selection_id: 'sel_c', model_type: 'seasonal_naive', model_path: 'p', is_override: true, override_warning: 'w' }), + ) + vi.stubGlobal('fetch', fetchMock) + const { result } = renderHook(() => useTrainSelected('sel_c'), { + wrapper: makeWrapper(makeClient()), + }) + await act(async () => { + result.current.mutate({ model_type: 'seasonal_naive', override_reason: 'domain' }) + }) + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + const call = fetchMock.mock.calls[0]! 
+ expect(String(call[0])).toContain('/model-selection/sel_c/train-selected') + expect((call[1] as RequestInit).method).toBe('POST') + expect(String((call[1] as RequestInit).body)).toContain('seasonal_naive') + }) +}) + +describe('usePredictWinner', () => { + it('POSTs /predict with the decision params body', async () => { + const fetchMock = vi.fn().mockResolvedValue( + jsonResponse({ selection_id: 'sel_c', forecast: { points: [], total_demand: 0, average_demand: 0, horizon: 14 }, decision: null }), + ) + vi.stubGlobal('fetch', fetchMock) + const { result } = renderHook(() => usePredictWinner('sel_c'), { + wrapper: makeWrapper(makeClient()), + }) + await act(async () => { + result.current.mutate({ lead_time_days: 7, service_level: 0.95 }) + }) + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + const call = fetchMock.mock.calls[0]! + expect(String(call[0])).toContain('/model-selection/sel_c/predict') + expect((call[1] as RequestInit).method).toBe('POST') + }) +}) + +describe('usePromoteChampion', () => { + it('POSTs /promote with the promote body', async () => { + const fetchMock = vi.fn().mockResolvedValue( + jsonResponse({ selection_id: 'sel_c', alias_name: 'champion-x', run_id: 'r', run_status: 'success', model_type: 'naive', is_override: false, promoted_at: '2026-06-01T00:00:00Z' }), + ) + vi.stubGlobal('fetch', fetchMock) + const { result } = renderHook(() => usePromoteChampion('sel_c'), { + wrapper: makeWrapper(makeClient()), + }) + await act(async () => { + result.current.mutate({ alias_name: 'champion-x', approved_by: 'gabor' }) + }) + await waitFor(() => expect(result.current.isSuccess).toBe(true)) + const call = fetchMock.mock.calls[0]! 
+ expect(String(call[0])).toContain('/model-selection/sel_c/promote') + expect((call[1] as RequestInit).method).toBe('POST') + expect(String((call[1] as RequestInit).body)).toContain('champion-x') + }) +}) diff --git a/frontend/src/hooks/use-model-selection.ts b/frontend/src/hooks/use-model-selection.ts new file mode 100644 index 00000000..bc861a1b --- /dev/null +++ b/frontend/src/hooks/use-model-selection.ts @@ -0,0 +1,190 @@ +import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query' +import { api } from '@/lib/api' +import { isTerminalSelectionStatus } from '@/components/champion-selector/results/constants' +import type { + ForecastDecisionParams, + ModelCatalogResponse, + ModelSelectionRunRequest, + ModelSelectionRunResponse, + PairAvailability, + PredictWinnerResponse, + PromoteRequest, + PromoteResponse, + SubmitRunResponse, + TrainSelectedRequest, + TrainWinnerResponse, +} from '@/types/api' + +/** + * Model-selection query hooks (Champion Selector). + * + * Slice A: catalog + availability GETs. Slice B: async submit / poll / cancel. + * Slice C: train (winner / override) / predict (decision) / promote. + */ + +/** + * Fetch the backend-owned candidate-model capability catalog. + * + * The catalog is static, so it is cached aggressively (no refetch churn). + */ +export function useModelCatalog() { + return useQuery({ + queryKey: ['model-selection', 'models'], + queryFn: () => api('/model-selection/models'), + staleTime: 1000 * 60 * 60, // 1h — the catalog rarely changes within a session + }) +} + +interface UsePairAvailabilityParams { + storeId: number | null + productId: number | null + forecastHorizon: number + enabled?: boolean +} + +/** + * Assess data availability for a (store, product) pair at a given horizon. + * + * Gated like `useStore`: only fires once a real pair is chosen. `storeId` / + * `productId` are nullable so the page can pass its raw selection state without + * coercing un-selected values to a bogus `0`/`1`. 
+ */ +export function usePairAvailability({ + storeId, + productId, + forecastHorizon, + enabled = true, +}: UsePairAvailabilityParams) { + return useQuery({ + queryKey: ['model-selection', 'availability', storeId, productId, forecastHorizon], + queryFn: () => + api('/model-selection/availability', { + params: { + store_id: storeId, + product_id: productId, + forecast_horizon: forecastHorizon, + }, + }), + enabled: enabled && !!storeId && storeId > 0 && !!productId && productId > 0, + }) +} + +/** + * Submit an async selection run (Slice B). `POST /model-selection/runs` returns + * 202 immediately; we seed the poll cache so `useSelectionRun` starts warm. + */ +export function useSubmitSelectionRun() { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (request: ModelSelectionRunRequest) => + api('/model-selection/runs', { + method: 'POST', + body: request, + }), + onSuccess: (data) => { + queryClient.setQueryData(['model-selection', 'run', data.selection_id], data) + }, + }) +} + +/** + * Poll one selection run. Refetches every 2s while pending/running, then stops + * once the run reaches a terminal status. Gated on a real selection id. + */ +export function useSelectionRun(selectionId: string | null, enabled = true) { + return useQuery({ + queryKey: ['model-selection', 'run', selectionId], + queryFn: () => + api(`/model-selection/${selectionId}`), + enabled: enabled && !!selectionId, + refetchInterval: (query) => { + const status = query.state.data?.status + return status && isTerminalSelectionStatus(status) ? false : 2000 + }, + }) +} + +/** + * Cancel an in-flight selection run (Slice B). `DELETE /model-selection/{id}` — + * 200 settled / 404 / 409 terminal / 504 drain timeout. Seeds + invalidates the + * poll query on success. 
+ */ +export function useCancelSelectionRun() { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (selectionId: string) => + api(`/model-selection/${selectionId}`, { + method: 'DELETE', + }), + onSuccess: (data) => { + queryClient.setQueryData(['model-selection', 'run', data.selection_id], data) + void queryClient.invalidateQueries({ + queryKey: ['model-selection', 'run', data.selection_id], + }) + }, + }) +} + +/** + * Invalidate the polled run query so a terminal run re-fetches the new + * `final_model_path` / `forecast` / promotion after a Slice C mutation. + */ +function invalidateRun( + queryClient: ReturnType, + selectionId: string, +) { + void queryClient.invalidateQueries({ + queryKey: ['model-selection', 'run', selectionId], + }) +} + +/** Train the ranked winner (`POST /{id}/train-winner`, no body). */ +export function useTrainWinner(selectionId: string) { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: () => + api(`/model-selection/${selectionId}/train-winner`, { + method: 'POST', + }), + onSuccess: () => invalidateRun(queryClient, selectionId), + }) +} + +/** Train a user-chosen candidate (`POST /{id}/train-selected`, override). */ +export function useTrainSelected(selectionId: string) { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (body: TrainSelectedRequest) => + api(`/model-selection/${selectionId}/train-selected`, { + method: 'POST', + body, + }), + onSuccess: () => invalidateRun(queryClient, selectionId), + }) +} + +/** Forecast with the trained model + decision (`POST /{id}/predict`). 
*/ +export function usePredictWinner(selectionId: string) { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (body: ForecastDecisionParams) => + api(`/model-selection/${selectionId}/predict`, { + method: 'POST', + body, + }), + onSuccess: () => invalidateRun(queryClient, selectionId), + }) +} + +/** Promote the trained champion to a registry alias (`POST /{id}/promote`). */ +export function usePromoteChampion(selectionId: string) { + const queryClient = useQueryClient() + return useMutation({ + mutationFn: (body: PromoteRequest) => + api(`/model-selection/${selectionId}/promote`, { + method: 'POST', + body, + }), + onSuccess: () => invalidateRun(queryClient, selectionId), + }) +} diff --git a/frontend/src/lib/approval-report.test.ts b/frontend/src/lib/approval-report.test.ts new file mode 100644 index 00000000..f94f04d9 --- /dev/null +++ b/frontend/src/lib/approval-report.test.ts @@ -0,0 +1,67 @@ +import { describe, it, expect } from 'vitest' +import { formatApprovalReport } from './approval-report' +import type { ApprovalResponse } from '@/types/api' + +describe('formatApprovalReport', () => { + it('reports a successful execution', () => { + const res: ApprovalResponse = { + action_id: 'a1', + approved: true, + status: 'executed', + result: { alias_name: 'champion' }, + } + const msg = formatApprovalReport('create_alias', res) + expect(msg).toContain('✅') + expect(msg).toContain('create_alias') + expect(msg).toContain('executed successfully') + }) + + it('reports an approved-but-failed execution with the error cause', () => { + // The backend marks a failed execution `rejected` with the cause in result.error. 
+ const res: ApprovalResponse = { + action_id: 'a2', + approved: true, + status: 'rejected', + result: { error: 'Run not found: 3c5d', error_type: 'ValueError' }, + } + const msg = formatApprovalReport('create_alias', res) + expect(msg).toContain('❌') + expect(msg).toContain('could not be executed') + expect(msg).toContain('Run not found: 3c5d') + }) + + it('reports an operator rejection (no execution)', () => { + const res: ApprovalResponse = { + action_id: 'a3', + approved: false, + status: 'rejected', + result: null, + } + const msg = formatApprovalReport('archive_run', res) + expect(msg).toContain('🚫') + expect(msg).toContain('Rejected') + expect(msg).toContain('No action was taken') + }) + + it('reports an expired approval', () => { + const res: ApprovalResponse = { + action_id: 'a4', + approved: true, + status: 'expired', + result: null, + } + const msg = formatApprovalReport('save_scenario', res) + expect(msg).toContain('⏰') + expect(msg).toContain('expired') + }) + + it('does not throw on a non-object result', () => { + const res: ApprovalResponse = { + action_id: 'a5', + approved: true, + status: 'executed', + result: 'ok', + } + expect(() => formatApprovalReport('create_alias', res)).not.toThrow() + }) +}) diff --git a/frontend/src/lib/approval-report.ts b/frontend/src/lib/approval-report.ts new file mode 100644 index 00000000..f041157e --- /dev/null +++ b/frontend/src/lib/approval-report.ts @@ -0,0 +1,44 @@ +import type { ApprovalResponse } from '@/types/api' + +/** + * Build a human-readable chat report for an approved/rejected agent action. + * + * The backend's `POST /approve` returns an {@link ApprovalResponse} for every + * outcome, but the chat UI previously discarded it — so a click produced no + * visible result ("nothing returned"). This formats a one-line report for ALL + * outcomes so the operator always sees what happened: + * + * - `executed` → the action ran successfully. 
+ * - approved but `rejected` + error → the action was approved but execution + * failed (the backend marks a failed execution `rejected` and puts the cause + * in `result.error`). + * - `rejected` (not approved) → the operator rejected the action. + * - `expired` → the approval window lapsed before it ran. + * + * @param actionLabel - The gated action name (e.g. `create_alias`). + * @param res - The approval response from the backend. + * @returns A markdown-ish one-line report for the chat transcript. + */ +export function formatApprovalReport(actionLabel: string, res: ApprovalResponse): string { + const result = + res.result && typeof res.result === 'object' + ? (res.result as Record) + : undefined + const errorDetail = + result && 'error' in result ? String(result.error) : undefined + + if (res.status === 'executed') { + return `✅ Approved — \`${actionLabel}\` executed successfully.` + } + if (res.approved && errorDetail) { + return `❌ Approved, but \`${actionLabel}\` could not be executed: ${errorDetail}` + } + if (!res.approved) { + return `🚫 Rejected \`${actionLabel}\`. No action was taken.` + } + if (res.status === 'expired') { + return `⏰ The \`${actionLabel}\` approval expired before it could run.` + } + // Defensive fallback: approved, not executed, no error detail. 
+ return `\`${actionLabel}\` finished with status: ${res.status}.` +} diff --git a/frontend/src/lib/constants.ts b/frontend/src/lib/constants.ts index 6a6de39f..95cb28b8 100644 --- a/frontend/src/lib/constants.ts +++ b/frontend/src/lib/constants.ts @@ -25,6 +25,7 @@ export const ROUTES = { DEMAND: '/visualize/demand', PLANNER: '/visualize/planner', BATCH: '/visualize/batch', + CHAMPION: '/visualize/champion', }, KNOWLEDGE: '/knowledge', CHAT: '/chat', @@ -55,6 +56,7 @@ export const NAV_ITEMS = [ { label: 'Forecast', href: ROUTES.VISUALIZE.FORECAST }, { label: 'Backtest Results', href: ROUTES.VISUALIZE.BACKTEST }, { label: 'Batch Runner', href: ROUTES.VISUALIZE.BATCH }, + { label: 'Champion Selector', href: ROUTES.VISUALIZE.CHAMPION }, ], }, { label: 'Knowledge', href: ROUTES.KNOWLEDGE }, diff --git a/frontend/src/pages/chat.tsx b/frontend/src/pages/chat.tsx index cc22a9d5..6bbaaeb6 100644 --- a/frontend/src/pages/chat.tsx +++ b/frontend/src/pages/chat.tsx @@ -16,8 +16,15 @@ import { SelectValue, } from '@/components/ui/select' import { api } from '@/lib/api' +import { formatApprovalReport } from '@/lib/approval-report' import { WS_URL, ROUTES } from '@/lib/constants' -import type { ChatMessage as ChatMessageType, AgentStreamEvent, AgentType, AgentSession } from '@/types/api' +import type { + ChatMessage as ChatMessageType, + AgentStreamEvent, + AgentType, + AgentSession, + ApprovalResponse, +} from '@/types/api' export default function ChatPage() { const [sessionId, setSessionId] = useState(null) @@ -142,38 +149,42 @@ export default function ChatPage() { send({ session_id: sessionId, message: content }) } - const handleApprove = async () => { - if (!sessionId || !pendingAction?.actionId) return - setIsApproving(true) - try { - await api(`/agents/sessions/${sessionId}/approve`, { - method: 'POST', - body: { action_id: pendingAction.actionId, approved: true }, - }) - setPendingAction(null) - } catch (error) { - console.error('Failed to approve:', error) - } finally { 
- setIsApproving(false) - } + const appendAssistantMessage = (content: string) => { + setMessages((prev) => [ + ...prev, + { role: 'assistant', content, timestamp: new Date().toISOString() }, + ]) } - const handleReject = async () => { + // Approve or reject a pending action, then ALWAYS surface the execution + // report — for every outcome (executed / failed / rejected / expired). The + // handlers previously discarded the /approve response, so a click left the + // user with no feedback ("nothing returned"). + const decideAction = async (approved: boolean) => { if (!sessionId || !pendingAction?.actionId) return + const actionLabel = pendingAction.action setIsApproving(true) try { - await api(`/agents/sessions/${sessionId}/approve`, { + const res = await api(`/agents/sessions/${sessionId}/approve`, { method: 'POST', - body: { action_id: pendingAction.actionId, approved: false }, + body: { action_id: pendingAction.actionId, approved }, }) setPendingAction(null) + appendAssistantMessage(formatApprovalReport(actionLabel, res)) } catch (error) { - console.error('Failed to reject:', error) + console.error(approved ? 'Failed to approve:' : 'Failed to reject:', error) + setPendingAction(null) + const verb = approved ? 'approve' : 'reject' + const detail = error instanceof Error ? 
error.message : 'request failed' + appendAssistantMessage(`Error: could not ${verb} \`${actionLabel}\` — ${detail}`) } finally { setIsApproving(false) } } + const handleApprove = () => decideAction(true) + const handleReject = () => decideAction(false) + const handleNewSession = () => { setSessionId(null) setMessages([]) diff --git a/frontend/src/pages/visualize/champion.test.tsx b/frontend/src/pages/visualize/champion.test.tsx new file mode 100644 index 00000000..2ae297ca --- /dev/null +++ b/frontend/src/pages/visualize/champion.test.tsx @@ -0,0 +1,122 @@ +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest' +import { cleanup, render, screen, waitFor } from '@testing-library/react' +import type { ModelCatalogResponse } from '@/types/api' + +// Radix primitives need a couple of layout APIs jsdom lacks. +beforeAll(() => { + class ResizeObserverStub { + observe() {} + unobserve() {} + disconnect() {} + } + vi.stubGlobal('ResizeObserver', ResizeObserverStub) + if (!Element.prototype.hasPointerCapture) { + Element.prototype.hasPointerCapture = () => false + } + if (!Element.prototype.scrollIntoView) { + Element.prototype.scrollIntoView = () => {} + } +}) + +const CATALOG: ModelCatalogResponse = { + models: [ + { + model_type: 'naive', + label: 'Naive', + family: 'baseline', + feature_aware: false, + requires_extra: false, + default_params: {}, + supports_auto_predict: true, + description: 'Repeats the last observed value.', + }, + { + model_type: 'regression', + label: 'Gradient Boosting Regression', + family: 'tree', + feature_aware: true, + requires_extra: false, + default_params: {}, + supports_auto_predict: false, + description: 'Histogram gradient boosting.', + }, + ], + default_candidate_model_types: ['naive', 'regression'], +} + +vi.mock('@/hooks/use-stores', () => ({ + useStores: () => ({ + data: { stores: [{ id: 7, code: 'S001', name: 'Downtown', region: 'North', store_type: 'flagship' }] }, + isLoading: false, + }), +})) 
+vi.mock('@/hooks/use-products', () => ({ + useProducts: () => ({ + data: { products: [{ id: 12, sku: 'SKU1', name: 'Widget', category: 'tools' }] }, + isLoading: false, + }), +})) +vi.mock('@/hooks/use-model-selection', () => ({ + useModelCatalog: () => ({ + data: CATALOG, + isLoading: false, + isError: false, + error: null, + refetch: () => {}, + }), + usePairAvailability: () => ({ + data: undefined, + isLoading: false, + isError: false, + }), + // Slice B — inert async hooks (no run in progress for the shell test). + useSubmitSelectionRun: () => ({ mutate: vi.fn(), isPending: false }), + useCancelSelectionRun: () => ({ mutate: vi.fn(), isPending: false }), + useSelectionRun: () => ({ data: undefined, isLoading: false, isError: false }), +})) + +import ChampionSelectorPage from './champion' + +afterEach(cleanup) + +describe('ChampionSelectorPage', () => { + it('renders the selection shell', () => { + render() + expect(screen.getByText('Champion Selector')).toBeTruthy() + expect(screen.getByText('1 · Pick a store & product')).toBeTruthy() + expect(screen.getByText('2 · Data availability')).toBeTruthy() + expect(screen.getByText('3 · Candidate models')).toBeTruthy() + expect(screen.getByText('4 · Backtest settings')).toBeTruthy() + }) + + it('drives candidate cards from the backend catalog', () => { + render() + expect(screen.getByTestId('candidate-model-naive')).toBeTruthy() + expect(screen.getByTestId('candidate-model-regression')).toBeTruthy() + }) + + it('pre-selects the catalog default candidate models', async () => { + render() + // The catalog's two default models are pre-selected (derived during render).
+ await waitFor(() => + expect(screen.getByText('2 of 10 selected')).toBeTruthy(), + ) + }) + + it('renders the availability empty state until a pair is chosen', () => { + render() + expect(screen.getByText('Pick a store and product')).toBeTruthy() + }) + + it('keeps the Run comparison CTA disabled and issues no POST', () => { + const fetchMock = vi.fn() + vi.stubGlobal('fetch', fetchMock) + render() + const cta = screen.getByTestId('run-comparison-cta') as HTMLButtonElement + expect(cta.disabled).toBe(true) + // The page itself issues no network calls (the hooks are mocked); in + // particular it never POSTs to /model-selection/run. + expect(fetchMock).not.toHaveBeenCalled() + vi.unstubAllGlobals() + }) +}) diff --git a/frontend/src/pages/visualize/champion.tsx b/frontend/src/pages/visualize/champion.tsx new file mode 100644 index 00000000..30b624c8 --- /dev/null +++ b/frontend/src/pages/visualize/champion.tsx @@ -0,0 +1,387 @@ +import { useMemo, useState } from 'react' +import { format } from 'date-fns' +import { DateRange } from 'react-day-picker' +import { Loader2, Trophy } from 'lucide-react' +import { useStores } from '@/hooks/use-stores' +import { useProducts } from '@/hooks/use-products' +import { + useCancelSelectionRun, + useModelCatalog, + usePairAvailability, + useSelectionRun, + useSubmitSelectionRun, +} from '@/hooks/use-model-selection' +import { DateRangePicker } from '@/components/common/date-range-picker' +import { ErrorDisplay } from '@/components/common/error-display' +import { AvailabilityPanel } from '@/components/champion-selector/availability-panel' +import { BacktestSettingsForm } from '@/components/champion-selector/backtest-settings-form' +import { splitConfigErrors } from '@/components/champion-selector/split-config' +import { CandidateModelPicker } from '@/components/champion-selector/candidate-model-picker' +import { SearchableEntitySelect } from '@/components/champion-selector/searchable-entity-select' +import { assembleRunRequest } 
from '@/components/champion-selector/run-request' +import { RunProgressPanel } from '@/components/champion-selector/results/run-progress-panel' +import { RankingTable } from '@/components/champion-selector/results/ranking-table' +import { WinnerCard } from '@/components/champion-selector/results/winner-card' +import { ComparisonCharts } from '@/components/champion-selector/results/comparison-charts' +import { ModelDetailDrawer } from '@/components/champion-selector/results/model-detail-drawer' +import { CancelRunDialog } from '@/components/champion-selector/results/cancel-run-dialog' +import { DecisionSection } from '@/components/champion-selector/decision/decision-section' +import { isTerminalSelectionStatus } from '@/components/champion-selector/results/constants' +import { Button } from '@/components/ui/button' +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' +import { Input } from '@/components/ui/input' +import { getErrorMessage } from '@/lib/api' +import type { + ModelRankEntry, + ModelSelectionRunRequest, + SplitConfig, +} from '@/types/api' + +const DEFAULT_HORIZON = 14 + +const DEFAULT_SPLIT: SplitConfig = { + strategy: 'expanding', + n_splits: 5, + min_train_size: 30, + gap: 0, + horizon: DEFAULT_HORIZON, +} + +/** + * Forecast Champion Selector. + * + * Configuration + availability triage (Slice A), the async comparison run + * with live progress and results (Slice B), and the decision section + * (Slice C). It assembles a typed `ModelSelectionRunRequest` in component + * state; `handleRunComparison` submits it via `useSubmitSelectionRun` and is + * a no-op until the form yields a valid request.
+ */ +export default function ChampionSelectorPage() { + const [storeId, setStoreId] = useState(null) + const [productId, setProductId] = useState(null) + const [dateRange, setDateRange] = useState() + const [forecastHorizon, setForecastHorizon] = useState(DEFAULT_HORIZON) + const [splitConfig, setSplitConfig] = useState(DEFAULT_SPLIT) + const [rankingMetric, setRankingMetric] = useState< + ModelSelectionRunRequest['ranking_metric'] + >('wape') + // `null` means "the user hasn't edited the selection yet" — fall back to the + // catalog's default candidate set (derived below, no effect needed). + const [editedModels, setEditedModels] = useState(null) + + // Slice B — the in-flight/terminal async run + the detail-drawer selection. + const [selectionId, setSelectionId] = useState(null) + const [submitError, setSubmitError] = useState(null) + const [drawerEntry, setDrawerEntry] = useState(null) + const [drawerOpen, setDrawerOpen] = useState(false) + + // /dimensions/{stores,products} both cap page_size at 100 (client-filtered). + const storesQuery = useStores({ page: 1, pageSize: 100 }) + const productsQuery = useProducts({ page: 1, pageSize: 100 }) + const catalogQuery = useModelCatalog() + + const validPair = !!storeId && !!productId + const availabilityQuery = usePairAvailability({ + storeId, + productId, + forecastHorizon, + enabled: validPair, + }) + + // Pre-select the backend default candidate set until the user edits it — + // derived during render rather than seeded via an effect. + const selectedModels = + editedModels ?? catalogQuery.data?.default_candidate_model_types ?? [] + + // split_config.horizon must equal forecast_horizon (the backend validator). + // Force it during render so no effect is needed to keep them in sync. + const effectiveSplit: SplitConfig = useMemo( + () => ({ ...splitConfig, horizon: forecastHorizon }), + [splitConfig, forecastHorizon], + ) + + const storeItems = useMemo( + () => + (storesQuery.data?.stores ?? 
[]).map((store) => ({ + id: store.id, + primary: `${store.code} · ${store.name}`, + secondary: [store.region, store.store_type].filter(Boolean).join(' · '), + })), + [storesQuery.data], + ) + const productItems = useMemo( + () => + (productsQuery.data?.products ?? []).map((product) => ({ + id: product.id, + primary: `${product.sku} · ${product.name}`, + secondary: product.category ?? undefined, + })), + [productsQuery.data], + ) + + const formReady = + validPair && + !!dateRange?.from && + !!dateRange?.to && + forecastHorizon >= 1 && + forecastHorizon <= 90 && + selectedModels.length >= 1 && + splitConfigErrors(effectiveSplit).length === 0 + + // The assembled request — `auto_train_winner`/`auto_predict` pinned false by + // `assembleRunRequest` (no-ops in the async path; Slice C owns train/predict). + const runRequest: ModelSelectionRunRequest | null = + formReady && dateRange?.from && dateRange?.to + ? assembleRunRequest({ + storeId: storeId!, + productId: productId!, + startDate: format(dateRange.from, 'yyyy-MM-dd'), + endDate: format(dateRange.to, 'yyyy-MM-dd'), + forecastHorizon, + rankingMetric, + splitConfig: effectiveSplit, + selectedModels, + }) + : null + + // Slice B — async submit → poll → cancel. + const submitRun = useSubmitSelectionRun() + const cancelRun = useCancelSelectionRun() + const runQuery = useSelectionRun(selectionId) + const run = runQuery.data + const isRunning = !!run && !isTerminalSelectionStatus(run.status) + const isTerminal = !!run && isTerminalSelectionStatus(run.status) + + function handleRunComparison() { + if (!runRequest) return + setSubmitError(null) + submitRun.mutate(runRequest, { + onSuccess: (data) => setSelectionId(data.selection_id), + onError: (err) => setSubmitError(getErrorMessage(err)), + }) + } + + function handleSelectModel(entry: ModelRankEntry) { + setDrawerEntry(entry) + setDrawerOpen(true) + } + + return ( +
+
+

+ + Champion Selector +

+

+ Configure a store, product, time period, horizon and candidate models, + and check whether the pair has enough history to model. Running the + comparison arrives in a later update. +

+
+ + {/* Selection */} + + + 1 · Pick a store & product + + Search by code/SKU or name. The availability check runs automatically + once a valid pair and horizon are chosen. + + + +
+
+ Store + +
+
+ Product + +
+
+ Time period + +
+
+ + Forecast horizon (days) + + + setForecastHorizon(Number(event.target.value) || 0) + } + /> +
+
+
+
+ + {/* Availability */} + + + 2 · Data availability + + Whether this pair has enough observed history for a reliable + comparison, plus the recommended split. + + + + + + + + {/* Candidate models */} + + + 3 · Candidate models + + Pick the models to compare (up to 10). The default five are + pre-selected; opt-in extras are flagged. + + + + {catalogQuery.isError ? ( + catalogQuery.refetch()} + /> + ) : ( + + )} + + + + {/* Backtest settings */} + + + 4 · Backtest settings + + The ranking metric and cross-validation split. Start with the + recommended split or fine-tune under Advanced. + + + + + + + + {/* Run CTA (Slice B — submit the async comparison) */} + + +
+ {formReady + ? `Ready to compare ${selectedModels.length} model${ + selectedModels.length === 1 ? '' : 's' + }.` + : 'Pick a store, product, time period, horizon and at least one model to continue.'} + {submitError && ( + {submitError} + )} +
+
+ {isRunning && ( + selectionId && cancelRun.mutate(selectionId)} + isCancelling={cancelRun.isPending} + /> + )} + +
+
+
+ + {/* Live progress + results (Slice B) */} + {run && ( + + )} + + {isTerminal && run && ( + <> + + {run.chart_data && ( + + )} + {run.ranking.length > 0 && ( + + )} + + {/* Slice C — decide → train → forecast → interpret → promote. Keyed by + selectionId so a fresh run resets the decision state. */} + {selectionId && run.winner && ( + + )} + + )} +
+ ) +} diff --git a/frontend/src/types/api.ts b/frontend/src/types/api.ts index 3c62f684..88a204e1 100644 --- a/frontend/src/types/api.ts +++ b/frontend/src/types/api.ts @@ -624,6 +624,15 @@ export interface ChatMessage { timestamp: string } +/** Response from POST /agents/sessions/{id}/approve (mirrors backend ApprovalResponse). */ +export interface ApprovalResponse { + action_id: string + approved: boolean + /** Execution result on success, or `{ error, error_type }` when execution failed. */ + result?: unknown + status: 'executed' | 'rejected' | 'expired' +} + export interface ToolCall { tool_name: string arguments: Record @@ -1179,3 +1188,267 @@ export interface ForecastExplanation { as_of_date: string // ISO date generated_at: string // ISO datetime } + +// ============================================================================= +// Model Selection (Champion Selector) — backend slice app/features/model_selection +// ============================================================================= +// +// The FULL workflow contract is declared here so Slices B/C add BEHAVIOR, not +// type definitions. Slice A CONSUMES only `ModelCatalogResponse`, +// `PairAvailability`, and `SplitConfig` (read-only). Everything tagged +// DECLARED-FOR-LATER is wired by Slice B (async run + results) and Slice C +// (train / predict / business summary / override / promotion). + +export type ModelSelectionStatus = + | 'pending' + | 'running' + | 'completed' + | 'partial' + | 'failed' + | 'cancelled' // Slice B — async cancel terminal state +export type CandidateStatus = + | 'pending' + | 'running' + | 'completed' + | 'failed' + | 'cancelled' +export type RankingMetric = 'wape' | 'smape' | 'mae' | 'bias' +export type AvailabilityStatus = 'ready' | 'limited' | 'unusable' +// `ConfidenceLevel` ('high' | 'medium' | 'low') is reused from the +// Explainability section above — the backend uses the same enum. 
+ +// Backtest split config — mirrors `app/features/backtesting/schemas.py` +// `SplitConfig` EXACTLY (bounds enforced client-side so the assembled run +// request is always valid for Slice B). +export type SplitStrategy = 'expanding' | 'sliding' +export interface SplitConfig { + strategy: SplitStrategy // def 'expanding' + n_splits: number // 2..20, def 5 + min_train_size: number // >= 7, def 30 + gap: number // 0..30, def 0 + horizon: number // 1..90, def 14; must be > gap; kept === forecast_horizon +} + +// --- CONSUMED in Slice A --------------------------------------------------- + +export interface CandidateModelInfo { + model_type: string + label: string + family: ModelFamily + feature_aware: boolean + /** lightgbm/xgboost — opt-in extra may be absent at runtime. */ + requires_extra: boolean + default_params: Record + /** false for feature-aware models (the predict path rejects them). */ + supports_auto_predict: boolean + description: string +} + +export interface ModelCatalogResponse { + models: CandidateModelInfo[] + default_candidate_model_types: string[] +} + +export interface PairAvailability { + store_id: number + product_id: number + first_sales_date: string | null + last_sales_date: string | null + observed_days: number + expected_calendar_days: number + coverage_ratio: number + missing_days: number + zero_sale_days: number + promotion_days: number | null + average_daily_demand: number + status: AvailabilityStatus + recommended_split_config: SplitConfig + warnings: string[] +} + +// --- DECLARED-FOR-LATER (Slices B/C wire behavior on these) ---------------- + +export interface SelectionWindow { + start_date: string // ISO date (inclusive) + end_date: string // ISO date (inclusive) +} + +export interface CandidateModelConfig { + model_type: string + params: Record +} + +export interface RankingPolicy { + minimum_sample_size: number + high_confidence_rel_improvement: number + max_acceptable_abs_bias: number +} + +export interface 
ModelSelectionRunRequest { + store_id: number + product_id: number + selection_window: SelectionWindow + forecast_horizon: number + ranking_metric: RankingMetric + split_config: SplitConfig + candidate_models: CandidateModelConfig[] + feature_frame_version: number // 1 | 2 (Slice A always 1) + feature_groups: string[] | null // only valid when feature_frame_version === 2 + ranking_policy?: RankingPolicy + // Slice A sets BOTH false. The async run path (Slice B `POST /runs`) treats + // them as NO-OPS, and Slice C owns explicit train/predict — so these two + // fields stay false throughout the UI flow and are never surfaced as toggles. + auto_train_winner: boolean + auto_predict: boolean +} + +export interface ModelRankEntry { + rank: number | null + model_type: string + params: Record + included: boolean + exclusion_reason: string | null + metrics: Record | null +} + +export interface WinnerSummary { + model_type: string + params: Record + metrics: Record + rank: number +} + +export interface ModelSelectionChartData { + wape_by_model: Record + bias_by_model: Record + fold_stability: Record + winner_actual_vs_predicted: unknown[] +} + +export interface ModelSelectionForecastSummary { + points: Record[] + total_demand: number + average_demand: number + horizon: number + // Slice C — additive peak/low day (null on legacy snapshots). + peak_date?: string | null + peak_demand?: number | null + low_date?: string | null + low_demand?: number | null +} + +// Slice B — live async progress on a selection run. 
+export interface CandidateProgress { + candidate_id: string + ordinal: number + model_type: string + status: CandidateStatus + error: string | null + started_at: string | null + completed_at: string | null + duration_ms: number | null +} + +export interface SelectionProgress { + total: number + pending: number + running: number + completed: number + failed: number + cancelled: number +} + +export interface ModelSelectionRunResponse { + selection_id: string + store_id: number + product_id: number + status: ModelSelectionStatus + selection_window: SelectionWindow + forecast_horizon: number + ranking_metric: string + availability: PairAvailability | null + ranking: ModelRankEntry[] + winner: WinnerSummary | null + recommendation_confidence: ConfidenceLevel | null + confidence_reasons: string[] + chart_data: ModelSelectionChartData | null + final_model: Record | null + forecast: ModelSelectionForecastSummary | null + business_summary: Record | null + error_message: string | null + created_at: string // ISO datetime + // Slice B — additive async fields (null/empty on a legacy sync `/run` row). + started_at?: string | null + completed_at: string | null + progress?: SelectionProgress | null + candidate_progress?: CandidateProgress[] +} + +// Slice B — 202 response from `POST /model-selection/runs` (additive superset). +export interface SubmitRunResponse extends ModelSelectionRunResponse { + monitor_url: string + cancel_url: string +} + +// Slice C — forecast decision, override, and promotion contracts. + +/** `POST /model-selection/{id}/train-selected` body (override). */ +export interface TrainSelectedRequest { + model_type: string + override_reason?: string | null +} + +/** Optional `POST /model-selection/{id}/predict` body. */ +export interface ForecastDecisionParams { + lead_time_days: number + service_level: number +} + +/** Deterministic, labeled inventory-decision heuristic (never feeds ranking). 
*/ +export interface ForecastDecision { + method: 'heuristic' + lead_time_days: number + service_level: number + z_value: number + sigma_daily_demand: number + expected_demand_over_lead_time: number + safety_stock: number + reorder_point: number + bias_risk_text: string + caveats: string[] +} + +/** `POST /model-selection/{id}/train-winner` and `/train-selected` response. */ +export interface TrainWinnerResponse { + selection_id: string + model_type: string + model_path: string + is_override: boolean + override_warning: string | null +} + +/** `POST /model-selection/{id}/predict` response (forecast + decision). */ +export interface PredictWinnerResponse { + selection_id: string + forecast: ModelSelectionForecastSummary + decision: ForecastDecision | null +} + +/** `POST /model-selection/{id}/promote` body (approval-gated). */ +export interface PromoteRequest { + alias_name: string + approved_by: string + acknowledge_non_recommended?: boolean + description?: string | null +} + +/** `POST /model-selection/{id}/promote` response. */ +export interface PromoteResponse { + selection_id: string + alias_name: string + run_id: string + run_status: string + model_type: string + is_override: boolean + promoted_at: string // ISO datetime +} diff --git a/tests/test_e2e_demo.py b/tests/test_e2e_demo.py index 31d263d4..ac3a5278 100644 --- a/tests/test_e2e_demo.py +++ b/tests/test_e2e_demo.py @@ -504,23 +504,31 @@ def test_run_demo_showcase_rich_full_epic( f"status={scenario_step['status']!r} detail={scenario_step['detail']!r}" ) - # Any OTHER failed step must be an environment-dependent knowledge-phase step - # (embedding provider unreachable / misconfigured key) -- those skip - # gracefully when the provider is absent (RUNBOOKS 20-22), but a real 401 - # surfaces as a fail locally. Not the #324 cascade. 
- ENV_DEPENDENT_KNOWLEDGE_STEPS = {"rag_index_subset", "rag_retrieve_probe"} + # ---- PR1 (PRP-42, #329) — knowledge phase must never hard-fail ----------- + # The embedding-provider knowledge steps now SKIP gracefully whether the + # provider is truly unreachable OR rejects an invalid/placeholder key (the + # 401/403 -> EMBEDDING_AUTH classification, RUNBOOKS 20-22). They may pass + # (provider reachable + corpus matches), skip (unreachable / bad key), or + # warn (retrieve indexed but found no hits) -- but they must NOT fail. + KNOWLEDGE_STEPS = {"rag_index_subset", "rag_retrieve_probe"} + for name in KNOWLEDGE_STEPS: + step = by_name.get(name) + if step is not None: + assert step["status"] in {"pass", "skip", "warn"}, ( + f"{name} must skip/warn gracefully on an unreachable/invalid " + f"embedding key (#329), got status={step['status']!r} " + f"detail={step['detail']!r}" + ) + + # No step may hard-fail on showcase_rich now: #324 is fixed and the + # knowledge phase skips instead of 401/502-failing. Any fail is a regression. failed = [s for s in result["steps"] if s["status"] == "fail"] - for step in failed: - assert step["step_name"] in ENV_DEPENDENT_KNOWLEDGE_STEPS, ( - f"unexpected showcase_rich failure (not #324, not env-dependent): " - f"{step['step_name']!r} detail={step['detail']!r}" - ) - # With no env-dependent failures, the per-step statuses and the overall - # status must agree -- the whole pipeline reports pass. - if not failed: - assert result["overall_status"] == "pass", ( - f"no failed steps but overall_status={result['overall_status']!r}" - ) + assert not failed, "unexpected showcase_rich failure(s): " + ", ".join( + f"{s['step_name']!r} (detail={s['detail']!r})" for s in failed + ) + assert result["overall_status"] == "pass", ( + f"no failed steps but overall_status={result['overall_status']!r}" + ) @pytest.mark.integration