diff --git a/.claude/skills/consult/SKILL.md b/.claude/skills/consult/SKILL.md index 83628ff19..4720b0dd1 100644 --- a/.claude/skills/consult/SKILL.md +++ b/.claude/skills/consult/SKILL.md @@ -20,7 +20,7 @@ The `-m` / `--model` flag is **always required** except for `consult stats`. | Flag value | Alias | Notes | |------------|-------|-------| -| `gemini` | `pro` | Fast (~120-150s), file access via --yolo | +| `gemini` | `pro` | Antigravity CLI (`agy`); agentic file access (`--sandbox`), OAuth login; skips non-blockingly if unavailable | | `codex` | `gpt` | Thorough (~200-250s), shell exploration | | `claude` | `opus` | Agent SDK with tool use (~60-120s) | @@ -30,7 +30,7 @@ The `-m` / `--model` flag is **always required** except for `consult stats`. -m, --model Model to use (required except stats) --prompt Inline prompt (general mode) --prompt-file Prompt file path (general mode) ---protocol Protocol: spir, aspir, air, bugfix, tick, maintain +--protocol Protocol: spir, aspir, air, bugfix, maintain -t, --type Review type (see below) --issue Issue number (required in architect context) --output Save result to file diff --git a/AGENTS.md b/AGENTS.md index eeb4f2e1e..b7b8bda03 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -310,14 +310,16 @@ Use sequential numbering with descriptive names (no leading zeros): ## Multi-Agent Consultation **DEFAULT BEHAVIOR**: Consultation is ENABLED by default with: -- **Gemini 3.1 Pro** (gemini-3.1-pro-preview) for deep analysis +- **Gemini** via the **Antigravity CLI (`agy`)** for deep analysis (the retired Gemini CLI's + replacement; OAuth/subscription, agy's default model — no pinned model id). Skips non-blockingly + if `agy` is missing/unauthenticated. 
- **GPT-5.4 Codex** (gpt-5.4-codex) for coding and architecture perspective To disable: User must explicitly say "without multi-agent consultation" **CRITICAL CONSULTATION CHECKPOINTS (DO NOT SKIP):** -- After writing implementation code → STOP → Consult GPT-5 and Gemini Pro -- After writing tests → STOP → Consult GPT-5 and Gemini Pro +- After writing implementation code → STOP → Consult GPT-5 and Gemini (via agy) +- After writing tests → STOP → Consult GPT-5 and Gemini (via agy) - ONLY THEN present results to user for evaluation ### cmap (Consult Multiple Agents in Parallel) diff --git a/CLAUDE.md b/CLAUDE.md index eeb4f2e1e..b7b8bda03 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -310,14 +310,16 @@ Use sequential numbering with descriptive names (no leading zeros): ## Multi-Agent Consultation **DEFAULT BEHAVIOR**: Consultation is ENABLED by default with: -- **Gemini 3.1 Pro** (gemini-3.1-pro-preview) for deep analysis +- **Gemini** via the **Antigravity CLI (`agy`)** for deep analysis (the retired Gemini CLI's + replacement; OAuth/subscription, agy's default model — no pinned model id). Skips non-blockingly + if `agy` is missing/unauthenticated. - **GPT-5.4 Codex** (gpt-5.4-codex) for coding and architecture perspective To disable: User must explicitly say "without multi-agent consultation" **CRITICAL CONSULTATION CHECKPOINTS (DO NOT SKIP):** -- After writing implementation code → STOP → Consult GPT-5 and Gemini Pro -- After writing tests → STOP → Consult GPT-5 and Gemini Pro +- After writing implementation code → STOP → Consult GPT-5 and Gemini (via agy) +- After writing tests → STOP → Consult GPT-5 and Gemini (via agy) - ONLY THEN present results to user for evaluation ### cmap (Consult Multiple Agents in Parallel) diff --git a/README.md b/README.md index 0ea1b080e..744595bf8 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ See [CLI Reference](codev/resources/commands/overview.md) for details. 
**AI CLIs** (install all three for multi-model consultation): - Claude Code: `npm install -g @anthropic-ai/claude-code` -- Gemini CLI: [github.com/google-gemini/gemini-cli](https://github.com/google-gemini/gemini-cli) +- Antigravity CLI (`agy`, the `gemini` consult lane): `curl -fsSL https://antigravity.google/cli/install.sh | bash`, then run `agy` once to sign in (OAuth — replaces the retired Gemini CLI) - Codex CLI: `npm install -g @openai/codex` **Agent Farm (optional):** @@ -139,7 +139,7 @@ This tour demonstrates: - How to write specifications that capture all requirements - How the planning phase breaks work into manageable chunks - The implementation phase in action -- Multi-agent consultation with GPT-5 and Gemini Pro +- Multi-agent consultation with GPT-5 and Gemini (via agy) - How lessons learned improve future development ## What is Codev? @@ -206,7 +206,7 @@ In much the same way an operating system has a memory hierarchy, Codev repos hav ### 🤖 AI-Native Workflow - Structured formats that AI agents understand -- Multi-agent consultation support (GPT-5, Gemini Pro, etc.) +- Multi-agent consultation support (GPT-5, Gemini via agy, etc.) - Reduces back-and-forth from dozens of messages to 3-4 document reviews - Supports both AGENTS.md standard (Cursor, Copilot, etc.) and CLAUDE.md (Claude Code) @@ -445,7 +445,11 @@ Configure in `.codev/config.json` (created by `codev init` or `codev adopt`): } ``` -Or for Gemini: +Or for Gemini (the standalone **Gemini CLI** as a *builder/architect* coding agent — a separate +concern from the `gemini` **consult lane**, which now uses the Antigravity CLI `agy`). Note: Google +retired the Gemini CLI for Pro/Ultra/free tiers on 2026-06-18, so this builder harness will stop +working for those tiers — prefer a Claude or Codex builder, or an enterprise Gemini CLI. (Tracked as +a follow-up; out of scope for the consult-lane migration.) 
```json { "shell": { diff --git a/codev-skeleton/.claude/skills/consult/SKILL.md b/codev-skeleton/.claude/skills/consult/SKILL.md index 29ce694bc..4720b0dd1 100644 --- a/codev-skeleton/.claude/skills/consult/SKILL.md +++ b/codev-skeleton/.claude/skills/consult/SKILL.md @@ -20,7 +20,7 @@ The `-m` / `--model` flag is **always required** except for `consult stats`. | Flag value | Alias | Notes | |------------|-------|-------| -| `gemini` | `pro` | Fast (~120-150s), file access via --yolo | +| `gemini` | `pro` | Antigravity CLI (`agy`); agentic file access (`--sandbox`), OAuth login; skips non-blockingly if unavailable | | `codex` | `gpt` | Thorough (~200-250s), shell exploration | | `claude` | `opus` | Agent SDK with tool use (~60-120s) | diff --git a/codev-skeleton/DEPENDENCIES.md b/codev-skeleton/DEPENDENCIES.md index 18459d51a..472b4e566 100644 --- a/codev-skeleton/DEPENDENCIES.md +++ b/codev-skeleton/DEPENDENCIES.md @@ -110,22 +110,33 @@ npm install -g @anthropic-ai/claude-code claude --version ``` -### Gemini CLI +### Antigravity CLI (`agy`) — the `gemini` consult lane + +Replaces the retired Gemini CLI (Google stopped serving Gemini CLI for Pro/Ultra/free tiers on +2026-06-18). The `gemini` consult lane now dispatches to the Antigravity CLI (`agy`). 
| Requirement | Value | |-------------|-------| -| Purpose | Multi-agent consultation, alternative perspectives | -| Documentation | [github.com/google-gemini/gemini-cli](https://github.com/google-gemini/gemini-cli) | +| Purpose | Multi-agent consultation (the `gemini` lane), alternative perspectives | +| Documentation | [antigravity.google/docs/cli-using](https://antigravity.google/docs/cli-using) | +| Auth | OAuth / Google subscription (no API key) — run `agy` once and sign in | **Installation:** ```bash -npm install -g @google/gemini-cli +curl -fsSL https://antigravity.google/cli/install.sh | bash # installs to ~/.local/bin/agy + +# Sign in (one-time, interactive) +agy # complete the OAuth flow # Verify -gemini --version +agy --version ``` +> Note: the `agy` on the IDE's PATH (`~/.antigravity/.../bin/agy`) is a symlink to the Antigravity +> IDE, not the headless CLI — Codev resolves the real CLI itself. If `agy` is missing or +> unauthenticated, the `gemini` consult lane skips non-blockingly (the run proceeds without it). 
+ ### Codex CLI | Requirement | Value | @@ -152,7 +163,7 @@ codex --version | git | 2.5.0 | Yes | | gh | latest | Yes | | Claude Code | latest | At least one AI CLI | -| Gemini CLI | latest | At least one AI CLI | +| Antigravity CLI (`agy`) | latest | At least one AI CLI | | Codex CLI | latest | At least one AI CLI | --- diff --git a/codev-skeleton/resources/commands/codev.md b/codev-skeleton/resources/commands/codev.md index 3e4c55bf5..88d336d94 100644 --- a/codev-skeleton/resources/commands/codev.md +++ b/codev-skeleton/resources/commands/codev.md @@ -99,7 +99,7 @@ Verifies that all required dependencies are installed and properly configured: **AI CLI Dependencies (at least one required):** - Claude (`@anthropic-ai/claude-code`) -- Gemini (`gemini-cli`) +- Gemini (Antigravity CLI, `agy`) - Codex (`@openai/codex`) **Exit Codes:** diff --git a/codev-skeleton/resources/commands/consult.md b/codev-skeleton/resources/commands/consult.md index 2f67a10ae..85ad04e87 100644 --- a/codev-skeleton/resources/commands/consult.md +++ b/codev-skeleton/resources/commands/consult.md @@ -19,7 +19,7 @@ consult stats [options] | Model | Alias | Backend | Notes | |-------|-------|---------|-------| -| `gemini` | `pro` | gemini-cli | File access via --yolo, fast | +| `gemini` | `pro` | Antigravity CLI (`agy`) | Agentic file access (`--sandbox --add-dir`), OAuth/subscription login. Skips non-blockingly if `agy` is missing/unauthed. 
| | `codex` | `gpt` | @openai/codex | Read-only sandbox, thorough | | `claude` | `opus` | Claude Agent SDK | Balanced analysis with tool use | | `hermes` | - | hermes CLI (`hermes chat -q`) | Uses Hermes agent as consult backend | @@ -130,7 +130,7 @@ consult -m hermes --protocol spir --type spec | Model | Typical Time | Approach | |-------|--------------|----------| -| Gemini | ~120-150s | File access via --yolo, pure text output | +| Gemini | ~120-180s | Antigravity CLI (`agy`); agentic file access via `--sandbox`, plain text output | | Codex | ~200-250s | Shell command exploration, read-only sandbox | | Claude | ~60-120s | Agent SDK with Read/Glob/Grep tools | @@ -145,14 +145,29 @@ npm install -g @anthropic-ai/claude-code # Codex npm install -g @openai/codex -# Gemini -# See: https://github.com/google-gemini/gemini-cli +# Gemini lane → Antigravity CLI (`agy`), replacing the retired Gemini CLI +curl -fsSL https://antigravity.google/cli/install.sh | bash +agy # run once and sign in (OAuth / Google subscription) ``` -Configure API keys: +Configure auth: - Claude: `ANTHROPIC_API_KEY` - Codex: `OPENAI_API_KEY` -- Gemini: `GOOGLE_API_KEY` or `GEMINI_API_KEY` +- Gemini (`agy`): **OAuth / subscription** — run `agy` once and sign in (no API key). If `agy` + is missing or unauthenticated, the gemini lane skips non-blockingly (the run proceeds without it). + +### Claude auth: subscription vs. metered API + +`consult -m claude` runs on the Claude Agent SDK. When `CLAUDE_CODE_OAUTH_TOKEN` +(a Claude subscription/OAuth token) is present, consult strips `ANTHROPIC_API_KEY` +and `ANTHROPIC_AUTH_TOKEN` from the SDK subprocess env so the consultation +authenticates against the **subscription** rather than the **metered Opus API**. +The Agent SDK otherwise prioritizes `ANTHROPIC_API_KEY`, which silently routes +CMAP/review traffic to the metered API (issue #985). When no OAuth token is set, +the API key is used as before so CI / key-only environments keep working. 
+ +> **Caveat:** dedicated Agent-SDK subscription credit starts **2026-06-15**. +> Before that date, subscription auth draws from the interactive Max quota. ## The Consultant Role diff --git a/codev/DEPENDENCIES.md b/codev/DEPENDENCIES.md index f500c65a9..472b4e566 100644 --- a/codev/DEPENDENCIES.md +++ b/codev/DEPENDENCIES.md @@ -110,22 +110,33 @@ npm install -g @anthropic-ai/claude-code claude --version ``` -### Gemini CLI +### Antigravity CLI (`agy`) — the `gemini` consult lane + +Replaces the retired Gemini CLI (Google stopped serving Gemini CLI for Pro/Ultra/free tiers on +2026-06-18). The `gemini` consult lane now dispatches to the Antigravity CLI (`agy`). | Requirement | Value | |-------------|-------| -| Purpose | Multi-agent consultation, alternative perspectives | -| Documentation | [github.com/google-gemini/gemini-cli](https://github.com/google-gemini/gemini-cli) | +| Purpose | Multi-agent consultation (the `gemini` lane), alternative perspectives | +| Documentation | [antigravity.google/docs/cli-using](https://antigravity.google/docs/cli-using) | +| Auth | OAuth / Google subscription (no API key) — run `agy` once and sign in | **Installation:** ```bash -npm install -g @anthropic-ai/gemini-cli +curl -fsSL https://antigravity.google/cli/install.sh | bash # installs to ~/.local/bin/agy + +# Sign in (one-time, interactive) +agy # complete the OAuth flow # Verify -gemini --version +agy --version ``` +> Note: the `agy` on the IDE's PATH (`~/.antigravity/.../bin/agy`) is a symlink to the Antigravity +> IDE, not the headless CLI — Codev resolves the real CLI itself. If `agy` is missing or +> unauthenticated, the `gemini` consult lane skips non-blockingly (the run proceeds without it). 
+ ### Codex CLI | Requirement | Value | @@ -152,7 +163,7 @@ codex --version | git | 2.5.0 | Yes | | gh | latest | Yes | | Claude Code | latest | At least one AI CLI | -| Gemini CLI | latest | At least one AI CLI | +| Antigravity CLI (`agy`) | latest | At least one AI CLI | | Codex CLI | latest | At least one AI CLI | --- @@ -209,6 +220,18 @@ echo $PATH export PATH="$PATH:$(npm config get prefix)/bin" ``` +### Terminal connection issues + +Ensure no firewall is blocking the ports (default: 4200-4299): + +```bash +# Check if port is in use +lsof -i :4200 + +# Clean up stale port allocations +afx ports cleanup +``` + ### gh authentication issues ```bash diff --git a/codev/plans/778-gemini-cli-antigravity-cli-jun.md b/codev/plans/778-gemini-cli-antigravity-cli-jun.md new file mode 100644 index 000000000..6a1fa8f20 --- /dev/null +++ b/codev/plans/778-gemini-cli-antigravity-cli-jun.md @@ -0,0 +1,230 @@ +--- +approved: 2026-06-02 +validated: [gemini, codex, claude] +--- + +# Plan: Migrate the Gemini consult lane to the Antigravity CLI (`agy`) + +## Metadata +- **ID**: plan-2026-06-02-778-gemini-antigravity-cli +- **Status**: approved (human-approved at the plan-approval gate 2026-06-02) +- **Specification**: `codev/specs/778-gemini-cli-antigravity-cli-jun.md` (APPROVED 2026-06-02, Approach B, single-agy) +- **Created**: 2026-06-02 + +## Executive Summary +The `gemini` consult lane currently shells out to the retiring Google **Gemini CLI** (`gemini +--output-format json --model gemini-3.1-pro-preview`, role via `GEMINI_SYSTEM_MD`, prompt via stdin). +Per the approved spec, we **swap the backend to the Antigravity CLI (`agy`)** — a lean +**single-backend** change (no API key, no second backend, no selector; agy is **OAuth-only** as +verified). The model identifier stays `gemini` everywhere (only the backend changes). 
+ +The lane invokes **`agy --print --sandbox --add-dir <dir>`** with the reviewer role folded into +the prompt, **preserving agentic file-reading** (the diff/repo are read from disk), using `agy`'s +**default model** (currently Flash — no pro-pinning). Because `agy --print` returns plain text (no +usage JSON) and authenticates via interactive OAuth, the plan also covers: **graceful cost +degradation**, a **non-blocking `COMMENT` skip** when `agy` is missing/unauthed (so porch runs still +advance — the CI/headless story), verified **binary resolution** (never launch the IDE symlink), and +**doctor/docs** updates. + +## Success Metrics +- [ ] All spec success criteria met (single-agy; see spec). +- [ ] `consult -m gemini` runs via `agy --print` and returns a review that used file contents + (agentic reading), verified end-to-end on a spec, a plan, and a PR. +- [ ] Missing/unauthed/IDE-stub `agy` → non-blocking `COMMENT` skip; porch run still advances (2-way). +- [ ] Cost/usage rows degrade gracefully (no `NaN`). +- [ ] `codev doctor` reports the real `agy` CLI + auth accurately, with current guidance. +- [ ] Model identifier stays `gemini` (no rename); `pro` alias kept; Codex/Claude lanes unchanged. +- [ ] Existing consult/doctor/config/porch tests pass; coverage not reduced. + +## Phases (Machine Readable) + + + +```json +{ + "phases": [ + {"id": "agy_backend", "title": "Phase 1: agy backend dispatch (OAuth, agentic file-reading, non-blocking skip)"}, + {"id": "docs_skeleton_e2e", "title": "Phase 2: Doctor + docs + skeleton consistency + e2e verification"} + ] +} +``` + +## Cross-Cutting Implementation Contracts +- **Single backend.** The dispatch stays keyed on `model === 'gemini'` (`consult/index.ts:~631`); no + backend sub-branching, no selector, no new config key. The `gemini`-CLI dispatch is **replaced** by + the `agy` invocation. 
+- **Backend-aware parsing for plain text.** `extractReviewText`'s `gemini` branch + (`usage-extractor.ts`, currently `JSON.parse(output).response`) is adapted to return the **raw + output** (agy prints plain text); `extractGeminiUsage` returns **null** (no token JSON) → **graceful + cost degradation** (no `NaN`; e.g. "n/a (subscription)"). The old `stats.models` JSON path is removed. +- **Model identifier stays `gemini`** everywhere (`MODEL_CONFIGS` key, `VALID_MODELS`, + `protocol-schema.json` enum, default model lists, user config); `pro` alias kept. **No rename.** +- **No API key anywhere** (agy is OAuth-only; verified). CI/headless story = the non-blocking skip + (optionally a pre-provisioned OAuth token), not an API key. + +> **Test locations (canonical):** unit tests in `packages/codev/src/__tests__/` (`consult.test.ts`, +> `doctor.test.ts`, `config.test.ts`); e2e in `packages/codev/src/__tests__/cli/`; `metrics.test.ts` +> in `packages/codev/src/commands/consult/__tests__/`; `consultation-models.test.ts` in +> `packages/codev/src/commands/porch/__tests__/`. **`packages/codev/tests/e2e|unit` do NOT exist.** + +## Phase Breakdown + +### Phase 1: agy backend dispatch (OAuth, agentic file-reading, non-blocking skip) +**Dependencies**: None + +#### Objectives +- Replace the retiring `gemini`-CLI dispatch in the consult `gemini` lane with an **`agy`** backend + that preserves agentic file-reading and never blocks the run when unavailable. + +#### Implementation Details +- **Files**: `packages/codev/src/commands/consult/index.ts` (dispatch + prompt assembly), + `packages/codev/src/commands/consult/usage-extractor.ts` (`extractReviewText`/usage), + `packages/codev/src/commands/doctor.ts` (agy check). +- **Binary resolution (verified, not PATH-trusting):** resolve the real CLI — prefer + `~/.local/bin/agy`; else a PATH lookup **verified** to be the headless CLI (responds to + `--version`/`--print` as the CLI, not the IDE Electron launcher). 
If none is valid (missing, or only + the IDE symlink `~/.antigravity/.../agy`), treat the backend as **unavailable → skip** (below) — + never launch the IDE. +- **Invocation:** `agy --print --sandbox --add-dir <dir> [--print-timeout <duration>]` with the + reviewer **role folded into the prompt** (`${role}\n\n---\n\n${query}`, the `hermes` precedent at + `index.ts:651-668`). **Keep** the existing "read the diff / explore the filesystem" prompt builders + (agentic reading preserved); large content stays file-referenced (diff temp file + the >100k-char + temp-file pattern) to avoid `E2BIG`. +- **Output:** `--print` returns **plain text** = the review → adapt `extractReviewText`'s `gemini` + branch to return raw output; `extractGeminiUsage` returns null → **cost rows degrade gracefully** + (see Cross-Cutting Contracts). +- **Timeout ownership:** Codev manages its own timeout and SIGTERMs the child if `agy` hangs past it + (does not rely solely on `--print-timeout`). +- **Fast non-blocking skip:** stream stdout/stderr; if the **OAuth URL** appears (unauthed) or the + binary is unavailable/invalid, terminate early and emit **`VERDICT: COMMENT` / `SUMMARY: Skipped + (agy unavailable: <reason>)`** — `verdict.ts` treats `COMMENT` as non-blocking (`:42,:54-59`), so + porch advances rather than defaulting to a blocking `REQUEST_CHANGES`. This is the CI/headless story. +- **Doctor (agy):** update the gemini dependency/auth check (`doctor.ts:153-163,266-274`) to detect + the real `agy` CLI + auth via a short-timeout probe (OAuth-URL ⇒ "needs login"); install hint → + official script `antigravity.google/cli/install.sh`; drop the `gemini`-CLI/`--yolo` check. Ensure the + "operational model" count treats an `agy`-usable setup as operational. + +#### Deliverables +- [ ] agy dispatch + verified binary resolution + role-inlined prompt + plain-text handling. +- [ ] Fast non-blocking `COMMENT` skip (unavailable/unauthed/invalid-binary). +- [ ] Graceful cost degradation (no `NaN`). 
+- [ ] `doctor` agy presence/auth check + install hint + operational counting. +- [ ] Unit/integration tests. + +#### Acceptance Criteria +- [ ] `consult -m gemini` (authed) returns a review that used file contents (agentic). +- [ ] Unauthed/missing/IDE-stub-only → fast `COMMENT` skip (no ~30s hang, no block). +- [ ] No `NaN` cost; `doctor` reports agy status correctly without hanging. +- [ ] All tests pass. + +#### Test Plan +- **Unit** (`packages/codev/src/__tests__/consult.test.ts`): mock `spawn`/binary-resolver — agy + invoked with `--print --sandbox --add-dir`; binary rejection (IDE symlink ⇒ unavailable); OAuth-URL + ⇒ early `COMMENT` skip; plain-text → raw review; graceful cost. +- **Doctor** (`packages/codev/src/__tests__/doctor.test.ts`): agy present+authed / present+unauthed / + absent; operational counting. +- **Integration**: a guarded real `agy --print` smoke (skippable when unauthed in CI). + +#### Risks +- **Risk**: Codev launches the IDE symlink instead of the CLI. **Mitigation**: verified binary + resolution + rejection → skip; binary-resolution test. +- **Risk**: prompt delivery (positional vs stdin) hits arg limits. **Mitigation**: `hermes` temp-file + pattern for large prompts; confirm delivery empirically in this phase. +- **Risk**: `agy` self-updates and changes flags. **Mitigation**: pin observed flags; e2e (Phase 2) + catches drift. + +--- + +### Phase 2: Doctor consolidation + docs + skeleton consistency + e2e verification +**Dependencies**: Phase 1 + +#### Objectives +- Make the docs and skeleton coherent for the agy backend, and verify the headline path end-to-end + (including porch progression on skip). + +#### Implementation Details +- **Files**: docs — `CLAUDE.md`, `AGENTS.md`, `README.md`, + `codev-skeleton/resources/commands/consult.md`, `.claude/skills/consult/SKILL.md` (+ skeleton copy), + `codev-skeleton/DEPENDENCIES.md`; tests — `packages/codev/src/__tests__/cli/` (e2e) + a + porch-progression test. 
(Any residual `doctor.ts` consolidation not done in Phase 1.) +- **Docs:** agy setup — official install script + one-time interactive `agy` login (subscription); + remove dead references to the retiring `gemini` CLI auth flow. Note the model identifier stays + `gemini` and the `pro` alias is kept. Note that the Gemini-CLI **builder** harness + (`harness.ts:GEMINI_HARNESS`) is a **separate, untouched** concern (out of scope; will break for + affected tiers — follow-up issue). +- **Model-identifier audit:** confirm `gemini` stays in `MODEL_CONFIGS`, `VALID_MODELS` + (`porch/next.ts:51`), the **skeleton** `protocol-schema.json:155` enum (the `codev/protocols` copy + has no model enum — distinct files), and all protocol-JSON default model lists. Keep skeleton ↔ + `codev/` copies identical. +- **E2E / headline path:** run `consult -m gemini` (via agy) on a spec, a plan, and a PR; and a + **porch-orchestrated** test proving phase progression continues when `agy` is unavailable + (`COMMENT` skip → 2-way) — the core failure prevented. + +#### Deliverables +- [ ] Docs + skeleton updated and consistent; model-id-stays-`gemini` audit done. +- [ ] `harness.ts` separate-concern note; retiring-CLI references removed. +- [ ] E2E headline-path test + porch-progression test. + +#### Acceptance Criteria +- [ ] Docs reference only supported setup; skeleton ↔ codev consistent. +- [ ] E2E + porch-progression tests green. + +#### Test Plan +- **E2E** (`packages/codev/src/__tests__/cli/`): agy headline path; porch run advances on skip. +- **Consistency**: skeleton/codev schema+defaults; model-identifier audit assertion. + +#### Risks +- **Risk**: doc/skeleton drift across the four-tier resolver. **Mitigation**: update both trees; a + consistency test. 
+ +## Dependency Map +``` +Phase 1 (agy backend) ──→ Phase 2 (doctor/docs/skeleton/e2e) +``` + +## Risk Analysis +| Risk | Probability | Impact | Mitigation | +|------|------------|--------|------------| +| agy uses Flash (no `--model`) → reviews less deep than old Pro CLI | Med | Low | Accepted (architect: don't pro-pin; lean). | +| Codev launches IDE symlink instead of CLI | Med | High | Verified binary resolution + rejection → skip (Phase 1); test. | +| Unauthed/CI → blocks porch | Med | High | Non-blocking `COMMENT` skip (Phase 1); porch-progression test (Phase 2). | +| First-run auth is interactive (can't run in CI) | Med | Med | Non-blocking skip = CI story; optional pre-provisioned OAuth token; doctor "needs login". | +| No token usage → cost reporting breaks | High | Low | Graceful degradation (no `NaN`). | +| skeleton/`codev` drift | Low | Med | Update both; consistency test (Phase 2). | + +## Validation Checkpoints +1. **After Phase 1**: agy review works + skips non-blockingly; doctor agy ok; graceful cost. +2. **Before done**: e2e headline path + porch progression on skip; docs/skeleton consistent. + +## Documentation Updates Required +- [ ] `CLAUDE.md` / `AGENTS.md` (agy setup; model id stays `gemini`). +- [ ] `README.md`, `codev-skeleton/resources/commands/consult.md`, consult `SKILL.md`, + `codev-skeleton/DEPENDENCIES.md`. + +## Expert Review +**Date**: 2026-06-02 (iteration 1 was on a since-superseded dual-backend draft; the API backend was +dropped per architect — agy is OAuth-only, no API-key auth). The agy-relevant iter-1 findings are +**retained** here: backend-aware plain-text parsing (`extractReviewText`/`extractGeminiUsage`), +corrected test paths (`src/__tests__/…`), doctor operational-model counting, and the `COMMENT`-skip +contract. A re-consult on this single-agy plan can be run at the architect's discretion. 
+ +## Approval +- [ ] Architect review (plan-approval gate) +- [ ] Expert AI consultation complete (3-way) + +## Change Log +| Date | Change | Reason | +|------|--------|--------| +| 2026-06-02 | Initial dual-backend plan | (superseded) | +| 2026-06-02 | Reverted to **single-agy** plan; dropped API backend + selector | Architect: agy is OAuth-only (no API-key auth); API backend unwanted/unbuildable | + +## Notes +- **No time estimates** (per protocol). Phases ship as commits within a single PR. +- Each phase runs the SPIR I-D-E cycle (implement → defend/tests → evaluate). +- **Lean by design:** single backend swap + skip safety + doctor/docs/e2e; no API key, no selector, + no generic gateway, no Codex/Claude-lane changes, `harness.ts` untouched. + +--- + +## Amendment History + diff --git a/codev/projects/778-gemini-cli-antigravity-cli-jun/778-agy_backend-iter1-rebuttals.md b/codev/projects/778-gemini-cli-antigravity-cli-jun/778-agy_backend-iter1-rebuttals.md new file mode 100644 index 000000000..bb5d698c9 --- /dev/null +++ b/codev/projects/778-gemini-cli-antigravity-cli-jun/778-agy_backend-iter1-rebuttals.md @@ -0,0 +1,39 @@ +# Phase 1 (agy_backend) — Iteration-1 Implement Rebuttals + +**Verdicts:** Gemini COMMENT · Codex REQUEST_CHANGES · Claude APPROVE +**Disposition:** All points **accepted and addressed** (no rejections). Full suite green after fixes +(152 files, 3209 passed, 0 failed). + +## Codex (REQUEST_CHANGES) +- **CX1 — `resolveAgyBin()` only did realpath heuristics, not behavioral `--version` verification of a + PATH candidate.** ✅ Added `agyRespondsToVersion(bin)` (runs `--version`, read-only) and require it + for the **untrusted PATH-fallback** candidate (in addition to the realpath IDE-rejection). The + canonical `~/.local/bin/agy` and the explicit `CODEV_AGY_BIN` override remain realpath-trusted (no + per-call subprocess on the common path). So a bare PATH `agy` is now accepted only if it both isn't + the IDE *and* behaves like the headless CLI. 
+- **CX2 — `verifyAgy()` used `spawnSync`, so OAuth detection only happened after exit/timeout (could + stall `codev doctor`).** ✅ Rewrote `verifyAgy()` as **async + streaming**: it spawns `agy --print`, + scans the early stdout/stderr stream, and **terminates early the instant the OAuth URL appears**, + reporting "needs login" promptly instead of waiting out the timeout. Call site now `await`s it. +- **CX3 — Test gaps (no behavioral PATH-candidate verification; no fast unauthed doctor test).** ✅ + Added: an `agyRespondsToVersion` unit test (version-emitting vs not vs throwing), and a doctor test + asserting a **prompt "needs login"** when agy streams the OAuth URL (replacing the obsolete + spawnSync-timeout test). + +## Gemini (COMMENT) +- **G1 — Dead `VERIFY_CONFIGS['Gemini']` (old `gemini --yolo` config) left in `doctor.ts`.** ✅ + Removed (the gemini lane is verified via `verifyAgy`, not `VERIFY_CONFIGS`), per the plan's "drop + the `gemini`-CLI/`--yolo` check." +- **G2 — Fake hardcoded-config test in `consult.test.ts` masked the new `'agy'` cli.** ✅ Rewrote + `should have correct CLI configuration for each model` to assert the **real exported + `_MODEL_CONFIGS`** (`gemini.cli === 'agy'`, no `--model` arg, `envVar` null), so a backend change is + now caught. + +## Claude (APPROVE) +- **Minor — `agySkipContent()` called twice on the no-binary path.** ✅ Store the result once. +- (Confirmed dead `VERIFY_CONFIGS['Gemini']` — same as G1, fixed.) + +## Net +Binary resolution now behaviorally verifies untrusted PATH candidates; the doctor auth probe is fast +(streaming OAuth detection); dead code removed; the config test asserts reality. Scope unchanged +(lean backend swap). Full suite green. 
diff --git a/codev/projects/778-gemini-cli-antigravity-cli-jun/778-agy_backend-iter2-rebuttals.md b/codev/projects/778-gemini-cli-antigravity-cli-jun/778-agy_backend-iter2-rebuttals.md new file mode 100644 index 000000000..6c39eceb5 --- /dev/null +++ b/codev/projects/778-gemini-cli-antigravity-cli-jun/778-agy_backend-iter2-rebuttals.md @@ -0,0 +1,41 @@ +# Phase 1 (agy_backend) — Iteration-2 Implement Rebuttals + +**Verdicts:** Gemini (empty — see note) · Codex REQUEST_CHANGES · Claude APPROVE +**Disposition:** Codex's points **accepted and addressed**. Default suite green (3210 passed); cli-e2e +green (84 passed). + +## Codex (REQUEST_CHANGES) +- **CX1 — Missing guarded real-`agy` integration smoke + acceptance evidence that `consult -m gemini` + returns a review using file contents.** ✅ Added + `packages/codev/src/__tests__/cli/agy-integration.e2e.test.ts` — a guarded integration test that + runs the **real** agy (no child_process mock), plants a file, invokes the gemini lane, and asserts + the review contains the planted marker (proving agentic file-reading). It **skips cleanly** when agy + is unavailable/unauthed (the non-blocking COMMENT skip is detected), so it's safe in CI. It lives in + the `*.e2e.test.ts` suite (run via `pnpm test:e2e:cli`), correctly **excluded from the default unit + gate**. + - **Acceptance evidence (real run, this machine, authed agy):** the test passed — agy set up its + sandbox, **read `planted.txt` from disk**, and returned *"The codeword found in planted.txt is: + `PLANTED_1780546887783`"* (`[gemini (agy) completed in 14.1s]`). The headline path works + end-to-end. +- **CX2 — `pro`-alias test redefined a local object instead of exercising the real execution path.** + ✅ Added an execution-path test in the agy describe block: `consult({ model: 'pro', ... })` resolves + through `pro → gemini → agy` and spawns the resolved agy binary with `--print`. 
Also rewrote the + standalone "should support model aliases" test to assert the **real exported `_MODEL_ALIASES`** + (not a hardcoded duplicate). + +## Gemini (empty / no review) +The global `consult -m gemini` lane (which porch invokes) still uses the **retiring Gemini CLI** — +my agy backend is in this worktree, not globally installed. That CLI returned **empty** this +iteration (it produced a review at iter-1). This is precisely the degradation #778 fixes. The agy +backend itself is verified working (see CX1 acceptance evidence). For porch's 3-way, the gemini-model +review can be regenerated via the worktree's agy-backed consult if needed. + +## Claude (APPROVE) +No blocking issues. Minor non-blocking notes acknowledged: `extractReviewText` is now a documented +no-op stub (cleanup is out of this phase's scope); `agyRespondsToVersion`'s shell-quoted `--version` +runs only for untrusted PATH candidates (limited surface). + +## Net +Guarded real-agy integration smoke added with real acceptance evidence (agentic file-reading +confirmed); `pro` alias now execution-tested; alias map assertion uses the real export. Scope +unchanged. All suites green. diff --git a/codev/projects/778-gemini-cli-antigravity-cli-jun/778-plan-iter1-rebuttals.md b/codev/projects/778-gemini-cli-antigravity-cli-jun/778-plan-iter1-rebuttals.md new file mode 100644 index 000000000..13b59c58e --- /dev/null +++ b/codev/projects/778-gemini-cli-antigravity-cli-jun/778-plan-iter1-rebuttals.md @@ -0,0 +1,40 @@ +# Plan 778 — Iteration-1 Rebuttals + +**Verdicts:** Gemini APPROVE · Codex REQUEST_CHANGES · Claude COMMENT +**Disposition:** All substantive points accepted and addressed (no rejections). Codex and Claude +converged on the two key items; both are now pinned in a new **Cross-Cutting Implementation +Contracts** section + Phase 4. Code claims re-verified against the tree. 
+ +## Codex (REQUEST_CHANGES) +- **CX1 — Backend context must reach the parsing/metrics pipeline.** ✅ Verified `extractReviewText`/ + `extractUsage` branch on `model === 'gemini'` and assume the old CLI JSON (`stats.models`). Added a + Cross-Cutting contract: thread the resolved **`backend`** into the extractor — `agy` → raw text + + null usage (graceful degradation); `api` → `usageMetadata` → real cost; old `stats.models` path + removed. (Also addresses Claude #1.) +- **CX2 — `doctor` operational-model counting.** ✅ Added to Phase 4: the Gemini lane must count as + operational when **either** backend is usable, so an **API-only** setup (no `agy`, `GEMINI_API_KEY` + set) is reported operational, not failed. +- **CX3 — Wrong test paths.** ✅ Verified: tests live under `packages/codev/src/__tests__/` (+ + `…/cli/` e2e; `…/commands/consult/__tests__/metrics.test.ts`; + `…/commands/porch/__tests__/consultation-models.test.ts`); `packages/codev/tests/e2e|unit` do **not** + exist. Fixed Phase 4 and added a canonical "Test locations" note. + +## Claude (COMMENT) +- **CL1 — `extractUsage`/`extractReviewText` disambiguation.** ✅ Same as CX1 (backend threaded + through). +- **CL2 — Test path `packages/codev/tests/e2e/` doesn't exist.** ✅ Same as CX3. +- **CL3 — `consult.gemini.backend` is a NEW top-level `consult` key.** ✅ Cross-Cutting contract now + states it is distinct from `porch.consultation.models`; don't nest it there. +- **Dual-dispatch architecture** (single `if (model==='gemini')` → backend-branched). ✅ Cross-Cutting + contract states how Phase 1/2 branches merge under the Phase 3 selector. +- **`protocol-schema.json` conflation** (enum only in the skeleton copy; `codev/protocols` has none). + ✅ Noted in Phase 4 consistency. +- **Config migration.** ✅ Cross-Cutting contract: missing key → default (`auto`); no migration logic. + +## Gemini (APPROVE) +- No issues. 
+ +## Net +Two precision gaps (backend-aware pipeline; test paths) fixed; doctor counting, new-config-key, +dual-dispatch, schema-enum scope, and no-migration all pinned. Scope unchanged (still "two backends + +a selector" + supporting doctor/docs/e2e). No blocker remains. diff --git a/codev/projects/778-gemini-cli-antigravity-cli-jun/778-specify-iter2-rebuttals.md b/codev/projects/778-gemini-cli-antigravity-cli-jun/778-specify-iter2-rebuttals.md new file mode 100644 index 000000000..84639772b --- /dev/null +++ b/codev/projects/778-gemini-cli-antigravity-cli-jun/778-specify-iter2-rebuttals.md @@ -0,0 +1,100 @@ +# Spec 778 — Iteration-2 Rebuttals (Approach-B spec) + +**Verdicts:** Codex REQUEST_CHANGES · Gemini COMMENT · Claude REQUEST_CHANGES +**Disposition:** All substantive points **accepted and addressed**. No point rejected. Several were +*convergent* (the three reviewers reinforced each other), and Gemini supplied the concrete mechanism +that resolves Codex's main ask. Code claims were re-verified against the tree before encoding them. + +--- + +## Unanimous must-fix + +### Stale "lane uses the Pro model class" in Desired State (Codex, Gemini, Claude). ✅ FIXED +When I applied the architect's "don't pin Pro" decision, I updated the directive, the verified-contract +bullet, open questions, success criteria, risks, tests, and notes — but **missed the Desired State +bullet**, which still said "uses the Pro model class." Corrected to "uses `agy`'s default model (no +pinning; currently Gemini 3.5 Flash)". Good catch; this was a real inconsistency. + +--- + +## Claude (REQUEST_CHANGES) + +### C1 — Model identifier must be stated to stay `gemini`. ✅ FIXED +Added explicitly (Desired State + Iteration-2 Decisions): the identifier stays `gemini` across +`MODEL_CONFIGS`, `VALID_MODELS`, `protocol-schema.json` enum, default lists, user config, and the +`pro` alias — only the backend changes; no rename to `agy`/`antigravity`. 
+ +### C2 — `extractReviewText` gemini branch does `JSON.parse` → throws on agy plain text. ✅ ADDRESSED +Verified (`usage-extractor.ts`: `if (model==='gemini'){ JSON.parse(output)…return parsed.response }`). +Iteration-2 Decisions now require adapting that branch to **return the raw output** for the agy +backend; usage extraction returns null → cost rows degrade gracefully. + +### C3 — `hermes` precedent. ✅ ADDED +Verified (`index.ts:39,651-668,1587`): hermes is a CLI model with `envVar:null`, role folded into the +prompt, temp-file when prompt > 100k chars, plain-text output. Spec now points the builder at this as +the working template (also resolves the E2BIG concern below). + +### C4 — `pro` alias semantics. ✅ DECIDED +Keep as-is (historical name; resolves to the `gemini`/agy lane). No rename, no deprecation warning — +leanest, per the architect's "keep it lean." + +### C5 — `harness.ts` `GEMINI_HARNESS` distinct/untouched. ✅ CLARIFIED +Iteration-2 Decisions explicitly state it's untouched and a separate concern from the consult +`MODEL_CONFIGS.gemini` lane. + +### C6 — Timeout interaction (agy `--print-timeout` vs Codev's own kill). ✅ DECIDED +Codev manages its **own** timeout and SIGTERMs the child if `agy` hangs past it; does not rely solely +on `--print-timeout`. Exact values are a Plan detail. + +### C7 — Binary verification criteria. ✅ ADDRESSED (see Codex CX4 below). + +--- + +## Codex (REQUEST_CHANGES) + +### CX1 — Stale Pro contradiction. ✅ FIXED (see unanimous, above). + +### CX2 — Non-blocking skip under-specified at the observable-contract level. ✅ RESOLVED +Adopted the concrete mechanism (Gemini supplied it): the lane emits **`VERDICT: COMMENT` / +`SUMMARY: Skipped (...)`** when `agy` is unavailable. Verified `verdict.ts:42,54-59` — `COMMENT` is +parsed and `allApprove` treats it as non-blocking, while a *missing* verdict defaults to +`REQUEST_CHANGES` (blocks). So the explicit `COMMENT` is mandatory and now specified, not deferred. 
+ +### CX3 — Require a porch-orchestrated progression test. ✅ ADDED +New test scenario 2b: an actual porch SPIR run with `agy` missing/unauthed must show **phase +progression continues** (not just a unit test of the skip). + +### CX4 — Binary-resolution rejection rule. ✅ ADDED +Iteration-2 Decisions: prefer `~/.local/bin/agy`; else a PATH lookup **verified** to be the real +headless CLI (responds to `--print`/`--version` as the CLI, not the IDE Electron launcher); if none +is valid (missing or only the IDE stub/symlink), treat the lane as **unavailable → `COMMENT` skip +with guidance** — never launch the IDE. + +--- + +## Gemini (COMMENT — non-blocking) + +### G1 — Stale Pro contradiction. ✅ FIXED (unanimous). + +### G2 — `E2BIG` / large-prompt mitigation. ✅ ADDRESSED +Follow the `hermes` temp-file pattern (prompt > 100k chars → temp file); and `buildPRQuery` already +writes the diff to a temp file the reviewer reads, so large content stays file-referenced. Captured in +Iteration-2 Decisions; prompt-delivery specifics (positional vs stdin) confirmed as a Plan check. + +### G3 — Auth hangs ~30s. ✅ ADDED +Wrapper streams stdout/stderr and **terminates the child early when the OAuth URL is detected**, +emitting the `COMMENT` skip — so an unauthed lane skips fast instead of blocking the run. + +### G4 — Concrete non-blocking skip via `COMMENT`. ✅ ADOPTED +This is the mechanism now specified (see CX2). Thanks to Gemini for the grounded, minimal approach. + +--- + +## Net change summary +The one real defect (stale Pro line) is fixed. The skip contract is now concrete (`COMMENT` verdict) +rather than deferred, with a fast auth-skip and a binary-rejection rule. Output handling +(`extractReviewText`), timeout ownership, the `hermes` template, the `pro`-alias call, and the +`harness.ts` distinction are all pinned down. All changes preserve the architect's constraints +(agentic file-reading, subscription/OAuth, default model = Flash, lean scope). 
No open question +remains that blocks implementation; the residual items are Plan-level value choices (timeout numbers, +prompt-delivery confirmation). diff --git a/codev/projects/778-gemini-cli-antigravity-cli-jun/status.yaml b/codev/projects/778-gemini-cli-antigravity-cli-jun/status.yaml new file mode 100644 index 000000000..28aec4a71 --- /dev/null +++ b/codev/projects/778-gemini-cli-antigravity-cli-jun/status.yaml @@ -0,0 +1,97 @@ +id: '778' +title: gemini-cli-antigravity-cli-jun +protocol: spir +phase: review +plan_phases: + - id: agy_backend + title: 'Phase 1: agy backend dispatch (OAuth, agentic file-reading, non-blocking skip)' + status: complete + - id: docs_skeleton_e2e + title: 'Phase 2: Doctor + docs + skeleton consistency + e2e verification' + status: complete +current_plan_phase: null +gates: + spec-approval: + status: approved + requested_at: '2026-06-02T01:32:17.767Z' + approved_at: '2026-06-02T05:35:02.131Z' + plan-approval: + status: approved + requested_at: '2026-06-02T05:57:40.957Z' + approved_at: '2026-06-02T20:33:37.524Z' + pr: + status: approved + requested_at: '2026-06-05T13:16:51.483Z' + approved_at: '2026-06-06T02:50:18.266Z' + verify-approval: + status: pending +iteration: 1 +build_complete: true +history: + - iteration: 1 + plan_phase: agy_backend + build_output: '' + reviews: + - model: gemini + verdict: COMMENT + file: >- + /Users/mwk/Development/cluesmith/codev/.builders/spir-778/codev/projects/778-gemini-cli-antigravity-cli-jun/778-agy_backend-iter1-gemini.txt + - model: codex + verdict: REQUEST_CHANGES + file: >- + /Users/mwk/Development/cluesmith/codev/.builders/spir-778/codev/projects/778-gemini-cli-antigravity-cli-jun/778-agy_backend-iter1-codex.txt + - model: claude + verdict: APPROVE + file: >- + /Users/mwk/Development/cluesmith/codev/.builders/spir-778/codev/projects/778-gemini-cli-antigravity-cli-jun/778-agy_backend-iter1-claude.txt + - iteration: 2 + plan_phase: agy_backend + build_output: '' + reviews: + - model: gemini + 
verdict: REQUEST_CHANGES + file: >- + /Users/mwk/Development/cluesmith/codev/.builders/spir-778/codev/projects/778-gemini-cli-antigravity-cli-jun/778-agy_backend-iter2-gemini.txt + - model: codex + verdict: REQUEST_CHANGES + file: >- + /Users/mwk/Development/cluesmith/codev/.builders/spir-778/codev/projects/778-gemini-cli-antigravity-cli-jun/778-agy_backend-iter2-codex.txt + - model: claude + verdict: APPROVE + file: >- + /Users/mwk/Development/cluesmith/codev/.builders/spir-778/codev/projects/778-gemini-cli-antigravity-cli-jun/778-agy_backend-iter2-claude.txt + - iteration: 1 + plan_phase: docs_skeleton_e2e + build_output: '' + reviews: + - model: gemini + verdict: APPROVE + file: >- + /Users/mwk/Development/cluesmith/codev/.builders/spir-778/codev/projects/778-gemini-cli-antigravity-cli-jun/778-docs_skeleton_e2e-iter1-gemini.txt + - model: codex + verdict: REQUEST_CHANGES + file: >- + /Users/mwk/Development/cluesmith/codev/.builders/spir-778/codev/projects/778-gemini-cli-antigravity-cli-jun/778-docs_skeleton_e2e-iter1-codex.txt + - model: claude + verdict: COMMENT + file: >- + /Users/mwk/Development/cluesmith/codev/.builders/spir-778/codev/projects/778-gemini-cli-antigravity-cli-jun/778-docs_skeleton_e2e-iter1-claude.txt + - iteration: 2 + plan_phase: docs_skeleton_e2e + build_output: '' + reviews: + - model: gemini + verdict: APPROVE + file: >- + /Users/mwk/Development/cluesmith/codev/.builders/spir-778/codev/projects/778-gemini-cli-antigravity-cli-jun/778-docs_skeleton_e2e-iter2-gemini.txt + - model: codex + verdict: REQUEST_CHANGES + file: >- + /Users/mwk/Development/cluesmith/codev/.builders/spir-778/codev/projects/778-gemini-cli-antigravity-cli-jun/778-docs_skeleton_e2e-iter2-codex.txt + - model: claude + verdict: COMMENT + file: >- + /Users/mwk/Development/cluesmith/codev/.builders/spir-778/codev/projects/778-gemini-cli-antigravity-cli-jun/778-docs_skeleton_e2e-iter2-claude.txt +started_at: '2026-06-02T01:16:31.004Z' +updated_at: '2026-06-06T02:50:18.267Z' 
+pr_ready_for_human: false diff --git a/codev/resources/arch.md b/codev/resources/arch.md index 366ce8793..2d75765e2 100644 --- a/codev/resources/arch.md +++ b/codev/resources/arch.md @@ -1739,7 +1739,7 @@ The `consult` command (`packages/codev/src/commands/consult/index.ts`) is a **CL ``` consult -m gemini spec 42 - → spawns: gemini --yolo "" + → spawns: agy --print --sandbox --add-dir "" consult -m codex spec 42 → spawns: codex exec -c experimental_instructions_file= --full-auto "" @@ -1752,7 +1752,7 @@ consult -m claude spec 42 | Model | CLI Binary | Role Injection | Key Env Var | |-------|-----------|----------------|-------------| -| gemini | `gemini` | Temp file via `GEMINI_SYSTEM_MD` env var | `GOOGLE_API_KEY` | +| gemini | `agy` (Antigravity CLI; resolved real bin, not the IDE symlink) | Folded into the prompt (role + query) | OAuth / subscription (no API key) | | codex | `codex` | Temp file via `-c experimental_instructions_file=` flag | `OPENAI_API_KEY` | | claude | `claude` | Prepended to query string | `ANTHROPIC_API_KEY` | diff --git a/codev/resources/commands/codev.md b/codev/resources/commands/codev.md index 75d093d3c..88ed4b9b0 100644 --- a/codev/resources/commands/codev.md +++ b/codev/resources/commands/codev.md @@ -99,7 +99,7 @@ Verifies that all required dependencies are installed and properly configured: **AI CLI Dependencies (at least one required):** - Claude (`@anthropic-ai/claude-code`) -- Gemini (`gemini-cli`) +- Gemini (Antigravity CLI, `agy`) - Codex (`@openai/codex`) **Exit Codes:** diff --git a/codev/resources/commands/consult.md b/codev/resources/commands/consult.md index 76da6fb97..85ad04e87 100644 --- a/codev/resources/commands/consult.md +++ b/codev/resources/commands/consult.md @@ -19,7 +19,7 @@ consult stats [options] | Model | Alias | Backend | Notes | |-------|-------|---------|-------| -| `gemini` | `pro` | gemini-cli | File access via --yolo, fast | +| `gemini` | `pro` | Antigravity CLI (`agy`) | Agentic file access 
(`--sandbox --add-dir`), OAuth/subscription login. Skips non-blockingly if `agy` is missing/unauthed. | | `codex` | `gpt` | @openai/codex | Read-only sandbox, thorough | | `claude` | `opus` | Claude Agent SDK | Balanced analysis with tool use | | `hermes` | - | hermes CLI (`hermes chat -q`) | Uses Hermes agent as consult backend | @@ -46,7 +46,7 @@ Cannot combine `--prompt` with `--prompt-file` or `--type`. ### Protocol Mode -Run structured reviews tied to a development protocol (SPIR, TICK, bugfix, maintain). +Run structured reviews tied to a development protocol (SPIR, ASPIR, AIR, bugfix, maintain). ```bash # Review a spec (auto-detects project context in builder worktrees) @@ -69,7 +69,7 @@ consult -m gemini --type integration ``` **Options:** -- `--protocol ` — Protocol: spir, bugfix, tick, maintain +- `--protocol ` — Protocol: spir, aspir, air, bugfix, maintain - `-t, --type ` — Review type: spec, plan, impl, pr, phase, integration - `--issue ` — Issue number (required from architect context) @@ -130,7 +130,7 @@ consult -m hermes --protocol spir --type spec | Model | Typical Time | Approach | |-------|--------------|----------| -| Gemini | ~120-150s | File access via --yolo, pure text output | +| Gemini | ~120-180s | Antigravity CLI (`agy`); agentic file access via `--sandbox`, plain text output | | Codex | ~200-250s | Shell command exploration, read-only sandbox | | Claude | ~60-120s | Agent SDK with Read/Glob/Grep tools | @@ -145,14 +145,16 @@ npm install -g @anthropic-ai/claude-code # Codex npm install -g @openai/codex -# Gemini -# See: https://github.com/google-gemini/gemini-cli +# Gemini lane → Antigravity CLI (`agy`), replacing the retired Gemini CLI +curl -fsSL https://antigravity.google/cli/install.sh | bash +agy # run once and sign in (OAuth / Google subscription) ``` -Configure API keys: +Configure auth: - Claude: `ANTHROPIC_API_KEY` - Codex: `OPENAI_API_KEY` -- Gemini: `GOOGLE_API_KEY` or `GEMINI_API_KEY` +- Gemini (`agy`): **OAuth / subscription** 
— run `agy` once and sign in (no API key). If `agy` + is missing or unauthenticated, the gemini lane skips non-blockingly (the run proceeds without it). ### Claude auth: subscription vs. metered API diff --git a/codev/resources/lessons-learned.md b/codev/resources/lessons-learned.md index 3a9848df6..9b8fe9d8c 100644 --- a/codev/resources/lessons-learned.md +++ b/codev/resources/lessons-learned.md @@ -321,6 +321,7 @@ Generalizable wisdom extracted from review documents, ordered by impact. Updated - [From 0376] The research agent pattern (spawning a subagent to read all review files in parallel and return structured data) should be documented as a standard approach for future analyses. - [From #909] Cross-file content references in framework files are brittle. Deduplicating shared content across `CLAUDE.md` and a role file via "see X for the table" pointers is a novel pattern with no precedent in this repo (every existing CLAUDE.md mention in framework files is for diffing or scaffolding, not content lookup). The pointer is redundant when the referenced file is auto-loaded and misleading if it isn't. Keep each file self-contained for its audience: `CLAUDE.md` for everyone-loaded content (vocabulary + policy), role files for role-specific content (recipes, workflows). - [From #909] Codev's skeleton has a two-layer design that's intentional: the **internal automation layer** (`packages/codev/scripts/forge//` concept commands, dispatched via `packages/codev/src/lib/forge.ts`) is forge-agnostic; the **user-facing layer** (skeleton docs, AI prompts, protocol prompts) hardcodes `gh` directly throughout. The forge concept set is read-mostly (`issue-view`, `pr-list`, etc.) — no concepts for label management, jq-piping, or interactive ops. When adding new skeleton content, match the established `gh`-direct pattern. Localized forge-CLI awareness in one section creates inconsistency vs. the rest of the skeleton. 
+- [From 778] In a self-hosted Codev repo the four-tier resolver means `codev/` instance copies *shadow* `codev-skeleton/`, so the two trees (and the `codev/` copies themselves) drift independently. A terminology/backend change (Gemini-CLI → `agy`) cost 3 review iterations because each round surfaced another stale copy (skeleton → `DEPENDENCIES.md` → `resources/commands/consult.md` → `codev.md` + `arch.md`). When changing any shared doc, grep BOTH trees in one pass and run `diff codev/<file> codev-skeleton/<file>` for every shared file — an empty diff is the consistency proof. Distinguish in-scope current docs from historical artifacts (`specs/`, `plans/`, dated analyses), which must keep their original wording. ## 3-Way Reviews diff --git a/codev/reviews/778-gemini-cli-antigravity-cli-jun.md b/codev/reviews/778-gemini-cli-antigravity-cli-jun.md new file mode 100644 index 000000000..cb8fbc2c7 --- /dev/null +++ b/codev/reviews/778-gemini-cli-antigravity-cli-jun.md @@ -0,0 +1,184 @@ +# Review: gemini-cli-antigravity-cli-jun (Spec 778) + +## Summary + +Google retires Gemini-CLI subscription serving (Pro/Ultra/free) on **2026-06-18**. This +project swaps the `gemini` consult lane's backend from the retired Gemini CLI to the +**Antigravity CLI (`agy`)**, keeping everything else about the lane intact: + +- **Single, OAuth-only backend.** `agy` authenticates via Google OAuth / subscription — + it cannot take an API key (verified empirically), so there is no separate Gemini + Developer API backend. +- **Agentic file reading preserved.** `agy --print --sandbox --add-dir <workspace>` lets the + reviewer read the workspace (PR diffs, source) the same way the old `--yolo` lane did. +- **agy's default model — no Pro pin.** The model identifier stays `gemini` everywhere + and the `pro` alias is retained; only the *backend binary* changed.
+- **Non-blocking skip.** When `agy` is missing, unauthenticated, an IDE-symlink stub, or + times out, the lane emits `VERDICT: COMMENT` ("Gemini lane skipped — …") so porch's + `allApprove` treats it as non-blocking and SPIR/ASPIR/BUGFIX phases still advance on + the remaining reviewers (2-way). A dead lane blocking phase progression was the core failure mode the spec defended against. +- **Real-binary resolution.** `resolveAgyBin()` rejects the Antigravity IDE's `agy` + symlink (by realpath) and prefers the real headless CLI (`~/.local/bin/agy`), with a + `CODEV_AGY_BIN` override. + +Implemented in two plan phases: **Phase 1 (`agy_backend`)** — dispatch, binary +resolution, non-blocking skip, graceful cost/usage degradation, `codev doctor` +integration, and tests; **Phase 2 (`docs_skeleton_e2e`)** — docs + skeleton consistency, +a guarded real-`agy` e2e (front-door + agentic-read), and a porch-orchestrated +progression test. + +## Spec Compliance + +- [x] **Backend swap to `agy`** — `MODEL_CONFIGS.gemini` dispatches via `runAgyConsultation`. +- [x] **Single backend, OAuth-only** — no API-key path; no separate Developer API backend. +- [x] **Agentic file reading (scoped)** — `--sandbox --add-dir <workspaceRoot> + a dedicated + per-process consult sandbox subdir` (never the whole OS temp dir); proven live (e2e read a + planted file). +- [x] **agy default model, no Pro pin** — no `--model` flag; `pro` alias kept; id stays `gemini`. +- [x] **Non-blocking COMMENT skip** — missing / unauthed / IDE-stub / timeout → `COMMENT`. +- [x] **Cost/usage degrade gracefully** — agy emits plain text; usage extraction returns + `null` (no `NaN`), metrics still record. +- [x] **`codev doctor`** — presence via `resolveAgyBin()`; streaming `verifyAgy()` reports + authed / needs-login / timeout with current install guidance.
+- [x] **Docs reference only the supported setup; skeleton ↔ codev consistent.** +- [x] **E2E + porch-progression tests green.** +- [x] **Model identifier stays `gemini`** in `MODEL_CONFIGS`, `VALID_MODELS`, the + skeleton `protocol-schema.json` enum, and all protocol-JSON default model lists. + +## Deviations from Plan + +- **Doc file list expanded beyond the plan.** The plan's Phase 2 file list named + `CLAUDE.md`, `AGENTS.md`, `README.md`, the skeleton `consult.md`/`DEPENDENCIES.md`, and + `SKILL.md`. Review iterations surfaced additional **agy-relevant** stale copies that the + self-hosted four-tier resolver shadows: `codev/DEPENDENCIES.md`, + `codev/resources/commands/consult.md`, `codev/`+skeleton `resources/commands/codev.md`, + and the Consult Architecture section of `codev/resources/arch.md`. All were synced; the + `codev/` copies of `consult.md` and `DEPENDENCIES.md` are now byte-identical to their + skeleton twins. Rationale: the acceptance criterion is literally "skeleton ↔ codev + consistent," and leaving these stale would document an unsupported setup. +- **No separate API backend / no Pro pin** — these were *removed* across the spec's own + evolution (architect corrections during Specify), not deviations at implementation time. + +## Lessons Learned + +### What Went Well +- The non-blocking-skip contract (`COMMENT` → `allApprove` passes) made the lane swap + safe by construction: even a totally absent `agy` cannot stall a phase. +- Empirical verification of the `agy` headless contract (flags, OAuth-only auth, IDE + symlink vs. real bin) up front prevented guessing — the real CLI behaves as documented. +- The guarded real-`agy` e2e doubled as headline-path acceptance: it actually read a + planted file and returned the codeword through the `consult -m gemini` front door. + +### Challenges Encountered +- **Self-hosted doc-copy drift**: the biggest time sink. 
Each Phase-2 review round found + another `codev/` instance copy still referencing the retired CLI. Resolved by a + repo-wide scan that fixed every remaining current-doc reference in one pass and + explicitly scoped out historical artifacts. +- **agy as a reviewer of code diffs (Phase 1)**: agy/Flash needs the diff *content* in + the prompt, not just a file list, or it wanders; for docs (Phase 2) it reads files + directly and reviews cleanly. (Captured as a follow-up on consult's impl-query shape.) + +### What Would Be Done Differently +- Run the `diff codev/ codev-skeleton/` consistency sweep *before* the first + review, not in response to it — it would have collapsed three iterations into one. + +### Methodology Improvements +- A porch/consult pre-flight that, for any doc-touching phase in a self-hosted repo, + lists `codev/` ↔ `codev-skeleton/` divergences would catch this class early. + +## Technical Debt +- The Gemini-CLI **builder** harness (`harness.ts`, plus `README.md` CLI-flag table and + `architect`/`builder` config examples) still references the retired CLI. Out of scope + per the approved spec; tracked as a follow-up. + +## Consultation Feedback + +### Specify Phase +- **Round 1** — gemini **REQUEST_CHANGES** (the single-shot API pivot would break file + access), codex **REQUEST_CHANGES** (two behavior gaps; one feasibility req too strong), + claude **APPROVE**. **Addressed**: pivoted to the agy-backed Approach B that preserves + agentic reading. +- **Round 2** — codex **REQUEST_CHANGES** (one contradiction + under-specified skip + contract), claude **REQUEST_CHANGES** (stale "Pro" reference contradicting the + no-pinning decision), gemini **COMMENT** (endorsed the `COMMENT`-skip strategy). + **Addressed**: removed the Pro references and tightened the skip contract; spec approved + by the human at the `spec-approval` gate. 
+ +### Plan Phase +- **Round 1** — gemini **APPROVE**, codex **REQUEST_CHANGES** (two ambiguous contracts + + wrong test paths), claude **COMMENT** (usage-extractor routing, test paths). + **Addressed**: pinned the usage-extractor backend routing and corrected test-file + locations; plan approved at the `plan-approval` gate. (The dual-backend plan was then + superseded by the single-agy revert per the architect's final direction.) + +### Implement — Phase 1 (`agy_backend`) +- **Round 1** — claude **APPROVE**, codex **REQUEST_CHANGES** (binary-resolution/auth-probe + didn't fully meet the skip-safety contract), gemini **COMMENT** (dead code in doctor/tests). + **Addressed**: hardened `resolveAgyBin`/auth probing; removed dead code. +- **Round 2** — claude **APPROVE**, codex **REQUEST_CHANGES** (missing happy-path + integration verification), gemini **CONSULT skip**. **Addressed**: added the guarded + real-`agy` integration test; added the `--print` timeout → non-blocking-skip handling. +- **Round 3** — gemini **COMMENT** (agy timed out → lane self-skipped), codex **APPROVE**, + claude **APPROVE**. Advanced. + +### Implement — Phase 2 (`docs_skeleton_e2e`) +- **Round 1** — gemini **APPROVE**, claude **COMMENT**, codex **REQUEST_CHANGES** (×4): + e2e bypassed the `consult` front door; progression test not porch-orchestrated; + `SKILL.md` `tick` divergence; stale `--yolo` in `consult.md`. **Addressed**: all four — + added a real-binary front-door e2e case, added a `next()`-driven porch-orchestrated + progression test, removed `tick`, fixed `--yolo`; also ran the live headline path + (`consult -m gemini --type spec|plan`: COMMENT / APPROVE). +- **Round 2** — gemini **APPROVE**, claude **COMMENT**, codex **REQUEST_CHANGES** (×2): + `codev/DEPENDENCIES.md` ↔ skeleton divergence; "Gemini Pro" wording in CLAUDE/AGENTS. 
+ **Addressed**: synced both; "Gemini Pro" → "Gemini (via agy)"; plus a repo-wide scan + that fixed `consult.md`, `codev.md`, `arch.md`, and README blurbs, with out-of-scope + items (historical artifacts, builder harness, generate-image skill) documented. +- **Round 3** — gemini **APPROVE**, codex **APPROVE**, claude **APPROVE**. Advanced to review. + +### Review Phase — PR #988 CMAP (`--type pr`) +- **Round 1** — gemini **APPROVE**, claude **APPROVE**, codex **REQUEST_CHANGES** (3, + integration-readiness): spec/plan lacked approval frontmatter; branch 310 commits + behind `main`; `chore(porch)` commits in history. **Addressed**: added approval + frontmatter (documents the human gate approvals); **merged `origin/main`** (conflict-free + → 0 behind, rebuilt core, full suite green). **Rebutted**: the porch state-commits are + required by repo policy (CLAUDE.md "DO NOT SQUASH MERGE — individual commits document the + development process"). +- **Re-consult** — gemini **APPROVE**, claude **APPROVE**, codex **REQUEST_CHANGES** (2 new, + both valid): (a) **security** — the agy `--add-dir` granted the entire OS `tmpdir()`; + (b) **doc drift** — the `origin/main` merge pulled the #985 "Claude auth" section into + `codev/resources/commands/consult.md` but not the skeleton copy. **Addressed**: (a) added + `consultSandboxDir()` — a per-process `mkdtemp` subdir holding the PR-diff + large-prompt + files; agy is now granted only `workspaceRoot` + that subdir (pinned by a new test); + (b) synced the #985 section into the skeleton so both `consult.md` copies are + byte-identical again. + +## Architecture Updates + +Updated `codev/resources/arch.md` → **Consult Architecture**: the `gemini` lane's spawn +line and model-configuration table row now describe the `agy` mechanism (`agy --print +--sandbox --add-dir `, role folded into the prompt, OAuth/subscription auth — +no API key) instead of the retired `gemini --yolo` / `GEMINI_SYSTEM_MD` / `GOOGLE_API_KEY` +mechanism. 
No new subsystems or data flows were introduced — this is a backend swap within +the existing CLI-delegation layer, so no structural diagram changes were needed. + +## Lessons Learned Updates + +Added one entry to `codev/resources/lessons-learned.md` under **Documentation**: in a +self-hosted Codev repo the four-tier resolver makes `codev/` instance copies shadow +`codev-skeleton/`, so shared docs drift independently; when changing a shared doc, grep +both trees and `diff` every shared file in one pass, and keep historical artifacts +(`specs/`, `plans/`, dated analyses) at their original wording. (Generalizes the existing +"[From 0099] exhaustive grep before all-instances-fixed" lesson to the skeleton/instance +split.) + +## Flaky Tests +No flaky tests encountered. The full unit suite (3217 passing, 13 skipped) ran green on +every iteration; the 13 skips are the guarded real-`agy` e2e cases (no-op without `agy`). + +## Follow-up Items +- Migrate the Gemini-CLI **builder** harness (`harness.ts`) off the retired CLI (separate + effort, per spec). +- Improve consult's `impl`-review query to include diff *content* (not just a file list) + so the agy/Flash reviewer doesn't wander on code-diff reviews. +- Non-agy pre-existing drift between `codev/resources/commands/codev.md` and its skeleton + twin (unrelated command-doc content) — candidate for a MAINTAIN sweep. 
diff --git a/codev/specs/778-gemini-cli-antigravity-cli-jun.md b/codev/specs/778-gemini-cli-antigravity-cli-jun.md new file mode 100644 index 000000000..255d05aea --- /dev/null +++ b/codev/specs/778-gemini-cli-antigravity-cli-jun.md @@ -0,0 +1,296 @@ +--- +approved: 2026-06-02 +validated: [gemini, codex, claude] +--- + +# Specification: Migrate the Gemini consult lane to the Antigravity CLI (`agy`) + +## Metadata +- **ID**: spec-2026-06-01-778-gemini-antigravity-cli +- **Status**: approved (Approach B, single-agy; human-approved at the spec-approval gate 2026-06-02) +- **Created**: 2026-06-01 +- **Issue**: #778 +- **Deadline**: 2026-06-18 (Gemini CLI subscription serving retires) + +## Architect Directive (supersedes prior draft) +The first draft recommended pivoting the Gemini lane to the Gemini **Developer API** (Approach A). +The architect **rejected** that at the spec-approval gate and directed **Approach B — swap the lane's +backend from the `gemini` CLI to the Antigravity CLI (`agy`)** — with these fixed priorities: +1. **Preserve agentic file-reading.** `agy` is an agent that reads files from disk like the old + `gemini` CLI. Do **not** inline-and-strip filesystem access (that was an A-path quality + regression). Keep the existing "read the diff / explore the filesystem" reviewer prompts. +2. ~~Keep the Pro model.~~ **SUPERSEDED (2026-06-02): do NOT pin the model — use `agy`'s default.** + The architect decided against Pro-pinning to keep the swap lean. The lane uses whatever `agy` + defaults to (currently **Gemini 3.5 Flash (High)**). **Accepted tradeoff:** Flash < Pro for review + depth, accepted in exchange for avoiding a brittle, non-obvious `--print` model-pinning mechanism. +3. **Subscription / OAuth auth** (AI Ultra) — ~3× cheaper than per-token API for our volume. **Not** + an API key. +4. **Keep it lean.** This is fundamentally a backend swap (`cli:'gemini'` → `agy` + flags) + auth + + the skip safety, **not** a redesign. 
+ +The prior draft's two good catches are **retained**: (a) a dead/unavailable lane must be a +**porch-safe NON-BLOCKING skip** (porch's verdict parser defaults missing/short output to +`REQUEST_CHANGES`, which would otherwise block phase progression); (b) usage/cost handling must +**degrade gracefully** (subscription credits aren't per-token). The Gemini-API approach is now +**out of scope** (see Out of Scope). + +## Problem Statement +Codev's `consult` tool uses the Google **Gemini CLI** (`gemini`) as one of three default reviewer +lanes (with Codex and Claude). On **June 18, 2026**, the Gemini CLI / Code Assist subscription +serving stops for Google AI Pro/Ultra/free-individual users. Because `gemini` is a *default* model +and porch's verdict parser blocks on a missing/error review, the dead lane would not just reduce +review coverage — it would **block** SPIR/ASPIR/BUGFIX/AIR/PIR/MAINTAIN phase progression for +affected users on a hard deadline. Codev must move the "Gemini perspective" onto Google's +replacement, the **Antigravity CLI (`agy`)**, using the user's subscription auth. + +## Verified `agy` Contract (empirical, 2026-06-01) +All of the following was confirmed by installing and running the real CLI on macOS (darwin_arm64): + +- **The real CLI is a standalone binary, distinct from the IDE.** `which agy` resolves to + `~/.antigravity/antigravity/bin/agy`, which is a **symlink to the Antigravity IDE Electron binary** + (`/Applications/Antigravity.app/.../bin/antigravity`) — *not* the headless CLI. The real CLI is a + ~142 MB native Go binary, **v1.0.4**, installed via the official Unix script + `https://antigravity.google/cli/install.sh` (SHA512-verified) to **`~/.local/bin/agy`**. The + installer prepends `~/.local/bin` to PATH (`.zshrc`/`.zprofile`), so fresh shells resolve the real + CLI — but Codev must **not trust PATH** (stale shells / the IDE symlink shadow it). 
→ **Footgun: + Codev must invoke the real CLI deterministically** (pin path and/or verify the resolved binary is + the CLI, e.g. it answers `--print`, not the IDE launcher). +- **Headless mode:** `agy --print` (aliases `-p`, `--prompt`) — "run a single prompt + non-interactively and print the response." `--print-timeout <duration>` (default 5m). +- **File access (preserves agentic reading):** `agy --print --sandbox --add-dir "<dir>"` + reads files from `<dir>` non-interactively **without** `--dangerously-skip-permissions` — + verified end-to-end (the reviewer read a planted file and returned its contents). `--sandbox` + ("terminal restrictions enabled") auto-grants read access to `--add-dir` paths without a TTY + prompt. This is the **recommended, more-constrained** mechanism; the broader + `--dangerously-skip-permissions` (auto-approve *all* tool requests) is **not needed** and was + (rightly) flagged as a risk — avoid it. +- **Auth = OAuth / subscription** (matches priority #3): first run prints a Google OAuth URL (scopes + `cloud-platform`, `userinfo.email/profile`, `openid`) and accepts a browser sign-in or a pasted + auth code; the token then persists (under `~/Library/Application Support/Antigravity`) and + subsequent `--print` runs need no re-auth. No API key. **Caveat:** the first-run auth wait is short + (~30s) and **interactive** — it cannot be completed head-less in CI. +- **No `--model` flag; the lane uses `agy`'s default model (per architect decision — no pinning).** + Per Antigravity docs the CLI defaults to **Gemini 3.5 Flash (High)**; Pro is selectable only via the + interactive **`/model`** slash command (no `--print` equivalent). The architect decided **not** to + pin Pro (keep it lean), so the lane simply uses the default — currently Flash. No action needed for + model selection. (Binary internals show a model-tier system; a self-id probe timed out, so the + served model id isn't reliably introspectable via `--print` — noted, not blocking.)
+- **No JSON / usage output.** `--print` returns plain text only — no token-usage stats. → cost rows + must degrade gracefully. +- **No system-prompt/role flag** (no `GEMINI_SYSTEM_MD` equivalent). → fold the reviewer role into + the `--print` prompt text. +- **Instruction-following works** in `--print` (a constrained "reply with only X" task returned + exactly X). + +## Current State (Codev's `gemini` surface — audited 2026-06-01) +- `packages/codev/src/commands/consult/index.ts:37-40` — `MODEL_CONFIGS.gemini = { cli:'gemini', + args:['--model','gemini-3.1-pro-preview'], envVar:'GEMINI_SYSTEM_MD' }`; spawns with + `--output-format json`, role via `GEMINI_SYSTEM_MD` temp file, prompt via stdin (heap handling for + >500 KB diffs, bugfix #680), parses JSON usage. Alias `pro → gemini` (`:54-58`). +- Prompt builders rely on **agentic file-reading** (to be PRESERVED): `:884` "Read the diff file from + `${diffPath}`", `:1051` "Explore the filesystem", `:885/1042/1154/664/1588` "full filesystem + access". `buildPRQuery` writes the diff to a temp file and points the reviewer at it. +- `packages/codev/src/lib/config.ts:88` — default consult models `['gemini','codex','claude']`. +- `codev-skeleton/protocols/{spir,aspir,maintain}/protocol.json` default `["gemini","codex","claude"]`; + `{air,pir,bugfix}` default `["gemini","codex"]`. `protocol-schema.json:155` enum includes `gemini`; + `porch/next.ts:51` `VALID_MODELS` includes `gemini`. +- `packages/codev/src/commands/porch/verdict.ts:27,46-47,55` — missing/short/error verdict → defaults + to `REQUEST_CHANGES`; `CONSULT_ERROR`/`REQUEST_CHANGES` block approval. (Why the skip must be + explicitly non-blocking.) +- `packages/codev/src/commands/doctor.ts:153-163` (presence check, hint → gemini-cli github) and + `:266-274` (auth check `gemini --yolo 'Reply with just OK'`). +- `packages/codev/src/commands/consult/usage-extractor.ts` — pricing key `gemini-3.1-pro`. 
+- Other surfaces (scoped below): `agent-farm/utils/harness.ts:114,240` (Gemini-CLI *builder* + harness), `generate-image.ts` (Gemini **API**, unaffected), `bench.ts` (benchmark defaults), docs. +- ~60 tests across `consult.test.ts`, `consult.e2e.test.ts`, `metrics.test.ts`, + `consultation-models.test.ts`, `doctor.test.ts`, `config.test.ts`. + +## Desired State +- The Gemini consult lane invokes **`agy --print --sandbox --add-dir <dir>`** (role folded into + the prompt), reaching Gemini via the user's **subscription/OAuth** auth, with the reviewer still + **reading the diff/repo from disk** (agentic behavior preserved). +- The lane uses `agy`'s **default** model (no pinning, per architect decision — currently Gemini 3.5 + Flash). The **model identifier stays `gemini`** everywhere (`MODEL_CONFIGS` key, `VALID_MODELS`, + `protocol-schema.json` enum, default model lists, user-facing config, the `pro` alias) — only the + *backend* changes; **no rename** to `agy`/`antigravity`. +- Codev invokes the **real `agy` CLI deterministically**, never the IDE symlink; if it cannot resolve + a binary that satisfies the headless contract, it **skips the lane** (below) rather than launching + the IDE. +- A missing/unauthed/timed-out/invalid-binary `agy` lane is a **non-blocking skip**: the lane emits + **`VERDICT: COMMENT` / `SUMMARY: Skipped (agy unavailable: <reason>)`**, which `verdict.ts` treats + as non-blocking (`allApprove` accepts `COMMENT`; verified `:54-59`). Porch-orchestrated runs still + advance (Codex/Claude complete; Gemini reported skipped — never a blocking `REQUEST_CHANGES`/ + `CONSULT_ERROR` caused merely by unavailability). +- Cost/usage rows **degrade gracefully** (no `NaN`; show e.g. "n/a (subscription)"). +- `codev doctor` checks for the real `agy` CLI + auth and gives correct, current setup guidance + (official install script; one-time `agy` login). No API-key guidance. +- Docs/skill reference the `agy` setup. Codex/Claude lanes unchanged.
+ +## Iteration-2 Decisions (resolved from 3-way re-consult, 2026-06-02) +Concrete resolutions to reviewer feedback (Codex REQUEST_CHANGES, Gemini COMMENT, Claude +REQUEST_CHANGES). These keep scope lean while removing ambiguity: +- **Model identifier stays `gemini`** — no rename anywhere; only the backend changes. (Claude must-fix.) +- **Non-blocking skip = C2 (decided, not deferred):** the lane's wrapper emits `VERDICT: COMMENT` + + a `SUMMARY: Skipped (...)` line when `agy` is unavailable. `verdict.ts` treats `COMMENT` as + non-blocking (verified `:42,:54-59`); a *missing* verdict would default to `REQUEST_CHANGES` and + block, so the explicit `COMMENT` is mandatory. (Gemini's concrete mechanism; resolves Codex's + "observable skip contract" ask.) +- **Fast auth-skip:** an unauthed `agy --print` prints an OAuth URL and waits ~30s. The wrapper + **streams stdout/stderr and terminates the child early when the OAuth URL is detected**, emitting + the `COMMENT` skip — so an unauthed lane doesn't block the run for 30s. (Gemini.) +- **Binary resolution + rejection rule:** prefer the known install path (`~/.local/bin/agy`), else a + PATH lookup that is **verified** to be the real headless CLI (responds to `--print`/`--version` as + the CLI, not the IDE Electron launcher). If no valid CLI is found (missing, or only the IDE + symlink/stub), treat the lane as **unavailable → `COMMENT` skip with actionable guidance** — never + launch the IDE. (Codex + Claude.) +- **Timeout ownership:** Codev manages its **own** timeout and SIGTERMs the child if `agy` hangs + past it; it does not rely solely on `--print-timeout`. (Claude.) +- **Output handling:** `agy --print` returns **plain text** = the review. `extractReviewText`'s + current `gemini` branch (`JSON.parse(output).response`) must be **adapted to return the raw output** + for the agy backend (else it throws on plain text); usage/cost extraction returns null → cost rows + degrade gracefully. (Claude.) 
+- **Follow the `hermes` precedent** (`index.ts:39,651-668,1587`): a CLI model with `envVar:null`, + role folded into the prompt (`${role}\n\n---\n\n${query}`), and the **temp-file pattern when the + prompt exceeds `CLI_PROMPT_INLINE_MAX_CHARS` (100k)** — which also handles `E2BIG`/large-diff + inlining (Gemini's concern). The existing `buildPRQuery` already writes the diff to a temp file the + reviewer reads, so large content stays file-referenced. (Claude + Gemini.) +- **`pro` alias:** kept **as-is** (historical name; resolves to the `gemini`/agy lane). No rename, no + deprecation warning — leanest. (Claude.) +- **`harness.ts` `GEMINI_HARNESS` is explicitly untouched** and is a **separate concern** from the + consult `MODEL_CONFIGS.gemini` lane; this spec changes only the consult lane. (Claude.) + +## Success Criteria +- [ ] `consult -m gemini` runs through `agy --print` and returns a real review that **reflects file + contents it read** (diff/repo), verified **end-to-end** on a spec, a plan, and a PR (headline- + path lesson — not just mocked unit tests). +- [ ] The lane uses `agy`'s **default** model (no pinning) — per architect decision; Flash is the + accepted default. +- [ ] Auth is **subscription/OAuth**; no API key is required or used by the lane. +- [ ] Codev resolves and runs the **standalone CLI**, not the IDE symlink (a stale-PATH / IDE-symlink + environment does not cause Codev to launch the Electron app). +- [ ] A missing/unauthed `agy` does **not** block porch runs: the lane is skipped non-blockingly and + the user is told why; Codex/Claude still complete. +- [ ] Cost/usage reporting degrades gracefully for the lane (no `NaN`/crash; clear "no per-token + data" indication). +- [ ] `codev doctor` reports real `agy` CLI presence + auth status with correct setup guidance. +- [ ] Existing consult/doctor/config/porch tests pass; new tests cover the `agy` dispatch, the + non-blocking skip, the `pro` alias, and graceful cost degradation. 
Coverage does not regress. +- [ ] No regression to the Codex/Claude lanes. + +## Constraints +- **Deadline 2026-06-18.** `agy` is available and verified today (v1.0.4), so the swap is buildable now. +- **Lean scope:** backend swap + auth + non-blocking skip + cost degradation. No redesign, no new + abstraction layer, no changes to the Codex/Claude lanes beyond keeping the 3-way coherent. +- **Preserve** the agentic file-reading prompt builders (do not inline-and-strip). +- **First-run auth is interactive** (browser/code) and cannot be automated head-less — treat as a + one-time user setup step (like the old `gemini /auth`), surfaced by `doctor`/docs. +- Keep skeleton ↔ `codev/` copies consistent across the four-tier resolver. + +## Out of Scope +- **The Gemini Developer API pivot (former Approach A) — rejected by the architect.** +- A generic multi-provider gateway / model-router. +- The `harness.ts` Gemini-CLI **builder** path: out-of-scope-but-acknowledged (a *builder* using the + `gemini` CLI as its coding agent also breaks for affected tiers; recommend a docs note + follow-up + issue, not a rebuild here). +- `generate-image.ts` (already Gemini **API**, unaffected) — intentionally unchanged. +- `bench.ts` benchmark defaults — naming only if needed. +- `--dangerously-skip-permissions` (unnecessary given `--sandbox --add-dir` works). + +## Open Questions +### Critical +- **RESOLVED (2026-06-02): model selection.** The architect decided **not to pin Pro** — the lane + uses `agy`'s default model (currently Gemini 3.5 Flash). No model-selection work; no `--model` + handling. (This removes what was the only critical open question.) +### Important — mostly resolved by Iteration-2 Decisions +- [x] Binary resolution strategy → **resolved** (prefer `~/.local/bin/agy`; else verified PATH CLI; + reject IDE stub → skip). 
+- [x] `doctor`/consult auth probe without hanging → **resolved** (stream output, detect OAuth URL, + terminate early → `COMMENT` skip; `doctor` uses a short timeout and reports "needs login"). +- [x] Skip mechanism → **resolved** (C2: emit `VERDICT: COMMENT`). +- [ ] **`--print-timeout` value + Codev's own timeout value** for large/agentic reviews — exact + numbers are a Plan detail (ownership is decided: Codev manages its own timeout). +- [ ] **Confirm the precise `agy --print` prompt-delivery** (positional arg vs stdin) in the Plan, to + pick inline vs temp-file per the `hermes` precedent (empirically: positional arg works; large + content already goes via the diff temp file). + +## Security Considerations +- Auth tokens are managed by `agy` (OAuth), stored in the Antigravity app-support dir; Codev never + reads/logs them. +- Prefer `--sandbox --add-dir <dir>` over `--dangerously-skip-permissions` to limit the + agent's tool surface during reviews. +- Codev must execute the **verified** CLI binary (not an arbitrary PATH `agy`), avoiding accidental + launch of the IDE or a shadowed binary. +- The reviewer transmits the same content as today (diff + role + repo files it reads) to Google over + the subscription session; ensure parity (no extra data). + +## Test Scenarios +### Functional +1. **Happy path:** `consult -m gemini` → `agy --print --sandbox --add-dir <dir>` returns a review + that demonstrably used file contents (e.g., references a changed file's actual code). +2. **Non-blocking skip (unit):** no `agy` / not authed / IDE-stub-only → the lane emits + `VERDICT: COMMENT` (Skipped), and `allApprove` is not blocked by it. +2b. **Non-blocking skip (porch-orchestrated, end-to-end):** in an actual porch SPIR run with `agy` + missing/unauthed, **phase progression continues** (the gate isn't blocked by the skipped Gemini + lane) — this is the core failure being prevented, so it must be exercised, not just unit-tested. + (Codex.) +3.
**`pro` alias:** `consult -m pro` resolves to the `agy` lane (note: the alias name is historical; + the lane uses agy's default model, not necessarily "Pro"). +4. **Binary resolution:** with the IDE symlink first on PATH, Codev still invokes the real CLI. +5. **End-to-end headline path:** run on a spec, a plan, and a real PR. +### Non-Functional +1. Cost/usage degradation (no `NaN`; clear "no per-token data"). +2. `doctor` reports agy presence + auth (authed / needs-login) without hanging. +3. No regression in Codex/Claude lanes; skeleton ↔ `codev/` schema/defaults consistent. + +## Dependencies +- **External:** Antigravity CLI (`agy`, v1.0.4+) + a Google subscription (AI Ultra) login. +- **Internal:** `consult` dispatch + (preserved) prompt builders, `usage-extractor`, `porch` + verdict/gate + consultation config, `doctor`, skeleton protocol JSONs, four-tier resolver. + +## References +- Issue #778. Google blog (Gemini CLI → Antigravity CLI): + https://developers.googleblog.com/an-important-update-transitioning-gemini-cli-to-antigravity-cli/ +- Official CLI install: `https://antigravity.google/cli/install.sh` (Unix) — verified v1.0.4. +- Docs (JS-rendered; not extractable via fetch at spec time): antigravity.google/docs/cli-install, + /cli-using, /cli-reference. Contract above established **empirically** instead. +- Prior related work: #680 (large-prompt handling), #878 (gemini lane model id). + +## Risks and Mitigation +| Risk | P | I | Mitigation | +|---|---|---|---| +| Lane uses Flash (agy default), weaker reviews than Pro | High | Low | **Accepted tradeoff** per architect decision (no pinning, for leanness). Revisit if review quality suffers; Pro could be added later if `agy` exposes a non-interactive selector. | +| Codev launches IDE symlink instead of CLI | Med | High | Pin/verify the real binary; binary-resolution test (#4). | +| Unauthed users block porch | Med | High | Non-blocking skip (C2); doctor + docs guide one-time `agy` login.
| +| First-run auth can't run in CI | Med | Med | Treat as one-time user setup; doctor detects "needs login" fast; skip in CI. | +| No token usage → cost reporting breaks | High | Low | Degrade cost rows gracefully (no NaN). | +| `agy` self-updates / contract drifts | Low | Med | Pin observed flags; e2e headline test catches breakage. | +| skeleton/`codev` config drift | Low | Med | Update both; consistency test. | + +## Expert Consultation +**Iteration 1 (2026-06-01, on the prior Approach-A draft):** Gemini REQUEST_CHANGES (filesystem +access), Codex REQUEST_CHANGES (porch skip semantics, enterprise contradiction, doctor scope), +Claude APPROVE. The porch-skip and graceful-cost findings are carried forward; the filesystem-access +concern is now **moot** because Approach B preserves agentic file-reading by design. +**Iteration 2 (2026-06-02, on this Approach-B spec):** Codex **REQUEST_CHANGES**, Gemini **COMMENT**, +Claude **REQUEST_CHANGES**. All substantive points addressed (see Iteration-2 Decisions + rebuttal +`778-specify-iter2-rebuttals.md`): fixed the stale Desired-State "Pro" line (unanimous); stated the +model id stays `gemini` (Claude); adopted the concrete `COMMENT`-verdict skip mechanism (Gemini, +resolving Codex's observable-contract ask); added fast auth-skip, binary rejection rule, timeout +ownership, `extractReviewText` adaptation, `hermes` precedent, `pro`-alias decision, harness +distinction, and a porch-orchestrated progression test. Code claims verified against the tree. + +## Approval +- [ ] Architect review (spec-approval gate) — re-presented for Approach B +- [ ] Expert AI consultation on the Approach-B spec (iteration 2) + +## Notes +Architect noted the work was "over-scoped as full SPIR" — this rewrite is deliberately lean (backend +swap + auth + skip safety + cost degradation). 
Plan sequencing: (1) `agy` dispatch in the gemini lane +(real-binary resolution, `--print --sandbox --add-dir`, role inlined, agy's default model) + +non-blocking skip; (2) graceful cost/usage degradation; (3) doctor + docs + tests; keep +skeleton/`codev` in lockstep. + +--- + +## Amendments + diff --git a/codev/state/spir-778_thread.md b/codev/state/spir-778_thread.md new file mode 100644 index 000000000..c948629f0 --- /dev/null +++ b/codev/state/spir-778_thread.md @@ -0,0 +1,146 @@ +# spir-778 — Gemini CLI → Antigravity CLI migration + +## Context +Issue #778: Gemini CLI is being retired **June 18, 2026** (today is 2026-06-01 — 17 days out). +Google blog: transition Gemini CLI → Antigravity CLI. Codev uses `gemini` as a consult backend. + +## Research findings (Specify phase) +- **What breaks June 18**: subscription/OAuth serving via the Gemini CLI & Code Assist for + Google AI Pro / Ultra / free-individual tiers. Enterprise (Standard/Enterprise licenses, + Google Cloud) unaffected. +- **What survives**: the Gemini **Developer API** (GEMINI_API_KEY, Google AI Studio) and Vertex AI. + The API is NOT deprecated. (Wrinkle: from June 19, unrestricted API keys are blocked — keys must + be scoped to the Generative Language API.) +- **Antigravity CLI (`agy`)**: Go-based, agentic, async, multi-agent. NOT a clean drop-in for + Codev's one-shot `gemini --output-format json --model X` pattern. Headless/JSON support + unconfirmed; not on any public package manager as of late May 2026. Migration guide page + (antigravity.google/docs/gcli-migration) had no extractable technical detail yet. + +## Codev's Gemini surface area (ground truth) +- `packages/codev/src/commands/consult/index.ts:37-40` — MODEL_CONFIGS.gemini = + `{ cli: 'gemini', args: ['--model','gemini-3.1-pro-preview'], envVar: 'GEMINI_SYSTEM_MD' }`; + spawn at ~682 with `--output-format json`, prompt via stdin, NODE_OPTIONS heap bump (#680). 
+- `packages/codev/src/lib/config.ts:88` — default consult models `['gemini','codex','claude']`. +- `packages/codev/src/commands/consult/usage-extractor.ts:19` — pricing key `gemini-3.1-pro`. +- `packages/codev/src/commands/doctor.ts:153-163` (dep check) + `266-274` (auth via `gemini --yolo`). +- `codev-skeleton/protocol-schema.json:155` model enum; `porch/next.ts:51` VALID_MODELS. +- 7 skeleton protocol JSONs reference "gemini"; docs (CLAUDE/AGENTS/README/consult skill). +- ~60 tests across consult/doctor/config/porch. + +## Key decision (for spec-approval gate) +Three migration strategies explored in spec. Leaning: **pivot gemini lane → Gemini Developer API** +(robust, deadline-safe, API not retiring) **with graceful degradation** (claude+codex) when no key. +Antigravity-CLI adoption is risky right now (agentic mismatch, unconfirmed headless, not packaged). +The issue *title* literally says "Gemini CLI > Antigravity CLI" — flagging the divergence to the +architect since my research says the literal Antigravity path is the higher-risk one. + +## Iteration-1 consultation (2026-06-01) +- **Gemini: REQUEST_CHANGES** (fatal): consult prompt builders rely on FILESYSTEM ACCESS + (buildPRQuery writes diff to temp file → "Read the diff file from ${diffPath}"; impl review → + "Explore the filesystem"). A single-shot Gemini API call can't read files. Fix: inline content + (A1) or tool-use loop (A2). Also: enterprise contradiction; decide default-list. +- **Codex: REQUEST_CHANGES** (fatal): porch graceful-skip underspecified — `verdict.ts:27,46-47` + defaults missing/short/error to REQUEST_CHANGES (blocks). Must define non-blocking skip (drop + lane from effective set OR neutral skipped-artifact). Also: enterprise contradiction; doctor + can't locally detect unrestricted keys (relax); scope other gemini surfaces. 
+- **Claude: APPROVE** with notes: `@google/genai ^1.0.0` ALREADY a dependency (lowers cost); + clarify CLI-keep-vs-remove; `hermes` in VALID_MODELS but not schema enum (divergence precedent); + add `pro` alias test; check Gemini API input-size limit for >500KB diffs. + +## Decisions made in revision +- API is DEFAULT gemini backend; **keep legacy CLI as optional backend** (enterprise not regressed). +- API lane gets **inlined review content** (A1); drop "read from disk" instructions for that backend. + Tool-use loop (A2) = future fidelity upgrade. +- **Keep `gemini` in default lists** + porch-safe graceful skip when uncredentialed (non-blocking). +- Doctor: report presence/reachability + June-19 guidance; no proactive unrestricted-key detection. +- harness.ts Gemini *builder* path = out-of-scope-but-acknowledged; generate-image already API + (unaffected); bench = naming only. + +## Status +- [x] Specify: research + ground-truth map done +- [x] Specify: spec drafted, 3-way consult iter-1, REVISED addressing all REQUEST_CHANGES +- [x] Specify: rebuttal written (778-specify-iter1-rebuttals.md) +- [x] **GATE: spec-approval REQUESTED (2026-06-01) — WAITING FOR HUMAN**. Architect notified. +- [ ] After approval → Plan phase + +## ARCHITECT OVERRODE → Approach B (Antigravity `agy`), 2026-06-02 +Architect rejected the API pivot at the gate. Corrected my stale research: `agy` IS packaged + +documented now. Directive: swap gemini lane → `agy`; PRESERVE agentic file-reading (don't +inline-strip); KEEP Pro model (not flash); SUBSCRIPTION/OAuth (AI Ultra, ~3x cheaper, not API key); +keep porch-safe non-blocking skip + graceful cost degradation; keep it LEAN (CLI swap, not redesign). + +## Empirical agy verification (2026-06-01) — done +Installed real CLI v1.0.4 via official `antigravity.google/cli/install.sh` (SHA512-verified) → +`~/.local/bin/agy`. (PATH `agy` = IDE symlink, NOT the CLI → Codev must pin/verify the real binary.) 
+- Headless: `agy --print` (`-p`/`--prompt`), `--print-timeout` (5m default). ✅ +- File-reading: `--print --sandbox --add-dir ` reads files NON-interactively WITHOUT + `--dangerously-skip-permissions` (verified: read planted file → BANANA-42). ✅ Use sandbox+add-dir. +- Auth: OAuth/subscription (AI Ultra), one-time interactive browser/code login; token persists in + ~/Library/Application Support/Antigravity. No API key. First-run auth can't run head-less/CI. +- NO `--model` flag → Pro-pinning is OPEN (self-id probe timed out). NO JSON/usage → degrade cost. + NO role flag → fold role into prompt. Instruction-following works. +- harness blocked `--dangerously-skip-permissions` (I added it unprompted) — correct, and we don't + need it. + +## Spec rewritten to Approach B (lean) — committed +Replaced the Approach-A spec wholesale. Pivot rejected→out of scope; kept porch-safe skip + graceful +cost from prior; preserved agentic file-reading prompts. + +## ⏸ AWAITING ARCHITECT (still at spec-approval gate; NOT approved) +1. ~~Pro-pinning~~ **RESOLVED 2026-06-02**: user said "Don't pro-pin. Let agy use its defaults." + → lane uses agy default (Gemini 3.5 Flash). Accepted tradeoff (Flash < Pro) for leanness. This + reverses the earlier "keep Pro" priority — user decided it right after I flagged default=Flash. + Spec updated throughout (directive #2 superseded, open-Q resolved, success/risk/tests adjusted). + NOTE the contradiction with the (re-sent, stale) architect block that still says "keep Pro" — + treating the direct "don't pro-pin" message as the newer authoritative word. +2. **Gate mechanics**: how to re-present — re-run 3-way consult on the B-spec (needs porch to reopen + specify), or approve directly given "keep it lean"? Architect to direct (strict mode: I won't + touch porch state / approve). +User wanted to "clarify" the file-access AskUserQuestion — now empirically answered (sandbox+add-dir, +no dangerous flag). 
Held the outward consult-rerun + afx notify pending their return. + +## Iteration-2 3-way consult on Approach-B spec (2026-06-02) — architect chose "re-run consult" +Porch couldn't re-consult from the gate (rollback rejected: already in specify; approve=human-only; +done=would bypass gate). So ran the 3 consults MANUALLY (iter2 files), no porch state change. +- **Codex REQUEST_CHANGES**: stale Pro line in Desired State; specify observable skip contract; add + porch-orchestrated progression test; binary-resolution rejection rule. +- **Gemini COMMENT**: same stale Pro; concrete skip = emit `VERDICT: COMMENT` (verdict.ts treats as + non-blocking); auth-hang fast-skip (kill on OAuth URL); E2BIG → hermes temp-file pattern. +- **Claude REQUEST_CHANGES**: stale Pro (must-fix); state model id stays `gemini` (must-fix); + extractReviewText JSON.parse throws on plain text; pro-alias decision; harness GEMINI_HARNESS + distinct; hermes precedent; timeout ownership. +Root miss: when I applied "don't pin Pro" I updated everything EXCEPT the Desired-State bullet — all +3 caught it. Verified verdict.ts (COMMENT non-blocking), extractReviewText (gemini JSON.parse), +hermes (CLI/role-inlined/temp-file) before encoding. +→ Spec revised (added Iteration-2 Decisions section; concrete COMMENT-skip; binary rejection rule; +fast auth-skip; extractReviewText adaptation; hermes precedent; pro-alias kept; harness distinct; +porch-orchestrated test 2b). Rebuttal: 778-specify-iter2-rebuttals.md. Committed. + +## ✅ spec-approval APPROVED by human (2026-06-02) + Amendment A1 added +Architect approved the gate (verified via porch: 778 no longer pending; status shows normal specify). +I did NOT self-approve. + +**Amendment A1 (architect-added at approval): API as CO-EQUAL second backend** (reverses +API-out-of-scope). Gemini lane now supports TWO backends + a selector: +- **agy/OAuth**: agentic file-reading, default model (Flash), cheap. 
(keeps all iter-2 work) +- **Gemini Developer API/GEMINI_API_KEY** (former A1): inline content, Pro model + (gemini-3.1-pro-preview), parse usageMetadata for REAL cost rows, CI-friendly (env-var, no login). +- **Selector** `consult.gemini.backend: agy|api|auto` — DESIGN in Plan; auto-precedence is a + cost-vs-quality tradeoff → PROPOSE + flag for architect, don't hard-code silently. +Spec amended (Out-of-Scope/Desired/Success/Tests + Amendment A1 section). Committed. + +## Plan phase — DRAFTED (2026-06-02) +Advanced specify→plan (porch done). Drafted codev/plans/778-...md — 4 lean phases (passes +plan_exists/has_phases_json/min_two_phases checks): +1. **agy_backend** — agy --print --sandbox --add-dir; verified binary resolution (reject IDE stub); + role inlined (hermes precedent); plain-text→adapt extractReviewText; graceful cost; Codev-owned + timeout; fast COMMENT-skip on OAuth-URL/unavailable; agy doctor check. +2. **api_backend** — @google/genai generateContent, gemini-3.1-pro-preview (Pro), role→ + systemInstruction, INLINED content (no agentic read), usageMetadata→real cost, GEMINI_API_KEY + env auth (CI-friendly), COMMENT-skip when no key, large-input fallback; api doctor check. +3. **backend_selector** — consult.gemini.backend: agy|api|auto. AUTO PRECEDENCE proposed + (prefer api if key present, else agy if authed, else skip) + FLAGGED for architect (cost-vs- + quality) — not hard-coded silently. +4. **docs_skeleton_e2e** — doctor consolidation, docs/skeleton (model id stays gemini, no rename), + harness.ts noted untouched, e2e headline both backends + porch-progression test. +Next: porch done → Plan's 3-way consult (reviews combined design) → plan-approval gate (HUMAN). 
diff --git a/packages/codev/src/__tests__/cli/agy-integration.e2e.test.ts b/packages/codev/src/__tests__/cli/agy-integration.e2e.test.ts new file mode 100644 index 000000000..418ef22a1 --- /dev/null +++ b/packages/codev/src/__tests__/cli/agy-integration.e2e.test.ts @@ -0,0 +1,98 @@ +/** + * Guarded real-`agy` integration smoke for the gemini consult lane (Phase 1, #778). + * + * Runs the REAL Antigravity CLI (this file deliberately does NOT mock + * node:child_process). When agy is unavailable or unauthenticated (e.g. CI), + * the lane's non-blocking COMMENT skip is detected and the assertion is bypassed + * — so the test is a no-op there rather than a failure. When agy is installed + * and signed in, it provides real acceptance evidence that `consult -m gemini` + * (agy backend) returns a review that actually used file contents. + */ +import { describe, it, expect } from 'vitest'; +import { execFileSync } from 'node:child_process'; +import * as fs from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { resolveAgyBin, _runAgyConsultation } from '../../commands/consult/index.js'; +import { CONSULT_BIN } from './helpers.js'; + +/** A review file is the non-blocking skip artifact (agy unavailable/unauthed/timeout). */ +function isSkip(out: string): boolean { + return out.includes('VERDICT: COMMENT') && /Skipped/i.test(out); +} + +describe('agy lane integration (guarded; real agy)', () => { + it('returns a review that used file contents, or skips non-blockingly', async () => { + if (!resolveAgyBin()) { + // agy CLI not installed in this environment — nothing to verify. 
+ return; + } + + const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'agy-integ-')); + try { + const marker = `PLANTED_${Date.now()}`; + fs.writeFileSync(path.join(dir, 'planted.txt'), `The codeword is ${marker}.\n`); + const outputPath = path.join(dir, 'review.txt'); + + await _runAgyConsultation( + 'Read the file planted.txt in this directory and reply with ONLY the codeword it contains.', + 'You are a terse reviewer.', + dir, + outputPath, + ); + + const out = fs.existsSync(outputPath) ? fs.readFileSync(outputPath, 'utf-8') : ''; + if (isSkip(out)) { + // agy unavailable/unauthenticated here — the non-blocking skip is the + // correct behavior; no further assertion in this environment. + return; + } + + // Authed run: the review must reflect the file's contents (agentic reading). + expect(out).toContain(marker); + } finally { + fs.rmSync(dir, { recursive: true, force: true }); + } + }, 90_000); // generous: real agy network round-trip + + // Front-door coverage: exercise the actual `consult -m gemini` CLI (not the + // internal runAgyConsultation), so the whole dispatch path is proven — + // arg parsing → model alias/MODEL_CONFIGS resolution → runAgyConsultation → + // agy. Guarded the same way: a missing/unauthed agy yields the non-blocking + // skip and the assertion is bypassed. + it('`consult -m gemini --prompt` (real binary) reads files or skips non-blockingly', async () => { + if (!resolveAgyBin() || !fs.existsSync(CONSULT_BIN)) { + // agy not installed, or the CLI hasn't been built — nothing to verify. + return; + } + + const dir = fs.realpathSync(fs.mkdtempSync(path.join(os.tmpdir(), 'agy-frontdoor-'))); + try { + // Make the temp dir a workspace root so findWorkspaceRoot() resolves here + // and agy's --add-dir grants read access to the planted file. 
+ execFileSync('git', ['init', '-q'], { cwd: dir }); + const marker = `FRONTDOOR_${Date.now()}`; + fs.writeFileSync(path.join(dir, 'planted.txt'), `The codeword is ${marker}.\n`); + const outputPath = path.join(dir, 'review.txt'); + + // Drive the built consult CLI directly — the genuine `-m gemini` front door. + // Only the canonical id is passed here; the `pro` alias is not exercised — alias resolution is unit-tested. + execFileSync( + 'node', + [ + CONSULT_BIN, + '-m', 'gemini', + '--prompt', 'Read the file planted.txt in this directory and reply with ONLY the codeword it contains.', + '--output', outputPath, + ], + { cwd: dir, env: { ...process.env, HOME: path.join(dir, 'home') }, stdio: 'pipe', timeout: 150_000 }, + ); + + const out = fs.existsSync(outputPath) ? fs.readFileSync(outputPath, 'utf-8') : ''; + if (isSkip(out)) return; // non-blocking skip — correct when agy is unavailable + expect(out).toContain(marker); + } finally { + fs.rmSync(dir, { recursive: true, force: true }); + } + }, 180_000); // real agy round-trip via a freshly-spawned node CLI process +}); diff --git a/packages/codev/src/__tests__/consult.test.ts b/packages/codev/src/__tests__/consult.test.ts index 8d63dd2e6..9ce997955 100644 --- a/packages/codev/src/__tests__/consult.test.ts +++ b/packages/codev/src/__tests__/consult.test.ts @@ -6,6 +6,21 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; import * as fs from 'node:fs'; import * as path from 'node:path'; import { tmpdir } from 'node:os'; +import { EventEmitter } from 'node:events'; + +// Fake child process for agy tests: stdout/stderr emitters + kill, emits on next tick. 
+function makeFakeAgyProc(opts: { stdout?: string; stderr?: string; code?: number; closeAfter?: boolean }): any { + const proc: any = new EventEmitter(); + proc.stdout = new EventEmitter(); + proc.stderr = new EventEmitter(); + proc.kill = vi.fn(); + setImmediate(() => { + if (opts.stdout) proc.stdout.emit('data', Buffer.from(opts.stdout)); + if (opts.stderr) proc.stderr.emit('data', Buffer.from(opts.stderr)); + if (opts.closeAfter !== false) proc.emit('close', opts.code ?? 0); + }); + return proc; +} // Mock forge module (imported by consult/index.ts) vi.mock('../lib/forge.js', () => ({ @@ -68,37 +83,28 @@ describe('consult command', () => { }); describe('model configuration', () => { - it('should support model aliases', () => { - // The MODEL_ALIASES mapping - const aliases: Record = { - 'pro': 'gemini', - 'gpt': 'codex', - 'opus': 'claude', - }; - - expect(aliases['pro']).toBe('gemini'); - expect(aliases['gpt']).toBe('codex'); - expect(aliases['opus']).toBe('claude'); + it('should support model aliases', async () => { + // Assert the REAL exported alias map (not a hardcoded duplicate). The + // `pro` alias is additionally exercised through the real execution path + // in the agy describe block below. 
+ const { _MODEL_ALIASES } = await import('../commands/consult/index.js'); + expect(_MODEL_ALIASES['pro']).toBe('gemini'); + expect(_MODEL_ALIASES['gpt']).toBe('codex'); + expect(_MODEL_ALIASES['opus']).toBe('claude'); }); - it('should have correct CLI configuration for each model', () => { - // Note: Codex now uses model_instructions_file config flag - // The args are built dynamically in runConsultation, not stored in MODEL_CONFIGS - // Claude uses Agent SDK (not CLI) — see 'Claude Agent SDK integration' tests - // Hermes is invoked via `hermes chat -q` in MODEL_CONFIGS - // Bugfix #370: --yolo removed from MODEL_CONFIGS; added conditionally in - // runConsultation only for protocol mode (not general mode) - const configs: Record = { - gemini: { cli: 'gemini', args: [] }, - codex: { cli: 'codex', args: ['exec', '--full-auto'] }, - hermes: { cli: 'hermes', args: ['chat', '-q'] }, - }; - - expect(configs.gemini.cli).toBe('gemini'); - expect(configs.gemini.args).toEqual([]); - expect(configs.codex.args).toContain('--full-auto'); - expect(configs.hermes.cli).toBe('hermes'); - expect(configs.hermes.args).toEqual(['chat', '-q']); + it('should have correct CLI configuration for each model', async () => { + // Assert the REAL exported config (not a hardcoded fake), so a backend + // change is caught. Claude/Codex use SDKs (not MODEL_CONFIGS). + const { _MODEL_CONFIGS } = await import('../commands/consult/index.js'); + // gemini lane dispatches to the Antigravity CLI (agy) via runAgyConsultation + // (#778): cli marker 'agy', no pinned --model, no system-prompt env var. + expect(_MODEL_CONFIGS.gemini.cli).toBe('agy'); + expect(_MODEL_CONFIGS.gemini.args).not.toContain('--model'); + expect(_MODEL_CONFIGS.gemini.envVar).toBeNull(); + // hermes unchanged. 
+ expect(_MODEL_CONFIGS.hermes.cli).toBe('hermes'); + expect(_MODEL_CONFIGS.hermes.args).toEqual(['chat', '-q']); }); it('should use model_instructions_file for codex (not env var)', () => { @@ -284,30 +290,40 @@ describe('consult command', () => { }); describe('CLI availability check', () => { - it('should check if CLI exists before running', async () => { - // Mock execSync to return not found for gemini - const { execSync } = await import('node:child_process'); - vi.mocked(execSync).mockImplementation((cmd: string) => { - if (cmd.includes('which gemini')) { - throw new Error('not found'); - } - return Buffer.from(''); - }); - + it('gemini lane skips non-blockingly when the agy CLI is unavailable', async () => { + // The gemini lane uses the Antigravity CLI (agy). When agy is unavailable + // it must NOT throw/block — it emits a non-blocking COMMENT skip so porch + // runs still advance (was: the old gemini-CLI threw on a missing binary). fs.mkdirSync(path.join(testBaseDir, 'codev', 'roles'), { recursive: true }); fs.writeFileSync( path.join(testBaseDir, 'codev', 'roles', 'consultant.md'), '# Consultant Role' ); - process.chdir(testBaseDir); - vi.resetModules(); - const { consult } = await import('../commands/consult/index.js'); + const stdoutSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true); + const priorBin = process.env.CODEV_AGY_BIN; + process.env.CODEV_AGY_BIN = path.join(testBaseDir, 'no-such-agy'); // does not exist → unavailable + try { + vi.resetModules(); + const { consult } = await import('../commands/consult/index.js'); - await expect( - consult({ model: 'gemini', prompt: 'test' }) - ).rejects.toThrow(/not found/); + let threw = false; + try { + await consult({ model: 'gemini', prompt: 'test' }); + } catch { + threw = true; + } + expect(threw).toBe(false); // non-blocking: resolves, never throws + + const written = stdoutSpy.mock.calls.map(c => String(c[0])).join(''); + expect(written).toContain('VERDICT: COMMENT'); + 
expect(written).toMatch(/skipped/i); + } finally { + stdoutSpy.mockRestore(); + if (priorBin === undefined) delete process.env.CODEV_AGY_BIN; + else process.env.CODEV_AGY_BIN = priorBin; + } }); }); @@ -707,227 +723,219 @@ describe('consult command', () => { }); }); - describe('Gemini --yolo mode restriction (Bugfix #370)', () => { - it('general mode should NOT pass --yolo to Gemini CLI', async () => { - // Bugfix #370: consult -m gemini general "..." was passing --yolo, allowing - // Gemini to auto-approve file writes in the main worktree. General mode - // consultations must be read-only. - vi.resetModules(); + describe('Gemini lane via Antigravity CLI (agy)', () => { + let agyBin: string; + beforeEach(() => { + // A real (non-IDE) file so resolveAgyBin() accepts the override. + agyBin = path.join(testBaseDir, 'agy-fake'); + fs.writeFileSync(agyBin, '#!/bin/sh\n'); + process.env.CODEV_AGY_BIN = agyBin; fs.mkdirSync(path.join(testBaseDir, 'codev', 'roles'), { recursive: true }); fs.writeFileSync( path.join(testBaseDir, 'codev', 'roles', 'consultant.md'), '# Consultant Role' ); process.chdir(testBaseDir); + }); - // Mock execSync so commandExists('gemini') returns true - const { execSync } = await import('node:child_process'); - vi.mocked(execSync).mockImplementation((cmd: string) => { - if (cmd.includes('which')) return Buffer.from('/usr/bin/gemini'); - return Buffer.from(''); - }); - - const { spawn } = await import('node:child_process'); - const { consult } = await import('../commands/consult/index.js'); - - await consult({ model: 'gemini', prompt: 'audit all files' }); - - // Verify spawn was called without --yolo - const spawnCalls = vi.mocked(spawn).mock.calls; - const geminiCall = spawnCalls.find(call => call[0] === 'gemini'); - expect(geminiCall).toBeDefined(); - const args = geminiCall![1] as string[]; - expect(args).not.toContain('--yolo'); + afterEach(() => { + delete process.env.CODEV_AGY_BIN; }); - it('protocol mode should NOT pass --yolo to Gemini 
CLI', async () => { - // After Bugfix #370 fix (commit 2ea868d0), --yolo is never passed to - // Gemini in any mode — consultations must be read-only. + async function loadAgy() { vi.resetModules(); - - // Clear spawn mock calls from previous tests - const { spawn: spawnBefore } = await import('node:child_process'); - vi.mocked(spawnBefore).mockClear(); - - fs.mkdirSync(path.join(testBaseDir, 'codev', 'roles'), { recursive: true }); - fs.mkdirSync(path.join(testBaseDir, 'codev', 'specs'), { recursive: true }); - fs.mkdirSync(path.join(testBaseDir, 'codev', 'consult-types'), { recursive: true }); - fs.writeFileSync( - path.join(testBaseDir, 'codev', 'roles', 'consultant.md'), - '# Consultant Role' - ); - // resolveProtocolPrompt builds "${type}-review.md", so type 'spec' → 'spec-review.md' - fs.writeFileSync( - path.join(testBaseDir, 'codev', 'consult-types', 'spec-review.md'), - '# Review the spec' - ); - // resolveArchitectQuery needs a spec file matching issue number (padded to 4 digits) - fs.writeFileSync( - path.join(testBaseDir, 'codev', 'specs', '0001-test-feature.md'), - '# Test Feature Spec' - ); - process.chdir(testBaseDir); - - // Mock execSync to return git info for protocol mode queries - const { execSync } = await import('node:child_process'); - vi.mocked(execSync).mockImplementation((cmd: string) => { - if (cmd.includes('which')) return Buffer.from('/usr/bin/gemini'); - if (cmd.includes('git')) return Buffer.from(''); - return Buffer.from(''); - }); - - const { spawn } = await import('node:child_process'); + const cp = await import('node:child_process'); const { consult } = await import('../commands/consult/index.js'); + return { consult, spawn: vi.mocked(cp.spawn) }; + } + + it('invokes agy with --print --sandbox --add-dir (agentic, never the IDE/yolo)', async () => { + const { consult, spawn } = await loadAgy(); + spawn.mockClear(); - // type 'spec' resolves to template 'spec-review.md' - // --issue required from architect context - await consult({ 
model: 'gemini', type: 'spec', issue: '1' }); + await consult({ model: 'gemini', prompt: 'review this' }); - // Verify spawn was called WITHOUT --yolo (never used in any mode) - const spawnCalls = vi.mocked(spawn).mock.calls; - const geminiCall = spawnCalls.find(call => call[0] === 'gemini'); - expect(geminiCall).toBeDefined(); - const args = geminiCall![1] as string[]; - expect(args).not.toContain('--yolo'); + const call = spawn.mock.calls.find(c => c[0] === agyBin); + expect(call).toBeDefined(); + const args = call![1] as string[]; + expect(args).toContain('--print'); + expect(args).toContain('--sandbox'); + expect(args).toContain('--add-dir'); + // Safety (replaces the #370 --yolo concern): never auto-approve all tools. + expect(args).not.toContain('--dangerously-skip-permissions'); }); - }); - describe('Gemini large-prompt crash mitigation (Bugfix #680)', () => { - // V8 old-space exhaustion crashed gemini-cli v0.37.x on PR diffs >500KB. - // Fix: bump heap via NODE_OPTIONS and pipe the prompt via stdin (no argv). + it('scopes --add-dir to workspace + a dedicated subdir, never the whole OS temp dir', async () => { + // Security (#778 CMAP): granting the entire tmpdir() would expose unrelated + // /tmp files to the sandboxed reviewer. Grant only the consult sandbox subdir. + const { consult, spawn } = await loadAgy(); + spawn.mockClear(); + + await consult({ model: 'gemini', prompt: 'review this' }); + + const call = spawn.mock.calls.find(c => c[0] === agyBin); + expect(call).toBeDefined(); + const args = call![1] as string[]; + const grantedDirs = args.filter((_a, i) => args[i - 1] === '--add-dir'); + // Never grant the entire OS temp dir. + expect(grantedDirs).not.toContain(tmpdir()); + // At least one granted dir must be a dedicated, owned consult sandbox subdir under tmp. 
+ expect(grantedDirs.some(d => d.startsWith(tmpdir()) && /[/\\]codev-consult-/.test(d))).toBe(true); + }); - it('should bump NODE_OPTIONS heap when spawning gemini', async () => { - vi.resetModules(); - const { spawn: spawnBefore } = await import('node:child_process'); - vi.mocked(spawnBefore).mockClear(); + it('routes the `pro` alias through the real execution path to the agy lane', async () => { + // `pro` → gemini → agy: exercise the actual resolution, not a hardcoded map. + const { consult, spawn } = await loadAgy(); + spawn.mockClear(); - fs.mkdirSync(path.join(testBaseDir, 'codev', 'roles'), { recursive: true }); - fs.writeFileSync( - path.join(testBaseDir, 'codev', 'roles', 'consultant.md'), - '# Consultant Role' - ); - process.chdir(testBaseDir); + await consult({ model: 'pro', prompt: 'review this' }); - const { execSync } = await import('node:child_process'); - vi.mocked(execSync).mockImplementation((cmd: string) => { - if (cmd.includes('which')) return Buffer.from('/usr/bin/gemini'); - return Buffer.from(''); - }); + const call = spawn.mock.calls.find(c => c[0] === agyBin); + expect(call).toBeDefined(); // resolved to the agy backend + expect(call![1] as string[]).toContain('--print'); + }); - const { spawn } = await import('node:child_process'); - const { consult } = await import('../commands/consult/index.js'); + it('folds the reviewer role into the prompt (no GEMINI_SYSTEM_MD env)', async () => { + const { consult, spawn } = await loadAgy(); + spawn.mockClear(); - await consult({ model: 'gemini', prompt: 'review this PR' }); + await consult({ model: 'gemini', prompt: 'UNIQUE_QUERY_MARKER' }); - const geminiCall = vi.mocked(spawn).mock.calls.find(call => call[0] === 'gemini'); - expect(geminiCall).toBeDefined(); - const spawnOpts = geminiCall![2] as { env?: Record }; - expect(spawnOpts.env).toBeDefined(); - expect(spawnOpts.env!.NODE_OPTIONS).toContain('--max-old-space-size=8192'); + const call = spawn.mock.calls.find(c => c[0] === agyBin); + 
expect(call).toBeDefined(); + const args = call![1] as string[]; + const promptArg = args[args.length - 1]; + expect(promptArg).toContain('UNIQUE_QUERY_MARKER'); // query inlined + expect(promptArg).toContain('Consultant Role'); // role folded in + const opts = call![2] as { env?: Record }; + expect(opts.env?.GEMINI_SYSTEM_MD).toBeUndefined(); }); - it('should NOT pass the query as a positional argv to gemini', async () => { - // Large queries on argv risk E2BIG and force V8 to hold the prompt twice. - // The query must flow through stdin, not argv. - vi.resetModules(); - const { spawn: spawnBefore } = await import('node:child_process'); - vi.mocked(spawnBefore).mockClear(); + it('writes a very large prompt to a temp file instead of argv (E2BIG safety)', async () => { + const { consult, spawn } = await loadAgy(); + spawn.mockClear(); - fs.mkdirSync(path.join(testBaseDir, 'codev', 'roles'), { recursive: true }); - fs.writeFileSync( - path.join(testBaseDir, 'codev', 'roles', 'consultant.md'), - '# Consultant Role' - ); - process.chdir(testBaseDir); - - const { execSync } = await import('node:child_process'); - vi.mocked(execSync).mockImplementation((cmd: string) => { - if (cmd.includes('which')) return Buffer.from('/usr/bin/gemini'); - return Buffer.from(''); - }); + const huge = 'X'.repeat(200_000); + await consult({ model: 'gemini', prompt: huge }); - const { spawn } = await import('node:child_process'); - const { consult } = await import('../commands/consult/index.js'); + const call = spawn.mock.calls.find(c => c[0] === agyBin); + expect(call).toBeDefined(); + const args = call![1] as string[]; + const promptArg = args[args.length - 1]; + expect(promptArg).not.toContain(huge); // not inlined on argv + expect(promptArg).toMatch(/Read the full consultation prompt from this file/); + }); - const uniqueQuery = 'UNIQUE_BUGFIX_680_SENTINEL_' + Date.now(); - await consult({ model: 'gemini', prompt: uniqueQuery }); + it('passes plain-text agy output through as the 
review', async () => { + const { consult, spawn } = await loadAgy(); + spawn.mockClear(); + spawn.mockReturnValueOnce(makeFakeAgyProc({ stdout: 'PLAINTEXT_REVIEW_BODY', code: 0 })); - const geminiCall = vi.mocked(spawn).mock.calls.find(call => call[0] === 'gemini'); - expect(geminiCall).toBeDefined(); - const args = geminiCall![1] as string[]; - expect(args.some(a => a.includes(uniqueQuery))).toBe(false); + const stdoutSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true); + try { + await consult({ model: 'gemini', prompt: 'review' }); + const written = stdoutSpy.mock.calls.map(c => String(c[0])).join(''); + expect(written).toContain('PLAINTEXT_REVIEW_BODY'); + } finally { + stdoutSpy.mockRestore(); + } }); - it('should pipe the query to stdin instead of argv', async () => { - // stdio[0] must be 'pipe' for gemini (so we can write the prompt), not 'ignore'. - vi.resetModules(); - const { spawn: spawnBefore } = await import('node:child_process'); - vi.mocked(spawnBefore).mockClear(); + it('skips non-blockingly (VERDICT: COMMENT) when agy is unauthenticated', async () => { + const { consult, spawn } = await loadAgy(); + spawn.mockClear(); + spawn.mockReturnValueOnce(makeFakeAgyProc({ + stderr: 'Authentication required. 
Please visit the URL to log in:\nhttps://accounts.google.com/o/oauth2/auth?client_id=x', + closeAfter: false, + })); - fs.mkdirSync(path.join(testBaseDir, 'codev', 'roles'), { recursive: true }); - fs.writeFileSync( - path.join(testBaseDir, 'codev', 'roles', 'consultant.md'), - '# Consultant Role' - ); - process.chdir(testBaseDir); + const stdoutSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true); + try { + let threw = false; + try { await consult({ model: 'gemini', prompt: 'review' }); } catch { threw = true; } + expect(threw).toBe(false); // non-blocking + const written = stdoutSpy.mock.calls.map(c => String(c[0])).join(''); + expect(written).toContain('VERDICT: COMMENT'); + expect(written).toMatch(/not authenticated/i); + } finally { + stdoutSpy.mockRestore(); + } + }); - const { execSync } = await import('node:child_process'); - vi.mocked(execSync).mockImplementation((cmd: string) => { - if (cmd.includes('which')) return Buffer.from('/usr/bin/gemini'); - return Buffer.from(''); - }); + it('skips non-blockingly when agy times out producing the review (non-response message)', async () => { + // On a heavy agentic task that outruns --print-timeout, agy returns a + // "timed out waiting for response" message (not a review) — treat as a skip. 
+ const { consult, spawn } = await loadAgy(); + spawn.mockClear(); + spawn.mockReturnValueOnce(makeFakeAgyProc({ + stdout: 'An background process has been started to run `agy --sandbox`.\nError: timed out waiting for response', + code: 0, + })); + + const stdoutSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true); + try { + let threw = false; + try { await consult({ model: 'gemini', prompt: 'review' }); } catch { threw = true; } + expect(threw).toBe(false); // non-blocking + const written = stdoutSpy.mock.calls.map(c => String(c[0])).join(''); + expect(written).toContain('VERDICT: COMMENT'); + expect(written).toMatch(/timed out/i); + } finally { + stdoutSpy.mockRestore(); + } + }); + }); - const { spawn } = await import('node:child_process'); - const { consult } = await import('../commands/consult/index.js'); + describe('agy binary resolution (resolveAgyBin / isRealAgyCli)', () => { + afterEach(() => { delete process.env.CODEV_AGY_BIN; }); - await consult({ model: 'gemini', prompt: 'small prompt' }); + it('isRealAgyCli accepts a real standalone binary', async () => { + const { isRealAgyCli } = await import('../commands/consult/index.js'); + const real = path.join(testBaseDir, 'agy-real'); + fs.writeFileSync(real, '#!/bin/sh\n'); + expect(isRealAgyCli(real)).toBe(true); + }); - const geminiCall = vi.mocked(spawn).mock.calls.find(call => call[0] === 'gemini'); - expect(geminiCall).toBeDefined(); - const spawnOpts = geminiCall![2] as { stdio?: Array }; - expect(spawnOpts.stdio).toBeDefined(); - expect(spawnOpts.stdio![0]).toBe('pipe'); + it('isRealAgyCli rejects a nonexistent path', async () => { + const { isRealAgyCli } = await import('../commands/consult/index.js'); + expect(isRealAgyCli(path.join(testBaseDir, 'nope-agy'))).toBe(false); }); - it('should preserve the caller NODE_OPTIONS when appending max-old-space-size', async () => { - vi.resetModules(); - const { spawn: spawnBefore } = await import('node:child_process'); - 
vi.mocked(spawnBefore).mockClear(); + it('isRealAgyCli rejects the Antigravity IDE launcher symlink', async () => { + const { isRealAgyCli } = await import('../commands/consult/index.js'); + // Simulate the IDE: a symlink whose target is under Antigravity.app. + const ideDir = path.join(testBaseDir, 'Antigravity.app', 'Contents', 'Resources', 'app', 'bin'); + fs.mkdirSync(ideDir, { recursive: true }); + const ideTarget = path.join(ideDir, 'antigravity'); + fs.writeFileSync(ideTarget, '#!/bin/sh\n'); + const link = path.join(testBaseDir, 'agy-ide-link'); + fs.symlinkSync(ideTarget, link); + expect(isRealAgyCli(link)).toBe(false); + }); - fs.mkdirSync(path.join(testBaseDir, 'codev', 'roles'), { recursive: true }); - fs.writeFileSync( - path.join(testBaseDir, 'codev', 'roles', 'consultant.md'), - '# Consultant Role' - ); - process.chdir(testBaseDir); + it('resolveAgyBin honors a valid CODEV_AGY_BIN override, rejects an invalid one', async () => { + const { resolveAgyBin } = await import('../commands/consult/index.js'); + const real = path.join(testBaseDir, 'agy-override'); + fs.writeFileSync(real, '#!/bin/sh\n'); + process.env.CODEV_AGY_BIN = real; + expect(resolveAgyBin()).toBe(real); + process.env.CODEV_AGY_BIN = path.join(testBaseDir, 'missing-agy'); + expect(resolveAgyBin()).toBeNull(); + }); + it('agyRespondsToVersion behaviorally verifies a PATH candidate (--version)', async () => { + // A bare PATH `agy` is only accepted if it behaves like the headless CLI. 
const { execSync } = await import('node:child_process'); + const { agyRespondsToVersion } = await import('../commands/consult/index.js'); vi.mocked(execSync).mockImplementation((cmd: string) => { - if (cmd.includes('which')) return Buffer.from('/usr/bin/gemini'); - return Buffer.from(''); + if (cmd.includes('good-agy')) return '1.0.4\n' as unknown as Buffer; // prints a version + if (cmd.includes('bad-agy')) return '' as unknown as Buffer; // no version output + throw new Error('not a known command'); }); - - const priorNodeOptions = process.env.NODE_OPTIONS; - process.env.NODE_OPTIONS = '--enable-source-maps'; - try { - const { spawn } = await import('node:child_process'); - const { consult } = await import('../commands/consult/index.js'); - - await consult({ model: 'gemini', prompt: 'test' }); - - const geminiCall = vi.mocked(spawn).mock.calls.find(call => call[0] === 'gemini'); - expect(geminiCall).toBeDefined(); - const spawnOpts = geminiCall![2] as { env?: Record }; - expect(spawnOpts.env!.NODE_OPTIONS).toContain('--enable-source-maps'); - expect(spawnOpts.env!.NODE_OPTIONS).toContain('--max-old-space-size=8192'); - } finally { - if (priorNodeOptions === undefined) { - delete process.env.NODE_OPTIONS; - } else { - process.env.NODE_OPTIONS = priorNodeOptions; - } - } + expect(agyRespondsToVersion('good-agy')).toBe(true); + expect(agyRespondsToVersion('bad-agy')).toBe(false); + expect(agyRespondsToVersion('throws-agy')).toBe(false); }); }); diff --git a/packages/codev/src/__tests__/doctor.test.ts b/packages/codev/src/__tests__/doctor.test.ts index fea95029d..2676375e6 100644 --- a/packages/codev/src/__tests__/doctor.test.ts +++ b/packages/codev/src/__tests__/doctor.test.ts @@ -3,7 +3,7 @@ */ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; -import { execSync, spawnSync } from 'node:child_process'; +import { execSync, spawnSync, spawn } from 'node:child_process'; import * as fs from 'node:fs'; import * as path from 'node:path'; import { 
tmpdir } from 'node:os'; @@ -34,10 +34,32 @@ vi.mock('../lib/forge.js', () => ({ resolveAllConcepts: resolveAllConceptsMock, })); +// A minimal fake child process for the async `spawn`-based agy auth probe. +// verifyAgy streams stdout/stderr and kills on the OAuth marker. +const makeFakeChild = vi.hoisted(() => (opts: { stdout?: string; stderr?: string; code?: number | null }) => { + const procH: Record<string, (arg?: unknown) => void> = {}; + const outH: Record<string, (b: Buffer) => void> = {}; + const errH: Record<string, (b: Buffer) => void> = {}; + const child = { + stdout: { on: (ev: string, cb: (b: Buffer) => void) => { outH[ev] = cb; } }, + stderr: { on: (ev: string, cb: (b: Buffer) => void) => { errH[ev] = cb; } }, + on: (ev: string, cb: (arg?: unknown) => void) => { procH[ev] = cb; }, + kill: () => {}, + }; + setImmediate(() => { + if (opts.stdout) outH['data']?.(Buffer.from(opts.stdout)); + if (opts.stderr) errH['data']?.(Buffer.from(opts.stderr)); + procH['close']?.(opts.code ?? 0); + }); + return child; +}); + // Mock child_process vi.mock('node:child_process', () => ({ execSync: vi.fn(), spawnSync: vi.fn(), + // Default for the async agy auth probe: reply "OK" → operational. + spawn: vi.fn(() => makeFakeChild({ stdout: 'OK', code: 0 })), })); // Mock Claude Agent SDK - returns success by default @@ -184,6 +206,7 @@ describe('doctor command', () => { output: [null, 'working', ''], pid: 0, })), + spawn: vi.fn(() => makeFakeChild({ stdout: 'OK', code: 0 })), })); const { doctor } = await import('../commands/doctor.js'); @@ -193,7 +216,8 @@ describe('doctor command', () => { }); it('should return 1 when no AI CLI is available', async () => { - // Mock all core deps present but no AI CLIs + // Mock all core deps present but no AI CLIs (incl. agy unavailable). 
+ process.env.CODEV_AGY_BIN = path.join(tmpdir(), `no-such-agy-${Date.now()}`); vi.mocked(execSync).mockImplementation((cmd: string) => { if (cmd.includes('which claude') || cmd.includes('which gemini') || cmd.includes('which codex')) { throw new Error('not found'); @@ -233,7 +257,12 @@ describe('doctor command', () => { })() ); - const result = await doctor(); + let result: number; + try { + result = await doctor(); + } finally { + delete process.env.CODEV_AGY_BIN; + } expect(result).toBe(1); }); }); @@ -795,7 +824,14 @@ describe('doctor command', () => { expect(hasAuthError).toBe(true); }); - it('should show timeout message for network issues', async () => { + it('reports "needs login" promptly when agy is unauthenticated (fast OAuth detection)', async () => { + // The gemini lane verifies via agy. An unauthenticated agy prints an OAuth + // URL and waits; verifyAgy streams the output and must detect it on the + // early stream (not stall for the full timeout), reporting "needs login". 
+ const agyBin = path.join(testBaseDir, 'agy-fake'); + fs.writeFileSync(agyBin, '#!/bin/sh\n'); + process.env.CODEV_AGY_BIN = agyBin; + vi.mocked(execSync).mockImplementation((cmd: string) => { if (cmd.includes('which')) { return Buffer.from('/usr/bin/command'); @@ -806,45 +842,11 @@ describe('doctor command', () => { return Buffer.from(''); }); - vi.mocked(spawnSync).mockImplementation((cmd: string, args?: string[]) => { - // Gemini version check succeeds, but auth check times out - if (cmd === 'gemini') { - // Version check (--version) succeeds - if (args?.includes('--version')) { - return { - status: 0, - stdout: '0.1.0', - stderr: '', - signal: null, - output: [null, '0.1.0', ''], - pid: 0, - }; - } - // Auth check (--yolo) times out - return { - status: null, - stdout: '', - stderr: '', - signal: 'SIGTERM', - output: [null, '', ''], - pid: 0, - }; - } - - const responses: Record<string, string> = { - 'node': 'v20.0.0', - 'tmux': 'tmux 3.4', - 'git': 'git version 2.40.0', - }; - return { - status: 0, - stdout: responses[cmd] || 'working', - stderr: '', - signal: null, - output: [null, responses[cmd] || 'working', ''], - pid: 0, - }; - }); + // agy --print emits the OAuth URL on stderr (then would hang) → fast skip. + vi.mocked(spawn).mockReturnValue(makeFakeChild({ + stderr: 'Authentication required. Please visit the URL to log in:\nhttps://accounts.google.com/o/oauth2/auth?client_id=x', + code: null, + }) as unknown as ReturnType<typeof spawn>); vi.resetModules(); @@ -854,14 +856,19 @@ describe('doctor command', () => { logOutput.push(args.join(' ')); }); - const { doctor } = await import('../commands/doctor.js'); - await doctor(); + try { + const { doctor } = await import('../commands/doctor.js'); + await doctor(); - // Should show timeout with network hint - const hasTimeoutHint = logOutput.some(line => - line.includes('Gemini') && (line.includes('timeout') || line.includes('network')) - ); - expect(hasTimeoutHint).toBe(true); + // The Gemini (agy) line should report "needs login". 
+ const hasNeedsLogin = logOutput.some(line => + line.includes('Gemini') && line.includes('needs login') + ); + expect(hasNeedsLogin).toBe(true); + } finally { + delete process.env.CODEV_AGY_BIN; + vi.mocked(spawn).mockReset(); + } }); it('should show operational when Codex login status succeeds', async () => { diff --git a/packages/codev/src/commands/consult/__tests__/metrics.test.ts b/packages/codev/src/commands/consult/__tests__/metrics.test.ts index 767fe779c..7adae5a85 100644 --- a/packages/codev/src/commands/consult/__tests__/metrics.test.ts +++ b/packages/codev/src/commands/consult/__tests__/metrics.test.ts @@ -141,72 +141,19 @@ describe('MetricsDB summary', () => { }); // Test 3: extractUsage() for Gemini parses JSON output -describe('extractUsage for Gemini', () => { - it('extracts token counts and computes cost from single-model JSON output', () => { - const geminiOutput = JSON.stringify({ - response: 'Review text', - stats: { - models: { - 'gemini-3-flash-preview': { - tokens: { prompt: 8000, candidates: 500, cached: 2000, thoughts: 100 }, - }, - }, - }, - }); - const usage = extractUsage('gemini', geminiOutput); - expect(usage).not.toBeNull(); - expect(usage!.inputTokens).toBe(8000); - expect(usage!.cachedInputTokens).toBe(2000); - expect(usage!.outputTokens).toBe(500); - expect(usage!.costUsd).toBeGreaterThan(0); - }); - - it('sums tokens across multiple models', () => { - const geminiOutput = JSON.stringify({ - response: 'Review text', - stats: { - models: { - 'gemini-2.5-flash-lite': { - tokens: { prompt: 3000, candidates: 50, cached: 0 }, - }, - 'gemini-3-flash-preview': { - tokens: { prompt: 5000, candidates: 200, cached: 1000 }, - }, - }, - }, - }); - const usage = extractUsage('gemini', geminiOutput); - expect(usage).not.toBeNull(); - expect(usage!.inputTokens).toBe(8000); - expect(usage!.cachedInputTokens).toBe(1000); - expect(usage!.outputTokens).toBe(250); - expect(usage!.costUsd).toBeGreaterThan(0); +describe('extractUsage for Gemini (agy 
backend)', () => { + // The gemini lane now uses the Antigravity CLI (agy), which emits plain text + // with no token-usage JSON. Usage degrades gracefully to null (no cost row). + it('returns null for plain-text agy output', () => { + expect(extractUsage('gemini', 'A plain-text review from agy.')).toBeNull(); }); - it('returns null for non-JSON output', () => { - const usage = extractUsage('gemini', 'plain text output'); - expect(usage).toBeNull(); + it('returns null even if the output happens to be JSON (no token data from agy)', () => { + expect(extractUsage('gemini', JSON.stringify({ response: 'text' }))).toBeNull(); }); - it('returns null when stats.models is missing', () => { - const usage = extractUsage('gemini', JSON.stringify({ response: 'text' })); - expect(usage).toBeNull(); - }); - - it('clamps cost to non-negative when cached exceeds input', () => { - const geminiOutput = JSON.stringify({ - response: 'Review', - stats: { - models: { - 'gemini-3-flash-preview': { - tokens: { prompt: 1000, candidates: 100, cached: 3000 }, - }, - }, - }, - }); - const usage = extractUsage('gemini', geminiOutput); - expect(usage).not.toBeNull(); - expect(usage!.costUsd).toBeGreaterThanOrEqual(0); + it('returns null for empty output', () => { + expect(extractUsage('gemini', '')).toBeNull(); }); }); @@ -511,22 +458,16 @@ describe('SQLite write failure', () => { }); // Test 10: Gemini extractReviewText parses JSON response field -describe('Gemini extractReviewText', () => { - it('extracts response field from JSON output', () => { - const rawJson = JSON.stringify({ response: 'This is the review text', stats: {} }); - const text = extractReviewText('gemini', rawJson); - expect(text).toBe('This is the review text'); - }); - - it('returns null for non-JSON output', () => { - const text = extractReviewText('gemini', 'This is plain text'); - expect(text).toBeNull(); +describe('Gemini extractReviewText (agy backend)', () => { + // agy emits plain text that is used as-is — 
extractReviewText returns null so + // the caller falls back to the raw output (no JSON parsing). + it('returns null (plain text used as-is, no extraction)', () => { + expect(extractReviewText('gemini', 'A plain-text review from agy.')).toBeNull(); }); - it('returns null when response field is missing', () => { - const rawJson = JSON.stringify({ stats: {} }); - const text = extractReviewText('gemini', rawJson); - expect(text).toBeNull(); + it('returns null even for JSON-looking output (no response-field parsing)', () => { + const rawJson = JSON.stringify({ response: 'unused', stats: {} }); + expect(extractReviewText('gemini', rawJson)).toBeNull(); }); }); @@ -610,46 +551,13 @@ describe('Gemini graceful fallback for malformed output', () => { expect(usage).toBeNull(); }); - it('extractUsage computes per-model cost correctly for 3.1 Pro pricing', () => { - const geminiOutput = JSON.stringify({ - response: 'Review', - stats: { - models: { - 'gemini-3.1-pro-preview': { - tokens: { prompt: 1_000_000, candidates: 1_000_000, cached: 0 }, - }, - }, - }, - }); - const usage = extractUsage('gemini', geminiOutput); - expect(usage).not.toBeNull(); - // 3.1 Pro pricing: 1M input * $2.00/1M + 1M output * $12.00/1M = $14.00 - expect(usage!.costUsd).toBeCloseTo(14.00, 1); - }); - - it('gemini lane points at a current Gemini 3.x model identifier (regression: #878)', () => { - // #878: gemini-3-pro-preview was retired by Google on 2026-03-09; every - // CMAP gemini-side fast-failed with an opaque error. Guard against the - // identifier silently regressing back to a retired model. 
- const modelArg = _MODEL_CONFIGS.gemini.args[_MODEL_CONFIGS.gemini.args.indexOf('--model') + 1]; - expect(modelArg).toBe('gemini-3.1-pro-preview'); - }); - - it('extractUsage computes per-model cost correctly for Flash pricing', () => { - const geminiOutput = JSON.stringify({ - response: 'Review', - stats: { - models: { - 'gemini-3-flash-preview': { - tokens: { prompt: 1_000_000, candidates: 1_000_000, cached: 0 }, - }, - }, - }, - }); - const usage = extractUsage('gemini', geminiOutput); - expect(usage).not.toBeNull(); - // Flash pricing: 1M input * $0.15/1M + 1M output * $0.60/1M = $0.75 - expect(usage!.costUsd).toBeCloseTo(0.75, 1); + it('gemini lane uses the agy backend, no pinned model id (#778, supersedes #878)', () => { + // #878 guarded the pinned Gemini-CLI model id. #778 migrates the lane to the + // Antigravity CLI (agy), which has no --model flag (uses its default). Guard + // that the lane routes to agy and no longer pins a (retirable) model id. + expect(_MODEL_CONFIGS.gemini.cli).toBe('agy'); + expect(_MODEL_CONFIGS.gemini.args).not.toContain('--model'); + expect(_MODEL_CONFIGS.gemini.envVar).toBeNull(); }); }); diff --git a/packages/codev/src/commands/consult/index.ts b/packages/codev/src/commands/consult/index.ts index c481b184d..9a33c811e 100644 --- a/packages/codev/src/commands/consult/index.ts +++ b/packages/codev/src/commands/consult/index.ts @@ -10,7 +10,7 @@ import * as fs from 'node:fs'; import * as path from 'node:path'; import { spawn, execSync, execFileSync } from 'node:child_process'; -import { tmpdir } from 'node:os'; +import { tmpdir, homedir } from 'node:os'; import chalk from 'chalk'; import { query as claudeQuery } from '@anthropic-ai/claude-agent-sdk'; import { Codex } from '@openai/codex-sdk'; @@ -35,7 +35,10 @@ interface ModelConfig { } const MODEL_CONFIGS: Record = { - gemini: { cli: 'gemini', args: ['--model', 'gemini-3.1-pro-preview'], envVar: 'GEMINI_SYSTEM_MD' }, + // gemini dispatches to the Antigravity CLI (`agy`) via 
runAgyConsultation — + // this entry exists only for model validation and the `pro` alias; its + // cli/args are NOT used for dispatch (agy's binary path is resolved at runtime). + gemini: { cli: 'agy', args: [], envVar: null }, hermes: { cli: 'hermes', args: ['chat', '-q'], envVar: null }, }; @@ -608,6 +611,280 @@ async function runClaudeConsultation( } } +// ── Antigravity CLI (`agy`) backend for the `gemini` lane ────────────────── +// Replaces the retiring Gemini CLI. agy is an agent (reads files from disk via +// --add-dir under --sandbox), OAuth-only, default model = Flash, plain-text +// output (no usage JSON). See spec/plan 778. + +// Markers that indicate agy is NOT authenticated (it prints an OAuth URL and +// waits ~30s for an interactive login that can't complete headlessly). When +// seen, we terminate early and emit a non-blocking COMMENT skip. +export const AGY_OAUTH_MARKERS = [ + 'accounts.google.com/o/oauth2', + 'Authentication required', + 'paste the authorization code', + 'Waiting for authentication', +]; +const AGY_PRINT_TIMEOUT = '5m'; // passed to `agy --print-timeout` +const AGY_TIMEOUT_MS = 6 * 60 * 1000; // Codev-owned hard cap (> agy's own timeout) +// OAuth banner appears before any review text; only scan the early stream. +const AGY_MARKER_SCAN_LIMIT = 8192; +// agy's own print-timeout message: on an agentic task that outruns --print-timeout, +// it returns this (often with a "monitoring the task" note) instead of a review. +// Treat it as a non-response → non-blocking skip rather than a garbage "review". +const AGY_NONRESPONSE_MARKER = 'timed out waiting for response'; + +/** + * Verify a path is the real headless `agy` CLI, not the Antigravity IDE + * launcher. The IDE ships `~/.antigravity/.../bin/agy` as a symlink to the + * Electron app binary (`Antigravity.app/.../antigravity`); resolving it and + * launching it would open the IDE, never produce a `--print` review. 
/**
 * Decide whether `p` is the real headless `agy` CLI rather than the
 * Antigravity IDE launcher.
 *
 * The IDE ships `~/.antigravity/.../bin/agy` as a symlink to the Electron app
 * binary (`Antigravity.app/.../antigravity`); launching that would open the
 * IDE and never produce a `--print` review. We reject by realpath WITHOUT
 * executing anything (no risk of launching the GUI).
 *
 * @param p Candidate path to an `agy` binary.
 * @returns `false` when the path does not exist, cannot be resolved, or
 *   resolves into the IDE app bundle / launcher binary; `true` otherwise.
 */
export function isRealAgyCli(p: string): boolean {
  try {
    if (!fs.existsSync(p)) return false;
    const real = fs.realpathSync(p);
    if (real.includes('Antigravity.app')) return false; // IDE app bundle
    if (/[/\\]antigravity(\.exe)?$/.test(real)) return false; // IDE launcher binary
    return true;
  } catch {
    // realpath failed (e.g. broken symlink) → treat as "not a usable CLI".
    return false;
  }
}

/**
 * Positively verify a candidate behaves like the real headless agy CLI by
 * running `--version` (read-only, fast). `isRealAgyCli` rejects the IDE
 * launcher by realpath; this adds behavioral verification for an *untrusted*
 * PATH candidate so we only run a binary proven to be the CLI.
 *
 * The candidate is executed directly (no shell), so a path containing quotes,
 * `$`, backticks or spaces is treated as a literal file name and can never be
 * interpreted as shell syntax.
 *
 * @param bin Path of the binary to probe.
 * @returns `true` when the process exits successfully within 5s and prints a
 *   non-empty version string on stdout; `false` on any failure.
 */
export function agyRespondsToVersion(bin: string): boolean {
  try {
    const out = execFileSync(bin, ['--version'], {
      encoding: 'utf-8',
      timeout: 5000,
      // stdin closed, stdout captured, stderr discarded (replaces `2>/dev/null`).
      stdio: ['ignore', 'pipe', 'ignore'],
    }).trim();
    return out.length > 0;
  } catch {
    // Not executable, non-zero exit, or timed out.
    return false;
  }
}

/**
 * Resolve the real `agy` CLI binary deterministically — never trust a bare
 * PATH lookup (a stale shell or the IDE symlink shadows the CLI). Prefers the
 * official installer path, then a PATH `agy` verified not to be the IDE.
 *
 * Resolution order:
 *  1. `CODEV_AGY_BIN` override — used only if it passes `isRealAgyCli`;
 *     an invalid override yields `null` (no silent fallback).
 *  2. `~/.local/bin/agy` — the canonical install path.
 *  3. `command -v agy` — accepted only if it passes both `isRealAgyCli` and
 *     `agyRespondsToVersion`.
 *
 * @returns Path of the CLI, or `null` if no valid headless CLI is found.
 */
export function resolveAgyBin(): string | null {
  // Explicit override (advanced users / tests): use it if valid, never silently
  // fall back to a different binary the user didn't ask for.
  const override = process.env.CODEV_AGY_BIN;
  if (override) return isRealAgyCli(override) ? override : null;

  // Canonical install path — trusted location; realpath-reject the IDE only.
  const preferred = path.join(homedir(), '.local', 'bin', 'agy');
  if (isRealAgyCli(preferred)) return preferred;

  // A bare PATH `agy` is untrusted: require it to NOT be the IDE (realpath) AND
  // to behave like the headless CLI (`--version`) before we'll run it.
  try {
    const found = execSync('command -v agy 2>/dev/null', { encoding: 'utf-8' }).trim();
    if (found && isRealAgyCli(found) && agyRespondsToVersion(found)) return found;
  } catch {
    // not on PATH
  }
  return null;
}

/**
 * Build the non-blocking skip artifact for the gemini lane.
 * Porch's verdict parser treats COMMENT as non-blocking.
 *
 * @param reason Human-readable explanation of why the lane was skipped.
 * @returns Review-shaped text: a verdict header followed by a short note.
 */
function agySkipContent(reason: string): string {
  return [
    '---',
    'VERDICT: COMMENT',
    `SUMMARY: Gemini lane skipped — ${reason}`,
    'CONFIDENCE: LOW',
    '---',
    '',
    `The Gemini (Antigravity \`agy\`) reviewer was skipped: ${reason}.`,
    'This is a non-blocking skip; the remaining reviewers still apply. To enable the',
    'Gemini lane, install the CLI (https://antigravity.google/cli/install.sh) and run',
    '`agy` once to sign in.',
  ].join('\n');
}

/**
 * Per-process sandbox temp dir for consult artifacts (the PR diff written by
 * buildPRQuery, and the large-prompt file written by runAgyConsultation).
 *
 * Created once per CLI invocation (each `consult` run is its own process), so the
 * sandboxed `agy` reviewer can be granted exactly this directory via `--add-dir`
 * instead of the entire OS temp dir — keeping the grant scoped to the artifacts
 * this flow creates. `mkdtempSync` yields a private, user-owned dir; callers still
 * write with mode 0o600 / flag 'wx' to defeat symlink/clobber races.
 */
let _consultSandboxDir: string | null = null;
function consultSandboxDir(): string {
  if (!_consultSandboxDir) {
    _consultSandboxDir = fs.mkdtempSync(path.join(tmpdir(), 'codev-consult-'));
  }
  return _consultSandboxDir;
}

/**
 * Persist consult output to `outputPath`, creating parent directories as needed.
 * No-op when no path was requested or there is nothing to write.
 *
 * @param outputPath Destination file, or `undefined` to skip writing.
 * @param content Text to write; an empty string is never written.
 */
function writeConsultOutput(outputPath: string | undefined, content: string): void {
  if (!outputPath || content.length === 0) return;
  // `recursive: true` succeeds when the directory already exists, so no
  // separate existence check is needed.
  fs.mkdirSync(path.dirname(outputPath), { recursive: true });
  fs.writeFileSync(outputPath, content);
  console.error(`\nOutput written to: ${outputPath}`);
}

/**
 * Record a metrics row for an agy run. No-op when no metrics context is given.
 *
 * @param metricsCtx Metrics context forwarded to `recordMetrics`, if any.
 * @param startTime `Date.now()` timestamp taken when the run started.
 * @param exitCode Exit code to record for the run.
 * @param errorMessage Skip/failure reason, or `null` on success.
 */
function recordAgyMetrics(
  metricsCtx: MetricsContext | undefined,
  startTime: number,
  exitCode: number,
  errorMessage: string | null,
): void {
  if (!metricsCtx) return;
  recordMetrics(metricsCtx, {
    durationSeconds: (Date.now() - startTime) / 1000,
    // agy --print emits plain text, no token usage → cost rows degrade gracefully (null).
    inputTokens: null,
    cachedInputTokens: null,
    outputTokens: null,
    costUsd: null,
    exitCode,
    errorMessage,
  });
}

/**
 * Run the `gemini` consult lane via the Antigravity CLI (`agy --print`).
 * Preserves agentic file-reading (--sandbox --add-dir), folds the role into the
 * prompt, and NEVER blocks the run: a missing/unauthed/invalid CLI or a
 * timeout/error produces a non-blocking COMMENT skip instead of throwing.
 */
+ */ +async function runAgyConsultation( + queryText: string, + role: string, + workspaceRoot: string, + outputPath?: string, + metricsCtx?: MetricsContext, +): Promise { + const startTime = Date.now(); + + const bin = resolveAgyBin(); + if (!bin) { + const reason = 'agy CLI not found (install: https://antigravity.google/cli/install.sh)'; + const content = agySkipContent(reason); + process.stdout.write(content); + writeConsultOutput(outputPath, content); + recordAgyMetrics(metricsCtx, startTime, 0, reason); + console.error(`\n[gemini (agy) skipped: ${reason}]`); + return; + } + + // agy has no system-prompt flag — fold the role into the prompt (hermes precedent). + const prompt = `${role}\n\n---\n\n${queryText}`; + // Grant the sandboxed agent read access to the workspace AND the dedicated consult + // sandbox dir (where buildPRQuery writes the diff and, below, a large-prompt file + // lands) — NOT the entire OS temp dir, which would over-expose unrelated /tmp files. + const addDirs = [workspaceRoot, consultSandboxDir()]; + let tempFile: string | null = null; + let promptArg = prompt; + // Large prompts can exceed ARG_MAX (E2BIG) — write to a temp file and point agy at it. + if (prompt.length > CLI_PROMPT_INLINE_MAX_CHARS) { + tempFile = path.join(consultSandboxDir(), `codev-consult-prompt-${Date.now()}.md`); + fs.writeFileSync(tempFile, prompt); + promptArg = [ + `Read the full consultation prompt from this file: ${tempFile}`, + 'You have file access. 
Read files directly from disk to review code.', + ].join('\n\n'); + } + + const args = ['--print', '--sandbox', '--print-timeout', AGY_PRINT_TIMEOUT]; + for (const d of addDirs) args.push('--add-dir', d); + args.push(promptArg); + + const cleanup = () => { + if (tempFile && fs.existsSync(tempFile)) { + try { fs.unlinkSync(tempFile); } catch { /* best-effort */ } + } + }; + + return new Promise((resolve) => { + const proc = spawn(bin, args, { + cwd: workspaceRoot, + stdio: ['ignore', 'pipe', 'pipe'], + }); + + const outChunks: Buffer[] = []; + let scanBuf = ''; + let settled = false; + + const settleSkip = (reason: string, exitCode = 0) => { + if (settled) return; + settled = true; + clearTimeout(timer); + try { proc.kill('SIGTERM'); } catch { /* already gone */ } + cleanup(); + const content = agySkipContent(reason); + process.stdout.write(content); + writeConsultOutput(outputPath, content); + recordAgyMetrics(metricsCtx, startTime, exitCode, reason); + console.error(`\n[gemini (agy) skipped: ${reason}]`); + resolve(); + }; + + const timer = setTimeout( + () => settleSkip('agy timed out (no response)', 1), + AGY_TIMEOUT_MS, + ); + + const watch = (buf: Buffer, isStdout: boolean) => { + if (isStdout) outChunks.push(buf); + if (scanBuf.length < AGY_MARKER_SCAN_LIMIT) { + scanBuf += buf.toString('utf-8'); + if (AGY_OAUTH_MARKERS.some((m) => scanBuf.includes(m))) { + settleSkip('agy not authenticated — run `agy` once to sign in (OAuth)', 1); + } + } + }; + proc.stdout?.on('data', (b: Buffer) => watch(b, true)); + proc.stderr?.on('data', (b: Buffer) => watch(b, false)); + + proc.on('error', (err) => { + settleSkip(`agy failed to start: ${err.message}`, 1); + }); + + proc.on('close', (code) => { + if (settled) return; + settled = true; + clearTimeout(timer); + cleanup(); + const raw = Buffer.concat(outChunks).toString('utf-8').trim(); + if (code !== 0 || raw.length === 0 || raw.includes(AGY_NONRESPONSE_MARKER)) { + const reason = code !== 0 + ? 
`agy exited with code ${code}` + : raw.includes(AGY_NONRESPONSE_MARKER) + ? 'agy timed out producing the review' + : 'agy produced no review output'; + const content = agySkipContent(reason); + process.stdout.write(content); + writeConsultOutput(outputPath, content); + recordAgyMetrics(metricsCtx, startTime, code ?? 1, reason); + console.error(`\n[gemini (agy) skipped: ${reason}]`); + resolve(); + return; + } + // Plain-text stdout IS the review. + process.stdout.write(raw); + writeConsultOutput(outputPath, raw); + recordAgyMetrics(metricsCtx, startTime, 0, null); + console.error(`\n[gemini (agy) completed in ${((Date.now() - startTime) / 1000).toFixed(1)}s]`); + resolve(); + }); + }); +} + /** * Run the consultation — dispatches to the correct model runner. */ @@ -639,6 +916,15 @@ async function runConsultation( return; } + // gemini lane → Antigravity CLI (`agy`); handles its own logging, metrics, + // and non-blocking skip (see runAgyConsultation). + if (model === 'gemini') { + const startTime = Date.now(); + await runAgyConsultation(query, role, workspaceRoot, outputPath, metricsCtx); + logQuery(workspaceRoot, model, query, (Date.now() - startTime) / 1000); + return; + } + const config = MODEL_CONFIGS[model]; if (!config) { @@ -651,33 +937,9 @@ async function runConsultation( } let tempFile: string | null = null; - const env: Record = {}; let cmd: string[]; - // When true, the query is written to the child's stdin instead of argv. - // Used for gemini to avoid V8 heap exhaustion on large prompts (#680). - let stdinPayload: string | null = null; - if (model === 'gemini') { - // Gemini uses GEMINI_SYSTEM_MD env var for role - tempFile = path.join(tmpdir(), `codev-role-${Date.now()}.md`); - fs.writeFileSync(tempFile, role); - env['GEMINI_SYSTEM_MD'] = tempFile; - - // Bugfix #680: gemini-cli v0.37.x crashes on large PR diffs (>500KB) due to - // V8 old-space exhaustion in the spawned subprocess. Mitigations: - // 1. 
Bump heap via NODE_OPTIONS (survives gemini-cli's internal relaunch). - // 2. Pipe the prompt via stdin instead of argv — avoids ARG_MAX and keeps - // V8 from holding the full prompt buffer twice. - env['NODE_OPTIONS'] = [process.env.NODE_OPTIONS ?? '', '--max-old-space-size=8192'] - .join(' ') - .trim(); - stdinPayload = query; - - // Use --output-format json to capture token usage/cost in structured output. - // Never use --yolo — it allows Gemini to write files (#370). - // No positional query arg: prompt arrives on stdin (triggers non-interactive mode). - cmd = [config.cli, '--output-format', 'json', ...config.args]; - } else if (model === 'hermes') { + if (model === 'hermes') { // Hermes does not have a dedicated system prompt flag for single-shot mode. // Include role context at the top of the prompt. const hermesPrompt = `${role}\n\n---\n\n${query}`; @@ -700,30 +962,16 @@ async function runConsultation( throw new Error(`Unknown model: ${model}`); } - // Execute with passthrough stdio. - // Use 'ignore' for stdin when no payload — prevents blocking when spawned as subprocess. - // Use 'pipe' when we need to stream the prompt in (e.g. gemini, see #680). - const fullEnv = { ...process.env, ...env }; + // Execute with passthrough stdio. stdin is 'ignore' (hermes passes its prompt + // via argv) — prevents blocking when spawned as a subprocess. const startTime = Date.now(); - const stdinMode: 'ignore' | 'pipe' = stdinPayload !== null ? 'pipe' : 'ignore'; return new Promise((resolve, reject) => { const proc = spawn(cmd[0], cmd.slice(1), { cwd: workspaceRoot, - env: fullEnv, - stdio: [stdinMode, 'pipe', 'inherit'], + stdio: ['ignore', 'pipe', 'inherit'], }); - if (stdinPayload !== null && proc.stdin) { - proc.stdin.on('error', (err) => { - // EPIPE can happen if the child exits before reading all input — not fatal. 
- if ((err as NodeJS.ErrnoException).code !== 'EPIPE') { - reject(err); - } - }); - proc.stdin.end(stdinPayload, 'utf-8'); - } - const chunks: Buffer[] = []; if (proc.stdout) { @@ -954,9 +1202,10 @@ function buildPRQuery(prId: string): string { const diff = fetchPRDiff(prId); // Private-per-user dir to avoid world-readable /tmp diffs + symlink/clobber - // races: mkdtempSync creates a fresh dir owned by us; writeFileSync with + // races: consultSandboxDir() is a fresh mkdtempSync dir owned by us (and the + // only temp dir granted to the sandboxed agy reviewer); writeFileSync with // flag 'wx' refuses to follow a symlink or overwrite an existing file. - const diffDir = fs.mkdtempSync(path.join(tmpdir(), 'codev-pr-')); + const diffDir = consultSandboxDir(); const diffPath = path.join(diffDir, `pr-${prId}.diff`); fs.writeFileSync(diffPath, diff, { encoding: 'utf-8', mode: 0o600, flag: 'wx' }); @@ -1661,4 +1910,7 @@ export { composePRQueryText as _composePRQueryText, computePersistentOutputPath as _computePersistentOutputPath, MODEL_CONFIGS as _MODEL_CONFIGS, + MODEL_ALIASES as _MODEL_ALIASES, + runAgyConsultation as _runAgyConsultation, + agySkipContent as _agySkipContent, }; diff --git a/packages/codev/src/commands/consult/usage-extractor.ts b/packages/codev/src/commands/consult/usage-extractor.ts index 523922c64..02541c84f 100644 --- a/packages/codev/src/commands/consult/usage-extractor.ts +++ b/packages/codev/src/commands/consult/usage-extractor.ts @@ -1,30 +1,16 @@ /** * Usage extraction from structured model output * - * Extracts token counts, cost, and review text from Claude SDK results - * and Gemini JSON output. All parsing is wrapped in try/catch — returns - * null on failure, never throws. + * Extracts token counts, cost, and review text from Claude SDK results. * - * Codex usage and review text are captured directly from SDK events in - * runCodexConsultation() — no JSONL parsing needed. 
- * - * Gemini: Uses --output-format json to get structured output with - * token counts in stats.models. Cost is computed from per-model pricing. + * - Claude: usage comes from the Agent SDK result (total_cost_usd + usage). + * - Codex: usage and review text are captured directly from SDK events in + * runCodexConsultation() — no parsing here. + * - gemini (Antigravity `agy`) and hermes: CLI lanes that emit plain text with + * no token-usage data. Usage degrades gracefully to null (no cost row); the + * review IS the plain-text output (no extraction needed). See spec 778. */ -// Gemini per-model pricing (USD per 1M tokens) -// Maps model name prefixes to pricing tiers. -// Longer prefixes must appear before shorter ones (e.g., flash-lite before flash). -const GEMINI_PRICING: Record = { - 'gemini-3.1-pro': { inputPer1M: 2.00, cachedInputPer1M: 0.50, outputPer1M: 12.00 }, - 'gemini-3-pro': { inputPer1M: 1.25, cachedInputPer1M: 0.315, outputPer1M: 5.00 }, - 'gemini-2.5-pro': { inputPer1M: 1.25, cachedInputPer1M: 0.315, outputPer1M: 5.00 }, - 'gemini-3-flash': { inputPer1M: 0.15, cachedInputPer1M: 0.0375, outputPer1M: 0.60 }, - 'gemini-2.5-flash-lite': { inputPer1M: 0.075, cachedInputPer1M: 0.019, outputPer1M: 0.30 }, - 'gemini-2.5-flash': { inputPer1M: 0.15, cachedInputPer1M: 0.0375, outputPer1M: 0.60 }, -}; -const GEMINI_DEFAULT_PRICING = { inputPer1M: 0.15, cachedInputPer1M: 0.0375, outputPer1M: 0.60 }; - export interface UsageData { inputTokens: number | null; cachedInputTokens: number | null; @@ -55,72 +41,17 @@ function extractClaudeUsage(sdkResult: SDKResultLike): UsageData { }; } -function getGeminiPricing(modelName: string): typeof GEMINI_DEFAULT_PRICING { - for (const [prefix, pricing] of Object.entries(GEMINI_PRICING)) { - if (modelName.startsWith(prefix)) return pricing; - } - return GEMINI_DEFAULT_PRICING; -} - -function extractGeminiUsage(output: string): UsageData | null { - const parsed = JSON.parse(output); - const models = parsed?.stats?.models; - if 
(!models || typeof models !== 'object') return null; - - const modelKeys = Object.keys(models); - if (modelKeys.length === 0) return null; - - // Sum tokens and cost across all models (Gemini CLI may use multiple) - let totalInput = 0; - let totalCached = 0; - let totalOutput = 0; - let totalCost = 0; - let hasTokenData = false; - - for (const key of modelKeys) { - const tokens = models[key]?.tokens; - if (!tokens) continue; - - const input = typeof tokens.prompt === 'number' ? tokens.prompt : 0; - const cached = typeof tokens.cached === 'number' ? tokens.cached : 0; - const candidates = typeof tokens.candidates === 'number' ? tokens.candidates : 0; - - if (input > 0 || candidates > 0 || cached > 0) hasTokenData = true; - - totalInput += input; - totalCached += cached; - totalOutput += candidates; - - const pricing = getGeminiPricing(key); - const uncached = Math.max(0, input - cached); - totalCost += (uncached / 1_000_000) * pricing.inputPer1M - + (cached / 1_000_000) * pricing.cachedInputPer1M - + (candidates / 1_000_000) * pricing.outputPer1M; - } - - if (!hasTokenData) return null; - - return { - inputTokens: totalInput, - cachedInputTokens: totalCached, - outputTokens: totalOutput, - costUsd: totalCost, - }; -} - /** * Extract token counts and cost from structured model output. - * Returns null if extraction fails entirely (logs warning to stderr). + * Returns null when no token data is available (e.g. the plain-text CLI lanes), + * so cost rows degrade gracefully rather than throwing. */ export function extractUsage(model: string, output: string, sdkResult?: SDKResultLike): UsageData | null { try { if (model === 'claude' && sdkResult) { return extractClaudeUsage(sdkResult); } - if (model === 'gemini') { - return extractGeminiUsage(output); - } - // Codex: usage is captured directly from SDK events in runCodexConsultation() + // codex → captured from SDK events; gemini (agy) / hermes → plain text, no usage. 
return null; } catch (err) { console.error(`[warn] Failed to extract usage for ${model}: ${err instanceof Error ? err.message : String(err)}`); @@ -130,21 +61,12 @@ export function extractUsage(model: string, output: string, sdkResult?: SDKResul /** * Extract plain-text review content from structured model output. - * Returns null if extraction fails (caller should fall back to raw output). + * Claude/Codex capture text via their SDK loops; the gemini (agy) and hermes + * CLI lanes emit plain text that the caller uses as-is. Returns null so callers + * fall back to the raw output. */ export function extractReviewText(model: string, output: string): string | null { - try { - if (model === 'gemini') { - const parsed = JSON.parse(output); - if (typeof parsed?.response === 'string') { - return parsed.response; - } - return null; - } - - // Claude and Codex: text is captured directly by their SDK streaming loops - return null; - } catch { - return null; - } + void model; + void output; + return null; } diff --git a/packages/codev/src/commands/doctor.ts b/packages/codev/src/commands/doctor.ts index 169b950f3..968d742de 100644 --- a/packages/codev/src/commands/doctor.ts +++ b/packages/codev/src/commands/doctor.ts @@ -4,7 +4,7 @@ * Port of codev/bin/codev-doctor to TypeScript */ -import { execSync, spawnSync } from 'node:child_process'; +import { execSync, spawnSync, spawn } from 'node:child_process'; import { existsSync, readFileSync } from 'node:fs'; import { dirname, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; @@ -13,6 +13,7 @@ import { query as claudeQuery } from '@anthropic-ai/claude-agent-sdk'; import { executeForgeCommandSync, loadForgeConfig, validateForgeConfig, resolveAllConcepts, type ConceptResolution } from '../lib/forge.js'; import { detectHarnessFromCommand } from '../agent-farm/utils/harness.js'; import { auditPrGates, formatPrGateWarning } from '../lib/pr-gate-audit.js'; +import { resolveAgyBin, AGY_OAUTH_MARKERS } from 
'./consult/index.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); @@ -149,18 +150,9 @@ const CORE_DEPENDENCIES: Dependency[] = [ // AI CLI dependencies - at least one required // Note: Claude is verified via Agent SDK (not CLI), handled separately below +// Note: the gemini lane now uses the Antigravity CLI (agy) — checked separately +// via resolveAgyBin (the bare `which agy` resolves to the IDE symlink, not the CLI). const AI_DEPENDENCIES: Dependency[] = [ - { - name: 'Gemini', - command: 'gemini', - versionArg: '--version', - versionExtract: () => 'working', - required: false, - installHint: { - macos: 'see github.com/google-gemini/gemini-cli', - linux: 'see github.com/google-gemini/gemini-cli', - }, - }, { name: 'Codex', command: 'codex', @@ -262,16 +254,8 @@ const VERIFY_CONFIGS: Record = { successCheck: (r) => r.status === 0, authHint: 'Run "opencode --version" to verify installation', }, - // Claude is verified via Agent SDK — see verifyClaudeViaSDK() below - 'Gemini': { - // gemini --version verifies the CLI works, but not auth - // A minimal query is needed to verify API connectivity - command: 'gemini', - args: ['--yolo', 'Reply with just OK'], - timeout: 30000, - successCheck: (r) => r.status === 0, - authHint: 'Run: gemini (interactive) then /auth, or set GOOGLE_API_KEY', - }, + // Claude is verified via Agent SDK — see verifyClaudeViaSDK() below. + // The gemini lane (Antigravity `agy`) is verified via verifyAgy() — not here. }; /** @@ -377,6 +361,75 @@ function verifyAiModel(modelName: string): CheckResult { } } +const AGY_INSTALL_HINT = 'install: curl -fsSL https://antigravity.google/cli/install.sh | bash, then run `agy` once to sign in'; + +/** + * Presence check for the Antigravity CLI (agy) — the gemini lane's backend. + * Uses resolveAgyBin (rejects the IDE symlink); never a bare `which agy`. 
+ */ +function checkAgy(): CheckResult { + const bin = resolveAgyBin(); + if (!bin) { + return { status: 'skip', version: 'not installed', note: AGY_INSTALL_HINT }; + } + return { status: 'ok', version: 'CLI' }; +} + +/** + * Verify agy is authenticated via a tiny non-interactive --print probe. + * Streams output and detects the OAuth URL on the *early* stream so an + * unauthenticated agy reports "needs login" promptly (it would otherwise print + * the URL and wait ~30s) — rather than stalling `codev doctor` for the full + * auth wait. Always resolves (never throws). + */ +function verifyAgy(): Promise { + const bin = resolveAgyBin(); + if (!bin) return Promise.resolve({ status: 'skip', version: 'not installed', note: AGY_INSTALL_HINT }); + + return new Promise((resolve) => { + const proc = spawn(bin, ['--print', '--print-timeout', '20s', 'Reply with just OK'], { + stdio: ['ignore', 'pipe', 'pipe'], + }); + let settled = false; + let scan = ''; + const out: string[] = []; + let timer: ReturnType; + + const finish = (r: CheckResult) => { + if (settled) return; + settled = true; + clearTimeout(timer); + try { proc.kill('SIGTERM'); } catch { /* already gone */ } + resolve(r); + }; + + timer = setTimeout( + () => finish({ status: 'fail', version: 'timeout', note: 'check network connection / run `agy` to verify sign-in' }), + 30000, + ); + + const watch = (buf: Buffer, isStdout: boolean) => { + const s = buf.toString('utf-8'); + if (isStdout) out.push(s); + if (scan.length < 8192) { + scan += s; + // Fast path: OAuth URL appears immediately on an unauthenticated run. 
+ if (AGY_OAUTH_MARKERS.some((m) => scan.includes(m))) { + finish({ status: 'fail', version: 'needs login', note: 'run `agy` once to sign in (OAuth)' }); + } + } + }; + proc.stdout?.on('data', (b: Buffer) => watch(b, true)); + proc.stderr?.on('data', (b: Buffer) => watch(b, false)); + proc.on('error', () => finish({ status: 'fail', version: 'error', note: 'run `agy` to verify sign-in' })); + proc.on('close', (code) => { + const text = out.join('').trim(); + if (code === 0 && text.length > 0) finish({ status: 'ok', version: 'operational' }); + else finish({ status: 'fail', version: 'not responding', note: 'run `agy` to verify sign-in' }); + }); + }); +} + /** * Find the project root with a codev/ directory */ @@ -535,7 +588,7 @@ export async function doctor(): Promise { printStatus('Claude', { status: 'ok', version: 'Agent SDK' }); installedAiClis.push('Claude'); - // Check CLI-based AI dependencies (Gemini, Codex) + // Check CLI-based AI dependencies (Codex, OpenCode) for (const dep of AI_DEPENDENCIES) { const result = checkDependency(dep); if (result.status === 'ok') { @@ -544,6 +597,12 @@ export async function doctor(): Promise { printStatus(dep.name, result); } + // gemini lane → Antigravity CLI (agy): custom presence check (resolveAgyBin + // rejects the IDE symlink; a bare `which agy` would resolve the wrong binary). 
+ const agyPresence = checkAgy(); + if (agyPresence.status === 'ok') installedAiClis.push('Gemini (agy)'); + printStatus('Gemini (agy)', agyPresence); + // Verify installed CLIs are actually operational console.log(''); console.log(chalk.bold('AI Model Verification') + ' (checking auth & connectivity)'); @@ -565,8 +624,8 @@ export async function doctor(): Promise { }); } - // Verify CLI-based models - for (const cliName of installedAiClis.filter(n => n !== 'Claude')) { + // Verify CLI-based models (agy handled separately below — custom OAuth probe) + for (const cliName of installedAiClis.filter(n => n !== 'Claude' && n !== 'Gemini (agy)')) { console.log(chalk.blue(` ⋯ ${cliName.padEnd(12)} verifying...`)); process.stdout.write('\x1b[1A\x1b[2K'); @@ -585,6 +644,25 @@ export async function doctor(): Promise { } } + // Verify the gemini lane (agy) via its custom OAuth-aware probe so an + // agy-only setup still counts as an operational model. + if (installedAiClis.includes('Gemini (agy)')) { + console.log(chalk.blue(` ⋯ ${'Gemini (agy)'.padEnd(12)} verifying...`)); + process.stdout.write('\x1b[1A\x1b[2K'); + const agyVerify = await verifyAgy(); + printStatus('Gemini (agy)', agyVerify); + if (agyVerify.status === 'ok') { + aiCliCount++; + } else if (agyVerify.status === 'fail') { + warnings++; + warningDetails.push({ + name: 'Gemini (agy)', + issue: agyVerify.version, + recommendation: agyVerify.note, + }); + } + } + if (aiCliCount === 0) { console.log(''); console.log(chalk.red(' ✗') + ' No AI model operational! Check API keys and authentication.'); diff --git a/packages/codev/src/commands/porch/__tests__/agy-porch-progression.test.ts b/packages/codev/src/commands/porch/__tests__/agy-porch-progression.test.ts new file mode 100644 index 000000000..c71ec9b92 --- /dev/null +++ b/packages/codev/src/commands/porch/__tests__/agy-porch-progression.test.ts @@ -0,0 +1,163 @@ +/** + * Porch-orchestrated phase-progression test for the agy backend (Spec 778). 
+ * + * This is the integration counterpart to agy-skip-progression.test.ts (which pins + * the verdict-parsing contract in isolation). Here we drive the REAL porch entry + * point — `next()` — with on-disk review files, so the whole orchestration path is + * exercised: findReviewFiles → parseVerdict → allApprove → handleVerifyApproved / + * rebuttal. The gemini lane's review file is the genuine `_agySkipContent` artifact + * produced when `agy` is missing/unauthenticated/timed-out. + * + * The core failure this defends against: a skipped gemini lane stalling a SPIR + * phase. The skip must be non-blocking — porch must advance on the strength of the + * remaining reviewers (2-way) — yet must NOT mask a genuine REQUEST_CHANGES. + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import { tmpdir } from 'node:os'; +import { next } from '../next.js'; +import { writeState, getProjectDir, getStatusPath } from '../state.js'; +import { _agySkipContent } from '../../consult/index.js'; +import type { ProjectState } from '../types.js'; + +// Pin consultation models to the 3-way default so workspace/global config can't +// leak in and change the lane set (mirrors done-verification.test.ts). +vi.mock('../../../lib/config.js', async (importOriginal) => { + const original = await importOriginal<typeof import('../../../lib/config.js')>(); + return { + ...original, + loadConfig: (_workspaceRoot: string) => ({ + porch: { consultation: { models: ['gemini', 'codex', 'claude'] } }, + }), + }; +}); + +function createTestDir(): string { + const dir = path.join(tmpdir(), `porch-agy-prog-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`); + fs.mkdirSync(dir, { recursive: true }); + return dir; +} + +function setupProtocol(testDir: string): void { + // Single build_verify phase with a 3-way PR-style consult and a `pr` gate.
+ // On all-approve, porch requests the `pr` gate (status: gate_pending); on any + // REQUEST_CHANGES it asks for a rebuttal — exactly the two outcomes we assert. + const protocol = { + name: 'agy-prog-proto', + version: '1.0.0', + phases: [ + { + id: 'review', + name: 'Review', + type: 'build_verify', + build: { prompt: 'review.md', artifact: 'codev/reviews/${PROJECT_ID}-*.md' }, + verify: { type: 'pr', models: ['gemini', 'codex', 'claude'] }, + gate: 'pr', + next: null, + }, + ], + }; + const protocolDir = path.join(testDir, 'codev', 'protocols', 'agy-prog-proto'); + fs.mkdirSync(protocolDir, { recursive: true }); + fs.writeFileSync(path.join(protocolDir, 'protocol.json'), JSON.stringify(protocol, null, 2)); +} + +function makeState(overrides: Partial<ProjectState> = {}): ProjectState { + return { + id: '0778', + title: 'agy-progression', + protocol: 'agy-prog-proto', + phase: 'review', + plan_phases: [], + current_plan_phase: null, + gates: { pr: { status: 'pending' as const } }, + iteration: 1, + build_complete: true, + history: [], + started_at: new Date().toISOString(), + updated_at: new Date().toISOString(), + ...overrides, + }; +} + +/** Write the three iter-1 review files porch expects, with the given verdicts.
+ */ +function writeReviews( + testDir: string, + state: ProjectState, + verdicts: { gemini: string; codex: string; claude: string }, +): void { + const projectDir = getProjectDir(testDir, state.id, state.title); + fs.mkdirSync(projectDir, { recursive: true }); + const phase = state.current_plan_phase || state.phase; + const write = (model: string, content: string) => + fs.writeFileSync(path.join(projectDir, `${state.id}-${phase}-iter${state.iteration}-${model}.txt`), content); + write('gemini', verdicts.gemini); + write('codex', verdicts.codex); + write('claude', verdicts.claude); +} + +const APPROVE = 'Looks correct and complete; nothing blocking here.\n\n---\nVERDICT: APPROVE\nSUMMARY: ok\nCONFIDENCE: HIGH\n---'; +const REQUEST = 'A required behavior is missing and must be fixed before merge.\n\n---\nVERDICT: REQUEST_CHANGES\nSUMMARY: missing\nCONFIDENCE: HIGH\n---'; + +describe('porch progression with a skipped agy/gemini lane (drives next())', () => { + let testDir: string; + let logSpy: ReturnType<typeof vi.spyOn>; + + beforeEach(() => { + testDir = createTestDir(); + setupProtocol(testDir); + logSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + }); + + afterEach(() => { + fs.rmSync(testDir, { recursive: true, force: true }); + logSpy.mockRestore(); + }); + + it('advances (2-way) when gemini is skipped but codex + claude APPROVE', async () => { + const state = makeState(); + const statusPath = getStatusPath(testDir, state.id, state.title); + fs.mkdirSync(path.dirname(statusPath), { recursive: true }); + writeState(statusPath, state); + + // gemini lane = the real skip artifact the consult lane emits when agy is unavailable → COMMENT + writeReviews(testDir, state, { + gemini: _agySkipContent('agy CLI not found'), + codex: APPROVE, + claude: APPROVE, + }); + + const res = await next(testDir, '0778'); + + // Porch advanced: it requested the human `pr` gate ("All reviewers approved!"), + // NOT a rebuttal/re-iteration. The skipped lane did not block progression.
+ expect(res.status).toBe('gate_pending'); + expect(res.gate).toBe('pr'); + const subjects = (res.tasks ?? []).map(t => t.subject).join(' | '); + expect(subjects).not.toMatch(/rebuttal/i); + expect((res.tasks ?? []).map(t => t.description).join('\n')).toMatch(/All reviewers approved/); + }); + + it('does NOT mask a genuine REQUEST_CHANGES (gemini skipped, codex blocks)', async () => { + const state = makeState(); + const statusPath = getStatusPath(testDir, state.id, state.title); + fs.mkdirSync(path.dirname(statusPath), { recursive: true }); + writeState(statusPath, state); + + writeReviews(testDir, state, { + gemini: _agySkipContent('authentication required (OAuth)'), + codex: REQUEST, + claude: APPROVE, + }); + + const res = await next(testDir, '0778'); + + // The skip is non-blocking, but a real REQUEST_CHANGES still blocks: porch asks + // for a rebuttal rather than advancing to the gate. + expect(res.status).toBe('tasks'); + expect(res.gate).toBeUndefined(); + expect((res.tasks ?? []).map(t => t.subject).join(' | ')).toMatch(/rebuttal/i); + }); +}); diff --git a/packages/codev/src/commands/porch/__tests__/agy-skip-progression.test.ts b/packages/codev/src/commands/porch/__tests__/agy-skip-progression.test.ts new file mode 100644 index 000000000..8f801f797 --- /dev/null +++ b/packages/codev/src/commands/porch/__tests__/agy-skip-progression.test.ts @@ -0,0 +1,55 @@ +import { describe, it, expect } from 'vitest'; +import { parseVerdict, allApprove } from '../verdict'; +import { _agySkipContent } from '../../consult/index.js'; +import type { ReviewResult } from '../types.js'; + +/** + * Phase-progression guarantee for the agy backend (Spec 778). + * + * When the Antigravity CLI (`agy`) is missing, unauthenticated, or times out, the + * gemini consult lane emits a non-blocking skip artifact instead of failing the run. 
+ * Porch parses that artifact as COMMENT, and `allApprove` treats COMMENT as + * non-blocking — so a SPIR/ASPIR/BUGFIX phase still advances on the strength of the + * remaining reviewers (codex + claude). These tests pin that contract end-to-end + * against the REAL skip artifact, so a regression in either the artifact wording or + * the verdict parser is caught. + */ +describe('agy skip is non-blocking for porch progression', () => { + const skipReasons = [ + 'agy CLI not found', + 'authentication required (OAuth)', + 'no response before timeout', + ]; + + for (const reason of skipReasons) { + it(`real skip artifact (${reason}) parses as COMMENT`, () => { + expect(parseVerdict(_agySkipContent(reason))).toBe('COMMENT'); + }); + } + + it('a 3-way phase with gemini skipped still passes (2-way effective)', () => { + const reviews: ReviewResult[] = [ + { model: 'gemini', verdict: parseVerdict(_agySkipContent('agy CLI not found')), file: '/tmp/g.md' }, + { model: 'codex', verdict: 'APPROVE', file: '/tmp/c.md' }, + { model: 'claude', verdict: 'APPROVE', file: '/tmp/cl.md' }, + ]; + expect(reviews[0].verdict).toBe('COMMENT'); + expect(allApprove(reviews)).toBe(true); + }); + + it('the skip does NOT mask a genuine REQUEST_CHANGES from another reviewer', () => { + const reviews: ReviewResult[] = [ + { model: 'gemini', verdict: parseVerdict(_agySkipContent('agy CLI not found')), file: '/tmp/g.md' }, + { model: 'codex', verdict: 'REQUEST_CHANGES', file: '/tmp/c.md' }, + { model: 'claude', verdict: 'APPROVE', file: '/tmp/cl.md' }, + ]; + expect(allApprove(reviews)).toBe(false); + }); + + it('skip artifact is self-describing (names the lane and the remediation)', () => { + const content = _agySkipContent('authentication required'); + expect(content).toMatch(/Gemini lane skipped/); + expect(content).toMatch(/non-blocking/); + expect(content).toMatch(/antigravity\.google/); + }); +});