Azure · v1212 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
@@ -0,0 +1,180 @@
+# Workflow name.
+name: ext-azure-ai-agents-e2e
+
+# E2E tests for the azure.ai.agents CLI extension.
+#
+# Architecture (identical to local testing):
+#   Copilot CLI (LLM brain, installed via npm)
+#     ↕ MCP protocol (stdio, config in ~/.copilot/mcp-config.json)
+#   cli-interactive-tester (MCP server, manages tmux sessions)
+#     ↕ tmux
+#   azd ai agent CLI (under test)
+#
+# Copilot CLI runs in programmatic mode (-p), reads the scenario goals from the
+# scenario YAML files, and drives the terminal through the MCP tool — the same
+# flow as local testing.
+#
+# Manual trigger only — switch to PR-trigger once pipeline is stable.
+# Scenarios live on branch trangevi/test-scenarios until PR #8524 merges.
+
+# Manual dispatch only. Both inputs are supplied when the run is started.
+on:
+  workflow_dispatch:
+    inputs:
+      # Tier selection; later steps test this string with contains().
+      tier:
+        type: choice
+        description: 'Which tiers to run'
+        default: '0'
+        options:
+          - '0'
+          - '0+1'
+          - '0+1+2'
+      # Explicit opt-in checked by the Tier 2 cost validation step.
+      confirm_tier2_cost:
+        type: boolean
+        description: 'Confirm Tier 2 Azure costs (~$2-5)'
+        default: false
+
+# All runs of this workflow share one concurrency group; with
+# cancel-in-progress off, a newer run does not cancel one already executing.
+concurrency:
+  cancel-in-progress: false
+  group: ${{ github.workflow }}
+
+permissions:
+  # Required for azure/login OIDC federated credentials (Tier 1/2)
+  id-token: write
+  contents: read
+
+# Repository-relative paths; the fixtures directory sits inside the scenarios
+# directory.
+env:
+  AZD_AGENTS_FIXTURES: cli/azd/extensions/azure.ai.agents/tests/cli-interactive-tester-scenarios/fixtures
+  SCENARIOS_DIR: cli/azd/extensions/azure.ai.agents/tests/cli-interactive-tester-scenarios
+
+jobs:
+  # Single job: sets up toolchains, builds azd, installs the tester tooling,
+  # then lets Copilot CLI drive the scenarios.
+  e2e-test:
+    # The selected tier string is shown in the job title.
+    name: "E2E scenarios (Tier ${{ inputs.tier }})"
+    runs-on: ubuntu-22.04
+    # Upper bound for the whole job, including the Copilot-driven scenario run.
+    timeout-minutes: 90
+    steps:
+      # Check out the branch named below instead of the triggering ref.
+      # TODO: change ref to 'main' after PR #8524 merges
+      - uses: actions/checkout@v6
+        with:
+          ref: "trangevi/test-scenarios"
+
+      # Node.js runtime for the Copilot CLI that a later step installs via npm.
+      # Copilot CLI documents Node.js 22 or newer as a prerequisite, so
+      # provision 22 rather than 20.
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+
+      # Go version is taken from the azd module file rather than pinned here.
+      - uses: actions/setup-go@v6
+        with:
+          go-version-file: cli/azd/go.mod
+
+      # Python for the pip-installed cli-interactive-tester MCP server.
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install Copilot CLI
+        # Global npm install of the @github/copilot package.
+        run: npm install --global @github/copilot
+
+      - name: Build azd + install extension
+        working-directory: cli/azd
+        run: |
+          # Build the azd binary into the current directory (cli/azd).
+          go build -o ./azd .
+          # Make the fresh binary resolvable for the rest of this step.
+          PATH="$PWD:$PATH"
+          export PATH
+          azd extension install azure.ai.agents --source ./extensions/azure.ai.agents
+          # Record the build directory in GITHUB_PATH for the remaining steps.
+          printf '%s\n' "${{ github.workspace }}/cli/azd" >> "$GITHUB_PATH"
+
+      - name: Install cli-interactive-tester
+        run: |
+          # Refresh the package index first: a stale index on the runner image
+          # can reference package versions that are no longer on the mirrors.
+          sudo apt-get update
+          sudo apt-get install -y tmux
+          # TODO: confirm repo visibility. If private, clone through an
+          #   authenticated URL with the token supplied via the step's `env:`.
+          #   Do not write a secrets expression in this script, even inside a
+          #   comment: Actions expands expressions in the whole `run` text
+          #   before the shell ever sees it.
+          git clone https://github.com/coreai-microsoft/cli-interactive-tester.git /tmp/cli-interactive-tester
+          cd /tmp/cli-interactive-tester
+          # Editable install from the clone; the MCP config later starts the
+          # server with `python -m` from this same directory.
+          pip install -e .
+
+      - name: Install uv (for Tier 2 run-local scenario)
+        uses: astral-sh/setup-uv@v6
+        # Of the three tier options, only "0+1+2" contains a "2".
+        if: contains(inputs.tier, '2')
+
+      - name: Validate Tier 2 cost confirmation
+        # Runs only when Tier 2 was requested without the explicit cost opt-in,
+        # and then fails the job.
+        if: contains(inputs.tier, '2') && !inputs.confirm_tier2_cost
+        run: |
+          echo '::error::Tier 2 creates Azure resources (~$2-5). Set confirm_tier2_cost=true to proceed.'
+          exit 1
+
+      - name: Azure Login
+        uses: azure/login@v2
+        # Skipped for a Tier 0-only run.
+        if: contains(inputs.tier, '1') || contains(inputs.tier, '2')
+        with:
+          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          client-id: ${{ secrets.AZURE_CLIENT_ID }}
+
+      - name: GitHub CLI Auth
+        if: contains(inputs.tier, '1') || contains(inputs.tier, '2')
+        env:
+          # Pass the secret through the environment instead of expanding it
+          # into the script text. The variable is deliberately NOT named
+          # GH_TOKEN: when that variable is set, `gh auth login` refuses to
+          # store credentials and asks for it to be cleared first.
+          GH_AUTH_TOKEN: ${{ secrets.GH_TOKEN }}
+        run: |
+          # Fail with an actionable message when the secret is not configured.
+          if [ -z "$GH_AUTH_TOKEN" ]; then
+            echo "::error::GH_TOKEN secret is empty or not available to this workflow."
+            exit 1
+          fi
+          # Store the credentials so later steps can use gh without the env var.
+          printf '%s\n' "$GH_AUTH_TOKEN" | gh auth login --with-token
+
+      - name: Create test profile
+        if: contains(inputs.tier, '1') || contains(inputs.tier, '2')
+        working-directory: ${{ env.SCENARIOS_DIR }}
+        env:
+          FOUNDRY_PROJECT_ENDPOINT: ${{ secrets.FOUNDRY_PROJECT_ENDPOINT }}
+          AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+        run: |
+          # Write the per-run profile file. Secrets arrive through the
+          # environment and are expanded by the shell.
+          cat > profile.local.yaml <<EOF
+          prefix: "ci-${{ github.run_number }}"
+          subscription: "${AZURE_SUBSCRIPTION_ID}"
+          region: "North Central US"
+          foundry_project_endpoint: "${FOUNDRY_PROJECT_ENDPOINT}"
+          EOF
+
+      - name: Configure MCP for Copilot
+        run: |
+          # Register cli-interactive-tester as a stdio MCP server in the
+          # user-level Copilot config. The quoted heredoc delimiter keeps the
+          # JSON literal (no shell expansion).
+          mkdir -p ~/.copilot
+          cat > ~/.copilot/mcp-config.json <<'JSON'
+          {
+            "mcpServers": {
+              "cli-interactive-tester": {
+                "type": "stdio",
+                "command": "bash",
+                "args": ["-c", "cd /tmp/cli-interactive-tester && python -m auto_test_tool.mcp_server"]
+              }
+            }
+          }
+          JSON
+
+      - name: Run E2E scenarios via Copilot + MCP tool
+        working-directory: ${{ env.SCENARIOS_DIR }}
+        env:
+          TIER: ${{ inputs.tier }}
+          UV_HTTP_TIMEOUT: "300"
+          AZD_AGENTS_FIXTURES: ${{ github.workspace }}/${{ env.AZD_AGENTS_FIXTURES }}
+          COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_PAT }}
+        run: |
+          # The prompt file drives the whole run; stop early when it is missing.
+          [ -f prompt-ci-run.md ] || {
+            echo "::error::prompt-ci-run.md not found in $(pwd)"
+            exit 1
+          }
+          # Copilot CLI flags used below:
+          #   -p            programmatic (non-interactive) mode, exits on completion
+          #   --no-ask-user never prompt for user input
+          #   --allow-tool  pre-authorize MCP tool access
+          #
+          # TODO: confirm --allow-tool syntax for MCP-registered tools.
+          # Possible forms: 'cli-interactive-tester(*)', 'cli-interactive-tester'
+          copilot -p "$(cat prompt-ci-run.md)" \
+            --allow-tool='cli-interactive-tester(*)' \
+            --no-ask-user
+
+      - name: Upload results
+        # Runs even when an earlier step failed.
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          retention-days: 30
+          name: e2e-results-tier-${{ inputs.tier }}
+          path: |
+            ${{ env.SCENARIOS_DIR }}/reports/
+            ${{ env.SCENARIOS_DIR }}/full-pipeline-run-results.md
+
+      - name: Teardown (Tier 2 always cleanup)
+        # Runs after any Tier 2 attempt, including failed or cancelled ones.
+        if: always() && contains(inputs.tier, '2')
+        working-directory: ${{ env.SCENARIOS_DIR }}
+        run: |
+          # Tear down EVERY working directory, not only the first one listed:
+          # a directory left behind may still own billable Azure resources.
+          # nullglob makes the array empty when nothing matches.
+          shopt -s nullglob
+          work_dirs=( "$HOME"/working/azd-agents-shared/*/ )
+          if [ "${#work_dirs[@]}" -eq 0 ]; then
+            echo "::warning::No working directory found — teardown skipped, check for leaked resources"
+            exit 0
+          fi
+          failed=0
+          for work_dir in "${work_dirs[@]}"; do
+            echo "Tearing down ${work_dir}"
+            # The subshell keeps the cd local. Keep going on failure so one
+            # broken environment does not block cleanup of the others.
+            if ! ( cd "$work_dir" && azd down --force --purge ); then
+              echo "::error::azd down failed in ${work_dir} — check for leaked resources"
+              failed=1
+            fi
+          done
+          # Surface any failed teardown as a failed step.
+          exit "$failed"
@@ -0,0 +1,114 @@
+# CI Pipeline Prompt — E2E Test Scenarios
+
+Autonomously run azure.ai.agents CLI test scenarios using the cli-interactive-tester MCP tool. Do NOT ask questions — make all decisions yourself and run to completion.
+
+## Environment
+
+- Runner: Ubuntu 22.04 (GitHub Actions)
+- azd is on PATH (pre-built in earlier step)
+- Azure auth: active if TIER includes 1 or 2 (federated identity via earlier step)
+- GitHub CLI auth: active if TIER includes 1 or 2 (earlier step)
+- Scenarios directory: current working directory
+- All paths are POSIX (Linux runner)
+
+## Profile Setup
+
+Read and merge (local overrides shared):
+1. `./profile.yaml`
+2. `./profile.local.yaml`
+
+Derive `shared_agent_name = "{prefix}-{shared_agent_suffix}"` from the merged profile. Pass the merged map as `session_vars` on every MCP call.
+
+## Tier Selection
+
+Check the environment variable `TIER` to determine which tiers to run:
+- `TIER=0` → run Phase 0 only
+- `TIER=0+1` → run Phase 0 + Phase 1
+- `TIER=0+1+2` → run all phases
+
+If `TIER` is not set, default to `0`.
+
+## Run Plan
+
+Record the START TIME at the beginning.
+
+### Phase 0 — Tier 0 (offline, no auth, fast)
+
+Run ALL `00-*.yaml` scenarios sequentially:
+1. `00-version.yaml`
+2. `00-help-root.yaml`
+3. `00-sample-list-text.yaml`
+4. `00-sample-list-json-filters.yaml`
+5. `00-doctor-empty-dir.yaml`
+6. `00-doctor-local-only.yaml`
+7. `00-doctor-partial-failure.yaml`
+8. `00-init-validate-mutually-exclusive.yaml`
+9. `00-init-validate-no-prompt-missing.yaml`
+10. `00-invoke-validate-protocol.yaml`
+11. `00-eval-context-required.yaml`
+12. `00-optimize-apply-requires-candidate.yaml`
+13. `00-delete-help.yaml`
+14. `00-endpoint-show-help.yaml`
+15. `00-code-download-help.yaml`
+16. `00-init-picker-navigation.yaml` — interactive picker UX, abort with Ctrl-C
+
+### Phase 1 — Tier 1 (auth required, scaffold only, NO azd provision)
+
+1. `10-init-template-python.yaml`
+2. `10-init-template-dotnet.yaml`
+3. `10-init-deploy-mode-code.yaml`
+4. `10-init-deploy-mode-container.yaml`
+5. `10-init-from-code.yaml`
+6. `10-init-from-manifest-url.yaml`
+7. `10-init-flags-agent-name-model.yaml`
+8. `10-init-validate-deploy-mode.yaml`
+
+### Phase 2 — Tier 2 (real Azure resources, strict order)
+
+1. `20-setup-deploy-shared-agent.yaml` — FIRST
+2. `21-show.yaml`
+3. `21-show-json.yaml`
+4. `22-invoke-remote.yaml`
+5. `22-invoke-input-file.yaml`
+6. `22-invoke-new-session.yaml`
+7. `23-invoke-protocol-invocations.yaml`
+8. `23-sessions-lifecycle.yaml`
+9. `24-files-lifecycle.yaml`
+10. `25-monitor-console.yaml`
+11. `25-monitor-system.yaml`
+12. `26-endpoint-update.yaml`
+13. `27-run-local-and-invoke-local.yaml` — needs two sessions (port allocation)
+14. `28-eval-lifecycle.yaml`
+15. `29-optimize-submit-and-cancel.yaml`
+16. `2A-doctor-provisioned-all-pass.yaml`
+17. `2B-endpoint-show.yaml`
+18. `2C-code-download.yaml`
+19. `2D-delete.yaml`
+20. `2Z-teardown-down.yaml` — LAST
+
+## Rules
+
+- Per scenario: `load_scenario` → `run_pre_hooks` (if any) → `start_session` → accomplish goals → `finish_session` → `run_post_hooks` (if any).
+- Use `run_name=<scenario-stem>` for each `start_session`.
+- Record start/end time for EACH scenario (wall clock).
+- If a scenario fails, record FAIL with reason and **continue to next** (do not abort).
+- Do not re-verify or retry after a selection — treat a missed selection as a hard failure for that scenario.
+- Prefer `choice_text` over `choice_index`.
+- Clear pre-filled text fields before typing (select-all + delete).
+- For Tier 2 setup: select Container deploy mode, Python language, Basic Responses template.
+- For subscription selection: use the subscription from profile.
+- For region: select region from profile.
+- For model: select model from profile.
+
+## Output
+
+When all scenarios are done, write a markdown report to `./full-pipeline-run-results.md` with:
+
+1. **Summary table**: total scenarios, PASS/FAIL/SKIP counts per tier
+2. **Per-scenario detail**: name, tier, result, duration, notes
+3. **Timing**: total wall clock, time per tier, top 5 slowest
+4. **Issues**: any bugs, failures, or unexpected behaviors
+
+Record END TIME and total duration.
+
+Start now. Begin with Phase 0.