diff --git a/.github/workflows/e2e-ext-azure-ai-agents.yml b/.github/workflows/e2e-ext-azure-ai-agents.yml
new file mode 100644
index 00000000000..682c519d4e3
--- /dev/null
+++ b/.github/workflows/e2e-ext-azure-ai-agents.yml
@@ -0,0 +1,201 @@
+name: "ext-azure-ai-agents-e2e"
+
+# E2E tests for the azure.ai.agents CLI extension.
+#
+# Architecture (identical to local testing):
+#   Copilot CLI (LLM brain, installed via npm)
+#     ↕ MCP protocol (stdio, config in ~/.copilot/mcp-config.json)
+#   cli-interactive-tester (MCP server, manages tmux sessions)
+#     ↕ tmux
+#   azd ai agent CLI (under test)
+#
+# Copilot CLI runs in programmatic mode (-p), reads scenario goals from YAML,
+# and drives the terminal via MCP tool — same flow as local testing.
+#
+# Manual trigger only — switch to PR-trigger once pipeline is stable.
+# Scenarios live on branch trangevi/test-scenarios until PR #8524 merges.
+
+on:
+  workflow_dispatch:
+    inputs:
+      tier:
+        description: "Which tiers to run"
+        type: choice
+        options:
+          - "0"
+          - "0+1"
+          - "0+1+2"
+        default: "0"
+      confirm_tier2_cost:
+        description: "Confirm Tier 2 Azure costs (~$2-5)"
+        type: boolean
+        default: false
+
+# All runs of this workflow share one group; an in-progress run is not cancelled.
+concurrency:
+  group: ${{ github.workflow }}
+  cancel-in-progress: false
+
+permissions:
+  contents: read
+  id-token: write  # Required for azure/login OIDC federated credentials (Tier 1/2)
+
+env:
+  SCENARIOS_DIR: cli/azd/extensions/azure.ai.agents/tests/cli-interactive-tester-scenarios
+  AZD_AGENTS_FIXTURES: cli/azd/extensions/azure.ai.agents/tests/cli-interactive-tester-scenarios/fixtures
+
+jobs:
+  e2e-test:
+    name: "E2E scenarios (Tier ${{ inputs.tier }})"
+    runs-on: ubuntu-22.04
+    timeout-minutes: 90
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          # TODO: change to 'main' after PR #8524 merges
+          ref: trangevi/test-scenarios
+
+      # Fail fast: reject an unconfirmed Tier 2 run before any toolchain setup.
+      # Kept after checkout so the always() teardown step below still finds
+      # its working directory.
+      - name: Validate Tier 2 cost confirmation
+        if: contains(inputs.tier, '2') && !inputs.confirm_tier2_cost
+        run: |
+          echo "::error::Tier 2 creates Azure resources (~\$2-5). Set confirm_tier2_cost=true to proceed."
+          exit 1
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+
+      - uses: actions/setup-go@v6
+        with:
+          go-version-file: "cli/azd/go.mod"
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install Copilot CLI
+        run: npm install -g @github/copilot
+
+      - name: Build azd + install extension
+        working-directory: cli/azd
+        run: |
+          go build -o ./azd .
+          # The export covers this step only; GITHUB_PATH exposes azd to later steps.
+          export PATH="$PWD:$PATH"
+          azd extension install azure.ai.agents --source ./extensions/azure.ai.agents
+          echo "${{ github.workspace }}/cli/azd" >> "$GITHUB_PATH"
+
+      - name: Install cli-interactive-tester
+        run: |
+          # Refresh the package index first so the install does not hit stale mirrors.
+          sudo apt-get update
+          sudo apt-get install -y tmux
+          # TODO: confirm repo visibility. If private, clone through an
+          # x-access-token URL and hand the token to this step via `env:`
+          # (workflow expressions are expanded even inside shell comments).
+          git clone https://github.com/coreai-microsoft/cli-interactive-tester.git /tmp/cli-interactive-tester
+          cd /tmp/cli-interactive-tester
+          pip install -e .
+
+      - name: Install uv (for Tier 2 run-local scenario)
+        if: contains(inputs.tier, '2')
+        uses: astral-sh/setup-uv@v6
+
+      - name: Azure Login
+        if: contains(inputs.tier, '1') || contains(inputs.tier, '2')
+        uses: azure/login@v2
+        with:
+          client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+
+      - name: GitHub CLI Auth
+        if: contains(inputs.tier, '1') || contains(inputs.tier, '2')
+        env:
+          # Pass the secret through the environment instead of expanding it into
+          # the script text. Deliberately not named GH_TOKEN: gh refuses
+          # `auth login` while that variable is set.
+          GH_AUTH_TOKEN: ${{ secrets.GH_TOKEN }}
+        run: printf '%s\n' "$GH_AUTH_TOKEN" | gh auth login --with-token
+
+      - name: Create test profile
+        if: contains(inputs.tier, '1') || contains(inputs.tier, '2')
+        working-directory: ${{ env.SCENARIOS_DIR }}
+        env:
+          AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          FOUNDRY_PROJECT_ENDPOINT: ${{ secrets.FOUNDRY_PROJECT_ENDPOINT }}
+        run: |
+          printf '%s\n' \
+            "prefix: \"ci-${{ github.run_number }}\"" \
+            "subscription: \"${AZURE_SUBSCRIPTION_ID}\"" \
+            "region: \"North Central US\"" \
+            "foundry_project_endpoint: \"${FOUNDRY_PROJECT_ENDPOINT}\"" \
+            > profile.local.yaml
+
+      - name: Configure MCP for Copilot
+        run: |
+          mkdir -p ~/.copilot
+          printf '%s\n' '{' \
+            '  "mcpServers": {' \
+            '    "cli-interactive-tester": {' \
+            '      "type": "stdio",' \
+            '      "command": "bash",' \
+            '      "args": ["-c", "cd /tmp/cli-interactive-tester && python -m auto_test_tool.mcp_server"]' \
+            '    }' \
+            '  }' \
+            '}' > ~/.copilot/mcp-config.json
+
+      - name: Run E2E scenarios via Copilot + MCP tool
+        working-directory: ${{ env.SCENARIOS_DIR }}
+        env:
+          COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_PAT }}
+          TIER: ${{ inputs.tier }}
+          AZD_AGENTS_FIXTURES: ${{ github.workspace }}/${{ env.AZD_AGENTS_FIXTURES }}
+          UV_HTTP_TIMEOUT: "300"
+        run: |
+          if [ ! -f prompt-ci-run.md ]; then
+            echo "::error::prompt-ci-run.md not found in $(pwd)"
+            exit 1
+          fi
+          # Copilot CLI in programmatic mode:
+          #   -p            = non-interactive, exit on completion
+          #   --no-ask-user = never prompt for user input
+          #   --allow-tool  = pre-authorize MCP tool access
+          #
+          # TODO: confirm --allow-tool syntax for MCP-registered tools.
+          # Possible forms: 'cli-interactive-tester(*)', 'cli-interactive-tester'
+          copilot -p "$(cat prompt-ci-run.md)" \
+            --allow-tool='cli-interactive-tester(*)' \
+            --no-ask-user
+          # The prompt tells the agent to write this report; if it is missing or
+          # empty the run did not complete, so fail instead of passing silently.
+          if [ ! -s full-pipeline-run-results.md ]; then
+            echo "::error::full-pipeline-run-results.md was not produced in $(pwd)"
+            exit 1
+          fi
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: e2e-results-tier-${{ inputs.tier }}
+          path: |
+            ${{ env.SCENARIOS_DIR }}/reports/
+            ${{ env.SCENARIOS_DIR }}/full-pipeline-run-results.md
+          retention-days: 30
+
+      - name: Teardown (Tier 2 always cleanup)
+        if: always() && contains(inputs.tier, '2')
+        working-directory: ${{ env.SCENARIOS_DIR }}
+        run: |
+          # Only the first directory under ~/working/azd-agents-shared is torn
+          # down (head -1); finding none is reported as a warning, not a failure.
+          WORK_DIR=$(ls -d ~/working/azd-agents-shared/*/ 2>/dev/null | head -1)
+          if [ -z "$WORK_DIR" ]; then
+            echo "::warning::No working directory found — teardown skipped, check for leaked resources"
+            exit 0
+          fi
+          cd "$WORK_DIR" && azd down --force --purge
diff --git a/cli/azd/extensions/azure.ai.agents/tests/cli-interactive-tester-scenarios/prompt-ci-run.md b/cli/azd/extensions/azure.ai.agents/tests/cli-interactive-tester-scenarios/prompt-ci-run.md
new file mode 100644
index 00000000000..d5fe42104ad
--- /dev/null
+++ b/cli/azd/extensions/azure.ai.agents/tests/cli-interactive-tester-scenarios/prompt-ci-run.md
@@ -0,0 +1,114 @@
+# CI Pipeline Prompt — E2E Test Scenarios
+
+Autonomously run azure.ai.agents CLI test scenarios using the cli-interactive-tester MCP tool. Do NOT ask questions — make all decisions yourself and run to completion.
+
+## Environment
+
+- Runner: Ubuntu 22.04 (GitHub Actions)
+- azd is on PATH (pre-built in earlier step)
+- Azure auth: active if TIER includes 1 or 2 (federated identity via earlier step)
+- GitHub CLI auth: active if TIER includes 1 or 2 (earlier step)
+- Scenarios directory: current working directory
+- All paths are POSIX (Linux runner)
+
+## Profile Setup
+
+Read and merge (local overrides shared):
+1. `./profile.yaml`
+2. `./profile.local.yaml`
+
+Derive `shared_agent_name = "{prefix}-{shared_agent_suffix}"`. Pass merged map as `session_vars` on every MCP call.
+
+## Tier Selection
+
+Check the environment variable `TIER` to determine which tiers to run:
+- `TIER=0` → run Phase 0 only
+- `TIER=0+1` → run Phase 0 + Phase 1
+- `TIER=0+1+2` → run all phases
+
+If `TIER` is not set, default to `0`.
+
+## Run Plan
+
+Record the START TIME at the beginning.
+
+### Phase 0 — Tier 0 (offline, no auth, fast)
+
+Run ALL `00-*.yaml` scenarios sequentially:
+1. `00-version.yaml`
+2. `00-help-root.yaml`
+3. `00-sample-list-text.yaml`
+4. `00-sample-list-json-filters.yaml`
+5. `00-doctor-empty-dir.yaml`
+6. `00-doctor-local-only.yaml`
+7. `00-doctor-partial-failure.yaml`
+8. `00-init-validate-mutually-exclusive.yaml`
+9. `00-init-validate-no-prompt-missing.yaml`
+10. `00-invoke-validate-protocol.yaml`
+11. `00-eval-context-required.yaml`
+12. `00-optimize-apply-requires-candidate.yaml`
+13. `00-delete-help.yaml`
+14. `00-endpoint-show-help.yaml`
+15. `00-code-download-help.yaml`
+16. `00-init-picker-navigation.yaml` — interactive picker UX, abort with Ctrl-C
+
+### Phase 1 — Tier 1 (auth required, scaffold only, NO azd provision)
+
+1. `10-init-template-python.yaml`
+2. `10-init-template-dotnet.yaml`
+3. `10-init-deploy-mode-code.yaml`
+4. `10-init-deploy-mode-container.yaml`
+5. `10-init-from-code.yaml`
+6. `10-init-from-manifest-url.yaml`
+7. `10-init-flags-agent-name-model.yaml`
+8. `10-init-validate-deploy-mode.yaml`
+
+### Phase 2 — Tier 2 (real Azure resources, strict order)
+
+1. `20-setup-deploy-shared-agent.yaml` — FIRST
+2. `21-show.yaml`
+3. `21-show-json.yaml`
+4. `22-invoke-remote.yaml`
+5. `22-invoke-input-file.yaml`
+6. `22-invoke-new-session.yaml`
+7. `23-invoke-protocol-invocations.yaml`
+8. `23-sessions-lifecycle.yaml`
+9. `24-files-lifecycle.yaml`
+10. `25-monitor-console.yaml`
+11. `25-monitor-system.yaml`
+12. `26-endpoint-update.yaml`
+13. `27-run-local-and-invoke-local.yaml` — needs two sessions (port allocation)
+14. `28-eval-lifecycle.yaml`
+15. `29-optimize-submit-and-cancel.yaml`
+16. `2A-doctor-provisioned-all-pass.yaml`
+17. `2B-endpoint-show.yaml`
+18. `2C-code-download.yaml`
+19. `2D-delete.yaml`
+20. `2Z-teardown-down.yaml` — LAST
+
+## Rules
+
+- Per scenario: `load_scenario` → `run_pre_hooks` (if any) → `start_session` → accomplish goals → `finish_session` → `run_post_hooks` (if any).
+- Use `run_name={scenario-name}` for each `start_session`.
+- Record start/end time for EACH scenario (wall clock).
+- If a scenario fails, record FAIL with reason and **continue to next** (do not abort).
+- Don't verify/retry after a select — treat select miss as hard failure.
+- Prefer `choice_text` over `choice_index`.
+- Clear pre-filled text fields before typing (select-all + delete).
+- For Tier 2 setup: select Container deploy mode, Python language, Basic Responses template.
+- For subscription selection: use the subscription from profile.
+- For region: select region from profile.
+- For model: select model from profile.
+
+## Output
+
+When all scenarios are done, write a markdown report to `./full-pipeline-run-results.md` with:
+
+1. **Summary table**: total scenarios, PASS/FAIL/SKIP counts per tier
+2. **Per-scenario detail**: name, tier, result, duration, notes
+3. **Timing**: total wall clock, time per tier, top 5 slowest
+4. **Issues**: any bugs, failures, or unexpected behaviors
+
+Record END TIME and total duration.
+
+Start now. Begin with Phase 0.