diff --git a/packages/agent/scripts/dev-server.ts b/packages/agent/scripts/dev-server.ts new file mode 100644 index 000000000..80415022d --- /dev/null +++ b/packages/agent/scripts/dev-server.ts @@ -0,0 +1,121 @@ +#!/usr/bin/env tsx +// Boots AgentServer locally for poking with curl. Mocks the PostHog API via +// msw and stamps a fake session so /command works without a real backend or +// agent runtime spawn. Use only for endpoint smoke tests. + +import { execFileSync } from "node:child_process"; +import { generateKeyPairSync } from "node:crypto"; +import { mkdtempSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import jwt from "jsonwebtoken"; +import { setupServer } from "msw/node"; +import { AgentServer } from "../src/server/agent-server"; +import { SANDBOX_CONNECTION_AUDIENCE } from "../src/server/jwt"; +import { createPostHogHandlers } from "../src/test/mocks/msw-handlers"; + +const PORT = Number(process.env.PORT ?? 3001); +const TASK_ID = process.env.TASK_ID ?? "test-task-id"; +const RUN_ID = process.env.RUN_ID ?? "test-run-id"; +const PROJECT_ID = Number(process.env.PROJECT_ID ?? 1); +const API_URL = "http://localhost:8000"; + +const { privateKey, publicKey } = generateKeyPairSync("rsa", { + modulusLength: 2048, + publicKeyEncoding: { type: "spki", format: "pem" }, + privateKeyEncoding: { type: "pkcs8", format: "pem" }, +}); + +const mswServer = setupServer(...createPostHogHandlers({ baseUrl: API_URL })); +mswServer.listen({ onUnhandledRequest: "bypass" }); + +const repoPath = mkdtempSync(join(tmpdir(), "agent-server-dev-")); +execFileSync("git", ["init", "-q"], { cwd: repoPath }); +execFileSync("git", ["config", "user.email", "dev@local"], { cwd: repoPath }); +execFileSync("git", ["config", "user.name", "Dev"], { cwd: repoPath }); +execFileSync("git", ["config", "commit.gpgsign", "false"], { cwd: repoPath }); +execFileSync("git", ["commit", "-q", "--allow-empty", "-m", "init"], { + cwd: repoPath, +}); + +const server = new AgentServer({ + port: PORT, + jwtPublicKey: publicKey, + repositoryPath: repoPath, + apiUrl: API_URL, + apiKey: "dev-api-key", + projectId: PROJECT_ID, + mode: "interactive", + taskId: TASK_ID, + runId: RUN_ID, +}); + +// start() runs autoInitializeSession() which would spawn a real Claude/Codex +// runtime. We don't need that to exercise the HTTP routes — no-op it. +( + server as unknown as { autoInitializeSession: () => Promise } +).autoInitializeSession = async () => { + console.log("[dev-server] skipped autoInitializeSession"); +}; + +await server.start(); + +// /command's route checks session.payload.run_id; set_token's handler only +// needs `this.session` to be truthy. Stamp the minimum shape that satisfies +// both. Other commands (user_message, cancel, etc.) will still fail because +// they touch session.clientConnection — that's fine. +(server as unknown as { session: unknown }).session = { + payload: { + run_id: RUN_ID, + task_id: TASK_ID, + team_id: PROJECT_ID, + user_id: 0, + distinct_id: "dev", + mode: "interactive", + }, +}; + +const token = jwt.sign( + { + run_id: RUN_ID, + task_id: TASK_ID, + team_id: PROJECT_ID, + user_id: 1, + distinct_id: "dev", + mode: "interactive", + aud: SANDBOX_CONNECTION_AUDIENCE, + }, + privateKey, + { algorithm: "RS256", expiresIn: 3600 }, +); + +const base = `http://localhost:${PORT}`; +console.log(""); +console.log(`agent-server listening on ${base}`); +console.log(`repo: ${repoPath}`); +console.log(""); +console.log(`export TOKEN='${token}'`); +console.log(""); +console.log("# health (no auth)"); +console.log(`curl ${base}/health`); +console.log(""); +console.log("# /gh (loopback only, no auth)"); +console.log( + `curl -X POST ${base}/gh -H 'Content-Type: application/json' -d '{"args":["--version"]}'`, +); +console.log(""); +console.log("# set_token via /command (JWT required)"); +console.log( + `curl -X POST ${base}/command -H "Authorization: Bearer $TOKEN" -H 'Content-Type: application/json' -d '{"jsonrpc":"2.0","id":1,"method":"set_token","params":{"token":"ghp_dev_test"}}'`, +); +console.log(""); +console.log("ctrl-c to stop."); + +const shutdown = async () => { + console.log("\n[dev-server] shutting down"); + mswServer.close(); + await server.stop(); + process.exit(0); +}; +process.on("SIGINT", shutdown); +process.on("SIGTERM", shutdown); diff --git a/packages/agent/src/server/agent-server.test.ts b/packages/agent/src/server/agent-server.test.ts index f18e4ebb7..36597c7fc 100644 --- a/packages/agent/src/server/agent-server.test.ts +++ b/packages/agent/src/server/agent-server.test.ts @@ -382,6 +382,88 @@ describe("AgentServer HTTP Mode", () => { }, 20000); }); + describe("set_token command", () => { + // Build an AgentServer that is NOT stored in the outer `server` ref so the + // afterEach hook doesn't try to clean up its (fake) session and cascade + // cleanup failures into the test output. + const buildBareServer = () => { + const bare = new AgentServer({ + port, + jwtPublicKey: TEST_PUBLIC_KEY, + repositoryPath: repo.path, + apiUrl: "http://localhost:8000", + apiKey: "test-api-key", + projectId: 1, + mode: "interactive", + taskId: "test-task-id", + runId: "test-run-id", + }) as unknown as { + session: unknown; + executeCommand( + method: string, + params: Record, + ): Promise; + }; + // Pass executeCommand's "no active session" guard. + bare.session = {}; + return bare; + }; + + it.each([ + { method: "set_token", token: "ghp_refreshed" }, + { method: "posthog/set_token", token: "ghp_aliased" }, + ])( + "updates GH_TOKEN and GITHUB_TOKEN via $method", + async ({ method, token }) => { + const previousGh = process.env.GH_TOKEN; + const previousGithub = process.env.GITHUB_TOKEN; + delete process.env.GH_TOKEN; + delete process.env.GITHUB_TOKEN; + + try { + const result = await buildBareServer().executeCommand(method, { + token, + }); + + expect(result).toEqual({ updated: true }); + expect(process.env.GH_TOKEN).toBe(token); + expect(process.env.GITHUB_TOKEN).toBe(token); + } finally { + if (previousGh === undefined) delete process.env.GH_TOKEN; + else process.env.GH_TOKEN = previousGh; + if (previousGithub === undefined) delete process.env.GITHUB_TOKEN; + else process.env.GITHUB_TOKEN = previousGithub; + } + }, + ); + }); + + describe("POST /gh", () => { + // Each case rejects with 400 (validation), proving 1) the body schema is + // enforced and 2) the route doesn't require a JWT — a 401 would mean the + // route is gated, which would defeat the wrapper-script use case. + it.each([ + { name: "malformed JSON", body: "not-json" }, + { name: "missing args", body: JSON.stringify({ cwd: "/tmp" }) }, + { name: "empty args array", body: JSON.stringify({ args: [] }) }, + { name: "empty body (no JWT required)", body: JSON.stringify({}) }, + ])( + "rejects $name with 400", + async ({ body }) => { + await createServer().start(); + + const response = await fetch(`http://localhost:${port}/gh`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body, + }); + + expect(response.status).toBe(400); + }, + 20000, + ); + }); + describe("session lifecycle", () => { it("emits _posthog/run_started after session initialization", async () => { await createServer().start(); diff --git a/packages/agent/src/server/agent-server.ts b/packages/agent/src/server/agent-server.ts index 8e949b8c8..2836a1372 100644 --- a/packages/agent/src/server/agent-server.ts +++ b/packages/agent/src/server/agent-server.ts @@ -12,6 +12,7 @@ import { PROTOCOL_VERSION, } from "@agentclientprotocol/sdk"; import { type ServerType, serve } from "@hono/node-server"; +import { getConnInfo } from "@hono/node-server/conninfo"; import { getCurrentBranch } from "@posthog/git/queries"; import { Hono } from "hono"; import { z } from "zod"; @@ -53,8 +54,10 @@ import { normalizeCloudPromptContent, promptBlocksToText, } from "./cloud-prompt"; +import { isLoopbackAddress, runGh } from "./gh-exec"; import { type JwtPayload, JwtValidationError, validateJwt } from "./jwt"; import { + ghRequestSchema, handoffLocalGitStateSchema, jsonRpcRequestSchema, validateCommandParams, @@ -450,6 +453,43 @@ export class AgentServer { } }); + // Sandbox-local exec for `gh`. Codex agents run in an isolated child process + // whose environment is captured at spawn time, so refreshing GH_TOKEN in the + // agent server doesn't reach them. Their gh-wrapper script calls this route + // to run gh against the agent server's freshly-refreshed env. Loopback-only; + // intentionally not JWT-protected so the wrapper has nothing to forward. + app.post("/gh", async (c) => { + const remote = getConnInfo(c).remote.address; + if (!isLoopbackAddress(remote)) { + this.logger.warn("Rejected non-loopback /gh request", { remote }); + return c.json({ error: "Forbidden" }, 403); + } + + const rawBody = await c.req.json().catch(() => null); + const parsed = ghRequestSchema.safeParse(rawBody); + if (!parsed.success) { + return c.json({ error: parsed.error.message }, 400); + } + + const { args, cwd, timeoutMs } = parsed.data; + try { + const result = await runGh(args, { + cwd: cwd ?? this.config.repositoryPath ?? process.cwd(), + timeoutMs: timeoutMs ?? 60_000, + logger: this.logger, + }); + return c.json(result); + } catch (error) { + this.logger.error("Failed to run gh", error); + return c.json( + { + error: error instanceof Error ? error.message : "Unknown error", + }, + 500, + ); + } + }); + app.notFound((c) => { return c.json({ error: "Not found" }, 404); }); @@ -694,6 +734,15 @@ export class AgentServer { ); } + case "posthog/set_token": + case "set_token": { + const token = params.token as string; + process.env.GH_TOKEN = token; + process.env.GITHUB_TOKEN = token; + this.logger.info("GH token refreshed"); + return { updated: true }; + } + case POSTHOG_NOTIFICATIONS.PERMISSION_RESPONSE: case "permission_response": { const requestId = params.requestId as string; diff --git a/packages/agent/src/server/gh-exec.test.ts b/packages/agent/src/server/gh-exec.test.ts new file mode 100644 index 000000000..be429678e --- /dev/null +++ b/packages/agent/src/server/gh-exec.test.ts @@ -0,0 +1,113 @@ +import { tmpdir } from "node:os"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { isLoopbackAddress, runGh, spawnAndCollect } from "./gh-exec"; + +describe("isLoopbackAddress", () => { + it.each([ + ["127.0.0.1", true], + ["127.0.0.2", true], + ["127.1.2.3", true], + ["::1", true], + ["::ffff:127.0.0.1", true], + ["::ffff:127.1.2.3", true], + ["localhost", true], + ["10.0.0.1", false], + ["192.168.1.1", false], + ["::ffff:10.0.0.1", false], + ["8.8.8.8", false], + ["128.0.0.1", false], + ["", false], + [undefined, false], + ])("address %s -> %s", (input, expected) => { + expect(isLoopbackAddress(input)).toBe(expected); + }); +}); + +describe("runGh", () => { + let previous: string | undefined; + + beforeEach(() => { + previous = process.env.POSTHOG_GH_BINARY; + }); + + afterEach(() => { + if (previous === undefined) delete process.env.POSTHOG_GH_BINARY; + else process.env.POSTHOG_GH_BINARY = previous; + }); + + it("uses POSTHOG_GH_BINARY when set, so dev/test can swap the binary", async () => { + process.env.POSTHOG_GH_BINARY = process.execPath; + const result = await runGh(["-e", "process.stdout.write('via-env');"], { + cwd: tmpdir(), + timeoutMs: 5_000, + }); + + expect(result.stdout).toBe("via-env"); + expect(result.exitCode).toBe(0); + }); +}); + +// Test the spawn/collect plumbing via spawnAndCollect so tests don't depend on +// a real `gh` install. runGh's default-binary path is verified by inspection. +describe("spawnAndCollect", () => { + it("captures stdout, stderr, and a zero exit code", async () => { + const result = await spawnAndCollect( + process.execPath, + ["-e", "process.stdout.write('hi'); process.stderr.write('err');"], + { cwd: tmpdir(), timeoutMs: 5_000 }, + ); + + expect(result.stdout).toBe("hi"); + expect(result.stderr).toBe("err"); + expect(result.exitCode).toBe(0); + expect(result.timedOut).toBe(false); + }); + + it("surfaces non-zero exit codes without throwing", async () => { + const result = await spawnAndCollect( + process.execPath, + ["-e", "process.exit(2)"], + { cwd: tmpdir(), timeoutMs: 5_000 }, + ); + + expect(result.exitCode).toBe(2); + expect(result.timedOut).toBe(false); + }); + + it("rejects when the binary is missing", async () => { + await expect( + spawnAndCollect( + "/nonexistent/binary/that/cannot/possibly/exist", + ["--version"], + { cwd: tmpdir(), timeoutMs: 5_000 }, + ), + ).rejects.toThrow(); + }); + + it("kills the process on timeout and reports timedOut: true", async () => { + const start = Date.now(); + const result = await spawnAndCollect( + process.execPath, + ["-e", "setTimeout(() => process.exit(0), 60_000);"], + { cwd: tmpdir(), timeoutMs: 200 }, + ); + + expect(result.timedOut).toBe(true); + // Should be killed well before the 60s sleep would have completed. + expect(Date.now() - start).toBeLessThan(10_000); + }); + + it("respects the cwd option", async () => { + const expected = tmpdir(); + const result = await spawnAndCollect( + process.execPath, + ["-e", "process.stdout.write(process.cwd());"], + { cwd: expected, timeoutMs: 5_000 }, + ); + + // Resolve through realpath-style equivalence: tmpdir on macOS may be a + // symlink (e.g. /var/folders -> /private/var/folders). + expect([expected, `/private${expected}`]).toContain(result.stdout); + expect(result.exitCode).toBe(0); + }); +}); diff --git a/packages/agent/src/server/gh-exec.ts b/packages/agent/src/server/gh-exec.ts new file mode 100644 index 000000000..b0f643031 --- /dev/null +++ b/packages/agent/src/server/gh-exec.ts @@ -0,0 +1,112 @@ +import { spawn } from "node:child_process"; +import type { Logger } from "../utils/logger"; + +export interface GhExecOptions { + cwd: string; + timeoutMs: number; + logger?: Logger; +} + +export interface GhExecResult { + stdout: string; + stderr: string; + exitCode: number | null; + signal: NodeJS.Signals | null; + timedOut: boolean; +} + +const KILL_GRACE_MS = 2_000; + +export function runGh( + args: string[], + options: GhExecOptions, +): Promise { + // Binary resolution is owned by the server process, not request callers: + // POSTHOG_GH_BINARY overrides the default for dev/test setups where `gh` + // isn't on PATH or a stand-in is needed. The HTTP body schema still strips + // unknowns, so untrusted clients can't pick the binary. + const binary = process.env.POSTHOG_GH_BINARY ?? "gh"; + return spawnAndCollect(binary, args, options); +} + +export async function spawnAndCollect( + binary: string, + args: string[], + options: GhExecOptions, +): Promise { + const logger = options.logger; + + logger?.debug("Running command", { + binary, + args, + cwd: options.cwd, + timeoutMs: options.timeoutMs, + }); + + return new Promise((resolve, reject) => { + const child = spawn(binary, args, { + cwd: options.cwd, + env: process.env, + stdio: ["ignore", "pipe", "pipe"], + }); + + let stdout = ""; + let stderr = ""; + let timedOut = false; + let killTimer: NodeJS.Timeout | null = null; + + const timeout = setTimeout(() => { + timedOut = true; + logger?.warn("Command timed out, sending SIGTERM", { + pid: child.pid, + timeoutMs: options.timeoutMs, + }); + child.kill("SIGTERM"); + killTimer = setTimeout(() => { + if (!child.killed) { + logger?.warn("Command did not exit, sending SIGKILL", { + pid: child.pid, + }); + child.kill("SIGKILL"); + } + }, KILL_GRACE_MS); + }, options.timeoutMs); + + child.stdout?.setEncoding("utf8"); + child.stderr?.setEncoding("utf8"); + child.stdout?.on("data", (chunk: string) => { + stdout += chunk; + }); + child.stderr?.on("data", (chunk: string) => { + stderr += chunk; + }); + + child.on("error", (err) => { + clearTimeout(timeout); + if (killTimer) clearTimeout(killTimer); + reject(err); + }); + + child.on("close", (code, signal) => { + clearTimeout(timeout); + if (killTimer) clearTimeout(killTimer); + resolve({ + stdout, + stderr, + exitCode: code, + signal, + timedOut, + }); + }); + }); +} + +export function isLoopbackAddress(address: string | undefined): boolean { + if (!address) return false; + if (address === "::1" || address === "localhost") return true; + // IPv4 reserves the full 127.0.0.0/8 range as loopback, and IPv4-mapped + // IPv6 covers ::ffff:127.0.0.0/104 — match by prefix rather than equality. + if (address.startsWith("127.")) return true; + if (address.startsWith("::ffff:127.")) return true; + return false; +} diff --git a/packages/agent/src/server/schemas.ts b/packages/agent/src/server/schemas.ts index 2dfa791a9..ed1c71310 100644 --- a/packages/agent/src/server/schemas.ts +++ b/packages/agent/src/server/schemas.ts @@ -96,6 +96,18 @@ export const refreshSessionParamsSchema = z.object({ mcpServers: mcpServersSchema, }); +export const setTokenParamsSchema = z.object({ + token: z.string().min(1, "token is required"), +}); + +export const ghRequestSchema = z.object({ + args: z.array(z.string()).min(1, "args is required"), + cwd: z.string().optional(), + timeoutMs: z.number().int().positive().max(600_000).optional(), +}); + +export type GhRequest = z.infer; + export const closeParamsSchema = z .object({ localGitState: handoffLocalGitStateSchema.optional(), @@ -116,6 +128,8 @@ export const commandParamsSchemas = { refresh_session: refreshSessionParamsSchema, "posthog/refresh_session": refreshSessionParamsSchema, "_posthog/refresh_session": refreshSessionParamsSchema, + set_token: setTokenParamsSchema, + "posthog/set_token": setTokenParamsSchema, } as const; export type CommandMethod = keyof typeof commandParamsSchemas;