diff --git a/server/cmd/api/api/display.go b/server/cmd/api/api/display.go index 79b59829..68522151 100644 --- a/server/cmd/api/api/display.go +++ b/server/cmd/api/api/display.go @@ -115,6 +115,14 @@ func (s *ApiService) PatchDisplay(ctx context.Context, req oapi.PatchDisplayRequ // Route to appropriate resolution change handler if displayMode == "xorg" { + // The realizable X root width is quantized to a multiple of 8 by + // libxcvt (CVT timing), so round the request up to that grid before + // applying. This makes the mode neko creates match what we ask for, + // and the response reports the size the client actually gets. + if gridded := roundUpToWidthGrid(width); gridded != width { + log.Info("rounded width up to realizable grid", "requested", width, "applied", gridded) + width = gridded + } if s.isNekoEnabled() { log.Info("using Neko API for Xorg resolution change") err = s.setResolutionViaNeko(ctx, width, height, refreshRate) @@ -122,36 +130,32 @@ func (s *ApiService) PatchDisplay(ctx context.Context, req oapi.PatchDisplayRequ log.Info("using xrandr for Xorg resolution change (Neko disabled)") err = s.setResolutionXorgViaXrandr(ctx, width, height, refreshRate) } - // Re-assert the maximized window state via CDP, then verify the - // X root has reached the requested size. Mutter reflows a - // maximized (or fullscreen) window onto the new root - // automatically, so the CDP call's only job is to make sure the - // window is in the state that triggers the reflow. The X root - // poll is the authoritative post-condition: it's the value the - // server actually set and stays panel-robust if mutter ever - // gains a taskbar (a maximized window would then be smaller - // than the root by the panel's reserved space). - // - // Both are fatal on failure — returning 200 with the X root - // still at the old size, or the window stuck in normal state, - // would leave the caller with no signal of the mismatch. The + // Re-assert the maximized window state via CDP. Mutter reflows a + // maximized (or fullscreen) window onto the new root automatically, + // so this call's only job is to ensure the window is in the state + // that triggers the reflow. This stays fatal: a failure here means + // the devtools connection is broken, not a resolution mismatch. The // previous approach of restarting chromium so it could re-apply - // --start-maximized had the same effective contract (the - // restart blocked the response) but cost ~9s per resize and - // wiped browser-side state (Emulation.* overrides, devtools - // sessions). The restart_chromium request field is still - // accepted for API compatibility but no longer triggers a - // restart on this path. + // --start-maximized had the same effective contract (the restart + // blocked the response) but cost ~9s per resize and wiped + // browser-side state (Emulation.* overrides, devtools sessions). + // The restart_chromium request field is still accepted for API + // compatibility but no longer triggers a restart on this path. if err == nil { if cdpErr := s.setWindowMaximizedViaCDP(ctx); cdpErr != nil { log.Error("CDP maximize re-assert failed after Xorg resolution change", "error", cdpErr) err = fmt.Errorf("CDP maximize re-assert failed: %w", cdpErr) } } + // Poll the X root as a non-fatal guard. With width pre-rounded to the + // realizable grid the root normally matches exactly; if it doesn't, + // the resize was still applied at the driver's nearest size, so log + // for visibility but never fail. A display-config mismatch must not + // taint the instance — treating it as fatal is what turned a cosmetic + // pixel rounding into a fleet-wide outage. if err == nil { if waitErr := s.waitForXRootSize(ctx, width, height, 3*time.Second); waitErr != nil { - log.Error("X root did not reach requested size after resize", "error", waitErr) - err = fmt.Errorf("X root verification: %w", waitErr) + log.Warn("X root did not reach requested size after resize (non-fatal)", "error", waitErr, "requested", fmt.Sprintf("%dx%d", width, height)) } } } else if len(stopped) > 0 { @@ -448,7 +452,18 @@ func (s *ApiService) setWindowMaximizedViaCDP(ctx context.Context) error { return nil } -// waitForXRootSize polls the X root resolution via xrandr until it matches +// roundUpToWidthGrid rounds a requested width up to the next multiple of 8, +// the horizontal granularity libxcvt (CVT timing) quantizes generated modes +// to. The Xorg dummy driver can only realize widths on this grid, so a +// request that isn't a multiple of 8 would otherwise land at a different +// size than asked for (e.g. libxcvt snaps 1365 down to 1360). Rounding up +// before the resize lets neko create a mode that matches exactly. Heights +// have no such constraint and are left untouched. +func roundUpToWidthGrid(width int) int { + return (width + 7) &^ 7 +} + +// waitForXRootSize polls the X root resolution via xrandr until it reaches // the requested size, or the deadline expires. This is the authoritative // post-condition for PATCH /display: the X root is what the server set // (via Neko or xrandr), and the rest of the stack — mutter, chromium — diff --git a/server/cmd/api/api/display_test.go b/server/cmd/api/api/display_test.go index 154b6359..86a1c8c1 100644 --- a/server/cmd/api/api/display_test.go +++ b/server/cmd/api/api/display_test.go @@ -323,6 +323,27 @@ func TestResolveDisplayParams(t *testing.T) { } } +func TestRoundUpToWidthGrid(t *testing.T) { + cases := []struct { + name string + width, want int + }{ + {name: "already a multiple of 8", width: 1360, want: 1360}, + {name: "1280 unchanged", width: 1280, want: 1280}, + {name: "odd width rounds up", width: 1365, want: 1368}, + {name: "even non-multiple rounds up", width: 1366, want: 1368}, + {name: "one below the grid rounds up", width: 1367, want: 1368}, + {name: "1199 rounds up to 1200", width: 1199, want: 1200}, + {name: "1920 unchanged", width: 1920, want: 1920}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, roundUpToWidthGrid(tc.width)) + }) + } +} + func TestHeadlessRefreshRateSticky(t *testing.T) { t.Run("setViewportOverride records sticky rate", func(t *testing.T) { svc := &ApiService{} diff --git a/server/e2e/e2e_display_resize_window_test.go b/server/e2e/e2e_display_resize_window_test.go index 6e47c207..56e12e4e 100644 --- a/server/e2e/e2e_display_resize_window_test.go +++ b/server/e2e/e2e_display_resize_window_test.go @@ -457,7 +457,6 @@ func TestDisplayResizeChromiumWindow(t *testing.T) { } } - // navigateBlank points the active page at about:blank via playwright so the // renderer is alive before we query window dimensions. func navigateBlank(t *testing.T, ctx context.Context, c *TestContainer) { @@ -523,3 +522,62 @@ func waitForXRootResolution(t *testing.T, ctx context.Context, c *TestContainer, } } } + +// TestDisplayResizeOddDimensions reproduces the production failure where a +// PATCH /display to a non-multiple-of-8 width returned 500 and tainted the +// browser instance. libxcvt quantizes the X root width to a multiple of 8, so +// the server rounds the request up to that grid and never fails the request +// on a resolution mismatch. The call must return 200 (not 500) and report the +// rounded width; the height is not gridded and is preserved as requested. +// +// Asserts the HTTP contract rather than re-reading the X root: physical +// convergence of a (clean) resize is already covered by +// TestDisplayResizeChromiumWindow, and re-reading the root here is sensitive +// to neko's startup-mode race on the heavy headful image. +// +// Runs on the neko (WebRTC) path — the production configuration. +func TestDisplayResizeOddDimensions(t *testing.T) { + if _, err := exec.LookPath("docker"); err != nil { + t.Skipf("docker not available: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + + env := map[string]string{ + "WIDTH": "1024", + "HEIGHT": "768", + "ENABLE_WEBRTC": "true", + "NEKO_ADMIN_PASSWORD": "admin", + "CHROMIUM_FLAGS": "--user-data-dir=/home/kernel/user-data --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --remote-allow-origins=*", + } + + c := NewTestContainer(t, headfulImage) + require.NoError(t, c.Start(ctx, ContainerConfig{Env: env}), "failed to start container") + defer c.Stop(ctx) + require.NoError(t, c.WaitReady(ctx), "api not ready") + require.NoError(t, c.WaitDevTools(ctx), "devtools not ready") + navigateBlank(t, ctx, c) + + // Width 1365 isn't on libxcvt's 8px grid, so the server rounds it up to + // 1368. Pre-fix this returned 500 and tainted the instance. The height + // (769) is odd but not gridded, so it is preserved exactly. + reqW, reqH := 1365, 769 + const wantW = 1368 // roundUpToWidthGrid(1365) + + client, err := c.APIClient() + require.NoError(t, err) + rate := instanceoapi.PatchDisplayRequestRefreshRate(60) + rsp, err := client.PatchDisplayWithResponse(ctx, instanceoapi.PatchDisplayJSONRequestBody{ + Width: &reqW, + Height: &reqH, + RefreshRate: &rate, + }) + require.NoError(t, err) + require.Equal(t, http.StatusOK, rsp.StatusCode(), "PATCH /display: %s body=%s", rsp.Status(), string(rsp.Body)) + require.NotNil(t, rsp.JSON200) + require.NotNil(t, rsp.JSON200.Width) + require.NotNil(t, rsp.JSON200.Height) + require.Equal(t, wantW, *rsp.JSON200.Width, "width should be rounded up to the 8px grid") + require.Equal(t, reqH, *rsp.JSON200.Height, "height should be preserved") +}