diff --git a/packages/cli/src/browser/manager.test.ts b/packages/cli/src/browser/manager.test.ts index f19d1993b..7f166efb9 100644 --- a/packages/cli/src/browser/manager.test.ts +++ b/packages/cli/src/browser/manager.test.ts @@ -21,7 +21,7 @@ * `background-removal/manager.test.ts`) so we don't touch the real * `HOME` cache. */ -import { join } from "node:path"; +import { join, sep } from "node:path"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; // Use `path.join` so the fake paths line up with whatever separator Node's @@ -30,7 +30,10 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; // `/fake/home/...` literals would fail on Windows because the set lookup // would never match the `\\`-joined real paths. const FAKE_HOME = join("/", "fake", "home"); +const CACHE_ROOT = join(FAKE_HOME, ".cache", "hyperframes"); const HF_CACHE = join(FAKE_HOME, ".cache", "hyperframes", "chrome"); +const HF_LOCK = join(CACHE_ROOT, ".chrome.install.lock"); +const HF_RECLAIM_LOCK = join(CACHE_ROOT, ".chrome.install.reclaim.lock"); const PUPPETEER_CACHE = join(FAKE_HOME, ".cache", "puppeteer", "chrome-headless-shell"); const PUPPETEER_BINARY = join( PUPPETEER_CACHE, @@ -75,8 +78,15 @@ function installFsMocks({ existing, dirs }: FsMockOptions) { mtimes.set(p, Date.now()); }, rmSync: (p: string) => { - paths.delete(p); - mtimes.delete(p); + // Real rmSync({recursive:true}) removes the target AND everything under + // it; the mock's flat path Set has no real tree structure, so simulate + // that by also dropping any tracked path nested under `p`. + for (const existingPath of [...paths]) { + if (existingPath === p || existingPath.startsWith(p + sep)) { + paths.delete(existingPath); + mtimes.delete(existingPath); + } + } }, statSync: (p: string) => { if (!paths.has(p)) { @@ -97,9 +107,10 @@ function installFsMocks({ existing, dirs }: FsMockOptions) { function installPuppeteerBrowsersMock( opts: { - installedInHfCache?: Array<{ browser: string; executablePath: string }>; + installedInHfCache?: Array<{ browser: string; executablePath: string; path?: string }>; installedInHfCacheError?: Error; installResult?: { executablePath: string }; + installImpl?: () => Promise<{ executablePath: string }>; } = {}, ) { vi.doMock("@puppeteer/browsers", () => ({ @@ -108,7 +119,11 @@ function installPuppeteerBrowsersMock( getInstalledBrowsers: opts.installedInHfCacheError ? vi.fn().mockRejectedValue(opts.installedInHfCacheError) : vi.fn().mockResolvedValue(opts.installedInHfCache ?? []), - install: vi.fn().mockResolvedValue(opts.installResult ?? { executablePath: HF_BINARY }), + install: vi + .fn() + .mockImplementation( + opts.installImpl ?? (async () => opts.installResult ?? { executablePath: HF_BINARY }), + ), })); } @@ -169,9 +184,15 @@ describe("findBrowser — cache resolution", () => { "chrome-headless-shell-linux64", "redownloaded-chrome-headless-shell", ); - installFsMocks({ existing: new Set([HF_CACHE]) }); + const staleInstallDir = join(HF_CACHE, "chrome-headless-shell", "linux-131.0.6778.85"); + // The stale install DIR is present (extraction got partway through, e.g. an + // ABOUT/LICENSE-only extract) even though the exe itself is missing — + // exercises the purge-before-redownload fix, not just the redownload path. + const paths = installFsMocks({ existing: new Set([HF_CACHE, staleInstallDir]) }); installPuppeteerBrowsersMock({ - installedInHfCache: [{ browser: "chrome-headless-shell", executablePath: HF_BINARY }], + installedInHfCache: [ + { browser: "chrome-headless-shell", executablePath: HF_BINARY, path: staleInstallDir }, + ], installResult: { executablePath: redownloadedBinary }, }); const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); @@ -181,6 +202,67 @@ describe("findBrowser — cache resolution", () => { expect(result).toEqual({ executablePath: redownloadedBinary, source: "download" }); expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining("Cached binary missing")); + // The stale directory must be gone before @puppeteer/browsers' install() + // sees it — otherwise install() throws "folder exists but exe missing" + // instead of re-extracting (the exact bug both feedback reports hit). + expect(paths.has(staleInstallDir)).toBe(false); + }); + + it("ensureBrowser({force: true}) purges the whole cache before downloading, bypassing any cache/system shortcut", async () => { + const staleInstallDir = join(HF_CACHE, "chrome-headless-shell", "linux-131.0.6778.85"); + const downloadedBinary = join(HF_CACHE, "chrome-headless-shell", "force-downloaded"); + // A HEALTHY cached binary AND system Chrome are both present — force must + // ignore both shortcuts and always re-download, which is the whole point + // of the flag (the reported bug: --force did nothing, a stale dir kept + // winning over every retry). + const paths = installFsMocks({ + existing: new Set([HF_CACHE, HF_BINARY, staleInstallDir, SYSTEM_CHROME]), + }); + installPuppeteerBrowsersMock({ + installedInHfCache: [ + { browser: "chrome-headless-shell", executablePath: HF_BINARY, path: staleInstallDir }, + ], + installResult: { executablePath: downloadedBinary }, + }); + + const { ensureBrowser } = await import("./manager.js"); + const result = await ensureBrowser({ force: true }); + + expect(result).toEqual({ executablePath: downloadedBinary, source: "download" }); + // clearBrowser() wipes prior contents; withInstallLock uses a sibling lock + // outside CACHE_DIR, so assert the purge on what was actually INSIDE it, + // not the directory's own existence. + expect(paths.has(staleInstallDir)).toBe(false); + expect(paths.has(HF_BINARY)).toBe(false); + }); + + it("serializes concurrent force downloads so one purge cannot delete another installer's lock", async () => { + const downloadedBinary = join(HF_CACHE, "chrome-headless-shell", "force-downloaded"); + const paths = installFsMocks({ existing: new Set([CACHE_ROOT, HF_CACHE, HF_BINARY]) }); + let activeInstalls = 0; + let maxActiveInstalls = 0; + installPuppeteerBrowsersMock({ + installedInHfCache: [{ browser: "chrome-headless-shell", executablePath: HF_BINARY }], + installImpl: async () => { + activeInstalls += 1; + maxActiveInstalls = Math.max(maxActiveInstalls, activeInstalls); + expect(paths.has(HF_LOCK)).toBe(true); + await new Promise((resolve) => setTimeout(resolve, 20)); + activeInstalls -= 1; + return { executablePath: downloadedBinary }; + }, + }); + + const { ensureBrowser } = await import("./manager.js"); + + await expect( + Promise.all([ensureBrowser({ force: true }), ensureBrowser({ force: true })]), + ).resolves.toEqual([ + { executablePath: downloadedBinary, source: "download" }, + { executablePath: downloadedBinary, source: "download" }, + ]); + expect(maxActiveInstalls).toBe(1); + expect(paths.has(HF_LOCK)).toBe(false); }); it("ensureBrowser does not leak the install lock directory after a successful download", async () => { @@ -191,7 +273,6 @@ describe("findBrowser — cache resolution", () => { // same extract target. mkdirSync as an atomic mutex closes that race; // this asserts the lock is actually released afterward (a leaked lock // would permanently wedge every future render on this machine). - const HF_LOCK = join(HF_CACHE, ".install.lock"); const downloadedBinary = join(HF_CACHE, "chrome-headless-shell", "downloaded"); // Cache dir exists but is empty (no manifest entries) — distinct from the // ENOTDIR "cache unreadable" case, which falls back to system instead. @@ -216,8 +297,7 @@ describe("findBrowser — cache resolution", () => { // withInstallLock directly with tiny real timeouts (it takes an // injectable timeoutMs/pollMs for exactly this) rather than mocking // Date.now()/setTimeout through the full ensureBrowser call graph. - const HF_LOCK = join(HF_CACHE, ".install.lock"); - const paths = installFsMocks({ existing: new Set([HF_CACHE, HF_LOCK]) }); + const paths = installFsMocks({ existing: new Set([CACHE_ROOT, HF_LOCK]) }); const { withInstallLock } = await import("./manager.js"); const result = await withInstallLock(async () => "done", 10, 5); @@ -231,9 +311,7 @@ describe("findBrowser — cache resolution", () => { // the stale-lock deadline together, waiter A can reclaim the stale lock and // acquire a fresh one. Waiter B's old deadline is still expired, but it must // not delete A's fresh lock. - const HF_LOCK = join(HF_CACHE, ".install.lock"); - const RECLAIM_LOCK = join(HF_CACHE, ".install.reclaim.lock"); - const paths = installFsMocks({ existing: new Set([HF_CACHE, HF_LOCK]) }); + const paths = installFsMocks({ existing: new Set([CACHE_ROOT, HF_LOCK]) }); const { withInstallLock } = await import("./manager.js"); const first = withInstallLock( @@ -248,7 +326,7 @@ describe("findBrowser — cache resolution", () => { await expect(Promise.all([first, second])).resolves.toEqual(["first", "second"]); expect(paths.has(HF_LOCK)).toBe(false); - expect(paths.has(RECLAIM_LOCK)).toBe(false); + expect(paths.has(HF_RECLAIM_LOCK)).toBe(false); }); it("warns and falls through when the hyperframes cache cannot be read", async () => { diff --git a/packages/cli/src/browser/manager.ts b/packages/cli/src/browser/manager.ts index a7c6ea14f..a2a345eb4 100644 --- a/packages/cli/src/browser/manager.ts +++ b/packages/cli/src/browser/manager.ts @@ -21,6 +21,7 @@ async function loadPuppeteerBrowsers(): Promise { } const CHROME_VERSION = "131.0.6778.85"; +const CACHE_ROOT_DIR = join(homedir(), ".cache", "hyperframes"); const CACHE_DIR = join(homedir(), ".cache", "hyperframes", "chrome"); // Puppeteer's managed cache — where `@puppeteer/browsers install // chrome-headless-shell` (and `puppeteer install`) drop binaries. The engine's @@ -39,8 +40,8 @@ const PUPPETEER_CACHE_DIR = join(homedir(), ".cache", "puppeteer", "chrome-headl // // mkdirSync is atomic (EEXIST if another process already holds it), so it // doubles as a zero-dependency cross-process mutex — no lockfile library needed. -const INSTALL_LOCK_DIR = join(CACHE_DIR, ".install.lock"); -const INSTALL_RECLAIM_LOCK_DIR = join(CACHE_DIR, ".install.reclaim.lock"); +const INSTALL_LOCK_DIR = join(CACHE_ROOT_DIR, ".chrome.install.lock"); +const INSTALL_RECLAIM_LOCK_DIR = join(CACHE_ROOT_DIR, ".chrome.install.reclaim.lock"); const INSTALL_LOCK_TIMEOUT_MS = 120_000; // generous: a real download+extract can take a while const INSTALL_LOCK_POLL_MS = 200; @@ -88,7 +89,9 @@ export async function withInstallLock( pollMs = INSTALL_LOCK_POLL_MS, ): Promise { // recursive:false below needs the parent to already exist (unlike `mkdir -p`). - if (!existsSync(CACHE_DIR)) mkdirSync(CACHE_DIR, { recursive: true }); + // Keep lock dirs outside CACHE_DIR so force-clearing the Chrome cache cannot + // delete another installer's in-flight lock. + if (!existsSync(CACHE_ROOT_DIR)) mkdirSync(CACHE_ROOT_DIR, { recursive: true }); let deadline = Date.now() + timeoutMs; for (;;) { if (existsSync(INSTALL_RECLAIM_LOCK_DIR)) { @@ -127,11 +130,31 @@ export interface BrowserResult { export interface EnsureBrowserOptions { onProgress?: (downloadedBytes: number, totalBytes: number) => void; + // Purge any cached HF-managed download before resolving, so a stale or + // partially-extracted install can't make the retry look like a no-op. + force?: boolean; } interface CacheLookupResult { result?: BrowserResult; staleHyperframesCachePath?: string; + // Root install-folder path for the stale entry (InstalledBrowser#path), NOT + // the missing executablePath above — this is what actually needs deleting. + staleInstallPath?: string; +} + +/** + * Remove one browser version's install directory (not the whole CACHE_DIR). + * @puppeteer/browsers' install() treats an existing-but-incomplete directory + * as already installed and throws "folder exists but executable is missing" + * rather than re-extracting — so an extraction interrupted by a Windows AV + * lock, a sleep/wake cycle, or ctrl-C (left with only alphabetically-early + * files like ABOUT/LICENSE, no exe) wedges every subsequent ensure/render + * with the same error until someone manually deletes the directory. Purging + * it first makes the retry actually retry. + */ +function purgeStaleInstall(installPath: string): void { + rmSync(installPath, { recursive: true, force: true }); } // --- Internal helpers ------------------------------------------------------- @@ -215,7 +238,7 @@ async function findFromCache(): Promise { return { result: { executablePath: match.executablePath, source: "cache" } }; } if (match) { - return { staleHyperframesCachePath: match.executablePath }; + return { staleHyperframesCachePath: match.executablePath, staleInstallPath: match.path }; } } @@ -371,7 +394,10 @@ export async function findBrowser(): Promise { `[browser] Cached binary missing at ${fromCache.staleHyperframesCachePath} — re-downloading...`, ); try { - return await withInstallLock(() => downloadBrowser()); + return await withInstallLock(async () => { + if (fromCache.staleInstallPath) purgeStaleInstall(fromCache.staleInstallPath); + return downloadBrowser(); + }); } catch (err) { const cause = normalizeErrorMessage(err); throw new Error( @@ -445,27 +471,44 @@ export async function ensureBrowser(options?: EnsureBrowserOptions): Promise downloadBrowser(options)); - } + if (!options?.force) { + const fromCache = await findFromCache(); + if (fromCache.result) return fromCache.result; + if (fromCache.staleHyperframesCachePath) { + console.warn( + `[browser] Cached binary missing at ${fromCache.staleHyperframesCachePath} — re-downloading...`, + ); + return withInstallLock(async () => { + if (fromCache.staleInstallPath) purgeStaleInstall(fromCache.staleInstallPath); + return downloadBrowser(options); + }); + } - const fromSystem = findFromSystem(); - if (fromSystem) { - warnSystemFallbackOnce(fromSystem.executablePath); - return fromSystem; + const fromSystem = findFromSystem(); + if (fromSystem) { + warnSystemFallbackOnce(fromSystem.executablePath); + return fromSystem; + } } return withInstallLock(async () => { + if (options?.force) { + // `--force` means "always get a fresh managed download" — purging the + // whole HF-managed cache after acquiring the install lock keeps two + // concurrent force retries from deleting each other's in-flight lock or + // partially extracted install. + clearBrowser(); + } + // Re-check after acquiring the lock: a concurrent invocation may have // finished installing while we were waiting, in which case reuse its - // result instead of downloading and extracting a second time. - const afterLock = await findFromCache(); - if (afterLock.result) return afterLock.result; + // result instead of downloading and extracting a second time. Skipped + // under --force, which already purged and always wants a fresh download. + if (!options?.force) { + const afterLock = await findFromCache(); + if (afterLock.result) return afterLock.result; + if (afterLock.staleInstallPath) purgeStaleInstall(afterLock.staleInstallPath); + } return downloadBrowser(options); }); } diff --git a/packages/cli/src/commands/browser.ts b/packages/cli/src/commands/browser.ts index 20e9ea6c9..d9391cc10 100644 --- a/packages/cli/src/commands/browser.ts +++ b/packages/cli/src/commands/browser.ts @@ -5,6 +5,7 @@ import { c } from "../ui/colors.js"; export const examples: Example[] = [ ["Find or download Chrome for rendering", "hyperframes browser ensure"], + ["Purge a stale/partial download and re-download", "hyperframes browser ensure --force"], ["Print the Chrome executable path", "hyperframes browser path"], ["Remove cached Chrome download", "hyperframes browser clear"], ]; @@ -19,11 +20,11 @@ import { } from "../browser/manager.js"; import { trackBrowserInstall, trackCommandFailure } from "../telemetry/events.js"; -async function runEnsure(): Promise { +async function runEnsure(options?: { force?: boolean }): Promise { clack.intro(c.bold("hyperframes browser ensure")); - // ARM64 Linux: Chrome headless shell is not available. - // Try to find system Chromium first, then attempt auto-install via apt. + // ARM64 Linux: Chrome headless shell is not available (apt-get/system-only + // install flow, no download cache to force a purge of) — --force is a no-op here. if (isLinuxArm()) { const s = clack.spinner(); s.start("Linux ARM64 detected — looking for system Chromium..."); @@ -61,26 +62,31 @@ async function runEnsure(): Promise { } const s = clack.spinner(); - s.start("Looking for an existing browser..."); + if (!options?.force) { + s.start("Looking for an existing browser..."); - const existing = await findBrowser(); - if (existing) { - s.stop(c.success("Browser found")); - console.log(); - console.log(` ${c.dim("Source:")} ${c.bold(existing.source)}`); - console.log(` ${c.dim("Path:")} ${c.bold(existing.executablePath)}`); - console.log(); - clack.outro(c.success("Ready to render.")); - return; - } + const existing = await findBrowser(); + if (existing) { + s.stop(c.success("Browser found")); + console.log(); + console.log(` ${c.dim("Source:")} ${c.bold(existing.source)}`); + console.log(` ${c.dim("Path:")} ${c.bold(existing.executablePath)}`); + console.log(); + clack.outro(c.success("Ready to render.")); + return; + } - s.stop("No browser found — downloading"); + s.stop("No browser found — downloading"); + } else { + s.start("Purging cached download and re-downloading..."); + } const downloadSpinner = clack.spinner(); downloadSpinner.start(`Downloading Chrome Headless Shell ${c.dim("v" + CHROME_VERSION)}...`); let lastPct = -1; const result = await ensureBrowser({ + force: options?.force, onProgress: (downloaded, total) => { if (total <= 0) return; const pct = Math.floor((downloaded / total) * 100); @@ -141,6 +147,12 @@ export default defineCommand({ "ensure = find or download Chrome, path = print executable path, clear = remove cached download", required: false, }, + force: { + type: "boolean", + description: + "ensure only: purge any cached download (including a stale/partial one) and re-download from scratch", + default: false, + }, }, async run({ args }) { const subcommand = args.subcommand; @@ -157,16 +169,17 @@ ${c.bold("SUBCOMMANDS:")} ${c.accent("clear")} ${c.dim("Remove cached Chrome download")} ${c.bold("EXAMPLES:")} - ${c.accent("npx hyperframes browser ensure")} ${c.dim("Download Chrome if needed")} - ${c.accent("npx hyperframes browser path")} ${c.dim("Print path for scripts")} - ${c.accent("npx hyperframes browser clear")} ${c.dim("Remove cached browser")} + ${c.accent("npx hyperframes browser ensure")} ${c.dim("Download Chrome if needed")} + ${c.accent("npx hyperframes browser ensure --force")} ${c.dim("Purge a stale/partial download and re-download")} + ${c.accent("npx hyperframes browser path")} ${c.dim("Print path for scripts")} + ${c.accent("npx hyperframes browser clear")} ${c.dim("Remove cached browser")} `); return; } switch (subcommand) { case "ensure": - return runEnsure(); + return runEnsure({ force: args.force }); case "path": return runPath(); case "clear":