Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 63 additions & 1 deletion src/kernel/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
is_mapping_t,
get_async_library,
)
from ._compat import cached_property
from ._compat import model_copy, cached_property
from ._models import FinalRequestOptions
from ._version import __version__
from ._streaming import Stream as Stream, AsyncStream as AsyncStream
Expand All @@ -39,8 +39,10 @@
BrowserRouteCache,
BrowserRoutingConfig,
strip_direct_vm_auth,
response_is_browser_gone,
rewrite_direct_vm_options,
browser_routing_config_from_env,
fallback_session_id_for_options,
maybe_evict_browser_route_from_response,
maybe_populate_browser_route_cache_from_response,
)
Expand Down Expand Up @@ -304,6 +306,35 @@ def default_headers(self) -> dict[str, str | Omit]:
**self._custom_headers,
}

@override
def request(
    self,
    cast_to: Type[ResponseT],
    options: FinalRequestOptions,
    *,
    stream: bool = False,
    stream_cls: type[Stream[Any]] | None = None,
) -> Any:
    """Send a request, replaying it once if the routed browser is reported gone.

    The call is delegated to the parent client. If that raises an
    ``APIStatusError`` for a request that was eligible for fallback, and the
    error response is a browser-gone 404, the session's cached route is
    deleted and the request is sent one more time from a copy of the
    options as they were before delegation.

    Raises:
        APIStatusError: re-raised unchanged when the request was not
            fallback-eligible or the error is not a browser-gone 404. The
            replayed request may also raise on its own.
    """
    # Snapshot the caller's options BEFORE delegating. The parent request
    # pipeline rewrites routed requests to target the VM, so this copy is what
    # lets us replay the request in its original, pre-rewrite form.
    pristine_options = model_copy(options)
    gone_candidate_id = fallback_session_id_for_options(
        pristine_options,
        cache=self.browser_route_cache,
        config=self._browser_routing,
    )

    try:
        return super().request(cast_to, options, stream=stream, stream_cls=stream_cls)
    except APIStatusError as status_error:
        # The request never qualified for fallback: surface the error as-is.
        if gone_candidate_id is None:
            raise
        # Any failure other than an authoritative "browser gone" propagates.
        if not response_is_browser_gone(status_error.response):
            raise
        # Drop the cached route first so the replay is not rewritten back to
        # the dead VM. With the route gone the replay is intended to reach the
        # control plane through the normal auth flow (Authorization restored,
        # no jwt query param). It is attempted exactly once.
        self.browser_route_cache.delete(gone_candidate_id)
        replay_options = model_copy(pristine_options)
        return super().request(cast_to, replay_options, stream=stream, stream_cls=stream_cls)

@override
def _prepare_options(self, options: Any) -> Any:
options = cast(Any, super()._prepare_options(options))
Expand Down Expand Up @@ -635,6 +666,37 @@ def default_headers(self) -> dict[str, str | Omit]:
**self._custom_headers,
}

@override
async def request(
    self,
    cast_to: Type[ResponseT],
    options: FinalRequestOptions,
    *,
    stream: bool = False,
    stream_cls: type[AsyncStream[Any]] | None = None,
) -> Any:
    """Send a request, replaying it once if the routed browser is reported gone.

    The call is awaited on the parent client. If that raises an
    ``APIStatusError`` for a request that was eligible for fallback, and the
    error response is a browser-gone 404, the session's cached route is
    deleted and the request is sent one more time from a copy of the
    options as they were before delegation.

    Raises:
        APIStatusError: re-raised unchanged when the request was not
            fallback-eligible or the error is not a browser-gone 404. The
            replayed request may also raise on its own.
    """
    # Snapshot the caller's options BEFORE delegating. The parent request
    # pipeline rewrites routed requests to target the VM, so this copy is what
    # lets us replay the request in its original, pre-rewrite form.
    pristine_options = model_copy(options)
    gone_candidate_id = fallback_session_id_for_options(
        pristine_options,
        cache=self.browser_route_cache,
        config=self._browser_routing,
    )

    try:
        return await super().request(cast_to, options, stream=stream, stream_cls=stream_cls)
    except APIStatusError as status_error:
        # The request never qualified for fallback: surface the error as-is.
        if gone_candidate_id is None:
            raise
        # Any failure other than an authoritative "browser gone" propagates.
        if not response_is_browser_gone(status_error.response):
            raise
        # Drop the cached route first so the replay is not rewritten back to
        # the dead VM. With the route gone the replay is intended to reach the
        # control plane through the normal auth flow (Authorization restored,
        # no jwt query param). It is attempted exactly once.
        self.browser_route_cache.delete(gone_candidate_id)
        replay_options = model_copy(pristine_options)
        return await super().request(cast_to, replay_options, stream=stream, stream_cls=stream_cls)

@override
async def _prepare_options(self, options: Any) -> Any:
options = cast(Any, await super()._prepare_options(options))
Expand Down
84 changes: 84 additions & 0 deletions src/kernel/lib/browser_routing/routing.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,54 @@ class BrowserRoutingConfig:
# Whole-path pattern for the browser-pool "acquire" endpoint: an optional
# `/v<digits>/` version prefix, `browser_pools/`, exactly one non-empty path
# segment (presumably the pool id), `/acquire`, and an optional trailing slash.
_BROWSER_POOL_ACQUIRE_PATH = re.compile(r"^/(?:v\d+/)?browser_pools/[^/]+/acquire/?$")
# Same shape as the acquire pattern, but for the "release" endpoint.
_BROWSER_POOL_RELEASE_PATH = re.compile(r"^/(?:v\d+/)?browser_pools/[^/]+/release/?$")

# Body code returned by the VM proxy (metro-api, kernel#2317) when a routed
# request targets a DELETED/GONE browser. There is intentionally no special
# response header: we key off this body code only. A live VM's own 404s do not
# carry this code, and transient/real upstream failures return 5xx instead.
# `response_is_browser_gone` compares this value against the top-level "code"
# field of a 404 response's JSON body.
BROWSER_GONE_CODE = "browser_gone"

# Registry of routed paths that are ELIGIBLE for control-plane fallback when the
# VM reports the browser is gone (404 + code == "browser_gone"). Eligibility is
# expressed against the parsed routed path as (subresource, suffix) and is
# checked by exact tuple membership (no prefix or pattern matching). Everything
# not listed here is default-OFF: a browser_gone 404 on a non-eligible path
# propagates unchanged. Adding a future eligible endpoint is a one-line edit.
_FALLBACK_ELIGIBLE_ROUTED_PATHS: frozenset[tuple[str, str]] = frozenset(
    {
        # PROSPECTIVE: GET /browsers/{id}/telemetry/events. The pull endpoint /
        # `telemetry.events(...)` method does NOT exist yet; this pre-registers
        # the opt-in so control-plane fallback works the moment that method
        # ships, with no further routing-layer changes required.
        ("telemetry", "/events"),
    }
)


def is_fallback_eligible_routed_path(subresource: str, suffix: str) -> bool:
    """Report whether a routed path has opted into control-plane fallback.

    Args:
        subresource: The subresource component that `match_direct_vm_path`
            yields for a `/browsers/{id}/{subresource}{suffix}` URL.
        suffix: The remainder of the path after the subresource, from the
            same match.

    Returns:
        True only when the exact `(subresource, suffix)` pair is present in
        the fallback-eligible registry; False for everything else.
    """
    routed_path = (subresource, suffix)
    return routed_path in _FALLBACK_ELIGIBLE_ROUTED_PATHS


def response_is_browser_gone(response: httpx.Response) -> bool:
    """Return True iff `response` is a 404 whose JSON body has code == "browser_gone".

    Safe to call with any response: a non-404 status yields False without
    touching the body. The body is read defensively, so a body that cannot be
    decoded as JSON, or that is not a JSON object, also yields False and lets
    the caller propagate the original response unchanged.
    """
    if response.status_code == 404:
        try:
            payload = response.json()
        except Exception:
            # Deliberately broad: any failure to obtain a JSON body simply
            # means "not browser_gone"; it must never mask the original error.
            return False
        if isinstance(payload, Mapping):
            fields = cast(Mapping[object, object], payload)
            return fields.get("code") == BROWSER_GONE_CODE
    return False


def browser_routing_config_from_env() -> BrowserRoutingConfig:
raw = os.environ.get("KERNEL_BROWSER_ROUTING_SUBRESOURCES")
Expand Down Expand Up @@ -216,6 +264,42 @@ def rewrite_direct_vm_options(
return rewritten


def fallback_session_id_for_options(
    options: FinalRequestOptions,
    *,
    cache: BrowserRouteCache,
    config: BrowserRoutingConfig,
) -> str | None:
    """Pick the session id a control-plane fallback would apply to, if any.

    The decision is made from the ORIGINAL (pre-rewrite) request options.
    A session id is returned only when every condition holds:
      1. the HTTP method is GET;
      2. the URL parses as a direct-VM path;
      3. the request was actually routed to the VM, i.e. its subresource is
         allowlisted in `config` and `cache` holds a route for the session;
      4. the routed path is in the fallback-eligible registry.

    Otherwise None is returned. A non-None result only means fallback is
    permitted: the caller must still confirm that the VM answered with a
    browser_gone 404 before acting on the returned session id.
    """
    if options.method.upper() != "GET":
        return None

    parsed = match_direct_vm_path(options.url)
    if parsed is None:
        return None
    session_id, subresource, suffix = parsed

    # "Routed" means an allowlisted subresource AND a live cache entry. The
    # cache is consulted only after the allowlist check passes.
    was_routed = subresource in set(config.subresources) and cache.get(session_id) is not None
    if was_routed and is_fallback_eligible_routed_path(subresource, suffix):
        return session_id
    return None


def strip_direct_vm_auth(request: httpx.Request, *, cache: BrowserRouteCache) -> None:
raw = str(request.url)
for route in cache.values():
Expand Down
Loading