From cb84a90e0a4432a0fa8c758e8a20c9ee3b0077fb Mon Sep 17 00:00:00 2001 From: Aero Date: Tue, 30 Jun 2026 11:00:02 +0800 Subject: [PATCH 01/19] feat: add run-script and adapter commands to execute JavaScript files in page context --- Cargo.lock | 60 ++++++++++++ Cargo.toml | 1 + src/commands/evaluate.rs | 206 +++++++++++++++++++++++++++++++++++++++ src/commands/executor.rs | 57 +++++++++++ src/lib.rs | 154 +++++++++++++++++++++++++++++ 5 files changed, 478 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 56a1a32..6db65b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -265,6 +265,7 @@ dependencies = [ "tempfile", "tokio", "tokio-tungstenite", + "toml", "toon-format", ] @@ -1767,6 +1768,15 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + [[package]] name = "servo_arc" version = "0.4.3" @@ -2118,6 +2128,47 @@ dependencies = [ "tungstenite", ] +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "toml_write", + "winnow", +] + +[[package]] +name = "toml_write" +version = "0.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + [[package]] name = "toon-format" version = "0.5.0" @@ -2684,6 +2735,15 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winnow" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen" version = "0.51.0" diff --git a/Cargo.toml b/Cargo.toml index a2e5a52..ec202c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ serde_json = "1" tokio = { version = "1", features = ["full"] } toon-format = "0.5" tokio-tungstenite = "0.24" +toml = "0.8" htmd = "0.5.4" dom_smoothie = "0.18.0" html-escape = "0.2.13" diff --git a/src/commands/evaluate.rs b/src/commands/evaluate.rs index cd719ef..69b0b70 100644 --- a/src/commands/evaluate.rs +++ b/src/commands/evaluate.rs @@ -91,3 +91,209 @@ pub async fn evaluate( .await?) 
} } + +/// Run a local JavaScript file inside the page context +pub async fn run_script( + client: &mut CdpClient, + session_id: &str, + file_path: &str, + script_args: &serde_json::Value, + format: OutputFormat, + output: Option<&str>, + track_navigation: bool, +) -> Result { + let script_content = std::fs::read_to_string(file_path) + .map_err(|e| anyhow::anyhow!("Failed to read script file '{}': {}", file_path, e))?; + + let args_str = serde_json::to_string(script_args)?; + + let iife = format!( + r#"(async () => {{ + const ctx = {{ + args: {args_str}, + wait: async (ms) => new Promise(r => setTimeout(r, ms)), + waitForText: async (text, timeout = 30000) => {{ + const start = Date.now(); + while (Date.now() - start < timeout) {{ + if (document.body && document.body.innerText.includes(text)) return; + await new Promise(r => setTimeout(r, 100)); + }} + throw new Error("Timeout waiting for text: " + text); + }}, + waitForSelector: async (selector, timeout = 30000) => {{ + const start = Date.now(); + while (Date.now() - start < timeout) {{ + if (document.querySelector(selector)) return; + await new Promise(r => setTimeout(r, 100)); + }} + throw new Error("Timeout waiting for selector: " + selector); + }}, + click: async (selector) => {{ + const el = document.querySelector(selector); + if (!el) throw new Error("Element not found: " + selector); + el.click(); + }}, + fill: async (selector, value) => {{ + const el = document.querySelector(selector); + if (!el) throw new Error("Element not found: " + selector); + el.value = value; + el.dispatchEvent(new Event('input', {{ bubbles: true }})); + el.dispatchEvent(new Event('change', {{ bubbles: true }})); + }} + }}; + + {script_content} + }})()"# + ); + + evaluate(client, session_id, &iife, format, output, track_navigation).await +} + +/// Extract `@domain` JSDoc comments from a script +fn parse_adapter_domains(content: &str) -> Vec { + let mut domains = Vec::new(); + for line in content.lines() { + if let Some(pos) = 
line.find("@domain") { + let rest = &line[pos + 7..]; + let domain = rest.trim().split_whitespace().next().unwrap_or(""); + if !domain.is_empty() { + domains.push(domain.to_string()); + } + } + } + domains +} + +/// Check if a URL matches a domain pattern +fn url_matches_domain(url: &str, domain: &str) -> bool { + let url_lower = url.to_lowercase(); + let domain_lower = domain.to_lowercase(); + + let s = url_lower + .strip_prefix("https://") + .or_else(|| url_lower.strip_prefix("http://")) + .unwrap_or(&url_lower); + + let host = s.split('/').next().unwrap_or(s); + let host = host.split(':').next().unwrap_or(host); + + host == domain_lower || host.ends_with(&format!(".{}", domain_lower)) +} + +/// Run a structured custom adapter function inside the page context +pub async fn run_adapter( + client: &mut CdpClient, + session_id: &str, + file_path: &str, + function_name: &str, + script_args: &serde_json::Value, + format: OutputFormat, + output: Option<&str>, + track_navigation: bool, +) -> Result { + let script_content = std::fs::read_to_string(file_path) + .map_err(|e| anyhow::anyhow!("Failed to read adapter file '{}': {}", file_path, e))?; + + // Perform domain protection + let domains = parse_adapter_domains(&script_content); + if !domains.is_empty() { + let current_url = client.current_url(session_id).await?; + let matched = domains.iter().any(|domain| url_matches_domain(&current_url, domain)); + + if !matched { + let target_domain = &domains[0]; + let target_url = if target_domain.starts_with("http://") || target_domain.starts_with("https://") { + target_domain.clone() + } else { + format!("https://www.{}", target_domain) + }; + eprintln!("[adapter] Current URL '{}' does not match adapter domains {:?}. 
Auto-navigating to '{}'...", current_url, domains, target_url); + + crate::commands::navigate::navigate( + client, + session_id, + Some(&target_url), + false, + false, + false, + None, + None, + ) + .await?; + } + } + + let args_str = serde_json::to_string(script_args)?; + + let iife = format!( + r#"(async () => {{ + const ctx = {{ + args: {args_str}, + wait: async (ms) => new Promise(r => setTimeout(r, ms)), + waitForText: async (text, timeout = 30000) => {{ + const start = Date.now(); + while (Date.now() - start < timeout) {{ + if (document.body && document.body.innerText.includes(text)) return; + await new Promise(r => setTimeout(r, 100)); + }} + throw new Error("Timeout waiting for text: " + text); + }}, + waitForSelector: async (selector, timeout = 30000) => {{ + const start = Date.now(); + while (Date.now() - start < timeout) {{ + if (document.querySelector(selector)) return; + await new Promise(r => setTimeout(r, 100)); + }} + throw new Error("Timeout waiting for selector: " + selector); + }}, + click: async (selector) => {{ + const el = document.querySelector(selector); + if (!el) throw new Error("Element not found: " + selector); + el.click(); + }}, + fill: async (selector, value) => {{ + const el = document.querySelector(selector); + if (!el) throw new Error("Element not found: " + selector); + el.value = value; + el.dispatchEvent(new Event('input', {{ bubbles: true }})); + el.dispatchEvent(new Event('change', {{ bubbles: true }})); + }} + }}; + + {script_content} + + if (typeof {function_name} !== 'function') {{ + throw new Error("Function '{function_name}' not found in adapter"); + }} + return await {function_name}(ctx); + }})()"# + ); + + evaluate(client, session_id, &iife, format, output, track_navigation).await +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_adapter_domains() { + let content = r#" + // ==UserAdapter== + // @name Xiaohongshu Custom Adapter + // @domain xiaohongshu.com + // @domain creator.xiaohongshu.com + 
// ==/UserAdapter== + "#; + let domains = parse_adapter_domains(content); + assert_eq!(domains, vec!["xiaohongshu.com", "creator.xiaohongshu.com"]); + } + + #[test] + fn test_url_matches_domain() { + assert!(url_matches_domain("https://www.xiaohongshu.com/explore", "xiaohongshu.com")); + assert!(url_matches_domain("http://creator.xiaohongshu.com", "creator.xiaohongshu.com")); + assert!(url_matches_domain("https://xiaohongshu.com:8080/path", "xiaohongshu.com")); + assert!(!url_matches_domain("https://google.com", "xiaohongshu.com")); + } +} diff --git a/src/commands/executor.rs b/src/commands/executor.rs index f12e1c0..15bea62 100644 --- a/src/commands/executor.rs +++ b/src/commands/executor.rs @@ -72,6 +72,8 @@ pub fn known_args(cmd: &str) -> &'static [&'static str] { "console" => &["duration", "type"], "network" => &["duration", "type"], "sw-logs" => &["duration", "extension_id"], + "run-script" => &["file_path", "script_args", "output", "track_navigation"], + "adapter" => &["file_path", "function_name", "script_args", "output", "track_navigation"], "kill-daemon" => &[], _ => &[], } @@ -569,6 +571,61 @@ async fn inner_execute( commands::network::collect_network(client, session_id, duration, types, req.format()) .await } + "run-script" => { + let file_path = args + .get("file_path") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow!("file_path required"))?; + let script_args = args + .get("script_args") + .ok_or_else(|| anyhow!("script_args required"))?; + let output = args.get("output").and_then(|v| v.as_str()); + let track_navigation = args + .get("track_navigation") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + commands::evaluate::run_script( + client, + session_id, + file_path, + script_args, + req.format(), + output, + track_navigation, + ) + .await + } + "adapter" => { + let file_path = args + .get("file_path") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow!("file_path required"))?; + let function_name = args + .get("function_name") + 
.and_then(|v| v.as_str()) + .ok_or_else(|| anyhow!("function_name required"))?; + let script_args = args + .get("script_args") + .ok_or_else(|| anyhow!("script_args required"))?; + let output = args.get("output").and_then(|v| v.as_str()); + let track_navigation = args + .get("track_navigation") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + commands::evaluate::run_adapter( + client, + session_id, + file_path, + function_name, + script_args, + req.format(), + output, + track_navigation, + ) + .await + } _ => bail!("Unknown command: {cmd}"), } } diff --git a/src/lib.rs b/src/lib.rs index a41c692..c6f0633 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -328,6 +328,40 @@ pub enum Commands { extension_id: Option, }, + /// Run a local JavaScript file in the active page context + #[command(name = "run-script")] + RunScript { + /// Path to the JavaScript file + file_path: String, + /// Optional arguments to pass to the script as key=value pairs (can be repeated) + #[arg(long = "arg", short = 'a')] + script_args: Vec, + /// Write output to a file instead of stdout + #[arg(long, short)] + output: Option, + /// Track URL changes caused by this evaluation + #[arg(long)] + track_navigation: bool, + }, + + /// Run a structured custom site adapter JavaScript function + #[command(name = "adapter")] + Adapter { + /// Path to the JavaScript adapter file + file_path: String, + /// Name of the function in the adapter to run + function_name: String, + /// Optional arguments to pass to the function as key=value pairs (can be repeated) + #[arg(long = "arg", short = 'a')] + script_args: Vec, + /// Write output to a file instead of stdout + #[arg(long, short)] + output: Option, + /// Track URL changes caused by this evaluation + #[arg(long)] + track_navigation: bool, + }, + /// Stop the background daemon process #[command(name = "kill-daemon")] KillDaemon, @@ -373,6 +407,8 @@ impl Cli { Commands::Console { .. } => "console", Commands::Network { .. } => "network", Commands::SwLogs { .. 
} => "sw-logs", + Commands::RunScript { .. } => "run-script", + Commands::Adapter { .. } => "adapter", Commands::KillDaemon => "kill-daemon", } } @@ -396,6 +432,33 @@ fn absolutize_path(path: &str) -> Result { } } +fn parse_args(args: &[String]) -> serde_json::Value { + let mut map = serde_json::Map::new(); + for arg in args { + if let Some((k, v)) = arg.split_once('=') { + let k = k.trim().to_string(); + let v = v.trim(); + let val = if v.eq_ignore_ascii_case("true") { + serde_json::Value::Bool(true) + } else if v.eq_ignore_ascii_case("false") { + serde_json::Value::Bool(false) + } else if let Ok(n) = v.parse::() { + serde_json::Value::Number(n.into()) + } else if let Ok(f) = v.parse::() { + if let Some(num) = serde_json::Number::from_f64(f) { + serde_json::Value::Number(num) + } else { + serde_json::Value::String(v.to_string()) + } + } else { + serde_json::Value::String(v.to_string()) + }; + map.insert(k, val); + } + } + serde_json::Value::Object(map) +} + fn build_request(cli: &Cli) -> Result { // Resolve relative file paths to absolute so the daemon (which retains its // own startup CWD) resolves them correctly. 
@@ -541,6 +604,36 @@ fn build_request(cli: &Cli) -> Result { "sw-logs", json!({"duration": duration, "extension_id": extension_id}), ), + Commands::RunScript { + file_path, + script_args, + output, + track_navigation, + } => ( + "run-script", + json!({ + "file_path": absolutize_path(file_path)?, + "script_args": parse_args(script_args), + "output": absolutize(output)?, + "track_navigation": track_navigation + }), + ), + Commands::Adapter { + file_path, + function_name, + script_args, + output, + track_navigation, + } => ( + "adapter", + json!({ + "file_path": absolutize_path(file_path)?, + "function_name": function_name, + "script_args": parse_args(script_args), + "output": absolutize(output)?, + "track_navigation": track_navigation + }), + ), Commands::KillDaemon => unreachable!("KillDaemon is handled before build_request"), Commands::InspectHeapSnapshotNode { .. } => { unreachable!("InspectHeapSnapshotNode is handled before build_request") @@ -1097,6 +1190,42 @@ async fn run_direct(cli: &Cli, ws_url: &str) -> Result { ) .await } + Commands::RunScript { + file_path, + script_args, + output, + track_navigation, + } => { + commands::evaluate::run_script( + &mut client, + &session_id, + file_path, + &parse_args(script_args), + cli.output_format(), + output.as_deref(), + *track_navigation, + ) + .await + } + Commands::Adapter { + file_path, + function_name, + script_args, + output, + track_navigation, + } => { + commands::evaluate::run_adapter( + &mut client, + &session_id, + file_path, + function_name, + &parse_args(script_args), + cli.output_format(), + output.as_deref(), + *track_navigation, + ) + .await + } _ => unreachable!(), }; @@ -1107,3 +1236,28 @@ async fn run_direct(cli: &Cli, ws_url: &str) -> Result { r }) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_args() { + let args = vec![ + "str_val=hello".to_string(), + "int_val=42".to_string(), + "float_val=3.14".to_string(), + "bool_true=true".to_string(), + 
"bool_false=False".to_string(), + "no_equals".to_string(), + ]; + let parsed = parse_args(&args); + let obj = parsed.as_object().unwrap(); + assert_eq!(obj.get("str_val").unwrap().as_str().unwrap(), "hello"); + assert_eq!(obj.get("int_val").unwrap().as_i64().unwrap(), 42); + assert_eq!(obj.get("float_val").unwrap().as_f64().unwrap(), 3.14); + assert_eq!(obj.get("bool_true").unwrap().as_bool().unwrap(), true); + assert_eq!(obj.get("bool_false").unwrap().as_bool().unwrap(), false); + assert!(obj.get("no_equals").is_none()); + } +} From c913334f8f5f8ba06b5e5b9b663b726afb1fe2e8 Mon Sep 17 00:00:00 2001 From: Aero Date: Tue, 30 Jun 2026 11:50:00 +0800 Subject: [PATCH 02/19] feat: add run-script and adapter functionality for local JavaScript execution --- skill/chrome-devtools/SKILL.md | 26 +++++- .../examples/deepwiki_adapter.js | 40 +++++++++ .../examples/search_deepwiki.js | 33 +++++++ wiki/adapter.md | 87 +++++++++++++++++++ wiki/run-script.md | 69 +++++++++++++++ 5 files changed, 254 insertions(+), 1 deletion(-) create mode 100644 skill/chrome-devtools/examples/deepwiki_adapter.js create mode 100644 skill/chrome-devtools/examples/search_deepwiki.js create mode 100644 wiki/adapter.md create mode 100644 wiki/run-script.md diff --git a/skill/chrome-devtools/SKILL.md b/skill/chrome-devtools/SKILL.md index c6f9b49..00b4742 100644 --- a/skill/chrome-devtools/SKILL.md +++ b/skill/chrome-devtools/SKILL.md @@ -56,7 +56,7 @@ chrome-devtools --page 0 navigate https://example.com - **Navigation**: `navigate`, `navigate --back`, `navigate --forward`, `navigate --reload` - **Page management**: `list-pages`, `new-page`, `close-page`, `select-page` -- **Extraction**: `screenshot`, `snapshot` (accessibility tree), `evaluate` (JavaScript), `read-page` (page content as markdown) +- **Extraction**: `screenshot`, `snapshot` (accessibility tree), `evaluate` (JavaScript), `read-page` (page content as markdown), `run-script` (run local JS file), `adapter` (run site adapter) - 
**Interaction**: `click`, `fill`, `type-text`, `press-key`, `hover`, `click-at` - **Emulation**: `emulate` (viewport, mobile, geolocation, URL blocking) - **Inspection**: `console` (logs), `network` (requests), `sw-logs` (extension service workers) @@ -311,6 +311,24 @@ chrome-devtools --target warm-squid read-page --json - `read-page` — you want the page's textual content as readable markdown (articles, docs, wiki pages). Best for summarization, extraction, or feeding content to an LLM. - `snapshot` — you need the full accessibility tree with element IDs, roles, and interactive elements. Best for understanding page structure and finding elements to click/fill. +### Pattern 13: Local JS Scripting (run-script) + +Evaluate a local JavaScript file inside the page context. Dynamic arguments passed via `-a/--arg` are automatically typed and injected into the execution context as `ctx.args`. Standard helper functions are also injected. + +```bash +# Run a script with dynamic arguments +chrome-devtools --target warm-squid run-script skill/chrome-devtools/examples/search_deepwiki.js --arg query="aeroxy/ast-bro" +``` + +### Pattern 14: Custom Domain-Aware Adapters (adapter) + +Run site-specific adapter actions. If the browser is not currently on a matching domain (as defined by `@domain` comments in the JSDoc header), the CLI auto-navigates to that domain first. 
+ +```bash +# Run an adapter function with automatic domain protection and navigation +chrome-devtools --target warm-squid adapter skill/chrome-devtools/examples/deepwiki_adapter.js ask --arg query="how to write adapter" +``` + ## Complete Command Reference ### Navigation @@ -366,6 +384,12 @@ chrome-devtools --target list-3p-tools chrome-devtools --target execute-3p-tool '' ``` +### Custom Scripting & Adapters +```bash +chrome-devtools --target run-script [--arg key=value] [--output ] [--track-navigation] +chrome-devtools --target adapter [--arg key=value] [--output ] [--track-navigation] +``` + ### Daemon ```bash chrome-devtools kill-daemon # stop the background daemon process diff --git a/skill/chrome-devtools/examples/deepwiki_adapter.js b/skill/chrome-devtools/examples/deepwiki_adapter.js new file mode 100644 index 0000000..a59f623 --- /dev/null +++ b/skill/chrome-devtools/examples/deepwiki_adapter.js @@ -0,0 +1,40 @@ +// ==UserAdapter== +// @name DeepWiki Adapter +// @domain deepwiki.com +// ==/UserAdapter== + +// Run with: chrome-devtools adapter skill/chrome-devtools/examples/deepwiki_adapter.js ask -a query="how to write adapter" + +async function ask(ctx) { + const query = ctx.args.query; + if (!query) throw new Error("query argument is required"); + + // Fill search input and click ask/search + await ctx.fill("input.ask-input, input[placeholder*='Ask']", query); + await ctx.click("button.ask-btn, button[type='submit']"); + + // Wait for AI response to finish streaming/loading + await ctx.waitForSelector(".answer-box, .ai-response", 15000); + await ctx.wait(2000); // Allow text to settle + + const answer = document.querySelector(".answer-box, .ai-response")?.innerText.trim() || ""; + const sources = Array.from(document.querySelectorAll(".sources-list a, .citation-link")).map(el => ({ + title: el.innerText.trim(), + url: el.href + })); + + return { query, answer, sources }; +} + +async function readWiki(ctx) { + const wikiUrl = ctx.args.url; + if (!wikiUrl) 
throw new Error("url argument is required"); + + window.location.href = wikiUrl; + await ctx.waitForSelector("article, .wiki-content", 10000); + + return { + title: document.querySelector("h1, .wiki-title")?.innerText.trim() || "", + content: document.querySelector("article, .wiki-content")?.innerText.trim() || "" + }; +} diff --git a/skill/chrome-devtools/examples/search_deepwiki.js b/skill/chrome-devtools/examples/search_deepwiki.js new file mode 100644 index 0000000..b82fcf7 --- /dev/null +++ b/skill/chrome-devtools/examples/search_deepwiki.js @@ -0,0 +1,33 @@ +// search_deepwiki.js +// Run with: chrome-devtools run-script skill/chrome-devtools/examples/search_deepwiki.js -a query="aeroxy/ast-bro" + +(async () => { + const query = ctx.args.query; + if (!query) { + throw new Error("Query argument is required. Pass it with '-a query=...'"); + } + + // Navigate to deepwiki if not already there + if (!window.location.href.includes("deepwiki.com")) { + window.location.href = "https://deepwiki.com"; + await ctx.wait(2000); + } + + // Fill in search input and submit + await ctx.fill("input[placeholder*='search']", query); + await ctx.click("button[type='submit']"); + + // Wait for results list to load + await ctx.waitForSelector(".search-results-list, .repo-card", 10000); + + // Extract titles, descriptions, and URLs + const results = Array.from(document.querySelectorAll(".repo-card, .wiki-page-item")).map(el => { + return { + title: el.querySelector(".title, h3")?.innerText.trim() || "", + description: el.querySelector(".description, p")?.innerText.trim() || "", + url: el.querySelector("a")?.href || "" + }; + }); + + return results; +})(); diff --git a/wiki/adapter.md b/wiki/adapter.md new file mode 100644 index 0000000..3d7edae --- /dev/null +++ b/wiki/adapter.md @@ -0,0 +1,87 @@ +# adapter + +Run site-specific custom JavaScript adapter functions with built-in domain protection, auto-navigation, and injected automation helpers. 
+ +## Synopsis + +```bash +chrome-devtools [--target ] adapter [--arg key=value] [--output ] [--track-navigation] +``` + +## Description + +`adapter` reads a local custom JS adapter file, parses the target `@domain` markers, and ensures the browser is on a matching domain before invoking a specific named function exported or defined inside the script. + +### Domain Protection and Auto-Navigation + +By declaring standard `@domain` markers at the top of your adapter file, the CLI checks the current page URL before executing your function. If the active tab is not on the target domain, **it automatically navigates the tab to the first target domain**, waits for it to load, and then runs your adapter. + +```javascript +// ==UserAdapter== +// @name My Custom Adapter +// @domain deepwiki.com +// ==/UserAdapter== +``` + +### Injected Helper Context (`ctx`) + +Like `run-script`, your adapter function is passed a `ctx` context containing helper utilities: + +* `ctx.args`: Object containing typed key-value arguments. +* `ctx.wait(ms)`: Delay utility. +* `ctx.waitForText(text, timeout_ms)`: Text matching polling utility. +* `ctx.waitForSelector(selector, timeout_ms)`: CSS selector matching polling utility. +* `ctx.click(selector)`: DOM clicking helper. +* `ctx.fill(selector, value)`: DOM value input helper. + +## Real-World Example: DeepWiki AI Q&A Adapter + +This adapter has a target domain of `deepwiki.com` and exposes an `ask` Q&A function and a `readWiki` document reader function. 
+ +### Adapter file (`skill/chrome-devtools/examples/deepwiki_adapter.js`) +```javascript +// ==UserAdapter== +// @name DeepWiki Adapter +// @domain deepwiki.com +// ==/UserAdapter== + +async function ask(ctx) { + const query = ctx.args.query; + if (!query) throw new Error("query argument is required"); + + // Fill search input and click ask/search + await ctx.fill("input.ask-input, input[placeholder*='Ask']", query); + await ctx.click("button.ask-btn, button[type='submit']"); + + // Wait for AI response to finish streaming/loading + await ctx.waitForSelector(".answer-box, .ai-response", 15000); + await ctx.wait(2000); // Allow text to settle + + const answer = document.querySelector(".answer-box, .ai-response")?.innerText.trim() || ""; + const sources = Array.from(document.querySelectorAll(".sources-list a, .citation-link")).map(el => ({ + title: el.innerText.trim(), + url: el.href + })); + + return { query, answer, sources }; +} + +async function readWiki(ctx) { + const wikiUrl = ctx.args.url; + if (!wikiUrl) throw new Error("url argument is required"); + + window.location.href = wikiUrl; + await ctx.waitForSelector("article, .wiki-content", 10000); + + return { + title: document.querySelector("h1, .wiki-title")?.innerText.trim() || "", + content: document.querySelector("article, .wiki-content")?.innerText.trim() || "" + }; +} +``` + +### CLI Execution +```bash +# Executing 'ask' on deepwiki.com (will auto-navigate there if not already open) +chrome-devtools --target warm-squid adapter skill/chrome-devtools/examples/deepwiki_adapter.js ask --arg query="how to write adapter" --json +``` diff --git a/wiki/run-script.md b/wiki/run-script.md new file mode 100644 index 0000000..0e3b62d --- /dev/null +++ b/wiki/run-script.md @@ -0,0 +1,69 @@ +# run-script + +Evaluate a local JavaScript file inside the current page context with injected helper utilities and parsed dynamic arguments. 
+ +## Synopsis + +```bash +chrome-devtools [--target ] run-script [--arg key=value] [--output ] [--track-navigation] +``` + +## Description + +`run-script` reads a local JavaScript file off-disk, wraps it inside an Immediately Invoked Function Expression (IIFE), and evaluates it directly inside the target browser's page context. + +Dynamic arguments passed with `-a` / `--arg` are automatically typed (strings, integers, floats, booleans) and made available to the script. + +### Injected Helper Context (`ctx`) + +Before executing your script, `run-script` injects a globally-accessible helper `ctx` object with several standard automation wrappers: + +* `ctx.args`: Object containing key-value arguments parsed from CLI flags. +* `ctx.wait(ms)`: Sleep/delay helper (`await ctx.wait(1000)`). +* `ctx.waitForText(text, timeout_ms)`: Polls the page body text until the string is present (defaults to 30s). +* `ctx.waitForSelector(selector, timeout_ms)`: Polls until an element matching the CSS selector exists in the DOM. +* `ctx.click(selector)`: Clicks an element by CSS selector. +* `ctx.fill(selector, value)`: Fills an input field with the value and fires standard input and change events. + +## Real-World Example: Search DeepWiki + +This script searches `deepwiki.com` for a repository name and extracts the results. + +### Script file (`skill/chrome-devtools/examples/search_deepwiki.js`) +```javascript +(async () => { + const query = ctx.args.query; + if (!query) { + throw new Error("Query argument is required. 
Pass it with '-a query=...'"); + } + + // Navigate to deepwiki if not already there + if (!window.location.href.includes("deepwiki.com")) { + window.location.href = "https://deepwiki.com"; + await ctx.wait(2000); + } + + // Fill in search input and submit + await ctx.fill("input[placeholder*='search']", query); + await ctx.click("button[type='submit']"); + + // Wait for results list to load + await ctx.waitForSelector(".search-results-list, .repo-card", 10000); + + // Extract titles, descriptions, and URLs + const results = Array.from(document.querySelectorAll(".repo-card, .wiki-page-item")).map(el => { + return { + title: el.querySelector(".title, h3")?.innerText.trim() || "", + description: el.querySelector(".description, p")?.innerText.trim() || "", + url: el.querySelector("a")?.href || "" + }; + }); + + return results; +})(); +``` + +### CLI Execution +```bash +chrome-devtools --target warm-squid run-script skill/chrome-devtools/examples/search_deepwiki.js --arg query="aeroxy/ast-bro" --json +``` From cf0e0ca1c8b4ff9e37ee1869d41638beab2c3bbd Mon Sep 17 00:00:00 2001 From: Aero Date: Tue, 30 Jun 2026 13:58:14 +0800 Subject: [PATCH 03/19] fix: stabilize adapter execution, argument parsing, and deepwiki scripts * remove in-adapter navigation from deepwiki examples to avoid tearing down execution context * require pre-navigation and simplify async script structure with direct ctx usage * add `strip_export_keywords` to support ES module-style adapters without syntax errors * fix domain auto-navigation to preserve exact host (avoid forcing `www`) * make `parse_args` return `Result`, validate key=value format, and reject malformed inputs * update callers to handle parsing errors and add tests for new validations --- .../examples/deepwiki_adapter.js | 9 +-- .../examples/search_deepwiki.js | 53 ++++++++------- src/commands/evaluate.rs | 45 ++++++++++++- src/lib.rs | 67 +++++++++++-------- 4 files changed, 114 insertions(+), 60 deletions(-) diff --git 
a/skill/chrome-devtools/examples/deepwiki_adapter.js b/skill/chrome-devtools/examples/deepwiki_adapter.js index a59f623..d8a9b48 100644 --- a/skill/chrome-devtools/examples/deepwiki_adapter.js +++ b/skill/chrome-devtools/examples/deepwiki_adapter.js @@ -27,12 +27,9 @@ async function ask(ctx) { } async function readWiki(ctx) { - const wikiUrl = ctx.args.url; - if (!wikiUrl) throw new Error("url argument is required"); - - window.location.href = wikiUrl; - await ctx.waitForSelector("article, .wiki-content", 10000); - + // Navigation must happen before running this adapter (e.g. via the `navigate` + // command). Changing window.location here would tear down the Runtime.evaluate + // context mid-execution, so readWiki only scrapes the already-loaded page. return { title: document.querySelector("h1, .wiki-title")?.innerText.trim() || "", content: document.querySelector("article, .wiki-content")?.innerText.trim() || "" diff --git a/skill/chrome-devtools/examples/search_deepwiki.js b/skill/chrome-devtools/examples/search_deepwiki.js index b82fcf7..7c91f9f 100644 --- a/skill/chrome-devtools/examples/search_deepwiki.js +++ b/skill/chrome-devtools/examples/search_deepwiki.js @@ -1,33 +1,34 @@ // search_deepwiki.js // Run with: chrome-devtools run-script skill/chrome-devtools/examples/search_deepwiki.js -a query="aeroxy/ast-bro" +// +// run-script injects `ctx` and runs this file inside an async context, so use +// the ctx helpers directly at the top level and `return` the result. Navigating +// would tear down the evaluation context, so this script requires the page to +// already be on deepwiki.com (use the `navigate` command first). -(async () => { - const query = ctx.args.query; - if (!query) { - throw new Error("Query argument is required. Pass it with '-a query=...'"); - } +const query = ctx.args.query; +if (!query) { + throw new Error("Query argument is required. 
Pass it with '-a query=...'"); +} - // Navigate to deepwiki if not already there - if (!window.location.href.includes("deepwiki.com")) { - window.location.href = "https://deepwiki.com"; - await ctx.wait(2000); - } +if (!window.location.href.includes("deepwiki.com")) { + throw new Error("Not on deepwiki.com — navigate there first: chrome-devtools navigate https://deepwiki.com"); +} - // Fill in search input and submit - await ctx.fill("input[placeholder*='search']", query); - await ctx.click("button[type='submit']"); - - // Wait for results list to load - await ctx.waitForSelector(".search-results-list, .repo-card", 10000); +// Fill in search input and submit +await ctx.fill("input[placeholder*='search']", query); +await ctx.click("button[type='submit']"); - // Extract titles, descriptions, and URLs - const results = Array.from(document.querySelectorAll(".repo-card, .wiki-page-item")).map(el => { - return { - title: el.querySelector(".title, h3")?.innerText.trim() || "", - description: el.querySelector(".description, p")?.innerText.trim() || "", - url: el.querySelector("a")?.href || "" - }; - }); +// Wait for results list to load +await ctx.waitForSelector(".search-results-list, .repo-card", 10000); - return results; -})(); +// Extract titles, descriptions, and URLs +const results = Array.from(document.querySelectorAll(".repo-card, .wiki-page-item")).map(el => { + return { + title: el.querySelector(".title, h3")?.innerText.trim() || "", + description: el.querySelector(".description, p")?.innerText.trim() || "", + url: el.querySelector("a")?.href || "" + }; +}); + +return results; diff --git a/src/commands/evaluate.rs b/src/commands/evaluate.rs index 69b0b70..7164f18 100644 --- a/src/commands/evaluate.rs +++ b/src/commands/evaluate.rs @@ -180,6 +180,30 @@ fn url_matches_domain(url: &str, domain: &str) -> bool { host == domain_lower || host.ends_with(&format!(".{}", domain_lower)) } +/// Normalize ES-module `export` keywords out of adapter source. 
+/// +/// Adapters are injected as statements into an async IIFE, where a top-level +/// `export` is a SyntaxError. The supported adapter format is plain function +/// declarations; this strips a leading `export` / `export default` so the common +/// authoring habit parses instead of failing before the function-existence check. +fn strip_export_keywords(content: &str) -> String { + content + .lines() + .map(|line| { + let trimmed = line.trim_start(); + let indent = &line[..line.len() - trimmed.len()]; + if let Some(rest) = trimmed.strip_prefix("export default ") { + format!("{indent}{rest}") + } else if let Some(rest) = trimmed.strip_prefix("export ") { + format!("{indent}{rest}") + } else { + line.to_string() + } + }) + .collect::<Vec<_>>() + .join("\n") +} + /// Run a structured custom adapter function inside the page context pub async fn run_adapter( client: &mut CdpClient, @@ -202,10 +226,14 @@ pub async fn run_adapter( if !matched { let target_domain = &domains[0]; + // Preserve the host exactly as declared in `@domain`; only supply a + // scheme when one is missing. Forcing a `www.` subdomain breaks apex + // hosts and adapters that target an existing subdomain + // (e.g. `creator.xiaohongshu.com`). let target_url = if target_domain.starts_with("http://") || target_domain.starts_with("https://") { target_domain.clone() } else { - format!("https://www.{}", target_domain) + format!("https://{}", target_domain) }; eprintln!("[adapter] Current URL '{}' does not match adapter domains {:?}. Auto-navigating to '{}'...", current_url, domains, target_url); @@ -223,6 +251,11 @@ pub async fn run_adapter( } } + // Normalize away `export` so module-style adapter declarations parse when + // injected as statements below. Domain parsing above used the raw source, + // which is unaffected (domains live in comments).
+ let script_content = strip_export_keywords(&script_content); + let args_str = serde_json::to_string(script_args)?; let iife = format!( @@ -289,6 +322,16 @@ mod tests { assert_eq!(domains, vec!["xiaohongshu.com", "creator.xiaohongshu.com"]); } + #[test] + fn test_strip_export_keywords() { + let src = "export async function ask(ctx) {}\n export function read() {}\nexport const helper = 1;\nexport default function main() {}\nconst x = \"export inside string\";"; + let out = strip_export_keywords(src); + assert_eq!( + out, + "async function ask(ctx) {}\n function read() {}\nconst helper = 1;\nfunction main() {}\nconst x = \"export inside string\";" + ); + } + #[test] fn test_url_matches_domain() { assert!(url_matches_domain("https://www.xiaohongshu.com/explore", "xiaohongshu.com")); diff --git a/src/lib.rs b/src/lib.rs index c6f0633..896a0fa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -432,31 +432,37 @@ fn absolutize_path(path: &str) -> Result { } } -fn parse_args(args: &[String]) -> serde_json::Value { +fn parse_args(args: &[String]) -> Result<serde_json::Value> { let mut map = serde_json::Map::new(); for arg in args { - if let Some((k, v)) = arg.split_once('=') { - let k = k.trim().to_string(); - let v = v.trim(); - let val = if v.eq_ignore_ascii_case("true") { - serde_json::Value::Bool(true) - } else if v.eq_ignore_ascii_case("false") { - serde_json::Value::Bool(false) - } else if let Ok(n) = v.parse::<i64>() { - serde_json::Value::Number(n.into()) - } else if let Ok(f) = v.parse::<f64>() { - if let Some(num) = serde_json::Number::from_f64(f) { - serde_json::Value::Number(num) - } else { - serde_json::Value::String(v.to_string()) - } + // Reject malformed tokens up front so bad `--arg` input surfaces a clear + // error here instead of failing later with a misleading message.
+ let (k, v) = arg + .split_once('=') + .ok_or_else(|| anyhow::anyhow!("Invalid argument '{arg}': expected key=value"))?; + let k = k.trim(); + if k.is_empty() { + anyhow::bail!("Invalid argument '{arg}': key must not be empty"); + } + let v = v.trim(); + let val = if v.eq_ignore_ascii_case("true") { + serde_json::Value::Bool(true) + } else if v.eq_ignore_ascii_case("false") { + serde_json::Value::Bool(false) + } else if let Ok(n) = v.parse::<i64>() { + serde_json::Value::Number(n.into()) + } else if let Ok(f) = v.parse::<f64>() { + if let Some(num) = serde_json::Number::from_f64(f) { + serde_json::Value::Number(num) } else { serde_json::Value::String(v.to_string()) - }; - map.insert(k, val); - } + } + } else { + serde_json::Value::String(v.to_string()) + }; + map.insert(k.to_string(), val); } - serde_json::Value::Object(map) + Ok(serde_json::Value::Object(map)) } fn build_request(cli: &Cli) -> Result { @@ -613,7 +619,7 @@ fn build_request(cli: &Cli) -> Result { "run-script", json!({ "file_path": absolutize_path(file_path)?, - "script_args": parse_args(script_args), + "script_args": parse_args(script_args)?, "output": absolutize(output)?, "track_navigation": track_navigation }), @@ -629,7 +635,7 @@ fn build_request(cli: &Cli) -> Result { json!({ "file_path": absolutize_path(file_path)?, "function_name": function_name, - "script_args": parse_args(script_args), + "script_args": parse_args(script_args)?, "output": absolutize(output)?, "track_navigation": track_navigation }), @@ -1200,7 +1206,7 @@ async fn run_direct(cli: &Cli, ws_url: &str) -> Result { &mut client, &session_id, file_path, - &parse_args(script_args), + &parse_args(script_args)?, cli.output_format(), output.as_deref(), *track_navigation, @@ -1219,7 +1225,7 @@ async fn run_direct(cli: &Cli, ws_url: &str) -> Result { &session_id, file_path, function_name, - &parse_args(script_args), + &parse_args(script_args)?, cli.output_format(), output.as_deref(), *track_navigation, @@ -1249,15 +1255,22 @@ mod tests {
"float_val=3.14".to_string(), "bool_true=true".to_string(), "bool_false=False".to_string(), - "no_equals".to_string(), ]; - let parsed = parse_args(&args); + let parsed = parse_args(&args).unwrap(); let obj = parsed.as_object().unwrap(); assert_eq!(obj.get("str_val").unwrap().as_str().unwrap(), "hello"); assert_eq!(obj.get("int_val").unwrap().as_i64().unwrap(), 42); assert_eq!(obj.get("float_val").unwrap().as_f64().unwrap(), 3.14); assert_eq!(obj.get("bool_true").unwrap().as_bool().unwrap(), true); assert_eq!(obj.get("bool_false").unwrap().as_bool().unwrap(), false); - assert!(obj.get("no_equals").is_none()); + } + + #[test] + fn test_parse_args_rejects_malformed() { + // Missing '=' is now a hard error instead of being silently dropped. + assert!(parse_args(&["no_equals".to_string()]).is_err()); + // Empty keys are rejected. + assert!(parse_args(&["=value".to_string()]).is_err()); + assert!(parse_args(&[" =value".to_string()]).is_err()); } } From 1bcec8192345bbaf22c58ef0f532fa9a4dd6621f Mon Sep 17 00:00:00 2001 From: Aero Date: Tue, 30 Jun 2026 14:30:21 +0800 Subject: [PATCH 04/19] chore: centralize polling interval for ctx wait helpers * introduce `POLL_INTERVAL_MS` constant for configurable polling frequency * replace hardcoded 100ms delays in `waitForText` and `waitForSelector` * ensure consistent polling behavior across `run_script` and `run_adapter` --- src/commands/evaluate.rs | 9 +++++---- src/constants.rs | 4 ++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/commands/evaluate.rs b/src/commands/evaluate.rs index 7164f18..b469302 100644 --- a/src/commands/evaluate.rs +++ b/src/commands/evaluate.rs @@ -2,6 +2,7 @@ use anyhow::Result; use serde_json::json; use crate::cdp::CdpClient; +use crate::constants::POLL_INTERVAL_MS; use crate::format::{format_structured, OutputFormat}; use crate::result::CommandResult; @@ -116,7 +117,7 @@ pub async fn run_script( const start = Date.now(); while (Date.now() - start < timeout) {{ if (document.body && 
document.body.innerText.includes(text)) return; - await new Promise(r => setTimeout(r, 100)); + await new Promise(r => setTimeout(r, {POLL_INTERVAL_MS})); }} throw new Error("Timeout waiting for text: " + text); }}, @@ -124,7 +125,7 @@ pub async fn run_script( const start = Date.now(); while (Date.now() - start < timeout) {{ if (document.querySelector(selector)) return; - await new Promise(r => setTimeout(r, 100)); + await new Promise(r => setTimeout(r, {POLL_INTERVAL_MS})); }} throw new Error("Timeout waiting for selector: " + selector); }}, @@ -267,7 +268,7 @@ pub async fn run_adapter( const start = Date.now(); while (Date.now() - start < timeout) {{ if (document.body && document.body.innerText.includes(text)) return; - await new Promise(r => setTimeout(r, 100)); + await new Promise(r => setTimeout(r, {POLL_INTERVAL_MS})); }} throw new Error("Timeout waiting for text: " + text); }}, @@ -275,7 +276,7 @@ pub async fn run_adapter( const start = Date.now(); while (Date.now() - start < timeout) {{ if (document.querySelector(selector)) return; - await new Promise(r => setTimeout(r, 100)); + await new Promise(r => setTimeout(r, {POLL_INTERVAL_MS})); }} throw new Error("Timeout waiting for selector: " + selector); }}, diff --git a/src/constants.rs b/src/constants.rs index 47e7626..f1febcd 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -1,2 +1,6 @@ /// Default timeout for page navigation (30 seconds) pub const NAVIGATION_TIMEOUT_MS: u64 = 30_000; + +/// Polling interval for the injected `ctx` wait helpers (waitForText / +/// waitForSelector) in run-script and adapter execution. 
+pub const POLL_INTERVAL_MS: u64 = 100; From 75fb2d6cfe6a5fbc242fda436835c79ea4b1cfe6 Mon Sep 17 00:00:00 2001 From: Aero Date: Tue, 30 Jun 2026 18:27:03 +0800 Subject: [PATCH 05/19] refactor(evaluate): unify ctx injection, improve domain parsing and URL matching, and update docs/tests * extract shared `build_ctx_object` for consistent ctx helpers across run-script and adapter * harden `@domain` parsing to only accept valid comment metadata and ignore false positives * add hostname normalization and improve domain matching logic (scheme/path/port aware) * simplify run-script execution flow and remove duplicated inline ctx code * expand tests for parsing, normalization, and ctx helper generation * update docs to clarify async execution model and navigation requirements --- src/commands/evaluate.rs | 193 +++++++++++++++++++++++++-------------- wiki/adapter.md | 9 +- wiki/run-script.md | 72 +++++++++------ 3 files changed, 168 insertions(+), 106 deletions(-) diff --git a/src/commands/evaluate.rs b/src/commands/evaluate.rs index b469302..519cc42 100644 --- a/src/commands/evaluate.rs +++ b/src/commands/evaluate.rs @@ -93,24 +93,15 @@ pub async fn evaluate( } } -/// Run a local JavaScript file inside the page context -pub async fn run_script( - client: &mut CdpClient, - session_id: &str, - file_path: &str, - script_args: &serde_json::Value, - format: OutputFormat, - output: Option<&str>, - track_navigation: bool, -) -> Result { - let script_content = std::fs::read_to_string(file_path) - .map_err(|e| anyhow::anyhow!("Failed to read script file '{}': {}", file_path, e))?; - - let args_str = serde_json::to_string(script_args)?; - - let iife = format!( - r#"(async () => {{ - const ctx = {{ +/// Build the injected `ctx` automation-helper object shared by `run-script` and +/// `adapter`. +/// +/// The returned snippet declares `const ctx = {...}` and is meant to be embedded +/// at the top of an async IIFE, before user code runs. 
Both call sites reuse it +/// so the helper surface stays in lockstep. +fn build_ctx_object(args_str: &str) -> String { + format!( + r#"const ctx = {{ args: {args_str}, wait: async (ms) => new Promise(r => setTimeout(r, ms)), waitForText: async (text, timeout = 30000) => {{ @@ -141,7 +132,29 @@ pub async fn run_script( el.dispatchEvent(new Event('input', {{ bubbles: true }})); el.dispatchEvent(new Event('change', {{ bubbles: true }})); }} - }}; + }};"# + ) +} + +/// Run a local JavaScript file inside the page context +pub async fn run_script( + client: &mut CdpClient, + session_id: &str, + file_path: &str, + script_args: &serde_json::Value, + format: OutputFormat, + output: Option<&str>, + track_navigation: bool, +) -> Result { + let script_content = std::fs::read_to_string(file_path) + .map_err(|e| anyhow::anyhow!("Failed to read script file '{}': {}", file_path, e))?; + + let args_str = serde_json::to_string(script_args)?; + let ctx = build_ctx_object(&args_str); + + let iife = format!( + r#"(async () => {{ + {ctx} {script_content} }})()"# @@ -150,35 +163,65 @@ pub async fn run_script( evaluate(client, session_id, &iife, format, output, track_navigation).await } -/// Extract `@domain` JSDoc comments from a script +/// Extract `@domain` JSDoc comments from a script. +/// +/// Only genuine metadata comment lines (`// @domain ...` or the `* @domain ...` +/// JSDoc continuation form) are honored. Matching a bare `@domain` substring +/// would otherwise pick up the marker from string literals or prose elsewhere +/// in the adapter source. fn parse_adapter_domains(content: &str) -> Vec { let mut domains = Vec::new(); for line in content.lines() { - if let Some(pos) = line.find("@domain") { - let rest = &line[pos + 7..]; - let domain = rest.trim().split_whitespace().next().unwrap_or(""); - if !domain.is_empty() { - domains.push(domain.to_string()); - } + // Require the line to be a comment before looking for the marker. 
+ let trimmed = line.trim_start(); + let comment = match trimmed.strip_prefix("//").or_else(|| trimmed.strip_prefix('*')) { + Some(rest) => rest.trim_start(), + None => continue, + }; + + // The marker must lead the comment body and be followed by whitespace so + // tokens like `@domainname` or `foo@domain.com` do not match. + let Some(rest) = comment.strip_prefix("@domain") else { + continue; + }; + if !rest.starts_with(char::is_whitespace) { + continue; + } + + let domain = rest.split_whitespace().next().unwrap_or(""); + if !domain.is_empty() { + domains.push(domain.to_string()); } } domains } -/// Check if a URL matches a domain pattern -fn url_matches_domain(url: &str, domain: &str) -> bool { - let url_lower = url.to_lowercase(); - let domain_lower = domain.to_lowercase(); - - let s = url_lower +/// Strip scheme, path, and port from a raw URL/host string, returning the bare +/// lowercased hostname. +fn normalize_host(raw: &str) -> String { + let lower = raw.trim().to_lowercase(); + let without_scheme = lower .strip_prefix("https://") - .or_else(|| url_lower.strip_prefix("http://")) - .unwrap_or(&url_lower); - - let host = s.split('/').next().unwrap_or(s); + .or_else(|| lower.strip_prefix("http://")) + .unwrap_or(&lower); + let host = without_scheme.split('/').next().unwrap_or(without_scheme); let host = host.split(':').next().unwrap_or(host); - - host == domain_lower || host.ends_with(&format!(".{}", domain_lower)) + host.to_string() +} + +/// Check if a URL matches a domain pattern. +/// +/// Both sides are normalized to a bare hostname first, so an adapter `@domain` +/// written as `https://example.com` or `example.com/path` still matches the +/// page host instead of forcing a spurious auto-navigation. 
+fn url_matches_domain(url: &str, domain: &str) -> bool { + let host = normalize_host(url); + let domain = normalize_host(domain); + if domain.is_empty() { + return false; + } + + host == domain || host.ends_with(&format!(".{}", domain)) } /// Normalize ES-module `export` keywords out of adapter source. @@ -258,41 +301,11 @@ pub async fn run_adapter( let script_content = strip_export_keywords(&script_content); let args_str = serde_json::to_string(script_args)?; + let ctx = build_ctx_object(&args_str); let iife = format!( r#"(async () => {{ - const ctx = {{ - args: {args_str}, - wait: async (ms) => new Promise(r => setTimeout(r, ms)), - waitForText: async (text, timeout = 30000) => {{ - const start = Date.now(); - while (Date.now() - start < timeout) {{ - if (document.body && document.body.innerText.includes(text)) return; - await new Promise(r => setTimeout(r, {POLL_INTERVAL_MS})); - }} - throw new Error("Timeout waiting for text: " + text); - }}, - waitForSelector: async (selector, timeout = 30000) => {{ - const start = Date.now(); - while (Date.now() - start < timeout) {{ - if (document.querySelector(selector)) return; - await new Promise(r => setTimeout(r, {POLL_INTERVAL_MS})); - }} - throw new Error("Timeout waiting for selector: " + selector); - }}, - click: async (selector) => {{ - const el = document.querySelector(selector); - if (!el) throw new Error("Element not found: " + selector); - el.click(); - }}, - fill: async (selector, value) => {{ - const el = document.querySelector(selector); - if (!el) throw new Error("Element not found: " + selector); - el.value = value; - el.dispatchEvent(new Event('input', {{ bubbles: true }})); - el.dispatchEvent(new Event('change', {{ bubbles: true }})); - }} - }}; + {ctx} {script_content} @@ -323,6 +336,27 @@ mod tests { assert_eq!(domains, vec!["xiaohongshu.com", "creator.xiaohongshu.com"]); } + #[test] + fn test_parse_adapter_domains_jsdoc_block() { + // The `* @domain` JSDoc continuation form is also honored. 
+ let content = "/**\n * @domain example.com\n */"; + assert_eq!(parse_adapter_domains(content), vec!["example.com"]); + } + + #[test] + fn test_parse_adapter_domains_ignores_non_metadata() { + // Only genuine comment metadata lines count: string literals, prose, and + // tokens like `@domainname` must not be picked up. + let content = r#" + // @domain real.com + const note = "send mail to user@domain.com"; + // contact foo@domain.org for help + // @domainname not-a-real-marker.com + const x = "@domain inside-string.com"; + "#; + assert_eq!(parse_adapter_domains(content), vec!["real.com"]); + } + #[test] fn test_strip_export_keywords() { let src = "export async function ask(ctx) {}\n export function read() {}\nexport const helper = 1;\nexport default function main() {}\nconst x = \"export inside string\";"; @@ -340,4 +374,23 @@ mod tests { assert!(url_matches_domain("https://xiaohongshu.com:8080/path", "xiaohongshu.com")); assert!(!url_matches_domain("https://google.com", "xiaohongshu.com")); } + + #[test] + fn test_url_matches_domain_normalizes_domain() { + // `@domain` written with a scheme and/or path still matches the host. 
+ assert!(url_matches_domain("https://www.example.com/page", "https://example.com")); + assert!(url_matches_domain("https://example.com/explore", "example.com/path")); + assert!(url_matches_domain("https://example.com", "http://example.com:443/")); + assert!(!url_matches_domain("https://example.com", "")); + } + + #[test] + fn test_build_ctx_object_embeds_args_and_helpers() { + let ctx = build_ctx_object(r#"{"query":"hi"}"#); + assert!(ctx.starts_with("const ctx = {")); + assert!(ctx.contains(r#"args: {"query":"hi"}"#)); + for helper in ["wait:", "waitForText:", "waitForSelector:", "click:", "fill:"] { + assert!(ctx.contains(helper), "missing helper: {helper}"); + } + } } diff --git a/wiki/adapter.md b/wiki/adapter.md index 3d7edae..06728ba 100644 --- a/wiki/adapter.md +++ b/wiki/adapter.md @@ -67,12 +67,9 @@ async function ask(ctx) { } async function readWiki(ctx) { - const wikiUrl = ctx.args.url; - if (!wikiUrl) throw new Error("url argument is required"); - - window.location.href = wikiUrl; - await ctx.waitForSelector("article, .wiki-content", 10000); - + // Navigation must happen before running this adapter (e.g. via the `navigate` + // command). Changing window.location here would tear down the Runtime.evaluate + // context mid-execution, so readWiki only scrapes the already-loaded page. return { title: document.querySelector("h1, .wiki-title")?.innerText.trim() || "", content: document.querySelector("article, .wiki-content")?.innerText.trim() || "" diff --git a/wiki/run-script.md b/wiki/run-script.md index 0e3b62d..f406f69 100644 --- a/wiki/run-script.md +++ b/wiki/run-script.md @@ -29,41 +29,53 @@ Before executing your script, `run-script` injects a globally-accessible helper This script searches `deepwiki.com` for a repository name and extracts the results. +`run-script` already runs your file inside an async context, so use the `ctx` +helpers at the top level and `return` the result directly — no IIFE wrapper is +needed. 
Navigating mid-script would tear down the evaluation context, so the page +must already be on `deepwiki.com`; navigate first with +`chrome-devtools navigate https://deepwiki.com`. + ### Script file (`skill/chrome-devtools/examples/search_deepwiki.js`) ```javascript -(async () => { - const query = ctx.args.query; - if (!query) { - throw new Error("Query argument is required. Pass it with '-a query=...'"); - } - - // Navigate to deepwiki if not already there - if (!window.location.href.includes("deepwiki.com")) { - window.location.href = "https://deepwiki.com"; - await ctx.wait(2000); - } - - // Fill in search input and submit - await ctx.fill("input[placeholder*='search']", query); - await ctx.click("button[type='submit']"); - - // Wait for results list to load - await ctx.waitForSelector(".search-results-list, .repo-card", 10000); - - // Extract titles, descriptions, and URLs - const results = Array.from(document.querySelectorAll(".repo-card, .wiki-page-item")).map(el => { - return { - title: el.querySelector(".title, h3")?.innerText.trim() || "", - description: el.querySelector(".description, p")?.innerText.trim() || "", - url: el.querySelector("a")?.href || "" - }; - }); - - return results; -})(); +// search_deepwiki.js +// Run with: chrome-devtools run-script skill/chrome-devtools/examples/search_deepwiki.js -a query="aeroxy/ast-bro" +// +// run-script injects `ctx` and runs this file inside an async context, so use +// the ctx helpers directly at the top level and `return` the result. Navigating +// would tear down the evaluation context, so this script requires the page to +// already be on deepwiki.com (use the `navigate` command first). + +const query = ctx.args.query; +if (!query) { + throw new Error("Query argument is required. 
Pass it with '-a query=...'"); +} + +if (!window.location.href.includes("deepwiki.com")) { + throw new Error("Not on deepwiki.com — navigate there first: chrome-devtools navigate https://deepwiki.com"); +} + +// Fill in search input and submit +await ctx.fill("input[placeholder*='search']", query); +await ctx.click("button[type='submit']"); + +// Wait for results list to load +await ctx.waitForSelector(".search-results-list, .repo-card", 10000); + +// Extract titles, descriptions, and URLs +const results = Array.from(document.querySelectorAll(".repo-card, .wiki-page-item")).map(el => { + return { + title: el.querySelector(".title, h3")?.innerText.trim() || "", + description: el.querySelector(".description, p")?.innerText.trim() || "", + url: el.querySelector("a")?.href || "" + }; +}); + +return results; ``` ### CLI Execution ```bash +# Navigate to the target site first, then run the script. +chrome-devtools --target warm-squid navigate https://deepwiki.com chrome-devtools --target warm-squid run-script skill/chrome-devtools/examples/search_deepwiki.js --arg query="aeroxy/ast-bro" --json ``` From 8f1a8da49016f944c7d11cdd7c5a0aeb16be5546 Mon Sep 17 00:00:00 2001 From: Aero Date: Tue, 30 Jun 2026 18:43:00 +0800 Subject: [PATCH 06/19] feat(evaluate): improve adapter safety, input handling, and execution reliability * handle checkbox/radio inputs correctly in `fill` helper * validate adapter function names as safe JS identifiers to prevent injection * refine export stripping to only affect real declarations * normalize hosts and detect local domains to choose correct navigation scheme * default `script_args` to empty object when omitted * preserve non-canonical numeric args (e.g., leading zeros, `+` prefix) * add tests covering identifier validation, local host detection, and parsing edge cases --- src/commands/evaluate.rs | 127 +++++++++++++++++++++++++++++++++++++-- src/commands/executor.rs | 16 ++--- src/lib.rs | 29 ++++++++- 3 files changed, 158 insertions(+), 14 
deletions(-) diff --git a/src/commands/evaluate.rs b/src/commands/evaluate.rs index 519cc42..8524a23 100644 --- a/src/commands/evaluate.rs +++ b/src/commands/evaluate.rs @@ -128,7 +128,20 @@ fn build_ctx_object(args_str: &str) -> String { fill: async (selector, value) => {{ const el = document.querySelector(selector); if (!el) throw new Error("Element not found: " + selector); - el.value = value; + if (el.type === 'checkbox' || el.type === 'radio') {{ + // Checkboxes/radios toggle via `checked`, not `value`. A + // boolean (or "true"/"false") sets the state directly; any + // other value selects the input whose `value` it matches. + if (value === true || value === false) {{ + el.checked = value; + }} else if (value === 'true' || value === 'false') {{ + el.checked = value === 'true'; + }} else {{ + el.checked = String(value) === el.value; + }} + }} else {{ + el.value = value; + }} el.dispatchEvent(new Event('input', {{ bubbles: true }})); el.dispatchEvent(new Event('change', {{ bubbles: true }})); }} @@ -209,6 +222,16 @@ fn normalize_host(raw: &str) -> String { host.to_string() } +/// Detect loopback / local-dev hosts that should default to plain HTTP during +/// auto-navigation, since they typically don't serve HTTPS. +fn is_local_host(domain: &str) -> bool { + let host = normalize_host(domain); + host == "localhost" + || host == "127.0.0.1" + || host == "0.0.0.0" + || host.ends_with(".localhost") +} + /// Check if a URL matches a domain pattern. /// /// Both sides are normalized to a bare hostname first, so an adapter `@domain` @@ -224,25 +247,66 @@ fn url_matches_domain(url: &str, domain: &str) -> bool { host == domain || host.ends_with(&format!(".{}", domain)) } +/// True for characters allowed inside a JavaScript identifier (after the first). +fn is_js_ident_char(c: char) -> bool { + c.is_ascii_alphanumeric() || c == '_' || c == '$' +} + +/// Validate that `name` is a plain JavaScript identifier. 
+/// +/// The adapter function name is interpolated directly into the injected IIFE, so +/// rejecting anything that isn't an identifier prevents both syntax errors and +/// code injection through a crafted `function_name`. +fn is_valid_js_identifier(name: &str) -> bool { + let mut chars = name.chars(); + match chars.next() { + // A leading digit (or any non-identifier-start char) is invalid. + Some(c) if c.is_ascii_alphabetic() || c == '_' || c == '$' => {} + _ => return false, + } + chars.all(is_js_ident_char) +} + /// Normalize ES-module `export` keywords out of adapter source. /// /// Adapters are injected as statements into an async IIFE, where a top-level /// `export` is a SyntaxError. The supported adapter format is plain function /// declarations; this strips a leading `export` / `export default` so the common /// authoring habit parses instead of failing before the function-existence check. +/// +/// The prefix is only stripped when it directly precedes a declaration keyword. +/// This avoids corrupting `export { ... }` re-export blocks or stray `export` +/// text inside multi-line strings/comments. fn strip_export_keywords(content: &str) -> String { + const DECL_KEYWORDS: [&str; 6] = ["function", "async", "class", "const", "let", "var"]; + let declaration_follows = |rest: &str| { + let rest = rest.trim_start(); + DECL_KEYWORDS.iter().any(|kw| match rest.strip_prefix(kw) { + // The keyword must end at a non-identifier boundary so `constant` + // is not mistaken for `const`. 
+ Some(after) => match after.chars().next() { + Some(c) => !is_js_ident_char(c), + None => true, + }, + None => false, + }) + }; + content .lines() .map(|line| { let trimmed = line.trim_start(); let indent = &line[..line.len() - trimmed.len()]; if let Some(rest) = trimmed.strip_prefix("export default ") { - format!("{indent}{rest}") + if declaration_follows(rest) { + return format!("{indent}{rest}"); + } } else if let Some(rest) = trimmed.strip_prefix("export ") { - format!("{indent}{rest}") - } else { - line.to_string() + if declaration_follows(rest) { + return format!("{indent}{rest}"); + } } + line.to_string() }) .collect::>() .join("\n") @@ -259,6 +323,15 @@ pub async fn run_adapter( output: Option<&str>, track_navigation: bool, ) -> Result { + // `function_name` is interpolated straight into the injected IIFE, so reject + // anything that isn't a plain identifier before touching Chrome or the disk. + if !is_valid_js_identifier(function_name) { + anyhow::bail!( + "Invalid adapter function name '{}': must be a valid JavaScript identifier", + function_name + ); + } + let script_content = std::fs::read_to_string(file_path) .map_err(|e| anyhow::anyhow!("Failed to read adapter file '{}': {}", file_path, e))?; @@ -275,7 +348,12 @@ pub async fn run_adapter( // hosts and adapters that target an existing subdomain // (e.g. `creator.xiaohongshu.com`). let target_url = if target_domain.starts_with("http://") || target_domain.starts_with("https://") { + // An explicit scheme always wins, so authors can force http/https + // by writing it in `@domain` (e.g. `@domain http://localhost:3000`). target_domain.clone() + } else if is_local_host(target_domain) { + // Local dev servers generally speak http, not https. 
+ format!("http://{}", target_domain) } else { format!("https://{}", target_domain) }; @@ -367,6 +445,42 @@ mod tests { ); } + #[test] + fn test_strip_export_keywords_preserves_non_declarations() { + // Re-export blocks, `export *`, and prose that merely starts with the + // word must be left untouched (only declarations are stripped). + let src = "export { ask, read };\nexport * from './x';\nexport const ok = 1;\nexport constants = 2;"; + let out = strip_export_keywords(src); + assert_eq!( + out, + "export { ask, read };\nexport * from './x';\nconst ok = 1;\nexport constants = 2;" + ); + } + + #[test] + fn test_is_valid_js_identifier() { + assert!(is_valid_js_identifier("ask")); + assert!(is_valid_js_identifier("_private")); + assert!(is_valid_js_identifier("$dollar")); + assert!(is_valid_js_identifier("readWiki2")); + assert!(!is_valid_js_identifier("")); + assert!(!is_valid_js_identifier("2fast")); + assert!(!is_valid_js_identifier("foo.bar")); + assert!(!is_valid_js_identifier("foo(); evil")); + assert!(!is_valid_js_identifier("foo bar")); + } + + #[test] + fn test_is_local_host() { + assert!(is_local_host("localhost")); + assert!(is_local_host("localhost:3000")); + assert!(is_local_host("127.0.0.1:8080")); + assert!(is_local_host("app.localhost")); + assert!(is_local_host("http://localhost:5173/path")); + assert!(!is_local_host("example.com")); + assert!(!is_local_host("notlocalhost.com")); + } + #[test] fn test_url_matches_domain() { assert!(url_matches_domain("https://www.xiaohongshu.com/explore", "xiaohongshu.com")); @@ -392,5 +506,8 @@ mod tests { for helper in ["wait:", "waitForText:", "waitForSelector:", "click:", "fill:"] { assert!(ctx.contains(helper), "missing helper: {helper}"); } + // fill must special-case checkable inputs instead of setting `value`. 
+ assert!(ctx.contains("el.type === 'checkbox' || el.type === 'radio'")); + assert!(ctx.contains("el.checked =")); } } diff --git a/src/commands/executor.rs b/src/commands/executor.rs index 15bea62..4fdf8c7 100644 --- a/src/commands/executor.rs +++ b/src/commands/executor.rs @@ -576,9 +576,9 @@ async fn inner_execute( .get("file_path") .and_then(|v| v.as_str()) .ok_or_else(|| anyhow!("file_path required"))?; - let script_args = args - .get("script_args") - .ok_or_else(|| anyhow!("script_args required"))?; + // script_args is conceptually optional; default to an empty object so + // clients can omit it when a script takes no arguments. + let script_args = args.get("script_args").cloned().unwrap_or_else(|| json!({})); let output = args.get("output").and_then(|v| v.as_str()); let track_navigation = args .get("track_navigation") @@ -589,7 +589,7 @@ async fn inner_execute( client, session_id, file_path, - script_args, + &script_args, req.format(), output, track_navigation, @@ -605,9 +605,9 @@ async fn inner_execute( .get("function_name") .and_then(|v| v.as_str()) .ok_or_else(|| anyhow!("function_name required"))?; - let script_args = args - .get("script_args") - .ok_or_else(|| anyhow!("script_args required"))?; + // script_args is conceptually optional; default to an empty object so + // clients can omit it when an adapter takes no arguments. 
+ let script_args = args.get("script_args").cloned().unwrap_or_else(|| json!({})); let output = args.get("output").and_then(|v| v.as_str()); let track_navigation = args .get("track_navigation") @@ -619,7 +619,7 @@ async fn inner_execute( session_id, file_path, function_name, - script_args, + &script_args, req.format(), output, track_navigation, diff --git a/src/lib.rs b/src/lib.rs index 896a0fa..fa94c65 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -450,7 +450,14 @@ fn parse_args(args: &[String]) -> Result { } else if v.eq_ignore_ascii_case("false") { serde_json::Value::Bool(false) } else if let Ok(n) = v.parse::() { - serde_json::Value::Number(n.into()) + // Keep values like ZIP codes or phone numbers verbatim: if the parsed + // integer doesn't round-trip to the original token (leading zeros, a + // leading '+', etc.), it isn't canonical, so preserve it as a string. + if n.to_string() == v { + serde_json::Value::Number(n.into()) + } else { + serde_json::Value::String(v.to_string()) + } } else if let Ok(f) = v.parse::() { if let Some(num) = serde_json::Number::from_f64(f) { serde_json::Value::Number(num) @@ -1265,6 +1272,26 @@ mod tests { assert_eq!(obj.get("bool_false").unwrap().as_bool().unwrap(), false); } + #[test] + fn test_parse_args_preserves_leading_zeros() { + // ZIP codes, phone numbers, and signed tokens must not be rewritten as + // canonical integers (which would drop leading zeros or the '+'). 
+ let args = vec![ + "zip=01234".to_string(), + "phone=+12025550123".to_string(), + "zero=0".to_string(), + "neg=-5".to_string(), + "plain=42".to_string(), + ]; + let parsed = parse_args(&args).unwrap(); + let obj = parsed.as_object().unwrap(); + assert_eq!(obj.get("zip").unwrap().as_str().unwrap(), "01234"); + assert_eq!(obj.get("phone").unwrap().as_str().unwrap(), "+12025550123"); + assert_eq!(obj.get("zero").unwrap().as_i64().unwrap(), 0); + assert_eq!(obj.get("neg").unwrap().as_i64().unwrap(), -5); + assert_eq!(obj.get("plain").unwrap().as_i64().unwrap(), 42); + } + #[test] fn test_parse_args_rejects_malformed() { // Missing '=' is now a hard error instead of being silently dropped. From 49c403c853741894dd1bd16d191963c68a545010 Mon Sep 17 00:00:00 2001 From: Aero Date: Tue, 30 Jun 2026 21:23:39 +0800 Subject: [PATCH 07/19] fix(run_adapter): validate post-navigation URL against adapter domains --- src/commands/evaluate.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/commands/evaluate.rs b/src/commands/evaluate.rs index 8524a23..e07e903 100644 --- a/src/commands/evaluate.rs +++ b/src/commands/evaluate.rs @@ -370,6 +370,17 @@ pub async fn run_adapter( None, ) .await?; + + let post_nav_url = client.current_url(session_id).await?; + let post_matched = domains.iter().any(|domain| url_matches_domain(&post_nav_url, domain)); + if !post_matched { + anyhow::bail!( + "Auto-navigation to '{}' resulted in URL '{}' which does not match adapter domains {:?}", + target_url, + post_nav_url, + domains + ); + } } } From a160cac70018f94a0a2bf8759b57e31f0bd709f1 Mon Sep 17 00:00:00 2001 From: Aero Date: Tue, 30 Jun 2026 21:26:36 +0800 Subject: [PATCH 08/19] fix(evaluate): enhance contenteditable support and improve host normalization --- src/commands/evaluate.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/commands/evaluate.rs b/src/commands/evaluate.rs index e07e903..c6ffc55 100644 --- 
a/src/commands/evaluate.rs +++ b/src/commands/evaluate.rs @@ -139,6 +139,8 @@ fn build_ctx_object(args_str: &str) -> String { }} else {{ el.checked = String(value) === el.value; }} + }} else if (el.isContentEditable) {{ + el.innerText = value; }} else {{ el.value = value; }} @@ -218,7 +220,17 @@ fn normalize_host(raw: &str) -> String { .or_else(|| lower.strip_prefix("http://")) .unwrap_or(&lower); let host = without_scheme.split('/').next().unwrap_or(without_scheme); - let host = host.split(':').next().unwrap_or(host); + let host = if host.starts_with('[') { + if let Some(idx) = host.rfind(']') { + &host[..=idx] + } else { + host + } + } else if host.matches(':').count() > 1 { + host + } else { + host.split(':').next().unwrap_or(host) + }; host.to_string() } @@ -229,6 +241,8 @@ fn is_local_host(domain: &str) -> bool { host == "localhost" || host == "127.0.0.1" || host == "0.0.0.0" + || host == "[::1]" + || host == "::1" || host.ends_with(".localhost") } @@ -486,6 +500,9 @@ mod tests { assert!(is_local_host("localhost")); assert!(is_local_host("localhost:3000")); assert!(is_local_host("127.0.0.1:8080")); + assert!(is_local_host("[::1]")); + assert!(is_local_host("[::1]:8080")); + assert!(is_local_host("::1")); assert!(is_local_host("app.localhost")); assert!(is_local_host("http://localhost:5173/path")); assert!(!is_local_host("example.com")); @@ -497,6 +514,7 @@ mod tests { assert!(url_matches_domain("https://www.xiaohongshu.com/explore", "xiaohongshu.com")); assert!(url_matches_domain("http://creator.xiaohongshu.com", "creator.xiaohongshu.com")); assert!(url_matches_domain("https://xiaohongshu.com:8080/path", "xiaohongshu.com")); + assert!(url_matches_domain("http://[::1]:3000", "[::1]")); assert!(!url_matches_domain("https://google.com", "xiaohongshu.com")); } @@ -520,5 +538,8 @@ mod tests { // fill must special-case checkable inputs instead of setting `value`. 
assert!(ctx.contains("el.type === 'checkbox' || el.type === 'radio'")); assert!(ctx.contains("el.checked =")); + // fill must support contenteditable elements. + assert!(ctx.contains("el.isContentEditable")); + assert!(ctx.contains("el.innerText =")); } } From 433bc4c4c6d6f8187b02d74e992a3ad1efc17151 Mon Sep 17 00:00:00 2001 From: Aero Date: Tue, 30 Jun 2026 23:55:30 +0800 Subject: [PATCH 09/19] feat(cli): add custom scripting & adapter support with auto-navigation - Introduced CUSTOM_SCRIPTING.md guide for creating `run-script` and `adapter` files - Added flexible argument parsing (positional + named) with automatic mapping to ctx.args - Implemented comment-based @url/@navigate auto-navigation for scripts - Added domain-aware adapter execution with @domain protection - Enhanced ctx helpers (wait, waitForSelector, click, fill) for SPA compatibility - Updated SKILL.md examples to reference new scripting/adapters - Replaced DeepWiki examples with Hacker News search scripts/adapters - Improved argument parsing to support trailing raw args (`query`, `_0`, `_1`, etc.) 
- Added URL encoding for safe interpolation in auto-navigation - Refined evaluate.rs to remove intrusive hints and improve DOM input handling --- README.md | 1 - skill/chrome-devtools/CUSTOM_SCRIPTING.md | 134 ++++++++++++++++++ skill/chrome-devtools/SKILL.md | 14 +- .../examples/deepwiki_adapter.js | 37 ----- skill/chrome-devtools/examples/hn_adapter.js | 32 +++++ .../examples/search_deepwiki.js | 34 ----- skill/chrome-devtools/examples/search_hn.js | 28 ++++ src/commands/evaluate.rs | 96 +++++++++++-- src/commands/executor.rs | 4 +- src/lib.rs | 125 +++++++++++----- wiki/adapter.md | 80 ++++++----- wiki/run-script.md | 85 +++++++---- 12 files changed, 482 insertions(+), 188 deletions(-) create mode 100644 skill/chrome-devtools/CUSTOM_SCRIPTING.md delete mode 100644 skill/chrome-devtools/examples/deepwiki_adapter.js create mode 100644 skill/chrome-devtools/examples/hn_adapter.js delete mode 100644 skill/chrome-devtools/examples/search_deepwiki.js create mode 100644 skill/chrome-devtools/examples/search_hn.js diff --git a/README.md b/README.md index 72ab50a..ec7ea2d 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ High-performance rust CLI that connects to an existing Chrome browser via the De [![crates.io](https://img.shields.io/crates/v/chrome-devtools-cli.svg)](https://crates.io/crates/chrome-devtools-cli) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](./LICENSE) -[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/aeroxy/chrome-devtools-cli) ## Installation diff --git a/skill/chrome-devtools/CUSTOM_SCRIPTING.md b/skill/chrome-devtools/CUSTOM_SCRIPTING.md new file mode 100644 index 0000000..220c0b2 --- /dev/null +++ b/skill/chrome-devtools/CUSTOM_SCRIPTING.md @@ -0,0 +1,134 @@ +# Custom Scripting & Adapters Guide + +This guide details how to create and execute custom JavaScript scripts (`run-script`) and custom domain-aware adapters (`adapter`) using the Chrome DevTools CLI. + +--- + +## 1. 
Custom Scripts (`run-script`) + +`run-script` reads a local JavaScript file, wraps it inside an Immediately Invoked Function Expression (IIFE), and evaluates it directly inside the target browser's page context. + +### Flexible Argument Syntax +Dynamic arguments passed to the script can be specified in several styles and are automatically parsed and made available inside `ctx.args`: + +1. **Pure Positional Style (Recommended for single queries):** + Simply append raw positional strings at the end of the command. The first trailing positional argument is automatically mapped to `ctx.args.query` (as well as `ctx.args._0`); any further positionals become `ctx.args._1`, `ctx.args._2`, and so on: + ```bash + chrome-devtools run-script search_hn.js "Rust" + ``` +2. **Hybrid Style (Positional + Named):** + ```bash + chrome-devtools run-script search_hn.js "Rust" limit=10 safeSearch=true + ``` +3. **Pure Named Style:** + ```bash + chrome-devtools run-script search_hn.js query="Rust" limit=10 + ``` + +### Comment-based Auto-Navigation +By declaring a standard `// @url <url>` or `// @navigate <url>` comment marker at the top of your script file, the CLI will check the active tab's current URL before executing your script. + +If the active tab is not currently on a domain matching the target URL, **the CLI will automatically navigate the tab to the target URL first**, wait for the page to load, and then execute your script. You can use `{arg_name}` placeholders inside the `@url` template to interpolate CLI arguments dynamically: +```javascript +// @url https://hn.algolia.com/?query={query} +``` + +--- + +## 2. Custom Domain-Aware Adapters (`adapter`) + +`adapter` reads a local custom JS adapter file, parses the target `@domain` JSDoc markers, and ensures the browser is on a matching domain before invoking a specific named function inside the script. + +### Domain Protection and Auto-Navigation +By declaring standard `@domain` markers at the top of your adapter file, the CLI checks the active page URL before executing your function.
If the active tab is not on the target domain, **it automatically navigates the tab to the first target domain**, waits for it to load, and then runs your adapter. + +```javascript +// ==UserAdapter== +// @name Hacker News Search Adapter +// @domain hn.algolia.com +// ==/UserAdapter== +``` + +--- + +## 3. Injected Helper Context (`ctx`) + +Both `run-script` and `adapter` functions are passed an injected `ctx` context containing standard helper utilities: + +* `ctx.args`: Object containing typed key-value arguments. +* `ctx.wait(ms)`: Sleep/delay utility (`await ctx.wait(1000)`). +* `ctx.waitForText(text, timeout_ms)`: Polls the page body text until the string is present (defaults to 30s). +* `ctx.waitForSelector(selector, timeout_ms)`: Polls until an element matching the CSS selector exists in the DOM. +* `ctx.click(selector)`: DOM clicking helper. +* `ctx.fill(selector, value)`: DOM value input helper. Highly compatible with stateful frameworks (like React, Vue, and Angular) as it overrides standard value setters and fires appropriate events. + +--- + +## 4. Real-World SPA Example (Hacker News Search) + +These real-world examples work on `hn.algolia.com`. 
+ +### Script file (`skill/chrome-devtools/examples/search_hn.js`) +```javascript +// @url https://hn.algolia.com/?query={query} + +// search_hn.js +// Run with: chrome-devtools run-script skill/chrome-devtools/examples/search_hn.js "Rust" + +const query = ctx.args.query; +if (!query) { + throw new Error("Query argument is required."); +} + +// Wait for results to update/load +await ctx.waitForSelector("article.Story", 10000); + +// Extract results +const results = Array.from(document.querySelectorAll("article.Story")).map(el => { + const titleEl = el.querySelector(".Story_title a"); + const metaEl = el.querySelector(".Story_meta"); + return { + title: titleEl?.innerText.trim() || "", + meta: metaEl?.innerText.trim() || "", + url: titleEl?.href || "" + }; +}); + +return results; +``` + +### Adapter file (`skill/chrome-devtools/examples/hn_adapter.js`) +```javascript +// ==UserAdapter== +// @name Hacker News Search Adapter +// @domain hn.algolia.com +// ==/UserAdapter== + +// Run with: chrome-devtools adapter skill/chrome-devtools/examples/hn_adapter.js search "Rust" + +async function search(ctx) { + const query = ctx.args.query; + if (!query) throw new Error("query argument is required"); + + // Fill search input (the SPA will fetch and render results dynamically) + await ctx.fill("input.SearchInput", query); + + // Wait a brief moment for React and the network request to resolve and update the DOM + await ctx.wait(1500); + + // Wait for results to update/load + await ctx.waitForSelector("article.Story", 10000); + + const results = Array.from(document.querySelectorAll("article.Story")).map(el => { + const titleEl = el.querySelector(".Story_title a"); + const metaEl = el.querySelector(".Story_meta"); + return { + title: titleEl?.innerText.trim() || "", + meta: metaEl?.innerText.trim() || "", + url: titleEl?.href || "" + }; + }); + + return results; +} +``` diff --git a/skill/chrome-devtools/SKILL.md b/skill/chrome-devtools/SKILL.md index 00b4742..7bd8d1f 100644 --- 
a/skill/chrome-devtools/SKILL.md +++ b/skill/chrome-devtools/SKILL.md @@ -313,20 +313,24 @@ chrome-devtools --target warm-squid read-page --json ### Pattern 13: Local JS Scripting (run-script) -Evaluate a local JavaScript file inside the page context. Dynamic arguments passed via `-a/--arg` are automatically typed and injected into the execution context as `ctx.args`. Standard helper functions are also injected. +Evaluate a local JavaScript file inside the page context. Dynamic arguments can be passed as raw positional values at the end of the command or via `-a/--arg` keys, and are automatically typed and injected into the execution context as `ctx.args`. Supports comment-based `@url` auto-navigation. + +See the dedicated [Custom Scripting Guide](./CUSTOM_SCRIPTING.md) for full documentation on script creation, argument parsing, and auto-navigation. ```bash -# Run a script with dynamic arguments -chrome-devtools --target warm-squid run-script skill/chrome-devtools/examples/search_deepwiki.js --arg query="aeroxy/ast-bro" +# Run a script with trailing positional arguments (auto-navigates if @url is present) +chrome-devtools --target warm-squid run-script skill/chrome-devtools/examples/search_hn.js "Rust" ``` ### Pattern 14: Custom Domain-Aware Adapters (adapter) Run site-specific adapter actions. If the browser is not currently on a matching domain (as defined by `@domain` comments in the JSDoc header), the CLI auto-navigates to that domain first. +See the dedicated [Custom Scripting Guide](./CUSTOM_SCRIPTING.md) for full documentation on custom adapters, domain protection, and argument parsing. 
+ ```bash -# Run an adapter function with automatic domain protection and navigation -chrome-devtools --target warm-squid adapter skill/chrome-devtools/examples/deepwiki_adapter.js ask --arg query="how to write adapter" +# Run an adapter function with positional args (auto-navigates if the current page is not on the target domain) +chrome-devtools --target warm-squid adapter skill/chrome-devtools/examples/hn_adapter.js search "Rust" ``` ## Complete Command Reference diff --git a/skill/chrome-devtools/examples/deepwiki_adapter.js b/skill/chrome-devtools/examples/deepwiki_adapter.js deleted file mode 100644 index d8a9b48..0000000 --- a/skill/chrome-devtools/examples/deepwiki_adapter.js +++ /dev/null @@ -1,37 +0,0 @@ -// ==UserAdapter== -// @name DeepWiki Adapter -// @domain deepwiki.com -// ==/UserAdapter== - -// Run with: chrome-devtools adapter skill/chrome-devtools/examples/deepwiki_adapter.js ask -a query="how to write adapter" - -async function ask(ctx) { - const query = ctx.args.query; - if (!query) throw new Error("query argument is required"); - - // Fill search input and click ask/search - await ctx.fill("input.ask-input, input[placeholder*='Ask']", query); - await ctx.click("button.ask-btn, button[type='submit']"); - - // Wait for AI response to finish streaming/loading - await ctx.waitForSelector(".answer-box, .ai-response", 15000); - await ctx.wait(2000); // Allow text to settle - - const answer = document.querySelector(".answer-box, .ai-response")?.innerText.trim() || ""; - const sources = Array.from(document.querySelectorAll(".sources-list a, .citation-link")).map(el => ({ - title: el.innerText.trim(), - url: el.href - })); - - return { query, answer, sources }; -} - -async function readWiki(ctx) { - // Navigation must happen before running this adapter (e.g. via the `navigate` - // command). Changing window.location here would tear down the Runtime.evaluate - // context mid-execution, so readWiki only scrapes the already-loaded page.
- return { - title: document.querySelector("h1, .wiki-title")?.innerText.trim() || "", - content: document.querySelector("article, .wiki-content")?.innerText.trim() || "" - }; -} diff --git a/skill/chrome-devtools/examples/hn_adapter.js b/skill/chrome-devtools/examples/hn_adapter.js new file mode 100644 index 0000000..8c05a62 --- /dev/null +++ b/skill/chrome-devtools/examples/hn_adapter.js @@ -0,0 +1,32 @@ +// ==UserAdapter== +// @name Hacker News Search Adapter +// @domain hn.algolia.com +// ==/UserAdapter== + +// Run with: chrome-devtools adapter skill/chrome-devtools/examples/hn_adapter.js search -a query="Rust" + +async function search(ctx) { + const query = ctx.args.query; + if (!query) throw new Error("query argument is required"); + + // Fill search input (the SPA will fetch and render results dynamically) + await ctx.fill("input.SearchInput", query); + + // Wait a brief moment for React and the network request to resolve and update the DOM + await ctx.wait(1500); + + // Wait for results to update/load + await ctx.waitForSelector("article.Story", 10000); + + const results = Array.from(document.querySelectorAll("article.Story")).map(el => { + const titleEl = el.querySelector(".Story_title a"); + const metaEl = el.querySelector(".Story_meta"); + return { + title: titleEl?.innerText.trim() || "", + meta: metaEl?.innerText.trim() || "", + url: titleEl?.href || "" + }; + }); + + return results; +} diff --git a/skill/chrome-devtools/examples/search_deepwiki.js b/skill/chrome-devtools/examples/search_deepwiki.js deleted file mode 100644 index 7c91f9f..0000000 --- a/skill/chrome-devtools/examples/search_deepwiki.js +++ /dev/null @@ -1,34 +0,0 @@ -// search_deepwiki.js -// Run with: chrome-devtools run-script skill/chrome-devtools/examples/search_deepwiki.js -a query="aeroxy/ast-bro" -// -// run-script injects `ctx` and runs this file inside an async context, so use -// the ctx helpers directly at the top level and `return` the result. 
Navigating -// would tear down the evaluation context, so this script requires the page to -// already be on deepwiki.com (use the `navigate` command first). - -const query = ctx.args.query; -if (!query) { - throw new Error("Query argument is required. Pass it with '-a query=...'"); -} - -if (!window.location.href.includes("deepwiki.com")) { - throw new Error("Not on deepwiki.com — navigate there first: chrome-devtools navigate https://deepwiki.com"); -} - -// Fill in search input and submit -await ctx.fill("input[placeholder*='search']", query); -await ctx.click("button[type='submit']"); - -// Wait for results list to load -await ctx.waitForSelector(".search-results-list, .repo-card", 10000); - -// Extract titles, descriptions, and URLs -const results = Array.from(document.querySelectorAll(".repo-card, .wiki-page-item")).map(el => { - return { - title: el.querySelector(".title, h3")?.innerText.trim() || "", - description: el.querySelector(".description, p")?.innerText.trim() || "", - url: el.querySelector("a")?.href || "" - }; -}); - -return results; diff --git a/skill/chrome-devtools/examples/search_hn.js b/skill/chrome-devtools/examples/search_hn.js new file mode 100644 index 0000000..7cb11fc --- /dev/null +++ b/skill/chrome-devtools/examples/search_hn.js @@ -0,0 +1,28 @@ +// @url https://hn.algolia.com/?query={query} + +// search_hn.js +// Run with: chrome-devtools run-script skill/chrome-devtools/examples/search_hn.js -a query="Rust" +// +// run-script injects `ctx` and runs this file inside an async context. +// Setting `@url` above tells the CLI to automatically navigate to the pre-rendered query URL first! + +const query = ctx.args.query; +if (!query) { + throw new Error("Query argument is required. 
Pass it with '-a query=...'"); +} + +// Wait for results to update/load +await ctx.waitForSelector("article.Story", 10000); + +// Extract results +const results = Array.from(document.querySelectorAll("article.Story")).map(el => { + const titleEl = el.querySelector(".Story_title a"); + const metaEl = el.querySelector(".Story_meta"); + return { + title: titleEl?.innerText.trim() || "", + meta: metaEl?.innerText.trim() || "", + url: titleEl?.href || "" + }; +}); + +return results; diff --git a/src/commands/evaluate.rs b/src/commands/evaluate.rs index c6ffc55..bf96655 100644 --- a/src/commands/evaluate.rs +++ b/src/commands/evaluate.rs @@ -44,16 +44,14 @@ pub async fn evaluate( let desc = exception["exception"]["description"] .as_str() .unwrap_or(text); - anyhow::bail!( - "{desc}\n\n[HINT: To explore the page DOM, use the `snapshot` command instead of `evaluate`. To interact with elements, use `click` or `fill`.]" - ); + anyhow::bail!("{desc}"); } let value = &result["result"]; let val_type = value["type"].as_str().unwrap_or("undefined"); let output_hint = if format.is_text() { - let mut text = match val_type { + let text = match val_type { "undefined" => "undefined".to_string(), "string" => value["value"].as_str().unwrap_or("").to_string(), _ => { @@ -65,13 +63,6 @@ pub async fn evaluate( } }; - if expression.contains("querySelector") - || expression.contains("document.body") - || expression.contains("getElementById") - || expression.contains("getElementsBy") - { - text.push_str("\n\n[HINT: Avoid using `evaluate` for DOM traversal. 
Use the `snapshot` command to get a clean accessibility tree of the page, then use `click` or `fill`.]"); - } text } else { let v = value.get("value").unwrap_or(value); @@ -142,7 +133,13 @@ fn build_ctx_object(args_str: &str) -> String { }} else if (el.isContentEditable) {{ el.innerText = value; }} else {{ - el.value = value; + const setter = Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, 'value')?.set + || Object.getOwnPropertyDescriptor(window.HTMLTextAreaElement.prototype, 'value')?.set; + if (setter) {{ + setter.call(el, value); + }} else {{ + el.value = value; + }} }} el.dispatchEvent(new Event('input', {{ bubbles: true }})); el.dispatchEvent(new Event('change', {{ bubbles: true }})); @@ -151,6 +148,24 @@ fn build_ctx_object(args_str: &str) -> String { ) } +fn url_encode(input: &str) -> String { + let mut encoded = String::new(); + for b in input.bytes() { + match b { + b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => { + encoded.push(b as char); + } + b' ' => { + encoded.push('+'); + } + _ => { + encoded.push_str(&format!("%{:02X}", b)); + } + } + } + encoded +} + /// Run a local JavaScript file inside the page context pub async fn run_script( client: &mut CdpClient, @@ -164,6 +179,63 @@ pub async fn run_script( let script_content = std::fs::read_to_string(file_path) .map_err(|e| anyhow::anyhow!("Failed to read script file '{}': {}", file_path, e))?; + // Perform auto-navigation if @url or @navigate comments exist at the top of the file + let mut target_url = None; + for line in script_content.lines() { + let trimmed = line.trim_start(); + if let Some(comment) = trimmed.strip_prefix("//").or_else(|| trimmed.strip_prefix('*')) { + let comment = comment.trim_start(); + if let Some(rest) = comment.strip_prefix("@url") { + target_url = Some(rest.trim().to_string()); + break; + } else if let Some(rest) = comment.strip_prefix("@navigate") { + target_url = Some(rest.trim().to_string()); + break; + } + } + } + + if let 
Some(ref url) = target_url { + // Interpolate {arg_name} placeholders from script_args + let mut interpolated_url = url.clone(); + if let Some(obj) = script_args.as_object() { + for (key, val) in obj { + let placeholder = format!("{{{}}}", key); + let val_str = match val { + serde_json::Value::String(s) => s.clone(), + other => other.to_string(), + }; + let encoded_val = url_encode(&val_str); + interpolated_url = interpolated_url.replace(&placeholder, &encoded_val); + } + } + + let current_url = client.current_url(session_id).await?; + if !url_matches_domain(&current_url, &interpolated_url) { + eprintln!("[script] Current URL '{}' does not match target URL '{}'. Auto-navigating...", current_url, interpolated_url); + + let nav_url = if interpolated_url.starts_with("http://") || interpolated_url.starts_with("https://") { + interpolated_url.clone() + } else if is_local_host(&interpolated_url) { + format!("http://{}", interpolated_url) + } else { + format!("https://{}", interpolated_url) + }; + + crate::commands::navigate::navigate( + client, + session_id, + Some(&nav_url), + false, + false, + false, + None, + None, + ) + .await?; + } + } + let args_str = serde_json::to_string(script_args)?; let ctx = build_ctx_object(&args_str); diff --git a/src/commands/executor.rs b/src/commands/executor.rs index 4fdf8c7..fcb4e51 100644 --- a/src/commands/executor.rs +++ b/src/commands/executor.rs @@ -72,8 +72,8 @@ pub fn known_args(cmd: &str) -> &'static [&'static str] { "console" => &["duration", "type"], "network" => &["duration", "type"], "sw-logs" => &["duration", "extension_id"], - "run-script" => &["file_path", "script_args", "output", "track_navigation"], - "adapter" => &["file_path", "function_name", "script_args", "output", "track_navigation"], + "run-script" => &["file_path", "script_args", "raw_args", "output", "track_navigation"], + "adapter" => &["file_path", "function_name", "script_args", "raw_args", "output", "track_navigation"], "kill-daemon" => &[], _ => &[], } diff
--git a/src/lib.rs b/src/lib.rs index fa94c65..6682ad3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -336,6 +336,9 @@ pub enum Commands { /// Optional arguments to pass to the script as key=value pairs (can be repeated) #[arg(long = "arg", short = 'a')] script_args: Vec, + /// Extra trailing raw/positional arguments + #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + raw_args: Vec, /// Write output to a file instead of stdout #[arg(long, short)] output: Option, @@ -354,6 +357,9 @@ pub enum Commands { /// Optional arguments to pass to the function as key=value pairs (can be repeated) #[arg(long = "arg", short = 'a')] script_args: Vec, + /// Extra trailing raw/positional arguments + #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + raw_args: Vec, /// Write output to a file instead of stdout #[arg(long, short)] output: Option, @@ -432,11 +438,33 @@ fn absolutize_path(path: &str) -> Result { } } -fn parse_args(args: &[String]) -> Result { +fn parse_json_value(v: &str) -> serde_json::Value { + if v.eq_ignore_ascii_case("true") { + serde_json::Value::Bool(true) + } else if v.eq_ignore_ascii_case("false") { + serde_json::Value::Bool(false) + } else if let Ok(n) = v.parse::() { + if n.to_string() == v { + serde_json::Value::Number(n.into()) + } else { + serde_json::Value::String(v.to_string()) + } + } else if let Ok(f) = v.parse::() { + if let Some(num) = serde_json::Number::from_f64(f) { + serde_json::Value::Number(num) + } else { + serde_json::Value::String(v.to_string()) + } + } else { + serde_json::Value::String(v.to_string()) + } +} + +fn parse_args(named_args: &[String], raw_args: &[String]) -> Result { let mut map = serde_json::Map::new(); - for arg in args { - // Reject malformed tokens up front so bad `--arg` input surfaces a clear - // error here instead of failing later with a misleading message. 
+ + // Parse named_args (from -a/--arg) first + for arg in named_args { let (k, v) = arg .split_once('=') .ok_or_else(|| anyhow::anyhow!("Invalid argument '{arg}': expected key=value"))?; @@ -445,30 +473,34 @@ fn parse_args(args: &[String]) -> Result { anyhow::bail!("Invalid argument '{arg}': key must not be empty"); } let v = v.trim(); - let val = if v.eq_ignore_ascii_case("true") { - serde_json::Value::Bool(true) - } else if v.eq_ignore_ascii_case("false") { - serde_json::Value::Bool(false) - } else if let Ok(n) = v.parse::() { - // Keep values like ZIP codes or phone numbers verbatim: if the parsed - // integer doesn't round-trip to the original token (leading zeros, a - // leading '+', etc.), it isn't canonical, so preserve it as a string. - if n.to_string() == v { - serde_json::Value::Number(n.into()) - } else { - serde_json::Value::String(v.to_string()) - } - } else if let Ok(f) = v.parse::() { - if let Some(num) = serde_json::Number::from_f64(f) { - serde_json::Value::Number(num) - } else { - serde_json::Value::String(v.to_string()) + map.insert(k.to_string(), parse_json_value(v)); + } + + // Parse raw_args (trailing positional arguments) + let mut positional_count = 0; + for arg in raw_args { + if let Some((k, v)) = arg.split_once('=') { + let k = k.trim(); + if k.is_empty() { + anyhow::bail!("Invalid argument '{arg}': key must not be empty"); } + let v = v.trim(); + map.insert(k.to_string(), parse_json_value(v)); } else { - serde_json::Value::String(v.to_string()) - }; - map.insert(k.to_string(), val); + // Positional argument without `=` + let val = parse_json_value(arg.trim()); + + // Populated as `_0`, `_1`, etc. 
+ map.insert(format!("_{}", positional_count), val.clone()); + + // If it's the first positional argument, also map it to `"query"` + if positional_count == 0 { + map.insert("query".to_string(), val); + } + positional_count += 1; + } } + Ok(serde_json::Value::Object(map)) } @@ -620,13 +652,14 @@ fn build_request(cli: &Cli) -> Result { Commands::RunScript { file_path, script_args, + raw_args, output, track_navigation, } => ( "run-script", json!({ "file_path": absolutize_path(file_path)?, - "script_args": parse_args(script_args)?, + "script_args": parse_args(script_args, raw_args)?, "output": absolutize(output)?, "track_navigation": track_navigation }), @@ -635,6 +668,7 @@ fn build_request(cli: &Cli) -> Result { file_path, function_name, script_args, + raw_args, output, track_navigation, } => ( @@ -642,7 +676,7 @@ fn build_request(cli: &Cli) -> Result { json!({ "file_path": absolutize_path(file_path)?, "function_name": function_name, - "script_args": parse_args(script_args)?, + "script_args": parse_args(script_args, raw_args)?, "output": absolutize(output)?, "track_navigation": track_navigation }), @@ -1206,6 +1240,7 @@ async fn run_direct(cli: &Cli, ws_url: &str) -> Result { Commands::RunScript { file_path, script_args, + raw_args, output, track_navigation, } => { @@ -1213,7 +1248,7 @@ async fn run_direct(cli: &Cli, ws_url: &str) -> Result { &mut client, &session_id, file_path, - &parse_args(script_args)?, + &parse_args(script_args, raw_args)?, cli.output_format(), output.as_deref(), *track_navigation, @@ -1224,6 +1259,7 @@ async fn run_direct(cli: &Cli, ws_url: &str) -> Result { file_path, function_name, script_args, + raw_args, output, track_navigation, } => { @@ -1232,7 +1268,7 @@ async fn run_direct(cli: &Cli, ws_url: &str) -> Result { &session_id, file_path, function_name, - &parse_args(script_args)?, + &parse_args(script_args, raw_args)?, cli.output_format(), output.as_deref(), *track_navigation, @@ -1263,7 +1299,7 @@ mod tests { "bool_true=true".to_string(), 
"bool_false=False".to_string(), ]; - let parsed = parse_args(&args).unwrap(); + let parsed = parse_args(&args, &[]).unwrap(); let obj = parsed.as_object().unwrap(); assert_eq!(obj.get("str_val").unwrap().as_str().unwrap(), "hello"); assert_eq!(obj.get("int_val").unwrap().as_i64().unwrap(), 42); @@ -1283,7 +1319,7 @@ mod tests { "neg=-5".to_string(), "plain=42".to_string(), ]; - let parsed = parse_args(&args).unwrap(); + let parsed = parse_args(&args, &[]).unwrap(); let obj = parsed.as_object().unwrap(); assert_eq!(obj.get("zip").unwrap().as_str().unwrap(), "01234"); assert_eq!(obj.get("phone").unwrap().as_str().unwrap(), "+12025550123"); @@ -1295,9 +1331,30 @@ mod tests { #[test] fn test_parse_args_rejects_malformed() { // Missing '=' is now a hard error instead of being silently dropped. - assert!(parse_args(&["no_equals".to_string()]).is_err()); + assert!(parse_args(&["no_equals".to_string()], &[]).is_err()); // Empty keys are rejected. - assert!(parse_args(&["=value".to_string()]).is_err()); - assert!(parse_args(&[" =value".to_string()]).is_err()); + assert!(parse_args(&["=value".to_string()], &[]).is_err()); + assert!(parse_args(&[" =value".to_string()], &[]).is_err()); + } + + #[test] + fn test_parse_args_with_raw_args() { + // Standard trailing argument gets mapped to "query" and "_0" + let parsed = parse_args(&[], &["what is a witch".to_string()]).unwrap(); + let obj = parsed.as_object().unwrap(); + assert_eq!(obj.get("query").unwrap().as_str().unwrap(), "what is a witch"); + assert_eq!(obj.get("_0").unwrap().as_str().unwrap(), "what is a witch"); + + // Mixture of key=value trailing args and raw positional args + let parsed = parse_args( + &["user=admin".to_string()], + &["what is a witch".to_string(), "limit=10".to_string(), "second_arg".to_string()], + ).unwrap(); + let obj = parsed.as_object().unwrap(); + assert_eq!(obj.get("user").unwrap().as_str().unwrap(), "admin"); + assert_eq!(obj.get("query").unwrap().as_str().unwrap(), "what is a witch"); + 
assert_eq!(obj.get("_0").unwrap().as_str().unwrap(), "what is a witch"); + assert_eq!(obj.get("limit").unwrap().as_i64().unwrap(), 10); + assert_eq!(obj.get("_1").unwrap().as_str().unwrap(), "second_arg"); } } diff --git a/wiki/adapter.md b/wiki/adapter.md index 06728ba..eafb2f6 100644 --- a/wiki/adapter.md +++ b/wiki/adapter.md @@ -5,13 +5,35 @@ Run site-specific custom JavaScript adapter functions with built-in domain prote ## Synopsis ```bash -chrome-devtools [--target ] adapter [--arg key=value] [--output ] [--track-navigation] +chrome-devtools [--target ] adapter [--arg key=value] [raw_args...] [--output ] [--track-navigation] ``` ## Description `adapter` reads a local custom JS adapter file, parses the target `@domain` markers, and ensures the browser is on a matching domain before invoking a specific named function exported or defined inside the script. +### Flexible Argument Syntax + +Dynamic arguments passed to the adapter function can be specified in several clean and intuitive styles: + +1. **Pure Positional Style (Recommended for single queries):** + Simply append raw positional strings at the end of the command. If a single argument is passed, it is automatically mapped to `ctx.args.query` (as well as `ctx.args._0`): + ```bash + chrome-devtools adapter hn_adapter.js search "Rust" + ``` +2. **Hybrid Style (Positional + Named):** + For functions with multiple parameters, you can pass the main parameter positionally, and other options as explicit `key=value` pairs: + ```bash + chrome-devtools adapter hn_adapter.js search "Rust" limit=10 safeSearch=true + ``` +3. **Pure Named Style:** + Specify named options as explicit key-value pairs at the end of the command or via the `-a/--arg` flag: + ```bash + chrome-devtools adapter hn_adapter.js search query="Rust" limit=10 + ``` + +All values are automatically parsed into their appropriate JavaScript types (e.g. `10` to number, `true` to boolean, etc.) and made available inside `ctx.args`. 
+ ### Domain Protection and Auto-Navigation By declaring standard `@domain` markers at the top of your adapter file, the CLI checks the current page URL before executing your function. If the active tab is not on the target domain, **it automatically navigates the tab to the first target domain**, waits for it to load, and then runs your adapter. @@ -19,7 +41,7 @@ By declaring standard `@domain` markers at the top of your adapter file, the CLI ```javascript // ==UserAdapter== // @name My Custom Adapter -// @domain deepwiki.com +// @domain wikipedia.org // ==/UserAdapter== ``` @@ -34,51 +56,45 @@ Like `run-script`, your adapter function is passed a `ctx` context containing he * `ctx.click(selector)`: DOM clicking helper. * `ctx.fill(selector, value)`: DOM value input helper. -## Real-World Example: DeepWiki AI Q&A Adapter +## Real-World Example: Hacker News Search Adapter -This adapter has a target domain of `deepwiki.com` and exposes an `ask` Q&A function and a `readWiki` document reader function. +This adapter has a target domain of `hn.algolia.com` and exposes a `search` function. 
-### Adapter file (`skill/chrome-devtools/examples/deepwiki_adapter.js`) +### Adapter file (`skill/chrome-devtools/examples/hn_adapter.js`) ```javascript // ==UserAdapter== -// @name DeepWiki Adapter -// @domain deepwiki.com +// @name Hacker News Search Adapter +// @domain hn.algolia.com // ==/UserAdapter== -async function ask(ctx) { +// Run with: chrome-devtools adapter skill/chrome-devtools/examples/hn_adapter.js search -a query="Rust" + +async function search(ctx) { const query = ctx.args.query; if (!query) throw new Error("query argument is required"); - // Fill search input and click ask/search - await ctx.fill("input.ask-input, input[placeholder*='Ask']", query); - await ctx.click("button.ask-btn, button[type='submit']"); - - // Wait for AI response to finish streaming/loading - await ctx.waitForSelector(".answer-box, .ai-response", 15000); - await ctx.wait(2000); // Allow text to settle + // Fill search input (the SPA will fetch and render results dynamically) + await ctx.fill("input.SearchInput", query); - const answer = document.querySelector(".answer-box, .ai-response")?.innerText.trim() || ""; - const sources = Array.from(document.querySelectorAll(".sources-list a, .citation-link")).map(el => ({ - title: el.innerText.trim(), - url: el.href - })); + // Wait for results to update/load + await ctx.waitForSelector("article.Story", 10000); - return { query, answer, sources }; -} + const results = Array.from(document.querySelectorAll("article.Story")).map(el => { + const titleEl = el.querySelector(".Story_title a"); + const metaEl = el.querySelector(".Story_meta"); + return { + title: titleEl?.innerText.trim() || "", + meta: metaEl?.innerText.trim() || "", + url: titleEl?.href || "" + }; + }); -async function readWiki(ctx) { - // Navigation must happen before running this adapter (e.g. via the `navigate` - // command). 
Changing window.location here would tear down the Runtime.evaluate - // context mid-execution, so readWiki only scrapes the already-loaded page. - return { - title: document.querySelector("h1, .wiki-title")?.innerText.trim() || "", - content: document.querySelector("article, .wiki-content")?.innerText.trim() || "" - }; + return results; } ``` ### CLI Execution ```bash -# Executing 'ask' on deepwiki.com (will auto-navigate there if not already open) -chrome-devtools --target warm-squid adapter skill/chrome-devtools/examples/deepwiki_adapter.js ask --arg query="how to write adapter" --json +# Executing 'search' on hn.algolia.com (will auto-navigate there if not already open) +chrome-devtools --target warm-squid adapter skill/chrome-devtools/examples/hn_adapter.js search --arg query="Rust" --json ``` diff --git a/wiki/run-script.md b/wiki/run-script.md index f406f69..a27334c 100644 --- a/wiki/run-script.md +++ b/wiki/run-script.md @@ -5,14 +5,42 @@ Evaluate a local JavaScript file inside the current page context with injected h ## Synopsis ```bash -chrome-devtools [--target ] run-script [--arg key=value] [--output ] [--track-navigation] +chrome-devtools [--target ] run-script [--arg key=value] [raw_args...] [--output ] [--track-navigation] ``` ## Description `run-script` reads a local JavaScript file off-disk, wraps it inside an Immediately Invoked Function Expression (IIFE), and evaluates it directly inside the target browser's page context. -Dynamic arguments passed with `-a` / `--arg` are automatically typed (strings, integers, floats, booleans) and made available to the script. +### Flexible Argument Syntax + +Dynamic arguments passed to the script can be specified in several clean and intuitive styles: + +1. **Pure Positional Style (Recommended for single queries):** + Simply append raw positional strings at the end of the command. 
If a single argument is passed, it is automatically mapped to `ctx.args.query` (as well as `ctx.args._0`): + ```bash + chrome-devtools run-script search_hn.js "Rust" + ``` +2. **Hybrid Style (Positional + Named):** + For scripts with multiple parameters, you can pass the main parameter positionally, and other options as explicit `key=value` pairs: + ```bash + chrome-devtools run-script search_hn.js "Rust" limit=10 safeSearch=true + ``` +3. **Pure Named Style:** + Specify named options as explicit key-value pairs at the end of the command or via the `-a/--arg` flag: + ```bash + chrome-devtools run-script search_hn.js query="Rust" limit=10 + ``` + +All values are automatically parsed into their appropriate JavaScript types (e.g. `10` to number, `true` to boolean, etc.) and made available inside `ctx.args`. + +### Auto-Navigation and Page Opening + +By declaring a standard `// @url ` or `// @navigate ` comment marker at the top of your script file, the CLI will check the active tab's current URL before executing your script. If the active tab is not currently on a domain matching the target URL, **the CLI will automatically navigate the tab to the target URL first**, wait for the page to load, and then execute your script. This allows you to run automated scripts without needing to pre-open or pre-navigate the page manually! + +```javascript +// @url https://hn.algolia.com +``` ### Injected Helper Context (`ctx`) @@ -25,48 +53,44 @@ Before executing your script, `run-script` injects a globally-accessible helper * `ctx.click(selector)`: Clicks an element by CSS selector. * `ctx.fill(selector, value)`: Fills an input field with the value and fires standard input and change events. -## Real-World Example: Search DeepWiki +## Real-World Example: Search Hacker News -This script searches `deepwiki.com` for a repository name and extracts the results. 
+This script searches `hn.algolia.com` (Hacker News Search) for a query and extracts the results dynamically without triggering a full page reload. `run-script` already runs your file inside an async context, so use the `ctx` helpers at the top level and `return` the result directly — no IIFE wrapper is -needed. Navigating mid-script would tear down the evaluation context, so the page -must already be on `deepwiki.com`; navigate first with -`chrome-devtools navigate https://deepwiki.com`. +needed. Because we have defined the `@url` metadata tag, the CLI will automatically +navigate to `https://hn.algolia.com` if the browser is not already on that site. -### Script file (`skill/chrome-devtools/examples/search_deepwiki.js`) +### Script file (`skill/chrome-devtools/examples/search_hn.js`) ```javascript -// search_deepwiki.js -// Run with: chrome-devtools run-script skill/chrome-devtools/examples/search_deepwiki.js -a query="aeroxy/ast-bro" +// @url https://hn.algolia.com + +// search_hn.js +// Run with: chrome-devtools run-script skill/chrome-devtools/examples/search_hn.js -a query="Rust" // -// run-script injects `ctx` and runs this file inside an async context, so use -// the ctx helpers directly at the top level and `return` the result. Navigating -// would tear down the evaluation context, so this script requires the page to -// already be on deepwiki.com (use the `navigate` command first). +// run-script injects `ctx` and runs this file inside an async context. +// Setting `@url` above tells the CLI to automatically navigate to the target site first! const query = ctx.args.query; if (!query) { throw new Error("Query argument is required. 
Pass it with '-a query=...'"); } -if (!window.location.href.includes("deepwiki.com")) { - throw new Error("Not on deepwiki.com — navigate there first: chrome-devtools navigate https://deepwiki.com"); -} - -// Fill in search input and submit -await ctx.fill("input[placeholder*='search']", query); -await ctx.click("button[type='submit']"); +// Fill in search input (the SPA will fetch and render results dynamically) +await ctx.fill("input.SearchInput", query); -// Wait for results list to load -await ctx.waitForSelector(".search-results-list, .repo-card", 10000); +// Wait for results to update/load +await ctx.waitForSelector("article.Story", 10000); -// Extract titles, descriptions, and URLs -const results = Array.from(document.querySelectorAll(".repo-card, .wiki-page-item")).map(el => { +// Extract results +const results = Array.from(document.querySelectorAll("article.Story")).map(el => { + const titleEl = el.querySelector(".Story_title a"); + const metaEl = el.querySelector(".Story_meta"); return { - title: el.querySelector(".title, h3")?.innerText.trim() || "", - description: el.querySelector(".description, p")?.innerText.trim() || "", - url: el.querySelector("a")?.href || "" + title: titleEl?.innerText.trim() || "", + meta: metaEl?.innerText.trim() || "", + url: titleEl?.href || "" }; }); @@ -75,7 +99,6 @@ return results; ### CLI Execution ```bash -# Navigate to the target site first, then run the script. -chrome-devtools --target warm-squid navigate https://deepwiki.com -chrome-devtools --target warm-squid run-script skill/chrome-devtools/examples/search_deepwiki.js --arg query="aeroxy/ast-bro" --json +# Execute the script directly — the CLI handles the auto-navigation seamlessly! 
+chrome-devtools --target warm-squid run-script skill/chrome-devtools/examples/search_hn.js --arg query="Rust" --json ``` From d86a029f61573eff11641b71dac1edcf069a801a Mon Sep 17 00:00:00 2001 From: Aero Date: Wed, 1 Jul 2026 00:19:34 +0800 Subject: [PATCH 10/19] fix(run_script): improve URL validation and auto-navigation handling --- src/commands/evaluate.rs | 29 +++++++++++++++++++---------- src/lib.rs | 31 ++++++++++++++++++++++++++----- 2 files changed, 45 insertions(+), 15 deletions(-) diff --git a/src/commands/evaluate.rs b/src/commands/evaluate.rs index bf96655..cf8c991 100644 --- a/src/commands/evaluate.rs +++ b/src/commands/evaluate.rs @@ -210,17 +210,17 @@ pub async fn run_script( } } + let nav_url = if interpolated_url.starts_with("http://") || interpolated_url.starts_with("https://") { + interpolated_url.clone() + } else if is_local_host(&interpolated_url) { + format!("http://{}", interpolated_url) + } else { + format!("https://{}", interpolated_url) + }; + let current_url = client.current_url(session_id).await?; - if !url_matches_domain(&current_url, &interpolated_url) { - eprintln!("[script] Current URL '{}' does not match target URL '{}'. Auto-navigating...", current_url, interpolated_url); - - let nav_url = if interpolated_url.starts_with("http://") || interpolated_url.starts_with("https://") { - interpolated_url.clone() - } else if is_local_host(&interpolated_url) { - format!("http://{}", interpolated_url) - } else { - format!("https://{}", interpolated_url) - }; + if current_url != nav_url { + eprintln!("[script] Current URL '{}' does not match target URL '{}'.
Auto-navigating...", current_url, nav_url); crate::commands::navigate::navigate( client, @@ -233,6 +233,15 @@ pub async fn run_script( None, ) .await?; + + let post_nav_url = client.current_url(session_id).await?; + if post_nav_url != nav_url { + anyhow::bail!( + "Auto-navigation to '{}' resulted in URL '{}' which does not match target URL", + nav_url, + post_nav_url + ); + } } } diff --git a/src/lib.rs b/src/lib.rs index 6682ad3..31cabed 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -336,8 +336,8 @@ pub enum Commands { /// Optional arguments to pass to the script as key=value pairs (can be repeated) #[arg(long = "arg", short = 'a')] script_args: Vec<String>, - /// Extra trailing raw/positional arguments - #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + /// Extra trailing raw/positional arguments (placed after '--') + #[arg(last = true)] raw_args: Vec<String>, /// Write output to a file instead of stdout #[arg(long, short)] @@ -357,8 +357,8 @@ pub enum Commands { /// Optional arguments to pass to the function as key=value pairs (can be repeated) #[arg(long = "arg", short = 'a')] script_args: Vec<String>, - /// Extra trailing raw/positional arguments - #[arg(trailing_var_arg = true, allow_hyphen_values = true)] + /// Extra trailing raw/positional arguments (placed after '--') + #[arg(last = true)] raw_args: Vec<String>, /// Write output to a file instead of stdout #[arg(long, short)] @@ -451,7 +451,11 @@ fn parse_json_value(v: &str) -> serde_json::Value { } } else if let Ok(f) = v.parse::<f64>() { if let Some(num) = serde_json::Number::from_f64(f) { - serde_json::Value::Number(num) + if f.to_string() == v { + serde_json::Value::Number(num) + } else { + serde_json::Value::String(v.to_string()) + } } else { serde_json::Value::String(v.to_string()) } @@ -1328,6 +1332,23 @@ mod tests { assert_eq!(obj.get("plain").unwrap().as_i64().unwrap(), 42); } + #[test] + fn test_parse_args_preserves_non_canonical_floats() { + // Non-canonical float representations must remain strings verbatim + let args =
vec![ + "val1=01.50".to_string(), + "val2=1e3".to_string(), + "val3=+3.5".to_string(), + "val4=3.5".to_string(), // Canonical should still parse as float number + ]; + let parsed = parse_args(&args, &[]).unwrap(); + let obj = parsed.as_object().unwrap(); + assert_eq!(obj.get("val1").unwrap().as_str().unwrap(), "01.50"); + assert_eq!(obj.get("val2").unwrap().as_str().unwrap(), "1e3"); + assert_eq!(obj.get("val3").unwrap().as_str().unwrap(), "+3.5"); + assert_eq!(obj.get("val4").unwrap().as_f64().unwrap(), 3.5); + } + #[test] fn test_parse_args_rejects_malformed() { // Missing '=' is now a hard error instead of being silently dropped. From d0d585659c93b745f02029c210eb0fe057a9f142 Mon Sep 17 00:00:00 2001 From: Aero Date: Wed, 1 Jul 2026 11:45:21 +0800 Subject: [PATCH 11/19] fix(evaluate): enhance value handling in build_ctx_object and improve URL encoding --- src/commands/evaluate.rs | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/commands/evaluate.rs b/src/commands/evaluate.rs index cf8c991..bdbcbdd 100644 --- a/src/commands/evaluate.rs +++ b/src/commands/evaluate.rs @@ -134,7 +134,8 @@ fn build_ctx_object(args_str: &str) -> String { el.innerText = value; }} else {{ const setter = Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, 'value')?.set - || Object.getOwnPropertyDescriptor(window.HTMLTextAreaElement.prototype, 'value')?.set; + || Object.getOwnPropertyDescriptor(window.HTMLTextAreaElement.prototype, 'value')?.set + || Object.getOwnPropertyDescriptor(window.HTMLSelectElement.prototype, 'value')?.set; if (setter) {{ setter.call(el, value); }} else {{ @@ -155,9 +156,6 @@ fn url_encode(input: &str) -> String { b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_' | b'.' 
| b'~' => { encoded.push(b as char); } - b' ' => { - encoded.push('+'); - } _ => { encoded.push_str(&format!("%{:02X}", b)); } @@ -182,9 +180,12 @@ pub async fn run_script( // Perform auto-navigation if @url or @navigate comments exist at the top of the file let mut target_url = None; for line in script_content.lines() { - let trimmed = line.trim_start(); - if let Some(comment) = trimmed.strip_prefix("//").or_else(|| trimmed.strip_prefix('*')) { - let comment = comment.trim_start(); + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + if let Some(comment) = trimmed.strip_prefix("//").or_else(|| trimmed.strip_prefix("/*")).or_else(|| trimmed.strip_prefix('*')) { + let comment = comment.trim(); if let Some(rest) = comment.strip_prefix("@url") { target_url = Some(rest.trim().to_string()); break; @@ -192,6 +193,8 @@ pub async fn run_script( target_url = Some(rest.trim().to_string()); break; } + } else { + break; } } @@ -219,7 +222,7 @@ pub async fn run_script( }; let current_url = client.current_url(session_id).await?; - if current_url != nav_url { + if current_url.trim_end_matches('/') != nav_url.trim_end_matches('/') { eprintln!("[script] Current URL '{}' does not match target URL '{}'. 
Auto-navigating...", current_url, nav_url); crate::commands::navigate::navigate( @@ -235,7 +238,7 @@ pub async fn run_script( .await?; let post_nav_url = client.current_url(session_id).await?; - if post_nav_url != nav_url { + if post_nav_url.trim_end_matches('/') != nav_url.trim_end_matches('/') { anyhow::bail!( "Auto-navigation to '{}' resulted in URL '{}' which does not match target URL", nav_url, @@ -608,6 +611,13 @@ mod tests { assert!(!url_matches_domain("https://example.com", "")); } + #[test] + fn test_url_encode() { + assert_eq!(url_encode("hello world"), "hello%20world"); + assert_eq!(url_encode("foo+bar"), "foo%2Bbar"); + assert_eq!(url_encode("a-z_A-Z_0-9"), "a-z_A-Z_0-9"); + } + #[test] fn test_build_ctx_object_embeds_args_and_helpers() { let ctx = build_ctx_object(r#"{"query":"hi"}"#); @@ -622,5 +632,7 @@ mod tests { // fill must support contenteditable elements. assert!(ctx.contains("el.isContentEditable")); assert!(ctx.contains("el.innerText =")); + // fill must check HTMLSelectElement for state updates in frameworks. 
+ assert!(ctx.contains("window.HTMLSelectElement.prototype")); } } From f29afadfab09f448284b500e0f9c68d1eb19f7fa Mon Sep 17 00:00:00 2001 From: Aero Date: Wed, 1 Jul 2026 15:28:33 +0800 Subject: [PATCH 12/19] feat: improve script execution UX and refactor CLI argument handling * clarify argument parsing rules and recommend `-a/--arg` usage in docs * enforce `--` separator for positional arguments and update examples * add shared `ScriptExecArgs` helper to deduplicate run-script/adapter parsing * simplify executor logic for script and adapter commands * enhance CLI help text for argument ordering and behavior * document polling interval rationale for wait helpers --- skill/chrome-devtools/CUSTOM_SCRIPTING.md | 26 ++++---- src/commands/executor.rs | 75 +++++++++++++---------- src/constants.rs | 5 +- src/lib.rs | 26 ++++++-- 4 files changed, 81 insertions(+), 51 deletions(-) diff --git a/skill/chrome-devtools/CUSTOM_SCRIPTING.md b/skill/chrome-devtools/CUSTOM_SCRIPTING.md index 220c0b2..606e720 100644 --- a/skill/chrome-devtools/CUSTOM_SCRIPTING.md +++ b/skill/chrome-devtools/CUSTOM_SCRIPTING.md @@ -9,20 +9,21 @@ This guide details how to create and execute custom JavaScript scripts (`run-scr `run-script` reads a local JavaScript file, wraps it inside an Immediately Invoked Function Expression (IIFE), and evaluates it directly inside the target browser's page context. ### Flexible Argument Syntax -Dynamic arguments passed to the script can be specified in several styles and are automatically parsed and made available inside `ctx.args`: +Dynamic arguments passed to the script can be specified in several styles and are automatically parsed and made available inside `ctx.args`. Note that raw positional values (styles 1 & 2 below) must come after a literal `--`, and any options like `--output`/`--track-navigation` must be given *before* it: -1. **Pure Positional Style (Recommended for single queries):** - Simply append raw positional strings at the end of the command. 
A single trailing positional argument is automatically mapped to `ctx.args.query` (as well as `ctx.args._0`): +1. **Named Style via `-a`/`--arg` (Recommended):** + Pass one or more `key=value` pairs with the repeatable `-a`/`--arg` flag. This form doesn't need a `--` separator: ```bash - chrome-devtools run-script search_hn.js "Rust" + chrome-devtools run-script search_hn.js -a query="Rust" ``` -2. **Hybrid Style (Positional + Named):** +2. **Pure Positional Style:** + Append raw positional strings after `--`. A single trailing positional argument is automatically mapped to `ctx.args.query` (as well as `ctx.args._0`): ```bash - chrome-devtools run-script search_hn.js "Rust" limit=10 safeSearch=true + chrome-devtools run-script search_hn.js -- "Rust" ``` -3. **Pure Named Style:** +3. **Hybrid Style (Positional + Named, after `--`):** ```bash - chrome-devtools run-script search_hn.js query="Rust" limit=10 + chrome-devtools run-script search_hn.js -- "Rust" limit=10 safeSearch=true ``` ### Comment-based Auto-Navigation @@ -73,11 +74,14 @@ These real-world examples work on `hn.algolia.com`. // @url https://hn.algolia.com/?query={query} // search_hn.js -// Run with: chrome-devtools run-script skill/chrome-devtools/examples/search_hn.js "Rust" +// Run with: chrome-devtools run-script skill/chrome-devtools/examples/search_hn.js -a query="Rust" +// +// run-script injects `ctx` and runs this file inside an async context. +// Setting `@url` above tells the CLI to automatically navigate to the pre-rendered query URL first! const query = ctx.args.query; if (!query) { - throw new Error("Query argument is required."); + throw new Error("Query argument is required. 
Pass it with '-a query=...'"); } // Wait for results to update/load @@ -104,7 +108,7 @@ return results; // @domain hn.algolia.com // ==/UserAdapter== -// Run with: chrome-devtools adapter skill/chrome-devtools/examples/hn_adapter.js search "Rust" +// Run with: chrome-devtools adapter skill/chrome-devtools/examples/hn_adapter.js search -a query="Rust" async function search(ctx) { const query = ctx.args.query; diff --git a/src/commands/executor.rs b/src/commands/executor.rs index fcb4e51..7846bc5 100644 --- a/src/commands/executor.rs +++ b/src/commands/executor.rs @@ -308,6 +308,38 @@ pub async fn execute_command(client: &mut CdpClient, req: &DaemonRequest) -> Res }) } +/// Arguments shared by the `run-script` and `adapter` commands. +struct ScriptExecArgs<'a> { + file_path: &'a str, + script_args: serde_json::Value, + output: Option<&'a str>, + track_navigation: bool, +} + +/// Extract the arguments common to `run-script` and `adapter` from the raw +/// command args. `script_args` is conceptually optional; it defaults to an +/// empty object so clients can omit it when a script/adapter takes no +/// arguments. +fn script_exec_args(args: &serde_json::Value) -> Result<ScriptExecArgs<'_>> { + let file_path = args + .get("file_path") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow!("file_path required"))?; + let script_args = args.get("script_args").cloned().unwrap_or_else(|| json!({})); + let output = args.get("output").and_then(|v| v.as_str()); + let track_navigation = args + .get("track_navigation") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + Ok(ScriptExecArgs { + file_path, + script_args, + output, + track_navigation, + }) +} + /// Execute a page-level command within an active session.
async fn inner_execute( client: &mut CdpClient, @@ -572,57 +604,34 @@ async fn inner_execute( .await } "run-script" => { - let file_path = args - .get("file_path") - .and_then(|v| v.as_str()) - .ok_or_else(|| anyhow!("file_path required"))?; - // script_args is conceptually optional; default to an empty object so - // clients can omit it when a script takes no arguments. - let script_args = args.get("script_args").cloned().unwrap_or_else(|| json!({})); - let output = args.get("output").and_then(|v| v.as_str()); - let track_navigation = args - .get("track_navigation") - .and_then(|v| v.as_bool()) - .unwrap_or(false); - + let a = script_exec_args(args)?; commands::evaluate::run_script( client, session_id, - file_path, - &script_args, + a.file_path, + &a.script_args, req.format(), - output, - track_navigation, + a.output, + a.track_navigation, ) .await } "adapter" => { - let file_path = args - .get("file_path") - .and_then(|v| v.as_str()) - .ok_or_else(|| anyhow!("file_path required"))?; + let a = script_exec_args(args)?; let function_name = args .get("function_name") .and_then(|v| v.as_str()) .ok_or_else(|| anyhow!("function_name required"))?; - // script_args is conceptually optional; default to an empty object so - // clients can omit it when an adapter takes no arguments. 
- let script_args = args.get("script_args").cloned().unwrap_or_else(|| json!({})); - let output = args.get("output").and_then(|v| v.as_str()); - let track_navigation = args - .get("track_navigation") - .and_then(|v| v.as_bool()) - .unwrap_or(false); commands::evaluate::run_adapter( client, session_id, - file_path, + a.file_path, function_name, - &script_args, + &a.script_args, req.format(), - output, - track_navigation, + a.output, + a.track_navigation, ) .await } diff --git a/src/constants.rs b/src/constants.rs index f1febcd..59f467d 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -2,5 +2,8 @@ pub const NAVIGATION_TIMEOUT_MS: u64 = 30_000; /// Polling interval for the injected `ctx` wait helpers (waitForText / -/// waitForSelector) in run-script and adapter execution. +/// waitForSelector) in run-script and adapter execution. 100ms balances +/// responsiveness (how quickly a satisfied condition is noticed) against +/// avoiding excessive CPU/JS engine overhead from tight polling loops +/// running in the page during script/adapter execution. pub const POLL_INTERVAL_MS: u64 = 100; diff --git a/src/lib.rs b/src/lib.rs index 31cabed..eaa6955 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -329,6 +329,12 @@ pub enum Commands { }, /// Run a local JavaScript file in the active page context + /// + /// `raw_args` is a clap "last" positional: it only takes effect after a + /// literal `--`, and everything after that `--` is treated as a raw + /// argument rather than a flag. Put --output/--track-navigation *before* + /// the `--`, e.g.: `run-script foo.js --output out.json -- "query" limit=10` + /// (a bare `run-script foo.js "query"`, with no `--`, is rejected by clap). 
#[command(name = "run-script")] RunScript { /// Path to the JavaScript file @@ -336,18 +342,25 @@ pub enum Commands { /// Optional arguments to pass to the script as key=value pairs (can be repeated) #[arg(long = "arg", short = 'a')] script_args: Vec, - /// Extra trailing raw/positional arguments (placed after '--') + /// Extra trailing raw/positional arguments. Must be placed after a + /// literal '--'; put other options before it (see command help). #[arg(last = true)] raw_args: Vec, - /// Write output to a file instead of stdout + /// Write output to a file instead of stdout (must precede '--') #[arg(long, short)] output: Option, - /// Track URL changes caused by this evaluation + /// Track URL changes caused by this evaluation (must precede '--') #[arg(long)] track_navigation: bool, }, /// Run a structured custom site adapter JavaScript function + /// + /// `raw_args` is a clap "last" positional: it only takes effect after a + /// literal `--`, and everything after that `--` is treated as a raw + /// argument rather than a flag. Put --output/--track-navigation *before* + /// the `--`, e.g.: `adapter foo.js myFn --output out.json -- "query"` + /// (a bare `adapter foo.js myFn "query"`, with no `--`, is rejected by clap). #[command(name = "adapter")] Adapter { /// Path to the JavaScript adapter file @@ -357,13 +370,14 @@ pub enum Commands { /// Optional arguments to pass to the function as key=value pairs (can be repeated) #[arg(long = "arg", short = 'a')] script_args: Vec, - /// Extra trailing raw/positional arguments (placed after '--') + /// Extra trailing raw/positional arguments. Must be placed after a + /// literal '--'; put other options before it (see command help). 
#[arg(last = true)] raw_args: Vec, - /// Write output to a file instead of stdout + /// Write output to a file instead of stdout (must precede '--') #[arg(long, short)] output: Option, - /// Track URL changes caused by this evaluation + /// Track URL changes caused by this evaluation (must precede '--') #[arg(long)] track_navigation: bool, }, From 812a238b2c5d947404327f1d8579ce9ea28c53e6 Mon Sep 17 00:00:00 2001 From: Aero Date: Wed, 1 Jul 2026 20:31:41 +0800 Subject: [PATCH 13/19] fix(cli,evaluate): require `--` for raw args, improve script URL parsing, and robust value setter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * update CLI docs and examples to require `--` separator for raw positional args in `run-script` and `adapter` * add `parse_script_url_marker` to reliably extract `@url`/`@navigate` from leading comment blocks (supports line, block, JSDoc; trims `*/`; skips blanks; stops at first code line) * refactor auto-navigation to use new parser and downgrade URL mismatch to warning instead of hard failure * fix input value setting by resolving setter from element’s own prototype (`Object.getPrototypeOf(el)`) to avoid incompatible receiver errors * expand tests for URL marker parsing and add regression tests for prototype-based value setter * update wiki documentation to reflect `--` semantics and argument passing rules --- skill/chrome-devtools/SKILL.md | 4 +- src/commands/evaluate.rs | 153 +++++++++++++++++++++++++++------ wiki/adapter.md | 17 ++-- wiki/run-script.md | 17 ++-- 4 files changed, 147 insertions(+), 44 deletions(-) diff --git a/skill/chrome-devtools/SKILL.md b/skill/chrome-devtools/SKILL.md index 7bd8d1f..3a9335f 100644 --- a/skill/chrome-devtools/SKILL.md +++ b/skill/chrome-devtools/SKILL.md @@ -319,7 +319,7 @@ See the dedicated [Custom Scripting Guide](./CUSTOM_SCRIPTING.md) for full docum ```bash # Run a script with trailing positional arguments (auto-navigates if @url is present) 
-chrome-devtools --target warm-squid run-script skill/chrome-devtools/examples/search_hn.js "Rust" +chrome-devtools --target warm-squid run-script skill/chrome-devtools/examples/search_hn.js -- "Rust" ``` ### Pattern 14: Custom Domain-Aware Adapters (adapter) @@ -330,7 +330,7 @@ See the dedicated [Custom Scripting Guide](./CUSTOM_SCRIPTING.md) for full docum ```bash # Run an adapter function with positional args (auto-navigates if target domain is mismatch) -chrome-devtools --target warm-squid adapter skill/chrome-devtools/examples/hn_adapter.js search "Rust" +chrome-devtools --target warm-squid adapter skill/chrome-devtools/examples/hn_adapter.js search -- "Rust" ``` ## Complete Command Reference diff --git a/src/commands/evaluate.rs b/src/commands/evaluate.rs index bdbcbdd..6f29009 100644 --- a/src/commands/evaluate.rs +++ b/src/commands/evaluate.rs @@ -133,9 +133,7 @@ fn build_ctx_object(args_str: &str) -> String { }} else if (el.isContentEditable) {{ el.innerText = value; }} else {{ - const setter = Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, 'value')?.set - || Object.getOwnPropertyDescriptor(window.HTMLTextAreaElement.prototype, 'value')?.set - || Object.getOwnPropertyDescriptor(window.HTMLSelectElement.prototype, 'value')?.set; + const setter = Object.getOwnPropertyDescriptor(Object.getPrototypeOf(el), 'value')?.set; if (setter) {{ setter.call(el, value); }} else {{ @@ -164,6 +162,40 @@ fn url_encode(input: &str) -> String { encoded } +/// Extract the `@url` / `@navigate` auto-navigation target from a script's +/// leading comment block, if present. +/// +/// Only lines at the very top of the file that are comments (`//`, `/*`, or a +/// `*` JSDoc continuation line) are considered; scanning stops at the first +/// blank-then-non-comment line. A trailing `*/` on single-line block comments +/// (e.g. `/* @url https://example.com */`) is stripped so it isn't captured +/// as part of the URL. 
/// Extract the `@url` / `@navigate` auto-navigation target from the comment
/// block at the very top of a script.
///
/// The scan walks the file line by line and only looks inside leading
/// comments:
///
/// * `// ...` line comments,
/// * `/* ... */` block comments, single- or multi-line, with or without the
///   JSDoc-style leading `*` on each line.
///
/// Blank lines are skipped. The scan stops (returning `None`) at the first
/// line that is not part of a comment, so a marker that appears after code has
/// started is ignored. Anything following a closing `*/` on the same line is
/// treated as code.
///
/// A marker only counts when the tag is a whole word (`@urls` is not `@url`)
/// and is followed by a non-empty target; an empty marker is skipped and the
/// scan continues, so the caller never receives an empty URL.
///
/// Returns the trimmed target text of the first valid marker, or `None` when
/// the leading comment block contains none.
fn parse_script_url_marker(content: &str) -> Option<String> {
    /// Return the target that follows an `@url` / `@navigate` tag at the start
    /// of `comment`, or `None` if the tag is absent, is only a prefix of a
    /// longer word, or has no target.
    fn marker_target(comment: &str) -> Option<&str> {
        ["@url", "@navigate"].iter().find_map(|tag| {
            let rest = comment.strip_prefix(*tag)?;
            // Require a word boundary so `@urls` / `@navigateTo` don't match.
            if !(rest.is_empty() || rest.starts_with(char::is_whitespace)) {
                return None;
            }
            let target = rest.trim();
            (!target.is_empty()).then_some(target)
        })
    }

    /// Drop surrounding whitespace and the decorative leading `*`s used by
    /// JSDoc-style block comments (`/** ...` and ` * ...` lines).
    fn strip_block_decoration(text: &str) -> &str {
        text.trim().trim_start_matches('*').trim_start()
    }

    // True while we are between an opening `/*` and its closing `*/`.
    let mut in_block = false;

    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }

        // `text` is the comment payload on this line; `code_follows` is set
        // when real code appears after a closing `*/` on the same line.
        let (text, code_follows) = if in_block {
            match trimmed.split_once("*/") {
                Some((before, after)) => {
                    in_block = false;
                    (strip_block_decoration(before), !after.trim().is_empty())
                }
                None => (strip_block_decoration(trimmed), false),
            }
        } else if let Some(rest) = trimmed.strip_prefix("//") {
            (rest.trim(), false)
        } else if let Some(rest) = trimmed.strip_prefix("/*") {
            match rest.split_once("*/") {
                // Single-line block comment: the `*/` is not part of the URL.
                Some((before, after)) => {
                    (strip_block_decoration(before), !after.trim().is_empty())
                }
                None => {
                    in_block = true;
                    (strip_block_decoration(rest), false)
                }
            }
        } else {
            // First line of actual code: the leading comment block is over.
            return None;
        };

        if let Some(target) = marker_target(text) {
            return Some(target.to_string());
        }
        if code_follows {
            return None;
        }
    }

    None
}
sites commonly redirect (www., trailing + // slashes, locale/auth redirects, SPA router normalization), + // and `navigate()` already surfaces real navigation failures + // (CDP errors, load timeouts) before we get here. + eprintln!( + "[script] Warning: auto-navigation to '{}' resulted in URL '{}'. Continuing anyway...", + nav_url, post_nav_url ); } } @@ -544,6 +561,67 @@ mod tests { assert_eq!(parse_adapter_domains(content), vec!["real.com"]); } + #[test] + fn test_parse_script_url_marker_line_comment() { + let content = "// @url https://example.com\nconst x = 1;"; + assert_eq!( + parse_script_url_marker(content), + Some("https://example.com".to_string()) + ); + } + + #[test] + fn test_parse_script_url_marker_navigate_alias() { + let content = "// @navigate https://example.com\nconst x = 1;"; + assert_eq!( + parse_script_url_marker(content), + Some("https://example.com".to_string()) + ); + } + + #[test] + fn test_parse_script_url_marker_single_line_block_comment() { + // A single-line block comment's trailing `*/` must not be captured as + // part of the URL. + let content = "/* @url https://example.com */\nconst x = 1;"; + assert_eq!( + parse_script_url_marker(content), + Some("https://example.com".to_string()) + ); + } + + #[test] + fn test_parse_script_url_marker_jsdoc_block() { + let content = "/**\n * @url https://example.com\n */\nconst x = 1;"; + assert_eq!( + parse_script_url_marker(content), + Some("https://example.com".to_string()) + ); + } + + #[test] + fn test_parse_script_url_marker_skips_leading_blank_lines() { + let content = "\n\n \n// @url https://example.com\nconst x = 1;"; + assert_eq!( + parse_script_url_marker(content), + Some("https://example.com".to_string()) + ); + } + + #[test] + fn test_parse_script_url_marker_stops_at_first_non_comment_line() { + // The marker only counts if it's part of the leading comment block; + // once code starts, scanning stops even if a later comment has one. 
+ let content = "const x = 1;\n// @url https://example.com"; + assert_eq!(parse_script_url_marker(content), None); + } + + #[test] + fn test_parse_script_url_marker_absent() { + let content = "// just a regular comment\nconst x = 1;"; + assert_eq!(parse_script_url_marker(content), None); + } + #[test] fn test_strip_export_keywords() { let src = "export async function ask(ctx) {}\n export function read() {}\nexport const helper = 1;\nexport default function main() {}\nconst x = \"export inside string\";"; @@ -632,7 +710,26 @@ mod tests { // fill must support contenteditable elements. assert!(ctx.contains("el.isContentEditable")); assert!(ctx.contains("el.innerText =")); - // fill must check HTMLSelectElement for state updates in frameworks. - assert!(ctx.contains("window.HTMLSelectElement.prototype")); + // fill must look up the native value setter from the element's own + // prototype (not a hardcoded HTMLInputElement/etc. chain, which would + // throw on