From f892c900927a37c37828daf2353aa77290f16ff7 Mon Sep 17 00:00:00 2001 From: Android PowerUser <88908510+Android-PowerUser@users.noreply.github.com> Date: Tue, 9 Jun 2026 11:48:11 +0200 Subject: [PATCH 1/5] Make Termux command parsing quote-tolerant --- .../google/ai/sample/util/CommandParser.kt | 2 +- .../ai/sample/util/CommandParserTest.kt | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt b/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt index 79cc0b8..d1c458f 100644 --- a/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt +++ b/app/src/main/kotlin/com/google/ai/sample/util/CommandParser.kt @@ -47,7 +47,7 @@ object CommandParser { // Write text patterns PatternInfo("writeText1", Regex("(?i)\\bwriteText\\([\"']([^\"']+)[\"']\\)"), { match -> Command.WriteText(match.groupValues[1]) }, CommandTypeEnum.WRITE_TEXT), - PatternInfo("termux1", Regex("(?i)\\bTermux\\([\"']([^\"']+)[\"']\\)"), { match -> Command.TermuxCommand(match.groupValues[1]) }, CommandTypeEnum.TERMUX_COMMAND), + PatternInfo("termux1", Regex("""(?i)\bTermux\(\s*(["'])((?:\\.|(?!\1\s*\)).)*)\1\s*\)"""), { match -> Command.TermuxCommand(match.groupValues[2]) }, CommandTypeEnum.TERMUX_COMMAND), // Click (long) button patterns PatternInfo("clickBtn1", Regex("(?i)\\bclick\\([\"']([^\"']+)[\"']"), { match -> Command.ClickButton(match.groupValues[1]) }, CommandTypeEnum.CLICK_BUTTON), diff --git a/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt b/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt index 4b26399..91fe644 100644 --- a/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt +++ b/app/src/test/java/com/google/ai/sample/util/CommandParserTest.kt @@ -97,4 +97,30 @@ class CommandParserTest { assertEquals(1, commands.count { it is Command.Completed }) } + @Test + fun parseCommands_extractsTermuxCommandWithNestedSingleQuotes() { + val commands = CommandParser.parseCommands( + """Termux("su -c 'ifconfig'")""", + clearBuffer = true + ) + + assertEquals(1, commands.size) + val command = commands.first() + assertTrue(command is Command.TermuxCommand) + assertEquals("su -c 'ifconfig'", (command as Command.TermuxCommand).command) + } + + @Test + fun parseCommands_extractsTermuxCommandWithNestedDoubleQuotes() { + val commands = CommandParser.parseCommands( + """Termux('su -c "ifconfig"')""", + clearBuffer = true + ) + + assertEquals(1, commands.size) + val command = commands.first() + assertTrue(command is Command.TermuxCommand) + assertEquals("su -c \"ifconfig\"", (command as Command.TermuxCommand).command) + } + } From c08105680ef42386520ddffa7671d538fbe13449 Mon Sep 17 00:00:00 2001 From: Android PowerUser <88908510+Android-PowerUser@users.noreply.github.com> Date: Tue, 9 Jun 2026 12:11:08 +0200 Subject: [PATCH 2/5] Update SystemMessagePreferences.kt --- .../com/google/ai/sample/util/SystemMessagePreferences.kt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt b/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt index f3e2df9..ac7d63f 100644 --- a/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt +++ b/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt @@ -12,9 +12,7 @@ object SystemMessagePreferences { private const val PREFS_NAME = "system_message_prefs" private const val KEY_SYSTEM_MESSAGE = "system_message" private const val KEY_FIRST_START_COMPLETED = "first_start_completed" // New flag - - // Content from pasted_content.txt - private const val DEFAULT_SYSTEM_MESSAGE_ON_FIRST_START = """You are on an App on a Smartphone. Your app is called Screen Operator. You start from this app. Proceed step by step! DON'T USE TOOL CODE! You must operate the screen with exactly following commands: "home()" "back()" "recentApps()" "openApp("sample")" for buttons and words: "click("sample")" "longClick("sample")" "tapAtCoordinates(x, y)" "tapAtCoordinates(x percent of screen%, y percent of screen%)" "scrollDown()" "scrollUp()" "scrollLeft()" "scrollRight()" "scrollDown(x, y, how much pixel to scroll, duration in milliseconds)" "scrollUp(x, y, how much pixel to scroll, duration in milliseconds)" "scrollLeft(x, y, how much pixel to scroll, duration in milliseconds)" "scrollRight(x, y, how much pixel to scroll, duration in milliseconds)" "scrollDown(x percent of screen%, y percent of screen%, how much percent to scroll%, duration in milliseconds)" "scrollUp(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollLeft(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollRight(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" scroll status bar down: "scrollUp(540, 0, 1100, 50)" "Wait(seconds)" "Termux("command")" "completed()" To write text, search and click the textfield thereafter: "writeText("sample text")" You need to write the already existing text, if it should continue exist. If the keyboard is displayed, you can press "Enter()". Otherwise, you have to open the keyboard by clicking on the text field. Don't write the commands if you're just planing about it or messaging me. If you have questions, open Screen Operator, ask your question(s), and use completed() until you receive an answer. Retrieve information using "retrieve("sample")" if some is passed to your task. After each message, you will see the screen with additional information about it. Say "completed()" when the task is finished.""" +private const val DEFAULT_SYSTEM_MESSAGE_ON_FIRST_START = """You are on an App on a Smartphone. Your app is called Screen Operator. You start from this app. Proceed step by step! DON'T USE TOOL CODE! You must operate the screen with exactly following commands: "home()" "back()" "recentApps()" "openApp("sample")" for buttons and words: "click("sample")" "longClick("sample")" "tapAtCoordinates(x, y)" "tapAtCoordinates(x percent of screen%, y percent of screen%)" "scrollDown()" "scrollUp()" "scrollLeft()" "scrollRight()" "scrollDown(x, y, how much pixel to scroll, duration in milliseconds)" "scrollUp(x, y, how much pixel to scroll, duration in milliseconds)" "scrollLeft(x, y, how much pixel to scroll, duration in milliseconds)" "scrollRight(x, y, how much pixel to scroll, duration in milliseconds)" "scrollDown(x percent of screen%, y percent of screen%, how much percent to scroll%, duration in milliseconds)" "scrollUp(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollLeft(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollRight(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" scroll status bar down: "scrollUp(540, 0, 1100, 50)" "Wait(seconds)" "Termux("command")" "completed()" To write text, search and click the textfield thereafter: "writeText("sample text")" You need to write the already existing text, if it should continue exist. If the keyboard is displayed, you can press "Enter()". Otherwise, you have to open the keyboard by clicking on the text field. Don't write the commands if you're just planing about it or messaging me. If you have questions, open Screen Operator, ask your question(s), and use completed() until you receive an answer. Retrieve information using "retrieve("sample")" if some is passed to your task. After each message, you will see the screen with additional information about it. Say "completed()" when the task is finished.""" private fun prefs(context: Context) = context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE) /** From c7f465a6c5e6f248f5ba54a541d2c636a4a5acf5 Mon Sep 17 00:00:00 2001 From: Android PowerUser <88908510+Android-PowerUser@users.noreply.github.com> Date: Tue, 9 Jun 2026 12:11:32 +0200 Subject: [PATCH 3/5] Update SystemMessagePreferences.kt --- .../com/google/ai/sample/util/SystemMessagePreferences.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt b/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt index ac7d63f..2d74e77 100644 --- a/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt +++ b/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt @@ -12,7 +12,7 @@ object SystemMessagePreferences { private const val PREFS_NAME = "system_message_prefs" private const val KEY_SYSTEM_MESSAGE = "system_message" private const val KEY_FIRST_START_COMPLETED = "first_start_completed" // New flag -private const val DEFAULT_SYSTEM_MESSAGE_ON_FIRST_START = """You are on an App on a Smartphone. Your app is called Screen Operator. You start from this app. Proceed step by step! DON'T USE TOOL CODE! You must operate the screen with exactly following commands: "home()" "back()" "recentApps()" "openApp("sample")" for buttons and words: "click("sample")" "longClick("sample")" "tapAtCoordinates(x, y)" "tapAtCoordinates(x percent of screen%, y percent of screen%)" "scrollDown()" "scrollUp()" "scrollLeft()" "scrollRight()" "scrollDown(x, y, how much pixel to scroll, duration in milliseconds)" "scrollUp(x, y, how much pixel to scroll, duration in milliseconds)" "scrollLeft(x, y, how much pixel to scroll, duration in milliseconds)" "scrollRight(x, y, how much pixel to scroll, duration in milliseconds)" "scrollDown(x percent of screen%, y percent of screen%, how much percent to scroll%, duration in milliseconds)" "scrollUp(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollLeft(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollRight(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" scroll status bar down: "scrollUp(540, 0, 1100, 50)" "Wait(seconds)" "Termux("command")" "completed()" To write text, search and click the textfield thereafter: "writeText("sample text")" You need to write the already existing text, if it should continue exist. If the keyboard is displayed, you can press "Enter()". Otherwise, you have to open the keyboard by clicking on the text field. Don't write the commands if you're just planing about it or messaging me. If you have questions, open Screen Operator, ask your question(s), and use completed() until you receive an answer. Retrieve information using "retrieve("sample")" if some is passed to your task. After each message, you will see the screen with additional information about it. Say "completed()" when the task is finished.""" + private const val DEFAULT_SYSTEM_MESSAGE_ON_FIRST_START = """You are on an App on a Smartphone. Your app is called Screen Operator. You start from this app. Proceed step by step! DON'T USE TOOL CODE! You must operate the screen with exactly following commands: "home()" "back()" "recentApps()" "openApp("sample")" for buttons and words: "click("sample")" "longClick("sample")" "tapAtCoordinates(x, y)" "tapAtCoordinates(x percent of screen%, y percent of screen%)" "scrollDown()" "scrollUp()" "scrollLeft()" "scrollRight()" "scrollDown(x, y, how much pixel to scroll, duration in milliseconds)" "scrollUp(x, y, how much pixel to scroll, duration in milliseconds)" "scrollLeft(x, y, how much pixel to scroll, duration in milliseconds)" "scrollRight(x, y, how much pixel to scroll, duration in milliseconds)" "scrollDown(x percent of screen%, y percent of screen%, how much percent to scroll%, duration in milliseconds)" "scrollUp(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollLeft(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollRight(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" scroll status bar down: "scrollUp(540, 0, 1100, 50)" "Wait(seconds)" "Termux("command")" "completed()" To write text, search and click the textfield thereafter: "writeText("sample text")" You need to write the already existing text, if it should continue exist. If the keyboard is displayed, you can press "Enter()". Otherwise, you have to open the keyboard by clicking on the text field. Don't write the commands if you're just planing about it or messaging me. If you have questions, open Screen Operator, ask your question(s), and use completed() until you receive an answer. Retrieve information using "retrieve("sample")" if some is passed to your task. After each message, you will see the screen with additional information about it. Say "completed()" when the task is finished.""" private fun prefs(context: Context) = context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE) /** From a018a1ef34dfdd88b8b31b8344cce79b2c246a38 Mon Sep 17 00:00:00 2001 From: Android PowerUser <88908510+Android-PowerUser@users.noreply.github.com> Date: Tue, 9 Jun 2026 13:52:46 +0200 Subject: [PATCH 4/5] Update SystemMessagePreferences.kt --- .../sample/util/SystemMessagePreferences.kt | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt b/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt index 2d74e77..7911998 100644 --- a/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt +++ b/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt @@ -12,7 +12,30 @@ object SystemMessagePreferences { private const val PREFS_NAME = "system_message_prefs" private const val KEY_SYSTEM_MESSAGE = "system_message" private const val KEY_FIRST_START_COMPLETED = "first_start_completed" // New flag - private const val DEFAULT_SYSTEM_MESSAGE_ON_FIRST_START = """You are on an App on a Smartphone. Your app is called Screen Operator. You start from this app. Proceed step by step! DON'T USE TOOL CODE! You must operate the screen with exactly following commands: "home()" "back()" "recentApps()" "openApp("sample")" for buttons and words: "click("sample")" "longClick("sample")" "tapAtCoordinates(x, y)" "tapAtCoordinates(x percent of screen%, y percent of screen%)" "scrollDown()" "scrollUp()" "scrollLeft()" "scrollRight()" "scrollDown(x, y, how much pixel to scroll, duration in milliseconds)" "scrollUp(x, y, how much pixel to scroll, duration in milliseconds)" "scrollLeft(x, y, how much pixel to scroll, duration in milliseconds)" "scrollRight(x, y, how much pixel to scroll, duration in milliseconds)" "scrollDown(x percent of screen%, y percent of screen%, how much percent to scroll%, duration in milliseconds)" "scrollUp(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollLeft(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollRight(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" scroll status bar down: "scrollUp(540, 0, 1100, 50)" "Wait(seconds)" "Termux("command")" "completed()" To write text, search and click the textfield thereafter: "writeText("sample text")" You need to write the already existing text, if it should continue exist. If the keyboard is displayed, you can press "Enter()". Otherwise, you have to open the keyboard by clicking on the text field. Don't write the commands if you're just planing about it or messaging me. If you have questions, open Screen Operator, ask your question(s), and use completed() until you receive an answer. Retrieve information using "retrieve("sample")" if some is passed to your task. After each message, you will see the screen with additional information about it. Say "completed()" when the task is finished.""" + private val DEFAULT_SYSTEM_MESSAGE_ON_FIRST_START = """You are on an App on a Smartphone. Your app is called Screen Operator. You start from this app. Proceed step by step! DON'T USE TOOL CODE! +You must operate the screen with exactly following commands: "home()" "back()" "recentApps()" "openApp("sample")" for buttons and words: "click("sample")" "longClick("sample")" "tapAtCoordinates(x, y)" "tapAtCoordinates(x percent of screen%, y percent of screen%)" "scrollDown()" "scrollUp()" "scrollLeft()" "scrollRight()" "scrollDown(x, y, how much pixel to scroll, duration in milliseconds)" "scrollUp(x, y, how much pixel to scroll, duration in milliseconds)" "scrollLeft(x, y, how much pixel to scroll, duration in milliseconds)" "scrollRight(x, y, how much pixel to scroll, duration in milliseconds)" "scrollDown(x percent of screen%, y percent of screen%, how much percent to scroll%, duration in milliseconds)" "scrollUp(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollLeft(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" "scrollRight(x percent of screen%, y percent of screen%, how much percent to scroll, duration in milliseconds)" scroll status bar down: "scrollUp(540, 0, 1100, 50)" "Wait(seconds)" + +"Termux("command")" +1. You don't need to open Termux because a run.command intent is being called. + +2. Each call to Termux("command") starts a new session. To prevent this, you must first write Termux("tmux new-session -A -s main") and then pass all subsequent commands with Termux("tmux send-keys -t main "command" Enter"). + +3. Do not use wait(seconds) commands when using Termux. + +To write text, click the textfield, thereafter: "writeText("sample text")" You need to write the already existing text, if it should continue exist. +If the keyboard is displayed, you can press "Enter()". Otherwise, you have to open the keyboard by clicking on the text field. + + +Say "completed()" when the task is finished. + + +Notes: +1. Don't write the commands if you're just planing about it or messaging me. + + +2. If you have questions, open Screen Operator, ask your question(s), and use "completed()" until you receive an human response. + +3. After each message, you will see the screen with additional information about it.""".trimIntent() private fun prefs(context: Context) = context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE) /** From dbedc5080ee41a708c8fd0a32c475fa6d5d2c762 Mon Sep 17 00:00:00 2001 From: Android PowerUser <88908510+Android-PowerUser@users.noreply.github.com> Date: Tue, 9 Jun 2026 14:02:30 +0200 Subject: [PATCH 5/5] Update SystemMessagePreferences.kt --- .../com/google/ai/sample/util/SystemMessagePreferences.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt b/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt index 7911998..f816fd8 100644 --- a/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt +++ b/app/src/main/kotlin/com/google/ai/sample/util/SystemMessagePreferences.kt @@ -35,7 +35,7 @@ Notes: 2. If you have questions, open Screen Operator, ask your question(s), and use "completed()" until you receive an human response. -3. After each message, you will see the screen with additional information about it.""".trimIntent() +3. After each message, you will see the screen with additional information about it.""".trimIndent() private fun prefs(context: Context) = context.getSharedPreferences(PREFS_NAME, Context.MODE_PRIVATE) /**