99func TestExec_OpenAI (t * testing.T ) {
1010 out := runCLI (t , "run" , "--exec" , "testdata/basic.yaml" , "What's 2+2?" )
1111
12- require .Equal (t , "\n --- Agent: root --- \n 2 + 2 equals 4." , out )
12+ require .Equal (t , "2 + 2 equals 4." , out )
1313}
1414
1515// TestExec_OpenAI_V3Config tests that v3 configs work correctly with thinking disabled by default.
@@ -18,7 +18,7 @@ func TestExec_OpenAI_V3Config(t *testing.T) {
1818 out := runCLI (t , "run" , "--exec" , "testdata/basic_v3.yaml" , "What's 2+2?" )
1919
2020 // v3 config with gpt-5 should work correctly (thinking disabled by default for old configs)
21- require .Equal (t , "\n --- Agent: root --- \n 4 " , out )
21+ require .Equal (t , "4 " , out )
2222}
2323
2424// TestExec_OpenAI_WithThinkingBudget tests that when thinking_budget is explicitly configured
@@ -28,57 +28,52 @@ func TestExec_OpenAI_WithThinkingBudget(t *testing.T) {
2828
2929 // With thinking_budget explicitly configured, response should include reasoning
3030 // The output format includes the reasoning summary when thinking is enabled
31- require .Contains (t , out , "--- Agent: root ---" )
3231 require .Contains (t , out , "4" )
3332}
3433
3534func TestExec_OpenAI_ToolCall (t * testing.T ) {
3635 out := runCLI (t , "run" , "--exec" , "testdata/fs_tools.yaml" , "How many files in testdata/working_dir? Only output the number." )
3736
38- require .Equal (t , "\n --- Agent: root --- \n \ n Calling list_directory(path: \" testdata/working_dir\" )\n \n list_directory response → \" FILE README.me\\ n\" \n 1" , out )
37+ require .Equal (t , "\n Calling list_directory(path: \" testdata/working_dir\" )\n \n list_directory response → \" FILE README.me\\ n\" \n 1" , out )
3938}
4039
4140func TestExec_OpenAI_HideToolCalls (t * testing.T ) {
4241 out := runCLI (t , "run" , "--exec" , "testdata/fs_tools.yaml" , "--hide-tool-calls" , "How many files in testdata/working_dir? Only output the number." )
4342
44- require .Equal (t , "\n --- Agent: root --- \n 1 " , out )
43+ require .Equal (t , "1 " , out )
4544}
4645
4746func TestExec_OpenAI_gpt5 (t * testing.T ) {
4847 out := runCLI (t , "run" , "--exec" , "testdata/basic.yaml" , "--model=openai/gpt-5" , "What's 2+2?" )
4948
5049 // With thinking enabled by default, response may include reasoning summary
51- require .Contains (t , out , "--- Agent: root ---" )
5250 require .Contains (t , out , "4" )
5351}
5452
5553func TestExec_OpenAI_gpt5_1 (t * testing.T ) {
5654 out := runCLI (t , "run" , "--exec" , "testdata/basic.yaml" , "--model=openai/gpt-5.1" , "What's 2+2?" )
5755
58- require .Equal (t , "\n --- Agent: root --- \n 2 + 2 = 4." , out )
56+ require .Equal (t , "2 + 2 = 4." , out )
5957}
6058
6159func TestExec_OpenAI_gpt5_codex (t * testing.T ) {
6260 out := runCLI (t , "run" , "--exec" , "testdata/basic.yaml" , "--model=openai/gpt-5-codex" , "What's 2+2?" )
6361
6462 // Model reasoning summary varies, just check for the core response
65- require .Contains (t , out , "--- Agent: root ---" )
6663 require .Contains (t , out , "4" )
6764}
6865
6966func TestExec_Anthropic (t * testing.T ) {
7067 out := runCLI (t , "run" , "--exec" , "testdata/basic.yaml" , "--model=anthropic/claude-sonnet-4-0" , "What's 2+2?" )
7168
7269 // With interleaved thinking enabled by default, Anthropic responses include thinking content
73- require .Contains (t , out , "--- Agent: root ---" )
7470 require .Contains (t , out , "2 + 2 = 4" )
7571}
7672
7773func TestExec_Anthropic_ToolCall (t * testing.T ) {
7874 out := runCLI (t , "run" , "--exec" , "testdata/fs_tools.yaml" , "--model=anthropic/claude-sonnet-4-0" , "How many files in testdata/working_dir? Only output the number." )
7975
8076 // With interleaved thinking enabled by default, Anthropic responses include thinking content
81- require .Contains (t , out , "--- Agent: root ---" )
8277 require .Contains (t , out , `Calling list_directory(path: "testdata/working_dir")` )
8378 require .Contains (t , out , `list_directory response → "FILE README.me\n"` )
8479 // The response should end with "1" (the count)
@@ -89,15 +84,13 @@ func TestExec_Anthropic_AgentsMd(t *testing.T) {
8984 out := runCLI (t , "run" , "--exec" , "testdata/agents-md.yaml" , "--model=anthropic/claude-sonnet-4-0" , "What's 2+2?" )
9085
9186 // With interleaved thinking enabled by default, Anthropic responses include thinking content
92- require .Contains (t , out , "--- Agent: root ---" )
9387 require .Contains (t , out , "2 + 2 = 4" )
9488}
9589
9690func TestExec_Gemini (t * testing.T ) {
9791 out := runCLI (t , "run" , "--exec" , "testdata/basic.yaml" , "--model=google/gemini-2.5-flash" , "What's 2+2?" )
9892
9993 // With thinking enabled by default (dynamic thinking for Gemini 2.5), responses may include thinking content
100- require .Contains (t , out , "--- Agent: root ---" )
10194 // The response should contain the answer "4" somewhere
10295 require .Contains (t , out , "4" )
10396}
@@ -106,7 +99,6 @@ func TestExec_Gemini_ToolCall(t *testing.T) {
10699 out := runCLI (t , "run" , "--exec" , "testdata/fs_tools.yaml" , "--model=google/gemini-2.5-flash" , "How many files in testdata/working_dir? Only output the number." )
107100
108101 // With thinking enabled by default (dynamic thinking for Gemini 2.5), responses include thinking content
109- require .Contains (t , out , "--- Agent: root ---" )
110102 require .Contains (t , out , `Calling list_directory(path: "testdata/working_dir")` )
111103 require .Contains (t , out , `list_directory response → "FILE README.me\n"` )
112104 // The response should end with "1" (the count)
@@ -116,13 +108,13 @@ func TestExec_Gemini_ToolCall(t *testing.T) {
116108func TestExec_Mistral (t * testing.T ) {
117109 out := runCLI (t , "run" , "--exec" , "testdata/basic.yaml" , "--model=mistral/mistral-small" , "What's 2+2?" )
118110
119- require .Equal (t , "\n --- Agent: root --- \n The sum of 2 + 2 is 4." , out )
111+ require .Equal (t , "The sum of 2 + 2 is 4." , out )
120112}
121113
122114func TestExec_Mistral_ToolCall (t * testing.T ) {
123115 out := runCLI (t , "run" , "--exec" , "testdata/fs_tools.yaml" , "--model=mistral/mistral-small" , "How many files in testdata/working_dir? Only output the number." )
124116
125- require .Equal (t , "\n --- Agent: root --- \n \ n Calling list_directory(path: \" testdata/working_dir\" )\n \n list_directory response → \" FILE README.me\\ n\" \n 1" , out )
117+ require .Equal (t , "\n Calling list_directory(path: \" testdata/working_dir\" )\n \n list_directory response → \" FILE README.me\\ n\" \n 1" , out )
126118}
127119
128120func TestExec_ToolCallsNeedAcceptance (t * testing.T ) {
0 commit comments