Skip to content

Commit 350e632

Browse files
committed
test: add regression tests for robots.txt per-host caching and error paths
Add tests addressing PR review feedback:

- TestFetch_RobotsCachePerHost_MultipleURLs: verifies robots.txt is fetched once per host, while each URL path is evaluated individually (regression test for the caching bug this PR fixes)
- TestFetch_RobotsUnexpectedStatus: covers the new error path when robots.txt returns a non-200/non-404 status code

Assisted-By: docker-agent
1 parent 9c789d9 commit 350e632

1 file changed

Lines changed: 63 additions & 0 deletions

File tree

pkg/tools/builtin/fetch_test.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,69 @@ func TestFetch_RobotsMissing(t *testing.T) {
290290
assert.Contains(t, result.Output, "Content without robots.txt")
291291
}
292292

293+
func TestFetch_RobotsCachePerHost_MultipleURLs(t *testing.T) {
294+
// Regression test: robots.txt should be fetched once per host,
295+
// but each URL's path must be evaluated individually.
296+
robotsContent := "User-agent: *\nDisallow: /secret\nAllow: /"
297+
298+
robotsRequests := 0
299+
url := runHTTPServer(t, func(w http.ResponseWriter, r *http.Request) {
300+
switch r.URL.Path {
301+
case "/robots.txt":
302+
robotsRequests++
303+
w.Header().Set("Content-Type", "text/plain")
304+
fmt.Fprint(w, robotsContent)
305+
case "/public":
306+
fmt.Fprint(w, "public content")
307+
case "/secret/data":
308+
fmt.Fprint(w, "secret content")
309+
default:
310+
http.NotFound(w, r)
311+
}
312+
})
313+
314+
tool := NewFetchTool()
315+
result, err := tool.handler.CallTool(t.Context(), FetchToolArgs{
316+
URLs: []string{url + "/public", url + "/secret/data"},
317+
Format: "text",
318+
})
319+
require.NoError(t, err)
320+
321+
var results []FetchResult
322+
err = json.Unmarshal([]byte(result.Output), &results)
323+
require.NoError(t, err)
324+
require.Len(t, results, 2)
325+
326+
// First URL should succeed
327+
assert.Equal(t, 200, results[0].StatusCode)
328+
assert.Equal(t, "public content", results[0].Body)
329+
330+
// Second URL should be blocked
331+
assert.Contains(t, results[1].Error, "URL blocked by robots.txt")
332+
333+
// robots.txt should have been fetched exactly once
334+
assert.Equal(t, 1, robotsRequests, "robots.txt should be fetched once per host")
335+
}
336+
337+
func TestFetch_RobotsUnexpectedStatus(t *testing.T) {
338+
url := runHTTPServer(t, func(w http.ResponseWriter, r *http.Request) {
339+
if r.URL.Path == "/robots.txt" {
340+
w.WriteHeader(http.StatusInternalServerError)
341+
return
342+
}
343+
fmt.Fprint(w, "content")
344+
})
345+
346+
tool := NewFetchTool()
347+
result, err := tool.handler.CallTool(t.Context(), FetchToolArgs{
348+
URLs: []string{url + "/page"},
349+
Format: "text",
350+
})
351+
require.NoError(t, err)
352+
assert.Contains(t, result.Output, "robots.txt check failed")
353+
assert.Contains(t, result.Output, "unexpected status 500")
354+
}
355+
293356
func TestFetchTool_OutputSchema(t *testing.T) {
294357
tool := NewFetchTool()
295358

0 commit comments

Comments
 (0)