Skip to content

Commit 00cb2d5

Browse files
authored
Merge pull request #2210 from dgageot/rag-toolset
Refactor RAG from agent-level config to standard toolset type
2 parents f92ffe7 + fdf6028 commit 00cb2d5

24 files changed

Lines changed: 492 additions & 457 deletions

agent-schema.json

Lines changed: 60 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,9 @@
5959
},
6060
"rag": {
6161
"type": "object",
62-
"description": "Map of RAG (Retrieval-Augmented Generation) configurations",
62+
"description": "Map of reusable RAG source definitions. Define RAG sources here and reference them by name from agent toolsets to avoid duplication.",
6363
"additionalProperties": {
64-
"$ref": "#/definitions/RAGConfig"
64+
"$ref": "#/definitions/RAGToolset"
6565
}
6666
},
6767
"metadata": {
@@ -299,13 +299,6 @@
299299
],
300300
"additionalProperties": false
301301
},
302-
"rag": {
303-
"type": "array",
304-
"description": "List of RAG sources to use for this agent",
305-
"items": {
306-
"type": "string"
307-
}
308-
},
309302
"add_description_parameter": {
310303
"type": "boolean",
311304
"description": "Whether to add a 'description' parameter to tool calls, allowing the LLM to provide context about why it is calling a tool"
@@ -807,6 +800,51 @@
807800
],
808801
"additionalProperties": false
809802
},
803+
"RAGToolset": {
804+
"type": "object",
805+
"description": "Reusable RAG source definition. Define once at the top level and reference by name from agent toolsets. RAG config fields (tool, docs, strategies, results, respect_vcs) are specified directly alongside toolset fields.",
806+
"allOf": [
807+
{
808+
"$ref": "#/definitions/RAGConfig"
809+
},
810+
{
811+
"type": "object",
812+
"properties": {
813+
"instruction": {
814+
"type": "string",
815+
"description": "Custom instruction for this RAG source"
816+
},
817+
"tools": {
818+
"type": "array",
819+
"description": "Optional list of tools to expose",
820+
"items": {
821+
"type": "string"
822+
}
823+
},
824+
"name": {
825+
"type": "string",
826+
"description": "Optional display name override for the RAG tool"
827+
},
828+
"defer": {
829+
"description": "Deferred loading configuration",
830+
"oneOf": [
831+
{
832+
"type": "boolean",
833+
"description": "Set to true to defer all tools"
834+
},
835+
{
836+
"type": "array",
837+
"description": "Array of tool names to defer",
838+
"items": {
839+
"type": "string"
840+
}
841+
}
842+
]
843+
}
844+
}
845+
}
846+
]
847+
},
810848
"Toolset": {
811849
"type": "object",
812850
"description": "Tool configuration",
@@ -830,7 +868,8 @@
830868
"user_prompt",
831869
"openapi",
832870
"model_picker",
833-
"background_agents"
871+
"background_agents",
872+
"rag"
834873
]
835874
},
836875
"instruction": {
@@ -910,6 +949,10 @@
910949
"$ref": "#/definitions/ApiConfig",
911950
"description": "API tool configuration"
912951
},
952+
"rag_config": {
953+
"$ref": "#/definitions/RAGConfig",
954+
"description": "RAG configuration for type: rag toolsets"
955+
},
913956
"ignore_vcs": {
914957
"type": "boolean",
915958
"description": "Whether to ignore VCS files (.git directories and .gitignore patterns) in filesystem operations. Default: true",
@@ -1119,6 +1162,13 @@
11191162
"const": "background_agents"
11201163
}
11211164
}
1165+
},
1166+
{
1167+
"properties": {
1168+
"type": {
1169+
"const": "rag"
1170+
}
1171+
}
11221172
}
11231173
]
11241174
},

docs/features/rag/index.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ agents:
3636
model: openai/gpt-4o
3737
instruction: |
3838
You have access to a knowledge base. Use it to answer questions.
39-
rag: [my_docs]
39+
toolsets:
40+
- type: rag
41+
ref: my_docs
4042
```
4143
4244
## Retrieval Strategies

examples/rag.yaml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
12
agents:
23
root:
34
model: gpt-5-minimal
@@ -7,8 +8,10 @@ agents:
78
can use when it makes sense to do so, based on the user's question.
89
If you receive sources from the knowledge base, always include them as
910
a markdown list of links to local files at the very end of your response.
10-
rag:
11-
- blork_knowledge_base
11+
welcome_message: Ask me anything about Blorks.
12+
toolsets:
13+
- type: rag
14+
ref: blork_knowledge_base
1215

1316
models:
1417
gpt-5-minimal:
@@ -27,4 +30,4 @@ rag:
2730
- type: chunked-embeddings
2831
embedding_model: openai/text-embedding-3-small
2932
database: ./rag/chunked_embeddings.db
30-
vector_dimensions: 1536
33+
vector_dimensions: 1536

examples/rag/bm25.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1+
12
agents:
23
root:
34
model: openai/gpt-5-mini
45
description: a helpful assistant with keyword search
56
instruction: |
67
You are a helpful assistant that uses BM25 keyword-based search
78
to find relevant information in documents.
8-
rag:
9-
- blork_knowledge_base
9+
toolsets:
10+
- type: rag
11+
ref: blork_knowledge_base
1012

1113
rag:
1214
blork_knowledge_base:

examples/rag/hybrid.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@ agents:
1313
instruction: |
1414
You are a helpful assistant with access to hybrid retrieval
1515
combining semantic and keyword search for comprehensive results.
16-
rag:
17-
- knowledge_base
16+
toolsets:
17+
- type: rag
18+
ref: knowledge_base
1819

1920
rag:
2021
knowledge_base:

examples/rag/reranking.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ agents:
1818
instruction: |
1919
You are a helpful assistant with access to hybrid retrieval
2020
combining semantic and keyword search for comprehensive results.
21-
rag:
22-
- knowledge_base
21+
toolsets:
22+
- type: rag
23+
ref: knowledge_base
2324

2425
rag:
2526
knowledge_base:

examples/rag/semantic_embeddings.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ agents:
1717
instruction: |
1818
You are a helpful coding assistant with access to semantic code search.
1919
Use the search tool to find relevant code based on meaning, not just keywords.
20-
rag:
21-
- codebase
20+
toolsets:
21+
- type: rag
22+
ref: codebase
2223

2324
rag:
2425
codebase:
@@ -78,7 +79,7 @@ rag:
7879
chunking:
7980
size: 1000
8081
respect_word_boundaries: true
81-
code_aware: true # Use tree-sitter for AST-aware chunking
82+
code_aware: true # Use tree-sitter for AST-based chunking
8283

8384
results:
8485
# Optional: rerank results using an LLM for better relevance
@@ -94,4 +95,3 @@ rag:
9495
deduplicate: true
9596
return_full_content: false # return full document content instead of just the matched chunks
9697
limit: 5
97-

pkg/app/app.go

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,6 @@ import (
3030
"github.com/docker/docker-agent/pkg/tui/messages"
3131
)
3232

33-
// RAGInitializer is implemented by runtimes that support background RAG initialization.
34-
// Local runtimes use this to start indexing early; remote runtimes typically do not.
35-
type RAGInitializer interface {
36-
StartBackgroundRAGInit(ctx context.Context, sendEvent func(runtime.Event))
37-
}
38-
3933
type App struct {
4034
runtime runtime.Runtime
4135
session *session.Session
@@ -122,18 +116,6 @@ func New(ctx context.Context, rt runtime.Runtime, sess *session.Session, opts ..
122116
}
123117
}()
124118

125-
// If the runtime supports background RAG initialization, start it
126-
// and forward events to the TUI. Remote runtimes typically handle RAG server-side
127-
// and won't implement this optional interface.
128-
if ragRuntime, ok := rt.(RAGInitializer); ok {
129-
go ragRuntime.StartBackgroundRAGInit(ctx, func(event runtime.Event) {
130-
select {
131-
case app.events <- event:
132-
case <-ctx.Done():
133-
}
134-
})
135-
}
136-
137119
// Subscribe to tool list changes so the sidebar updates immediately
138120
// when an MCP server adds or removes tools (outside of a RunStream).
139121
if tcs, ok := rt.(runtime.ToolsChangeSubscriber); ok {

pkg/config/config.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,10 @@ func validateConfig(cfg *latest.Config) error {
124124
return err
125125
}
126126

127+
if err := resolveRAGDefinitions(cfg); err != nil {
128+
return err
129+
}
130+
127131
allNames := map[string]bool{}
128132
for _, agent := range cfg.Agents {
129133
allNames[agent.Name] = true

pkg/config/latest/parse.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,34 @@ func upgradeIfNeeded(c any, _ []byte) (any, error) {
2626

2727
var config Config
2828
types.CloneThroughJSON(old, &config)
29+
30+
// Migrate AgentConfig.RAG []string → toolsets with type: rag + ref
31+
for i, agent := range old.Agents {
32+
if len(agent.RAG) == 0 {
33+
continue
34+
}
35+
for _, ragName := range agent.RAG {
36+
config.Agents[i].Toolsets = append(config.Agents[i].Toolsets, Toolset{
37+
Type: "rag",
38+
Ref: ragName,
39+
})
40+
}
41+
}
42+
43+
// Migrate top-level RAG map from RAGConfig to RAGToolset
44+
if len(old.RAG) > 0 && config.RAG == nil {
45+
config.RAG = make(map[string]RAGToolset)
46+
}
47+
for name, oldRAG := range old.RAG {
48+
var ragCfg RAGConfig
49+
types.CloneThroughJSON(oldRAG, &ragCfg)
50+
config.RAG[name] = RAGToolset{
51+
Toolset: Toolset{
52+
Type: "rag",
53+
RAGConfig: &ragCfg,
54+
},
55+
}
56+
}
57+
2958
return config, nil
3059
}

0 commit comments

Comments
 (0)