Skip to content

Commit 1304519

Browse files
authored
Merge pull request #812 from docker/docker-model-gateway
Add model-cli gateway
2 parents 95247fe + 0d9fc40 commit 1304519

24 files changed

Lines changed: 4722 additions & 4 deletions

.github/workflows/e2e-test.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,12 @@ jobs:
3434
if: matrix.os == 'ubuntu-latest'
3535
uses: docker/setup-docker-action@1a6edb0ba9ac496f6850236981f15d8f9a82254d
3636

37+
- name: Install build dependencies (Linux)
38+
if: matrix.os == 'ubuntu-latest'
39+
run: sudo apt-get update && sudo apt-get install -y libssl-dev pkg-config
40+
41+
- name: Set up Rust
42+
uses: dtolnay/rust-toolchain@stable
43+
3744
- name: Run e2e tests
3845
run: make e2e

cmd/cli/.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
model-cli
22
.idea/
3-
dist/
3+
dist/
4+
commands/gateway_lib/

cmd/cli/Makefile

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY: all build clean link mock unit-tests docs
1+
.PHONY: all build build-gateway clean link mock unit-tests docs
22

33
BINARY_NAME=model-cli
44

@@ -10,11 +10,22 @@ VERSION ?=
1010
MACOS_MIN_VERSION := 14.0
1111
MACOS_MIN_VERSION_LDFLAG := -mmacosx-version-min=$(MACOS_MIN_VERSION)
1212

13+
GATEWAY_RUST_DIR := ../../model-cli
14+
# The static library produced by cargo and consumed by CGo.
15+
GATEWAY_LIB_DIR := commands/gateway_lib
16+
1317
all: build
1418

15-
build:
19+
build-gateway:
20+
@echo "Building gateway static library (Rust)..."
21+
@mkdir -p $(GATEWAY_LIB_DIR)
22+
cargo build --release --manifest-path $(GATEWAY_RUST_DIR)/Cargo.toml
23+
@cp $(GATEWAY_RUST_DIR)/target/release/libmodel_cli_gateway.a $(GATEWAY_LIB_DIR)/libgateway.a
24+
@echo "Gateway library staged at $(GATEWAY_LIB_DIR)/libgateway.a"
25+
26+
build: build-gateway
1627
@echo "Building $(BINARY_NAME)..."
17-
go build -ldflags="-s -w -X github.com/docker/model-runner/cmd/cli/desktop.Version=$(shell git describe --tags --always --dirty --match 'v*')" -o $(BINARY_NAME) .
28+
CGO_ENABLED=1 go build -tags gateway -ldflags="-s -w -X github.com/docker/model-runner/cmd/cli/desktop.Version=$(shell git describe --tags --always --dirty --match 'v*')" -o $(BINARY_NAME) .
1829

1930
link:
2031
@if [ ! -f $(BINARY_NAME) ]; then \
@@ -69,6 +80,7 @@ unit-tests:
6980
clean:
7081
@echo "Cleaning up..."
7182
@rm -f $(BINARY_NAME)
83+
@rm -rf $(GATEWAY_LIB_DIR)
7284
@echo "Cleaned!"
7385

7486
docs:

cmd/cli/commands/gateway.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
//go:build gateway

package commands

/*
#cgo LDFLAGS: -L${SRCDIR}/gateway_lib -lgateway -lm
#cgo darwin LDFLAGS: -framework CoreFoundation -framework Security -framework SystemConfiguration
#cgo linux LDFLAGS: -lpthread -ldl -lssl -lcrypto

#include <stdlib.h>

extern int run_gateway(int argc, const char **argv);
*/
import "C"

import (
	"fmt"
	"unsafe"

	"github.com/spf13/cobra"
)

// newGatewayCmd wires the Rust gateway (linked in as a static library via CGo)
// into the CLI. All command-line arguments are forwarded verbatim to the Rust
// side, which performs its own flag parsing.
func newGatewayCmd() *cobra.Command {
	return &cobra.Command{
		Use:   "gateway",
		Short: "Run an OpenAI-compatible LLM gateway",
		Long: `Run an OpenAI-compatible LLM gateway that routes requests to configured providers.

Supported providers include Docker Model Runner, Ollama, OpenAI, Anthropic,
Groq, Mistral, Azure OpenAI, and many more OpenAI-compatible endpoints.`,
		// The Rust binary owns argument parsing, so cobra must not touch flags.
		DisableFlagParsing: true,
		SilenceUsage:       true,
		RunE: func(cmd *cobra.Command, args []string) error {
			// Assemble a C-style argv whose first element is the program name.
			argv := make([]*C.char, len(args)+1)
			argv[0] = C.CString("model-cli")
			for i, arg := range args {
				argv[i+1] = C.CString(arg)
			}
			// Every C.CString allocation must be released once run_gateway returns.
			defer func() {
				for _, ptr := range argv {
					C.free(unsafe.Pointer(ptr))
				}
			}()

			// argv is never empty (program name is always present), so &argv[0] is safe.
			exitCode := C.run_gateway(C.int(len(argv)), (**C.char)(unsafe.Pointer(&argv[0])))
			if exitCode != 0 {
				return fmt.Errorf("gateway exited with code %d", exitCode)
			}
			return nil
		},
	}
}

cmd/cli/commands/gateway_stub.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
//go:build !gateway

package commands

import (
	"errors"

	"github.com/spf13/cobra"
)

// newGatewayCmd returns a metadata-only stand-in for builds compiled without
// -tags gateway (i.e. without the Rust static library). It declares the same
// flags as the real command so 'make docs' emits accurate reference pages,
// but invoking it always fails with an explanatory error.
func newGatewayCmd() *cobra.Command {
	var config string
	var host string
	var port uint16
	var verbose bool

	cmd := &cobra.Command{
		Use:   "gateway",
		Short: "Run an OpenAI-compatible LLM gateway",
		Long: `Run an OpenAI-compatible LLM gateway that routes requests to configured providers.

Supported providers include Docker Model Runner, Ollama, OpenAI, Anthropic,
Groq, Mistral, Azure OpenAI, and many more OpenAI-compatible endpoints.`,
		RunE: func(cmd *cobra.Command, args []string) error {
			// NOTE(review): verify 'make build-cli' is a real target — the
			// visible cmd/cli/Makefile only exposes 'build'/'build-gateway'.
			return errors.New("gateway is not available in this build; rebuild with 'make build-cli'")
		},
	}

	flags := cmd.Flags()
	flags.StringVarP(&config, "config", "c", "", "Path to the YAML configuration file")
	flags.StringVar(&host, "host", "0.0.0.0", "Host address to bind to")
	flags.Uint16VarP(&port, "port", "p", 4000, "Port to listen on")
	flags.BoolVarP(&verbose, "verbose", "v", false, "Enable verbose (debug) logging")
	// Best-effort: the flag name is known-good, so the error is safe to drop.
	_ = cmd.MarkFlagRequired("config")

	return cmd
}

cmd/cli/commands/root.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ func NewRootCmd(cli *command.DockerCli) *cobra.Command {
9696
newSearchCmd(),
9797
newSkillsCmd(),
9898
)
99+
rootCmd.AddCommand(newGatewayCmd())
99100

100101
// Commands that require a running model runner. These are wrapped to ensure the standalone runner is available.
101102
for _, cmd := range []*cobra.Command{

cmd/cli/docs/reference/docker_model.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ plink: docker.yaml
88
cname:
99
- docker model bench
1010
- docker model df
11+
- docker model gateway
1112
- docker model inspect
1213
- docker model install-runner
1314
- docker model launch
@@ -36,6 +37,7 @@ cname:
3637
clink:
3738
- docker_model_bench.yaml
3839
- docker_model_df.yaml
40+
- docker_model_gateway.yaml
3941
- docker_model_inspect.yaml
4042
- docker_model_install-runner.yaml
4143
- docker_model_launch.yaml
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
command: docker model gateway
2+
short: Run an OpenAI-compatible LLM gateway
3+
long: |-
4+
`docker model gateway` starts a local OpenAI-compatible HTTP gateway that routes
5+
requests to one or more configured LLM providers. It supports Docker Model Runner
6+
as a first-class provider, alongside Ollama, OpenAI, Anthropic, Groq, Mistral,
7+
Azure OpenAI, and many other OpenAI-compatible endpoints.
8+
9+
The gateway is configured through a YAML file that declares the model list,
10+
provider routing, load-balancing, retries, and fallbacks.
11+
12+
### Configuration file format
13+
14+
```yaml
15+
model_list:
16+
- model_name: <alias exposed to clients>
17+
params:
18+
model: <provider>/<upstream-model-name>
19+
api_base: <optional base URL override>
20+
api_key: <optional key or os.environ/VAR_NAME>
21+
22+
general_settings:
23+
master_key: <optional API key required by clients>
24+
num_retries: <optional integer, default 0>
25+
fallbacks:
26+
- <primary-alias>: [<fallback-alias>, ...]
27+
```
28+
29+
The `model` field under `params` uses the format `provider/model-name`.
30+
Supported provider prefixes include: `docker_model_runner`, `openai`,
31+
`anthropic`, `ollama`, `groq`, `mistral`, `together_ai`, `deepseek`,
32+
`fireworks_ai`, `openrouter`, `perplexity`, `xai`, `nvidia_nim`,
33+
`cerebras`, `sambanova`, `deepinfra`, `azure`, `azure_ai`, `vllm`,
34+
`lm_studio`, `huggingface`.
35+
36+
API keys can be supplied inline, as `os.environ/VAR_NAME` references, or as
37+
`${VAR_NAME}` references. The gateway resolves well-known environment variables
38+
automatically (for example, `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`).
39+
usage: docker model gateway [OPTIONS]
40+
pname: docker model
41+
plink: docker_model.yaml
42+
options:
43+
- option: config
44+
shorthand: c
45+
value_type: string
46+
description: Path to the YAML configuration file
47+
deprecated: false
48+
hidden: false
49+
experimental: false
50+
experimentalcli: false
51+
kubernetes: false
52+
swarm: false
53+
- option: host
54+
value_type: string
55+
default_value: 0.0.0.0
56+
description: Host address to bind to
57+
deprecated: false
58+
hidden: false
59+
experimental: false
60+
experimentalcli: false
61+
kubernetes: false
62+
swarm: false
63+
- option: port
64+
shorthand: p
65+
value_type: uint16
66+
default_value: "4000"
67+
description: Port to listen on
68+
deprecated: false
69+
hidden: false
70+
experimental: false
71+
experimentalcli: false
72+
kubernetes: false
73+
swarm: false
74+
- option: verbose
75+
shorthand: v
76+
value_type: bool
77+
default_value: "false"
78+
description: Enable verbose (debug) logging
79+
deprecated: false
80+
hidden: false
81+
experimental: false
82+
experimentalcli: false
83+
kubernetes: false
84+
swarm: false
85+
examples: |-
86+
### Route requests to Docker Model Runner
87+
88+
```yaml
89+
model_list:
90+
- model_name: smollm2
91+
params:
92+
model: docker_model_runner/ai/smollm2
93+
api_base: http://localhost:12434/engines/llama.cpp/v1
94+
```
95+
96+
```console
97+
$ docker model gateway --config config.yaml
98+
```
99+
100+
The gateway starts on `http://0.0.0.0:4000`. Send requests using any
101+
OpenAI-compatible client:
102+
103+
```console
104+
$ curl http://localhost:4000/v1/chat/completions \
105+
-H "Content-Type: application/json" \
106+
-d '{
107+
"model": "smollm2",
108+
"messages": [{"role": "user", "content": "Hello"}]
109+
}'
110+
```
111+
112+
### Route requests to multiple providers with fallback
113+
114+
```yaml
115+
model_list:
116+
- model_name: fast
117+
params:
118+
model: groq/llama-3.1-8b-instant
119+
api_key: os.environ/GROQ_API_KEY
120+
- model_name: smart
121+
params:
122+
model: openai/gpt-4o
123+
api_key: os.environ/OPENAI_API_KEY
124+
- model_name: local
125+
params:
126+
model: docker_model_runner/ai/smollm2
127+
api_base: http://localhost:12434/engines/llama.cpp/v1
128+
129+
general_settings:
130+
num_retries: 2
131+
fallbacks:
132+
- fast: [local]
133+
- smart: [fast, local]
134+
```
135+
136+
```console
137+
$ docker model gateway --config config.yaml --port 8080
138+
```
139+
140+
### Secure the gateway with an API key
141+
142+
```yaml
143+
model_list:
144+
- model_name: smollm2
145+
params:
146+
model: docker_model_runner/ai/smollm2
147+
api_base: http://localhost:12434/engines/llama.cpp/v1
148+
149+
general_settings:
150+
master_key: os.environ/GATEWAY_API_KEY
151+
```
152+
153+
```console
154+
$ GATEWAY_API_KEY=my-secret docker model gateway --config config.yaml
155+
```
156+
157+
Clients must then pass the key as a Bearer token or via the `x-api-key` header:
158+
159+
```console
160+
$ curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer my-secret" \
  -d '{"model": "smollm2", "messages": [{"role": "user", "content": "Hi"}]}'
163+
```
164+
165+
### Use a custom host and port
166+
167+
```console
168+
$ docker model gateway --config config.yaml --host 127.0.0.1 --port 9000
169+
```
170+
171+
### Enable debug logging
172+
173+
```console
174+
$ docker model gateway --config config.yaml --verbose
175+
```
176+
deprecated: false
177+
hidden: false
178+
experimental: false
179+
experimentalcli: false
180+
kubernetes: false
181+
swarm: false
182+

cmd/cli/docs/reference/model.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ Docker Model Runner
99
|:------------------------------------------------|:-----------------------------------------------------------------------|
1010
| [`bench`](model_bench.md) | Benchmark a model's performance at different concurrency levels |
1111
| [`df`](model_df.md) | Show Docker Model Runner disk usage |
12+
| [`gateway`](model_gateway.md) | Run an OpenAI-compatible LLM gateway |
1213
| [`inspect`](model_inspect.md) | Display detailed information on one model |
1314
| [`install-runner`](model_install-runner.md) | Install Docker Model Runner (Docker Engine only) |
1415
| [`launch`](model_launch.md) | Launch an app configured to use Docker Model Runner |

0 commit comments

Comments
 (0)