-
Notifications
You must be signed in to change notification settings - Fork 116
Expand file tree
/
Copy pathinstall-runner.go
More file actions
472 lines (431 loc) · 17.4 KB
/
install-runner.go
File metadata and controls
472 lines (431 loc) · 17.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
package commands
import (
"context"
"errors"
"fmt"
"os"
"strings"
"time"
"github.com/docker/model-runner/cmd/cli/commands/completion"
"github.com/docker/model-runner/cmd/cli/desktop"
gpupkg "github.com/docker/model-runner/cmd/cli/pkg/gpu"
"github.com/docker/model-runner/cmd/cli/pkg/standalone"
"github.com/docker/model-runner/cmd/cli/pkg/types"
"github.com/docker/model-runner/pkg/inference/backends/diffusers"
"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
"github.com/docker/model-runner/pkg/inference/backends/vllm"
"github.com/docker/model-runner/pkg/inference/platform"
"github.com/moby/moby/api/types/container"
"github.com/spf13/cobra"
)
const (
	// installWaitTries controls how many times the automatic installation will
	// try to reach the model runner while waiting for it to be ready.
	installWaitTries = 20
	// installWaitRetryInterval controls the interval at which automatic
	// installation will try to reach the model runner while waiting for it to
	// be ready.
	installWaitRetryInterval = 500 * time.Millisecond
	// backendUsage is the shared usage string for the --backend flag on the
	// install/start/reinstall runner commands.
	backendUsage = "Specify backend (" + llamacpp.Name + "|" + vllm.Name + "|" + diffusers.Name + "). Default: " + llamacpp.Name
)
// waitForStandaloneRunnerAfterInstall waits for a standalone model runner
// container to come online after installation. The CPU version can take about a
// second to start serving requests once the container has started, the CUDA
// version can take several seconds.
func waitForStandaloneRunnerAfterInstall(ctx context.Context) error {
	for attempt := 0; attempt < installWaitTries; attempt++ {
		// Probe the runner; a running status with no error means it's ready.
		status := desktopClient.Status()
		if status.Error == nil && status.Running {
			return nil
		}
		// Wait out the retry interval, bailing early on context cancellation.
		select {
		case <-ctx.Done():
			return errors.New("cancelled waiting for standalone model runner to initialize")
		case <-time.After(installWaitRetryInterval):
		}
	}
	return errors.New("standalone model runner took too long to initialize")
}
// standaloneRunner encodes the standalone runner configuration, if one exists.
// It is derived from a runner container's published port bindings (see
// inspectStandaloneRunner).
type standaloneRunner struct {
	// hostPort is the port that the runner is listening to on the host.
	hostPort uint16
	// gatewayIP is the gateway IP address that the runner is listening on.
	// It may be empty if the bound address was not a valid IP.
	//
	// TODO(thaJeztah): consider changing this to a netip.Addr
	gatewayIP string
	// gatewayPort is the gateway port that the runner is listening on.
	gatewayPort uint16
}
// inspectStandaloneRunner inspects a standalone runner container and extracts
// its configuration from the container's published port bindings. Loopback
// bindings populate hostPort; any other binding is treated as the gateway
// interface.
//
// Note: the parameter is named ctr (not container) to avoid shadowing the
// imported container package.
func inspectStandaloneRunner(ctr container.Summary) *standaloneRunner {
	result := &standaloneRunner{}
	for _, port := range ctr.Ports {
		if port.IP.IsLoopback() {
			result.hostPort = port.PublicPort
		} else {
			// We don't really have a good way of knowing what the gateway IP
			// address is, but in the standard standalone configuration we only
			// bind to two interfaces: 127.0.0.1 and the gateway interface.
			if port.IP.IsValid() {
				result.gatewayIP = port.IP.String()
			}
			result.gatewayPort = port.PublicPort
		}
	}
	return result
}
// ensureStandaloneRunnerAvailable is a utility function that other commands can
// use to initialize a default standalone model runner. It is a no-op in
// unsupported contexts or if automatic installs have been disabled (via the
// MODEL_RUNNER_NO_AUTO_INSTALL environment variable). On success it returns the
// runner's configuration, or nil when no action was applicable.
func ensureStandaloneRunnerAvailable(ctx context.Context, printer standalone.StatusPrinter, debug bool) (*standaloneRunner, error) {
	// If the model runner context wasn't initialized, then don't do anything.
	if modelRunner == nil {
		return nil, nil
	}
	// If we're not in a supported model runner context, then don't do anything.
	engineKind := modelRunner.EngineKind()
	standaloneSupported := engineKind == types.ModelRunnerEngineKindMoby ||
		engineKind == types.ModelRunnerEngineKindCloud
	if !standaloneSupported {
		return nil, nil
	}
	// If automatic installation has been disabled, then don't do anything.
	if os.Getenv("MODEL_RUNNER_NO_AUTO_INSTALL") != "" {
		return nil, nil
	}
	// Ensure that the output printer is non-nil.
	if printer == nil {
		printer = standalone.NoopPrinter()
	}
	// Create a Docker client for the active context.
	dockerClient, err := desktop.DockerClientForContext(dockerCLI, dockerCLI.CurrentContext())
	if err != nil {
		return nil, fmt.Errorf("failed to create Docker client: %w", err)
	}
	// Check if a model runner container exists. (The variable is named ctr to
	// avoid shadowing the imported container package.)
	containerID, _, ctr, err := standalone.FindControllerContainer(ctx, dockerClient)
	if err != nil {
		return nil, fmt.Errorf("unable to identify existing standalone model runner: %w", err)
	} else if containerID != "" {
		return inspectStandaloneRunner(ctr), nil
	}
	// Automatically determine GPU support.
	gpu, err := gpupkg.ProbeGPUSupport(ctx, dockerClient)
	if err != nil {
		return nil, fmt.Errorf("unable to probe GPU support: %w", err)
	}
	// Ensure that we have an up-to-date copy of the image.
	if err := standalone.EnsureControllerImage(ctx, dockerClient, gpu, "", printer); err != nil {
		return nil, fmt.Errorf("unable to pull latest standalone model runner image: %w", err)
	}
	// Ensure that we have a model storage volume.
	modelStorageVolume, err := standalone.EnsureModelStorageVolume(ctx, dockerClient, printer)
	if err != nil {
		return nil, fmt.Errorf("unable to initialize standalone model storage: %w", err)
	}
	// Create the model runner container.
	port := uint16(standalone.DefaultControllerPortMoby)
	// For auto-installation, always bind to localhost for security.
	// Users can run install-runner explicitly with --host to change this.
	host := "127.0.0.1"
	environment := "moby"
	if engineKind == types.ModelRunnerEngineKindCloud {
		port = standalone.DefaultControllerPortCloud
		environment = "cloud"
	}
	// TLS is disabled by default for auto-installation
	tlsOpts := standalone.TLSOptions{Enabled: false}
	if err := standalone.CreateControllerContainer(ctx, dockerClient, port, host, environment, false, gpu, "", modelStorageVolume, printer, engineKind, debug, false, "", tlsOpts); err != nil {
		return nil, fmt.Errorf("unable to initialize standalone model runner container: %w", err)
	}
	// Poll until we get a response from the model runner.
	if err := waitForStandaloneRunnerAfterInstall(ctx); err != nil {
		return nil, err
	}
	// Find the runner container.
	//
	// TODO: We should actually find this before calling
	// waitForStandaloneRunnerAfterInstall (or have CreateControllerContainer
	// return the container information), and probably pass the target
	// information info waitForStandaloneRunnerAfterInstall, but let's wait
	// until we do listener port customization / detection in the next PR.
	containerID, _, ctr, err = standalone.FindControllerContainer(ctx, dockerClient)
	if err != nil {
		return nil, fmt.Errorf("unable to identify existing standalone model runner: %w", err)
	} else if containerID == "" {
		return nil, errors.New("standalone model runner not found after installation")
	}
	return inspectStandaloneRunner(ctr), nil
}
// withStandaloneRunner wraps a command's RunE to ensure the standalone runner
// is available before executing the command. This is a no-op in unsupported
// contexts (e.g., Docker Desktop) or if automatic installations have been disabled.
func withStandaloneRunner(cmd *cobra.Command) *cobra.Command {
	inner := cmd.RunE
	if inner == nil {
		// Nothing to wrap.
		return cmd
	}
	cmd.RunE = func(c *cobra.Command, args []string) error {
		// Make sure a runner exists (no-op where unsupported/disabled).
		_, err := ensureStandaloneRunnerAvailable(c.Context(), asPrinter(c), false)
		if err != nil {
			return fmt.Errorf("unable to initialize standalone model runner: %w", err)
		}
		// Delegate to the command's original implementation.
		return inner(c, args)
	}
	return cmd
}
// getStandaloneRunner returns the standalone runner info by finding the controller container.
// This is useful for commands that need runner details after withStandaloneRunner has run.
// Returns nil for non-standalone contexts (e.g., Docker Desktop).
func getStandaloneRunner(ctx context.Context) (*standaloneRunner, error) {
	// Guard against an uninitialized model runner context; calling
	// EngineKind on a nil modelRunner would panic (mirrors the check in
	// ensureStandaloneRunnerAvailable).
	if modelRunner == nil {
		return nil, nil
	}
	// Only standalone contexts have a runner container to inspect.
	engineKind := modelRunner.EngineKind()
	standaloneSupported := engineKind == types.ModelRunnerEngineKindMoby ||
		engineKind == types.ModelRunnerEngineKindCloud
	if !standaloneSupported {
		return nil, nil
	}
	if dockerCLI == nil {
		return nil, nil
	}
	dockerClient, err := desktop.DockerClientForContext(dockerCLI, dockerCLI.CurrentContext())
	if err != nil {
		return nil, fmt.Errorf("failed to create Docker client: %w", err)
	}
	containerID, _, ctr, err := standalone.FindControllerContainer(ctx, dockerClient)
	if err != nil {
		return nil, fmt.Errorf("unable to find standalone model runner: %w", err)
	}
	if containerID == "" {
		// No runner container exists; not an error.
		return nil, nil
	}
	return inspectStandaloneRunner(ctr), nil
}
// runnerOptions holds common configuration for install/start/reinstall commands
type runnerOptions struct {
	// port is the host-side port for the runner (0 selects the default).
	port uint16
	// host is the host interface address to bind to.
	host string
	// gpuMode selects GPU support ("auto", "cuda", "rocm", "musa", "cann", or "none").
	gpuMode string
	// backend selects the inference backend (llama.cpp, vllm, or diffusers; empty for default).
	backend string
	// doNotTrack disables tracking for the runner container.
	doNotTrack bool
	// pullImage indicates whether to pull the latest controller image first.
	pullImage bool
	// pruneContainers indicates whether to remove existing runner containers (reinstall).
	pruneContainers bool
	// proxyCert is a path to a proxy certificate to install, if any.
	proxyCert string
	// tls enables TLS on the runner.
	tls bool
	// tlsPort is the TLS listener port.
	tlsPort uint16
	// tlsCert is the path to the TLS certificate.
	tlsCert string
	// tlsKey is the path to the TLS private key.
	tlsKey string
}
// runInstallOrStart is shared logic for install-runner and start-runner commands.
// It first handles backends that use deferred (in-runner) installation, then
// validates flags and creates a standalone model runner container, waiting for
// it to come online.
func runInstallOrStart(cmd *cobra.Command, opts runnerOptions, debug bool) error {
	// On macOS ARM64, the vllm backend requires deferred installation
	// (on-demand via the running model runner), not as a standalone container.
	if opts.backend == vllm.Name && platform.SupportsVLLMMetal() {
		cmd.Println("Installing vllm backend...")
		if err := desktopClient.InstallBackend(vllm.Name); err != nil {
			return fmt.Errorf("failed to install vllm backend: %w", err)
		}
		cmd.Println("vllm backend installed successfully")
		return nil
	}
	// On macOS/Windows, the llama.cpp backend uses deferred installation.
	// Trigger installation (and binary update) via the running model runner.
	if opts.backend == llamacpp.Name && llamacpp.NeedsDeferredInstall() {
		cmd.Println("Installing llama.cpp backend...")
		if err := desktopClient.InstallBackend(llamacpp.Name); err != nil {
			return fmt.Errorf("failed to install llama.cpp backend: %w", err)
		}
		cmd.Println("llama.cpp backend installed successfully")
		return nil
	}
	// The diffusers backend uses deferred installation: it pulls a Docker
	// image, extracts a self-contained Python environment, and installs it
	// to a well-known local folder. Trigger installation via the running
	// model runner's API, the same way vllm-metal is handled above.
	if opts.backend == diffusers.Name && platform.SupportsDiffusers() {
		// For standalone contexts (Moby/Cloud), ensure a base runner is
		// available first so we have an API endpoint to call.
		engineKind := modelRunner.EngineKind()
		if engineKind == types.ModelRunnerEngineKindMoby || engineKind == types.ModelRunnerEngineKindCloud {
			if _, err := ensureStandaloneRunnerAvailable(cmd.Context(), asPrinter(cmd), debug); err != nil {
				return fmt.Errorf("unable to initialize standalone model runner: %w", err)
			}
		}
		cmd.Println("Installing diffusers backend...")
		if err := desktopClient.InstallBackend(diffusers.Name); err != nil {
			return fmt.Errorf("failed to install diffusers backend: %w", err)
		}
		cmd.Println("diffusers backend installed successfully")
		return nil
	}
	var vllmOnWSL bool
	// Ensure that we're running in a supported model runner context.
	engineKind := modelRunner.EngineKind()
	if engineKind == types.ModelRunnerEngineKindDesktop {
		if opts.backend == vllm.Name && desktop.IsDesktopWSLContext(cmd.Context(), dockerCLI) {
			engineKind = types.ModelRunnerEngineKindMoby
			vllmOnWSL = true
		} else {
			// TODO: We may eventually want to auto-forward this to
			// docker desktop enable model-runner, but we should first make
			// sure the CLI flags match.
			cmd.Println("Standalone installation not supported with Docker Desktop")
			cmd.Println("Use `docker desktop enable model-runner` instead")
			return nil
		}
	}
	port := opts.port
	if port == 0 {
		// Use "0" as a sentinel default flag value so it's not displayed automatically.
		// The default values are written in the usage string.
		// Hence, the user currently won't be able to set the port to 0 in order to get a random available port.
		port = standalone.DefaultControllerPortMoby
	}
	// HACK: If we're in a Cloud context, then we need to use a
	// different default port because it conflicts with Docker Desktop's
	// default model runner host-side port. Unfortunately we can't make
	// the port flag default dynamic (at least not easily) because of
	// when context detection happens. So assume that a default value
	// indicates that we want the Cloud default port. This is less
	// problematic in Cloud since the UX there is mostly invisible.
	if engineKind == types.ModelRunnerEngineKindCloud &&
		port == standalone.DefaultControllerPortMoby {
		port = standalone.DefaultControllerPortCloud
	}
	// Set the appropriate environment.
	environment := "moby"
	if engineKind == types.ModelRunnerEngineKindCloud {
		environment = "cloud"
	}
	// Create a Docker client for the active context.
	dockerClient, err := desktop.DockerClientForContext(dockerCLI, dockerCLI.CurrentContext())
	if err != nil {
		return fmt.Errorf("failed to create Docker client: %w", err)
	}
	// If pruning containers (reinstall), remove any existing model runner containers.
	if opts.pruneContainers {
		if err := standalone.PruneControllerContainers(cmd.Context(), dockerClient, false, asPrinter(cmd)); err != nil {
			return fmt.Errorf("unable to remove model runner container(s): %w", err)
		}
	} else {
		// Check if an active model runner container already exists (install only).
		if ctrID, ctrName, _, err := standalone.FindControllerContainer(cmd.Context(), dockerClient); err != nil {
			return err
		} else if ctrID != "" {
			if ctrName != "" {
				cmd.Printf("Model Runner container %s (%s) is already running\n", ctrName, ctrID[:12])
			} else {
				cmd.Printf("Model Runner container %s is already running\n", ctrID[:12])
			}
			return nil
		}
	}
	// Determine GPU support.
	var gpu gpupkg.GPUSupport
	switch opts.gpuMode {
	case "auto":
		gpu, err = gpupkg.ProbeGPUSupport(cmd.Context(), dockerClient)
		if err != nil {
			return fmt.Errorf("unable to probe GPU support: %w", err)
		}
	case "cuda":
		gpu = gpupkg.GPUSupportCUDA
	case "rocm":
		gpu = gpupkg.GPUSupportROCm
	case "musa":
		gpu = gpupkg.GPUSupportMUSA
	case "cann":
		gpu = gpupkg.GPUSupportCANN
	case "none":
		// Leave gpu at its zero value (no GPU support).
	default:
		return fmt.Errorf("unknown GPU specification: %q", opts.gpuMode)
	}
	// Validate backend selection (empty selects the default backend).
	validBackends := []string{llamacpp.Name, vllm.Name, diffusers.Name}
	switch opts.backend {
	case "", llamacpp.Name, vllm.Name, diffusers.Name:
		// Valid.
	default:
		return fmt.Errorf("unknown backend: %q (supported: %s)", opts.backend, strings.Join(validBackends, ", "))
	}
	// Validate backend-GPU compatibility (only on Linux; macOS ARM64 uses Metal)
	if opts.backend == vllm.Name && !platform.SupportsVLLMMetal() && gpu != gpupkg.GPUSupportCUDA {
		return fmt.Errorf("--backend vllm requires CUDA GPU support (--gpu=cuda or auto-detected CUDA)")
	}
	// Ensure that we have an up-to-date copy of the image, if requested.
	if opts.pullImage {
		if err := standalone.EnsureControllerImage(cmd.Context(), dockerClient, gpu, opts.backend, asPrinter(cmd)); err != nil {
			return fmt.Errorf("unable to pull latest standalone model runner image: %w", err)
		}
	}
	// Ensure that we have a model storage volume.
	modelStorageVolume, err := standalone.EnsureModelStorageVolume(cmd.Context(), dockerClient, asPrinter(cmd))
	if err != nil {
		return fmt.Errorf("unable to initialize standalone model storage: %w", err)
	}
	// Build TLS options
	tlsOpts := standalone.TLSOptions{
		Enabled:  opts.tls,
		Port:     opts.tlsPort,
		CertPath: opts.tlsCert,
		KeyPath:  opts.tlsKey,
	}
	// Create the model runner container.
	if err := standalone.CreateControllerContainer(cmd.Context(), dockerClient, port, opts.host, environment, opts.doNotTrack, gpu, opts.backend, modelStorageVolume, asPrinter(cmd), engineKind, debug, vllmOnWSL, opts.proxyCert, tlsOpts); err != nil {
		return fmt.Errorf("unable to initialize standalone model runner container: %w", err)
	}
	// Poll until we get a response from the model runner.
	return waitForStandaloneRunnerAfterInstall(cmd.Context())
}
// newInstallRunner builds the install-runner cobra command, which installs
// Docker Model Runner as a standalone container (Docker Engine only).
func newInstallRunner() *cobra.Command {
	// Flag storage, bound below via addRunnerFlags.
	var (
		port       uint16
		host       string
		gpuMode    string
		backend    string
		doNotTrack bool
		debug      bool
		proxyCert  string
		tlsEnabled bool
		tlsPort    uint16
		tlsCert    string
		tlsKey     string
	)
	c := &cobra.Command{
		Use:   "install-runner",
		Short: "Install Docker Model Runner (Docker Engine only)",
		RunE: func(cmd *cobra.Command, args []string) error {
			// install-runner always pulls the image and never prunes
			// existing containers.
			opts := runnerOptions{
				port:            port,
				host:            host,
				gpuMode:         gpuMode,
				backend:         backend,
				doNotTrack:      doNotTrack,
				pullImage:       true,
				pruneContainers: false,
				proxyCert:       proxyCert,
				tls:             tlsEnabled,
				tlsPort:         tlsPort,
				tlsCert:         tlsCert,
				tlsKey:          tlsKey,
			}
			return runInstallOrStart(cmd, opts, debug)
		},
		ValidArgsFunction: completion.NoComplete,
	}
	addRunnerFlags(c, runnerFlagOptions{
		Port:       &port,
		Host:       &host,
		GpuMode:    &gpuMode,
		Backend:    &backend,
		DoNotTrack: &doNotTrack,
		Debug:      &debug,
		ProxyCert:  &proxyCert,
		TLS:        &tlsEnabled,
		TLSPort:    &tlsPort,
		TLSCert:    &tlsCert,
		TLSKey:     &tlsKey,
	})
	return c
}