-
Notifications
You must be signed in to change notification settings - Fork 116
Expand file tree
/
Copy pathinstall-runner.go
More file actions
472 lines (431 loc) · 17.4 KB
/
install-runner.go
File metadata and controls
472 lines (431 loc) · 17.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
package commands
import (
"context"
"errors"
"fmt"
"os"
"strings"
"time"
"github.com/docker/model-runner/cmd/cli/commands/completion"
"github.com/docker/model-runner/cmd/cli/desktop"
gpupkg "github.com/docker/model-runner/cmd/cli/pkg/gpu"
"github.com/docker/model-runner/cmd/cli/pkg/standalone"
"github.com/docker/model-runner/cmd/cli/pkg/types"
"github.com/docker/model-runner/pkg/inference/backends/diffusers"
"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
"github.com/docker/model-runner/pkg/inference/backends/vllm"
"github.com/docker/model-runner/pkg/inference/platform"
"github.com/moby/moby/api/types/container"
"github.com/spf13/cobra"
)
const (
	// installWaitTries controls how many times the automatic installation will
	// try to reach the model runner while waiting for it to be ready.
	installWaitTries = 20
	// installWaitRetryInterval controls the interval at which automatic
	// installation will try to reach the model runner while waiting for it to
	// be ready.
	installWaitRetryInterval = 500 * time.Millisecond
	// backendUsage is the shared usage string for the --backend flag on the
	// install/start/reinstall runner commands.
	backendUsage = "Specify backend (" + llamacpp.Name + "|" + vllm.Name + "|" + diffusers.Name + "). Default: " + llamacpp.Name
)
// waitForStandaloneRunnerAfterInstall waits for a standalone model runner
// container to come online after installation. The CPU version can take about a
// second to start serving requests once the container has started, the CUDA
// version can take several seconds.
func waitForStandaloneRunnerAfterInstall(ctx context.Context) error {
	for attempt := 0; attempt < installWaitTries; attempt++ {
		// Probe the runner; a running status with no error means it's ready.
		status := desktopClient.Status()
		if status.Error == nil && status.Running {
			return nil
		}
		// Wait out the retry interval, bailing early on context cancellation.
		select {
		case <-ctx.Done():
			return errors.New("cancelled waiting for standalone model runner to initialize")
		case <-time.After(installWaitRetryInterval):
		}
	}
	return errors.New("standalone model runner took too long to initialize")
}
// standaloneRunner encodes the standalone runner configuration, if one exists.
// It is derived from a runner container's published port bindings (see
// inspectStandaloneRunner).
type standaloneRunner struct {
	// hostPort is the port that the runner is listening to on the host.
	hostPort uint16
	// gatewayIP is the gateway IP address that the runner is listening on.
	// It may be empty if the bound address was not a valid IP.
	//
	// TODO(thaJeztah): consider changing this to a netip.Addr
	gatewayIP string
	// gatewayPort is the gateway port that the runner is listening on.
	gatewayPort uint16
}
// inspectStandaloneRunner inspects a standalone runner container and extracts
// its configuration from the container's published port bindings. Loopback
// bindings populate hostPort; any other binding is treated as the gateway
// interface.
//
// Note: the parameter is named ctr (not container) to avoid shadowing the
// imported container package.
func inspectStandaloneRunner(ctr container.Summary) *standaloneRunner {
	result := &standaloneRunner{}
	for _, port := range ctr.Ports {
		if port.IP.IsLoopback() {
			result.hostPort = port.PublicPort
		} else {
			// We don't really have a good way of knowing what the gateway IP
			// address is, but in the standard standalone configuration we only
			// bind to two interfaces: 127.0.0.1 and the gateway interface.
			if port.IP.IsValid() {
				result.gatewayIP = port.IP.String()
			}
			result.gatewayPort = port.PublicPort
		}
	}
	return result
}
// ensureStandaloneRunnerAvailable is a utility function that other commands can
// use to initialize a default standalone model runner. It is a no-op in
// unsupported contexts or if automatic installs have been disabled (via the
// MODEL_RUNNER_NO_AUTO_INSTALL environment variable). On success it returns the
// runner's configuration, or nil when no action was applicable.
func ensureStandaloneRunnerAvailable(ctx context.Context, printer standalone.StatusPrinter, debug bool) (*standaloneRunner, error) {
	// If the model runner context wasn't initialized, then don't do anything.
	if modelRunner == nil {
		return nil, nil
	}
	// If we're not in a supported model runner context, then don't do anything.
	engineKind := modelRunner.EngineKind()
	standaloneSupported := engineKind == types.ModelRunnerEngineKindMoby ||
		engineKind == types.ModelRunnerEngineKindCloud
	if !standaloneSupported {
		return nil, nil
	}
	// If automatic installation has been disabled, then don't do anything.
	if os.Getenv("MODEL_RUNNER_NO_AUTO_INSTALL") != "" {
		return nil, nil
	}
	// Ensure that the output printer is non-nil.
	if printer == nil {
		printer = standalone.NoopPrinter()
	}
	// Create a Docker client for the active context.
	dockerClient, err := desktop.DockerClientForContext(dockerCLI, dockerCLI.CurrentContext())
	if err != nil {
		return nil, fmt.Errorf("failed to create Docker client: %w", err)
	}
	// Check if a model runner container exists. (The variable is named ctr to
	// avoid shadowing the imported container package.)
	containerID, _, ctr, err := standalone.FindControllerContainer(ctx, dockerClient)
	if err != nil {
		return nil, fmt.Errorf("unable to identify existing standalone model runner: %w", err)
	} else if containerID != "" {
		return inspectStandaloneRunner(ctr), nil
	}
	// Automatically determine GPU support.
	gpu, err := gpupkg.ProbeGPUSupport(ctx, dockerClient)
	if err != nil {
		return nil, fmt.Errorf("unable to probe GPU support: %w", err)
	}
	// Ensure that we have an up-to-date copy of the image.
	if err := standalone.EnsureControllerImage(ctx, dockerClient, gpu, "", printer); err != nil {
		return nil, fmt.Errorf("unable to pull latest standalone model runner image: %w", err)
	}
	// Ensure that we have a model storage volume.
	modelStorageVolume, err := standalone.EnsureModelStorageVolume(ctx, dockerClient, printer)
	if err != nil {
		return nil, fmt.Errorf("unable to initialize standalone model storage: %w", err)
	}
	// Create the model runner container.
	port := uint16(standalone.DefaultControllerPortMoby)
	// For auto-installation, always bind to localhost for security.
	// Users can run install-runner explicitly with --host to change this.
	host := "127.0.0.1"
	environment := "moby"
	if engineKind == types.ModelRunnerEngineKindCloud {
		port = standalone.DefaultControllerPortCloud
		environment = "cloud"
	}
	// TLS is disabled by default for auto-installation
	tlsOpts := standalone.TLSOptions{Enabled: false}
	if err := standalone.CreateControllerContainer(ctx, dockerClient, port, host, environment, false, gpu, "", modelStorageVolume, printer, engineKind, debug, false, "", tlsOpts); err != nil {
		return nil, fmt.Errorf("unable to initialize standalone model runner container: %w", err)
	}
	// Poll until we get a response from the model runner.
	if err := waitForStandaloneRunnerAfterInstall(ctx); err != nil {
		return nil, err
	}
	// Find the runner container.
	//
	// TODO: We should actually find this before calling
	// waitForStandaloneRunnerAfterInstall (or have CreateControllerContainer
	// return the container information), and probably pass the target
	// information info waitForStandaloneRunnerAfterInstall, but let's wait
	// until we do listener port customization / detection in the next PR.
	containerID, _, ctr, err = standalone.FindControllerContainer(ctx, dockerClient)
	if err != nil {
		return nil, fmt.Errorf("unable to identify existing standalone model runner: %w", err)
	} else if containerID == "" {
		return nil, errors.New("standalone model runner not found after installation")
	}
	return inspectStandaloneRunner(ctr), nil
}
// withStandaloneRunner wraps a command's RunE to ensure the standalone runner
// is available before executing the command. This is a no-op in unsupported
// contexts (e.g., Docker Desktop) or if automatic installations have been disabled.
func withStandaloneRunner(cmd *cobra.Command) *cobra.Command {
	inner := cmd.RunE
	if inner == nil {
		// Nothing to wrap.
		return cmd
	}
	cmd.RunE = func(c *cobra.Command, args []string) error {
		// Make sure a runner exists (no-op where unsupported/disabled).
		_, err := ensureStandaloneRunnerAvailable(c.Context(), asPrinter(c), false)
		if err != nil {
			return fmt.Errorf("unable to initialize standalone model runner: %w", err)
		}
		// Delegate to the command's original implementation.
		return inner(c, args)
	}
	return cmd
}
// getStandaloneRunner returns the standalone runner info by finding the controller container.
// This is useful for commands that need runner details after withStandaloneRunner has run.
// Returns nil for non-standalone contexts (e.g., Docker Desktop).
func getStandaloneRunner(ctx context.Context) (*standaloneRunner, error) {
	// Guard against an uninitialized model runner context; calling
	// EngineKind on a nil modelRunner would panic (mirrors the check in
	// ensureStandaloneRunnerAvailable).
	if modelRunner == nil {
		return nil, nil
	}
	// Only standalone contexts have a runner container to inspect.
	engineKind := modelRunner.EngineKind()
	standaloneSupported := engineKind == types.ModelRunnerEngineKindMoby ||
		engineKind == types.ModelRunnerEngineKindCloud
	if !standaloneSupported {
		return nil, nil
	}
	if dockerCLI == nil {
		return nil, nil
	}
	dockerClient, err := desktop.DockerClientForContext(dockerCLI, dockerCLI.CurrentContext())
	if err != nil {
		return nil, fmt.Errorf("failed to create Docker client: %w", err)
	}
	containerID, _, ctr, err := standalone.FindControllerContainer(ctx, dockerClient)
	if err != nil {
		return nil, fmt.Errorf("unable to find standalone model runner: %w", err)
	}
	if containerID == "" {
		// No runner container exists; not an error.
		return nil, nil
	}
	return inspectStandaloneRunner(ctr), nil
}
// runnerOptions holds common configuration for install/start/reinstall commands
type runnerOptions struct {
	// port is the host-side port for the runner (0 selects the default).
	port uint16
	// host is the host interface address to bind to.
	host string
	// gpuMode selects GPU support ("auto", "cuda", "rocm", "musa", "cann", or "none").
	gpuMode string
	// backend selects the inference backend (llama.cpp, vllm, or diffusers; empty for default).
	backend string
	// doNotTrack disables tracking for the runner container.
	doNotTrack bool
	// pullImage indicates whether to pull the latest controller image first.
	pullImage bool
	// pruneContainers indicates whether to remove existing runner containers (reinstall).
	pruneContainers bool
	// proxyCert is a path to a proxy certificate to install, if any.
	proxyCert string
	// tls enables TLS on the runner.
	tls bool
	// tlsPort is the TLS listener port.
	tlsPort uint16
	// tlsCert is the path to the TLS certificate.
	tlsCert string
	// tlsKey is the path to the TLS private key.
	tlsKey string
}
// runInstallOrStart is shared logic for install-runner and start-runner commands.
// It first handles backends that use deferred (in-runner) installation, then
// validates flags and creates a standalone model runner container, waiting for
// it to come online.
func runInstallOrStart(cmd *cobra.Command, opts runnerOptions, debug bool) error {
	// On macOS ARM64, the vllm backend requires deferred installation
	// (on-demand via the running model runner), not as a standalone container.
	if opts.backend == vllm.Name && platform.SupportsVLLMMetal() {
		cmd.Println("Installing vllm backend...")
		if err := desktopClient.InstallBackend(vllm.Name); err != nil {
			return fmt.Errorf("failed to install vllm backend: %w", err)
		}
		cmd.Println("vllm backend installed successfully")
		return nil
	}
	// On macOS/Windows, the llama.cpp backend uses deferred installation.
	// Trigger installation (and binary update) via the running model runner.
	if opts.backend == llamacpp.Name && llamacpp.NeedsDeferredInstall() {
		cmd.Println("Installing llama.cpp backend...")
		if err := desktopClient.InstallBackend(llamacpp.Name); err != nil {
			return fmt.Errorf("failed to install llama.cpp backend: %w", err)
		}
		cmd.Println("llama.cpp backend installed successfully")
		return nil
	}
	// The diffusers backend uses deferred installation: it pulls a Docker
	// image, extracts a self-contained Python environment, and installs it
	// to a well-known local folder. Trigger installation via the running
	// model runner's API, the same way vllm-metal is handled above.
	if opts.backend == diffusers.Name && platform.SupportsDiffusers() {
		// For standalone contexts (Moby/Cloud), ensure a base runner is
		// available first so we have an API endpoint to call.
		engineKind := modelRunner.EngineKind()
		if engineKind == types.ModelRunnerEngineKindMoby || engineKind == types.ModelRunnerEngineKindCloud {
			if _, err := ensureStandaloneRunnerAvailable(cmd.Context(), asPrinter(cmd), debug); err != nil {
				return fmt.Errorf("unable to initialize standalone model runner: %w", err)
			}
		}
		cmd.Println("Installing diffusers backend...")
		if err := desktopClient.InstallBackend(diffusers.Name); err != nil {
			return fmt.Errorf("failed to install diffusers backend: %w", err)
		}
		cmd.Println("diffusers backend installed successfully")
		return nil
	}
	var vllmOnWSL bool
	// Ensure that we're running in a supported model runner context.
	engineKind := modelRunner.EngineKind()
	if engineKind == types.ModelRunnerEngineKindDesktop {
		if opts.backend == vllm.Name && desktop.IsDesktopWSLContext(cmd.Context(), dockerCLI) {
			engineKind = types.ModelRunnerEngineKindMoby
			vllmOnWSL = true
		} else {
			// TODO: We may eventually want to auto-forward this to
			// docker desktop enable model-runner, but we should first make
			// sure the CLI flags match.
			cmd.Println("Standalone installation not supported with Docker Desktop")
			cmd.Println("Use `docker desktop enable model-runner` instead")
			return nil
		}
	}
	port := opts.port
	if port == 0 {
		// Use "0" as a sentinel default flag value so it's not displayed automatically.
		// The default values are written in the usage string.
		// Hence, the user currently won't be able to set the port to 0 in order to get a random available port.
		port = standalone.DefaultControllerPortMoby
	}
	// HACK: If we're in a Cloud context, then we need to use a
	// different default port because it conflicts with Docker Desktop's
	// default model runner host-side port. Unfortunately we can't make
	// the port flag default dynamic (at least not easily) because of
	// when context detection happens. So assume that a default value
	// indicates that we want the Cloud default port. This is less
	// problematic in Cloud since the UX there is mostly invisible.
	if engineKind == types.ModelRunnerEngineKindCloud &&
		port == standalone.DefaultControllerPortMoby {
		port = standalone.DefaultControllerPortCloud
	}
	// Set the appropriate environment.
	environment := "moby"
	if engineKind == types.ModelRunnerEngineKindCloud {
		environment = "cloud"
	}
	// Create a Docker client for the active context.
	dockerClient, err := desktop.DockerClientForContext(dockerCLI, dockerCLI.CurrentContext())
	if err != nil {
		return fmt.Errorf("failed to create Docker client: %w", err)
	}
	// If pruning containers (reinstall), remove any existing model runner containers.
	if opts.pruneContainers {
		if err := standalone.PruneControllerContainers(cmd.Context(), dockerClient, false, asPrinter(cmd)); err != nil {
			return fmt.Errorf("unable to remove model runner container(s): %w", err)
		}
	} else {
		// Check if an active model runner container already exists (install only).
		if ctrID, ctrName, _, err := standalone.FindControllerContainer(cmd.Context(), dockerClient); err != nil {
			return err
		} else if ctrID != "" {
			if ctrName != "" {
				cmd.Printf("Model Runner container %s (%s) is already running\n", ctrName, ctrID[:12])
			} else {
				cmd.Printf("Model Runner container %s is already running\n", ctrID[:12])
			}
			return nil
		}
	}
	// Determine GPU support.
	var gpu gpupkg.GPUSupport
	switch opts.gpuMode {
	case "auto":
		gpu, err = gpupkg.ProbeGPUSupport(cmd.Context(), dockerClient)
		if err != nil {
			return fmt.Errorf("unable to probe GPU support: %w", err)
		}
	case "cuda":
		gpu = gpupkg.GPUSupportCUDA
	case "rocm":
		gpu = gpupkg.GPUSupportROCm
	case "musa":
		gpu = gpupkg.GPUSupportMUSA
	case "cann":
		gpu = gpupkg.GPUSupportCANN
	case "none":
		// Leave gpu at its zero value (no GPU support).
	default:
		return fmt.Errorf("unknown GPU specification: %q", opts.gpuMode)
	}
	// Validate backend selection (empty selects the default backend).
	validBackends := []string{llamacpp.Name, vllm.Name, diffusers.Name}
	switch opts.backend {
	case "", llamacpp.Name, vllm.Name, diffusers.Name:
		// Valid.
	default:
		return fmt.Errorf("unknown backend: %q (supported: %s)", opts.backend, strings.Join(validBackends, ", "))
	}
	// Validate backend-GPU compatibility (only on Linux; macOS ARM64 uses Metal)
	if opts.backend == vllm.Name && !platform.SupportsVLLMMetal() && gpu != gpupkg.GPUSupportCUDA {
		return fmt.Errorf("--backend vllm requires CUDA GPU support (--gpu=cuda or auto-detected CUDA)")
	}
	// Ensure that we have an up-to-date copy of the image, if requested.
	if opts.pullImage {
		if err := standalone.EnsureControllerImage(cmd.Context(), dockerClient, gpu, opts.backend, asPrinter(cmd)); err != nil {
			return fmt.Errorf("unable to pull latest standalone model runner image: %w", err)
		}
	}
	// Ensure that we have a model storage volume.
	modelStorageVolume, err := standalone.EnsureModelStorageVolume(cmd.Context(), dockerClient, asPrinter(cmd))
	if err != nil {
		return fmt.Errorf("unable to initialize standalone model storage: %w", err)
	}
	// Build TLS options
	tlsOpts := standalone.TLSOptions{
		Enabled:  opts.tls,
		Port:     opts.tlsPort,
		CertPath: opts.tlsCert,
		KeyPath:  opts.tlsKey,
	}
	// Create the model runner container.
	if err := standalone.CreateControllerContainer(cmd.Context(), dockerClient, port, opts.host, environment, opts.doNotTrack, gpu, opts.backend, modelStorageVolume, asPrinter(cmd), engineKind, debug, vllmOnWSL, opts.proxyCert, tlsOpts); err != nil {
		return fmt.Errorf("unable to initialize standalone model runner container: %w", err)
	}
	// Poll until we get a response from the model runner.
	return waitForStandaloneRunnerAfterInstall(cmd.Context())
}
// newInstallRunner builds the install-runner cobra command, which installs
// Docker Model Runner as a standalone container (Docker Engine only).
func newInstallRunner() *cobra.Command {
	// Flag storage, bound below via addRunnerFlags.
	var (
		port       uint16
		host       string
		gpuMode    string
		backend    string
		doNotTrack bool
		debug      bool
		proxyCert  string
		tlsEnabled bool
		tlsPort    uint16
		tlsCert    string
		tlsKey     string
	)
	c := &cobra.Command{
		Use:   "install-runner",
		Short: "Install Docker Model Runner (Docker Engine only)",
		RunE: func(cmd *cobra.Command, args []string) error {
			// install-runner always pulls the image and never prunes
			// existing containers.
			opts := runnerOptions{
				port:            port,
				host:            host,
				gpuMode:         gpuMode,
				backend:         backend,
				doNotTrack:      doNotTrack,
				pullImage:       true,
				pruneContainers: false,
				proxyCert:       proxyCert,
				tls:             tlsEnabled,
				tlsPort:         tlsPort,
				tlsCert:         tlsCert,
				tlsKey:          tlsKey,
			}
			return runInstallOrStart(cmd, opts, debug)
		},
		ValidArgsFunction: completion.NoComplete,
	}
	addRunnerFlags(c, runnerFlagOptions{
		Port:       &port,
		Host:       &host,
		GpuMode:    &gpuMode,
		Backend:    &backend,
		DoNotTrack: &doNotTrack,
		Debug:      &debug,
		ProxyCert:  &proxyCert,
		TLS:        &tlsEnabled,
		TLSPort:    &tlsPort,
		TLSCert:    &tlsCert,
		TLSKey:     &tlsKey,
	})
	return c
}