feat: add support for DDUF/diffusers backend selection in model scheduler (#809)

ilopezluna · web-flow · commit 722ec526dfc3 · 2026-03-30T14:55:47.000+02:00
diff --git a/pkg/inference/scheduling/http_handler.go b/pkg/inference/scheduling/http_handler.go
@@ -206,7 +206,7 @@ func (h *HTTPHandler) handleOpenAIInference(w http.ResponseWriter, r *http.Reque
 		// Non-blocking call to track the model usage.
 		h.scheduler.tracker.TrackModel(model, r.UserAgent(), action)
 
-		// Automatically identify models for vLLM.
+		// Automatically select backend for given model.
 		backend = h.scheduler.selectBackendForModel(model, backend, request.Model)
 	}
 
diff --git a/pkg/inference/scheduling/scheduler.go b/pkg/inference/scheduling/scheduler.go
@@ -10,6 +10,7 @@ import (
 
 	"github.com/docker/model-runner/pkg/distribution/types"
 	"github.com/docker/model-runner/pkg/inference"
+	"github.com/docker/model-runner/pkg/inference/backends/diffusers"
 	"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
 	"github.com/docker/model-runner/pkg/inference/backends/mlx"
 	"github.com/docker/model-runner/pkg/inference/backends/sglang"
@@ -30,6 +31,7 @@ type PlatformSupport interface {
 	SupportsVLLM() bool
 	SupportsVLLMMetal() bool
 	SupportsSGLang() bool
+	SupportsDiffusers() bool
 }
 
 // defaultPlatformSupport delegates to the platform package.
@@ -39,6 +41,7 @@ func (defaultPlatformSupport) SupportsMLX() bool       { return platform.Support
 func (defaultPlatformSupport) SupportsVLLM() bool      { return platform.SupportsVLLM() }
 func (defaultPlatformSupport) SupportsVLLMMetal() bool { return platform.SupportsVLLMMetal() }
 func (defaultPlatformSupport) SupportsSGLang() bool    { return platform.SupportsSGLang() }
+func (defaultPlatformSupport) SupportsDiffusers() bool { return platform.SupportsDiffusers() }
 
 // Scheduler is used to coordinate inference scheduling across multiple backends
 // and models.
@@ -121,18 +124,18 @@ func (s *Scheduler) Run(ctx context.Context) error {
 }
 
 // selectBackendForModel selects the appropriate backend for a model based on its format.
-// If the model is in safetensors format, it will prefer the best available backend:
-// - vLLM (handles platform dispatch internally: vllm-metal on macOS ARM64, standard vLLM on Linux)
-// - MLX on macOS
-// - SGLang on Linux
+// For safetensors models, it prefers: vLLM > MLX > SGLang.
+// For DDUF/diffusers models, it selects the diffusers backend.
+// For other formats (e.g. GGUF), it returns the provided default backend.
 func (s *Scheduler) selectBackendForModel(model types.Model, backend inference.Backend, modelRef string) inference.Backend {
 	config, err := model.Config()
 	if err != nil {
 		s.log.Warn("failed to fetch model config", "error", err)
 		return backend
 	}
 
-	if config.GetFormat() == types.FormatSafetensors {
+	switch config.GetFormat() {
+	case types.FormatSafetensors:
 		// Prefer vLLM for safetensors models (handles platform dispatch internally)
 		if s.platformSupport.SupportsVLLM() || s.platformSupport.SupportsVLLMMetal() {
 			if vllmBackend, ok := s.backends[vllm.Name]; ok && vllmBackend != nil {
@@ -151,8 +154,32 @@ func (s *Scheduler) selectBackendForModel(model types.Model, backend inference.B
 				return sglangBackend
 			}
 		}
+		backendName := "none"
+		if backend != nil {
+			backendName = backend.Name()
+		}
 		s.log.Warn("Model is in safetensors format but no compatible backend is available",
-			"model", utils.SanitizeForLog(modelRef), "backend", backend.Name())
+			"model", utils.SanitizeForLog(modelRef), "backend", backendName)
+
+	case types.FormatDDUF, types.FormatDiffusers: //nolint:staticcheck // FormatDiffusers kept for backward compatibility
+		// Select the diffusers backend for DDUF and legacy diffusers format models
+		if s.platformSupport.SupportsDiffusers() {
+			if diffusersBackend, ok := s.backends[diffusers.Name]; ok && diffusersBackend != nil {
+				return diffusersBackend
+			}
+		}
+		backendName := "none"
+		if backend != nil {
+			backendName = backend.Name()
+		}
+		s.log.Warn("Model is in DDUF/diffusers format but no compatible backend is available",
+			"model", utils.SanitizeForLog(modelRef), "backend", backendName)
+
+	case types.FormatGGUF:
+		// GGUF models use the default backend (llamacpp)
+
+	default:
+		// Unknown formats use the default backend
 	}
 
 	return backend
diff --git a/pkg/inference/scheduling/select_backend_test.go b/pkg/inference/scheduling/select_backend_test.go
@@ -6,6 +6,7 @@ import (
 
 	"github.com/docker/model-runner/pkg/distribution/types"
 	"github.com/docker/model-runner/pkg/inference"
+	"github.com/docker/model-runner/pkg/inference/backends/diffusers"
 	"github.com/docker/model-runner/pkg/inference/backends/mlx"
 	"github.com/docker/model-runner/pkg/inference/backends/sglang"
 	"github.com/docker/model-runner/pkg/inference/backends/vllm"
@@ -17,12 +18,14 @@ type mockPlatformSupport struct {
 	vllm      bool
 	vllmMetal bool
 	sglang    bool
+	diffusers bool
 }
 
 func (m mockPlatformSupport) SupportsMLX() bool       { return m.mlx }
 func (m mockPlatformSupport) SupportsVLLM() bool      { return m.vllm }
 func (m mockPlatformSupport) SupportsVLLMMetal() bool { return m.vllmMetal }
 func (m mockPlatformSupport) SupportsSGLang() bool    { return m.sglang }
+func (m mockPlatformSupport) SupportsDiffusers() bool { return m.diffusers }
 
 // mockModel is a minimal Model implementation for testing.
 type mockModel struct {
@@ -55,9 +58,12 @@ func TestSelectBackendForModel(t *testing.T) {
 	mlxBackend := &mockBackend{name: mlx.Name}
 	vllmBackend := &mockBackend{name: vllm.Name}
 	sglangBackend := &mockBackend{name: sglang.Name}
+	diffusersBackend := &mockBackend{name: diffusers.Name}
 
 	safetensorsModel := &mockModel{config: &types.Config{Format: types.FormatSafetensors}}
 	ggufModel := &mockModel{config: &types.Config{Format: types.FormatGGUF}}
+	ddufModel := &mockModel{config: &types.Config{Format: types.FormatDDUF}}
+	legacyDiffusersModel := &mockModel{config: &types.Config{Format: types.FormatDiffusers}} //nolint:staticcheck // testing backward compatibility
 
 	tests := []struct {
 		name            string
@@ -153,6 +159,49 @@ func TestSelectBackendForModel(t *testing.T) {
 			model:           safetensorsModel,
 			expectedBackend: vllm.Name,
 		},
+		{
+			name: "DDUF model selects diffusers backend when platform supports it",
+			backends: map[string]inference.Backend{
+				"llamacpp":     llamacppBackend,
+				diffusers.Name: diffusersBackend,
+			},
+			defaultBackend:  llamacppBackend,
+			platform:        mockPlatformSupport{diffusers: true},
+			model:           ddufModel,
+			expectedBackend: diffusers.Name,
+		},
+		{
+			name: "DDUF model falls back to default when platform does not support diffusers",
+			backends: map[string]inference.Backend{
+				"llamacpp":     llamacppBackend,
+				diffusers.Name: diffusersBackend,
+			},
+			defaultBackend:  llamacppBackend,
+			platform:        mockPlatformSupport{diffusers: false},
+			model:           ddufModel,
+			expectedBackend: "llamacpp",
+		},
+		{
+			name: "DDUF model falls back to default when diffusers backend not registered",
+			backends: map[string]inference.Backend{
+				"llamacpp": llamacppBackend,
+			},
+			defaultBackend:  llamacppBackend,
+			platform:        mockPlatformSupport{diffusers: true},
+			model:           ddufModel,
+			expectedBackend: "llamacpp",
+		},
+		{
+			name: "legacy diffusers format model selects diffusers backend",
+			backends: map[string]inference.Backend{
+				"llamacpp":     llamacppBackend,
+				diffusers.Name: diffusersBackend,
+			},
+			defaultBackend:  llamacppBackend,
+			platform:        mockPlatformSupport{diffusers: true},
+			model:           legacyDiffusersModel,
+			expectedBackend: diffusers.Name,
+		},
 	}
 
 	for _, tt := range tests {

Original file line number	Diff line number	Diff line change
`@@ -206,7 +206,7 @@ func (h *HTTPHandler) handleOpenAIInference(w http.ResponseWriter, r *http.Reque`
`206`	`206`	`// Non-blocking call to track the model usage.`
`207`	`207`	`h.scheduler.tracker.TrackModel(model, r.UserAgent(), action)`
`208`	`208`
`209`		`- // Automatically identify models for vLLM.`
	`209`	`+ // Automatically select backend for given model.`
`210`	`210`	`backend = h.scheduler.selectBackendForModel(model, backend, request.Model)`
`211`	`211`	`}`
`212`	`212`