@@ -97,6 +97,7 @@ func (h *HTTPHandler) routeHandlers() map[string]http.HandlerFunc {
9797 m ["GET " + inference .InferencePrefix + "/v1/models" ] = h .handleModels
9898 m ["GET " + inference .InferencePrefix + "/v1/models/{name...}" ] = h .handleModels
9999
100+ m ["POST " + inference .InferencePrefix + "/install-backend" ] = h .InstallBackend
100101 m ["GET " + inference .InferencePrefix + "/status" ] = h .GetBackendStatus
101102 m ["GET " + inference .InferencePrefix + "/ps" ] = h .GetRunningBackends
102103 m ["GET " + inference .InferencePrefix + "/df" ] = h .GetDiskUsage
@@ -211,6 +212,8 @@ func (h *HTTPHandler) handleOpenAIInference(w http.ResponseWriter, r *http.Reque
211212 // shutting down (since that will also cancel the request context).
212213 // Either way, provide a response, even if it's ignored.
213214 http .Error (w , "service unavailable" , http .StatusServiceUnavailable )
215+ } else if errors .Is (err , errBackendNotInstalled ) {
216+ http .Error (w , fmt .Sprintf ("backend %q is not installed; run: docker model install-runner --backend %s" , backend .Name (), backend .Name ()), http .StatusPreconditionFailed )
214217 } else if errors .Is (err , vllm .ErrorNotFound ) {
215218 http .Error (w , err .Error (), http .StatusPreconditionFailed )
216219 } else {
@@ -336,6 +339,38 @@ func (h *HTTPHandler) Unload(w http.ResponseWriter, r *http.Request) {
336339 }
337340}
338341
// installBackendRequest models the JSON payload accepted by the
// install-backend endpoint, e.g. {"backend": "<name>"}.
type installBackendRequest struct {
	// Backend is decoded from the "backend" key of the request body.
	Backend string `json:"backend"`
}
346+
347+ // InstallBackend handles POST <inference-prefix>/install-backend requests.
348+ // It triggers on-demand installation of a deferred backend.
349+ func (h * HTTPHandler ) InstallBackend (w http.ResponseWriter , r * http.Request ) {
350+ body , err := io .ReadAll (http .MaxBytesReader (w , r .Body , maximumOpenAIInferenceRequestSize ))
351+ if err != nil {
352+ http .Error (w , "failed to read request body" , http .StatusInternalServerError )
353+ return
354+ }
355+
356+ var req installBackendRequest
357+ if err := json .Unmarshal (body , & req ); err != nil || req .Backend == "" {
358+ http .Error (w , "invalid request: backend is required" , http .StatusBadRequest )
359+ return
360+ }
361+
362+ if err := h .scheduler .InstallBackend (r .Context (), req .Backend ); err != nil {
363+ if errors .Is (err , ErrBackendNotFound ) {
364+ http .Error (w , err .Error (), http .StatusNotFound )
365+ } else {
366+ http .Error (w , fmt .Sprintf ("backend installation failed: %v" , err ), http .StatusInternalServerError )
367+ }
368+ return
369+ }
370+
371+ w .WriteHeader (http .StatusOK )
372+ }
373+
339374// Configure handles POST <inference-prefix>/{backend}/_configure requests.
340375func (h * HTTPHandler ) Configure (w http.ResponseWriter , r * http.Request ) {
341376 // Determine the requested backend and ensure that it's valid.
0 commit comments