Skip to content

Commit 6aca887

Browse files
authored
Merge pull request #621 from docker/add-default-runner-config
feat: add default runner configuration if none exists
2 parents: 0f37ac7 + d8681f4; commit: 6aca887

2 files changed

Lines changed: 20 additions & 3 deletions

File tree

pkg/distribution/internal/store/bundles.go

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,12 +30,13 @@ func (s *LocalStore) BundleForModel(ref string) (types.ModelBundle, error) {
30 30
return nil, fmt.Errorf("get model ID: %w", err)
31 31
}
32 32
path := s.bundlePath(dgst)
33-
if bdl, err := bundle.Parse(path); err != nil {
33+
bdl, err := bundle.Parse(path)
34+
if err != nil {
34 35
// create for first time or replace bad/corrupted bundle
35 36
return s.createBundle(path, mdl)
36-
} else {
37-
return bdl, nil
38 37
}
38+
39+
return bdl, nil
39 40
}
40 41

41 42
// createBundle unpacks the bundle to path, replacing existing bundle if one is found

pkg/inference/scheduling/loader.go

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -436,6 +436,22 @@ func (l *loader) load(ctx context.Context, backendName, modelID, modelRef string
436 436
}
437 437
}
438 438

439+
// If no explicit config exists, create a default one with the model's context size
440+
// so that the OpenAI recorder can report the actual configuration being used.
441+
if runnerConfig == nil {
442+
defaultConfig := inference.BackendConfiguration{}
443+
if l.modelManager != nil {
444+
if bundle, err := l.modelManager.GetBundle(modelID); err != nil {
445+
l.log.Warnf("Failed to get bundle for model %s to determine default context size: %v", modelID, err)
446+
} else if runtimeConfig := bundle.RuntimeConfig(); runtimeConfig != nil {
447+
if ctxSize := runtimeConfig.GetContextSize(); ctxSize != nil {
448+
defaultConfig.ContextSize = ctxSize
449+
}
450+
}
451+
}
452+
runnerConfig = &defaultConfig
453+
}
454+
439 455
l.log.Infof("Loading %s backend runner with model %s in %s mode", backendName, modelID, mode)
440 456

441 457
// Acquire the loader lock and defer its release.

0 commit comments

Comments
 (0)