Skip to content

Commit 8a7bb7c

Browse files
committed
zen: tpm routing
1 parent 22d33c5 commit 8a7bb7c

File tree

1 file changed

+4 -11 lines changed
  • packages/console/app/src/routes/zen/util

1 file changed

+4 -11 lines changed

packages/console/app/src/routes/zen/util/handler.ts

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -448,35 +448,28 @@ export async function handler(
       return modelInfo.providers.find((provider) => provider.id === modelInfo.byokProvider)
     }
 
-    // Filter out TPM limited providers
-    const allProviders = modelInfo.providers.filter((provider) => {
-      if (!provider.tpmLimit) return true
-      const usage = modelTpmLimits?.[`${provider.id}/${provider.model}`] ?? 0
-      return usage < provider.tpmLimit * 1_000_000
-    })
-
     // Always use the same provider for the same session
     if (stickyProvider) {
-      const provider = allProviders.find((provider) => provider.id === stickyProvider)
+      const provider = modelInfo.providers.find((provider) => provider.id === stickyProvider)
       if (provider) return provider
     }
 
     if (trialProviders) {
       const trialProvider = trialProviders[Math.floor(Math.random() * trialProviders.length)]
-      const provider = allProviders.find((provider) => provider.id === trialProvider)
+      const provider = modelInfo.providers.find((provider) => provider.id === trialProvider)
       if (provider) return provider
     }
 
     if (retry.retryCount !== MAX_FAILOVER_RETRIES) {
       let topPriority = Infinity
-      const providers = allProviders
+      const providers = modelInfo.providers
         .filter((provider) => !provider.disabled)
         .filter((provider) => provider.weight !== 0)
         .filter((provider) => !retry.excludeProviders.includes(provider.id))
         .filter((provider) => {
           if (!provider.tpmLimit) return true
           const usage = modelTpmLimits?.[`${provider.id}/${provider.model}`] ?? 0
-          return usage < provider.tpmLimit * 1_000_000 * 0.8
+          return usage < provider.tpmLimit * 1_000_000
         })
         .map((provider) => {
           topPriority = Math.min(topPriority, provider.priority)

0 commit comments

Comments
 (0)