Skip to content

Commit f74a255

Browse files
committed
zen: tpm routing
1 parent 3e8abac commit f74a255

5 files changed

Lines changed: 2746 additions & 29 deletions

File tree

packages/console/app/src/routes/zen/util/handler.ts

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -448,31 +448,40 @@ export async function handler(
448448
return modelInfo.providers.find((provider) => provider.id === modelInfo.byokProvider)
449449
}
450450

451+
// Filter out TPM limited providers
452+
const allProviders = modelInfo.providers.filter((provider) => {
453+
if (!provider.tpmLimit) return true
454+
const usage = modelTpmLimits?.[`${provider.id}/${provider.model}`] ?? 0
455+
return usage < provider.tpmLimit * 1_000_000
456+
})
457+
451458
// Always use the same provider for the same session
452459
if (stickyProvider) {
453-
const provider = modelInfo.providers.find((provider) => provider.id === stickyProvider)
460+
const provider = allProviders.find((provider) => provider.id === stickyProvider)
454461
if (provider) return provider
455462
}
456463

457464
if (trialProviders) {
458465
const trialProvider = trialProviders[Math.floor(Math.random() * trialProviders.length)]
459-
const provider = modelInfo.providers.find((provider) => provider.id === trialProvider)
466+
const provider = allProviders.find((provider) => provider.id === trialProvider)
460467
if (provider) return provider
461468
}
462469

463470
if (retry.retryCount !== MAX_FAILOVER_RETRIES) {
464-
const allProviders = modelInfo.providers
471+
let topPriority = Infinity
472+
const providers = allProviders
465473
.filter((provider) => !provider.disabled)
466474
.filter((provider) => provider.weight !== 0)
467475
.filter((provider) => !retry.excludeProviders.includes(provider.id))
468476
.filter((provider) => {
469477
if (!provider.tpmLimit) return true
470478
const usage = modelTpmLimits?.[`${provider.id}/${provider.model}`] ?? 0
471-
return usage < provider.tpmLimit * 1_000_000
479+
return usage < provider.tpmLimit * 1_000_000 * 0.8
480+
})
481+
.map((provider) => {
482+
topPriority = Math.min(topPriority, provider.priority)
483+
return provider
472484
})
473-
474-
const topPriority = Math.min(...allProviders.map((p) => p.priority))
475-
const providers = allProviders
476485
.filter((p) => p.priority <= topPriority)
477486
.flatMap((provider) => Array<typeof provider>(provider.weight).fill(provider))
478487

Lines changed: 12 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,28 +1,25 @@
11
import { and, Database, eq, inArray, sql } from "@opencode-ai/console-core/drizzle/index.js"
2-
import { ModelTpmLimitTable } from "@opencode-ai/console-core/schema/ip.sql.js"
2+
import { ModelTpmRateLimitTable } from "@opencode-ai/console-core/schema/ip.sql.js"
33
import { UsageInfo } from "./provider/provider"
44

55
export function createModelTpmLimiter(providers: { id: string; model: string; tpmLimit?: number }[]) {
66
const ids = providers.filter((p) => p.tpmLimit).map((p) => `${p.id}/${p.model}`)
77
if (ids.length === 0) return
88

9-
const yyyyMMddHHmm = new Date(Date.now())
10-
.toISOString()
11-
.replace(/[^0-9]/g, "")
12-
.substring(0, 12)
9+
const yyyyMMddHHmm = parseInt(
10+
new Date(Date.now())
11+
.toISOString()
12+
.replace(/[^0-9]/g, "")
13+
.substring(0, 12),
14+
)
1315

1416
return {
1517
check: async () => {
1618
const data = await Database.use((tx) =>
1719
tx
1820
.select()
19-
.from(ModelTpmLimitTable)
20-
.where(
21-
inArray(
22-
ModelTpmLimitTable.id,
23-
ids.map((id) => formatId(id, yyyyMMddHHmm)),
24-
),
25-
),
21+
.from(ModelTpmRateLimitTable)
22+
.where(and(inArray(ModelTpmRateLimitTable.id, ids), eq(ModelTpmRateLimitTable.interval, yyyyMMddHHmm))),
2623
)
2724

2825
// convert to map of model to count
@@ -41,14 +38,10 @@ export function createModelTpmLimiter(providers: { id: string; model: string; tp
4138
if (usage <= 0) return
4239
await Database.use((tx) =>
4340
tx
44-
.insert(ModelTpmLimitTable)
45-
.values({ id: formatId(id, yyyyMMddHHmm), count: usage })
46-
.onDuplicateKeyUpdate({ set: { count: sql`${ModelTpmLimitTable.count} + ${usage}` } }),
41+
.insert(ModelTpmRateLimitTable)
42+
.values({ id, interval: yyyyMMddHHmm, count: usage })
43+
.onDuplicateKeyUpdate({ set: { count: sql`${ModelTpmRateLimitTable.count} + ${usage}` } }),
4744
)
4845
},
4946
}
50-
51-
function formatId(id: string, yyyyMMddHHmm: string) {
52-
return `${id.substring(0, 200)}/${yyyyMMddHHmm}`
53-
}
5447
}
Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
-- Per-minute usage counters for TPM-limited provider/model pairs.
--
-- `id`       : "<providerId>/<model>" key built by the limiter.
-- `interval` : the UTC minute the counter belongs to, stored as the integer
--              yyyyMMddHHmm (e.g. 202501311542).
-- `count`    : usage accumulated for that id within that minute.
--
-- The primary key must cover BOTH `id` and `interval`. The limiter writes with
-- INSERT ... ON DUPLICATE KEY UPDATE count = count + <usage> and reads with
-- `id IN (...) AND interval = <current minute>`. With a key on `id` alone,
-- every insert after the first minute would collide with the stale row,
-- bumping its count while leaving its old `interval`, so the read for the
-- current minute would never find it and the limit would never apply.
--
-- NOTE(review): the Drizzle table definition for this table is not visible
-- here; confirm it declares the same composite primary key.
CREATE TABLE `model_tpm_rate_limit` (
	`id` varchar(255) NOT NULL,
	`interval` bigint NOT NULL,
	`count` int NOT NULL,
	PRIMARY KEY (`id`, `interval`)
);

0 commit comments

Comments
 (0)