summary | refs | log | tree | commit | diff | homepage
path: root/packages/console/app/src
diff options
context:
space:
mode:
author: Frank <[email protected]> 2026-04-17 09:54:44 -0400
committer: Frank <[email protected]> 2026-04-17 09:54:47 -0400
commit: 3707e4a49cb97639408a9e0da7cf148ca5ce8834 (patch)
tree: a0d91540f8a639ff09c32ba9f9032af5129d1a53 /packages/console/app/src
parent: cb425ac927cde685ef9b6e38a62ad71c408a47df (diff)
download: opencode-3707e4a49cb97639408a9e0da7cf148ca5ce8834.tar.gz
download: opencode-3707e4a49cb97639408a9e0da7cf148ca5ce8834.zip
zen: routing logic
Diffstat (limited to 'packages/console/app/src')
-rw-r--r-- packages/console/app/src/routes/zen/util/handler.ts | 24
-rw-r--r-- packages/console/app/src/routes/zen/util/modelTpmLimiter.ts | 49
2 files changed, 71 insertions, 2 deletions
diff --git a/packages/console/app/src/routes/zen/util/handler.ts b/packages/console/app/src/routes/zen/util/handler.ts
index d1c5985a8..2e576eaf6 100644
--- a/packages/console/app/src/routes/zen/util/handler.ts
+++ b/packages/console/app/src/routes/zen/util/handler.ts
@@ -45,6 +45,7 @@ import { LiteData } from "@opencode-ai/console-core/lite.js"
import { Resource } from "@opencode-ai/console-resource"
import { i18n, type Key } from "~/i18n"
import { localeFromRequest } from "~/lib/language"
+import { createModelTpmLimiter } from "./modelTpmLimiter"
type ZenData = Awaited<ReturnType<typeof ZenData.list>>
type RetryOptions = {
@@ -121,6 +122,8 @@ export async function handler(
const authInfo = await authenticate(modelInfo, zenApiKey)
const billingSource = validateBilling(authInfo, modelInfo)
logger.metric({ source: billingSource })
+ const modelTpmLimiter = createModelTpmLimiter(modelInfo.providers)
+ const modelTpmLimits = await modelTpmLimiter?.check()
const retriableRequest = async (retry: RetryOptions = { excludeProviders: [], retryCount: 0 }) => {
const providerInfo = selectProvider(
@@ -133,6 +136,7 @@ export async function handler(
trialProviders,
retry,
stickyProvider,
+ modelTpmLimits,
)
validateModelSettings(billingSource, authInfo)
updateProviderKey(authInfo, providerInfo)
@@ -229,6 +233,7 @@ export async function handler(
const usageInfo = providerInfo.normalizeUsage(json.usage)
const costInfo = calculateCost(modelInfo, usageInfo)
await trialLimiter?.track(usageInfo)
+ await modelTpmLimiter?.track(providerInfo.id, providerInfo.model, usageInfo)
await trackUsage(sessionId, billingSource, authInfo, modelInfo, providerInfo, usageInfo, costInfo)
await reload(billingSource, authInfo, costInfo)
json.cost = calculateOccurredCost(billingSource, costInfo)
@@ -278,6 +283,7 @@ export async function handler(
const usageInfo = providerInfo.normalizeUsage(usage)
const costInfo = calculateCost(modelInfo, usageInfo)
await trialLimiter?.track(usageInfo)
+ await modelTpmLimiter?.track(providerInfo.id, providerInfo.model, usageInfo)
await trackUsage(sessionId, billingSource, authInfo, modelInfo, providerInfo, usageInfo, costInfo)
await reload(billingSource, authInfo, costInfo)
const cost = calculateOccurredCost(billingSource, costInfo)
@@ -433,12 +439,16 @@ export async function handler(
trialProviders: string[] | undefined,
retry: RetryOptions,
stickyProvider: string | undefined,
+ modelTpmLimits: Record<string, number> | undefined,
) {
const modelProvider = (() => {
+ // Byok is top priority b/c if user set their own API key, we should use it
+ // instead of using the sticky provider for the same session
if (authInfo?.provider?.credentials) {
return modelInfo.providers.find((provider) => provider.id === modelInfo.byokProvider)
}
+ // Always use the same provider for the same session
if (stickyProvider) {
const provider = modelInfo.providers.find((provider) => provider.id === stickyProvider)
if (provider) return provider
@@ -451,10 +461,20 @@ export async function handler(
}
if (retry.retryCount !== MAX_FAILOVER_RETRIES) {
- const providers = modelInfo.providers
+ const allProviders = modelInfo.providers
.filter((provider) => !provider.disabled)
+ .filter((provider) => provider.weight !== 0)
.filter((provider) => !retry.excludeProviders.includes(provider.id))
- .flatMap((provider) => Array<typeof provider>(provider.weight ?? 1).fill(provider))
+ .filter((provider) => {
+ if (!provider.tpmLimit) return true
+ const usage = modelTpmLimits?.[`${provider.id}/${provider.model}`] ?? 0
+ return usage < provider.tpmLimit * 1_000_000
+ })
+
+ const topPriority = Math.min(...allProviders.map((p) => p.priority))
+ const providers = allProviders
+ .filter((p) => p.priority <= topPriority)
+ .flatMap((provider) => Array<typeof provider>(provider.weight).fill(provider))
// Use the last 4 characters of session ID to select a provider
const identifier = sessionId.length ? sessionId : ip
diff --git a/packages/console/app/src/routes/zen/util/modelTpmLimiter.ts b/packages/console/app/src/routes/zen/util/modelTpmLimiter.ts
new file mode 100644
index 000000000..eeb89da5f
--- /dev/null
+++ b/packages/console/app/src/routes/zen/util/modelTpmLimiter.ts
@@ -0,0 +1,49 @@
+import { and, Database, eq, inArray, sql } from "@opencode-ai/console-core/drizzle/index.js"
+import { ModelRateLimitTable } from "@opencode-ai/console-core/schema/ip.sql.js"
+import { UsageInfo } from "./provider/provider"
+
+export function createModelTpmLimiter(providers: { id: string; model: string; tpmLimit?: number }[]) {
+ const keys = providers.filter((p) => p.tpmLimit).map((p) => `${p.id}/${p.model}`)
+ if (keys.length === 0) return
+
+ const yyyyMMddHHmm = new Date(Date.now())
+ .toISOString()
+ .replace(/[^0-9]/g, "")
+ .substring(0, 12)
+
+ return {
+ check: async () => {
+ const data = await Database.use((tx) =>
+ tx
+ .select()
+ .from(ModelRateLimitTable)
+ .where(and(inArray(ModelRateLimitTable.key, keys), eq(ModelRateLimitTable.interval, yyyyMMddHHmm))),
+ )
+
+ // convert to map of model to count
+ return data.reduce(
+ (acc, curr) => {
+ acc[curr.key] = curr.count
+ return acc
+ },
+ {} as Record<string, number>,
+ )
+ },
+ track: async (id: string, model: string, usageInfo: UsageInfo) => {
+ const usage =
+ usageInfo.inputTokens +
+ usageInfo.outputTokens +
+ (usageInfo.reasoningTokens ?? 0) +
+ (usageInfo.cacheReadTokens ?? 0) +
+ (usageInfo.cacheWrite5mTokens ?? 0) +
+ (usageInfo.cacheWrite1hTokens ?? 0)
+ if (usage <= 0) return
+ await Database.use((tx) =>
+ tx
+ .insert(ModelRateLimitTable)
+ .values({ key: `${id}/${model}`, interval: yyyyMMddHHmm, count: usage })
+ .onDuplicateKeyUpdate({ set: { count: sql`${ModelRateLimitTable.count} + ${usage}` } }),
+ )
+ },
+ }
+}