diff options
| author | Frank <[email protected]> | 2026-04-17 09:54:44 -0400 |
|---|---|---|
| committer | Frank <[email protected]> | 2026-04-17 09:54:47 -0400 |
| commit | 3707e4a49cb97639408a9e0da7cf148ca5ce8834 (patch) | |
| tree | a0d91540f8a639ff09c32ba9f9032af5129d1a53 /packages/console/app/src | |
| parent | cb425ac927cde685ef9b6e38a62ad71c408a47df (diff) | |
| download | opencode-3707e4a49cb97639408a9e0da7cf148ca5ce8834.tar.gz opencode-3707e4a49cb97639408a9e0da7cf148ca5ce8834.zip | |
zen: routing logic
Diffstat (limited to 'packages/console/app/src')
| -rw-r--r-- | packages/console/app/src/routes/zen/util/handler.ts | 24 | ||||
| -rw-r--r-- | packages/console/app/src/routes/zen/util/modelTpmLimiter.ts | 49 |
2 files changed, 71 insertions, 2 deletions
diff --git a/packages/console/app/src/routes/zen/util/handler.ts b/packages/console/app/src/routes/zen/util/handler.ts index d1c5985a8..2e576eaf6 100644 --- a/packages/console/app/src/routes/zen/util/handler.ts +++ b/packages/console/app/src/routes/zen/util/handler.ts @@ -45,6 +45,7 @@ import { LiteData } from "@opencode-ai/console-core/lite.js" import { Resource } from "@opencode-ai/console-resource" import { i18n, type Key } from "~/i18n" import { localeFromRequest } from "~/lib/language" +import { createModelTpmLimiter } from "./modelTpmLimiter" type ZenData = Awaited<ReturnType<typeof ZenData.list>> type RetryOptions = { @@ -121,6 +122,8 @@ export async function handler( const authInfo = await authenticate(modelInfo, zenApiKey) const billingSource = validateBilling(authInfo, modelInfo) logger.metric({ source: billingSource }) + const modelTpmLimiter = createModelTpmLimiter(modelInfo.providers) + const modelTpmLimits = await modelTpmLimiter?.check() const retriableRequest = async (retry: RetryOptions = { excludeProviders: [], retryCount: 0 }) => { const providerInfo = selectProvider( @@ -133,6 +136,7 @@ export async function handler( trialProviders, retry, stickyProvider, + modelTpmLimits, ) validateModelSettings(billingSource, authInfo) updateProviderKey(authInfo, providerInfo) @@ -229,6 +233,7 @@ export async function handler( const usageInfo = providerInfo.normalizeUsage(json.usage) const costInfo = calculateCost(modelInfo, usageInfo) await trialLimiter?.track(usageInfo) + await modelTpmLimiter?.track(providerInfo.id, providerInfo.model, usageInfo) await trackUsage(sessionId, billingSource, authInfo, modelInfo, providerInfo, usageInfo, costInfo) await reload(billingSource, authInfo, costInfo) json.cost = calculateOccurredCost(billingSource, costInfo) @@ -278,6 +283,7 @@ export async function handler( const usageInfo = providerInfo.normalizeUsage(usage) const costInfo = calculateCost(modelInfo, usageInfo) await trialLimiter?.track(usageInfo) + await 
modelTpmLimiter?.track(providerInfo.id, providerInfo.model, usageInfo) await trackUsage(sessionId, billingSource, authInfo, modelInfo, providerInfo, usageInfo, costInfo) await reload(billingSource, authInfo, costInfo) const cost = calculateOccurredCost(billingSource, costInfo) @@ -433,12 +439,16 @@ export async function handler( trialProviders: string[] | undefined, retry: RetryOptions, stickyProvider: string | undefined, + modelTpmLimits: Record<string, number> | undefined, ) { const modelProvider = (() => { + // Byok is top priority b/c if user set their own API key, we should use it + // instead of using the sticky provider for the same session if (authInfo?.provider?.credentials) { return modelInfo.providers.find((provider) => provider.id === modelInfo.byokProvider) } + // Always use the same provider for the same session if (stickyProvider) { const provider = modelInfo.providers.find((provider) => provider.id === stickyProvider) if (provider) return provider @@ -451,10 +461,20 @@ export async function handler( } if (retry.retryCount !== MAX_FAILOVER_RETRIES) { - const providers = modelInfo.providers + const allProviders = modelInfo.providers .filter((provider) => !provider.disabled) + .filter((provider) => provider.weight !== 0) .filter((provider) => !retry.excludeProviders.includes(provider.id)) - .flatMap((provider) => Array<typeof provider>(provider.weight ?? 1).fill(provider)) + .filter((provider) => { + if (!provider.tpmLimit) return true + const usage = modelTpmLimits?.[`${provider.id}/${provider.model}`] ?? 0 + return usage < provider.tpmLimit * 1_000_000 + }) + + const topPriority = Math.min(...allProviders.map((p) => p.priority)) + const providers = allProviders + .filter((p) => p.priority <= topPriority) + .flatMap((provider) => Array<typeof provider>(provider.weight).fill(provider)) // Use the last 4 characters of session ID to select a provider const identifier = sessionId.length ? 
/**
 * Creates a tokens-per-minute usage tracker for the given model providers.
 *
 * Only providers with a truthy `tpmLimit` are considered; each one is keyed as
 * `"<id>/<model>"`. When no provider has a limit, nothing is returned so callers
 * can skip limiter work entirely via optional chaining.
 *
 * NOTE(review): the minute bucket is computed once, when the limiter is created.
 * `track` therefore books usage to the minute in which the limiter was created,
 * even if it is called later — confirm this is the intended accounting.
 *
 * @param providers Candidate providers; `tpmLimit` marks the ones to rate limit.
 * @returns `undefined` when no provider has a limit, otherwise an object with:
 *   - `check()`: resolves to a map of `"<id>/<model>"` to the count stored for
 *     the current minute bucket (keys with no stored row are absent).
 *   - `track(id, model, usageInfo)`: adds the request's total token count to the
 *     row for `"<id>/<model>"` in the current minute bucket.
 */
export function createModelTpmLimiter(providers: { id: string; model: string; tpmLimit?: number }[]) {
  const keys = providers.filter((p) => p.tpmLimit).map((p) => `${p.id}/${p.model}`)
  if (keys.length === 0) return

  // toISOString() is always UTC; stripping non-digits and keeping the first 12
  // characters yields "yyyyMMddHHmm", i.e. one bucket per UTC minute.
  const yyyyMMddHHmm = new Date()
    .toISOString()
    .replace(/[^0-9]/g, "")
    .substring(0, 12)

  return {
    check: async () => {
      const rows = await Database.use((tx) =>
        tx
          .select()
          .from(ModelRateLimitTable)
          .where(and(inArray(ModelRateLimitTable.key, keys), eq(ModelRateLimitTable.interval, yyyyMMddHHmm))),
      )

      // Convert the rows into a map of "<id>/<model>" to the stored count.
      const counts: Record<string, number> = {}
      for (const row of rows) {
        counts[row.key] = row.count
      }
      return counts
    },
    track: async (id: string, model: string, usageInfo: UsageInfo) => {
      const usage =
        usageInfo.inputTokens +
        usageInfo.outputTokens +
        (usageInfo.reasoningTokens ?? 0) +
        (usageInfo.cacheReadTokens ?? 0) +
        (usageInfo.cacheWrite5mTokens ?? 0) +
        (usageInfo.cacheWrite1hTokens ?? 0)
      // Skip empty usage, and also NaN/Infinity: `NaN <= 0` is false, so a
      // malformed usage payload would otherwise reach the upsert below.
      if (!Number.isFinite(usage) || usage <= 0) return
      await Database.use((tx) =>
        tx
          .insert(ModelRateLimitTable)
          .values({ key: `${id}/${model}`, interval: yyyyMMddHHmm, count: usage })
          .onDuplicateKeyUpdate({ set: { count: sql`${ModelRateLimitTable.count} + ${usage}` } }),
      )
    },
  }
}
