From 2d276669a0cb41959fc67d17bc58e77853dc3eb5 Mon Sep 17 00:00:00 2001 From: Adam Malczewski Date: Sun, 28 Jun 2026 12:43:29 +0900 Subject: feat(concurrency-fixes): usage-gate + adaptive headroom + configurable cooldown --- packages/kernel/src/contracts/index.ts | 1 + packages/kernel/src/contracts/provider.ts | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'packages/kernel/src') diff --git a/packages/kernel/src/contracts/index.ts b/packages/kernel/src/contracts/index.ts index 28e0a0b..fc19267 100644 --- a/packages/kernel/src/contracts/index.ts +++ b/packages/kernel/src/contracts/index.ts @@ -103,6 +103,7 @@ export type { ProviderEvent, ProviderStreamOptions, ProviderToolCallEvent, + ProviderUsage, ReasoningDeltaEvent, ReasoningEffort, TextDeltaEvent, diff --git a/packages/kernel/src/contracts/provider.ts b/packages/kernel/src/contracts/provider.ts index 3137073..dea6c17 100644 --- a/packages/kernel/src/contracts/provider.ts +++ b/packages/kernel/src/contracts/provider.ts @@ -103,6 +103,19 @@ export interface ProviderStreamOptions { readonly logger?: Logger; } +/** + * A snapshot of the provider's current upstream usage. Returned by a + * provider's optional `getUsage` so a concurrency limiter can gate slot grants + * on the REAL upstream in-flight count (not just the limiter's local accounting, + * which lags the upstream `concurrent_sessions` counter by the release + * cooldown). `concurrentSessions` is the number of requests the provider counts + * as currently in flight. + */ +export interface ProviderUsage { + /** Upstream count of currently in-flight (generating) sessions. */ + readonly concurrentSessions: number; +} + /** * Metadata describing a single model a provider can serve. Returned by * `listModels` so a catalog (e.g. the credential-store) can enumerate the @@ -154,4 +167,18 @@ export interface ProviderContract { * credentials in; today the provider uses the key it resolved at activate. 
*/ readonly listModels?: () => Promise; + + /** + * Fetch the provider's current upstream usage snapshot. Optional: a provider + * that cannot (or chooses not to) report usage omits it, and a concurrency + * limiter falls back to cooldown-only slot recycling (no usage gate). When + * present, the limiter polls this before admitting a QUEUED agent and grants + * only when `concurrentSessions` is below the configured limit — preventing an + * N+1 overshoot from the upstream accounting lag. + * + * May return `undefined` (e.g. the endpoint returned an unexpected shape or a + * non-200) — the limiter treats `undefined` as "no usage info available" and + * falls back to granting (cooldown-only behavior) for that poll. + */ + readonly getUsage?: () => Promise<ProviderUsage | undefined>; } -- cgit v1.2.3