blob: 5da7fd781dcb9cb080a795dc3dbf60fe47fbab9a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
|
import type { ProviderUsage } from "@dispatch/kernel";
import type { FetchLike } from "@dispatch/trace-replay";
/**
* Generic OpenAI-compatible usage fetch. The Umans `/v1/usage` endpoint returns:
*
* { usage: { concurrent_sessions: number }, limits: { concurrency: { limit, hard_cap } } }
*
* We extract only `concurrent_sessions` (the count a concurrency limiter gates
* on). Lives in this library (`@dispatch/openai-stream`) so any OpenAI-compatible
* provider extension reuses it without cross-extension code import
* (isolation-over-DRY: coupling is via this typed library surface). A provider
* supplies its own `id` (used in error labels) via `createOpenAICompatProvider`.
*/
/**
* The raw shape of the `/v1/usage` response (only the fields we read).
*
* Every field is optional because the payload is external JSON that is cast,
* not validated, to this type; `getUsage` checks the value at runtime before
* using it.
*/
interface UsageResponse {
/** Usage counters object from the response body; may be absent. */
readonly usage?: {
/** Current concurrent-session count; the only field `getUsage` extracts. */
readonly concurrent_sessions?: number;
};
}
/** Inputs for `getUsage`. */
export interface GetUsageConfig {
/** Base URL of the provider API; `getUsage` requests `<baseURL>/usage`. */
readonly baseURL: string;
/** API key, sent as `Authorization: Bearer <apiKey>`. */
readonly apiKey: string;
/** Optional fetch implementation (e.g. a fake for tests); the global `fetch` is used when omitted. */
readonly fetchFn?: FetchLike;
/**
* Provider id. The file header describes it as used in error labels;
* `getUsage` itself does not read it — NOTE(review): confirm where it is consumed.
*/
readonly providerId: string;
}
/**
 * Fetch the upstream usage snapshot and map it to a `ProviderUsage`.
 *
 * Issues `GET <baseURL>/usage` with an `Authorization: Bearer <apiKey>` header,
 * using `config.fetchFn` when supplied and the global `fetch` otherwise.
 * Trailing slashes on `baseURL` are ignored, so `".../v1"` and `".../v1/"`
 * hit the same endpoint.
 *
 * Resolves to `undefined` — and never rejects — when:
 * - the fetch call throws (e.g. a network error),
 * - the response is not OK (any non-2xx status),
 * - the body is not valid JSON,
 * - the body is JSON `null`, or `usage.concurrent_sessions` is missing, not a
 *   number, non-finite, or negative.
 *
 * In all of those cases the caller (the concurrency limiter) falls back to
 * cooldown-only slot recycling (no usage gate).
 *
 * Pure-ish I/O wrapper: the only effect is the injected fetch. Extracted for
 * direct unit testing with a fake fetch.
 *
 * @param config - Endpoint, credentials and optional fetch override.
 * @returns `{ concurrentSessions }` with the reported count truncated toward
 *   zero, or `undefined` when no usable count could be obtained.
 */
export async function getUsage(config: GetUsageConfig): Promise<ProviderUsage | undefined> {
  const effectiveFetch: FetchLike = config.fetchFn ?? fetch;

  // Strip trailing slashes so a base URL like ".../v1/" does not produce
  // ".../v1//usage", which some servers treat as a different (missing) route.
  let baseEnd = config.baseURL.length;
  while (baseEnd > 0 && config.baseURL.charAt(baseEnd - 1) === "/") {
    baseEnd -= 1;
  }
  const url = `${config.baseURL.slice(0, baseEnd)}/usage`;

  let response: Response;
  try {
    response = await effectiveFetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${config.apiKey}`,
      },
    });
  } catch {
    // Network error — the upstream is unreachable; treat as "no usage info".
    return undefined;
  }

  if (!response.ok) {
    // 404 / 401 / 5xx — the endpoint is unsupported or rejected the request.
    return undefined;
  }

  let body: UsageResponse | null;
  try {
    // The cast is unchecked: the payload is untrusted JSON and is validated below.
    body = (await response.json()) as UsageResponse | null;
  } catch {
    // Body was not valid JSON.
    return undefined;
  }

  // A literal `null` body parses successfully, so guard the root as well as
  // `usage`; otherwise the property access would throw and reject the promise.
  const raw = body?.usage?.concurrent_sessions;
  if (typeof raw !== "number" || !Number.isFinite(raw) || raw < 0) {
    return undefined;
  }

  // Session counts are whole numbers; drop any fractional part.
  return { concurrentSessions: Math.trunc(raw) };
}
|