src/core/metrics/format.ts


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175

import type { StepMetrics, TurnMetrics, Usage } from "@dispatch/wire";
import type { CacheRateView, StepMetricsView, TurnMetricsView } from "./types";

/**
 * Render a token count with US-English digit grouping (e.g. `1,234,567`).
 *
 * @param n Number to format.
 * @returns The grouped decimal string.
 */
function formatTokens(n: number): string {
	const usEnglish = new Intl.NumberFormat("en-US");
	return usEnglish.format(n);
}

/**
 * Format a duration in milliseconds for display.
 *
 * Sub-second values render as whole milliseconds (`"250ms"`); everything else
 * renders as seconds with one decimal (`"1.5s"`). The unit is chosen from the
 * ROUNDED millisecond value, so 999.6 renders `"1.0s"` rather than `"1000ms"`.
 *
 * @param ms Duration in milliseconds, or `undefined` when not measured.
 * @returns The label, or `null` when the duration is absent, non-finite, zero
 *   or negative (nothing meaningful to show).
 */
function formatDuration(ms: number | undefined): string | null {
	// NaN/Infinity would otherwise leak out as "NaNs" / "Infinitys".
	if (ms === undefined || !Number.isFinite(ms) || ms <= 0) return null;
	const wholeMs = Math.round(ms);
	// Compare the rounded value: 999.5..999.99 rounds up to 1000 and belongs in
	// the seconds branch.
	if (wholeMs < 1000) return `${wholeMs}ms`;
	return `${(ms / 1000).toFixed(1)}s`;
}

/**
 * Format a tokens-per-second rate for display.
 *
 * Rates that display below 10 keep one decimal (`"3.1 tok/s"`); anything else
 * is rounded to a whole number (`"43 tok/s"`). The precision is chosen from the
 * ROUNDED value, so 9.96 renders `"10 tok/s"` rather than `"10.0 tok/s"`.
 *
 * @param tps Rate in tokens per second, or `null` when it could not be computed.
 * @returns The label, or `null` when the rate is absent or not a finite number.
 */
function formatTps(tps: number | null): string | null {
	// A NaN/Infinity rate would otherwise render as "NaN tok/s".
	if (tps === null || !Number.isFinite(tps)) return null;
	if (tps < 10) {
		const oneDecimal = tps.toFixed(1);
		// 9.95..9.99 rounds up to "10.0"; fall through to the whole-number form
		// so the label matches what an exact 10 produces.
		if (oneDecimal !== "10.0") return `${oneDecimal} tok/s`;
	}
	return `${Math.round(tps)} tok/s`;
}

/**
 * Human-readable label for the current context size.
 *
 * - A known count becomes `"<n> tokens in context"`, with thousands separators.
 * - `undefined` means "unknown" (no per-step usage has been reported yet) and
 *   becomes the placeholder `"context size unknown"` — it is never shown as `0`.
 */
export function formatContextSize(n: number | undefined): string {
	return n === undefined ? "context size unknown" : `${formatTokens(n)} tokens in context`;
}

/**
 * Compact token count for a slim status bar: `812`, `12.3k`, `1.2M`. Full
 * thousands-separated numbers live elsewhere; this trades precision for width.
 *
 * Values below 1000 are printed as-is. Larger values are scaled to thousands
 * (`k`) or millions (`M`) and shown with one decimal below 100 units and as a
 * whole number from 100 units up. Both the precision and the unit are chosen
 * from the ROUNDED value, so boundary inputs never produce `100.0k` or
 * `1000k`: 99,960 renders `100k` and 999,999 renders `1.0M`.
 *
 * @param n Token count to format.
 * @returns The compact label.
 */
export function formatCompactTokens(n: number): string {
	if (n < 1000) return `${n}`;

	// One decimal below 100, none from 100 up — decided after rounding so that
	// 99.96 becomes "100" rather than "100.0".
	const mantissa = (scaled: number): string => {
		const oneDecimal = scaled.toFixed(1);
		return Number(oneDecimal) >= 100 ? `${Math.round(scaled)}` : oneDecimal;
	};

	if (n < 1_000_000) {
		const thousands = mantissa(n / 1000);
		// Rounding can carry into four digits (999,500 and up); those inputs are
		// promoted to the millions form below instead of printing "1000k".
		if (thousands !== "1000") return `${thousands}k`;
	}
	return `${mantissa(n / 1_000_000)}M`;
}

/**
 * Context-window occupancy: the current size against a max window limit.
 *
 * This is the shape returned by `computeContextUsage`. `percent` is
 * `current / max * 100` clamped to [0, 100], UNROUNDED (the UI picks the
 * precision) — so a few-thousand-token context against a 1,000,000 window still
 * reads non-zero.
 */
export interface ContextUsage {
	/** The latest turn's context size; 0 when the size is unknown. */
	readonly current: number;
	/**
	 * The model's window limit, or `null` when unknown. `computeContextUsage`
	 * also reports `null` for a limit that is not a positive number.
	 */
	readonly max: number | null;
	/**
	 * Occupancy as an unrounded percentage in [0, 100]; `null` when `max` is
	 * unknown (no bar/denominator).
	 */
	readonly percent: number | null;
}

/**
 * Derive context-window occupancy from a context size and a window limit.
 *
 * @param contextSize Current context size; a missing value is reported as 0.
 * @param contextLimit Window limit. Anything that is not a positive number
 *   (missing, `null`, zero, negative) is treated as unknown.
 * @returns `current`, `max` and `percent`. With an unknown limit both `max` and
 *   `percent` are `null`; otherwise `percent` is `current / max * 100` clamped
 *   to [0, 100] and left unrounded.
 */
export function computeContextUsage(
	contextSize: number | undefined,
	contextLimit: number | null | undefined,
): ContextUsage {
	const current = contextSize ?? 0;

	// No usable limit means no denominator, so no percentage.
	if (typeof contextLimit !== "number" || !(contextLimit > 0)) {
		return { current, max: null, percent: null };
	}

	const rawPercent = (current / contextLimit) * 100;
	const percent = Math.min(100, Math.max(0, rawPercent));
	return { current, max: contextLimit, percent };
}

/**
 * Tokens generated per second of elapsed time.
 *
 * @param outputTokens Number of tokens produced.
 * @param elapsedMs Elapsed time in milliseconds, if measured.
 * @returns `outputTokens` divided by the elapsed seconds, or `null` when the
 *   elapsed time is missing, zero or negative.
 */
export function computeTps(outputTokens: number, elapsedMs: number | undefined): number | null {
	return elapsedMs === undefined || elapsedMs <= 0
		? null
		: outputTokens / (elapsedMs / 1000);
}

/** Sum of a usage record's input and output tokens (cache counters are not added). */
function totalTokens(u: Usage): number {
	const { inputTokens, outputTokens } = u;
	return inputTokens + outputTokens;
}

/**
 * One-line token breakdown: `"<in> in / <out> out"`, followed by
 * `" / <n> cache"` only when a positive number of cache-read tokens was
 * reported. All counts are thousands-separated.
 */
function formatBreakdown(u: Usage): string {
	const inOut = `${formatTokens(u.inputTokens)} in / ${formatTokens(u.outputTokens)} out`;
	const cached = u.cacheReadTokens;
	// An absent or zero cache count is left out entirely rather than shown as "0 cache".
	return cached !== undefined && cached > 0
		? `${inOut} / ${formatTokens(cached)} cache`
		: inOut;
}

/**
 * Build the display view for one step's metrics.
 *
 * @param step Raw metrics for the step.
 * @param index Zero-based position of the step; the label is one-based.
 * @returns Preformatted strings. The throughput is computed over `decodeMs`,
 *   falling back to `genTotalMs` when `decodeMs` is not set.
 */
export function viewStepMetrics(step: StepMetrics, index: number): StepMetricsView {
	const { usage, ttftMs, decodeMs, genTotalMs } = step;
	const rate = computeTps(usage.outputTokens, decodeMs ?? genTotalMs);
	return {
		label: `step ${index + 1}`,
		tokensLabel: `${formatTokens(totalTokens(usage))} tok`,
		tps: formatTps(rate),
		ttft: formatDuration(ttftMs),
		decode: formatDuration(decodeMs),
		genTotal: formatDuration(genTotalMs),
	};
}

/**
 * Cache hit rate as a whole-number percentage in 0..100.
 *
 * The rate is `cacheReadTokens / inputTokens`, limited to [0, 1] before being
 * scaled and rounded. A missing cache field counts as 0, and 0% is a real
 * result rather than "no data". With no input tokens the result is 0.
 */
export function computeCachePct(u: Usage): number {
	if (u.inputTokens <= 0) return 0;

	let hitRatio = (u.cacheReadTokens ?? 0) / u.inputTokens;
	if (hitRatio < 0) {
		hitRatio = 0;
	} else if (hitRatio > 1) {
		hitRatio = 1;
	}
	return Math.round(hitRatio * 100);
}

/**
 * Badge colour for a cache hit percentage: `"success"` from 66 up,
 * `"warning"` from 33 up, and `"error"` for anything lower.
 */
function cacheLevel(pct: number): "success" | "warning" | "error" {
	return pct >= 66 ? "success" : pct >= 33 ? "warning" : "error";
}

/**
 * Build the display view for a cache hit rate: the percentage, its badge colour
 * level, and whether any tokens were read from cache at all.
 */
export function viewCacheRate(u: Usage): CacheRateView {
	const pct = computeCachePct(u);
	const cachedTokens = u.cacheReadTokens ?? 0;
	return {
		pct,
		level: cacheLevel(pct),
		isHit: cachedTokens > 0,
	};
}

/**
 * Expected cache (retention): the share of the cache that existed going INTO
 * this turn that was actually read back, as a whole-number percentage.
 *
 * The formula is `clamp01(cacheRead_N / (cacheRead_{N-1} + cacheWrite_{N-1}))`:
 * the denominator is the PRIOR turn's cached prefix (what it read plus what it
 * wrote). Ideally this is ~100% on every turn after the first; anything lower
 * means the cache busted or expired.
 *
 * @returns The percentage, or `null` when it cannot be derived — there is no
 *   prior turn (`prev === null`) or the prior turn cached nothing
 *   (denominator <= 0). `null` is distinct from a genuine 0%.
 */
export function computeExpectedCachePct(current: Usage, prev: Usage | null): number | null {
	const priorPrefix =
		prev === null ? null : (prev.cacheReadTokens ?? 0) + (prev.cacheWriteTokens ?? 0);
	if (priorPrefix === null || priorPrefix <= 0) return null;

	let retained = (current.cacheReadTokens ?? 0) / priorPrefix;
	if (retained < 0) {
		retained = 0;
	} else if (retained > 1) {
		retained = 1;
	}
	return Math.round(retained * 100);
}

/**
 * Build the display view for cross-turn cache retention: the percentage, its
 * badge colour level, and whether the current turn read anything from cache.
 * Returns `null` whenever `computeExpectedCachePct` cannot derive a value.
 */
export function viewExpectedCache(current: Usage, prev: Usage | null): CacheRateView | null {
	const pct = computeExpectedCachePct(current, prev);
	if (pct === null) {
		return null;
	}
	const cachedTokens = current.cacheReadTokens ?? 0;
	return {
		pct,
		level: cacheLevel(pct),
		isHit: cachedTokens > 0,
	};
}

/**
 * Build the display view for a turn's aggregate metrics.
 *
 * Throughput is the turn's output tokens over the summed generation time of its
 * steps, where each step contributes `decodeMs`, or `genTotalMs` when
 * `decodeMs` is not set. If no step reports either timing the sum stays
 * undefined and no throughput is shown.
 *
 * @param turn Raw metrics for the turn.
 * @param turnNumber Optional number for the label; without it the label is
 *   just `"turn"`.
 */
export function viewTurnMetrics(turn: TurnMetrics, turnNumber?: number): TurnMetricsView {
	let generationMs: number | undefined;
	for (const { decodeMs, genTotalMs } of turn.steps) {
		const contribution = decodeMs ?? genTotalMs;
		if (contribution === undefined) continue;
		generationMs = (generationMs ?? 0) + contribution;
	}

	const label = turnNumber === undefined ? "turn" : `turn ${turnNumber}`;
	const rate = computeTps(turn.usage.outputTokens, generationMs);
	return {
		label,
		tokensLabel: `${formatTokens(totalTokens(turn.usage))} tok`,
		breakdown: formatBreakdown(turn.usage),
		tps: formatTps(rate),
		duration: formatDuration(turn.durationMs),
	};
}