// packages/throughput-store/src/aggregate.ts

/**
 * Pure throughput aggregation.
 *
 * Per model, tokens-per-second is the TOKEN-WEIGHTED average:
 *
 *     tok/s = Σ(output_tokens) / Σ(generation_seconds)
 *
 * i.e. total tokens over total generation time across the period's turns
 * (equivalently, the token-weighted harmonic mean of the per-turn rates). A
 * turn that generated more tokens therefore counts proportionally more than a
 * small turn — large turns dominate, exactly as intended. Generation time is
 * the pure decode time (excludes tool-execution waits).
 */

/**
 * One turn's throughput measurement for a single model — the input unit
 * consumed by `aggregateSamples`.
 */
export interface ThroughputSample {
	/** Model identifier; `aggregateSamples` groups samples by this value. */
	readonly model: string;
	/** Epoch-ms the turn completed. */
	readonly ts: number;
	/** Output tokens generated in the turn. */
	readonly outputTokens: number;
	/** Pure generation time for the turn (ms), summed across its steps. */
	readonly genMs: number;
}

/** Per-model result row, as produced by `aggregateSamples`. */
export interface ModelThroughput {
	/** Model the totals below belong to. */
	readonly model: string;
	/**
	 * Token-weighted average tokens/second over the period. `aggregateSamples`
	 * rounds it to two decimals and reports 0 when `totalGenMs` is not positive.
	 */
	readonly tokensPerSecond: number;
	/** Sum of `outputTokens` over the contributing turns. */
	readonly totalOutputTokens: number;
	/** Sum of `genMs` (milliseconds) over the contributing turns. */
	readonly totalGenMs: number;
	/** Number of turns that contributed. */
	readonly turns: number;
}

/**
 * Aggregate samples within the half-open range `[start, end)` into per-model
 * throughput, sorted by tok/s descending (ties broken by model name).
 *
 * For each model, the output tokens and generation milliseconds of its
 * in-range samples are summed; tok/s is total tokens over total seconds,
 * rounded to two decimals. A model whose summed generation time is not
 * positive reports a rate of 0.
 *
 * A sample whose `ts` is `NaN` is never inside the range and is ignored.
 *
 * @param samples - Turn samples to aggregate; the array is not modified.
 * @param start - Inclusive lower bound on `ts` (epoch-ms).
 * @param end - Exclusive upper bound on `ts` (epoch-ms).
 * @returns One entry per model that has at least one in-range sample.
 */
export function aggregateSamples(
	samples: readonly ThroughputSample[],
	start: number,
	end: number,
): ModelThroughput[] {
	const byModel = new Map<string, { tokens: number; genMs: number; turns: number }>();

	for (const s of samples) {
		// Positive membership test: a NaN timestamp fails every comparison, so
		// `ts < start || ts >= end` would let it through into every period.
		if (!(s.ts >= start && s.ts < end)) continue;

		let acc = byModel.get(s.model);
		if (acc === undefined) {
			acc = { tokens: 0, genMs: 0, turns: 0 };
			byModel.set(s.model, acc);
		}
		acc.tokens += s.outputTokens;
		acc.genMs += s.genMs;
		acc.turns += 1;
	}

	const result = Array.from(
		byModel,
		([model, acc]): ModelThroughput => ({
			model,
			// Guard the division: with no positive generation time there is no rate.
			tokensPerSecond: acc.genMs > 0 ? round2(acc.tokens / (acc.genMs / 1000)) : 0,
			totalOutputTokens: acc.tokens,
			totalGenMs: acc.genMs,
			turns: acc.turns,
		}),
	);

	// Fastest first; `||` falls through to the name comparison when rates are equal.
	result.sort((a, b) => b.tokensPerSecond - a.tokensPerSecond || a.model.localeCompare(b.model));
	return result;
}

/** Round `n` to two decimal places using `Math.round`. */
function round2(n: number): number {
	return Math.round(n * 100) / 100;
}