1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
|
#!/usr/bin/env ruby
# frozen_string_literal: true
# Sample app: measure how much subscription "usage budget" each token
# currently consumes.
#
# Usage:
# bundle exec ruby examples/usage_per_token.rb
#
# How it works:
# 1. Pick a model.
# 2. Read your subscription usage report (OAuth Pro/Max only).
# 3. Send a tiny chat request: 'Reply with only the following: .'
# 4. Read the usage report again.
# 5. Compute the change in "used_fraction" per token, and extrapolate
# to a per-million-tokens rate. Anthropic's subscription accounting
# varies throughout the day depending on global load, so this gives
# you the *current* burn rate at this moment in time.
#
# Caveats:
# - Requires an OAuth (Pro/Max) login. Raw API keys have no per-account
# subscription quota, so usage_report returns nil and this script
# bails out.
# - Anthropic's usage aggregation lags a few seconds behind requests,
# so the script polls the usage endpoint for up to POLL_TIMEOUT_S seconds.
require "bundler/setup"
require "dispatch/adapter/claude"
# Text of the single user message sent as the probe request.
PROMPT = "Reply with only the following: ."
# Maximum time, in seconds, to keep polling the usage report for a change.
POLL_TIMEOUT_S = 30
# Pause, in seconds, between consecutive polls of the usage report.
POLL_INTERVAL_S = 2
# ── Pick a model ─────────────────────────────────────────────────────────────
# Lists every model id known to the pricing table (sorted), asks the user to
# pick one by its 1-based menu number on stdin, and exits with status 1 when
# the list is empty or the answer is not a number inside the menu's range.
known_models = Dispatch::Adapter::Claude::PricingTable.known_ids.sort
if known_models.empty?
  # Without this guard the prompt would read "Select a model (1-0)".
  warn "No models available to choose from."
  exit 1
end

puts "\nAvailable models:"
known_models.each_with_index do |id, idx|
  puts " #{(idx + 1).to_s.rjust(2)}) #{id}"
end
print "\nSelect a model (1-#{known_models.length}): "
# `gets` returns nil at EOF; `to_s` turns that into "" so it is rejected like
# any other non-numeric answer.
choice = $stdin.gets.to_s.strip
# `exception: false` makes Integer() return nil for malformed input instead of
# raising, so a bad answer is detected without rescuing exceptions.
selected_number = Integer(choice, 10, exception: false)
selected_index = selected_number ? selected_number - 1 : -1
if selected_index.negative? || selected_index >= known_models.length
  warn "Invalid selection."
  exit 1
end
model_id = known_models[selected_index]
puts "→ Using #{model_id}\n\n"
# ── Build the adapter (force OAuth — ignore ANTHROPIC_API_KEY) ───────────────
# The adapter is constructed with an explicit `api_key: nil`. The value
# returned by `authenticate!` is echoed, and the script stops with exit
# status 1 when that value equals :api_key.
adapter = Dispatch::Adapter::Claude.new(model: model_id, api_key: nil)
auth_result = adapter.authenticate!
puts "Auth: #{auth_result}"
if auth_result == :api_key
  warn(
    "\nThis example requires an OAuth (Claude Pro/Max) login.",
    "Raw API keys do not have per-account subscription quota data."
  )
  exit 1
end
# ── Helper: flatten a UsageReport into a {limit_id => details} hash ──────────
# Returns a Hash keyed by each limit entry's `id`. Every value is a Hash with
# :label, :used_fraction (coerced with `to_f`), :window_label and
# :window_resets_at (both nil when the entry has no window). Entries without
# an amount, or whose amount has no used_fraction, are left out. A nil report
# yields an empty Hash.
def used_fractions(report)
  return {} if report.nil?

  measurable = report.limits.reject do |entry|
    entry.amount.nil? || entry.amount.used_fraction.nil?
  end

  pairs = measurable.map do |entry|
    window = entry.window
    details = {
      label: entry.label,
      used_fraction: entry.amount.used_fraction.to_f,
      window_label: window&.label,
      window_resets_at: window&.resets_at
    }
    [entry.id, details]
  end
  pairs.to_h
end
# ── Probe BEFORE ─────────────────────────────────────────────────────────────
# Takes the baseline usage snapshot. When it yields no usable windows the
# script prints troubleshooting hints and exits with status 1; otherwise it
# lists each window's used percentage and dumps the report's raw payload.
puts "\nFetching usage snapshot (before)..."
before_report = adapter.usage_report
before = used_fractions(before_report)
if before.empty?
  warn(
    "\n`adapter.usage_report` returned no data.",
    "",
    "Possible causes:",
    " - Anthropic's /api/oauth/usage endpoint is having an outage",
    " (open issue: https://github.com/anthropics/claude-code/issues/30930)",
    " - You're not on a Pro/Max plan (raw API keys: not supported)",
    " - Refresh-token chain is broken; try `rm ~/.config/dispatch/claude/credentials.json`",
    " and re-run to trigger a fresh OAuth login."
  )
  exit 1
end

puts "Current windows:"
before.each_pair do |id, data|
  name = data[:label] || id
  percent = format("%.4f", data[:used_fraction] * 100)
  reset_at = data[:window_resets_at]
  suffix = reset_at ? " (resets #{reset_at})" : ""
  puts " #{name}: #{percent}%#{suffix}"
end

# Print the unprocessed payload so the precision Anthropic reports is visible.
require "json"
puts "\nRaw /api/oauth/usage payload (BEFORE):"
puts JSON.pretty_generate(before_report.raw)
# ── Send the probe request ───────────────────────────────────────────────────
# Sends PROMPT as a single non-streaming user message, echoes the text of the
# reply, and prints the token counts reported on the response. `total_tokens`
# is the sum of input, output, cache-read and cache-creation tokens.
probe_message = Dispatch::Adapter::Message.new(
  role: "user",
  content: [Dispatch::Adapter::TextBlock.new(text: PROMPT)]
)
puts "\nSending probe request...", "Q: #{PROMPT}"
response = adapter.chat([probe_message], stream: false, thinking: false)

# Only TextBlock parts of the reply contribute to the echoed answer.
text_blocks = response.content.grep(Dispatch::Adapter::TextBlock)
reply_text = text_blocks.map(&:text).join.strip
puts "A: #{reply_text.inspect}"

usage = response.usage
input_tokens = usage.input_tokens.to_i
output_tokens = usage.output_tokens.to_i
cache_read_tokens = usage.cache_read_tokens.to_i
cache_create_tokens = usage.cache_creation_tokens.to_i
total_tokens = [input_tokens, output_tokens, cache_read_tokens, cache_create_tokens].sum

puts(
  "\nResponse token usage:",
  " input_tokens: #{input_tokens}",
  " output_tokens: #{output_tokens}",
  " cache_read_tokens: #{cache_read_tokens}",
  " cache_create_tokens: #{cache_create_tokens}",
  " total_tokens: #{total_tokens}"
)

# Cost lines are printed only when the usage object carries a cost; the
# per-million figure additionally needs a non-zero token count to divide by.
cost = usage.cost
if cost
  puts " cost (USD): $#{format("%.6f", cost.total)}"
  if total_tokens.positive?
    per_million_usd = (cost.total / total_tokens) * 1_000_000
    puts " cost per 1M tokens: $#{format("%.4f", per_million_usd)} (from pricing table)"
  end
end
# ── Poll AFTER until usage moves ─────────────────────────────────────────────
# Re-fetches the usage report until at least one window from the baseline
# shows a higher used_fraction, or until POLL_TIMEOUT_S seconds have elapsed.
# The last report fetched and its flattened form are left in `after_report`
# and `after`; the raw payload of that last report is then printed.
puts "\nPolling usage report for delta (lag tolerance up to #{POLL_TIMEOUT_S}s)..."
# Declared before the loop so the values assigned inside the loop block stay
# visible after it.
after = nil
after_report = nil
# Track the deadline on the monotonic clock so that wall-clock adjustments
# cannot shorten or stretch the wait.
deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + POLL_TIMEOUT_S
loop do
  after_report = adapter.usage_report
  after = used_fractions(after_report)
  delta_seen = before.any? do |id, data|
    after.key?(id) && after[id][:used_fraction] > data[:used_fraction]
  end
  break if delta_seen

  remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
  break unless remaining.positive?

  print "."
  $stdout.flush
  # Sleep no longer than the time left, so the last poll lands at the deadline
  # rather than up to a full interval after it.
  sleep [POLL_INTERVAL_S, remaining].min
end
puts
puts "\nRaw /api/oauth/usage payload (AFTER):"
# `after_report` may be nil, in which case an empty object is printed.
puts JSON.pretty_generate(after_report&.raw || {})
# ── Compute and report deltas ────────────────────────────────────────────────
# For every baseline window that is also present in the follow-up snapshot and
# whose used_fraction grew, prints the before/after/delta percentages. When
# the probe reported a positive token count, also prints the per-token and
# per-million-token share, the tokens that make up 1% of the window, and the
# tokens that would use up the part of the window still unused.
puts "\nDelta per window:"
windows_moved = 0
before.each_pair do |id, earlier|
  later = after[id]
  next unless later

  used_before = earlier[:used_fraction]
  used_after = later[:used_fraction]
  delta_fraction = used_after - used_before
  next if delta_fraction <= 0

  windows_moved += 1
  delta_pct = delta_fraction * 100
  label = earlier[:label] || id
  puts(
    " #{label}:",
    " before: #{format("%.6f", used_before * 100)}%",
    " after: #{format("%.6f", used_after * 100)}%",
    " delta: #{format("%.6f", delta_pct)}%"
  )
  # The extrapolations below divide by the token count, so skip them at zero.
  next unless total_tokens.positive?

  per_token_pct = delta_pct / total_tokens
  # total_tokens / delta_fraction is the token count for a full window; it is
  # scaled by the fraction of the window that is still unused.
  tokens_left = (total_tokens / delta_fraction) * (1.0 - used_after)
  puts(
    " per token: #{format("%.8f", per_token_pct)}% of #{label}",
    " per 1M tokens: #{format("%.4f", per_token_pct * 1_000_000)}% of #{label}",
    " tokens per 1%: #{format("%.0f", total_tokens / delta_pct)}",
    " tokens to exhaust remaining quota: #{format("%.0f", tokens_left)}"
  )
end

if windows_moved.zero?
  warn(
    "\nNo measurable change in any usage window after #{POLL_TIMEOUT_S}s.",
    "The request may have been too small to register, or aggregation is lagging.",
    "Try a larger PROMPT or re-run in a moment."
  )
end
|