examples/usage_per_token.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203

#!/usr/bin/env ruby
# frozen_string_literal: true

# Sample app: measure how much subscription "usage budget" each token
# currently consumes.
#
# Usage:
#   bundle exec ruby examples/usage_per_token.rb
#
# How it works:
#   1. Pick a model.
#   2. Read your subscription usage report (OAuth Pro/Max only).
#   3. Send a tiny chat request: 'Reply with only the following: .'
#   4. Read the usage report again.
#   5. Compute the change in "used_fraction" per token, and extrapolate
#      to a per-million-tokens rate. Anthropic's subscription accounting
#      varies throughout the day depending on global load, so this gives
#      you the *current* burn rate at this moment in time.
#
# Caveats:
#   - Requires an OAuth (Pro/Max) login. Raw API keys have no per-account
#     subscription quota, so usage_report returns nil and this script
#     bails out.
#   - Anthropic's usage aggregation lags a few seconds behind requests;
#     the script polls the usage endpoint every POLL_INTERVAL_S seconds,
#     for up to POLL_TIMEOUT_S seconds, until a window's usage increases.

require "bundler/setup"
require "dispatch/adapter/claude"

PROMPT = "Reply with only the following: ."
POLL_TIMEOUT_S = 30
POLL_INTERVAL_S = 2

# ── Pick a model ─────────────────────────────────────────────────────────────
# List the known model ids (sorted) as a 1-based menu and read the user's pick
# from stdin.
known_models = Dispatch::Adapter::Claude::PricingTable.known_ids.sort
puts "\nAvailable models:"
known_models.each.with_index(1) do |id, number|
  puts "  #{number.to_s.rjust(2)}) #{id}"
end
print "\nSelect a model (1-#{known_models.length}): "
answer = $stdin.gets&.strip

# Convert the 1-based answer into a 0-based index. Anything that does not
# parse as a base-10 integer (including EOF on stdin, where `answer` is nil)
# becomes -1 so it is rejected by the range check below.
picked =
  begin
    Integer(answer, 10) - 1
  rescue StandardError
    -1
  end

unless (0...known_models.length).cover?(picked)
  warn "Invalid selection."
  exit 1
end

model_id = known_models[picked]
puts "→ Using #{model_id}\n\n"

# ── Build the adapter (force OAuth — ignore ANTHROPIC_API_KEY) ───────────────
# `api_key: nil` is passed explicitly; the result of `authenticate!` is printed
# and then checked so that an :api_key login aborts the script.
adapter = Dispatch::Adapter::Claude.new(model: model_id, api_key: nil)
auth_status = adapter.authenticate!
puts "Auth: #{auth_status}"

authenticated_with_api_key = auth_status == :api_key
if authenticated_with_api_key
  warn(
    "\nThis example requires an OAuth (Claude Pro/Max) login.",
    "Raw API keys do not have per-account subscription quota data."
  )
  exit 1
end

# ── Helper: collapse a UsageReport into a {limit_id => used_fraction} hash ──
# Builds a hash keyed by each limit entry's id. Every value is a hash with
# :label, :used_fraction (coerced with #to_f), :window_label and
# :window_resets_at. Entries that have no amount, or whose amount has no
# used_fraction, are left out. A nil report yields an empty hash.
def used_fractions(report)
  fractions = {}
  return fractions if report.nil?

  report.limits.each do |limit|
    amount = limit.amount
    next if amount.nil?

    fraction = amount.used_fraction
    next if fraction.nil?

    # The window is optional; its fields come back nil when it is absent.
    window = limit.window
    fractions[limit.id] = {
      label: limit.label,
      used_fraction: fraction.to_f,
      window_label: window&.label,
      window_resets_at: window&.resets_at
    }
  end

  fractions
end

# ── Probe BEFORE ─────────────────────────────────────────────────────────────
# Take the baseline usage snapshot. If it contains no usable window, print
# troubleshooting hints and exit with status 1.
puts "\nFetching usage snapshot (before)..."
before_report = adapter.usage_report
before = used_fractions(before_report)

if before.empty?
  warn(
    "\n`adapter.usage_report` returned no data.",
    "",
    "Possible causes:",
    "  - Anthropic's /api/oauth/usage endpoint is having an outage",
    "    (open issue: https://github.com/anthropics/claude-code/issues/30930)",
    "  - You're not on a Pro/Max plan (raw API keys: not supported)",
    "  - Refresh-token chain is broken; try `rm ~/.config/dispatch/claude/credentials.json`",
    "    and re-run to trigger a fresh OAuth login."
  )
  exit 1
end

# One line per window: label (or id when there is no label), percentage used,
# and the reset time when one is present.
puts "Current windows:"
before.each_pair do |limit_id, info|
  name = info[:label] || limit_id
  reset_at = info[:window_resets_at]
  reset_note = reset_at ? " (resets #{reset_at})" : ""
  used_pct = info[:used_fraction] * 100
  puts "  #{name}: #{format("%.4f", used_pct)}%#{reset_note}"
end

# Dump the report's raw payload so the precision of the reported values is
# visible.
require "json"
puts "\nRaw /api/oauth/usage payload (BEFORE):"
puts JSON.pretty_generate(before_report.raw)

# ── Send the probe request ───────────────────────────────────────────────────
# Send PROMPT as a single user message (non-streaming, thinking disabled),
# echo the reply, then print the token counts reported on the response.
probe_text = Dispatch::Adapter::TextBlock.new(text: PROMPT)
messages = [Dispatch::Adapter::Message.new(role: "user", content: [probe_text])]

puts "\nSending probe request..."
puts "Q: #{PROMPT}"
response = adapter.chat(messages, stream: false, thinking: false)

# Only text blocks contribute to the displayed reply.
text_blocks = response.content.grep(Dispatch::Adapter::TextBlock)
reply_text = text_blocks.map(&:text).join.strip
puts "A: #{reply_text.inspect}"

usage = response.usage

# Display name => count, in print order. `to_i` turns a nil count into 0.
token_counts = {
  "input_tokens" => usage.input_tokens.to_i,
  "output_tokens" => usage.output_tokens.to_i,
  "cache_read_tokens" => usage.cache_read_tokens.to_i,
  "cache_create_tokens" => usage.cache_creation_tokens.to_i
}
total_tokens = token_counts.values.sum

puts "\nResponse token usage:"
token_counts.merge("total_tokens" => total_tokens).each do |name, count|
  # Names are padded so every count starts in the same column.
  puts "  #{"#{name}:".ljust(21)}#{count}"
end

# Cost figures are printed only when the usage object carries a cost.
cost = usage.cost
if cost
  total_usd = cost.total
  puts "  cost (USD):          $#{format("%.6f", total_usd)}"
  if total_tokens.positive?
    per_million_usd = (total_usd / total_tokens) * 1_000_000
    puts "  cost per 1M tokens:  $#{format("%.4f", per_million_usd)} (from pricing table)"
  end
end

# ── Poll AFTER until usage moves ─────────────────────────────────────────────
# Re-fetch the usage report until at least one window present in `before`
# shows a higher used_fraction, or until POLL_TIMEOUT_S seconds have elapsed.
# The first fetch happens immediately; later fetches are spaced by
# POLL_INTERVAL_S.
puts "\nPolling usage report for delta (lag tolerance up to #{POLL_TIMEOUT_S}s)..."
after = nil
after_report = nil

# Measure the timeout on the monotonic clock: wall-clock time (Time.now) can
# jump when the system clock is adjusted, which would shorten or extend the
# polling window.
deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + POLL_TIMEOUT_S
loop do
  after_report = adapter.usage_report
  after = used_fractions(after_report)

  delta_seen = before.any? do |id, data|
    after.key?(id) && after[id][:used_fraction] > data[:used_fraction]
  end
  break if delta_seen

  remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
  break unless remaining.positive?

  print "."
  $stdout.flush
  # Never sleep past the deadline, so the advertised timeout is honoured.
  sleep [POLL_INTERVAL_S, remaining].min
end
puts

# `after_report` is nil when the last fetch returned nothing; print {} then.
puts "\nRaw /api/oauth/usage payload (AFTER):"
puts JSON.pretty_generate(after_report&.raw || {})

# ── Compute and report deltas ────────────────────────────────────────────────
# For every window whose used_fraction increased between the two snapshots,
# print the before/after/delta percentages and, when the probe reported a
# positive token count, the per-token extrapolations.
puts "\nDelta per window:"
moved_windows = 0
before.each_pair do |limit_id, earlier|
  later = after[limit_id]
  next unless later

  delta_fraction = later[:used_fraction] - earlier[:used_fraction]
  next if delta_fraction <= 0

  moved_windows += 1
  delta_pct = delta_fraction * 100
  label = earlier[:label] || limit_id
  lines = [
    "  #{label}:",
    "    before:           #{format("%.6f", earlier[:used_fraction] * 100)}%",
    "    after:            #{format("%.6f", later[:used_fraction] * 100)}%",
    "    delta:            #{format("%.6f", delta_pct)}%"
  ]

  if total_tokens.positive?
    per_token_pct       = delta_pct / total_tokens
    per_million_pct     = per_token_pct * 1_000_000
    tokens_per_full_pct = total_tokens / delta_pct
    # Tokens that would consume the whole window at the observed rate; scaled
    # below by the fraction of the window that is still unused.
    tokens_to_exhaust   = total_tokens / delta_fraction
    remaining_fraction  = 1.0 - later[:used_fraction]

    lines << "    per token:        #{format("%.8f", per_token_pct)}% of #{label}"
    lines << "    per 1M tokens:    #{format("%.4f", per_million_pct)}% of #{label}"
    lines << "    tokens per 1%:    #{format("%.0f", tokens_per_full_pct)}"
    lines << "    tokens to exhaust remaining quota: #{format("%.0f", tokens_to_exhaust * remaining_fraction)}"
  end

  puts lines
end

if moved_windows.zero?
  warn(
    "\nNo measurable change in any usage window after #{POLL_TIMEOUT_S}s.",
    "The request may have been too small to register, or aggregation is lagging.",
    "Try a larger PROMPT or re-run in a moment."
  )
end