#!/usr/bin/env ruby
# frozen_string_literal: true

# Sample app: measure how much subscription "usage budget" each token
# currently consumes.
#
# Usage:
#   bundle exec ruby examples/usage_per_token.rb
#
# How it works:
#   1. Pick a model.
#   2. Read your subscription usage report (OAuth Pro/Max only).
#   3. Send a tiny chat request: 'Reply with only the following: .'
#   4. Read the usage report again.
#   5. Compute the change in "used_fraction" per token, and extrapolate
#      to a per-million-tokens rate. Anthropic's subscription accounting
#      varies throughout the day depending on global load, so this gives
#      you the *current* burn rate at this moment in time.
#
# Caveats:
#   - Requires an OAuth (Pro/Max) login. Raw API keys have no per-account
#     subscription quota, so usage_report returns nil and this script
#     bails out.
#   - Anthropic's usage aggregation lags a few seconds behind requests;
#     the script polls the usage endpoint up to POLL_TIMEOUT_S.

require "bundler/setup"
require "json"
require "dispatch/adapter/claude"

# Text sent as the single user message of the probe request.
PROMPT = "Reply with only the following: ."
# Upper bound (seconds) on how long we wait for the usage report to move.
POLL_TIMEOUT_S = 30
# Pause (seconds) between consecutive usage-report polls.
POLL_INTERVAL_S = 2

# ── Pick a model ─────────────────────────────────────────────────────────────

known_models = Dispatch::Adapter::Claude::PricingTable.known_ids.sort

puts "\nAvailable models:"
known_models.each_with_index do |id, idx|
  puts " #{(idx + 1).to_s.rjust(2)}) #{id}"
end

print "\nSelect a model (1-#{known_models.length}): "
# `gets` returns nil on EOF; the parse below then falls into the rescue.
choice = $stdin.gets&.strip

# Menu numbers are 1-based; anything unparseable becomes -1 (rejected below).
selected_index =
  begin
    Integer(choice, 10) - 1
  rescue StandardError
    -1
  end

if selected_index.negative? || selected_index >= known_models.length
  warn "Invalid selection."
  exit 1
end

model_id = known_models[selected_index]
puts "→ Using #{model_id}\n\n"

# ── Build the adapter (force OAuth — ignore ANTHROPIC_API_KEY) ───────────────

adapter = Dispatch::Adapter::Claude.new(model: model_id, api_key: nil)
status = adapter.authenticate!
puts "Auth: #{status}"

if status == :api_key
  warn "\nThis example requires an OAuth (Claude Pro/Max) login."
  warn "Raw API keys do not have per-account subscription quota data."
  exit 1
end

# ── Helper: collapse a UsageReport into a {limit_id => used_fraction} hash ──

# Flattens a usage report into a hash keyed by limit id.
#
# Entries whose `amount` or `amount.used_fraction` is nil are skipped, so
# every value in the result carries a Float `:used_fraction`.
#
# @param report [#limits, nil] usage report as returned by
#   `adapter.usage_report`; nil yields an empty hash
# @return [Hash] limit id => { label:, used_fraction:, window_label:,
#   window_resets_at: }
def used_fractions(report)
  return {} if report.nil?

  report.limits.each_with_object({}) do |entry, h|
    next if entry.amount.nil? || entry.amount.used_fraction.nil?

    h[entry.id] = {
      label: entry.label,
      used_fraction: entry.amount.used_fraction.to_f,
      window_label: entry.window&.label,
      window_resets_at: entry.window&.resets_at
    }
  end
end

# ── Probe BEFORE ─────────────────────────────────────────────────────────────

puts "\nFetching usage snapshot (before)..."
before_report = adapter.usage_report
before = used_fractions(before_report)

if before.empty?
  warn "\n`adapter.usage_report` returned no data."
  warn ""
  warn "Possible causes:"
  warn " - Anthropic's /api/oauth/usage endpoint is having an outage"
  warn " (open issue: https://github.com/anthropics/claude-code/issues/30930)"
  warn " - You're not on a Pro/Max plan (raw API keys: not supported)"
  warn " - Refresh-token chain is broken; try `rm ~/.config/dispatch/claude/credentials.json`"
  warn " and re-run to trigger a fresh OAuth login."
  exit 1
end

puts "Current windows:"
before.each do |id, data|
  pct = data[:used_fraction] * 100
  resets = data[:window_resets_at] ? " (resets #{data[:window_resets_at]})" : ""
  puts " #{data[:label] || id}: #{format("%.4f", pct)}%#{resets}"
end

# Show the EXACT raw payload from Anthropic so we can see precision.
puts "\nRaw /api/oauth/usage payload (BEFORE):"
puts JSON.pretty_generate(before_report.raw)

# ── Send the probe request ───────────────────────────────────────────────────

messages = [
  Dispatch::Adapter::Message.new(
    role: "user",
    content: [Dispatch::Adapter::TextBlock.new(text: PROMPT)]
  )
]

puts "\nSending probe request..."
puts "Q: #{PROMPT}"

response = adapter.chat(messages, stream: false, thinking: false)
reply_text = response.content.grep(Dispatch::Adapter::TextBlock).map(&:text).join.strip
puts "A: #{reply_text.inspect}"

usage = response.usage
# `.to_i` turns a nil counter into 0 so the sum below never sees nil.
input_tokens = usage.input_tokens.to_i
output_tokens = usage.output_tokens.to_i
cache_read_tokens = usage.cache_read_tokens.to_i
cache_create_tokens = usage.cache_creation_tokens.to_i
total_tokens = input_tokens + output_tokens + cache_read_tokens + cache_create_tokens

puts "\nResponse token usage:"
puts " input_tokens: #{input_tokens}"
puts " output_tokens: #{output_tokens}"
puts " cache_read_tokens: #{cache_read_tokens}"
puts " cache_create_tokens: #{cache_create_tokens}"
puts " total_tokens: #{total_tokens}"

if usage.cost
  puts " cost (USD): $#{format("%.6f", usage.cost.total)}"
  if total_tokens.positive?
    per_million_usd = (usage.cost.total / total_tokens) * 1_000_000
    puts " cost per 1M tokens: $#{format("%.4f", per_million_usd)} (from pricing table)"
  end
end

# ── Poll AFTER until usage moves ─────────────────────────────────────────────

puts "\nPolling usage report for delta (lag tolerance up to #{POLL_TIMEOUT_S}s)..."

after = nil
after_report = nil
# Use the monotonic clock for the deadline: wall-clock time (Time.now) can
# jump when the system clock is adjusted, which would distort the timeout.
deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + POLL_TIMEOUT_S

loop do
  after_report = adapter.usage_report
  after = used_fractions(after_report)

  # Stop as soon as any window present in both snapshots has grown.
  delta_seen = before.any? do |id, data|
    after.key?(id) && after[id][:used_fraction] > data[:used_fraction]
  end
  break if delta_seen

  remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
  break if remaining <= 0

  print "."
  $stdout.flush
  # Never sleep past the deadline, so the final poll lands at the timeout
  # instead of up to one full interval after it.
  sleep [POLL_INTERVAL_S, remaining].min
end
puts

puts "\nRaw /api/oauth/usage payload (AFTER):"
puts JSON.pretty_generate(after_report&.raw || {})

# ── Compute and report deltas ────────────────────────────────────────────────

puts "\nDelta per window:"
any_movement = false

before.each do |id, before_data|
  after_data = after[id]
  next unless after_data

  delta_fraction = after_data[:used_fraction] - before_data[:used_fraction]
  # Zero or negative deltas carry no usable burn-rate signal; skip them
  # (this also keeps the divisions below away from zero).
  next if delta_fraction <= 0

  any_movement = true
  delta_pct = delta_fraction * 100
  label = before_data[:label] || id

  puts " #{label}:"
  puts " before: #{format("%.6f", before_data[:used_fraction] * 100)}%"
  puts " after: #{format("%.6f", after_data[:used_fraction] * 100)}%"
  puts " delta: #{format("%.6f", delta_pct)}%"

  next unless total_tokens.positive?

  per_token_pct = delta_pct / total_tokens
  per_million_pct = per_token_pct * 1_000_000
  tokens_per_full_pct = total_tokens / delta_pct
  # Tokens that would consume 100% of the window at the observed rate;
  # scaled by the unused fraction when printed below.
  tokens_to_exhaust = total_tokens / delta_fraction

  puts " per token: #{format("%.8f", per_token_pct)}% of #{label}"
  puts " per 1M tokens: #{format("%.4f", per_million_pct)}% of #{label}"
  puts " tokens per 1%: #{format("%.0f", tokens_per_full_pct)}"
  puts " tokens to exhaust remaining quota: #{format("%.0f", tokens_to_exhaust * (1.0 - after_data[:used_fraction]))}"
end

unless any_movement
  warn "\nNo measurable change in any usage window after #{POLL_TIMEOUT_S}s."
  warn "The request may have been too small to register, or aggregation is lagging."
  warn "Try a larger PROMPT or re-run in a moment."
end