diff options
Diffstat (limited to 'lib/dispatch/adapter/interface')
| -rw-r--r-- | lib/dispatch/adapter/interface/base.rb | 61 | ||||
| -rw-r--r-- | lib/dispatch/adapter/interface/message.rb | 23 | ||||
| -rw-r--r-- | lib/dispatch/adapter/interface/model_info.rb | 14 | ||||
| -rw-r--r-- | lib/dispatch/adapter/interface/pricing.rb | 34 | ||||
| -rw-r--r-- | lib/dispatch/adapter/interface/rate_limiter.rb | 174 | ||||
| -rw-r--r-- | lib/dispatch/adapter/interface/response.rb | 33 | ||||
| -rw-r--r-- | lib/dispatch/adapter/interface/tool_definition.rb | 14 | ||||
| -rw-r--r-- | lib/dispatch/adapter/interface/usage_report.rb | 43 | ||||
| -rw-r--r-- | lib/dispatch/adapter/interface/version.rb | 2 |
9 files changed, 389 insertions, 9 deletions
diff --git a/lib/dispatch/adapter/interface/base.rb b/lib/dispatch/adapter/interface/base.rb index 4b7a6ed..682fae0 100644 --- a/lib/dispatch/adapter/interface/base.rb +++ b/lib/dispatch/adapter/interface/base.rb @@ -3,7 +3,43 @@ module Dispatch module Adapter class Base - def chat(_messages, system: nil, tools: [], stream: false, max_tokens: nil, thinking: nil, &_block) + # Send a chat request. + # + # @param _messages [Array<Message>] the conversation messages + # @param system [String, Array<TextBlock, Hash>, nil] system prompt; + # a String or an array of TextBlock / Hash content blocks (for + # providers that support cached system prompts). + # @param tools [Array<ToolDefinition, Hash>] tool definitions + # @param stream [Boolean] whether to stream the response + # @param max_tokens [Integer, nil] maximum tokens to generate + # @param thinking [String, Hash, nil] extended thinking config; + # adapters do their own validation. + # - String: "low" | "medium" | "high" + # - Hash: e.g. { enabled: true, budget_tokens: 10_000 } + # @param tool_choice [Symbol, Hash, nil] tool-selection policy: + # :auto | :any | :none | { type: :tool, name: "fn" } + # Adapters MAY ignore this. + # @param cache_retention [Symbol, nil] caching hint: + # :none | :short | :long | nil + # Adapters MAY ignore this. + # @param metadata [Hash, nil] arbitrary passthrough metadata (e.g. { user_id: "u1" }). + # Adapters MAY ignore this. + # @param betas [Array<String>, String, nil] extra provider-beta entries. + # Adapters MAY ignore this. + # @return [Response] + def chat( + _messages, + system: nil, + tools: [], + stream: false, + max_tokens: nil, + thinking: nil, + tool_choice: nil, # rubocop:disable Lint/UnusedMethodArgument + cache_retention: nil, # rubocop:disable Lint/UnusedMethodArgument + metadata: nil, # rubocop:disable Lint/UnusedMethodArgument + betas: nil, # rubocop:disable Lint/UnusedMethodArgument + &_block + ) raise NotImplementedError, "#{self.class}#chat must be implemented" end @@ -26,6 +62,29 @@ module Dispatch def max_context_tokens nil end + + # Subscription quota / utilisation. Return nil if the provider has no + # such concept (raw API-key tier, etc.). + # @return [Dispatch::Adapter::UsageReport, nil] + def usage_report + nil + end + + # Idempotent — perform any interactive login required (device flow, + # OAuth PKCE, etc). Safe to call before the first chat/usage_report. + def authenticate! + nil + end + + # True iff cached credentials are present and presumed valid. + def authenticated? + true + end + + # Drop cached credentials. + def logout! + nil + end end end end diff --git a/lib/dispatch/adapter/interface/message.rb b/lib/dispatch/adapter/interface/message.rb index eb51c99..dfe7301 100644 --- a/lib/dispatch/adapter/interface/message.rb +++ b/lib/dispatch/adapter/interface/message.rb @@ -4,9 +4,14 @@ module Dispatch module Adapter Message = Struct.new(:role, :content, keyword_init: true) - TextBlock = Struct.new(:type, :text, keyword_init: true) do - def initialize(text:, type: "text") - super(type:, text:) + # +cache_control+ values: + # nil — no cache breakpoint (default) + # { type: :ephemeral } — provider default TTL + # { type: :ephemeral, ttl: :"5m" } — short-lived cache + # { type: :ephemeral, ttl: :"1h" } — long-lived cache + TextBlock = Struct.new(:type, :text, :cache_control, keyword_init: true) do + def initialize(text:, cache_control: nil, type: "text") + super(type:, text:, cache_control:) end end @@ -27,5 +32,17 @@ module Dispatch super(type:, tool_use_id:, content:, is_error:) end end + + ThinkingBlock = Struct.new(:type, :thinking, :signature, keyword_init: true) do + def initialize(thinking:, signature: nil, type: "thinking") + super(type:, thinking:, signature:) + end + end + + RedactedThinkingBlock = Struct.new(:type, :data, keyword_init: true) do + def initialize(data:, type: "redacted_thinking") + super(type:, data:) + end + end end end diff --git a/lib/dispatch/adapter/interface/model_info.rb b/lib/dispatch/adapter/interface/model_info.rb index 8ba2977..29228a8 100644 --- a/lib/dispatch/adapter/interface/model_info.rb +++ b/lib/dispatch/adapter/interface/model_info.rb @@ -2,14 +2,26 @@ module Dispatch module Adapter + ModelPricing = Struct.new( + :input_per_mtok, :output_per_mtok, + :cache_read_per_mtok, :cache_write_per_mtok, + keyword_init: true + ) do + def initialize(input_per_mtok:, output_per_mtok:, + cache_read_per_mtok: 0.0, cache_write_per_mtok: 0.0) + super + end + end + ModelInfo = Struct.new( :id, :name, :max_context_tokens, :supports_vision, :supports_tool_use, :supports_streaming, :premium_request_multiplier, + :pricing, keyword_init: true ) do def initialize(id:, name:, max_context_tokens:, supports_vision:, supports_tool_use:, supports_streaming:, - premium_request_multiplier: nil) + premium_request_multiplier: nil, pricing: nil) super end end diff --git a/lib/dispatch/adapter/interface/pricing.rb b/lib/dispatch/adapter/interface/pricing.rb new file mode 100644 index 0000000..a3ec9eb --- /dev/null +++ b/lib/dispatch/adapter/interface/pricing.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module Dispatch + module Adapter + module Pricing + module_function + + # Calculates UsageCost from a Usage and ModelInfo. + # + # NOTE: Reasoning tokens are NOT separately priced here. + # Anthropic bills them as output tokens; OpenAI o-series likewise. + # It is assumed that output_tokens includes reasoning_tokens if applicable. + def calculate(usage, model_info) + return nil unless model_info&.pricing + + p = model_info.pricing + mtok = ->(tokens, rate) { (rate.to_f / 1_000_000.0) * tokens.to_i } + + input = mtok.call(usage.input_tokens, p.input_per_mtok) + output = mtok.call(usage.output_tokens, p.output_per_mtok) + cread = mtok.call(usage.cache_read_tokens, p.cache_read_per_mtok) + cwrite = mtok.call(usage.cache_creation_tokens, p.cache_write_per_mtok) + + UsageCost.new( + input: input, + output: output, + cache_read: cread, + cache_write: cwrite, + total: input + output + cread + cwrite + ) + end + end + end +end diff --git a/lib/dispatch/adapter/interface/rate_limiter.rb b/lib/dispatch/adapter/interface/rate_limiter.rb new file mode 100644 index 0000000..1b05582 --- /dev/null +++ b/lib/dispatch/adapter/interface/rate_limiter.rb @@ -0,0 +1,174 @@ +# frozen_string_literal: true + +require "json" +require "fileutils" + +module Dispatch + module Adapter + class RateLimiter + def initialize(rate_limit_path:, min_request_interval:, rate_limit:) + validate_min_request_interval!(min_request_interval) + validate_rate_limit!(rate_limit) + + @rate_limit_path = rate_limit_path + @min_request_interval = min_request_interval + @rate_limit = rate_limit + end + + def wait! + return if disabled? + + loop do + wait_time = 0.0 + done = false + + File.open(rate_limit_file, File::RDWR | File::CREAT) do |file| + file.flock(File::LOCK_EX) + state = read_state(file) + now = Time.now.to_f + wait_time = compute_wait(state, now) + + if wait_time <= 0 + record_request(state, now) + write_state(file, state) + done = true + end + end + + return if done + + sleep(wait_time) + end + end + + private + + def disabled? + effective_min_interval.nil? && @rate_limit.nil? + end + + def effective_min_interval + return nil if @min_request_interval.nil? + return nil if @min_request_interval.zero? + + @min_request_interval + end + + def rate_limit_file + FileUtils.mkdir_p(File.dirname(@rate_limit_path)) + File.chmod(0o600, @rate_limit_path) if File.exist?(@rate_limit_path) + @rate_limit_path + end + + def read_state(file) + file.rewind + content = file.read + return default_state if content.nil? || content.strip.empty? + + parsed = JSON.parse(content) + { + "last_request_at" => parsed["last_request_at"]&.to_f, + "request_log" => Array(parsed["request_log"]).map(&:to_f) + } + rescue JSON::ParserError + default_state + end + + def default_state + { "last_request_at" => nil, "request_log" => [] } + end + + def write_state(file, state) + file.rewind + file.truncate(0) + file.write(JSON.generate(state)) + file.flush + + File.chmod(0o600, @rate_limit_path) + end + + def compute_wait(state, now) + cooldown_wait = compute_cooldown_wait(state, now) + window_wait = compute_window_wait(state, now) + [ cooldown_wait, window_wait ].max + end + + def compute_cooldown_wait(state, now) + interval = effective_min_interval + return 0.0 if interval.nil? + + last = state["last_request_at"] + return 0.0 if last.nil? + + elapsed = now - last + remaining = interval - elapsed + remaining.positive? ? remaining : 0.0 + end + + def compute_window_wait(state, now) + return 0.0 if @rate_limit.nil? + + max_requests = @rate_limit[:requests] + period = @rate_limit[:period] + window_start = now - period + + log = state["request_log"].select { |t| t > window_start } + + return 0.0 if log.size < max_requests + + oldest_in_window = log.min + wait = oldest_in_window + period - now + wait.positive? ? wait : 0.0 + end + + def record_request(state, now) + state["last_request_at"] = now + state["request_log"] << now + prune_log(state, now) + end + + def prune_log(state, now) + if @rate_limit + period = @rate_limit[:period] + cutoff = now - period + state["request_log"] = state["request_log"].select { |t| t > cutoff } + else + state["request_log"] = [] + end + end + + def validate_min_request_interval!(value) + return if value.nil? + + unless value.is_a?(Numeric) + raise ArgumentError, + "min_request_interval must be nil or a Numeric >= 0, got #{value.inspect}" + end + + return unless value.negative? + + raise ArgumentError, + "min_request_interval must be nil or a Numeric >= 0, got #{value.inspect}" + end + + def validate_rate_limit!(value) + return if value.nil? + + unless value.is_a?(Hash) + raise ArgumentError, + "rate_limit must be nil or a Hash with :requests and :period keys, got #{value.inspect}" + end + + unless value.key?(:requests) && value[:requests].is_a?(Integer) && value[:requests].positive? + raise ArgumentError, + "rate_limit[:requests] must be a positive Integer, got #{value[:requests].inspect}" + end + + return if value.key?(:period) && value[:period].is_a?(Numeric) && value[:period].positive? + + raise ArgumentError, + "rate_limit[:period] must be a positive Numeric, got #{value[:period].inspect}" + end + end + end +end diff --git a/lib/dispatch/adapter/interface/response.rb b/lib/dispatch/adapter/interface/response.rb index b4ba3eb..391b033 100644 --- a/lib/dispatch/adapter/interface/response.rb +++ b/lib/dispatch/adapter/interface/response.rb @@ -2,18 +2,47 @@ module Dispatch module Adapter + # stop_reason ∈ + # :end_turn — natural completion + # :max_tokens — output truncated by max_tokens + # :tool_use — assistant emitted tool calls + # :pause_turn — provider asked us to resubmit (Anthropic) + # :refusal — provider refused to answer + # :sensitive — output blocked by safety filters + # :error — adapter-level failure Response = Struct.new(:content, :tool_calls, :model, :stop_reason, :usage, keyword_init: true) do def initialize(model:, stop_reason:, usage:, content: nil, tool_calls: []) super end end - Usage = Struct.new(:input_tokens, :output_tokens, :cache_read_tokens, :cache_creation_tokens, keyword_init: true) do - def initialize(input_tokens:, output_tokens:, cache_read_tokens: 0, cache_creation_tokens: 0) + UsageCost = Struct.new( + :input, :output, :cache_read, :cache_write, :total, + keyword_init: true + ) do + def initialize(input: 0.0, output: 0.0, cache_read: 0.0, + cache_write: 0.0, total: 0.0) super end end + Usage = Struct.new( + :input_tokens, :output_tokens, + :cache_read_tokens, :cache_creation_tokens, + :reasoning_tokens, :premium_requests, :cost, + keyword_init: true + ) do + def initialize(input_tokens:, output_tokens:, + cache_read_tokens: 0, cache_creation_tokens: 0, + reasoning_tokens: 0, premium_requests: nil, cost: nil) + super + end + end + + # Recognised :type values: + # :text_start, :text_delta, :text_end + # :thinking_start, :thinking_delta, :thinking_end + # :tool_use_start, :tool_use_delta, :tool_use_end StreamDelta = Struct.new(:type, :text, :tool_call_id, :tool_name, :argument_delta, keyword_init: true) do def initialize(type:, text: nil, tool_call_id: nil, tool_name: nil, argument_delta: nil) super diff --git a/lib/dispatch/adapter/interface/tool_definition.rb b/lib/dispatch/adapter/interface/tool_definition.rb index 7b435a3..4a8ca5a 100644 --- a/lib/dispatch/adapter/interface/tool_definition.rb +++ b/lib/dispatch/adapter/interface/tool_definition.rb @@ -2,6 +2,18 @@ module Dispatch module Adapter - ToolDefinition = Struct.new(:name, :description, :parameters, keyword_init: true) + # +cache_control+ values: + # nil — no cache breakpoint (default) + # { type: :ephemeral } — provider default TTL + # { type: :ephemeral, ttl: :"5m" } — short-lived cache + # { type: :ephemeral, ttl: :"1h" } — long-lived cache + ToolDefinition = Struct.new( + :name, :description, :parameters, :cache_control, + keyword_init: true + ) do + def initialize(name:, description:, parameters:, cache_control: nil) + super + end + end end end diff --git a/lib/dispatch/adapter/interface/usage_report.rb b/lib/dispatch/adapter/interface/usage_report.rb new file mode 100644 index 0000000..132c484 --- /dev/null +++ b/lib/dispatch/adapter/interface/usage_report.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +module Dispatch + module Adapter + UsageWindow = Struct.new(:id, :label, :duration_ms, :resets_at, keyword_init: true) do + def initialize(id:, label:, duration_ms: nil, resets_at: nil) + super + end + end + + UsageAmount = Struct.new( + :used, :limit, :remaining, + :used_fraction, :remaining_fraction, + :unit, keyword_init: true + ) do + # unit ∈ :percent | :tokens | :requests | :usd | :minutes | :bytes | :unknown + def initialize(unit:, used: nil, limit: nil, remaining: nil, + used_fraction: nil, remaining_fraction: nil) + super + end + end + + UsageLimitEntry = Struct.new( + :id, :label, :scope, :window, :amount, :status, :notes, + keyword_init: true + ) do + # status ∈ :ok | :warning | :exhausted | :unknown + def initialize(id:, label:, scope:, amount:, window: nil, + status: :unknown, notes: []) + super + end + end + + UsageReport = Struct.new( + :provider, :fetched_at, :limits, :metadata, :raw, + keyword_init: true + ) do + def initialize(provider:, limits:, fetched_at: Time.now, metadata: {}, raw: nil) + super + end + end + end +end diff --git a/lib/dispatch/adapter/interface/version.rb b/lib/dispatch/adapter/interface/version.rb index 0a53b34..44b41db 100644 --- a/lib/dispatch/adapter/interface/version.rb +++ b/lib/dispatch/adapter/interface/version.rb @@ -3,7 +3,7 @@ module Dispatch module Adapter module Interface - VERSION = "0.1.0" + VERSION = "0.3.0" end end end |
