# frozen_string_literal: true

module Dispatch
  module Adapter
    class Claude < Base
      module RequestBuilder
        # Translates the interface's `thinking:` kwarg into the Anthropic
        # `thinking` + `output_config.effort` parameters, with version-specific
        # handling for Opus 4.7+ (adaptive + display) vs. Opus/Sonnet 4.6
        # (adaptive only) vs. older models (enabled + budget_tokens).
        #
        # Also handles:
        #   - `disableThinkingIfToolChoiceForced`: remove thinking/output_config
        #     when tool_choice forces a specific tool (:any or {type: :tool}).
        #   - `ensureMaxTokensForThinking`: raise max_tokens to at least
        #     budget_tokens + OUTPUT_FALLBACK_BUFFER for enabled-mode requests.
        module Thinking
          # Extra output buffer added on top of thinking budget_tokens to
          # ensure there is room for the assistant's final response.
          # Matches oh-my-pi's OUTPUT_FALLBACK_BUFFER constant.
          OUTPUT_FALLBACK_BUFFER = 4096

          # Budget-token values used for "enabled" thinking mode on older models
          # (pre-4.6) when the caller supplies an effort level rather than an
          # explicit {type: :enabled, budget_tokens: N} hash.
          # These mirror the human-intuitive ladder used by oh-my-pi / omp.
          EFFORT_BUDGET_MAP = {
            "low" => 1_024,
            "medium" => 4_000,
            "high" => 10_000,
            "max" => 32_000,
            "xhigh" => 32_000
          }.freeze

          # Budget used when no (or an unrecognised) effort level is supplied.
          DEFAULT_BUDGET_TOKENS = EFFORT_BUDGET_MAP.fetch("high")

          # Recognised effort level strings accepted by the Anthropic API.
          VALID_EFFORT_LEVELS = %w[low medium high max xhigh].freeze

          # Model-version regexes for detecting adaptive/enabled thinking support.
          # Captures: MODEL_VERSION_RE → (family, major, minor);
          #           OPUS_VERSION_RE  → (major, minor).
          MODEL_VERSION_RE = /claude-(opus|sonnet)-(\d{1,2})(?:[.-](\d{1,2})(?!\d))?/
          OPUS_VERSION_RE = /claude-opus-(\d{1,2})(?:[.-](\d{1,2})(?!\d))?/

          # 3.x-era IDs put the version before the family name
          # (e.g. "claude-3-7-sonnet-20250219"), which MODEL_VERSION_RE cannot
          # match. Sonnet 3.7 is the only model of that shape treated as
          # thinking-capable here; it always uses enabled (budget) mode.
          LEGACY_THINKING_RE = /claude-3[.-]7-sonnet/

          module_function

          # Apply thinking configuration to a request params hash (mutates
          # params in-place).
          #
          # @param params [Hash] assembled request params to mutate
          # @param model_id [String] Anthropic model identifier
          # @param thinking [String, Symbol, Hash, nil] the interface's `thinking:` kwarg
          #   - String/Symbol "low"|"medium"|"high"|"max"|"xhigh" — effort level
          #   - Hash {type: :enabled, budget_tokens: N} — explicit enabled config
          #   - Hash {effort: "low"} — effort level in hash form
          #   - nil / false — no thinking; method returns immediately
          # @param tool_choice [Symbol, String, Hash, nil] tool selection policy;
          #   :any or {type: :tool} strips thinking from the request
          # @param max_output_tokens [Integer, nil] upper bound for max_tokens
          #   clamping (from PricingTable); nil or non-positive means no clamp
          # @return [Hash] the mutated params hash
          def apply(params, model_id:, thinking: nil, tool_choice: nil, max_output_tokens: nil)
            return params unless thinking

            # Skip silently for models that don't support extended thinking
            # (e.g. Haiku family). This lets callers set a global default of
            # "high" without breaking when they switch to a non-thinking model.
            return params unless supports_thinking?(model_id)

            # Step 1: build thinking config appropriate for this model
            if adaptive_mode?(model_id)
              apply_adaptive(params, model_id, thinking)
            else
              apply_enabled(params, thinking)
            end

            # Step 2: strip thinking when tool_choice forces a specific tool
            disable_if_tool_choice_forced(params, tool_choice)

            # Step 3: ensure max_tokens is sufficient for budget-based thinking
            ensure_max_tokens(params, max_output_tokens)

            params
          end

          # ── Adaptive-mode configuration ────────────────────────────────────
          #
          # Used for Opus/Sonnet 4.6+. Sets `thinking: {type: "adaptive"}`
          # (plus `display: "summarized"` where supported) and, when a valid
          # effort level was supplied, `output_config: {effort: ...}`.
          # An unrecognised effort level leaves output_config unset.
          def apply_adaptive(params, model_id, thinking)
            adaptive = { type: "adaptive" }
            adaptive[:display] = "summarized" if supports_adaptive_display?(model_id)
            params[:thinking] = adaptive

            effort = extract_effort(thinking)
            params[:output_config] = { effort: effort } if effort
          end

          # ── Enabled-mode configuration ─────────────────────────────────────
          #
          # Used for models older than Opus/Sonnet 4.6 that support thinking
          # but require an explicit token budget.
          #
          # Budget resolution order for a Hash: explicit `budget_tokens`, then
          # the `effort` level, then the "high" default. Any other value is
          # treated as an effort level.
          def apply_enabled(params, thinking)
            budget =
              case thinking
              when Hash
                opts = thinking.transform_keys(&:to_sym)
                explicit = opts[:budget_tokens]
                explicit ? explicit.to_i : budget_for_effort(opts[:effort])
              else
                budget_for_effort(thinking)
              end

            params[:thinking] = { type: "enabled", budget_tokens: budget }
          end

          # Map an effort level (String or Symbol) to a token budget.
          # Falls back to the "high" budget for nil or unrecognised values.
          def budget_for_effort(level)
            EFFORT_BUDGET_MAP.fetch(level.to_s, DEFAULT_BUDGET_TOKENS)
          end

          # ── Effort extraction ──────────────────────────────────────────────

          # Extract an effort-level string from the `thinking:` kwarg.
          # Accepts a String/Symbol level or a Hash with an `effort` key
          # (string or symbol key). Returns nil when no valid level is found.
          def extract_effort(thinking)
            candidate =
              case thinking
              when String, Symbol then thinking.to_s
              when Hash then thinking.transform_keys(&:to_sym)[:effort]&.to_s
              end

            candidate if VALID_EFFORT_LEVELS.include?(candidate)
          end

          # ── Tool-choice guard ──────────────────────────────────────────────

          # Remove thinking and output_config when tool_choice forces a
          # specific tool (:any) or a named tool ({type: :tool}).
          # The Anthropic API returns 400 if thinking is present alongside
          # these tool_choice values.
          def disable_if_tool_choice_forced(params, tool_choice)
            return unless forced_tool_choice?(tool_choice)

            params.delete(:thinking)
            params.delete(:output_config)
          end

          # True when tool_choice is :any / "any", or a Hash whose `type`
          # (symbol or string key) is "any" or "tool".
          def forced_tool_choice?(tool_choice)
            case tool_choice
            when Symbol, String
              tool_choice.to_s == "any"
            when Hash
              type = tool_choice[:type] || tool_choice["type"]
              %w[any tool].include?(type.to_s)
            else
              false
            end
          end

          # ── max_tokens guard ───────────────────────────────────────────────

          # For budget-based (enabled) thinking, max_tokens must be at least
          # budget_tokens + OUTPUT_FALLBACK_BUFFER so the model has room to
          # emit both thinking and response content.
          # If a positive max_output_tokens is provided (from PricingTable),
          # the result is clamped to that upper bound. When the clamp leaves
          # max_tokens at or below the thinking budget, the budget is shrunk
          # so that budget_tokens stays strictly below max_tokens.
          def ensure_max_tokens(params, max_output_tokens)
            thinking = params[:thinking]
            return unless thinking.is_a?(Hash) && thinking[:type].to_s == "enabled"

            budget = thinking[:budget_tokens].to_i
            return unless budget.positive?

            required = budget + OUTPUT_FALLBACK_BUFFER
            return unless params[:max_tokens].to_i < required

            # nil.to_i is 0, so a missing and a non-positive cap both mean "no cap".
            cap = max_output_tokens.to_i
            limit = cap.positive? ? [required, cap].min : required
            params[:max_tokens] = limit
            return if budget < limit

            # Cap is too small for the requested budget: keep up to
            # OUTPUT_FALLBACK_BUFFER (at most half the cap) for the response
            # and give the remainder to thinking.
            headroom = (limit / 2).clamp(1, OUTPUT_FALLBACK_BUFFER)
            params[:thinking] = thinking.merge(budget_tokens: limit - headroom)
          end

          # ── Model capability detection ─────────────────────────────────────
          #
          # Model IDs follow the pattern:
          #   claude-(opus|sonnet)-MAJOR-MINOR[-date]
          #
          # Examples:
          #   claude-opus-4-6            → opus 4.6   → adaptive
          #   claude-opus-4-7            → opus 4.7   → adaptive + display
          #   claude-opus-4-7-20251018   → opus 4.7   → adaptive + display
          #   claude-sonnet-4-6          → sonnet 4.6 → adaptive (no display)
          #   claude-sonnet-4-5          → sonnet 4.5 → enabled mode
          #   claude-opus-4-20250514     → opus 4.0   → enabled mode
          #   claude-3-7-sonnet-20250219 → sonnet 3.7 → enabled mode (legacy ID)
          #
          # The negative lookahead (?!\d) after the MINOR group prevents the
          # regex from matching partial digits in date suffixes
          # (e.g. the "20" in "20250514").

          # Returns true for any model ID matching the opus/sonnet version
          # pattern, plus the legacy-format Sonnet 3.7 ID.
          # Haiku models return false.
          def supports_thinking?(model_id)
            id = canonical_id(model_id)
            MODEL_VERSION_RE.match?(id) || LEGACY_THINKING_RE.match?(id)
          end

          # Returns true for models that use adaptive thinking:
          # Opus 4.6+ and Sonnet 4.6+.
          def adaptive_mode?(model_id)
            m = MODEL_VERSION_RE.match(canonical_id(model_id))
            return false unless m

            version_at_least?(m[2], m[3], 4, 6)
          end

          # Returns true for models that support the `display: "summarized"`
          # field on adaptive thinking: Opus 4.7+ only.
          def supports_adaptive_display?(model_id)
            m = OPUS_VERSION_RE.match(canonical_id(model_id))
            return false unless m

            version_at_least?(m[1], m[2], 4, 7)
          end

          # Compare captured MAJOR/MINOR strings against a minimum version.
          # A missing (nil) capture counts as 0.
          def version_at_least?(major, minor, min_major, min_minor)
            ([major.to_i, minor.to_i] <=> [min_major, min_minor]) >= 0
          end

          # Strip any path-style prefix from a model ID, keeping the text after
          # the last "/" (e.g. "vertex/claude-opus-4-7" → "claude-opus-4-7").
          # Dot-prefixed IDs such as "anthropic.claude-opus-4-7" are returned
          # unchanged; the version regexes are unanchored, so they still match.
          def canonical_id(model_id)
            model_id.to_s.rpartition("/").last
          end
        end
      end
    end
  end
end