lib/dispatch/adapter/claude/request_builder/thinking.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230

# frozen_string_literal: true

module Dispatch
  module Adapter
    class Claude < Base
      module RequestBuilder
        # Translates the interface's `thinking:` kwarg into the Anthropic
        # `thinking` + `output_config.effort` request parameters, with
        # version-specific handling:
        #   - Opus 4.7+                → adaptive thinking + `display: "summarized"`
        #   - Opus/Sonnet 4.6+         → adaptive thinking (no display field)
        #   - older opus/sonnet models → enabled thinking + explicit budget_tokens
        #
        # Also handles:
        #   - `disableThinkingIfToolChoiceForced`: remove thinking/output_config
        #     when tool_choice forces a tool (:any / "any" or {type: :tool}).
        #   - `ensureMaxTokensForThinking`: raise max_tokens to at least
        #     budget_tokens + OUTPUT_FALLBACK_BUFFER for enabled-mode requests
        #     (never lowering a value the caller already supplied).
        module Thinking
          # Extra output buffer added on top of thinking budget_tokens to
          # ensure there is room for the assistant's final response.
          # Matches oh-my-pi's OUTPUT_FALLBACK_BUFFER constant.
          OUTPUT_FALLBACK_BUFFER = 4096

          # Budget-token values used for "enabled" thinking mode on older models
          # (pre-4.6) when the caller supplies an effort level rather than
          # an explicit {type: :enabled, budget_tokens: N} hash.
          # These mirror the human-intuitive ladder used by oh-my-pi / omp.
          EFFORT_BUDGET_MAP = {
            "low" => 1_024,
            "medium" => 4_000,
            "high" => 10_000,
            "max" => 32_000,
            "xhigh" => 32_000
          }.freeze

          # Budget used when no (recognised) effort level or explicit budget
          # is supplied for an enabled-mode request.
          DEFAULT_BUDGET_TOKENS = EFFORT_BUDGET_MAP.fetch("high")

          # Effort level strings this module forwards as `output_config.effort`.
          VALID_EFFORT_LEVELS = %w[low medium high max xhigh].freeze

          # Model-version regexes for detecting adaptive/enabled thinking support.
          # Capture groups: MODEL_VERSION_RE → (family, major, minor);
          # OPUS_VERSION_RE → (major, minor). The minor group is optional.
          MODEL_VERSION_RE = /claude-(opus|sonnet)-(\d{1,2})(?:[.-](\d{1,2})(?!\d))?/
          OPUS_VERSION_RE  = /claude-opus-(\d{1,2})(?:[.-](\d{1,2})(?!\d))?/

          # Dotted provider/region namespace that may precede the model name,
          # e.g. "anthropic." or "us.anthropic." in front of "claude-...".
          PROVIDER_PREFIX_RE = /\A(?:[a-z0-9-]+\.)+(?=claude-)/i

          module_function

          # Apply thinking configuration to a request params hash (mutates
          # params in-place).
          #
          # @param params       [Hash]    assembled request params to mutate
          # @param model_id     [String]  Anthropic model identifier
          # @param thinking     [String, Symbol, Hash, nil]  the interface's
          #   `thinking:` kwarg
          #   - "low"|"medium"|"high"|"max"|"xhigh" (String or Symbol) — effort
          #     level; sent as `output_config.effort` in adaptive mode, mapped
          #     to a token budget via EFFORT_BUDGET_MAP in enabled mode
          #   - Hash {type: :enabled, budget_tokens: N} — explicit enabled config
          #   - Hash {effort: "..."} — effort level supplied in hash form
          #   - Hash {type: :disabled}, nil, false — no thinking; params are
          #     returned untouched
          # @param tool_choice  [Symbol, String, Hash, nil]  tool selection
          #   policy; :any / "any" or {type: :any | :tool} strips thinking
          #   from the request
          # @param max_output_tokens [Integer, nil]  upper bound used when
          #   raising max_tokens (from PricingTable); nil or a non-positive
          #   value means no upper bound is applied
          # @return [Hash] the mutated params hash
          def apply(params, model_id:, thinking: nil, tool_choice: nil, max_output_tokens: nil)
            return params unless thinking_requested?(thinking)

            # Skip silently for models that don't support extended thinking
            # (e.g. Haiku family). This lets callers set a global default of
            # "high" without breaking when they switch to a non-thinking model.
            return params unless supports_thinking?(model_id)

            # Step 1: build thinking config appropriate for this model
            if adaptive_mode?(model_id)
              apply_adaptive(params, model_id, thinking)
            else
              apply_enabled(params, thinking)
            end

            # Step 2: strip thinking when tool_choice forces a specific tool
            disable_if_tool_choice_forced(params, tool_choice)

            # Step 3: ensure max_tokens is sufficient for budget-based thinking
            ensure_max_tokens(params, max_output_tokens)

            params
          end

          # Whether the `thinking:` kwarg actually asks for thinking.
          # nil, false and a Hash whose type is "disabled" all mean "off";
          # every other value (including `true`) means "on".
          def thinking_requested?(thinking)
            return false unless thinking
            return true unless thinking.is_a?(Hash)

            hash_option(thinking, :type).to_s != "disabled"
          end

          # ── Adaptive-mode configuration ────────────────────────────────────
          #
          # Used for Opus/Sonnet 4.6+. Sets `thinking: {type: "adaptive"}`
          # (plus `display: "summarized"` on Opus 4.7+) and, when a valid
          # effort level was supplied, `output_config.effort`.

          def apply_adaptive(params, model_id, thinking)
            config = { type: "adaptive" }
            config[:display] = "summarized" if supports_adaptive_display?(model_id)
            params[:thinking] = config

            effort = extract_effort(thinking)
            return unless effort

            # Merge rather than overwrite so that other output_config fields
            # already placed on the request by the caller survive.
            existing = params[:output_config]
            params[:output_config] =
              existing.is_a?(Hash) ? existing.merge(effort: effort) : { effort: effort }
          end

          # ── Enabled-mode configuration ─────────────────────────────────────
          #
          # Used for models older than Opus/Sonnet 4.6 that support thinking
          # but require an explicit token budget.

          def apply_enabled(params, thinking)
            params[:thinking] = { type: "enabled", budget_tokens: budget_for(thinking) }
          end

          # Resolve the enabled-mode token budget for a `thinking:` value.
          #   - Hash with budget_tokens → that value, coerced with #to_i
          #   - Hash with effort, or a String/Symbol effort → EFFORT_BUDGET_MAP
          #   - anything else / unrecognised effort → DEFAULT_BUDGET_TOKENS
          def budget_for(thinking)
            case thinking
            when Hash
              explicit = hash_option(thinking, :budget_tokens)
              return explicit.to_i if explicit

              EFFORT_BUDGET_MAP.fetch(hash_option(thinking, :effort).to_s, DEFAULT_BUDGET_TOKENS)
            when String, Symbol
              EFFORT_BUDGET_MAP.fetch(thinking.to_s, DEFAULT_BUDGET_TOKENS)
            else
              DEFAULT_BUDGET_TOKENS
            end
          end

          # ── Effort extraction ──────────────────────────────────────────────

          # Extract an effort-level string from the `thinking:` kwarg.
          # Accepts a String/Symbol level or a Hash carrying an `effort` key
          # (symbol or string). Returns nil when no valid effort level is found.
          def extract_effort(thinking)
            candidate =
              case thinking
              when String, Symbol then thinking.to_s
              when Hash then hash_option(thinking, :effort)&.to_s
              end

            candidate if VALID_EFFORT_LEVELS.include?(candidate)
          end

          # ── Tool-choice guard ──────────────────────────────────────────────

          # Remove thinking and output_config when tool_choice forces a
          # specific tool (:any) or a named tool ({type: :tool}).
          # The Anthropic API returns 400 if thinking is present alongside
          # these tool_choice values.
          def disable_if_tool_choice_forced(params, tool_choice)
            return unless forced_tool_choice?(tool_choice)

            params.delete(:thinking)
            params.delete(:output_config)
          end

          # True when tool_choice obliges the model to call a tool:
          # the scalar :any / "any", or a Hash whose type is "any" or "tool"
          # (symbol or string keys and values are both accepted).
          def forced_tool_choice?(tool_choice)
            case tool_choice
            when Symbol, String
              tool_choice.to_s == "any"
            when Hash
              %w[any tool].include?(hash_option(tool_choice, :type).to_s)
            else
              false
            end
          end

          # ── max_tokens guard ───────────────────────────────────────────────

          # For budget-based (enabled) thinking, max_tokens must be at least
          # budget_tokens + OUTPUT_FALLBACK_BUFFER so the model has room to
          # emit both thinking and response content.
          # If a positive max_output_tokens is provided (from PricingTable),
          # the target is capped at that upper bound *before* comparing, so a
          # max_tokens value already supplied by the caller is only ever
          # raised, never lowered.
          def ensure_max_tokens(params, max_output_tokens)
            thinking = params[:thinking]
            return unless thinking.is_a?(Hash) && thinking[:type].to_s == "enabled"

            budget = thinking[:budget_tokens].to_i
            return unless budget.positive?

            target  = budget + OUTPUT_FALLBACK_BUFFER
            ceiling = max_output_tokens.to_i # nil → 0 → no cap
            target  = [target, ceiling].min if ceiling.positive?

            params[:max_tokens] = target if params[:max_tokens].to_i < target
          end

          # ── Model capability detection ─────────────────────────────────────
          #
          # Model IDs follow the pattern:
          #   claude-(opus|sonnet)-MAJOR-MINOR[-date]
          #
          # Examples:
          #   claude-opus-4-6             → opus 4.6 → adaptive
          #   claude-opus-4-7             → opus 4.7 → adaptive + display
          #   claude-opus-4-7-20251018    → opus 4.7 → adaptive + display
          #   claude-sonnet-4-6           → sonnet 4.6 → adaptive (no display)
          #   claude-sonnet-4-5           → sonnet 4.5 → enabled mode
          #   claude-opus-4-20250514      → opus 4.0 → enabled mode
          #
          # The negative lookahead (?!\d) after the MINOR group prevents the
          # regex from matching partial digits in date suffixes
          # (e.g. the "20" in "20250514").

          # Returns true for any model id containing `claude-opus-N` or
          # `claude-sonnet-N`. Haiku models return false.
          # NOTE(review): legacy ids that put the version before the family
          # (e.g. "claude-3-7-sonnet-...") do not match MODEL_VERSION_RE and are
          # therefore treated as non-thinking — confirm this is intended.
          def supports_thinking?(model_id)
            MODEL_VERSION_RE.match?(canonical_id(model_id))
          end

          # Returns true for models that use adaptive thinking:
          # Opus 4.6+ and Sonnet 4.6+.
          def adaptive_mode?(model_id)
            m = MODEL_VERSION_RE.match(canonical_id(model_id))
            !m.nil? && version_at_least?(m[2], m[3], 4, 6)
          end

          # Returns true for models that support the `display: "summarized"`
          # field on adaptive thinking: Opus 4.7+ only.
          def supports_adaptive_display?(model_id)
            m = OPUS_VERSION_RE.match(canonical_id(model_id))
            !m.nil? && version_at_least?(m[1], m[2], 4, 7)
          end

          # Compare a captured MAJOR/MINOR pair (strings or nil; nil counts
          # as 0) against a minimum version.
          def version_at_least?(major, minor, min_major, min_minor)
            ([major.to_i, minor.to_i] <=> [min_major, min_minor]) >= 0
          end

          # Strip any Bedrock/Vertex/proxy prefix from a model ID: everything
          # up to the last "/", then any dotted provider namespace directly in
          # front of "claude-"
          # (e.g. "anthropic.claude-opus-4-7" → "claude-opus-4-7",
          #       "bedrock/us.anthropic.claude-opus-4-7" → "claude-opus-4-7").
          # Version/date suffixes after the model name are left in place.
          def canonical_id(model_id)
            id = model_id.to_s
            slash = id.rindex("/")
            id = id[(slash + 1)..] if slash
            id.sub(PROVIDER_PREFIX_RE, "")
          end

          # Read an option from a Hash that may use symbol or string keys.
          def hash_option(hash, key)
            hash[key] || hash[key.to_s]
          end
        end
      end
    end
  end
end