summaryrefslogtreecommitdiffhomepage
path: root/lib/dispatch/adapter/claude/request_builder/messages.rb
blob: 7f219c75fea767acf9c645373a2b5543d66caa89 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
# frozen_string_literal: true

module Dispatch
  module Adapter
    class Claude < Base
      module RequestBuilder
        # Converts an Array<Dispatch::Adapter::Message> into the Anthropic
        # messages wire format: [{role:, content:}, …].
        #
        # Key transformations applied here:
        #   - Adjacent "tool result" messages are merged into one user message.
        #   - Thinking block signature rules are applied on assistant messages.
        #   - Top-level image blocks in user messages are dropped when the model
        #     doesn't accept images (images nested inside tool results are
        #     always forwarded).
        #   - Empty text/thinking blocks are elided.
        #   - A trailing assistant message gets a synthetic "Continue." user turn.
        #   - Tool names in tool_use blocks are prefixed with "proxy_" for OAuth.
        #
        # A message is treated as a "tool result message" when its content is an
        # Array exclusively containing ToolResultBlock objects.
        module Messages
          module_function

          # Translate a conversation into the Anthropic `messages` array.
          #
          # Runs of adjacent tool-result messages collapse into a single user
          # turn. User/developer and assistant messages are delegated to their
          # converters, and a converter returning nil drops the message.
          #
          # @param messages [Array<Dispatch::Adapter::Message>]
          # @param model_info [Dispatch::Adapter::ModelInfo, nil]
          # @param is_oauth [Boolean]
          # @return [Array<Hash>]
          def build(messages, model_info: nil, is_oauth: false)
            wire_messages = []

            # Two neighbours stay in the same run only when both are tool-result
            # messages, so every other message ends up alone in its own run.
            runs = messages.chunk_while do |left, right|
              tool_result_message?(left) && tool_result_message?(right)
            end

            runs.each do |run|
              head = run.first

              if tool_result_message?(head)
                merged = run.flat_map { |member| extract_tool_result_blocks(member) }
                wire_messages << { role: "user", content: merged }
                next
              end

              # Roles other than user/developer/assistant (e.g. "system") yield
              # nil here and are skipped; system prompts are handled separately
              # by the Cloaking module.
              converted =
                case head.role.to_s
                when "user", "developer"
                  convert_user_message(head, model_info: model_info)
                when "assistant"
                  convert_assistant_message(head, is_oauth: is_oauth)
                end

              wire_messages << converted if converted
            end

            # The API requires a user follow-up after a trailing assistant
            # message (otherwise it returns an error), so append the same
            # synthetic "Continue." turn that oh-my-pi uses.
            if wire_messages.last&.dig(:role) == "assistant"
              wire_messages << { role: "user", content: "Continue." }
            end

            wire_messages
          end

          # ── Tool result helpers ─────────────────────────────────────────────

          # Whether +msg+ is a pure tool-result message: its content must be a
          # non-empty Array in which every element is a ToolResultBlock.
          #
          # @param msg [#content]
          # @return [Boolean]
          def tool_result_message?(msg)
            blocks = msg.content

            blocks.is_a?(Array) && !blocks.empty? && blocks.all?(ToolResultBlock)
          end

          # Map each ToolResultBlock in +msg.content+ to an Anthropic
          # `tool_result` content block hash.
          #
          # The `content` key is only present when the block's inner content
          # converts to something non-nil.
          #
          # @param msg [#content] message whose content is an Array of ToolResultBlock
          # @return [Array<Hash>]
          def extract_tool_result_blocks(msg)
            msg.content.map do |result|
              base = {
                type: "tool_result",
                tool_use_id: result.tool_use_id,
                is_error: result.is_error
              }
              inner = convert_tool_result_content(result.content)

              inner.nil? ? base : base.merge(content: inner)
            end
          end

          # Normalise the inner content of a ToolResultBlock for the wire.
          #
          # - nil stays nil.
          # - An Array is converted element by element; nil is returned when
          #   nothing survives.
          # - A String is forwarded unchanged and any other object is
          #   stringified with #to_s; an empty result becomes nil.
          #
          # @param content [String, Array, Object, nil]
          # @return [String, Array<Hash>, nil]
          def convert_tool_result_content(content)
            return nil if content.nil?

            if content.is_a?(Array)
              wire_blocks = content.flat_map { |item| convert_content_for_tool_result(item) }.compact
              return wire_blocks.empty? ? nil : wire_blocks
            end

            text = content.is_a?(String) ? content : content.to_s
            text.empty? ? nil : text
          end

          # Convert one element of a ToolResultBlock's Array content into a
          # list holding zero or one Anthropic content block hashes.
          #
          # Text that is empty or whitespace-only is dropped, in line with how
          # blank text is elided from user and assistant messages. Images are
          # always forwarded; any other block type yields nothing.
          #
          # @param block [TextBlock, ImageBlock, Object]
          # @return [Array<Hash>]
          def convert_content_for_tool_result(block)
            case block
            when TextBlock
              text = block.text.to_s
              # Blank check uses strip so whitespace-only text never reaches
              # the wire; the text that is kept is forwarded unmodified.
              text.strip.empty? ? [] : [{ type: "text", text: text }]
            when ImageBlock
              [build_image_block(block)]
            else
              []
            end
          end

          # ── User / developer messages ────────────────────────────────────────

          # Build the wire hash for a user/developer message, or nil when
          # nothing is left to send.
          #
          # String content is forwarded as-is unless it is blank. Array content
          # is converted block by block, after which image blocks are removed
          # for models without vision support and blank text blocks are
          # removed. Any other content type yields nil.
          #
          # @param msg [#content]
          # @param model_info [Dispatch::Adapter::ModelInfo, nil]
          # @return [Hash, nil]
          def convert_user_message(msg, model_info:)
            body = msg.content

            if body.is_a?(String)
              return body.strip.empty? ? nil : { role: "user", content: body }
            end
            return nil unless body.is_a?(Array)

            converted = body.flat_map { |item| convert_user_block(item) }.compact
            drop_images = !vision_supported?(model_info)

            kept = converted.reject do |wire|
              (drop_images && wire[:type] == "image") ||
                (wire[:type] == "text" && wire[:text].to_s.strip.empty?)
            end

            kept.empty? ? nil : { role: "user", content: kept }
          end

          # Convert one content block of a user message into a list holding
          # zero or one wire hashes.
          #
          # - TextBlock: a text block, or nothing when the text is empty.
          # - ImageBlock: an image block.
          # - ToolResultBlock: a `tool_result` block. This is the safety net for
          #   mixed-content user messages; messages made up solely of tool
          #   results are batched elsewhere.
          # - Anything else: nothing.
          #
          # @param block [Object]
          # @return [Array<Hash>]
          def convert_user_block(block)
            case block
            when TextBlock
              body = block.text.to_s
              body.empty? ? [] : [{ type: "text", text: body }]
            when ImageBlock
              [build_image_block(block)]
            when ToolResultBlock
              entry = {
                type: "tool_result",
                tool_use_id: block.tool_use_id,
                is_error: block.is_error
              }
              inner = convert_tool_result_content(block.content)
              # Only attach `content` when the conversion produced something.
              [inner.nil? ? entry : entry.merge(content: inner)]
            else
              []
            end
          end

          # Wrap an image block in the Anthropic `image` content shape, using
          # the block's media type and source as a base64 source.
          #
          # @param block [#media_type, #source]
          # @return [Hash]
          def build_image_block(block)
            source = { type: "base64", media_type: block.media_type, data: block.source }

            { type: "image", source: source }
          end

          # Whether images may be sent. With no model info available vision is
          # assumed; otherwise the model's own `supports_vision` value is
          # returned as-is.
          #
          # @param model_info [#supports_vision, nil]
          # @return [Boolean, nil]
          def vision_supported?(model_info)
            model_info.nil? || model_info.supports_vision
          end

          # ── Assistant messages ───────────────────────────────────────────────

          # Build the wire hash for an assistant message, or nil when nothing
          # is left to send.
          #
          # String content is wrapped in a single text block (blank strings are
          # dropped) so that plain-text assistant turns are not lost from the
          # conversation. Array content is converted block by block. Any other
          # content type yields nil.
          #
          # @param msg [#content]
          # @param is_oauth [Boolean] forwarded to the block converter
          # @return [Hash, nil]
          def convert_assistant_message(msg, is_oauth:)
            content = msg.content

            if content.is_a?(String)
              return nil if content.strip.empty?

              # Emit the same Array-of-blocks shape as the Array path below.
              return { role: "assistant", content: [{ type: "text", text: content }] }
            end
            return nil unless content.is_a?(Array)

            # True when any thinking block in this message carries a non-blank
            # signature. The flag is handed to the block converter, which
            # decides how thinking blocks are emitted.
            has_signed_thinking = content.any? do |b|
              b.is_a?(ThinkingBlock) && !b.signature.to_s.strip.empty?
            end

            blocks = content.flat_map do |block|
              convert_assistant_block(block,
                                      has_signed_thinking: has_signed_thinking,
                                      is_oauth: is_oauth)
            end.compact

            return nil if blocks.empty?

            { role: "assistant", content: blocks }
          end

          # Convert one content block of an assistant message into a list
          # holding zero or one wire hashes.
          #
          # - TextBlock: a text block unless the text is blank.
          # - ThinkingBlock: delegated to convert_thinking_block.
          # - RedactedThinkingBlock: a `redacted_thinking` block unless its data
          #   is blank.
          # - ToolUseBlock: a `tool_use` block; under OAuth the tool name goes
          #   through Cloaking.apply_prefix, and missing arguments become {}.
          # - Anything else: nothing.
          #
          # @param block [Object]
          # @param has_signed_thinking [Boolean]
          # @param is_oauth [Boolean]
          # @return [Array<Hash>]
          def convert_assistant_block(block, has_signed_thinking:, is_oauth:)
            case block
            when TextBlock
              block.text.to_s.strip.empty? ? [] : [{ type: "text", text: block.text }]
            when ThinkingBlock
              convert_thinking_block(block, has_signed_thinking: has_signed_thinking)
            when RedactedThinkingBlock
              block.data.to_s.strip.empty? ? [] : [{ type: "redacted_thinking", data: block.data }]
            when ToolUseBlock
              wire_name = block.name
              wire_name = Cloaking.apply_prefix(wire_name) if is_oauth
              [{ type: "tool_use", id: block.id, name: wire_name, input: block.arguments || {} }]
            else
              []
            end
          end

          # Apply the thinking-block signature rules described in research §3.1.
          #
          # The outcome depends only on the block itself:
          #   - non-blank signature → emitted as a "thinking" block with its
          #     signature
          #   - blank or missing signature → downgraded to a plain text block,
          #     or dropped when the thinking text is blank as well
          #
          # +has_signed_thinking+ is accepted for interface compatibility but
          # does not influence the result, because signed and unsigned contexts
          # produce identical output.
          #
          # @param block [#thinking, #signature]
          # @param has_signed_thinking [Boolean] unused
          # @return [Array<Hash>]
          def convert_thinking_block(block, has_signed_thinking: false) # rubocop:disable Lint/UnusedMethodArgument
            unless block.signature.to_s.strip.empty?
              return [{ type: "thinking", thinking: block.thinking, signature: block.signature }]
            end

            block.thinking.to_s.strip.empty? ? [] : [{ type: "text", text: block.thinking }]
          end
        end
      end
    end
  end
end