# lib/dispatch/adapter/tester/playbook.rb

# frozen_string_literal: true

require "json"

module Dispatch
  module Adapter
    module Tester
      # A deterministic, scriptable adapter for integration testing.
      #
      # Playbook is a drop-in replacement for Dispatch::Adapter::Copilot.
      # Instead of calling a real LLM API, it replays a pre-defined sequence
      # of steps from a JSON script. Each call to #chat consumes the next step
      # and returns a Dispatch::Adapter::Response built from it.
      #
      # The step cursor and the call log are only mutated while holding an
      # internal Mutex, and the progress helpers read the cursor under the
      # same lock.
      #
      # Usage:
      #   steps_json = '[{"step":1,"type":"message","content":"Hello"}]'
      #   adapter = Dispatch::Adapter::Tester::Playbook.new(steps_json: steps_json)
      #   response = adapter.chat(messages, system: "...", tools: [...])
      #   response.content  # => "Hello"
      #
      class Playbook < Dispatch::Adapter::Base
        MODEL_NAME = "tester-playbook"
        PROVIDER_NAME = "Tester Playbook"
        MAX_CONTEXT_TOKENS = 1_000_000

        # steps         - the parsed Step objects, in script order
        # current_index - number of steps consumed so far (index of the next step)
        # call_log      - one Hash per successful #chat call, in call order
        attr_reader :steps, :current_index, :call_log

        # @param steps_json [String, Array] JSON string containing an array of step
        #   objects, or an already-decoded Array of step data
        # @param model [String] model name to report (default: "tester-playbook")
        # @param max_tokens [Integer] accepted for interface compat; stored, but
        #   nothing in this class reads it
        # @param _kwargs [Hash] absorbs any extra keyword arguments for drop-in compat
        #   (e.g. min_request_interval, rate_limit, etc.)
        # @raise [InvalidPlaybookError] if the script is not valid JSON, is not an
        #   array, or contains duplicate step IDs
        def initialize(steps_json: "[]", model: MODEL_NAME, max_tokens: 200_000, **_kwargs)
          super()
          @model = model
          @max_tokens = max_tokens
          @steps = parse_steps(steps_json)
          @current_index = 0
          @call_log = []
          @mutex = Mutex.new

          validate_step_ids_unique!
        end

        # Consume the next step and return a Response.
        #
        # Accepts the same signature as Dispatch::Adapter::Base#chat.
        # The messages, system, tools, stream, max_tokens, and thinking
        # parameters are recorded in the call_log for assertion purposes
        # but do not affect the response (which is entirely driven by the script).
        # The block, if given, is accepted and ignored.
        #
        # @return [Dispatch::Adapter::Response]
        # @raise [PlaybookExhaustedError] if every scripted step was already consumed
        def chat(messages, system: nil, tools: [], stream: false, max_tokens: nil, thinking: nil, &_block)
          @mutex.synchronize do
            if @current_index >= @steps.length
              raise PlaybookExhaustedError.new(
                total_steps: @steps.length,
                # Counts the failing call itself on top of the consumed steps.
                calls_made: @current_index + 1
              )
            end

            step = @steps[@current_index]
            @current_index += 1

            # Record shallow snapshots of the collections: callers commonly keep
            # appending to the same messages array between turns, and storing the
            # live object would make every earlier entry show the final state.
            @call_log << {
              step: step,
              messages: messages.dup,
              system: system,
              tools: tools.dup,
              stream: stream,
              max_tokens: max_tokens,
              thinking: thinking
            }

            build_response_from_step(step)
          end
        end

        # @return [String] the model name given to the constructor
        def model_name
          @model
        end

        # @return [String] the fixed provider label
        def provider_name
          PROVIDER_NAME
        end

        # @return [Integer] the fixed context window size reported by this adapter
        def max_context_tokens
          MAX_CONTEXT_TOKENS
        end

        # Describes the single model this adapter exposes.
        #
        # NOTE(review): the id is always MODEL_NAME, even when a custom `model:`
        # was passed to the constructor (which #model_name and responses do
        # report) - confirm this is intended.
        #
        # @return [Array<Dispatch::Adapter::ModelInfo>]
        def list_models
          [
            Dispatch::Adapter::ModelInfo.new(
              id: MODEL_NAME,
              name: PROVIDER_NAME,
              max_context_tokens: MAX_CONTEXT_TOKENS,
              supports_vision: false,
              supports_tool_use: true,
              supports_streaming: false
            )
          ]
        end

        # --- Test helper methods ---

        # Check if all steps have been consumed.
        # @return [Boolean]
        def finished?
          consumed_count >= @steps.length
        end

        # Returns the number of remaining unconsumed steps.
        # @return [Integer]
        def remaining_steps
          @steps.length - consumed_count
        end

        # Raises UnconsumedStepsError if there are steps left.
        # Call this at the end of a test to ensure the full script was exercised.
        #
        # @return [nil] when every step was consumed
        # @raise [UnconsumedStepsError] listing the IDs of the steps never reached
        def verify_all_consumed!
          # Read the cursor once so the numbers in the error agree with each
          # other even if another thread calls #chat in the meantime.
          consumed = consumed_count
          return if consumed >= @steps.length

          raise UnconsumedStepsError.new(
            total_steps: @steps.length,
            consumed: consumed,
            remaining_step_ids: @steps[consumed..].map(&:step_id)
          )
        end

        # Resets the playbook to the beginning.
        # Useful if you need to replay the same script.
        # Rewinds the cursor and empties the call log in place.
        def reset!
          @mutex.synchronize do
            @current_index = 0
            @call_log.clear
          end
        end

        private

        # Reads the cursor under the lock. Mutex is not reentrant, so this must
        # never be called from inside a @mutex.synchronize block.
        def consumed_count
          @mutex.synchronize { @current_index }
        end

        # Turns the constructor input into an Array of Step objects.
        #
        # @param steps_json [String, Array] JSON text or already-decoded step data
        # @return [Array<Step>]
        # @raise [InvalidPlaybookError] on malformed JSON, a non-array JSON document,
        #   or an input that is neither a String nor an Array
        def parse_steps(steps_json)
          raw_steps =
            case steps_json
            when String
              decoded = JSON.parse(steps_json)
              unless decoded.is_a?(Array)
                raise InvalidPlaybookError, "Playbook JSON must be an array, got #{decoded.class}"
              end

              decoded
            when Array
              steps_json
            else
              raise InvalidPlaybookError,
                    "steps_json must be a JSON string or Array, got #{steps_json.class}"
            end

          raw_steps.map { |data| Step.new(data) }
        rescue JSON::ParserError => e
          raise InvalidPlaybookError, "Failed to parse playbook JSON: #{e.message}"
        end

        # Ensures no two steps share the same step_id.
        #
        # @raise [InvalidPlaybookError] naming every duplicated ID
        def validate_step_ids_unique!
          ids = @steps.map(&:step_id)
          duplicates = ids.group_by(&:itself).select { |_, occurrences| occurrences.size > 1 }.keys
          return if duplicates.empty?

          raise InvalidPlaybookError,
                "Duplicate step IDs found: #{duplicates.inspect}. Each step must have a unique 'step' value."
        end

        # Builds the Response for one scripted step.
        #
        # Each scripted tool call becomes a ToolUseBlock; the stop reason is
        # :tool_use when there is at least one, otherwise :end_turn. Token usage
        # is always reported as zero.
        #
        # @param step [Step]
        # @return [Dispatch::Adapter::Response]
        def build_response_from_step(step)
          tool_calls = step.tool_calls.map do |tc|
            Dispatch::Adapter::ToolUseBlock.new(
              id: tc["id"],
              name: tc["name"],
              arguments: tc["arguments"]
            )
          end

          stop_reason = tool_calls.any? ? :tool_use : :end_turn

          Dispatch::Adapter::Response.new(
            content: step.content,
            tool_calls: tool_calls,
            model: @model,
            stop_reason: stop_reason,
            usage: Dispatch::Adapter::Usage.new(
              input_tokens: 0,
              output_tokens: 0
            )
          )
        end
      end
    end
  end
end