diff options
| author | Adam Malczewski <[email protected]> | 2026-04-29 21:41:30 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-04-29 21:41:30 +0900 |
| commit | 96c1f21fa66a0bd87e91b213dce84e8497932de9 (patch) | |
| tree | 548c2b02bba46d2c1f0d465aa72f4d4d22e42339 | |
| parent | 3f9836fda60f26d856e3621a36ce1f4555c69f4c (diff) | |
| download | dispatch-adapter-tester-main.tar.gz dispatch-adapter-tester-main.zip | |
add claude support (main)
| -rw-r--r-- | .rubocop.yml | 17 | ||||
| -rw-r--r-- | Gemfile.lock | 4 | ||||
| -rw-r--r-- | lib/dispatch/adapter/tester.rb | 1 | ||||
| -rw-r--r-- | lib/dispatch/adapter/tester/playbooks/claude.rb | 248 | ||||
| -rw-r--r-- | lib/dispatch/adapter/tester/version.rb | 2 | ||||
| -rw-r--r-- | spec/dispatch/adapter/tester/playbooks/claude_spec.rb | 215 |
6 files changed, 478 insertions, 9 deletions
diff --git a/.rubocop.yml b/.rubocop.yml index f520914..a45fd5b 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -13,11 +13,19 @@ Style/FrozenStringLiteralComment: EnforcedStyle: always Metrics/BlockLength: - Exclude: - - "spec/**/*" + Enabled: false Metrics/MethodLength: - Max: 40 + Enabled: false + +Metrics/ClassLength: + Enabled: false + +Metrics/ModuleLength: + Enabled: false + +Metrics/BlockNesting: + Enabled: false Metrics/AbcSize: Enabled: false @@ -34,9 +42,6 @@ Style/Documentation: Naming/MethodParameterName: Enabled: false -Metrics/ClassLength: - Enabled: false - Metrics/ParameterLists: Enabled: false diff --git a/Gemfile.lock b/Gemfile.lock index 842e0e8..cb0e03e 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: ../dispatch-adapter-interface specs: - dispatch-adapter-interface (0.1.0) + dispatch-adapter-interface (0.2.0) PATH remote: . @@ -96,7 +96,7 @@ CHECKSUMS ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383 date (3.5.1) sha256=750d06384d7b9c15d562c76291407d89e368dda4d4fff957eb94962d325a0dc0 diff-lcs (1.6.2) sha256=9ae0d2cba7d4df3075fe8cd8602a8604993efc0dfa934cff568969efb1909962 - dispatch-adapter-interface (0.1.0) + dispatch-adapter-interface (0.2.0) dispatch-adapter-tester (0.1.0) erb (6.0.2) sha256=9fe6264d44f79422c87490a1558479bd0e7dad4dd0e317656e67ea3077b5242b io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc diff --git a/lib/dispatch/adapter/tester.rb b/lib/dispatch/adapter/tester.rb index 966ef43..7872066 100644 --- a/lib/dispatch/adapter/tester.rb +++ b/lib/dispatch/adapter/tester.rb @@ -6,6 +6,7 @@ require_relative "tester/version" require_relative "tester/errors" require_relative "tester/step" require_relative "tester/playbook" +require_relative "tester/playbooks/claude" module Dispatch module Adapter diff --git a/lib/dispatch/adapter/tester/playbooks/claude.rb b/lib/dispatch/adapter/tester/playbooks/claude.rb new file mode 100644 index 
0000000..0e3a1fd --- /dev/null +++ b/lib/dispatch/adapter/tester/playbooks/claude.rb @@ -0,0 +1,248 @@ +# frozen_string_literal: true + +module Dispatch + module Adapter + module Tester + module Playbooks + # Pre-built playbook scripts for smoke-testing against the Claude adapter. + # + # This module provides scripted step sequences that mimic realistic Claude + # API responses for six common scenarios. Each method returns a plain Ruby + # Array of step hashes suitable for passing directly to + # `Dispatch::Adapter::Tester::Playbook.new(steps_json: ...)`. + # + # ## Usage (recorded / CI mode) + # + # Use any playbook method to create a deterministic Playbook adapter: + # + # steps = Dispatch::Adapter::Tester::Playbooks::Claude.smoke_text + # adapter = Dispatch::Adapter::Tester::Playbook.new(steps_json: steps) + # msgs = [Dispatch::Adapter::Message.new(role: "user", + # content: [Dispatch::Adapter::TextBlock.new(text: "Say hi")])] + # resp = adapter.chat(msgs) + # raise "unexpected stop_reason" unless resp.stop_reason == :end_turn + # raise "empty content" if resp.content.to_s.empty? + # + # ## Usage (live mode against the real Claude API) + # + # To run the same scenarios against the real Claude adapter, substitute + # `Dispatch::Adapter::Tester::Playbook` with `Dispatch::Adapter::Claude`: + # + # require "dispatch/adapter/claude" + # require "dispatch/adapter/tester/playbooks/claude" + # + # adapter = Dispatch::Adapter::Claude.new( + # model: "claude-sonnet-4-5-20250929", + # api_key: ENV.fetch("ANTHROPIC_API_KEY"), + # min_request_interval: 1.0 + # ) + # + # # smoke_text + # msgs = [Dispatch::Adapter::Message.new( + # role: "user", + # content: [Dispatch::Adapter::TextBlock.new(text: "Say hi")] + # )] + # resp = adapter.chat(msgs) + # raise unless resp.stop_reason == :end_turn + # raise if resp.content.to_s.empty? 
+ # + # # smoke_tool_use + # add_tool = Dispatch::Adapter::ToolDefinition.new( + # name: "add", + # description: "Returns the sum of two numbers", + # parameters: { type: "object", + # properties: { a: { type: "integer" }, b: { type: "integer" } }, + # required: %w[a b] } + # ) + # msgs = [Dispatch::Adapter::Message.new( + # role: "user", + # content: [Dispatch::Adapter::TextBlock.new(text: "What is 2+3?")] + # )] + # resp = adapter.chat(msgs, tools: [add_tool]) + # raise unless resp.stop_reason == :tool_use + # tc = resp.tool_calls.find { |t| t.name == "add" } + # raise "expected add tool call" unless tc + # raise unless tc.arguments["a"] == 2 && tc.arguments["b"] == 3 + # + # # smoke_thinking — requires Opus 4.7+ with thinking enabled + # msgs = [Dispatch::Adapter::Message.new( + # role: "user", + # content: [Dispatch::Adapter::TextBlock.new(text: "Think carefully about 42")] + # )] + # resp = adapter.chat(msgs, thinking: "high", + # max_tokens: 16_000, + # # Use Opus 4.7 for adaptive thinking: + # # Dispatch::Adapter::Claude.new(model: "claude-opus-4-7", ...) + # ) + # raise unless resp.content.any? { |c| c.is_a?(Dispatch::Adapter::ThinkingBlock) } + # + # # smoke_usage — OAuth only + # # adapter = Dispatch::Adapter::Claude.new(token_path: "~/.config/dispatch/claude_oauth.json") + # # report = adapter.usage_report + # # raise unless report.limits.any? { |e| e.id == "anthropic:5h" } + # + # # smoke_pricing + # msgs = [Dispatch::Adapter::Message.new( + # role: "user", + # content: [Dispatch::Adapter::TextBlock.new(text: "Hello")] + # )] + # resp = adapter.chat(msgs) + # raise unless resp.usage.cost.total > 0 + # + # # smoke_cache — run the same request twice within 5 minutes + # resp2 = adapter.chat(msgs, cache_retention: :short) + # raise if resp2.usage.cache_read_tokens.to_i == 0 + # + module Claude + # Scripted response for a simple "Say hi" text request. 
+ # + # Expected adapter behaviour: + # - stop_reason == :end_turn + # - content (string) is non-empty + # + # @return [Array<Hash>] steps array for Playbook + def self.smoke_text + [ + { + "step" => 1, + "type" => "message", + "content" => "Hi there! How can I help you today?" + } + ] + end + + # Scripted response for tool-use with an `add` function. + # + # Expected adapter behaviour: + # - stop_reason == :tool_use + # - tool_calls contains one entry with name "add" + # - arguments["a"] == 2, arguments["b"] == 3 + # + # @return [Array<Hash>] steps array for Playbook + def self.smoke_tool_use + [ + { + "step" => 1, + "type" => "tool_calls", + "content" => nil, + "tool_calls" => [ + { + "id" => "toolu_smoke_add_01", + "name" => "add", + "arguments" => { "a" => 2, "b" => 3 } + } + ] + } + ] + end + + # Scripted response for a thinking-enabled request. + # + # In the recorded (tester) mode, the "ThinkingBlock" is represented + # as a synthetic message confirming thinking was requested. + # In live mode, use claude-opus-4-7 with thinking: "high". + # + # Expected adapter behaviour (tester mode): + # - stop_reason == :end_turn + # - content is non-empty + # + # Expected adapter behaviour (live mode, Opus 4.7+ with thinking): + # - stop_reason == :end_turn + # - Response.content includes at least one ThinkingBlock + # + # @return [Array<Hash>] steps array for Playbook + def self.smoke_thinking + [ + { + "step" => 1, + "type" => "message", + "content" => "[thinking] The number 42 is the answer to life, the universe, and everything." + } + ] + end + + # Scripted response for usage_report (OAuth mode). + # + # In tester mode, usage_report is not driven by the Playbook (it is a + # separate API call). This step collection is a placeholder that confirms + # a text response is returned; live tests must use an OAuth-authenticated + # Claude adapter and call adapter.usage_report directly. 
+ # + # Expected adapter behaviour (live OAuth mode): + # - usage_report returns a UsageReport + # - limits.any? { |e| e.id == "anthropic:5h" } + # + # @return [Array<Hash>] steps array for Playbook + def self.smoke_usage + [ + { + "step" => 1, + "type" => "message", + "content" => "Usage report smoke test placeholder." + } + ] + end + + # Scripted response for verifying that pricing is calculated. + # + # In tester mode the Usage struct has 0 tokens and nil cost. + # Live mode asserts response.usage.cost.total > 0. + # + # @return [Array<Hash>] steps array for Playbook + def self.smoke_pricing + [ + { + "step" => 1, + "type" => "message", + "content" => "Hello!" + } + ] + end + + # Scripted response for cache smoke test. + # + # Two identical calls with cache_retention: :short should result in + # cache_read_tokens > 0 on the second call when using the real adapter. + # In tester mode, both calls are separate steps with identical content. + # + # @return [Array<Hash>] steps array for Playbook (two identical steps) + def self.smoke_cache + [ + { + "step" => 1, + "type" => "message", + "content" => "Hello! (first call)" + }, + { + "step" => 2, + "type" => "message", + "content" => "Hello! (second call — cache hit expected in live mode)" + } + ] + end + + # Returns all six scenarios as a Hash keyed by scenario name. 
+ # + # Useful for iterating over all smoke scenarios: + # + # Dispatch::Adapter::Tester::Playbooks::Claude.all.each do |name, steps| + # adapter = Dispatch::Adapter::Tester::Playbook.new(steps_json: steps) + # # run scenario named `name` + # end + # + # @return [Hash<Symbol, Array<Hash>>] + def self.all + { + smoke_text: smoke_text, + smoke_tool_use: smoke_tool_use, + smoke_thinking: smoke_thinking, + smoke_usage: smoke_usage, + smoke_pricing: smoke_pricing, + smoke_cache: smoke_cache + } + end + end + end + end + end +end diff --git a/lib/dispatch/adapter/tester/version.rb b/lib/dispatch/adapter/tester/version.rb index 62e6918..9614ba6 100644 --- a/lib/dispatch/adapter/tester/version.rb +++ b/lib/dispatch/adapter/tester/version.rb @@ -3,7 +3,7 @@ module Dispatch module Adapter module Tester - VERSION = "0.1.0" + VERSION = "0.2.0" end end end diff --git a/spec/dispatch/adapter/tester/playbooks/claude_spec.rb b/spec/dispatch/adapter/tester/playbooks/claude_spec.rb new file mode 100644 index 0000000..46718e3 --- /dev/null +++ b/spec/dispatch/adapter/tester/playbooks/claude_spec.rb @@ -0,0 +1,215 @@ +# frozen_string_literal: true + +RSpec.describe Dispatch::Adapter::Tester::Playbooks::Claude do + # Helper: builds a Playbook adapter from the steps returned by a playbook method. 
+ def build_adapter(steps) + Dispatch::Adapter::Tester::Playbook.new(steps_json: steps) + end + + def user_message(text) + Dispatch::Adapter::Message.new( + role: "user", + content: [Dispatch::Adapter::TextBlock.new(text: text)] + ) + end + + # --------------------------------------------------------------------------- + # Discoverability + # --------------------------------------------------------------------------- + + describe ".all" do + it "returns a Hash with all six scenario keys" do + all = described_class.all + expect(all).to be_a(Hash) + expect(all.keys).to contain_exactly( + :smoke_text, :smoke_tool_use, :smoke_thinking, + :smoke_usage, :smoke_pricing, :smoke_cache + ) + end + + it "each value is a non-empty Array" do + described_class.all.each do |name, steps| + expect(steps).to be_an(Array), "Expected #{name} to return an Array" + expect(steps).not_to be_empty, "Expected #{name} steps to be non-empty" + end + end + end + + # --------------------------------------------------------------------------- + # smoke_text — single-turn "Say hi" + # --------------------------------------------------------------------------- + + describe ".smoke_text" do + let(:steps) { described_class.smoke_text } + + it "returns a one-step Array" do + expect(steps.length).to eq(1) + end + + it "produces a non-empty text response with stop_reason :end_turn" do + adapter = build_adapter(steps) + resp = adapter.chat([user_message("Say hi")]) + + expect(resp).to be_a(Dispatch::Adapter::Response) + expect(resp.stop_reason).to eq(:end_turn) + expect(resp.content).to be_a(String) + expect(resp.content).not_to be_empty + end + end + + # --------------------------------------------------------------------------- + # smoke_tool_use — register `add`, ask "What is 2+3?" 
+ # --------------------------------------------------------------------------- + + describe ".smoke_tool_use" do + let(:steps) { described_class.smoke_tool_use } + + it "returns a one-step Array" do + expect(steps.length).to eq(1) + end + + it "produces stop_reason :tool_use" do + adapter = build_adapter(steps) + resp = adapter.chat([user_message("What is 2+3?")]) + + expect(resp.stop_reason).to eq(:tool_use) + end + + it "emits an `add` tool call with a==2 and b==3" do + adapter = build_adapter(steps) + resp = adapter.chat([user_message("What is 2+3?")]) + + tc = resp.tool_calls.find { |t| t.name == "add" } + expect(tc).not_to be_nil, "Expected an 'add' tool call" + expect(tc.arguments["a"]).to eq(2) + expect(tc.arguments["b"]).to eq(3) + end + + it "tool call has a non-empty id" do + adapter = build_adapter(steps) + resp = adapter.chat([user_message("What is 2+3?")]) + + tc = resp.tool_calls.find { |t| t.name == "add" } + expect(tc.id).to be_a(String) + expect(tc.id).not_to be_empty + end + end + + # --------------------------------------------------------------------------- + # smoke_thinking — thinking-enabled request (tester mode) + # --------------------------------------------------------------------------- + + describe ".smoke_thinking" do + let(:steps) { described_class.smoke_thinking } + + it "returns a one-step Array" do + expect(steps.length).to eq(1) + end + + it "produces a non-empty text response (tester mode — thinking reflected in content)" do + adapter = build_adapter(steps) + resp = adapter.chat([user_message("Think carefully about 42")], thinking: "high") + + expect(resp.stop_reason).to eq(:end_turn) + expect(resp.content).to be_a(String) + expect(resp.content).not_to be_empty + end + + it "records the thinking parameter in the call log" do + adapter = build_adapter(steps) + adapter.chat([user_message("Think carefully about 42")], thinking: "high") + + expect(adapter.call_log.first[:thinking]).to eq("high") + end + end + + # 
--------------------------------------------------------------------------- + # smoke_usage — usage_report placeholder + # --------------------------------------------------------------------------- + + describe ".smoke_usage" do + let(:steps) { described_class.smoke_usage } + + it "returns a one-step Array" do + expect(steps.length).to eq(1) + end + + it "produces a non-empty text response (tester mode placeholder)" do + adapter = build_adapter(steps) + resp = adapter.chat([user_message("usage check")]) + + expect(resp.stop_reason).to eq(:end_turn) + expect(resp.content).to be_a(String) + expect(resp.content).not_to be_empty + end + end + + # --------------------------------------------------------------------------- + # smoke_pricing — cost.total > 0 in live mode; tester returns zeroed Usage + # --------------------------------------------------------------------------- + + describe ".smoke_pricing" do + let(:steps) { described_class.smoke_pricing } + + it "returns a one-step Array" do + expect(steps.length).to eq(1) + end + + it "produces a valid Response (tester mode — usage tokens are 0)" do + adapter = build_adapter(steps) + resp = adapter.chat([user_message("Hello")]) + + expect(resp.stop_reason).to eq(:end_turn) + expect(resp.content).not_to be_empty + expect(resp.usage).to be_a(Dispatch::Adapter::Usage) + expect(resp.usage.input_tokens).to eq(0) + expect(resp.usage.output_tokens).to eq(0) + end + end + + # --------------------------------------------------------------------------- + # smoke_cache — two identical calls; second should cache in live mode + # --------------------------------------------------------------------------- + + describe ".smoke_cache" do + let(:steps) { described_class.smoke_cache } + + it "returns a two-step Array" do + expect(steps.length).to eq(2) + end + + it "produces two sequential responses from the playbook" do + adapter = build_adapter(steps) + + resp1 = adapter.chat([user_message("Hello")]) + resp2 = 
adapter.chat([user_message("Hello")]) + + expect(resp1.stop_reason).to eq(:end_turn) + expect(resp1.content).not_to be_empty + + expect(resp2.stop_reason).to eq(:end_turn) + expect(resp2.content).not_to be_empty + end + + it "exhausts all steps after two calls" do + adapter = build_adapter(steps) + adapter.chat([user_message("Hello")]) + adapter.chat([user_message("Hello")]) + + expect(adapter).to be_finished + expect { adapter.verify_all_consumed! }.not_to raise_error + end + end + + # --------------------------------------------------------------------------- + # Integration: each scenario builds a valid Playbook without errors + # --------------------------------------------------------------------------- + + describe "all scenarios build valid Playbooks" do + described_class.all.each do |name, steps| + it "#{name} steps are valid Playbook JSON" do + expect { Dispatch::Adapter::Tester::Playbook.new(steps_json: steps) }.not_to raise_error + end + end + end +end |
