feat(chat): reasoning-effort selector — sticky per-conversation thinking-depth knob

Consume the backend's reasoning-effort handoff (wire@0.7.0 ReasoningEffort + transport-contract@0.11.0 GET/PUT /conversations/:id/reasoning-effort, ChatRequest.reasoningEffort): a 5-level selector in the sidebar Model view, under the provider + model dropdowns. null renders as 'high (default)' per the server-owned resolution chain; PUT on change (effective next turn); error + revert on 400; per-conversation re-mount incl. drafts (the draft id survives promotion, so an effort set on a draft applies from turn 1). Re-mirrored .dispatch references; GLOSSARY 'reasoning effort'; handoff updated. 616 tests green; live curl probe passed.
author: Adam Malczewski <[email protected]> 2026-06-12 20:38:57 +0900
committer: Adam Malczewski <[email protected]> 2026-06-12 20:38:57 +0900
commit: baa6f6c9d21de2f6ffc60e00f53c61d026155933 (patch)
tree: fecae91d99d906a7b5054b398e4d3d90894567a0
parent: 7dcc06eecb5b691b0c0daec26db9d5e407d0a60e (diff)
download: dispatch-web-baa6f6c9d21de2f6ffc60e00f53c61d026155933.tar.gz
dispatch-web-baa6f6c9d21de2f6ffc60e00f53c61d026155933.zip
13 files changed, 593 insertions, 20 deletions
diff --git a/.dispatch/transport-contract.reference.md b/.dispatch/transport-contract.reference.md
index 774cfb0..1c3d993 100644
--- a/.dispatch/transport-contract.reference.md
+++ b/.dispatch/transport-contract.reference.md
@@ -5,10 +5,27 @@
 > hangs on a permission prompt). Your CODE still imports `@dispatch/transport-contract` normally —
 > this file is for READING only.
 >
-> **Orchestrator:** SNAPSHOT of `transport-contract@0.10.0` (CR-5 history windowing shipped).
-> Depends on `@dispatch/wire@0.6.1` (see `wire.reference.md`) + `@dispatch/ui-contract@0.2.0` (see
+> **Orchestrator:** SNAPSHOT of `transport-contract@0.11.0` (reasoning effort shipped).
+> Depends on `@dispatch/wire@0.7.0` (see `wire.reference.md`) + `@dispatch/ui-contract@0.2.0` (see
 > `ui-contract.reference.md`).
 >
+> **2026-06-12 delta (reasoning-effort handoff — package bumped `0.10.0` → `0.11.0`, ADDITIVE):**
+> the thinking-depth knob (`ReasoningEffort`, re-exported from `wire@0.7.0`) lands in TWO scopes,
+> resolved server-side per turn (per-turn override → persisted conversation value → default
+> `"high"`; do NOT re-implement the chain client-side):
+> 1. **Per-turn override** — optional `reasoningEffort?: ReasoningEffort` on `ChatRequest` (and
+>    therefore on WS `chat.send`, which extends it). Applies to THAT turn only; never persists.
+>    OMIT the key for "no override" (never send `null`/`""`).
+> 2. **Persisted per-conversation setting** — `GET /conversations/:id/reasoning-effort` →
+>    `ReasoningEffortResponse { conversationId, reasoningEffort: ReasoningEffort | null }`
+>    (`null` = never set ⇒ the default `"high"` applies, NOT "off") and
+>    `PUT /conversations/:id/reasoning-effort` body `SetReasoningEffortRequest
+>    { reasoningEffort }`. Takes effect from the NEXT turn.
+> Validation: an unrecognized level → HTTP 400 `{ error }` listing the valid levels (same for the
+> WS path via the standard `chat.send` error reply). Cache note: CHANGING the level changes the
+> provider request shape and can bust the prompt cache for the next turn (one-time re-prefill);
+> a stable setting stays cache-safe (warming uses the same resolved effort).
+>
 > **2026-06-12 delta (CR-5 history windowing — package bumped `0.9.0` → `0.10.0`):** NO type-shape
 > change — `GET /conversations/:id` gains two OPTIONAL query params alongside `sinceSeq`:
 > **`limit=<k>`** (the NEWEST `k` chunks of the selection, still ASCENDING; a selection with ≤ `k`
@@ -126,6 +143,11 @@
 - `GET /conversations/:id/lsp` — `LspStatusResponse`. LAZILY spawns+initializes the configured servers
   on the first call per cwd (can take a moment; cached after); returns once each settles to
   `connected`/`error`. `servers` is `[]` when `cwd` is null.
+- `GET /conversations/:id/reasoning-effort` — `ReasoningEffortResponse` (`reasoningEffort` is `null`
+  when never set ⇒ default `"high"` applies). Works for an unseen/draft id.
+- `PUT /conversations/:id/reasoning-effort` — body `SetReasoningEffortRequest` →
+  `200 ReasoningEffortResponse`; `400 { error }` on an unrecognized level (the message lists the
+  valid levels). Persists the conversation's sticky level; effective from the NEXT turn.
 - WebSocket on :24205 — ONE path-agnostic socket multiplexes surface ops
   (`@dispatch/ui-contract`) + chat ops (below). Open once, send `WsClientMessage`, receive
   `WsServerMessage`. Live `AgentEvent` deltas carry `conversationId`+`turnId` but **no `seq`**
@@ -150,9 +172,15 @@
  */
 
 import type { SurfaceClientMessage, SurfaceServerMessage } from "@dispatch/ui-contract";
-import type { AgentEvent, StoredChunk, TurnMetrics } from "@dispatch/wire";
+import type { AgentEvent, ReasoningEffort, StoredChunk, TurnMetrics } from "@dispatch/wire";
 
-export type { AgentEvent, StepMetrics, StoredChunk, TurnMetrics } from "@dispatch/wire";
+export type {
+	AgentEvent,
+	ReasoningEffort,
+	StepMetrics,
+	StoredChunk,
+	TurnMetrics,
+} from "@dispatch/wire";
 
 /**
  * Request body for `POST /chat` (sent as JSON).
@@ -184,6 +212,14 @@ export interface ChatRequest {
 	 * prompt (so it does not affect prompt caching).
 	 */
 	readonly cwd?: string;
+
+	/**
+	 * Reasoning-effort override for THIS turn only (does not persist). When
+	 * omitted, the server resolves the conversation's persisted value, falling
+	 * back to `"high"`. Must be one of the `ReasoningEffort` levels; an
+	 * unrecognized value → HTTP 400 `{ error }`.
+	 */
+	readonly reasoningEffort?: ReasoningEffort;
 }
 
 /**
@@ -315,6 +351,28 @@ export interface SetCwdRequest {
 	readonly cwd: string;
 }
 
+// ─── Per-conversation reasoning effort ────────────────────────────────────────
+
+/**
+ * Response of `GET /conversations/:id/reasoning-effort`. `reasoningEffort` is
+ * null when never set (the server then resolves turns at the default,
+ * `"high"`).
+ */
+export interface ReasoningEffortResponse {
+	readonly conversationId: string;
+	readonly reasoningEffort: ReasoningEffort | null;
+}
+
+/**
+ * Body of `PUT /conversations/:id/reasoning-effort` — persists the
+ * conversation's sticky reasoning-effort level (used for every later turn that
+ * does not carry a per-turn `ChatRequest.reasoningEffort` override). An
+ * unrecognized level → HTTP 400 `{ error }`.
+ */
+export interface SetReasoningEffortRequest {
+	readonly reasoningEffort: ReasoningEffort;
+}
+
 // ─── Conversation close (explicit tab close) ──────────────────────────────────
 
 /**
diff --git a/.dispatch/wire.reference.md b/.dispatch/wire.reference.md
index 1d761bf..34984d2 100644
--- a/.dispatch/wire.reference.md
+++ b/.dispatch/wire.reference.md
@@ -4,8 +4,18 @@
 > types WITHOUT following the `file:` dep symlink out of this repo (which hangs on a permission
 > prompt). Your CODE still imports `@dispatch/wire` normally — this file is for READING only.
 >
-> **Orchestrator:** SNAPSHOT of `wire@0.6.1` (doc-only bump: the 1-based gap-free seq guarantee
-> codified on `StoredChunk`). Regenerate whenever `@dispatch/wire` changes.
+> **Orchestrator:** SNAPSHOT of `wire@0.7.0` (reasoning effort — the thinking-depth knob).
+> Regenerate whenever `@dispatch/wire` changes.
+>
+> **2026-06-12 delta (reasoning-effort handoff — package bumped `0.6.1` → `0.7.0`, ADDITIVE):**
+> adds the **`ReasoningEffort`** type — the per-request thinking-depth ladder
+> `"low" | "medium" | "high" | "xhigh" | "max"`. Provider-agnostic; the Anthropic provider maps
+> levels to extended-thinking token budgets (low 4096 · medium 10240 · high 16384 · xhigh 32768 ·
+> max 65536); providers without a thinking knob ignore it. Resolution is SERVER-owned (do not
+> re-implement): per-turn `ChatRequest.reasoningEffort` override → persisted per-conversation value
+> (`GET`/`PUT /conversations/:id/reasoning-effort`, see `transport-contract@0.11.0`) → default
+> `"high"`. Higher levels mean longer runs of `reasoning-delta` events before the first text delta.
+> See the `ReasoningEffort` definition below.
 >
 > **2026-06-12 delta (CR-5 history windowing — package bumped `0.6.0` → `0.6.1`, DOC-ONLY):** the
 > per-conversation `seq` numbering is now a WRITTEN CONTRACTUAL GUARANTEE on `StoredChunk`:
@@ -196,6 +206,20 @@ export interface StoredChunk {
 	readonly chunk: Chunk;
 }
 
+// ─── Reasoning effort ───────────────────────────────────────────────────────
+
+/**
+ * The per-request thinking-depth knob: how much extended thinking / reasoning
+ * the model should spend before answering. Provider-agnostic ladder; each
+ * provider maps a level to its native knob in its own code (e.g. an Anthropic
+ * provider maps it to a `thinking.budget_tokens` value) and MAY ignore levels
+ * (or the field entirely) that its backend cannot express.
+ *
+ * Resolution (owned by the session-orchestrator): per-turn request value →
+ * persisted per-conversation value → default `"high"`.
+ */
+export type ReasoningEffort = "low" | "medium" | "high" | "xhigh" | "max";
+
 // ─── Usage ──────────────────────────────────────────────────────────────────
 
 /**
diff --git a/AGENTS.md b/AGENTS.md
index bc16ef5..4c9f3dd 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -140,7 +140,9 @@ context size, cache-warming (+ retention/timer), markdown, smart auto-scroll, mu
 live view (subscribe/reconnect + the user prompt on the event stream), and the chat limit
 (bulk quarter-unload past `dispatch.chatLimit`, 75% fresh-load window, show-earlier page-in;
 `core/chunks/trim.ts`; CR-5 `?limit=`/`?beforeSeq=` CONSUMED — server-windowed cold loads +
-show-earlier server backfill; `hasOlder` from the 1-based gap-free seq contract). Plan in
+show-earlier server backfill; `hasOlder` from the 1-based gap-free seq contract), and the
+reasoning-effort selector (Model view, under the provider/model dropdowns; sticky per-conversation
+`GET`/`PUT /reasoning-effort`, `null` ⇒ "high (default)"). Plan in
 `../arch-rewrite/notes/frontend-design.md` §10.
 
 ## Reports
diff --git a/GLOSSARY.md b/GLOSSARY.md
index a9c7017..90acdd8 100644
--- a/GLOSSARY.md
+++ b/GLOSSARY.md
@@ -20,6 +20,7 @@
 | **TTFT** (time to first token) | Per-step latency: generation stream start → first content token (text or reasoning). One per step (each step re-prefills). On the wire as `step-complete.ttftMs` / `StepMetrics.ttftMs` (optional). | time-to-first-byte |
 | **decode time** | Per-step generation time after the first token (first token → stream end = `genTotalMs − ttftMs`). On the wire as `step-complete.decodeMs` / `StepMetrics.decodeMs` (optional). | — |
 | **context size** | The tokens a conversation currently occupies: the most recent turn's FINAL step `inputTokens + outputTokens` (NOT the aggregate per-turn `usage`, which sums per-step prompts and overcounts a multi-step turn). On the wire as `TurnDoneEvent.contextSize` (live `done`) + `TurnMetrics.contextSize` (persisted); the FE reads the LATEST turn's value as current usage, and treats `undefined` as "unknown" (renders a placeholder, never `0`). Mirrors the backend GLOSSARY. | context usage, context length, tokens used (and do NOT call it "context window" — that's the limit) |
+| **reasoning effort** | The per-request thinking-depth knob: how much extended thinking the model spends before answering. Canonical ladder `ReasoningEffort = "low" \| "medium" \| "high" \| "xhigh" \| "max"` (`wire@0.7.0`). Resolution is SERVER-owned (never re-implement): per-turn `ChatRequest.reasoningEffort` override → persisted per-conversation value (`GET`/`PUT /conversations/:id/reasoning-effort`) → default `"high"` — so `null` from the GET means "default (`high`) applies", not "off". Changing the level can bust the prompt cache for the next turn (one-time re-prefill); a stable setting stays cache-safe. | thinking setting, thinking level, effort level, thinking budget |
 | **context window** | The model's MAXIMUM token capacity (the limit a **context size** is measured against). A FUTURE backend field — not on the wire yet. **Placeholder:** the composer status bar currently HARDCODES a `1,000,000`-token window for the `size / limit · pct%` readout + fill bar; swap to the real per-model value when the backend ships it (see `backend-handoff.md` §3). | max context, token limit (distinct from **context size**, the current usage) |
 
 ## Frontend-specific
diff --git a/backend-handoff.md b/backend-handoff.md
index 17b907b..7c7da05 100644
--- a/backend-handoff.md
+++ b/backend-handoff.md
@@ -5,17 +5,37 @@
 > **From:** dispatch-web orchestrator · **To:** arch-rewrite orchestrator · **Courier:** the user.
 > `lsp` does NOT span the repos (AGENTS.md § Backend seam) — every cross-repo ask flows through here.
 
-_Last updated: 2026-06-12 (CR-5 consumed). **FE is current on `[email protected]` /
-`[email protected]` / `[email protected]`.** All handoffs to date are consumed: surfaces + WS,
-conversation transcript/metrics, tabs + model selector, cache-warming (incl. authoritative timer
-+ retention + cache-rate fix + the CR-4 lifecycle below), **per-conversation cwd + LSP status**,
-**context size**, **turn continuity + multi-client live view**, and the **chat limit + CR-5
-history windowing** (below).
+_Last updated: 2026-06-12 (reasoning-effort handoff consumed). **FE is current on
+`[email protected]` / `[email protected]` / `[email protected]`.** All handoffs to date are
+consumed: surfaces + WS, conversation transcript/metrics, tabs + model selector, cache-warming
+(incl. authoritative timer + retention + cache-rate fix + the CR-4 lifecycle below),
+**per-conversation cwd + LSP status**, **context size**, **turn continuity + multi-client live
+view**, the **chat limit + CR-5 history windowing**, and the **reasoning effort
+(thinking-depth knob)** (below).
 **Open asks: NONE.** CR-1/CR-2/CR-4/CR-5 all RESOLVED ✅ (see §2); §3 lists likely next asks.
 **CR-3 (watcher couldn't see the USER prompt until seal) → RESOLVED ✅** — backend shipped the
 `user-message` turn event; FE re-pinned + consumption live.
 The cwd/LSP draft-path verification (`backend-handoff-cwd-lsp.md`) came back **all ✅ confirmed**._
 
+**Reasoning-effort handoff (`frontend-reasoning-effort-handoff.md`) → CONSUMED ✅
+(curl-probed live: GET null on unseen id · PUT `xhigh` → echo + sticky GET · bad level → 400
+listing the ladder · CORS preflight allows PUT).** Re-pinned `wire@0.6.1→0.7.0` +
+`transport-contract@0.10.0→0.11.0`; re-mirrored both `.dispatch/*.reference.md`; added
+"reasoning effort" to FE `GLOSSARY.md`. FE work: a **per-conversation effort selector** in the
+sidebar's **Model view**, under the provider + model dropdowns
+(`features/chat/ui/ReasoningEffortSelector.svelte`, pure helpers in
+`features/chat/reasoning-effort.ts`): renders `null` as "high (default)" per the server-owned
+resolution chain, PUTs on change (effective next turn), shows the save error + reverts on 400,
+disables while in flight; re-mounted per conversation (incl. drafts — the draft id survives
+promotion, so an effort set on a draft applies from turn 1, same pattern as cwd). The app store
+seeds it on every focus change via `GET /conversations/:id/reasoning-effort` (cleared first so a
+switch never flashes the previous conversation's level) and exposes
+`reasoningEffort`/`setReasoningEffort`. The optional per-turn `chat.send` override is NOT built
+(no composer affordance yet — `chat.send` still omits the key, which the contract specifies as
+"no override"). The "expect more thinking" note needs no change: the transcript already renders
+arbitrary runs of reasoning deltas, and `generating` is structural (not timer-based). 616 tests
+green. NO new backend ask._
+
 **CR-4 cache-warming lifecycle (`frontend-cache-warming-lifecycle-handoff.md`) → CONSUMED ✅
 (live-probed 17/17 against `bin/up`).** Re-pinned `[email protected]→0.2.0` +
 `[email protected]→0.9.0` (`wire` unchanged); re-mirrored both `.dispatch/*.reference.md`. FE
@@ -61,25 +81,26 @@ backend ask — but the max-limit denominator is now a live FE need; see §3.
 
 ## 1. Pinned backend contracts (consumed by the FE)
 
-Pinned as `file:` deps: **`[email protected]`; `[email protected]`; `[email protected]`**.
+Pinned as `file:` deps: **`[email protected]`; `[email protected]`; `[email protected]`**.
 
 | Package | Used for |
 |---|---|
 | `@dispatch/ui-contract` | surfaces + surface WS protocol |
-| `@dispatch/wire` | `Chunk`/`StoredChunk`(+`seq`)/`ChatMessage`/`AgentEvent`/`TurnSealedEvent`/`Usage`/`StepId` + metrics: `StepMetrics`/`TurnMetrics`, `usage.stepId`, `step-complete`, `done.durationMs`/`done.usage`, `tool-result.durationMs`, **`done.contextSize`/`TurnMetrics.contextSize`** |
-| `@dispatch/transport-contract` | `ChatRequest`/`ModelsResponse`/`ConversationHistoryResponse`/`ConversationMetricsResponse` + `WarmRequest`/`WarmResponse` + `CwdResponse`/`SetCwdRequest` + LSP (`LspStatusResponse`/`LspServerInfo`/`LspServerState`) + WS chat ops + `WsClientMessage`/`WsServerMessage` |
+| `@dispatch/wire` | `Chunk`/`StoredChunk`(+`seq`)/`ChatMessage`/`AgentEvent`/`TurnSealedEvent`/`Usage`/`StepId` + metrics: `StepMetrics`/`TurnMetrics`, `usage.stepId`, `step-complete`, `done.durationMs`/`done.usage`, `tool-result.durationMs`, **`done.contextSize`/`TurnMetrics.contextSize`**, **`ReasoningEffort`** |
+| `@dispatch/transport-contract` | `ChatRequest`(+`reasoningEffort`)/`ModelsResponse`/`ConversationHistoryResponse`/`ConversationMetricsResponse` + `WarmRequest`/`WarmResponse` + `CwdResponse`/`SetCwdRequest` + `ReasoningEffortResponse`/`SetReasoningEffortRequest` + LSP (`LspStatusResponse`/`LspServerInfo`/`LspServerState`) + WS chat ops + `WsClientMessage`/`WsServerMessage` |
 
 Endpoints in use (HTTP **24203**, WS **24205**, CORS `*` incl. `PUT`):
 `POST /chat` (NDJSON) · `GET /models` ·
 `GET /conversations/:id?sinceSeq=<n>&beforeSeq=<s>&limit=<k>` (CR-5 windowing) ·
 `GET /conversations/:id/metrics` · `GET`/`PUT /conversations/:id/cwd` ·
+`GET`/`PUT /conversations/:id/reasoning-effort` (sticky thinking-depth; `null` ⇒ default `high`) ·
 `GET /conversations/:id/lsp` · `POST /chat/warm` · `POST /conversations/:id/close` (explicit
 tab-close: abort turn + stop/disable warming) · WS `chat.send`→`chat.delta` ·
 WS `chat.subscribe`/`chat.unsubscribe` (watch a conversation's turns without sending; replay + live).
 
 Mirrored in-repo for headless agents: `.dispatch/{ui-contract,wire,transport-contract}.reference.md`
 (regenerate on any contract bump; all current as of `[email protected]` /
-`[email protected]` / `[email protected]`).
+`[email protected]` / `[email protected]`).
 
 ## 2. Open asks FOR THE BACKEND
 
diff --git a/src/app/App.svelte b/src/app/App.svelte
index 4c5a82b..dffa937 100644
--- a/src/app/App.svelte
+++ b/src/app/App.svelte
@@ -1,4 +1,5 @@
 <script lang="ts">
+	import type { ReasoningEffort } from "@dispatch/transport-contract";
 	import type { InvokeMessage } from "@dispatch/ui-contract";
 	import { tick } from "svelte";
 	import Table from "../components/Table.svelte";
@@ -12,6 +13,8 @@
 		Composer,
 		manifest as chatManifest,
 		ModelSelector,
+		ReasoningEffortSelector,
+		type ReasoningEffortSaveResult,
 	} from "../features/chat";
 	import { manifest as conversationCacheManifest } from "../features/conversation-cache";
 	import { manifest as markdownManifest } from "../features/markdown";
@@ -154,6 +157,17 @@
 			: { ok: false, error: result.error };
 	}
 
+	// Adapt the store's reasoning-effort result to the chat feature's port.
+	async function saveReasoningEffort(
+		level: ReasoningEffort,
+	): Promise<ReasoningEffortSaveResult | null> {
+		const result = await store.setReasoningEffort(level);
+		if (result === null) return null;
+		return result.ok
+			? { ok: true, reasoningEffort: result.reasoningEffort }
+			: { ok: false, error: result.error };
+	}
+
 	// Adapt the store's cwd/LSP results to the workspace feature's ports.
 	async function saveCwd(cwd: string): Promise<CwdSaveResult | null> {
 		const result = await store.setCwd(cwd);
@@ -295,10 +309,11 @@
 	{#if kind === "model"}
 		<div class="flex flex-col gap-3">
 			<ModelSelector models={store.models} selected={store.activeModel} onSelect={handleSelectModel} />
-			<!-- Keyed on the workspace conversation (active tab OR draft) so the input
-			     re-mounts per conversation — incl. switching between drafts — and can't
-			     bleed across tabs. Editable for a draft too (cwd applies from turn 1). -->
+			<!-- Keyed on the workspace conversation (active tab OR draft) so the inputs
+			     re-mount per conversation — incl. switching between drafts — and can't
+			     bleed across tabs. Editable for a draft too (cwd + effort apply from turn 1). -->
 			{#key store.currentConversationId}
+				<ReasoningEffortSelector persisted={store.reasoningEffort} save={saveReasoningEffort} />
 				<CwdField cwd={store.cwd} canEdit={true} save={saveCwd} />
 			{/key}
 		</div>
diff --git a/src/app/store.svelte.ts b/src/app/store.svelte.ts
index 999f2be..05577a6 100644
--- a/src/app/store.svelte.ts
+++ b/src/app/store.svelte.ts
@@ -6,7 +6,10 @@ import type {
 	CwdResponse,
 	LspStatusResponse,
 	ModelsResponse,
+	ReasoningEffort,
+	ReasoningEffortResponse,
 	SetCwdRequest,
+	SetReasoningEffortRequest,
 	WarmRequest,
 	WarmResponse,
 } from "@dispatch/transport-contract";
@@ -52,6 +55,11 @@ export type LspResult =
 	| { readonly ok: true; readonly response: LspStatusResponse }
 	| { readonly ok: false; readonly error: string };
 
+/** Outcome of `PUT /conversations/:id/reasoning-effort`. */
+export type ReasoningEffortResult =
+	| { readonly ok: true; readonly reasoningEffort: ReasoningEffort }
+	| { readonly ok: false; readonly error: string };
+
 export interface AppStore {
 	readonly tabs: readonly Tab[];
 	readonly activeConversationId: string | null;
@@ -85,6 +93,18 @@ export interface AppStore {
 	 */
 	setCwd(cwd: string): Promise<CwdResult | null>;
 	/**
+	 * The workspace conversation's persisted reasoning effort, or null when never
+	 * set (the server then resolves turns at the default, `"high"`).
+	 */
+	readonly reasoningEffort: ReasoningEffort | null;
+	/**
+	 * Persist the workspace conversation's reasoning effort
+	 * (`PUT /conversations/:id/reasoning-effort`). Works for a draft too (its id
+	 * survives promotion), so the first turn already runs at the chosen level.
+	 * Takes effect from the NEXT turn; resolution stays server-owned.
+	 */
+	setReasoningEffort(level: ReasoningEffort): Promise<ReasoningEffortResult | null>;
+	/**
 	 * Fetch the workspace conversation's language-server status (`GET /conversations/:id/lsp`).
 	 * The backend lazily spawns servers, so this may take a moment on the first call for a cwd.
 	 */
@@ -234,6 +254,29 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 		}
 	}
 
+	// The workspace conversation's persisted reasoning effort. Seeded from the
+	// backend on focus change; null = never set (the server default applies).
+	let reasoningEffort = $state<ReasoningEffort | null>(null);
+
+	/** Refetch the workspace conversation's reasoning effort (works for a draft too). */
+	async function refreshReasoningEffort(): Promise<void> {
+		const id = workspaceConversationId();
+		// Clear immediately so a switch never shows the PREVIOUS conversation's level
+		// while the fetch is in flight (null renders as the server default).
+		reasoningEffort = null;
+		try {
+			const res = await fetchImpl(
+				`${httpBase}/conversations/${encodeURIComponent(id)}/reasoning-effort`,
+			);
+			if (!res.ok) return;
+			const data = (await res.json()) as ReasoningEffortResponse;
+			// Guard a slow response losing a race with a conversation switch.
+			if (workspaceConversationId() === id) reasoningEffort = data.reasoningEffort ?? null;
+		} catch {
+			// Non-fatal: an effort fetch failure just leaves the default rendering.
+		}
+	}
+
 	function getActiveChat(): ChatStore {
 		const activeId = tabsStore.activeConversationId;
 		if (activeId === null) {
@@ -434,6 +477,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 
 	refreshActiveChat();
 	void refreshCwd();
+	void refreshReasoningEffort();
 
 	return {
 		get tabs(): readonly Tab[] {
@@ -468,6 +512,9 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 		get cwd(): string | null {
 			return cwd;
 		},
+		get reasoningEffort(): ReasoningEffort | null {
+			return reasoningEffort;
+		},
 		get currentConversationId(): string {
 			return workspaceConversationId();
 		},
@@ -499,6 +546,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 				// surfaces (e.g. cache-warming) to its id.
 				syncSubscriptions();
 				void refreshCwd();
+				void refreshReasoningEffort();
 				// Now send on the promoted store
 				chatStores.get(conversationId)?.send(text);
 			} else {
@@ -525,6 +573,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 			refreshActiveChat();
 			syncSubscriptions();
 			void refreshCwd();
+			void refreshReasoningEffort();
 		},
 
 		selectTab(conversationId: string): void {
@@ -536,6 +585,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 			refreshActiveChat();
 			syncSubscriptions();
 			void refreshCwd();
+			void refreshReasoningEffort();
 		},
 
 		closeTab(conversationId: string): void {
@@ -554,6 +604,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 			refreshActiveChat();
 			syncSubscriptions();
 			void refreshCwd();
+			void refreshReasoningEffort();
 		},
 
 		invoke(surfaceId: string, actionId: string, payload?: unknown): void {
@@ -612,6 +663,37 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 			}
 		},
 
+		async setReasoningEffort(level: ReasoningEffort): Promise<ReasoningEffortResult | null> {
+			const id = workspaceConversationId();
+			const body: SetReasoningEffortRequest = { reasoningEffort: level };
+			try {
+				const res = await fetchImpl(
+					`${httpBase}/conversations/${encodeURIComponent(id)}/reasoning-effort`,
+					{
+						method: "PUT",
+						headers: { "content-type": "application/json" },
+						body: JSON.stringify(body),
+					},
+				);
+				if (!res.ok) {
+					const errBody = (await res.json().catch(() => null)) as { error?: string } | null;
+					return {
+						ok: false,
+						error: errBody?.error ?? `Set reasoning effort failed (HTTP ${res.status})`,
+					};
+				}
+				const data = (await res.json()) as ReasoningEffortResponse;
+				const next = data.reasoningEffort ?? level;
+				if (workspaceConversationId() === id) reasoningEffort = next;
+				return { ok: true, reasoningEffort: next };
+			} catch (err) {
+				return {
+					ok: false,
+					error: err instanceof Error ? err.message : "Set reasoning effort request failed",
+				};
+			}
+		},
+
 		async lspStatus(): Promise<LspResult | null> {
 			const id = workspaceConversationId();
 			try {
diff --git a/src/app/store.test.ts b/src/app/store.test.ts
index f4b5a0f..db6fdaa 100644
--- a/src/app/store.test.ts
+++ b/src/app/store.test.ts
@@ -708,6 +708,103 @@ describe("createAppStore", () => {
 		store.dispose();
 	});
 
+	it("seeds reasoningEffort from GET /conversations/:id/reasoning-effort (null = never set)", async () => {
+		const base = fakeFetchImpl();
+		const fetchImpl: typeof fetch = async (input, init) => {
+			const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
+			if (url.endsWith("/reasoning-effort")) {
+				return new Response(JSON.stringify({ conversationId: "x", reasoningEffort: "xhigh" }), {
+					status: 200,
+				});
+			}
+			return base(input, init);
+		};
+		const ws = fakeSocket();
+		const store = createAppStore({
+			socketFactory: () => ws,
+			fetchImpl,
+			localStorage: createFakeStorage(),
+		});
+		ws.resolveOpen();
+
+		await vi.waitFor(() => {
+			expect(store.reasoningEffort).toBe("xhigh");
+		});
+
+		store.dispose();
+	});
+
+	it("setReasoningEffort PUTs the level and updates local state from the echo", async () => {
+		const calls: { url: string; method: string; body: string | undefined }[] = [];
+		const base = fakeFetchImpl();
+		const fetchImpl: typeof fetch = async (input, init) => {
+			const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
+			calls.push({ url, method: init?.method ?? "GET", body: init?.body as string | undefined });
+			if (url.endsWith("/reasoning-effort") && init?.method === "PUT") {
+				const sent = JSON.parse(init.body as string) as { reasoningEffort: string };
+				return new Response(
+					JSON.stringify({ conversationId: "x", reasoningEffort: sent.reasoningEffort }),
+					{ status: 200 },
+				);
+			}
+			if (url.endsWith("/reasoning-effort")) {
+				return new Response(JSON.stringify({ conversationId: "x", reasoningEffort: null }), {
+					status: 200,
+				});
+			}
+			return base(input, init);
+		};
+		const ws = fakeSocket();
+		const store = createAppStore({
+			socketFactory: () => ws,
+			fetchImpl,
+			localStorage: createFakeStorage(),
+		});
+		ws.resolveOpen();
+
+		const result = await store.setReasoningEffort("max");
+		expect(result).toEqual({ ok: true, reasoningEffort: "max" });
+		expect(store.reasoningEffort).toBe("max");
+
+		const put = calls.find((c) => c.method === "PUT" && c.url.endsWith("/reasoning-effort"));
+		expect(put).toBeDefined();
+		// The PUT targets the workspace conversation (draft id works too) and
+		// carries exactly the SetReasoningEffortRequest body.
+		expect(put?.url).toContain(`/conversations/${store.currentConversationId}/`);
+		expect(JSON.parse(put?.body ?? "{}")).toEqual({ reasoningEffort: "max" });
+
+		store.dispose();
+	});
+
+	it("setReasoningEffort surfaces a 400 error and leaves state unchanged", async () => {
+		const base = fakeFetchImpl();
+		const fetchImpl: typeof fetch = async (input, init) => {
+			const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
+			if (url.endsWith("/reasoning-effort") && init?.method === "PUT") {
+				return new Response(JSON.stringify({ error: "bad level" }), { status: 400 });
+			}
+			if (url.endsWith("/reasoning-effort")) {
+				return new Response(JSON.stringify({ conversationId: "x", reasoningEffort: null }), {
+					status: 200,
+				});
+			}
+			return base(input, init);
+		};
+		const ws = fakeSocket();
+		const store = createAppStore({
+			socketFactory: () => ws,
+			fetchImpl,
+			localStorage: createFakeStorage(),
+		});
+		ws.resolveOpen();
+
+		const result = await store.setReasoningEffort("max");
+		expect(result).toEqual({ ok: false, error: "bad level" });
+		expect(store.reasoningEffort).toBeNull();
+
+		store.dispose();
+	});
+
 	it("does NOT re-scope a scope:'global' surface on conversation switch (no churn)", () => {
 		const ws = fakeSocket();
 		const store = createAppStore({
diff --git a/src/features/chat/index.ts b/src/features/chat/index.ts
index 139a64f..9b94392 100644
--- a/src/features/chat/index.ts
+++ b/src/features/chat/index.ts
@@ -2,11 +2,24 @@ export type { RenderedChunk, RenderGroup, ToolBatchEntry } from "../../core/chun
 export { groupRenderedChunks } from "../../core/chunks";
 export type { TurnMetricsEntry } from "../../core/metrics";
 export type { ChatTransport, HistorySync, HistoryWindow, MetricsSync } from "./ports";
+export type {
+	EffortOption,
+	ReasoningEffortSaveResult,
+	SaveReasoningEffort,
+} from "./reasoning-effort";
+export {
+	DEFAULT_REASONING_EFFORT,
+	effectiveEffort,
+	effortOptions,
+	isReasoningEffort,
+	REASONING_EFFORT_LEVELS,
+} from "./reasoning-effort";
 export type { ChatStore, ChatStoreDependencies } from "./store.svelte";
 export { createChatStore } from "./store.svelte";
 export { default as ChatView } from "./ui/ChatView.svelte";
 export { default as Composer } from "./ui/Composer.svelte";
 export { default as ModelSelector } from "./ui/ModelSelector.svelte";
+export { default as ReasoningEffortSelector } from "./ui/ReasoningEffortSelector.svelte";
 
 /** Public module manifest — aggregated by the shell's "Loaded Modules" view. */
 export const manifest = {
diff --git a/src/features/chat/reasoning-effort.test.ts b/src/features/chat/reasoning-effort.test.ts
new file mode 100644
index 0000000..8f76dea
--- /dev/null
+++ b/src/features/chat/reasoning-effort.test.ts
@@ -0,0 +1,45 @@
+import { describe, expect, it } from "vitest";
+import {
+	DEFAULT_REASONING_EFFORT,
+	effectiveEffort,
+	effortOptions,
+	isReasoningEffort,
+	REASONING_EFFORT_LEVELS,
+} from "./reasoning-effort";
+
+describe("reasoning-effort helpers", () => {
+	it("ladder matches the wire contract, in ascending depth order", () => {
+		expect(REASONING_EFFORT_LEVELS).toEqual(["low", "medium", "high", "xhigh", "max"]); // array equality is order-sensitive, so this pins the order too
+	});
+
+	it("the server default is high", () => {
+		expect(DEFAULT_REASONING_EFFORT).toBe("high");
+	});
+
+	it("isReasoningEffort narrows ladder strings and rejects everything else", () => {
+		for (const level of REASONING_EFFORT_LEVELS) {
+			expect(isReasoningEffort(level)).toBe(true);
+		}
+		expect(isReasoningEffort("banana")).toBe(false);
+		expect(isReasoningEffort("")).toBe(false);
+		expect(isReasoningEffort("HIGH")).toBe(false); // matching is case-sensitive
+	});
+
+	it("effectiveEffort maps null (never set) to the default, not 'off'", () => {
+		expect(effectiveEffort(null)).toBe("high");
+	});
+
+	it("effectiveEffort passes a persisted value through", () => {
+		expect(effectiveEffort("xhigh")).toBe("xhigh");
+		expect(effectiveEffort("low")).toBe("low");
+	});
+
+	it("effortOptions lists every level once and marks only the default", () => {
+		const options = effortOptions();
+		expect(options.map((o) => o.value)).toEqual([...REASONING_EFFORT_LEVELS]); // same levels, same order, no extras
+		expect(options.find((o) => o.value === "high")?.label).toBe("high (default)");
+		for (const option of options) {
+			if (option.value !== "high") expect(option.label).toBe(option.value); // non-default labels are the bare level
+		}
+	});
+});
diff --git a/src/features/chat/reasoning-effort.ts b/src/features/chat/reasoning-effort.ts
new file mode 100644
index 0000000..2a55089
--- /dev/null
+++ b/src/features/chat/reasoning-effort.ts
@@ -0,0 +1,66 @@
+import type { ReasoningEffort } from "@dispatch/transport-contract";
+
+/**
+ * Pure helpers for the reasoning-effort selector (the thinking-depth knob).
+ *
+ * The canonical ladder + resolution chain are SERVER-owned (the wire contract's
+ * `ReasoningEffort`; per-turn override → persisted conversation value → default
+ * `"high"`). These helpers only shape the persisted value for display: a `null`
+ * from `GET /conversations/:id/reasoning-effort` means "never set ⇒ the default
+ * applies", so the selector shows `high (default)` — never "off". Zero DOM,
+ * zero Svelte.
+ */
+
+/** The canonical ladder, in ascending thinking-depth order (mirrors the wire contract's `ReasoningEffort`). */
+export const REASONING_EFFORT_LEVELS: readonly ReasoningEffort[] = [
+	"low",
+	"medium",
+	"high",
+	"xhigh",
+	"max",
+];
+
+/** The server's fallback when nothing is set (the resolution chain's tail); a `null` persisted value is displayed as this level. */
+export const DEFAULT_REASONING_EFFORT: ReasoningEffort = "high";
+
+/** Type guard: true only when `value` is exactly one of the ladder's levels (e.g. to vet a `<select>` value). */
+export function isReasoningEffort(value: string): value is ReasoningEffort {
+	return REASONING_EFFORT_LEVELS.some((level) => level === value);
+}
+
+/** The level to display: the persisted one, else the server default (`null` = "default applies", not "off"). */
+export function effectiveEffort(persisted: ReasoningEffort | null): ReasoningEffort {
+	if (persisted == null) {
+		return DEFAULT_REASONING_EFFORT;
+	}
+	return persisted;
+}
+
+/** One `<option>` of the selector. */
+export interface EffortOption {
+	readonly value: ReasoningEffort; // the ladder level; rendered as the `<option>` value
+	readonly label: string; // visible text; the default level's label carries a " (default)" suffix
+}
+
+/**
+ * Build the selector's option list: one entry per ladder level, in ladder
+ * order. Only the server default gets a ` (default)` suffix on its label, so
+ * a never-set conversation reads "high (default)".
+ */
+export function effortOptions(): readonly EffortOption[] {
+	const labelOf = (level: ReasoningEffort): string =>
+		level === DEFAULT_REASONING_EFFORT ? `${level} (default)` : level;
+	return REASONING_EFFORT_LEVELS.map((level) => ({ value: level, label: labelOf(level) }));
+}
+
+// ── Injected port (consumer-defines-port; the composition root adapts the
+//    store's `PUT /conversations/:id/reasoning-effort` to this shape). ────────
+
+/** Outcome of `PUT /conversations/:id/reasoning-effort`: the level on success, or an error message to display on failure. */
+export type ReasoningEffortSaveResult =
+	| { readonly ok: true; readonly reasoningEffort: ReasoningEffort }
+	| { readonly ok: false; readonly error: string };
+
+export type SaveReasoningEffort = (
+	level: ReasoningEffort,
+) => Promise<ReasoningEffortSaveResult | null>; // null: presumably "nothing to save against" — TODO confirm with the adapter; the selector shows neither error nor confirmation for it
diff --git a/src/features/chat/ui.test.ts b/src/features/chat/ui.test.ts
index 7174821..e541015 100644
--- a/src/features/chat/ui.test.ts
+++ b/src/features/chat/ui.test.ts
@@ -7,6 +7,7 @@ import type { TurnMetricsEntry } from "../../core/metrics";
 import ChatView from "./ui/ChatView.svelte";
 import Composer from "./ui/Composer.svelte";
 import ModelSelector from "./ui/ModelSelector.svelte";
+import ReasoningEffortSelector from "./ui/ReasoningEffortSelector.svelte";
 
 describe("ChatView", () => {
 	it("renders a message's text chunk", () => {
@@ -695,3 +696,76 @@ describe("ModelSelector", () => {
 		expect(onSelect).toHaveBeenCalledWith("openai/gpt-4o");
 	});
 });
+
+describe("ReasoningEffortSelector", () => {
+	it("renders null (never set) as the default level, marked '(default)'", () => {
+		render(ReasoningEffortSelector, { props: { persisted: null, save: vi.fn() } });
+
+		const select = screen.getByRole("combobox", { name: "Reasoning effort" });
+		expect(select).toHaveValue("high"); // null falls back to the default level
+		expect(within(select).getByRole("option", { name: "high (default)" })).toBeInTheDocument();
+		// All five ladder levels are offered.
+		expect(within(select).getAllByRole("option")).toHaveLength(5);
+	});
+
+	it("renders a persisted level as selected", () => {
+		render(ReasoningEffortSelector, { props: { persisted: "xhigh", save: vi.fn() } });
+
+		expect(screen.getByRole("combobox", { name: "Reasoning effort" })).toHaveValue("xhigh");
+	});
+
+	it("selecting a level saves it via the injected port and confirms", async () => {
+		const save = vi.fn(async (level: "low" | "medium" | "high" | "xhigh" | "max") => ({ // mock port: echoes the requested level back as a successful save
+			ok: true as const,
+			reasoningEffort: level,
+		}));
+		const user = userEvent.setup();
+
+		render(ReasoningEffortSelector, { props: { persisted: null, save } });
+
+		await user.selectOptions(screen.getByRole("combobox", { name: "Reasoning effort" }), "max");
+
+		expect(save).toHaveBeenCalledTimes(1);
+		expect(save).toHaveBeenCalledWith("max");
+		await vi.waitFor(() => {
+			expect(screen.getByText(/applies from the next turn/i)).toBeInTheDocument();
+		});
+		expect(screen.getByRole("combobox", { name: "Reasoning effort" })).toHaveValue("max"); // the pick stays selected after the save resolves
+	});
+
+	it("a failed save shows the error and reverts to the persisted value", async () => {
+		const save = vi.fn(async () => ({ ok: false as const, error: "nope" }));
+		const user = userEvent.setup();
+
+		render(ReasoningEffortSelector, { props: { persisted: "low", save } });
+
+		await user.selectOptions(screen.getByRole("combobox", { name: "Reasoning effort" }), "max");
+
+		await vi.waitFor(() => {
+			expect(screen.getByText("nope")).toBeInTheDocument();
+		});
+		expect(screen.getByRole("combobox", { name: "Reasoning effort" })).toHaveValue("low"); // the failed pick ("max") was dropped
+	});
+
+	it("disables the select while a save is in flight (no double-fire)", async () => {
+		let resolveSave: ((r: { ok: true; reasoningEffort: "max" }) => void) | undefined; // lets the test settle the pending save when it chooses
+		const save = vi.fn(
+			() =>
+				new Promise<{ ok: true; reasoningEffort: "max" }>((resolve) => {
+					resolveSave = resolve;
+				}),
+		);
+		const user = userEvent.setup();
+
+		render(ReasoningEffortSelector, { props: { persisted: null, save } });
+
+		await user.selectOptions(screen.getByRole("combobox", { name: "Reasoning effort" }), "max");
+
+		expect(screen.getByRole("combobox", { name: "Reasoning effort" })).toBeDisabled(); // save() is still pending here
+
+		resolveSave?.({ ok: true, reasoningEffort: "max" });
+		await vi.waitFor(() => {
+			expect(screen.getByRole("combobox", { name: "Reasoning effort" })).toBeEnabled();
+		});
+	});
+});
diff --git a/src/features/chat/ui/ReasoningEffortSelector.svelte b/src/features/chat/ui/ReasoningEffortSelector.svelte
new file mode 100644
index 0000000..8c7b193
--- /dev/null
+++ b/src/features/chat/ui/ReasoningEffortSelector.svelte
@@ -0,0 +1,75 @@
+<script lang="ts">
+	import type { ReasoningEffort } from "@dispatch/transport-contract";
+	import {
+		effectiveEffort,
+		effortOptions,
+		isReasoningEffort,
+		type SaveReasoningEffort,
+	} from "../reasoning-effort";
+
+	let {
+		persisted,
+		save,
+	}: {
+		/** The conversation's persisted level, or null when never set (default applies). */
+		persisted: ReasoningEffort | null;
+		save: SaveReasoningEffort; // injected port; called with the picked level on each accepted change
+	} = $props();
+
+	const options = effortOptions();
+
+	// The user's latest choice (kept after a successful save, cleared on a failed one); null = mirror the (async-loaded) persisted prop.
+	// Re-mounted per conversation, so there is no cross-tab bleed.
+	let chosen = $state<ReasoningEffort | null>(null);
+	let saving = $state(false); // true while a save() call is pending; disables the select
+	let error = $state<string | null>(null); // message from the last failed save; cleared on the next change
+	let justSaved = $state(false); // true after a successful save, until the next change
+
+	const selected = $derived(chosen ?? effectiveEffort(persisted));
+
+	async function handleChange(value: string) {
+		if (!isReasoningEffort(value) || saving) return; // ignore non-ladder values and re-entrant changes
+		chosen = value; // show the pick immediately while the save is pending
+		saving = true;
+		error = null;
+		justSaved = false;
+		const result = await save(value).catch((e: unknown) => ({
+			ok: false as const, // a rejected save() is handled exactly like a failed one,
+			error: e instanceof Error ? e.message : String(e), // so `saving` can never stay stuck on true
+		}));
+		saving = false;
+		justSaved = result?.ok === true;
+		if (result === null || result.ok) return; // null: nothing to report; ok: confirmation is shown
+		error = result.error;
+		chosen = null; // revert to the persisted value
+	}
+</script>
+
+<div class="flex flex-col gap-1">
+	<span class="text-xs font-semibold uppercase opacity-60">Reasoning effort</span>
+	<div class="flex items-center gap-2">
+		<select
+			class="select select-sm w-full"
+			value={selected}
+			disabled={saving}
+			onchange={(e) => handleChange(e.currentTarget.value)}
+			aria-label="Reasoning effort"
+		>
+			{#each options as option (option.value)}
+				<option value={option.value}>{option.label}</option>
+			{/each}
+		</select>
+		{#if saving}<!-- role="status": aria-label is not exposed on a role-less span -->
+			<span class="loading loading-spinner loading-xs" role="status" aria-label="Saving reasoning effort"></span>
+		{/if}
+	</div>
+	{#if error}<!-- role="alert": the message appears dynamically and should be announced -->
+		<p class="text-xs text-error" role="alert">{error}</p>
+	{:else if justSaved}
+		<p class="text-xs text-success">Saved — applies from the next turn.</p>
+	{:else}
+		<p class="text-xs opacity-50">
+			How long the model thinks before answering. Changing it can re-prefill the prompt cache once.
+		</p>
+	{/if}
+</div>
author	Adam Malczewski <[email protected]>	2026-06-12 20:38:57 +0900
committer	Adam Malczewski <[email protected]>	2026-06-12 20:38:57 +0900
commit	baa6f6c9d21de2f6ffc60e00f53c61d026155933 (patch)
tree	fecae91d99d906a7b5054b398e4d3d90894567a0
parent	7dcc06eecb5b691b0c0daec26db9d5e407d0a60e (diff)
download	dispatch-web-baa6f6c9d21de2f6ffc60e00f53c61d026155933.tar.gz dispatch-web-baa6f6c9d21de2f6ffc60e00f53c61d026155933.zip