summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAdam Malczewski <[email protected]>2026-06-12 20:38:57 +0900
committerAdam Malczewski <[email protected]>2026-06-12 20:38:57 +0900
commitbaa6f6c9d21de2f6ffc60e00f53c61d026155933 (patch)
treefecae91d99d906a7b5054b398e4d3d90894567a0
parent7dcc06eecb5b691b0c0daec26db9d5e407d0a60e (diff)
downloaddispatch-web-baa6f6c9d21de2f6ffc60e00f53c61d026155933.tar.gz
dispatch-web-baa6f6c9d21de2f6ffc60e00f53c61d026155933.zip
feat(chat): reasoning-effort selector — sticky per-conversation thinking-depth knob
Consume the backend's reasoning-effort handoff (wire@0.7.0 ReasoningEffort + transport-contract@0.11.0 GET/PUT /conversations/:id/reasoning-effort, ChatRequest.reasoningEffort): a 5-level selector in the sidebar Model view, under the provider + model dropdowns. null renders as 'high (default)' per the server-owned resolution chain; PUT on change (effective next turn); error + revert on 400; per-conversation re-mount incl. drafts (the draft id survives promotion, so an effort set on a draft applies from turn 1). Re-mirrored .dispatch references; GLOSSARY 'reasoning effort'; handoff updated. 616 tests green; live curl probe passed.
-rw-r--r--.dispatch/transport-contract.reference.md66
-rw-r--r--.dispatch/wire.reference.md28
-rw-r--r--AGENTS.md4
-rw-r--r--GLOSSARY.md1
-rw-r--r--backend-handoff.md41
-rw-r--r--src/app/App.svelte21
-rw-r--r--src/app/store.svelte.ts82
-rw-r--r--src/app/store.test.ts97
-rw-r--r--src/features/chat/index.ts13
-rw-r--r--src/features/chat/reasoning-effort.test.ts45
-rw-r--r--src/features/chat/reasoning-effort.ts66
-rw-r--r--src/features/chat/ui.test.ts74
-rw-r--r--src/features/chat/ui/ReasoningEffortSelector.svelte75
13 files changed, 593 insertions, 20 deletions
diff --git a/.dispatch/transport-contract.reference.md b/.dispatch/transport-contract.reference.md
index 774cfb0..1c3d993 100644
--- a/.dispatch/transport-contract.reference.md
+++ b/.dispatch/transport-contract.reference.md
@@ -5,10 +5,27 @@
> hangs on a permission prompt). Your CODE still imports `@dispatch/transport-contract` normally —
> this file is for READING only.
>
-> **Orchestrator:** SNAPSHOT of `transport-contract@0.10.0` (CR-5 history windowing shipped).
-> Depends on `@dispatch/wire@0.6.1` (see `wire.reference.md`) + `@dispatch/ui-contract@0.2.0` (see
+> **Orchestrator:** SNAPSHOT of `transport-contract@0.11.0` (reasoning effort shipped).
+> Depends on `@dispatch/wire@0.7.0` (see `wire.reference.md`) + `@dispatch/ui-contract@0.2.0` (see
> `ui-contract.reference.md`).
>
+> **2026-06-12 delta (reasoning-effort handoff — package bumped `0.10.0` → `0.11.0`, ADDITIVE):**
+> the thinking-depth knob (`ReasoningEffort`, re-exported from `wire@0.7.0`) lands in TWO scopes,
+> resolved server-side per turn (per-turn override → persisted conversation value → default
+> `"high"`; do NOT re-implement the chain client-side):
+> 1. **Per-turn override** — optional `reasoningEffort?: ReasoningEffort` on `ChatRequest` (and
+> therefore on WS `chat.send`, which extends it). Applies to THAT turn only; never persists.
+> OMIT the key for "no override" (never send `null`/`""`).
+> 2. **Persisted per-conversation setting** — `GET /conversations/:id/reasoning-effort` →
+> `ReasoningEffortResponse { conversationId, reasoningEffort: ReasoningEffort | null }`
+> (`null` = never set ⇒ the default `"high"` applies, NOT "off") and
+> `PUT /conversations/:id/reasoning-effort` body `SetReasoningEffortRequest
+> { reasoningEffort }`. Takes effect from the NEXT turn.
+> Validation: an unrecognized level → HTTP 400 `{ error }` listing the valid levels (same for the
+> WS path via the standard `chat.send` error reply). Cache note: CHANGING the level changes the
+> provider request shape and can bust the prompt cache for the next turn (one-time re-prefill);
+> a stable setting stays cache-safe (warming uses the same resolved effort).
+>
> **2026-06-12 delta (CR-5 history windowing — package bumped `0.9.0` → `0.10.0`):** NO type-shape
> change — `GET /conversations/:id` gains two OPTIONAL query params alongside `sinceSeq`:
> **`limit=<k>`** (the NEWEST `k` chunks of the selection, still ASCENDING; a selection with ≤ `k`
@@ -126,6 +143,11 @@
- `GET /conversations/:id/lsp` — `LspStatusResponse`. LAZILY spawns+initializes the configured servers
on the first call per cwd (can take a moment; cached after); returns once each settles to
`connected`/`error`. `servers` is `[]` when `cwd` is null.
+- `GET /conversations/:id/reasoning-effort` — `ReasoningEffortResponse` (`reasoningEffort` is `null`
+ when never set ⇒ default `"high"` applies). Works for an unseen/draft id.
+- `PUT /conversations/:id/reasoning-effort` — body `SetReasoningEffortRequest` →
+ `200 ReasoningEffortResponse`; `400 { error }` on an unrecognized level (the message lists the
+ valid levels). Persists the conversation's sticky level; effective from the NEXT turn.
- WebSocket on :24205 — ONE path-agnostic socket multiplexes surface ops
(`@dispatch/ui-contract`) + chat ops (below). Open once, send `WsClientMessage`, receive
`WsServerMessage`. Live `AgentEvent` deltas carry `conversationId`+`turnId` but **no `seq`**
@@ -150,9 +172,15 @@
*/
import type { SurfaceClientMessage, SurfaceServerMessage } from "@dispatch/ui-contract";
-import type { AgentEvent, StoredChunk, TurnMetrics } from "@dispatch/wire";
+import type { AgentEvent, ReasoningEffort, StoredChunk, TurnMetrics } from "@dispatch/wire";
-export type { AgentEvent, StepMetrics, StoredChunk, TurnMetrics } from "@dispatch/wire";
+export type {
+ AgentEvent,
+ ReasoningEffort,
+ StepMetrics,
+ StoredChunk,
+ TurnMetrics,
+} from "@dispatch/wire";
/**
* Request body for `POST /chat` (sent as JSON).
@@ -184,6 +212,14 @@ export interface ChatRequest {
* prompt (so it does not affect prompt caching).
*/
readonly cwd?: string;
+
+ /**
+ * Reasoning-effort override for THIS turn only (does not persist). When
+ * omitted, the server resolves the conversation's persisted value, falling
+ * back to `"high"`. Must be one of the `ReasoningEffort` levels; an
+ * unrecognized value → HTTP 400 `{ error }`.
+ */
+ readonly reasoningEffort?: ReasoningEffort;
}
/**
@@ -315,6 +351,28 @@ export interface SetCwdRequest {
readonly cwd: string;
}
+// ─── Per-conversation reasoning effort ────────────────────────────────────────
+
+/**
+ * Response of `GET /conversations/:id/reasoning-effort`. `reasoningEffort` is
+ * null when never set (the server then resolves turns at the default,
+ * `"high"`).
+ */
+export interface ReasoningEffortResponse {
+ readonly conversationId: string;
+ readonly reasoningEffort: ReasoningEffort | null;
+}
+
+/**
+ * Body of `PUT /conversations/:id/reasoning-effort` — persists the
+ * conversation's sticky reasoning-effort level (used for every later turn that
+ * does not carry a per-turn `ChatRequest.reasoningEffort` override). An
+ * unrecognized level → HTTP 400 `{ error }`.
+ */
+export interface SetReasoningEffortRequest {
+ readonly reasoningEffort: ReasoningEffort;
+}
+
// ─── Conversation close (explicit tab close) ──────────────────────────────────
/**
diff --git a/.dispatch/wire.reference.md b/.dispatch/wire.reference.md
index 1d761bf..34984d2 100644
--- a/.dispatch/wire.reference.md
+++ b/.dispatch/wire.reference.md
@@ -4,8 +4,18 @@
> types WITHOUT following the `file:` dep symlink out of this repo (which hangs on a permission
> prompt). Your CODE still imports `@dispatch/wire` normally — this file is for READING only.
>
-> **Orchestrator:** SNAPSHOT of `wire@0.6.1` (doc-only bump: the 1-based gap-free seq guarantee
-> codified on `StoredChunk`). Regenerate whenever `@dispatch/wire` changes.
+> **Orchestrator:** SNAPSHOT of `wire@0.7.0` (reasoning effort — the thinking-depth knob).
+> Regenerate whenever `@dispatch/wire` changes.
+>
+> **2026-06-12 delta (reasoning-effort handoff — package bumped `0.6.1` → `0.7.0`, ADDITIVE):**
+> adds the **`ReasoningEffort`** type — the per-request thinking-depth ladder
+> `"low" | "medium" | "high" | "xhigh" | "max"`. Provider-agnostic; the Anthropic provider maps
+> levels to extended-thinking token budgets (low 4096 · medium 10240 · high 16384 · xhigh 32768 ·
+> max 65536); providers without a thinking knob ignore it. Resolution is SERVER-owned (do not
+> re-implement): per-turn `ChatRequest.reasoningEffort` override → persisted per-conversation value
+> (`GET`/`PUT /conversations/:id/reasoning-effort`, see `transport-contract@0.11.0`) → default
+> `"high"`. Higher levels mean longer runs of `reasoning-delta` events before the first text delta.
+> See the `ReasoningEffort` definition below.
>
> **2026-06-12 delta (CR-5 history windowing — package bumped `0.6.0` → `0.6.1`, DOC-ONLY):** the
> per-conversation `seq` numbering is now a WRITTEN CONTRACTUAL GUARANTEE on `StoredChunk`:
@@ -196,6 +206,20 @@ export interface StoredChunk {
readonly chunk: Chunk;
}
+// ─── Reasoning effort ───────────────────────────────────────────────────────
+
+/**
+ * The per-request thinking-depth knob: how much extended thinking / reasoning
+ * the model should spend before answering. Provider-agnostic ladder; each
+ * provider maps a level to its native knob in its own code (e.g. an Anthropic
+ * provider maps it to a `thinking.budget_tokens` value) and MAY ignore levels
+ * (or the field entirely) that its backend cannot express.
+ *
+ * Resolution (owned by the session-orchestrator): per-turn request value →
+ * persisted per-conversation value → default `"high"`.
+ */
+export type ReasoningEffort = "low" | "medium" | "high" | "xhigh" | "max";
+
// ─── Usage ──────────────────────────────────────────────────────────────────
/**
diff --git a/AGENTS.md b/AGENTS.md
index bc16ef5..4c9f3dd 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -140,7 +140,9 @@ context size, cache-warming (+ retention/timer), markdown, smart auto-scroll, mu
live view (subscribe/reconnect + the user prompt on the event stream), and the chat limit
(bulk quarter-unload past `dispatch.chatLimit`, 75% fresh-load window, show-earlier page-in;
`core/chunks/trim.ts`; CR-5 `?limit=`/`?beforeSeq=` CONSUMED — server-windowed cold loads +
-show-earlier server backfill; `hasOlder` from the 1-based gap-free seq contract). Plan in
+show-earlier server backfill; `hasOlder` from the 1-based gap-free seq contract), and the
+reasoning-effort selector (Model view, under the provider/model dropdowns; sticky per-conversation
+`GET`/`PUT /reasoning-effort`, `null` ⇒ "high (default)"). Plan in
`../arch-rewrite/notes/frontend-design.md` §10.
## Reports
diff --git a/GLOSSARY.md b/GLOSSARY.md
index a9c7017..90acdd8 100644
--- a/GLOSSARY.md
+++ b/GLOSSARY.md
@@ -20,6 +20,7 @@
| **TTFT** (time to first token) | Per-step latency: generation stream start → first content token (text or reasoning). One per step (each step re-prefills). On the wire as `step-complete.ttftMs` / `StepMetrics.ttftMs` (optional). | time-to-first-byte |
| **decode time** | Per-step generation time after the first token (first token → stream end = `genTotalMs − ttftMs`). On the wire as `step-complete.decodeMs` / `StepMetrics.decodeMs` (optional). | — |
| **context size** | The tokens a conversation currently occupies: the most recent turn's FINAL step `inputTokens + outputTokens` (NOT the aggregate per-turn `usage`, which sums per-step prompts and overcounts a multi-step turn). On the wire as `TurnDoneEvent.contextSize` (live `done`) + `TurnMetrics.contextSize` (persisted); the FE reads the LATEST turn's value as current usage, and treats `undefined` as "unknown" (renders a placeholder, never `0`). Mirrors the backend GLOSSARY. | context usage, context length, tokens used (and do NOT call it "context window" — that's the limit) |
+| **reasoning effort** | The per-request thinking-depth knob: how much extended thinking the model spends before answering. Canonical ladder `ReasoningEffort = "low" \| "medium" \| "high" \| "xhigh" \| "max"` (`wire@0.7.0`). Resolution is SERVER-owned (never re-implement): per-turn `ChatRequest.reasoningEffort` override → persisted per-conversation value (`GET`/`PUT /conversations/:id/reasoning-effort`) → default `"high"` — so `null` from the GET means "default (`high`) applies", not "off". Changing the level can bust the prompt cache for the next turn (one-time re-prefill); a stable setting stays cache-safe. | thinking setting, thinking level, effort level, thinking budget |
| **context window** | The model's MAXIMUM token capacity (the limit a **context size** is measured against). A FUTURE backend field — not on the wire yet. **Placeholder:** the composer status bar currently HARDCODES a `1,000,000`-token window for the `size / limit · pct%` readout + fill bar; swap to the real per-model value when the backend ships it (see `backend-handoff.md` §3). | max context, token limit (distinct from **context size**, the current usage) |
## Frontend-specific
diff --git a/backend-handoff.md b/backend-handoff.md
index 17b907b..7c7da05 100644
--- a/backend-handoff.md
+++ b/backend-handoff.md
@@ -5,17 +5,37 @@
> **From:** dispatch-web orchestrator · **To:** arch-rewrite orchestrator · **Courier:** the user.
> `lsp` does NOT span the repos (AGENTS.md § Backend seam) — every cross-repo ask flows through here.
-_Last updated: 2026-06-12 (CR-5 consumed). **FE is current on `[email protected]` /
-`[email protected]` / `[email protected]`.** All handoffs to date are consumed: surfaces + WS,
-conversation transcript/metrics, tabs + model selector, cache-warming (incl. authoritative timer
-+ retention + cache-rate fix + the CR-4 lifecycle below), **per-conversation cwd + LSP status**,
-**context size**, **turn continuity + multi-client live view**, and the **chat limit + CR-5
-history windowing** (below).
+_Last updated: 2026-06-12 (reasoning-effort handoff consumed). **FE is current on
+`[email protected]` / `[email protected]` / `[email protected]`.** All handoffs to date are
+consumed: surfaces + WS, conversation transcript/metrics, tabs + model selector, cache-warming
+(incl. authoritative timer + retention + cache-rate fix + the CR-4 lifecycle below),
+**per-conversation cwd + LSP status**, **context size**, **turn continuity + multi-client live
+view**, the **chat limit + CR-5 history windowing**, and the **reasoning effort
+(thinking-depth knob)** (below).
**Open asks: NONE.** CR-1/CR-2/CR-4/CR-5 all RESOLVED ✅ (see §2); §3 lists likely next asks.
**CR-3 (watcher couldn't see the USER prompt until seal) → RESOLVED ✅** — backend shipped the
`user-message` turn event; FE re-pinned + consumption live.
The cwd/LSP draft-path verification (`backend-handoff-cwd-lsp.md`) came back **all ✅ confirmed**._
+**Reasoning-effort handoff (`frontend-reasoning-effort-handoff.md`) → CONSUMED ✅
+(curl-probed live: GET null on unseen id · PUT `xhigh` → echo + sticky GET · bad level → 400
+listing the ladder · CORS preflight allows PUT).** Re-pinned `wire@0.6.1→0.7.0` +
+`transport-contract@0.10.0→0.11.0`; re-mirrored both `.dispatch/*.reference.md`; added
+"reasoning effort" to FE `GLOSSARY.md`. FE work: a **per-conversation effort selector** in the
+sidebar's **Model view**, under the provider + model dropdowns
+(`features/chat/ui/ReasoningEffortSelector.svelte`, pure helpers in
+`features/chat/reasoning-effort.ts`): renders `null` as "high (default)" per the server-owned
+resolution chain, PUTs on change (effective next turn), shows the save error + reverts on 400,
+disables while in flight; re-mounted per conversation (incl. drafts — the draft id survives
+promotion, so an effort set on a draft applies from turn 1, same pattern as cwd). The app store
+seeds it on every focus change via `GET /conversations/:id/reasoning-effort` (cleared first so a
+switch never flashes the previous conversation's level) and exposes
+`reasoningEffort`/`setReasoningEffort`. The optional per-turn `chat.send` override is NOT built
+(no composer affordance yet — `chat.send` still omits the key, which the contract specifies as
+"no override"). The "expect more thinking" note needs no change: the transcript already renders
+arbitrary runs of reasoning deltas, and `generating` is structural (not timer-based). 616 tests
+green. NO new backend ask._
+
**CR-4 cache-warming lifecycle (`frontend-cache-warming-lifecycle-handoff.md`) → CONSUMED ✅
(live-probed 17/17 against `bin/up`).** Re-pinned `[email protected]→0.2.0` +
`[email protected]→0.9.0` (`wire` unchanged); re-mirrored both `.dispatch/*.reference.md`. FE
@@ -61,25 +81,26 @@ backend ask — but the max-limit denominator is now a live FE need; see §3.
## 1. Pinned backend contracts (consumed by the FE)
-Pinned as `file:` deps: **`[email protected]`; `[email protected]`; `[email protected]`**.
+Pinned as `file:` deps: **`[email protected]`; `[email protected]`; `[email protected]`**.
| Package | Used for |
|---|---|
| `@dispatch/ui-contract` | surfaces + surface WS protocol |
-| `@dispatch/wire` | `Chunk`/`StoredChunk`(+`seq`)/`ChatMessage`/`AgentEvent`/`TurnSealedEvent`/`Usage`/`StepId` + metrics: `StepMetrics`/`TurnMetrics`, `usage.stepId`, `step-complete`, `done.durationMs`/`done.usage`, `tool-result.durationMs`, **`done.contextSize`/`TurnMetrics.contextSize`** |
-| `@dispatch/transport-contract` | `ChatRequest`/`ModelsResponse`/`ConversationHistoryResponse`/`ConversationMetricsResponse` + `WarmRequest`/`WarmResponse` + `CwdResponse`/`SetCwdRequest` + LSP (`LspStatusResponse`/`LspServerInfo`/`LspServerState`) + WS chat ops + `WsClientMessage`/`WsServerMessage` |
+| `@dispatch/wire` | `Chunk`/`StoredChunk`(+`seq`)/`ChatMessage`/`AgentEvent`/`TurnSealedEvent`/`Usage`/`StepId` + metrics: `StepMetrics`/`TurnMetrics`, `usage.stepId`, `step-complete`, `done.durationMs`/`done.usage`, `tool-result.durationMs`, **`done.contextSize`/`TurnMetrics.contextSize`**, **`ReasoningEffort`** |
+| `@dispatch/transport-contract` | `ChatRequest`(+`reasoningEffort`)/`ModelsResponse`/`ConversationHistoryResponse`/`ConversationMetricsResponse` + `WarmRequest`/`WarmResponse` + `CwdResponse`/`SetCwdRequest` + `ReasoningEffortResponse`/`SetReasoningEffortRequest` + LSP (`LspStatusResponse`/`LspServerInfo`/`LspServerState`) + WS chat ops + `WsClientMessage`/`WsServerMessage` |
Endpoints in use (HTTP **24203**, WS **24205**, CORS `*` incl. `PUT`):
`POST /chat` (NDJSON) · `GET /models` ·
`GET /conversations/:id?sinceSeq=<n>&beforeSeq=<s>&limit=<k>` (CR-5 windowing) ·
`GET /conversations/:id/metrics` · `GET`/`PUT /conversations/:id/cwd` ·
+`GET`/`PUT /conversations/:id/reasoning-effort` (sticky thinking-depth; `null` ⇒ default `high`) ·
`GET /conversations/:id/lsp` · `POST /chat/warm` · `POST /conversations/:id/close` (explicit
tab-close: abort turn + stop/disable warming) · WS `chat.send`→`chat.delta` ·
WS `chat.subscribe`/`chat.unsubscribe` (watch a conversation's turns without sending; replay + live).
Mirrored in-repo for headless agents: `.dispatch/{ui-contract,wire,transport-contract}.reference.md`
(regenerate on any contract bump; all current as of `[email protected]` /
## 2. Open asks FOR THE BACKEND
diff --git a/src/app/App.svelte b/src/app/App.svelte
index 4c5a82b..dffa937 100644
--- a/src/app/App.svelte
+++ b/src/app/App.svelte
@@ -1,4 +1,5 @@
<script lang="ts">
+ import type { ReasoningEffort } from "@dispatch/transport-contract";
import type { InvokeMessage } from "@dispatch/ui-contract";
import { tick } from "svelte";
import Table from "../components/Table.svelte";
@@ -12,6 +13,8 @@
Composer,
manifest as chatManifest,
ModelSelector,
+ ReasoningEffortSelector,
+ type ReasoningEffortSaveResult,
} from "../features/chat";
import { manifest as conversationCacheManifest } from "../features/conversation-cache";
import { manifest as markdownManifest } from "../features/markdown";
@@ -154,6 +157,17 @@
: { ok: false, error: result.error };
}
+ // Adapt the store's reasoning-effort result to the chat feature's port.
+ async function saveReasoningEffort(
+ level: ReasoningEffort,
+ ): Promise<ReasoningEffortSaveResult | null> {
+ const result = await store.setReasoningEffort(level);
+ if (result === null) return null;
+ return result.ok
+ ? { ok: true, reasoningEffort: result.reasoningEffort }
+ : { ok: false, error: result.error };
+ }
+
// Adapt the store's cwd/LSP results to the workspace feature's ports.
async function saveCwd(cwd: string): Promise<CwdSaveResult | null> {
const result = await store.setCwd(cwd);
@@ -295,10 +309,11 @@
{#if kind === "model"}
<div class="flex flex-col gap-3">
<ModelSelector models={store.models} selected={store.activeModel} onSelect={handleSelectModel} />
- <!-- Keyed on the workspace conversation (active tab OR draft) so the input
- re-mounts per conversation — incl. switching between drafts — and can't
- bleed across tabs. Editable for a draft too (cwd applies from turn 1). -->
+ <!-- Keyed on the workspace conversation (active tab OR draft) so the inputs
+ re-mount per conversation — incl. switching between drafts — and can't
+ bleed across tabs. Editable for a draft too (cwd + effort apply from turn 1). -->
{#key store.currentConversationId}
+ <ReasoningEffortSelector persisted={store.reasoningEffort} save={saveReasoningEffort} />
<CwdField cwd={store.cwd} canEdit={true} save={saveCwd} />
{/key}
</div>
diff --git a/src/app/store.svelte.ts b/src/app/store.svelte.ts
index 999f2be..05577a6 100644
--- a/src/app/store.svelte.ts
+++ b/src/app/store.svelte.ts
@@ -6,7 +6,10 @@ import type {
CwdResponse,
LspStatusResponse,
ModelsResponse,
+ ReasoningEffort,
+ ReasoningEffortResponse,
SetCwdRequest,
+ SetReasoningEffortRequest,
WarmRequest,
WarmResponse,
} from "@dispatch/transport-contract";
@@ -52,6 +55,11 @@ export type LspResult =
| { readonly ok: true; readonly response: LspStatusResponse }
| { readonly ok: false; readonly error: string };
+/** Outcome of `PUT /conversations/:id/reasoning-effort`. */
+export type ReasoningEffortResult =
+ | { readonly ok: true; readonly reasoningEffort: ReasoningEffort }
+ | { readonly ok: false; readonly error: string };
+
export interface AppStore {
readonly tabs: readonly Tab[];
readonly activeConversationId: string | null;
@@ -85,6 +93,18 @@ export interface AppStore {
*/
setCwd(cwd: string): Promise<CwdResult | null>;
/**
+ * The workspace conversation's persisted reasoning effort, or null when never
+ * set (the server then resolves turns at the default, `"high"`).
+ */
+ readonly reasoningEffort: ReasoningEffort | null;
+ /**
+ * Persist the workspace conversation's reasoning effort
+ * (`PUT /conversations/:id/reasoning-effort`). Works for a draft too (its id
+ * survives promotion), so the first turn already runs at the chosen level.
+ * Takes effect from the NEXT turn; resolution stays server-owned.
+ */
+ setReasoningEffort(level: ReasoningEffort): Promise<ReasoningEffortResult | null>;
+ /**
* Fetch the workspace conversation's language-server status (`GET /conversations/:id/lsp`).
* The backend lazily spawns servers, so this may take a moment on the first call for a cwd.
*/
@@ -234,6 +254,29 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
}
}
+ // The workspace conversation's persisted reasoning effort. Seeded from the
+ // backend on focus change; null = never set (the server default applies).
+ let reasoningEffort = $state<ReasoningEffort | null>(null);
+
+ /** Refetch the workspace conversation's reasoning effort (works for a draft too). */
+ async function refreshReasoningEffort(): Promise<void> {
+ const id = workspaceConversationId();
+ // Clear immediately so a switch never shows the PREVIOUS conversation's level
+ // while the fetch is in flight (null renders as the server default).
+ reasoningEffort = null;
+ try {
+ const res = await fetchImpl(
+ `${httpBase}/conversations/${encodeURIComponent(id)}/reasoning-effort`,
+ );
+ if (!res.ok) return;
+ const data = (await res.json()) as ReasoningEffortResponse;
+ // Guard a slow response losing a race with a conversation switch.
+ if (workspaceConversationId() === id) reasoningEffort = data.reasoningEffort ?? null;
+ } catch {
+ // Non-fatal: an effort fetch failure just leaves the default rendering.
+ }
+ }
+
function getActiveChat(): ChatStore {
const activeId = tabsStore.activeConversationId;
if (activeId === null) {
@@ -434,6 +477,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
refreshActiveChat();
void refreshCwd();
+ void refreshReasoningEffort();
return {
get tabs(): readonly Tab[] {
@@ -468,6 +512,9 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
get cwd(): string | null {
return cwd;
},
+ get reasoningEffort(): ReasoningEffort | null {
+ return reasoningEffort;
+ },
get currentConversationId(): string {
return workspaceConversationId();
},
@@ -499,6 +546,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
// surfaces (e.g. cache-warming) to its id.
syncSubscriptions();
void refreshCwd();
+ void refreshReasoningEffort();
// Now send on the promoted store
chatStores.get(conversationId)?.send(text);
} else {
@@ -525,6 +573,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
refreshActiveChat();
syncSubscriptions();
void refreshCwd();
+ void refreshReasoningEffort();
},
selectTab(conversationId: string): void {
@@ -536,6 +585,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
refreshActiveChat();
syncSubscriptions();
void refreshCwd();
+ void refreshReasoningEffort();
},
closeTab(conversationId: string): void {
@@ -554,6 +604,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
refreshActiveChat();
syncSubscriptions();
void refreshCwd();
+ void refreshReasoningEffort();
},
invoke(surfaceId: string, actionId: string, payload?: unknown): void {
@@ -612,6 +663,37 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
}
},
+ async setReasoningEffort(level: ReasoningEffort): Promise<ReasoningEffortResult | null> {
+ const id = workspaceConversationId();
+ const body: SetReasoningEffortRequest = { reasoningEffort: level };
+ try {
+ const res = await fetchImpl(
+ `${httpBase}/conversations/${encodeURIComponent(id)}/reasoning-effort`,
+ {
+ method: "PUT",
+ headers: { "content-type": "application/json" },
+ body: JSON.stringify(body),
+ },
+ );
+ if (!res.ok) {
+ const errBody = (await res.json().catch(() => null)) as { error?: string } | null;
+ return {
+ ok: false,
+ error: errBody?.error ?? `Set reasoning effort failed (HTTP ${res.status})`,
+ };
+ }
+ const data = (await res.json()) as ReasoningEffortResponse;
+ const next = data.reasoningEffort ?? level;
+ if (workspaceConversationId() === id) reasoningEffort = next;
+ return { ok: true, reasoningEffort: next };
+ } catch (err) {
+ return {
+ ok: false,
+ error: err instanceof Error ? err.message : "Set reasoning effort request failed",
+ };
+ }
+ },
+
async lspStatus(): Promise<LspResult | null> {
const id = workspaceConversationId();
try {
diff --git a/src/app/store.test.ts b/src/app/store.test.ts
index f4b5a0f..db6fdaa 100644
--- a/src/app/store.test.ts
+++ b/src/app/store.test.ts
@@ -708,6 +708,103 @@ describe("createAppStore", () => {
store.dispose();
});
+ it("seeds reasoningEffort from GET /conversations/:id/reasoning-effort (null = never set)", async () => {
+ const base = fakeFetchImpl();
+ const fetchImpl: typeof fetch = async (input, init) => {
+ const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
+ if (url.endsWith("/reasoning-effort")) {
+ return new Response(JSON.stringify({ conversationId: "x", reasoningEffort: "xhigh" }), {
+ status: 200,
+ });
+ }
+ return base(input, init);
+ };
+ const ws = fakeSocket();
+ const store = createAppStore({
+ socketFactory: () => ws,
+ fetchImpl,
+ localStorage: createFakeStorage(),
+ });
+ ws.resolveOpen();
+
+ await vi.waitFor(() => {
+ expect(store.reasoningEffort).toBe("xhigh");
+ });
+
+ store.dispose();
+ });
+
+ it("setReasoningEffort PUTs the level and updates local state from the echo", async () => {
+ const calls: { url: string; method: string; body: string | undefined }[] = [];
+ const base = fakeFetchImpl();
+ const fetchImpl: typeof fetch = async (input, init) => {
+ const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
+ calls.push({ url, method: init?.method ?? "GET", body: init?.body as string | undefined });
+ if (url.endsWith("/reasoning-effort") && init?.method === "PUT") {
+ const sent = JSON.parse(init.body as string) as { reasoningEffort: string };
+ return new Response(
+ JSON.stringify({ conversationId: "x", reasoningEffort: sent.reasoningEffort }),
+ { status: 200 },
+ );
+ }
+ if (url.endsWith("/reasoning-effort")) {
+ return new Response(JSON.stringify({ conversationId: "x", reasoningEffort: null }), {
+ status: 200,
+ });
+ }
+ return base(input, init);
+ };
+ const ws = fakeSocket();
+ const store = createAppStore({
+ socketFactory: () => ws,
+ fetchImpl,
+ localStorage: createFakeStorage(),
+ });
+ ws.resolveOpen();
+
+ const result = await store.setReasoningEffort("max");
+ expect(result).toEqual({ ok: true, reasoningEffort: "max" });
+ expect(store.reasoningEffort).toBe("max");
+
+ const put = calls.find((c) => c.method === "PUT" && c.url.endsWith("/reasoning-effort"));
+ expect(put).toBeDefined();
+ // The PUT targets the workspace conversation (draft id works too) and
+ // carries exactly the SetReasoningEffortRequest body.
+ expect(put?.url).toContain(`/conversations/${store.currentConversationId}/`);
+ expect(JSON.parse(put?.body ?? "{}")).toEqual({ reasoningEffort: "max" });
+
+ store.dispose();
+ });
+
+ it("setReasoningEffort surfaces a 400 error and leaves state unchanged", async () => {
+ const base = fakeFetchImpl();
+ const fetchImpl: typeof fetch = async (input, init) => {
+ const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
+ if (url.endsWith("/reasoning-effort") && init?.method === "PUT") {
+ return new Response(JSON.stringify({ error: "bad level" }), { status: 400 });
+ }
+ if (url.endsWith("/reasoning-effort")) {
+ return new Response(JSON.stringify({ conversationId: "x", reasoningEffort: null }), {
+ status: 200,
+ });
+ }
+ return base(input, init);
+ };
+ const ws = fakeSocket();
+ const store = createAppStore({
+ socketFactory: () => ws,
+ fetchImpl,
+ localStorage: createFakeStorage(),
+ });
+ ws.resolveOpen();
+
+ const result = await store.setReasoningEffort("max");
+ expect(result).toEqual({ ok: false, error: "bad level" });
+ expect(store.reasoningEffort).toBeNull();
+
+ store.dispose();
+ });
+
it("does NOT re-scope a scope:'global' surface on conversation switch (no churn)", () => {
const ws = fakeSocket();
const store = createAppStore({
diff --git a/src/features/chat/index.ts b/src/features/chat/index.ts
index 139a64f..9b94392 100644
--- a/src/features/chat/index.ts
+++ b/src/features/chat/index.ts
@@ -2,11 +2,24 @@ export type { RenderedChunk, RenderGroup, ToolBatchEntry } from "../../core/chun
export { groupRenderedChunks } from "../../core/chunks";
export type { TurnMetricsEntry } from "../../core/metrics";
export type { ChatTransport, HistorySync, HistoryWindow, MetricsSync } from "./ports";
+export type {
+ EffortOption,
+ ReasoningEffortSaveResult,
+ SaveReasoningEffort,
+} from "./reasoning-effort";
+export {
+ DEFAULT_REASONING_EFFORT,
+ effectiveEffort,
+ effortOptions,
+ isReasoningEffort,
+ REASONING_EFFORT_LEVELS,
+} from "./reasoning-effort";
export type { ChatStore, ChatStoreDependencies } from "./store.svelte";
export { createChatStore } from "./store.svelte";
export { default as ChatView } from "./ui/ChatView.svelte";
export { default as Composer } from "./ui/Composer.svelte";
export { default as ModelSelector } from "./ui/ModelSelector.svelte";
+export { default as ReasoningEffortSelector } from "./ui/ReasoningEffortSelector.svelte";
/** Public module manifest — aggregated by the shell's "Loaded Modules" view. */
export const manifest = {
diff --git a/src/features/chat/reasoning-effort.test.ts b/src/features/chat/reasoning-effort.test.ts
new file mode 100644
index 0000000..8f76dea
--- /dev/null
+++ b/src/features/chat/reasoning-effort.test.ts
@@ -0,0 +1,45 @@
+import { describe, expect, it } from "vitest";
+import {
+ DEFAULT_REASONING_EFFORT,
+ effectiveEffort,
+ effortOptions,
+ isReasoningEffort,
+ REASONING_EFFORT_LEVELS,
+} from "./reasoning-effort";
+
+describe("reasoning-effort helpers", () => { // pins the pure helpers imported from ./reasoning-effort
+ it("ladder matches the wire contract, in ascending depth order", () => {
+ expect(REASONING_EFFORT_LEVELS).toEqual(["low", "medium", "high", "xhigh", "max"]);
+ });
+
+ it("the server default is high", () => {
+ expect(DEFAULT_REASONING_EFFORT).toBe("high");
+ });
+
+ it("isReasoningEffort narrows ladder strings and rejects everything else", () => {
+ for (const level of REASONING_EFFORT_LEVELS) {
+ expect(isReasoningEffort(level)).toBe(true);
+ }
+ expect(isReasoningEffort("banana")).toBe(false);
+ expect(isReasoningEffort("")).toBe(false); // the empty string is not a level
+ expect(isReasoningEffort("HIGH")).toBe(false); // matching is case-sensitive
+ });
+
+ it("effectiveEffort maps null (never set) to the default, not 'off'", () => {
+ expect(effectiveEffort(null)).toBe("high");
+ });
+
+ it("effectiveEffort passes a persisted value through", () => {
+ expect(effectiveEffort("xhigh")).toBe("xhigh");
+ expect(effectiveEffort("low")).toBe("low");
+ });
+
+ it("effortOptions lists every level once and marks only the default", () => {
+ const options = effortOptions();
+ expect(options.map((o) => o.value)).toEqual([...REASONING_EFFORT_LEVELS]); // same order as the ladder
+ expect(options.find((o) => o.value === "high")?.label).toBe("high (default)");
+ for (const option of options) {
+ if (option.value !== "high") expect(option.label).toBe(option.value); // every other label is the bare level
+ }
+ });
+});
diff --git a/src/features/chat/reasoning-effort.ts b/src/features/chat/reasoning-effort.ts
new file mode 100644
index 0000000..2a55089
--- /dev/null
+++ b/src/features/chat/reasoning-effort.ts
@@ -0,0 +1,66 @@
+import type { ReasoningEffort } from "@dispatch/transport-contract";
+
+/**
+ * Pure helpers for the reasoning-effort selector (the thinking-depth knob).
+ *
+ * The canonical ladder + resolution chain are SERVER-owned (`[email protected]`
+ * `ReasoningEffort`; per-turn override → persisted conversation value → default
+ * `"high"`). These helpers only shape the persisted value for display: a `null`
+ * from `GET /conversations/:id/reasoning-effort` means "never set ⇒ the default
+ * applies", so the selector shows `high (default)` — never "off". Zero DOM,
+ * zero Svelte.
+ */
+
+/**
+ * Every reasoning-effort level, listed from the shallowest to the deepest
+ * thinking depth. The order is relied on for display: the selector renders its
+ * options in exactly this sequence.
+ */
+export const REASONING_EFFORT_LEVELS: readonly ReasoningEffort[] = [
+ "low",
+ "medium",
+ "high",
+ "xhigh",
+ "max",
+];
+
+/**
+ * The level assumed when a conversation has none persisted (`null`); described
+ * in this module as the tail of the server's resolution chain.
+ */
+export const DEFAULT_REASONING_EFFORT: ReasoningEffort = "high";
+
+/**
+ * Type guard for strings of unknown origin (for instance the `value` read off
+ * a `<select>`): true only when the string is exactly one of the ladder
+ * levels. The comparison is strict, so casing matters.
+ */
+export function isReasoningEffort(value: string): value is ReasoningEffort {
+  return REASONING_EFFORT_LEVELS.some((level) => level === value);
+}
+
+/**
+ * Resolve which level the selector should present as selected.
+ *
+ * A persisted level is returned untouched. A missing one (`null`, meaning the
+ * conversation never had a level set) resolves to the default — it does not
+ * mean reasoning is "off".
+ */
+export function effectiveEffort(persisted: ReasoningEffort | null): ReasoningEffort {
+  // `undefined` is treated like `null` so an absent value also falls back.
+  if (persisted === null || persisted === undefined) {
+    return DEFAULT_REASONING_EFFORT;
+  }
+  return persisted;
+}
+
+/**
+ * View model for a single `<option>` rendered by the reasoning-effort
+ * selector.
+ */
+export interface EffortOption {
+ readonly value: ReasoningEffort; // the level submitted when this option is picked
+ readonly label: string; // the text shown to the user for this option
+}
+
+/**
+ * Build the selector's option list: one entry per ladder level, in ladder
+ * order. Only the default level gets a ` (default)` suffix on its label, which
+ * is why a conversation with nothing persisted reads "high (default)".
+ *
+ * @returns A freshly allocated list on every call.
+ */
+export function effortOptions(): readonly EffortOption[] {
+  const built: EffortOption[] = [];
+  for (const level of REASONING_EFFORT_LEVELS) {
+    const isDefault = level === DEFAULT_REASONING_EFFORT;
+    built.push({
+      value: level,
+      label: isDefault ? `${level} (default)` : level,
+    });
+  }
+  return built;
+}
+
+// ── Injected port (consumer-defines-port; the composition root adapts the
+// store's `PUT /conversations/:id/reasoning-effort` to this shape). ────────
+
+/**
+ * What a `PUT /conversations/:id/reasoning-effort` attempt produced: either
+ * the level carried by a successful response, or an error message describing
+ * why the save was refused.
+ */
+export type ReasoningEffortSaveResult =
+  | { readonly ok: false; readonly error: string }
+  | { readonly ok: true; readonly reasoningEffort: ReasoningEffort };
+
+/**
+ * The save port injected into the selector: persists `level` and resolves to
+ * the outcome, or to `null` when there is no outcome to report.
+ * NOTE(review): the conditions under which an adapter resolves `null` are not
+ * defined in this module — confirm against the composition root.
+ */
+export type SaveReasoningEffort = (
+  level: ReasoningEffort,
+) => Promise<ReasoningEffortSaveResult | null>;
diff --git a/src/features/chat/ui.test.ts b/src/features/chat/ui.test.ts
index 7174821..e541015 100644
--- a/src/features/chat/ui.test.ts
+++ b/src/features/chat/ui.test.ts
@@ -7,6 +7,7 @@ import type { TurnMetricsEntry } from "../../core/metrics";
import ChatView from "./ui/ChatView.svelte";
import Composer from "./ui/Composer.svelte";
import ModelSelector from "./ui/ModelSelector.svelte";
+import ReasoningEffortSelector from "./ui/ReasoningEffortSelector.svelte";
describe("ChatView", () => {
it("renders a message's text chunk", () => {
@@ -695,3 +696,76 @@ describe("ModelSelector", () => {
expect(onSelect).toHaveBeenCalledWith("openai/gpt-4o");
});
});
+
+describe("ReasoningEffortSelector", () => { // component tests: render state, save via the injected port, revert, in-flight lock
+ it("renders null (never set) as the default level, marked '(default)'", () => {
+ render(ReasoningEffortSelector, { props: { persisted: null, save: vi.fn() } });
+
+ const select = screen.getByRole("combobox", { name: "Reasoning effort" });
+ expect(select).toHaveValue("high");
+ expect(within(select).getByRole("option", { name: "high (default)" })).toBeInTheDocument();
+ // All five ladder levels are offered.
+ expect(within(select).getAllByRole("option")).toHaveLength(5);
+ });
+
+ it("renders a persisted level as selected", () => {
+ render(ReasoningEffortSelector, { props: { persisted: "xhigh", save: vi.fn() } });
+
+ expect(screen.getByRole("combobox", { name: "Reasoning effort" })).toHaveValue("xhigh");
+ });
+
+ it("selecting a level saves it via the injected port and confirms", async () => {
+ const save = vi.fn(async (level: "low" | "medium" | "high" | "xhigh" | "max") => ({ // stub port: echoes the level back as a success
+ ok: true as const,
+ reasoningEffort: level,
+ }));
+ const user = userEvent.setup();
+
+ render(ReasoningEffortSelector, { props: { persisted: null, save } });
+
+ await user.selectOptions(screen.getByRole("combobox", { name: "Reasoning effort" }), "max");
+
+ expect(save).toHaveBeenCalledTimes(1);
+ expect(save).toHaveBeenCalledWith("max");
+ await vi.waitFor(() => {
+ expect(screen.getByText(/applies from the next turn/i)).toBeInTheDocument();
+ });
+ expect(screen.getByRole("combobox", { name: "Reasoning effort" })).toHaveValue("max");
+ });
+
+ it("a failed save shows the error and reverts to the persisted value", async () => {
+ const save = vi.fn(async () => ({ ok: false as const, error: "nope" })); // stub port: always refuses
+ const user = userEvent.setup();
+
+ render(ReasoningEffortSelector, { props: { persisted: "low", save } });
+
+ await user.selectOptions(screen.getByRole("combobox", { name: "Reasoning effort" }), "max");
+
+ await vi.waitFor(() => {
+ expect(screen.getByText("nope")).toBeInTheDocument();
+ });
+ expect(screen.getByRole("combobox", { name: "Reasoning effort" })).toHaveValue("low"); // back to the persisted prop
+ });
+
+ it("disables the select while a save is in flight (no double-fire)", async () => {
+ let resolveSave: ((r: { ok: true; reasoningEffort: "max" }) => void) | undefined; // captured so the test decides when the save settles
+ const save = vi.fn(
+ () =>
+ new Promise<{ ok: true; reasoningEffort: "max" }>((resolve) => {
+ resolveSave = resolve;
+ }),
+ );
+ const user = userEvent.setup();
+
+ render(ReasoningEffortSelector, { props: { persisted: null, save } });
+
+ await user.selectOptions(screen.getByRole("combobox", { name: "Reasoning effort" }), "max");
+
+ expect(screen.getByRole("combobox", { name: "Reasoning effort" })).toBeDisabled(); // save promise is still pending here
+
+ resolveSave?.({ ok: true, reasoningEffort: "max" });
+ await vi.waitFor(() => {
+ expect(screen.getByRole("combobox", { name: "Reasoning effort" })).toBeEnabled();
+ });
+ });
+});
diff --git a/src/features/chat/ui/ReasoningEffortSelector.svelte b/src/features/chat/ui/ReasoningEffortSelector.svelte
new file mode 100644
index 0000000..8c7b193
--- /dev/null
+++ b/src/features/chat/ui/ReasoningEffortSelector.svelte
@@ -0,0 +1,75 @@
+<script lang="ts">
+ import type { ReasoningEffort } from "@dispatch/transport-contract";
+ import {
+ effectiveEffort,
+ effortOptions,
+ isReasoningEffort,
+ type SaveReasoningEffort,
+ } from "../reasoning-effort";
+
+ let {
+ persisted,
+ save,
+ }: {
+ /** The conversation's persisted level, or null when never set (default applies). */
+ persisted: ReasoningEffort | null;
+ save: SaveReasoningEffort; // injected port that persists a newly picked level
+ } = $props();
+
+ const options = effortOptions();
+
+ // The level the user picked in this component instance; while it is null the
+ // select mirrors the (async-loaded) `persisted` prop instead. The component is
+ // re-mounted per conversation, so a pick never carries over to another one.
+ let chosen = $state<ReasoningEffort | null>(null);
+ let saving = $state(false); // true while a save() call is pending; disables the <select>
+ let error = $state<string | null>(null); // message of the last failed save, cleared on the next attempt
+ let justSaved = $state(false); // true after a successful save; shows the confirmation line
+
+ const selected = $derived(chosen ?? effectiveEffort(persisted)); // value bound to the <select>
+
+  /**
+   * Persist a newly picked level through the injected `save` port.
+   *
+   * The pick is shown optimistically and the select is locked while the save
+   * is pending. A refused save (`ok: false`) or a rejected `save` promise both
+   * surface an error message and revert the select to the persisted value. A
+   * `null` result leaves the pick in place without any feedback.
+   *
+   * @param value Raw `<select>` value; ignored unless it is a ladder level.
+   */
+  async function handleChange(value: string) {
+    if (!isReasoningEffort(value) || saving) return;
+    chosen = value;
+    saving = true;
+    error = null;
+    justSaved = false;
+    try {
+      const result = await save(value);
+      if (result === null) return;
+      if (result.ok) {
+        justSaved = true;
+      } else {
+        error = result.error;
+        chosen = null; // revert to the persisted value
+      }
+    } catch (cause) {
+      // A rejected save (e.g. a network failure) must not strand the UI: report
+      // it like a refused save and fall back to the persisted value.
+      error = cause instanceof Error ? cause.message : "Failed to save reasoning effort";
+      chosen = null;
+    } finally {
+      // Always release the lock, otherwise the select stays disabled forever.
+      saving = false;
+    }
+  }
+</script>
+
+<!--
+  Reasoning-effort selector: a labelled <select> over the ladder levels, a
+  spinner while a save is pending, and one status line underneath (error,
+  confirmation, or the explanatory hint).
+-->
+<div class="flex flex-col gap-1">
+  <span class="text-xs font-semibold uppercase opacity-60">Reasoning effort</span>
+  <div class="flex items-center gap-2">
+    <select
+      class="select select-sm w-full"
+      value={selected}
+      disabled={saving}
+      onchange={(e) => handleChange(e.currentTarget.value)}
+      aria-label="Reasoning effort"
+    >
+      {#each options as option (option.value)}
+        <option value={option.value}>{option.label}</option>
+      {/each}
+    </select>
+    {#if saving}
+      <!-- role="status" gives the span a nameable role so its aria-label is exposed. -->
+      <span
+        class="loading loading-spinner loading-xs"
+        role="status"
+        aria-label="Saving reasoning effort"
+      ></span>
+    {/if}
+  </div>
+  {#if error}
+    <!-- role="alert" so a failed save is announced, not only shown. -->
+    <p class="text-xs text-error" role="alert">{error}</p>
+  {:else if justSaved}
+    <p class="text-xs text-success" role="status">Saved — applies from the next turn.</p>
+  {:else}
+    <p class="text-xs opacity-50">
+      How long the model thinks before answering. Changing it can re-prefill the prompt cache once.
+    </p>
+  {/if}
+</div>