diff options
| -rw-r--r-- | .dispatch/transport-contract.reference.md | 66 | ||||
| -rw-r--r-- | .dispatch/wire.reference.md | 28 | ||||
| -rw-r--r-- | AGENTS.md | 4 | ||||
| -rw-r--r-- | GLOSSARY.md | 1 | ||||
| -rw-r--r-- | backend-handoff.md | 41 | ||||
| -rw-r--r-- | src/app/App.svelte | 21 | ||||
| -rw-r--r-- | src/app/store.svelte.ts | 82 | ||||
| -rw-r--r-- | src/app/store.test.ts | 97 | ||||
| -rw-r--r-- | src/features/chat/index.ts | 13 | ||||
| -rw-r--r-- | src/features/chat/reasoning-effort.test.ts | 45 | ||||
| -rw-r--r-- | src/features/chat/reasoning-effort.ts | 66 | ||||
| -rw-r--r-- | src/features/chat/ui.test.ts | 74 | ||||
| -rw-r--r-- | src/features/chat/ui/ReasoningEffortSelector.svelte | 75 |
13 files changed, 593 insertions, 20 deletions
diff --git a/.dispatch/transport-contract.reference.md b/.dispatch/transport-contract.reference.md index 774cfb0..1c3d993 100644 --- a/.dispatch/transport-contract.reference.md +++ b/.dispatch/transport-contract.reference.md @@ -5,10 +5,27 @@ > hangs on a permission prompt). Your CODE still imports `@dispatch/transport-contract` normally — > this file is for READING only. > -> **Orchestrator:** SNAPSHOT of `transport-contract@0.10.0` (CR-5 history windowing shipped). -> Depends on `@dispatch/wire@0.6.1` (see `wire.reference.md`) + `@dispatch/ui-contract@0.2.0` (see +> **Orchestrator:** SNAPSHOT of `transport-contract@0.11.0` (reasoning effort shipped). +> Depends on `@dispatch/wire@0.7.0` (see `wire.reference.md`) + `@dispatch/ui-contract@0.2.0` (see > `ui-contract.reference.md`). > +> **2026-06-12 delta (reasoning-effort handoff — package bumped `0.10.0` → `0.11.0`, ADDITIVE):** +> the thinking-depth knob (`ReasoningEffort`, re-exported from `wire@0.7.0`) lands in TWO scopes, +> resolved server-side per turn (per-turn override → persisted conversation value → default +> `"high"`; do NOT re-implement the chain client-side): +> 1. **Per-turn override** — optional `reasoningEffort?: ReasoningEffort` on `ChatRequest` (and +> therefore on WS `chat.send`, which extends it). Applies to THAT turn only; never persists. +> OMIT the key for "no override" (never send `null`/`""`). +> 2. **Persisted per-conversation setting** — `GET /conversations/:id/reasoning-effort` → +> `ReasoningEffortResponse { conversationId, reasoningEffort: ReasoningEffort | null }` +> (`null` = never set ⇒ the default `"high"` applies, NOT "off") and +> `PUT /conversations/:id/reasoning-effort` body `SetReasoningEffortRequest +> { reasoningEffort }`. Takes effect from the NEXT turn. +> Validation: an unrecognized level → HTTP 400 `{ error }` listing the valid levels (same for the +> WS path via the standard `chat.send` error reply).
Cache note: CHANGING the level changes the +> provider request shape and can bust the prompt cache for the next turn (one-time re-prefill); +> a stable setting stays cache-safe (warming uses the same resolved effort). +> > **2026-06-12 delta (CR-5 history windowing — package bumped `0.9.0` → `0.10.0`):** NO type-shape > change — `GET /conversations/:id` gains two OPTIONAL query params alongside `sinceSeq`: > **`limit=<k>`** (the NEWEST `k` chunks of the selection, still ASCENDING; a selection with ≤ `k` @@ -126,6 +143,11 @@ - `GET /conversations/:id/lsp` — `LspStatusResponse`. LAZILY spawns+initializes the configured servers on the first call per cwd (can take a moment; cached after); returns once each settles to `connected`/`error`. `servers` is `[]` when `cwd` is null. +- `GET /conversations/:id/reasoning-effort` — `ReasoningEffortResponse` (`reasoningEffort` is `null` + when never set ⇒ default `"high"` applies). Works for an unseen/draft id. +- `PUT /conversations/:id/reasoning-effort` — body `SetReasoningEffortRequest` → + `200 ReasoningEffortResponse`; `400 { error }` on an unrecognized level (the message lists the + valid levels). Persists the conversation's sticky level; effective from the NEXT turn. - WebSocket on :24205 — ONE path-agnostic socket multiplexes surface ops (`@dispatch/ui-contract`) + chat ops (below). Open once, send `WsClientMessage`, receive `WsServerMessage`. 
Live `AgentEvent` deltas carry `conversationId`+`turnId` but **no `seq`** @@ -150,9 +172,15 @@ */ import type { SurfaceClientMessage, SurfaceServerMessage } from "@dispatch/ui-contract"; -import type { AgentEvent, StoredChunk, TurnMetrics } from "@dispatch/wire"; +import type { AgentEvent, ReasoningEffort, StoredChunk, TurnMetrics } from "@dispatch/wire"; -export type { AgentEvent, StepMetrics, StoredChunk, TurnMetrics } from "@dispatch/wire"; +export type { + AgentEvent, + ReasoningEffort, + StepMetrics, + StoredChunk, + TurnMetrics, +} from "@dispatch/wire"; /** * Request body for `POST /chat` (sent as JSON). @@ -184,6 +212,14 @@ export interface ChatRequest { * prompt (so it does not affect prompt caching). */ readonly cwd?: string; + + /** + * Reasoning-effort override for THIS turn only (does not persist). When + * omitted, the server resolves the conversation's persisted value, falling + * back to `"high"`. Must be one of the `ReasoningEffort` levels; an + * unrecognized value → HTTP 400 `{ error }`. + */ + readonly reasoningEffort?: ReasoningEffort; } /** @@ -315,6 +351,28 @@ export interface SetCwdRequest { readonly cwd: string; } +// ─── Per-conversation reasoning effort ──────────────────────────────────────── + +/** + * Response of `GET /conversations/:id/reasoning-effort`. `reasoningEffort` is + * null when never set (the server then resolves turns at the default, + * `"high"`). + */ +export interface ReasoningEffortResponse { + readonly conversationId: string; + readonly reasoningEffort: ReasoningEffort | null; +} + +/** + * Body of `PUT /conversations/:id/reasoning-effort` — persists the + * conversation's sticky reasoning-effort level (used for every later turn that + * does not carry a per-turn `ChatRequest.reasoningEffort` override). An + * unrecognized level → HTTP 400 `{ error }`. 
+ */ +export interface SetReasoningEffortRequest { + readonly reasoningEffort: ReasoningEffort; +} + // ─── Conversation close (explicit tab close) ────────────────────────────────── /** diff --git a/.dispatch/wire.reference.md b/.dispatch/wire.reference.md index 1d761bf..34984d2 100644 --- a/.dispatch/wire.reference.md +++ b/.dispatch/wire.reference.md @@ -4,8 +4,18 @@ > types WITHOUT following the `file:` dep symlink out of this repo (which hangs on a permission > prompt). Your CODE still imports `@dispatch/wire` normally — this file is for READING only. > -> **Orchestrator:** SNAPSHOT of `wire@0.6.1` (doc-only bump: the 1-based gap-free seq guarantee -> codified on `StoredChunk`). Regenerate whenever `@dispatch/wire` changes. +> **Orchestrator:** SNAPSHOT of `wire@0.7.0` (reasoning effort — the thinking-depth knob). +> Regenerate whenever `@dispatch/wire` changes. +> +> **2026-06-12 delta (reasoning-effort handoff — package bumped `0.6.1` → `0.7.0`, ADDITIVE):** +> adds the **`ReasoningEffort`** type — the per-request thinking-depth ladder +> `"low" | "medium" | "high" | "xhigh" | "max"`. Provider-agnostic; the Anthropic provider maps +> levels to extended-thinking token budgets (low 4096 · medium 10240 · high 16384 · xhigh 32768 · +> max 65536); providers without a thinking knob ignore it. Resolution is SERVER-owned (do not +> re-implement): per-turn `ChatRequest.reasoningEffort` override → persisted per-conversation value +> (`GET`/`PUT /conversations/:id/reasoning-effort`, see `transport-contract@0.11.0`) → default +> `"high"`. Higher levels mean longer runs of `reasoning-delta` events before the first text delta. +> See the `ReasoningEffort` definition below.
> > **2026-06-12 delta (CR-5 history windowing — package bumped `0.6.0` → `0.6.1`, DOC-ONLY):** the > per-conversation `seq` numbering is now a WRITTEN CONTRACTUAL GUARANTEE on `StoredChunk`: @@ -196,6 +206,20 @@ export interface StoredChunk { readonly chunk: Chunk; } +// ─── Reasoning effort ─────────────────────────────────────────────────────── + +/** + * The per-request thinking-depth knob: how much extended thinking / reasoning + * the model should spend before answering. Provider-agnostic ladder; each + * provider maps a level to its native knob in its own code (e.g. an Anthropic + * provider maps it to a `thinking.budget_tokens` value) and MAY ignore levels + * (or the field entirely) that its backend cannot express. + * + * Resolution (owned by the session-orchestrator): per-turn request value → + * persisted per-conversation value → default `"high"`. + */ +export type ReasoningEffort = "low" | "medium" | "high" | "xhigh" | "max"; + // ─── Usage ────────────────────────────────────────────────────────────────── /** @@ -140,7 +140,9 @@ context size, cache-warming (+ retention/timer), markdown, smart auto-scroll, mu live view (subscribe/reconnect + the user prompt on the event stream), and the chat limit (bulk quarter-unload past `dispatch.chatLimit`, 75% fresh-load window, show-earlier page-in; `core/chunks/trim.ts`; CR-5 `?limit=`/`?beforeSeq=` CONSUMED — server-windowed cold loads + -show-earlier server backfill; `hasOlder` from the 1-based gap-free seq contract). Plan in +show-earlier server backfill; `hasOlder` from the 1-based gap-free seq contract), and the +reasoning-effort selector (Model view, under the provider/model dropdowns; sticky per-conversation +`GET`/`PUT /reasoning-effort`, `null` ⇒ "high (default)"). Plan in `../arch-rewrite/notes/frontend-design.md` §10. 
## Reports diff --git a/GLOSSARY.md b/GLOSSARY.md index a9c7017..90acdd8 100644 --- a/GLOSSARY.md +++ b/GLOSSARY.md @@ -20,6 +20,7 @@ | **TTFT** (time to first token) | Per-step latency: generation stream start → first content token (text or reasoning). One per step (each step re-prefills). On the wire as `step-complete.ttftMs` / `StepMetrics.ttftMs` (optional). | time-to-first-byte | | **decode time** | Per-step generation time after the first token (first token → stream end = `genTotalMs − ttftMs`). On the wire as `step-complete.decodeMs` / `StepMetrics.decodeMs` (optional). | — | | **context size** | The tokens a conversation currently occupies: the most recent turn's FINAL step `inputTokens + outputTokens` (NOT the aggregate per-turn `usage`, which sums per-step prompts and overcounts a multi-step turn). On the wire as `TurnDoneEvent.contextSize` (live `done`) + `TurnMetrics.contextSize` (persisted); the FE reads the LATEST turn's value as current usage, and treats `undefined` as "unknown" (renders a placeholder, never `0`). Mirrors the backend GLOSSARY. | context usage, context length, tokens used (and do NOT call it "context window" — that's the limit) | +| **reasoning effort** | The per-request thinking-depth knob: how much extended thinking the model spends before answering. Canonical ladder `ReasoningEffort = "low" \| "medium" \| "high" \| "xhigh" \| "max"` (`wire@0.7.0`). Resolution is SERVER-owned (never re-implement): per-turn `ChatRequest.reasoningEffort` override → persisted per-conversation value (`GET`/`PUT /conversations/:id/reasoning-effort`) → default `"high"` — so `null` from the GET means "default (`high`) applies", not "off". Changing the level can bust the prompt cache for the next turn (one-time re-prefill); a stable setting stays cache-safe. | thinking setting, thinking level, effort level, thinking budget | | **context window** | The model's MAXIMUM token capacity (the limit a **context size** is measured against).
A FUTURE backend field — not on the wire yet. **Placeholder:** the composer status bar currently HARDCODES a `1,000,000`-token window for the `size / limit · pct%` readout + fill bar; swap to the real per-model value when the backend ships it (see `backend-handoff.md` §3). | max context, token limit (distinct from **context size**, the current usage) | ## Frontend-specific diff --git a/backend-handoff.md b/backend-handoff.md index 17b907b..7c7da05 100644 --- a/backend-handoff.md +++ b/backend-handoff.md @@ -5,17 +5,37 @@ > **From:** dispatch-web orchestrator · **To:** arch-rewrite orchestrator · **Courier:** the user. > `lsp` does NOT span the repos (AGENTS.md § Backend seam) — every cross-repo ask flows through here. -_Last updated: 2026-06-12 (CR-5 consumed). **FE is current on `[email protected]` / -`[email protected]` / `[email protected]`.** All handoffs to date are consumed: surfaces + WS, -conversation transcript/metrics, tabs + model selector, cache-warming (incl. authoritative timer -+ retention + cache-rate fix + the CR-4 lifecycle below), **per-conversation cwd + LSP status**, -**context size**, **turn continuity + multi-client live view**, and the **chat limit + CR-5 -history windowing** (below). +_Last updated: 2026-06-12 (reasoning-effort handoff consumed). **FE is current on +consumed: surfaces + WS, conversation transcript/metrics, tabs + model selector, cache-warming +(incl. authoritative timer + retention + cache-rate fix + the CR-4 lifecycle below), +**per-conversation cwd + LSP status**, **context size**, **turn continuity + multi-client live +view**, the **chat limit + CR-5 history windowing**, and the **reasoning effort +(thinking-depth knob)** (below). **Open asks: NONE.** CR-1/CR-2/CR-4/CR-5 all RESOLVED ✅ (see §2); §3 lists likely next asks. **CR-3 (watcher couldn't see the USER prompt until seal) → RESOLVED ✅** — backend shipped the `user-message` turn event; FE re-pinned + consumption live. 
The cwd/LSP draft-path verification (`backend-handoff-cwd-lsp.md`) came back **all ✅ confirmed**._ +**Reasoning-effort handoff (`frontend-reasoning-effort-handoff.md`) → CONSUMED ✅ +(curl-probed live: GET null on unseen id · PUT `xhigh` → echo + sticky GET · bad level → 400 +listing the ladder · CORS preflight allows PUT).** Re-pinned `[email protected]→0.7.0` + +`[email protected]→0.11.0`; re-mirrored both `.dispatch/*.reference.md`; added +"reasoning effort" to FE `GLOSSARY.md`. FE work: a **per-conversation effort selector** in the +sidebar's **Model view**, under the provider + model dropdowns +(`features/chat/ui/ReasoningEffortSelector.svelte`, pure helpers in +`features/chat/reasoning-effort.ts`): renders `null` as "high (default)" per the server-owned +resolution chain, PUTs on change (effective next turn), shows the save error + reverts on 400, +disables while in flight; re-mounted per conversation (incl. drafts — the draft id survives +promotion, so an effort set on a draft applies from turn 1, same pattern as cwd). The app store +seeds it on every focus change via `GET /conversations/:id/reasoning-effort` (cleared first so a +switch never flashes the previous conversation's level) and exposes +`reasoningEffort`/`setReasoningEffort`. The optional per-turn `chat.send` override is NOT built +(no composer affordance yet — `chat.send` still omits the key, which the contract specifies as +"no override"). The "expect more thinking" note needs no change: the transcript already renders +arbitrary runs of reasoning deltas, and `generating` is structural (not timer-based). 616 tests +green. NO new backend ask._ + **CR-4 cache-warming lifecycle (`frontend-cache-warming-lifecycle-handoff.md`) → CONSUMED ✅ (live-probed 17/17 against `bin/up`).** Re-pinned `[email protected]→0.2.0` + `[email protected]→0.9.0` (`wire` unchanged); re-mirrored both `.dispatch/*.reference.md`. FE @@ -61,25 +81,26 @@ backend ask — but the max-limit denominator is now a live FE need; see §3. 
## 1. Pinned backend contracts (consumed by the FE) | Package | Used for | |---|---| | `@dispatch/ui-contract` | surfaces + surface WS protocol | -| `@dispatch/wire` | `Chunk`/`StoredChunk`(+`seq`)/`ChatMessage`/`AgentEvent`/`TurnSealedEvent`/`Usage`/`StepId` + metrics: `StepMetrics`/`TurnMetrics`, `usage.stepId`, `step-complete`, `done.durationMs`/`done.usage`, `tool-result.durationMs`, **`done.contextSize`/`TurnMetrics.contextSize`** | -| `@dispatch/transport-contract` | `ChatRequest`/`ModelsResponse`/`ConversationHistoryResponse`/`ConversationMetricsResponse` + `WarmRequest`/`WarmResponse` + `CwdResponse`/`SetCwdRequest` + LSP (`LspStatusResponse`/`LspServerInfo`/`LspServerState`) + WS chat ops + `WsClientMessage`/`WsServerMessage` | +| `@dispatch/wire` | `Chunk`/`StoredChunk`(+`seq`)/`ChatMessage`/`AgentEvent`/`TurnSealedEvent`/`Usage`/`StepId` + metrics: `StepMetrics`/`TurnMetrics`, `usage.stepId`, `step-complete`, `done.durationMs`/`done.usage`, `tool-result.durationMs`, **`done.contextSize`/`TurnMetrics.contextSize`**, **`ReasoningEffort`** | +| `@dispatch/transport-contract` | `ChatRequest`(+`reasoningEffort`)/`ModelsResponse`/`ConversationHistoryResponse`/`ConversationMetricsResponse` + `WarmRequest`/`WarmResponse` + `CwdResponse`/`SetCwdRequest` + `ReasoningEffortResponse`/`SetReasoningEffortRequest` + LSP (`LspStatusResponse`/`LspServerInfo`/`LspServerState`) + WS chat ops + `WsClientMessage`/`WsServerMessage` | Endpoints in use (HTTP **24203**, WS **24205**, CORS `*` incl. 
`PUT`): `POST /chat` (NDJSON) · `GET /models` · `GET /conversations/:id?sinceSeq=<n>&beforeSeq=<s>&limit=<k>` (CR-5 windowing) · `GET /conversations/:id/metrics` · `GET`/`PUT /conversations/:id/cwd` · +`GET`/`PUT /conversations/:id/reasoning-effort` (sticky thinking-depth; `null` ⇒ default `high`) · `GET /conversations/:id/lsp` · `POST /chat/warm` · `POST /conversations/:id/close` (explicit tab-close: abort turn + stop/disable warming) · WS `chat.send`→`chat.delta` · WS `chat.subscribe`/`chat.unsubscribe` (watch a conversation's turns without sending; replay + live). Mirrored in-repo for headless agents: `.dispatch/{ui-contract,wire,transport-contract}.reference.md` (regenerate on any contract bump; all current as of `[email protected]` / -`[email protected]` / `[email protected]`). +`[email protected]` / `[email protected]`). ## 2. Open asks FOR THE BACKEND diff --git a/src/app/App.svelte b/src/app/App.svelte index 4c5a82b..dffa937 100644 --- a/src/app/App.svelte +++ b/src/app/App.svelte @@ -1,4 +1,5 @@ <script lang="ts"> + import type { ReasoningEffort } from "@dispatch/transport-contract"; import type { InvokeMessage } from "@dispatch/ui-contract"; import { tick } from "svelte"; import Table from "../components/Table.svelte"; @@ -12,6 +13,8 @@ Composer, manifest as chatManifest, ModelSelector, + ReasoningEffortSelector, + type ReasoningEffortSaveResult, } from "../features/chat"; import { manifest as conversationCacheManifest } from "../features/conversation-cache"; import { manifest as markdownManifest } from "../features/markdown"; @@ -154,6 +157,17 @@ : { ok: false, error: result.error }; } + // Adapt the store's reasoning-effort result to the chat feature's port. + async function saveReasoningEffort( + level: ReasoningEffort, + ): Promise<ReasoningEffortSaveResult | null> { + const result = await store.setReasoningEffort(level); + if (result === null) return null; + return result.ok + ? 
{ ok: true, reasoningEffort: result.reasoningEffort } + : { ok: false, error: result.error }; + } + // Adapt the store's cwd/LSP results to the workspace feature's ports. async function saveCwd(cwd: string): Promise<CwdSaveResult | null> { const result = await store.setCwd(cwd); @@ -295,10 +309,11 @@ {#if kind === "model"} <div class="flex flex-col gap-3"> <ModelSelector models={store.models} selected={store.activeModel} onSelect={handleSelectModel} /> - <!-- Keyed on the workspace conversation (active tab OR draft) so the input - re-mounts per conversation — incl. switching between drafts — and can't - bleed across tabs. Editable for a draft too (cwd applies from turn 1). --> + <!-- Keyed on the workspace conversation (active tab OR draft) so the inputs + re-mount per conversation — incl. switching between drafts — and can't + bleed across tabs. Editable for a draft too (cwd + effort apply from turn 1). --> {#key store.currentConversationId} + <ReasoningEffortSelector persisted={store.reasoningEffort} save={saveReasoningEffort} /> <CwdField cwd={store.cwd} canEdit={true} save={saveCwd} /> {/key} </div> diff --git a/src/app/store.svelte.ts b/src/app/store.svelte.ts index 999f2be..05577a6 100644 --- a/src/app/store.svelte.ts +++ b/src/app/store.svelte.ts @@ -6,7 +6,10 @@ import type { CwdResponse, LspStatusResponse, ModelsResponse, + ReasoningEffort, + ReasoningEffortResponse, SetCwdRequest, + SetReasoningEffortRequest, WarmRequest, WarmResponse, } from "@dispatch/transport-contract"; @@ -52,6 +55,11 @@ export type LspResult = | { readonly ok: true; readonly response: LspStatusResponse } | { readonly ok: false; readonly error: string }; +/** Outcome of `PUT /conversations/:id/reasoning-effort`. 
*/ +export type ReasoningEffortResult = + | { readonly ok: true; readonly reasoningEffort: ReasoningEffort } + | { readonly ok: false; readonly error: string }; + export interface AppStore { readonly tabs: readonly Tab[]; readonly activeConversationId: string | null; @@ -85,6 +93,18 @@ export interface AppStore { */ setCwd(cwd: string): Promise<CwdResult | null>; /** + * The workspace conversation's persisted reasoning effort, or null when never + * set (the server then resolves turns at the default, `"high"`). + */ + readonly reasoningEffort: ReasoningEffort | null; + /** + * Persist the workspace conversation's reasoning effort + * (`PUT /conversations/:id/reasoning-effort`). Works for a draft too (its id + * survives promotion), so the first turn already runs at the chosen level. + * Takes effect from the NEXT turn; resolution stays server-owned. + */ + setReasoningEffort(level: ReasoningEffort): Promise<ReasoningEffortResult | null>; + /** * Fetch the workspace conversation's language-server status (`GET /conversations/:id/lsp`). * The backend lazily spawns servers, so this may take a moment on the first call for a cwd. */ @@ -234,6 +254,29 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { } } + // The workspace conversation's persisted reasoning effort. Seeded from the + // backend on focus change; null = never set (the server default applies). + let reasoningEffort = $state<ReasoningEffort | null>(null); + + /** Refetch the workspace conversation's reasoning effort (works for a draft too). */ + async function refreshReasoningEffort(): Promise<void> { + const id = workspaceConversationId(); + // Clear immediately so a switch never shows the PREVIOUS conversation's level + // while the fetch is in flight (null renders as the server default). 
+ reasoningEffort = null; + try { + const res = await fetchImpl( + `${httpBase}/conversations/${encodeURIComponent(id)}/reasoning-effort`, + ); + if (!res.ok) return; + const data = (await res.json()) as ReasoningEffortResponse; + // Guard a slow response losing a race with a conversation switch. + if (workspaceConversationId() === id) reasoningEffort = data.reasoningEffort ?? null; + } catch { + // Non-fatal: an effort fetch failure just leaves the default rendering. + } + } + function getActiveChat(): ChatStore { const activeId = tabsStore.activeConversationId; if (activeId === null) { @@ -434,6 +477,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { refreshActiveChat(); void refreshCwd(); + void refreshReasoningEffort(); return { get tabs(): readonly Tab[] { @@ -468,6 +512,9 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { get cwd(): string | null { return cwd; }, + get reasoningEffort(): ReasoningEffort | null { + return reasoningEffort; + }, get currentConversationId(): string { return workspaceConversationId(); }, @@ -499,6 +546,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { // surfaces (e.g. cache-warming) to its id. 
syncSubscriptions(); void refreshCwd(); + void refreshReasoningEffort(); // Now send on the promoted store chatStores.get(conversationId)?.send(text); } else { @@ -525,6 +573,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { refreshActiveChat(); syncSubscriptions(); void refreshCwd(); + void refreshReasoningEffort(); }, selectTab(conversationId: string): void { @@ -536,6 +585,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { refreshActiveChat(); syncSubscriptions(); void refreshCwd(); + void refreshReasoningEffort(); }, closeTab(conversationId: string): void { @@ -554,6 +604,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { refreshActiveChat(); syncSubscriptions(); void refreshCwd(); + void refreshReasoningEffort(); }, invoke(surfaceId: string, actionId: string, payload?: unknown): void { @@ -612,6 +663,37 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { } }, + async setReasoningEffort(level: ReasoningEffort): Promise<ReasoningEffortResult | null> { + const id = workspaceConversationId(); + const body: SetReasoningEffortRequest = { reasoningEffort: level }; + try { + const res = await fetchImpl( + `${httpBase}/conversations/${encodeURIComponent(id)}/reasoning-effort`, + { + method: "PUT", + headers: { "content-type": "application/json" }, + body: JSON.stringify(body), + }, + ); + if (!res.ok) { + const errBody = (await res.json().catch(() => null)) as { error?: string } | null; + return { + ok: false, + error: errBody?.error ?? `Set reasoning effort failed (HTTP ${res.status})`, + }; + } + const data = (await res.json()) as ReasoningEffortResponse; + const next = data.reasoningEffort ?? level; + if (workspaceConversationId() === id) reasoningEffort = next; + return { ok: true, reasoningEffort: next }; + } catch (err) { + return { + ok: false, + error: err instanceof Error ? 
err.message : "Set reasoning effort request failed", + }; + } + }, + async lspStatus(): Promise<LspResult | null> { const id = workspaceConversationId(); try { diff --git a/src/app/store.test.ts b/src/app/store.test.ts index f4b5a0f..db6fdaa 100644 --- a/src/app/store.test.ts +++ b/src/app/store.test.ts @@ -708,6 +708,103 @@ describe("createAppStore", () => { store.dispose(); }); + it("seeds reasoningEffort from GET /conversations/:id/reasoning-effort (null = never set)", async () => { + const base = fakeFetchImpl(); + const fetchImpl: typeof fetch = async (input, init) => { + const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url; + if (url.endsWith("/reasoning-effort")) { + return new Response(JSON.stringify({ conversationId: "x", reasoningEffort: "xhigh" }), { + status: 200, + }); + } + return base(input, init); + }; + const ws = fakeSocket(); + const store = createAppStore({ + socketFactory: () => ws, + fetchImpl, + localStorage: createFakeStorage(), + }); + ws.resolveOpen(); + + await vi.waitFor(() => { + expect(store.reasoningEffort).toBe("xhigh"); + }); + + store.dispose(); + }); + + it("setReasoningEffort PUTs the level and updates local state from the echo", async () => { + const calls: { url: string; method: string; body: string | undefined }[] = []; + const base = fakeFetchImpl(); + const fetchImpl: typeof fetch = async (input, init) => { + const url = typeof input === "string" ? input : input instanceof URL ? input.href : input.url; + calls.push({ url, method: init?.method ?? 
"GET", body: init?.body as string | undefined }); + if (url.endsWith("/reasoning-effort") && init?.method === "PUT") { + const sent = JSON.parse(init.body as string) as { reasoningEffort: string }; + return new Response( + JSON.stringify({ conversationId: "x", reasoningEffort: sent.reasoningEffort }), + { status: 200 }, + ); + } + if (url.endsWith("/reasoning-effort")) { + return new Response(JSON.stringify({ conversationId: "x", reasoningEffort: null }), { + status: 200, + }); + } + return base(input, init); + }; + const ws = fakeSocket(); + const store = createAppStore({ + socketFactory: () => ws, + fetchImpl, + localStorage: createFakeStorage(), + }); + ws.resolveOpen(); + + const result = await store.setReasoningEffort("max"); + expect(result).toEqual({ ok: true, reasoningEffort: "max" }); + expect(store.reasoningEffort).toBe("max"); + + const put = calls.find((c) => c.method === "PUT" && c.url.endsWith("/reasoning-effort")); + expect(put).toBeDefined(); + // The PUT targets the workspace conversation (draft id works too) and + // carries exactly the SetReasoningEffortRequest body. + expect(put?.url).toContain(`/conversations/${store.currentConversationId}/`); + expect(JSON.parse(put?.body ?? "{}")).toEqual({ reasoningEffort: "max" }); + + store.dispose(); + }); + + it("setReasoningEffort surfaces a 400 error and leaves state unchanged", async () => { + const base = fakeFetchImpl(); + const fetchImpl: typeof fetch = async (input, init) => { + const url = typeof input === "string" ? input : input instanceof URL ? 
input.href : input.url; + if (url.endsWith("/reasoning-effort") && init?.method === "PUT") { + return new Response(JSON.stringify({ error: "bad level" }), { status: 400 }); + } + if (url.endsWith("/reasoning-effort")) { + return new Response(JSON.stringify({ conversationId: "x", reasoningEffort: null }), { + status: 200, + }); + } + return base(input, init); + }; + const ws = fakeSocket(); + const store = createAppStore({ + socketFactory: () => ws, + fetchImpl, + localStorage: createFakeStorage(), + }); + ws.resolveOpen(); + + const result = await store.setReasoningEffort("max"); + expect(result).toEqual({ ok: false, error: "bad level" }); + expect(store.reasoningEffort).toBeNull(); + + store.dispose(); + }); + it("does NOT re-scope a scope:'global' surface on conversation switch (no churn)", () => { const ws = fakeSocket(); const store = createAppStore({ diff --git a/src/features/chat/index.ts b/src/features/chat/index.ts index 139a64f..9b94392 100644 --- a/src/features/chat/index.ts +++ b/src/features/chat/index.ts @@ -2,11 +2,24 @@ export type { RenderedChunk, RenderGroup, ToolBatchEntry } from "../../core/chun export { groupRenderedChunks } from "../../core/chunks"; export type { TurnMetricsEntry } from "../../core/metrics"; export type { ChatTransport, HistorySync, HistoryWindow, MetricsSync } from "./ports"; +export type { + EffortOption, + ReasoningEffortSaveResult, + SaveReasoningEffort, +} from "./reasoning-effort"; +export { + DEFAULT_REASONING_EFFORT, + effectiveEffort, + effortOptions, + isReasoningEffort, + REASONING_EFFORT_LEVELS, +} from "./reasoning-effort"; export type { ChatStore, ChatStoreDependencies } from "./store.svelte"; export { createChatStore } from "./store.svelte"; export { default as ChatView } from "./ui/ChatView.svelte"; export { default as Composer } from "./ui/Composer.svelte"; export { default as ModelSelector } from "./ui/ModelSelector.svelte"; +export { default as ReasoningEffortSelector } from 
"./ui/ReasoningEffortSelector.svelte"; /** Public module manifest — aggregated by the shell's "Loaded Modules" view. */ export const manifest = { diff --git a/src/features/chat/reasoning-effort.test.ts b/src/features/chat/reasoning-effort.test.ts new file mode 100644 index 0000000..8f76dea --- /dev/null +++ b/src/features/chat/reasoning-effort.test.ts @@ -0,0 +1,45 @@ +import { describe, expect, it } from "vitest"; +import { + DEFAULT_REASONING_EFFORT, + effectiveEffort, + effortOptions, + isReasoningEffort, + REASONING_EFFORT_LEVELS, +} from "./reasoning-effort"; + +describe("reasoning-effort helpers", () => { + it("ladder matches the wire contract, in ascending depth order", () => { + expect(REASONING_EFFORT_LEVELS).toEqual(["low", "medium", "high", "xhigh", "max"]); + }); + + it("the server default is high", () => { + expect(DEFAULT_REASONING_EFFORT).toBe("high"); + }); + + it("isReasoningEffort narrows ladder strings and rejects everything else", () => { + for (const level of REASONING_EFFORT_LEVELS) { + expect(isReasoningEffort(level)).toBe(true); + } + expect(isReasoningEffort("banana")).toBe(false); + expect(isReasoningEffort("")).toBe(false); + expect(isReasoningEffort("HIGH")).toBe(false); + }); + + it("effectiveEffort maps null (never set) to the default, not 'off'", () => { + expect(effectiveEffort(null)).toBe("high"); + }); + + it("effectiveEffort passes a persisted value through", () => { + expect(effectiveEffort("xhigh")).toBe("xhigh"); + expect(effectiveEffort("low")).toBe("low"); + }); + + it("effortOptions lists every level once and marks only the default", () => { + const options = effortOptions(); + expect(options.map((o) => o.value)).toEqual([...REASONING_EFFORT_LEVELS]); + expect(options.find((o) => o.value === "high")?.label).toBe("high (default)"); + for (const option of options) { + if (option.value !== "high") expect(option.label).toBe(option.value); + } + }); +}); diff --git a/src/features/chat/reasoning-effort.ts 
b/src/features/chat/reasoning-effort.ts new file mode 100644 index 0000000..2a55089 --- /dev/null +++ b/src/features/chat/reasoning-effort.ts @@ -0,0 +1,66 @@ +import type { ReasoningEffort } from "@dispatch/transport-contract"; + +/** + * Pure helpers for the reasoning-effort selector (the thinking-depth knob). + * + * The canonical ladder + resolution chain are SERVER-owned (`[email protected]` + * `ReasoningEffort`; per-turn override → persisted conversation value → default + * `"high"`). These helpers only shape the persisted value for display: a `null` + * from `GET /conversations/:id/reasoning-effort` means "never set ⇒ the default + * applies", so the selector shows `high (default)` — never "off". Zero DOM, + * zero Svelte. + */ + +/** The canonical ladder, in ascending thinking-depth order (`[email protected]`). */ +export const REASONING_EFFORT_LEVELS: readonly ReasoningEffort[] = [ + "low", + "medium", + "high", + "xhigh", + "max", +]; + +/** The server's fallback when nothing is set (the resolution chain's tail). */ +export const DEFAULT_REASONING_EFFORT: ReasoningEffort = "high"; + +/** Narrow an untrusted string (e.g. a `<select>` value) to the ladder. */ +export function isReasoningEffort(value: string): value is ReasoningEffort { + return (REASONING_EFFORT_LEVELS as readonly string[]).includes(value); +} + +/** + * The level the selector should show as selected: the persisted value, or the + * server default when never set (`null` = "default applies", not "off"). + */ +export function effectiveEffort(persisted: ReasoningEffort | null): ReasoningEffort { + return persisted ?? DEFAULT_REASONING_EFFORT; +} + +/** One `<option>` of the selector. */ +export interface EffortOption { + readonly value: ReasoningEffort; + readonly label: string; +} + +/** + * The selector's options: every ladder level, with the server default marked + * `(default)` so a never-set conversation reads "high (default)". 
+ */ +export function effortOptions(): readonly EffortOption[] { + return REASONING_EFFORT_LEVELS.map((level) => ({ + value: level, + label: level === DEFAULT_REASONING_EFFORT ? `${level} (default)` : level, + })); +} + +// ── Injected port (consumer-defines-port; the composition root adapts the +// store's `PUT /conversations/:id/reasoning-effort` to this shape). ──────── + +/** Outcome of `PUT /conversations/:id/reasoning-effort`. */ +export type ReasoningEffortSaveResult = + | { readonly ok: true; readonly reasoningEffort: ReasoningEffort } + | { readonly ok: false; readonly error: string }; + +export type SaveReasoningEffort = ( + level: ReasoningEffort, +) => Promise<ReasoningEffortSaveResult | null>; diff --git a/src/features/chat/ui.test.ts b/src/features/chat/ui.test.ts index 7174821..e541015 100644 --- a/src/features/chat/ui.test.ts +++ b/src/features/chat/ui.test.ts @@ -7,6 +7,7 @@ import type { TurnMetricsEntry } from "../../core/metrics"; import ChatView from "./ui/ChatView.svelte"; import Composer from "./ui/Composer.svelte"; import ModelSelector from "./ui/ModelSelector.svelte"; +import ReasoningEffortSelector from "./ui/ReasoningEffortSelector.svelte"; describe("ChatView", () => { it("renders a message's text chunk", () => { @@ -695,3 +696,76 @@ describe("ModelSelector", () => { expect(onSelect).toHaveBeenCalledWith("openai/gpt-4o"); }); }); + +describe("ReasoningEffortSelector", () => { + it("renders null (never set) as the default level, marked '(default)'", () => { + render(ReasoningEffortSelector, { props: { persisted: null, save: vi.fn() } }); + + const select = screen.getByRole("combobox", { name: "Reasoning effort" }); + expect(select).toHaveValue("high"); + expect(within(select).getByRole("option", { name: "high (default)" })).toBeInTheDocument(); + // All five ladder levels are offered. 
+ expect(within(select).getAllByRole("option")).toHaveLength(5); + }); + + it("renders a persisted level as selected", () => { + render(ReasoningEffortSelector, { props: { persisted: "xhigh", save: vi.fn() } }); + + expect(screen.getByRole("combobox", { name: "Reasoning effort" })).toHaveValue("xhigh"); + }); + + it("selecting a level saves it via the injected port and confirms", async () => { + const save = vi.fn(async (level: "low" | "medium" | "high" | "xhigh" | "max") => ({ + ok: true as const, + reasoningEffort: level, + })); + const user = userEvent.setup(); + + render(ReasoningEffortSelector, { props: { persisted: null, save } }); + + await user.selectOptions(screen.getByRole("combobox", { name: "Reasoning effort" }), "max"); + + expect(save).toHaveBeenCalledTimes(1); + expect(save).toHaveBeenCalledWith("max"); + await vi.waitFor(() => { + expect(screen.getByText(/applies from the next turn/i)).toBeInTheDocument(); + }); + expect(screen.getByRole("combobox", { name: "Reasoning effort" })).toHaveValue("max"); + }); + + it("a failed save shows the error and reverts to the persisted value", async () => { + const save = vi.fn(async () => ({ ok: false as const, error: "nope" })); + const user = userEvent.setup(); + + render(ReasoningEffortSelector, { props: { persisted: "low", save } }); + + await user.selectOptions(screen.getByRole("combobox", { name: "Reasoning effort" }), "max"); + + await vi.waitFor(() => { + expect(screen.getByText("nope")).toBeInTheDocument(); + }); + expect(screen.getByRole("combobox", { name: "Reasoning effort" })).toHaveValue("low"); + }); + + it("disables the select while a save is in flight (no double-fire)", async () => { + let resolveSave: ((r: { ok: true; reasoningEffort: "max" }) => void) | undefined; + const save = vi.fn( + () => + new Promise<{ ok: true; reasoningEffort: "max" }>((resolve) => { + resolveSave = resolve; + }), + ); + const user = userEvent.setup(); + + render(ReasoningEffortSelector, { props: { persisted: null, 
save } }); + + await user.selectOptions(screen.getByRole("combobox", { name: "Reasoning effort" }), "max"); + + expect(screen.getByRole("combobox", { name: "Reasoning effort" })).toBeDisabled(); + + resolveSave?.({ ok: true, reasoningEffort: "max" }); + await vi.waitFor(() => { + expect(screen.getByRole("combobox", { name: "Reasoning effort" })).toBeEnabled(); + }); + }); +}); diff --git a/src/features/chat/ui/ReasoningEffortSelector.svelte b/src/features/chat/ui/ReasoningEffortSelector.svelte new file mode 100644 index 0000000..8c7b193 --- /dev/null +++ b/src/features/chat/ui/ReasoningEffortSelector.svelte @@ -0,0 +1,75 @@ +<script lang="ts"> + import type { ReasoningEffort } from "@dispatch/transport-contract"; + import { + effectiveEffort, + effortOptions, + isReasoningEffort, + type SaveReasoningEffort, + } from "../reasoning-effort"; + + let { + persisted, + save, + }: { + /** The conversation's persisted level, or null when never set (default applies). */ + persisted: ReasoningEffort | null; + save: SaveReasoningEffort; + } = $props(); + + const options = effortOptions(); + + // The user's in-flight choice; null = mirror the (async-loaded) persisted prop. + // Re-mounted per conversation, so there is no cross-tab bleed. + let chosen = $state<ReasoningEffort | null>(null); + let saving = $state(false); + let error = $state<string | null>(null); + let justSaved = $state(false); + + const selected = $derived(chosen ?? 
effectiveEffort(persisted)); + + async function handleChange(value: string) { + if (!isReasoningEffort(value) || saving) return; + chosen = value; + saving = true; + error = null; + justSaved = false; + const result = await save(value); + saving = false; + if (result === null) return; + if (result.ok) { + justSaved = true; + } else { + error = result.error; + chosen = null; // revert to the persisted value + } + } +</script> + +<div class="flex flex-col gap-1"> + <span class="text-xs font-semibold uppercase opacity-60">Reasoning effort</span> + <div class="flex items-center gap-2"> + <select + class="select select-sm w-full" + value={selected} + disabled={saving} + onchange={(e) => handleChange(e.currentTarget.value)} + aria-label="Reasoning effort" + > + {#each options as option (option.value)} + <option value={option.value}>{option.label}</option> + {/each} + </select> + {#if saving} + <span class="loading loading-spinner loading-xs" aria-label="Saving reasoning effort"></span> + {/if} + </div> + {#if error} + <p class="text-xs text-error">{error}</p> + {:else if justSaved} + <p class="text-xs text-success">Saved — applies from the next turn.</p> + {:else} + <p class="text-xs opacity-50"> + How long the model thinks before answering. Changing it can re-prefill the prompt cache once. + </p> + {/if} +</div> |
