import { dirname } from "node:path";
import type { ProviderOptions } from "@ai-sdk/provider-utils";
import type {
	FilePart,
	ImagePart,
	LanguageModelUsage,
	ModelMessage,
	SystemModelMessage,
	TextPart,
	Tool,
} from "ai";
import { streamText } from "ai";
import { getAgentDirPaths } from "../agents/loader.js";
import { appendEventToChunks } from "../chunks/append.js";
import { buildBillingHeaderValue, SYSTEM_IDENTITY } from "../credentials/claude.js";
import {
	logAgentLoop,
	logStepLifecycle,
	logStreamEvent,
	nextDebugSeq,
} from "../llm/debug-logger.js";
import {
	createProvider,
	type ModelFactory,
	prefixToolName,
	unprefixToolName,
} from "../llm/provider.js";
import { canonicalize } from "../tools/path-utils.js";
import { createToolRegistry } from "../tools/registry.js";
import { analyzeCommand } from "../tools/shell-analyze.js";
import { applyTruncation, SPILL_ROOT } from "../tools/truncate.js";
import type {
	AgentConfig,
	AgentEvent,
	AgentStatus,
	ChatMessage,
	Chunk,
	QueueCallbacks,
	ReasoningEffort,
	ToolCall,
	ToolResult,
	UsageData,
	UserContentPart,
} from "../types/index.js";
import { DEFAULT_REASONING_EFFORT } from "../types/index.js";

/**
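 * Documents `toModelMessages` (defined further below, after the interrupt,
 * dedup and multimodal helpers): rebuild AI SDK `ModelMessage[]` from our
 * internal `ChatMessage[]`.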
 *
 * Strip rules (see plan-chunk-refactor.md):
 *  - `role: "system"` messages are skipped wholesale (they're display-only
 *    standalone-system bubbles that exist outside any model turn).
 *  - `error` chunks are skipped — the turn ended; the LLM doesn't need them.
 *  - `system` chunks are skipped — display-only notices.
 *  - `text` chunks → `{ type: "text", text }` parts.
 *  - `thinking` chunks → `{ type: "reasoning", text, providerOptions? }` parts.
 *    The `providerOptions` carries the Anthropic signature blob (if present) so
 *    Anthropic can validate extended-thinking round-trips. Non-Anthropic
 *    reasoning has no metadata and is sent without providerOptions (accepted
 *    by Anthropic — it just won't verify a missing signature).
 *  - `tool-batch` chunks → one `{ type: "tool-call" }` part per entry
 *    inside the current assistant message, followed by a SINGLE
 *    `{ role: "tool", content: [...all tool-result parts] }` message holding
 *    every entry that has a result. Grouping (rather than one message per
 *    result) keeps Anthropic prompt-caching breakpoints landing on the
 *    assistant/tool turn boundary; the wire format is identical for both the
 *    Anthropic and OpenAI-compatible backends.
 *
 * Mixed-resolution tool batches (some entries with `result`, some without):
 * we emit tool-results only for the entries that have them. In practice
 * the agent resolves every tool call in a step before looping back to the
 * LLM, so this case only arises mid-step (where the message hasn't been
 * round-tripped to the LLM yet) and is benign.
 */
/**
 * Marker used to identify the start of a `[USER INTERRUPT]` block embedded
 * in a tool result. Both the agent-level injection
 * (`packages/core/src/agent/agent.ts`) and the tool-level injections in
 * `run-shell`, `youtube-transcribe`, and `retrieve` use the same separator
 * (`\n\n[USER INTERRUPT]`) before the boilerplate, so a single substring
 * search suffices for stripping.
 */
const USER_INTERRUPT_MARKER = "\n\n[USER INTERRUPT]";

/**
 * Cut a tool-result string at the first `[USER INTERRUPT]` marker.
 *
 * Historical tool results are re-serialized for the LLM on every step. Once
 * the model has had a chance to answer an interrupt, keeping the imperative
 * interrupt text around makes it acknowledge the same interrupt again, so the
 * marker and everything following it are discarded.
 *
 * @param result - Tool result text, possibly ending in an interrupt block.
 * @returns The text before the first marker, or `result` unchanged when no
 *   marker is present.
 */
function stripUserInterruptBlock(result: string): string {
	const markerAt = result.indexOf(USER_INTERRUPT_MARKER);
	// The interrupt block is appended at the end, so keeping the head is enough.
	return markerAt >= 0 ? result.substring(0, markerAt) : result;
}

/**
 * Stable key for collapsing byte-identical tool calls inside a single batch.
 *
 * Claude occasionally degenerates into a generation loop and emits the same
 * tool call (same name + same arguments) dozens — even 150+ — times in one
 * parallel batch (see tool-runner-duplication-incident.md). Each copy carries
 * its own unique `toolCallId`, so Anthropic still requires a `tool_result` for
 * every one, but re-executing identical idempotent reads wastes real wall-clock
 * time and money and floods the context with redundant output.
 *
 * Returning a non-null key here lets the executor run the FIRST occurrence and
 * reuse its result for every subsequent identical call in the same batch. The
 * key relies on `JSON.stringify` preserving argument order — two identical
 * generations serialize identically — which covers the observed failure mode.
 * If the arguments can't be serialized we return `null`, which disables dedup
 * for that call (it always executes).
 */
function toolDedupKey(name: string, args: Record<string, unknown>): string | null {
	// Serialization is the only step that can fail (circular structures,
	// BigInt, throwing `toJSON`), so it is the only step guarded.
	let serializedArgs: string;
	try {
		serializedArgs = JSON.stringify(args ?? {});
	} catch {
		// Unserializable arguments → no key → the call is never deduplicated.
		return null;
	}
	return `${name} ${serializedArgs}`;
}

/**
 * Convert a user turn's ephemeral multimodal `content` into AI SDK user-content
 * parts (`TextPart | ImagePart | FilePart`), preserving order. Images become
 * `ImagePart`s; PDFs (and any non-image attachment) become `FilePart`s. The
 * base64 payload is wrapped in a `data:` URI so the SDK accepts it directly.
 *
 * Returns `null` when there is no actual attachment (no `content`, or only text
 * parts), so the caller falls back to the plain-string path and text-only turns
 * serialize byte-identically to before.
 */
function userContentToParts(
	content: UserContentPart[] | undefined,
): Array<TextPart | ImagePart | FilePart> | null {
	// Text-only (or absent) content is handled by the caller's plain-string path.
	const hasAttachment = content?.some((p) => p.type === "attachment") ?? false;
	if (!content || !hasAttachment) return null;

	return content.flatMap((part): Array<TextPart | ImagePart | FilePart> => {
		if (part.type === "text") {
			// Empty text parts are dropped; order of the rest is preserved.
			return part.text.length > 0 ? [{ type: "text", text: part.text }] : [];
		}

		const { mediaType, data, name } = part;
		// The base64 payload is wrapped in a `data:` URI.
		const dataUri = `data:${mediaType};base64,${data}`;

		if (mediaType.startsWith("image/")) {
			return [{ type: "image", image: dataUri, mediaType }];
		}
		// Every non-image attachment becomes a file part; the filename is
		// only attached when the attachment carries a non-empty name.
		return name
			? [{ type: "file", data: dataUri, mediaType, filename: name }]
			: [{ type: "file", data: dataUri, mediaType }];
	});
}

/**
 * Convert the internal chat history into the AI SDK `ModelMessage[]` shape.
 *
 *  - `system`-role messages and `error` / `system` chunks are dropped.
 *  - A user turn becomes either an ordered multimodal parts array (when it
 *    carries attachments) or the concatenation of its text chunks.
 *  - An assistant turn is segmented at every `tool-batch` chunk into
 *    `[assistant, tool]` message pairs, one pair per LLM step.
 *  - A failed tool result is emitted with an `error-text` output so its error
 *    status is carried by the output variant itself.
 *
 * @param messages - Internal history, oldest first. Not mutated.
 * @param useToolPrefix - When truthy, tool names are passed through
 *   `prefixToolName` before being emitted.
 * @returns A freshly built message list.
 */
function toModelMessages(messages: ChatMessage[], useToolPrefix?: boolean): ModelMessage[] {
	const result: ModelMessage[] = [];

	// A `[USER INTERRUPT]` block in a tool-result is "fresh" — i.e., the
	// model has not yet seen and responded to it — only when ALL of these
	// hold:
	//   1. The tool-batch is in the very last message of the history.
	//   2. That message is an assistant message (a follow-up user message
	//      means the user moved on; the interrupt was addressed).
	//   3. The tool-batch is the LAST chunk in that message (any later
	//      text/thinking/tool-batch in the same message represents the
	//      model's response to the tool results).
	//
	// All other interrupts get stripped from history because the imperative
	// "You MUST address these" otherwise gets re-evaluated as a fresh
	// instruction on every subsequent LLM step.
	let freshestToolBatchMsgIdx = -1;
	let freshestToolBatchChunkIdx = -1;
	const lastMsgIdx = messages.length - 1;
	const lastMsg = messages[lastMsgIdx];
	if (lastMsg && lastMsg.role === "assistant" && lastMsg.chunks.length > 0) {
		const lastChunkIdx = lastMsg.chunks.length - 1;
		const lastChunk = lastMsg.chunks[lastChunkIdx];
		if (lastChunk && lastChunk.type === "tool-batch") {
			freshestToolBatchMsgIdx = lastMsgIdx;
			freshestToolBatchChunkIdx = lastChunkIdx;
		}
	}

	for (let msgIdx = 0; msgIdx < messages.length; msgIdx++) {
		const msg = messages[msgIdx];
		if (!msg || msg.role === "system") continue;

		if (msg.role === "user") {
			// A user turn with ephemeral multimodal `content` (image/pdf
			// attachments interleaved with text) is emitted as an ORDERED parts
			// array so the model sees text and attachments in the exact sequence
			// the user composed. This is only ever set on the in-flight turn —
			// history-rebuilt messages have no `content` and fall through to the
			// plain-text path below.
			const multimodal = userContentToParts(msg.content);
			if (multimodal) {
				result.push({ role: "user", content: multimodal });
				continue;
			}
			// User messages in our model can in theory contain non-text chunks,
			// but in practice the UI only produces text. Concatenate any text
			// chunks; ignore anything else.
			const text = msg.chunks
				.filter((c): c is Extract<Chunk, { type: "text" }> => c.type === "text")
				.map((c) => c.text)
				.join("");
			result.push({ role: "user", content: text });
			continue;
		}

		// role === "assistant"
		//
		// Segment the turn's chunks into per-LLM-step `[assistant, tool]`
		// message pairs, splitting at each `tool-batch` boundary. Every
		// round-trip emits text/thinking then (optionally) one tool-batch, so
		// a tool-batch is exactly a step boundary.
		//
		// This is the cache fix: each prior step's messages are built from
		// immutable chunks at fixed positions, so they serialize
		// byte-identically on every later request → stable Anthropic prompt
		// prefix. Collapsing all steps into ONE assistant + ONE tool message
		// (and then letting the structural-normalisation pass re-bucket the
		// tool_use/tool_result blocks) reshuffled earlier steps' positions on
		// every step and shattered the cache (see cache-miss-report.md).
		type AssistantPart =
			| { type: "text"; text: string }
			| { type: "reasoning"; text: string; providerOptions?: ProviderOptions }
			| { type: "tool-call"; toolCallId: string; toolName: string; input: unknown };
		type StepResult = {
			toolCallId: string;
			toolName: string;
			result: string;
			isError?: boolean;
		};

		let parts: AssistantPart[] = [];
		let stepResults: StepResult[] = [];

		const flushStep = (): void => {
			if (parts.length > 0) {
				result.push({ role: "assistant", content: parts });
			}
			// One grouped `role: "tool"` message per step holds that step's
			// results — `applyAnthropicCaching` then marks the last two
			// non-system messages as `[assistant(step), tool(step)]`, the
			// correct rolling-breakpoint placement.
			if (stepResults.length > 0) {
				result.push({
					role: "tool",
					content: stepResults.map((tr) => ({
						type: "tool-result" as const,
						toolCallId: tr.toolCallId,
						toolName: tr.toolName,
						// v6: `output` (ToolResultOutput) replaces v4's raw `result`.
						// The v4 `isError` sibling field no longer exists on the
						// part; failure is expressed by the output variant, so a
						// failed call uses `error-text` instead of `text`.
						output: tr.isError
							? { type: "error-text" as const, value: tr.result }
							: { type: "text" as const, value: tr.result },
					})),
				});
			}
			parts = [];
			stepResults = [];
		};

		for (let chunkIdx = 0; chunkIdx < msg.chunks.length; chunkIdx++) {
			const chunk = msg.chunks[chunkIdx];
			if (!chunk) continue;
			switch (chunk.type) {
				case "text":
					parts.push({ type: "text", text: chunk.text });
					break;
				case "thinking":
					// v6: carry providerOptions (Anthropic signature blob) if present.
					// Non-Anthropic reasoning has no metadata → send without providerOptions
					// (Anthropic accepts it; the round-trip just won't carry a signature).
					parts.push({
						type: "reasoning",
						text: chunk.text,
						...(chunk.metadata !== undefined
							? { providerOptions: chunk.metadata as ProviderOptions }
							: {}),
					});
					break;
				case "tool-batch": {
					// Strip stale `[USER INTERRUPT]` blocks from every
					// tool-batch except the freshest one (most recent
					// tool-batch in the most recent assistant message).
					// Without this, the imperative "You MUST address these"
					// text persists in history and the model re-acknowledges
					// the same interrupt verbatim on every subsequent step.
					const isFreshestToolBatch =
						msgIdx === freshestToolBatchMsgIdx && chunkIdx === freshestToolBatchChunkIdx;
					for (const entry of chunk.calls) {
						const toolName = useToolPrefix ? prefixToolName(entry.name) : entry.name;
						// v6: `input` replaces v4's `args`
						parts.push({
							type: "tool-call",
							toolCallId: entry.id,
							toolName,
							input: entry.arguments,
						});
						// Entries without a result emit only the tool-call part.
						if (entry.result !== undefined) {
							const resultText = isFreshestToolBatch
								? entry.result
								: stripUserInterruptBlock(entry.result);
							stepResults.push({
								toolCallId: entry.id,
								toolName,
								result: resultText,
								isError: entry.isError,
							});
						}
					}
					// End of this LLM step → emit its `[assistant, tool]` pair and
					// start a fresh pair for the next step.
					flushStep();
					break;
				}
				case "error":
				case "system":
					// Strip — not sent back to the LLM.
					break;
			}
		}

		// Final step: trailing text/thinking with no tool-batch. `flushStep`
		// skips empties, so a turn of only system/error chunks emits nothing.
		flushStep();
	}
	return result;
}

/**
 * Apply OpenAI-compatible reasoning normalisation.
 *
 * Cribbed from opencode `provider/transform.ts:217-249`. Solves DeepSeek's
 * "The reasoning_content in the thinking mode must be passed back to the
 * API" error.
 *
 * The v6 `@ai-sdk/openai-compatible@2.x` provider extracts `reasoning_content`
 * from assistant `{ type: "reasoning", text }` parts natively
 * (see `node_modules/@ai-sdk/openai-compatible/dist/index.mjs:215-216` and :245).
 * But that native path SKIPS emission when `reasoning.length === 0` —
 * "reasoning_content" is omitted from the wire. DeepSeek (and similar
 * "thinking mode" providers) require the field to be present in every
 * follow-up turn once it was emitted at least once, even if the value is
 * the empty string.
 *
 * Strategy: for every assistant message that has any `reasoning` parts,
 * concatenate their text into `providerOptions.openaiCompatible.reasoning_content`
 * (preserving empty strings) AND strip those parts from content. The
 * resulting payload has a single source of truth for `reasoning_content`
 * coming via the message-level `providerOptions` spread at line 247 of the
 * SDK provider, which fires regardless of empty-vs-non-empty text.
 *
 * Only applied for the default (non-Anthropic) openai-compatible path.
 */
function applyOpenAICompatibleReasoningNormalisation(msgs: ModelMessage[]): ModelMessage[] {
	return msgs.map((msg) => {
		// Only assistant messages with structured content can carry reasoning.
		if (msg.role !== "assistant" || !Array.isArray(msg.content)) return msg;

		// Single walk: collect reasoning text, keep every other part in order.
		const reasoningTexts: string[] = [];
		const remainingParts: typeof msg.content = [];
		let sawReasoning = false;
		for (const part of msg.content) {
			if (part.type === "reasoning") {
				sawReasoning = true;
				reasoningTexts.push(part.text);
			} else {
				remainingParts.push(part);
			}
		}

		// No reasoning part at all → this message never had a thinking turn,
		// so it is returned untouched (same object).
		if (!sawReasoning) return msg;

		const priorOptions = msg.providerOptions ?? {};
		const priorCompat = (priorOptions.openaiCompatible ?? {}) as Record<string, unknown>;

		return {
			...msg,
			content: remainingParts,
			providerOptions: {
				...priorOptions,
				openaiCompatible: {
					...priorCompat,
					// Written unconditionally — an empty string is still set,
					// which is the whole point of this normalisation.
					reasoning_content: reasoningTexts.join(""),
				},
			},
		} as ModelMessage;
	});
}

/**
 * Apply Anthropic structural normalisations to a `ModelMessage[]`.
 *
 * Cribbed from opencode `provider/transform.ts:53-148`. Three passes:
 *
 * 1. **Empty-text/reasoning filter**: Drop `text` / `reasoning` parts
 *    whose `text === ""`. Drop messages whose content array becomes
 *    empty. Anthropic rejects assistant turns with empty content.
 *
 * 2. **`toolCallId` scrubbing**: Anthropic only accepts tool call IDs
 *    that match `[a-zA-Z0-9_-]`. Our IDs are crypto.randomUUID() values
 *    which are already safe, but tool-call IDs assigned by upstream
 *    sources (provider-executed tools, subagent retrieval, etc.) may
 *    not be. Defensively scrub. Mirrors opencode `provider/transform.ts:96-122`.
 *
 * 3. **`[tool-call, text]` split**: Anthropic rejects assistant turns
 *    where `tool_use` blocks are followed by non-tool-call content
 *    ("`tool_use` ids were found without `tool_result` blocks immediately
 *    after"). If an assistant message has mixed ordering, split it into
 *    `[non-tool parts] + [tool-call parts]`.
 *
 * Only applied for Anthropic-backed providers (Claude OAuth or
 * opencode-anthropic). Skip for openai-compatible / OpenCode Zen.
 */
const SCRUB_TOOL_CALL_ID = (id: string): string => id.replace(/[^a-zA-Z0-9_-]/g, "_");

function applyAnthropicStructuralNormalisations(msgs: ModelMessage[]): ModelMessage[] {
	// Pass 1 — remove empty text/reasoning parts, then remove any message
	// left with no content (an empty string or an emptied parts array).
	const nonEmpty: ModelMessage[] = [];
	for (const msg of msgs) {
		if (typeof msg.content === "string") {
			if (msg.content !== "") nonEmpty.push(msg);
			continue;
		}
		if (!Array.isArray(msg.content)) {
			nonEmpty.push(msg);
			continue;
		}
		const keptParts = msg.content.filter((part) => {
			// Only text and reasoning parts are subject to the emptiness
			// check; an empty reasoning part is dropped even if it carries
			// other fields such as a signature.
			const isTextual = part.type === "text" || part.type === "reasoning";
			return !isTextual || (part as { text: string }).text !== "";
		});
		if (keptParts.length > 0) {
			nonEmpty.push({ ...msg, content: keptParts } as ModelMessage);
		}
	}

	// Pass 2 — replace every character outside [a-zA-Z0-9_-] in tool call
	// ids, on assistant tool-call / tool-result parts and on tool-role
	// tool-result parts alike, so both sides of a pair stay in sync.
	const scrubbed: ModelMessage[] = [];
	for (const msg of nonEmpty) {
		if (msg.role === "assistant" && Array.isArray(msg.content)) {
			const content = msg.content.map((part) =>
				part.type === "tool-call" || part.type === "tool-result"
					? { ...part, toolCallId: SCRUB_TOOL_CALL_ID(part.toolCallId) }
					: part,
			);
			scrubbed.push({ ...msg, content });
		} else if (msg.role === "tool" && Array.isArray(msg.content)) {
			const content = msg.content.map((part) =>
				part.type === "tool-result"
					? { ...part, toolCallId: SCRUB_TOOL_CALL_ID(part.toolCallId) }
					: part,
			);
			scrubbed.push({ ...msg, content });
		} else {
			scrubbed.push(msg);
		}
	}

	// Pass 3 — an assistant message in which something other than a
	// tool-call follows a tool-call is split in two:
	// [text, tool-call, text] → [text, text] then [tool-call].
	const output: ModelMessage[] = [];
	for (const msg of scrubbed) {
		if (msg.role !== "assistant" || !Array.isArray(msg.content)) {
			output.push(msg);
			continue;
		}
		const parts = msg.content;
		const firstToolCallAt = parts.findIndex((part) => part.type === "tool-call");
		const hasTrailingNonTool =
			firstToolCallAt !== -1 &&
			parts.slice(firstToolCallAt).some((part) => part.type !== "tool-call");
		if (!hasTrailingNonTool) {
			// No tool calls, or tool calls already form the tail → keep as is.
			output.push(msg);
			continue;
		}
		output.push(
			{ ...msg, content: parts.filter((part) => part.type !== "tool-call") },
			{ ...msg, content: parts.filter((part) => part.type === "tool-call") },
		);
	}

	return output;
}

/**
 * Apply Anthropic prompt-caching breakpoints to a message list.
 *
 * Anthropic caches the entire request prefix up to (and including) any block
 * marked with `cache_control`. Up to 4 breakpoints per request; we use at most
 * all four (first 2 system + last 2 non-system).
 *
 * Strategy (mirrors OpenCode's `applyCaching` in transform.ts):
 *  - Mark the first two system messages → caches the system prompt (and
 *    tools, which sit before messages in the request body).
 *  - Mark the last 2 non-system messages → rolling cache that extends through
 *    the conversation each turn.
 *
 * This relies on `toModelMessages` grouping a turn's tool results into one
 * `role: "tool"` message: that makes the last two non-system messages
 * `[assistant(tool-calls), tool(results)]` mid-turn, so the two rolling
 * breakpoints straddle the assistant/tool boundary instead of collapsing onto
 * two adjacent tool-result messages (which wastes a breakpoint — see
 * claude-report.md, Root Cause 2). Anthropic caches the whole prefix up to and
 * including a marked block, so marking the grouped tool message extends the
 * cached prefix across every result the turn produced.
 *
 * Only applied for the Anthropic provider. OpenCode Zen's OpenAI-compatible
 * endpoint (`/zen/v1/chat/completions`) backs models like MiniMax, GLM, Kimi,
 * Grok, etc. — those upstreams do automatic prefix caching server-side and
 * don't accept `cache_control` markers. OpenCode's own transform.ts gates
 * `applyCaching` on Anthropic-family detection for the same reason. Models
 * served via `@ai-sdk/openai` (GPT) and `@ai-sdk/google` (Gemini) likewise
 * use server-side automatic caching.
 */
function applyAnthropicCaching(msgs: ModelMessage[]): void {
	// Breakpoint candidates: the first two system messages and the last two
	// non-system messages. A Set keeps each message object at most once.
	const leadingSystem = msgs.filter((m) => m.role === "system").slice(0, 2);
	const trailingConversation = msgs.filter((m) => m.role !== "system").slice(-2);
	const breakpoints = new Set<ModelMessage>([...leadingSystem, ...trailingConversation]);

	// Mutates the selected messages in place, preserving any provider options
	// (including other `anthropic` keys) that are already set.
	breakpoints.forEach((msg) => {
		const existingAnthropic = msg.providerOptions?.anthropic ?? {};
		msg.providerOptions = {
			...msg.providerOptions,
			anthropic: {
				...existingAnthropic,
				cacheControl: { type: "ephemeral" },
			},
		};
	});
}

/**
 * Render an arbitrary thrown value as one diagnostic line, suffixed with the
 * model and base URL taken from `config`.
 *
 * For `Error` instances the line also includes, when present on the error:
 * `statusCode`, `url`, `responseBody`, `responseHeaders` and `cause`.
 *
 * This function must never throw — it runs while another failure is already
 * being handled — so every structured value is serialized defensively.
 *
 * @param err - The caught value; may be anything.
 * @param config - Supplies `model` and `baseURL` for the context suffix.
 * @returns A human-readable, single-string description of the failure.
 */
function formatError(err: unknown, config: AgentConfig): string {
	const context = `[model=${config.model}, baseURL=${config.baseURL}]`;

	// Defensive JSON rendering. `JSON.stringify` throws on circular
	// structures and BigInt, and returns `undefined` for values it cannot
	// represent; neither may escape from an error formatter.
	const render = (value: unknown): string => {
		try {
			if (value instanceof Error) {
				// `name` / `message` are non-enumerable, so a bare
				// JSON.stringify(error) would print "{}" and hide the reason.
				return JSON.stringify({ name: value.name, message: value.message, ...value });
			}
			return JSON.stringify(value) ?? String(value);
		} catch {
			// `Object.prototype.toString` cannot throw, unlike `String()` on
			// an object with a hostile or missing `toString`.
			return typeof value === "object" || typeof value === "function"
				? Object.prototype.toString.call(value)
				: String(value);
		}
	};

	if (err instanceof Error) {
		const errRecord = err as unknown as Record<string, unknown>;
		const cause = errRecord.cause ? ` | cause: ${render(errRecord.cause)}` : "";
		// AI SDK errors often have statusCode, responseBody, or url properties
		const extras: string[] = [];
		if ("statusCode" in errRecord) extras.push(`status=${errRecord.statusCode}`);
		if ("url" in errRecord) extras.push(`url=${errRecord.url}`);
		if ("responseBody" in errRecord) extras.push(`body=${render(errRecord.responseBody)}`);
		if ("responseHeaders" in errRecord) {
			extras.push(`headers=${render(errRecord.responseHeaders)}`);
		}

		const detail = extras.length > 0 ? ` (${extras.join(", ")})` : "";
		return `${err.message}${detail}${cause} ${context}`;
	}

	return `${String(err)} ${context}`;
}

/**
 * Build the Anthropic `providerOptions.anthropic` thinking config for a given
 * model + reasoning effort. Detection mirrors opencode's `provider/transform.ts`
 * (`anthropicOpus47OrLater` + `anthropicAdaptiveEfforts`):
 *
 *  - ADAPTIVE models emit `{ thinking: { type: "adaptive" }, effort }` and pick
 *    their own thinking budget within `maxOutputTokens`. A model is adaptive when
 *    it is Opus 4.7+ (parsed from the id) OR Opus/Sonnet 4.6 (matched by id
 *    substring — there is no newer Sonnet to generalise yet).
 *      - `display: "summarized"` is added ONLY for Opus 4.7+. Those models
 *        default to `display: "omitted"` (NO thinking streamed to us), so the
 *        flag must be forced to surface thinking in the UI. Opus/Sonnet 4.6
 *        stream thinking without it (opencode omits `display` there too).
 *      - `effort` (sibling of `thinking`) tells adaptive how hard to think.
 *  - All OTHER Claude models use classic `{ thinking: { type: "enabled",
 *    budgetTokens } }`. The budget is a ceiling, not a requirement — the model
 *    self-regulates thinking vs response within `maxOutputTokens`.
 *
 * Returned as concrete object shapes (no optional/`undefined` properties) so the
 * result is a clean `Record<string, JSONValue>` for `providerOptions.anthropic`.
 * Callers only invoke this when `effort !== "none"`.
 */
export function anthropicThinkingProviderOptions(model: string, effort: ReasoningEffort) {
	// Parse "opus-<major><sep><minor>" out of the model id.
	const opusVersion = /opus-(\d+)[.-](\d+)(?:[.@-]|$)/i.exec(model);
	const opusMajor = Number(opusVersion?.[1] ?? 0);
	// The second number is only a minor version when it is short. Ids such as
	// "claude-opus-4-20250514" put the release DATE directly after the major
	// version; reading 20250514 as a minor made plain Opus 4 look like
	// "Opus 4.7 or later". Anything longer than two digits counts as minor 0.
	const minorDigits = opusVersion?.[2] ?? "";
	const opusMinor = minorDigits.length > 0 && minorDigits.length <= 2 ? Number(minorDigits) : 0;
	const isOpus47OrLater = opusMajor > 4 || (opusMajor === 4 && opusMinor >= 7);

	// Opus/Sonnet 4.6 are adaptive too, matched by id substring.
	const isAdaptive =
		isOpus47OrLater ||
		["opus-4-6", "opus-4.6", "sonnet-4-6", "sonnet-4.6"].some((s) => model.includes(s));

	if (isAdaptive) {
		// `display: "summarized"` is only added for Opus 4.7+.
		return isOpus47OrLater
			? { thinking: { type: "adaptive" as const, display: "summarized" as const }, effort }
			: { thinking: { type: "adaptive" as const }, effort };
	}

	// Classic thinking: map the effort level to a fixed token budget. Any
	// effort not listed here (callers never pass "none") maps to 0.
	const budgetTokens =
		effort === "max"
			? 31999
			: effort === "xhigh"
				? 24000
				: effort === "high"
					? 16000
					: effort === "medium"
						? 5000
						: effort === "low"
							? 2000
							: 0;
	return { thinking: { type: "enabled" as const, budgetTokens } };
}

// NOTE(review): not referenced in the visible part of this file — presumably
// the upper bound on LLM round-trips per agent run; confirm at the use site.
const MAX_STEPS = 50;

/**
 * The single trivial throwaway user turn appended to a cache-warming replay.
 * Its only purpose is to give the provider a cheap final turn to respond to
 * while the IDENTICAL cached prefix (system + tools + the tab's real history)
 * is re-sent — registering a cache READ that refreshes the provider's ~5-min
 * prompt-cache TTL. Never persisted; never shown to the user.
 */
const WARM_CACHE_PROBE_TEXT = "reply with just a .";

export class Agent {
	status: AgentStatus = "idle";
	messages: ChatMessage[] = [];

	private config: AgentConfig;
	private queueCallbacks?: QueueCallbacks;

	/**
	 * Store the agent configuration and the optional queue callbacks. No other
	 * work happens at construction time.
	 *
	 * @param config - Agent configuration; kept by reference.
	 * @param queueCallbacks - Optional callbacks; kept by reference and handed
	 *   to tools through their execute options.
	 */
	constructor(config: AgentConfig, queueCallbacks?: QueueCallbacks) {
		this.config = config;
		this.queueCallbacks = queueCallbacks;
	}

	/**
	 * Run a single tool call and convert its outcome into a `ToolResult`.
	 *
	 * Steps, in order:
	 *  1. Look the tool up in a registry built from `config.tools`; an unknown
	 *     name yields an error result.
	 *  2. When a permission checker is configured, gate `run_shell` commands
	 *     that touch external directories, and gate file tools whose
	 *     canonical path is outside the working directory (the spill
	 *     directory and, for read-only tools, the agent-definition
	 *     directories are exempt). A rejected or failed permission request
	 *     yields an error result.
	 *  3. Execute the tool, forwarding streamed output into
	 *     `shellOutputQueue`, then truncate the result for display.
	 *
	 * Exceptions thrown by the tool itself are caught and reported as an error
	 * result.
	 *
	 * @param tc - The tool call to execute.
	 * @param shellOutputQueue - Receives every `{ data, stream }` chunk the
	 *   tool reports through its `onOutput` callback.
	 * @returns The tool result; `isError` is set when the tool is unknown,
	 *   permission was denied, execution threw, or the raw output starts with
	 *   `Error:`.
	 */
	private async executeToolWithStreaming(
		tc: ToolCall,
		shellOutputQueue: Array<{ data: string; stream: "stdout" | "stderr" }>,
	): Promise<ToolResult> {
		const registry = createToolRegistry(this.config.tools);
		const tool = registry.getTool(tc.name);
		if (!tool) {
			return {
				toolCallId: tc.id,
				toolName: tc.name,
				result: `Unknown tool: ${tc.name}`,
				isError: true,
			};
		}

		// Permission check for shell commands — only prompt for external directory access.
		// Commands that stay within the working directory are auto-allowed.
		if (tc.name === "run_shell" && this.config.permissionChecker) {
			const command = typeof tc.arguments.command === "string" ? tc.arguments.command : "";
			const workingDirectory = this.config.workingDirectory;
			const analysis = await analyzeCommand(command, workingDirectory);
			const ruleset = this.config.ruleset ?? [];

			// Check for external directory access from shell command
			if (analysis.dirs.length > 0) {
				const dirRequest = {
					permission: "external_directory",
					patterns: analysis.dirs.map((d) => `${d}/*`),
					always: analysis.dirs.map((d) => `${d}/*`),
					description: `Shell command accesses external directory: ${analysis.dirs.join(", ")}`,
					metadata: { command, dirs: analysis.dirs },
				};
				try {
					const dirReply = await this.config.permissionChecker.ask(dirRequest, [ruleset]);
					if (dirReply === "reject") {
						return {
							toolCallId: tc.id,
							toolName: tc.name,
							result: `Permission denied: access to external directories rejected`,
							isError: true,
						};
					}
				} catch {
					// A failing permission request is treated as a denial.
					return {
						toolCallId: tc.id,
						toolName: tc.name,
						result: `Permission denied: external directory access not allowed`,
						isError: true,
					};
				}
			}
		}

		// Permission check for file tools accessing paths outside workspace
		if (
			this.config.permissionChecker &&
			(tc.name === "read_file" ||
				tc.name === "read_file_slice" ||
				tc.name === "write_file" ||
				tc.name === "list_files")
		) {
			const pathArg = typeof tc.arguments.path === "string" ? tc.arguments.path : ".";

			// Canonicalize all three so symlink-in-workdir escapes are detected
			// at the permission gate (not just relative `../` traversal). The
			// helper walks up to the nearest existing ancestor when the leaf
			// doesn't exist (write_file creating new files), so a parent
			// symlink pointing outside the workdir is still caught. The same
			// helper is used inside the tool implementations — keeping the
			// two layers consistent so a path that looks external here also
			// looks external in the tool, and vice versa.
			const resolvedPath = await canonicalize(this.config.workingDirectory, pathArg);
			const resolvedWorkDir = await canonicalize(this.config.workingDirectory);
			const resolvedSpillRoot = await canonicalize(SPILL_ROOT);

			const isUnderWorkdir =
				resolvedPath === resolvedWorkDir || resolvedPath.startsWith(`${resolvedWorkDir}/`);
			// Dispatch's own tool-output spill directory is implicitly allowed —
			// the AI receives a truncation notice pointing here and is expected
			// to read it without prompting the user. Bypassing the external-
			// directory check here keeps the inspection flow frictionless.
			const isSpillPath =
				resolvedPath === resolvedSpillRoot || resolvedPath.startsWith(`${resolvedSpillRoot}/`);

			// Agent definitions live in well-known directories
			// (`~/.config/dispatch/agents/` and
			// `<workdir>/.dispatch/agents/`). Reading those is a
			// prerequisite for the summon tool's "specify which subagent"
			// flow — the LLM needs to inspect the TOML to know what each
			// agent does. We auto-allow READ-ONLY tools under those paths
			// without prompting the user. Writes (`write_file`) still go
			// through the normal external_directory gate so an agent can't
			// quietly overwrite another agent's definition.
			const isReadOnlyTool =
				tc.name === "read_file" || tc.name === "read_file_slice" || tc.name === "list_files";
			let isAgentsDirReadOnly = false;
			if (isReadOnlyTool) {
				const agentDirs = getAgentDirPaths(this.config.workingDirectory);
				const canonicalAgentDirs = await Promise.all(agentDirs.map((d) => canonicalize(d)));
				isAgentsDirReadOnly = canonicalAgentDirs.some(
					(d) => resolvedPath === d || resolvedPath.startsWith(`${d}/`),
				);
			}

			if (!isUnderWorkdir && !isSpillPath && !isAgentsDirReadOnly) {
				// `tc.name` is one of the four file tools here. Both read
				// tools map to "read"; previously `read_file_slice` fell
				// through to "list", mislabelling the prompt and metadata.
				const permissionType =
					tc.name === "write_file" ? "edit" : tc.name === "list_files" ? "list" : "read";

				const parentDir = dirname(resolvedPath);
				const request = {
					permission: "external_directory",
					patterns: [`${parentDir}/*`],
					always: [`${parentDir}/*`],
					description: `${permissionType} file outside workspace: ${resolvedPath}`,
					metadata: { filepath: resolvedPath, parentDir, operation: permissionType },
				};

				const ruleset = this.config.ruleset ?? [];
				try {
					const reply = await this.config.permissionChecker.ask(request, [ruleset]);
					if (reply === "reject") {
						return {
							toolCallId: tc.id,
							toolName: tc.name,
							result: `Permission denied: ${permissionType} access to ${resolvedPath} rejected`,
							isError: true,
						};
					}
				} catch {
					// A failing permission request is treated as a denial.
					return {
						toolCallId: tc.id,
						toolName: tc.name,
						result: `Permission denied: ${permissionType} access to ${resolvedPath} not allowed`,
						isError: true,
					};
				}
			}
		}

		try {
			const execPromise = tool.execute(tc.arguments, {
				onOutput: (data: string, stream: "stdout" | "stderr") => {
					shellOutputQueue.push({ data, stream });
				},
				queueCallbacks: this.queueCallbacks,
			});

			const rawResult = await execPromise;
			// `JSON.stringify` returns `undefined` (not a string) for values
			// such as `undefined`; fall back to "" so a tool that returns
			// nothing is not reported as a TypeError from `.startsWith`.
			const resultStr =
				typeof rawResult === "string" ? rawResult : (JSON.stringify(rawResult) ?? "");

			// `isError` is derived from the PREFIX of the raw (untruncated)
			// string: only a result that starts with `Error:` counts. The
			// display result goes through universal truncation so oversized
			// outputs don't blow context. The full content lives in the
			// spill file the truncation notice points to.
			const isError = resultStr.startsWith("Error:");
			const { displayResult } = applyTruncation(resultStr, {
				tabId: this.config.tabId ?? "default",
				callId: tc.id,
				toolName: tc.name,
			});

			return {
				toolCallId: tc.id,
				toolName: tc.name,
				result: displayResult,
				isError,
			};
		} catch (err) {
			return {
				toolCallId: tc.id,
				toolName: tc.name,
				result: err instanceof Error ? err.message : String(err),
				isError: true,
			};
		}
	}

	/**
	 * Produce an error `tool-result` event for each call in `stepToolCalls`
	 * that has no result recorded in `chunks` yet.
	 *
	 * A call counts as resolved when some `tool-batch` chunk holds an entry
	 * with the same id and a `result` that is not `undefined`. Unresolved calls
	 * arise when a batch is only partially settled — one tool reported a
	 * `tool-error` while its siblings never ran, or the stream aborted
	 * mid-batch. Left alone, those orphaned tool-call IDs make the AI SDK throw
	 * `MissingToolResultsError` on the next LLM round-trip; an `isError: true`
	 * result for each keeps the tool-call/tool-result pairing complete.
	 *
	 * This method only builds the events. The caller must pass each one to
	 * `appendEventToChunks(chunks, evt)` and `yield` it.
	 *
	 * @param stepToolCalls Tool calls issued in the current step.
	 * @param chunks Chunk accumulator to inspect; it is only read here.
	 * @param defaultMessage Text used as the `result` of every synthesized event.
	 * @returns One `tool-result` event per unresolved call, in `stepToolCalls`
	 *   order; an empty array when every call already has a result.
	 */
	private synthesizeResidualToolResults(
		stepToolCalls: ToolCall[],
		chunks: Chunk[],
		defaultMessage: string,
	): AgentEvent[] {
		// Pass 1: collect the ids of every call that already carries a result.
		const settledIds = new Set<string>();
		for (const chunk of chunks) {
			if (chunk.type === "tool-batch") {
				for (const call of chunk.calls) {
					if (call.result !== undefined) settledIds.add(call.id);
				}
			}
		}

		// Pass 2: every remaining call gets a synthetic error result.
		return stepToolCalls
			.filter((call) => !settledIds.has(call.id))
			.map((call): AgentEvent => {
				const toolResult: ToolResult = {
					toolCallId: call.id,
					toolName: call.name,
					result: defaultMessage,
					isError: true,
				};
				return { type: "tool-result", toolResult };
			});
	}

	/**
	 * Assemble the provider-facing request context that `run()` and
	 * `warmCache()` share.
	 *
	 * The returned object carries:
	 *  - `isClaudeOAuth` — true only when `config.provider` is `"anthropic"`.
	 *    Gates the Claude-Code-CLI-specific behaviour: billing header, identity
	 *    preamble, prefixed tool names (and, downstream, thinking config).
	 *  - `usesAnthropicSDK` — true for `"anthropic"` and `"opencode-anthropic"`,
	 *    i.e. the providers that get Anthropic-style message handling.
	 *  - `providerFactory` — the model factory returned by `createProvider`.
	 *  - `tools` — the registry's AI SDK tools, keyed through `prefixToolName`
	 *    on the Claude OAuth flow and by their raw names otherwise.
	 *  - `systemPrompt` — `config.systemPrompt`, preceded on the Claude OAuth
	 *    flow by the billing header line and `SYSTEM_IDENTITY`.
	 *
	 * @param messagesForBilling Messages handed to `buildBillingHeaderValue`
	 *   (after projection to `{ role, content }`). `run()` passes
	 *   `this.messages`, which already holds the live user turn; `warmCache()`
	 *   passes the genuine history so the header — and with it the cached
	 *   prefix — matches what the next real turn will send.
	 */
	private buildLlmContext(messagesForBilling: ChatMessage[]): {
		isClaudeOAuth: boolean;
		usesAnthropicSDK: boolean;
		providerFactory: ModelFactory;
		tools: Record<string, Tool>;
		systemPrompt: string;
	} {
		// Provider-family flags. `isClaudeOAuth` is the narrow OAuth flow;
		// `usesAnthropicSDK` additionally covers the OpenCode anthropic-format
		// routes.
		const isClaudeOAuth = this.config.provider === "anthropic";
		const usesAnthropicSDK = isClaudeOAuth || this.config.provider === "opencode-anthropic";

		const registry = createToolRegistry(this.config.tools);
		const providerFactory = createProvider({
			apiKey: this.config.apiKey,
			baseURL: this.config.baseURL,
			provider: this.config.provider,
			claudeCredentials: this.config.claudeCredentials,
			tabId: this.config.tabId,
		});

		// Tool names are prefixed for the Claude OAuth flow only; every other
		// provider receives the registry's tools untouched.
		const registeredTools = registry.getAISDKTools();
		const tools = !isClaudeOAuth
			? registeredTools
			: Object.fromEntries(
					Object.entries(registeredTools).map(([toolName, definition]) => [
						prefixToolName(toolName),
						definition,
					]),
				);

		// Outside the OAuth flow the configured system prompt is used verbatim.
		if (!isClaudeOAuth) {
			return {
				isClaudeOAuth,
				usesAnthropicSDK,
				providerFactory,
				tools,
				systemPrompt: this.config.systemPrompt,
			};
		}

		// `buildBillingHeaderValue` takes `{ role, content: string }[]`, so
		// flatten each chunk-based message to the concatenation of its text
		// chunks. Non-text chunks contribute nothing.
		const billingInput = messagesForBilling.map((message) => {
			let content = "";
			for (const chunk of message.chunks) {
				if (chunk.type === "text") content += chunk.text;
			}
			return { role: message.role, content };
		});
		const billingHeader = buildBillingHeaderValue(billingInput);

		// Layout: billing header, identity preamble, blank line, real prompt.
		const systemPrompt = `${billingHeader}\n${SYSTEM_IDENTITY}\n\n${this.config.systemPrompt}`;

		return { isClaudeOAuth, usesAnthropicSDK, providerFactory, tools, systemPrompt };
	}

	/**
	 * NOTE: this comment documents `warmCache()`, which is defined further
	 * down, after `buildStreamOptions()`.
	 *
	 * Prompt-cache WARMING replay (NOT a real turn).
	 *
	 * Re-sends the EXACT cached prefix — system prompt + tool definitions + the
	 * tab's genuine `history` with identical Anthropic `cache_control`
	 * breakpoints — plus a single trivial throwaway user turn, in ONE request
	 * whose streamed output is ignored. Reusing that identical prefix registers
	 * a cache READ on the provider, which refreshes the ~5-minute prompt-cache
	 * TTL so the user's NEXT real message lands on a still-warm cache. The
	 * model's reply (a bare ".") is discarded.
	 *
	 * Strictly side-effect free, by design:
	 *  - does NOT mutate `this.messages` (history is passed in, never stored);
	 *  - does NOT change `this.status`;
	 *  - emits NO events and persists NOTHING.
	 *
	 * Returns the request's `usage` so the caller can surface a warming-specific
	 * "last request" cache rate WITHOUT polluting the real Cache Rate metric.
	 * Throws on provider error so the caller can surface it in the debug strip.
	 */
	/**
	 * Build the `streamText` options for one request.
	 *
	 * Both `run()` (real turns) and `warmCache()` (idle warming) go through this
	 * helper so that the parameters Anthropic keys its message cache on —
	 * `tool_choice` and the extended-thinking settings — come out identical for
	 * identical inputs. If the two paths diverged, warming would refresh a
	 * different cache bucket than the one the next real message reads.
	 * `maxOutputTokens` does not affect the cache, but it has to exceed the
	 * thinking budget or Anthropic rejects the request, so it is set here too.
	 *
	 * Resulting shape:
	 *  - always: `model`, `messages`, `tools`, `abortSignal`;
	 *  - Claude OAuth: `toolChoice: "auto"`;
	 *  - Claude OAuth with effort other than `"none"`: `maxOutputTokens: 32000`
	 *    and `providerOptions.anthropic` from
	 *    `anthropicThinkingProviderOptions(config.model, effort)`;
	 *  - providers outside the Anthropic SDK with effort other than `"none"`:
	 *    `providerOptions.openaiCompatible.reasoningEffort`.
	 *
	 * @param args Request ingredients: the model, the already-normalised
	 *   messages, the tool set, the two provider-family flags, the resolved
	 *   reasoning effort and an optional abort signal.
	 * @returns The options object to hand to `streamText`.
	 */
	private buildStreamOptions(args: {
		model: Parameters<typeof streamText>[0]["model"];
		coreMessages: ModelMessage[];
		tools: Record<string, Tool>;
		isClaudeOAuth: boolean;
		usesAnthropicSDK: boolean;
		effort: ReasoningEffort;
		abortSignal?: AbortSignal;
	}): Parameters<typeof streamText>[0] {
		const { isClaudeOAuth, usesAnthropicSDK, effort } = args;

		const request: Parameters<typeof streamText>[0] = {
			model: args.model,
			messages: args.coreMessages,
			tools: args.tools,
			abortSignal: args.abortSignal,
		};

		// An explicit `"auto"` tool choice nudges Claude toward using the tools
		// it was given instead of thinking indefinitely, while still allowing a
		// plain-text answer. It is also a cache-key input, so warming and real
		// turns must agree on it.
		if (isClaudeOAuth) {
			request.toolChoice = "auto";
		}

		// With reasoning switched off there is nothing further to configure.
		if (effort === "none") {
			return request;
		}

		if (isClaudeOAuth) {
			// Native Anthropic thinking via providerOptions; the concrete config
			// is derived from the model id and the effort. `maxOutputTokens`
			// caps thinking and response combined.
			request.maxOutputTokens = 32000;
			request.providerOptions = {
				anthropic: anthropicThinkingProviderOptions(this.config.model, effort),
			};
		} else if (!usesAnthropicSDK) {
			// OpenAI-compatible endpoints take the effort as a plain
			// `reasoningEffort` option.
			request.providerOptions = { openaiCompatible: { reasoningEffort: effort } };
		}
		// Remaining case (Anthropic SDK without Claude OAuth): no thinking
		// options are attached.

		return request;
	}

	/**
	 * Prompt-cache warming replay — not a real turn.
	 *
	 * Sends one request made of the system prompt, the tool set, the supplied
	 * `history` and a trailing throwaway user turn (`WARM_CACHE_PROBE_TEXT`),
	 * serialised and normalised the same way `run()` does and configured through
	 * the same `buildStreamOptions` helper. The intent is to re-read the prompt
	 * prefix the next real turn will send so the provider-side cache stays warm.
	 * The streamed reply is ignored; only usage is collected.
	 *
	 * Within this method `this.messages` and `this.status` are left untouched
	 * and no events are emitted.
	 *
	 * @param history Conversation to replay; it is copied, not stored.
	 * @param options Optional `abortSignal` for the request and a
	 *   `reasoningEffort` override. Effort resolves as override →
	 *   `config.reasoningEffort` → `DEFAULT_REASONING_EFFORT`, matching `run()`.
	 * @returns Token usage taken from the stream's `finish-step` event; all
	 *   zeros when no usage was reported.
	 * @throws Error carrying the `formatError` message when the stream reports
	 *   an error or the request itself fails.
	 */
	async warmCache(
		history: ChatMessage[],
		options?: { abortSignal?: AbortSignal; reasoningEffort?: ReasoningEffort },
	): Promise<UsageData> {
		const ctx = this.buildLlmContext(history);

		// Same resolution order as a real turn: the thinking options derived from
		// the effort are part of the cache key, so a mismatch would warm the
		// wrong bucket.
		const effort =
			options?.reasoningEffort ?? this.config.reasoningEffort ?? DEFAULT_REASONING_EFFORT;

		// System message first, then the genuine history, then the probe turn at
		// the very END so the prefix derived from earlier messages is unchanged.
		const probeTurn: ChatMessage = {
			role: "user",
			chunks: [{ type: "text", text: WARM_CACHE_PROBE_TEXT }],
		};
		const systemMessage: SystemModelMessage = { role: "system", content: ctx.systemPrompt };
		const serialised: ModelMessage[] = [
			systemMessage,
			...toModelMessages([...history, probeTurn], ctx.isClaudeOAuth),
		];

		// Provider-specific normalisation, identical to the passes in `run()`.
		let coreMessages: ModelMessage[];
		if (ctx.usesAnthropicSDK) {
			coreMessages = applyAnthropicStructuralNormalisations(serialised);
			applyAnthropicCaching(coreMessages);
		} else {
			coreMessages = applyOpenAICompatibleReasoningNormalisation(serialised);
		}

		// Shared helper → same toolChoice / thinking options / maxOutputTokens as
		// a real turn. The abort signal key is only present when one was given.
		const signal = options?.abortSignal;
		const streamOptions = this.buildStreamOptions({
			model: ctx.providerFactory(this.config.model),
			coreMessages,
			tools: ctx.tools,
			isClaudeOAuth: ctx.isClaudeOAuth,
			usesAnthropicSDK: ctx.usesAnthropicSDK,
			effort,
			...(signal ? { abortSignal: signal } : {}),
		});

		let usage: UsageData = {
			inputTokens: 0,
			outputTokens: 0,
			cacheReadTokens: 0,
			cacheWriteTokens: 0,
		};
		try {
			const result = streamText(streamOptions);
			for await (const part of result.fullStream) {
				if (part.type === "error") {
					// Promote in-stream errors to exceptions; the catch below
					// formats them like any other failure.
					const cause = (part as { error?: unknown }).error;
					throw cause instanceof Error
						? cause
						: new Error(String(cause ?? "cache-warm stream error"));
				}
				if (part.type !== "finish-step") continue;

				// Per-step usage rides on `finish-step`; everything else in the
				// stream is ignored.
				const stepUsage = (part as { usage?: LanguageModelUsage }).usage;
				if (!stepUsage) continue;
				const tokenDetails = stepUsage.inputTokenDetails;
				usage = {
					inputTokens: stepUsage.inputTokens ?? 0,
					outputTokens: stepUsage.outputTokens ?? 0,
					cacheReadTokens: tokenDetails?.cacheReadTokens ?? 0,
					cacheWriteTokens: tokenDetails?.cacheWriteTokens ?? 0,
				};
			}
		} catch (err) {
			throw new Error(formatError(err, this.config));
		}
		return usage;
	}

	async *run(
		userMessage: string,
		options?: {
			reasoningEffort?: ReasoningEffort;
			abortSignal?: AbortSignal;
			/**
			 * Ephemeral ordered multimodal content (text + image/pdf attachments)
			 * for THIS user turn. When present, `userMessage` is the text-only
			 * projection (with `[image]`/`[pdf]` markers) used for persistence and
			 * the chunk render, while `content` carries the actual parts forwarded
			 * to the provider. Omitted for plain-text turns (the common case).
			 */
			content?: UserContentPart[];
		},
	): AsyncGenerator<AgentEvent> {
		this.status = "running";
		yield { type: "status", status: "running" };

		// Attach the ephemeral multimodal content (if any) to the in-flight user
		// message so `toModelMessages` can emit interleaved image/pdf parts to the
		// provider. The persisted chunk is still text-only (`userMessage`).
		const hasMultimodal =
			!!options?.content && options.content.some((p) => p.type === "attachment");
		this.messages.push({
			role: "user",
			chunks: [{ type: "text", text: userMessage }],
			...(hasMultimodal ? { content: options?.content } : {}),
		});

		const { isClaudeOAuth, usesAnthropicSDK, providerFactory, tools, systemPrompt } =
			this.buildLlmContext(this.messages);

		try {
			logAgentLoop({
				tabId: this.config.tabId,
				event: "run-start",
				detail: { model: this.config.model, provider: this.config.provider },
			});
			// Single chunk accumulator for the entire assistant turn (all steps).
			// All event-driven mutations go through `appendEventToChunks`.
			const chunks: Chunk[] = [];

			// We build up a local message list for multi-turn within one run() call
			// that includes tool results fed back to the LLM. Each step appends
			// the assistant's evolving chunk list as one ChatMessage; subsequent
			// steps see prior tool calls and their results via the chunks.
			const stepMessages: ChatMessage[] = [...this.messages];
			// The assistant ChatMessage for the current turn, shared across steps
			// so its `chunks` reference matches the accumulator above. We push
			// it once when the first step has actual output to record.
			let assistantTurnMessage: ChatMessage | null = null;

			for (let step = 0; step < MAX_STEPS; step++) {
				const effort =
					options?.reasoningEffort ?? this.config.reasoningEffort ?? DEFAULT_REASONING_EFFORT;
				const debugReqId = nextDebugSeq();
				logStepLifecycle({
					tabId: this.config.tabId,
					step,
					event: "step-start",
					detail: { effort, historyMessages: stepMessages.length },
				});

				// Build stream text options
				const model = providerFactory(this.config.model);

				// Build the message list with the system prompt prepended as a system
				// role message. This is required for Anthropic prompt caching: the
				// `system` shortcut parameter takes a plain string with nowhere to
				// attach `providerOptions.anthropic.cacheControl`. Moving it inline
				// also lets us apply rolling cache breakpoints to the last messages.
				const systemMessage: SystemModelMessage = {
					role: "system",
					content: systemPrompt,
				};

				let coreMessages: ModelMessage[] = [
					systemMessage,
					...toModelMessages(stepMessages, isClaudeOAuth),
				];

				// Apply provider-specific structural normalisations before
				// sending. Anthropic and openai-compatible paths each have
				// their own message-shape requirements; the two passes are
				// mutually exclusive.
				if (usesAnthropicSDK) {
					coreMessages = applyAnthropicStructuralNormalisations(coreMessages);
					applyAnthropicCaching(coreMessages);
				} else {
					coreMessages = applyOpenAICompatibleReasoningNormalisation(coreMessages);
				}

				// Build the request's stream options through the SHARED helper so
				// the cache-affecting parameters (toolChoice, thinking
				// providerOptions, maxOutputTokens) are byte-identical to what the
				// idle cache-warming replay sends. Any drift here would split the
				// Anthropic message cache into two buckets (see buildStreamOptions
				// + Agent.warmCache).
				const streamOptions = this.buildStreamOptions({
					model,
					coreMessages,
					tools,
					isClaudeOAuth,
					usesAnthropicSDK,
					effort,
					abortSignal: options?.abortSignal,
				});

				const result = streamText(streamOptions);

				// Per-step tool-call tracking — needed because we only loop back
				// to the LLM if this step produced tool calls. The actual chunk
				// state for the turn lives in `chunks`.
				const stepToolCalls: ToolCall[] = [];

				// Ensure we have an assistant message in stepMessages whose
				// `chunks` reference is shared with our accumulator. Only push
				// once; subsequent steps mutate the same chunks array.
				if (assistantTurnMessage === null) {
					assistantTurnMessage = { role: "assistant", chunks };
					stepMessages.push(assistantTurnMessage);
				}

				// Unexpected stream errors propagate up to the outer try/catch
				// in `run()`, which formats them and transitions the agent to
				// the "error" status. The AI SDK v6 surfaces unavailable-tool
				// calls as native `tool-error` stream events (handled below),
				// so there's no NoSuchToolError to catch here.
				for await (const event of result.fullStream) {
					logStreamEvent({
						requestId: debugReqId,
						step,
						eventType: event.type,
						tabId: this.config.tabId,
					});
					if (event.type === "text-delta") {
						// v6: text-delta carries `text` (not `textDelta`)
						const internalEvent: AgentEvent = {
							type: "text-delta",
							delta: event.text,
						};
						appendEventToChunks(chunks, internalEvent);
						yield internalEvent;
					} else if (event.type === "reasoning-delta") {
						// v6 new event: reasoning-delta carries `text` (not `textDelta`)
						const internalEvent: AgentEvent = {
							type: "reasoning-delta",
							delta: event.text,
						};
						appendEventToChunks(chunks, internalEvent);
						yield internalEvent;
					} else if (event.type === "reasoning-end") {
						// Only emit when providerMetadata is present — non-Anthropic
						// models send reasoning-end without any metadata to round-trip.
						// Anthropic's signature lives inside providerMetadata as
						// { anthropic: { signature: "..." } }.
						if (event.providerMetadata !== undefined) {
							const internalEvent: AgentEvent = {
								type: "reasoning-end",
								metadata: event.providerMetadata as Record<string, unknown>,
							};
							appendEventToChunks(chunks, internalEvent);
							yield internalEvent;
						}
					} else if (event.type === "tool-call") {
						// v6: tool call input is in `input` (not `args`)
						const rawName = event.toolName;
						const toolName = isClaudeOAuth ? unprefixToolName(rawName) : rawName;
						const toolCall: ToolCall = {
							id: event.toolCallId,
							name: toolName,
							arguments: event.input as Record<string, unknown>,
						};
						stepToolCalls.push(toolCall);
						const internalEvent: AgentEvent = { type: "tool-call", toolCall };
						appendEventToChunks(chunks, internalEvent);
						yield internalEvent;
					} else if (event.type === "tool-error") {
						// The model called a tool that doesn't exist (or a
						// provider-executed / server tool failed). The AI SDK v6
						// emits this as a native stream event carrying the
						// original tool name. Forward both a synthetic
						// tool-result (so the tool-batch entry the model expects
						// has an `isError: true` result) and an error chunk for
						// visibility — without the result the model would
						// silently wait for one that never arrives.
						const evt = event as unknown as {
							toolCallId: string;
							toolName: string;
							error: unknown;
						};
						const toolName = isClaudeOAuth ? unprefixToolName(evt.toolName) : evt.toolName;
						const errMessage = evt.error instanceof Error ? evt.error.message : String(evt.error);

						const trEvent: AgentEvent = {
							type: "tool-result",
							toolResult: {
								toolCallId: evt.toolCallId,
								toolName,
								result: `Error: ${errMessage}`,
								isError: true,
							},
						};
						appendEventToChunks(chunks, trEvent);
						yield trEvent;

						// Emit an error chunk for visibility, but do NOT set the
						// agent status to "error" — the step continues and loops
						// back to the LLM normally.
						const errorMsg = formatError(evt.error, this.config);
						const errChunkEvent: AgentEvent = { type: "error", error: errorMsg };
						appendEventToChunks(chunks, errChunkEvent);
						yield errChunkEvent;
						break;
					} else if (event.type === "abort") {
						// Stream aborted upstream. Surface as an error so the
						// frontend tab transitions out of `running`. We don't
						// currently call abortController.abort() ourselves, so
						// this would only fire from external signal propagation.
						const reason =
							typeof (event as { reason?: unknown }).reason === "string"
								? (event as { reason: string }).reason
								: "stream aborted";
						// Fill in error results for any unresolved tool calls so
						// the orphaned tool-call IDs don't trigger a
						// MissingToolResultsError if this history is ever replayed.
						const abortResidual = this.synthesizeResidualToolResults(
							stepToolCalls,
							chunks,
							`Error: Stream was aborted: ${reason}`,
						);
						for (const r of abortResidual) {
							appendEventToChunks(chunks, r);
							yield r;
						}

						const errorMsg = formatError(new Error(reason), this.config);
						const internalEvent: AgentEvent = { type: "error", error: errorMsg };
						appendEventToChunks(chunks, internalEvent);
						yield internalEvent;
						this.status = "error";
						yield { type: "status", status: "error" };
						return;
					} else if (event.type === "error") {
						const errRecord = event.error as unknown as Record<string, unknown>;
						const statusCode =
							typeof errRecord.statusCode === "number" ? errRecord.statusCode : undefined;
						const errorMsg = formatError(event.error, this.config);
						// Fill in error results for any unresolved tool calls so
						// the orphaned tool-call IDs don't trigger a
						// MissingToolResultsError if this history is ever replayed.
						const streamErrResidual = this.synthesizeResidualToolResults(
							stepToolCalls,
							chunks,
							`Error: ${errorMsg}`,
						);
						for (const r of streamErrResidual) {
							appendEventToChunks(chunks, r);
							yield r;
						}

						const internalEvent: AgentEvent = {
							type: "error",
							error: errorMsg,
							...(statusCode !== undefined ? { statusCode } : {}),
						};
						appendEventToChunks(chunks, internalEvent);
						yield internalEvent;
						this.status = "error";
						yield { type: "status", status: "error" };
						return;
					} else if (event.type === "finish-step") {
						// One `finish-step` per `streamText` step. This — NOT the
						// terminal `finish` part — is where the AI SDK puts the
						// per-step `usage` (the `finish` part only carries the
						// aggregate `totalUsage`). `usage.inputTokenDetails` holds
						// Anthropic's prompt-cache read/write split, which powers the
						// Cache Rate view. `inputTokens` is the TOTAL prompt (cached +
						// fresh). Skip emission when no usage is attached (e.g. test
						// mocks) so we never broadcast an all-zero usage event.
						const u = (event as { usage?: LanguageModelUsage }).usage;
						if (u) {
							const details = u.inputTokenDetails;
							const usageEvent: AgentEvent = {
								type: "usage",
								usage: {
									inputTokens: u.inputTokens ?? 0,
									outputTokens: u.outputTokens ?? 0,
									cacheReadTokens: details?.cacheReadTokens ?? 0,
									cacheWriteTokens: details?.cacheWriteTokens ?? 0,
								},
							};
							// Set DISPATCH_DEBUG_USAGE=1 to print the raw per-step
							// usage to the server log — lets you confirm cache reads
							// are non-zero without the UI.
							if (process.env.DISPATCH_DEBUG_USAGE) {
								console.error("[dispatch usage]", JSON.stringify(usageEvent.usage));
							}
							// Not appended to chunks — usage is telemetry, not
							// message content (see appendEventToChunks).
							yield usageEvent;
						}
					}
					// Ignored events (intentional):
					//   start, text-start, text-end, reasoning-start,
					//   tool-input-start, tool-input-delta, tool-input-end,
					//   tool-result (only fires if tool has execute; ours don't),
					//   start-step, finish (aggregate totalUsage only), raw,
					//   source, file, tool-output-denied, tool-approval-*
				}

				// No tool calls means the agent is done — the assistant message
				// already exists in stepMessages with up-to-date chunks.
				if (stepToolCalls.length === 0) {
					logStepLifecycle({
						tabId: this.config.tabId,
						step,
						event: "step-end-no-tools",
					});
					logAgentLoop({
						tabId: this.config.tabId,
						event: "run-end-clean",
						detail: { steps: step + 1 },
					});
					break;
				}
				logStepLifecycle({
					tabId: this.config.tabId,
					step,
					event: "step-has-tools",
					detail: {
						toolCount: stepToolCalls.length,
						toolNames: stepToolCalls.map((t) => t.name),
					},
				});

				// Execute tool calls manually. Their results merge back into the
				// `chunks` accumulator via `appendEventToChunks`, which routes
				// each result to the correct entry inside its tool-batch chunk.
				// Track which calls already have a recorded result (from the
				// synthetic unavailable-tool error path above) so we don't
				// re-execute them.
				const alreadyResolved = new Set<string>();
				for (const c of chunks) {
					if (c.type !== "tool-batch") continue;
					for (const entry of c.calls) {
						if (entry.result !== undefined) alreadyResolved.add(entry.id);
					}
				}

				// Identify the index of the last tool in this batch that will
				// actually execute. Queued user messages are buffered across
				// this batch and injected into ONLY that tool's result, so the
				// interrupt appears exactly once per step rather than fragmented
				// across whichever tool happened to dequeue first. Tool-level
				// interrupt handlers in run-shell/youtube-transcribe/retrieve
				// still embed their own interrupt text in their return values —
				// that path is independent and remains correct.
				let lastExecutableIdx = -1;
				for (let i = 0; i < stepToolCalls.length; i++) {
					const tcAt = stepToolCalls[i];
					if (tcAt && !alreadyResolved.has(tcAt.id)) lastExecutableIdx = i;
				}

				// Accumulator for messages dequeued during this batch. Drained
				// only at `lastExecutableIdx`. Destructive dequeue at the queue
				// level prevents the same message from appearing in subsequent
				// batches.
				const batchPendingInjection: { id: string; message: string; timestamp: number }[] = [];

				// Results of already-executed calls in this batch, keyed by
				// `toolDedupKey`. Byte-identical repeats reuse the first result
				// instead of re-running the tool (see tool-runner-duplication-
				// incident.md). Only the raw, pre-interrupt-injection result is
				// cached so a reused copy never carries another call's interrupt.
				const dedupResults = new Map<string, ToolResult>();

				for (let tcIdx = 0; tcIdx < stepToolCalls.length; tcIdx++) {
					const tc = stepToolCalls[tcIdx];
					if (!tc || alreadyResolved.has(tc.id)) continue;

					const dedupKey = toolDedupKey(tc.name, tc.arguments);

					let toolResult: ToolResult | undefined;
					const cached = dedupKey !== null ? dedupResults.get(dedupKey) : undefined;
					if (cached) {
						// Identical call already ran in this batch — reuse its
						// output under this call's own id (no execution, no shell
						// output to stream).
						toolResult = { ...cached, toolCallId: tc.id, toolName: tc.name };
					} else {
						const shellOutputQueue: Array<{ data: string; stream: "stdout" | "stderr" }> = [];

						const execPromise = this.executeToolWithStreaming(tc, shellOutputQueue);

						// Poll for shell output while the tool is running, using Promise.race
						// so we can yield shell-output events as they arrive rather than buffering
						// them all until tool completion.
						while (toolResult === undefined) {
							if (shellOutputQueue.length > 0) {
								const item = shellOutputQueue.shift();
								if (item) {
									const shellEvent: AgentEvent = {
										type: "shell-output",
										data: item.data,
										stream: item.stream,
									};
									appendEventToChunks(chunks, shellEvent);
									yield shellEvent;
								}
								continue;
							}
							const raceResult = await Promise.race([
								execPromise.then((r) => ({ done: true as const, value: r })),
								new Promise<{ done: false }>((resolve) =>
									setImmediate(() => resolve({ done: false })),
								),
							]);
							if (raceResult.done) {
								toolResult = raceResult.value;
							}
						}

						// Drain any remaining shell output emitted before we read the result
						while (shellOutputQueue.length > 0) {
							const item = shellOutputQueue.shift();
							if (item) {
								const shellEvent: AgentEvent = {
									type: "shell-output",
									data: item.data,
									stream: item.stream,
								};
								appendEventToChunks(chunks, shellEvent);
								yield shellEvent;
							}
						}

						// Cache the raw result so later identical calls in this
						// batch can reuse it.
						if (dedupKey !== null) dedupResults.set(dedupKey, toolResult);
					}

					// Harvest any queued user messages but DEFER injection until
					// the last tool of the batch. This collapses multiple
					// queued messages into a single interrupt block on a single
					// tool-result instead of fragmenting across the batch.
					if (this.queueCallbacks) {
						const queuedMsgs = this.queueCallbacks.dequeueMessages();
						if (queuedMsgs.length > 0) {
							batchPendingInjection.push(...queuedMsgs);
						}
					}

					let finalToolResult = toolResult;
					if (tcIdx === lastExecutableIdx && batchPendingInjection.length > 0) {
						const userMessages = batchPendingInjection.map((m) => m.message).join("\n---\n");
						finalToolResult = {
							...toolResult,
							result: `${toolResult.result}\n\n[USER INTERRUPT]\nThe user has sent you message(s) while you were working. You MUST address these before continuing with your current task:\n\n${userMessages}`,
						};
						batchPendingInjection.length = 0;
					}

					const trEvent: AgentEvent = { type: "tool-result", toolResult: finalToolResult };
					appendEventToChunks(chunks, trEvent);
					yield trEvent;
				}

				// Safety net: if `lastExecutableIdx` was never reached (e.g.,
				// no tools executed because all were already resolved) but
				// messages were still dequeued, surface them as a user message
				// so they aren't dropped. In practice this is rare — it only
				// happens when the entire batch is unavailable-tool synthesized
				// errors with a message arriving in that narrow window.
				if (batchPendingInjection.length > 0) {
					const userMessages = batchPendingInjection.map((m) => m.message).join("\n---\n");
					this.messages.push({
						role: "user",
						chunks: [{ type: "text", text: userMessages }],
					});
					batchPendingInjection.length = 0;
				}

				// Final safety net: after executing the batch, guarantee every
				// tool-call recorded in this step has a matching result before we
				// loop back to the LLM. Any tool-call ID that slipped through
				// without a result (a path we didn't anticipate) would otherwise
				// orphan and trigger MissingToolResultsError on the next
				// round-trip.
				const safetyResidual = this.synthesizeResidualToolResults(
					stepToolCalls,
					chunks,
					"Error: Internal error: tool result was never produced.",
				);
				for (const r of safetyResidual) {
					appendEventToChunks(chunks, r);
					yield r;
				}
			}

			// Build the final assistant message from the accumulated chunks.
			// If no assistant turn message was ever created (e.g., the model
			// produced nothing — unusual but possible), synthesize an empty one.
			const assistantMessage: ChatMessage = assistantTurnMessage ?? {
				role: "assistant",
				chunks,
			};
			// `assistantTurnMessage` was pushed into `stepMessages` but not into
			// `this.messages` — push it now so the agent's outward-facing
			// history reflects the turn.
			this.messages.push(assistantMessage);

			// NOTE: queued messages that arrive AFTER the last tool call (or with
			// no tool calls at all) are intentionally NOT drained here. Draining
			// them into `this.messages` as a trailing user turn — the old
			// behaviour — left them sitting in history with no model response:
			// the turn ended instead of answering them (the "queue not consumed
			// after the turn ends" bug). They now stay on the queue so the
			// orchestrator (`AgentManager.processMessage`) can start a fresh turn
			// for them once this one settles. Mid-turn interrupts are still
			// injected into the last tool result above — that path is unchanged.

			yield { type: "done", message: assistantMessage };
		} catch (err) {
			const errRecord = err as unknown as Record<string, unknown>;
			const statusCode =
				typeof errRecord.statusCode === "number" ? errRecord.statusCode : undefined;
			const errorMsg = formatError(err, this.config);
			yield { type: "error", error: errorMsg, ...(statusCode !== undefined ? { statusCode } : {}) };
			this.status = "error";
			yield { type: "status", status: "error" };
			return;
		}

		this.status = "idle";
		yield { type: "status", status: "idle" };
	}
}
