diff options
| author | Adam Malczewski <[email protected]> | 2026-06-27 01:12:40 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-27 01:12:40 +0900 |
| commit | 98b0638838a8e754927d8c030ce8bded18d63e7d (patch) | |
| tree | 0d4e21c3d4792fcd77e1040373b260e38efa34ce /packages/kernel/src | |
| parent | d92a4af6191d7d20acf861adf605ad0227b6b287 (diff) | |
| parent | 61e45e60d699ed1ca46f94a8f181c92a940317c6 (diff) | |
| download | dispatch-98b0638838a8e754927d8c030ce8bded18d63e7d.tar.gz dispatch-98b0638838a8e754927d8c030ce8bded18d63e7d.zip | |
Merge branch 'dev' into feature/heartbeat
# Conflicts:
# packages/host-bin/package.json
# packages/host-bin/src/main.ts
# packages/session-orchestrator/src/orchestrator.ts
# packages/system-prompt/src/service.test.ts
# packages/system-prompt/src/service.ts
# packages/system-prompt/src/types.ts
# packages/transport-contract/package.json
# packages/transport-http/package.json
# packages/transport-http/src/app.test.ts
# packages/transport-http/src/app.ts
# packages/transport-http/src/extension.ts
# packages/transport-http/tsconfig.json
# tsconfig.json
Diffstat (limited to 'packages/kernel/src')
28 files changed, 8156 insertions, 8142 deletions
diff --git a/packages/kernel/src/bus/bus.test.ts b/packages/kernel/src/bus/bus.test.ts index 05cf875..9310e1f 100644 --- a/packages/kernel/src/bus/bus.test.ts +++ b/packages/kernel/src/bus/bus.test.ts @@ -5,376 +5,376 @@ import { type Bus, createBus } from "./bus.js"; import { applyFilterChain, dispatchEventSync, sortFilters } from "./pure.js"; interface FakeLogger extends Logger { - readonly errors: Array<{ message: string; args: unknown[] }>; + readonly errors: Array<{ message: string; args: unknown[] }>; } function createFakeLogger(): FakeLogger { - const errors: Array<{ message: string; args: unknown[] }> = []; - const logger: FakeLogger = { - errors, - debug: () => {}, - info: () => {}, - warn: () => {}, - error: (message, attrs) => { - errors.push({ message, args: attrs === undefined ? [] : [attrs] }); - }, - child: () => logger, - span: () => makeNoopSpan(logger), - }; - return logger; + const errors: Array<{ message: string; args: unknown[] }> = []; + const logger: FakeLogger = { + errors, + debug: () => {}, + info: () => {}, + warn: () => {}, + error: (message, attrs) => { + errors.push({ message, args: attrs === undefined ? 
[] : [attrs] }); + }, + child: () => logger, + span: () => makeNoopSpan(logger), + }; + return logger; } function makeNoopSpan(log: Logger): Span { - const span: Span = { - id: "noop", - log, - setAttributes: () => {}, - addLink: () => {}, - child: () => span, - end: () => {}, - }; - return span; + const span: Span = { + id: "noop", + log, + setAttributes: () => {}, + addLink: () => {}, + child: () => span, + end: () => {}, + }; + return span; } describe("event hooks", () => { - let logger: FakeLogger; - let bus: Bus; - - beforeEach(() => { - logger = createFakeLogger(); - bus = createBus(logger); - }); - - it("fires all registered listeners", () => { - const hook = defineEventHook<{ value: number }>("test/event"); - const received: number[] = []; - - bus.on(hook, (payload) => { - received.push(payload.value); - }); - bus.on(hook, (payload) => { - received.push(payload.value * 10); - }); - - bus.emit(hook, { value: 3 }); - - expect(received).toEqual([3, 30]); - }); - - it("isolates a throwing listener (others still run, error logged)", () => { - const hook = defineEventHook<string>("test/isolate"); - const received: string[] = []; - - bus.on(hook, () => { - throw new Error("boom"); - }); - bus.on(hook, (payload) => { - received.push(payload); - }); - - bus.emit(hook, "hello"); - - expect(received).toEqual(["hello"]); - expect(logger.errors).toHaveLength(1); - expect(logger.errors[0]?.message).toContain("test/isolate"); - }); - - it("isolates an async handler rejection", async () => { - const hook = defineEventHook<string>("test/async-reject"); - const received: string[] = []; - - bus.on(hook, async () => { - throw new Error("async boom"); - }); - bus.on(hook, async (payload) => { - received.push(payload); - }); - - bus.emit(hook, "data"); - - await new Promise((resolve) => { - setTimeout(resolve, 10); - }); - - expect(received).toEqual(["data"]); - expect(logger.errors).toHaveLength(1); - expect(logger.errors[0]?.message).toContain("test/async-reject"); - }); - - 
it("unsubscribe removes the handler", () => { - const hook = defineEventHook<void>("test/unsub"); - let count = 0; - - const unsub = bus.on(hook, () => { - count++; - }); - - bus.emit(hook, undefined); - expect(count).toBe(1); - - unsub(); - bus.emit(hook, undefined); - expect(count).toBe(1); - }); - - it("emit with no handlers is a no-op", () => { - const hook = defineEventHook<string>("test/empty"); - expect(() => bus.emit(hook, "nothing")).not.toThrow(); - }); - - it("emitAsync awaits all handlers", async () => { - const hook = defineEventHook<number>("test/async"); - const received: number[] = []; - - bus.on(hook, async (payload) => { - await new Promise((resolve) => { - setTimeout(resolve, 5); - }); - received.push(payload); - }); - bus.on(hook, async (payload) => { - received.push(payload * 2); - }); - - await bus.emitAsync(hook, 5); - - expect(received).toEqual([10, 5]); - }); - - it("emitAsync respects timeout", async () => { - const hook = defineEventHook<void>("test/timeout"); - let completed = false; - - bus.on(hook, async () => { - await new Promise((resolve) => { - setTimeout(resolve, 100); - }); - completed = true; - }); - - await bus.emitAsync(hook, undefined, 10); - - expect(completed).toBe(false); - }); + let logger: FakeLogger; + let bus: Bus; + + beforeEach(() => { + logger = createFakeLogger(); + bus = createBus(logger); + }); + + it("fires all registered listeners", () => { + const hook = defineEventHook<{ value: number }>("test/event"); + const received: number[] = []; + + bus.on(hook, (payload) => { + received.push(payload.value); + }); + bus.on(hook, (payload) => { + received.push(payload.value * 10); + }); + + bus.emit(hook, { value: 3 }); + + expect(received).toEqual([3, 30]); + }); + + it("isolates a throwing listener (others still run, error logged)", () => { + const hook = defineEventHook<string>("test/isolate"); + const received: string[] = []; + + bus.on(hook, () => { + throw new Error("boom"); + }); + bus.on(hook, (payload) => { + 
received.push(payload); + }); + + bus.emit(hook, "hello"); + + expect(received).toEqual(["hello"]); + expect(logger.errors).toHaveLength(1); + expect(logger.errors[0]?.message).toContain("test/isolate"); + }); + + it("isolates an async handler rejection", async () => { + const hook = defineEventHook<string>("test/async-reject"); + const received: string[] = []; + + bus.on(hook, async () => { + throw new Error("async boom"); + }); + bus.on(hook, async (payload) => { + received.push(payload); + }); + + bus.emit(hook, "data"); + + await new Promise((resolve) => { + setTimeout(resolve, 10); + }); + + expect(received).toEqual(["data"]); + expect(logger.errors).toHaveLength(1); + expect(logger.errors[0]?.message).toContain("test/async-reject"); + }); + + it("unsubscribe removes the handler", () => { + const hook = defineEventHook<void>("test/unsub"); + let count = 0; + + const unsub = bus.on(hook, () => { + count++; + }); + + bus.emit(hook, undefined); + expect(count).toBe(1); + + unsub(); + bus.emit(hook, undefined); + expect(count).toBe(1); + }); + + it("emit with no handlers is a no-op", () => { + const hook = defineEventHook<string>("test/empty"); + expect(() => bus.emit(hook, "nothing")).not.toThrow(); + }); + + it("emitAsync awaits all handlers", async () => { + const hook = defineEventHook<number>("test/async"); + const received: number[] = []; + + bus.on(hook, async (payload) => { + await new Promise((resolve) => { + setTimeout(resolve, 5); + }); + received.push(payload); + }); + bus.on(hook, async (payload) => { + received.push(payload * 2); + }); + + await bus.emitAsync(hook, 5); + + expect(received).toEqual([10, 5]); + }); + + it("emitAsync respects timeout", async () => { + const hook = defineEventHook<void>("test/timeout"); + let completed = false; + + bus.on(hook, async () => { + await new Promise((resolve) => { + setTimeout(resolve, 100); + }); + completed = true; + }); + + await bus.emitAsync(hook, undefined, 10); + + expect(completed).toBe(false); + }); 
}); describe("filter hooks", () => { - let logger: FakeLogger; - let bus: Bus; + let logger: FakeLogger; + let bus: Bus; - beforeEach(() => { - logger = createFakeLogger(); - bus = createBus(logger); - }); + beforeEach(() => { + logger = createFakeLogger(); + bus = createBus(logger); + }); - it("chains filters in registration order", async () => { - const hook = defineFilter<string>("test/chain"); + it("chains filters in registration order", async () => { + const hook = defineFilter<string>("test/chain"); - bus.addFilter(hook, (value) => `${value}-a`); - bus.addFilter(hook, (value) => `${value}-b`); + bus.addFilter(hook, (value) => `${value}-a`); + bus.addFilter(hook, (value) => `${value}-b`); - const result = await bus.applyFilters(hook, "start"); - expect(result).toBe("start-a-b"); - }); + const result = await bus.applyFilters(hook, "start"); + expect(result).toBe("start-a-b"); + }); - it("respects priority ordering (lower runs first)", async () => { - const hook = defineFilter<string>("test/priority"); + it("respects priority ordering (lower runs first)", async () => { + const hook = defineFilter<string>("test/priority"); - bus.addFilter(hook, (value) => `${value}-second`); - bus.addFilter(hook, (value) => `${value}-first`, { priority: -1 }); + bus.addFilter(hook, (value) => `${value}-second`); + bus.addFilter(hook, (value) => `${value}-first`, { priority: -1 }); - const result = await bus.applyFilters(hook, "start"); - expect(result).toBe("start-first-second"); - }); + const result = await bus.applyFilters(hook, "start"); + expect(result).toBe("start-first-second"); + }); - it("fail-open passes value through on throw", async () => { - const hook = defineFilter<number>("test/fail-open"); + it("fail-open passes value through on throw", async () => { + const hook = defineFilter<number>("test/fail-open"); - bus.addFilter(hook, (value) => value + 1); - bus.addFilter(hook, () => { - throw new Error("filter boom"); - }); - bus.addFilter(hook, (value) => value * 2); + 
bus.addFilter(hook, (value) => value + 1); + bus.addFilter(hook, () => { + throw new Error("filter boom"); + }); + bus.addFilter(hook, (value) => value * 2); - const result = await bus.applyFilters(hook, 5); - expect(result).toBe(12); - expect(logger.errors).toHaveLength(1); - expect(logger.errors[0]?.message).toContain("test/fail-open"); - }); + const result = await bus.applyFilters(hook, 5); + expect(result).toBe(12); + expect(logger.errors).toHaveLength(1); + expect(logger.errors[0]?.message).toContain("test/fail-open"); + }); - it("fail-closed propagates the error", async () => { - const hook = defineFilter<number>("test/fail-closed"); + it("fail-closed propagates the error", async () => { + const hook = defineFilter<number>("test/fail-closed"); - bus.addFilter(hook, () => { - throw new Error("closed boom"); - }); + bus.addFilter(hook, () => { + throw new Error("closed boom"); + }); - await expect(bus.applyFilters(hook, 5, { failClosed: true })).rejects.toThrow("closed boom"); - }); + await expect(bus.applyFilters(hook, 5, { failClosed: true })).rejects.toThrow("closed boom"); + }); - it("applyFilters with no filters returns value unchanged", async () => { - const hook = defineFilter<string>("test/no-filters"); - const result = await bus.applyFilters(hook, "unchanged"); - expect(result).toBe("unchanged"); - }); + it("applyFilters with no filters returns value unchanged", async () => { + const hook = defineFilter<string>("test/no-filters"); + const result = await bus.applyFilters(hook, "unchanged"); + expect(result).toBe("unchanged"); + }); - it("unsubscribe removes a filter from the chain", async () => { - const hook = defineFilter<string>("test/filter-unsub"); + it("unsubscribe removes a filter from the chain", async () => { + const hook = defineFilter<string>("test/filter-unsub"); - const unsub = bus.addFilter(hook, (value) => `${value}-removed`); - bus.addFilter(hook, (value) => `${value}-kept`); + const unsub = bus.addFilter(hook, (value) => 
`${value}-removed`); + bus.addFilter(hook, (value) => `${value}-kept`); - unsub(); + unsub(); - const result = await bus.applyFilters(hook, "start"); - expect(result).toBe("start-kept"); - }); + const result = await bus.applyFilters(hook, "start"); + expect(result).toBe("start-kept"); + }); }); describe("services", () => { - let logger: FakeLogger; - let bus: Bus; + let logger: FakeLogger; + let bus: Bus; - beforeEach(() => { - logger = createFakeLogger(); - bus = createBus(logger); - }); + beforeEach(() => { + logger = createFakeLogger(); + bus = createBus(logger); + }); - it("provide and get round-trips", () => { - const handle = defineService<{ greet: (name: string) => string }>("test/service"); - const impl = { greet: (name: string) => `hello ${name}` }; + it("provide and get round-trips", () => { + const handle = defineService<{ greet: (name: string) => string }>("test/service"); + const impl = { greet: (name: string) => `hello ${name}` }; - bus.provideService(handle, impl); - const retrieved = bus.getService(handle); + bus.provideService(handle, impl); + const retrieved = bus.getService(handle); - expect(retrieved.greet("world")).toBe("hello world"); - }); + expect(retrieved.greet("world")).toBe("hello world"); + }); - it("getService on missing service throws", () => { - const handle = defineService<string>("test/missing"); - expect(() => bus.getService(handle)).toThrow("test/missing"); - }); + it("getService on missing service throws", () => { + const handle = defineService<string>("test/missing"); + expect(() => bus.getService(handle)).toThrow("test/missing"); + }); - it("double-provide throws", () => { - const handle = defineService<number>("test/double"); + it("double-provide throws", () => { + const handle = defineService<number>("test/double"); - bus.provideService(handle, 1); - expect(() => bus.provideService(handle, 2)).toThrow("test/double"); - }); + bus.provideService(handle, 1); + expect(() => bus.provideService(handle, 2)).toThrow("test/double"); 
+ }); }); describe("pure functions", () => { - describe("dispatchEventSync", () => { - it("calls all handlers with the payload", () => { - const logger = createFakeLogger(); - const received: number[] = []; - - dispatchEventSync( - [ - (payload) => { - received.push(payload); - }, - (payload) => { - received.push(payload * 2); - }, - ], - 5, - logger, - "test", - ); - - expect(received).toEqual([5, 10]); - }); - - it("catches sync throws and logs them", () => { - const logger = createFakeLogger(); - const received: number[] = []; - - dispatchEventSync( - [ - () => { - throw new Error("sync boom"); - }, - (payload) => { - received.push(payload); - }, - ], - 42, - logger, - "test/sync", - ); - - expect(received).toEqual([42]); - expect(logger.errors).toHaveLength(1); - }); - }); - - describe("sortFilters", () => { - it("sorts by priority ascending, then by order ascending", () => { - const entries = [ - { fn: async (v: number) => v, priority: 10, order: 0 }, - { fn: async (v: number) => v, priority: -1, order: 1 }, - { fn: async (v: number) => v, priority: 10, order: 2 }, - { fn: async (v: number) => v, priority: 0, order: 3 }, - ]; - - const sorted = sortFilters(entries); - expect(sorted.map((e) => e.order)).toEqual([1, 3, 0, 2]); - }); - - it("preserves registration order when priorities are equal", () => { - const entries = [ - { fn: async (v: string) => v, priority: 0, order: 0 }, - { fn: async (v: string) => v, priority: 0, order: 1 }, - { fn: async (v: string) => v, priority: 0, order: 2 }, - ]; - - const sorted = sortFilters(entries); - expect(sorted.map((e) => e.order)).toEqual([0, 1, 2]); - }); - }); - - describe("applyFilterChain", () => { - it("applies filters in order", async () => { - const logger = createFakeLogger(); - const result = await applyFilterChain([(v) => v + 1, (v) => v * 3], 2, logger, "test", false); - expect(result).toBe(9); - }); - - it("fail-open skips the throwing filter", async () => { - const logger = createFakeLogger(); - const 
result = await applyFilterChain( - [ - (v) => v + 10, - () => { - throw new Error("skip me"); - }, - (v) => v + 1, - ], - 0, - logger, - "test", - false, - ); - expect(result).toBe(11); - expect(logger.errors).toHaveLength(1); - }); - - it("fail-closed throws on error", async () => { - const logger = createFakeLogger(); - await expect( - applyFilterChain( - [ - () => { - throw new Error("closed"); - }, - ], - 0, - logger, - "test", - true, - ), - ).rejects.toThrow("closed"); - }); - }); + describe("dispatchEventSync", () => { + it("calls all handlers with the payload", () => { + const logger = createFakeLogger(); + const received: number[] = []; + + dispatchEventSync( + [ + (payload) => { + received.push(payload); + }, + (payload) => { + received.push(payload * 2); + }, + ], + 5, + logger, + "test", + ); + + expect(received).toEqual([5, 10]); + }); + + it("catches sync throws and logs them", () => { + const logger = createFakeLogger(); + const received: number[] = []; + + dispatchEventSync( + [ + () => { + throw new Error("sync boom"); + }, + (payload) => { + received.push(payload); + }, + ], + 42, + logger, + "test/sync", + ); + + expect(received).toEqual([42]); + expect(logger.errors).toHaveLength(1); + }); + }); + + describe("sortFilters", () => { + it("sorts by priority ascending, then by order ascending", () => { + const entries = [ + { fn: async (v: number) => v, priority: 10, order: 0 }, + { fn: async (v: number) => v, priority: -1, order: 1 }, + { fn: async (v: number) => v, priority: 10, order: 2 }, + { fn: async (v: number) => v, priority: 0, order: 3 }, + ]; + + const sorted = sortFilters(entries); + expect(sorted.map((e) => e.order)).toEqual([1, 3, 0, 2]); + }); + + it("preserves registration order when priorities are equal", () => { + const entries = [ + { fn: async (v: string) => v, priority: 0, order: 0 }, + { fn: async (v: string) => v, priority: 0, order: 1 }, + { fn: async (v: string) => v, priority: 0, order: 2 }, + ]; + + const sorted = 
sortFilters(entries); + expect(sorted.map((e) => e.order)).toEqual([0, 1, 2]); + }); + }); + + describe("applyFilterChain", () => { + it("applies filters in order", async () => { + const logger = createFakeLogger(); + const result = await applyFilterChain([(v) => v + 1, (v) => v * 3], 2, logger, "test", false); + expect(result).toBe(9); + }); + + it("fail-open skips the throwing filter", async () => { + const logger = createFakeLogger(); + const result = await applyFilterChain( + [ + (v) => v + 10, + () => { + throw new Error("skip me"); + }, + (v) => v + 1, + ], + 0, + logger, + "test", + false, + ); + expect(result).toBe(11); + expect(logger.errors).toHaveLength(1); + }); + + it("fail-closed throws on error", async () => { + const logger = createFakeLogger(); + await expect( + applyFilterChain( + [ + () => { + throw new Error("closed"); + }, + ], + 0, + logger, + "test", + true, + ), + ).rejects.toThrow("closed"); + }); + }); }); diff --git a/packages/kernel/src/bus/bus.ts b/packages/kernel/src/bus/bus.ts index 03d692e..013d426 100644 --- a/packages/kernel/src/bus/bus.ts +++ b/packages/kernel/src/bus/bus.ts @@ -1,139 +1,139 @@ import type { Logger } from "../contracts/extension.js"; import type { - EventHandler, - EventHookDescriptor, - FilterDescriptor, - FilterHandler, - ServiceHandle, + EventHandler, + EventHookDescriptor, + FilterDescriptor, + FilterHandler, + ServiceHandle, } from "../contracts/hooks.js"; import { - applyFilterChain, - dispatchEventAsync, - dispatchEventSync, - type FilterEntry, - sortFilters, + applyFilterChain, + dispatchEventAsync, + dispatchEventSync, + type FilterEntry, + sortFilters, } from "./pure.js"; export interface Bus { - readonly on: <T>(hook: EventHookDescriptor<T>, handler: EventHandler<T>) => () => void; - readonly emit: <T>(hook: EventHookDescriptor<T>, payload: T) => void; - readonly emitAsync: <T>( - hook: EventHookDescriptor<T>, - payload: T, - timeoutMs?: number, - ) => Promise<void>; - readonly addFilter: <T>( - hook: 
FilterDescriptor<T>, - fn: FilterHandler<T>, - opts?: { readonly priority?: number }, - ) => () => void; - readonly applyFilters: <T>( - hook: FilterDescriptor<T>, - value: T, - opts?: { readonly failClosed?: boolean }, - ) => Promise<T>; - readonly provideService: <T>(handle: ServiceHandle<T>, impl: T) => void; - readonly getService: <T>(handle: ServiceHandle<T>) => T; + readonly on: <T>(hook: EventHookDescriptor<T>, handler: EventHandler<T>) => () => void; + readonly emit: <T>(hook: EventHookDescriptor<T>, payload: T) => void; + readonly emitAsync: <T>( + hook: EventHookDescriptor<T>, + payload: T, + timeoutMs?: number, + ) => Promise<void>; + readonly addFilter: <T>( + hook: FilterDescriptor<T>, + fn: FilterHandler<T>, + opts?: { readonly priority?: number }, + ) => () => void; + readonly applyFilters: <T>( + hook: FilterDescriptor<T>, + value: T, + opts?: { readonly failClosed?: boolean }, + ) => Promise<T>; + readonly provideService: <T>(handle: ServiceHandle<T>, impl: T) => void; + readonly getService: <T>(handle: ServiceHandle<T>) => T; } interface StoredFilterEntry { - readonly fn: unknown; - readonly priority: number; - readonly order: number; + readonly fn: unknown; + readonly priority: number; + readonly order: number; } export function createBus(logger: Logger): Bus { - const eventHandlers = new Map<string, Set<unknown>>(); - const filterEntries = new Map<string, StoredFilterEntry[]>(); - const services = new Map<string, unknown>(); - let filterOrderCounter = 0; + const eventHandlers = new Map<string, Set<unknown>>(); + const filterEntries = new Map<string, StoredFilterEntry[]>(); + const services = new Map<string, unknown>(); + let filterOrderCounter = 0; - return { - on<T>(hook: EventHookDescriptor<T>, handler: EventHandler<T>): () => void { - let set = eventHandlers.get(hook.id); - if (set === undefined) { - set = new Set(); - eventHandlers.set(hook.id, set); - } - const stored: unknown = handler; - set.add(stored); - return () => { - const current = 
eventHandlers.get(hook.id); - if (current !== undefined) current.delete(stored); - }; - }, + return { + on<T>(hook: EventHookDescriptor<T>, handler: EventHandler<T>): () => void { + let set = eventHandlers.get(hook.id); + if (set === undefined) { + set = new Set(); + eventHandlers.set(hook.id, set); + } + const stored: unknown = handler; + set.add(stored); + return () => { + const current = eventHandlers.get(hook.id); + if (current !== undefined) current.delete(stored); + }; + }, - emit<T>(hook: EventHookDescriptor<T>, payload: T): void { - const set = eventHandlers.get(hook.id); - if (set === undefined || set.size === 0) return; - const handlers = [...set] as Array<EventHandler<T>>; - dispatchEventSync(handlers, payload, logger, hook.id); - }, + emit<T>(hook: EventHookDescriptor<T>, payload: T): void { + const set = eventHandlers.get(hook.id); + if (set === undefined || set.size === 0) return; + const handlers = [...set] as Array<EventHandler<T>>; + dispatchEventSync(handlers, payload, logger, hook.id); + }, - async emitAsync<T>( - hook: EventHookDescriptor<T>, - payload: T, - timeoutMs?: number, - ): Promise<void> { - const set = eventHandlers.get(hook.id); - if (set === undefined || set.size === 0) return; - const handlers = [...set] as Array<EventHandler<T>>; - await dispatchEventAsync(handlers, payload, logger, hook.id, timeoutMs); - }, + async emitAsync<T>( + hook: EventHookDescriptor<T>, + payload: T, + timeoutMs?: number, + ): Promise<void> { + const set = eventHandlers.get(hook.id); + if (set === undefined || set.size === 0) return; + const handlers = [...set] as Array<EventHandler<T>>; + await dispatchEventAsync(handlers, payload, logger, hook.id, timeoutMs); + }, - addFilter<T>( - hook: FilterDescriptor<T>, - fn: FilterHandler<T>, - opts?: { readonly priority?: number }, - ): () => void { - let entries = filterEntries.get(hook.id); - if (entries === undefined) { - entries = []; - filterEntries.set(hook.id, entries); - } - const entry: StoredFilterEntry = 
{ - fn, - priority: opts?.priority ?? 0, - order: filterOrderCounter++, - }; - entries.push(entry); - return () => { - const current = filterEntries.get(hook.id); - if (current === undefined) return; - const idx = current.indexOf(entry); - if (idx !== -1) current.splice(idx, 1); - }; - }, + addFilter<T>( + hook: FilterDescriptor<T>, + fn: FilterHandler<T>, + opts?: { readonly priority?: number }, + ): () => void { + let entries = filterEntries.get(hook.id); + if (entries === undefined) { + entries = []; + filterEntries.set(hook.id, entries); + } + const entry: StoredFilterEntry = { + fn, + priority: opts?.priority ?? 0, + order: filterOrderCounter++, + }; + entries.push(entry); + return () => { + const current = filterEntries.get(hook.id); + if (current === undefined) return; + const idx = current.indexOf(entry); + if (idx !== -1) current.splice(idx, 1); + }; + }, - async applyFilters<T>( - hook: FilterDescriptor<T>, - value: T, - opts?: { readonly failClosed?: boolean }, - ): Promise<T> { - const entries = filterEntries.get(hook.id); - if (entries === undefined || entries.length === 0) return value; - const sorted = sortFilters(entries as ReadonlyArray<FilterEntry<T>>); - const fns = sorted.map((e) => e.fn) as Array<FilterHandler<T>>; - return applyFilterChain(fns, value, logger, hook.id, opts?.failClosed ?? false); - }, + async applyFilters<T>( + hook: FilterDescriptor<T>, + value: T, + opts?: { readonly failClosed?: boolean }, + ): Promise<T> { + const entries = filterEntries.get(hook.id); + if (entries === undefined || entries.length === 0) return value; + const sorted = sortFilters(entries as ReadonlyArray<FilterEntry<T>>); + const fns = sorted.map((e) => e.fn) as Array<FilterHandler<T>>; + return applyFilterChain(fns, value, logger, hook.id, opts?.failClosed ?? false); + }, - provideService<T>(handle: ServiceHandle<T>, impl: T): void { - if (services.has(handle.id)) { - throw new Error( - `Service "${handle.id}" is already provided. 
Only one provider per handle is allowed.`, - ); - } - services.set(handle.id, impl); - }, + provideService<T>(handle: ServiceHandle<T>, impl: T): void { + if (services.has(handle.id)) { + throw new Error( + `Service "${handle.id}" is already provided. Only one provider per handle is allowed.`, + ); + } + services.set(handle.id, impl); + }, - getService<T>(handle: ServiceHandle<T>): T { - const impl = services.get(handle.id); - if (impl === undefined) { - throw new Error( - `Service "${handle.id}" has no provider. Call provideService before getService.`, - ); - } - return impl as T; - }, - }; + getService<T>(handle: ServiceHandle<T>): T { + const impl = services.get(handle.id); + if (impl === undefined) { + throw new Error( + `Service "${handle.id}" has no provider. Call provideService before getService.`, + ); + } + return impl as T; + }, + }; } diff --git a/packages/kernel/src/bus/pure.ts b/packages/kernel/src/bus/pure.ts index 4d90fc6..a1c7a86 100644 --- a/packages/kernel/src/bus/pure.ts +++ b/packages/kernel/src/bus/pure.ts @@ -2,82 +2,82 @@ import type { Logger } from "../contracts/extension.js"; import type { EventHandler, FilterHandler } from "../contracts/hooks.js"; export function dispatchEventSync<T>( - handlers: ReadonlyArray<EventHandler<T>>, - payload: T, - logger: Logger, - hookId: string, + handlers: ReadonlyArray<EventHandler<T>>, + payload: T, + logger: Logger, + hookId: string, ): void { - for (const handler of handlers) { - try { - const result = handler(payload); - if (result instanceof Promise) { - result.catch((err: unknown) => { - logger.error(`Event hook "${hookId}" handler rejected`, { err }); - }); - } - } catch (err) { - logger.error(`Event hook "${hookId}" handler threw`, { err }); - } - } + for (const handler of handlers) { + try { + const result = handler(payload); + if (result instanceof Promise) { + result.catch((err: unknown) => { + logger.error(`Event hook "${hookId}" handler rejected`, { err }); + }); + } + } catch (err) { + 
logger.error(`Event hook "${hookId}" handler threw`, { err }); + } + } } export async function dispatchEventAsync<T>( - handlers: ReadonlyArray<EventHandler<T>>, - payload: T, - logger: Logger, - hookId: string, - timeoutMs?: number, + handlers: ReadonlyArray<EventHandler<T>>, + payload: T, + logger: Logger, + hookId: string, + timeoutMs?: number, ): Promise<void> { - const promises = handlers.map(async (handler) => { - try { - await handler(payload); - } catch (err) { - logger.error(`Event hook "${hookId}" handler threw`, { err }); - } - }); + const promises = handlers.map(async (handler) => { + try { + await handler(payload); + } catch (err) { + logger.error(`Event hook "${hookId}" handler threw`, { err }); + } + }); - if (timeoutMs !== undefined) { - await Promise.race([ - Promise.all(promises), - new Promise<void>((resolve) => { - setTimeout(resolve, timeoutMs); - }), - ]); - } else { - await Promise.all(promises); - } + if (timeoutMs !== undefined) { + await Promise.race([ + Promise.all(promises), + new Promise<void>((resolve) => { + setTimeout(resolve, timeoutMs); + }), + ]); + } else { + await Promise.all(promises); + } } export interface FilterEntry<T> { - readonly fn: FilterHandler<T>; - readonly priority: number; - readonly order: number; + readonly fn: FilterHandler<T>; + readonly priority: number; + readonly order: number; } export function sortFilters<T>( - entries: ReadonlyArray<FilterEntry<T>>, + entries: ReadonlyArray<FilterEntry<T>>, ): ReadonlyArray<FilterEntry<T>> { - return [...entries].sort((a, b) => { - if (a.priority !== b.priority) return a.priority - b.priority; - return a.order - b.order; - }); + return [...entries].sort((a, b) => { + if (a.priority !== b.priority) return a.priority - b.priority; + return a.order - b.order; + }); } export async function applyFilterChain<T>( - filters: ReadonlyArray<FilterHandler<T>>, - value: T, - logger: Logger, - hookId: string, - failClosed: boolean, + filters: ReadonlyArray<FilterHandler<T>>, + value: 
T, + logger: Logger, + hookId: string, + failClosed: boolean, ): Promise<T> { - let current = value; - for (const fn of filters) { - try { - current = await fn(current); - } catch (err) { - if (failClosed) throw err; - logger.error(`Filter "${hookId}" handler threw (fail-open, passing through)`, { err }); - } - } - return current; + let current = value; + for (const fn of filters) { + try { + current = await fn(current); + } catch (err) { + if (failClosed) throw err; + logger.error(`Filter "${hookId}" handler threw (fail-open, passing through)`, { err }); + } + } + return current; } diff --git a/packages/kernel/src/contracts/auth.ts b/packages/kernel/src/contracts/auth.ts index 6058156..85963ba 100644 --- a/packages/kernel/src/contracts/auth.ts +++ b/packages/kernel/src/contracts/auth.ts @@ -11,9 +11,9 @@ * This is the common case for OpenAI-compatible and most provider extensions. */ export interface ApiKeyCredentials { - readonly type: "api-key"; - readonly apiKey: string; - readonly baseURL?: string; + readonly type: "api-key"; + readonly apiKey: string; + readonly baseURL?: string; } /** @@ -22,9 +22,9 @@ export interface ApiKeyCredentials { * receives a currently-valid token. */ export interface BearerTokenCredentials { - readonly type: "bearer-token"; - readonly token: string; - readonly baseURL?: string; + readonly type: "bearer-token"; + readonly token: string; + readonly baseURL?: string; } /** Union of credential shapes the kernel recognizes. */ @@ -36,13 +36,13 @@ export type Credentials = ApiKeyCredentials | BearerTokenCredentials; * directly (the concrete vault is a core extension). */ export interface AuthContract { - /** Unique identifier for this auth provider (e.g. "apikey", "claude-oauth"). */ - readonly id: string; + /** Unique identifier for this auth provider (e.g. "apikey", "claude-oauth"). */ + readonly id: string; - /** - * Resolve currently-valid credentials. 
May involve reading from the - * secret vault (injected via Host API) or performing a token refresh. - * Returns `null` if credentials are unavailable (e.g. not yet configured). - */ - readonly resolve: () => Promise<Credentials | null>; + /** + * Resolve currently-valid credentials. May involve reading from the + * secret vault (injected via Host API) or performing a token refresh. + * Returns `null` if credentials are unavailable (e.g. not yet configured). + */ + readonly resolve: () => Promise<Credentials | null>; } diff --git a/packages/kernel/src/contracts/conversation.ts b/packages/kernel/src/contracts/conversation.ts index b459532..f074c52 100644 --- a/packages/kernel/src/contracts/conversation.ts +++ b/packages/kernel/src/contracts/conversation.ts @@ -6,23 +6,23 @@ */ export type { - ChatMessage, - Chunk, - CompactionResult, - ConversationMeta, - ConversationStatus, - ErrorChunk, - Role, - StepId, - StepMetrics, - StoredChunk, - SystemChunk, - TextChunk, - ThinkingChunk, - ToolCallChunk, - ToolResultChunk, - TurnId, - TurnMetrics, - Workspace, - WorkspaceEntry, + ChatMessage, + Chunk, + CompactionResult, + ConversationMeta, + ConversationStatus, + ErrorChunk, + Role, + StepId, + StepMetrics, + StoredChunk, + SystemChunk, + TextChunk, + ThinkingChunk, + ToolCallChunk, + ToolResultChunk, + TurnId, + TurnMetrics, + Workspace, + WorkspaceEntry, } from "@dispatch/wire"; diff --git a/packages/kernel/src/contracts/dispatch.ts b/packages/kernel/src/contracts/dispatch.ts index c2914cf..318f14a 100644 --- a/packages/kernel/src/contracts/dispatch.ts +++ b/packages/kernel/src/contracts/dispatch.ts @@ -26,6 +26,6 @@ * (safe for any tool), yet the first tool starts during generation. 
*/ export interface ToolDispatchPolicy { - readonly maxConcurrent: number; - readonly eager: boolean; + readonly maxConcurrent: number; + readonly eager: boolean; } diff --git a/packages/kernel/src/contracts/events.ts b/packages/kernel/src/contracts/events.ts index dca34c2..dfd7456 100644 --- a/packages/kernel/src/contracts/events.ts +++ b/packages/kernel/src/contracts/events.ts @@ -6,20 +6,20 @@ */ export type { - AgentEvent, - StatusEvent, - TurnDoneEvent, - TurnErrorEvent, - TurnInputEvent, - TurnProviderRetryEvent, - TurnReasoningDeltaEvent, - TurnSealedEvent, - TurnStartEvent, - TurnSteeringEvent, - TurnStepCompleteEvent, - TurnTextDeltaEvent, - TurnToolCallEvent, - TurnToolOutputEvent, - TurnToolResultEvent, - TurnUsageEvent, + AgentEvent, + StatusEvent, + TurnDoneEvent, + TurnErrorEvent, + TurnInputEvent, + TurnProviderRetryEvent, + TurnReasoningDeltaEvent, + TurnSealedEvent, + TurnStartEvent, + TurnSteeringEvent, + TurnStepCompleteEvent, + TurnTextDeltaEvent, + TurnToolCallEvent, + TurnToolOutputEvent, + TurnToolResultEvent, + TurnUsageEvent, } from "@dispatch/wire"; diff --git a/packages/kernel/src/contracts/extension.ts b/packages/kernel/src/contracts/extension.ts index 4d6cf07..fd1594b 100644 --- a/packages/kernel/src/contracts/extension.ts +++ b/packages/kernel/src/contracts/extension.ts @@ -9,11 +9,11 @@ import type { AuthContract } from "./auth.js"; import type { - EventHandler, - EventHookDescriptor, - FilterDescriptor, - FilterHandler, - ServiceHandle, + EventHandler, + EventHookDescriptor, + FilterDescriptor, + FilterHandler, + ServiceHandle, } from "./hooks.js"; import type { Logger } from "./logging.js"; @@ -32,16 +32,16 @@ export type TrustLevel = "bundled" | "local" | "external"; * discovery, dependency resolution, and the capability gate. 
*/ export interface ManifestContributions { - readonly tools?: readonly string[]; - readonly providers?: readonly string[]; - readonly auth?: readonly string[]; - readonly hooks?: readonly string[]; - readonly routes?: readonly string[]; - readonly commands?: readonly string[]; - readonly services?: readonly string[]; - readonly migrations?: readonly string[]; - readonly scheduledJobs?: readonly string[]; - readonly settings?: readonly string[]; + readonly tools?: readonly string[]; + readonly providers?: readonly string[]; + readonly auth?: readonly string[]; + readonly hooks?: readonly string[]; + readonly routes?: readonly string[]; + readonly commands?: readonly string[]; + readonly services?: readonly string[]; + readonly migrations?: readonly string[]; + readonly scheduledJobs?: readonly string[]; + readonly settings?: readonly string[]; } /** @@ -50,12 +50,12 @@ export interface ManifestContributions { * declared capability for. */ export interface ManifestCapabilities { - readonly fs?: boolean; - readonly shell?: boolean; - readonly network?: boolean; - readonly secrets?: boolean; - readonly db?: boolean; - readonly spawn?: boolean; + readonly fs?: boolean; + readonly shell?: boolean; + readonly network?: boolean; + readonly secrets?: boolean; + readonly db?: boolean; + readonly spawn?: boolean; } /** @@ -64,32 +64,32 @@ export interface ManifestCapabilities { * compatibility, and enforce the capability gate. */ export interface Manifest { - /** Unique extension identifier (e.g. "tools-fs", "provider-anthropic"). */ - readonly id: string; + /** Unique extension identifier (e.g. "tools-fs", "provider-anthropic"). */ + readonly id: string; - /** Human-readable display name. */ - readonly name: string; + /** Human-readable display name. */ + readonly name: string; - /** Extension's own version (semver). */ - readonly version: string; + /** Extension's own version (semver). 
*/ + readonly version: string; - /** Semver range of kernel API versions this extension is compatible with. */ - readonly apiVersion: string; + /** Semver range of kernel API versions this extension is compatible with. */ + readonly apiVersion: string; - /** Ids of extensions this one depends on (resolved topologically). */ - readonly dependsOn?: readonly string[]; + /** Ids of extensions this one depends on (resolved topologically). */ + readonly dependsOn?: readonly string[]; - /** Activation strategy: "eager" (on boot) or lazy event triggers. */ - readonly activation?: "eager" | string; + /** Activation strategy: "eager" (on boot) or lazy event triggers. */ + readonly activation?: "eager" | string; - /** What this extension contributes to the system. */ - readonly contributes?: ManifestContributions; + /** What this extension contributes to the system. */ + readonly contributes?: ManifestContributions; - /** Capabilities this extension requires from the host. */ - readonly capabilities?: ManifestCapabilities; + /** Capabilities this extension requires from the host. */ + readonly capabilities?: ManifestCapabilities; - /** Trust level — bundled (first-party), local (project), or external. */ - readonly trust: TrustLevel; + /** Trust level — bundled (first-party), local (project), or external. */ + readonly trust: TrustLevel; } // --- Storage interface --- @@ -100,40 +100,40 @@ export interface Manifest { * only the contract. Supports key-value and simple query operations. 
*/ export interface StorageNamespace { - readonly get: (key: string) => Promise<string | null>; - readonly set: (key: string, value: string) => Promise<void>; - readonly delete: (key: string) => Promise<void>; - readonly has: (key: string) => Promise<boolean>; - readonly keys: (prefix?: string) => Promise<readonly string[]>; + readonly get: (key: string) => Promise<string | null>; + readonly set: (key: string, value: string) => Promise<void>; + readonly delete: (key: string) => Promise<void>; + readonly has: (key: string) => Promise<boolean>; + readonly keys: (prefix?: string) => Promise<readonly string[]>; } // --- Permission --- /** The outcome of a permission check. */ export interface PermissionDecision { - readonly allowed: boolean; - readonly reason?: string; + readonly allowed: boolean; + readonly reason?: string; } /** A request to check whether an action is permitted. */ export interface PermissionRequest { - readonly tool: string; - readonly action: string; - readonly context?: Readonly<Record<string, unknown>>; + readonly tool: string; + readonly action: string; + readonly context?: Readonly<Record<string, unknown>>; } /** Permission gate exposed through the Host API. */ export interface PermissionGate { - readonly check: (request: PermissionRequest) => Promise<PermissionDecision>; + readonly check: (request: PermissionRequest) => Promise<PermissionDecision>; } // --- Scheduler --- /** A scheduled job definition an extension can register with the host. */ export interface ScheduledJob { - readonly id: string; - readonly cron: string; - readonly execute: () => void | Promise<void>; + readonly id: string; + readonly cron: string; + readonly execute: () => void | Promise<void>; } // --- Logger is re-exported from logging.ts (structured, correlated) --- @@ -142,24 +142,24 @@ export interface ScheduledJob { /** Read-only config access for an extension's own settings namespace. 
*/ export interface ConfigAccess { - readonly get: <T = unknown>(key: string) => T | undefined; - readonly getAll: () => Readonly<Record<string, unknown>>; + readonly get: <T = unknown>(key: string) => T | undefined; + readonly getAll: () => Readonly<Record<string, unknown>>; } // --- Secrets --- /** Capability-gated access to the secret/credential vault. */ export interface SecretsAccess { - readonly get: (key: string) => Promise<string | null>; - readonly set: (key: string, value: string) => Promise<void>; - readonly delete: (key: string) => Promise<void>; + readonly get: (key: string) => Promise<string | null>; + readonly set: (key: string, value: string) => Promise<void>; + readonly delete: (key: string) => Promise<void>; } // --- Events emitter --- /** Outward event emitter available to extensions via the Host API. */ export interface EventsEmitter { - readonly emit: (event: { readonly type: string; readonly [key: string]: unknown }) => void; + readonly emit: (event: { readonly type: string; readonly [key: string]: unknown }) => void; } // --- Host API --- @@ -175,102 +175,102 @@ export interface EventsEmitter { * module (not the kernel contracts). */ export interface HostAPI { - /** Register a tool with the kernel's tool registry. */ - readonly defineTool: (tool: ToolContract) => void; - - /** Register a provider with the kernel's provider registry. */ - readonly defineProvider: (provider: ProviderContract) => void; - - /** Register an auth provider with the kernel's auth registry. */ - readonly defineAuth: (auth: AuthContract) => void; - - /** Subscribe to an event hook. Handlers are error-isolated per call. */ - readonly on: <TPayload>( - hook: EventHookDescriptor<TPayload>, - handler: EventHandler<TPayload>, - ) => () => void; - - /** - * Emit an event hook: fire-and-forget dispatch to every `on` subscriber, - * error-isolated per handler (a thrown handler is caught + logged, never - * breaks the caller). The counterpart of `on`. 
- * - * This lets a core extension that OWNS a lifecycle publish typed events that - * standard extensions react to — e.g. the session-orchestrator emitting - * per-turn start/settle events a cache-warming extension subscribes to. The - * kernel owns the mechanism; the owner declares the typed `EventHookDescriptor`. - */ - readonly emit: <TPayload>(hook: EventHookDescriptor<TPayload>, payload: TPayload) => void; - - /** Add a filter to a filter hook chain. Filters are awaited in-band. */ - readonly addFilter: <TValue>( - hook: FilterDescriptor<TValue>, - fn: FilterHandler<TValue>, - ) => () => void; - - /** - * Run a filter chain: thread `value` through every filter registered for - * `hook` in priority/registration order and return the final value. The - * single-value-in/value-out counterpart to `addFilter`. Awaited in-band. - * - * Fail-open by default (a thrown filter is logged and the value passes - * through unchanged); pass `{ failClosed: true }` to make a thrown filter - * reject. With no registered filters the input value is returned as-is. - * - * This is what lets a core extension expose a contribution point (e.g. the - * session-orchestrator running a per-turn tool/context-assembly chain) that - * standard extensions plug into via `addFilter` — the kernel owns the - * mechanism, the owner declares the typed `FilterDescriptor`. - */ - readonly applyFilters: <TValue>( - hook: FilterDescriptor<TValue>, - value: TValue, - opts?: { readonly failClosed?: boolean }, - ) => Promise<TValue>; - - /** Provide an implementation for a typed service handle. */ - readonly provideService: <T>(handle: ServiceHandle<T>, impl: T) => void; - - /** Retrieve the implementation for a typed service handle. */ - readonly getService: <T>(handle: ServiceHandle<T>) => T; - - /** Get a namespaced storage interface for this extension. */ - readonly storage: (namespace: string) => StorageNamespace; - - /** Read-only access to merged config (global → project → extension). 
*/ - readonly config: ConfigAccess; - - /** Capability-gated access to the secret/credential vault. */ - readonly secrets: SecretsAccess; - - /** Permission gate — check whether an action is allowed. */ - readonly permissions: PermissionGate; - - /** Emit outward events (transport pushes these to clients). */ - readonly events: EventsEmitter; - - /** Logger — always available, even before other extensions activate. */ - readonly logger: Logger; - - /** Read-only view of all registered providers. */ - readonly getProviders: () => ReadonlyMap<string, ProviderContract>; - - /** Read-only view of all registered tools. */ - readonly getTools: () => ReadonlyMap<string, ToolContract>; - - /** Read-only view of all registered auth providers. */ - readonly getAuthProviders: () => ReadonlyMap<string, AuthContract>; - - /** Look up a single auth provider by id. */ - readonly getAuthProvider: (id: string) => AuthContract | undefined; - - /** Read-only view of all activated extensions' manifests (what is loaded). */ - readonly getExtensions: () => readonly Manifest[]; - - /** Register a scheduled job with the host's scheduler. */ - readonly scheduler: { - readonly register: (job: ScheduledJob) => void; - }; + /** Register a tool with the kernel's tool registry. */ + readonly defineTool: (tool: ToolContract) => void; + + /** Register a provider with the kernel's provider registry. */ + readonly defineProvider: (provider: ProviderContract) => void; + + /** Register an auth provider with the kernel's auth registry. */ + readonly defineAuth: (auth: AuthContract) => void; + + /** Subscribe to an event hook. Handlers are error-isolated per call. */ + readonly on: <TPayload>( + hook: EventHookDescriptor<TPayload>, + handler: EventHandler<TPayload>, + ) => () => void; + + /** + * Emit an event hook: fire-and-forget dispatch to every `on` subscriber, + * error-isolated per handler (a thrown handler is caught + logged, never + * breaks the caller). The counterpart of `on`. 
+ * + * This lets a core extension that OWNS a lifecycle publish typed events that + * standard extensions react to — e.g. the session-orchestrator emitting + * per-turn start/settle events a cache-warming extension subscribes to. The + * kernel owns the mechanism; the owner declares the typed `EventHookDescriptor`. + */ + readonly emit: <TPayload>(hook: EventHookDescriptor<TPayload>, payload: TPayload) => void; + + /** Add a filter to a filter hook chain. Filters are awaited in-band. */ + readonly addFilter: <TValue>( + hook: FilterDescriptor<TValue>, + fn: FilterHandler<TValue>, + ) => () => void; + + /** + * Run a filter chain: thread `value` through every filter registered for + * `hook` in priority/registration order and return the final value. The + * single-value-in/value-out counterpart to `addFilter`. Awaited in-band. + * + * Fail-open by default (a thrown filter is logged and the value passes + * through unchanged); pass `{ failClosed: true }` to make a thrown filter + * reject. With no registered filters the input value is returned as-is. + * + * This is what lets a core extension expose a contribution point (e.g. the + * session-orchestrator running a per-turn tool/context-assembly chain) that + * standard extensions plug into via `addFilter` — the kernel owns the + * mechanism, the owner declares the typed `FilterDescriptor`. + */ + readonly applyFilters: <TValue>( + hook: FilterDescriptor<TValue>, + value: TValue, + opts?: { readonly failClosed?: boolean }, + ) => Promise<TValue>; + + /** Provide an implementation for a typed service handle. */ + readonly provideService: <T>(handle: ServiceHandle<T>, impl: T) => void; + + /** Retrieve the implementation for a typed service handle. */ + readonly getService: <T>(handle: ServiceHandle<T>) => T; + + /** Get a namespaced storage interface for this extension. */ + readonly storage: (namespace: string) => StorageNamespace; + + /** Read-only access to merged config (global → project → extension). 
*/ + readonly config: ConfigAccess; + + /** Capability-gated access to the secret/credential vault. */ + readonly secrets: SecretsAccess; + + /** Permission gate — check whether an action is allowed. */ + readonly permissions: PermissionGate; + + /** Emit outward events (transport pushes these to clients). */ + readonly events: EventsEmitter; + + /** Logger — always available, even before other extensions activate. */ + readonly logger: Logger; + + /** Read-only view of all registered providers. */ + readonly getProviders: () => ReadonlyMap<string, ProviderContract>; + + /** Read-only view of all registered tools. */ + readonly getTools: () => ReadonlyMap<string, ToolContract>; + + /** Read-only view of all registered auth providers. */ + readonly getAuthProviders: () => ReadonlyMap<string, AuthContract>; + + /** Look up a single auth provider by id. */ + readonly getAuthProvider: (id: string) => AuthContract | undefined; + + /** Read-only view of all activated extensions' manifests (what is loaded). */ + readonly getExtensions: () => readonly Manifest[]; + + /** Register a scheduled job with the host's scheduler. */ + readonly scheduler: { + readonly register: (job: ScheduledJob) => void; + }; } // --- Extension lifecycle --- @@ -281,18 +281,18 @@ export interface HostAPI { * `deactivate` is optional and called on shutdown or reload. */ export interface Extension { - /** The extension's manifest — its declaration of identity and capabilities. */ - readonly manifest: Manifest; - - /** - * Called by the host to activate the extension. The extension registers - * its contributions (tools, providers, hooks, services) through the Host API. - */ - readonly activate: (host: HostAPI) => void | Promise<void>; - - /** - * Optional cleanup called when the extension is deactivated (shutdown, - * reload, or auto-disable). Should dispose resources the extension owns. 
- */ - readonly deactivate?: () => void | Promise<void>; + /** The extension's manifest — its declaration of identity and capabilities. */ + readonly manifest: Manifest; + + /** + * Called by the host to activate the extension. The extension registers + * its contributions (tools, providers, hooks, services) through the Host API. + */ + readonly activate: (host: HostAPI) => void | Promise<void>; + + /** + * Optional cleanup called when the extension is deactivated (shutdown, + * reload, or auto-disable). Should dispose resources the extension owns. + */ + readonly deactivate?: () => void | Promise<void>; } diff --git a/packages/kernel/src/contracts/hooks.ts b/packages/kernel/src/contracts/hooks.ts index eb94465..8f2bd1f 100644 --- a/packages/kernel/src/contracts/hooks.ts +++ b/packages/kernel/src/contracts/hooks.ts @@ -21,9 +21,9 @@ * (a thrown handler is caught and logged — it never breaks the turn). */ export interface EventHookDescriptor<TPayload> { - readonly kind: "event"; - readonly id: string; - readonly _payload?: TPayload; + readonly kind: "event"; + readonly id: string; + readonly _payload?: TPayload; } /** @@ -35,9 +35,9 @@ export interface EventHookDescriptor<TPayload> { * the owner may mark a chain fail-closed. */ export interface FilterDescriptor<TValue> { - readonly kind: "filter"; - readonly id: string; - readonly _value?: TValue; + readonly kind: "filter"; + readonly id: string; + readonly _value?: TValue; } /** Union of hook descriptor kinds the kernel mechanism supports. */ @@ -49,9 +49,9 @@ export type HookDescriptor<TPayload> = EventHookDescriptor<TPayload> | FilterDes * "which of N handlers wins?" ambiguity; a service has exactly one provider. */ export interface ServiceHandle<T> { - readonly kind: "service"; - readonly id: string; - readonly _type?: T; + readonly kind: "service"; + readonly id: string; + readonly _type?: T; } /** @@ -61,7 +61,7 @@ export interface ServiceHandle<T> { * @param id - Namespaced hook id in `owner/name` form (e.g. 
"kernel/turn.sealed"). */ export function defineEventHook<TPayload>(id: string): EventHookDescriptor<TPayload> { - return { kind: "event", id }; + return { kind: "event", id }; } /** @@ -71,7 +71,7 @@ export function defineEventHook<TPayload>(id: string): EventHookDescriptor<TPayl * @param id - Namespaced filter id in `owner/name` form. */ export function defineFilter<TValue>(id: string): FilterDescriptor<TValue> { - return { kind: "filter", id }; + return { kind: "filter", id }; } /** @@ -82,7 +82,7 @@ export function defineFilter<TValue>(id: string): FilterDescriptor<TValue> { * @param id - Namespaced service id in `owner/name` form. */ export function defineService<T>(id: string): ServiceHandle<T> { - return { kind: "service", id }; + return { kind: "service", id }; } /** Handler function for an event hook subscription. */ diff --git a/packages/kernel/src/contracts/index.ts b/packages/kernel/src/contracts/index.ts index f3e5bca..09e0a56 100644 --- a/packages/kernel/src/contracts/index.ts +++ b/packages/kernel/src/contracts/index.ts @@ -7,118 +7,118 @@ */ export type { - ApiKeyCredentials, - AuthContract, - BearerTokenCredentials, - Credentials, + ApiKeyCredentials, + AuthContract, + BearerTokenCredentials, + Credentials, } from "./auth.js"; export type { - ChatMessage, - Chunk, - CompactionResult, - ConversationMeta, - ConversationStatus, - ErrorChunk, - Role, - StepId, - StepMetrics, - StoredChunk, - SystemChunk, - TextChunk, - ThinkingChunk, - ToolCallChunk, - ToolResultChunk, - TurnId, - TurnMetrics, - Workspace, - WorkspaceEntry, + ChatMessage, + Chunk, + CompactionResult, + ConversationMeta, + ConversationStatus, + ErrorChunk, + Role, + StepId, + StepMetrics, + StoredChunk, + SystemChunk, + TextChunk, + ThinkingChunk, + ToolCallChunk, + ToolResultChunk, + TurnId, + TurnMetrics, + Workspace, + WorkspaceEntry, } from "./conversation.js"; export type { ToolDispatchPolicy } from "./dispatch.js"; export type { - AgentEvent, - StatusEvent, - TurnDoneEvent, - 
TurnErrorEvent, - TurnInputEvent, - TurnProviderRetryEvent, - TurnReasoningDeltaEvent, - TurnSealedEvent, - TurnStartEvent, - TurnSteeringEvent, - TurnStepCompleteEvent, - TurnTextDeltaEvent, - TurnToolCallEvent, - TurnToolOutputEvent, - TurnToolResultEvent, - TurnUsageEvent, + AgentEvent, + StatusEvent, + TurnDoneEvent, + TurnErrorEvent, + TurnInputEvent, + TurnProviderRetryEvent, + TurnReasoningDeltaEvent, + TurnSealedEvent, + TurnStartEvent, + TurnSteeringEvent, + TurnStepCompleteEvent, + TurnTextDeltaEvent, + TurnToolCallEvent, + TurnToolOutputEvent, + TurnToolResultEvent, + TurnUsageEvent, } from "./events.js"; export type { - ConfigAccess, - EventsEmitter, - Extension, - HostAPI, - Manifest, - ManifestCapabilities, - ManifestContributions, - PermissionDecision, - PermissionGate, - PermissionRequest, - ScheduledJob, - SecretsAccess, - StorageNamespace, - TrustLevel, + ConfigAccess, + EventsEmitter, + Extension, + HostAPI, + Manifest, + ManifestCapabilities, + ManifestContributions, + PermissionDecision, + PermissionGate, + PermissionRequest, + ScheduledJob, + SecretsAccess, + StorageNamespace, + TrustLevel, } from "./extension.js"; export type { - EventHandler, - EventHookDescriptor, - FilterDescriptor, - FilterHandler, - HookDescriptor, - ServiceHandle, + EventHandler, + EventHookDescriptor, + FilterDescriptor, + FilterHandler, + HookDescriptor, + ServiceHandle, } from "./hooks.js"; export { defineEventHook, defineFilter, defineService } from "./hooks.js"; export type { - Attributes, - ErrorAttributes, - Level, - LogContext, - LogDeps, - Logger, - LogLineRecord, - LogRecord, - LogSink, - Span, - SpanCloseRecord, - SpanLink, - SpanOpenRecord, - SpanStatus, + Attributes, + ErrorAttributes, + Level, + LogContext, + LogDeps, + Logger, + LogLineRecord, + LogRecord, + LogSink, + Span, + SpanCloseRecord, + SpanLink, + SpanOpenRecord, + SpanStatus, } from "./logging.js"; export type { - FinishEvent, - ModelInfo, - ProviderContract, - ProviderErrorEvent, - 
ProviderEvent, - ProviderStreamOptions, - ProviderToolCallEvent, - ReasoningDeltaEvent, - ReasoningEffort, - TextDeltaEvent, - Usage, - UsageEvent, + FinishEvent, + ModelInfo, + ProviderContract, + ProviderErrorEvent, + ProviderEvent, + ProviderStreamOptions, + ProviderToolCallEvent, + ReasoningDeltaEvent, + ReasoningEffort, + TextDeltaEvent, + Usage, + UsageEvent, } from "./provider.js"; export type { - EventEmitter, - FinishReason, - RetryStrategy, - RunTurnInput, - RunTurnResult, + EventEmitter, + FinishReason, + RetryStrategy, + RunTurnInput, + RunTurnResult, } from "./runtime.js"; export type { - JsonSchemaProperty, - ToolCall, - ToolContract, - ToolExecuteContext, - ToolParameterSchema, - ToolResult, + JsonSchemaProperty, + ToolCall, + ToolContract, + ToolExecuteContext, + ToolParameterSchema, + ToolResult, } from "./tool.js"; diff --git a/packages/kernel/src/contracts/logging.ts b/packages/kernel/src/contracts/logging.ts index a8bab7c..d121777 100644 --- a/packages/kernel/src/contracts/logging.ts +++ b/packages/kernel/src/contracts/logging.ts @@ -27,12 +27,12 @@ export type Attributes = Readonly<Record<string, string | number | boolean | nul /** Correlation context carried on every log record and span. */ export interface LogContext { - /** Auto-stamped by host from manifest.id (D6) — never caller-supplied. */ - readonly extensionId: string; - readonly conversationId?: string; - readonly turnId?: string; - readonly spanId?: string; - readonly parentSpanId?: string; + /** Auto-stamped by host from manifest.id (D6) — never caller-supplied. */ + readonly extensionId: string; + readonly conversationId?: string; + readonly turnId?: string; + readonly spanId?: string; + readonly parentSpanId?: string; } // --- Span --- @@ -43,27 +43,27 @@ export interface LogContext { * crashed turn is reconstructable from the journal (D3). */ export interface Span { - readonly id: string; - /** Pre-bound Logger scoped to this span's correlation. 
*/ - readonly log: Logger; - /** Add or overwrite attributes on this span. */ - readonly setAttributes: (attrs: Attributes) => void; - /** Record a causal link to another span (D4 cross-feature causality). */ - readonly addLink: ( - target: { readonly spanId: string; readonly turnId?: string }, - reason?: string, - ) => void; - /** Open a child span nested under this one. */ - readonly child: (name: string, attrs?: Attributes, body?: string) => Span; - /** - * Close this span. Records duration + status. Optionally records an - * error, additional attributes, and/or a body payload. - */ - readonly end: (outcome?: { - readonly err?: unknown; - readonly attrs?: Attributes; - readonly body?: string; - }) => void; + readonly id: string; + /** Pre-bound Logger scoped to this span's correlation. */ + readonly log: Logger; + /** Add or overwrite attributes on this span. */ + readonly setAttributes: (attrs: Attributes) => void; + /** Record a causal link to another span (D4 cross-feature causality). */ + readonly addLink: ( + target: { readonly spanId: string; readonly turnId?: string }, + reason?: string, + ) => void; + /** Open a child span nested under this one. */ + readonly child: (name: string, attrs?: Attributes, body?: string) => Span; + /** + * Close this span. Records duration + status. Optionally records an + * error, additional attributes, and/or a body payload. + */ + readonly end: (outcome?: { + readonly err?: unknown; + readonly attrs?: Attributes; + readonly body?: string; + }) => void; } // --- Logger --- @@ -75,17 +75,17 @@ export interface Span { * `info("msg")` must still compile — attrs is optional (backward compat). 
*/ export interface Logger { - readonly debug: (msg: string, attrs?: Attributes) => void; - readonly info: (msg: string, attrs?: Attributes) => void; - readonly warn: (msg: string, attrs?: Attributes) => void; - readonly error: (msg: string, attrs?: ErrorAttributes) => void; - /** - * Create a child logger with additional correlation context. - * Explicit values passed down (P3 — no ambient state). - */ - readonly child: (ctx: Partial<LogContext> & { readonly attrs?: Attributes }) => Logger; - /** Open a new span. Emits a `span-open` record immediately (D3). */ - readonly span: (name: string, attrs?: Attributes, body?: string) => Span; + readonly debug: (msg: string, attrs?: Attributes) => void; + readonly info: (msg: string, attrs?: Attributes) => void; + readonly warn: (msg: string, attrs?: Attributes) => void; + readonly error: (msg: string, attrs?: ErrorAttributes) => void; + /** + * Create a child logger with additional correlation context. + * Explicit values passed down (P3 — no ambient state). + */ + readonly child: (ctx: Partial<LogContext> & { readonly attrs?: Attributes }) => Logger; + /** Open a new span. Emits a `span-open` record immediately (D3). */ + readonly span: (name: string, attrs?: Attributes, body?: string) => Span; } /** @@ -94,8 +94,8 @@ export interface Logger { * pass `error("msg", { err })` directly. */ export interface ErrorAttributes { - readonly err?: unknown; - readonly [key: string]: unknown; + readonly err?: unknown; + readonly [key: string]: unknown; } // --- LogRecord (discriminated union) --- @@ -109,9 +109,9 @@ export type SpanStatus = "ok" | "error"; * A link to another span, recorded at a handoff moment (D4). 
*/ export interface SpanLink { - readonly spanId: string; - readonly turnId?: string; - readonly reason?: string; + readonly spanId: string; + readonly turnId?: string; + readonly reason?: string; } /** @@ -126,50 +126,50 @@ export type LogRecord = LogLineRecord | SpanOpenRecord | SpanCloseRecord; /** A structured log line (debug/info/warn/error). */ export interface LogLineRecord { - readonly kind: "log"; - readonly level: Level; - readonly msg: string; - readonly timestamp: number; - readonly extensionId: string; - readonly conversationId?: string; - readonly turnId?: string; - readonly spanId?: string; - readonly parentSpanId?: string; - readonly attributes?: Attributes; - /** Optional large verbatim payload (store-fat, serve-thin). */ - readonly body?: string; + readonly kind: "log"; + readonly level: Level; + readonly msg: string; + readonly timestamp: number; + readonly extensionId: string; + readonly conversationId?: string; + readonly turnId?: string; + readonly spanId?: string; + readonly parentSpanId?: string; + readonly attributes?: Attributes; + /** Optional large verbatim payload (store-fat, serve-thin). */ + readonly body?: string; } /** Emitted when a span is opened (at `logger.span(name)`). 
*/ export interface SpanOpenRecord { - readonly kind: "span-open"; - readonly spanId: string; - readonly name: string; - readonly timestamp: number; - readonly extensionId: string; - readonly conversationId?: string; - readonly turnId?: string; - readonly parentSpanId?: string; - readonly attributes?: Attributes; - readonly links?: readonly SpanLink[]; - readonly body?: string; + readonly kind: "span-open"; + readonly spanId: string; + readonly name: string; + readonly timestamp: number; + readonly extensionId: string; + readonly conversationId?: string; + readonly turnId?: string; + readonly parentSpanId?: string; + readonly attributes?: Attributes; + readonly links?: readonly SpanLink[]; + readonly body?: string; } /** Emitted when a span is closed (at `span.end()`). Carries duration + status. */ export interface SpanCloseRecord { - readonly kind: "span-close"; - readonly spanId: string; - readonly name: string; - readonly timestamp: number; - readonly durationMs: number; - readonly status: SpanStatus; - readonly extensionId: string; - readonly conversationId?: string; - readonly turnId?: string; - readonly parentSpanId?: string; - readonly attributes?: Attributes; - readonly links?: readonly SpanLink[]; - readonly body?: string; + readonly kind: "span-close"; + readonly spanId: string; + readonly name: string; + readonly timestamp: number; + readonly durationMs: number; + readonly status: SpanStatus; + readonly extensionId: string; + readonly conversationId?: string; + readonly turnId?: string; + readonly parentSpanId?: string; + readonly attributes?: Attributes; + readonly links?: readonly SpanLink[]; + readonly body?: string; } // --- LogSink --- @@ -179,13 +179,13 @@ export interface SpanCloseRecord { * a concrete implementation. Kernel never lets sink errors escape (D7). 
*/ export interface LogSink { - readonly emit: (record: LogRecord) => void; + readonly emit: (record: LogRecord) => void; } // --- Deterministic helpers (injected for testability) --- /** Clock + id generator injected into the logger factory. */ export interface LogDeps { - readonly now: () => number; - readonly newId: () => string; + readonly now: () => number; + readonly newId: () => string; } diff --git a/packages/kernel/src/contracts/provider.ts b/packages/kernel/src/contracts/provider.ts index 52d853b..b6dc8ca 100644 --- a/packages/kernel/src/contracts/provider.ts +++ b/packages/kernel/src/contracts/provider.ts @@ -19,23 +19,23 @@ export type { ReasoningEffort, Usage } from "@dispatch/wire"; * Discriminated by `type`. */ export type ProviderEvent = - | TextDeltaEvent - | ReasoningDeltaEvent - | ProviderToolCallEvent - | UsageEvent - | FinishEvent - | ProviderErrorEvent; + | TextDeltaEvent + | ReasoningDeltaEvent + | ProviderToolCallEvent + | UsageEvent + | FinishEvent + | ProviderErrorEvent; /** Incremental text content from the model. */ export interface TextDeltaEvent { - readonly type: "text-delta"; - readonly delta: string; + readonly type: "text-delta"; + readonly delta: string; } /** Incremental reasoning / thinking content from the model. */ export interface ReasoningDeltaEvent { - readonly type: "reasoning-delta"; - readonly delta: string; + readonly type: "reasoning-delta"; + readonly delta: string; } /** @@ -43,16 +43,16 @@ export interface ReasoningDeltaEvent { * dispatch to the matching `ToolContract`. */ export interface ProviderToolCallEvent { - readonly type: "tool-call"; - readonly toolCallId: string; - readonly toolName: string; - readonly input: unknown; + readonly type: "tool-call"; + readonly toolCallId: string; + readonly toolName: string; + readonly input: unknown; } /** Token usage report, typically emitted at step end. 
*/ export interface UsageEvent { - readonly type: "usage"; - readonly usage: Usage; + readonly type: "usage"; + readonly usage: Usage; } /** @@ -60,16 +60,16 @@ export interface UsageEvent { * generating (e.g. "stop", "tool-calls", "length", "content-filter"). */ export interface FinishEvent { - readonly type: "finish"; - readonly reason: string; + readonly type: "finish"; + readonly reason: string; } /** An error from the provider (network, rate-limit, model error, etc.). */ export interface ProviderErrorEvent { - readonly type: "error"; - readonly message: string; - readonly code?: string; - readonly retryable?: boolean; + readonly type: "error"; + readonly message: string; + readonly code?: string; + readonly retryable?: boolean; } /** @@ -77,30 +77,30 @@ export interface ProviderErrorEvent { * Kept minimal — providers may ignore fields they don't support. */ export interface ProviderStreamOptions { - /** Model identifier to use. */ - readonly model?: string; - /** Sampling temperature override. */ - readonly temperature?: number; - /** Maximum output tokens override. */ - readonly maxTokens?: number; - /** System prompt to prepend. */ - readonly systemPrompt?: string; - /** - * Reasoning-effort level for this request (already RESOLVED by the caller — - * the session-orchestrator applies the request → conversation → `"high"` - * default chain, so a provider receiving `undefined` may treat it as "no - * preference"). The provider maps the level to its native thinking knob in - * its own code; providers without such a knob ignore it. - */ - readonly reasoningEffort?: ReasoningEffort; - /** - * Correlated logger for this turn's step (Phase A logging ABI). When present, - * the provider should open a child `provider.request` span and capture the - * verbatim post-transform request + raw response/error there, self-redacting - * secrets in its own code. Optional so non-instrumented callers/tests still - * compile (the provider falls back to no capture). 
- */ - readonly logger?: Logger; + /** Model identifier to use. */ + readonly model?: string; + /** Sampling temperature override. */ + readonly temperature?: number; + /** Maximum output tokens override. */ + readonly maxTokens?: number; + /** System prompt to prepend. */ + readonly systemPrompt?: string; + /** + * Reasoning-effort level for this request (already RESOLVED by the caller — + * the session-orchestrator applies the request → conversation → `"high"` + * default chain, so a provider receiving `undefined` may treat it as "no + * preference"). The provider maps the level to its native thinking knob in + * its own code; providers without such a knob ignore it. + */ + readonly reasoningEffort?: ReasoningEffort; + /** + * Correlated logger for this turn's step (Phase A logging ABI). When present, + * the provider should open a child `provider.request` span and capture the + * verbatim post-transform request + raw response/error there, self-redacting + * secrets in its own code. Optional so non-instrumented callers/tests still + * compile (the provider falls back to no capture). + */ + readonly logger?: Logger; } /** @@ -110,10 +110,10 @@ export interface ProviderStreamOptions { * is the wire model identifier; `displayName` is an optional human label. */ export interface ModelInfo { - readonly id: string; - readonly displayName?: string; - /** The model's max context window in tokens (e.g. 200000). Optional — providers that don't report it leave it undefined. */ - readonly contextWindow?: number; + readonly id: string; + readonly displayName?: string; + /** The model's max context window in tokens (e.g. 200000). Optional — providers that don't report it leave it undefined. */ + readonly contextWindow?: number; } /** @@ -122,26 +122,26 @@ export interface ModelInfo { * concrete LLM API is behind it. */ export interface ProviderContract { - /** Unique identifier for this provider (e.g. "anthropic", "openai-compat"). 
*/ - readonly id: string; + /** Unique identifier for this provider (e.g. "anthropic", "openai-compat"). */ + readonly id: string; - /** - * Stream a response for the given messages and available tools. - * The provider yields `ProviderEvent`s incrementally; the kernel drives - * tool dispatch and chunk assembly from them. - */ - readonly stream: ( - messages: readonly ChatMessage[], - tools: readonly ToolContract[], - opts?: ProviderStreamOptions, - ) => AsyncIterable<ProviderEvent>; + /** + * Stream a response for the given messages and available tools. + * The provider yields `ProviderEvent`s incrementally; the kernel drives + * tool dispatch and chunk assembly from them. + */ + readonly stream: ( + messages: readonly ChatMessage[], + tools: readonly ToolContract[], + opts?: ProviderStreamOptions, + ) => AsyncIterable<ProviderEvent>; - /** - * Enumerate the models this provider can serve, each in its own way (e.g. an - * OpenAI-compatible provider GETs `/v1/models`). Optional: a provider that - * cannot (or chooses not to) enumerate omits it, and a catalog simply lists - * none for it. A future multi-credential design may pass per-credential - * credentials in; today the provider uses the key it resolved at activate. - */ - readonly listModels?: () => Promise<readonly ModelInfo[]>; + /** + * Enumerate the models this provider can serve, each in its own way (e.g. an + * OpenAI-compatible provider GETs `/v1/models`). Optional: a provider that + * cannot (or chooses not to) enumerate omits it, and a catalog simply lists + * none for it. A future multi-credential design may pass per-credential + * credentials in; today the provider uses the key it resolved at activate. 
+ */ + readonly listModels?: () => Promise<readonly ModelInfo[]>; } diff --git a/packages/kernel/src/contracts/runtime.ts b/packages/kernel/src/contracts/runtime.ts index dc74c84..71d2211 100644 --- a/packages/kernel/src/contracts/runtime.ts +++ b/packages/kernel/src/contracts/runtime.ts @@ -26,14 +26,14 @@ export type EventEmitter = (event: AgentEvent) => void; * passed through verbatim without losing autocomplete on the known values. */ export type FinishReason = - | "stop" - | "tool-calls" - | "length" - | "content-filter" - | "max-steps" - | "error" - | "aborted" - | (string & {}); + | "stop" + | "tool-calls" + | "length" + | "content-filter" + | "max-steps" + | "error" + | "aborted" + | (string & {}); /** * Input to `runTurn` — everything the kernel needs to execute one turn. @@ -41,121 +41,121 @@ export type FinishReason = * the kernel never reads config or resolves providers/tools itself. */ export interface RunTurnInput { - /** The resolved provider to stream from. */ - readonly provider: ProviderContract; - - /** The conversation history (including system prompt as first message). */ - readonly messages: readonly ChatMessage[]; - - /** The tool set available for this turn (may be empty). */ - readonly tools: readonly ToolContract[]; - - /** How to dispatch tool calls within each step. */ - readonly dispatch: ToolDispatchPolicy; - - /** The emitter the kernel calls for each outward event. */ - readonly emit: EventEmitter; - - /** - * Identifiers used to attribute every emitted `AgentEvent`. The kernel does - * not generate these — the session-orchestrator owns turn/conversation identity - * and passes them in, so events are traceable to their conversation. - */ - readonly conversationId: string; - readonly turnId: string; - - /** - * Optional per-turn provider options (model, temperature, maxTokens, - * systemPrompt). The orchestrator resolves these; the kernel forwards them - * verbatim to `provider.stream` and never interprets them. 
A provider may - * also be pre-configured at construction and ignore these. - */ - readonly providerOpts?: ProviderStreamOptions; - - /** Cancellation signal for the entire turn. */ - readonly signal?: AbortSignal; - - /** - * Working directory for this turn's tool execution. The kernel does NOT - * interpret it — it forwards the value verbatim to each `ToolExecuteContext.cwd` - * so tools resolve/contain paths against it. It never enters the model prompt, - * so it does not affect prompt caching. When omitted, tools fall back to their - * own configured/default workdir. - */ - readonly cwd?: string; - - /** - * The computer to execute this turn's tools on (SSH support). Omitted/undefined - * = LOCAL (today's behavior). When set, it is an SSH config alias; the kernel - * does NOT interpret it — it forwards the value verbatim to each - * `ToolExecuteContext.computerId`, exactly like `cwd`. It never enters the - * model prompt, so it does not affect prompt caching. Tools resolve their - * execution backend (local vs. remote) from this; see - * `notes/ssh-support-plan.md`. - */ - readonly computerId?: string; - - /** - * Optional logger for structured span instrumentation. The runtime opens - * turn/step/tool-call spans using this logger. If omitted, no spans are - * emitted (backward-compatible with callers that don't yet pass a logger). - */ - readonly logger?: Logger; - - /** - * Optional monotonic-ish clock (milliseconds) for emitting wall-clock timing - * on outward events: per-step `step-complete` (ttft/decode/genTotal), tool - * execution `durationMs` on `tool-result`, and turn `durationMs` on `done`. - * Injected (not ambient) so the runtime stays pure and deterministic in tests. - * If omitted, the runtime emits no such timing (the optional fields stay - * absent) — backward-compatible with callers that don't provide a clock. - */ - readonly now?: () => number; - - /** - * Optional. 
Called by the runtime at the tool-result boundary — after a - * step whose tool calls have all executed, before the next step begins — - * to drain messages to inject alongside the tool results. Whatever it - * returns is appended as user-role messages to the next step's input, so - * a caller can inject mid-turn guidance the model sees with the tool - * results. When omitted or returning an empty array, no injection happens - * (the runtime is unchanged). - * - * Injected (not ambient) so the kernel stays pure: it owns no queue and - * names no feature — it just calls the callback and appends what it gets. - * Only invoked when a step PRODUCED tool calls (the tool-result boundary); - * a step that ends without tool calls does not drain (the caller decides - * what to do with any pending messages after the turn ends). - */ - readonly drainSteering?: () => readonly ChatMessage[]; - - /** - * Optional. Called by the runtime after each step's messages are finalized - * (the assistant message + tool-result messages are built). The caller can - * use this to persist step messages incrementally — assigning seq numbers - * during generation so consumers can `GET /conversations/:id?sinceSeq=N` - * mid-turn. When omitted, the caller must persist all messages at turn end - * (via `RunTurnResult.messages`). The messages passed to this callback are - * the SAME objects in `RunTurnResult.messages` — the caller must NOT - * double-persist them. - */ - readonly onStepComplete?: (messages: readonly ChatMessage[]) => Promise<void> | void; - - /** - * Optional injected retry strategy for retryable provider errors (e.g. HTTP - * 429 / 5xx "overloaded"). When omitted, a retryable error ends the step - * exactly as before (backward-compatible). 
When provided, the runtime wraps - * `provider.stream()` consumption in a retry loop: on a retryable error - * (an emitted `error` ProviderEvent with `retryable === true`, OR a thrown - * error) — ONLY when no content was emitted yet this step (the safety - * invariant — never duplicate partial output) — it asks `retry.delayFor` - * for a delay, emits a transient `provider-retry` AgentEvent, sleeps via the - * injected `retry.sleep` (abortable), and re-calls `provider.stream()`. - * - * Injected (not ambient): the kernel imports no timer and owns no schedule. - * Mirrors the `now`/`logger` injection pattern — optional + backward-compatible. - */ - readonly retry?: RetryStrategy; + /** The resolved provider to stream from. */ + readonly provider: ProviderContract; + + /** The conversation history (including system prompt as first message). */ + readonly messages: readonly ChatMessage[]; + + /** The tool set available for this turn (may be empty). */ + readonly tools: readonly ToolContract[]; + + /** How to dispatch tool calls within each step. */ + readonly dispatch: ToolDispatchPolicy; + + /** The emitter the kernel calls for each outward event. */ + readonly emit: EventEmitter; + + /** + * Identifiers used to attribute every emitted `AgentEvent`. The kernel does + * not generate these — the session-orchestrator owns turn/conversation identity + * and passes them in, so events are traceable to their conversation. + */ + readonly conversationId: string; + readonly turnId: string; + + /** + * Optional per-turn provider options (model, temperature, maxTokens, + * systemPrompt). The orchestrator resolves these; the kernel forwards them + * verbatim to `provider.stream` and never interprets them. A provider may + * also be pre-configured at construction and ignore these. + */ + readonly providerOpts?: ProviderStreamOptions; + + /** Cancellation signal for the entire turn. */ + readonly signal?: AbortSignal; + + /** + * Working directory for this turn's tool execution. 
The kernel does NOT + * interpret it — it forwards the value verbatim to each `ToolExecuteContext.cwd` + * so tools resolve/contain paths against it. It never enters the model prompt, + * so it does not affect prompt caching. When omitted, tools fall back to their + * own configured/default workdir. + */ + readonly cwd?: string; + + /** + * The computer to execute this turn's tools on (SSH support). Omitted/undefined + * = LOCAL (today's behavior). When set, it is an SSH config alias; the kernel + * does NOT interpret it — it forwards the value verbatim to each + * `ToolExecuteContext.computerId`, exactly like `cwd`. It never enters the + * model prompt, so it does not affect prompt caching. Tools resolve their + * execution backend (local vs. remote) from this; see + * `notes/ssh-support-plan.md`. + */ + readonly computerId?: string; + + /** + * Optional logger for structured span instrumentation. The runtime opens + * turn/step/tool-call spans using this logger. If omitted, no spans are + * emitted (backward-compatible with callers that don't yet pass a logger). + */ + readonly logger?: Logger; + + /** + * Optional monotonic-ish clock (milliseconds) for emitting wall-clock timing + * on outward events: per-step `step-complete` (ttft/decode/genTotal), tool + * execution `durationMs` on `tool-result`, and turn `durationMs` on `done`. + * Injected (not ambient) so the runtime stays pure and deterministic in tests. + * If omitted, the runtime emits no such timing (the optional fields stay + * absent) — backward-compatible with callers that don't provide a clock. + */ + readonly now?: () => number; + + /** + * Optional. Called by the runtime at the tool-result boundary — after a + * step whose tool calls have all executed, before the next step begins — + * to drain messages to inject alongside the tool results. 
Whatever it + * returns is appended as user-role messages to the next step's input, so + * a caller can inject mid-turn guidance the model sees with the tool + * results. When omitted or returning an empty array, no injection happens + * (the runtime is unchanged). + * + * Injected (not ambient) so the kernel stays pure: it owns no queue and + * names no feature — it just calls the callback and appends what it gets. + * Only invoked when a step PRODUCED tool calls (the tool-result boundary); + * a step that ends without tool calls does not drain (the caller decides + * what to do with any pending messages after the turn ends). + */ + readonly drainSteering?: () => readonly ChatMessage[]; + + /** + * Optional. Called by the runtime after each step's messages are finalized + * (the assistant message + tool-result messages are built). The caller can + * use this to persist step messages incrementally — assigning seq numbers + * during generation so consumers can `GET /conversations/:id?sinceSeq=N` + * mid-turn. When omitted, the caller must persist all messages at turn end + * (via `RunTurnResult.messages`). The messages passed to this callback are + * the SAME objects in `RunTurnResult.messages` — the caller must NOT + * double-persist them. + */ + readonly onStepComplete?: (messages: readonly ChatMessage[]) => Promise<void> | void; + + /** + * Optional injected retry strategy for retryable provider errors (e.g. HTTP + * 429 / 5xx "overloaded"). When omitted, a retryable error ends the step + * exactly as before (backward-compatible). 
When provided, the runtime wraps + * `provider.stream()` consumption in a retry loop: on a retryable error + * (an emitted `error` ProviderEvent with `retryable === true`, OR a thrown + * error) — ONLY when no content was emitted yet this step (the safety + * invariant — never duplicate partial output) — it asks `retry.delayFor` + * for a delay, emits a transient `provider-retry` AgentEvent, sleeps via the + * injected `retry.sleep` (abortable), and re-calls `provider.stream()`. + * + * Injected (not ambient): the kernel imports no timer and owns no schedule. + * Mirrors the `now`/`logger` injection pattern — optional + backward-compatible. + */ + readonly retry?: RetryStrategy; } /** @@ -163,14 +163,14 @@ export interface RunTurnInput { * persist the new messages and report usage. */ export interface RunTurnResult { - /** The assistant messages produced by this turn (appended to history). */ - readonly messages: readonly ChatMessage[]; + /** The assistant messages produced by this turn (appended to history). */ + readonly messages: readonly ChatMessage[]; - /** Aggregated token usage across all steps in the turn. */ - readonly usage: Usage; + /** Aggregated token usage across all steps in the turn. */ + readonly usage: Usage; - /** Why the turn ended. */ - readonly finishReason: FinishReason; + /** Why the turn ended. */ + readonly finishReason: FinishReason; } /** @@ -187,16 +187,16 @@ export interface RunTurnResult { * the step exactly as before). */ export interface RetryStrategy { - /** - * Pure, deterministic decision: given the 0-based attempt index, return the - * delay in ms to sleep before the next retry, or `undefined` to stop (budget - * exhausted). No I/O, no clock — fully testable. - */ - readonly delayFor: (attempt: number) => number | undefined; - /** - * Injected effect: actually sleep for the given ms. Must honor the abort - * signal — reject when aborted so the turn seals `aborted`. 
The kernel - * imports no timer; the shell provides a `setTimeout`-based implementation. - */ - readonly sleep: (ms: number, signal: AbortSignal) => Promise<void>; + /** + * Pure, deterministic decision: given the 0-based attempt index, return the + * delay in ms to sleep before the next retry, or `undefined` to stop (budget + * exhausted). No I/O, no clock — fully testable. + */ + readonly delayFor: (attempt: number) => number | undefined; + /** + * Injected effect: actually sleep for the given ms. Must honor the abort + * signal — reject when aborted so the turn seals `aborted`. The kernel + * imports no timer; the shell provides a `setTimeout`-based implementation. + */ + readonly sleep: (ms: number, signal: AbortSignal) => Promise<void>; } diff --git a/packages/kernel/src/contracts/tool.ts b/packages/kernel/src/contracts/tool.ts index 589fbd0..897b86e 100644 --- a/packages/kernel/src/contracts/tool.ts +++ b/packages/kernel/src/contracts/tool.ts @@ -16,22 +16,22 @@ import type { Logger } from "./logging.js"; * Using a structural type (not a library) keeps the kernel dependency-free. */ export interface ToolParameterSchema { - readonly type: "object"; - readonly properties?: Readonly<Record<string, JsonSchemaProperty>>; - readonly required?: readonly string[]; - readonly additionalProperties?: boolean; - readonly description?: string; + readonly type: "object"; + readonly properties?: Readonly<Record<string, JsonSchemaProperty>>; + readonly required?: readonly string[]; + readonly additionalProperties?: boolean; + readonly description?: string; } /** A single property within a tool's parameter schema. 
*/ export interface JsonSchemaProperty { - readonly type?: string; - readonly description?: string; - readonly enum?: readonly string[]; - readonly items?: JsonSchemaProperty; - readonly properties?: Readonly<Record<string, JsonSchemaProperty>>; - readonly required?: readonly string[]; - readonly default?: unknown; + readonly type?: string; + readonly description?: string; + readonly enum?: readonly string[]; + readonly items?: JsonSchemaProperty; + readonly properties?: Readonly<Record<string, JsonSchemaProperty>>; + readonly required?: readonly string[]; + readonly default?: unknown; } /** @@ -40,56 +40,56 @@ export interface JsonSchemaProperty { * concurrent tool output is never interleaved ambiguously. */ export interface ToolExecuteContext { - /** Unique id of the tool-call this execution serves. */ - readonly toolCallId: string; - - /** - * Stream output from the tool. The kernel attributes every call to the - * tool-call id, so concurrent shell output from different tools is - * correctly separated. - */ - readonly onOutput: (data: string, stream: "stdout" | "stderr") => void; - - /** - * Cancellation signal. An aborted turn sets this so in-flight tool work - * can clean up rather than leak. - */ - readonly signal: AbortSignal; - - /** - * Pre-bound Logger scoped to this tool-call span. Tools log correlated - * without a global (P3). The kernel stamps extensionId, conversationId, - * turnId, and spanId automatically. - */ - readonly log: Logger; - - /** - * Working directory for this turn, forwarded verbatim from `RunTurnInput.cwd`. - * Tools that touch the filesystem resolve and contain paths against it. - * Optional: when omitted, a tool falls back to its own configured/default - * workdir. The kernel never interprets it. - */ - readonly cwd?: string; - - /** - * The conversation this tool-call belongs to. Tools that maintain - * per-conversation state (e.g. a todo list) key on this. Forwarded - * verbatim from `RunTurnInput.conversationId`. 
Optional: when omitted, - * a tool has no conversation scope (e.g. a global tool). - */ - readonly conversationId?: string; - - /** - * The computer this tool-call executes on (SSH support). When - * omitted/undefined, execution is LOCAL (today's behavior — the tool uses - * the local node fs/child_process). When set, it is an SSH config alias - * (see `notes/ssh-support-plan.md` §3); a tool resolves a remote - * `ExecBackend` for it via its injected resolver. The kernel never - * interprets it — it forwards the value verbatim from - * `RunTurnInput.computerId`, exactly like `cwd`. It never enters the model - * prompt, so it does not affect prompt caching. - */ - readonly computerId?: string; + /** Unique id of the tool-call this execution serves. */ + readonly toolCallId: string; + + /** + * Stream output from the tool. The kernel attributes every call to the + * tool-call id, so concurrent shell output from different tools is + * correctly separated. + */ + readonly onOutput: (data: string, stream: "stdout" | "stderr") => void; + + /** + * Cancellation signal. An aborted turn sets this so in-flight tool work + * can clean up rather than leak. + */ + readonly signal: AbortSignal; + + /** + * Pre-bound Logger scoped to this tool-call span. Tools log correlated + * without a global (P3). The kernel stamps extensionId, conversationId, + * turnId, and spanId automatically. + */ + readonly log: Logger; + + /** + * Working directory for this turn, forwarded verbatim from `RunTurnInput.cwd`. + * Tools that touch the filesystem resolve and contain paths against it. + * Optional: when omitted, a tool falls back to its own configured/default + * workdir. The kernel never interprets it. + */ + readonly cwd?: string; + + /** + * The conversation this tool-call belongs to. Tools that maintain + * per-conversation state (e.g. a todo list) key on this. Forwarded + * verbatim from `RunTurnInput.conversationId`. Optional: when omitted, + * a tool has no conversation scope (e.g. 
a global tool). + */ + readonly conversationId?: string; + + /** + * The computer this tool-call executes on (SSH support). When + * omitted/undefined, execution is LOCAL (today's behavior — the tool uses + * the local node fs/child_process). When set, it is an SSH config alias + * (see `notes/ssh-support-plan.md` §3); a tool resolves a remote + * `ExecBackend` for it via its injected resolver. The kernel never + * interprets it — it forwards the value verbatim from + * `RunTurnInput.computerId`, exactly like `cwd`. It never enters the model + * prompt, so it does not affect prompt caching. + */ + readonly computerId?: string; } /** @@ -98,8 +98,8 @@ export interface ToolExecuteContext { * react without the kernel interpreting the content. */ export interface ToolResult { - readonly content: string; - readonly isError?: boolean; + readonly content: string; + readonly isError?: boolean; } /** @@ -108,9 +108,9 @@ export interface ToolResult { * to the matched tool's `execute`. */ export interface ToolCall { - readonly id: string; - readonly name: string; - readonly input: unknown; + readonly id: string; + readonly name: string; + readonly input: unknown; } /** @@ -119,26 +119,26 @@ export interface ToolCall { * concrete tools exist. */ export interface ToolContract { - /** Unique name the model uses to invoke this tool. */ - readonly name: string; - - /** Human-readable description shown to the model. */ - readonly description: string; - - /** JSON-Schema-ish parameter declaration (structural, no library dep). */ - readonly parameters: ToolParameterSchema; - - /** - * Execute the tool with parsed input. The kernel provides a per-call - * context (cancellation, output streaming, attribution). - */ - readonly execute: (args: unknown, ctx: ToolExecuteContext) => Promise<ToolResult>; - - /** - * Whether this tool is safe to run concurrently with other tools. - * When `false`, the kernel serializes this tool's calls even when the - * dispatch policy allows parallelism. 
Defaults to `true` if omitted. - * This overrides the global setting downward only (never widens parallelism). - */ - readonly concurrencySafe?: boolean; + /** Unique name the model uses to invoke this tool. */ + readonly name: string; + + /** Human-readable description shown to the model. */ + readonly description: string; + + /** JSON-Schema-ish parameter declaration (structural, no library dep). */ + readonly parameters: ToolParameterSchema; + + /** + * Execute the tool with parsed input. The kernel provides a per-call + * context (cancellation, output streaming, attribution). + */ + readonly execute: (args: unknown, ctx: ToolExecuteContext) => Promise<ToolResult>; + + /** + * Whether this tool is safe to run concurrently with other tools. + * When `false`, the kernel serializes this tool's calls even when the + * dispatch policy allows parallelism. Defaults to `true` if omitted. + * This overrides the global setting downward only (never widens parallelism). + */ + readonly concurrencySafe?: boolean; } diff --git a/packages/kernel/src/host/dag.test.ts b/packages/kernel/src/host/dag.test.ts index 1a0431f..352965c 100644 --- a/packages/kernel/src/host/dag.test.ts +++ b/packages/kernel/src/host/dag.test.ts @@ -3,105 +3,105 @@ import type { Manifest } from "../contracts/extension.js"; import { resolveActivationOrder } from "./dag.js"; function manifest(id: string, deps?: readonly string[]): Manifest { - const base: Manifest = { - id, - name: id, - version: "1.0.0", - apiVersion: "^0.1.0", - trust: "bundled", - }; - if (deps !== undefined) { - return { ...base, dependsOn: deps }; - } - return base; + const base: Manifest = { + id, + name: id, + version: "1.0.0", + apiVersion: "^0.1.0", + trust: "bundled", + }; + if (deps !== undefined) { + return { ...base, dependsOn: deps }; + } + return base; } describe("resolveActivationOrder", () => { - it("returns empty array for no extensions", () => { - expect(resolveActivationOrder([])).toEqual([]); - }); - - it("returns a 
single extension with no deps", () => { - const result = resolveActivationOrder([manifest("a")]); - expect(result.map((m) => m.id)).toEqual(["a"]); - }); - - it("orders a linear chain (A → B → C)", () => { - const a = manifest("a"); - const b = manifest("b", ["a"]); - const c = manifest("c", ["b"]); - - const result = resolveActivationOrder([c, b, a]); - const ids = result.map((m) => m.id); - - expect(ids.indexOf("a")).toBeLessThan(ids.indexOf("b")); - expect(ids.indexOf("b")).toBeLessThan(ids.indexOf("c")); - }); - - it("orders a diamond (A → B, A → C, B → D, C → D)", () => { - const a = manifest("a"); - const b = manifest("b", ["a"]); - const c = manifest("c", ["a"]); - const d = manifest("d", ["b", "c"]); - - const result = resolveActivationOrder([d, c, b, a]); - const ids = result.map((m) => m.id); - - expect(ids.indexOf("a")).toBeLessThan(ids.indexOf("b")); - expect(ids.indexOf("a")).toBeLessThan(ids.indexOf("c")); - expect(ids.indexOf("b")).toBeLessThan(ids.indexOf("d")); - expect(ids.indexOf("c")).toBeLessThan(ids.indexOf("d")); - }); - - it("handles independent sets (no deps between them)", () => { - const a = manifest("a"); - const b = manifest("b"); - const c = manifest("c"); - - const result = resolveActivationOrder([a, b, c]); - expect(result).toHaveLength(3); - expect(result.map((m) => m.id).sort()).toEqual(["a", "b", "c"]); - }); - - it("throws on a cycle (A → B → A)", () => { - const a = manifest("a", ["b"]); - const b = manifest("b", ["a"]); - - expect(() => resolveActivationOrder([a, b])).toThrow(/cycle/i); - }); - - it("throws on a larger cycle (A → B → C → A)", () => { - const a = manifest("a", ["c"]); - const b = manifest("b", ["a"]); - const c = manifest("c", ["b"]); - - expect(() => resolveActivationOrder([a, b, c])).toThrow(/cycle/i); - }); - - it("throws on a missing dependency", () => { - const a = manifest("a", ["nonexistent"]); - - expect(() => resolveActivationOrder([a])).toThrow(/not available/); - }); - - it("throws on duplicate 
extension ids", () => { - const a1 = manifest("a"); - const a2 = manifest("a"); - - expect(() => resolveActivationOrder([a1, a2])).toThrow(/duplicate/i); - }); - - it("handles mixed independent and dependent extensions", () => { - const a = manifest("a"); - const b = manifest("b", ["a"]); - const c = manifest("c"); - const d = manifest("d", ["c"]); - - const result = resolveActivationOrder([a, b, c, d]); - const ids = result.map((m) => m.id); - - expect(ids.indexOf("a")).toBeLessThan(ids.indexOf("b")); - expect(ids.indexOf("c")).toBeLessThan(ids.indexOf("d")); - expect(result).toHaveLength(4); - }); + it("returns empty array for no extensions", () => { + expect(resolveActivationOrder([])).toEqual([]); + }); + + it("returns a single extension with no deps", () => { + const result = resolveActivationOrder([manifest("a")]); + expect(result.map((m) => m.id)).toEqual(["a"]); + }); + + it("orders a linear chain (A → B → C)", () => { + const a = manifest("a"); + const b = manifest("b", ["a"]); + const c = manifest("c", ["b"]); + + const result = resolveActivationOrder([c, b, a]); + const ids = result.map((m) => m.id); + + expect(ids.indexOf("a")).toBeLessThan(ids.indexOf("b")); + expect(ids.indexOf("b")).toBeLessThan(ids.indexOf("c")); + }); + + it("orders a diamond (A → B, A → C, B → D, C → D)", () => { + const a = manifest("a"); + const b = manifest("b", ["a"]); + const c = manifest("c", ["a"]); + const d = manifest("d", ["b", "c"]); + + const result = resolveActivationOrder([d, c, b, a]); + const ids = result.map((m) => m.id); + + expect(ids.indexOf("a")).toBeLessThan(ids.indexOf("b")); + expect(ids.indexOf("a")).toBeLessThan(ids.indexOf("c")); + expect(ids.indexOf("b")).toBeLessThan(ids.indexOf("d")); + expect(ids.indexOf("c")).toBeLessThan(ids.indexOf("d")); + }); + + it("handles independent sets (no deps between them)", () => { + const a = manifest("a"); + const b = manifest("b"); + const c = manifest("c"); + + const result = resolveActivationOrder([a, b, c]); + 
expect(result).toHaveLength(3); + expect(result.map((m) => m.id).sort()).toEqual(["a", "b", "c"]); + }); + + it("throws on a cycle (A → B → A)", () => { + const a = manifest("a", ["b"]); + const b = manifest("b", ["a"]); + + expect(() => resolveActivationOrder([a, b])).toThrow(/cycle/i); + }); + + it("throws on a larger cycle (A → B → C → A)", () => { + const a = manifest("a", ["c"]); + const b = manifest("b", ["a"]); + const c = manifest("c", ["b"]); + + expect(() => resolveActivationOrder([a, b, c])).toThrow(/cycle/i); + }); + + it("throws on a missing dependency", () => { + const a = manifest("a", ["nonexistent"]); + + expect(() => resolveActivationOrder([a])).toThrow(/not available/); + }); + + it("throws on duplicate extension ids", () => { + const a1 = manifest("a"); + const a2 = manifest("a"); + + expect(() => resolveActivationOrder([a1, a2])).toThrow(/duplicate/i); + }); + + it("handles mixed independent and dependent extensions", () => { + const a = manifest("a"); + const b = manifest("b", ["a"]); + const c = manifest("c"); + const d = manifest("d", ["c"]); + + const result = resolveActivationOrder([a, b, c, d]); + const ids = result.map((m) => m.id); + + expect(ids.indexOf("a")).toBeLessThan(ids.indexOf("b")); + expect(ids.indexOf("c")).toBeLessThan(ids.indexOf("d")); + expect(result).toHaveLength(4); + }); }); diff --git a/packages/kernel/src/host/dag.ts b/packages/kernel/src/host/dag.ts index 5cde2f5..1ce35e6 100644 --- a/packages/kernel/src/host/dag.ts +++ b/packages/kernel/src/host/dag.ts @@ -1,64 +1,64 @@ import type { Manifest } from "../contracts/extension.js"; export function resolveActivationOrder(manifests: readonly Manifest[]): Manifest[] { - const byId = new Map<string, Manifest>(); - for (const m of manifests) { - if (byId.has(m.id)) { - throw new Error(`Duplicate extension id: "${m.id}"`); - } - byId.set(m.id, m); - } + const byId = new Map<string, Manifest>(); + for (const m of manifests) { + if (byId.has(m.id)) { + throw new 
Error(`Duplicate extension id: "${m.id}"`); + } + byId.set(m.id, m); + } - for (const m of manifests) { - for (const dep of m.dependsOn ?? []) { - if (!byId.has(dep)) { - throw new Error(`Extension "${m.id}" depends on "${dep}", which is not available.`); - } - } - } + for (const m of manifests) { + for (const dep of m.dependsOn ?? []) { + if (!byId.has(dep)) { + throw new Error(`Extension "${m.id}" depends on "${dep}", which is not available.`); + } + } + } - const inDegree = new Map<string, number>(); - const dependents = new Map<string, string[]>(); + const inDegree = new Map<string, number>(); + const dependents = new Map<string, string[]>(); - for (const m of manifests) { - inDegree.set(m.id, 0); - dependents.set(m.id, []); - } + for (const m of manifests) { + inDegree.set(m.id, 0); + dependents.set(m.id, []); + } - for (const m of manifests) { - for (const dep of m.dependsOn ?? []) { - const list = dependents.get(dep); - if (list !== undefined) list.push(m.id); - inDegree.set(m.id, (inDegree.get(m.id) ?? 0) + 1); - } - } + for (const m of manifests) { + for (const dep of m.dependsOn ?? []) { + const list = dependents.get(dep); + if (list !== undefined) list.push(m.id); + inDegree.set(m.id, (inDegree.get(m.id) ?? 0) + 1); + } + } - const queue: string[] = []; - for (const [id, deg] of inDegree) { - if (deg === 0) queue.push(id); - } + const queue: string[] = []; + for (const [id, deg] of inDegree) { + if (deg === 0) queue.push(id); + } - const result: Manifest[] = []; - let idx = 0; - while (idx < queue.length) { - const id = queue[idx]; - if (id === undefined) break; - idx++; - const m = byId.get(id); - if (m === undefined) continue; - result.push(m); - for (const dep of dependents.get(id) ?? []) { - const newDeg = (inDegree.get(dep) ?? 
1) - 1; - inDegree.set(dep, newDeg); - if (newDeg === 0) queue.push(dep); - } - } + const result: Manifest[] = []; + let idx = 0; + while (idx < queue.length) { + const id = queue[idx]; + if (id === undefined) break; + idx++; + const m = byId.get(id); + if (m === undefined) continue; + result.push(m); + for (const dep of dependents.get(id) ?? []) { + const newDeg = (inDegree.get(dep) ?? 1) - 1; + inDegree.set(dep, newDeg); + if (newDeg === 0) queue.push(dep); + } + } - if (result.length !== manifests.length) { - const remaining = manifests.filter((m) => !result.some((r) => r.id === m.id)); - const ids = remaining.map((m) => m.id).join(", "); - throw new Error(`Dependency cycle detected among extensions: ${ids}`); - } + if (result.length !== manifests.length) { + const remaining = manifests.filter((m) => !result.some((r) => r.id === m.id)); + const ids = remaining.map((m) => m.id).join(", "); + throw new Error(`Dependency cycle detected among extensions: ${ids}`); + } - return result; + return result; } diff --git a/packages/kernel/src/host/host.test.ts b/packages/kernel/src/host/host.test.ts index 0067091..88e2836 100644 --- a/packages/kernel/src/host/host.test.ts +++ b/packages/kernel/src/host/host.test.ts @@ -2,1276 +2,1276 @@ import { beforeEach, describe, expect, it } from "vitest"; import { createBus } from "../bus/bus.js"; import type { AuthContract } from "../contracts/auth.js"; import type { - ConfigAccess, - EventsEmitter, - Extension, - HostAPI, - Manifest, - ManifestContributions, - PermissionDecision, - PermissionGate, - PermissionRequest, - ScheduledJob, - SecretsAccess, - StorageNamespace, + ConfigAccess, + EventsEmitter, + Extension, + HostAPI, + Manifest, + ManifestContributions, + PermissionDecision, + PermissionGate, + PermissionRequest, + ScheduledJob, + SecretsAccess, + StorageNamespace, } from "../contracts/extension.js"; import { defineEventHook, defineFilter, defineService } from "../contracts/hooks.js"; import type { - Attributes, - 
ErrorAttributes, - LogDeps, - Logger, - LogRecord, - LogSink, + Attributes, + ErrorAttributes, + LogDeps, + Logger, + LogRecord, + LogSink, } from "../contracts/logging.js"; import type { ProviderContract } from "../contracts/provider.js"; import type { ToolContract } from "../contracts/tool.js"; import { createHost, type HostDeps, KERNEL_API_VERSION } from "./host.js"; interface FakeLogger extends Logger { - readonly logs: Array<{ level: string; message: string; attrs?: Attributes | ErrorAttributes }>; + readonly logs: Array<{ level: string; message: string; attrs?: Attributes | ErrorAttributes }>; } function createFakeLogger(): FakeLogger { - const logs: Array<{ level: string; message: string; attrs?: Attributes | ErrorAttributes }> = []; - return { - logs, - debug: (message: string, attrs?: Attributes) => { - if (attrs !== undefined) { - logs.push({ level: "debug", message, attrs }); - } else { - logs.push({ level: "debug", message }); - } - }, - info: (message: string, attrs?: Attributes) => { - if (attrs !== undefined) { - logs.push({ level: "info", message, attrs }); - } else { - logs.push({ level: "info", message }); - } - }, - warn: (message: string, attrs?: Attributes) => { - if (attrs !== undefined) { - logs.push({ level: "warn", message, attrs }); - } else { - logs.push({ level: "warn", message }); - } - }, - error: (message: string, attrs?: ErrorAttributes) => { - if (attrs !== undefined) { - logs.push({ level: "error", message, attrs }); - } else { - logs.push({ level: "error", message }); - } - }, - child( - _ctx: Partial<import("../contracts/logging.js").LogContext> & { readonly attrs?: Attributes }, - ): Logger { - return createFakeLogger(); - }, - span(_name: string, _attrs?: Attributes): import("../contracts/logging.js").Span { - return { - id: "fake-span", - log: createFakeLogger(), - setAttributes() {}, - addLink() {}, - child() { - return this; - }, - end() {}, - }; - }, - }; + const logs: Array<{ level: string; message: string; attrs?: 
Attributes | ErrorAttributes }> = []; + return { + logs, + debug: (message: string, attrs?: Attributes) => { + if (attrs !== undefined) { + logs.push({ level: "debug", message, attrs }); + } else { + logs.push({ level: "debug", message }); + } + }, + info: (message: string, attrs?: Attributes) => { + if (attrs !== undefined) { + logs.push({ level: "info", message, attrs }); + } else { + logs.push({ level: "info", message }); + } + }, + warn: (message: string, attrs?: Attributes) => { + if (attrs !== undefined) { + logs.push({ level: "warn", message, attrs }); + } else { + logs.push({ level: "warn", message }); + } + }, + error: (message: string, attrs?: ErrorAttributes) => { + if (attrs !== undefined) { + logs.push({ level: "error", message, attrs }); + } else { + logs.push({ level: "error", message }); + } + }, + child( + _ctx: Partial<import("../contracts/logging.js").LogContext> & { readonly attrs?: Attributes }, + ): Logger { + return createFakeLogger(); + }, + span(_name: string, _attrs?: Attributes): import("../contracts/logging.js").Span { + return { + id: "fake-span", + log: createFakeLogger(), + setAttributes() {}, + addLink() {}, + child() { + return this; + }, + end() {}, + }; + }, + }; } function createFakeLogSink(): LogSink & { readonly records: LogRecord[] } { - const records: LogRecord[] = []; - return { - records, - emit: (record: LogRecord) => { - records.push(record); - }, - }; + const records: LogRecord[] = []; + return { + records, + emit: (record: LogRecord) => { + records.push(record); + }, + }; } function createFakeLogDeps(): LogDeps { - let idCounter = 0; - return { - now: () => 1000 + idCounter * 100, - newId: () => `span-${++idCounter}`, - }; + let idCounter = 0; + return { + now: () => 1000 + idCounter * 100, + newId: () => `span-${++idCounter}`, + }; } function createFakeConfig(): ConfigAccess { - return { - get: () => undefined, - getAll: () => ({}), - }; + return { + get: () => undefined, + getAll: () => ({}), + }; } function 
createFakeStorageFactory(): (ns: string) => StorageNamespace { - const stores = new Map<string, Map<string, string>>(); - return (ns: string) => { - let store = stores.get(ns); - if (!store) { - store = new Map(); - stores.set(ns, store); - } - const s = store; - return { - get: async (key: string) => s.get(key) ?? null, - set: async (key: string, value: string) => { - s.set(key, value); - }, - delete: async (key: string) => { - s.delete(key); - }, - has: async (key: string) => s.has(key), - keys: async () => [...s.keys()], - }; - }; + const stores = new Map<string, Map<string, string>>(); + return (ns: string) => { + let store = stores.get(ns); + if (!store) { + store = new Map(); + stores.set(ns, store); + } + const s = store; + return { + get: async (key: string) => s.get(key) ?? null, + set: async (key: string, value: string) => { + s.set(key, value); + }, + delete: async (key: string) => { + s.delete(key); + }, + has: async (key: string) => s.has(key), + keys: async () => [...s.keys()], + }; + }; } function createFakeSecrets(): SecretsAccess { - const store = new Map<string, string>(); - return { - get: async (key: string) => store.get(key) ?? null, - set: async (key: string, value: string) => { - store.set(key, value); - }, - delete: async (key: string) => { - store.delete(key); - }, - }; + const store = new Map<string, string>(); + return { + get: async (key: string) => store.get(key) ?? 
null, + set: async (key: string, value: string) => { + store.set(key, value); + }, + delete: async (key: string) => { + store.delete(key); + }, + }; } function createFakePermissions(): PermissionGate { - return { - check: async (_request: PermissionRequest): Promise<PermissionDecision> => ({ - allowed: true, - }), - }; + return { + check: async (_request: PermissionRequest): Promise<PermissionDecision> => ({ + allowed: true, + }), + }; } function createFakeScheduler(): { - readonly register: (job: ScheduledJob) => void; - readonly jobs: ScheduledJob[]; + readonly register: (job: ScheduledJob) => void; + readonly jobs: ScheduledJob[]; } { - const jobs: ScheduledJob[] = []; - return { - register: (job: ScheduledJob) => { - jobs.push(job); - }, - jobs, - }; + const jobs: ScheduledJob[] = []; + return { + register: (job: ScheduledJob) => { + jobs.push(job); + }, + jobs, + }; } function createFakeEvents(): EventsEmitter & { readonly emitted: unknown[] } { - const emitted: unknown[] = []; - return { - emitted, - emit: (event) => { - emitted.push(event); - }, - }; + const emitted: unknown[] = []; + return { + emitted, + emit: (event) => { + emitted.push(event); + }, + }; } function createExtension( - id: string, - opts: { - readonly dependsOn?: readonly string[]; - readonly apiVersion?: string; - readonly activate?: (host: HostAPI) => void | Promise<void>; - readonly deactivate?: () => void | Promise<void>; - readonly contributes?: ManifestContributions; - } = {}, + id: string, + opts: { + readonly dependsOn?: readonly string[]; + readonly apiVersion?: string; + readonly activate?: (host: HostAPI) => void | Promise<void>; + readonly deactivate?: () => void | Promise<void>; + readonly contributes?: ManifestContributions; + } = {}, ): Extension { - const base: Manifest = { - id, - name: id, - version: "1.0.0", - apiVersion: opts.apiVersion ?? `^${KERNEL_API_VERSION}`, - trust: "bundled", - }; - const manifest: Manifest = - opts.dependsOn !== undefined - ? 
{ ...base, dependsOn: opts.dependsOn } - : opts.contributes !== undefined - ? { ...base, contributes: opts.contributes } - : base; - const ext: Extension = { - manifest, - activate: opts.activate ?? (() => {}), - }; - if (opts.deactivate !== undefined) { - return { ...ext, deactivate: opts.deactivate }; - } - return ext; + const base: Manifest = { + id, + name: id, + version: "1.0.0", + apiVersion: opts.apiVersion ?? `^${KERNEL_API_VERSION}`, + trust: "bundled", + }; + const manifest: Manifest = + opts.dependsOn !== undefined + ? { ...base, dependsOn: opts.dependsOn } + : opts.contributes !== undefined + ? { ...base, contributes: opts.contributes } + : base; + const ext: Extension = { + manifest, + activate: opts.activate ?? (() => {}), + }; + if (opts.deactivate !== undefined) { + return { ...ext, deactivate: opts.deactivate }; + } + return ext; } function createFakeTool(name: string): ToolContract { - return { - name, - description: `Tool ${name}`, - parameters: { type: "object" }, - execute: async () => ({ content: "ok" }), - }; + return { + name, + description: `Tool ${name}`, + parameters: { type: "object" }, + execute: async () => ({ content: "ok" }), + }; } function createFakeProvider(id: string): ProviderContract { - return { - id, - stream: async function* () {}, - }; + return { + id, + stream: async function* () {}, + }; } function createFakeAuth(id: string): AuthContract { - return { - id, - resolve: async () => null, - }; + return { + id, + resolve: async () => null, + }; } describe("createHost", () => { - let logger: FakeLogger; - let logSink: ReturnType<typeof createFakeLogSink>; - let logDeps: LogDeps; - let deps: HostDeps; - let scheduler: ReturnType<typeof createFakeScheduler>; - let events: ReturnType<typeof createFakeEvents>; - - beforeEach(() => { - logger = createFakeLogger(); - logSink = createFakeLogSink(); - logDeps = createFakeLogDeps(); - scheduler = createFakeScheduler(); - events = createFakeEvents(); - deps = { - logger, - config: 
createFakeConfig(), - storageFactory: createFakeStorageFactory(), - secrets: createFakeSecrets(), - permissions: createFakePermissions(), - scheduler, - bus: createBus(logger), - events, - logSink, - logDeps, - }; - }); - - describe("activation order", () => { - it("activates extensions in topological order", async () => { - const order: string[] = []; - - const a = createExtension("a", { - activate: () => { - order.push("a"); - }, - }); - const b = createExtension("b", { - dependsOn: ["a"], - activate: () => { - order.push("b"); - }, - }); - const c = createExtension("c", { - dependsOn: ["b"], - activate: () => { - order.push("c"); - }, - }); - - const host = createHost([c, b, a], deps); - await host.activate(); - - expect(order).toEqual(["a", "b", "c"]); - }); - - it("activates independent extensions", async () => { - const order: string[] = []; - - const a = createExtension("a", { - activate: () => { - order.push("a"); - }, - }); - const b = createExtension("b", { - activate: () => { - order.push("b"); - }, - }); - - const host = createHost([a, b], deps); - await host.activate(); - - expect(order).toHaveLength(2); - expect(order).toContain("a"); - expect(order).toContain("b"); - }); - }); - - describe("fault isolation", () => { - it("a throwing extension is isolated — others still activate", async () => { - const order: string[] = []; - - const a = createExtension("a", { - activate: () => { - order.push("a"); - }, - }); - const b = createExtension("b", { - activate: () => { - throw new Error("boom"); - }, - }); - const c = createExtension("c", { - activate: () => { - order.push("c"); - }, - }); - - const host = createHost([a, b, c], deps); - await host.activate(); - - expect(order).toEqual(["a", "c"]); - expect(host.getDisabled()).toHaveLength(1); - expect(host.getDisabled()[0]?.manifest.id).toBe("b"); - expect(host.getDisabled()[0]?.reason).toContain("boom"); - }); - - it("an async-rejecting extension is isolated", async () => { - const a = createExtension("a", 
{ - activate: async () => { - throw new Error("async fail"); - }, - }); - const b = createExtension("b", { - activate: () => {}, - }); - - const host = createHost([a, b], deps); - await host.activate(); - - expect(host.getDisabled()).toHaveLength(1); - expect(host.getDisabled()[0]?.manifest.id).toBe("a"); - }); - }); - - describe("apiVersion compatibility", () => { - it("activates compatible extensions", async () => { - const ext = createExtension("good", { - apiVersion: `^${KERNEL_API_VERSION}`, - }); - - const host = createHost([ext], deps); - await host.activate(); - - expect(host.getDisabled()).toHaveLength(0); - }); - - it("disables incompatible extensions without crashing", async () => { - const ext = createExtension("bad", { - apiVersion: "^99.0.0", - }); - - const host = createHost([ext], deps); - await host.activate(); - - expect(host.getDisabled()).toHaveLength(1); - expect(host.getDisabled()[0]?.manifest.id).toBe("bad"); - expect(host.getDisabled()[0]?.reason).toContain("incompatible"); - }); - - it("logs a warning for disabled extensions", async () => { - const ext = createExtension("bad", { - apiVersion: "^99.0.0", - }); - - const host = createHost([ext], deps); - await host.activate(); - - const warnings = logger.logs.filter((l) => l.level === "warn"); - expect(warnings).toHaveLength(1); - expect(warnings[0]?.message).toContain("bad"); - }); - }); - - describe("registries", () => { - it("defineTool populates the tool registry", async () => { - const tool = createFakeTool("read-file"); - const ext = createExtension("tools-fs", { - activate: (host) => { - host.defineTool(tool); - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - expect(host.getTools().size).toBe(1); - expect(host.getTool("read-file")).toBe(tool); - }); - - it("defineProvider populates the provider registry", async () => { - const provider = createFakeProvider("anthropic"); - const ext = createExtension("provider-anthropic", { - activate: (host) => { - 
host.defineProvider(provider); - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - expect(host.getProviders().size).toBe(1); - expect(host.getProvider("anthropic")).toBe(provider); - }); - - it("defineAuth populates the auth registry", async () => { - const auth = createFakeAuth("apikey"); - const ext = createExtension("auth-apikey", { - activate: (host) => { - host.defineAuth(auth); - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - expect(host.getAuthProviders().size).toBe(1); - expect(host.getAuthProvider("apikey")).toBe(auth); - }); - - it("getService returns what an extension provided via provideService", async () => { - const handle = defineService<{ value: number }>("test/svc"); - const ext = createExtension("svc-provider", { - activate: (host) => { - host.provideService(handle, { value: 42 }); - }, - }); - const consumer = createExtension("svc-consumer", { - dependsOn: ["svc-provider"], - activate: (host) => { - const svc = host.getService(handle); - expect(svc.value).toBe(42); - }, - }); - - const host = createHost([ext, consumer], deps); - await host.activate(); - - expect(host.getDisabled()).toHaveLength(0); - }); - - it("multiple extensions contribute to the same registry", async () => { - const ext1 = createExtension("tools-a", { - activate: (host) => { - host.defineTool(createFakeTool("tool-a")); - }, - }); - const ext2 = createExtension("tools-b", { - activate: (host) => { - host.defineTool(createFakeTool("tool-b")); - }, - }); - - const host = createHost([ext1, ext2], deps); - await host.activate(); - - expect(host.getTools().size).toBe(2); - expect(host.getTool("tool-a")).toBeDefined(); - expect(host.getTool("tool-b")).toBeDefined(); - }); - }); - - describe("scheduler", () => { - it("collects scheduled jobs and forwards to sink", async () => { - const job: ScheduledJob = { - id: "cache-warm", - cron: "*/5 * * * *", - execute: () => {}, - }; - const ext = createExtension("scheduler-ext", 
{ - activate: (host) => { - host.scheduler.register(job); - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - expect(host.getScheduledJobs()).toHaveLength(1); - expect(host.getScheduledJobs()[0]).toBe(job); - expect(scheduler.jobs).toHaveLength(1); - expect(scheduler.jobs[0]).toBe(job); - }); - }); - - describe("migrations", () => { - it("collects migrations from manifests", async () => { - const ext = createExtension("store-ext", { - contributes: { migrations: ["001-init", "002-add-index"] }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - expect(host.getMigrations()).toEqual(["001-init", "002-add-index"]); - }); - }); - - describe("deactivation", () => { - it("deactivates in reverse activation order", async () => { - const order: string[] = []; - - const a = createExtension("a", { - activate: () => { - order.push("activate-a"); - }, - deactivate: () => { - order.push("deactivate-a"); - }, - }); - const b = createExtension("b", { - activate: () => { - order.push("activate-b"); - }, - deactivate: () => { - order.push("deactivate-b"); - }, - }); - const c = createExtension("c", { - activate: () => { - order.push("activate-c"); - }, - deactivate: () => { - order.push("deactivate-c"); - }, - }); - - const host = createHost([a, b, c], deps); - await host.activate(); - await host.deactivate(); - - expect(order).toEqual([ - "activate-a", - "activate-b", - "activate-c", - "deactivate-c", - "deactivate-b", - "deactivate-a", - ]); - }); - - it("a failing deactivate does not prevent others", async () => { - const order: string[] = []; - - const a = createExtension("a", { - activate: () => {}, - deactivate: () => { - order.push("deactivate-a"); - }, - }); - const b = createExtension("b", { - activate: () => {}, - deactivate: () => { - throw new Error("deactivate boom"); - }, - }); - const c = createExtension("c", { - activate: () => {}, - deactivate: () => { - order.push("deactivate-c"); - }, - }); - - const host = 
createHost([a, b, c], deps); - await host.activate(); - await host.deactivate(); - - expect(order).toEqual(["deactivate-c", "deactivate-a"]); - const errors = logger.logs.filter((l) => l.level === "error"); - expect(errors.some((e) => e.message.includes("deactivate"))).toBe(true); - expect(errors.some((e) => (e.attrs as { err?: unknown })?.err instanceof Error)).toBe(true); - }); - }); - - describe("HostAPI delegation", () => { - it("on/addFilter delegate to the bus", async () => { - const hook = defineEventHook<string>("test/host-event"); - const received: string[] = []; - - const ext = createExtension("hook-ext", { - activate: (host) => { - host.on(hook, (payload) => { - received.push(payload); - }); - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - deps.bus.emit(hook, "hello"); - expect(received).toEqual(["hello"]); - }); - - it("emit dispatches to handlers registered via on", async () => { - const hook = defineEventHook<string>("test/emit-dispatch"); - const received: string[] = []; - - const ext = createExtension("emit-ext", { - activate: (host) => { - host.on(hook, (payload) => { - received.push(payload); - }); - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - const api = host.getHostAPI(); - api.emit(hook, "world"); - expect(received).toEqual(["world"]); - }); - - it("emit isolates a throwing handler (does not propagate)", async () => { - const hook = defineEventHook<string>("test/emit-isolation"); - const received: string[] = []; - - const ext = createExtension("emit-isolation-ext", { - activate: (host) => { - host.on(hook, () => { - throw new Error("handler boom"); - }); - host.on(hook, (payload) => { - received.push(payload); - }); - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - const api = host.getHostAPI(); - expect(() => api.emit(hook, "safe")).not.toThrow(); - expect(received).toEqual(["safe"]); - }); - - it("applyFilters threads a value through 
registered filters in order", async () => { - const hook = defineFilter<string>("test/text-transform"); - - const ext = createExtension("filter-ext", { - activate: (host) => { - host.addFilter(hook, (value) => `${value}-first`); - host.addFilter(hook, (value) => `${value}-second`); - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - const api = host.getHostAPI(); - const result = await api.applyFilters(hook, "start"); - expect(result).toBe("start-first-second"); - }); - - it("applyFilters returns the input unchanged when no filters are registered", async () => { - const hook = defineFilter<string>("test/unused-filter"); - - const ext = createExtension("no-filter-ext", { - activate: () => {}, - }); - - const host = createHost([ext], deps); - await host.activate(); - - const api = host.getHostAPI(); - const result = await api.applyFilters(hook, "unchanged"); - expect(result).toBe("unchanged"); - }); - - it("storage delegates to the factory", async () => { - let storageResult: StorageNamespace | undefined; - - const ext = createExtension("storage-ext", { - activate: (host) => { - storageResult = host.storage("my-ns"); - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - expect(storageResult).toBeDefined(); - await storageResult?.set("key", "value"); - expect(await storageResult?.get("key")).toBe("value"); - }); - - it("events delegates to the emitter", async () => { - const ext = createExtension("event-ext", { - activate: (host) => { - host.events.emit({ type: "custom", data: 42 }); - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - expect(events.emitted).toHaveLength(1); - expect(events.emitted[0]).toEqual({ type: "custom", data: 42 }); - }); - }); - - describe("HostAPI registry access", () => { - it("getTools returns registered tools via HostAPI", async () => { - const tool = createFakeTool("read-file"); - let capturedTools: ReadonlyMap<string, ToolContract> | undefined; - - 
const producer = createExtension("tools-fs", { - activate: (host) => { - host.defineTool(tool); - }, - }); - const consumer = createExtension("consumer", { - dependsOn: ["tools-fs"], - activate: (host) => { - capturedTools = host.getTools(); - }, - }); - - const host = createHost([producer, consumer], deps); - await host.activate(); - - expect(capturedTools).toBeDefined(); - expect(capturedTools?.size).toBe(1); - expect(capturedTools?.get("read-file")).toBe(tool); - }); - - it("getProviders returns registered providers via HostAPI", async () => { - const provider = createFakeProvider("anthropic"); - let capturedProviders: ReadonlyMap<string, ProviderContract> | undefined; - - const producer = createExtension("provider-anthropic", { - activate: (host) => { - host.defineProvider(provider); - }, - }); - const consumer = createExtension("consumer", { - dependsOn: ["provider-anthropic"], - activate: (host) => { - capturedProviders = host.getProviders(); - }, - }); - - const host = createHost([producer, consumer], deps); - await host.activate(); - - expect(capturedProviders).toBeDefined(); - expect(capturedProviders?.size).toBe(1); - expect(capturedProviders?.get("anthropic")).toBe(provider); - }); - - it("getAuthProviders/getAuthProvider returns registered auth via HostAPI", async () => { - const auth = createFakeAuth("apikey"); - let capturedAuth: ReadonlyMap<string, AuthContract> | undefined; - let capturedSingle: AuthContract | undefined; - - const producer = createExtension("auth-apikey", { - activate: (host) => { - host.defineAuth(auth); - }, - }); - const consumer = createExtension("consumer", { - dependsOn: ["auth-apikey"], - activate: (host) => { - capturedAuth = host.getAuthProviders(); - capturedSingle = host.getAuthProvider("apikey"); - }, - }); - - const host = createHost([producer, consumer], deps); - await host.activate(); - - expect(capturedAuth).toBeDefined(); - expect(capturedAuth?.size).toBe(1); - expect(capturedAuth?.get("apikey")).toBe(auth); - 
expect(capturedSingle).toBe(auth); - }); - }); - - describe("getExtensions", () => { - it("returns empty array when no extensions are activated", async () => { - const host = createHost([], deps); - await host.activate(); - - expect(host.getExtensions()).toEqual([]); - }); - - it("returns manifests of all activated extensions", async () => { - const a = createExtension("ext-a"); - const b = createExtension("ext-b"); - - const host = createHost([a, b], deps); - await host.activate(); - - const exts = host.getExtensions(); - expect(exts).toHaveLength(2); - expect(exts.map((e) => e.id)).toContain("ext-a"); - expect(exts.map((e) => e.id)).toContain("ext-b"); - }); - - it("returns manifests in activation order", async () => { - const a = createExtension("a"); - const b = createExtension("b", { dependsOn: ["a"] }); - const c = createExtension("c", { dependsOn: ["b"] }); - - const host = createHost([c, b, a], deps); - await host.activate(); - - const exts = host.getExtensions(); - expect(exts.map((e) => e.id)).toEqual(["a", "b", "c"]); - }); - - it("excludes extensions that failed to activate", async () => { - const a = createExtension("good"); - const b = createExtension("bad", { - activate: () => { - throw new Error("boom"); - }, - }); - - const host = createHost([a, b], deps); - await host.activate(); - - const exts = host.getExtensions(); - expect(exts).toHaveLength(1); - expect(exts[0]?.id).toBe("good"); - }); - - it("excludes extensions disabled by apiVersion incompatibility", async () => { - const good = createExtension("good"); - const bad = createExtension("bad", { apiVersion: "^99.0.0" }); - - const host = createHost([good, bad], deps); - await host.activate(); - - const exts = host.getExtensions(); - expect(exts).toHaveLength(1); - expect(exts[0]?.id).toBe("good"); - }); - - it("returns a frozen array", async () => { - const ext = createExtension("ext"); - const host = createHost([ext], deps); - await host.activate(); - - const exts = host.getExtensions(); - 
expect(Object.isFrozen(exts)).toBe(true); - }); - - it("HostAPI getExtensions reflects activated extensions after full activation", async () => { - const a = createExtension("ext-a"); - const b = createExtension("ext-b", { - dependsOn: ["ext-a"], - activate: () => {}, - }); - - const host = createHost([a, b], deps); - await host.activate(); - - // Use getHostAPI() to verify the post-activation view - const api = host.getHostAPI(); - const capturedExtsAfter = api.getExtensions(); - - expect(capturedExtsAfter).toHaveLength(2); - expect(capturedExtsAfter.map((e) => e.id)).toEqual(["ext-a", "ext-b"]); - }); - - it("HostAPI getExtensions during activation sees only previously activated", async () => { - const seenDuringActivation: string[][] = []; - - const a = createExtension("a", { - activate: (host) => { - seenDuringActivation.push(host.getExtensions().map((e) => e.id)); - }, - }); - const b = createExtension("b", { - activate: (host) => { - seenDuringActivation.push(host.getExtensions().map((e) => e.id)); - }, - }); - - const host = createHost([a, b], deps); - await host.activate(); - - // When a activates, activated[] is empty (a hasn't been pushed yet) - // When b activates, activated[] has [a] (b hasn't been pushed yet) - expect(seenDuringActivation).toEqual([[], ["a"]]); - }); - }); - - describe("DAG errors", () => { - it("throws on missing dependency", () => { - const ext = createExtension("a", { dependsOn: ["missing"] }); - expect(() => createHost([ext], deps)).toThrow(/not available/); - }); - - it("throws on dependency cycle", () => { - const a = createExtension("a", { dependsOn: ["b"] }); - const b = createExtension("b", { dependsOn: ["a"] }); - expect(() => createHost([a, b], deps)).toThrow(/cycle/i); - }); - }); - - describe("empty host", () => { - it("works with no extensions", async () => { - const host = createHost([], deps); - await host.activate(); - - expect(host.getTools().size).toBe(0); - expect(host.getProviders().size).toBe(0); - 
expect(host.getAuthProviders().size).toBe(0); - expect(host.getScheduledJobs()).toHaveLength(0); - expect(host.getMigrations()).toHaveLength(0); - expect(host.getDisabled()).toHaveLength(0); - }); - }); - - describe("getHostAPI", () => { - it("returns a HostAPI whose read-views reflect registrations from activation", async () => { - const tool = createFakeTool("read-file"); - const provider = createFakeProvider("anthropic"); - const auth = createFakeAuth("apikey"); - - const ext = createExtension("multi-ext", { - activate: (host) => { - host.defineTool(tool); - host.defineProvider(provider); - host.defineAuth(auth); - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - const api = host.getHostAPI(); - - expect(api.getTools().size).toBe(1); - expect(api.getTools().get("read-file")).toBe(tool); - - expect(api.getProviders().size).toBe(1); - expect(api.getProviders().get("anthropic")).toBe(provider); - - expect(api.getAuthProviders().size).toBe(1); - expect(api.getAuthProvider("apikey")).toBe(auth); - }); - - it("throws on defineTool after activation", async () => { - const ext = createExtension("ext", { activate: () => {} }); - const host = createHost([ext], deps); - await host.activate(); - - const api = host.getHostAPI(); - expect(() => api.defineTool(createFakeTool("late"))).toThrow( - "Registration not available after activation", - ); - }); - - it("throws on defineProvider after activation", async () => { - const ext = createExtension("ext", { activate: () => {} }); - const host = createHost([ext], deps); - await host.activate(); - - const api = host.getHostAPI(); - expect(() => api.defineProvider(createFakeProvider("late"))).toThrow( - "Registration not available after activation", - ); - }); - - it("throws on defineAuth after activation", async () => { - const ext = createExtension("ext", { activate: () => {} }); - const host = createHost([ext], deps); - await host.activate(); - - const api = host.getHostAPI(); - expect(() => 
api.defineAuth(createFakeAuth("late"))).toThrow( - "Registration not available after activation", - ); - }); - - it("applyFilters is available on registration-closed HostAPI", async () => { - const hook = defineFilter<string>("test/closed-filter"); - - const ext = createExtension("filter-ext", { - activate: (host) => { - host.addFilter(hook, (value) => `${value}-filtered`); - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - const api = host.getHostAPI(); - const result = await api.applyFilters(hook, "input"); - expect(result).toBe("input-filtered"); - }); - }); - - describe("auto-scoped logger (D6)", () => { - it("each extension's logger stamps its own manifest.id as extensionId", async () => { - let extALogger: Logger | undefined; - let extBLogger: Logger | undefined; - - const a = createExtension("ext-a", { - activate: (host) => { - extALogger = host.logger; - }, - }); - const b = createExtension("ext-b", { - activate: (host) => { - extBLogger = host.logger; - }, - }); - - const host = createHost([a, b], deps); - await host.activate(); - - extALogger?.info("from-a"); - extBLogger?.info("from-b"); - - const logRecords = logSink.records.filter((r) => r.kind === "log"); - expect(logRecords).toHaveLength(2); - if (logRecords[0]?.kind === "log") { - expect(logRecords[0].extensionId).toBe("ext-a"); - expect(logRecords[0].msg).toBe("from-a"); - } - if (logRecords[1]?.kind === "log") { - expect(logRecords[1].extensionId).toBe("ext-b"); - expect(logRecords[1].msg).toBe("from-b"); - } - }); - - it("an extension cannot spoof extensionId — it is auto-stamped", async () => { - let extLogger: Logger | undefined; - - const ext = createExtension("real-id", { - activate: (host) => { - extLogger = host.logger; - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - // child() cannot override extensionId - const child = extLogger?.child({ extensionId: "spoofed" }); - child?.info("msg"); - - const logRecords = 
logSink.records.filter((r) => r.kind === "log"); - expect(logRecords).toHaveLength(1); - if (logRecords[0]?.kind === "log") { - expect(logRecords[0].extensionId).toBe("real-id"); - } - }); - - it("host.logger.error uses structured { err } shape", async () => { - let extLogger: Logger | undefined; - - const ext = createExtension("ext", { - activate: (host) => { - extLogger = host.logger; - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - extLogger?.error("something broke", { err: new Error("boom") }); - - const logRecords = logSink.records.filter((r) => r.kind === "log"); - expect(logRecords).toHaveLength(1); - if (logRecords[0]?.kind === "log") { - expect(logRecords[0].level).toBe("error"); - expect(logRecords[0].msg).toBe("something broke"); - expect(logRecords[0].attributes?.["error.message"]).toBe("boom"); - } - }); - - it("a throwing sink does NOT break the caller", async () => { - const brokenSink: LogSink = { - emit() { - throw new Error("sink down"); - }, - }; - const brokenDeps: HostDeps = { - ...deps, - logSink: brokenSink, - }; - - let extLogger: Logger | undefined; - const ext = createExtension("ext", { - activate: (host) => { - extLogger = host.logger; - }, - }); - - const host = createHost([ext], brokenDeps); - await host.activate(); - - // Should not throw - expect(() => extLogger?.info("msg")).not.toThrow(); - }); - - it("span() + end() emit incremental span-open and span-close records", async () => { - let extLogger: Logger | undefined; - - const ext = createExtension("ext", { - activate: (host) => { - extLogger = host.logger; - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - const span = extLogger?.span("my-span", { key: "value" }); - span?.setAttributes({ extra: "attr" }); - span?.end({ attrs: { result: "ok" } }); - - const spanOpens = logSink.records.filter((r) => r.kind === "span-open"); - const spanCloses = logSink.records.filter((r) => r.kind === "span-close"); - - 
expect(spanOpens).toHaveLength(1); - expect(spanCloses).toHaveLength(1); - - if (spanOpens[0]?.kind === "span-open") { - expect(spanOpens[0].name).toBe("my-span"); - expect(spanOpens[0].extensionId).toBe("ext"); - expect(spanOpens[0].attributes?.key).toBe("value"); - } - if (spanCloses[0]?.kind === "span-close") { - expect(spanCloses[0].name).toBe("my-span"); - expect(spanCloses[0].status).toBe("ok"); - expect(spanCloses[0].durationMs).toBeGreaterThanOrEqual(0); - expect(spanCloses[0].attributes?.extra).toBe("attr"); - expect(spanCloses[0].attributes?.result).toBe("ok"); - } - }); - - it("span() with body emits body on span-open record", async () => { - let extLogger: Logger | undefined; - - const ext = createExtension("ext", { - activate: (host) => { - extLogger = host.logger; - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - const span = extLogger?.span("with-body", { key: "value" }, '{"payload":"hello"}'); - span?.end(); - - const spanOpens = logSink.records.filter((r) => r.kind === "span-open"); - expect(spanOpens).toHaveLength(1); - if (spanOpens[0]?.kind === "span-open") { - expect(spanOpens[0].body).toBe('{"payload":"hello"}'); - } - }); - - it("span() without body omits body field on span-open record", async () => { - let extLogger: Logger | undefined; - - const ext = createExtension("ext", { - activate: (host) => { - extLogger = host.logger; - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - const span = extLogger?.span("no-body"); - span?.end(); - - const spanOpens = logSink.records.filter((r) => r.kind === "span-open"); - expect(spanOpens).toHaveLength(1); - if (spanOpens[0]?.kind === "span-open") { - expect(spanOpens[0].body).toBeUndefined(); - } - }); - - it("child() with body emits body on child span-open record", async () => { - let extLogger: Logger | undefined; - - const ext = createExtension("ext", { - activate: (host) => { - extLogger = host.logger; - }, - }); - - const host = 
createHost([ext], deps); - await host.activate(); - - const span = extLogger?.span("parent"); - const child = span?.child("child-name", { k: "v" }, '{"child":"body"}'); - child?.end(); - span?.end(); - - const spanOpens = logSink.records.filter((r) => r.kind === "span-open"); - const childOpen = spanOpens.find((r) => r.kind === "span-open" && r.name === "child-name"); - expect(childOpen).toBeDefined(); - if (childOpen?.kind === "span-open") { - expect(childOpen.body).toBe('{"child":"body"}'); - } - }); - - it("end() with body emits body on span-close record", async () => { - let extLogger: Logger | undefined; - - const ext = createExtension("ext", { - activate: (host) => { - extLogger = host.logger; - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - const span = extLogger?.span("close-body"); - span?.end({ body: '{"result":"data"}' }); - - const spanCloses = logSink.records.filter((r) => r.kind === "span-close"); - expect(spanCloses).toHaveLength(1); - if (spanCloses[0]?.kind === "span-close") { - expect(spanCloses[0].body).toBe('{"result":"data"}'); - } - }); - - it("end() without body omits body field on span-close record", async () => { - let extLogger: Logger | undefined; - - const ext = createExtension("ext", { - activate: (host) => { - extLogger = host.logger; - }, - }); - - const host = createHost([ext], deps); - await host.activate(); - - const span = extLogger?.span("no-close-body"); - span?.end(); - - const spanCloses = logSink.records.filter((r) => r.kind === "span-close"); - expect(spanCloses).toHaveLength(1); - if (spanCloses[0]?.kind === "span-close") { - expect(spanCloses[0].body).toBeUndefined(); - } - }); - }); + let logger: FakeLogger; + let logSink: ReturnType<typeof createFakeLogSink>; + let logDeps: LogDeps; + let deps: HostDeps; + let scheduler: ReturnType<typeof createFakeScheduler>; + let events: ReturnType<typeof createFakeEvents>; + + beforeEach(() => { + logger = createFakeLogger(); + logSink = 
createFakeLogSink(); + logDeps = createFakeLogDeps(); + scheduler = createFakeScheduler(); + events = createFakeEvents(); + deps = { + logger, + config: createFakeConfig(), + storageFactory: createFakeStorageFactory(), + secrets: createFakeSecrets(), + permissions: createFakePermissions(), + scheduler, + bus: createBus(logger), + events, + logSink, + logDeps, + }; + }); + + describe("activation order", () => { + it("activates extensions in topological order", async () => { + const order: string[] = []; + + const a = createExtension("a", { + activate: () => { + order.push("a"); + }, + }); + const b = createExtension("b", { + dependsOn: ["a"], + activate: () => { + order.push("b"); + }, + }); + const c = createExtension("c", { + dependsOn: ["b"], + activate: () => { + order.push("c"); + }, + }); + + const host = createHost([c, b, a], deps); + await host.activate(); + + expect(order).toEqual(["a", "b", "c"]); + }); + + it("activates independent extensions", async () => { + const order: string[] = []; + + const a = createExtension("a", { + activate: () => { + order.push("a"); + }, + }); + const b = createExtension("b", { + activate: () => { + order.push("b"); + }, + }); + + const host = createHost([a, b], deps); + await host.activate(); + + expect(order).toHaveLength(2); + expect(order).toContain("a"); + expect(order).toContain("b"); + }); + }); + + describe("fault isolation", () => { + it("a throwing extension is isolated — others still activate", async () => { + const order: string[] = []; + + const a = createExtension("a", { + activate: () => { + order.push("a"); + }, + }); + const b = createExtension("b", { + activate: () => { + throw new Error("boom"); + }, + }); + const c = createExtension("c", { + activate: () => { + order.push("c"); + }, + }); + + const host = createHost([a, b, c], deps); + await host.activate(); + + expect(order).toEqual(["a", "c"]); + expect(host.getDisabled()).toHaveLength(1); + expect(host.getDisabled()[0]?.manifest.id).toBe("b"); + 
expect(host.getDisabled()[0]?.reason).toContain("boom"); + }); + + it("an async-rejecting extension is isolated", async () => { + const a = createExtension("a", { + activate: async () => { + throw new Error("async fail"); + }, + }); + const b = createExtension("b", { + activate: () => {}, + }); + + const host = createHost([a, b], deps); + await host.activate(); + + expect(host.getDisabled()).toHaveLength(1); + expect(host.getDisabled()[0]?.manifest.id).toBe("a"); + }); + }); + + describe("apiVersion compatibility", () => { + it("activates compatible extensions", async () => { + const ext = createExtension("good", { + apiVersion: `^${KERNEL_API_VERSION}`, + }); + + const host = createHost([ext], deps); + await host.activate(); + + expect(host.getDisabled()).toHaveLength(0); + }); + + it("disables incompatible extensions without crashing", async () => { + const ext = createExtension("bad", { + apiVersion: "^99.0.0", + }); + + const host = createHost([ext], deps); + await host.activate(); + + expect(host.getDisabled()).toHaveLength(1); + expect(host.getDisabled()[0]?.manifest.id).toBe("bad"); + expect(host.getDisabled()[0]?.reason).toContain("incompatible"); + }); + + it("logs a warning for disabled extensions", async () => { + const ext = createExtension("bad", { + apiVersion: "^99.0.0", + }); + + const host = createHost([ext], deps); + await host.activate(); + + const warnings = logger.logs.filter((l) => l.level === "warn"); + expect(warnings).toHaveLength(1); + expect(warnings[0]?.message).toContain("bad"); + }); + }); + + describe("registries", () => { + it("defineTool populates the tool registry", async () => { + const tool = createFakeTool("read-file"); + const ext = createExtension("tools-fs", { + activate: (host) => { + host.defineTool(tool); + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + expect(host.getTools().size).toBe(1); + expect(host.getTool("read-file")).toBe(tool); + }); + + it("defineProvider populates the provider 
registry", async () => { + const provider = createFakeProvider("anthropic"); + const ext = createExtension("provider-anthropic", { + activate: (host) => { + host.defineProvider(provider); + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + expect(host.getProviders().size).toBe(1); + expect(host.getProvider("anthropic")).toBe(provider); + }); + + it("defineAuth populates the auth registry", async () => { + const auth = createFakeAuth("apikey"); + const ext = createExtension("auth-apikey", { + activate: (host) => { + host.defineAuth(auth); + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + expect(host.getAuthProviders().size).toBe(1); + expect(host.getAuthProvider("apikey")).toBe(auth); + }); + + it("getService returns what an extension provided via provideService", async () => { + const handle = defineService<{ value: number }>("test/svc"); + const ext = createExtension("svc-provider", { + activate: (host) => { + host.provideService(handle, { value: 42 }); + }, + }); + const consumer = createExtension("svc-consumer", { + dependsOn: ["svc-provider"], + activate: (host) => { + const svc = host.getService(handle); + expect(svc.value).toBe(42); + }, + }); + + const host = createHost([ext, consumer], deps); + await host.activate(); + + expect(host.getDisabled()).toHaveLength(0); + }); + + it("multiple extensions contribute to the same registry", async () => { + const ext1 = createExtension("tools-a", { + activate: (host) => { + host.defineTool(createFakeTool("tool-a")); + }, + }); + const ext2 = createExtension("tools-b", { + activate: (host) => { + host.defineTool(createFakeTool("tool-b")); + }, + }); + + const host = createHost([ext1, ext2], deps); + await host.activate(); + + expect(host.getTools().size).toBe(2); + expect(host.getTool("tool-a")).toBeDefined(); + expect(host.getTool("tool-b")).toBeDefined(); + }); + }); + + describe("scheduler", () => { + it("collects scheduled jobs and forwards to sink", 
async () => { + const job: ScheduledJob = { + id: "cache-warm", + cron: "*/5 * * * *", + execute: () => {}, + }; + const ext = createExtension("scheduler-ext", { + activate: (host) => { + host.scheduler.register(job); + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + expect(host.getScheduledJobs()).toHaveLength(1); + expect(host.getScheduledJobs()[0]).toBe(job); + expect(scheduler.jobs).toHaveLength(1); + expect(scheduler.jobs[0]).toBe(job); + }); + }); + + describe("migrations", () => { + it("collects migrations from manifests", async () => { + const ext = createExtension("store-ext", { + contributes: { migrations: ["001-init", "002-add-index"] }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + expect(host.getMigrations()).toEqual(["001-init", "002-add-index"]); + }); + }); + + describe("deactivation", () => { + it("deactivates in reverse activation order", async () => { + const order: string[] = []; + + const a = createExtension("a", { + activate: () => { + order.push("activate-a"); + }, + deactivate: () => { + order.push("deactivate-a"); + }, + }); + const b = createExtension("b", { + activate: () => { + order.push("activate-b"); + }, + deactivate: () => { + order.push("deactivate-b"); + }, + }); + const c = createExtension("c", { + activate: () => { + order.push("activate-c"); + }, + deactivate: () => { + order.push("deactivate-c"); + }, + }); + + const host = createHost([a, b, c], deps); + await host.activate(); + await host.deactivate(); + + expect(order).toEqual([ + "activate-a", + "activate-b", + "activate-c", + "deactivate-c", + "deactivate-b", + "deactivate-a", + ]); + }); + + it("a failing deactivate does not prevent others", async () => { + const order: string[] = []; + + const a = createExtension("a", { + activate: () => {}, + deactivate: () => { + order.push("deactivate-a"); + }, + }); + const b = createExtension("b", { + activate: () => {}, + deactivate: () => { + throw new 
Error("deactivate boom"); + }, + }); + const c = createExtension("c", { + activate: () => {}, + deactivate: () => { + order.push("deactivate-c"); + }, + }); + + const host = createHost([a, b, c], deps); + await host.activate(); + await host.deactivate(); + + expect(order).toEqual(["deactivate-c", "deactivate-a"]); + const errors = logger.logs.filter((l) => l.level === "error"); + expect(errors.some((e) => e.message.includes("deactivate"))).toBe(true); + expect(errors.some((e) => (e.attrs as { err?: unknown })?.err instanceof Error)).toBe(true); + }); + }); + + describe("HostAPI delegation", () => { + it("on/addFilter delegate to the bus", async () => { + const hook = defineEventHook<string>("test/host-event"); + const received: string[] = []; + + const ext = createExtension("hook-ext", { + activate: (host) => { + host.on(hook, (payload) => { + received.push(payload); + }); + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + deps.bus.emit(hook, "hello"); + expect(received).toEqual(["hello"]); + }); + + it("emit dispatches to handlers registered via on", async () => { + const hook = defineEventHook<string>("test/emit-dispatch"); + const received: string[] = []; + + const ext = createExtension("emit-ext", { + activate: (host) => { + host.on(hook, (payload) => { + received.push(payload); + }); + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + const api = host.getHostAPI(); + api.emit(hook, "world"); + expect(received).toEqual(["world"]); + }); + + it("emit isolates a throwing handler (does not propagate)", async () => { + const hook = defineEventHook<string>("test/emit-isolation"); + const received: string[] = []; + + const ext = createExtension("emit-isolation-ext", { + activate: (host) => { + host.on(hook, () => { + throw new Error("handler boom"); + }); + host.on(hook, (payload) => { + received.push(payload); + }); + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + const api 
= host.getHostAPI(); + expect(() => api.emit(hook, "safe")).not.toThrow(); + expect(received).toEqual(["safe"]); + }); + + it("applyFilters threads a value through registered filters in order", async () => { + const hook = defineFilter<string>("test/text-transform"); + + const ext = createExtension("filter-ext", { + activate: (host) => { + host.addFilter(hook, (value) => `${value}-first`); + host.addFilter(hook, (value) => `${value}-second`); + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + const api = host.getHostAPI(); + const result = await api.applyFilters(hook, "start"); + expect(result).toBe("start-first-second"); + }); + + it("applyFilters returns the input unchanged when no filters are registered", async () => { + const hook = defineFilter<string>("test/unused-filter"); + + const ext = createExtension("no-filter-ext", { + activate: () => {}, + }); + + const host = createHost([ext], deps); + await host.activate(); + + const api = host.getHostAPI(); + const result = await api.applyFilters(hook, "unchanged"); + expect(result).toBe("unchanged"); + }); + + it("storage delegates to the factory", async () => { + let storageResult: StorageNamespace | undefined; + + const ext = createExtension("storage-ext", { + activate: (host) => { + storageResult = host.storage("my-ns"); + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + expect(storageResult).toBeDefined(); + await storageResult?.set("key", "value"); + expect(await storageResult?.get("key")).toBe("value"); + }); + + it("events delegates to the emitter", async () => { + const ext = createExtension("event-ext", { + activate: (host) => { + host.events.emit({ type: "custom", data: 42 }); + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + expect(events.emitted).toHaveLength(1); + expect(events.emitted[0]).toEqual({ type: "custom", data: 42 }); + }); + }); + + describe("HostAPI registry access", () => { + it("getTools returns 
registered tools via HostAPI", async () => { + const tool = createFakeTool("read-file"); + let capturedTools: ReadonlyMap<string, ToolContract> | undefined; + + const producer = createExtension("tools-fs", { + activate: (host) => { + host.defineTool(tool); + }, + }); + const consumer = createExtension("consumer", { + dependsOn: ["tools-fs"], + activate: (host) => { + capturedTools = host.getTools(); + }, + }); + + const host = createHost([producer, consumer], deps); + await host.activate(); + + expect(capturedTools).toBeDefined(); + expect(capturedTools?.size).toBe(1); + expect(capturedTools?.get("read-file")).toBe(tool); + }); + + it("getProviders returns registered providers via HostAPI", async () => { + const provider = createFakeProvider("anthropic"); + let capturedProviders: ReadonlyMap<string, ProviderContract> | undefined; + + const producer = createExtension("provider-anthropic", { + activate: (host) => { + host.defineProvider(provider); + }, + }); + const consumer = createExtension("consumer", { + dependsOn: ["provider-anthropic"], + activate: (host) => { + capturedProviders = host.getProviders(); + }, + }); + + const host = createHost([producer, consumer], deps); + await host.activate(); + + expect(capturedProviders).toBeDefined(); + expect(capturedProviders?.size).toBe(1); + expect(capturedProviders?.get("anthropic")).toBe(provider); + }); + + it("getAuthProviders/getAuthProvider returns registered auth via HostAPI", async () => { + const auth = createFakeAuth("apikey"); + let capturedAuth: ReadonlyMap<string, AuthContract> | undefined; + let capturedSingle: AuthContract | undefined; + + const producer = createExtension("auth-apikey", { + activate: (host) => { + host.defineAuth(auth); + }, + }); + const consumer = createExtension("consumer", { + dependsOn: ["auth-apikey"], + activate: (host) => { + capturedAuth = host.getAuthProviders(); + capturedSingle = host.getAuthProvider("apikey"); + }, + }); + + const host = createHost([producer, consumer], deps); 
+ await host.activate(); + + expect(capturedAuth).toBeDefined(); + expect(capturedAuth?.size).toBe(1); + expect(capturedAuth?.get("apikey")).toBe(auth); + expect(capturedSingle).toBe(auth); + }); + }); + + describe("getExtensions", () => { + it("returns empty array when no extensions are activated", async () => { + const host = createHost([], deps); + await host.activate(); + + expect(host.getExtensions()).toEqual([]); + }); + + it("returns manifests of all activated extensions", async () => { + const a = createExtension("ext-a"); + const b = createExtension("ext-b"); + + const host = createHost([a, b], deps); + await host.activate(); + + const exts = host.getExtensions(); + expect(exts).toHaveLength(2); + expect(exts.map((e) => e.id)).toContain("ext-a"); + expect(exts.map((e) => e.id)).toContain("ext-b"); + }); + + it("returns manifests in activation order", async () => { + const a = createExtension("a"); + const b = createExtension("b", { dependsOn: ["a"] }); + const c = createExtension("c", { dependsOn: ["b"] }); + + const host = createHost([c, b, a], deps); + await host.activate(); + + const exts = host.getExtensions(); + expect(exts.map((e) => e.id)).toEqual(["a", "b", "c"]); + }); + + it("excludes extensions that failed to activate", async () => { + const a = createExtension("good"); + const b = createExtension("bad", { + activate: () => { + throw new Error("boom"); + }, + }); + + const host = createHost([a, b], deps); + await host.activate(); + + const exts = host.getExtensions(); + expect(exts).toHaveLength(1); + expect(exts[0]?.id).toBe("good"); + }); + + it("excludes extensions disabled by apiVersion incompatibility", async () => { + const good = createExtension("good"); + const bad = createExtension("bad", { apiVersion: "^99.0.0" }); + + const host = createHost([good, bad], deps); + await host.activate(); + + const exts = host.getExtensions(); + expect(exts).toHaveLength(1); + expect(exts[0]?.id).toBe("good"); + }); + + it("returns a frozen array", async 
() => { + const ext = createExtension("ext"); + const host = createHost([ext], deps); + await host.activate(); + + const exts = host.getExtensions(); + expect(Object.isFrozen(exts)).toBe(true); + }); + + it("HostAPI getExtensions reflects activated extensions after full activation", async () => { + const a = createExtension("ext-a"); + const b = createExtension("ext-b", { + dependsOn: ["ext-a"], + activate: () => {}, + }); + + const host = createHost([a, b], deps); + await host.activate(); + + // Use getHostAPI() to verify the post-activation view + const api = host.getHostAPI(); + const capturedExtsAfter = api.getExtensions(); + + expect(capturedExtsAfter).toHaveLength(2); + expect(capturedExtsAfter.map((e) => e.id)).toEqual(["ext-a", "ext-b"]); + }); + + it("HostAPI getExtensions during activation sees only previously activated", async () => { + const seenDuringActivation: string[][] = []; + + const a = createExtension("a", { + activate: (host) => { + seenDuringActivation.push(host.getExtensions().map((e) => e.id)); + }, + }); + const b = createExtension("b", { + activate: (host) => { + seenDuringActivation.push(host.getExtensions().map((e) => e.id)); + }, + }); + + const host = createHost([a, b], deps); + await host.activate(); + + // When a activates, activated[] is empty (a hasn't been pushed yet) + // When b activates, activated[] has [a] (b hasn't been pushed yet) + expect(seenDuringActivation).toEqual([[], ["a"]]); + }); + }); + + describe("DAG errors", () => { + it("throws on missing dependency", () => { + const ext = createExtension("a", { dependsOn: ["missing"] }); + expect(() => createHost([ext], deps)).toThrow(/not available/); + }); + + it("throws on dependency cycle", () => { + const a = createExtension("a", { dependsOn: ["b"] }); + const b = createExtension("b", { dependsOn: ["a"] }); + expect(() => createHost([a, b], deps)).toThrow(/cycle/i); + }); + }); + + describe("empty host", () => { + it("works with no extensions", async () => { + const host 
= createHost([], deps); + await host.activate(); + + expect(host.getTools().size).toBe(0); + expect(host.getProviders().size).toBe(0); + expect(host.getAuthProviders().size).toBe(0); + expect(host.getScheduledJobs()).toHaveLength(0); + expect(host.getMigrations()).toHaveLength(0); + expect(host.getDisabled()).toHaveLength(0); + }); + }); + + describe("getHostAPI", () => { + it("returns a HostAPI whose read-views reflect registrations from activation", async () => { + const tool = createFakeTool("read-file"); + const provider = createFakeProvider("anthropic"); + const auth = createFakeAuth("apikey"); + + const ext = createExtension("multi-ext", { + activate: (host) => { + host.defineTool(tool); + host.defineProvider(provider); + host.defineAuth(auth); + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + const api = host.getHostAPI(); + + expect(api.getTools().size).toBe(1); + expect(api.getTools().get("read-file")).toBe(tool); + + expect(api.getProviders().size).toBe(1); + expect(api.getProviders().get("anthropic")).toBe(provider); + + expect(api.getAuthProviders().size).toBe(1); + expect(api.getAuthProvider("apikey")).toBe(auth); + }); + + it("throws on defineTool after activation", async () => { + const ext = createExtension("ext", { activate: () => {} }); + const host = createHost([ext], deps); + await host.activate(); + + const api = host.getHostAPI(); + expect(() => api.defineTool(createFakeTool("late"))).toThrow( + "Registration not available after activation", + ); + }); + + it("throws on defineProvider after activation", async () => { + const ext = createExtension("ext", { activate: () => {} }); + const host = createHost([ext], deps); + await host.activate(); + + const api = host.getHostAPI(); + expect(() => api.defineProvider(createFakeProvider("late"))).toThrow( + "Registration not available after activation", + ); + }); + + it("throws on defineAuth after activation", async () => { + const ext = createExtension("ext", { 
activate: () => {} }); + const host = createHost([ext], deps); + await host.activate(); + + const api = host.getHostAPI(); + expect(() => api.defineAuth(createFakeAuth("late"))).toThrow( + "Registration not available after activation", + ); + }); + + it("applyFilters is available on registration-closed HostAPI", async () => { + const hook = defineFilter<string>("test/closed-filter"); + + const ext = createExtension("filter-ext", { + activate: (host) => { + host.addFilter(hook, (value) => `${value}-filtered`); + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + const api = host.getHostAPI(); + const result = await api.applyFilters(hook, "input"); + expect(result).toBe("input-filtered"); + }); + }); + + describe("auto-scoped logger (D6)", () => { + it("each extension's logger stamps its own manifest.id as extensionId", async () => { + let extALogger: Logger | undefined; + let extBLogger: Logger | undefined; + + const a = createExtension("ext-a", { + activate: (host) => { + extALogger = host.logger; + }, + }); + const b = createExtension("ext-b", { + activate: (host) => { + extBLogger = host.logger; + }, + }); + + const host = createHost([a, b], deps); + await host.activate(); + + extALogger?.info("from-a"); + extBLogger?.info("from-b"); + + const logRecords = logSink.records.filter((r) => r.kind === "log"); + expect(logRecords).toHaveLength(2); + if (logRecords[0]?.kind === "log") { + expect(logRecords[0].extensionId).toBe("ext-a"); + expect(logRecords[0].msg).toBe("from-a"); + } + if (logRecords[1]?.kind === "log") { + expect(logRecords[1].extensionId).toBe("ext-b"); + expect(logRecords[1].msg).toBe("from-b"); + } + }); + + it("an extension cannot spoof extensionId — it is auto-stamped", async () => { + let extLogger: Logger | undefined; + + const ext = createExtension("real-id", { + activate: (host) => { + extLogger = host.logger; + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + // child() cannot 
override extensionId + const child = extLogger?.child({ extensionId: "spoofed" }); + child?.info("msg"); + + const logRecords = logSink.records.filter((r) => r.kind === "log"); + expect(logRecords).toHaveLength(1); + if (logRecords[0]?.kind === "log") { + expect(logRecords[0].extensionId).toBe("real-id"); + } + }); + + it("host.logger.error uses structured { err } shape", async () => { + let extLogger: Logger | undefined; + + const ext = createExtension("ext", { + activate: (host) => { + extLogger = host.logger; + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + extLogger?.error("something broke", { err: new Error("boom") }); + + const logRecords = logSink.records.filter((r) => r.kind === "log"); + expect(logRecords).toHaveLength(1); + if (logRecords[0]?.kind === "log") { + expect(logRecords[0].level).toBe("error"); + expect(logRecords[0].msg).toBe("something broke"); + expect(logRecords[0].attributes?.["error.message"]).toBe("boom"); + } + }); + + it("a throwing sink does NOT break the caller", async () => { + const brokenSink: LogSink = { + emit() { + throw new Error("sink down"); + }, + }; + const brokenDeps: HostDeps = { + ...deps, + logSink: brokenSink, + }; + + let extLogger: Logger | undefined; + const ext = createExtension("ext", { + activate: (host) => { + extLogger = host.logger; + }, + }); + + const host = createHost([ext], brokenDeps); + await host.activate(); + + // Should not throw + expect(() => extLogger?.info("msg")).not.toThrow(); + }); + + it("span() + end() emit incremental span-open and span-close records", async () => { + let extLogger: Logger | undefined; + + const ext = createExtension("ext", { + activate: (host) => { + extLogger = host.logger; + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + const span = extLogger?.span("my-span", { key: "value" }); + span?.setAttributes({ extra: "attr" }); + span?.end({ attrs: { result: "ok" } }); + + const spanOpens = 
logSink.records.filter((r) => r.kind === "span-open"); + const spanCloses = logSink.records.filter((r) => r.kind === "span-close"); + + expect(spanOpens).toHaveLength(1); + expect(spanCloses).toHaveLength(1); + + if (spanOpens[0]?.kind === "span-open") { + expect(spanOpens[0].name).toBe("my-span"); + expect(spanOpens[0].extensionId).toBe("ext"); + expect(spanOpens[0].attributes?.key).toBe("value"); + } + if (spanCloses[0]?.kind === "span-close") { + expect(spanCloses[0].name).toBe("my-span"); + expect(spanCloses[0].status).toBe("ok"); + expect(spanCloses[0].durationMs).toBeGreaterThanOrEqual(0); + expect(spanCloses[0].attributes?.extra).toBe("attr"); + expect(spanCloses[0].attributes?.result).toBe("ok"); + } + }); + + it("span() with body emits body on span-open record", async () => { + let extLogger: Logger | undefined; + + const ext = createExtension("ext", { + activate: (host) => { + extLogger = host.logger; + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + const span = extLogger?.span("with-body", { key: "value" }, '{"payload":"hello"}'); + span?.end(); + + const spanOpens = logSink.records.filter((r) => r.kind === "span-open"); + expect(spanOpens).toHaveLength(1); + if (spanOpens[0]?.kind === "span-open") { + expect(spanOpens[0].body).toBe('{"payload":"hello"}'); + } + }); + + it("span() without body omits body field on span-open record", async () => { + let extLogger: Logger | undefined; + + const ext = createExtension("ext", { + activate: (host) => { + extLogger = host.logger; + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + const span = extLogger?.span("no-body"); + span?.end(); + + const spanOpens = logSink.records.filter((r) => r.kind === "span-open"); + expect(spanOpens).toHaveLength(1); + if (spanOpens[0]?.kind === "span-open") { + expect(spanOpens[0].body).toBeUndefined(); + } + }); + + it("child() with body emits body on child span-open record", async () => { + let extLogger: Logger | 
undefined; + + const ext = createExtension("ext", { + activate: (host) => { + extLogger = host.logger; + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + const span = extLogger?.span("parent"); + const child = span?.child("child-name", { k: "v" }, '{"child":"body"}'); + child?.end(); + span?.end(); + + const spanOpens = logSink.records.filter((r) => r.kind === "span-open"); + const childOpen = spanOpens.find((r) => r.kind === "span-open" && r.name === "child-name"); + expect(childOpen).toBeDefined(); + if (childOpen?.kind === "span-open") { + expect(childOpen.body).toBe('{"child":"body"}'); + } + }); + + it("end() with body emits body on span-close record", async () => { + let extLogger: Logger | undefined; + + const ext = createExtension("ext", { + activate: (host) => { + extLogger = host.logger; + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + const span = extLogger?.span("close-body"); + span?.end({ body: '{"result":"data"}' }); + + const spanCloses = logSink.records.filter((r) => r.kind === "span-close"); + expect(spanCloses).toHaveLength(1); + if (spanCloses[0]?.kind === "span-close") { + expect(spanCloses[0].body).toBe('{"result":"data"}'); + } + }); + + it("end() without body omits body field on span-close record", async () => { + let extLogger: Logger | undefined; + + const ext = createExtension("ext", { + activate: (host) => { + extLogger = host.logger; + }, + }); + + const host = createHost([ext], deps); + await host.activate(); + + const span = extLogger?.span("no-close-body"); + span?.end(); + + const spanCloses = logSink.records.filter((r) => r.kind === "span-close"); + expect(spanCloses).toHaveLength(1); + if (spanCloses[0]?.kind === "span-close") { + expect(spanCloses[0].body).toBeUndefined(); + } + }); + }); }); diff --git a/packages/kernel/src/host/host.ts b/packages/kernel/src/host/host.ts index 2a262be..f74881f 100644 --- a/packages/kernel/src/host/host.ts +++ 
b/packages/kernel/src/host/host.ts @@ -1,22 +1,22 @@ import type { Bus } from "../bus/bus.js"; import type { AuthContract } from "../contracts/auth.js"; import type { - ConfigAccess, - EventsEmitter, - Extension, - HostAPI, - Manifest, - PermissionGate, - ScheduledJob, - SecretsAccess, - StorageNamespace, + ConfigAccess, + EventsEmitter, + Extension, + HostAPI, + Manifest, + PermissionGate, + ScheduledJob, + SecretsAccess, + StorageNamespace, } from "../contracts/extension.js"; import type { - EventHandler, - EventHookDescriptor, - FilterDescriptor, - FilterHandler, - ServiceHandle, + EventHandler, + EventHookDescriptor, + FilterDescriptor, + FilterHandler, + ServiceHandle, } from "../contracts/hooks.js"; import type { LogDeps, Logger, LogSink } from "../contracts/logging.js"; import type { ProviderContract } from "../contracts/provider.js"; @@ -28,210 +28,210 @@ import { isApiVersionCompatible } from "./version.js"; export const KERNEL_API_VERSION = "0.1.0"; export interface DisabledExtension { - readonly manifest: Manifest; - readonly reason: string; + readonly manifest: Manifest; + readonly reason: string; } export interface HostDeps { - readonly logger: Logger; - readonly config: ConfigAccess; - readonly storageFactory: (namespace: string) => StorageNamespace; - readonly secrets: SecretsAccess; - readonly permissions: PermissionGate; - readonly scheduler: { readonly register: (job: ScheduledJob) => void }; - readonly bus: Bus; - readonly events: EventsEmitter; - readonly logSink: LogSink; - readonly logDeps: LogDeps; + readonly logger: Logger; + readonly config: ConfigAccess; + readonly storageFactory: (namespace: string) => StorageNamespace; + readonly secrets: SecretsAccess; + readonly permissions: PermissionGate; + readonly scheduler: { readonly register: (job: ScheduledJob) => void }; + readonly bus: Bus; + readonly events: EventsEmitter; + readonly logSink: LogSink; + readonly logDeps: LogDeps; } export interface Host { - readonly activate: () => 
Promise<void>; - readonly deactivate: () => Promise<void>; - readonly getTools: () => ReadonlyMap<string, ToolContract>; - readonly getTool: (name: string) => ToolContract | undefined; - readonly getProviders: () => ReadonlyMap<string, ProviderContract>; - readonly getProvider: (id: string) => ProviderContract | undefined; - readonly getAuthProviders: () => ReadonlyMap<string, AuthContract>; - readonly getAuthProvider: (id: string) => AuthContract | undefined; - readonly getScheduledJobs: () => readonly ScheduledJob[]; - readonly getMigrations: () => readonly string[]; - readonly getDisabled: () => readonly DisabledExtension[]; - readonly getExtensions: () => readonly Manifest[]; - readonly getHostAPI: () => HostAPI; + readonly activate: () => Promise<void>; + readonly deactivate: () => Promise<void>; + readonly getTools: () => ReadonlyMap<string, ToolContract>; + readonly getTool: (name: string) => ToolContract | undefined; + readonly getProviders: () => ReadonlyMap<string, ProviderContract>; + readonly getProvider: (id: string) => ProviderContract | undefined; + readonly getAuthProviders: () => ReadonlyMap<string, AuthContract>; + readonly getAuthProvider: (id: string) => AuthContract | undefined; + readonly getScheduledJobs: () => readonly ScheduledJob[]; + readonly getMigrations: () => readonly string[]; + readonly getDisabled: () => readonly DisabledExtension[]; + readonly getExtensions: () => readonly Manifest[]; + readonly getHostAPI: () => HostAPI; } export function createHost(extensions: readonly Extension[], deps: HostDeps): Host { - const tools = new Map<string, ToolContract>(); - const providers = new Map<string, ProviderContract>(); - const authProviders = new Map<string, AuthContract>(); - const scheduledJobs: ScheduledJob[] = []; - const migrations: string[] = []; - const disabled: DisabledExtension[] = []; - const activated: Extension[] = []; + const tools = new Map<string, ToolContract>(); + const providers = new Map<string, ProviderContract>(); + 
const authProviders = new Map<string, AuthContract>(); + const scheduledJobs: ScheduledJob[] = []; + const migrations: string[] = []; + const disabled: DisabledExtension[] = []; + const activated: Extension[] = []; - const ordered = resolveActivationOrder(extensions.map((e) => e.manifest)); - const extById = new Map<string, Extension>(); - for (const ext of extensions) { - extById.set(ext.manifest.id, ext); - } + const ordered = resolveActivationOrder(extensions.map((e) => e.manifest)); + const extById = new Map<string, Extension>(); + for (const ext of extensions) { + extById.set(ext.manifest.id, ext); + } - const compatible: Extension[] = []; - for (const m of ordered) { - const ext = extById.get(m.id); - if (ext === undefined) continue; - if (isApiVersionCompatible(m.apiVersion, KERNEL_API_VERSION)) { - compatible.push(ext); - } else { - disabled.push({ - manifest: m, - reason: `apiVersion "${m.apiVersion}" is incompatible with kernel API ${KERNEL_API_VERSION}`, - }); - deps.logger.warn(`Extension "${m.id}" disabled: apiVersion incompatible`); - } - } + const compatible: Extension[] = []; + for (const m of ordered) { + const ext = extById.get(m.id); + if (ext === undefined) continue; + if (isApiVersionCompatible(m.apiVersion, KERNEL_API_VERSION)) { + compatible.push(ext); + } else { + disabled.push({ + manifest: m, + reason: `apiVersion "${m.apiVersion}" is incompatible with kernel API ${KERNEL_API_VERSION}`, + }); + deps.logger.warn(`Extension "${m.id}" disabled: apiVersion incompatible`); + } + } - for (const ext of compatible) { - const extMigrations = ext.manifest.contributes?.migrations; - if (extMigrations) { - for (const migration of extMigrations) { - migrations.push(migration); - } - } - } + for (const ext of compatible) { + const extMigrations = ext.manifest.contributes?.migrations; + if (extMigrations) { + for (const migration of extMigrations) { + migrations.push(migration); + } + } + } - function buildHostAPI( - extensionId: string, - opts?: { 
readonly registrationClosed?: boolean }, - ): HostAPI { - const closed = opts?.registrationClosed ?? false; - const extLogger = createLogger({ extensionId }, deps.logSink, deps.logDeps); - return { - defineTool(tool: ToolContract) { - if (closed) throw new Error("Registration not available after activation"); - tools.set(tool.name, tool); - }, - defineProvider(provider: ProviderContract) { - if (closed) throw new Error("Registration not available after activation"); - providers.set(provider.id, provider); - }, - defineAuth(auth: AuthContract) { - if (closed) throw new Error("Registration not available after activation"); - authProviders.set(auth.id, auth); - }, - on<TPayload>(hook: EventHookDescriptor<TPayload>, handler: EventHandler<TPayload>) { - return deps.bus.on(hook, handler); - }, - emit<TPayload>(hook: EventHookDescriptor<TPayload>, payload: TPayload) { - deps.bus.emit(hook, payload); - }, - addFilter<TValue>(hook: FilterDescriptor<TValue>, fn: FilterHandler<TValue>) { - return deps.bus.addFilter(hook, fn); - }, - async applyFilters<TValue>( - hook: FilterDescriptor<TValue>, - value: TValue, - opts?: { readonly failClosed?: boolean }, - ): Promise<TValue> { - return deps.bus.applyFilters(hook, value, opts); - }, - provideService<T>(handle: ServiceHandle<T>, impl: T) { - deps.bus.provideService(handle, impl); - }, - getService<T>(handle: ServiceHandle<T>): T { - return deps.bus.getService(handle); - }, - storage(namespace: string): StorageNamespace { - return deps.storageFactory(namespace); - }, - config: deps.config, - secrets: deps.secrets, - permissions: deps.permissions, - events: deps.events, - logger: extLogger, - getProviders() { - return providers; - }, - getTools() { - return tools; - }, - getAuthProviders() { - return authProviders; - }, - getAuthProvider(id: string) { - return authProviders.get(id); - }, - getExtensions() { - return Object.freeze(activated.map((e) => e.manifest)); - }, - scheduler: { - register(job: ScheduledJob) { - 
scheduledJobs.push(job); - deps.scheduler.register(job); - }, - }, - }; - } + function buildHostAPI( + extensionId: string, + opts?: { readonly registrationClosed?: boolean }, + ): HostAPI { + const closed = opts?.registrationClosed ?? false; + const extLogger = createLogger({ extensionId }, deps.logSink, deps.logDeps); + return { + defineTool(tool: ToolContract) { + if (closed) throw new Error("Registration not available after activation"); + tools.set(tool.name, tool); + }, + defineProvider(provider: ProviderContract) { + if (closed) throw new Error("Registration not available after activation"); + providers.set(provider.id, provider); + }, + defineAuth(auth: AuthContract) { + if (closed) throw new Error("Registration not available after activation"); + authProviders.set(auth.id, auth); + }, + on<TPayload>(hook: EventHookDescriptor<TPayload>, handler: EventHandler<TPayload>) { + return deps.bus.on(hook, handler); + }, + emit<TPayload>(hook: EventHookDescriptor<TPayload>, payload: TPayload) { + deps.bus.emit(hook, payload); + }, + addFilter<TValue>(hook: FilterDescriptor<TValue>, fn: FilterHandler<TValue>) { + return deps.bus.addFilter(hook, fn); + }, + async applyFilters<TValue>( + hook: FilterDescriptor<TValue>, + value: TValue, + opts?: { readonly failClosed?: boolean }, + ): Promise<TValue> { + return deps.bus.applyFilters(hook, value, opts); + }, + provideService<T>(handle: ServiceHandle<T>, impl: T) { + deps.bus.provideService(handle, impl); + }, + getService<T>(handle: ServiceHandle<T>): T { + return deps.bus.getService(handle); + }, + storage(namespace: string): StorageNamespace { + return deps.storageFactory(namespace); + }, + config: deps.config, + secrets: deps.secrets, + permissions: deps.permissions, + events: deps.events, + logger: extLogger, + getProviders() { + return providers; + }, + getTools() { + return tools; + }, + getAuthProviders() { + return authProviders; + }, + getAuthProvider(id: string) { + return authProviders.get(id); + }, + 
getExtensions() { + return Object.freeze(activated.map((e) => e.manifest)); + }, + scheduler: { + register(job: ScheduledJob) { + scheduledJobs.push(job); + deps.scheduler.register(job); + }, + }, + }; + } - return { - async activate() { - for (const ext of compatible) { - try { - await ext.activate(buildHostAPI(ext.manifest.id)); - activated.push(ext); - deps.logger.info(`Extension "${ext.manifest.id}" activated`); - } catch (err) { - disabled.push({ - manifest: ext.manifest, - reason: `Activation failed: ${err instanceof Error ? err.message : String(err)}`, - }); - deps.logger.error(`Extension "${ext.manifest.id}" failed to activate`, { err }); - } - } - }, - async deactivate() { - for (let i = activated.length - 1; i >= 0; i--) { - const ext = activated[i]; - if (ext === undefined || ext.deactivate === undefined) continue; - try { - await ext.deactivate(); - } catch (err) { - deps.logger.error(`Extension "${ext.manifest.id}" failed to deactivate`, { err }); - } - } - }, - getTools() { - return tools; - }, - getTool(name: string) { - return tools.get(name); - }, - getProviders() { - return providers; - }, - getProvider(id: string) { - return providers.get(id); - }, - getAuthProviders() { - return authProviders; - }, - getAuthProvider(id: string) { - return authProviders.get(id); - }, - getScheduledJobs() { - return scheduledJobs; - }, - getMigrations() { - return migrations; - }, - getDisabled() { - return disabled; - }, - getExtensions() { - return Object.freeze(activated.map((e) => e.manifest)); - }, - getHostAPI() { - return buildHostAPI("__host__", { registrationClosed: true }); - }, - }; + return { + async activate() { + for (const ext of compatible) { + try { + await ext.activate(buildHostAPI(ext.manifest.id)); + activated.push(ext); + deps.logger.info(`Extension "${ext.manifest.id}" activated`); + } catch (err) { + disabled.push({ + manifest: ext.manifest, + reason: `Activation failed: ${err instanceof Error ? 
err.message : String(err)}`, + }); + deps.logger.error(`Extension "${ext.manifest.id}" failed to activate`, { err }); + } + } + }, + async deactivate() { + for (let i = activated.length - 1; i >= 0; i--) { + const ext = activated[i]; + if (ext === undefined || ext.deactivate === undefined) continue; + try { + await ext.deactivate(); + } catch (err) { + deps.logger.error(`Extension "${ext.manifest.id}" failed to deactivate`, { err }); + } + } + }, + getTools() { + return tools; + }, + getTool(name: string) { + return tools.get(name); + }, + getProviders() { + return providers; + }, + getProvider(id: string) { + return providers.get(id); + }, + getAuthProviders() { + return authProviders; + }, + getAuthProvider(id: string) { + return authProviders.get(id); + }, + getScheduledJobs() { + return scheduledJobs; + }, + getMigrations() { + return migrations; + }, + getDisabled() { + return disabled; + }, + getExtensions() { + return Object.freeze(activated.map((e) => e.manifest)); + }, + getHostAPI() { + return buildHostAPI("__host__", { registrationClosed: true }); + }, + }; } diff --git a/packages/kernel/src/host/version.test.ts b/packages/kernel/src/host/version.test.ts index 85002b6..8325c5e 100644 --- a/packages/kernel/src/host/version.test.ts +++ b/packages/kernel/src/host/version.test.ts @@ -2,99 +2,99 @@ import { describe, expect, it } from "vitest"; import { isApiVersionCompatible } from "./version.js"; describe("isApiVersionCompatible", () => { - describe("wildcard", () => { - it("matches any version", () => { - expect(isApiVersionCompatible("*", "0.1.0")).toBe(true); - expect(isApiVersionCompatible("*", "1.0.0")).toBe(true); - expect(isApiVersionCompatible("*", "99.99.99")).toBe(true); - }); - }); - - describe("exact match", () => { - it("matches identical version", () => { - expect(isApiVersionCompatible("0.1.0", "0.1.0")).toBe(true); - }); - - it("rejects different patch", () => { - expect(isApiVersionCompatible("0.1.0", "0.1.1")).toBe(false); - }); - - 
it("rejects different minor", () => { - expect(isApiVersionCompatible("0.1.0", "0.2.0")).toBe(false); - }); - - it("rejects different major", () => { - expect(isApiVersionCompatible("1.0.0", "2.0.0")).toBe(false); - }); - }); - - describe("caret range (^)", () => { - it("0.x: allows same minor, higher patch", () => { - expect(isApiVersionCompatible("^0.1.0", "0.1.0")).toBe(true); - expect(isApiVersionCompatible("^0.1.0", "0.1.5")).toBe(true); - expect(isApiVersionCompatible("^0.1.0", "0.1.99")).toBe(true); - }); - - it("0.x: rejects different minor", () => { - expect(isApiVersionCompatible("^0.1.0", "0.2.0")).toBe(false); - expect(isApiVersionCompatible("^0.1.0", "0.0.9")).toBe(false); - }); - - it("0.x: rejects different major", () => { - expect(isApiVersionCompatible("^0.1.0", "1.0.0")).toBe(false); - }); - - it("1.x+: allows same major, higher minor/patch", () => { - expect(isApiVersionCompatible("^1.2.0", "1.2.0")).toBe(true); - expect(isApiVersionCompatible("^1.2.0", "1.3.0")).toBe(true); - expect(isApiVersionCompatible("^1.2.0", "1.99.0")).toBe(true); - }); - - it("1.x+: rejects next major", () => { - expect(isApiVersionCompatible("^1.2.0", "2.0.0")).toBe(false); - }); - - it("rejects below minimum", () => { - expect(isApiVersionCompatible("^1.2.3", "1.2.2")).toBe(false); - expect(isApiVersionCompatible("^1.2.3", "1.1.9")).toBe(false); - }); - }); - - describe("tilde range (~)", () => { - it("allows same major.minor, higher patch", () => { - expect(isApiVersionCompatible("~0.1.0", "0.1.0")).toBe(true); - expect(isApiVersionCompatible("~0.1.0", "0.1.5")).toBe(true); - }); - - it("rejects different minor", () => { - expect(isApiVersionCompatible("~0.1.0", "0.2.0")).toBe(false); - }); - - it("rejects below minimum", () => { - expect(isApiVersionCompatible("~1.2.3", "1.2.2")).toBe(false); - }); - }); - - describe(">= range", () => { - it("allows equal or higher", () => { - expect(isApiVersionCompatible(">=0.1.0", "0.1.0")).toBe(true); - 
expect(isApiVersionCompatible(">=0.1.0", "0.2.0")).toBe(true); - expect(isApiVersionCompatible(">=0.1.0", "1.0.0")).toBe(true); - }); - - it("rejects below minimum", () => { - expect(isApiVersionCompatible(">=0.2.0", "0.1.0")).toBe(false); - expect(isApiVersionCompatible(">=1.0.0", "0.9.9")).toBe(false); - }); - }); - - describe("invalid input", () => { - it("throws on invalid kernel version", () => { - expect(() => isApiVersionCompatible("^0.1.0", "abc")).toThrow(/invalid semver/i); - }); - - it("throws on invalid range version", () => { - expect(() => isApiVersionCompatible("not-a-range", "0.1.0")).toThrow(/invalid semver/i); - }); - }); + describe("wildcard", () => { + it("matches any version", () => { + expect(isApiVersionCompatible("*", "0.1.0")).toBe(true); + expect(isApiVersionCompatible("*", "1.0.0")).toBe(true); + expect(isApiVersionCompatible("*", "99.99.99")).toBe(true); + }); + }); + + describe("exact match", () => { + it("matches identical version", () => { + expect(isApiVersionCompatible("0.1.0", "0.1.0")).toBe(true); + }); + + it("rejects different patch", () => { + expect(isApiVersionCompatible("0.1.0", "0.1.1")).toBe(false); + }); + + it("rejects different minor", () => { + expect(isApiVersionCompatible("0.1.0", "0.2.0")).toBe(false); + }); + + it("rejects different major", () => { + expect(isApiVersionCompatible("1.0.0", "2.0.0")).toBe(false); + }); + }); + + describe("caret range (^)", () => { + it("0.x: allows same minor, higher patch", () => { + expect(isApiVersionCompatible("^0.1.0", "0.1.0")).toBe(true); + expect(isApiVersionCompatible("^0.1.0", "0.1.5")).toBe(true); + expect(isApiVersionCompatible("^0.1.0", "0.1.99")).toBe(true); + }); + + it("0.x: rejects different minor", () => { + expect(isApiVersionCompatible("^0.1.0", "0.2.0")).toBe(false); + expect(isApiVersionCompatible("^0.1.0", "0.0.9")).toBe(false); + }); + + it("0.x: rejects different major", () => { + expect(isApiVersionCompatible("^0.1.0", "1.0.0")).toBe(false); + }); + + 
it("1.x+: allows same major, higher minor/patch", () => { + expect(isApiVersionCompatible("^1.2.0", "1.2.0")).toBe(true); + expect(isApiVersionCompatible("^1.2.0", "1.3.0")).toBe(true); + expect(isApiVersionCompatible("^1.2.0", "1.99.0")).toBe(true); + }); + + it("1.x+: rejects next major", () => { + expect(isApiVersionCompatible("^1.2.0", "2.0.0")).toBe(false); + }); + + it("rejects below minimum", () => { + expect(isApiVersionCompatible("^1.2.3", "1.2.2")).toBe(false); + expect(isApiVersionCompatible("^1.2.3", "1.1.9")).toBe(false); + }); + }); + + describe("tilde range (~)", () => { + it("allows same major.minor, higher patch", () => { + expect(isApiVersionCompatible("~0.1.0", "0.1.0")).toBe(true); + expect(isApiVersionCompatible("~0.1.0", "0.1.5")).toBe(true); + }); + + it("rejects different minor", () => { + expect(isApiVersionCompatible("~0.1.0", "0.2.0")).toBe(false); + }); + + it("rejects below minimum", () => { + expect(isApiVersionCompatible("~1.2.3", "1.2.2")).toBe(false); + }); + }); + + describe(">= range", () => { + it("allows equal or higher", () => { + expect(isApiVersionCompatible(">=0.1.0", "0.1.0")).toBe(true); + expect(isApiVersionCompatible(">=0.1.0", "0.2.0")).toBe(true); + expect(isApiVersionCompatible(">=0.1.0", "1.0.0")).toBe(true); + }); + + it("rejects below minimum", () => { + expect(isApiVersionCompatible(">=0.2.0", "0.1.0")).toBe(false); + expect(isApiVersionCompatible(">=1.0.0", "0.9.9")).toBe(false); + }); + }); + + describe("invalid input", () => { + it("throws on invalid kernel version", () => { + expect(() => isApiVersionCompatible("^0.1.0", "abc")).toThrow(/invalid semver/i); + }); + + it("throws on invalid range version", () => { + expect(() => isApiVersionCompatible("not-a-range", "0.1.0")).toThrow(/invalid semver/i); + }); + }); }); diff --git a/packages/kernel/src/host/version.ts b/packages/kernel/src/host/version.ts index 0d62a3b..eb9ac16 100644 --- a/packages/kernel/src/host/version.ts +++ 
b/packages/kernel/src/host/version.ts @@ -1,52 +1,52 @@ interface SemVer { - readonly major: number; - readonly minor: number; - readonly patch: number; + readonly major: number; + readonly minor: number; + readonly patch: number; } function parseSemVer(version: string): SemVer { - const match = version.match(/^(\d+)\.(\d+)\.(\d+)$/); - if (!match) throw new Error(`Invalid semver: "${version}"`); - return { - major: Number(match[1]), - minor: Number(match[2]), - patch: Number(match[3]), - }; + const match = version.match(/^(\d+)\.(\d+)\.(\d+)$/); + if (!match) throw new Error(`Invalid semver: "${version}"`); + return { + major: Number(match[1]), + minor: Number(match[2]), + patch: Number(match[3]), + }; } function gte(a: SemVer, b: SemVer): boolean { - if (a.major !== b.major) return a.major > b.major; - if (a.minor !== b.minor) return a.minor > b.minor; - return a.patch >= b.patch; + if (a.major !== b.major) return a.major > b.major; + if (a.minor !== b.minor) return a.minor > b.minor; + return a.patch >= b.patch; } export function isApiVersionCompatible(range: string, kernelVersion: string): boolean { - if (range === "*") return true; - - const kernel = parseSemVer(kernelVersion); - - if (range.startsWith("^")) { - const min = parseSemVer(range.slice(1)); - if (!gte(kernel, min)) return false; - if (min.major === 0) { - return kernel.major === 0 && kernel.minor === min.minor; - } - return kernel.major === min.major; - } - - if (range.startsWith("~")) { - const min = parseSemVer(range.slice(1)); - if (!gte(kernel, min)) return false; - return kernel.major === min.major && kernel.minor === min.minor; - } - - if (range.startsWith(">=")) { - const min = parseSemVer(range.slice(2)); - return gte(kernel, min); - } - - const exact = parseSemVer(range); - return ( - kernel.major === exact.major && kernel.minor === exact.minor && kernel.patch === exact.patch - ); + if (range === "*") return true; + + const kernel = parseSemVer(kernelVersion); + + if 
(range.startsWith("^")) { + const min = parseSemVer(range.slice(1)); + if (!gte(kernel, min)) return false; + if (min.major === 0) { + return kernel.major === 0 && kernel.minor === min.minor; + } + return kernel.major === min.major; + } + + if (range.startsWith("~")) { + const min = parseSemVer(range.slice(1)); + if (!gte(kernel, min)) return false; + return kernel.major === min.major && kernel.minor === min.minor; + } + + if (range.startsWith(">=")) { + const min = parseSemVer(range.slice(2)); + return gte(kernel, min); + } + + const exact = parseSemVer(range); + return ( + kernel.major === exact.major && kernel.minor === exact.minor && kernel.patch === exact.patch + ); } diff --git a/packages/kernel/src/logging/logger.test.ts b/packages/kernel/src/logging/logger.test.ts index 5d7bf45..783d5af 100644 --- a/packages/kernel/src/logging/logger.test.ts +++ b/packages/kernel/src/logging/logger.test.ts @@ -3,43 +3,43 @@ import type { LogDeps, LogRecord, LogSink } from "../contracts/logging.js"; import { createLogger } from "./logger.js"; function harness() { - let idCounter = 0; - const deps: LogDeps = { - now: () => 1000 + idCounter * 10, - newId: () => `span-${++idCounter}`, - }; - const records: LogRecord[] = []; - const sink: LogSink = { emit: (r) => records.push(r) }; - return { logger: createLogger({ extensionId: "test" }, sink, deps), records }; + let idCounter = 0; + const deps: LogDeps = { + now: () => 1000 + idCounter * 10, + newId: () => `span-${++idCounter}`, + }; + const records: LogRecord[] = []; + const sink: LogSink = { emit: (r) => records.push(r) }; + return { logger: createLogger({ extensionId: "test" }, sink, deps), records }; } describe("createLogger child-bound attributes", () => { - it("merges child-bound attrs into BOTH span-open and span-close records", () => { - const { logger, records } = harness(); - // Bind `warm: true` via child() — mirrors the cache-warming capture path. 
- const warmLogger = logger.child({ conversationId: "c1", attrs: { warm: true } }); + it("merges child-bound attrs into BOTH span-open and span-close records", () => { + const { logger, records } = harness(); + // Bind `warm: true` via child() — mirrors the cache-warming capture path. + const warmLogger = logger.child({ conversationId: "c1", attrs: { warm: true } }); - const span = warmLogger.span("provider.request", { model: "x" }); - span.end({ attrs: { "usage.cacheReadTokens": 0 } }); + const span = warmLogger.span("provider.request", { model: "x" }); + span.end({ attrs: { "usage.cacheReadTokens": 0 } }); - const open = records.find((r) => r.kind === "span-open"); - const close = records.find((r) => r.kind === "span-close"); + const open = records.find((r) => r.kind === "span-open"); + const close = records.find((r) => r.kind === "span-close"); - // Open carries the bound attr (pre-existing behavior). - expect(open?.attributes?.warm).toBe(true); - // Close MUST carry it too, so a `warm = true` query finds the closed span - // (with its usage/status) — not just the open record. - expect(close?.attributes?.warm).toBe(true); - // Span-specific attrs from span()/end() are still present on close. - expect(close?.attributes?.model).toBe("x"); - expect(close?.attributes?.["usage.cacheReadTokens"]).toBe(0); - }); + // Open carries the bound attr (pre-existing behavior). + expect(open?.attributes?.warm).toBe(true); + // Close MUST carry it too, so a `warm = true` query finds the closed span + // (with its usage/status) — not just the open record. + expect(close?.attributes?.warm).toBe(true); + // Span-specific attrs from span()/end() are still present on close. 
+ expect(close?.attributes?.model).toBe("x"); + expect(close?.attributes?.["usage.cacheReadTokens"]).toBe(0); + }); - it("omits attributes entirely when neither bound nor span attrs exist", () => { - const { logger, records } = harness(); - const span = logger.span("bare"); - span.end(); - const close = records.find((r) => r.kind === "span-close"); - expect(close?.attributes).toBeUndefined(); - }); + it("omits attributes entirely when neither bound nor span attrs exist", () => { + const { logger, records } = harness(); + const span = logger.span("bare"); + span.end(); + const close = records.find((r) => r.kind === "span-close"); + expect(close?.attributes).toBeUndefined(); + }); }); diff --git a/packages/kernel/src/logging/logger.ts b/packages/kernel/src/logging/logger.ts index 4d2a609..341348d 100644 --- a/packages/kernel/src/logging/logger.ts +++ b/packages/kernel/src/logging/logger.ts @@ -6,112 +6,112 @@ */ import type { - Attributes, - ErrorAttributes, - Level, - LogContext, - LogDeps, - Logger, - LogLineRecord, - LogSink, - Span, - SpanCloseRecord, - SpanLink, - SpanOpenRecord, - SpanStatus, + Attributes, + ErrorAttributes, + Level, + LogContext, + LogDeps, + Logger, + LogLineRecord, + LogSink, + Span, + SpanCloseRecord, + SpanLink, + SpanOpenRecord, + SpanStatus, } from "../contracts/logging.js"; interface LoggerState { - readonly ctx: LogContext; - readonly attrs: Attributes | undefined; - readonly deps: LogDeps; - readonly sink: LogSink; + readonly ctx: LogContext; + readonly attrs: Attributes | undefined; + readonly deps: LogDeps; + readonly sink: LogSink; } function mergeAttributes( - base: Attributes | undefined, - extra: Attributes | undefined, + base: Attributes | undefined, + extra: Attributes | undefined, ): Attributes | undefined { - if (base === undefined && extra === undefined) return undefined; - if (base === undefined) return extra; - if (extra === undefined) return base; - return { ...base, ...extra }; + if (base === undefined && extra === 
undefined) return undefined; + if (base === undefined) return extra; + if (extra === undefined) return base; + return { ...base, ...extra }; } function isScalarAttr(value: unknown): value is string | number | boolean | null { - const t = typeof value; - return t === "string" || t === "number" || t === "boolean" || value === null; + const t = typeof value; + return t === "string" || t === "number" || t === "boolean" || value === null; } function emitLog(state: LoggerState, level: Level, msg: string, attrs?: Attributes): void { - const merged = mergeAttributes(state.attrs, attrs); - const base = { - kind: "log" as const, - level, - msg, - timestamp: state.deps.now(), - extensionId: state.ctx.extensionId, - }; - const record: LogLineRecord = - state.ctx.conversationId !== undefined || - state.ctx.turnId !== undefined || - state.ctx.spanId !== undefined || - state.ctx.parentSpanId !== undefined || - merged !== undefined - ? { - ...base, - ...(state.ctx.conversationId !== undefined - ? { conversationId: state.ctx.conversationId } - : {}), - ...(state.ctx.turnId !== undefined ? { turnId: state.ctx.turnId } : {}), - ...(state.ctx.spanId !== undefined ? { spanId: state.ctx.spanId } : {}), - ...(state.ctx.parentSpanId !== undefined ? { parentSpanId: state.ctx.parentSpanId } : {}), - ...(merged !== undefined ? { attributes: merged } : {}), - } - : base; - try { - state.sink.emit(record); - } catch { - // Swallow — D7: the turn is sovereign (never break the caller). - } + const merged = mergeAttributes(state.attrs, attrs); + const base = { + kind: "log" as const, + level, + msg, + timestamp: state.deps.now(), + extensionId: state.ctx.extensionId, + }; + const record: LogLineRecord = + state.ctx.conversationId !== undefined || + state.ctx.turnId !== undefined || + state.ctx.spanId !== undefined || + state.ctx.parentSpanId !== undefined || + merged !== undefined + ? { + ...base, + ...(state.ctx.conversationId !== undefined + ? 
{ conversationId: state.ctx.conversationId } + : {}), + ...(state.ctx.turnId !== undefined ? { turnId: state.ctx.turnId } : {}), + ...(state.ctx.spanId !== undefined ? { spanId: state.ctx.spanId } : {}), + ...(state.ctx.parentSpanId !== undefined ? { parentSpanId: state.ctx.parentSpanId } : {}), + ...(merged !== undefined ? { attributes: merged } : {}), + } + : base; + try { + state.sink.emit(record); + } catch { + // Swallow — D7: the turn is sovereign (never break the caller). + } } function buildSpanOpen( - state: LoggerState, - name: string, - spanId: string, - attrs?: Attributes, - body?: string, - parentSpanId?: string, + state: LoggerState, + name: string, + spanId: string, + attrs?: Attributes, + body?: string, + parentSpanId?: string, ): SpanOpenRecord { - const base = { - kind: "span-open" as const, - spanId, - name, - timestamp: state.deps.now(), - extensionId: state.ctx.extensionId, - }; - const merged = mergeAttributes(state.attrs, attrs); - const effectiveParent = parentSpanId ?? state.ctx.parentSpanId; - return { - ...base, - ...(state.ctx.conversationId !== undefined ? { conversationId: state.ctx.conversationId } : {}), - ...(state.ctx.turnId !== undefined ? { turnId: state.ctx.turnId } : {}), - ...(effectiveParent !== undefined ? { parentSpanId: effectiveParent } : {}), - ...(merged !== undefined ? { attributes: merged } : {}), - ...(body !== undefined ? { body } : {}), - }; + const base = { + kind: "span-open" as const, + spanId, + name, + timestamp: state.deps.now(), + extensionId: state.ctx.extensionId, + }; + const merged = mergeAttributes(state.attrs, attrs); + const effectiveParent = parentSpanId ?? state.ctx.parentSpanId; + return { + ...base, + ...(state.ctx.conversationId !== undefined ? { conversationId: state.ctx.conversationId } : {}), + ...(state.ctx.turnId !== undefined ? { turnId: state.ctx.turnId } : {}), + ...(effectiveParent !== undefined ? { parentSpanId: effectiveParent } : {}), + ...(merged !== undefined ? 
{ attributes: merged } : {}), + ...(body !== undefined ? { body } : {}), + }; } function buildSpanLink( - target: { readonly spanId: string; readonly turnId?: string }, - reason?: string, + target: { readonly spanId: string; readonly turnId?: string }, + reason?: string, ): SpanLink { - return { - spanId: target.spanId, - ...(target.turnId !== undefined ? { turnId: target.turnId } : {}), - ...(reason !== undefined ? { reason } : {}), - }; + return { + spanId: target.spanId, + ...(target.turnId !== undefined ? { turnId: target.turnId } : {}), + ...(reason !== undefined ? { reason } : {}), + }; } /** @@ -124,187 +124,187 @@ function buildSpanLink( * @param attrs Optional default attributes (from child()). */ export function createLogger( - ctx: LogContext, - sink: LogSink, - deps: LogDeps, - attrs?: Attributes, + ctx: LogContext, + sink: LogSink, + deps: LogDeps, + attrs?: Attributes, ): Logger { - const state: LoggerState = { ctx, attrs, deps, sink }; + const state: LoggerState = { ctx, attrs, deps, sink }; - function makeSpan( - name: string, - spanAttrs?: Attributes, - parentSpanId?: string, - body?: string, - ): Span { - const spanId = deps.newId(); - const mergedParent = parentSpanId ?? state.ctx.spanId; - const spanCtx: LogContext = { - extensionId: ctx.extensionId, - ...(ctx.conversationId !== undefined ? { conversationId: ctx.conversationId } : {}), - ...(ctx.turnId !== undefined ? { turnId: ctx.turnId } : {}), - spanId, - ...(mergedParent !== undefined ? { parentSpanId: mergedParent } : {}), - }; + function makeSpan( + name: string, + spanAttrs?: Attributes, + parentSpanId?: string, + body?: string, + ): Span { + const spanId = deps.newId(); + const mergedParent = parentSpanId ?? state.ctx.spanId; + const spanCtx: LogContext = { + extensionId: ctx.extensionId, + ...(ctx.conversationId !== undefined ? { conversationId: ctx.conversationId } : {}), + ...(ctx.turnId !== undefined ? { turnId: ctx.turnId } : {}), + spanId, + ...(mergedParent !== undefined ? 
{ parentSpanId: mergedParent } : {}), + }; - const openRecord = buildSpanOpen(state, name, spanId, spanAttrs, body, mergedParent); - const spanAttrsMutable: Record<string, string | number | boolean | null> = - spanAttrs !== undefined ? { ...spanAttrs } : {}; - const links: SpanLink[] = []; - const openedAt = deps.now(); + const openRecord = buildSpanOpen(state, name, spanId, spanAttrs, body, mergedParent); + const spanAttrsMutable: Record<string, string | number | boolean | null> = + spanAttrs !== undefined ? { ...spanAttrs } : {}; + const links: SpanLink[] = []; + const openedAt = deps.now(); - try { - sink.emit(openRecord); - } catch { - // Swallow — D7. - } + try { + sink.emit(openRecord); + } catch { + // Swallow — D7. + } - const spanLogger = createLogger(spanCtx, sink, deps, state.attrs); + const spanLogger = createLogger(spanCtx, sink, deps, state.attrs); - const span: Span = { - id: spanId, - log: spanLogger, - setAttributes(newAttrs: Attributes): void { - for (const [key, value] of Object.entries(newAttrs)) { - spanAttrsMutable[key] = value; - } - }, - addLink(target, reason): void { - links.push(buildSpanLink(target, reason)); - }, - child(childName: string, childAttrs?: Attributes, childBody?: string): Span { - return makeSpan(childName, childAttrs, spanId, childBody); - }, - end(outcome?): void { - const closedAt = deps.now(); - const err = outcome?.err; - let status: SpanStatus = "ok"; - if (err !== undefined && err !== null) { - status = "error"; - const errMsg = err instanceof Error ? 
err.message : String(err); - spanAttrsMutable["error.message"] = errMsg; - if (err instanceof Error && err.stack !== undefined) { - spanAttrsMutable["error.stack"] = err.stack; - } - } - if (outcome?.attrs !== undefined) { - for (const [key, value] of Object.entries(outcome.attrs)) { - spanAttrsMutable[key] = value; - } - } + const span: Span = { + id: spanId, + log: spanLogger, + setAttributes(newAttrs: Attributes): void { + for (const [key, value] of Object.entries(newAttrs)) { + spanAttrsMutable[key] = value; + } + }, + addLink(target, reason): void { + links.push(buildSpanLink(target, reason)); + }, + child(childName: string, childAttrs?: Attributes, childBody?: string): Span { + return makeSpan(childName, childAttrs, spanId, childBody); + }, + end(outcome?): void { + const closedAt = deps.now(); + const err = outcome?.err; + let status: SpanStatus = "ok"; + if (err !== undefined && err !== null) { + status = "error"; + const errMsg = err instanceof Error ? err.message : String(err); + spanAttrsMutable["error.message"] = errMsg; + if (err instanceof Error && err.stack !== undefined) { + spanAttrsMutable["error.stack"] = err.stack; + } + } + if (outcome?.attrs !== undefined) { + for (const [key, value] of Object.entries(outcome.attrs)) { + spanAttrsMutable[key] = value; + } + } - const hasAttrs = Object.keys(spanAttrsMutable).length > 0; - // Merge child-bound default attrs (state.attrs) the SAME way span-open - // does (buildSpanOpen). Without this, an attribute bound via - // `logger.child({ attrs })` appears on the span-open record but NOT the - // span-close record — so a query like `warm = true` can't find the - // closed span (with its usage/status). Open and close must agree. - const mergedCloseAttrs = mergeAttributes( - state.attrs, - hasAttrs ? 
spanAttrsMutable : undefined, - ); - const hasLinks = links.length > 0; - const base = { - kind: "span-close" as const, - spanId, - name, - timestamp: closedAt, - durationMs: closedAt - openedAt, - status, - extensionId: ctx.extensionId, - }; - const closeRecord: SpanCloseRecord = { - ...base, - ...(ctx.conversationId !== undefined ? { conversationId: ctx.conversationId } : {}), - ...(ctx.turnId !== undefined ? { turnId: ctx.turnId } : {}), - ...(mergedParent !== undefined ? { parentSpanId: mergedParent } : {}), - ...(mergedCloseAttrs !== undefined ? { attributes: mergedCloseAttrs } : {}), - ...(hasLinks ? { links: [...links] } : {}), - ...(outcome?.body !== undefined ? { body: outcome.body } : {}), - }; - try { - sink.emit(closeRecord); - } catch { - // Swallow — D7. - } - }, - }; + const hasAttrs = Object.keys(spanAttrsMutable).length > 0; + // Merge child-bound default attrs (state.attrs) the SAME way span-open + // does (buildSpanOpen). Without this, an attribute bound via + // `logger.child({ attrs })` appears on the span-open record but NOT the + // span-close record — so a query like `warm = true` can't find the + // closed span (with its usage/status). Open and close must agree. + const mergedCloseAttrs = mergeAttributes( + state.attrs, + hasAttrs ? spanAttrsMutable : undefined, + ); + const hasLinks = links.length > 0; + const base = { + kind: "span-close" as const, + spanId, + name, + timestamp: closedAt, + durationMs: closedAt - openedAt, + status, + extensionId: ctx.extensionId, + }; + const closeRecord: SpanCloseRecord = { + ...base, + ...(ctx.conversationId !== undefined ? { conversationId: ctx.conversationId } : {}), + ...(ctx.turnId !== undefined ? { turnId: ctx.turnId } : {}), + ...(mergedParent !== undefined ? { parentSpanId: mergedParent } : {}), + ...(mergedCloseAttrs !== undefined ? { attributes: mergedCloseAttrs } : {}), + ...(hasLinks ? { links: [...links] } : {}), + ...(outcome?.body !== undefined ? 
{ body: outcome.body } : {}), + }; + try { + sink.emit(closeRecord); + } catch { + // Swallow — D7. + } + }, + }; - return span; - } + return span; + } - const logger: Logger = { - debug(msg: string, attrs?: Attributes): void { - emitLog(state, "debug", msg, attrs); - }, - info(msg: string, attrs?: Attributes): void { - emitLog(state, "info", msg, attrs); - }, - warn(msg: string, attrs?: Attributes): void { - emitLog(state, "warn", msg, attrs); - }, - error(msg: string, attrs?: ErrorAttributes): void { - const err = attrs?.err; - if (err !== undefined && err !== null) { - // Extract scalar attributes (everything except err). - const scalarAttrs: Record<string, string | number | boolean | null> = {}; - if (attrs !== undefined) { - for (const [key, value] of Object.entries(attrs)) { - if (key !== "err" && isScalarAttr(value)) { - scalarAttrs[key] = value; - } - } - } - const merged = mergeAttributes( - state.attrs, - Object.keys(scalarAttrs).length > 0 ? scalarAttrs : undefined, - ); - const errMsg = err instanceof Error ? err.message : String(err); - const errorAttrs: Record<string, string | number | boolean | null> = { - ...(merged ?? {}), - "error.message": errMsg, - }; - if (err instanceof Error && err.stack !== undefined) { - errorAttrs["error.stack"] = err.stack; - } - emitLog(state, "error", msg, errorAttrs as Attributes); - } else { - // No err field — filter to scalar attributes only. - const scalarAttrs: Record<string, string | number | boolean | null> = {}; - if (attrs !== undefined) { - for (const [key, value] of Object.entries(attrs)) { - if (isScalarAttr(value)) { - scalarAttrs[key] = value; - } - } - } - emitLog( - state, - "error", - msg, - Object.keys(scalarAttrs).length > 0 ? (scalarAttrs as Attributes) : undefined, - ); - } - }, - child(childCtx: Partial<LogContext> & { readonly attrs?: Attributes }): Logger { - const convId = childCtx.conversationId ?? ctx.conversationId; - const tId = childCtx.turnId ?? 
ctx.turnId; - const sId = childCtx.spanId ?? ctx.spanId; - const pId = childCtx.parentSpanId ?? ctx.parentSpanId; - const newCtx: LogContext = { - extensionId: ctx.extensionId, - ...(convId !== undefined ? { conversationId: convId } : {}), - ...(tId !== undefined ? { turnId: tId } : {}), - ...(sId !== undefined ? { spanId: sId } : {}), - ...(pId !== undefined ? { parentSpanId: pId } : {}), - }; - const newAttrs = mergeAttributes(state.attrs, childCtx.attrs); - return createLogger(newCtx, sink, deps, newAttrs); - }, - span(name: string, attrs?: Attributes, body?: string): Span { - return makeSpan(name, attrs, undefined, body); - }, - }; + const logger: Logger = { + debug(msg: string, attrs?: Attributes): void { + emitLog(state, "debug", msg, attrs); + }, + info(msg: string, attrs?: Attributes): void { + emitLog(state, "info", msg, attrs); + }, + warn(msg: string, attrs?: Attributes): void { + emitLog(state, "warn", msg, attrs); + }, + error(msg: string, attrs?: ErrorAttributes): void { + const err = attrs?.err; + if (err !== undefined && err !== null) { + // Extract scalar attributes (everything except err). + const scalarAttrs: Record<string, string | number | boolean | null> = {}; + if (attrs !== undefined) { + for (const [key, value] of Object.entries(attrs)) { + if (key !== "err" && isScalarAttr(value)) { + scalarAttrs[key] = value; + } + } + } + const merged = mergeAttributes( + state.attrs, + Object.keys(scalarAttrs).length > 0 ? scalarAttrs : undefined, + ); + const errMsg = err instanceof Error ? err.message : String(err); + const errorAttrs: Record<string, string | number | boolean | null> = { + ...(merged ?? {}), + "error.message": errMsg, + }; + if (err instanceof Error && err.stack !== undefined) { + errorAttrs["error.stack"] = err.stack; + } + emitLog(state, "error", msg, errorAttrs as Attributes); + } else { + // No err field — filter to scalar attributes only. 
+ const scalarAttrs: Record<string, string | number | boolean | null> = {}; + if (attrs !== undefined) { + for (const [key, value] of Object.entries(attrs)) { + if (isScalarAttr(value)) { + scalarAttrs[key] = value; + } + } + } + emitLog( + state, + "error", + msg, + Object.keys(scalarAttrs).length > 0 ? (scalarAttrs as Attributes) : undefined, + ); + } + }, + child(childCtx: Partial<LogContext> & { readonly attrs?: Attributes }): Logger { + const convId = childCtx.conversationId ?? ctx.conversationId; + const tId = childCtx.turnId ?? ctx.turnId; + const sId = childCtx.spanId ?? ctx.spanId; + const pId = childCtx.parentSpanId ?? ctx.parentSpanId; + const newCtx: LogContext = { + extensionId: ctx.extensionId, + ...(convId !== undefined ? { conversationId: convId } : {}), + ...(tId !== undefined ? { turnId: tId } : {}), + ...(sId !== undefined ? { spanId: sId } : {}), + ...(pId !== undefined ? { parentSpanId: pId } : {}), + }; + const newAttrs = mergeAttributes(state.attrs, childCtx.attrs); + return createLogger(newCtx, sink, deps, newAttrs); + }, + span(name: string, attrs?: Attributes, body?: string): Span { + return makeSpan(name, attrs, undefined, body); + }, + }; - return logger; + return logger; } diff --git a/packages/kernel/src/runtime/dispatch.test.ts b/packages/kernel/src/runtime/dispatch.test.ts index afbfb39..dfe2ac7 100644 --- a/packages/kernel/src/runtime/dispatch.test.ts +++ b/packages/kernel/src/runtime/dispatch.test.ts @@ -11,51 +11,51 @@ import { runTurn } from "./run-turn.js"; // --------------------------------------------------------------------------- function delay(ms: number): Promise<void> { - return new Promise((resolve) => { - setTimeout(resolve, ms); - }); + return new Promise((resolve) => { + setTimeout(resolve, ms); + }); } function createFakeProvider(script: ProviderEvent[][]): ProviderContract { - let callIndex = 0; - return { - id: "fake", - stream() { - const events = script[callIndex] ?? 
[]; - callIndex++; - return (async function* () { - for (const event of events) { - yield event; - } - })(); - }, - }; + let callIndex = 0; + return { + id: "fake", + stream() { + const events = script[callIndex] ?? []; + callIndex++; + return (async function* () { + for (const event of events) { + yield event; + } + })(); + }, + }; } function createFakeTool( - name: string, - handler?: (input: unknown, ctx: ToolExecuteContext) => Promise<ToolResult>, - opts?: { concurrencySafe?: boolean }, + name: string, + handler?: (input: unknown, ctx: ToolExecuteContext) => Promise<ToolResult>, + opts?: { concurrencySafe?: boolean }, ): ToolContract { - return { - name, - description: `Fake tool: ${name}`, - parameters: { type: "object" }, - ...(opts?.concurrencySafe !== undefined ? { concurrencySafe: opts.concurrencySafe } : {}), - execute: handler ?? (async (input) => ({ content: `${name}: ${JSON.stringify(input)}` })), - }; + return { + name, + description: `Fake tool: ${name}`, + parameters: { type: "object" }, + ...(opts?.concurrencySafe !== undefined ? { concurrencySafe: opts.concurrencySafe } : {}), + execute: handler ?? 
(async (input) => ({ content: `${name}: ${JSON.stringify(input)}` })), + }; } function createCollectingEmit(): { events: AgentEvent[]; emit: (event: AgentEvent) => void } { - const events: AgentEvent[] = []; - return { events, emit: (event) => events.push(event) }; + const events: AgentEvent[] = []; + return { events, emit: (event) => events.push(event) }; } const noopEmit = () => {}; const userMessage: ChatMessage = { - role: "user", - chunks: [{ type: "text", text: "hello" }], + role: "user", + chunks: [{ type: "text", text: "hello" }], }; const ABORTED_RESULT: ToolResult = { content: "Aborted", isError: true }; @@ -65,158 +65,158 @@ const ABORTED_RESULT: ToolResult = { content: "Aborted", isError: true }; // =========================================================================== describe("executeToolCall", () => { - it("returns the tool's result when the tool resolves before abort", async () => { - const ac = new AbortController(); - const tool = createFakeTool("echo", async (input) => ({ - content: `echo: ${JSON.stringify(input)}`, - })); - - const result = await executeToolCall( - { id: "tc1", name: "echo", input: { x: 1 } }, - tool, - ac.signal, - noopEmit, - "conv-1", - "turn-1", - ); - - expect(result).toEqual({ content: 'echo: {"x":1}' }); - }); - - it("returns Aborted immediately when signal is already aborted at call time", async () => { - const ac = new AbortController(); - ac.abort(); - const tool = createFakeTool("echo", async () => ({ content: "should not run" })); - - const result = await executeToolCall( - { id: "tc1", name: "echo", input: {} }, - tool, - ac.signal, - noopEmit, - "conv-1", - "turn-1", - ); - - expect(result).toEqual(ABORTED_RESULT); - }); - - it("returns Aborted when a hanging tool is raced against an abort signal", async () => { - const ac = new AbortController(); - // A tool that never resolves and ignores ctx.signal - const tool = createFakeTool("hang", () => new Promise<ToolResult>(() => {})); - - const promise = 
executeToolCall( - { id: "tc1", name: "hang", input: {} }, - tool, - ac.signal, - noopEmit, - "conv-1", - "turn-1", - ); - - // Abort after the tool has started - await delay(10); - ac.abort(); - - const result = await promise; - expect(result).toEqual(ABORTED_RESULT); - }); - - it("returns the tool's own result when a signal-aware tool resolves on abort", async () => { - const ac = new AbortController(); - const toolResult: ToolResult = { content: "aborted by tool", isError: true }; - const tool = createFakeTool("aware", (_input, ctx) => { - return new Promise<ToolResult>((resolve) => { - ctx.signal.addEventListener("abort", () => resolve(toolResult), { once: true }); - }); - }); - - const promise = executeToolCall( - { id: "tc1", name: "aware", input: {} }, - tool, - ac.signal, - noopEmit, - "conv-1", - "turn-1", - ); - - await delay(10); - ac.abort(); - - const result = await promise; - // The tool listens to the signal and resolves its own result. Whether - // the tool's result or the race's "Aborted" wins is timing-dependent; - // both are isError and let the turn seal with finishReason "aborted". - expect(result.isError).toBe(true); - expect(result.content).toBe("aborted by tool"); - }); - - it("swallows a late rejection from the orphaned tool promise after abort wins the race", async () => { - const ac = new AbortController(); - let rejectTool: ((err: Error) => void) | undefined; - const tool = createFakeTool("late-reject", () => { - return new Promise<ToolResult>((_resolve, reject) => { - rejectTool = reject; - }); - }); - - const promise = executeToolCall( - { id: "tc1", name: "late-reject", input: {} }, - tool, - ac.signal, - noopEmit, - "conv-1", - "turn-1", - ); - - await delay(10); - ac.abort(); - - const result = await promise; - expect(result).toEqual(ABORTED_RESULT); - - // The tool rejects AFTER the race already resolved with "Aborted". - // The no-op catch must swallow this — no unhandled rejection. 
- rejectTool?.(new Error("late boom")); - // Give the microtask queue a tick to flush - await delay(5); - // If we reach here without an unhandledRejection crashing the process, - // the test passes. (vitest surfaces unhandled rejections as failures.) - }); - - it("returns an error result when the tool rejects before abort", async () => { - const ac = new AbortController(); - const tool = createFakeTool("boom", async () => { - throw new Error("tool exploded"); - }); - - const result = await executeToolCall( - { id: "tc1", name: "boom", input: {} }, - tool, - ac.signal, - noopEmit, - "conv-1", - "turn-1", - ); - - expect(result.isError).toBe(true); - expect(result.content).toContain("tool exploded"); - }); - - it("returns Unknown tool when the tool is undefined", async () => { - const ac = new AbortController(); - const result = await executeToolCall( - { id: "tc1", name: "nonexistent", input: {} }, - undefined, - ac.signal, - noopEmit, - "conv-1", - "turn-1", - ); - - expect(result.isError).toBe(true); - expect(result.content).toContain("Unknown tool"); - }); + it("returns the tool's result when the tool resolves before abort", async () => { + const ac = new AbortController(); + const tool = createFakeTool("echo", async (input) => ({ + content: `echo: ${JSON.stringify(input)}`, + })); + + const result = await executeToolCall( + { id: "tc1", name: "echo", input: { x: 1 } }, + tool, + ac.signal, + noopEmit, + "conv-1", + "turn-1", + ); + + expect(result).toEqual({ content: 'echo: {"x":1}' }); + }); + + it("returns Aborted immediately when signal is already aborted at call time", async () => { + const ac = new AbortController(); + ac.abort(); + const tool = createFakeTool("echo", async () => ({ content: "should not run" })); + + const result = await executeToolCall( + { id: "tc1", name: "echo", input: {} }, + tool, + ac.signal, + noopEmit, + "conv-1", + "turn-1", + ); + + expect(result).toEqual(ABORTED_RESULT); + }); + + it("returns Aborted when a hanging tool is 
raced against an abort signal", async () => { + const ac = new AbortController(); + // A tool that never resolves and ignores ctx.signal + const tool = createFakeTool("hang", () => new Promise<ToolResult>(() => {})); + + const promise = executeToolCall( + { id: "tc1", name: "hang", input: {} }, + tool, + ac.signal, + noopEmit, + "conv-1", + "turn-1", + ); + + // Abort after the tool has started + await delay(10); + ac.abort(); + + const result = await promise; + expect(result).toEqual(ABORTED_RESULT); + }); + + it("returns the tool's own result when a signal-aware tool resolves on abort", async () => { + const ac = new AbortController(); + const toolResult: ToolResult = { content: "aborted by tool", isError: true }; + const tool = createFakeTool("aware", (_input, ctx) => { + return new Promise<ToolResult>((resolve) => { + ctx.signal.addEventListener("abort", () => resolve(toolResult), { once: true }); + }); + }); + + const promise = executeToolCall( + { id: "tc1", name: "aware", input: {} }, + tool, + ac.signal, + noopEmit, + "conv-1", + "turn-1", + ); + + await delay(10); + ac.abort(); + + const result = await promise; + // The tool listens to the signal and resolves its own result. Whether + // the tool's result or the race's "Aborted" wins is timing-dependent; + // both are isError and let the turn seal with finishReason "aborted". 
+ expect(result.isError).toBe(true); + expect(result.content).toBe("aborted by tool"); + }); + + it("swallows a late rejection from the orphaned tool promise after abort wins the race", async () => { + const ac = new AbortController(); + let rejectTool: ((err: Error) => void) | undefined; + const tool = createFakeTool("late-reject", () => { + return new Promise<ToolResult>((_resolve, reject) => { + rejectTool = reject; + }); + }); + + const promise = executeToolCall( + { id: "tc1", name: "late-reject", input: {} }, + tool, + ac.signal, + noopEmit, + "conv-1", + "turn-1", + ); + + await delay(10); + ac.abort(); + + const result = await promise; + expect(result).toEqual(ABORTED_RESULT); + + // The tool rejects AFTER the race already resolved with "Aborted". + // The no-op catch must swallow this — no unhandled rejection. + rejectTool?.(new Error("late boom")); + // Give the microtask queue a tick to flush + await delay(5); + // If we reach here without an unhandledRejection crashing the process, + // the test passes. (vitest surfaces unhandled rejections as failures.) 
+ }); + + it("returns an error result when the tool rejects before abort", async () => { + const ac = new AbortController(); + const tool = createFakeTool("boom", async () => { + throw new Error("tool exploded"); + }); + + const result = await executeToolCall( + { id: "tc1", name: "boom", input: {} }, + tool, + ac.signal, + noopEmit, + "conv-1", + "turn-1", + ); + + expect(result.isError).toBe(true); + expect(result.content).toContain("tool exploded"); + }); + + it("returns Unknown tool when the tool is undefined", async () => { + const ac = new AbortController(); + const result = await executeToolCall( + { id: "tc1", name: "nonexistent", input: {} }, + undefined, + ac.signal, + noopEmit, + "conv-1", + "turn-1", + ); + + expect(result.isError).toBe(true); + expect(result.content).toContain("Unknown tool"); + }); }); // =========================================================================== @@ -224,312 +224,312 @@ describe("executeToolCall", () => { // =========================================================================== describe("runTurn abort-race durability", () => { - // Required test 1: A hanging tool (never resolves, ignores ctx.signal) - // must not keep runTurn from returning when the signal aborts. - it("hanging tool + abort → runTurn returns with finishReason aborted and emits done", async () => { - const ac = new AbortController(); - - // A tool whose execute returns a promise that NEVER resolves and - // ignores ctx.signal entirely. - const tool = createFakeTool("hang", () => new Promise<ToolResult>(() => {})); - - // Use eager: true so the tool starts BEFORE the signal aborts. - // This exercises the race (not the early signal.aborted return). 
- const provider: ProviderContract = { - id: "fake", - stream() { - return (async function* () { - yield { - type: "tool-call", - toolCallId: "tc1", - toolName: "hang", - input: {}, - } as ProviderEvent; - ac.abort(); - await delay(10); - yield { type: "finish", reason: "tool-calls" } as ProviderEvent; - })(); - }, - }; - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: true }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - signal: ac.signal, - }); - - // runTurn returned (didn't hang) → the race worked. - expect(result.finishReason).toBe("aborted"); - - // A done event was emitted with reason "aborted". - const doneEvents = events.filter((e) => e.type === "done"); - expect(doneEvents).toHaveLength(1); - if (doneEvents[0]?.type === "done") { - expect(doneEvents[0].reason).toBe("aborted"); - } - }); - - // Required test 2: A signal-aware tool that resolves its own result on - // abort must also let runTurn return with finishReason "aborted". 
- it("signal-aware tool + abort → runTurn returns with finishReason aborted", async () => { - const ac = new AbortController(); - - const tool = createFakeTool("aware", (_input, ctx) => { - return new Promise<ToolResult>((resolve) => { - ctx.signal.addEventListener( - "abort", - () => resolve({ content: "aborted by tool", isError: true }), - { once: true }, - ); - }); - }); - - const provider: ProviderContract = { - id: "fake", - stream() { - return (async function* () { - yield { - type: "tool-call", - toolCallId: "tc1", - toolName: "aware", - input: {}, - } as ProviderEvent; - ac.abort(); - await delay(10); - yield { type: "finish", reason: "tool-calls" } as ProviderEvent; - })(); - }, - }; - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: true }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - signal: ac.signal, - }); - - expect(result.finishReason).toBe("aborted"); - - const doneEvents = events.filter((e) => e.type === "done"); - expect(doneEvents).toHaveLength(1); - if (doneEvents[0]?.type === "done") { - expect(doneEvents[0].reason).toBe("aborted"); - } - - // When the step is aborted, tool-result MESSAGES are omitted from the - // result (the tool-result EVENT is still emitted by executeStep for - // live UI updates, but the message is not persisted). This prevents - // orphaned `tool` messages from breaking the next turn's provider - // request. The assistant message has its tool-call chunks stripped. - const toolResultMsg = result.messages.find((m) => m.role === "tool"); - expect(toolResultMsg).toBeUndefined(); - - // The assistant message should NOT contain tool-call chunks. 
- const assistantMsg = result.messages.find( - (m) => m.role === "assistant" && m.chunks.some((c) => c.type === "tool-call"), - ); - expect(assistantMsg).toBeUndefined(); - }); - - // Required test 3 (regression guard): Without abort, a normal tool runs - // and its result is used; finishReason reflects the model. - it("no abort → tool runs normally and its result is used (regression)", async () => { - const tool = createFakeTool("normal", async (input) => ({ - content: `result: ${JSON.stringify(input)}`, - })); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "normal", input: { x: 1 } }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: true }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - }); - - // finishReason reflects the model (second step's "stop"). - expect(result.finishReason).toBe("stop"); - - // The tool's result was used (fed back, not "Aborted"). - const toolResultMsg = result.messages.find((m) => m.role === "tool"); - expect(toolResultMsg).toBeDefined(); - const trChunk = toolResultMsg?.chunks[0]; - expect(trChunk?.type).toBe("tool-result"); - if (trChunk?.type === "tool-result") { - expect(trChunk.content).toBe('result: {"x":1}'); - expect(trChunk.isError).toBe(false); - } - - // done event emitted with reason "stop". - const doneEvents = events.filter((e) => e.type === "done"); - expect(doneEvents).toHaveLength(1); - if (doneEvents[0]?.type === "done") { - expect(doneEvents[0].reason).toBe("stop"); - } - }); - - // Bonus: multiple hanging tools + abort → all resolve via the race, - // drain() doesn't deadlock, and runTurn returns. 
Tool-result messages - // are omitted from the result (aborted step); the turn seals cleanly. - it("multiple hanging tools + abort → drain completes and runTurn returns", async () => { - const ac = new AbortController(); - - // Two tools that never resolve and ignore ctx.signal. - const toolA = createFakeTool("hangA", () => new Promise<ToolResult>(() => {})); - const toolB = createFakeTool("hangB", () => new Promise<ToolResult>(() => {})); - - const provider: ProviderContract = { - id: "fake", - stream() { - return (async function* () { - yield { - type: "tool-call", - toolCallId: "tc1", - toolName: "hangA", - input: {}, - } as ProviderEvent; - yield { - type: "tool-call", - toolCallId: "tc2", - toolName: "hangB", - input: {}, - } as ProviderEvent; - ac.abort(); - await delay(10); - yield { type: "finish", reason: "tool-calls" } as ProviderEvent; - })(); - }, - }; - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [toolA, toolB], - dispatch: { maxConcurrent: 2, eager: true }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - signal: ac.signal, - }); - - expect(result.finishReason).toBe("aborted"); - - // tool-result EVENTS are still emitted by executeStep (for live UI), - // but tool-result MESSAGES are omitted from the result (not persisted). - const toolResultEvents = events.filter((e) => e.type === "tool-result"); - expect(toolResultEvents).toHaveLength(2); - for (const tr of toolResultEvents) { - if (tr.type === "tool-result") { - expect(tr.isError).toBe(true); - } - } - - // No tool messages in the result (they would orphan on the next turn). - const toolMessages = result.messages.filter((m) => m.role === "tool"); - expect(toolMessages).toHaveLength(0); - - // Assistant message has no tool-call chunks. 
- const assistantMsgs = result.messages.filter((m) => m.role === "assistant"); - for (const msg of assistantMsgs) { - expect(msg.chunks.some((c) => c.type === "tool-call")).toBe(false); - } - - const doneEvents = events.filter((e) => e.type === "done"); - expect(doneEvents).toHaveLength(1); - if (doneEvents[0]?.type === "done") { - expect(doneEvents[0].reason).toBe("aborted"); - } - }); - - // Critical regression: after an aborted tool call, the result messages - // must NOT contain orphaned tool messages. If they did, the next turn - // would send a `tool` role message to the provider without a preceding - // `assistant` message carrying `tool_calls` → 400 error. - it("aborted step produces no tool messages and no tool-call chunks in result", async () => { - const ac = new AbortController(); - - // Tool that hangs forever - const tool = createFakeTool("hang", () => new Promise<ToolResult>(() => {})); - - const provider: ProviderContract = { - id: "fake", - stream() { - return (async function* () { - yield { type: "text-delta", delta: "Let me run that for you" } as ProviderEvent; - yield { - type: "tool-call", - toolCallId: "tc1", - toolName: "hang", - input: {}, - } as ProviderEvent; - ac.abort(); - await delay(10); - yield { type: "finish", reason: "tool-calls" } as ProviderEvent; - })(); - }, - }; - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: true }, - conversationId: "conv-1", - turnId: "turn-1", - emit: noopEmit, - signal: ac.signal, - }); - - expect(result.finishReason).toBe("aborted"); - - // No tool messages in the result - const toolMessages = result.messages.filter((m) => m.role === "tool"); - expect(toolMessages).toHaveLength(0); - - // The assistant message should preserve text but NOT tool-call chunks - const assistantMsg = result.messages.find((m) => m.role === "assistant"); - expect(assistantMsg).toBeDefined(); - if (assistantMsg !== undefined) { - const hasToolCall = 
assistantMsg.chunks.some((c) => c.type === "tool-call"); - expect(hasToolCall).toBe(false); - // Text content should be preserved - const hasText = assistantMsg.chunks.some((c) => c.type === "text"); - expect(hasText).toBe(true); - } - - // Simulate what the next turn would see: the result messages are the - // conversation history (minus the user message). If we feed these to - // a simple converter, there should be NO `tool` role messages. - const toolRoleCount = result.messages.filter((m) => m.role === "tool").length; - expect(toolRoleCount).toBe(0); - }); + // Required test 1: A hanging tool (never resolves, ignores ctx.signal) + // must not keep runTurn from returning when the signal aborts. + it("hanging tool + abort → runTurn returns with finishReason aborted and emits done", async () => { + const ac = new AbortController(); + + // A tool whose execute returns a promise that NEVER resolves and + // ignores ctx.signal entirely. + const tool = createFakeTool("hang", () => new Promise<ToolResult>(() => {})); + + // Use eager: true so the tool starts BEFORE the signal aborts. + // This exercises the race (not the early signal.aborted return). + const provider: ProviderContract = { + id: "fake", + stream() { + return (async function* () { + yield { + type: "tool-call", + toolCallId: "tc1", + toolName: "hang", + input: {}, + } as ProviderEvent; + ac.abort(); + await delay(10); + yield { type: "finish", reason: "tool-calls" } as ProviderEvent; + })(); + }, + }; + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: true }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + signal: ac.signal, + }); + + // runTurn returned (didn't hang) → the race worked. + expect(result.finishReason).toBe("aborted"); + + // A done event was emitted with reason "aborted". 
+ const doneEvents = events.filter((e) => e.type === "done"); + expect(doneEvents).toHaveLength(1); + if (doneEvents[0]?.type === "done") { + expect(doneEvents[0].reason).toBe("aborted"); + } + }); + + // Required test 2: A signal-aware tool that resolves its own result on + // abort must also let runTurn return with finishReason "aborted". + it("signal-aware tool + abort → runTurn returns with finishReason aborted", async () => { + const ac = new AbortController(); + + const tool = createFakeTool("aware", (_input, ctx) => { + return new Promise<ToolResult>((resolve) => { + ctx.signal.addEventListener( + "abort", + () => resolve({ content: "aborted by tool", isError: true }), + { once: true }, + ); + }); + }); + + const provider: ProviderContract = { + id: "fake", + stream() { + return (async function* () { + yield { + type: "tool-call", + toolCallId: "tc1", + toolName: "aware", + input: {}, + } as ProviderEvent; + ac.abort(); + await delay(10); + yield { type: "finish", reason: "tool-calls" } as ProviderEvent; + })(); + }, + }; + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: true }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + signal: ac.signal, + }); + + expect(result.finishReason).toBe("aborted"); + + const doneEvents = events.filter((e) => e.type === "done"); + expect(doneEvents).toHaveLength(1); + if (doneEvents[0]?.type === "done") { + expect(doneEvents[0].reason).toBe("aborted"); + } + + // When the step is aborted, tool-result MESSAGES are omitted from the + // result (the tool-result EVENT is still emitted by executeStep for + // live UI updates, but the message is not persisted). This prevents + // orphaned `tool` messages from breaking the next turn's provider + // request. The assistant message has its tool-call chunks stripped. 
+ const toolResultMsg = result.messages.find((m) => m.role === "tool"); + expect(toolResultMsg).toBeUndefined(); + + // The assistant message should NOT contain tool-call chunks. + const assistantMsg = result.messages.find( + (m) => m.role === "assistant" && m.chunks.some((c) => c.type === "tool-call"), + ); + expect(assistantMsg).toBeUndefined(); + }); + + // Required test 3 (regression guard): Without abort, a normal tool runs + // and its result is used; finishReason reflects the model. + it("no abort → tool runs normally and its result is used (regression)", async () => { + const tool = createFakeTool("normal", async (input) => ({ + content: `result: ${JSON.stringify(input)}`, + })); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "normal", input: { x: 1 } }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: true }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + }); + + // finishReason reflects the model (second step's "stop"). + expect(result.finishReason).toBe("stop"); + + // The tool's result was used (fed back, not "Aborted"). + const toolResultMsg = result.messages.find((m) => m.role === "tool"); + expect(toolResultMsg).toBeDefined(); + const trChunk = toolResultMsg?.chunks[0]; + expect(trChunk?.type).toBe("tool-result"); + if (trChunk?.type === "tool-result") { + expect(trChunk.content).toBe('result: {"x":1}'); + expect(trChunk.isError).toBe(false); + } + + // done event emitted with reason "stop". 
+ const doneEvents = events.filter((e) => e.type === "done"); + expect(doneEvents).toHaveLength(1); + if (doneEvents[0]?.type === "done") { + expect(doneEvents[0].reason).toBe("stop"); + } + }); + + // Bonus: multiple hanging tools + abort → all resolve via the race, + // drain() doesn't deadlock, and runTurn returns. Tool-result messages + // are omitted from the result (aborted step); the turn seals cleanly. + it("multiple hanging tools + abort → drain completes and runTurn returns", async () => { + const ac = new AbortController(); + + // Two tools that never resolve and ignore ctx.signal. + const toolA = createFakeTool("hangA", () => new Promise<ToolResult>(() => {})); + const toolB = createFakeTool("hangB", () => new Promise<ToolResult>(() => {})); + + const provider: ProviderContract = { + id: "fake", + stream() { + return (async function* () { + yield { + type: "tool-call", + toolCallId: "tc1", + toolName: "hangA", + input: {}, + } as ProviderEvent; + yield { + type: "tool-call", + toolCallId: "tc2", + toolName: "hangB", + input: {}, + } as ProviderEvent; + ac.abort(); + await delay(10); + yield { type: "finish", reason: "tool-calls" } as ProviderEvent; + })(); + }, + }; + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [toolA, toolB], + dispatch: { maxConcurrent: 2, eager: true }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + signal: ac.signal, + }); + + expect(result.finishReason).toBe("aborted"); + + // tool-result EVENTS are still emitted by executeStep (for live UI), + // but tool-result MESSAGES are omitted from the result (not persisted). + const toolResultEvents = events.filter((e) => e.type === "tool-result"); + expect(toolResultEvents).toHaveLength(2); + for (const tr of toolResultEvents) { + if (tr.type === "tool-result") { + expect(tr.isError).toBe(true); + } + } + + // No tool messages in the result (they would orphan on the next turn). 
+ const toolMessages = result.messages.filter((m) => m.role === "tool"); + expect(toolMessages).toHaveLength(0); + + // Assistant message has no tool-call chunks. + const assistantMsgs = result.messages.filter((m) => m.role === "assistant"); + for (const msg of assistantMsgs) { + expect(msg.chunks.some((c) => c.type === "tool-call")).toBe(false); + } + + const doneEvents = events.filter((e) => e.type === "done"); + expect(doneEvents).toHaveLength(1); + if (doneEvents[0]?.type === "done") { + expect(doneEvents[0].reason).toBe("aborted"); + } + }); + + // Critical regression: after an aborted tool call, the result messages + // must NOT contain orphaned tool messages. If they did, the next turn + // would send a `tool` role message to the provider without a preceding + // `assistant` message carrying `tool_calls` → 400 error. + it("aborted step produces no tool messages and no tool-call chunks in result", async () => { + const ac = new AbortController(); + + // Tool that hangs forever + const tool = createFakeTool("hang", () => new Promise<ToolResult>(() => {})); + + const provider: ProviderContract = { + id: "fake", + stream() { + return (async function* () { + yield { type: "text-delta", delta: "Let me run that for you" } as ProviderEvent; + yield { + type: "tool-call", + toolCallId: "tc1", + toolName: "hang", + input: {}, + } as ProviderEvent; + ac.abort(); + await delay(10); + yield { type: "finish", reason: "tool-calls" } as ProviderEvent; + })(); + }, + }; + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: true }, + conversationId: "conv-1", + turnId: "turn-1", + emit: noopEmit, + signal: ac.signal, + }); + + expect(result.finishReason).toBe("aborted"); + + // No tool messages in the result + const toolMessages = result.messages.filter((m) => m.role === "tool"); + expect(toolMessages).toHaveLength(0); + + // The assistant message should preserve text but NOT tool-call chunks + const 
assistantMsg = result.messages.find((m) => m.role === "assistant"); + expect(assistantMsg).toBeDefined(); + if (assistantMsg !== undefined) { + const hasToolCall = assistantMsg.chunks.some((c) => c.type === "tool-call"); + expect(hasToolCall).toBe(false); + // Text content should be preserved + const hasText = assistantMsg.chunks.some((c) => c.type === "text"); + expect(hasText).toBe(true); + } + + // Simulate what the next turn would see: the result messages are the + // conversation history (minus the user message). If we feed these to + // a simple converter, there should be NO `tool` role messages. + const toolRoleCount = result.messages.filter((m) => m.role === "tool").length; + expect(toolRoleCount).toBe(0); + }); }); diff --git a/packages/kernel/src/runtime/dispatch.ts b/packages/kernel/src/runtime/dispatch.ts index 01f0043..d09db3b 100644 --- a/packages/kernel/src/runtime/dispatch.ts +++ b/packages/kernel/src/runtime/dispatch.ts @@ -5,182 +5,182 @@ import type { ToolCall, ToolContract, ToolExecuteContext, ToolResult } from "../ import { toolOutputEvent } from "./events.js"; export interface StepDispatcher { - submit(call: ToolCall): void; - drain(): Promise<Map<string, ToolResult>>; + submit(call: ToolCall): void; + drain(): Promise<Map<string, ToolResult>>; } export async function executeToolCall( - call: ToolCall, - tool: ToolContract | undefined, - signal: AbortSignal, - emit: EventEmitter, - conversationId: string, - turnId: string, - toolSpan?: Span, - cwd?: string, - computerId?: string, + call: ToolCall, + tool: ToolContract | undefined, + signal: AbortSignal, + emit: EventEmitter, + conversationId: string, + turnId: string, + toolSpan?: Span, + cwd?: string, + computerId?: string, ): Promise<ToolResult> { - if (tool === undefined) { - return { content: `Unknown tool: ${call.name}`, isError: true }; - } - if (signal.aborted) { - return { content: "Aborted", isError: true }; - } - const ctx: ToolExecuteContext = { - toolCallId: call.id, - signal, - 
onOutput: (data, stream) => { - emit(toolOutputEvent(conversationId, turnId, call.id, data, stream)); - }, - log: toolSpan?.log ?? createNoopLogger(), - conversationId, - ...(cwd !== undefined ? { cwd } : {}), - ...(computerId !== undefined ? { computerId } : {}), - }; - // Race the tool's execute promise against the abort signal so a tool - // that hangs (ignores ctx.signal, or blocks on something the signal - // can't interrupt) can't keep runTurn from returning. When the signal - // fires we RESOLVE (not reject) with an "Aborted" result so the step - // completes normally and the existing signal.aborted → finishReason = - // "aborted" path seals the turn cleanly (done event), letting the - // caller's finally clear active state and the FE clear its spinner. - try { - const toolPromise = tool.execute(call.input, ctx); - const abortPromise = new Promise<ToolResult>((resolve) => { - signal.addEventListener("abort", () => resolve({ content: "Aborted", isError: true }), { - once: true, - }); - }); - // Swallow late rejections from the orphaned tool promise: the tool - // may reject after the race already resolved with "Aborted". - void toolPromise.catch(() => {}); - return await Promise.race([toolPromise, abortPromise]); - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - return { content: `Tool execution error: ${message}`, isError: true }; - } + if (tool === undefined) { + return { content: `Unknown tool: ${call.name}`, isError: true }; + } + if (signal.aborted) { + return { content: "Aborted", isError: true }; + } + const ctx: ToolExecuteContext = { + toolCallId: call.id, + signal, + onOutput: (data, stream) => { + emit(toolOutputEvent(conversationId, turnId, call.id, data, stream)); + }, + log: toolSpan?.log ?? createNoopLogger(), + conversationId, + ...(cwd !== undefined ? { cwd } : {}), + ...(computerId !== undefined ? 
{ computerId } : {}), + }; + // Race the tool's execute promise against the abort signal so a tool + // that hangs (ignores ctx.signal, or blocks on something the signal + // can't interrupt) can't keep runTurn from returning. When the signal + // fires we RESOLVE (not reject) with an "Aborted" result so the step + // completes normally and the existing signal.aborted → finishReason = + // "aborted" path seals the turn cleanly (done event), letting the + // caller's finally clear active state and the FE clear its spinner. + try { + const toolPromise = tool.execute(call.input, ctx); + const abortPromise = new Promise<ToolResult>((resolve) => { + signal.addEventListener("abort", () => resolve({ content: "Aborted", isError: true }), { + once: true, + }); + }); + // Swallow late rejections from the orphaned tool promise: the tool + // may reject after the race already resolved with "Aborted". + void toolPromise.catch(() => {}); + return await Promise.race([toolPromise, abortPromise]); + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + return { content: `Tool execution error: ${message}`, isError: true }; + } } interface QueueEntry { - readonly call: ToolCall; - readonly tool: ToolContract | undefined; - readonly resolve: (result: ToolResult) => void; + readonly call: ToolCall; + readonly tool: ToolContract | undefined; + readonly resolve: (result: ToolResult) => void; } export function createStepDispatcher( - toolMap: Map<string, ToolContract>, - policy: ToolDispatchPolicy, - signal: AbortSignal, - emit: EventEmitter, - conversationId: string, - turnId: string, - toolSpans: Map<string, Span>, - cwd?: string, - computerId?: string, + toolMap: Map<string, ToolContract>, + policy: ToolDispatchPolicy, + signal: AbortSignal, + emit: EventEmitter, + conversationId: string, + turnId: string, + toolSpans: Map<string, Span>, + cwd?: string, + computerId?: string, ): StepDispatcher { - let activeCount = 0; - let unsafeRunning = false; - const queue: QueueEntry[] = []; - const allPromises: Array<{ id: string; promise: Promise<ToolResult> }> = []; - const dedupMap = new Map<string, Promise<ToolResult>>(); + let activeCount = 0; + let unsafeRunning = false; + const queue: QueueEntry[] = []; + const allPromises: Array<{ id: string; promise: Promise<ToolResult> }> = []; + const dedupMap = new Map<string, Promise<ToolResult>>(); - function canStart(isConcurrencySafe: boolean): boolean { - if (unsafeRunning) return false; - if (!isConcurrencySafe && activeCount > 0) return false; - if (policy.maxConcurrent === 0) return true; - return activeCount < policy.maxConcurrent; - } + function canStart(isConcurrencySafe: boolean): boolean { + if (unsafeRunning) return false; + if (!isConcurrencySafe && activeCount > 0) return false; + if (policy.maxConcurrent === 0) return true; + return activeCount < policy.maxConcurrent; + } - function tryStartNext(): void { - while (queue.length > 0) { - const next = queue[0]; - if (next === undefined) break; - const isSafe = next.tool?.concurrencySafe !== 
false; - if (!canStart(isSafe)) break; - queue.shift(); - activeCount++; - if (!isSafe) unsafeRunning = true; - void runAndResolve(next); - } - } + function tryStartNext(): void { + while (queue.length > 0) { + const next = queue[0]; + if (next === undefined) break; + const isSafe = next.tool?.concurrencySafe !== false; + if (!canStart(isSafe)) break; + queue.shift(); + activeCount++; + if (!isSafe) unsafeRunning = true; + void runAndResolve(next); + } + } - async function runAndResolve(entry: QueueEntry): Promise<void> { - const tcSpan = toolSpans.get(entry.call.id); - const result = await executeToolCall( - entry.call, - entry.tool, - signal, - emit, - conversationId, - turnId, - tcSpan, - cwd, - computerId, - ); - activeCount--; - if (entry.tool?.concurrencySafe === false) unsafeRunning = false; - entry.resolve(result); - tryStartNext(); - } + async function runAndResolve(entry: QueueEntry): Promise<void> { + const tcSpan = toolSpans.get(entry.call.id); + const result = await executeToolCall( + entry.call, + entry.tool, + signal, + emit, + conversationId, + turnId, + tcSpan, + cwd, + computerId, + ); + activeCount--; + if (entry.tool?.concurrencySafe === false) unsafeRunning = false; + entry.resolve(result); + tryStartNext(); + } - function submit(call: ToolCall): void { - const tool = toolMap.get(call.name); - const key = `${call.name}:${JSON.stringify(call.input)}`; + function submit(call: ToolCall): void { + const tool = toolMap.get(call.name); + const key = `${call.name}:${JSON.stringify(call.input)}`; - const existing = dedupMap.get(key); - if (existing !== undefined) { - allPromises.push({ id: call.id, promise: existing }); - return; - } + const existing = dedupMap.get(key); + if (existing !== undefined) { + allPromises.push({ id: call.id, promise: existing }); + return; + } - const promise = new Promise<ToolResult>((resolve) => { - queue.push({ call, tool, resolve }); - tryStartNext(); - }); + const promise = new Promise<ToolResult>((resolve) => { + 
queue.push({ call, tool, resolve }); + tryStartNext(); + }); - dedupMap.set(key, promise); - allPromises.push({ id: call.id, promise }); - } + dedupMap.set(key, promise); + allPromises.push({ id: call.id, promise }); + } - async function drain(): Promise<Map<string, ToolResult>> { - if (signal.aborted) { - for (const item of queue) { - item.resolve({ content: "Aborted", isError: true }); - } - queue.length = 0; - } + async function drain(): Promise<Map<string, ToolResult>> { + if (signal.aborted) { + for (const item of queue) { + item.resolve({ content: "Aborted", isError: true }); + } + queue.length = 0; + } - const results = new Map<string, ToolResult>(); - for (const entry of allPromises) { - const result = await entry.promise; - results.set(entry.id, result); - } - return results; - } + const results = new Map<string, ToolResult>(); + for (const entry of allPromises) { + const result = await entry.promise; + results.set(entry.id, result); + } + return results; + } - return { submit, drain }; + return { submit, drain }; } function createNoopLogger(): Logger { - return { - debug() {}, - info() {}, - warn() {}, - error() {}, - child() { - return createNoopLogger(); - }, - span() { - return { - id: "noop", - log: createNoopLogger(), - setAttributes() {}, - addLink() {}, - child() { - return this; - }, - end() {}, - }; - }, - }; + return { + debug() {}, + info() {}, + warn() {}, + error() {}, + child() { + return createNoopLogger(); + }, + span() { + return { + id: "noop", + log: createNoopLogger(), + setAttributes() {}, + addLink() {}, + child() { + return this; + }, + end() {}, + }; + }, + }; } diff --git a/packages/kernel/src/runtime/events.ts b/packages/kernel/src/runtime/events.ts index 5805e28..353b6ca 100644 --- a/packages/kernel/src/runtime/events.ts +++ b/packages/kernel/src/runtime/events.ts @@ -3,178 +3,178 @@ import type { AgentEvent } from "../contracts/events.js"; import type { Usage } from "../contracts/provider.js"; export function 
textDeltaEvent(conversationId: string, turnId: string, delta: string): AgentEvent { - return { type: "text-delta", conversationId, turnId, delta }; + return { type: "text-delta", conversationId, turnId, delta }; } export function reasoningDeltaEvent( - conversationId: string, - turnId: string, - delta: string, + conversationId: string, + turnId: string, + delta: string, ): AgentEvent { - return { type: "reasoning-delta", conversationId, turnId, delta }; + return { type: "reasoning-delta", conversationId, turnId, delta }; } export function toolCallEvent( - conversationId: string, - turnId: string, - stepId: StepId, - toolCallId: string, - toolName: string, - input: unknown, + conversationId: string, + turnId: string, + stepId: StepId, + toolCallId: string, + toolName: string, + input: unknown, ): AgentEvent { - return { type: "tool-call", conversationId, turnId, stepId, toolCallId, toolName, input }; + return { type: "tool-call", conversationId, turnId, stepId, toolCallId, toolName, input }; } export function toolResultEvent( - conversationId: string, - turnId: string, - stepId: StepId, - toolCallId: string, - toolName: string, - content: string, - isError: boolean, - durationMs?: number, + conversationId: string, + turnId: string, + stepId: StepId, + toolCallId: string, + toolName: string, + content: string, + isError: boolean, + durationMs?: number, ): AgentEvent { - const base = { - type: "tool-result" as const, - conversationId, - turnId, - stepId, - toolCallId, - toolName, - content, - isError, - }; - if (durationMs !== undefined) { - return { ...base, durationMs }; - } - return base; + const base = { + type: "tool-result" as const, + conversationId, + turnId, + stepId, + toolCallId, + toolName, + content, + isError, + }; + if (durationMs !== undefined) { + return { ...base, durationMs }; + } + return base; } export function toolOutputEvent( - conversationId: string, - turnId: string, - toolCallId: string, - data: string, - stream: "stdout" | "stderr", + 
conversationId: string, + turnId: string, + toolCallId: string, + data: string, + stream: "stdout" | "stderr", ): AgentEvent { - return { type: "tool-output", conversationId, turnId, toolCallId, data, stream }; + return { type: "tool-output", conversationId, turnId, toolCallId, data, stream }; } export function usageEvent( - conversationId: string, - turnId: string, - usage: Usage, - stepId?: StepId, + conversationId: string, + turnId: string, + usage: Usage, + stepId?: StepId, ): AgentEvent { - if (stepId !== undefined) { - return { type: "usage", conversationId, turnId, usage, stepId }; - } - return { type: "usage", conversationId, turnId, usage }; + if (stepId !== undefined) { + return { type: "usage", conversationId, turnId, usage, stepId }; + } + return { type: "usage", conversationId, turnId, usage }; } export function turnStartEvent(conversationId: string, turnId: string): AgentEvent { - return { type: "turn-start", conversationId, turnId }; + return { type: "turn-start", conversationId, turnId }; } export function stepCompleteEvent( - conversationId: string, - turnId: string, - stepId: StepId, - timing?: { ttftMs?: number; decodeMs?: number; genTotalMs?: number }, + conversationId: string, + turnId: string, + stepId: StepId, + timing?: { ttftMs?: number; decodeMs?: number; genTotalMs?: number }, ): AgentEvent { - if (timing !== undefined) { - if (timing.ttftMs !== undefined) { - if (timing.decodeMs !== undefined && timing.genTotalMs !== undefined) { - return { - type: "step-complete", - conversationId, - turnId, - stepId, - ttftMs: timing.ttftMs, - decodeMs: timing.decodeMs, - genTotalMs: timing.genTotalMs, - }; - } - if (timing.genTotalMs !== undefined) { - return { - type: "step-complete", - conversationId, - turnId, - stepId, - ttftMs: timing.ttftMs, - genTotalMs: timing.genTotalMs, - }; - } - return { type: "step-complete", conversationId, turnId, stepId, ttftMs: timing.ttftMs }; - } - if (timing.genTotalMs !== undefined) { - return { - type: 
"step-complete", - conversationId, - turnId, - stepId, - genTotalMs: timing.genTotalMs, - }; - } - } - return { type: "step-complete", conversationId, turnId, stepId }; + if (timing !== undefined) { + if (timing.ttftMs !== undefined) { + if (timing.decodeMs !== undefined && timing.genTotalMs !== undefined) { + return { + type: "step-complete", + conversationId, + turnId, + stepId, + ttftMs: timing.ttftMs, + decodeMs: timing.decodeMs, + genTotalMs: timing.genTotalMs, + }; + } + if (timing.genTotalMs !== undefined) { + return { + type: "step-complete", + conversationId, + turnId, + stepId, + ttftMs: timing.ttftMs, + genTotalMs: timing.genTotalMs, + }; + } + return { type: "step-complete", conversationId, turnId, stepId, ttftMs: timing.ttftMs }; + } + if (timing.genTotalMs !== undefined) { + return { + type: "step-complete", + conversationId, + turnId, + stepId, + genTotalMs: timing.genTotalMs, + }; + } + } + return { type: "step-complete", conversationId, turnId, stepId }; } export function doneEvent( - conversationId: string, - turnId: string, - reason: string, - durationMs?: number, - usage?: Usage, - contextSize?: number, + conversationId: string, + turnId: string, + reason: string, + durationMs?: number, + usage?: Usage, + contextSize?: number, ): AgentEvent { - if (durationMs !== undefined && usage !== undefined && contextSize !== undefined) { - return { type: "done", conversationId, turnId, reason, durationMs, usage, contextSize }; - } - if (durationMs !== undefined && usage !== undefined) { - return { type: "done", conversationId, turnId, reason, durationMs, usage }; - } - if (durationMs !== undefined && contextSize !== undefined) { - return { type: "done", conversationId, turnId, reason, durationMs, contextSize }; - } - if (usage !== undefined && contextSize !== undefined) { - return { type: "done", conversationId, turnId, reason, usage, contextSize }; - } - if (durationMs !== undefined) { - return { type: "done", conversationId, turnId, reason, durationMs }; 
- } - if (usage !== undefined) { - return { type: "done", conversationId, turnId, reason, usage }; - } - if (contextSize !== undefined) { - return { type: "done", conversationId, turnId, reason, contextSize }; - } - return { type: "done", conversationId, turnId, reason }; + if (durationMs !== undefined && usage !== undefined && contextSize !== undefined) { + return { type: "done", conversationId, turnId, reason, durationMs, usage, contextSize }; + } + if (durationMs !== undefined && usage !== undefined) { + return { type: "done", conversationId, turnId, reason, durationMs, usage }; + } + if (durationMs !== undefined && contextSize !== undefined) { + return { type: "done", conversationId, turnId, reason, durationMs, contextSize }; + } + if (usage !== undefined && contextSize !== undefined) { + return { type: "done", conversationId, turnId, reason, usage, contextSize }; + } + if (durationMs !== undefined) { + return { type: "done", conversationId, turnId, reason, durationMs }; + } + if (usage !== undefined) { + return { type: "done", conversationId, turnId, reason, usage }; + } + if (contextSize !== undefined) { + return { type: "done", conversationId, turnId, reason, contextSize }; + } + return { type: "done", conversationId, turnId, reason }; } export function errorEvent( - conversationId: string, - turnId: string, - message: string, - code?: string, + conversationId: string, + turnId: string, + message: string, + code?: string, ): AgentEvent { - if (code !== undefined) { - return { type: "error", conversationId, turnId, message, code }; - } - return { type: "error", conversationId, turnId, message }; + if (code !== undefined) { + return { type: "error", conversationId, turnId, message, code }; + } + return { type: "error", conversationId, turnId, message }; } export function providerRetryEvent( - conversationId: string, - turnId: string, - attempt: number, - delayMs: number, - message: string, - code?: string, + conversationId: string, + turnId: string, + attempt: 
number, + delayMs: number, + message: string, + code?: string, ): AgentEvent { - if (code !== undefined) { - return { type: "provider-retry", conversationId, turnId, attempt, delayMs, message, code }; - } - return { type: "provider-retry", conversationId, turnId, attempt, delayMs, message }; + if (code !== undefined) { + return { type: "provider-retry", conversationId, turnId, attempt, delayMs, message, code }; + } + return { type: "provider-retry", conversationId, turnId, attempt, delayMs, message }; } diff --git a/packages/kernel/src/runtime/index.ts b/packages/kernel/src/runtime/index.ts index e0dd656..ecb802e 100644 --- a/packages/kernel/src/runtime/index.ts +++ b/packages/kernel/src/runtime/index.ts @@ -1,13 +1,13 @@ export type { StepDispatcher } from "./dispatch.js"; export { createStepDispatcher, executeToolCall } from "./dispatch.js"; export { - errorEvent, - providerRetryEvent, - reasoningDeltaEvent, - textDeltaEvent, - toolCallEvent, - toolOutputEvent, - toolResultEvent, - usageEvent, + errorEvent, + providerRetryEvent, + reasoningDeltaEvent, + textDeltaEvent, + toolCallEvent, + toolOutputEvent, + toolResultEvent, + usageEvent, } from "./events.js"; export { MAX_STEPS, runTurn } from "./run-turn.js"; diff --git a/packages/kernel/src/runtime/run-turn.test.ts b/packages/kernel/src/runtime/run-turn.test.ts index a9fc3d9..8d20975 100644 --- a/packages/kernel/src/runtime/run-turn.test.ts +++ b/packages/kernel/src/runtime/run-turn.test.ts @@ -5,3420 +5,3432 @@ import type { LogDeps, Logger, LogRecord, LogSink } from "../contracts/logging.j import type { ProviderContract, ProviderEvent } from "../contracts/provider.js"; import type { ToolContract, ToolExecuteContext, ToolResult } from "../contracts/tool.js"; import { createLogger } from "../logging/logger.js"; -import { MAX_STEPS, runTurn } from "./run-turn.js"; +import { runTurn } from "./run-turn.js"; function delay(ms: number): Promise<void> { - return new Promise((resolve) => { - setTimeout(resolve, ms); - 
}); + return new Promise((resolve) => { + setTimeout(resolve, ms); + }); } function createFakeProvider(script: ProviderEvent[][]): ProviderContract { - let callIndex = 0; - return { - id: "fake", - stream(_messages, _tools) { - const events = script[callIndex] ?? []; - callIndex++; - return (async function* () { - for (const event of events) { - yield event; - } - })(); - }, - }; + let callIndex = 0; + return { + id: "fake", + stream(_messages, _tools) { + const events = script[callIndex] ?? []; + callIndex++; + return (async function* () { + for (const event of events) { + yield event; + } + })(); + }, + }; } function createCapturingProvider(script: ProviderEvent[][]): { - provider: ProviderContract; - capturedMessages: ChatMessage[][]; + provider: ProviderContract; + capturedMessages: ChatMessage[][]; } { - const capturedMessages: ChatMessage[][] = []; - let callIndex = 0; - const provider: ProviderContract = { - id: "fake", - stream(messages, _tools) { - capturedMessages.push([...messages]); - const events = script[callIndex] ?? []; - callIndex++; - return (async function* () { - for (const event of events) { - yield event; - } - })(); - }, - }; - return { provider, capturedMessages }; + const capturedMessages: ChatMessage[][] = []; + let callIndex = 0; + const provider: ProviderContract = { + id: "fake", + stream(messages, _tools) { + capturedMessages.push([...messages]); + const events = script[callIndex] ?? 
[]; + callIndex++; + return (async function* () { + for (const event of events) { + yield event; + } + })(); + }, + }; + return { provider, capturedMessages }; } function createFakeTool( - name: string, - handler?: (input: unknown, ctx: ToolExecuteContext) => Promise<ToolResult>, - opts?: { concurrencySafe?: boolean }, + name: string, + handler?: (input: unknown, ctx: ToolExecuteContext) => Promise<ToolResult>, + opts?: { concurrencySafe?: boolean }, ): ToolContract { - return { - name, - description: `Fake tool: ${name}`, - parameters: { type: "object" }, - ...(opts?.concurrencySafe !== undefined ? { concurrencySafe: opts.concurrencySafe } : {}), - execute: handler ?? (async (input) => ({ content: `${name}: ${JSON.stringify(input)}` })), - }; + return { + name, + description: `Fake tool: ${name}`, + parameters: { type: "object" }, + ...(opts?.concurrencySafe !== undefined ? { concurrencySafe: opts.concurrencySafe } : {}), + execute: handler ?? (async (input) => ({ content: `${name}: ${JSON.stringify(input)}` })), + }; } function createCollectingEmit(): { events: AgentEvent[]; emit: (event: AgentEvent) => void } { - const events: AgentEvent[] = []; - return { events, emit: (event) => events.push(event) }; + const events: AgentEvent[] = []; + return { events, emit: (event) => events.push(event) }; } const userMessage: ChatMessage = { - role: "user", - chunks: [{ type: "text", text: "hello" }], + role: "user", + chunks: [{ type: "text", text: "hello" }], }; describe("runTurn", () => { - it("emits events with the conversationId and turnId from input", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "hi" }, - { type: "usage", usage: { inputTokens: 1, outputTokens: 1 } }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-42", - turnId: 
"turn-99", - emit, - }); - - expect(events.length).toBeGreaterThan(0); - for (const event of events) { - expect(event.conversationId).toBe("conv-42"); - if (event.type !== "status") { - expect(event.turnId).toBe("turn-99"); - } - } - }); - - it("text-only turn emits correct events and returns correct result", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "Hello" }, - { type: "text-delta", delta: " world" }, - { type: "reasoning-delta", delta: "thinking..." }, - { type: "usage", usage: { inputTokens: 10, outputTokens: 5 } }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit, - }); - - expect(result.finishReason).toBe("stop"); - expect(result.messages).toHaveLength(1); - expect(result.messages[0]?.role).toBe("assistant"); - - const chunks = result.messages[0]?.chunks ?? []; - expect(chunks).toHaveLength(2); - expect(chunks[0]).toEqual({ type: "text", text: "Hello world" }); - expect(chunks[1]).toEqual({ type: "thinking", text: "thinking..." }); - - expect(result.usage).toEqual({ inputTokens: 10, outputTokens: 5 }); - - const eventTypes = events.map((e) => e.type); - expect(eventTypes).toEqual([ - "turn-start", - "text-delta", - "text-delta", - "reasoning-delta", - "usage", - "step-complete", - "done", - ]); - }); - - it("turn with one tool call executes tool, feeds result back, then finishes", async () => { - const tool = createFakeTool("greet", async (input) => ({ - content: `Hello, ${(input as { name: string }).name}!`, - })); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "greet", input: { name: "World" } }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "Done." 
}, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit, - }); - - expect(result.finishReason).toBe("stop"); - expect(result.messages).toHaveLength(3); - expect(result.messages[0]?.role).toBe("assistant"); - expect(result.messages[1]?.role).toBe("tool"); - expect(result.messages[2]?.role).toBe("assistant"); - - const toolResultChunk = result.messages[1]?.chunks[0]; - expect(toolResultChunk?.type).toBe("tool-result"); - if (toolResultChunk?.type === "tool-result") { - expect(toolResultChunk.content).toBe("Hello, World!"); - expect(toolResultChunk.toolCallId).toBe("tc1"); - expect(toolResultChunk.isError).toBe(false); - } - - const eventTypes = events.map((e) => e.type); - expect(eventTypes).toContain("tool-call"); - expect(eventTypes).toContain("tool-result"); - expect(eventTypes).toContain("text-delta"); - }); - - it("passes updated messages to subsequent provider calls", async () => { - const capturedMessages: ChatMessage[][] = []; - let callIndex = 0; - const script: ProviderEvent[][] = [ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]; - - const provider: ProviderContract = { - id: "fake", - stream(messages, _tools) { - capturedMessages.push([...messages]); - const events = script[callIndex] ?? 
[]; - callIndex++; - return (async function* () { - for (const event of events) yield event; - })(); - }, - }; - - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit: () => {}, - }); - - expect(capturedMessages).toHaveLength(2); - expect(capturedMessages[0] ?? []).toHaveLength(1); - expect(capturedMessages[0]?.[0]?.role).toBe("user"); - - expect(capturedMessages[1] ?? []).toHaveLength(3); - expect(capturedMessages[1]?.[0]?.role).toBe("user"); - expect(capturedMessages[1]?.[1]?.role).toBe("assistant"); - expect(capturedMessages[1]?.[2]?.role).toBe("tool"); - }); - - it("maxConcurrent: 1 runs tools sequentially", async () => { - const log: string[] = []; - - const toolA = createFakeTool("a", async () => { - log.push("a:start"); - await delay(10); - log.push("a:end"); - return { content: "a" }; - }); - - const toolB = createFakeTool("b", async () => { - log.push("b:start"); - await delay(10); - log.push("b:end"); - return { content: "b" }; - }); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "a", input: {} }, - { type: "tool-call", toolCallId: "tc2", toolName: "b", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - await runTurn({ - provider, - messages: [userMessage], - tools: [toolA, toolB], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit: () => {}, - }); - - const aEndIdx = log.indexOf("a:end"); - const bStartIdx = log.indexOf("b:start"); - expect(aEndIdx).toBeLessThan(bStartIdx); - }); - - it("maxConcurrent: 2 runs tools in parallel", async () => { - const log: string[] = []; - - const toolA = createFakeTool("a", async () => { - 
log.push("a:start"); - await delay(20); - log.push("a:end"); - return { content: "a" }; - }); - - const toolB = createFakeTool("b", async () => { - log.push("b:start"); - await delay(20); - log.push("b:end"); - return { content: "b" }; - }); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "a", input: {} }, - { type: "tool-call", toolCallId: "tc2", toolName: "b", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - await runTurn({ - provider, - messages: [userMessage], - tools: [toolA, toolB], - dispatch: { maxConcurrent: 2, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit: () => {}, - }); - - const aStartIdx = log.indexOf("a:start"); - const bStartIdx = log.indexOf("b:start"); - const aEndIdx = log.indexOf("a:end"); - const bEndIdx = log.indexOf("b:end"); - - expect(aStartIdx).toBeLessThan(aEndIdx); - expect(bStartIdx).toBeLessThan(bEndIdx); - expect(aStartIdx).toBeLessThan(bEndIdx); - expect(bStartIdx).toBeLessThan(aEndIdx); - }); - - it("maxConcurrent: 0 runs all tools in parallel (unlimited)", async () => { - const log: string[] = []; - - const toolA = createFakeTool("a", async () => { - log.push("a:start"); - await delay(20); - log.push("a:end"); - return { content: "a" }; - }); - - const toolB = createFakeTool("b", async () => { - log.push("b:start"); - await delay(20); - log.push("b:end"); - return { content: "b" }; - }); - - const toolC = createFakeTool("c", async () => { - log.push("c:start"); - await delay(20); - log.push("c:end"); - return { content: "c" }; - }); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "a", input: {} }, - { type: "tool-call", toolCallId: "tc2", toolName: "b", input: {} }, - { type: "tool-call", toolCallId: "tc3", toolName: "c", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { 
type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - await runTurn({ - provider, - messages: [userMessage], - tools: [toolA, toolB, toolC], - dispatch: { maxConcurrent: 0, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit: () => {}, - }); - - const aStartIdx = log.indexOf("a:start"); - const bStartIdx = log.indexOf("b:start"); - const cStartIdx = log.indexOf("c:start"); - const aEndIdx = log.indexOf("a:end"); - const bEndIdx = log.indexOf("b:end"); - const cEndIdx = log.indexOf("c:end"); - - expect(aStartIdx).toBeLessThan(aEndIdx); - expect(bStartIdx).toBeLessThan(bEndIdx); - expect(cStartIdx).toBeLessThan(cEndIdx); - expect(aStartIdx).toBeLessThan(bEndIdx); - expect(bStartIdx).toBeLessThan(aEndIdx); - expect(cStartIdx).toBeLessThan(aEndIdx); - }); - - it("eager: true launches tool before step finish", async () => { - const log: string[] = []; - - const tool = createFakeTool("test", async () => { - log.push("tool:start"); - await delay(5); - log.push("tool:end"); - return { content: "done" }; - }); - - let callCount = 0; - const provider: ProviderContract = { - id: "fake", - stream(_messages, _tools) { - const idx = callCount++; - if (idx === 0) { - return (async function* () { - yield { - type: "tool-call", - toolCallId: "tc1", - toolName: "test", - input: {}, - } as ProviderEvent; - log.push("provider:after-tool-call"); - await delay(50); - yield { type: "finish", reason: "tool-calls" } as ProviderEvent; - log.push("provider:finish"); - })(); - } - return (async function* () { - yield { type: "text-delta", delta: "done" } as ProviderEvent; - yield { type: "finish", reason: "stop" } as ProviderEvent; - })(); - }, - }; - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: true }, - conversationId: "tab-test", - turnId: "turn-test", - emit: () => {}, - }); - - const toolStartIdx = log.indexOf("tool:start"); - const finishIdx = 
log.indexOf("provider:finish"); - expect(toolStartIdx).toBeLessThan(finishIdx); - }); - - it("eager: false does not launch tool before step finish", async () => { - const log: string[] = []; - - const tool = createFakeTool("test", async () => { - log.push("tool:start"); - await delay(5); - log.push("tool:end"); - return { content: "done" }; - }); - - let callCount = 0; - const provider: ProviderContract = { - id: "fake", - stream(_messages, _tools) { - const idx = callCount++; - if (idx === 0) { - return (async function* () { - yield { - type: "tool-call", - toolCallId: "tc1", - toolName: "test", - input: {}, - } as ProviderEvent; - log.push("provider:after-tool-call"); - await delay(50); - yield { type: "finish", reason: "tool-calls" } as ProviderEvent; - log.push("provider:finish"); - })(); - } - return (async function* () { - yield { type: "text-delta", delta: "done" } as ProviderEvent; - yield { type: "finish", reason: "stop" } as ProviderEvent; - })(); - }, - }; - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit: () => {}, - }); - - const toolStartIdx = log.indexOf("tool:start"); - const finishIdx = log.indexOf("provider:finish"); - expect(toolStartIdx).toBeGreaterThan(finishIdx); - }); - - it("abort mid-turn synthesizes error results for unresolved tool calls", async () => { - const ac = new AbortController(); - - const tool = createFakeTool("slow", async (_input, ctx) => { - await delay(200); - if (ctx.signal.aborted) return { content: "Aborted", isError: true }; - return { content: "done" }; - }); - - const provider: ProviderContract = { - id: "fake", - stream(_messages, _tools) { - return (async function* () { - yield { - type: "tool-call", - toolCallId: "tc1", - toolName: "slow", - input: {}, - } as ProviderEvent; - yield { - type: "tool-call", - toolCallId: "tc2", - toolName: "slow", - input: { x: 1 }, - } as ProviderEvent; 
- ac.abort(); - await delay(10); - yield { type: "finish", reason: "tool-calls" } as ProviderEvent; - })(); - }, - }; - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit, - signal: ac.signal, - }); - - expect(result.finishReason).toBe("aborted"); - - const toolResults = events.filter((e) => e.type === "tool-result"); - for (const tr of toolResults) { - if (tr.type === "tool-result") { - expect(tr.isError).toBe(true); - } - } - }); - - it("abort before any step returns aborted immediately", async () => { - const ac = new AbortController(); - ac.abort(); - - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "should not appear" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit: () => {}, - signal: ac.signal, - }); - - expect(result.finishReason).toBe("aborted"); - expect(result.messages).toHaveLength(0); - }); - - it("de-duplicates identical tool calls in a batch", async () => { - let execCount = 0; - - const tool = createFakeTool("dedup", async (_input) => { - execCount++; - return { content: `result-${execCount}` }; - }); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "dedup", input: { x: 1 } }, - { type: "tool-call", toolCallId: "tc2", toolName: "dedup", input: { x: 1 } }, - { type: "tool-call", toolCallId: "tc3", toolName: "dedup", input: { x: 2 } }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: 
[userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit, - }); - - expect(execCount).toBe(2); - - const toolResults = events.filter((e) => e.type === "tool-result"); - expect(toolResults).toHaveLength(3); - - const tc1Result = toolResults.find((e) => e.type === "tool-result" && e.toolCallId === "tc1"); - const tc2Result = toolResults.find((e) => e.type === "tool-result" && e.toolCallId === "tc2"); - const tc3Result = toolResults.find((e) => e.type === "tool-result" && e.toolCallId === "tc3"); - - expect(tc1Result).toBeDefined(); - expect(tc2Result).toBeDefined(); - expect(tc3Result).toBeDefined(); - - if (tc1Result?.type === "tool-result" && tc2Result?.type === "tool-result") { - expect(tc1Result.content).toBe(tc2Result.content); - expect(tc1Result.content).toBe("result-1"); - } - if (tc3Result?.type === "tool-result") { - expect(tc3Result.content).toBe("result-2"); - } - - expect(result.finishReason).toBe("stop"); - }); - - it("serializes non-concurrency-safe tools even with maxConcurrent > 1", async () => { - const log: string[] = []; - - const unsafeTool: ToolContract = { - name: "unsafe", - description: "Unsafe tool", - parameters: { type: "object" }, - concurrencySafe: false, - execute: async () => { - log.push("unsafe:start"); - await delay(10); - log.push("unsafe:end"); - return { content: "done" }; - }, - }; - - const safeTool: ToolContract = { - name: "safe", - description: "Safe tool", - parameters: { type: "object" }, - execute: async () => { - log.push("safe:start"); - await delay(10); - log.push("safe:end"); - return { content: "done" }; - }, - }; - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "unsafe", input: {} }, - { type: "tool-call", toolCallId: "tc2", toolName: "safe", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - 
], - ]); - - await runTurn({ - provider, - messages: [userMessage], - tools: [unsafeTool, safeTool], - dispatch: { maxConcurrent: 5, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit: () => {}, - }); - - const unsafeEndIdx = log.indexOf("unsafe:end"); - const safeStartIdx = log.indexOf("safe:start"); - expect(unsafeEndIdx).toBeLessThan(safeStartIdx); - }); - - it("handles unknown tool name gracefully", async () => { - const provider = createFakeProvider([ - [ - { - type: "tool-call", - toolCallId: "tc1", - toolName: "nonexistent", - input: {}, - }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit, - }); - - const toolResults = events.filter((e) => e.type === "tool-result"); - expect(toolResults).toHaveLength(1); - if (toolResults[0]?.type === "tool-result") { - expect(toolResults[0]?.isError).toBe(true); - expect(toolResults[0]?.content).toContain("Unknown tool"); - } - - expect(result.finishReason).toBe("stop"); - }); - - it("handles provider error gracefully", async () => { - const provider: ProviderContract = { - id: "fake", - stream() { - return (async function* () { - yield { type: "text-delta", delta: "partial" } as ProviderEvent; - throw new Error("provider crashed"); - })(); - }, - }; - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit, - }); - - expect(result.finishReason).toBe("error"); - - const errorEvents = events.filter((e) => e.type === "error"); - expect(errorEvents).toHaveLength(1); - if 
(errorEvents[0]?.type === "error") { - expect(errorEvents[0]?.message).toContain("provider crashed"); - } - }); - - it("forwards cwd from RunTurnInput to ToolExecuteContext", async () => { - let capturedCwd: string | undefined = "SENTINEL_NOT_SET"; - - const tool = createFakeTool("cwdcheck", async (_input, ctx) => { - capturedCwd = ctx.cwd; - return { content: "ok" }; - }); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "cwdcheck", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit: () => {}, - cwd: "/some/dir", - }); - - expect(capturedCwd).toBe("/some/dir"); - }); - - it("forwards undefined cwd when RunTurnInput has no cwd", async () => { - let capturedCwd: string | undefined = "SENTINEL_NOT_SET"; - - const tool = createFakeTool("cwdcheck", async (_input, ctx) => { - capturedCwd = ctx.cwd; - return { content: "ok" }; - }); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "cwdcheck", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit: () => {}, - }); - - expect(capturedCwd).toBeUndefined(); - }); - - it("forwards computerId from RunTurnInput to ToolExecuteContext", async () => { - let capturedComputerId: string | undefined = "SENTINEL_NOT_SET"; - - const tool = createFakeTool("computercheck", async (_input, ctx) => { - capturedComputerId = ctx.computerId; - return { content: "ok" }; - }); - - const 
provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "computercheck", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit: () => {}, - computerId: "ssh-host-alias", - }); - - expect(capturedComputerId).toBe("ssh-host-alias"); - }); - - it("forwards undefined computerId when RunTurnInput has no computerId", async () => { - let capturedComputerId: string | undefined = "SENTINEL_NOT_SET"; - - const tool = createFakeTool("computercheck", async (_input, ctx) => { - capturedComputerId = ctx.computerId; - return { content: "ok" }; - }); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "computercheck", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit: () => {}, - }); - - expect(capturedComputerId).toBeUndefined(); - }); - - it("aggregates usage across multiple steps", async () => { - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, - { type: "usage", usage: { inputTokens: 10, outputTokens: 5 } }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "usage", usage: { inputTokens: 20, outputTokens: 10 } }, - { type: "finish", reason: "stop" }, - ], - ]); - - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - const result = await runTurn({ - provider, - messages: [userMessage], - 
tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit: () => {}, - }); - - expect(result.usage).toEqual({ inputTokens: 30, outputTokens: 15 }); - }); - - it("emits tool-output events from tool ctx.onOutput", async () => { - const tool: ToolContract = { - name: "streaming", - description: "A tool that streams output", - parameters: { type: "object" }, - execute: async (_input, ctx) => { - ctx.onOutput("line 1\n", "stdout"); - ctx.onOutput("err 1\n", "stderr"); - return { content: "done" }; - }, - }; - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "streaming", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "tab-test", - turnId: "turn-test", - emit, - }); - - const outputs = events.filter((e) => e.type === "tool-output"); - expect(outputs).toHaveLength(2); - if (outputs[0]?.type === "tool-output") { - expect(outputs[0]?.data).toBe("line 1\n"); - expect(outputs[0]?.stream).toBe("stdout"); - expect(outputs[0]?.toolCallId).toBe("tc1"); - } - if (outputs[1]?.type === "tool-output") { - expect(outputs[1]?.data).toBe("err 1\n"); - expect(outputs[1]?.stream).toBe("stderr"); - } - }); - - function createTestLogger(): { - logger: Logger; - sink: LogSink & { records: LogRecord[] }; - deps: LogDeps; - } { - let idCounter = 0; - const deps: LogDeps = { - now: () => 1000 + idCounter * 100, - newId: () => `span-${++idCounter}`, - }; - const records: LogRecord[] = []; - const sink: LogSink & { records: LogRecord[] } = { - records, - emit: (record) => records.push(record), - }; - const logger = createLogger({ extensionId: "test" }, sink, deps); - return { logger, sink, 
deps }; - } - - describe("span instrumentation", () => { - it("emits turn + step span open/close in order", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "hi" }, - { type: "usage", usage: { inputTokens: 1, outputTokens: 1 } }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - const spanOpens = sink.records.filter((r) => r.kind === "span-open"); - const spanCloses = sink.records.filter((r) => r.kind === "span-close"); - - expect(spanOpens.length).toBeGreaterThanOrEqual(2); // turn + step - expect(spanCloses.length).toBeGreaterThanOrEqual(2); - - const turnOpen = spanOpens.find((r) => r.kind === "span-open" && r.name === "turn"); - const stepOpen = spanOpens.find((r) => r.kind === "span-open" && r.name === "step"); - expect(turnOpen).toBeDefined(); - expect(stepOpen).toBeDefined(); - - if (turnOpen?.kind === "span-open") { - expect(turnOpen.extensionId).toBe("test"); - expect(turnOpen.attributes?.conversationId).toBe("conv-1"); - expect(turnOpen.attributes?.turnId).toBe("turn-1"); - } - - const turnClose = spanCloses.find((r) => r.kind === "span-close" && r.name === "turn"); - expect(turnClose).toBeDefined(); - if (turnClose?.kind === "span-close") { - expect(turnClose.status).toBe("ok"); - expect(turnClose.durationMs).toBeGreaterThanOrEqual(0); - } - }); - - it("emits tool-call spans for dispatched tools", async () => { - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { logger, 
sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - const toolCallSpans = sink.records.filter( - (r) => r.kind === "span-open" && r.name === "tool-call", - ); - expect(toolCallSpans).toHaveLength(1); - if (toolCallSpans[0]?.kind === "span-open") { - expect(toolCallSpans[0].attributes?.name).toBe("echo"); - expect(toolCallSpans[0].attributes?.toolCallId).toBe("tc1"); - } - - const toolCallCloses = sink.records.filter( - (r) => r.kind === "span-close" && r.name === "tool-call", - ); - expect(toolCallCloses).toHaveLength(1); - if (toolCallCloses[0]?.kind === "span-close") { - expect(toolCallCloses[0].status).toBe("ok"); - } - }); - - it("tools receive ctx.log (correlated logger)", async () => { - let capturedLog: Logger | undefined; - - const tool = createFakeTool("logtest", async (_input, ctx) => { - capturedLog = ctx.log; - ctx.log.info("tool ran", { key: "value" }); - return { content: "ok" }; - }); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "logtest", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - expect(capturedLog).toBeDefined(); - - const toolLogs = sink.records.filter( - (r) => r.kind === "log" && r.kind === "log" && (r as { msg: string }).msg === "tool ran", - ); - expect(toolLogs).toHaveLength(1); - if (toolLogs[0]?.kind === "log") { - expect(toolLogs[0].attributes?.key).toBe("value"); - expect(toolLogs[0].extensionId).toBe("test"); - } - }); - - it("an 
aborted turn still closes its turn span", async () => { - const ac = new AbortController(); - ac.abort(); - - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "should not appear" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - signal: ac.signal, - logger, - }); - - const turnCloses = sink.records.filter((r) => r.kind === "span-close" && r.name === "turn"); - expect(turnCloses).toHaveLength(1); - if (turnCloses[0]?.kind === "span-close") { - expect(turnCloses[0].attributes?.finishReason).toBe("aborted"); - } - }); - - it("a provider error closes the step span with error status", async () => { - const provider: ProviderContract = { - id: "fake", - stream() { - return (async function* () { - yield { type: "text-delta", delta: "partial" } as ProviderEvent; - throw new Error("provider exploded"); - })(); - }, - }; - - const { logger, sink } = createTestLogger(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - expect(result.finishReason).toBe("error"); - - const stepCloses = sink.records.filter((r) => r.kind === "span-close" && r.name === "step"); - expect(stepCloses).toHaveLength(1); - if (stepCloses[0]?.kind === "span-close") { - expect(stepCloses[0].status).toBe("error"); - expect(stepCloses[0].attributes?.["error.message"]).toContain("provider exploded"); - } - }); - - it("emits a prompt span with verbatim body and small scalar attributes", async () => { - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "done" }, - { type: "finish", 
reason: "stop" }, - ], - ]); - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - const promptOpens = sink.records.filter((r) => r.kind === "span-open" && r.name === "prompt"); - expect(promptOpens).toHaveLength(1); - - const promptOpen = promptOpens[0]; - if (promptOpen?.kind === "span-open") { - expect(promptOpen.body).toBeDefined(); - const parsed = JSON.parse(promptOpen.body as string); - expect(parsed.messages).toEqual([userMessage]); - expect(parsed.tools).toHaveLength(1); - expect(parsed.tools[0].name).toBe("echo"); - - expect(promptOpen.attributes?.messageCount).toBe(1); - expect(promptOpen.attributes?.toolCount).toBe(1); - } - - const promptCloses = sink.records.filter( - (r) => r.kind === "span-close" && r.name === "prompt", - ); - expect(promptCloses).toHaveLength(1); - - const logRecords = sink.records.filter( - (r) => - r.kind === "log" && r.kind === "log" && (r as { msg: string }).msg === "prompt:before", - ); - expect(logRecords).toHaveLength(0); - }); - - it("emits ttft and decode spans for a generating step", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "Hello" }, - { type: "text-delta", delta: " world" }, - { type: "usage", usage: { inputTokens: 10, outputTokens: 5 } }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - const ttftOpens = sink.records.filter((r) => r.kind === "span-open" && r.name === "ttft"); - const ttftCloses = sink.records.filter((r) => r.kind === "span-close" && r.name === "ttft"); - const decodeOpens = sink.records.filter((r) => 
r.kind === "span-open" && r.name === "decode"); - const decodeCloses = sink.records.filter( - (r) => r.kind === "span-close" && r.name === "decode", - ); - - expect(ttftOpens).toHaveLength(1); - expect(ttftCloses).toHaveLength(1); - expect(decodeOpens).toHaveLength(1); - expect(decodeCloses).toHaveLength(1); - - const stepOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "step"); - expect(stepOpen).toBeDefined(); - - if ( - ttftOpens[0]?.kind === "span-open" && - ttftCloses[0]?.kind === "span-close" && - decodeOpens[0]?.kind === "span-open" && - decodeCloses[0]?.kind === "span-close" && - stepOpen?.kind === "span-open" - ) { - // ttft and decode are children of step - expect(ttftOpens[0].parentSpanId).toBe(stepOpen.spanId); - expect(decodeOpens[0].parentSpanId).toBe(stepOpen.spanId); - - // ttft closes before decode opens (in order) - const ttftCloseIdx = sink.records.indexOf(ttftCloses[0]); - const decodeOpenIdx = sink.records.indexOf(decodeOpens[0]); - expect(ttftCloseIdx).toBeLessThan(decodeOpenIdx); - - // ttft has firstToken: true - expect(ttftCloses[0].attributes?.firstToken).toBe(true); - - // durations from fake clock - expect(ttftCloses[0].durationMs).toBeGreaterThanOrEqual(0); - expect(decodeCloses[0].durationMs).toBeGreaterThanOrEqual(0); - } - }); - - it("first token counts a reasoning delta", async () => { - const provider = createFakeProvider([ - [ - { type: "reasoning-delta", delta: "thinking..." 
}, - { type: "text-delta", delta: "Hello" }, - { type: "usage", usage: { inputTokens: 10, outputTokens: 5 } }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - const ttftCloses = sink.records.filter((r) => r.kind === "span-close" && r.name === "ttft"); - expect(ttftCloses).toHaveLength(1); - - // The ttft span should close at the reasoning delta, not at the text delta - if (ttftCloses[0]?.kind === "span-close") { - expect(ttftCloses[0].attributes?.firstToken).toBe(true); - } - }); - - it("a step with no content token does not emit a misleading decode", async () => { - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - // First step (tool-call-only) should have ttft with firstToken: false and no decode - const ttftOpens = sink.records.filter((r) => r.kind === "span-open" && r.name === "ttft"); - const ttftCloses = sink.records.filter((r) => r.kind === "span-close" && r.name === "ttft"); - const decodeOpens = sink.records.filter((r) => r.kind === "span-open" && r.name === "decode"); - - // There should be 2 ttft opens (one per step) and 2 ttft closes - expect(ttftOpens).toHaveLength(2); - expect(ttftCloses).toHaveLength(2); - - // First step: tool-call-only, no first token - if 
(ttftCloses[0]?.kind === "span-close") { - expect(ttftCloses[0].attributes?.firstToken).toBe(false); - } - - // Second step: has text-delta, should have firstToken: true and decode span - if (ttftCloses[1]?.kind === "span-close") { - expect(ttftCloses[1].attributes?.firstToken).toBe(true); - } - - // Only one decode span (for the second step) - expect(decodeOpens).toHaveLength(1); - }); - - it("turn span close stamps usage.inputTokens / usage.outputTokens (dotted)", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "hi" }, - { type: "usage", usage: { inputTokens: 10, outputTokens: 5 } }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - const turnClose = sink.records.find((r) => r.kind === "span-close" && r.name === "turn"); - expect(turnClose).toBeDefined(); - if (turnClose?.kind === "span-close") { - expect(turnClose.attributes?.["usage.inputTokens"]).toBe(10); - expect(turnClose.attributes?.["usage.outputTokens"]).toBe(5); - expect(turnClose.attributes?.usage_inputTokens).toBeUndefined(); - expect(turnClose.attributes?.usage_outputTokens).toBeUndefined(); - } - }); - - it("step span close stamps usage.inputTokens / usage.outputTokens (dotted)", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "hi" }, - { type: "usage", usage: { inputTokens: 7, outputTokens: 3 } }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - const stepClose = sink.records.find((r) => r.kind === "span-close" && 
r.name === "step"); - expect(stepClose).toBeDefined(); - if (stepClose?.kind === "span-close") { - expect(stepClose.attributes?.["usage.inputTokens"]).toBe(7); - expect(stepClose.attributes?.["usage.outputTokens"]).toBe(3); - expect(stepClose.attributes?.usage_inputTokens).toBeUndefined(); - expect(stepClose.attributes?.usage_outputTokens).toBeUndefined(); - } - }); - - it("turn + step spans stamp usage.cacheReadTokens / usage.cacheWriteTokens when the provider Usage carries them", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "hi" }, - { - type: "usage", - usage: { inputTokens: 10, outputTokens: 5, cacheReadTokens: 3, cacheWriteTokens: 2 }, - }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - const turnClose = sink.records.find((r) => r.kind === "span-close" && r.name === "turn"); - const stepClose = sink.records.find((r) => r.kind === "span-close" && r.name === "step"); - - expect(turnClose).toBeDefined(); - if (turnClose?.kind === "span-close") { - expect(turnClose.attributes?.["usage.inputTokens"]).toBe(10); - expect(turnClose.attributes?.["usage.outputTokens"]).toBe(5); - expect(turnClose.attributes?.["usage.cacheReadTokens"]).toBe(3); - expect(turnClose.attributes?.["usage.cacheWriteTokens"]).toBe(2); - } - - expect(stepClose).toBeDefined(); - if (stepClose?.kind === "span-close") { - expect(stepClose.attributes?.["usage.inputTokens"]).toBe(10); - expect(stepClose.attributes?.["usage.outputTokens"]).toBe(5); - expect(stepClose.attributes?.["usage.cacheReadTokens"]).toBe(3); - expect(stepClose.attributes?.["usage.cacheWriteTokens"]).toBe(2); - } - }); - - it("turn + step spans OMIT the cache-token attrs when the provider Usage lacks them", async () => { - const 
provider = createFakeProvider([ - [ - { type: "text-delta", delta: "hi" }, - { type: "usage", usage: { inputTokens: 10, outputTokens: 5 } }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - const turnClose = sink.records.find((r) => r.kind === "span-close" && r.name === "turn"); - const stepClose = sink.records.find((r) => r.kind === "span-close" && r.name === "step"); - - expect(turnClose).toBeDefined(); - if (turnClose?.kind === "span-close") { - expect(turnClose.attributes?.["usage.inputTokens"]).toBe(10); - expect(turnClose.attributes?.["usage.outputTokens"]).toBe(5); - expect(turnClose.attributes?.["usage.cacheReadTokens"]).toBeUndefined(); - expect(turnClose.attributes?.["usage.cacheWriteTokens"]).toBeUndefined(); - } - - expect(stepClose).toBeDefined(); - if (stepClose?.kind === "span-close") { - expect(stepClose.attributes?.["usage.inputTokens"]).toBe(10); - expect(stepClose.attributes?.["usage.outputTokens"]).toBe(5); - expect(stepClose.attributes?.["usage.cacheReadTokens"]).toBeUndefined(); - expect(stepClose.attributes?.["usage.cacheWriteTokens"]).toBeUndefined(); - } - }); - }); - - describe("provider logger threading", () => { - it("passes step span logger to provider.stream opts when logger provided", async () => { - let capturedOpts: Record<string, unknown> | undefined; - - const provider: ProviderContract = { - id: "fake", - stream(_messages, _tools, opts) { - capturedOpts = opts !== undefined ? 
{ ...opts } : undefined; - return (async function* () { - yield { type: "text-delta", delta: "hi" } as ProviderEvent; - yield { type: "usage", usage: { inputTokens: 1, outputTokens: 1 } } as ProviderEvent; - yield { type: "finish", reason: "stop" } as ProviderEvent; - })(); - }, - }; - - const { logger } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - expect(capturedOpts).toBeDefined(); - expect(capturedOpts?.logger).toBeDefined(); - expect(typeof (capturedOpts?.logger as Record<string, unknown>).info).toBe("function"); - expect(typeof (capturedOpts?.logger as Record<string, unknown>).span).toBe("function"); - }); - - it("passes undefined for opts.logger when no logger provided", async () => { - let capturedOpts: Record<string, unknown> | undefined; - - const provider: ProviderContract = { - id: "fake", - stream(_messages, _tools, opts) { - capturedOpts = opts !== undefined ? { ...opts } : undefined; - return (async function* () { - yield { type: "text-delta", delta: "hi" } as ProviderEvent; - yield { type: "usage", usage: { inputTokens: 1, outputTokens: 1 } } as ProviderEvent; - yield { type: "finish", reason: "stop" } as ProviderEvent; - })(); - }, - }; - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - }); - - expect(capturedOpts).toBeDefined(); - expect(capturedOpts?.logger).toBeUndefined(); - }); - - it("threads providerOpts.model through to provider.stream opts", async () => { - let capturedOpts: Record<string, unknown> | undefined; - - const provider: ProviderContract = { - id: "fake", - stream(_messages, _tools, opts) { - capturedOpts = opts !== undefined ? 
{ ...opts } : undefined; - return (async function* () { - yield { type: "text-delta", delta: "hi" } as ProviderEvent; - yield { type: "usage", usage: { inputTokens: 1, outputTokens: 1 } } as ProviderEvent; - yield { type: "finish", reason: "stop" } as ProviderEvent; - })(); - }, - }; - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - providerOpts: { model: "some-model-id" }, - }); - - expect(capturedOpts?.model).toBe("some-model-id"); - }); - }); - - describe("span tree nesting", () => { - it("turn span is root (parentSpanId undefined)", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "hi" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - const turnOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "turn"); - expect(turnOpen).toBeDefined(); - if (turnOpen?.kind === "span-open") { - expect(turnOpen.parentSpanId).toBeUndefined(); - } - }); - - it("step span is a child of turn span", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "hi" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - const turnOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "turn"); - const stepOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "step"); - expect(turnOpen).toBeDefined(); - expect(stepOpen).toBeDefined(); - if 
(turnOpen?.kind === "span-open" && stepOpen?.kind === "span-open") { - expect(stepOpen.parentSpanId).toBe(turnOpen.spanId); - } - }); - - it("prompt span is a child of step span", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "hi" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - const stepOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "step"); - const promptOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "prompt"); - expect(stepOpen).toBeDefined(); - expect(promptOpen).toBeDefined(); - if (stepOpen?.kind === "span-open" && promptOpen?.kind === "span-open") { - expect(promptOpen.parentSpanId).toBe(stepOpen.spanId); - } - }); - - it("provider logger creates spans nested under step", async () => { - let capturedLogger: Logger | undefined; - let providerReqSpanId: string | undefined; - - const provider: ProviderContract = { - id: "fake", - stream(_messages, _tools, opts) { - capturedLogger = opts?.logger; - return (async function* () { - // Open provider.request span inside the stream (like a real provider) - if (capturedLogger !== undefined) { - const span = capturedLogger.span("provider.request"); - providerReqSpanId = span.id; - span.end(); - } - yield { type: "text-delta", delta: "hi" } as ProviderEvent; - yield { type: "finish", reason: "stop" } as ProviderEvent; - })(); - }, - }; - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - expect(capturedLogger).toBeDefined(); - expect(providerReqSpanId).toBeDefined(); - - const 
stepOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "step"); - const provReqOpen = sink.records.find( - (r) => r.kind === "span-open" && r.name === "provider.request", - ); - expect(stepOpen).toBeDefined(); - expect(provReqOpen).toBeDefined(); - if (stepOpen?.kind === "span-open" && provReqOpen?.kind === "span-open") { - expect(provReqOpen.parentSpanId).toBe(stepOpen.spanId); - expect(provReqOpen.spanId).toBe(providerReqSpanId); - } - }); - - it("tool-call spans are children of step span", async () => { - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - const stepOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "step"); - const tcOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "tool-call"); - expect(stepOpen).toBeDefined(); - expect(tcOpen).toBeDefined(); - if (stepOpen?.kind === "span-open" && tcOpen?.kind === "span-open") { - expect(tcOpen.parentSpanId).toBe(stepOpen.spanId); - } - }); - - it("full parent chain: turn → step → {prompt, provider.request, tool-call}", async () => { - let capturedLogger: Logger | undefined; - - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - let streamCallCount = 0; - const provider: ProviderContract = { - id: "fake", - stream(_messages, _tools, opts) { - capturedLogger = opts?.logger; - streamCallCount++; - return (async function* () { - // Simulate provider opening a provider.request span - // INSIDE the 
stream on the first call only (like a real provider) - if (streamCallCount === 1 && capturedLogger !== undefined) { - const span = capturedLogger.span("provider.request"); - span.end(); - } - if (streamCallCount === 1) { - yield { - type: "tool-call", - toolCallId: "tc1", - toolName: "echo", - input: {}, - } as ProviderEvent; - yield { type: "finish", reason: "tool-calls" } as ProviderEvent; - } else { - yield { type: "text-delta", delta: "done" } as ProviderEvent; - yield { type: "finish", reason: "stop" } as ProviderEvent; - } - })(); - }, - }; - - const { logger, sink } = createTestLogger(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - logger, - }); - - const spanOpens = sink.records.filter((r) => r.kind === "span-open") as Array< - Extract<LogRecord, { kind: "span-open" }> - >; - - const turnOpen = spanOpens.find((r) => r.name === "turn"); - const stepOpen = spanOpens.find((r) => r.name === "step"); - const promptOpen = spanOpens.find((r) => r.name === "prompt"); - const provReqOpen = spanOpens.find((r) => r.name === "provider.request"); - const tcOpen = spanOpens.find((r) => r.name === "tool-call"); - - expect(turnOpen).toBeDefined(); - expect(stepOpen).toBeDefined(); - expect(promptOpen).toBeDefined(); - expect(provReqOpen).toBeDefined(); - expect(tcOpen).toBeDefined(); - - if ( - turnOpen?.kind === "span-open" && - stepOpen?.kind === "span-open" && - promptOpen?.kind === "span-open" && - provReqOpen?.kind === "span-open" && - tcOpen?.kind === "span-open" - ) { - // turn = root - expect(turnOpen.parentSpanId).toBeUndefined(); - - // step = child of turn - expect(stepOpen.parentSpanId).toBe(turnOpen.spanId); - - // prompt = child of step - expect(promptOpen.parentSpanId).toBe(stepOpen.spanId); - - // provider.request = child of step - expect(provReqOpen.parentSpanId).toBe(stepOpen.spanId); - - // tool-call = child 
of step - expect(tcOpen.parentSpanId).toBe(stepOpen.spanId); - } - }); - }); - - describe("lifecycle events", () => { - it("emits turn-start as the first event with conversation + turn ids", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "hi" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-42", - turnId: "turn-99", - emit, - }); - - expect(events[0]?.type).toBe("turn-start"); - if (events[0]?.type === "turn-start") { - expect(events[0].conversationId).toBe("conv-42"); - expect(events[0].turnId).toBe("turn-99"); - } - }); - - it("emits a single done event last, carrying the finishReason", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "Hello" }, - { type: "usage", usage: { inputTokens: 5, outputTokens: 3 } }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - }); - - const lastEvent = events[events.length - 1]; - expect(lastEvent?.type).toBe("done"); - if (lastEvent?.type === "done") { - expect(lastEvent.reason).toBe(result.finishReason); - expect(lastEvent.conversationId).toBe("conv-1"); - expect(lastEvent.turnId).toBe("turn-1"); - } - - const doneEvents = events.filter((e) => e.type === "done"); - expect(doneEvents).toHaveLength(1); - }); - - it("emits done after a tool-call turn", async () => { - const tool = createFakeTool("echo", async (input) => ({ - content: `echo: ${JSON.stringify(input)}`, - })); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: { x: 1 } }, - { type: "finish", 
reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - }); - - const lastEvent = events[events.length - 1]; - expect(lastEvent?.type).toBe("done"); - if (lastEvent?.type === "done") { - expect(lastEvent.reason).toBe(result.finishReason); - } - }); - - it('still emits done with reason "aborted" when the turn is aborted via signal', async () => { - const ac = new AbortController(); - ac.abort(); - - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "should not appear" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - signal: ac.signal, - }); - - expect(result.finishReason).toBe("aborted"); - - const lastEvent = events[events.length - 1]; - expect(lastEvent?.type).toBe("done"); - if (lastEvent?.type === "done") { - expect(lastEvent.reason).toBe("aborted"); - } - }); - - it('still emits done with reason "error" when the provider errors', async () => { - const provider: ProviderContract = { - id: "fake", - stream() { - return (async function* () { - yield { type: "text-delta", delta: "partial" } as ProviderEvent; - throw new Error("provider crashed"); - })(); - }, - }; - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - }); - - expect(result.finishReason).toBe("error"); - - const lastEvent = 
events[events.length - 1]; - expect(lastEvent?.type).toBe("done"); - if (lastEvent?.type === "done") { - expect(lastEvent.reason).toBe("error"); - } - }); - - it("turn-start precedes every delta and done follows every delta", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "Hello" }, - { type: "reasoning-delta", delta: "thinking..." }, - { type: "text-delta", delta: " world" }, - { type: "usage", usage: { inputTokens: 5, outputTokens: 3 } }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - }); - - const turnStartIdx = events.findIndex((e) => e.type === "turn-start"); - const doneIdx = events.findIndex((e) => e.type === "done"); - - expect(turnStartIdx).toBe(0); - expect(doneIdx).toBe(events.length - 1); - - for (let i = 0; i < events.length; i++) { - const e = events[i]; - if (e?.type === "text-delta" || e?.type === "reasoning-delta") { - expect(i).toBeGreaterThan(turnStartIdx); - expect(i).toBeLessThan(doneIdx); - } - } - }); - }); - - describe("stepId", () => { - it("tool-call and tool-result events carry stepId", async () => { - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - }); - - const toolCallEvt = events.find((e) => e.type === "tool-call"); - const toolResultEvt = 
events.find((e) => e.type === "tool-result"); - - expect(toolCallEvt).toBeDefined(); - expect(toolResultEvt).toBeDefined(); - - if (toolCallEvt?.type === "tool-call" && toolResultEvt?.type === "tool-result") { - expect(toolCallEvt.stepId).toBeDefined(); - expect(toolResultEvt.stepId).toBeDefined(); - expect(toolCallEvt.stepId).toBe(toolResultEvt.stepId); - } - }); - - it("tool calls in the SAME step share one stepId; a later step gets a different one", async () => { - const toolA = createFakeTool("a", async () => ({ content: "a-result" })); - const toolB = createFakeTool("b", async () => ({ content: "b-result" })); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "a", input: {} }, - { type: "tool-call", toolCallId: "tc2", toolName: "b", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "tool-call", toolCallId: "tc3", toolName: "a", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [toolA, toolB], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - }); - - const toolCallEvts = events.filter((e) => e.type === "tool-call"); - expect(toolCallEvts.length).toBeGreaterThanOrEqual(2); - - const step0Calls = toolCallEvts.filter( - (e) => e.type === "tool-call" && (e.toolCallId === "tc1" || e.toolCallId === "tc2"), - ); - const step1Call = toolCallEvts.find((e) => e.type === "tool-call" && e.toolCallId === "tc3"); - - expect(step0Calls).toHaveLength(2); - if (step0Calls[0]?.type === "tool-call" && step0Calls[1]?.type === "tool-call") { - expect(step0Calls[0].stepId).toBe(step0Calls[1].stepId); - } - - if (step1Call?.type === "tool-call" && step0Calls[0]?.type === "tool-call") { - 
expect(step1Call.stepId).not.toBe(step0Calls[0].stepId); - } - }); - - it("tool chunks in the result carry stepId", async () => { - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - }); - - const toolCallMsg = result.messages.find( - (m) => m.role === "assistant" && m.chunks.some((c) => c.type === "tool-call"), - ); - const toolResultMsg = result.messages.find((m) => m.role === "tool"); - - expect(toolCallMsg).toBeDefined(); - expect(toolResultMsg).toBeDefined(); - - const tcChunk = toolCallMsg?.chunks.find((c) => c.type === "tool-call"); - const trChunk = toolResultMsg?.chunks[0]; - - expect(tcChunk?.type).toBe("tool-call"); - expect(trChunk?.type).toBe("tool-result"); - - if (tcChunk?.type === "tool-call" && trChunk?.type === "tool-result") { - expect(tcChunk.stepId).toBeDefined(); - expect(trChunk.stepId).toBeDefined(); - expect(tcChunk.stepId).toBe(trChunk.stepId); - } - }); - }); - - describe("timing events (now provided)", () => { - function createCounterNow(): { now: () => number; tick: (ms: number) => void } { - let current = 0; - return { - now: () => current, - tick: (ms: number) => { - current += ms; - }, - }; - } - - it("emits step-complete per step with timing when now provided", async () => { - const clock = createCounterNow(); - clock.tick(100); // turn starts at 100 - - const { events, emit } = createCollectingEmit(); - - // Advance clock during stream: first token at +50ms, stream ends at +200ms - let streamCallCount = 0; - const wrappedProvider: ProviderContract = 
{ - id: "fake", - stream(_messages, _tools) { - const idx = streamCallCount++; - return (async function* () { - if (idx === 0) { - clock.tick(50); // stream starts - yield { type: "text-delta", delta: "Hello" } as ProviderEvent; - // first token seen at 150 (100+50) - clock.tick(100); - yield { type: "text-delta", delta: " world" } as ProviderEvent; - clock.tick(50); - yield { - type: "usage", - usage: { inputTokens: 10, outputTokens: 5 }, - } as ProviderEvent; - yield { type: "finish", reason: "stop" } as ProviderEvent; - } - })(); - }, - }; - - await runTurn({ - provider: wrappedProvider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - now: clock.now, - }); - - const stepCompleteEvts = events.filter((e) => e.type === "step-complete"); - expect(stepCompleteEvts).toHaveLength(1); - - const sc = stepCompleteEvts[0]; - if (sc?.type === "step-complete") { - expect(sc.conversationId).toBe("conv-1"); - expect(sc.turnId).toBe("turn-1"); - expect(sc.stepId).toBeDefined(); - expect(sc.genTotalMs).toBe(200); // 50+100+50 - expect(sc.ttftMs).toBe(50); // stream start → first text-delta - expect(sc.decodeMs).toBe(150); // first token → stream end - const ttft = sc.ttftMs; - const decode = sc.decodeMs; - const genTotal = sc.genTotalMs; - if (ttft !== undefined && decode !== undefined && genTotal !== undefined) { - expect(genTotal).toBe(ttft + decode); - } - } - }); - - it("step-complete omits ttft/decode but keeps genTotalMs for a no-content step", async () => { - const clock = createCounterNow(); - clock.tick(100); // turn starts at 100 - - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - let streamCallCount = 0; - const wrappedProvider: ProviderContract = { - id: "fake", - stream(_messages, _tools) { - const idx = streamCallCount++; - return (async function* () { - if (idx === 0) { - clock.tick(80); // stream starts at 180 - yield { - type: 
"tool-call", - toolCallId: "tc1", - toolName: "echo", - input: {}, - } as ProviderEvent; - clock.tick(20); - yield { type: "finish", reason: "tool-calls" } as ProviderEvent; - } else { - clock.tick(50); - yield { type: "text-delta", delta: "done" } as ProviderEvent; - clock.tick(50); - yield { type: "finish", reason: "stop" } as ProviderEvent; - } - })(); - }, - }; - - const { events, emit } = createCollectingEmit(); - - await runTurn({ - provider: wrappedProvider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - now: clock.now, - }); - - const stepCompleteEvts = events.filter((e) => e.type === "step-complete"); - expect(stepCompleteEvts).toHaveLength(2); - - // First step: tool-call-only, no content token - const sc0 = stepCompleteEvts[0]; - if (sc0?.type === "step-complete") { - expect(sc0.stepId).toBeDefined(); - expect(sc0.genTotalMs).toBe(100); // 80+20 - expect(sc0.ttftMs).toBeUndefined(); - expect(sc0.decodeMs).toBeUndefined(); - } - - // Second step: has text-delta - const sc1 = stepCompleteEvts[1]; - if (sc1?.type === "step-complete") { - expect(sc1.stepId).toBeDefined(); - expect(sc1.genTotalMs).toBe(100); // 50+50 - expect(sc1.ttftMs).toBe(50); - expect(sc1.decodeMs).toBe(50); - } - }); - - it("usage event carries stepId", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "hi" }, - { type: "usage", usage: { inputTokens: 5, outputTokens: 3 } }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - }); - - const usageEvts = events.filter((e) => e.type === "usage"); - expect(usageEvts).toHaveLength(1); - const ue = usageEvts[0]; - if (ue?.type === "usage") { - expect(ue.stepId).toBeDefined(); - } - 
}); - - it("tool-result carries durationMs (execution time) when now provided", async () => { - const clock = createCounterNow(); - clock.tick(100); // turn starts at 100 - - const tool = createFakeTool("slow", async () => { - clock.tick(200); // tool takes 200ms to execute - return { content: "done" }; - }); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "slow", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "ok" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - now: clock.now, - }); - - const toolResultEvts = events.filter((e) => e.type === "tool-result"); - expect(toolResultEvts).toHaveLength(1); - const tr = toolResultEvts[0]; - if (tr?.type === "tool-result") { - expect(tr.durationMs).toBeDefined(); - expect(tr.durationMs).toBe(200); - } - }); - - it("done carries durationMs and aggregate usage when now provided", async () => { - const clock = createCounterNow(); - clock.tick(100); // turn starts at 100 - - const wrappedProvider: ProviderContract = { - id: "fake", - stream(_messages, _tools) { - return (async function* () { - clock.tick(80); // stream duration - yield { type: "text-delta", delta: "hi" } as ProviderEvent; - yield { - type: "usage", - usage: { inputTokens: 10, outputTokens: 5 }, - } as ProviderEvent; - yield { type: "finish", reason: "stop" } as ProviderEvent; - })(); - }, - }; - - const { events, emit } = createCollectingEmit(); - - await runTurn({ - provider: wrappedProvider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - now: clock.now, - }); - - const doneEvts = events.filter((e) => e.type === "done"); - 
expect(doneEvts).toHaveLength(1); - const d = doneEvts[0]; - if (d?.type === "done") { - expect(d.durationMs).toBeDefined(); - expect(d.durationMs).toBeGreaterThan(0); - expect(d.usage).toBeDefined(); - if (d.usage !== undefined) { - expect(d.usage.inputTokens).toBe(10); - expect(d.usage.outputTokens).toBe(5); - } - } - }); - - it("no now → timing fields absent", async () => { - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "hi" }, - { type: "usage", usage: { inputTokens: 5, outputTokens: 3 } }, - { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "usage", usage: { inputTokens: 10, outputTokens: 5 } }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - // no now - }); - - // step-complete still emitted (with stepId, no timing) - const stepCompleteEvts = events.filter((e) => e.type === "step-complete"); - expect(stepCompleteEvts).toHaveLength(2); - for (const sc of stepCompleteEvts) { - if (sc?.type === "step-complete") { - expect(sc.stepId).toBeDefined(); - expect(sc.ttftMs).toBeUndefined(); - expect(sc.decodeMs).toBeUndefined(); - expect(sc.genTotalMs).toBeUndefined(); - } - } - - // usage still carries stepId - const usageEvts = events.filter((e) => e.type === "usage"); - for (const ue of usageEvts) { - if (ue?.type === "usage") { - expect(ue.stepId).toBeDefined(); - } - } - - // no durationMs on tool-result - const toolResultEvts = events.filter((e) => e.type === "tool-result"); - for (const tr of toolResultEvts) { - if (tr?.type === "tool-result") { - expect(tr.durationMs).toBeUndefined(); - } - } - - // no 
durationMs on done, but usage is present (independent of now) - const doneEvts = events.filter((e) => e.type === "done"); - expect(doneEvts).toHaveLength(1); - const d = doneEvts[0]; - if (d?.type === "done") { - expect(d.durationMs).toBeUndefined(); - expect(d.usage).toBeDefined(); - if (d.usage !== undefined) { - expect(d.usage.inputTokens).toBe(15); - expect(d.usage.outputTokens).toBe(8); - } - } - }); - }); - - describe("contextSize", () => { - it("single-step turn: contextSize equals step inputTokens + outputTokens", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "Hello" }, - { type: "usage", usage: { inputTokens: 100, outputTokens: 50 } }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - }); - - const doneEvt = events.find((e) => e.type === "done"); - expect(doneEvt).toBeDefined(); - if (doneEvt?.type === "done") { - expect(doneEvt.contextSize).toBe(150); - } - }); - - it("multi-step turn: contextSize equals ONLY the last step's inputTokens + outputTokens", async () => { - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, - { type: "usage", usage: { inputTokens: 100, outputTokens: 20 } }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "usage", usage: { inputTokens: 300, outputTokens: 80 } }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - }); - - const doneEvt = 
events.find((e) => e.type === "done"); - expect(doneEvt).toBeDefined(); - if (doneEvt?.type === "done") { - expect(doneEvt.contextSize).toBe(380); - expect(doneEvt.usage).toBeDefined(); - if (doneEvt.usage !== undefined) { - expect(doneEvt.contextSize).not.toBe(doneEvt.usage.inputTokens); - } - } - }); - - it("no usage reported: contextSize is undefined", async () => { - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "Hello" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const { events, emit } = createCollectingEmit(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - }); - - const doneEvt = events.find((e) => e.type === "done"); - expect(doneEvt).toBeDefined(); - if (doneEvt?.type === "done") { - expect(doneEvt.contextSize).toBeUndefined(); - expect(doneEvt.usage).toBeUndefined(); - } - }); - }); - - describe("drainSteering", () => { - it("drainSteering called once at the tool-result boundary; returned messages appended to the next step's provider input (after tool results)", async () => { - let drainCallCount = 0; - const steeringMessage: ChatMessage = { - role: "user", - chunks: [{ type: "text", text: "steer!" 
}], - }; - - const { provider, capturedMessages } = createCapturingProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - drainSteering: () => { - drainCallCount++; - return [steeringMessage]; - }, - }); - - expect(drainCallCount).toBe(1); - // The provider was called twice (tool-call step, then text step). - expect(capturedMessages).toHaveLength(2); - const secondStepMessages = capturedMessages[1] ?? []; - // user, assistant(tool-call), tool-result, steering(user) — in order, - // steering appended AFTER the tool results, before the next call. - expect(secondStepMessages).toHaveLength(4); - expect(secondStepMessages[0]?.role).toBe("user"); - expect(secondStepMessages[1]?.role).toBe("assistant"); - expect(secondStepMessages[2]?.role).toBe("tool"); - expect(secondStepMessages[3]).toEqual(steeringMessage); - expect(secondStepMessages[3]?.role).toBe("user"); - // Steering is fed to the next provider call, NOT surfaced in the - // turn result — the caller owns the steering messages' lifecycle. 
- expect(result.messages).toHaveLength(3); - }); - - it("drainSteering omitted → no injection; turn byte-identical to before", async () => { - const { provider, capturedMessages } = createCapturingProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - // drainSteering omitted — must be a strict no-op. - }); - - expect(capturedMessages).toHaveLength(2); - const secondStepMessages = capturedMessages[1] ?? []; - // user, assistant(tool-call), tool-result — NO steering injected. - expect(secondStepMessages).toHaveLength(3); - expect(secondStepMessages[0]?.role).toBe("user"); - expect(secondStepMessages[1]?.role).toBe("assistant"); - expect(secondStepMessages[2]?.role).toBe("tool"); - expect(result.messages).toHaveLength(3); - }); - - it("drainSteering returns [] → no injection", async () => { - let drainCallCount = 0; - const { provider, capturedMessages } = createCapturingProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - drainSteering: () => { - drainCallCount++; - return []; - }, - }); - - // Called at the boundary, but returned nothing → no injection. 
- expect(drainCallCount).toBe(1); - expect(capturedMessages).toHaveLength(2); - const secondStepMessages = capturedMessages[1] ?? []; - expect(secondStepMessages).toHaveLength(3); - expect(secondStepMessages[2]?.role).toBe("tool"); - }); - - it("drainSteering NOT called when a step has no tool calls (text-only turn)", async () => { - let drainCallCount = 0; - const provider = createFakeProvider([ - [ - { type: "text-delta", delta: "hello" }, - { type: "finish", reason: "stop" }, - ], - ]); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - drainSteering: () => { - drainCallCount++; - return []; - }, - }); - - expect(drainCallCount).toBe(0); - }); - - it("multiple tool-call steps → drainSteering called once per tool-call step", async () => { - let drainCallCount = 0; - const provider = createFakeProvider([ - [ - { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "tool-call", toolCallId: "tc2", toolName: "echo", input: {} }, - { type: "finish", reason: "tool-calls" }, - ], - [ - { type: "text-delta", delta: "done" }, - { type: "finish", reason: "stop" }, - ], - ]); - - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - drainSteering: () => { - drainCallCount++; - return []; - }, - }); - - // Steps 0 and 1 each produced tool calls → drained once each. - // Step 2 (text-only) → no boundary → no drain. Total = 2. 
- expect(drainCallCount).toBe(2); - }); - - it("drainSteering NOT called when max-steps ends the turn after a tool-call step (no next step → no drain)", async () => { - let drainCallCount = 0; - // Every step produces a tool call → the turn runs to MAX_STEPS. - const script: ProviderEvent[][] = Array.from({ length: MAX_STEPS }, () => [ - { type: "tool-call", toolCallId: "tc", toolName: "echo", input: {} }, - { type: "finish", reason: "tool-calls" }, - ]); - const provider = createFakeProvider(script); - - const tool = createFakeTool("echo", async () => ({ content: "echoed" })); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [tool], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit: () => {}, - drainSteering: () => { - drainCallCount++; - return []; - }, - }); - - expect(result.finishReason).toBe("max-steps"); - // MAX_STEPS tool-call steps (indices 0..MAX_STEPS-1). Drained on every - // step that is followed by a next step (0..MAX_STEPS-2 = MAX_STEPS-1 - // calls); the final step is the max-steps boundary → no next step → - // no drain (queue left intact for the caller). - expect(drainCallCount).toBe(MAX_STEPS - 1); - }); - }); - - // ── Retry with backoff ────────────────────────────────────────────────── - // - // PURE tests: a fake `sleep` (records calls, resolves instantly, can abort - // on a chosen call) + a pure `delayFor` (the canonical schedule + 8h budget). - // A stub `ProviderContract` whose `stream` yields a retryable error N times - // then a finish. ZERO mocks of `@dispatch/*` modules — effects injected. - - /** The canonical backoff schedule (matches the orchestrator's concrete strategy). */ - const RETRY_SCHEDULE_MS = [5_000, 10_000, 30_000, 60_000, 300_000, 600_000, 900_000, 1_800_000]; - const RETRY_TAIL_MS = 1_800_000; // 30m - const RETRY_BUDGET_MS = 8 * 60 * 60 * 1000; // 8h - - /** Cumulative scheduled sleep through `attempt` (sum of delay[0..attempt]). 
*/ - function cumulativeSleepMs(attempt: number): number { - let sum = 0; - for (let i = 0; i <= attempt; i++) { - sum += i < RETRY_SCHEDULE_MS.length ? RETRY_SCHEDULE_MS[i] : RETRY_TAIL_MS; - } - return sum; - } - - /** Pure, deterministic delay decision (no I/O, no clock). */ - function delayFor(attempt: number): number | undefined { - const delay = attempt < RETRY_SCHEDULE_MS.length ? RETRY_SCHEDULE_MS[attempt] : RETRY_TAIL_MS; - if (cumulativeSleepMs(attempt) > RETRY_BUDGET_MS) return undefined; // over budget → stop - return delay; - } - - /** The full schedule delayFor would emit (until budget exhausted). */ - function fullSchedule(): number[] { - const result: number[] = []; - let attempt = 0; - while (true) { - const delay = delayFor(attempt); - if (delay === undefined) break; - result.push(delay); - attempt++; - } - return result; - } - - /** - * Fake, controllable `sleep`: records every call's delay, resolves - * instantly (no real waiting), and can abort the controller on a chosen - * 1-based call index to simulate "abort during sleep". - */ - function createFakeSleep(controller: AbortController): { - sleep: (ms: number, signal: AbortSignal) => Promise<void>; - calls: number[]; - abortOnCall: (n: number) => void; - } { - const calls: number[] = []; - let abortAt: number | undefined; - const sleep = async (ms: number, _signal: AbortSignal): Promise<void> => { - calls.push(ms); - if (abortAt !== undefined && calls.length === abortAt) { - controller.abort(); - throw new Error("aborted"); - } - // Otherwise resolve instantly (no real waiting). - }; - return { - sleep, - calls, - abortOnCall: (n: number) => { - abortAt = n; - }, - }; - } - - /** A provider that yields a retryable error `errorCount` times, then success. 
*/ - function createRetryingProvider(opts: { - errorCount: number; - error?: { message: string; code?: string; retryable?: boolean }; - success?: ProviderEvent[]; - }): { provider: ProviderContract; streamCalls: { value: number } } { - const streamCalls = { value: 0 }; - const error: ProviderEvent = { - type: "error", - message: opts.error?.message ?? "overloaded", - ...(opts.error?.code !== undefined ? { code: opts.error.code } : {}), - ...(opts.error?.retryable !== undefined ? { retryable: opts.error.retryable } : {}), - }; - const success = opts.success ?? [ - { type: "text-delta", delta: "hi" }, - { type: "finish", reason: "stop" }, - ]; - const provider: ProviderContract = { - id: "fake", - stream() { - const idx = streamCalls.value++; - return (async function* () { - if (idx < opts.errorCount) { - yield error; - return; - } - for (const event of success) yield event; - })(); - }, - }; - return { provider, streamCalls }; - } - - describe("retry with backoff", () => { - it("retries a retryable emitted error on schedule then succeeds", async () => { - const { provider } = createRetryingProvider({ - errorCount: 3, - error: { message: "HTTP 429: overloaded", code: "429", retryable: true }, - }); - const controller = new AbortController(); - const fake = createFakeSleep(controller); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - signal: controller.signal, - retry: { delayFor, sleep: fake.sleep }, - }); - - expect(result.finishReason).toBe("stop"); - // 3 retries: 5s, 10s, 30s. - expect(fake.calls).toEqual([5_000, 10_000, 30_000]); - // 3 provider-retry events (one per sleep), then the successful text. 
- const retryEvents = events.filter((e) => e.type === "provider-retry"); - expect(retryEvents).toHaveLength(3); - if (retryEvents[0]?.type === "provider-retry") { - expect(retryEvents[0].attempt).toBe(0); - expect(retryEvents[0].delayMs).toBe(5_000); - expect(retryEvents[0].message).toBe("HTTP 429: overloaded"); - expect(retryEvents[0].code).toBe("429"); - expect(retryEvents[0].conversationId).toBe("conv-1"); - expect(retryEvents[0].turnId).toBe("turn-1"); - } - if (retryEvents[1]?.type === "provider-retry") { - expect(retryEvents[1].attempt).toBe(1); - expect(retryEvents[1].delayMs).toBe(10_000); - } - if (retryEvents[2]?.type === "provider-retry") { - expect(retryEvents[2].attempt).toBe(2); - expect(retryEvents[2].delayMs).toBe(30_000); - } - // The error was suppressed (no error event emitted — retry succeeded). - expect(events.filter((e) => e.type === "error")).toHaveLength(0); - // The successful content still streams. - const deltas = events.filter((e) => e.type === "text-delta"); - expect(deltas).toHaveLength(1); - }); - - it("sleep is called with the full schedule [5s,10s,30s,60s,5m,10m,15m,30m,30m…]", async () => { - // Provider errors forever → retries until budget exhausted → gives up. - const { provider } = createRetryingProvider({ - errorCount: Number.POSITIVE_INFINITY, - error: { message: "overloaded", code: "429", retryable: true }, - }); - const controller = new AbortController(); - const fake = createFakeSleep(controller); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - signal: controller.signal, - retry: { delayFor, sleep: fake.sleep }, - }); - - // Budget exhausted → give up → error. - expect(result.finishReason).toBe("error"); - - // The sleep schedule matches the pure delayFor output exactly. 
- expect(fake.calls).toEqual(fullSchedule()); - - // Head of the schedule (the 8 stepped delays). - expect(fake.calls.slice(0, 8)).toEqual([ - 5_000, 10_000, 30_000, 60_000, 300_000, 600_000, 900_000, 1_800_000, - ]); - // Tail repeats 30m. - expect(fake.calls[8]).toBe(1_800_000); - expect(fake.calls.at(-1)).toBe(1_800_000); - - // 8h cumulative budget cap: head (3705s) + 13×30m = ~7h31m, then stop. - // 21 retries (attempts 0..20), then delayFor(21) → undefined → give up. - expect(fake.calls).toHaveLength(21); - const totalSlept = fake.calls.reduce((a, b) => a + b, 0); - expect(totalSlept).toBeLessThanOrEqual(RETRY_BUDGET_MS); - expect(totalSlept).toBe(3_705_000 + 13 * 1_800_000); // 27_105_000 - - // One provider-retry per sleep, plus a final error (give-up). - expect(events.filter((e) => e.type === "provider-retry")).toHaveLength(21); - expect(events.filter((e) => e.type === "error")).toHaveLength(1); - const errEvt = events.find((e) => e.type === "error"); - if (errEvt?.type === "error") { - expect(errEvt.message).toBe("overloaded"); - expect(errEvt.code).toBe("429"); - } - }); - - it("does NOT retry after content was emitted (safety invariant)", async () => { - // Provider yields text (content) THEN a retryable error. Because content - // was emitted, retrying is unsafe (would duplicate partial output). 
- let callCount = 0; - const provider: ProviderContract = { - id: "fake", - stream() { - callCount++; - return (async function* () { - yield { type: "text-delta", delta: "partial" } as ProviderEvent; - yield { - type: "error", - message: "overloaded", - code: "429", - retryable: true, - } as ProviderEvent; - })(); - }, - }; - const controller = new AbortController(); - const fake = createFakeSleep(controller); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - signal: controller.signal, - retry: { delayFor, sleep: fake.sleep }, - }); - - // No retries: stream called exactly once. - expect(callCount).toBe(1); - expect(fake.calls).toHaveLength(0); - // The error is emitted (give-up) and partial content preserved. - expect(result.finishReason).toBe("error"); - expect(events.filter((e) => e.type === "error")).toHaveLength(1); - expect(events.filter((e) => e.type === "provider-retry")).toHaveLength(0); - expect(events.filter((e) => e.type === "text-delta")).toHaveLength(1); - }); - - it("does NOT retry a non-retryable emitted error (retryable: false)", async () => { - const { provider, streamCalls } = createRetryingProvider({ - errorCount: 1, - error: { message: "bad request", code: "400", retryable: false }, - }); - const controller = new AbortController(); - const fake = createFakeSleep(controller); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - signal: controller.signal, - retry: { delayFor, sleep: fake.sleep }, - }); - - expect(streamCalls.value).toBe(1); // no retry - expect(fake.calls).toHaveLength(0); - expect(result.finishReason).toBe("error"); - expect(events.filter((e) => 
e.type === "error")).toHaveLength(1); - expect(events.filter((e) => e.type === "provider-retry")).toHaveLength(0); - }); - - it("does NOT retry a non-retryable emitted error (retryable absent)", async () => { - const { provider, streamCalls } = createRetryingProvider({ - errorCount: 1, - error: { message: "bad request", code: "400" }, // no retryable field - }); - const controller = new AbortController(); - const fake = createFakeSleep(controller); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - signal: controller.signal, - retry: { delayFor, sleep: fake.sleep }, - }); - - expect(streamCalls.value).toBe(1); // no retry - expect(fake.calls).toHaveLength(0); - expect(result.finishReason).toBe("error"); - expect(events.filter((e) => e.type === "error")).toHaveLength(1); - }); - - it("give-up emits the final error when budget is exhausted", async () => { - // Custom delayFor that allows exactly 1 retry then stops. - const shortDelayFor = (attempt: number): number | undefined => - attempt === 0 ? 100 : undefined; - const { provider } = createRetryingProvider({ - errorCount: Number.POSITIVE_INFINITY, - error: { message: "overloaded", code: "429", retryable: true }, - }); - const controller = new AbortController(); - const fake = createFakeSleep(controller); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - signal: controller.signal, - retry: { delayFor: shortDelayFor, sleep: fake.sleep }, - }); - - expect(result.finishReason).toBe("error"); - expect(fake.calls).toEqual([100]); // one retry, then give up - // One provider-retry (attempt 0), then the final error. 
- expect(events.filter((e) => e.type === "provider-retry")).toHaveLength(1); - const errs = events.filter((e) => e.type === "error"); - expect(errs).toHaveLength(1); - if (errs[0]?.type === "error") { - expect(errs[0].message).toBe("overloaded"); - expect(errs[0].code).toBe("429"); - } - }); - - it("abort during sleep seals the turn aborted", async () => { - const { provider } = createRetryingProvider({ - errorCount: Number.POSITIVE_INFINITY, - error: { message: "overloaded", code: "429", retryable: true }, - }); - const controller = new AbortController(); - const fake = createFakeSleep(controller); - fake.abortOnCall(2); // abort on the 2nd sleep - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - signal: controller.signal, - retry: { delayFor, sleep: fake.sleep }, - }); - - expect(result.finishReason).toBe("aborted"); - // Two sleeps attempted; the 2nd aborted. - expect(fake.calls).toHaveLength(2); - // No terminal error emitted (it was an abort, not a give-up). - expect(events.filter((e) => e.type === "error")).toHaveLength(0); - // One provider-retry before the aborted sleep (attempt 0). - const retries = events.filter((e) => e.type === "provider-retry"); - expect(retries).toHaveLength(2); - // The done event carries reason "aborted". - const done = events.find((e) => e.type === "done"); - if (done?.type === "done") { - expect(done.reason).toBe("aborted"); - } - }); - - it("omitting retry keeps the pre-retry behavior (backward-compatible)", async () => { - // A retryable error with no retry configured → ends the step as today. 
- const { provider, streamCalls } = createRetryingProvider({ - errorCount: 1, - error: { message: "overloaded", code: "429", retryable: true }, - }); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - // no retry field - }); - - expect(streamCalls.value).toBe(1); // no retry - expect(result.finishReason).toBe("error"); - expect(events.filter((e) => e.type === "error")).toHaveLength(1); - expect(events.filter((e) => e.type === "provider-retry")).toHaveLength(0); - }); - - it("retries a THROWN error (retryable-by-default when pre-content)", async () => { - // A thrown error (no retryable flag) before content is retried. - let callCount = 0; - const provider: ProviderContract = { - id: "fake", - stream() { - callCount++; - return (async function* () { - if (callCount <= 2) { - throw new Error("network blip"); - } - yield { type: "text-delta", delta: "hi" } as ProviderEvent; - yield { type: "finish", reason: "stop" } as ProviderEvent; - })(); - }, - }; - const controller = new AbortController(); - const fake = createFakeSleep(controller); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - signal: controller.signal, - retry: { delayFor, sleep: fake.sleep }, - }); - - expect(callCount).toBe(3); // 2 throws retried, 3rd succeeds - expect(fake.calls).toEqual([5_000, 10_000]); - expect(result.finishReason).toBe("stop"); - expect(events.filter((e) => e.type === "provider-retry")).toHaveLength(2); - // Thrown errors have no code. 
- if (events[0]?.type === "provider-retry") { - expect(events[0].code).toBeUndefined(); - expect(events[0].message).toBe("network blip"); - } - expect(events.filter((e) => e.type === "error")).toHaveLength(0); - }); - - it("does NOT retry a thrown error after content was emitted", async () => { - let callCount = 0; - const provider: ProviderContract = { - id: "fake", - stream() { - callCount++; - return (async function* () { - yield { type: "text-delta", delta: "partial" } as ProviderEvent; - throw new Error("network blip"); - })(); - }, - }; - const controller = new AbortController(); - const fake = createFakeSleep(controller); - - const { events, emit } = createCollectingEmit(); - - const result = await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - signal: controller.signal, - retry: { delayFor, sleep: fake.sleep }, - }); - - expect(callCount).toBe(1); - expect(fake.calls).toHaveLength(0); - expect(result.finishReason).toBe("error"); - expect(events.filter((e) => e.type === "error")).toHaveLength(1); - expect(events.filter((e) => e.type === "text-delta")).toHaveLength(1); - }); - - it("provider-retry events interleave correctly: error → retry-event → sleep → retry", async () => { - // Verify ordering: each provider-retry event comes BEFORE its sleep, - // and the successful content comes only after the last retry. 
- const { provider } = createRetryingProvider({ - errorCount: 2, - error: { message: "overloaded", code: "429", retryable: true }, - success: [ - { type: "text-delta", delta: "ok" }, - { type: "finish", reason: "stop" }, - ], - }); - const controller = new AbortController(); - const fake = createFakeSleep(controller); - - const { events, emit } = createCollectingEmit(); - - await runTurn({ - provider, - messages: [userMessage], - tools: [], - dispatch: { maxConcurrent: 1, eager: false }, - conversationId: "conv-1", - turnId: "turn-1", - emit, - signal: controller.signal, - retry: { delayFor, sleep: fake.sleep }, - }); - - const types = events.map((e) => e.type); - // turn-start, provider-retry(0), provider-retry(1), text-delta, step-complete, done - expect(types[0]).toBe("turn-start"); - const firstRetryIdx = types.indexOf("provider-retry"); - const textIdx = types.indexOf("text-delta"); - expect(firstRetryIdx).toBeGreaterThan(0); - expect(textIdx).toBeGreaterThan(firstRetryIdx); - // Both retries precede the text. 
- const retryCount = types.filter((t) => t === "provider-retry").length; - expect(retryCount).toBe(2); - }); - }); + it("emits events with the conversationId and turnId from input", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "hi" }, + { type: "usage", usage: { inputTokens: 1, outputTokens: 1 } }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-42", + turnId: "turn-99", + emit, + }); + + expect(events.length).toBeGreaterThan(0); + for (const event of events) { + expect(event.conversationId).toBe("conv-42"); + if (event.type !== "status") { + expect(event.turnId).toBe("turn-99"); + } + } + }); + + it("text-only turn emits correct events and returns correct result", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "Hello" }, + { type: "text-delta", delta: " world" }, + { type: "reasoning-delta", delta: "thinking..." }, + { type: "usage", usage: { inputTokens: 10, outputTokens: 5 } }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit, + }); + + expect(result.finishReason).toBe("stop"); + expect(result.messages).toHaveLength(1); + expect(result.messages[0]?.role).toBe("assistant"); + + const chunks = result.messages[0]?.chunks ?? []; + expect(chunks).toHaveLength(2); + expect(chunks[0]).toEqual({ type: "text", text: "Hello world" }); + expect(chunks[1]).toEqual({ type: "thinking", text: "thinking..." 
}); + + expect(result.usage).toEqual({ inputTokens: 10, outputTokens: 5 }); + + const eventTypes = events.map((e) => e.type); + expect(eventTypes).toEqual([ + "turn-start", + "text-delta", + "text-delta", + "reasoning-delta", + "usage", + "step-complete", + "done", + ]); + }); + + it("turn with one tool call executes tool, feeds result back, then finishes", async () => { + const tool = createFakeTool("greet", async (input) => ({ + content: `Hello, ${(input as { name: string }).name}!`, + })); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "greet", input: { name: "World" } }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "Done." }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit, + }); + + expect(result.finishReason).toBe("stop"); + expect(result.messages).toHaveLength(3); + expect(result.messages[0]?.role).toBe("assistant"); + expect(result.messages[1]?.role).toBe("tool"); + expect(result.messages[2]?.role).toBe("assistant"); + + const toolResultChunk = result.messages[1]?.chunks[0]; + expect(toolResultChunk?.type).toBe("tool-result"); + if (toolResultChunk?.type === "tool-result") { + expect(toolResultChunk.content).toBe("Hello, World!"); + expect(toolResultChunk.toolCallId).toBe("tc1"); + expect(toolResultChunk.isError).toBe(false); + } + + const eventTypes = events.map((e) => e.type); + expect(eventTypes).toContain("tool-call"); + expect(eventTypes).toContain("tool-result"); + expect(eventTypes).toContain("text-delta"); + }); + + it("passes updated messages to subsequent provider calls", async () => { + const capturedMessages: ChatMessage[][] = []; + let callIndex = 0; + const script: ProviderEvent[][] = [ + [ + { type: 
"tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]; + + const provider: ProviderContract = { + id: "fake", + stream(messages, _tools) { + capturedMessages.push([...messages]); + const events = script[callIndex] ?? []; + callIndex++; + return (async function* () { + for (const event of events) yield event; + })(); + }, + }; + + const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit: () => {}, + }); + + expect(capturedMessages).toHaveLength(2); + expect(capturedMessages[0] ?? []).toHaveLength(1); + expect(capturedMessages[0]?.[0]?.role).toBe("user"); + + expect(capturedMessages[1] ?? []).toHaveLength(3); + expect(capturedMessages[1]?.[0]?.role).toBe("user"); + expect(capturedMessages[1]?.[1]?.role).toBe("assistant"); + expect(capturedMessages[1]?.[2]?.role).toBe("tool"); + }); + + it("maxConcurrent: 1 runs tools sequentially", async () => { + const log: string[] = []; + + const toolA = createFakeTool("a", async () => { + log.push("a:start"); + await delay(10); + log.push("a:end"); + return { content: "a" }; + }); + + const toolB = createFakeTool("b", async () => { + log.push("b:start"); + await delay(10); + log.push("b:end"); + return { content: "b" }; + }); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "a", input: {} }, + { type: "tool-call", toolCallId: "tc2", toolName: "b", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + await runTurn({ + provider, + messages: [userMessage], + tools: [toolA, toolB], + dispatch: { maxConcurrent: 1, eager: false }, + 
conversationId: "tab-test", + turnId: "turn-test", + emit: () => {}, + }); + + const aEndIdx = log.indexOf("a:end"); + const bStartIdx = log.indexOf("b:start"); + expect(aEndIdx).toBeLessThan(bStartIdx); + }); + + it("maxConcurrent: 2 runs tools in parallel", async () => { + const log: string[] = []; + + const toolA = createFakeTool("a", async () => { + log.push("a:start"); + await delay(20); + log.push("a:end"); + return { content: "a" }; + }); + + const toolB = createFakeTool("b", async () => { + log.push("b:start"); + await delay(20); + log.push("b:end"); + return { content: "b" }; + }); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "a", input: {} }, + { type: "tool-call", toolCallId: "tc2", toolName: "b", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + await runTurn({ + provider, + messages: [userMessage], + tools: [toolA, toolB], + dispatch: { maxConcurrent: 2, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit: () => {}, + }); + + const aStartIdx = log.indexOf("a:start"); + const bStartIdx = log.indexOf("b:start"); + const aEndIdx = log.indexOf("a:end"); + const bEndIdx = log.indexOf("b:end"); + + expect(aStartIdx).toBeLessThan(aEndIdx); + expect(bStartIdx).toBeLessThan(bEndIdx); + expect(aStartIdx).toBeLessThan(bEndIdx); + expect(bStartIdx).toBeLessThan(aEndIdx); + }); + + it("maxConcurrent: 0 runs all tools in parallel (unlimited)", async () => { + const log: string[] = []; + + const toolA = createFakeTool("a", async () => { + log.push("a:start"); + await delay(20); + log.push("a:end"); + return { content: "a" }; + }); + + const toolB = createFakeTool("b", async () => { + log.push("b:start"); + await delay(20); + log.push("b:end"); + return { content: "b" }; + }); + + const toolC = createFakeTool("c", async () => { + log.push("c:start"); + await delay(20); + 
log.push("c:end"); + return { content: "c" }; + }); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "a", input: {} }, + { type: "tool-call", toolCallId: "tc2", toolName: "b", input: {} }, + { type: "tool-call", toolCallId: "tc3", toolName: "c", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + await runTurn({ + provider, + messages: [userMessage], + tools: [toolA, toolB, toolC], + dispatch: { maxConcurrent: 0, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit: () => {}, + }); + + const aStartIdx = log.indexOf("a:start"); + const bStartIdx = log.indexOf("b:start"); + const cStartIdx = log.indexOf("c:start"); + const aEndIdx = log.indexOf("a:end"); + const bEndIdx = log.indexOf("b:end"); + const cEndIdx = log.indexOf("c:end"); + + expect(aStartIdx).toBeLessThan(aEndIdx); + expect(bStartIdx).toBeLessThan(bEndIdx); + expect(cStartIdx).toBeLessThan(cEndIdx); + expect(aStartIdx).toBeLessThan(bEndIdx); + expect(bStartIdx).toBeLessThan(aEndIdx); + expect(cStartIdx).toBeLessThan(aEndIdx); + }); + + it("eager: true launches tool before step finish", async () => { + const log: string[] = []; + + const tool = createFakeTool("test", async () => { + log.push("tool:start"); + await delay(5); + log.push("tool:end"); + return { content: "done" }; + }); + + let callCount = 0; + const provider: ProviderContract = { + id: "fake", + stream(_messages, _tools) { + const idx = callCount++; + if (idx === 0) { + return (async function* () { + yield { + type: "tool-call", + toolCallId: "tc1", + toolName: "test", + input: {}, + } as ProviderEvent; + log.push("provider:after-tool-call"); + await delay(50); + yield { type: "finish", reason: "tool-calls" } as ProviderEvent; + log.push("provider:finish"); + })(); + } + return (async function* () { + yield { type: "text-delta", delta: "done" } as ProviderEvent; + 
yield { type: "finish", reason: "stop" } as ProviderEvent; + })(); + }, + }; + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: true }, + conversationId: "tab-test", + turnId: "turn-test", + emit: () => {}, + }); + + const toolStartIdx = log.indexOf("tool:start"); + const finishIdx = log.indexOf("provider:finish"); + expect(toolStartIdx).toBeLessThan(finishIdx); + }); + + it("eager: false does not launch tool before step finish", async () => { + const log: string[] = []; + + const tool = createFakeTool("test", async () => { + log.push("tool:start"); + await delay(5); + log.push("tool:end"); + return { content: "done" }; + }); + + let callCount = 0; + const provider: ProviderContract = { + id: "fake", + stream(_messages, _tools) { + const idx = callCount++; + if (idx === 0) { + return (async function* () { + yield { + type: "tool-call", + toolCallId: "tc1", + toolName: "test", + input: {}, + } as ProviderEvent; + log.push("provider:after-tool-call"); + await delay(50); + yield { type: "finish", reason: "tool-calls" } as ProviderEvent; + log.push("provider:finish"); + })(); + } + return (async function* () { + yield { type: "text-delta", delta: "done" } as ProviderEvent; + yield { type: "finish", reason: "stop" } as ProviderEvent; + })(); + }, + }; + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit: () => {}, + }); + + const toolStartIdx = log.indexOf("tool:start"); + const finishIdx = log.indexOf("provider:finish"); + expect(toolStartIdx).toBeGreaterThan(finishIdx); + }); + + it("abort mid-turn synthesizes error results for unresolved tool calls", async () => { + const ac = new AbortController(); + + const tool = createFakeTool("slow", async (_input, ctx) => { + await delay(200); + if (ctx.signal.aborted) return { content: "Aborted", isError: true }; + return { content: 
"done" }; + }); + + const provider: ProviderContract = { + id: "fake", + stream(_messages, _tools) { + return (async function* () { + yield { + type: "tool-call", + toolCallId: "tc1", + toolName: "slow", + input: {}, + } as ProviderEvent; + yield { + type: "tool-call", + toolCallId: "tc2", + toolName: "slow", + input: { x: 1 }, + } as ProviderEvent; + ac.abort(); + await delay(10); + yield { type: "finish", reason: "tool-calls" } as ProviderEvent; + })(); + }, + }; + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit, + signal: ac.signal, + }); + + expect(result.finishReason).toBe("aborted"); + + const toolResults = events.filter((e) => e.type === "tool-result"); + for (const tr of toolResults) { + if (tr.type === "tool-result") { + expect(tr.isError).toBe(true); + } + } + }); + + it("abort before any step returns aborted immediately", async () => { + const ac = new AbortController(); + ac.abort(); + + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "should not appear" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit: () => {}, + signal: ac.signal, + }); + + expect(result.finishReason).toBe("aborted"); + expect(result.messages).toHaveLength(0); + }); + + it("de-duplicates identical tool calls in a batch", async () => { + let execCount = 0; + + const tool = createFakeTool("dedup", async (_input) => { + execCount++; + return { content: `result-${execCount}` }; + }); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "dedup", input: { x: 1 } }, + { type: "tool-call", toolCallId: "tc2", toolName: "dedup", 
input: { x: 1 } }, + { type: "tool-call", toolCallId: "tc3", toolName: "dedup", input: { x: 2 } }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit, + }); + + expect(execCount).toBe(2); + + const toolResults = events.filter((e) => e.type === "tool-result"); + expect(toolResults).toHaveLength(3); + + const tc1Result = toolResults.find((e) => e.type === "tool-result" && e.toolCallId === "tc1"); + const tc2Result = toolResults.find((e) => e.type === "tool-result" && e.toolCallId === "tc2"); + const tc3Result = toolResults.find((e) => e.type === "tool-result" && e.toolCallId === "tc3"); + + expect(tc1Result).toBeDefined(); + expect(tc2Result).toBeDefined(); + expect(tc3Result).toBeDefined(); + + if (tc1Result?.type === "tool-result" && tc2Result?.type === "tool-result") { + expect(tc1Result.content).toBe(tc2Result.content); + expect(tc1Result.content).toBe("result-1"); + } + if (tc3Result?.type === "tool-result") { + expect(tc3Result.content).toBe("result-2"); + } + + expect(result.finishReason).toBe("stop"); + }); + + it("serializes non-concurrency-safe tools even with maxConcurrent > 1", async () => { + const log: string[] = []; + + const unsafeTool: ToolContract = { + name: "unsafe", + description: "Unsafe tool", + parameters: { type: "object" }, + concurrencySafe: false, + execute: async () => { + log.push("unsafe:start"); + await delay(10); + log.push("unsafe:end"); + return { content: "done" }; + }, + }; + + const safeTool: ToolContract = { + name: "safe", + description: "Safe tool", + parameters: { type: "object" }, + execute: async () => { + log.push("safe:start"); + await delay(10); + log.push("safe:end"); + return { 
content: "done" }; + }, + }; + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "unsafe", input: {} }, + { type: "tool-call", toolCallId: "tc2", toolName: "safe", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + await runTurn({ + provider, + messages: [userMessage], + tools: [unsafeTool, safeTool], + dispatch: { maxConcurrent: 5, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit: () => {}, + }); + + const unsafeEndIdx = log.indexOf("unsafe:end"); + const safeStartIdx = log.indexOf("safe:start"); + expect(unsafeEndIdx).toBeLessThan(safeStartIdx); + }); + + it("handles unknown tool name gracefully", async () => { + const provider = createFakeProvider([ + [ + { + type: "tool-call", + toolCallId: "tc1", + toolName: "nonexistent", + input: {}, + }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit, + }); + + const toolResults = events.filter((e) => e.type === "tool-result"); + expect(toolResults).toHaveLength(1); + if (toolResults[0]?.type === "tool-result") { + expect(toolResults[0]?.isError).toBe(true); + expect(toolResults[0]?.content).toContain("Unknown tool"); + } + + expect(result.finishReason).toBe("stop"); + }); + + it("handles provider error gracefully", async () => { + const provider: ProviderContract = { + id: "fake", + stream() { + return (async function* () { + yield { type: "text-delta", delta: "partial" } as ProviderEvent; + throw new Error("provider crashed"); + })(); + }, + }; + + const { events, emit } = createCollectingEmit(); + + const 
result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit, + }); + + expect(result.finishReason).toBe("error"); + + const errorEvents = events.filter((e) => e.type === "error"); + expect(errorEvents).toHaveLength(1); + if (errorEvents[0]?.type === "error") { + expect(errorEvents[0]?.message).toContain("provider crashed"); + } + }); + + it("forwards cwd from RunTurnInput to ToolExecuteContext", async () => { + let capturedCwd: string | undefined = "SENTINEL_NOT_SET"; + + const tool = createFakeTool("cwdcheck", async (_input, ctx) => { + capturedCwd = ctx.cwd; + return { content: "ok" }; + }); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "cwdcheck", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit: () => {}, + cwd: "/some/dir", + }); + + expect(capturedCwd).toBe("/some/dir"); + }); + + it("forwards undefined cwd when RunTurnInput has no cwd", async () => { + let capturedCwd: string | undefined = "SENTINEL_NOT_SET"; + + const tool = createFakeTool("cwdcheck", async (_input, ctx) => { + capturedCwd = ctx.cwd; + return { content: "ok" }; + }); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "cwdcheck", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit: () => {}, + }); + + 
expect(capturedCwd).toBeUndefined(); + }); + + it("forwards computerId from RunTurnInput to ToolExecuteContext", async () => { + let capturedComputerId: string | undefined = "SENTINEL_NOT_SET"; + + const tool = createFakeTool("computercheck", async (_input, ctx) => { + capturedComputerId = ctx.computerId; + return { content: "ok" }; + }); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "computercheck", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit: () => {}, + computerId: "ssh-host-alias", + }); + + expect(capturedComputerId).toBe("ssh-host-alias"); + }); + + it("forwards undefined computerId when RunTurnInput has no computerId", async () => { + let capturedComputerId: string | undefined = "SENTINEL_NOT_SET"; + + const tool = createFakeTool("computercheck", async (_input, ctx) => { + capturedComputerId = ctx.computerId; + return { content: "ok" }; + }); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "computercheck", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit: () => {}, + }); + + expect(capturedComputerId).toBeUndefined(); + }); + + it("aggregates usage across multiple steps", async () => { + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, + { type: "usage", usage: { inputTokens: 10, outputTokens: 5 } }, + { type: 
"finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "usage", usage: { inputTokens: 20, outputTokens: 10 } }, + { type: "finish", reason: "stop" }, + ], + ]); + + const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit: () => {}, + }); + + expect(result.usage).toEqual({ inputTokens: 30, outputTokens: 15 }); + }); + + it("emits tool-output events from tool ctx.onOutput", async () => { + const tool: ToolContract = { + name: "streaming", + description: "A tool that streams output", + parameters: { type: "object" }, + execute: async (_input, ctx) => { + ctx.onOutput("line 1\n", "stdout"); + ctx.onOutput("err 1\n", "stderr"); + return { content: "done" }; + }, + }; + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "streaming", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "tab-test", + turnId: "turn-test", + emit, + }); + + const outputs = events.filter((e) => e.type === "tool-output"); + expect(outputs).toHaveLength(2); + if (outputs[0]?.type === "tool-output") { + expect(outputs[0]?.data).toBe("line 1\n"); + expect(outputs[0]?.stream).toBe("stdout"); + expect(outputs[0]?.toolCallId).toBe("tc1"); + } + if (outputs[1]?.type === "tool-output") { + expect(outputs[1]?.data).toBe("err 1\n"); + expect(outputs[1]?.stream).toBe("stderr"); + } + }); + + function createTestLogger(): { + logger: Logger; + sink: LogSink & { records: LogRecord[] }; + deps: LogDeps; + } { + let 
idCounter = 0; + const deps: LogDeps = { + now: () => 1000 + idCounter * 100, + newId: () => `span-${++idCounter}`, + }; + const records: LogRecord[] = []; + const sink: LogSink & { records: LogRecord[] } = { + records, + emit: (record) => records.push(record), + }; + const logger = createLogger({ extensionId: "test" }, sink, deps); + return { logger, sink, deps }; + } + + describe("span instrumentation", () => { + it("emits turn + step span open/close in order", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "hi" }, + { type: "usage", usage: { inputTokens: 1, outputTokens: 1 } }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + const spanOpens = sink.records.filter((r) => r.kind === "span-open"); + const spanCloses = sink.records.filter((r) => r.kind === "span-close"); + + expect(spanOpens.length).toBeGreaterThanOrEqual(2); // turn + step + expect(spanCloses.length).toBeGreaterThanOrEqual(2); + + const turnOpen = spanOpens.find((r) => r.kind === "span-open" && r.name === "turn"); + const stepOpen = spanOpens.find((r) => r.kind === "span-open" && r.name === "step"); + expect(turnOpen).toBeDefined(); + expect(stepOpen).toBeDefined(); + + if (turnOpen?.kind === "span-open") { + expect(turnOpen.extensionId).toBe("test"); + expect(turnOpen.attributes?.conversationId).toBe("conv-1"); + expect(turnOpen.attributes?.turnId).toBe("turn-1"); + } + + const turnClose = spanCloses.find((r) => r.kind === "span-close" && r.name === "turn"); + expect(turnClose).toBeDefined(); + if (turnClose?.kind === "span-close") { + expect(turnClose.status).toBe("ok"); + expect(turnClose.durationMs).toBeGreaterThanOrEqual(0); + } + }); + + it("emits tool-call spans for dispatched tools", async () => { + 
const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + const toolCallSpans = sink.records.filter( + (r) => r.kind === "span-open" && r.name === "tool-call", + ); + expect(toolCallSpans).toHaveLength(1); + if (toolCallSpans[0]?.kind === "span-open") { + expect(toolCallSpans[0].attributes?.name).toBe("echo"); + expect(toolCallSpans[0].attributes?.toolCallId).toBe("tc1"); + } + + const toolCallCloses = sink.records.filter( + (r) => r.kind === "span-close" && r.name === "tool-call", + ); + expect(toolCallCloses).toHaveLength(1); + if (toolCallCloses[0]?.kind === "span-close") { + expect(toolCallCloses[0].status).toBe("ok"); + } + }); + + it("tools receive ctx.log (correlated logger)", async () => { + let capturedLog: Logger | undefined; + + const tool = createFakeTool("logtest", async (_input, ctx) => { + capturedLog = ctx.log; + ctx.log.info("tool ran", { key: "value" }); + return { content: "ok" }; + }); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "logtest", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + 
expect(capturedLog).toBeDefined(); + + const toolLogs = sink.records.filter( + (r) => r.kind === "log" && r.kind === "log" && (r as { msg: string }).msg === "tool ran", + ); + expect(toolLogs).toHaveLength(1); + if (toolLogs[0]?.kind === "log") { + expect(toolLogs[0].attributes?.key).toBe("value"); + expect(toolLogs[0].extensionId).toBe("test"); + } + }); + + it("an aborted turn still closes its turn span", async () => { + const ac = new AbortController(); + ac.abort(); + + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "should not appear" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + signal: ac.signal, + logger, + }); + + const turnCloses = sink.records.filter((r) => r.kind === "span-close" && r.name === "turn"); + expect(turnCloses).toHaveLength(1); + if (turnCloses[0]?.kind === "span-close") { + expect(turnCloses[0].attributes?.finishReason).toBe("aborted"); + } + }); + + it("a provider error closes the step span with error status", async () => { + const provider: ProviderContract = { + id: "fake", + stream() { + return (async function* () { + yield { type: "text-delta", delta: "partial" } as ProviderEvent; + throw new Error("provider exploded"); + })(); + }, + }; + + const { logger, sink } = createTestLogger(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + expect(result.finishReason).toBe("error"); + + const stepCloses = sink.records.filter((r) => r.kind === "span-close" && r.name === "step"); + expect(stepCloses).toHaveLength(1); + if (stepCloses[0]?.kind === "span-close") { + expect(stepCloses[0].status).toBe("error"); + 
expect(stepCloses[0].attributes?.["error.message"]).toContain("provider exploded"); + } + }); + + it("emits a prompt span with verbatim body and small scalar attributes", async () => { + const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + const promptOpens = sink.records.filter((r) => r.kind === "span-open" && r.name === "prompt"); + expect(promptOpens).toHaveLength(1); + + const promptOpen = promptOpens[0]; + if (promptOpen?.kind === "span-open") { + expect(promptOpen.body).toBeDefined(); + const parsed = JSON.parse(promptOpen.body as string); + expect(parsed.messages).toEqual([userMessage]); + expect(parsed.tools).toHaveLength(1); + expect(parsed.tools[0].name).toBe("echo"); + + expect(promptOpen.attributes?.messageCount).toBe(1); + expect(promptOpen.attributes?.toolCount).toBe(1); + } + + const promptCloses = sink.records.filter( + (r) => r.kind === "span-close" && r.name === "prompt", + ); + expect(promptCloses).toHaveLength(1); + + const logRecords = sink.records.filter( + (r) => + r.kind === "log" && r.kind === "log" && (r as { msg: string }).msg === "prompt:before", + ); + expect(logRecords).toHaveLength(0); + }); + + it("emits ttft and decode spans for a generating step", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "Hello" }, + { type: "text-delta", delta: " world" }, + { type: "usage", usage: { inputTokens: 10, outputTokens: 5 } }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + 
dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + const ttftOpens = sink.records.filter((r) => r.kind === "span-open" && r.name === "ttft"); + const ttftCloses = sink.records.filter((r) => r.kind === "span-close" && r.name === "ttft"); + const decodeOpens = sink.records.filter((r) => r.kind === "span-open" && r.name === "decode"); + const decodeCloses = sink.records.filter( + (r) => r.kind === "span-close" && r.name === "decode", + ); + + expect(ttftOpens).toHaveLength(1); + expect(ttftCloses).toHaveLength(1); + expect(decodeOpens).toHaveLength(1); + expect(decodeCloses).toHaveLength(1); + + const stepOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "step"); + expect(stepOpen).toBeDefined(); + + if ( + ttftOpens[0]?.kind === "span-open" && + ttftCloses[0]?.kind === "span-close" && + decodeOpens[0]?.kind === "span-open" && + decodeCloses[0]?.kind === "span-close" && + stepOpen?.kind === "span-open" + ) { + // ttft and decode are children of step + expect(ttftOpens[0].parentSpanId).toBe(stepOpen.spanId); + expect(decodeOpens[0].parentSpanId).toBe(stepOpen.spanId); + + // ttft closes before decode opens (in order) + const ttftCloseIdx = sink.records.indexOf(ttftCloses[0]); + const decodeOpenIdx = sink.records.indexOf(decodeOpens[0]); + expect(ttftCloseIdx).toBeLessThan(decodeOpenIdx); + + // ttft has firstToken: true + expect(ttftCloses[0].attributes?.firstToken).toBe(true); + + // durations from fake clock + expect(ttftCloses[0].durationMs).toBeGreaterThanOrEqual(0); + expect(decodeCloses[0].durationMs).toBeGreaterThanOrEqual(0); + } + }); + + it("first token counts a reasoning delta", async () => { + const provider = createFakeProvider([ + [ + { type: "reasoning-delta", delta: "thinking..." 
}, + { type: "text-delta", delta: "Hello" }, + { type: "usage", usage: { inputTokens: 10, outputTokens: 5 } }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + const ttftCloses = sink.records.filter((r) => r.kind === "span-close" && r.name === "ttft"); + expect(ttftCloses).toHaveLength(1); + + // The ttft span should close at the reasoning delta, not at the text delta + if (ttftCloses[0]?.kind === "span-close") { + expect(ttftCloses[0].attributes?.firstToken).toBe(true); + } + }); + + it("a step with no content token does not emit a misleading decode", async () => { + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + // First step (tool-call-only) should have ttft with firstToken: false and no decode + const ttftOpens = sink.records.filter((r) => r.kind === "span-open" && r.name === "ttft"); + const ttftCloses = sink.records.filter((r) => r.kind === "span-close" && r.name === "ttft"); + const decodeOpens = sink.records.filter((r) => r.kind === "span-open" && r.name === "decode"); + + // There should be 2 ttft opens (one per step) and 2 ttft closes + expect(ttftOpens).toHaveLength(2); + expect(ttftCloses).toHaveLength(2); + + // First step: tool-call-only, no first token + if 
(ttftCloses[0]?.kind === "span-close") { + expect(ttftCloses[0].attributes?.firstToken).toBe(false); + } + + // Second step: has text-delta, should have firstToken: true and decode span + if (ttftCloses[1]?.kind === "span-close") { + expect(ttftCloses[1].attributes?.firstToken).toBe(true); + } + + // Only one decode span (for the second step) + expect(decodeOpens).toHaveLength(1); + }); + + it("turn span close stamps usage.inputTokens / usage.outputTokens (dotted)", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "hi" }, + { type: "usage", usage: { inputTokens: 10, outputTokens: 5 } }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + const turnClose = sink.records.find((r) => r.kind === "span-close" && r.name === "turn"); + expect(turnClose).toBeDefined(); + if (turnClose?.kind === "span-close") { + expect(turnClose.attributes?.["usage.inputTokens"]).toBe(10); + expect(turnClose.attributes?.["usage.outputTokens"]).toBe(5); + expect(turnClose.attributes?.usage_inputTokens).toBeUndefined(); + expect(turnClose.attributes?.usage_outputTokens).toBeUndefined(); + } + }); + + it("step span close stamps usage.inputTokens / usage.outputTokens (dotted)", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "hi" }, + { type: "usage", usage: { inputTokens: 7, outputTokens: 3 } }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + const stepClose = sink.records.find((r) => r.kind === "span-close" && 
r.name === "step"); + expect(stepClose).toBeDefined(); + if (stepClose?.kind === "span-close") { + expect(stepClose.attributes?.["usage.inputTokens"]).toBe(7); + expect(stepClose.attributes?.["usage.outputTokens"]).toBe(3); + expect(stepClose.attributes?.usage_inputTokens).toBeUndefined(); + expect(stepClose.attributes?.usage_outputTokens).toBeUndefined(); + } + }); + + it("turn + step spans stamp usage.cacheReadTokens / usage.cacheWriteTokens when the provider Usage carries them", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "hi" }, + { + type: "usage", + usage: { inputTokens: 10, outputTokens: 5, cacheReadTokens: 3, cacheWriteTokens: 2 }, + }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + const turnClose = sink.records.find((r) => r.kind === "span-close" && r.name === "turn"); + const stepClose = sink.records.find((r) => r.kind === "span-close" && r.name === "step"); + + expect(turnClose).toBeDefined(); + if (turnClose?.kind === "span-close") { + expect(turnClose.attributes?.["usage.inputTokens"]).toBe(10); + expect(turnClose.attributes?.["usage.outputTokens"]).toBe(5); + expect(turnClose.attributes?.["usage.cacheReadTokens"]).toBe(3); + expect(turnClose.attributes?.["usage.cacheWriteTokens"]).toBe(2); + } + + expect(stepClose).toBeDefined(); + if (stepClose?.kind === "span-close") { + expect(stepClose.attributes?.["usage.inputTokens"]).toBe(10); + expect(stepClose.attributes?.["usage.outputTokens"]).toBe(5); + expect(stepClose.attributes?.["usage.cacheReadTokens"]).toBe(3); + expect(stepClose.attributes?.["usage.cacheWriteTokens"]).toBe(2); + } + }); + + it("turn + step spans OMIT the cache-token attrs when the provider Usage lacks them", async () => { + const 
provider = createFakeProvider([ + [ + { type: "text-delta", delta: "hi" }, + { type: "usage", usage: { inputTokens: 10, outputTokens: 5 } }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + const turnClose = sink.records.find((r) => r.kind === "span-close" && r.name === "turn"); + const stepClose = sink.records.find((r) => r.kind === "span-close" && r.name === "step"); + + expect(turnClose).toBeDefined(); + if (turnClose?.kind === "span-close") { + expect(turnClose.attributes?.["usage.inputTokens"]).toBe(10); + expect(turnClose.attributes?.["usage.outputTokens"]).toBe(5); + expect(turnClose.attributes?.["usage.cacheReadTokens"]).toBeUndefined(); + expect(turnClose.attributes?.["usage.cacheWriteTokens"]).toBeUndefined(); + } + + expect(stepClose).toBeDefined(); + if (stepClose?.kind === "span-close") { + expect(stepClose.attributes?.["usage.inputTokens"]).toBe(10); + expect(stepClose.attributes?.["usage.outputTokens"]).toBe(5); + expect(stepClose.attributes?.["usage.cacheReadTokens"]).toBeUndefined(); + expect(stepClose.attributes?.["usage.cacheWriteTokens"]).toBeUndefined(); + } + }); + }); + + describe("provider logger threading", () => { + it("passes step span logger to provider.stream opts when logger provided", async () => { + let capturedOpts: Record<string, unknown> | undefined; + + const provider: ProviderContract = { + id: "fake", + stream(_messages, _tools, opts) { + capturedOpts = opts !== undefined ? 
{ ...opts } : undefined; + return (async function* () { + yield { type: "text-delta", delta: "hi" } as ProviderEvent; + yield { type: "usage", usage: { inputTokens: 1, outputTokens: 1 } } as ProviderEvent; + yield { type: "finish", reason: "stop" } as ProviderEvent; + })(); + }, + }; + + const { logger } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + expect(capturedOpts).toBeDefined(); + expect(capturedOpts?.logger).toBeDefined(); + expect(typeof (capturedOpts?.logger as Record<string, unknown>).info).toBe("function"); + expect(typeof (capturedOpts?.logger as Record<string, unknown>).span).toBe("function"); + }); + + it("passes undefined for opts.logger when no logger provided", async () => { + let capturedOpts: Record<string, unknown> | undefined; + + const provider: ProviderContract = { + id: "fake", + stream(_messages, _tools, opts) { + capturedOpts = opts !== undefined ? { ...opts } : undefined; + return (async function* () { + yield { type: "text-delta", delta: "hi" } as ProviderEvent; + yield { type: "usage", usage: { inputTokens: 1, outputTokens: 1 } } as ProviderEvent; + yield { type: "finish", reason: "stop" } as ProviderEvent; + })(); + }, + }; + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + }); + + expect(capturedOpts).toBeDefined(); + expect(capturedOpts?.logger).toBeUndefined(); + }); + + it("threads providerOpts.model through to provider.stream opts", async () => { + let capturedOpts: Record<string, unknown> | undefined; + + const provider: ProviderContract = { + id: "fake", + stream(_messages, _tools, opts) { + capturedOpts = opts !== undefined ? 
{ ...opts } : undefined; + return (async function* () { + yield { type: "text-delta", delta: "hi" } as ProviderEvent; + yield { type: "usage", usage: { inputTokens: 1, outputTokens: 1 } } as ProviderEvent; + yield { type: "finish", reason: "stop" } as ProviderEvent; + })(); + }, + }; + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + providerOpts: { model: "some-model-id" }, + }); + + expect(capturedOpts?.model).toBe("some-model-id"); + }); + }); + + describe("span tree nesting", () => { + it("turn span is root (parentSpanId undefined)", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "hi" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + const turnOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "turn"); + expect(turnOpen).toBeDefined(); + if (turnOpen?.kind === "span-open") { + expect(turnOpen.parentSpanId).toBeUndefined(); + } + }); + + it("step span is a child of turn span", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "hi" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + const turnOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "turn"); + const stepOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "step"); + expect(turnOpen).toBeDefined(); + expect(stepOpen).toBeDefined(); + if 
(turnOpen?.kind === "span-open" && stepOpen?.kind === "span-open") { + expect(stepOpen.parentSpanId).toBe(turnOpen.spanId); + } + }); + + it("prompt span is a child of step span", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "hi" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + const stepOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "step"); + const promptOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "prompt"); + expect(stepOpen).toBeDefined(); + expect(promptOpen).toBeDefined(); + if (stepOpen?.kind === "span-open" && promptOpen?.kind === "span-open") { + expect(promptOpen.parentSpanId).toBe(stepOpen.spanId); + } + }); + + it("provider logger creates spans nested under step", async () => { + let capturedLogger: Logger | undefined; + let providerReqSpanId: string | undefined; + + const provider: ProviderContract = { + id: "fake", + stream(_messages, _tools, opts) { + capturedLogger = opts?.logger; + return (async function* () { + // Open provider.request span inside the stream (like a real provider) + if (capturedLogger !== undefined) { + const span = capturedLogger.span("provider.request"); + providerReqSpanId = span.id; + span.end(); + } + yield { type: "text-delta", delta: "hi" } as ProviderEvent; + yield { type: "finish", reason: "stop" } as ProviderEvent; + })(); + }, + }; + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + expect(capturedLogger).toBeDefined(); + expect(providerReqSpanId).toBeDefined(); + + const 
stepOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "step"); + const provReqOpen = sink.records.find( + (r) => r.kind === "span-open" && r.name === "provider.request", + ); + expect(stepOpen).toBeDefined(); + expect(provReqOpen).toBeDefined(); + if (stepOpen?.kind === "span-open" && provReqOpen?.kind === "span-open") { + expect(provReqOpen.parentSpanId).toBe(stepOpen.spanId); + expect(provReqOpen.spanId).toBe(providerReqSpanId); + } + }); + + it("tool-call spans are children of step span", async () => { + const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + const stepOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "step"); + const tcOpen = sink.records.find((r) => r.kind === "span-open" && r.name === "tool-call"); + expect(stepOpen).toBeDefined(); + expect(tcOpen).toBeDefined(); + if (stepOpen?.kind === "span-open" && tcOpen?.kind === "span-open") { + expect(tcOpen.parentSpanId).toBe(stepOpen.spanId); + } + }); + + it("full parent chain: turn → step → {prompt, provider.request, tool-call}", async () => { + let capturedLogger: Logger | undefined; + + const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + let streamCallCount = 0; + const provider: ProviderContract = { + id: "fake", + stream(_messages, _tools, opts) { + capturedLogger = opts?.logger; + streamCallCount++; + return (async function* () { + // Simulate provider opening a provider.request span + // INSIDE the 
stream on the first call only (like a real provider) + if (streamCallCount === 1 && capturedLogger !== undefined) { + const span = capturedLogger.span("provider.request"); + span.end(); + } + if (streamCallCount === 1) { + yield { + type: "tool-call", + toolCallId: "tc1", + toolName: "echo", + input: {}, + } as ProviderEvent; + yield { type: "finish", reason: "tool-calls" } as ProviderEvent; + } else { + yield { type: "text-delta", delta: "done" } as ProviderEvent; + yield { type: "finish", reason: "stop" } as ProviderEvent; + } + })(); + }, + }; + + const { logger, sink } = createTestLogger(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + logger, + }); + + const spanOpens = sink.records.filter((r) => r.kind === "span-open") as Array< + Extract<LogRecord, { kind: "span-open" }> + >; + + const turnOpen = spanOpens.find((r) => r.name === "turn"); + const stepOpen = spanOpens.find((r) => r.name === "step"); + const promptOpen = spanOpens.find((r) => r.name === "prompt"); + const provReqOpen = spanOpens.find((r) => r.name === "provider.request"); + const tcOpen = spanOpens.find((r) => r.name === "tool-call"); + + expect(turnOpen).toBeDefined(); + expect(stepOpen).toBeDefined(); + expect(promptOpen).toBeDefined(); + expect(provReqOpen).toBeDefined(); + expect(tcOpen).toBeDefined(); + + if ( + turnOpen?.kind === "span-open" && + stepOpen?.kind === "span-open" && + promptOpen?.kind === "span-open" && + provReqOpen?.kind === "span-open" && + tcOpen?.kind === "span-open" + ) { + // turn = root + expect(turnOpen.parentSpanId).toBeUndefined(); + + // step = child of turn + expect(stepOpen.parentSpanId).toBe(turnOpen.spanId); + + // prompt = child of step + expect(promptOpen.parentSpanId).toBe(stepOpen.spanId); + + // provider.request = child of step + expect(provReqOpen.parentSpanId).toBe(stepOpen.spanId); + + // tool-call = child 
of step + expect(tcOpen.parentSpanId).toBe(stepOpen.spanId); + } + }); + }); + + describe("lifecycle events", () => { + it("emits turn-start as the first event with conversation + turn ids", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "hi" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-42", + turnId: "turn-99", + emit, + }); + + expect(events[0]?.type).toBe("turn-start"); + if (events[0]?.type === "turn-start") { + expect(events[0].conversationId).toBe("conv-42"); + expect(events[0].turnId).toBe("turn-99"); + } + }); + + it("emits a single done event last, carrying the finishReason", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "Hello" }, + { type: "usage", usage: { inputTokens: 5, outputTokens: 3 } }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + }); + + const lastEvent = events[events.length - 1]; + expect(lastEvent?.type).toBe("done"); + if (lastEvent?.type === "done") { + expect(lastEvent.reason).toBe(result.finishReason); + expect(lastEvent.conversationId).toBe("conv-1"); + expect(lastEvent.turnId).toBe("turn-1"); + } + + const doneEvents = events.filter((e) => e.type === "done"); + expect(doneEvents).toHaveLength(1); + }); + + it("emits done after a tool-call turn", async () => { + const tool = createFakeTool("echo", async (input) => ({ + content: `echo: ${JSON.stringify(input)}`, + })); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: { x: 1 } }, + { type: "finish", 
reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + }); + + const lastEvent = events[events.length - 1]; + expect(lastEvent?.type).toBe("done"); + if (lastEvent?.type === "done") { + expect(lastEvent.reason).toBe(result.finishReason); + } + }); + + it('still emits done with reason "aborted" when the turn is aborted via signal', async () => { + const ac = new AbortController(); + ac.abort(); + + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "should not appear" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + signal: ac.signal, + }); + + expect(result.finishReason).toBe("aborted"); + + const lastEvent = events[events.length - 1]; + expect(lastEvent?.type).toBe("done"); + if (lastEvent?.type === "done") { + expect(lastEvent.reason).toBe("aborted"); + } + }); + + it('still emits done with reason "error" when the provider errors', async () => { + const provider: ProviderContract = { + id: "fake", + stream() { + return (async function* () { + yield { type: "text-delta", delta: "partial" } as ProviderEvent; + throw new Error("provider crashed"); + })(); + }, + }; + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + }); + + expect(result.finishReason).toBe("error"); + + const lastEvent = 
events[events.length - 1]; + expect(lastEvent?.type).toBe("done"); + if (lastEvent?.type === "done") { + expect(lastEvent.reason).toBe("error"); + } + }); + + it("turn-start precedes every delta and done follows every delta", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "Hello" }, + { type: "reasoning-delta", delta: "thinking..." }, + { type: "text-delta", delta: " world" }, + { type: "usage", usage: { inputTokens: 5, outputTokens: 3 } }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + }); + + const turnStartIdx = events.findIndex((e) => e.type === "turn-start"); + const doneIdx = events.findIndex((e) => e.type === "done"); + + expect(turnStartIdx).toBe(0); + expect(doneIdx).toBe(events.length - 1); + + for (let i = 0; i < events.length; i++) { + const e = events[i]; + if (e?.type === "text-delta" || e?.type === "reasoning-delta") { + expect(i).toBeGreaterThan(turnStartIdx); + expect(i).toBeLessThan(doneIdx); + } + } + }); + }); + + describe("stepId", () => { + it("tool-call and tool-result events carry stepId", async () => { + const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + }); + + const toolCallEvt = events.find((e) => e.type === "tool-call"); + const toolResultEvt = 
events.find((e) => e.type === "tool-result"); + + expect(toolCallEvt).toBeDefined(); + expect(toolResultEvt).toBeDefined(); + + if (toolCallEvt?.type === "tool-call" && toolResultEvt?.type === "tool-result") { + expect(toolCallEvt.stepId).toBeDefined(); + expect(toolResultEvt.stepId).toBeDefined(); + expect(toolCallEvt.stepId).toBe(toolResultEvt.stepId); + } + }); + + it("tool calls in the SAME step share one stepId; a later step gets a different one", async () => { + const toolA = createFakeTool("a", async () => ({ content: "a-result" })); + const toolB = createFakeTool("b", async () => ({ content: "b-result" })); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "a", input: {} }, + { type: "tool-call", toolCallId: "tc2", toolName: "b", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "tool-call", toolCallId: "tc3", toolName: "a", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [toolA, toolB], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + }); + + const toolCallEvts = events.filter((e) => e.type === "tool-call"); + expect(toolCallEvts.length).toBeGreaterThanOrEqual(2); + + const step0Calls = toolCallEvts.filter( + (e) => e.type === "tool-call" && (e.toolCallId === "tc1" || e.toolCallId === "tc2"), + ); + const step1Call = toolCallEvts.find((e) => e.type === "tool-call" && e.toolCallId === "tc3"); + + expect(step0Calls).toHaveLength(2); + if (step0Calls[0]?.type === "tool-call" && step0Calls[1]?.type === "tool-call") { + expect(step0Calls[0].stepId).toBe(step0Calls[1].stepId); + } + + if (step1Call?.type === "tool-call" && step0Calls[0]?.type === "tool-call") { + 
/**
 * Builds a hand-driven clock for the timing tests.
 *
 * The clock starts at 0 and only moves when the test calls `tick`, so every
 * duration computed from `now()` is fully deterministic.
 *
 * @returns `now` — reads the current clock value in milliseconds;
 *          `tick` — advances the clock by `ms` milliseconds.
 */
function createCounterNow(): { now: () => number; tick: (ms: number) => void } {
  let elapsedMs = 0;

  const now = (): number => elapsedMs;
  const tick = (ms: number): void => {
    elapsedMs += ms;
  };

  return { now, tick };
}
{ + id: "fake", + stream(_messages, _tools) { + const idx = streamCallCount++; + return (async function* () { + if (idx === 0) { + clock.tick(50); // stream starts + yield { type: "text-delta", delta: "Hello" } as ProviderEvent; + // first token seen at 150 (100+50) + clock.tick(100); + yield { type: "text-delta", delta: " world" } as ProviderEvent; + clock.tick(50); + yield { + type: "usage", + usage: { inputTokens: 10, outputTokens: 5 }, + } as ProviderEvent; + yield { type: "finish", reason: "stop" } as ProviderEvent; + } + })(); + }, + }; + + await runTurn({ + provider: wrappedProvider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + now: clock.now, + }); + + const stepCompleteEvts = events.filter((e) => e.type === "step-complete"); + expect(stepCompleteEvts).toHaveLength(1); + + const sc = stepCompleteEvts[0]; + if (sc?.type === "step-complete") { + expect(sc.conversationId).toBe("conv-1"); + expect(sc.turnId).toBe("turn-1"); + expect(sc.stepId).toBeDefined(); + expect(sc.genTotalMs).toBe(200); // 50+100+50 + expect(sc.ttftMs).toBe(50); // stream start → first text-delta + expect(sc.decodeMs).toBe(150); // first token → stream end + const ttft = sc.ttftMs; + const decode = sc.decodeMs; + const genTotal = sc.genTotalMs; + if (ttft !== undefined && decode !== undefined && genTotal !== undefined) { + expect(genTotal).toBe(ttft + decode); + } + } + }); + + it("step-complete omits ttft/decode but keeps genTotalMs for a no-content step", async () => { + const clock = createCounterNow(); + clock.tick(100); // turn starts at 100 + + const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + let streamCallCount = 0; + const wrappedProvider: ProviderContract = { + id: "fake", + stream(_messages, _tools) { + const idx = streamCallCount++; + return (async function* () { + if (idx === 0) { + clock.tick(80); // stream starts at 180 + yield { + type: 
"tool-call", + toolCallId: "tc1", + toolName: "echo", + input: {}, + } as ProviderEvent; + clock.tick(20); + yield { type: "finish", reason: "tool-calls" } as ProviderEvent; + } else { + clock.tick(50); + yield { type: "text-delta", delta: "done" } as ProviderEvent; + clock.tick(50); + yield { type: "finish", reason: "stop" } as ProviderEvent; + } + })(); + }, + }; + + const { events, emit } = createCollectingEmit(); + + await runTurn({ + provider: wrappedProvider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + now: clock.now, + }); + + const stepCompleteEvts = events.filter((e) => e.type === "step-complete"); + expect(stepCompleteEvts).toHaveLength(2); + + // First step: tool-call-only, no content token + const sc0 = stepCompleteEvts[0]; + if (sc0?.type === "step-complete") { + expect(sc0.stepId).toBeDefined(); + expect(sc0.genTotalMs).toBe(100); // 80+20 + expect(sc0.ttftMs).toBeUndefined(); + expect(sc0.decodeMs).toBeUndefined(); + } + + // Second step: has text-delta + const sc1 = stepCompleteEvts[1]; + if (sc1?.type === "step-complete") { + expect(sc1.stepId).toBeDefined(); + expect(sc1.genTotalMs).toBe(100); // 50+50 + expect(sc1.ttftMs).toBe(50); + expect(sc1.decodeMs).toBe(50); + } + }); + + it("usage event carries stepId", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "hi" }, + { type: "usage", usage: { inputTokens: 5, outputTokens: 3 } }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + }); + + const usageEvts = events.filter((e) => e.type === "usage"); + expect(usageEvts).toHaveLength(1); + const ue = usageEvts[0]; + if (ue?.type === "usage") { + expect(ue.stepId).toBeDefined(); + } + 
}); + + it("tool-result carries durationMs (execution time) when now provided", async () => { + const clock = createCounterNow(); + clock.tick(100); // turn starts at 100 + + const tool = createFakeTool("slow", async () => { + clock.tick(200); // tool takes 200ms to execute + return { content: "done" }; + }); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "slow", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "ok" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + now: clock.now, + }); + + const toolResultEvts = events.filter((e) => e.type === "tool-result"); + expect(toolResultEvts).toHaveLength(1); + const tr = toolResultEvts[0]; + if (tr?.type === "tool-result") { + expect(tr.durationMs).toBeDefined(); + expect(tr.durationMs).toBe(200); + } + }); + + it("done carries durationMs and aggregate usage when now provided", async () => { + const clock = createCounterNow(); + clock.tick(100); // turn starts at 100 + + const wrappedProvider: ProviderContract = { + id: "fake", + stream(_messages, _tools) { + return (async function* () { + clock.tick(80); // stream duration + yield { type: "text-delta", delta: "hi" } as ProviderEvent; + yield { + type: "usage", + usage: { inputTokens: 10, outputTokens: 5 }, + } as ProviderEvent; + yield { type: "finish", reason: "stop" } as ProviderEvent; + })(); + }, + }; + + const { events, emit } = createCollectingEmit(); + + await runTurn({ + provider: wrappedProvider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + now: clock.now, + }); + + const doneEvts = events.filter((e) => e.type === "done"); + 
expect(doneEvts).toHaveLength(1); + const d = doneEvts[0]; + if (d?.type === "done") { + expect(d.durationMs).toBeDefined(); + expect(d.durationMs).toBeGreaterThan(0); + expect(d.usage).toBeDefined(); + if (d.usage !== undefined) { + expect(d.usage.inputTokens).toBe(10); + expect(d.usage.outputTokens).toBe(5); + } + } + }); + + it("no now → timing fields absent", async () => { + const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "hi" }, + { type: "usage", usage: { inputTokens: 5, outputTokens: 3 } }, + { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "usage", usage: { inputTokens: 10, outputTokens: 5 } }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + // no now + }); + + // step-complete still emitted (with stepId, no timing) + const stepCompleteEvts = events.filter((e) => e.type === "step-complete"); + expect(stepCompleteEvts).toHaveLength(2); + for (const sc of stepCompleteEvts) { + if (sc?.type === "step-complete") { + expect(sc.stepId).toBeDefined(); + expect(sc.ttftMs).toBeUndefined(); + expect(sc.decodeMs).toBeUndefined(); + expect(sc.genTotalMs).toBeUndefined(); + } + } + + // usage still carries stepId + const usageEvts = events.filter((e) => e.type === "usage"); + for (const ue of usageEvts) { + if (ue?.type === "usage") { + expect(ue.stepId).toBeDefined(); + } + } + + // no durationMs on tool-result + const toolResultEvts = events.filter((e) => e.type === "tool-result"); + for (const tr of toolResultEvts) { + if (tr?.type === "tool-result") { + expect(tr.durationMs).toBeUndefined(); + } + } + + // no 
durationMs on done, but usage is present (independent of now) + const doneEvts = events.filter((e) => e.type === "done"); + expect(doneEvts).toHaveLength(1); + const d = doneEvts[0]; + if (d?.type === "done") { + expect(d.durationMs).toBeUndefined(); + expect(d.usage).toBeDefined(); + if (d.usage !== undefined) { + expect(d.usage.inputTokens).toBe(15); + expect(d.usage.outputTokens).toBe(8); + } + } + }); + }); + + describe("contextSize", () => { + it("single-step turn: contextSize equals step inputTokens + outputTokens", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "Hello" }, + { type: "usage", usage: { inputTokens: 100, outputTokens: 50 } }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + }); + + const doneEvt = events.find((e) => e.type === "done"); + expect(doneEvt).toBeDefined(); + if (doneEvt?.type === "done") { + expect(doneEvt.contextSize).toBe(150); + } + }); + + it("multi-step turn: contextSize equals ONLY the last step's inputTokens + outputTokens", async () => { + const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, + { type: "usage", usage: { inputTokens: 100, outputTokens: 20 } }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "usage", usage: { inputTokens: 300, outputTokens: 80 } }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + }); + + const doneEvt = 
events.find((e) => e.type === "done"); + expect(doneEvt).toBeDefined(); + if (doneEvt?.type === "done") { + expect(doneEvt.contextSize).toBe(380); + expect(doneEvt.usage).toBeDefined(); + if (doneEvt.usage !== undefined) { + expect(doneEvt.contextSize).not.toBe(doneEvt.usage.inputTokens); + } + } + }); + + it("no usage reported: contextSize is undefined", async () => { + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "Hello" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const { events, emit } = createCollectingEmit(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + }); + + const doneEvt = events.find((e) => e.type === "done"); + expect(doneEvt).toBeDefined(); + if (doneEvt?.type === "done") { + expect(doneEvt.contextSize).toBeUndefined(); + expect(doneEvt.usage).toBeUndefined(); + } + }); + }); + + describe("drainSteering", () => { + it("drainSteering called once at the tool-result boundary; returned messages appended to the next step's provider input (after tool results)", async () => { + let drainCallCount = 0; + const steeringMessage: ChatMessage = { + role: "user", + chunks: [{ type: "text", text: "steer!" 
}], + }; + + const { provider, capturedMessages } = createCapturingProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + drainSteering: () => { + drainCallCount++; + return [steeringMessage]; + }, + }); + + expect(drainCallCount).toBe(1); + // The provider was called twice (tool-call step, then text step). + expect(capturedMessages).toHaveLength(2); + const secondStepMessages = capturedMessages[1] ?? []; + // user, assistant(tool-call), tool-result, steering(user) — in order, + // steering appended AFTER the tool results, before the next call. + expect(secondStepMessages).toHaveLength(4); + expect(secondStepMessages[0]?.role).toBe("user"); + expect(secondStepMessages[1]?.role).toBe("assistant"); + expect(secondStepMessages[2]?.role).toBe("tool"); + expect(secondStepMessages[3]).toEqual(steeringMessage); + expect(secondStepMessages[3]?.role).toBe("user"); + // Steering is fed to the next provider call, NOT surfaced in the + // turn result — the caller owns the steering messages' lifecycle. 
+ expect(result.messages).toHaveLength(3); + }); + + it("drainSteering omitted → no injection; turn byte-identical to before", async () => { + const { provider, capturedMessages } = createCapturingProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + // drainSteering omitted — must be a strict no-op. + }); + + expect(capturedMessages).toHaveLength(2); + const secondStepMessages = capturedMessages[1] ?? []; + // user, assistant(tool-call), tool-result — NO steering injected. + expect(secondStepMessages).toHaveLength(3); + expect(secondStepMessages[0]?.role).toBe("user"); + expect(secondStepMessages[1]?.role).toBe("assistant"); + expect(secondStepMessages[2]?.role).toBe("tool"); + expect(result.messages).toHaveLength(3); + }); + + it("drainSteering returns [] → no injection", async () => { + let drainCallCount = 0; + const { provider, capturedMessages } = createCapturingProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + drainSteering: () => { + drainCallCount++; + return []; + }, + }); + + // Called at the boundary, but returned nothing → no injection. 
+ expect(drainCallCount).toBe(1); + expect(capturedMessages).toHaveLength(2); + const secondStepMessages = capturedMessages[1] ?? []; + expect(secondStepMessages).toHaveLength(3); + expect(secondStepMessages[2]?.role).toBe("tool"); + }); + + it("drainSteering NOT called when a step has no tool calls (text-only turn)", async () => { + let drainCallCount = 0; + const provider = createFakeProvider([ + [ + { type: "text-delta", delta: "hello" }, + { type: "finish", reason: "stop" }, + ], + ]); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + drainSteering: () => { + drainCallCount++; + return []; + }, + }); + + expect(drainCallCount).toBe(0); + }); + + it("multiple tool-call steps → drainSteering called once per tool-call step", async () => { + let drainCallCount = 0; + const provider = createFakeProvider([ + [ + { type: "tool-call", toolCallId: "tc1", toolName: "echo", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "tool-call", toolCallId: "tc2", toolName: "echo", input: {} }, + { type: "finish", reason: "tool-calls" }, + ], + [ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ], + ]); + + const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + drainSteering: () => { + drainCallCount++; + return []; + }, + }); + + // Steps 0 and 1 each produced tool calls → drained once each. + // Step 2 (text-only) → no boundary → no drain. Total = 2. 
+ expect(drainCallCount).toBe(2); + }); + + it("MAX_STEPS=0 (unlimited): turn runs past the old 50-step limit and drains at every tool-result boundary until the model stops naturally", async () => { + let drainCallCount = 0; + // 100 tool-call steps (past the old MAX_STEPS=50) + 1 text-only step + // to end the turn naturally. + const STEPS_WITH_TOOLS = 100; + const script: ProviderEvent[][] = []; + for (let i = 0; i < STEPS_WITH_TOOLS; i++) { + script.push([ + { type: "tool-call", toolCallId: "tc", toolName: "echo", input: {} }, + { type: "finish", reason: "tool-calls" }, + ]); + } + // Final step: text only, no tool calls → natural end. + script.push([ + { type: "text-delta", delta: "done" }, + { type: "finish", reason: "stop" }, + ]); + const provider = createFakeProvider(script); + + const tool = createFakeTool("echo", async () => ({ content: "echoed" })); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [tool], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit: () => {}, + drainSteering: () => { + drainCallCount++; + return []; + }, + }); + + // Turn ended naturally, NOT via max-steps. + expect(result.finishReason).toBe("stop"); + // Every tool-call step (0..99) is followed by a next step → each + // triggers a drain. The text-only step breaks before draining. + expect(drainCallCount).toBe(STEPS_WITH_TOOLS); + // All 101 steps produced messages (100 tool steps with assistant + + // tool messages, 1 text-only step with an assistant message). + expect(result.messages.length).toBe(STEPS_WITH_TOOLS * 2 + 1); + }); + }); + + // ── Retry with backoff ────────────────────────────────────────────────── + // + // PURE tests: a fake `sleep` (records calls, resolves instantly, can abort + // on a chosen call) + a pure `delayFor` (the canonical schedule + 8h budget). + // A stub `ProviderContract` whose `stream` yields a retryable error N times + // then a finish. 
// ZERO mocks of `@dispatch/*` modules — effects injected.

/** The canonical backoff schedule (matches the orchestrator's concrete strategy). */
const RETRY_SCHEDULE_MS = [5_000, 10_000, 30_000, 60_000, 300_000, 600_000, 900_000, 1_800_000];
/** Delay applied to every attempt past the end of the stepped schedule (30m). */
const RETRY_TAIL_MS = 1_800_000;
/** Cap on the cumulative scheduled sleep across all retries (8h). */
const RETRY_BUDGET_MS = 8 * 60 * 60 * 1000;

/**
 * Cumulative scheduled sleep through `attempt` (sum of delay[0..attempt]).
 *
 * @param attempt 0-based retry attempt index.
 * @returns Total milliseconds slept if attempts `0..attempt` all ran; 0 when
 *          `attempt` is negative (the loop body never executes).
 */
function cumulativeSleepMs(attempt: number): number {
  let sum = 0;
  for (let i = 0; i <= attempt; i++) {
    // An index past the stepped schedule reads `undefined`; `??` falls back
    // to the tail delay and keeps the addition well-typed when indexed reads
    // are `number | undefined`.
    sum += RETRY_SCHEDULE_MS[i] ?? RETRY_TAIL_MS;
  }
  return sum;
}

/**
 * Pure, deterministic delay decision (no I/O, no clock).
 *
 * @param attempt 0-based retry attempt index.
 * @returns The delay in milliseconds to sleep before retrying, or `undefined`
 *          once sleeping through `attempt` would exceed `RETRY_BUDGET_MS`
 *          (i.e. stop retrying).
 */
function delayFor(attempt: number): number | undefined {
  if (cumulativeSleepMs(attempt) > RETRY_BUDGET_MS) return undefined; // over budget → stop
  return attempt < RETRY_SCHEDULE_MS.length ? RETRY_SCHEDULE_MS[attempt] : RETRY_TAIL_MS;
}

/**
 * The full schedule `delayFor` would emit (until budget exhausted).
 *
 * @returns Every delay from attempt 0 up to, but not including, the first
 *          attempt for which `delayFor` returns `undefined`.
 */
function fullSchedule(): number[] {
  const result: number[] = [];
  for (let attempt = 0; ; attempt++) {
    const delay = delayFor(attempt);
    if (delay === undefined) break;
    result.push(delay);
  }
  return result;
}

/**
 * Fake, controllable `sleep`: records every call's delay, resolves
 * instantly (no real waiting), and can abort the controller on a chosen
 * 1-based call index to simulate "abort during sleep".
 *
 * @param controller Controller that is aborted when the chosen call is reached.
 * @returns `sleep` — the fake to inject (its signal argument is ignored);
 *          `calls` — the delays passed to `sleep`, in call order;
 *          `abortOnCall` — arms the abort for the n-th (1-based) `sleep` call.
 */
function createFakeSleep(controller: AbortController): {
  sleep: (ms: number, signal: AbortSignal) => Promise<void>;
  calls: number[];
  abortOnCall: (n: number) => void;
} {
  const calls: number[] = [];
  let abortAt: number | undefined;
  const sleep = async (ms: number, _signal: AbortSignal): Promise<void> => {
    // Record first, so the aborted call is still counted in `calls`.
    calls.push(ms);
    if (abortAt !== undefined && calls.length === abortAt) {
      controller.abort();
      throw new Error("aborted");
    }
    // Otherwise resolve instantly (no real waiting).
  };
  return {
    sleep,
    calls,
    abortOnCall: (n: number) => {
      abortAt = n;
    },
  };
}

/**
 * A provider that yields a retryable error `errorCount` times, then success.
 *
 * @param opts.errorCount Number of initial `stream()` calls that yield only the
 *        error event; pass `Number.POSITIVE_INFINITY` to fail forever.
 * @param opts.error Overrides for the error event. `message` defaults to
 *        "overloaded"; `code` and `retryable` are left off the event entirely
 *        when not supplied.
 * @param opts.success Events yielded by every call after the failing ones;
 *        defaults to one text delta followed by a "stop" finish.
 * @returns The stub provider plus a `streamCalls` counter holding the number
 *          of `stream()` invocations so far.
 */
function createRetryingProvider(opts: {
  errorCount: number;
  error?: { message: string; code?: string; retryable?: boolean };
  success?: ProviderEvent[];
}): { provider: ProviderContract; streamCalls: { value: number } } {
  const streamCalls = { value: 0 };
  const error: ProviderEvent = {
    type: "error",
    message: opts.error?.message ?? "overloaded",
    ...(opts.error?.code !== undefined ? { code: opts.error.code } : {}),
    ...(opts.error?.retryable !== undefined ? { retryable: opts.error.retryable } : {}),
  };
  const success = opts.success ?? [
    { type: "text-delta", delta: "hi" },
    { type: "finish", reason: "stop" },
  ];
  const provider: ProviderContract = {
    id: "fake",
    stream() {
      // The call index is captured when `stream()` is invoked, not when the
      // generator is first iterated.
      const idx = streamCalls.value++;
      return (async function* () {
        if (idx < opts.errorCount) {
          yield error;
          return;
        }
        for (const event of success) yield event;
      })();
    },
  };
  return { provider, streamCalls };
}
+ expect(fake.calls).toEqual([5_000, 10_000, 30_000]); + // 3 provider-retry events (one per sleep), then the successful text. + const retryEvents = events.filter((e) => e.type === "provider-retry"); + expect(retryEvents).toHaveLength(3); + if (retryEvents[0]?.type === "provider-retry") { + expect(retryEvents[0].attempt).toBe(0); + expect(retryEvents[0].delayMs).toBe(5_000); + expect(retryEvents[0].message).toBe("HTTP 429: overloaded"); + expect(retryEvents[0].code).toBe("429"); + expect(retryEvents[0].conversationId).toBe("conv-1"); + expect(retryEvents[0].turnId).toBe("turn-1"); + } + if (retryEvents[1]?.type === "provider-retry") { + expect(retryEvents[1].attempt).toBe(1); + expect(retryEvents[1].delayMs).toBe(10_000); + } + if (retryEvents[2]?.type === "provider-retry") { + expect(retryEvents[2].attempt).toBe(2); + expect(retryEvents[2].delayMs).toBe(30_000); + } + // The error was suppressed (no error event emitted — retry succeeded). + expect(events.filter((e) => e.type === "error")).toHaveLength(0); + // The successful content still streams. + const deltas = events.filter((e) => e.type === "text-delta"); + expect(deltas).toHaveLength(1); + }); + + it("sleep is called with the full schedule [5s,10s,30s,60s,5m,10m,15m,30m,30m…]", async () => { + // Provider errors forever → retries until budget exhausted → gives up. + const { provider } = createRetryingProvider({ + errorCount: Number.POSITIVE_INFINITY, + error: { message: "overloaded", code: "429", retryable: true }, + }); + const controller = new AbortController(); + const fake = createFakeSleep(controller); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + signal: controller.signal, + retry: { delayFor, sleep: fake.sleep }, + }); + + // Budget exhausted → give up → error. 
+ expect(result.finishReason).toBe("error"); + + // The sleep schedule matches the pure delayFor output exactly. + expect(fake.calls).toEqual(fullSchedule()); + + // Head of the schedule (the 8 stepped delays). + expect(fake.calls.slice(0, 8)).toEqual([ + 5_000, 10_000, 30_000, 60_000, 300_000, 600_000, 900_000, 1_800_000, + ]); + // Tail repeats 30m. + expect(fake.calls[8]).toBe(1_800_000); + expect(fake.calls.at(-1)).toBe(1_800_000); + + // 8h cumulative budget cap: head (3705s) + 13×30m = ~7h31m, then stop. + // 21 retries (attempts 0..20), then delayFor(21) → undefined → give up. + expect(fake.calls).toHaveLength(21); + const totalSlept = fake.calls.reduce((a, b) => a + b, 0); + expect(totalSlept).toBeLessThanOrEqual(RETRY_BUDGET_MS); + expect(totalSlept).toBe(3_705_000 + 13 * 1_800_000); // 27_105_000 + + // One provider-retry per sleep, plus a final error (give-up). + expect(events.filter((e) => e.type === "provider-retry")).toHaveLength(21); + expect(events.filter((e) => e.type === "error")).toHaveLength(1); + const errEvt = events.find((e) => e.type === "error"); + if (errEvt?.type === "error") { + expect(errEvt.message).toBe("overloaded"); + expect(errEvt.code).toBe("429"); + } + }); + + it("does NOT retry after content was emitted (safety invariant)", async () => { + // Provider yields text (content) THEN a retryable error. Because content + // was emitted, retrying is unsafe (would duplicate partial output). 
+ let callCount = 0; + const provider: ProviderContract = { + id: "fake", + stream() { + callCount++; + return (async function* () { + yield { type: "text-delta", delta: "partial" } as ProviderEvent; + yield { + type: "error", + message: "overloaded", + code: "429", + retryable: true, + } as ProviderEvent; + })(); + }, + }; + const controller = new AbortController(); + const fake = createFakeSleep(controller); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + signal: controller.signal, + retry: { delayFor, sleep: fake.sleep }, + }); + + // No retries: stream called exactly once. + expect(callCount).toBe(1); + expect(fake.calls).toHaveLength(0); + // The error is emitted (give-up) and partial content preserved. + expect(result.finishReason).toBe("error"); + expect(events.filter((e) => e.type === "error")).toHaveLength(1); + expect(events.filter((e) => e.type === "provider-retry")).toHaveLength(0); + expect(events.filter((e) => e.type === "text-delta")).toHaveLength(1); + }); + + it("does NOT retry a non-retryable emitted error (retryable: false)", async () => { + const { provider, streamCalls } = createRetryingProvider({ + errorCount: 1, + error: { message: "bad request", code: "400", retryable: false }, + }); + const controller = new AbortController(); + const fake = createFakeSleep(controller); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + signal: controller.signal, + retry: { delayFor, sleep: fake.sleep }, + }); + + expect(streamCalls.value).toBe(1); // no retry + expect(fake.calls).toHaveLength(0); + expect(result.finishReason).toBe("error"); + expect(events.filter((e) => 
e.type === "error")).toHaveLength(1); + expect(events.filter((e) => e.type === "provider-retry")).toHaveLength(0); + }); + + it("does NOT retry a non-retryable emitted error (retryable absent)", async () => { + const { provider, streamCalls } = createRetryingProvider({ + errorCount: 1, + error: { message: "bad request", code: "400" }, // no retryable field + }); + const controller = new AbortController(); + const fake = createFakeSleep(controller); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + signal: controller.signal, + retry: { delayFor, sleep: fake.sleep }, + }); + + expect(streamCalls.value).toBe(1); // no retry + expect(fake.calls).toHaveLength(0); + expect(result.finishReason).toBe("error"); + expect(events.filter((e) => e.type === "error")).toHaveLength(1); + }); + + it("give-up emits the final error when budget is exhausted", async () => { + // Custom delayFor that allows exactly 1 retry then stops. + const shortDelayFor = (attempt: number): number | undefined => + attempt === 0 ? 100 : undefined; + const { provider } = createRetryingProvider({ + errorCount: Number.POSITIVE_INFINITY, + error: { message: "overloaded", code: "429", retryable: true }, + }); + const controller = new AbortController(); + const fake = createFakeSleep(controller); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + signal: controller.signal, + retry: { delayFor: shortDelayFor, sleep: fake.sleep }, + }); + + expect(result.finishReason).toBe("error"); + expect(fake.calls).toEqual([100]); // one retry, then give up + // One provider-retry (attempt 0), then the final error. 
// Scenario: the provider fails with a retryable error on every call and the
// fake sleep aborts the controller on its 2nd invocation. The turn must end
// as "aborted" (not "error"), with no terminal error event.
it("abort during sleep seals the turn aborted", async () => {
  const { provider } = createRetryingProvider({
    errorCount: Number.POSITIVE_INFINITY,
    error: { message: "overloaded", code: "429", retryable: true },
  });
  const controller = new AbortController();
  const fake = createFakeSleep(controller);
  fake.abortOnCall(2); // abort on the 2nd sleep

  const { events, emit } = createCollectingEmit();

  const result = await runTurn({
    provider,
    messages: [userMessage],
    tools: [],
    dispatch: { maxConcurrent: 1, eager: false },
    conversationId: "conv-1",
    turnId: "turn-1",
    emit,
    signal: controller.signal,
    retry: { delayFor, sleep: fake.sleep },
  });

  expect(result.finishReason).toBe("aborted");
  // Two sleeps attempted; the 2nd aborted.
  expect(fake.calls).toHaveLength(2);
  // No terminal error emitted (it was an abort, not a give-up).
  expect(events.filter((e) => e.type === "error")).toHaveLength(0);
  // One provider-retry per attempted sleep (attempts 0 and 1): the second
  // event is already out when its sleep aborts.
  const retries = events.filter((e) => e.type === "provider-retry");
  expect(retries).toHaveLength(2);
  // The done event carries reason "aborted". Assert it exists first so a
  // missing done event fails the test instead of skipping the check.
  const done = events.find((e) => e.type === "done");
  expect(done).toBeDefined();
  if (done?.type === "done") {
    expect(done.reason).toBe("aborted");
  }
});
+ const { provider, streamCalls } = createRetryingProvider({ + errorCount: 1, + error: { message: "overloaded", code: "429", retryable: true }, + }); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + // no retry field + }); + + expect(streamCalls.value).toBe(1); // no retry + expect(result.finishReason).toBe("error"); + expect(events.filter((e) => e.type === "error")).toHaveLength(1); + expect(events.filter((e) => e.type === "provider-retry")).toHaveLength(0); + }); + + it("retries a THROWN error (retryable-by-default when pre-content)", async () => { + // A thrown error (no retryable flag) before content is retried. + let callCount = 0; + const provider: ProviderContract = { + id: "fake", + stream() { + callCount++; + return (async function* () { + if (callCount <= 2) { + throw new Error("network blip"); + } + yield { type: "text-delta", delta: "hi" } as ProviderEvent; + yield { type: "finish", reason: "stop" } as ProviderEvent; + })(); + }, + }; + const controller = new AbortController(); + const fake = createFakeSleep(controller); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + signal: controller.signal, + retry: { delayFor, sleep: fake.sleep }, + }); + + expect(callCount).toBe(3); // 2 throws retried, 3rd succeeds + expect(fake.calls).toEqual([5_000, 10_000]); + expect(result.finishReason).toBe("stop"); + expect(events.filter((e) => e.type === "provider-retry")).toHaveLength(2); + // Thrown errors have no code. 
+ if (events[0]?.type === "provider-retry") { + expect(events[0].code).toBeUndefined(); + expect(events[0].message).toBe("network blip"); + } + expect(events.filter((e) => e.type === "error")).toHaveLength(0); + }); + + it("does NOT retry a thrown error after content was emitted", async () => { + let callCount = 0; + const provider: ProviderContract = { + id: "fake", + stream() { + callCount++; + return (async function* () { + yield { type: "text-delta", delta: "partial" } as ProviderEvent; + throw new Error("network blip"); + })(); + }, + }; + const controller = new AbortController(); + const fake = createFakeSleep(controller); + + const { events, emit } = createCollectingEmit(); + + const result = await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + signal: controller.signal, + retry: { delayFor, sleep: fake.sleep }, + }); + + expect(callCount).toBe(1); + expect(fake.calls).toHaveLength(0); + expect(result.finishReason).toBe("error"); + expect(events.filter((e) => e.type === "error")).toHaveLength(1); + expect(events.filter((e) => e.type === "text-delta")).toHaveLength(1); + }); + + it("provider-retry events interleave correctly: error → retry-event → sleep → retry", async () => { + // Verify ordering: each provider-retry event comes BEFORE its sleep, + // and the successful content comes only after the last retry. 
+ const { provider } = createRetryingProvider({ + errorCount: 2, + error: { message: "overloaded", code: "429", retryable: true }, + success: [ + { type: "text-delta", delta: "ok" }, + { type: "finish", reason: "stop" }, + ], + }); + const controller = new AbortController(); + const fake = createFakeSleep(controller); + + const { events, emit } = createCollectingEmit(); + + await runTurn({ + provider, + messages: [userMessage], + tools: [], + dispatch: { maxConcurrent: 1, eager: false }, + conversationId: "conv-1", + turnId: "turn-1", + emit, + signal: controller.signal, + retry: { delayFor, sleep: fake.sleep }, + }); + + const types = events.map((e) => e.type); + // turn-start, provider-retry(0), provider-retry(1), text-delta, step-complete, done + expect(types[0]).toBe("turn-start"); + const firstRetryIdx = types.indexOf("provider-retry"); + const textIdx = types.indexOf("text-delta"); + expect(firstRetryIdx).toBeGreaterThan(0); + expect(textIdx).toBeGreaterThan(firstRetryIdx); + // Both retries precede the text. 
+ const retryCount = types.filter((t) => t === "provider-retry").length; + expect(retryCount).toBe(2); + }); + }); }); diff --git a/packages/kernel/src/runtime/run-turn.ts b/packages/kernel/src/runtime/run-turn.ts index ac87a1f..3460033 100644 --- a/packages/kernel/src/runtime/run-turn.ts +++ b/packages/kernel/src/runtime/run-turn.ts @@ -1,98 +1,100 @@ import type { ChatMessage, Chunk, StepId } from "../contracts/conversation.js"; import type { Logger, Span } from "../contracts/logging.js"; import type { - ProviderContract, - ProviderEvent, - ProviderStreamOptions, - Usage, + ProviderContract, + ProviderEvent, + ProviderStreamOptions, + Usage, } from "../contracts/provider.js"; import type { - EventEmitter, - RetryStrategy, - RunTurnInput, - RunTurnResult, + EventEmitter, + RetryStrategy, + RunTurnInput, + RunTurnResult, } from "../contracts/runtime.js"; import type { ToolCall, ToolContract } from "../contracts/tool.js"; import { createStepDispatcher, type StepDispatcher } from "./dispatch.js"; import { - doneEvent, - errorEvent, - providerRetryEvent, - reasoningDeltaEvent, - stepCompleteEvent, - textDeltaEvent, - toolCallEvent, - toolResultEvent, - turnStartEvent, - usageEvent, + doneEvent, + errorEvent, + providerRetryEvent, + reasoningDeltaEvent, + stepCompleteEvent, + textDeltaEvent, + toolCallEvent, + toolResultEvent, + turnStartEvent, + usageEvent, } from "./events.js"; -export const MAX_STEPS = 50; +/** Max steps per turn. 0 = unlimited (the loop runs until the model stops + * making tool calls or the abort signal fires). */ +export const MAX_STEPS = 0; function zeroUsage(): Usage { - return { inputTokens: 0, outputTokens: 0 }; + return { inputTokens: 0, outputTokens: 0 }; } function addUsage(a: Usage, b: Usage): Usage { - const inputTokens = a.inputTokens + b.inputTokens; - const outputTokens = a.outputTokens + b.outputTokens; - - if (a.cacheReadTokens !== undefined || b.cacheReadTokens !== undefined) { - const cacheReadTokens = (a.cacheReadTokens ?? 
0) + (b.cacheReadTokens ?? 0); - if (a.cacheWriteTokens !== undefined || b.cacheWriteTokens !== undefined) { - return { - inputTokens, - outputTokens, - cacheReadTokens, - cacheWriteTokens: (a.cacheWriteTokens ?? 0) + (b.cacheWriteTokens ?? 0), - }; - } - return { inputTokens, outputTokens, cacheReadTokens }; - } - - if (a.cacheWriteTokens !== undefined || b.cacheWriteTokens !== undefined) { - return { - inputTokens, - outputTokens, - cacheWriteTokens: (a.cacheWriteTokens ?? 0) + (b.cacheWriteTokens ?? 0), - }; - } - - return { inputTokens, outputTokens }; + const inputTokens = a.inputTokens + b.inputTokens; + const outputTokens = a.outputTokens + b.outputTokens; + + if (a.cacheReadTokens !== undefined || b.cacheReadTokens !== undefined) { + const cacheReadTokens = (a.cacheReadTokens ?? 0) + (b.cacheReadTokens ?? 0); + if (a.cacheWriteTokens !== undefined || b.cacheWriteTokens !== undefined) { + return { + inputTokens, + outputTokens, + cacheReadTokens, + cacheWriteTokens: (a.cacheWriteTokens ?? 0) + (b.cacheWriteTokens ?? 0), + }; + } + return { inputTokens, outputTokens, cacheReadTokens }; + } + + if (a.cacheWriteTokens !== undefined || b.cacheWriteTokens !== undefined) { + return { + inputTokens, + outputTokens, + cacheWriteTokens: (a.cacheWriteTokens ?? 0) + (b.cacheWriteTokens ?? 
0), + }; + } + + return { inputTokens, outputTokens }; } function usageAttrs(usage: Usage): Record<string, string | number | boolean | null> { - const attrs: Record<string, string | number | boolean | null> = { - "usage.inputTokens": usage.inputTokens, - "usage.outputTokens": usage.outputTokens, - }; - if (usage.cacheReadTokens !== undefined) { - attrs["usage.cacheReadTokens"] = usage.cacheReadTokens; - } - if (usage.cacheWriteTokens !== undefined) { - attrs["usage.cacheWriteTokens"] = usage.cacheWriteTokens; - } - return attrs; + const attrs: Record<string, string | number | boolean | null> = { + "usage.inputTokens": usage.inputTokens, + "usage.outputTokens": usage.outputTokens, + }; + if (usage.cacheReadTokens !== undefined) { + attrs["usage.cacheReadTokens"] = usage.cacheReadTokens; + } + if (usage.cacheWriteTokens !== undefined) { + attrs["usage.cacheWriteTokens"] = usage.cacheWriteTokens; + } + return attrs; } function appendTextDelta(chunks: Chunk[], delta: string): void { - const lastIdx = chunks.length - 1; - const last = chunks[lastIdx]; - if (last !== undefined && last.type === "text") { - chunks[lastIdx] = { type: "text", text: last.text + delta }; - } else { - chunks.push({ type: "text", text: delta }); - } + const lastIdx = chunks.length - 1; + const last = chunks[lastIdx]; + if (last !== undefined && last.type === "text") { + chunks[lastIdx] = { type: "text", text: last.text + delta }; + } else { + chunks.push({ type: "text", text: delta }); + } } function appendThinkingDelta(chunks: Chunk[], delta: string): void { - const lastIdx = chunks.length - 1; - const last = chunks[lastIdx]; - if (last !== undefined && last.type === "thinking") { - chunks[lastIdx] = { type: "thinking", text: last.text + delta }; - } else { - chunks.push({ type: "thinking", text: delta }); - } + const lastIdx = chunks.length - 1; + const last = chunks[lastIdx]; + if (last !== undefined && last.type === "thinking") { + chunks[lastIdx] = { type: "thinking", text: last.text + 
delta }; + } else { + chunks.push({ type: "thinking", text: delta }); + } } /** @@ -104,698 +106,698 @@ function appendThinkingDelta(chunks: Chunk[], delta: string): void { * orphaned `tool` messages in the next turn's history. */ function stripToolCallChunks(msg: ChatMessage): ChatMessage | undefined { - const stripped = msg.chunks.filter((c) => c.type !== "tool-call"); - return stripped.length > 0 ? { role: msg.role, chunks: stripped } : undefined; + const stripped = msg.chunks.filter((c) => c.type !== "tool-call"); + return stripped.length > 0 ? { role: msg.role, chunks: stripped } : undefined; } interface StepContext { - readonly provider: ProviderContract; - readonly messages: ChatMessage[]; - readonly tools: readonly ToolContract[]; - readonly toolMap: Map<string, ToolContract>; - readonly dispatch: RunTurnInput["dispatch"]; - readonly emit: EventEmitter; - readonly signal: AbortSignal; - readonly conversationId: string; - readonly turnId: string; - readonly stepId: StepId; - readonly logger: Logger; - readonly turnSpan: Span | undefined; - readonly toolSpans: Map<string, Span>; - readonly cwd: string | undefined; - readonly computerId: string | undefined; - readonly now: (() => number) | undefined; - /** Per-turn provider options (model, systemPrompt, …) threaded to stream(). */ - readonly providerOpts: ProviderStreamOptions | undefined; - /** Optional injected retry strategy (omit = no retry, backward-compatible). 
*/ - readonly retry: RetryStrategy | undefined; + readonly provider: ProviderContract; + readonly messages: ChatMessage[]; + readonly tools: readonly ToolContract[]; + readonly toolMap: Map<string, ToolContract>; + readonly dispatch: RunTurnInput["dispatch"]; + readonly emit: EventEmitter; + readonly signal: AbortSignal; + readonly conversationId: string; + readonly turnId: string; + readonly stepId: StepId; + readonly logger: Logger; + readonly turnSpan: Span | undefined; + readonly toolSpans: Map<string, Span>; + readonly cwd: string | undefined; + readonly computerId: string | undefined; + readonly now: (() => number) | undefined; + /** Per-turn provider options (model, systemPrompt, …) threaded to stream(). */ + readonly providerOpts: ProviderStreamOptions | undefined; + /** Optional injected retry strategy (omit = no retry, backward-compatible). */ + readonly retry: RetryStrategy | undefined; } interface TimingState { - ttftSpan: Span | undefined; - decodeSpan: Span | undefined; - firstTokenSeen: boolean; - streamStartMs: number | undefined; - firstTokenMs: number | undefined; + ttftSpan: Span | undefined; + decodeSpan: Span | undefined; + firstTokenSeen: boolean; + streamStartMs: number | undefined; + firstTokenMs: number | undefined; } interface StepResult { - readonly assistantMessage: ChatMessage | undefined; - readonly toolCalls: ToolCall[]; - readonly toolMessages: ChatMessage[]; - readonly usage: Usage; - readonly finishReason: string; + readonly assistantMessage: ChatMessage | undefined; + readonly toolCalls: ToolCall[]; + readonly toolMessages: ChatMessage[]; + readonly usage: Usage; + readonly finishReason: string; } function processEvent( - event: ProviderEvent, - chunks: Chunk[], - toolCalls: ToolCall[], - dispatcher: StepDispatcher, - ctx: StepContext, - stepSpan: Span | undefined, - timing: TimingState, - toolDispatchTimes: Map<string, number>, + event: ProviderEvent, + chunks: Chunk[], + toolCalls: ToolCall[], + dispatcher: StepDispatcher, + 
ctx: StepContext, + stepSpan: Span | undefined, + timing: TimingState, + toolDispatchTimes: Map<string, number>, ): void { - switch (event.type) { - case "text-delta": - if (!timing.firstTokenSeen) { - timing.firstTokenSeen = true; - if (ctx.now !== undefined) { - timing.firstTokenMs = ctx.now(); - } - try { - timing.ttftSpan?.end({ attrs: { firstToken: true } }); - } catch { - // Swallow — D7. - } - timing.ttftSpan = undefined; - try { - timing.decodeSpan = stepSpan?.child("decode"); - } catch { - // Swallow — D7. - } - } - appendTextDelta(chunks, event.delta); - ctx.emit(textDeltaEvent(ctx.conversationId, ctx.turnId, event.delta)); - break; - case "reasoning-delta": - if (!timing.firstTokenSeen) { - timing.firstTokenSeen = true; - if (ctx.now !== undefined) { - timing.firstTokenMs = ctx.now(); - } - try { - timing.ttftSpan?.end({ attrs: { firstToken: true } }); - } catch { - // Swallow — D7. - } - timing.ttftSpan = undefined; - try { - timing.decodeSpan = stepSpan?.child("decode"); - } catch { - // Swallow — D7. - } - } - appendThinkingDelta(chunks, event.delta); - ctx.emit(reasoningDeltaEvent(ctx.conversationId, ctx.turnId, event.delta)); - break; - case "tool-call": { - const call: ToolCall = { - id: event.toolCallId, - name: event.toolName, - input: event.input, - }; - toolCalls.push(call); - chunks.push({ - type: "tool-call", - toolCallId: event.toolCallId, - toolName: event.toolName, - input: event.input, - stepId: ctx.stepId, - }); - ctx.emit( - toolCallEvent( - ctx.conversationId, - ctx.turnId, - ctx.stepId, - event.toolCallId, - event.toolName, - event.input, - ), - ); - - // Capture dispatch time for tool-call durationMs - if (ctx.now !== undefined) { - toolDispatchTimes.set(event.toolCallId, ctx.now()); - } - - // Open a tool-call span as a child of the step span (attrs: name, toolCallId) - try { - const tcSpan = - stepSpan !== undefined - ? 
stepSpan.child("tool-call", { - name: event.toolName, - toolCallId: event.toolCallId, - }) - : ctx.logger.span("tool-call", { - name: event.toolName, - toolCallId: event.toolCallId, - }); - ctx.toolSpans.set(event.toolCallId, tcSpan); - } catch { - // Swallow — D7: logging never breaks the turn. - } - - if (ctx.dispatch.eager) { - dispatcher.submit(call); - } - break; - } - case "usage": - ctx.emit(usageEvent(ctx.conversationId, ctx.turnId, event.usage, ctx.stepId)); - break; - case "finish": - break; - case "error": - // Handled by the retry loop in executeStep (not here): an error event - // is intercepted before processEvent so the step can decide whether to - // retry (suppressing the error) or give up (emit it). processEvent - // never receives an "error" event. - break; - } + switch (event.type) { + case "text-delta": + if (!timing.firstTokenSeen) { + timing.firstTokenSeen = true; + if (ctx.now !== undefined) { + timing.firstTokenMs = ctx.now(); + } + try { + timing.ttftSpan?.end({ attrs: { firstToken: true } }); + } catch { + // Swallow — D7. + } + timing.ttftSpan = undefined; + try { + timing.decodeSpan = stepSpan?.child("decode"); + } catch { + // Swallow — D7. + } + } + appendTextDelta(chunks, event.delta); + ctx.emit(textDeltaEvent(ctx.conversationId, ctx.turnId, event.delta)); + break; + case "reasoning-delta": + if (!timing.firstTokenSeen) { + timing.firstTokenSeen = true; + if (ctx.now !== undefined) { + timing.firstTokenMs = ctx.now(); + } + try { + timing.ttftSpan?.end({ attrs: { firstToken: true } }); + } catch { + // Swallow — D7. + } + timing.ttftSpan = undefined; + try { + timing.decodeSpan = stepSpan?.child("decode"); + } catch { + // Swallow — D7. 
+ } + } + appendThinkingDelta(chunks, event.delta); + ctx.emit(reasoningDeltaEvent(ctx.conversationId, ctx.turnId, event.delta)); + break; + case "tool-call": { + const call: ToolCall = { + id: event.toolCallId, + name: event.toolName, + input: event.input, + }; + toolCalls.push(call); + chunks.push({ + type: "tool-call", + toolCallId: event.toolCallId, + toolName: event.toolName, + input: event.input, + stepId: ctx.stepId, + }); + ctx.emit( + toolCallEvent( + ctx.conversationId, + ctx.turnId, + ctx.stepId, + event.toolCallId, + event.toolName, + event.input, + ), + ); + + // Capture dispatch time for tool-call durationMs + if (ctx.now !== undefined) { + toolDispatchTimes.set(event.toolCallId, ctx.now()); + } + + // Open a tool-call span as a child of the step span (attrs: name, toolCallId) + try { + const tcSpan = + stepSpan !== undefined + ? stepSpan.child("tool-call", { + name: event.toolName, + toolCallId: event.toolCallId, + }) + : ctx.logger.span("tool-call", { + name: event.toolName, + toolCallId: event.toolCallId, + }); + ctx.toolSpans.set(event.toolCallId, tcSpan); + } catch { + // Swallow — D7: logging never breaks the turn. + } + + if (ctx.dispatch.eager) { + dispatcher.submit(call); + } + break; + } + case "usage": + ctx.emit(usageEvent(ctx.conversationId, ctx.turnId, event.usage, ctx.stepId)); + break; + case "finish": + break; + case "error": + // Handled by the retry loop in executeStep (not here): an error event + // is intercepted before processEvent so the step can decide whether to + // retry (suppressing the error) or give up (emit it). processEvent + // never receives an "error" event. 
+ break; + } } async function executeStep(ctx: StepContext): Promise<StepResult> { - const chunks: Chunk[] = []; - const toolCalls: ToolCall[] = []; - const toolDispatchTimes = new Map<string, number>(); - let stepUsage = zeroUsage(); - let finishReason = "stop"; - - // Open a step span as a child of the turn span; capture the verbatim - // pre-mutation prompt via a "prompt" child span whose body holds the - // serialized messages+tools. - let stepSpan: Span | undefined; - try { - stepSpan = ctx.turnSpan !== undefined ? ctx.turnSpan.child("step") : ctx.logger.span("step"); - const promptBody = JSON.stringify({ messages: ctx.messages, tools: ctx.tools }); - const promptSpan = stepSpan.child( - "prompt", - { - messageCount: ctx.messages.length, - toolCount: ctx.tools.length, - }, - promptBody, - ); - promptSpan.end(); - } catch { - // Swallow — D7. - } - - const dispatcher = createStepDispatcher( - ctx.toolMap, - ctx.dispatch, - ctx.signal, - ctx.emit, - ctx.conversationId, - ctx.turnId, - ctx.toolSpans, - ctx.cwd, - ctx.computerId, - ); - - const timing: TimingState = { - ttftSpan: undefined, - decodeSpan: undefined, - firstTokenSeen: false, - streamStartMs: ctx.now !== undefined ? ctx.now() : undefined, - firstTokenMs: undefined, - }; - - // Open TTFT span when spans are enabled - try { - if (stepSpan !== undefined) { - timing.ttftSpan = stepSpan.child("ttft"); - } - } catch { - // Swallow — D7. - } - - // Retry loop: wrap provider.stream() consumption. Retries are ONLY - // attempted when no content was emitted yet this step (the safety - // invariant — never duplicate partial output). 
On a retryable error — - // either an EMITTED `error` ProviderEvent with `retryable === true`, OR a - // THROWN error (retryable-by-default when pre-content) — with !hadContent: - // ask retry.delayFor(attempt); if it returns a delay → emit a transient - // provider-retry AgentEvent, sleep via the injected retry.sleep (abortable), - // attempt++, re-call provider.stream(); if it returns undefined (budget - // exhausted) → give up. Non-retryable emitted errors (retryable === false or - // absent), errors after content, and the no-retry-configured case all fall - // through to "give up" — identical to the pre-retry behavior. - let hadContent = false; - let attempt = 0; - while (true) { - let errored = false; - let wasThrown = false; - let errorMessage: string | undefined; - let errorCode: string | undefined; - let errorRetryable: boolean | undefined; - let thrownErr: unknown; - - try { - const opts: ProviderStreamOptions = { - ...ctx.providerOpts, - ...(ctx.turnSpan !== undefined && stepSpan !== undefined ? { logger: stepSpan.log } : {}), - }; - const stream = ctx.provider.stream(ctx.messages, ctx.tools, opts); - for await (const event of stream) { - if (ctx.signal.aborted) break; - if (event.type === "error") { - // Intercept: hold for the retry decision — don't push a chunk - // or emit yet (a successful retry would leave a stale error). 
- errored = true; - errorMessage = event.message; - errorCode = event.code; - errorRetryable = event.retryable; - break; - } - if ( - event.type === "text-delta" || - event.type === "reasoning-delta" || - event.type === "tool-call" || - event.type === "usage" - ) { - hadContent = true; - } - processEvent( - event, - chunks, - toolCalls, - dispatcher, - ctx, - stepSpan, - timing, - toolDispatchTimes, - ); - if (event.type === "usage") { - stepUsage = addUsage(stepUsage, event.usage); - } - if (event.type === "finish") { - finishReason = event.reason; - } - } - } catch (err) { - errored = true; - wasThrown = true; - errorMessage = err instanceof Error ? err.message : String(err); - errorCode = undefined; - errorRetryable = undefined; - thrownErr = err; - } - - // Abort (during stream) → stop; the runTurn loop seals aborted. - if (ctx.signal.aborted) { - break; - } - - // No error → step succeeded. - if (!errored) { - break; - } - - // Retryable? A thrown error is retryable-by-default when pre-content; - // an emitted error is retryable ONLY when `retryable === true` (absent - // or false → not retried, per the contract). - const isRetryable = wasThrown ? true : errorRetryable === true; - if (ctx.retry !== undefined && !hadContent && isRetryable) { - const delay = ctx.retry.delayFor(attempt); - if (delay !== undefined) { - // Emit the transient provider-retry event BEFORE the sleep so the - // UI shows "⚠ retrying in Ns…" immediately. Not persisted as a - // chat message — it never pollutes the prompt. - ctx.emit( - providerRetryEvent( - ctx.conversationId, - ctx.turnId, - attempt, - delay, - errorMessage ?? "", - errorCode, - ), - ); - // Abortable sleep. If the signal fires during sleep, the shell's - // sleep rejects — we catch it and break so the turn seals aborted. - try { - await ctx.retry.sleep(delay, ctx.signal); - } catch { - // Abort during sleep (or unexpected sleep failure). 
- } - if (ctx.signal.aborted) { - break; - } - attempt++; - continue; - } - // delayFor returned undefined → budget exhausted → give up. - } - - // Give up: emit the suppressed error and end the step. This is the - // single emission point for a terminal provider error (non-retryable, - // post-content, budget-exhausted, or no-retry-configured). - const message = errorMessage ?? ""; - if (errorCode !== undefined) { - chunks.push({ type: "error", message, code: errorCode }); - } else { - chunks.push({ type: "error", message }); - } - ctx.emit(errorEvent(ctx.conversationId, ctx.turnId, message, errorCode)); - finishReason = "error"; - try { - stepSpan?.end({ err: thrownErr ?? new Error(message) }); - } catch { - // Swallow — D7. - } - stepSpan = undefined; - break; - } - - // Close timing spans: if no first token was seen, end ttft with firstToken: false - // If decode span is open, close it - try { - if (timing.ttftSpan !== undefined) { - timing.ttftSpan.end({ attrs: { firstToken: false } }); - timing.ttftSpan = undefined; - } - if (timing.decodeSpan !== undefined) { - timing.decodeSpan.end(); - timing.decodeSpan = undefined; - } - } catch { - // Swallow — D7. - } - - // Emit step-complete event with timing - const streamEndMs = ctx.now !== undefined ? 
ctx.now() : undefined; - if (timing.streamStartMs !== undefined && streamEndMs !== undefined) { - const genTotalMs = streamEndMs - timing.streamStartMs; - const stepTiming: { ttftMs?: number; decodeMs?: number; genTotalMs?: number } = { - genTotalMs, - }; - if (timing.firstTokenMs !== undefined) { - stepTiming.ttftMs = timing.firstTokenMs - timing.streamStartMs; - stepTiming.decodeMs = streamEndMs - timing.firstTokenMs; - } - ctx.emit(stepCompleteEvent(ctx.conversationId, ctx.turnId, ctx.stepId, stepTiming)); - } else { - ctx.emit(stepCompleteEvent(ctx.conversationId, ctx.turnId, ctx.stepId)); - } - - if (!ctx.dispatch.eager) { - for (const call of toolCalls) { - dispatcher.submit(call); - } - } - - const results = await dispatcher.drain(); - - // Close remaining tool-call spans - for (const call of toolCalls) { - const tcSpan = ctx.toolSpans.get(call.id); - if (tcSpan !== undefined) { - const result = results.get(call.id); - try { - tcSpan.end({ - attrs: { - isError: result?.isError ?? false, - contentLength: result?.content.length ?? 0, - }, - }); - } catch { - // Swallow — D7. - } - ctx.toolSpans.delete(call.id); - } - } - - const toolMessages: ChatMessage[] = []; - for (const call of toolCalls) { - const result = results.get(call.id); - if (result !== undefined) { - const isError = result.isError ?? false; - const dispatchTime = toolDispatchTimes.get(call.id); - const toolDurationMs = - ctx.now !== undefined && dispatchTime !== undefined ? 
ctx.now() - dispatchTime : undefined; - ctx.emit( - toolResultEvent( - ctx.conversationId, - ctx.turnId, - ctx.stepId, - call.id, - call.name, - result.content, - isError, - toolDurationMs, - ), - ); - toolMessages.push({ - role: "tool", - chunks: [ - { - type: "tool-result", - toolCallId: call.id, - toolName: call.name, - content: result.content, - isError, - stepId: ctx.stepId, - }, - ], - }); - } - } - - // Close step span (if not already closed by error) - if (stepSpan !== undefined) { - try { - stepSpan.end({ - attrs: { - finishReason, - ...usageAttrs(stepUsage), - }, - }); - } catch { - // Swallow — D7. - } - } - - const assistantMessage: ChatMessage | undefined = - chunks.length > 0 ? { role: "assistant", chunks } : undefined; - - return { assistantMessage, toolCalls, toolMessages, usage: stepUsage, finishReason }; + const chunks: Chunk[] = []; + const toolCalls: ToolCall[] = []; + const toolDispatchTimes = new Map<string, number>(); + let stepUsage = zeroUsage(); + let finishReason = "stop"; + + // Open a step span as a child of the turn span; capture the verbatim + // pre-mutation prompt via a "prompt" child span whose body holds the + // serialized messages+tools. + let stepSpan: Span | undefined; + try { + stepSpan = ctx.turnSpan !== undefined ? ctx.turnSpan.child("step") : ctx.logger.span("step"); + const promptBody = JSON.stringify({ messages: ctx.messages, tools: ctx.tools }); + const promptSpan = stepSpan.child( + "prompt", + { + messageCount: ctx.messages.length, + toolCount: ctx.tools.length, + }, + promptBody, + ); + promptSpan.end(); + } catch { + // Swallow — D7. + } + + const dispatcher = createStepDispatcher( + ctx.toolMap, + ctx.dispatch, + ctx.signal, + ctx.emit, + ctx.conversationId, + ctx.turnId, + ctx.toolSpans, + ctx.cwd, + ctx.computerId, + ); + + const timing: TimingState = { + ttftSpan: undefined, + decodeSpan: undefined, + firstTokenSeen: false, + streamStartMs: ctx.now !== undefined ? 
ctx.now() : undefined, + firstTokenMs: undefined, + }; + + // Open TTFT span when spans are enabled + try { + if (stepSpan !== undefined) { + timing.ttftSpan = stepSpan.child("ttft"); + } + } catch { + // Swallow — D7. + } + + // Retry loop: wrap provider.stream() consumption. Retries are ONLY + // attempted when no content was emitted yet this step (the safety + // invariant — never duplicate partial output). On a retryable error — + // either an EMITTED `error` ProviderEvent with `retryable === true`, OR a + // THROWN error (retryable-by-default when pre-content) — with !hadContent: + // ask retry.delayFor(attempt); if it returns a delay → emit a transient + // provider-retry AgentEvent, sleep via the injected retry.sleep (abortable), + // attempt++, re-call provider.stream(); if it returns undefined (budget + // exhausted) → give up. Non-retryable emitted errors (retryable === false or + // absent), errors after content, and the no-retry-configured case all fall + // through to "give up" — identical to the pre-retry behavior. + let hadContent = false; + let attempt = 0; + while (true) { + let errored = false; + let wasThrown = false; + let errorMessage: string | undefined; + let errorCode: string | undefined; + let errorRetryable: boolean | undefined; + let thrownErr: unknown; + + try { + const opts: ProviderStreamOptions = { + ...ctx.providerOpts, + ...(ctx.turnSpan !== undefined && stepSpan !== undefined ? { logger: stepSpan.log } : {}), + }; + const stream = ctx.provider.stream(ctx.messages, ctx.tools, opts); + for await (const event of stream) { + if (ctx.signal.aborted) break; + if (event.type === "error") { + // Intercept: hold for the retry decision — don't push a chunk + // or emit yet (a successful retry would leave a stale error). 
+ errored = true; + errorMessage = event.message; + errorCode = event.code; + errorRetryable = event.retryable; + break; + } + if ( + event.type === "text-delta" || + event.type === "reasoning-delta" || + event.type === "tool-call" || + event.type === "usage" + ) { + hadContent = true; + } + processEvent( + event, + chunks, + toolCalls, + dispatcher, + ctx, + stepSpan, + timing, + toolDispatchTimes, + ); + if (event.type === "usage") { + stepUsage = addUsage(stepUsage, event.usage); + } + if (event.type === "finish") { + finishReason = event.reason; + } + } + } catch (err) { + errored = true; + wasThrown = true; + errorMessage = err instanceof Error ? err.message : String(err); + errorCode = undefined; + errorRetryable = undefined; + thrownErr = err; + } + + // Abort (during stream) → stop; the runTurn loop seals aborted. + if (ctx.signal.aborted) { + break; + } + + // No error → step succeeded. + if (!errored) { + break; + } + + // Retryable? A thrown error is retryable-by-default when pre-content; + // an emitted error is retryable ONLY when `retryable === true` (absent + // or false → not retried, per the contract). + const isRetryable = wasThrown ? true : errorRetryable === true; + if (ctx.retry !== undefined && !hadContent && isRetryable) { + const delay = ctx.retry.delayFor(attempt); + if (delay !== undefined) { + // Emit the transient provider-retry event BEFORE the sleep so the + // UI shows "⚠ retrying in Ns…" immediately. Not persisted as a + // chat message — it never pollutes the prompt. + ctx.emit( + providerRetryEvent( + ctx.conversationId, + ctx.turnId, + attempt, + delay, + errorMessage ?? "", + errorCode, + ), + ); + // Abortable sleep. If the signal fires during sleep, the shell's + // sleep rejects — we catch it and break so the turn seals aborted. + try { + await ctx.retry.sleep(delay, ctx.signal); + } catch { + // Abort during sleep (or unexpected sleep failure). 
+ } + if (ctx.signal.aborted) { + break; + } + attempt++; + continue; + } + // delayFor returned undefined → budget exhausted → give up. + } + + // Give up: emit the suppressed error and end the step. This is the + // single emission point for a terminal provider error (non-retryable, + // post-content, budget-exhausted, or no-retry-configured). + const message = errorMessage ?? ""; + if (errorCode !== undefined) { + chunks.push({ type: "error", message, code: errorCode }); + } else { + chunks.push({ type: "error", message }); + } + ctx.emit(errorEvent(ctx.conversationId, ctx.turnId, message, errorCode)); + finishReason = "error"; + try { + stepSpan?.end({ err: thrownErr ?? new Error(message) }); + } catch { + // Swallow — D7. + } + stepSpan = undefined; + break; + } + + // Close timing spans: if no first token was seen, end ttft with firstToken: false + // If decode span is open, close it + try { + if (timing.ttftSpan !== undefined) { + timing.ttftSpan.end({ attrs: { firstToken: false } }); + timing.ttftSpan = undefined; + } + if (timing.decodeSpan !== undefined) { + timing.decodeSpan.end(); + timing.decodeSpan = undefined; + } + } catch { + // Swallow — D7. + } + + // Emit step-complete event with timing + const streamEndMs = ctx.now !== undefined ? 
ctx.now() : undefined; + if (timing.streamStartMs !== undefined && streamEndMs !== undefined) { + const genTotalMs = streamEndMs - timing.streamStartMs; + const stepTiming: { ttftMs?: number; decodeMs?: number; genTotalMs?: number } = { + genTotalMs, + }; + if (timing.firstTokenMs !== undefined) { + stepTiming.ttftMs = timing.firstTokenMs - timing.streamStartMs; + stepTiming.decodeMs = streamEndMs - timing.firstTokenMs; + } + ctx.emit(stepCompleteEvent(ctx.conversationId, ctx.turnId, ctx.stepId, stepTiming)); + } else { + ctx.emit(stepCompleteEvent(ctx.conversationId, ctx.turnId, ctx.stepId)); + } + + if (!ctx.dispatch.eager) { + for (const call of toolCalls) { + dispatcher.submit(call); + } + } + + const results = await dispatcher.drain(); + + // Close remaining tool-call spans + for (const call of toolCalls) { + const tcSpan = ctx.toolSpans.get(call.id); + if (tcSpan !== undefined) { + const result = results.get(call.id); + try { + tcSpan.end({ + attrs: { + isError: result?.isError ?? false, + contentLength: result?.content.length ?? 0, + }, + }); + } catch { + // Swallow — D7. + } + ctx.toolSpans.delete(call.id); + } + } + + const toolMessages: ChatMessage[] = []; + for (const call of toolCalls) { + const result = results.get(call.id); + if (result !== undefined) { + const isError = result.isError ?? false; + const dispatchTime = toolDispatchTimes.get(call.id); + const toolDurationMs = + ctx.now !== undefined && dispatchTime !== undefined ? 
ctx.now() - dispatchTime : undefined; + ctx.emit( + toolResultEvent( + ctx.conversationId, + ctx.turnId, + ctx.stepId, + call.id, + call.name, + result.content, + isError, + toolDurationMs, + ), + ); + toolMessages.push({ + role: "tool", + chunks: [ + { + type: "tool-result", + toolCallId: call.id, + toolName: call.name, + content: result.content, + isError, + stepId: ctx.stepId, + }, + ], + }); + } + } + + // Close step span (if not already closed by error) + if (stepSpan !== undefined) { + try { + stepSpan.end({ + attrs: { + finishReason, + ...usageAttrs(stepUsage), + }, + }); + } catch { + // Swallow — D7. + } + } + + const assistantMessage: ChatMessage | undefined = + chunks.length > 0 ? { role: "assistant", chunks } : undefined; + + return { assistantMessage, toolCalls, toolMessages, usage: stepUsage, finishReason }; } export async function runTurn(input: RunTurnInput): Promise<RunTurnResult> { - const messages: ChatMessage[] = [...input.messages]; - const resultMessages: ChatMessage[] = []; - let totalUsage = zeroUsage(); - let lastStepUsage: Usage | undefined; - let finishReason = "stop"; - - const toolMap = new Map<string, ToolContract>(); - for (const tool of input.tools) { - toolMap.set(tool.name, tool); - } - - const conversationId = input.conversationId; - const turnId = input.turnId; - const signal = input.signal ?? new AbortController().signal; - const logger = input.logger; - const now = input.now; - - // Record turn start time for durationMs on done - const turnStartMs = now !== undefined ? now() : undefined; - - // Open a turn span (attrs: conversationId, turnId, model) - let turnSpan: Span | undefined; - if (logger !== undefined) { - try { - turnSpan = logger.span("turn", { - conversationId, - turnId, - model: input.providerOpts?.model ?? input.provider.id, - }); - } catch { - // Swallow — D7. 
- } - } - - // Track open tool-call spans across steps so we can close them on abort - const toolSpans = new Map<string, Span>(); - - input.emit(turnStartEvent(conversationId, turnId)); - - try { - for (let step = 0; step < MAX_STEPS; step++) { - if (signal.aborted) { - finishReason = "aborted"; - break; - } - - const stepId = `${turnId}#${step}` as StepId; - - const stepResult = await executeStep({ - provider: input.provider, - messages, - tools: input.tools, - toolMap, - dispatch: input.dispatch, - emit: input.emit, - signal, - conversationId, - turnId, - stepId, - logger: turnSpan?.log ?? logger ?? createNoopLogger(), - turnSpan, - toolSpans, - cwd: input.cwd, - computerId: input.computerId, - now, - providerOpts: input.providerOpts, - retry: input.retry, - }); - - totalUsage = addUsage(totalUsage, stepResult.usage); - lastStepUsage = stepResult.usage; - - // When the signal is aborted mid-step, the tool results are - // placeholders ({ content: "Aborted", isError: true }). If these - // are persisted and included in the next turn's message history, - // the provider sees a `tool` role message without a preceding - // `assistant` message carrying `tool_calls` → 400 error. - // - // To prevent this, when the signal is aborted we: - // 1. Strip tool-call chunks from the assistant message (keep - // text/thinking/error chunks so the partial response is - // preserved). - // 2. Omit tool-result messages entirely (they are not persisted, - // not added to resultMessages, and not passed to onStepComplete). - // - // This keeps the conversation history clean: the assistant's - // partial text is preserved, but no incomplete tool calls are - // left dangling. The `done` event still carries - // `reason: "aborted"`, so the turn seals cleanly. - const stepAborted = signal.aborted; - const assistantMessage = - stepAborted && stepResult.assistantMessage !== undefined - ? 
stripToolCallChunks(stepResult.assistantMessage) - : stepResult.assistantMessage; - const toolMessages = stepAborted ? [] : stepResult.toolMessages; - - if (assistantMessage !== undefined) { - messages.push(assistantMessage); - resultMessages.push(assistantMessage); - } - - for (const msg of toolMessages) { - messages.push(msg); - resultMessages.push(msg); - } - - // Incremental persistence: notify the caller that this step's - // messages are finalized. The caller can persist them immediately - // (assigning seq numbers during generation). The messages are the - // SAME objects in resultMessages — the caller must NOT double-persist. - if (input.onStepComplete !== undefined) { - const stepMessages: ChatMessage[] = []; - if (assistantMessage !== undefined) { - stepMessages.push(assistantMessage); - } - for (const msg of toolMessages) { - stepMessages.push(msg); - } - if (stepMessages.length > 0) { - await input.onStepComplete(stepMessages); - } - } - - if (stepAborted) { - finishReason = "aborted"; - break; - } - - if (stepResult.toolCalls.length === 0) { - finishReason = stepResult.finishReason; - break; - } - - if (step === MAX_STEPS - 1) { - finishReason = "max-steps"; - // No next step → no tool-result boundary. Leave any pending - // steering messages for the caller (it owns the queue). - } else { - // Tool-result boundary: this step produced tool calls and we are - // about to call provider.stream again. Drain steering messages - // and append them after the tool results, before the next call. - // The kernel owns no queue and names no feature — it just calls - // the callback and appends. Emits nothing (caller emits the - // `steering` AgentEvent in its own wrapper). - const steering = input.drainSteering?.() ?? []; - for (const msg of steering) { - messages.push(msg); - } - } - } - } finally { - // Close any orphaned tool-call spans (e.g. 
abort mid-tool) - for (const [id, tcSpan] of toolSpans) { - try { - tcSpan.end({ attrs: { orphaned: true } }); - } catch { - // Swallow — D7. - } - toolSpans.delete(id); - } - - // Close the turn span - if (turnSpan !== undefined) { - try { - turnSpan.end({ - attrs: { - finishReason, - ...usageAttrs(totalUsage), - }, - }); - } catch { - // Swallow — D7. - } - } - } - - const turnDurationMs = - turnStartMs !== undefined && now !== undefined ? now() - turnStartMs : undefined; - const hasUsage = - totalUsage.inputTokens > 0 || - totalUsage.outputTokens > 0 || - totalUsage.cacheReadTokens !== undefined || - totalUsage.cacheWriteTokens !== undefined; - const contextSize = - hasUsage && lastStepUsage !== undefined - ? lastStepUsage.inputTokens + lastStepUsage.outputTokens - : undefined; - input.emit( - doneEvent( - conversationId, - turnId, - finishReason, - turnDurationMs, - hasUsage ? totalUsage : undefined, - contextSize, - ), - ); - - return { messages: resultMessages, usage: totalUsage, finishReason }; + const messages: ChatMessage[] = [...input.messages]; + const resultMessages: ChatMessage[] = []; + let totalUsage = zeroUsage(); + let lastStepUsage: Usage | undefined; + let finishReason = "stop"; + + const toolMap = new Map<string, ToolContract>(); + for (const tool of input.tools) { + toolMap.set(tool.name, tool); + } + + const conversationId = input.conversationId; + const turnId = input.turnId; + const signal = input.signal ?? new AbortController().signal; + const logger = input.logger; + const now = input.now; + + // Record turn start time for durationMs on done + const turnStartMs = now !== undefined ? now() : undefined; + + // Open a turn span (attrs: conversationId, turnId, model) + let turnSpan: Span | undefined; + if (logger !== undefined) { + try { + turnSpan = logger.span("turn", { + conversationId, + turnId, + model: input.providerOpts?.model ?? input.provider.id, + }); + } catch { + // Swallow — D7. 
+ } + } + + // Track open tool-call spans across steps so we can close them on abort + const toolSpans = new Map<string, Span>(); + + input.emit(turnStartEvent(conversationId, turnId)); + + try { + for (let step = 0; MAX_STEPS === 0 || step < MAX_STEPS; step++) { + if (signal.aborted) { + finishReason = "aborted"; + break; + } + + const stepId = `${turnId}#${step}` as StepId; + + const stepResult = await executeStep({ + provider: input.provider, + messages, + tools: input.tools, + toolMap, + dispatch: input.dispatch, + emit: input.emit, + signal, + conversationId, + turnId, + stepId, + logger: turnSpan?.log ?? logger ?? createNoopLogger(), + turnSpan, + toolSpans, + cwd: input.cwd, + computerId: input.computerId, + now, + providerOpts: input.providerOpts, + retry: input.retry, + }); + + totalUsage = addUsage(totalUsage, stepResult.usage); + lastStepUsage = stepResult.usage; + + // When the signal is aborted mid-step, the tool results are + // placeholders ({ content: "Aborted", isError: true }). If these + // are persisted and included in the next turn's message history, + // the provider sees a `tool` role message without a preceding + // `assistant` message carrying `tool_calls` → 400 error. + // + // To prevent this, when the signal is aborted we: + // 1. Strip tool-call chunks from the assistant message (keep + // text/thinking/error chunks so the partial response is + // preserved). + // 2. Omit tool-result messages entirely (they are not persisted, + // not added to resultMessages, and not passed to onStepComplete). + // + // This keeps the conversation history clean: the assistant's + // partial text is preserved, but no incomplete tool calls are + // left dangling. The `done` event still carries + // `reason: "aborted"`, so the turn seals cleanly. + const stepAborted = signal.aborted; + const assistantMessage = + stepAborted && stepResult.assistantMessage !== undefined + ? 
stripToolCallChunks(stepResult.assistantMessage) + : stepResult.assistantMessage; + const toolMessages = stepAborted ? [] : stepResult.toolMessages; + + if (assistantMessage !== undefined) { + messages.push(assistantMessage); + resultMessages.push(assistantMessage); + } + + for (const msg of toolMessages) { + messages.push(msg); + resultMessages.push(msg); + } + + // Incremental persistence: notify the caller that this step's + // messages are finalized. The caller can persist them immediately + // (assigning seq numbers during generation). The messages are the + // SAME objects in resultMessages — the caller must NOT double-persist. + if (input.onStepComplete !== undefined) { + const stepMessages: ChatMessage[] = []; + if (assistantMessage !== undefined) { + stepMessages.push(assistantMessage); + } + for (const msg of toolMessages) { + stepMessages.push(msg); + } + if (stepMessages.length > 0) { + await input.onStepComplete(stepMessages); + } + } + + if (stepAborted) { + finishReason = "aborted"; + break; + } + + if (stepResult.toolCalls.length === 0) { + finishReason = stepResult.finishReason; + break; + } + + if (MAX_STEPS > 0 && step === MAX_STEPS - 1) { + finishReason = "max-steps"; + // No next step → no tool-result boundary. Leave any pending + // steering messages for the caller (it owns the queue). + } else { + // Tool-result boundary: this step produced tool calls and we are + // about to call provider.stream again. Drain steering messages + // and append them after the tool results, before the next call. + // The kernel owns no queue and names no feature — it just calls + // the callback and appends. Emits nothing (caller emits the + // `steering` AgentEvent in its own wrapper). + const steering = input.drainSteering?.() ?? []; + for (const msg of steering) { + messages.push(msg); + } + } + } + } finally { + // Close any orphaned tool-call spans (e.g. 
abort mid-tool) + for (const [id, tcSpan] of toolSpans) { + try { + tcSpan.end({ attrs: { orphaned: true } }); + } catch { + // Swallow — D7. + } + toolSpans.delete(id); + } + + // Close the turn span + if (turnSpan !== undefined) { + try { + turnSpan.end({ + attrs: { + finishReason, + ...usageAttrs(totalUsage), + }, + }); + } catch { + // Swallow — D7. + } + } + } + + const turnDurationMs = + turnStartMs !== undefined && now !== undefined ? now() - turnStartMs : undefined; + const hasUsage = + totalUsage.inputTokens > 0 || + totalUsage.outputTokens > 0 || + totalUsage.cacheReadTokens !== undefined || + totalUsage.cacheWriteTokens !== undefined; + const contextSize = + hasUsage && lastStepUsage !== undefined + ? lastStepUsage.inputTokens + lastStepUsage.outputTokens + : undefined; + input.emit( + doneEvent( + conversationId, + turnId, + finishReason, + turnDurationMs, + hasUsage ? totalUsage : undefined, + contextSize, + ), + ); + + return { messages: resultMessages, usage: totalUsage, finishReason }; } function createNoopLogger(): Logger { - return { - debug() {}, - info() {}, - warn() {}, - error() {}, - child() { - return createNoopLogger(); - }, - span() { - return { - id: "noop", - log: createNoopLogger(), - setAttributes() {}, - addLink() {}, - child() { - return this; - }, - end() {}, - }; - }, - }; + return { + debug() {}, + info() {}, + warn() {}, + error() {}, + child() { + return createNoopLogger(); + }, + span() { + return { + id: "noop", + log: createNoopLogger(), + setAttributes() {}, + addLink() {}, + child() { + return this; + }, + end() {}, + }; + }, + }; } |
