summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAdam Malczewski <[email protected]>2026-06-25 22:15:04 +0900
committerAdam Malczewski <[email protected]>2026-06-25 22:15:04 +0900
commit9b91d1bca83e7599fb7d7de6038cedf186e61764 (patch)
tree7a6c2325e618eaf89743ef89734e7b5102744668
parent2cc9ddfb590dc60557bba3ed76a6c4639df5f596 (diff)
downloaddispatch-9b91d1bca83e7599fb7d7de6038cedf186e61764.tar.gz
dispatch-9b91d1bca83e7599fb7d7de6038cedf186e61764.zip
fix(ssh): POST /computers/:alias/test hangs after successful SSH connect
The test endpoint's runProbe() waited for the ssh2 stream's 'close' event, which some SSH servers never emit for short-lived exec channels (the command 'true' exits instantly). This caused the promise to hang forever — the HTTP response never returned, and the FE's Test spinner spun indefinitely. Three fixes: 1. runProbe now resolves on the 'exit' event (not 'close') — the command has finished and the exit code is available. 'close' is kept as a fallback. Stream data/stderr are drained to prevent buffer deadlocks. 2. runProbe has a 15s timeout safety net — if the exec callback or 'exit' event never fires (e.g. server requires a pty for exec), the probe resolves false instead of hanging forever. 3. The entire test() method is wrapped in a 30s Promise.race timeout — even if pool.acquire() or pool.drop() hangs, the endpoint ALWAYS responds with { ok, error? }. The probe is fully non-interactive (no blocking prompts). tsc EXIT 0, biome clean, 1756 tests pass.
-rw-r--r--packages/ssh/src/service.ts89
1 files changed, 72 insertions, 17 deletions
diff --git a/packages/ssh/src/service.ts b/packages/ssh/src/service.ts
index 629c9f8..cebe2ef 100644
--- a/packages/ssh/src/service.ts
+++ b/packages/ssh/src/service.ts
@@ -124,19 +124,18 @@ export function createSshService(deps: SshServiceDeps): {
}
// One-shot probe: acquire (connects), run a trivial command, then drop
// the connection so a test never holds a pooled socket open (plan §9.1).
+ // Wrapped in a timeout safety net so the endpoint ALWAYS responds —
+ // even if the SSH connect/exec/drop hangs (the probe is non-interactive).
+ const PROBE_TOTAL_TIMEOUT_MS = 30_000;
try {
- const conn = await pool.acquire(alias);
- const client = await conn.getClient();
- const ok = await runProbe(client);
- if (ok) {
- // Successful connect pins the host key (the accept-new analog);
- // a fresh known_hosts read reflects the new pin.
- deps.logger.info("computer test ok", { alias });
- }
- await pool.drop(alias);
- return ok
- ? { alias, ok: true }
- : { alias, ok: false, error: "remote command returned no exit code" };
+ const result = await Promise.race<TestComputerResponse>([
+ runTestProbe(pool, alias, deps.logger),
+ timeoutAfter<TestComputerResponse>(
+ PROBE_TOTAL_TIMEOUT_MS,
+ `test timed out after ${PROBE_TOTAL_TIMEOUT_MS / 1000}s`,
+ ),
+ ]);
+ return result;
} catch (err: unknown) {
await pool.drop(alias).catch(() => undefined);
const message = err instanceof Error ? err.message : String(err);
@@ -159,21 +158,77 @@ export function createSshService(deps: SshServiceDeps): {
return { service, pool, remoteFactory };
}
-/** Run `true` over SSH as a connectivity probe; resolve ok=true on exit 0. */
+/**
+ * The one-shot test probe: acquire → exec `true` → drop. Extracted so it can
+ * be raced against a timeout. Always drops the connection (even on success).
+ */
+async function runTestProbe(
+ pool: SshConnectionPool,
+ alias: string,
+ logger: Logger,
+): Promise<TestComputerResponse> {
+ const conn = await pool.acquire(alias);
+ const client = await conn.getClient();
+ const ok = await runProbe(client);
+ if (ok) {
+ logger.info("computer test ok", { alias });
+ }
+ await pool.drop(alias);
+ return ok
+ ? { alias, ok: true }
+ : { alias, ok: false, error: "remote command returned no exit code" };
+}
+
+/** Reject with `message` after `ms`. Used to race against a hanging probe. */
+function timeoutAfter<T>(ms: number, message: string): Promise<T> {
+ return new Promise<T>((_resolve, reject) => {
+ setTimeout(() => reject(new Error(message)), ms);
+ });
+}
+
+/** Probe timeout — the `true` command exits instantly; 15s is generous. */
+const PROBE_TIMEOUT_MS = 15_000;
+
+/**
+ * Run `true` over SSH as a connectivity probe; resolve `true` on exit 0.
+ *
+ * Resolves on the `exit` event (not `close` — some SSH servers don't emit
+ * `close` for short-lived exec channels, causing the promise to hang forever).
+ * A timeout safety net ensures we ALWAYS resolve, even if `exec` callback or
+ * the `exit` event never fires (e.g. the server requires a pty for exec).
+ */
function runProbe(client: import("ssh2").Client): Promise<boolean> {
return new Promise<boolean>((resolve) => {
+ let settled = false;
+ const done = (result: boolean): void => {
+ if (!settled) {
+ settled = true;
+ clearTimeout(timer);
+ resolve(result);
+ }
+ };
+
+ const timer = setTimeout(() => {
+ done(false);
+ }, PROBE_TIMEOUT_MS);
+
client.exec("true", { pty: false }, (err, stream) => {
if (err !== null && err !== undefined) {
- resolve(false);
+ done(false);
return;
}
- let exitCode: number | null = null;
+ // Resolve on `exit` — the command has finished. Don't wait for
+ // `close` (some servers never emit it for exec channels).
stream.on("exit", (code: number | null) => {
- exitCode = code;
+ done(code === 0);
});
+ // Safety net: if `exit` never fires, `close` might.
stream.on("close", () => {
- resolve(exitCode === 0);
+ done(false);
});
+ // Drain any output so the stream doesn't deadlock.
+ stream.on("data", () => {});
+ stream.stderr.on("data", () => {});
});
});
}