diff options
| author | Adam Malczewski <[email protected]> | 2026-05-29 23:14:55 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-05-29 23:14:55 +0900 |
| commit | 497b397e873f96d6fde3d8a44b3318e1ee1cbef4 (patch) | |
| tree | 3ae632fbe445f881e49fbfd6f490394fc8ed7e9e | |
| parent | c8e76ef506da32884ccf9ea2ac83a4d344c62943 (diff) | |
| download | dispatch-497b397e873f96d6fde3d8a44b3318e1ee1cbef4.tar.gz dispatch-497b397e873f96d6fde3d8a44b3318e1ee1cbef4.zip | |
fix(claude): eliminate /home mount race that blanks Claude credentials at boot
On hosts where /home is a separate filesystem, the dispatch-api service
could start before /home was mounted. The API's first DB access then
failed (EACCES: mkdir '/home/tradam'), Claude account discovery silently
caught the error and left claudeAccounts empty, and -- because discovery
only ran in the constructor -- it stayed empty for the whole process
lifetime. Every Claude message then fell back to the deepseek-v4-flash /
empty-key defaults, producing a 401 'Missing API key' from OpenCode Zen.
Fixes:
- s6 run script waits (capped ~30s) for /home/tradam before exec'ing bun;
passes instantly where /home is on the root filesystem.
- systemd unit gains RequiresMountsFor=/home and After=...home.mount.
- agent-manager re-runs _refreshClaudeAccounts() on config hot-reload and
lazily on an empty cache in the Anthropic path, so a process that lost
the boot race self-heals on the next request instead of staying broken.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | packages/api/src/agent-manager.ts | 16 |
| -rw-r--r-- | packaging/dispatch-api@.service | 18 |
| -rw-r--r-- | packaging/s6/dispatch-api-srv/run | 21 |
3 files changed, 45 insertions, 10 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts index c09a607..c873388 100644 --- a/packages/api/src/agent-manager.ts +++ b/packages/api/src/agent-manager.ts @@ -244,6 +244,10 @@ export class AgentManager { } // Update model registry with new config this._initModelRegistry(newConfig); + // Re-discover Claude accounts: a config reload may accompany freshly + // imported credentials, and (critically) lets a process that failed + // account discovery at boot recover without a full restart. + this._refreshClaudeAccounts(); // Invalidate cached agents so next message uses updated config for (const tabAgent of this.tabAgents.values()) { tabAgent.agent = null; @@ -575,11 +579,21 @@ export class AgentManager { if (key.provider === "anthropic") { // Anthropic provider: resolve credentials from Claude accounts const credFile = key.credentials_file; - const account = + const findAccount = () => this.claudeAccounts.find((a) => a.id === effectiveKeyId) ?? (credFile ? this.claudeAccounts.find((a) => a.source === credFile) : this.claudeAccounts[0]); + let account = findAccount(); + // Self-heal: account discovery runs once at construction and can + // fail at boot (e.g. the data dir isn't mounted yet and + // getDatabase() throws EACCES), leaving claudeAccounts empty for + // the process lifetime. If the lookup fails, re-run discovery now + // that the DB is reachable and retry before giving up. + if (!account) { + this._refreshClaudeAccounts(); + account = findAccount(); + } if (account) { const creds = refreshAccountCredentials(account); if (creds && creds.expiresAt > Date.now() + 60_000) { diff --git a/packaging/dispatch-api@.service b/packaging/dispatch-api@.service index 3449fcb..e1946dd 100644 --- a/packaging/dispatch-api@.service +++ b/packaging/dispatch-api@.service @@ -1,19 +1,19 @@ -# Dispatch API — system service template. 
-# Runs under the system manager (PID 1) but drops privileges to the user named -# in the instance: `dispatch-api@tradam` runs as the `tradam` user. -# -# Enable/start: -# sudo systemctl enable --now dispatch-api@<user> [Unit] -Description=Dispatch API Backend (running as %i) -After=network.target +Description=Dispatch API server (user %i) +After=network.target local-fs.target home.mount +# The API stores its SQLite DB and reads Claude credentials under the user's +# home (~/.local/share/dispatch). If /home is a separate filesystem, starting +# before it is mounted makes credential discovery fail (EACCES) and Claude +# tabs fall back to an empty API key (401). Ensure /home is mounted first. +RequiresMountsFor=/home [Service] Type=simple User=%i +Environment=NODE_ENV=production +Environment=PORT=18390 WorkingDirectory=/opt/dispatch ExecStart=/usr/bin/bun packages/api/src/index.ts -EnvironmentFile=-/etc/dispatch/dispatch-api.conf Restart=on-failure RestartSec=5 diff --git a/packaging/s6/dispatch-api-srv/run b/packaging/s6/dispatch-api-srv/run index 5754e3d..f9c3a67 100644 --- a/packaging/s6/dispatch-api-srv/run +++ b/packaging/s6/dispatch-api-srv/run @@ -16,6 +16,27 @@ cd "$DISPATCH_DIR" || exit 1 # Merge stderr into stdout so both get logged by the consumer. exec 2>&1 +# Wait for the home directory to be available before starting. +# +# On the cyberdeck /home is a separate filesystem (ext4 on /dev/sda4). If this +# service starts before that mount is ready, the API runs as `tradam` with +# HOME=/home/tradam while /home/tradam does not yet exist. Creating the data +# dir (~/.local/share/dispatch) then fails with EACCES, Claude credential +# discovery silently fails, and Claude tabs fall back to an empty API key +# (401 from OpenCode Zen) for the entire lifetime of the process. Block until +# the home directory appears (capped at ~30s as a safety net). Where /home is +# part of the root filesystem this check passes immediately. +i=0 +while [ ! 
-d "/home/tradam" ]; do + i=$((i + 1)) + if [ "$i" -ge 30 ]; then + echo "dispatch-api: /home/tradam still missing after ${i}s — starting anyway" >&2 + break + fi + echo "dispatch-api: waiting for /home/tradam to be available (${i})..." >&2 + sleep 1 +done + # Drop privileges to tradam and run bun. exec /usr/bin/s6-setuidgid tradam \ /usr/bin/env \ |
