diff --git a/.changeset/fix-interactive-connect-resume.md b/.changeset/fix-interactive-connect-resume.md new file mode 100644 index 00000000..937e8dda --- /dev/null +++ b/.changeset/fix-interactive-connect-resume.md @@ -0,0 +1,5 @@ +--- +"sandbox": patch +--- + +Fix `sandbox connect` hanging or failing on a stopped/resumed sandbox. The interactive shell now surfaces `attach()` failures instead of swallowing them once the connection handshake lands, always stops the spinner on teardown (so a failure can no longer hang the process), and includes the in-sandbox server's stderr when the interactive server exits early. The in-sandbox `vc-interactive-server` also health-checks a reused server before trusting a leftover config file, so a stale `/tmp/vercel/interactive/config.json` restored from a snapshot no longer causes it to connect to a dead socket. diff --git a/packages/pty-tunnel-server/modes/remote.go b/packages/pty-tunnel-server/modes/remote.go index d4ac81b4..43d471ea 100644 --- a/packages/pty-tunnel-server/modes/remote.go +++ b/packages/pty-tunnel-server/modes/remote.go @@ -33,10 +33,46 @@ var _ Bootstrapper = (*ExternalProcessBootstrapper)(nil) // GetOrCreateServer implements Bootstrapper. func (e *ExternalProcessBootstrapper) GetOrCreateServer() (info config.ServerInfo, err error) { info, err = config.VerifyConnection(e.ConfigPath) + if err == nil { + // A live PID is not sufficient evidence that the server is usable. Across + // a snapshot/resume the config file is restored from the snapshot while + // the original server process is gone: the recorded PID may have been + // reused by an unrelated process, or a memory-restored daemon may no + // longer be serving. 
+ if healthErr := e.pingServer(info.Port); healthErr == nil { + return info, nil + } else { + e.Logger.Info( + "Existing server config is stale (failed health check), spawning a new server", + "port", info.Port, + "pid", info.PID, + "error", healthErr, + ) + } + } + return e.spawnServer() +} + +func (e *ExternalProcessBootstrapper) pingServer(port int) error { + ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) + defer cancel() + + url := fmt.Sprintf("http://localhost:%d/health", port) + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) if err != nil { - return e.spawnServer() + return err } - return + + res, err := http.DefaultClient.Do(req) + if err != nil { + return err + } + defer res.Body.Close() + + if res.StatusCode != http.StatusOK { + return fmt.Errorf("unexpected status %d", res.StatusCode) + } + return nil } func (e *ExternalProcessBootstrapper) spawnServer() (info config.ServerInfo, err error) { @@ -66,6 +102,11 @@ func (e *ExternalProcessBootstrapper) spawnServer() (info config.ServerInfo, err basename := path.Join(os.TempDir(), fmt.Sprintf("pty-tunnel-server-%d", time.Now().Nanosecond())) e.Logger.Debug("Creating temporary files for server stdout/stderr", "basename", basename) + // Remove any leftover config before starting the new server. 
+ if rmErr := os.Remove(e.ConfigPath); rmErr != nil && !os.IsNotExist(rmErr) { + e.Logger.Debug("Could not remove stale server config", "path", e.ConfigPath, "error", rmErr) + } + e.Logger.Info("Spawning new pty-tunnel-server process", "args", cmd.Args) cmd.SysProcAttr = &syscall.SysProcAttr{ Setpgid: true, // new process group diff --git a/packages/sandbox/src/commands/exec.ts b/packages/sandbox/src/commands/exec.ts index e9b83bd7..85aa4bca 100644 --- a/packages/sandbox/src/commands/exec.ts +++ b/packages/sandbox/src/commands/exec.ts @@ -114,6 +114,9 @@ export const exec = cmd.command({ projectId: project, teamId: team, token, + // Resume up front so the sandbox is already running by the time the + // interactive-shell setup runs its parallel steps. + resume: true, __includeSystemRoutes: true, }); diff --git a/packages/sandbox/src/interactive-shell/interactive-shell.ts b/packages/sandbox/src/interactive-shell/interactive-shell.ts index 3e90e02c..212a6e56 100644 --- a/packages/sandbox/src/interactive-shell/interactive-shell.ts +++ b/packages/sandbox/src/interactive-shell/interactive-shell.ts @@ -193,7 +193,7 @@ export async function startInteractiveShell(options: { using progress = acquireRelease( () => ora({ discardStdin: false }).start(), - (s) => s.clear(), + (s) => s.stop(), ); progress.text = "Setting up sandbox environment"; @@ -226,7 +226,12 @@ export async function startInteractiveShell(options: { }); await Promise.all([ - throwIfCommandPrematurelyExited(command, waitForProcess.signal), + // `throwIfCommandPrematurelyExited` returns quietly if aborted once the + // connection is established; this catch is a backstop for a stray interruption + // (a genuine premature exit is thrown before the abort and still propagates). 
+ throwIfCommandPrematurelyExited(command, waitForProcess.signal).catch( + waitForProcess.ignoreInterruptions, + ), attach({ sandbox: options.sandbox, progress, @@ -237,28 +242,40 @@ export async function startInteractiveShell(options: { printCommand(options.execution[0], options.execution.slice(1)), ), }), - ]).catch(waitForProcess.ignoreInterruptions); + ]); } async function throwIfCommandPrematurelyExited( command: Command, signal: AbortSignal, ) { + let exitCode: number; try { - const { exitCode } = await command.wait({ signal }); - throw new Error( - [ - `Interactive shell failed to start (exit code: ${exitCode}).`, - `${chalk.bold("hint:")} The sandbox may have timed out or encountered an error.`, - "╰▶ Check sandbox status with `sandbox list` or view logs for details.", - ].join("\n"), - ); + ({ exitCode } = await command.wait({ signal })); } catch (err) { if (signal.aborted) { return; } throw err; } + + // The interactive server process exited before a connection was established. + // Surface its stderr. + let serverError = ""; + try { + serverError = (await command.stderr({ signal })).trim(); + } catch { + // Best-effort: never let reading the failure output mask the real error. + } + + throw new Error( + [ + `Interactive shell failed to start (exit code: ${exitCode}).`, + `${chalk.bold("hint:")} The sandbox may have timed out or encountered an error.`, + ...(serverError ? [chalk.dim(serverError)] : []), + "╰▶ Check sandbox status with `sandbox list` or view logs for details.", + ].join("\n"), + ); } async function attach({