fix(dashboard): improve server singleton robustness
- Add reuse_address=True when starting the TCP server so the port can be rebound quickly after a process restart (old sockets lingering in the TIME_WAIT state).
- Add an _is_server_responsive() check that verifies the server is actually accepting connections, rather than just trusting the is_running flag, which can be stale if the server thread died unexpectedly.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -107,6 +107,16 @@ class TestProgress:
|
||||
return time.time() - self.started_at
|
||||
|
||||
|
||||
def _is_server_responsive(host: str = "127.0.0.1", port: int = 5001) -> bool:
|
||||
"""Check if a server is actually responding on the given port."""
|
||||
import socket
|
||||
try:
|
||||
with socket.create_connection((host, port), timeout=0.5):
|
||||
return True
|
||||
except (OSError, ConnectionRefusedError, TimeoutError):
|
||||
return False
|
||||
|
||||
|
||||
def get_or_create_server() -> SimulationServer:
|
||||
"""Get or create the simulation server singleton.
|
||||
|
||||
@@ -118,9 +128,14 @@ def get_or_create_server() -> SimulationServer:
|
||||
"""
|
||||
global _simulation_server, _server_thread
|
||||
|
||||
# Return existing server if it's running
|
||||
# Return existing server if it's running AND responsive
|
||||
if _simulation_server is not None and _simulation_server.is_running:
|
||||
# Verify the server is actually responding (not a stale flag)
|
||||
if _is_server_responsive():
|
||||
return _simulation_server
|
||||
# Server flag says running but it's not responsive - clean up
|
||||
_simulation_server = None
|
||||
_server_thread = None
|
||||
|
||||
# Create new server
|
||||
server = SimulationServer(
|
||||
|
||||
@@ -138,6 +138,7 @@ class InstrumentServer:
|
||||
handler,
|
||||
self._host,
|
||||
port,
|
||||
reuse_address=True,
|
||||
)
|
||||
self._servers.append(server)
|
||||
logger.info(
|
||||
|
||||
Reference in New Issue
Block a user