fix(dashboard): stop server event loop on correct thread
When idle shutdown triggered _stop_server(), it created a new event loop and called server.stop() on that loop, but the daemon thread was still running loop.run_forever() on the original event loop. This left sockets bound, causing "address already in use" on restart. Fix by storing references to the server's event loop and thread, then using call_soon_threadsafe(loop.stop) to signal the correct loop to exit. Joining the thread (with a 5-second timeout) then waits for it to exit so that sockets are released before the next server starts. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -35,6 +35,8 @@ IDLE_SHUTDOWN_SECONDS = int(os.environ.get("IDLE_SHUTDOWN_SECONDS", "300"))
|
||||
_last_activity_time: float = time.time()
|
||||
_idle_checker_started = False
|
||||
_server_ref: SimulationServer | None = None # Reference for idle checker thread
|
||||
_server_loop: asyncio.AbstractEventLoop | None = None # Event loop running the server
|
||||
_server_thread: threading.Thread | None = None # Thread running the server event loop
|
||||
|
||||
|
||||
def _idle_checker() -> None:
|
||||
@@ -54,17 +56,20 @@ def _idle_checker() -> None:
|
||||
|
||||
def _stop_server() -> None:
|
||||
"""Stop the server and clear caches for fresh restart."""
|
||||
global _server_ref, _idle_checker_started
|
||||
if _server_ref is not None:
|
||||
# Stop the server
|
||||
loop = asyncio.new_event_loop()
|
||||
try:
|
||||
loop.run_until_complete(_server_ref.stop())
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
loop.close()
|
||||
_server_ref = None
|
||||
global _server_ref, _idle_checker_started, _server_loop, _server_thread
|
||||
|
||||
if _server_loop is not None and _server_thread is not None:
|
||||
# Schedule stop on the correct event loop (the one actually running the server)
|
||||
# This causes loop.run_forever() to exit in the daemon thread
|
||||
_server_loop.call_soon_threadsafe(_server_loop.stop)
|
||||
|
||||
# Wait for thread to exit (with timeout to avoid hanging)
|
||||
_server_thread.join(timeout=5.0)
|
||||
|
||||
_server_loop = None
|
||||
_server_thread = None
|
||||
|
||||
_server_ref = None
|
||||
|
||||
# Clear Streamlit's cached server so next visitor gets fresh instance
|
||||
get_or_create_server.clear()
|
||||
@@ -136,6 +141,8 @@ def get_or_create_server() -> SimulationServer:
|
||||
Returns:
|
||||
The simulation server instance.
|
||||
"""
|
||||
global _server_loop, _server_thread
|
||||
|
||||
server = SimulationServer(
|
||||
ServerConfig(
|
||||
host="127.0.0.1",
|
||||
@@ -151,7 +158,9 @@ def get_or_create_server() -> SimulationServer:
|
||||
|
||||
def run_server() -> None:
|
||||
"""Run the async server in a new event loop."""
|
||||
global _server_loop
|
||||
loop = asyncio.new_event_loop()
|
||||
_server_loop = loop # Store reference for _stop_server to use
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
loop.run_until_complete(server.start())
|
||||
@@ -171,6 +180,7 @@ def get_or_create_server() -> SimulationServer:
|
||||
|
||||
thread = threading.Thread(target=run_server, daemon=True)
|
||||
thread.start()
|
||||
_server_thread = thread # Store reference for _stop_server to use
|
||||
|
||||
# Wait for server to be fully started (up to 5 seconds)
|
||||
if not server_ready.wait(timeout=5.0):
|
||||
|
||||
Reference in New Issue
Block a user