fix(dashboard): stop server event loop on correct thread
When idle shutdown triggered _stop_server(), it created a new event loop and ran server.stop() on that loop, while the daemon thread was still running loop.run_forever() on the original event loop. This left the sockets bound, causing "address already in use" errors on restart. The fix stores references to the server's event loop and thread, then uses call_soon_threadsafe(loop.stop) to signal the correct loop to exit. Joining the thread (with a 5-second timeout) waits for the loop to finish so that sockets are released before the next server starts.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -35,6 +35,8 @@ IDLE_SHUTDOWN_SECONDS = int(os.environ.get("IDLE_SHUTDOWN_SECONDS", "300"))
|
|||||||
_last_activity_time: float = time.time()
|
_last_activity_time: float = time.time()
|
||||||
_idle_checker_started = False
|
_idle_checker_started = False
|
||||||
_server_ref: SimulationServer | None = None # Reference for idle checker thread
|
_server_ref: SimulationServer | None = None # Reference for idle checker thread
|
||||||
|
_server_loop: asyncio.AbstractEventLoop | None = None # Event loop running the server
|
||||||
|
_server_thread: threading.Thread | None = None # Thread running the server event loop
|
||||||
|
|
||||||
|
|
||||||
def _idle_checker() -> None:
|
def _idle_checker() -> None:
|
||||||
@@ -54,16 +56,19 @@ def _idle_checker() -> None:
|
|||||||
|
|
||||||
def _stop_server() -> None:
|
def _stop_server() -> None:
|
||||||
"""Stop the server and clear caches for fresh restart."""
|
"""Stop the server and clear caches for fresh restart."""
|
||||||
global _server_ref, _idle_checker_started
|
global _server_ref, _idle_checker_started, _server_loop, _server_thread
|
||||||
if _server_ref is not None:
|
|
||||||
# Stop the server
|
if _server_loop is not None and _server_thread is not None:
|
||||||
loop = asyncio.new_event_loop()
|
# Schedule stop on the correct event loop (the one actually running the server)
|
||||||
try:
|
# This causes loop.run_forever() to exit in the daemon thread
|
||||||
loop.run_until_complete(_server_ref.stop())
|
_server_loop.call_soon_threadsafe(_server_loop.stop)
|
||||||
except Exception:
|
|
||||||
pass
|
# Wait for thread to exit (with timeout to avoid hanging)
|
||||||
finally:
|
_server_thread.join(timeout=5.0)
|
||||||
loop.close()
|
|
||||||
|
_server_loop = None
|
||||||
|
_server_thread = None
|
||||||
|
|
||||||
_server_ref = None
|
_server_ref = None
|
||||||
|
|
||||||
# Clear Streamlit's cached server so next visitor gets fresh instance
|
# Clear Streamlit's cached server so next visitor gets fresh instance
|
||||||
@@ -136,6 +141,8 @@ def get_or_create_server() -> SimulationServer:
|
|||||||
Returns:
|
Returns:
|
||||||
The simulation server instance.
|
The simulation server instance.
|
||||||
"""
|
"""
|
||||||
|
global _server_loop, _server_thread
|
||||||
|
|
||||||
server = SimulationServer(
|
server = SimulationServer(
|
||||||
ServerConfig(
|
ServerConfig(
|
||||||
host="127.0.0.1",
|
host="127.0.0.1",
|
||||||
@@ -151,7 +158,9 @@ def get_or_create_server() -> SimulationServer:
|
|||||||
|
|
||||||
def run_server() -> None:
|
def run_server() -> None:
|
||||||
"""Run the async server in a new event loop."""
|
"""Run the async server in a new event loop."""
|
||||||
|
global _server_loop
|
||||||
loop = asyncio.new_event_loop()
|
loop = asyncio.new_event_loop()
|
||||||
|
_server_loop = loop # Store reference for _stop_server to use
|
||||||
asyncio.set_event_loop(loop)
|
asyncio.set_event_loop(loop)
|
||||||
try:
|
try:
|
||||||
loop.run_until_complete(server.start())
|
loop.run_until_complete(server.start())
|
||||||
@@ -171,6 +180,7 @@ def get_or_create_server() -> SimulationServer:
|
|||||||
|
|
||||||
thread = threading.Thread(target=run_server, daemon=True)
|
thread = threading.Thread(target=run_server, daemon=True)
|
||||||
thread.start()
|
thread.start()
|
||||||
|
_server_thread = thread # Store reference for _stop_server to use
|
||||||
|
|
||||||
# Wait for server to be fully started (up to 5 seconds)
|
# Wait for server to be fully started (up to 5 seconds)
|
||||||
if not server_ready.wait(timeout=5.0):
|
if not server_ready.wait(timeout=5.0):
|
||||||
|
|||||||
Reference in New Issue
Block a user