Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion src/forge/controller/provisioner.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,14 @@
from monarch._src.actor.actor_mesh import ActorMesh
from monarch._src.actor.shape import Extent

from monarch.actor import Actor, endpoint, HostMesh, ProcMesh, this_host
from monarch.actor import (
Actor,
endpoint,
HostMesh,
ProcMesh,
shutdown_context,
this_host,
)

from monarch.tools import commands
from monarch.utils import setup_env_for_distributed
Expand Down Expand Up @@ -486,6 +493,21 @@ async def shutdown_all_allocations(self):
self._registered_actors.clear()
self._registered_services.clear()

# -- HostMeshes (including the implicit local host) ---
logger.info(f"Shutting down {len(self._host_mesh_map)} HostMesh(es)...")
results = await asyncio.gather(
*[host_mesh.shutdown() for host_mesh in self._host_mesh_map.values()],
return_exceptions=True,
)
for (name, _), result in zip(self._host_mesh_map.items(), results, strict=True):
if isinstance(result, Exception):
logger.warning(f"Failed to shutdown HostMesh {name}: {result}")
self._host_mesh_map.clear()
try:
await shutdown_context()
except Exception as e:
logger.warning(f"Failed to shutdown context: {e}")

async def shutdown(self):
"""Tears down all remaining remote allocations."""
await self.shutdown_all_allocations()
Expand Down
Loading