diff --git a/CHANGELOG.md b/CHANGELOG.md index b80f76fa..cadc35b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Arch `from_config` bug for literal params. - Fixed fused SiLU activation test. - Update `np.bool` to `np.bool_`. +- Added a workaround fix for the CUDA graphs error in multi-node runs. ### Security diff --git a/modulus/sym/trainer.py b/modulus/sym/trainer.py index 2aa4d5ea..9010c5f4 100644 --- a/modulus/sym/trainer.py +++ b/modulus/sym/trainer.py @@ -737,6 +737,10 @@ def _cuda_graph_training_step(self, step: int): self.g = torch.cuda.CUDAGraph() self.global_optimizer_model.zero_grad(set_to_none=True) + # TODO: temporary workaround until this issue is fixed: + # https://github.com/pytorch/pytorch/pull/104487#issuecomment-1638665876 + delay = os.environ.get("MODULUS_CUDA_GRAPH_CAPTURE_DELAY", "10") + time.sleep(int(delay)) with torch.cuda.graph(self.g): # compute gradients self.loss_static, self.losses_static = self.compute_gradients(