From f65d0f9c5d95d814ead5c3a3364b492807f513a4 Mon Sep 17 00:00:00 2001 From: Nicolas Richart <nicolas.richart@epfl.ch> Date: Tue, 23 Jul 2024 15:38:29 +0200 Subject: [PATCH] nccl config in modules --- stacks/pinot-noir/modules_kuma_h100.yaml | 10 +++++----- stacks/pinot-noir/modules_kuma_l40s.yaml | 5 +++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/stacks/pinot-noir/modules_kuma_h100.yaml b/stacks/pinot-noir/modules_kuma_h100.yaml index 997a2e1..4599ea4 100644 --- a/stacks/pinot-noir/modules_kuma_h100.yaml +++ b/stacks/pinot-noir/modules_kuma_h100.yaml @@ -7,8 +7,8 @@ modules: OMPI_MCA_pml: 'ucx' OMPI_MCA_osc: 'ucx' UCX_NET_DEVICES: 'mlx5_2:1,mlx5_3:1' - nccl: - environment: - set: - NCCL_IB_HCA: 'mlx5_2:1,mlx5_3:1' - NCCL_SOCKET_IFNAME: 'bond0' + nccl: + environment: + set: + NCCL_IB_HCA: 'mlx5_2:1,mlx5_3:1' + NCCL_SOCKET_IFNAME: 'bond0' diff --git a/stacks/pinot-noir/modules_kuma_l40s.yaml b/stacks/pinot-noir/modules_kuma_l40s.yaml index 7be541d..1f147c1 100644 --- a/stacks/pinot-noir/modules_kuma_l40s.yaml +++ b/stacks/pinot-noir/modules_kuma_l40s.yaml @@ -7,3 +7,8 @@ modules: OMPI_MCA_pml: 'ucx' OMPI_MCA_osc: 'ucx' UCX_NET_DEVICES: 'mlx5_0:1,mlx5_1:1' + nccl: + environment: + set: + NCCL_IB_HCA: 'mlx5_0:1,mlx5_1:1' + NCCL_SOCKET_IFNAME: 'bond0' -- GitLab