Summary of this patch:
  * .gitlab-ci.yml: add "--reservation scitas" to slurm_options for the
    kuma_h100 and kuma_l40s matrix entries.
  * stacks/pinot-noir-gcc/config.json: add an "nvhpc" 24.3 compiler entry.
  * stacks/pinot-noir-gcc/definitions_cuda.yaml: add cudnn@9.2.1.18-12 and
    nccl@2.20.5-1 next to the existing unpinned entries.
  * stacks/pinot-noir-gcc/spack.yaml: add an mpi_codes definition (ior) with
    its spec matrix; build serial_codes with $%compiler_gcc instead of
    $%compilers.
  * stacks/{pinot-noir-gcc,pinot-noir}/modules_kuma_h100.yaml: set
    NCCL_IB_HCA and NCCL_SOCKET_IFNAME for the nccl module.

NOTE(review): this patch was recovered from a copy in which every line break
and all leading whitespace had been collapsed. File headers, hunk headers and
the +/- markers are verbatim; the indentation of context/added/removed lines
and the position of blank context lines are inferred from the hunk line
counts and the usual Spack / GitLab CI layout. Confirm against the target
files before running `git apply`.

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 0522e65956541329c10786d4af03e4806c389da1..76cf9000f8b8d72edcc4bd2ca24891fd0a202dee 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -30,12 +30,12 @@ variables:
       # stack: ["pinot-noir"]
       # app_image: ["registry.c4science.ch/scitas-stack/rhel9-kuma"]
       - environment: [kuma_h100]
-        slurm_options: ["-c 64"]
+        slurm_options: ["-c 64 --reservation scitas"]
         stack: ["pinot-noir-gcc"]
         apptainer_options: ['--nv']
         app_image: ["registry.c4science.ch/scitas-stack/rhel9-kuma"]
       - environment: [kuma_l40s]
-        slurm_options: ["-c 64"]
+        slurm_options: ["-c 64 --reservation scitas"]
         stack: ["pinot-noir-gcc"]
         apptainer_options: ['--nv']
         app_image: ["registry.c4science.ch/scitas-stack/rhel9-kuma"]
diff --git a/stacks/pinot-noir-gcc/config.json b/stacks/pinot-noir-gcc/config.json
index 5527a93ec5c62dfb5d2560989833bdc9baaabade..530523f1986eaa170a83399cacdfa20fa1732a74 100644
--- a/stacks/pinot-noir-gcc/config.json
+++ b/stacks/pinot-noir-gcc/config.json
@@ -45,6 +45,11 @@
             "compiler": "gcc",
             "spec": "gcc@12.3.0 %gcc@11.2.1",
             "version": "12.3.0"
+        },
+        "nvhpc": {
+            "compiler": "nvhpc",
+            "spec": "nvhpc +blas +lapack +mpi@24.3 %gcc@11.2.1",
+            "version": "24.3"
         }
     },
     "system_packages": [
diff --git a/stacks/pinot-noir-gcc/definitions_cuda.yaml b/stacks/pinot-noir-gcc/definitions_cuda.yaml
index 4710276c2be849f7ba905a7c66ce192ee3a206eb..154a251f72827d434dc0ef6dcf2ed468460106d7 100644
--- a/stacks/pinot-noir-gcc/definitions_cuda.yaml
+++ b/stacks/pinot-noir-gcc/definitions_cuda.yaml
@@ -2,7 +2,9 @@ definitions:
   - cuda_system_codes:
     - cuda
     - cudnn
+    - cudnn@9.2.1.18-12
 
   - cuda_serial_codes:
     - nccl
+    - nccl@2.20.5-1
     - nvshmem
diff --git a/stacks/pinot-noir-gcc/modules_kuma_h100.yaml b/stacks/pinot-noir-gcc/modules_kuma_h100.yaml
index c4659078af130b7c2532939538c205d6d12edde9..4599ea4a749e4ffd3b6fcdb239b836e8fb83a7c7 100644
--- a/stacks/pinot-noir-gcc/modules_kuma_h100.yaml
+++ b/stacks/pinot-noir-gcc/modules_kuma_h100.yaml
@@ -7,3 +7,8 @@ modules:
             OMPI_MCA_pml: 'ucx'
             OMPI_MCA_osc: 'ucx'
             UCX_NET_DEVICES: 'mlx5_2:1,mlx5_3:1'
+      nccl:
+        environment:
+          set:
+            NCCL_IB_HCA: 'mlx5_2:1,mlx5_3:1'
+            NCCL_SOCKET_IFNAME: 'bond0'
diff --git a/stacks/pinot-noir-gcc/spack.yaml b/stacks/pinot-noir-gcc/spack.yaml
index 194bc387ba5b98af2b4f2e450f34cc6811d09168..05cae96448aafee4ec332981b95c4eef2bff976e 100644
--- a/stacks/pinot-noir-gcc/spack.yaml
+++ b/stacks/pinot-noir-gcc/spack.yaml
@@ -59,6 +59,9 @@ spack:
     - serial_codes:
       - python
 
+    - mpi_codes:
+      - ior
+
   # -------------------------------------------------------------------------
   # Empty to specialize in environments
   # -------------------------------------------------------------------------
@@ -88,12 +91,17 @@ spack:
 
     - matrix:
       - [$serial_codes]
-      - [$%compilers]
+      - [$%compiler_gcc]
 
     - matrix:
       - [$cuda_serial_codes]
       - [$%compiler_gcc]
 
+    - matrix:
+      - [$mpi_codes]
+      - [$^mpi_gcc]
+      - [$%compiler_gcc]
+
   view: false
   # default:
   # root: /stack/packages/
diff --git a/stacks/pinot-noir/modules_kuma_h100.yaml b/stacks/pinot-noir/modules_kuma_h100.yaml
index c4659078af130b7c2532939538c205d6d12edde9..997a2e17dbd2183ee05688ae39bbc6a9be1403e4 100644
--- a/stacks/pinot-noir/modules_kuma_h100.yaml
+++ b/stacks/pinot-noir/modules_kuma_h100.yaml
@@ -7,3 +7,8 @@ modules:
             OMPI_MCA_pml: 'ucx'
             OMPI_MCA_osc: 'ucx'
             UCX_NET_DEVICES: 'mlx5_2:1,mlx5_3:1'
+      nccl:
+        environment:
+          set:
+            NCCL_IB_HCA: 'mlx5_2:1,mlx5_3:1'
+            NCCL_SOCKET_IFNAME: 'bond0'