From 19654480ad7c828d39ed886d03125d0ee37d6e31 Mon Sep 17 00:00:00 2001 From: Nicolas Richart <networms@gmail.ch> Date: Tue, 9 Jul 2024 16:07:13 +0200 Subject: [PATCH] Adapting version of rdma-core and adding l40s definitions --- dockerfiles/rhel9-kuma/Dockerfile | 2 +- .../pinot-noir-gcc/definitions_kuma_l40s.yaml | 7 ++ stacks/pinot-noir-gcc/packages_kuma_h100.yaml | 2 +- stacks/pinot-noir-gcc/packages_kuma_l40s.yaml | 75 ++++++++++++++++++- 4 files changed, 80 insertions(+), 6 deletions(-) create mode 100644 stacks/pinot-noir-gcc/definitions_kuma_l40s.yaml diff --git a/dockerfiles/rhel9-kuma/Dockerfile b/dockerfiles/rhel9-kuma/Dockerfile index 2b6da77..c3cc960 100644 --- a/dockerfiles/rhel9-kuma/Dockerfile +++ b/dockerfiles/rhel9-kuma/Dockerfile @@ -10,7 +10,7 @@ FROM registry.c4science.ch/scitas-stack/rhel9-base:9.0 ARG APPSTREAM_REPO=rhel-9-for-x86_64-appstream-rpms ARG SLURM_REPO=SCITAS_product-slurm_repo-slurm-2311 ARG PMIX_REPO=SCITAS_product-scitas_repo-scitas-cluster -ARG MLNX_REPO=SCITAS_product-mlnx_repo-mlnx-ofed-24_01-0_3_3_1-5_14_0-70_30_1-rhel90 +ARG MLNX_REPO=SCITAS_product-mlnx_repo-mlnx-ofed-24_04-0_6_6_0-5_14_0-70_30_1-rhel90 ARG CUDA_REPO=SCITAS_product-cuda_repo-cuda-535_154_05-x86_64-rhel9 ARG GPFS_REPO=SCITAS_product-gpfs_repo-gpfs-519 diff --git a/stacks/pinot-noir-gcc/definitions_kuma_l40s.yaml b/stacks/pinot-noir-gcc/definitions_kuma_l40s.yaml new file mode 100644 index 0000000..0f156fa --- /dev/null +++ b/stacks/pinot-noir-gcc/definitions_kuma_l40s.yaml @@ -0,0 +1,7 @@ +definitions: + - cuda_system_codes: + - cuda + - cudnn + + - cuda_serial_codes: + - nccl diff --git a/stacks/pinot-noir-gcc/packages_kuma_h100.yaml b/stacks/pinot-noir-gcc/packages_kuma_h100.yaml index 07fab46..9dd1e2a 100644 --- a/stacks/pinot-noir-gcc/packages_kuma_h100.yaml +++ b/stacks/pinot-noir-gcc/packages_kuma_h100.yaml @@ -62,7 +62,7 @@ packages: rdma-core: buildable: false externals: - - spec: 'rdma-core@47.1' + - spec: 'rdma-core@51.1' prefix: /usr slurm: diff 
--git a/stacks/pinot-noir-gcc/packages_kuma_l40s.yaml b/stacks/pinot-noir-gcc/packages_kuma_l40s.yaml index ad6b8fb..e73666a 100644 --- a/stacks/pinot-noir-gcc/packages_kuma_l40s.yaml +++ b/stacks/pinot-noir-gcc/packages_kuma_l40s.yaml @@ -2,10 +2,77 @@ packages: all: target: ["zen4"] require: - - spec: ^openmpi +cuda - when: '%gcc ^mpi' + - "~rocm" - - spec: cuda_arch=89 + - spec: cuda_arch=89 target=zen4 when: '+cuda' - - '+cuda' + - spec: '+cuda' + when: '%gcc' + + # --------------------------------------------------------------------------- + hypre: + require: + - spec: '+cuda cuda_arch=89 +unified-memory' + when: '%gcc' + + kokkos: + require: + - spec: '+cuda cuda_arch=89 +cuda_uvm +wrapper ~openmptarget' + when: '%gcc' + + openmpi: + require: + - spec: '+cuda cuda_arch=89 target=zen4' + + petsc: + require: + - spec: '+cuda cuda_arch=89' + when: '%gcc' + + py-tensorflow: + require: + - spec: '+cuda cuda_arch=89 +nccl' + + py-torch: + require: + - spec: '+cuda cuda_arch=89 +nccl +cudnn' + + quantum-espresso: + require: + - spec: '~cuda' + + suite-sparse: + require: + - spec: '+cuda' + + ucx: + require: + - spec: '+xpmem +cma +rdmacm +rc +ud +dc +verbs +ib_hw_tm +gdrcopy +cuda cuda_arch=89 ~rocm target=zen4' + + # --------------------------------------------------------------------------- + # Externals + # --------------------------------------------------------------------------- + pmix: + buildable: false + externals: + - spec: 'pmix@5.0.2' + prefix: /usr + + rdma-core: + buildable: false + externals: + - spec: 'rdma-core@51.1' + prefix: /usr + + slurm: + buildable: false + externals: + - spec: 'slurm@24-05-0-2' + prefix: /usr + + xpmem: + buildable: false + externals: + - spec: 'xpmem@2.7.3' + prefix: /usr -- GitLab