From 19654480ad7c828d39ed886d03125d0ee37d6e31 Mon Sep 17 00:00:00 2001
From: Nicolas Richart <networms@gmail.ch>
Date: Tue, 9 Jul 2024 16:07:13 +0200
Subject: [PATCH] Adapting version of rdma-core and adding l40s definitions

---
 dockerfiles/rhel9-kuma/Dockerfile             |  2 +-
 .../pinot-noir-gcc/definitions_kuma_l40s.yaml |  7 ++
 stacks/pinot-noir-gcc/packages_kuma_h100.yaml |  2 +-
 stacks/pinot-noir-gcc/packages_kuma_l40s.yaml | 75 ++++++++++++++++++-
 4 files changed, 80 insertions(+), 6 deletions(-)
 create mode 100644 stacks/pinot-noir-gcc/definitions_kuma_l40s.yaml

diff --git a/dockerfiles/rhel9-kuma/Dockerfile b/dockerfiles/rhel9-kuma/Dockerfile
index 2b6da77..c3cc960 100644
--- a/dockerfiles/rhel9-kuma/Dockerfile
+++ b/dockerfiles/rhel9-kuma/Dockerfile
@@ -10,7 +10,7 @@ FROM registry.c4science.ch/scitas-stack/rhel9-base:9.0
 ARG APPSTREAM_REPO=rhel-9-for-x86_64-appstream-rpms
 ARG SLURM_REPO=SCITAS_product-slurm_repo-slurm-2311
 ARG PMIX_REPO=SCITAS_product-scitas_repo-scitas-cluster
-ARG MLNX_REPO=SCITAS_product-mlnx_repo-mlnx-ofed-24_01-0_3_3_1-5_14_0-70_30_1-rhel90
+ARG MLNX_REPO=SCITAS_product-mlnx_repo-mlnx-ofed-24_04-0_6_6_0-5_14_0-70_30_1-rhel90
 ARG CUDA_REPO=SCITAS_product-cuda_repo-cuda-535_154_05-x86_64-rhel9
 ARG GPFS_REPO=SCITAS_product-gpfs_repo-gpfs-519
 
diff --git a/stacks/pinot-noir-gcc/definitions_kuma_l40s.yaml b/stacks/pinot-noir-gcc/definitions_kuma_l40s.yaml
new file mode 100644
index 0000000..0f156fa
--- /dev/null
+++ b/stacks/pinot-noir-gcc/definitions_kuma_l40s.yaml
@@ -0,0 +1,7 @@
+definitions:
+  - cuda_system_codes:
+      - cuda
+      - cudnn
+
+  - cuda_serial_codes:
+      - nccl
diff --git a/stacks/pinot-noir-gcc/packages_kuma_h100.yaml b/stacks/pinot-noir-gcc/packages_kuma_h100.yaml
index 07fab46..9dd1e2a 100644
--- a/stacks/pinot-noir-gcc/packages_kuma_h100.yaml
+++ b/stacks/pinot-noir-gcc/packages_kuma_h100.yaml
@@ -62,7 +62,7 @@ packages:
   rdma-core:
     buildable: false
     externals:
-    - spec: 'rdma-core@47.1'
+    - spec: 'rdma-core@51.1'
       prefix: /usr
 
   slurm:
diff --git a/stacks/pinot-noir-gcc/packages_kuma_l40s.yaml b/stacks/pinot-noir-gcc/packages_kuma_l40s.yaml
index ad6b8fb..e73666a 100644
--- a/stacks/pinot-noir-gcc/packages_kuma_l40s.yaml
+++ b/stacks/pinot-noir-gcc/packages_kuma_l40s.yaml
@@ -2,10 +2,77 @@ packages:
   all:
     target: ["zen4"]
     require:
-      - spec: ^openmpi +cuda
-        when: '%gcc ^mpi'
+      - "~rocm"
 
-      - spec: cuda_arch=89
+      - spec: cuda_arch=89 target=zen4
         when: '+cuda'
 
-      - '+cuda'
+      - spec: '+cuda'
+        when: '%gcc'
+
+  # ---------------------------------------------------------------------------
+  hypre:
+    require:
+    - spec: '+cuda cuda_arch=89 +unified-memory'
+      when: '%gcc'
+
+  kokkos:
+    require:
+    - spec: '+cuda cuda_arch=89 +cuda_uvm +wrapper ~openmptarget'
+      when: '%gcc'
+
+  openmpi:
+    require:
+      - spec: '+cuda cuda_arch=89 target=zen4'
+
+  petsc:
+    require:
+    - spec: '+cuda cuda_arch=89'
+      when: '%gcc'
+
+  py-tensorflow:
+    require:
+    - spec: '+cuda cuda_arch=89 +nccl'
+
+  py-torch:
+    require:
+    - spec: '+cuda cuda_arch=89 +nccl +cudnn'
+
+  quantum-espresso:
+    require:
+    - spec: '~cuda'
+
+  suite-sparse:
+    require:
+    - spec: '+cuda'
+
+  ucx:
+    require:
+      - spec: '+xpmem +cma +rdmacm +rc +ud +dc +verbs +ib_hw_tm +gdrcopy +cuda cuda_arch=89 ~rocm target=zen4'
+
+  # ---------------------------------------------------------------------------
+  # Externals
+  # ---------------------------------------------------------------------------
+  pmix:
+    buildable: false
+    externals:
+    - spec: 'pmix@5.0.2'
+      prefix: /usr
+
+  rdma-core:
+    buildable: false
+    externals:
+    - spec: 'rdma-core@51.1'
+      prefix: /usr
+
+  slurm:
+    buildable: false
+    externals:
+    - spec: 'slurm@24-05-0-2'
+      prefix: /usr
+
+  xpmem:
+    buildable: false
+    externals:
+    - spec: 'xpmem@2.7.3'
+      prefix: /usr
-- 
GitLab