diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fdda41d61bb0c1440ec2351043e237edd9cec1b3..678603b7eaa484798c09aed6c0fc51d352b9a541 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -173,6 +173,7 @@ spack:install: --show-log-on-error - ${STACK_LOCATION}/spack/bin/spack -e ${environment} + buildcache create --update-index --key EDC904DCE3D2E84E /buildcache diff --git a/dockerfiles/rhel9-kuma/Dockerfile b/dockerfiles/rhel9-kuma/Dockerfile index e120d5c882fa0ca9f31eae8d6ccd743d29d633c5..c3cc9608cac6f7dbe5938087146f2d851e67ac98 100644 --- a/dockerfiles/rhel9-kuma/Dockerfile +++ b/dockerfiles/rhel9-kuma/Dockerfile @@ -4,31 +4,36 @@ FROM registry.c4science.ch/scitas-stack/rhel9-base:9.0 #dnf group install "Development Tools" #dnf install curl findutils gcc-gfortran gnupg2 hostname iproute redhat-lsb-core python3 python3-pip python3-setuptools unzip python3-boto3 +# zlib-devel needed to compile gcc? + +# repos for kuma +ARG APPSTREAM_REPO=rhel-9-for-x86_64-appstream-rpms +ARG SLURM_REPO=SCITAS_product-slurm_repo-slurm-2311 +ARG PMIX_REPO=SCITAS_product-scitas_repo-scitas-cluster +ARG MLNX_REPO=SCITAS_product-mlnx_repo-mlnx-ofed-24_04-0_6_6_0-5_14_0-70_30_1-rhel90 +ARG CUDA_REPO=SCITAS_product-cuda_repo-cuda-535_154_05-x86_64-rhel9 +ARG GPFS_REPO=SCITAS_product-gpfs_repo-gpfs-519 + +RUN sed /etc/yum.repos.d/ubi.repo -i -e 's/enabled = 1/enabled = 0/' + # spack dependencies -RUN yum install -y \ +RUN yum install -y --enablerepo=$APPSTREAM_REPO \ gcc-g++ gcc-gfortran \ patchelf findutils patch \ - xz bzip2 file gnupg2 hostname iproute unzip\ + xz bzip2 file gnupg2 hostname iproute unzip zlib-devel\ python3 python3-pip python3-setuptools python3-boto3 \ git subversion mercurial \ glibc-gconv-extra \ jq \ && yum -y clean all && rm -fr /var/cache -# repos for kuma -ARG SLURM_REPO=SCITAS_product-slurm_repo-slurm-2311 -ARG PMIX_REPO=SCITAS_product-scitas_repo-scitas-cluster -ARG MLNX_REPO=SCITAS_product-mlnx_repo-mlnx-ofed-24_01-0_3_3_1-5_14_0-70_30_1-rhel90 -ARG 
CUDA_REPO=SCITAS_product-cuda_repo-cuda-535_154_05-x86_64-rhel9 -ARG GPFS_REPO=SCITAS_product-gpfs_repo-gpfs-519 - # stack dependencies RUN yum install -y --enablerepo=$SLURM_REPO \ slurm-devel slurm-libpmi \ && yum -y clean all && rm -fr /var/cache RUN yum install -y --enablerepo=$PMIX_REPO \ - slurm-devel slurm-libpmi \ + pmix \ && yum -y clean all && rm -fr /var/cache RUN yum install -y --enablerepo=$MLNX_REPO \ diff --git a/stacks/pinot-noir-gcc/definitions_kuma_l40s.yaml b/stacks/pinot-noir-gcc/definitions_kuma_l40s.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f156fa830b9567b3e7839218a0a88c16076c6ca --- /dev/null +++ b/stacks/pinot-noir-gcc/definitions_kuma_l40s.yaml @@ -0,0 +1,7 @@ +definitions: + - cuda_system_codes: + - cuda + - cudnn + + - cuda_serial_codes: + - nccl diff --git a/stacks/pinot-noir-gcc/packages_kuma_h100.yaml b/stacks/pinot-noir-gcc/packages_kuma_h100.yaml index 07fab46b1d2c1b45b5ea688ff8eb341829e37bb6..9dd1e2a2a7ee17ed02b03f762ac3356677eaed90 100644 --- a/stacks/pinot-noir-gcc/packages_kuma_h100.yaml +++ b/stacks/pinot-noir-gcc/packages_kuma_h100.yaml @@ -62,7 +62,7 @@ packages: rdma-core: buildable: false externals: - - spec: 'rdma-core@47.1' + - spec: 'rdma-core@51.1' prefix: /usr slurm: diff --git a/stacks/pinot-noir-gcc/packages_kuma_l40s.yaml b/stacks/pinot-noir-gcc/packages_kuma_l40s.yaml index ad6b8fb44336cd6730bb956c5d8489ff7f0acf0b..e73666abca13ad29f14d633c2d13b060b6641e8e 100644 --- a/stacks/pinot-noir-gcc/packages_kuma_l40s.yaml +++ b/stacks/pinot-noir-gcc/packages_kuma_l40s.yaml @@ -2,10 +2,77 @@ packages: all: target: ["zen4"] require: - - spec: ^openmpi +cuda - when: '%gcc ^mpi' + - "~rocm" - - spec: cuda_arch=89 + - spec: cuda_arch=89 target=zen4 when: '+cuda' - - '+cuda' + - spec: '+cuda' + when: '%gcc' + + # --------------------------------------------------------------------------- + hypre: + require: + - spec: '+cuda cuda_arch=89 +unified-memory' + when: '%gcc' + + kokkos: + require: + - 
spec: '+cuda cuda_arch=89 +cuda_uvm +wrapper ~openmptarget' + when: '%gcc' + + openmpi: + require: + - spec: '+cuda cuda_arch=89 target=zen4' + + petsc: + require: + - spec: '+cuda cuda_arch=89' + when: '%gcc' + + py-tensorflow: + require: + - spec: '+cuda cuda_arch=89 +nccl' + + py-torch: + require: + - spec: '+cuda cuda_arch=89 +nccl +cudnn' + + quantum-espresso: + require: + - spec: '~cuda' + + suite-sparse: + require: + - spec: '+cuda' + + ucx: + require: + - spec: '+xpmem +cma +rdmacm +rc +ud +dc +verbs +ib_hw_tm +gdrcopy +cuda cuda_arch=89 ~rocm target=zen4' + + # --------------------------------------------------------------------------- + # Externals + # --------------------------------------------------------------------------- + pmix: + buildable: false + externals: + - spec: 'pmix@5.0.2' + prefix: /usr + + rdma-core: + buildable: false + externals: + - spec: 'rdma-core@51.1' + prefix: /usr + + slurm: + buildable: false + externals: + - spec: 'slurm@24-05-0-2' + prefix: /usr + + xpmem: + buildable: false + externals: + - spec: 'xpmem@2.7.3' + prefix: /usr