diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 76cf9000f8b8d72edcc4bd2ca24891fd0a202dee..e0c2c53874fdccdae56348a4b6a97e85475cd7e1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,6 +17,8 @@ workflow: variables: TERM: ansi + CI_DATA_LT: "/work/scitas-ge/richart/ci" + CI_DATA_ST: "/scratch/richart/ci" .parallel_definition: parallel: @@ -51,16 +53,39 @@ variables: image: ${app_image} # ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------ +.squashfs_var: + rules: + - if: $CI_PIPELINE_SOURCE == "merge_request_event" + variables: + SQUASHFS_ID: ${CI_MERGE_REQUEST_IID} + - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH + variables: + SQUASHFS_ID: ${CI_DEFAULT_BRANCH} + .parallel_job: extends: - .parallel_definition + - .squashfs_var variables: - MOUNT_POINT: /ssoft/spack - BUILDCACHE: "/work/scitas-ge/richart/ci/buildcache" - MIRROR: "/work/scitas-ge/richart/ci/mirror" - FAKEHOME: "/scratch/richart" + MOUNT_POINT: $(jq -Mrc .stack.mount_point ${CI_PROJECT_DIR}/stacks/${stack}/config.json) + FAKEHOME: "/scratch/$(id -un)" COMMAND_OPTIONS_SBATCH: ${slurm_options} - APPTAINER_EXEC_OPTIONS: ${apptainer_options} --cleanenv -H $(mktemp -d -p ${FAKEHOME}/):/home/richart --bind ${BUILDCACHE}:/buildcache:rw --bind ${MIRROR}:${MOUNT_POINT}/spack-mirror:rw --bind ${CI_PROJECT_DIR}:${MOUNT_POINT} --bind /dev/tty + STACK_VERSION: $(jq -Mrc .stack.version ${CI_PROJECT_DIR}/stacks/${stack}/config.json) + image_name: "${stack}-${environment}-${SQUASHFS_ID}" + squashfs_image: $(ls -t1 ${CI_DATA_LT}/squashfs-cache/${image_name}*.sqfs 2> /dev/null | head -1) + APPTAINER_EXEC_OPTIONS: >- + ${apptainer_options} + --cleanenv + -H $(mktemp -d -p ${FAKEHOME}/):/home/$(id -un) + --bind ${CI_DATA_LT}/buildcache:${MOUNT_POINT}/buildcache + --bind ${CI_DATA_LT}/spack-mirror:${MOUNT_POINT}/spack-mirror + --bind ${CI_DATA_LT}/squashfs-cache/:/squashfs-cache + --bind 
${CI_DATA_ST}/overlayfs:/overlayfs + --fusemount "host:${CI_PROJECT_DIR}/ci/squashfuse_ll.sh ${CI_DATA_ST} ${image_name} ${squashfs_image} /overlayfs/lower-${image_name}" + --fusemount "container:${CI_PROJECT_DIR}/ci/fuse-overlayfs.sh ${image_name} ${MOUNT_POINT}/${stack}/${environment}/${STACK_VERSION}" + # after_script: + # - if [ $CI_JOB_STATUS != "success" ]; then exit 0; fi + # - ${CI_PROJECT_DIR}/ci/update_squashfs.sh .spack_cache: cache: @@ -73,12 +98,18 @@ variables: # ------------------------------------------------------------------------------ spack:checkout: stage: .pre + variables: + APPTAINER_EXEC_OPTIONS: >- + --cleanenv + --bind ${CI_DATA_LT} + --bind ${CI_DATA_ST} + --bind ${CI_DATA_LT}/squashfs-cache:/squashfs-cache extends: - .parallel_definition before_script: - git config --global --add --bool advice.detachedHead false script: - - ls + - ./ci/prepare_squashfs.sh timeout: 1h spack:setup: @@ -88,6 +119,7 @@ spack:setup: - .spack_cache script: - ci/setup_spack.sh + - ${CI_PROJECT_DIR}/ci/update_squashfs.sh needs: - job: spack:checkout @@ -100,6 +132,7 @@ spack:install_compilers: - ci/install_compilers.sh - source ci/stack_env.sh - ${STACK_LOCATION}/spack/bin/spack -e ${environment} config blame compilers + - ${CI_PROJECT_DIR}/ci/update_squashfs.sh artifacts: reports: junit: spack-install-*.xml @@ -114,10 +147,15 @@ spack:concretize: - .spack_cache script: - source ci/stack_env.sh - - ${STACK_LOCATION}/spack/bin/spack -e ${environment} config blame | tee config-${environment}-${stack}.log - - ${STACK_LOCATION}/spack/bin/spack -e ${environment} concretize | tee concretize-${environment}-${stack}.log + + - ${STACK_LOCATION}/spack/bin/spack -e ${environment} + config blame | tee config-${environment}-${stack}.log + + - ${STACK_LOCATION}/spack/bin/spack -e ${environment} + concretize | tee concretize-${environment}-${stack}.log - cp ${SPACK_SYSTEM_CONFIG_PATH}/spack.lock spack-${environment}-${stack}.lock + - ${CI_PROJECT_DIR}/ci/update_squashfs.sh 
artifacts: paths: - config-*.log @@ -137,18 +175,24 @@ spack:install: - echo "{}" > spack-install-${environment}.xml - ${STACK_LOCATION}/spack/bin/spack -e ${environment} install - --log-file spack-install-${environment}.xml - --log-format junit - --only-concrete - --fail-fast - --show-log-on-error + --log-file spack-install-${environment}.xml + --log-format junit + --only-concrete + --fail-fast + --show-log-on-error - ${STACK_LOCATION}/spack/bin/spack -e ${environment} find -vl | tee spack-find-${environment}-${stack}.log - ${STACK_LOCATION}/spack/bin/spack -e ${environment} - buildcache create - --update-index - --key EDC904DCE3D2E84E - /buildcache + buildcache create + --update-index + --key EDC904DCE3D2E84E + ${MOUNT_POINT}/buildcache + + - ${STACK_LOCATION}/spack/bin/spack + -e ${environment} module + lmod refresh --yes | tee spack-modules-${environment}-${stack}.log + + - ${CI_PROJECT_DIR}/ci/update_squashfs.sh needs: - job: spack:concretize artifacts: @@ -166,10 +210,6 @@ spack:mksquashfs: script: - source ci/stack_env.sh - - ${STACK_LOCATION}/spack/bin/spack - -e ${environment} module - lmod refresh --yes | tee spack-modules-${environment}-${stack}.log - - echo "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/generic/stack/${stack}/stack-${stack}-${environment}-${CI_COMMIT_REF_SLUG}.sqfs" - "mksquashfs ${STACK_LOCATION} stack-${stack}-${environment}-${CI_COMMIT_REF_SLUG}.sqfs" - 'curl --header "JOB-TOKEN: $CI_JOB_TOKEN" --upload-file stack-${stack}-${environment}-${CI_COMMIT_REF_SLUG}.sqfs "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/generic/stack/${stack}/stack-${stack}-${environment}-${CI_COMMIT_REF_SLUG}.sqfs"' diff --git a/ci/apptainer_by_hand.sh b/ci/apptainer_by_hand.sh new file mode 100755 index 0000000000000000000000000000000000000000..ba95ef61b8d7e50ade9b205f9c4fe81aaf438cfc --- /dev/null +++ b/ci/apptainer_by_hand.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env sh + +export CI_DATA_LT="/work/scitas-ge/richart/ci" +export 
CI_DATA_ST="/scratch/richart/ci" +srun_options="-N1 -n1 -c 64 -p h100" +apptainer_options="--nv" + +export stack=pinot-noir +export environment=kuma_h100 + +export SQUASHFS_ID=local +export CI_PIPELINE_ID=1337 + +export FAKEHOME="/scratch/$(id -un)" + +export CI_PROJECT_DIR=$PWD +export GPG_PRIVATE_KEY=${CI_PROJECT_DIR}/stacks/buildcache.pem +export MOUNT_POINT=$(jq -Mrc .stack.mount_point ${CI_PROJECT_DIR}/stacks/${stack}/config.json) +export STACK_VERSION=$(jq -Mrc .stack.version ${CI_PROJECT_DIR}/stacks/${stack}/config.json) + +APPTAINER_IMAGE=~/rhel9-kuma.sif +image_name=${stack}-${environment}-${SQUASHFS_ID}-${CI_PIPELINE_ID} +squashfs_image=$(ls -t1 ${CI_DATA_LT}/squashfs-cache/${image_name}*.sqfs 2> /dev/null | head -1) +#./ci/prepare_squashfs.sh + +srun ${srun_options} --pty apptainer run \ + ${apptainer_options}\ + --writable-tmpfs \ + --cleanenv \ + -H $(mktemp -d -p ${FAKEHOME}/):/home/$(id -un) \ + --bind ${CI_DATA_LT}/buildcache:${MOUNT_POINT}/buildcache \ + --bind ${CI_DATA_LT}/spack-mirror:${MOUNT_POINT}/spack-mirror \ + --bind ${CI_DATA_LT}/squashfs-cache/:/squashfs-cache \ + --bind ${CI_DATA_ST}/overlayfs:/overlayfs \ + --env stack=${stack} \ + --env environment=${environment} \ + --env GPG_PRIVATE_KEY=${GPG_PRIVATE_KEY} \ + --env CI_PROJECT_DIR=${CI_PROJECT_DIR} \ + --env CI_JOB_ID=${CI_JOB_ID} \ + --env SQUASHFS_ID=${SQUASHFS_ID} \ + --fusemount "host:${CI_PROJECT_DIR}/ci/squashfuse_ll.sh ${CI_DATA_ST} ${image_name} ${squashfs_image} /overlayfs/lower-${image_name}" \ + --fusemount "container:${CI_PROJECT_DIR}/ci/fuse-overlayfs.sh ${image_name} ${MOUNT_POINT}/${stack}/${environment}/${STACK_VERSION}" \ + ${APPTAINER_IMAGE} \ + bash diff --git a/ci/fuse-overlayfs.sh b/ci/fuse-overlayfs.sh new file mode 100755 index 0000000000000000000000000000000000000000..6bc9af3115615de2f3d92381aab6cd23225d134e --- /dev/null +++ b/ci/fuse-overlayfs.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env sh + +suffix=$1 +mount_point=$2 + + +#/usr/libexec/apptainer/bin/fuse-overlayfs 
\ +fuse-overlayfs \ + -o squash_to_uid=$(id -u) \ + -o squash_to_gid=$(id -g) \ + -o lowerdir=/overlayfs/lower-${suffix} \ + -o upperdir=/overlayfs/upper-${suffix} \ + -o workdir=/overlayfs/wd-${suffix} \ + $mount_point diff --git a/ci/install_compilers.sh b/ci/install_compilers.sh index b0a32164111837009fc115239f91aca3374a6fb3..46e19e096f15c677de8d1eb1fd33a8165f82bd86 100755 --- a/ci/install_compilers.sh +++ b/ci/install_compilers.sh @@ -53,7 +53,7 @@ for c in "core_compilers" "compilers"; do ${STACK_LOCATION}/spack/bin/spack buildcache create \ --update-index \ --key EDC904DCE3D2E84E \ - /buildcache ${hashes} + ${MOUNT_POINT}/buildcache ${hashes} for compiler_hash in $(echo ${hashes}); do location=$(${STACK_LOCATION}/spack/bin/spack location -i ${compiler_hash}) diff --git a/ci/prepare_squashfs.sh b/ci/prepare_squashfs.sh new file mode 100755 index 0000000000000000000000000000000000000000..ed5f5b33c97c7551cb7fc5301b8dc76a86947e51 --- /dev/null +++ b/ci/prepare_squashfs.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash + +set -o pipefail +set -o nounset +set -o errexit +source ${CI_PROJECT_DIR}/ci/stack_env.sh + +for i in squashfs-cache buildcache spack-mirror +do + if [ ! -e ${CI_DATA_LT}/$i ] + then + mkdir -p ${CI_DATA_LT}/$i + fi +done + + +image_name=${stack}-${environment}-${squash_id}-${CI_PIPELINE_ID} + +set +o errexit +# Check if the MR already has a squashfs image (relies on pipefail for $?) +sqfs_image=$(ls -t1 ${CI_DATA_LT}/squashfs-cache/${image_name}.sqfs 2> /dev/null | head -1) +if [ $? -ne 0 ] +then + echo "No MR ($squash_id) squashfs found" + # Check if the default branch has a squashfs image + sqfs_base_image=$(ls -t1 ${CI_DATA_LT}/squashfs-cache/${stack}-${environment}-${squash_base}*.sqfs 2> /dev/null | head -1) + if [ $? 
-ne 0 ] + then + echo "No default branch ($squash_base) squashfs found" + echo "Creating an empty one" + empty=$(mktemp -d) + sqfs_base_image="${CI_DATA_LT}/squashfs-cache/${stack}-${environment}-${squash_base}-initial.sqfs" + mksquashfs ${empty} ${sqfs_base_image} + else + echo "Found ${sqfs_base_image}" + fi + + set -o errexit + + # Link the MR squashfs to the one of the default branch (relative symlink) + sqfs_image=${CI_DATA_LT}/squashfs-cache/${image_name}.sqfs + + cd ${CI_DATA_LT}/squashfs-cache + ln -sf $(basename ${sqfs_base_image}) $(basename ${sqfs_image}) + cd - + + echo "Linking ${sqfs_image} -> ${sqfs_base_image}" +else + set -o errexit + if [ ${sqfs_image} != ${CI_DATA_LT}/squashfs-cache/${image_name}.sqfs ]; + then + cd ${CI_DATA_LT}/squashfs-cache + ln -sf $(basename ${sqfs_image}) ${image_name}.sqfs + cd - + fi + echo "Found ${sqfs_image}" +fi diff --git a/ci/setup_spack.sh b/ci/setup_spack.sh index 7ac1929d494831655bee182571f252d2268da01e..8ce478151f1201b22841cc1dd0c642274793f026 100755 --- a/ci/setup_spack.sh +++ b/ci/setup_spack.sh @@ -147,9 +147,9 @@ echo "Setting up buildcache" spack/bin/spack gpg trust \ $GPG_PRIVATE_KEY -if [ ! -d /buildcache/build_cache ]; then +if [ ! -d ${MOUNT_POINT}/buildcache/build_cache ]; then spack/bin/spack gpg publish \ - -d /buildcache + -d ${MOUNT_POINT}/buildcache fi if ! spack/bin/spack mirror list | grep buildcache; then @@ -157,7 +157,7 @@ if ! 
spack/bin/spack mirror list | grep buildcache; then spack/bin/spack mirror add \ --type binary \ --scope system \ - buildcache file:///buildcache + buildcache file://${MOUNT_POINT}/buildcache fi spack/bin/spack buildcache keys \ diff --git a/ci/squashfuse_ll.sh b/ci/squashfuse_ll.sh new file mode 100755 index 0000000000000000000000000000000000000000..3a635bec5925b5a80ea969ea147636567d0e2ce7 --- /dev/null +++ b/ci/squashfuse_ll.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env sh + +CI_DATA=$1 +shift +suffix=$1 +shift +sqfs_image=$1 +shift +mount_point="$*" + +for i in upper wd +do + if [ -d ${CI_DATA}/overlayfs/$i-${suffix} ] + then + rm -rf ${CI_DATA}/overlayfs/$i-${suffix} + fi + mkdir -p ${CI_DATA}/overlayfs/$i-${suffix} +done + +echo "squashfuse_ll $sqfs_image $mount_point" +/usr/libexec/apptainer/bin/squashfuse_ll $sqfs_image $mount_point diff --git a/ci/stack_env.sh b/ci/stack_env.sh index d992e13913c8e8edea1fbc3c8a301b2b2f5c8f14..aac06691ab1680381527903fc8e26303eea64f5b 100644 --- a/ci/stack_env.sh +++ b/ci/stack_env.sh @@ -10,8 +10,8 @@ export STACK_LOCATION=${MOUNT_POINT}/${stack}/${environment}/${STACK_VERSION} export SPACK_SYSTEM_CONFIG_PATH=${STACK_LOCATION}/spack/var/spack/environments/${environment} -export SPACK_USER_CACHE_PATH=$(mktemp -p /tmp -d slurm_user_cache_XXXXXXX) -export SPACK_USER_CONFIG_PATH=$(mktemp -p /tmp -d slurm_user_config_XXXXXXX) +#export SPACK_USER_CACHE_PATH=$(mktemp -p /tmp -d slurm_user_cache_XXXXXXX) +#export SPACK_USER_CONFIG_PATH=$(mktemp -p /tmp -d slurm_user_config_XXXXXXX) export environment_type="local_cluster" @@ -21,5 +21,26 @@ echo "SPACK_VERSION: ${SPACK_VERSION}" echo "MOUNT_POINT: ${MOUNT_POINT}" echo "STACK_LOCATION: ${STACK_LOCATION}" echo "SPACK_SYSTEM_CONFIG_PATH: ${SPACK_SYSTEM_CONFIG_PATH}" -echo "SPACK_USER_CACHE_PATH: ${SPACK_USER_CACHE_PATH}" -echo "SPACK_USER_CONFIG_PATH: ${SPACK_USER_CONFIG_PATH}" +# echo "SPACK_USER_CACHE_PATH: ${SPACK_USER_CACHE_PATH}" +# echo "SPACK_USER_CONFIG_PATH: ${SPACK_USER_CONFIG_PATH}" + 
+set +o nounset + +if [ "x${CI_DEFAULT_BRANCH}" != "x" ] +then + squash_base=${CI_DEFAULT_BRANCH} +else + squash_base=main +fi + +if [ "x${CI_MERGE_REQUEST_IID}" != "x" ] +then + squash_id=${CI_MERGE_REQUEST_IID} +elif [ "x${CI_COMMIT_BRANCH}" != "x" ] +then + squash_id=${CI_COMMIT_BRANCH} +else + squash_id="local" +fi + +set -o nounset diff --git a/ci/update_squashfs.sh b/ci/update_squashfs.sh new file mode 100755 index 0000000000000000000000000000000000000000..66e962e1205abacb0a779232368687368f591073 --- /dev/null +++ b/ci/update_squashfs.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +source ${CI_PROJECT_DIR}/ci/stack_env.sh + +cd /squashfs-cache + +mksquashfs ${STACK_LOCATION} ${stack}-${environment}-${squash_id}-$(date +'%Y%m%d_%H%M').sqfs + +#ln -sf ${stack}-${environment}-${squash_id}-$(date +'%Y%m%d_%H%M').sqfs ${stack}-${environment}-${squash_id}.sqfs diff --git a/stacks/pinot-noir-gcc/modules_kuma_l40s.yaml b/stacks/pinot-noir-gcc/modules_kuma_l40s.yaml deleted file mode 120000 index 7180f05ea42a27f7e59f063ecb69b4d815c4030b..0000000000000000000000000000000000000000 --- a/stacks/pinot-noir-gcc/modules_kuma_l40s.yaml +++ /dev/null @@ -1 +0,0 @@ -modules_kuma_h100.yaml \ No newline at end of file diff --git a/stacks/pinot-noir-gcc/modules_kuma_l40s.yaml b/stacks/pinot-noir-gcc/modules_kuma_l40s.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7be541da7ebaa02cccf6874f657a141d82f56541 --- /dev/null +++ b/stacks/pinot-noir-gcc/modules_kuma_l40s.yaml @@ -0,0 +1,9 @@ +modules: + default: + lmod: + openmpi: + environment: + set: + OMPI_MCA_pml: 'ucx' + OMPI_MCA_osc: 'ucx' + UCX_NET_DEVICES: 'mlx5_0:1,mlx5_1:1' diff --git a/stacks/pinot-noir/modules_kuma_l40s.yaml b/stacks/pinot-noir/modules_kuma_l40s.yaml deleted file mode 120000 index 7180f05ea42a27f7e59f063ecb69b4d815c4030b..0000000000000000000000000000000000000000 --- a/stacks/pinot-noir/modules_kuma_l40s.yaml +++ /dev/null @@ -1 +0,0 @@ 
-modules_kuma_h100.yaml \ No newline at end of file diff --git a/stacks/pinot-noir/modules_kuma_l40s.yaml b/stacks/pinot-noir/modules_kuma_l40s.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7be541da7ebaa02cccf6874f657a141d82f56541 --- /dev/null +++ b/stacks/pinot-noir/modules_kuma_l40s.yaml @@ -0,0 +1,9 @@ +modules: + default: + lmod: + openmpi: + environment: + set: + OMPI_MCA_pml: 'ucx' + OMPI_MCA_osc: 'ucx' + UCX_NET_DEVICES: 'mlx5_0:1,mlx5_1:1'