Skip to content
Snippets Groups Projects
Commit 9453ea8c authored by Nicolas Richart's avatar Nicolas Richart
Browse files

Switching nvhpc to ucx-mt

Cleaning up the way squashfs images are handled
parent a3e7e609
No related branches found
No related tags found
1 merge request: !10 Switching to ucx-mt
......@@ -6,8 +6,8 @@ stages:
- compilers
- concretize
- install
- deploy
- release
- deploy
workflow:
rules:
......@@ -59,20 +59,20 @@ variables:
tags:
- stack
rules:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG =~ /v[0-9]+.*/
variables:
SQUASHFS_ID: ${CI_MERGE_REQUEST_IID}
SQUASHFS_ID: ${CI_COMMIT_REF_SLUG}
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
variables:
SQUASHFS_ID: ${CI_COMMIT_REF_SLUG}
- if: $CI_COMMIT_TAG =~ /^v[0-9].*$/
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
variables:
SQUASHFS_ID: ${CI_COMMIT_REF_SLUG}
SQUASHFS_ID: ${CI_MERGE_REQUEST_IID}
variables:
CI_DATA_LT: "/work/scitas-ge/scitas-stack/ci"
CI_DATA_ST: "/scratch/${path_suffix}scitas-stack/ci"
COMMAND_OPTIONS_SBATCH: ${slurm_options}
FAKEHOME: "${CI_DATA_ST}/homes"
FAKEHOME: "${CI_DATA_ST}/homes/${stack}-${environment}-${CI_PIPELINE_ID}"
MOUNT_POINT: $(jq -Mrc .stack.mount_point ${CI_PROJECT_DIR}/stacks/${stack}/config.json)
PYTHONUNBUFFERED: 1
environment: ${environment}
......@@ -91,7 +91,7 @@ variables:
APPTAINER_EXEC_OPTIONS: >-
${apptainer_options}
--cleanenv
-H $(mktemp -d -p ${FAKEHOME}/):/home/$(id -un)
-H ${FAKEHOME}:/home/$(id -un)
--bind ${CI_DATA_LT}/buildcache:${MOUNT_POINT}/buildcache
--bind ${CI_DATA_LT}/spack-mirror:${MOUNT_POINT}/spack-mirror
--bind ${CI_DATA_LT}/squashfs-cache/:/squashfs-cache
......@@ -117,6 +117,7 @@ spack:checkout:
- git config --global --add --bool advice.detachedHead false
script:
- ./ci/prepare_squashfs.sh
- mkdir -p ${FAKEHOME}
timeout: 1h
spack:setup:
......@@ -184,6 +185,35 @@ spack:install:
junit: spack-install-*.xml
timeout: 72h
# Release-stage job, run after spack:install: calls ci/update_squashfs.sh with
# a release suffix and moves the image named
# ${stack}-${environment}-${SQUASHFS_ID}-${suffix}.sqfs into
# /squashfs-cache/releases/.
spack:mksquashfs:
stage: release
extends:
- .parallel_job
script:
- source ci/stack_env.sh
- mkdir -p /squashfs-cache/releases/
# release_suffix may contain a $(...) command substitution (see the
# default-branch rule below); eval lets the shell expand it into suffix
- eval suffix=${release_suffix}
# log the name of the image that the mv below expects to find
- echo "${stack}-${environment}-${SQUASHFS_ID}-${suffix}.sqfs"
# NOTE(review): presumably the argument becomes the image suffix - confirm in ci/update_squashfs.sh
- ./ci/update_squashfs.sh ${suffix}
# the mv command continues on the next line, which holds its destination
- mv /squashfs-cache/"${stack}-${environment}-${SQUASHFS_ID}-${suffix}.sqfs"
/squashfs-cache/releases/
needs:
- job: spack:install
timeout: 1h
rules:
# tag pipelines whose tag matches v[0-9]+.*: the tag itself is the suffix
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG =~ /v[0-9]+.*/
variables:
SQUASHFS_ID: ${CI_COMMIT_REF_SLUG}
release_suffix: ${CI_COMMIT_TAG}
# default-branch pipelines: the suffix is a date command string, expanded by
# the eval in the script section
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
variables:
SQUASHFS_ID: ${CI_COMMIT_REF_SLUG}
release_suffix: "$(date +%Y%m%d-%H%M)"
spack:deploy:
stage: deploy
image: registry.c4science.ch/scitas-stack/deploy
......@@ -195,7 +225,7 @@ spack:deploy:
MOUNT_POINT: /ssoft/spack
COMMAND_OPTIONS_SBATCH: "-c 36"
image_name: ${stack}-${environment}-${SQUASHFS_ID}
squashfs_image: $(ls -t1 ${CI_DATA_LT}/squashfs-cache/${image_name}*.sqfs 2> /dev/null | head -1)
squashfs_image: $(ls -t1 ${CI_DATA_LT}/squashfs-cache/releases/${image_name}*.sqfs 2> /dev/null | head -1)
APPTAINER_EXEC_OPTIONS: >-
--bind ${MOUNT_POINT}
--bind ${CI_DATA_LT}/squashfs-cache/
......@@ -208,29 +238,11 @@ spack:deploy:
- rsync -auP /squashfs/${stack}/ ${MOUNT_POINT}/${stack}/
needs:
- job: spack:install
- job: spack:mksquashfs
timeout: 2h
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
# Release-stage job, run after spack:install: calls ci/update_squashfs.sh with
# the commit tag and moves the image named
# ${stack}-${environment}-${SQUASHFS_ID}-${CI_COMMIT_TAG}.sqfs into
# /squashfs-cache/releases/.
spack:mksquashfs:
stage: release
extends:
- .parallel_job
script:
- source ci/stack_env.sh
- mkdir -p /squashfs-cache/releases/
# NOTE(review): presumably the tag becomes the image suffix - confirm in ci/update_squashfs.sh
- ./ci/update_squashfs.sh ${CI_COMMIT_TAG}
# the mv command continues on the next line, which holds its destination
- mv /squashfs-cache/${stack}-${environment}-${SQUASHFS_ID}-${CI_COMMIT_TAG}.sqfs
/squashfs-cache/releases/
needs:
- job: spack:install
timeout: 1h
rules:
# only runs for tag pipelines whose tag matches v[0-9]+.*
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG =~ /v[0-9]+.*/
post_cleaning:
stage: .post
variables:
......
......@@ -17,31 +17,41 @@ do
fi
done
cd ${CI_DATA_LT}/squashfs-cache
set +o errexit
# Check if the MR has a squashfs
sqfs_image=$(ls -t1 ${CI_DATA_LT}/squashfs-cache/${image_name}.sqfs 2> /dev/null | head -1)
if [ $? -ne 0 ]
sqfs_image=$(ls -t1 ${image_name}.sqfs 2> /dev/null | head -1)
if [ "x${sqfs_image}" == "x" ]
then
echo "No MR ($SQUASHFS_ID-${CI_PIPELINE_ID}) squashfs found"
echo " - while looking for ${image_name}.sqfs"
# look for latest release version
sqfs_base_image=$(ls -t1 releases/${stack}-${environment}*.sqfs 2> /dev/null | head -1)
# look for base in the branch
sqfs_base_image=$(ls -t1 ${CI_DATA_LT}/squashfs-cache/${stack}-${environment}-${SQUASHFS_ID}.sqfs 2> /dev/null | head -1)
if [ $? -ne 0 ]
# if no release look for main branch
if [ "x${sqfs_base_image}" == "x" ]
then
echo "No MR ($SQUASHFS_ID) squashfs found"
sqfs_base_image=$(ls -t1 ${stack}-${environment}-${squash_base}*.sqfs 2> /dev/null | head -1)
fi
if [ "x${sqfs_base_image}" == "x" ]
then
echo "No release nor default branch ($squash_base) squashfs found"
echo " while looking for"
echo " - releases/${stack}-${environment}*.sqfs"
echo " - ${stack}-${environment}-${squash_base}*.sqfs"
# look for base in default branch
sqfs_base_image=$(ls -t1 ${CI_DATA_LT}/squashfs-cache/${stack}-${environment}-${squash_base}*.sqfs 2> /dev/null | head -1)
if [ $? -ne 0 ]
sqfs_base_image="${stack}-${environment}-${squash_base}-initial.sqfs"
if [ ! -f "${sqfs_base_image}" ]
then
echo "No default branch ($squash_base) squashfs found"
echo "Creating an empty one"
set -o errexit
echo "Initial build creating an empty one"
empty=$(mktemp -d)
mkdir ${empty}/${stack}
sqfs_base_image="${CI_DATA_LT}/squashfs-cache/${stack}-${environment}-${squash_base}-initial.sqfs"
mksquashfs ${empty} ${sqfs_base_image}
else
echo "Found ${sqfs_base_image}"
echo "ERROR: Not initial build and no main image found"
exit -10
fi
else
echo "Found ${sqfs_base_image}"
......@@ -49,20 +59,13 @@ then
set -o errexit
# Link the MR squashfs to the one of the default branch
sqfs_image=${CI_DATA_LT}/squashfs-cache/${image_name}.sqfs
sqfs_image=${image_name}.sqfs
cd ${CI_DATA_LT}/squashfs-cache
ln -sf $(basename ${sqfs_base_image}) $(basename ${sqfs_image})
cd -
ln -sf ${sqfs_base_image} ${sqfs_image}
echo "Linking ${sqfs_image} -> ${sqfs_base_image}"
else
set -o errexit
if [ ${sqfs_image} != ${CI_DATA_LT}/squashfs-cache/${image_name}.sqfs ];
then
cd ${CI_DATA_LT}/squashfs-cache
ln -sf $(basname ${sqfs_image}) ${image_name}.sqfs
cd
fi
echo "Found ${sqfs_image}"
fi
cd -
......@@ -6,7 +6,7 @@ set -o pipefail
source ${CI_PROJECT_DIR}/ci/stack_env.sh
sqfs_suffix="${CI_PIPELINE_ID}-$(date +%Y%m%d_%H%M)"
sqfs_suffix="${CI_PIPELINE_ID}-$(date +%Y%m%d-%H%M)"
if [ $# -eq 1 ]
then
......
......@@ -11,8 +11,8 @@ modules:
HPCX_DIR: "{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest"
OPAL_PREFIX: "{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/ompi"
prepend_path:
LD_LIBRARY_PATH: '{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/ompi/lib:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/ucx/lib:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/ucx/lib/ucx:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/ucc/lib:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/ucc/lib/ucc:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/hcoll/lib:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/sharp/lib:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/nccl_rdma_sharp_plugin/lib'
PATH: '{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/ucx/bin:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/ucc/bin:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/hcoll/bin:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/sharp/bin'
LD_LIBRARY_PATH: "{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/ompi/lib:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/ucx/mt/lib:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/ucx/mt/lib/ucx:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/ucc/lib:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/ucc/lib/ucc:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/hcoll/lib:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/sharp/lib:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/nccl_rdma_sharp_plugin/lib"
PATH: "{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/ucx/mt/bin:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/ucc/bin:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/hcoll/bin:{prefix}/Linux_x86_64/24.7/comm_libs/12.5/hpcx/latest/sharp/bin"
nccl:
environment:
set:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment