From f3e3f269486f4257d89f8b9ad366b153f7f58287 Mon Sep 17 00:00:00 2001 From: agimenog Date: Mon, 4 Aug 2025 10:45:02 -0400 Subject: [PATCH 1/7] adding compatibility with mpi version=>5 --- .../install_openmpi_host_injection.sh | 293 ++++++++++++++++++ 1 file changed, 293 insertions(+) create mode 100755 scripts/mpi_support/install_openmpi_host_injection.sh diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh new file mode 100755 index 00000000..b5bf2727 --- /dev/null +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -0,0 +1,293 @@ +#!/usr/bin/env bash + +# This script can be used to install the host MPI libraries under the `.../host_injections` directory. +# It allows EESSI software to use the MPI stack from the host. +# +# The `host_injections` directory is a variant symlink that by default points to +# `/opt/eessi`, unless otherwise defined in the local CVMFS configuration (see +# https://cvmfs.readthedocs.io/en/stable/cpt-repo.html#variant-symlinks). For the +# installation to be successful, this directory needs to be writeable by the user +# executing this script. + +# Initialise our bash functions +TOPDIR=$(dirname $(realpath $BASH_SOURCE)) +source "$TOPDIR"/../utils.sh + + +# Function to display help message +show_help() { + echo "Usage: $0 [OPTIONS]" + echo "Options:" + echo " --help Display this help message" + echo " --mpi-path /path/to/mpi Specify the path to the MPI host installation (Required)" + echo " -t, --temp-dir /path/to/tmpdir Specify a location to use for temporary" + echo " storage during the mpi injection" + echo " --noclean Do not remove the temporary directory and the host injected libraries after finishing injection" + echo " --force Force MPI injection even if it is already done" +} + + +parse_cmdline() { + while [[ $# -gt 0 ]]; do + case "$1" in + --help) + show_help + exit 0 + ;; + --mpi-path) + if [ -n "$2" ]; then + readonly MPI_PATH="$2" + shift 2 + else + echo_red "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + -t|--temp-dir) + if [ -n "$2" ]; then + readonly TEMP_DIR="$2" + shift 2 + else + echo_red "Error: Argument required for $1" + show_help + exit 1 + fi + ;; + --noclean) + CLEAN=false + shift 1 + ;; + --force) + FORCE=true + shift 1 + ;; + *) + echo_red "Error: Unknown option: $1" + show_help + exit 1 + ;; + esac + done + if [ -z "${MPI_PATH}" ]; then + echo_yellow "MPI path was not specified and it is required" + show_help + exit 0 + fi + + readonly CLEAN=${CLEAN:=true} + readonly FORCE=${FORCE:=false} +} + + +# ****Warning: patchelf v0.18.0 (currently shipped with EESSI) does not work.**** +# We get v0.17.2 +download_patchelf() { + # Temporary directory to save patchelf + local tmpdir=$1 + + local patchelf_version="0.17.2" + local url + local curl_opts="-L --silent --show-error --fail" + + url="https://github.com/NixOS/patchelf/releases/download/${patchelf_version}/" + url+="patchelf-${patchelf_version}-${EESSI_CPU_FAMILY}.tar.gz" + + local patchelf_path=${tmpdir}/patchelf + mkdir ${patchelf_path} + + curl ${url} ${curl_opts} -o ${patchelf_path}/patchelf.tar.gz + tar -xf ${patchelf_path}/patchelf.tar.gz -C ${patchelf_path} + PATCHELF_BIN=${patchelf_path}/bin/patchelf +} + +# Declaring this var here to use later on the main function +# This only applies to MPI=>5 +libcuda_exists=0 + +inject_mpi() { + # Temporary directory for injection + local tmpdir=$1 + + local eessi_ldd="${EESSI_EPREFIX}/usr/bin/ldd" + local system_ldd="/usr/bin/ldd" + + local host_injection_mpi_path + + host_injection_mpi_path=${EESSI_SOFTWARE_PATH/versions/host_injections} + host_injection_mpi_path+="/rpath_overrides/OpenMPI/system/lib" + + if [ -d ${host_injection_mpi_path} ]; then + if [ -n "$(ls -A ${host_injection_mpi_path})" ]; then + echo "MPI was already injected" + if ${FORCE}; then + echo "Forcing new MPI injection" + else + return 0 + fi + fi + fi + + mkdir -p ${host_injection_mpi_path} + + local temp_inject_path="${tmpdir}/mpi_inject" + mkdir ${temp_inject_path} + + # Get all library files from openmpi dir + find ${MPI_PATH} -type f -name "*.so*" -exec cp {} ${temp_inject_path} \; + + # Copy library links to host injection path + find ${MPI_PATH} -type l -name "*.so*" -exec cp -P {} ${host_injection_mpi_path} \; + + # Get MPI libs dependencies from system ldd + local libname libpath pmixpath + local -A libs_dict + local -a dlopen_libs + + readarray -d '' dlopen_libs < <(find ${MPI_PATH} -mindepth 2 -name "*.so*") + + # Get all library names and paths in associative array + # If library is libfabric, libpmix, or from the MPI path + # modify libpath in assoc array to point to host_injection_mpi_path + while read -r libname libpath; do + + if [[ ${libname} =~ libfabric\.so ]] && [[ ! -f ${temp_inject_path}/${libname} ]]; then + local libdir="$(dirname ${libpath})/" # without trailing slash the find does not work + find ${libdir} -maxdepth 1 -type f -name "libfabric.so*" -exec cp {} ${temp_inject_path} \; + find ${libdir} -maxdepth 1 -type l -name "libfabric.so*" -exec cp -P {} ${host_injection_mpi_path} \; + + local depname deppath + + while read -r depname deppath; do + libs_dict[${depname}]=${deppath} + done < <(${system_ldd} ${libpath} | awk '/=>/ {print $1, $3}' | sort | uniq) + + libpath=${host_injection_mpi_path}/$(basename ${libpath}) + fi + + if [[ ${libname} =~ libpmix\.so ]] && [[ ! -f ${temp_inject_path}/${libname} ]]; then + local libdir="$(dirname ${libpath})/" # without trailing slash the find does not work + [ -n "${PMIX_PATH}" ] && pmixpath="${PMIX_PATH}/pmix" || pmixpath="$(dirname ${libpath})/pmix" + find ${libdir} -maxdepth 1 -type f -name "libpmix.so*" -exec cp {} ${temp_inject_path} \; + find ${libdir} -maxdepth 1 -type l -name "libpmix.so*" -exec cp -P {} ${host_injection_mpi_path} \; + + libpath=${host_injection_mpi_path}/$(basename ${libpath}) + fi + + if [[ ${libpath} =~ ${MPI_PATH} ]]; then + libpath=${host_injection_mpi_path}/$(basename ${libpath}) + fi + + # Forcing libname to be libcuda, if not it will be "not", as the lib is not found + # This only applies to MPI=>5 + if [[ ${libname} =~ libcuda\.so ]]; then + export libcuda_exists=1 + libs_dict[${libname}]="libcuda.so" + else + libs_dict[${libname}]=${libpath} + fi + + done < <(cat <(find ${temp_inject_path} -maxdepth 1 -type f -name "*.so*" -exec ${system_ldd} {} \;) \ + <(for dlopen in ${dlopen_libs[@]}; do ${system_ldd} ${dlopen}; done) \ + | awk '/=>/ {print $1, $3}' | sort | uniq) + + # Do library injection to openmpi libs, libfabric and libpmix + local lib + while read -r lib; do + local dep + + # Force system libefa, librdmacm, libibverbs and libpsm2 (present in the EESSI compat layer) + # Must be done before the injection of unresolved dependencies + if [[ ${lib} =~ libfabric\.so ]]; then + while read -r dep; do + ${PATCHELF_BIN} --replace-needed ${dep} ${libs_dict[${dep}]} ${lib} + done < <(${system_ldd} ${lib} | awk '/libefa/ || /libibverbs/ || /libpsm2/ || /librdmacm/ {print $1}' | sort | uniq) + fi + + # Do injection of unresolved libraries + ${PATCHELF_BIN} --set-rpath "${host_injection_mpi_path}" ${lib} + while read -r dep; do + if ${PATCHELF_BIN} --print-needed ${lib} | grep -q "${dep}"; then + ${PATCHELF_BIN} --replace-needed ${dep} ${libs_dict[${dep}]} ${lib} + else + ${PATCHELF_BIN} --add-needed ${libs_dict[${dep}]} ${lib} + fi + done < <(${eessi_ldd} ${lib} | awk '/not found/ {print $1}' | sort | uniq) + + # Inject into libmpi.so non resolved dependencies from dlopen libraries that are not already present in libmpi.so + if [[ ${lib} =~ libmpi\.so ]]; then + while read -r dep; do + if ! ${PATCHELF_BIN} --print-needed ${lib} | grep -q "${dep}"; then + ${PATCHELF_BIN} --add-needed ${libs_dict[${dep}]} ${lib} + fi + done < <(for dlopen in ${dlopen_libs[@]}; do ${eessi_ldd} ${dlopen}; done \ + | awk '/not found/ && !/libmpi\.so.*/ {print $1}' | sort | uniq) + fi + + done < <(find ${temp_inject_path} -type f) + + # Sanity check MPI injection + local sanity=1 + if ${eessi_ldd} ${temp_inject_path}/* &> /dev/null; then + cp ${temp_inject_path}/* -t ${host_injection_mpi_path} + # libcuda.so will be always not found + # This only applies to MPI=>5 + if ${eessi_ldd} ${temp_inject_path}/* | grep "not found" | grep -v "libcuda.so" > /dev/null; then + ${CLEAN} && rm -f ${host_injection_mpi_path}/*.so* + else + sanity=0 + fi + fi + + return ${sanity} +} + + +main() { + parse_cmdline "$@" + check_eessi_initialised + + # Create directory linked by host_injections + local inject_dir=$(readlink -f /cvmfs/software.eessi.io/host_injections) + [[ ! -d ${inject_dir} ]] && mkdir -p ${inject_dir} + + # we need a directory we can use for temporary storage + if [[ -z "${TEMP_DIR}" ]]; then + tmpdir=$(mktemp -d) + else + tmpdir="${TEMP_DIR}"/temp + if ! mkdir -p "$tmpdir" ; then + fatal_error "Could not create directory ${tmpdir}" + fi + fi + + echo "Temporary directory for injection: ${tmpdir}" + + download_patchelf ${tmpdir} + + local host_injection_mpi_path + + host_injection_mpi_path=${EESSI_SOFTWARE_PATH/versions/host_injections} + host_injection_mpi_path+="/rpath_overrides/OpenMPI/system/lib" + + if inject_mpi ${tmpdir}; then + if [ $libcuda_exists -eq 0 ]; then + echo_green "MPI injection was successful" + # This only applies to MPI=>5 + elif [ $libcuda_exists -eq 1 ]; then + echo_yellow "The scripts could not find libcuda file, which may cause some issues, please copy it manually using the following command:" + echo "----------------------" + echo "cp /path/to/libcuda.so" ${host_injection_mpi_path} + echo "----------------------" + echo_green "MPI injection was successful" + fi + else + fatal_error "MPI host injection failed" + fi + + if ${CLEAN}; then + rm -rf "${tmpdir}" + fi +} + +main "$@" From 00bf9de312bfea91320b9a679762fd1749475a5d Mon Sep 17 00:00:00 2001 From: Arturo Gimeno Date: Tue, 26 Aug 2025 16:01:59 +0200 Subject: [PATCH 2/7] adding ci --- .github/workflows/mpi_injections.yml | 33 ++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .github/workflows/mpi_injections.yml diff --git a/.github/workflows/mpi_injections.yml b/.github/workflows/mpi_injections.yml new file mode 100644 index 00000000..4d864370 --- /dev/null +++ b/.github/workflows/mpi_injections.yml @@ -0,0 +1,33 @@ +name: Test for mpi injection script +on: + pull_request: + workflow_dispatch: +permissions: + contents: read # to fetch code (actions/checkout) +jobs: + build: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + - uses: eessi/github-action-eessi@v3 + + - name: Install OpenMPI + run: | + module load EESSI-extend + eb --rebuild OpenMPI-4.1.6-GCC-13.2.0.eb + + - name: Inject OpenMPI libs + run: | + mkdir /opt/eessi + ./../scripts/mpi_support/install_openmpi_host_injection.sh --mpi-path $HOME/eessi/versions/2023.06/software/linux/$EESSI_SOFTWARE_SUBDIR/software/OpenMPI/4.1.6-GCC-13.2.0/ + + - name: Check with OSU + run: | + module load OSU-Micro-Benchmarks/7.2-gompi-2023b + mpirun -n 2 osu_latency + export output=$? + if [ $output -ne 0 ]; then + echo "MPI injection failed, check the logs for more detail" + else + echo "MPI injection succeded!" + fi \ No newline at end of file From 9f0b83f1d0aec8bfcee3af0ad7f1671034a667ce Mon Sep 17 00:00:00 2001 From: Arturo Gimeno Date: Tue, 26 Aug 2025 16:14:27 +0200 Subject: [PATCH 3/7] making sure injected libs are being picked --- .github/workflows/mpi_injections.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/mpi_injections.yml b/.github/workflows/mpi_injections.yml index 4d864370..186f8ca8 100644 --- a/.github/workflows/mpi_injections.yml +++ b/.github/workflows/mpi_injections.yml @@ -24,6 +24,7 @@ jobs: - name: Check with OSU run: | module load OSU-Micro-Benchmarks/7.2-gompi-2023b + ldd $(which osu_latency) | grep host_injections mpirun -n 2 osu_latency export output=$? if [ $output -ne 0 ]; then From ffa761edfcc9b137965b25326c5a5bc0d588a399 Mon Sep 17 00:00:00 2001 From: agimenog <134053210+agimenog@users.noreply.github.com> Date: Tue, 26 Aug 2025 16:55:58 +0200 Subject: [PATCH 4/7] update script path Co-authored-by: ocaisa --- .github/workflows/mpi_injections.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mpi_injections.yml b/.github/workflows/mpi_injections.yml index 186f8ca8..451615c2 100644 --- a/.github/workflows/mpi_injections.yml +++ b/.github/workflows/mpi_injections.yml @@ -19,7 +19,7 @@ jobs: - name: Inject OpenMPI libs run: | mkdir /opt/eessi - ./../scripts/mpi_support/install_openmpi_host_injection.sh --mpi-path $HOME/eessi/versions/2023.06/software/linux/$EESSI_SOFTWARE_SUBDIR/software/OpenMPI/4.1.6-GCC-13.2.0/ + ./scripts/mpi_support/install_openmpi_host_injection.sh --mpi-path $HOME/eessi/versions/2023.06/software/linux/$EESSI_SOFTWARE_SUBDIR/software/OpenMPI/4.1.6-GCC-13.2.0/ - name: Check with OSU run: | From 08547bb8aac7101d5292c7eacd47ad9d9b6b77f0 Mon Sep 17 00:00:00 2001 From: Arturo Gimeno Date: Tue, 26 Aug 2025 17:15:39 +0200 Subject: [PATCH 5/7] some fixes to faster the script --- .github/workflows/mpi_injections.yml | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/.github/workflows/mpi_injections.yml b/.github/workflows/mpi_injections.yml index 451615c2..bfe16fa0 100644 --- a/.github/workflows/mpi_injections.yml +++ b/.github/workflows/mpi_injections.yml @@ -10,21 +10,29 @@ jobs: steps: - uses: actions/checkout@v4 - uses: eessi/github-action-eessi@v3 - - - name: Install OpenMPI - run: | - module load EESSI-extend - eb --rebuild OpenMPI-4.1.6-GCC-13.2.0.eb - name: Inject OpenMPI libs run: | + # Needed dir for the injected libs mkdir /opt/eessi - ./scripts/mpi_support/install_openmpi_host_injection.sh --mpi-path $HOME/eessi/versions/2023.06/software/linux/$EESSI_SOFTWARE_SUBDIR/software/OpenMPI/4.1.6-GCC-13.2.0/ + + # Load last OpenMPI from 2023b and declaring var for injection path + module load OpenMPI/4.1.6-GCC-13.2.0 + export OPENMPI_TO_INJECT=$EBROOTOPENMPI + module purge + + # Inject script + ./scripts/mpi_support/install_openmpi_host_injection.sh --mpi-path $OPENMPI_TO_INJECT - name: Check with OSU run: | - module load OSU-Micro-Benchmarks/7.2-gompi-2023b + # Load OSU version that uses older OpenMPI + module load OSU-Micro-Benchmarks/7.1-1-gompi-2023a + + # Verify injected libs are being used ldd $(which osu_latency) | grep host_injections + + # Check it works correctly mpirun -n 2 osu_latency export output=$? if [ $output -ne 0 ]; then From 347bef1775984c6879f0fc0515be4ab6df99d29e Mon Sep 17 00:00:00 2001 From: Arturo Gimeno Date: Wed, 27 Aug 2025 11:24:56 +0200 Subject: [PATCH 6/7] adding timestamp when forcing mpi injections --- scripts/mpi_support/install_openmpi_host_injection.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index b5bf2727..665758ae 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -108,6 +108,7 @@ libcuda_exists=0 inject_mpi() { # Temporary directory for injection local tmpdir=$1 + local timestamp=$(date +%Y%m%d%H%M%S) local eessi_ldd="${EESSI_EPREFIX}/usr/bin/ldd" local system_ldd="/usr/bin/ldd" @@ -122,6 +123,8 @@ inject_mpi() { echo "MPI was already injected" if ${FORCE}; then echo "Forcing new MPI injection" + mv ${host_injection_mpi_path} ${host_injection_mpi_path}-${timestamp} + echo "Previous injection saved on" ${host_injection_mpi_path}-${timestamp} else return 0 fi From b162643b29e9f293960312b76036f7234c00ef0d Mon Sep 17 00:00:00 2001 From: majobenitez Date: Mon, 29 Sep 2025 16:10:58 +0200 Subject: [PATCH 7/7] Adding specific libraries to inject --- .../install_openmpi_host_injection.sh | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/scripts/mpi_support/install_openmpi_host_injection.sh b/scripts/mpi_support/install_openmpi_host_injection.sh index 665758ae..12c41344 100755 --- a/scripts/mpi_support/install_openmpi_host_injection.sh +++ b/scripts/mpi_support/install_openmpi_host_injection.sh @@ -115,6 +115,10 @@ inject_mpi() { local host_injection_mpi_path + # To avoid ldd warnings + exec 3>&2 + exec 2>/dev/null + host_injection_mpi_path=${EESSI_SOFTWARE_PATH/versions/host_injections} host_injection_mpi_path+="/rpath_overrides/OpenMPI/system/lib" @@ -136,11 +140,18 @@ inject_mpi() { local temp_inject_path="${tmpdir}/mpi_inject" mkdir ${temp_inject_path} - # Get all library files from openmpi dir - find ${MPI_PATH} -type f -name "*.so*" -exec cp {} ${temp_inject_path} \; + # Array for MPI libs + mpi_libs=("libmpi.so*" "libmpi_mpifh.so*" "libmpi_usempi_tkr.so*" \ + "libmpi_usempi_ignore_tkr.so*" "libmpi_usempif08.so*" \ + "libmpi_cxx.so*" "libmpi_java.so*" "liboshmem.so*" \ + "libmca*.so*" "mca_*.so*") + # Get all library files from MPI array + for lib in "${mpi_libs[@]}"; do + find ${MPI_PATH} -type f -name "$lib" -exec cp {} ${temp_inject_path} \; # Copy library links to host injection path - find ${MPI_PATH} -type l -name "*.so*" -exec cp -P {} ${host_injection_mpi_path} \; + find ${MPI_PATH} -type l -name "$lib" -exec cp -P {} ${host_injection_mpi_path} \; + done # Get MPI libs dependencies from system ldd local libname libpath pmixpath @@ -229,6 +240,8 @@ inject_mpi() { done < <(find ${temp_inject_path} -type f) + exec 2>&3 + # Sanity check MPI injection local sanity=1 if ${eessi_ldd} ${temp_inject_path}/* &> /dev/null; then