diff --git a/init/eessi_archdetect.sh b/init/eessi_archdetect.sh index 4fd979ce..ec22ee22 100755 --- a/init/eessi_archdetect.sh +++ b/init/eessi_archdetect.sh @@ -181,6 +181,12 @@ accelpath() { nvidia_smi_out=$(mktemp -p /tmp nvidia_smi_out.XXXXX) nvidia-smi --query-gpu=gpu_name,count,driver_version,compute_cap --format=csv,noheader 2>&1 > $nvidia_smi_out if [[ $? -eq 0 ]]; then + if grep -q "Failed to initialize NVML: Driver/library version mismatch" $nvidia_smi_out; then + log "ERROR" "accelpath: nvidia-smi command failed with 'Failed to initialize NVML: Driver/library version mismatch'" + rm -f $nvidia_smi_out + exit 4 + fi + nvidia_smi_info=$(head -1 $nvidia_smi_out) cuda_cc=$(echo $nvidia_smi_info | sed 's/, /,/g' | cut -f4 -d, | sed 's/\.//g') log "DEBUG" "accelpath: CUDA compute capability '${cuda_cc}' derived from nvidia-smi output '${nvidia_smi_info}'"