GEOS icon indicating copy to clipboard operation
GEOS copied to clipboard

[Bug] Failed compilation w/ Cuda 11.5 in `benchmarkOuterProduct` from LvArray on Sherlock

Open jafranc opened this issue 3 years ago • 3 comments

Describe the bug: Compilation error while compiling the CUDA-enabled version of GEOSX in the Sherlock Docker replica.

related to #2010 and https://github.com/GEOSX/thirdPartyLibs/pull/193

To Reproduce

  1. Pull docker image https://hub.docker.com/r/geosx/sherlock-gcc10.1.0-openmpi4.1.2-cuda11.5.0-openblas0.3.10-zlib1.2.11
  2. Try compiling GEOSX with the config file below, setting GEOSX_TPL_DIR to the right location:
# Host-config identity. This is a label, not a filesystem location, so it is
# cached as STRING rather than PATH.
set(CONFIG_NAME "sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda11.5.0-sm80" CACHE STRING "")

# Install prefixes of the toolchain components (directories, hence PATH).
set(GCC_ROOT "/share/software/user/open/gcc/10.1.0" CACHE PATH "")
set(MPI_ROOT "/share/software/user/open/openmpi/4.1.2" CACHE PATH "")
set(CUDA_HOME "/share/software/user/open/cuda/11.5.0" CACHE PATH "")

# Target GPU architecture, once as the CMake architecture number and once in
# the sm_XX spelling that is passed to -arch in CMAKE_CUDA_FLAGS.
set(CMAKE_CUDA_ARCHITECTURES "80" CACHE STRING "")
set(CUDA_ARCH "sm_80" CACHE STRING "")

# The toolkit root mirrors CUDA_HOME; it is a directory, so it is cached as PATH.
set(CUDA_TOOLKIT_ROOT_DIR "${CUDA_HOME}" CACHE PATH "")

# Store the name of the machine running CMake in HOST_NAME.
site_name(HOST_NAME)

# Compilers
# Each entry names a single executable under GCC_ROOT, so FILEPATH is the
# matching cache type.
set(CMAKE_C_COMPILER       "${GCC_ROOT}/bin/gcc"      CACHE FILEPATH "")
set(CMAKE_CXX_COMPILER     "${GCC_ROOT}/bin/g++"      CACHE FILEPATH "")
set(CMAKE_Fortran_COMPILER "${GCC_ROOT}/bin/gfortran" CACHE FILEPATH "")

# OpenMP options
set(ENABLE_OPENMP OFF CACHE BOOL "")

# MPI options
# ENABLE_MPI is an on/off switch, so it is cached as BOOL. FORCE is kept so
# that this file still overrides any value already present in the cache.
set(ENABLE_MPI ON CACHE BOOL "" FORCE)

# MPI compiler wrappers and launcher, all taken from MPI_ROOT. Each one is a
# single executable, hence FILEPATH.
set(MPI_C_COMPILER       "${MPI_ROOT}/bin/mpicc"   CACHE FILEPATH "")
set(MPI_CXX_COMPILER     "${MPI_ROOT}/bin/mpic++"  CACHE FILEPATH "")
set(MPI_Fortran_COMPILER "${MPI_ROOT}/bin/mpifort" CACHE FILEPATH "")
set(MPIEXEC              "${MPI_ROOT}/bin/mpirun"  CACHE FILEPATH "")

# Flag given to MPIEXEC to select the number of processes.
set(MPIEXEC_NUMPROC_FLAG "-n" CACHE STRING "")
set(ENABLE_WRAP_ALL_TESTS_WITH_MPIEXEC ON CACHE BOOL "")

# CUDA options
# FORCE makes these two switches override any value already in the cache.
set(ENABLE_CUDA ON CACHE BOOL "" FORCE)
set(ENABLE_HYPRE_CUDA ON CACHE BOOL "" FORCE)

# STATUS supplies the "-- " prefix that used to be written by hand, so the
# printed text is unchanged.
message(STATUS "CUDA ARCH ...${CUDA_ARCH}")

# nvcc uses the MPI C++ wrapper as its host compiler. The expansions are
# quoted so that an empty or list-valued variable cannot turn the set() call
# into a malformed one.
set(CMAKE_CUDA_HOST_COMPILER "${MPI_CXX_COMPILER}" CACHE FILEPATH "")
set(CMAKE_CUDA_COMPILER "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc" CACHE FILEPATH "")
set(CMAKE_CUDA_STANDARD 14 CACHE STRING "")

# Flags common to every build type; -arch takes the sm_XX value in CUDA_ARCH.
set(CMAKE_CUDA_FLAGS "-restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr -Werror cross-execution-space-call,reorder,deprecated-declarations " CACHE STRING "")

# Per-build-type flags. -Xcompiler forwards the option that follows it to the
# host compiler. RelWithDebInfo is the Release flag set plus debug/line info.
set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG -Xcompiler -DNDEBUG -Xcompiler -O3" CACHE STRING "")
set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo ${CMAKE_CUDA_FLAGS_RELEASE}" CACHE STRING "")
set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0 -Xcompiler -O0" CACHE STRING "")

# Linear-algebra backend: Hypre is selected; PETSc and Trilinos are switched off.
set(GEOSX_LA_INTERFACE "Hypre" CACHE STRING "")
set(ENABLE_TRILINOS OFF CACHE BOOL "")
set(ENABLE_PETSC OFF CACHE BOOL "")

# Tooling switches: Caliper on, Valgrind off.
set(ENABLE_CALIPER ON CACHE BOOL "")
set(ENABLE_VALGRIND OFF CACHE BOOL "")

# BLAS and LAPACK both point at the same OpenBLAS shared library.
set(LAPACK_LIBRARIES
    "/share/software/user/open/openblas/0.3.10/lib/libopenblas.so"
    CACHE STRING "")
set(BLAS_LIBRARIES
    "/share/software/user/open/openblas/0.3.10/lib/libopenblas.so"
    CACHE STRING "")

# Pull in tpls.cmake from the GEOSX host-configs directory. The path is
# absolute and specific to this installation.
# NOTE(review): presumably this is where GEOSX_TPL_DIR is consumed, so it has
# to be set before this line - confirm against tpls.cmake.
include(/home/groups/tchelepi/geosx/GEOSX/host-configs/tpls.cmake)
  3. Run `make geosx` to confirm that the geosx target builds fine
  4. Run `make` to reproduce the error in benchmarkOuterProduct, such as:
/home/groups/tchelepi/geosx-sherlock/GPU/GEOSX/src/coreComponents/LvArray/benchmarks/benchmarkOuterProductKernels.hpp(98): error: template instantiation resulted in unexpected function type of 
"RAJA::internal::ViewBase<LvArray::benchmarking::VALUE_TYPE, LvArray::benchmarking::VALUE_TYPE *, RAJA::detail::LayoutBase_impl<camp::int_seq<camp::idx_t, 0L>, LvArray::benchmarking::INDEX_TYPE, 0L>> (const LvArray::Array<LvArray::benchmarking::VALUE_TYPE, 1, RAJA::PERM_I, LvArray::benchmarking::INDEX_TYPE, LvArray::testing::DEFAULT_BUFFER> &)" (the meaning of a name may have changed since the template declaration -- the type of the template is "LvArray::benchmarking::RajaView<T, PERMUTATION> (const LvArray::benchmarking::ArrayT<T, PERMUTATION> &)") detected during:
            instantiation of "LvArray::benchmarking::makeRajaView" based on template arguments <LvArray::benchmarking::VALUE_TYPE, RAJA::PERM_I> 
(98): here
            instantiation of "void LvArray::benchmarking::RAJAViewNative<PERMUTATION>(benchmark::State &) [with PERMUTATION=RAJA::PERM_IJ]" 
/home/groups/tchelepi/geosx-sherlock/GPU/GEOSX/src/coreComponents/LvArray/benchmarks/benchmarkOuterProduct.cpp(145): here

/home/groups/tchelepi/geosx-sherlock/GPU/GEOSX/src/coreComponents/LvArray/benchmarks/benchmarkOuterProductKernels.hpp(98): error: more than one conversion function from "const LvArray::Array<LvArray::benchmarking::VALUE_TYPE, 1, RAJA::PERM_I, LvArray::benchmarking::INDEX_TYPE, LvArray::testing::DEFAULT_BUFFER>" to "<error-type>" applies:
            function "LvArray::Array<T, NDIM, PERMUTATION, INDEX_TYPE, BUFFER_TYPE>::operator LvArray::ArrayView<const T, NDIM, LvArray::ArrayView<T, NDIM, <expression>, INDEX_TYPE, BUFFER_TYPE>::USD, INDEX_TYPE, BUFFER_TYPE>() const & [with T=LvArray::benchmarking::VALUE_TYPE, NDIM=1, PERMUTATION=RAJA::PERM_I, INDEX_TYPE=LvArray::benchmarking::INDEX_TYPE, BUFFER_TYPE=LvArray::testing::DEFAULT_BUFFER]"
/home/groups/tchelepi/geosx-sherlock/GPU/GEOSX/src/coreComponents/LvArray/benchmarks/../src/Array.hpp(268): here
            function "LvArray::ArrayView<T, NDIM_TPARAM, USD_TPARAM, INDEX_TYPE, BUFFER_TYPE>::operator LvArray::ArraySlice<T, LvArray::ArrayView<T, NDIM_TPARAM, USD_TPARAM, INDEX_TYPE, BUFFER_TYPE>::NDIM,LvArray::ArrayView<T, NDIM_TPARAM, USD_TPARAM, INDEX_TYPE, BUFFER_TYPE>::USD, INDEX_TYPE>() const & [with T=LvArray::benchmarking::VALUE_TYPE, NDIM_TPARAM=1, USD_TPARAM=0, INDEX_TYPE=LvArray::benchmarking::INDEX_TYPE, BUFFER_TYPE=LvArray::testing::DEFAULT_BUFFER]" .

Platform (please complete the following information):

  • Machine Docker image replica Sherlock cluster config

  • Compiler and main libs :

  • gcc 10.3.0

  • openmpi4.1.2

  • cuda11.5.0

  • openblas0.3.10

  • zlib1.2.11

  • GEOSX Version 68a30d21e7997dcde5c1ca0c4ebc7d0bc928f3ef

jafranc avatar Sep 10 '22 00:09 jafranc

Do you confirm it also breaks on Sherlock?

TotoGaz avatar Sep 10 '22 04:09 TotoGaz

I confirm it breaks for both @aguitton and myself

jafranc avatar Sep 10 '22 07:09 jafranc

> I confirm it breaks for both @aguitton and myself

OK; maybe remove "dockerized" from the title then?

TotoGaz avatar Sep 10 '22 17:09 TotoGaz

@jafranc can it be closed?

paveltomin avatar Sep 20 '23 23:09 paveltomin