xsimd icon indicating copy to clipboard operation
xsimd copied to clipboard

Invalid codegen with clang-cl (from Visual Studio 2019) in release builds (64-bit, SSSE3)

Open abique opened this issue 5 years ago • 1 comments

Hi,

I've found that the following code snippet does not produce correct CodeGen with clang-cl.

   xsimd::batch<float, 4> e{ 0, 1.f, 2.f, 3.f };
   auto f = xsimd::exp(e);
   for (int i = 0; i < 4; ++i)
      CATCH_CHECK(f[i] == Approx(std::exp(e[i])));

Here is the output result:

Additional Info:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
ClangVectorsTests.cpp line 575: Failed
  CATCH_CHECK( f[i] == Approx(std::exp(e[i])) )
with expansion:
  CATCH_CHECK( 2.0f == Approx( 2.7182817459 ) )

ClangVectorsTests.cpp line 575: Failed
  CATCH_CHECK( f[i] == Approx(std::exp(e[i])) )
with expansion:
  CATCH_CHECK( 4.0f == Approx( 7.3890562057 ) )

ClangVectorsTests.cpp line 575: Failed
  CATCH_CHECK( f[i] == Approx(std::exp(e[i])) )
with expansion:
  CATCH_CHECK( 16.0f == Approx( 20.0855369568 ) )

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

After looking at the output, it seems that it is computing pow(2, x) instead of exp(x).

Here is the assembly code:

   xsimd::batch<float, 4> e{ 0, 1.f, 2.f, 3.f };
0000000140021982  movaps      xmm0,xmmword ptr [__xmm@40400000400000003f80000000000000 (014019C700h)]  
0000000140021989  movaps      xmmword ptr [e],xmm0  
000000014002198D  movaps      xmm0,xmmword ptr [rbp+70h]  
   auto f = xsimd::exp(e);
0000000140021991  movaps      xmmword ptr [f],xmm0  
0000000140021995  xor         esi,esi  
0000000140021997  jmp         ____C_A_T_C_H____T_E_S_T____78+3C5h (01400219B5h)  
0000000140021999  nop         dword ptr [rax]  
   for (int i = 0; i < 4; ++i)
00000001400219A0  mov         rsi,qword ptr [rbp+1B8h]  
00000001400219A7  add         rsi,4  
00000001400219AB  cmp         rsi,10h  
00000001400219AF  je          ____C_A_T_C_H____T_E_S_T____78+524h (0140021B14h)  
      CATCH_CHECK(f[i] == Approx(std::exp(e[i])));
00000001400219B5  lea         rdi,[rbp-30h]  
00000001400219B9  mov         rcx,rdi  
00000001400219BC  lea         rdx,[string "f[i] == Approx(std::exp(e[i]))" (01401C20C2h)]  
00000001400219C3  call        Catch::StringRef::StringRef (01400B2ED0h)  
00000001400219C8  lea         rax,[string "C:\\dev\\dev1\\base-cpp\\tests\\src\\m"... (01401BE058h)]  
00000001400219CF  mov         qword ptr [rbp+120h],rax  
00000001400219D6  mov         qword ptr [rbp+128h],23Fh  
00000001400219E1  lea         rax,[string "CATCH_CHECK" (01401BF818h)]  
00000001400219E8  mov         qword ptr [rbp+0B0h],rax  
00000001400219EF  mov         qword ptr [rbp+0B8h],0Bh  
00000001400219FA  mov         dword ptr [rsp+20h],2  
0000000140021A02  lea         rcx,[catchAssertionHandler]  
0000000140021A09  lea         rdx,[rbp+0B0h]  
0000000140021A10  lea         r8,[rbp+120h]  
0000000140021A17  mov         r9,rdi  
0000000140021A1A  call        Catch::AssertionHandler::AssertionHandler (014008C460h)  
0000000140021A1F  mov         qword ptr [rbp+1B8h],rsi  
0000000140021A26  movss       xmm0,dword ptr e[rsi]  
0000000140021A2C  call        expf (0140171C60h)  
0000000140021A31  xorps       xmm1,xmm1  
0000000140021A34  cvtss2sd    xmm1,xmm0  
0000000140021A38  lea         rcx,[rbp+0C0h]  
0000000140021A3F  call        Catch::Detail::Approx::Approx (014008B9D0h)  
0000000140021A44  mov         rax,qword ptr [rbp+1B8h]  
0000000140021A4B  lea         rsi,[rax+rbp]  
0000000140021A4F  add         rsi,30h  
0000000140021A53  movss       xmm0,dword ptr [rsi]  
0000000140021A57  xorps       xmm1,xmm1  
0000000140021A5A  cvtss2sd    xmm1,xmm0  
0000000140021A5E  lea         rcx,[rbp+0C0h]  
0000000140021A65  call        Catch::Detail::Approx::equalityComparisonImpl (014008BEC0h)  
0000000140021A6A  mov         bl,al  
0000000140021A6C  lea         rcx,[rbp+120h]  
0000000140021A73  lea         rdx,[string "==" (01401BFAA9h)]  
0000000140021A7A  call        Catch::StringRef::StringRef (01400B2ED0h)  
0000000140021A7F  mov         byte ptr [rbp+138h],1  
0000000140021A86  mov         byte ptr [rbp+139h],bl  
0000000140021A8C  lea         rax,[Catch::BinaryExpr<float const & __ptr64,Catch::Detail::Approx const & __ptr64>::`vftable' (014019CC48h)]  
0000000140021A93  mov         qword ptr [rbp+130h],rax  
0000000140021A9A  mov         qword ptr [rbp+140h],rsi  
0000000140021AA1  movups      xmm0,xmmword ptr [rbp+120h]  
0000000140021AA8  lea         rax,[rbp+148h]  
0000000140021AAF  movups      xmmword ptr [rax],xmm0  
0000000140021AB2  lea         rax,[rbp+0C0h]  
0000000140021AB9  mov         qword ptr [rbp+158h],rax  
0000000140021AC0  lea         rcx,[catchAssertionHandler]  
0000000140021AC7  lea         rdx,[rbp+130h]  
0000000140021ACE  call        Catch::AssertionHandler::handleExpr (014008C650h)  
0000000140021AD3  lea         rcx,[rbp+130h]  
0000000140021ADA  call        Catch::ITransientExpression::~ITransientExpression (014008D9A0h)  
0000000140021ADF  lea         rcx,[catchAssertionHandler]  
0000000140021AE6  call        Catch::AssertionHandler::complete (014008C700h)  
0000000140021AEB  cmp         byte ptr [rbp+1A2h],0  
0000000140021AF2  jne         ____C_A_T_C_H____T_E_S_T____78+3B0h (01400219A0h)  
0000000140021AF8  mov         rcx,qword ptr [rbp+1A8h]  
0000000140021AFF  mov         rax,qword ptr [rcx]  
0000000140021B02  lea         rdx,[catchAssertionHandler]  
0000000140021B09  call        qword ptr [rax+88h]  
0000000140021B0F  jmp         ____C_A_T_C_H____T_E_S_T____78+3B0h (01400219A0h)  

Here are the compiler flags (stripped of include paths):

ClCompile:
  All outputs are up-to-date.
  C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\Llvm\x64\bin\clang-cl.exe /c /Z7 /nologo /W3 /WX- /diagnostics:column /Ox /Ob2 /Oi /Ot /O
  y /D WIN32 /D _WINDOWS /D _USE_MATH_DEFINES=1 /D SMTG_RENAME_ASSERT=1 /D RELEASE=1 /D PP_RELEASE=1 /D NDEBUG=1 /D PP_MACHINE_LITTLE_ENDIAN=1 /D PP_PLATFORM_WINDOWS=1 /D LINK_PLATFORM_WINDOWS=1 /D NOMINMAX /D NOCOMM /D NOMCX /D WIN32_LEAN_AND_MEAN /D WINVER=0x0601 /D _WIN32_WINNT
  =0x0601 /D _UNICODE /D UNICODE /D PP_MACHINE_64_BIT=1 /D PP_COMPILER_MSVC /D _CRT_SECURE_NO_WARNINGS /D PP_COMPILER_CLANG_CL /D PP_MACHINE_INTEL=1 /D PP_SSE=1 /D PP_VECSZ=4 /D PP_ALIGN=32 /D "CMAKE_INTDIR=\"Release\"" /D _UNICODE /D UNICODE /EHsc /MT /GS /fp:fast /GR /std:c++17
  /Fo"base-tests.dir\Release\\" /Gd /TP -m64   /clang:-fno-math-errno /clang:-ffp-model=fast /clang:-ffp-exception-behavior=ignore /clang:-ffp-contract=fast /clang:-fno-lto /clang:-mssse3 /clang:-mfpmath=sse "C:\dev\dev1\base-cpp\tests\src\main\cpp\tests\SocketTests.cpp"

Can you reproduce the issue? Is there a chance that you could set up AppVeyor/Travis CI to check Windows/clang-cl? Many thanks.

Regards, Alexandre

abique avatar Sep 16 '20 10:09 abique

I'm cross-linking this issue with the corresponding one in the LLVM bug tracker, since it could be an LLVM issue: https://bugs.llvm.org/show_bug.cgi?id=47547

abique avatar Sep 16 '20 13:09 abique

I'm unable to reproduce the issue on Linux with clang. clang and clang-cl share the same codebase, so I'd think the bug is fixed, but I cannot tell whether it was an xsimd issue or not.

serge-sans-paille avatar Apr 19 '23 22:04 serge-sans-paille