Segfault on ROCm 5.0
The following kernel produces a segfault on AMDGPU, although it works on CUDA.
MWE:
# Minimal reproducer from the report: launch a KernelAbstractions kernel on a ROCm device.
using AMDGPU
using ROCKernels
using KernelAbstractions
# Allocate a zero-filled array for a ROCDevice by forwarding to AMDGPU.zeros.
# NOTE(review): this adds a Base.zeros method for a type this script does not define
# (type piracy); tolerable only because this is a throwaway reproducer.
Base.zeros(::ROCDevice, T, shape) = AMDGPU.zeros(T, shape)
# Value passed as the second argument when instantiating the kernel in `main`
# (presumably the workgroup size — confirm against the KernelAbstractions docs).
linear_threads(::ROCDevice) = 512
# Kernel: add 1f0 to the element of `x` selected by the first component of the global index.
@kernel function t!(x)
i = @index(Global, NTuple)
# Only i[1] is used. NOTE(review): with the 2-D ndrange=(16, 4) used in `main`, several
# work-items presumably share the same i[1], making this += unsynchronized — confirm.
x[i[1]] += 1f0
end
# Allocate a 16-element Float32 array on the device, launch `t!` on it, and wait on the result.
function main()
device = ROCDevice()
# Per the backtrace in the report, the abort is raised from this `zeros` call
# (during compilation of the fill! kernel), i.e. before `t!` is launched.
x = zeros(device, Float32, 16);
wait(t!(device, linear_threads(device))(x; ndrange=(16, 4)))
end
main()
Details:
- Julia 1.8.0-rc1 debug build with LLVM assertions
- `]st`:
[21141c5a] AMDGPU v0.3.7
[63c18a36] KernelAbstractions v0.8.2 `https://github.com/JuliaGPU/KernelAbstractions.jl.git#master`
[7eb9e9f0] ROCKernels v0.3.1 `https://github.com/JuliaGPU/KernelAbstractions.jl.git:lib/ROCKernels#master`
Error on debug build of Julia with LLVM assertions:
julia: /workspace/srcdir/llvm-project/llvm/lib/IR/Value.cpp:494: void llvm::Value::doRAUW(llvm::Value*, llvm::Value::ReplaceMetadataUses): Assertion `New->getType() == getType() && "replaceAllUses of value with new value of different type!"' failed.
signal (6): Aborted
in expression starting at /home/asmirnov/INGP.jl/src/a.jl:19
gsignal at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
abort at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
unknown function (ip: 0x7ffbde1b2728)
__assert_fail at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
_ZN4llvm5Value6doRAUWEPS0_NS0_19ReplaceMetadataUsesE at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line)
_ZN4llvm22BitcodeReaderValueList11assignValueEPNS_5ValueEj at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line)
_ZN12_GLOBAL__N_113BitcodeReader17parseFunctionBodyEPN4llvm8FunctionE at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line)
_ZN12_GLOBAL__N_113BitcodeReader11materializeEPN4llvm11GlobalValueE at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line)
_ZN12_GLOBAL__N_113BitcodeReader17materializeModuleEv at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line)
_ZN4llvm6Module14materializeAllEv at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line)
_ZN4llvm13BitcodeModule13getModuleImplERNS_11LLVMContextEbbbNS_12function_refIFNS_8OptionalINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEENS_9StringRefEEEE at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line)
_ZN4llvm16parseBitcodeFileENS_15MemoryBufferRefERNS_11LLVMContextENS_12function_refIFNS_8OptionalINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEENS_9StringRefEEEE at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line)
LLVMParseBitcodeInContext2 at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line)
LLVMParseBitcodeInContext2 at /home/asmirnov/.julia/packages/LLVM/WjSQG/lib/13/libLLVM_h.jl:5863 [inlined]
#parse#79 at /home/asmirnov/.julia/packages/LLVM/WjSQG/src/bitcode.jl:6
parse##kw at /home/asmirnov/.julia/packages/LLVM/WjSQG/src/bitcode.jl:3 [inlined]
#parse#80 at /home/asmirnov/.julia/packages/LLVM/WjSQG/src/bitcode.jl:12 [inlined]
parse##kw at /home/asmirnov/.julia/packages/LLVM/WjSQG/src/bitcode.jl:12 [inlined]
#99 at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/rtlib.jl:168 [inlined]
#open#378 at ./io.jl:384
open at ./io.jl:381 [inlined]
#97 at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/rtlib.jl:167
lock at ./lock.jl:185
unknown function (ip: 0x7ffba80c4081)
_jl_invoke at /home/asmirnov/julia/src/gf.c:2358
ijl_apply_generic at /home/asmirnov/julia/src/gf.c:2540
#load_runtime#96 at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/utils.jl:64
load_runtime##kw at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/utils.jl:62 [inlined]
macro expansion at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/driver.jl:217 [inlined]
#emit_llvm#115 at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/utils.jl:64
unknown function (ip: 0x7ffba80abeaf)
_jl_invoke at /home/asmirnov/julia/src/gf.c:2358
ijl_invoke at /home/asmirnov/julia/src/gf.c:2365
unknown function (ip: 0x7ffba80a0c29)
unknown function (ip: 0x7ffba80a0be9)
emit_llvm##kw at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/utils.jl:62 [inlined]
#268 at /home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/execution.jl:368
JuliaContext at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/driver.jl:74
unknown function (ip: 0x7ffba80a0ddd)
_jl_invoke at /home/asmirnov/julia/src/gf.c:2358
ijl_apply_generic at /home/asmirnov/julia/src/gf.c:2540
rocfunction_compile at /home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/execution.jl:367
cached_compilation at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/cache.jl:90
#rocfunction#265 at /home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/execution.jl:358
rocfunction at /home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/execution.jl:348 [inlined]
macro expansion at /home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/execution.jl:233 [inlined]
#gpu_call#309 at /home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/array.jl:14
gpu_call##kw at /home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/array.jl:11 [inlined]
#gpu_call#1 at /home/asmirnov/.julia/packages/GPUArrays/Zecv7/src/device/execution.jl:72 [inlined]
gpu_call at /home/asmirnov/.julia/packages/GPUArrays/Zecv7/src/device/execution.jl:41 [inlined]
fill! at /home/asmirnov/.julia/packages/GPUArrays/Zecv7/src/host/construction.jl:14 [inlined]
zeros at /home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/array.jl:386 [inlined]
zeros at /home/asmirnov/INGP.jl/src/a.jl:5 [inlined]
main at /home/asmirnov/INGP.jl/src/a.jl:16
unknown function (ip: 0x7ffba8069e6f)
_jl_invoke at /home/asmirnov/julia/src/gf.c:2358
ijl_apply_generic at /home/asmirnov/julia/src/gf.c:2540
jl_apply at /home/asmirnov/julia/src/julia.h:1838
do_call at /home/asmirnov/julia/src/interpreter.c:126
eval_value at /home/asmirnov/julia/src/interpreter.c:215
eval_stmt_value at /home/asmirnov/julia/src/interpreter.c:166
eval_body at /home/asmirnov/julia/src/interpreter.c:594
jl_interpret_toplevel_thunk at /home/asmirnov/julia/src/interpreter.c:750
jl_toplevel_eval_flex at /home/asmirnov/julia/src/toplevel.c:906
jl_toplevel_eval_flex at /home/asmirnov/julia/src/toplevel.c:850
ijl_toplevel_eval at /home/asmirnov/julia/src/toplevel.c:915
ijl_toplevel_eval_in at /home/asmirnov/julia/src/toplevel.c:965
eval at ./boot.jl:368 [inlined]
include_string at ./loading.jl:1281
jl_fptr_args at /home/asmirnov/julia/src/gf.c:2119
_jl_invoke at /home/asmirnov/julia/src/gf.c:2339
ijl_apply_generic at /home/asmirnov/julia/src/gf.c:2540
_include at ./loading.jl:1341
include at ./Base.jl:422
jfptr_include_31490 at /home/asmirnov/julia/usr/lib/julia/sys-debug.so (unknown line)
_jl_invoke at /home/asmirnov/julia/src/gf.c:2339
ijl_apply_generic at /home/asmirnov/julia/src/gf.c:2540
exec_options at ./client.jl:303
_start at ./client.jl:522
jfptr__start_52601 at /home/asmirnov/julia/usr/lib/julia/sys-debug.so (unknown line)
_jl_invoke at /home/asmirnov/julia/src/gf.c:2339
ijl_apply_generic at /home/asmirnov/julia/src/gf.c:2540
jl_apply at /home/asmirnov/julia/src/julia.h:1838
true_main at /home/asmirnov/julia/src/jlapi.c:567
jl_repl_entrypoint at /home/asmirnov/julia/src/jlapi.c:711
jl_load_repl at /home/asmirnov/julia/cli/loader_lib.c:271
main at /home/asmirnov/julia/cli/loader_exe.c:59
__libc_start_main at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
_start at /home/asmirnov/julia/julia (unknown line)
Allocations: 56832574 (Pool: 56815656; Big: 16918); GC: 51
Aborted (core dumped)
This is most likely due to an LLVM version mismatch: you are using Julia with LLVM 13, while the ROCm libraries have been built with LLVM 14.x.
I have been wondering whether we can build the ROCm device libs (the only thing we really care about) with multiple LLVM versions.
On Wed, Jun 15, 2022, 09:20 Anton Smirnov @.***> wrote:
Following kernel produces segfault on AMDGPU. On CUDA it works though.
MWE:
using AMDGPU
using ROCKernels
using KernelAbstractions
Base.zeros(::ROCDevice, T, shape) = AMDGPU.zeros(T, shape)
linear_threads(::ROCDevice) = 512
@kernel function t!(x)
i = @index(Global, NTuple)
x[i[1]] += 1f0
end
function main()
device = ROCDevice()
x = zeros(device, Float32, 16);
wait(t!(device, linear_threads(device))(x; ndrange=(16, 4)))
end
main()
Details:
- Julia 1.8.0-rc1 debug build with LLVM assertions
- `]st`:
[21141c5a] AMDGPU v0.3.7
[63c18a36] KernelAbstractions v0.8.2 `https://github.com/JuliaGPU/KernelAbstractions.jl.git#master`
[7eb9e9f0] ROCKernels v0.3.1 `https://github.com/JuliaGPU/KernelAbstractions.jl.git:lib/ROCKernels#master`
Error on debug build of Julia with LLVM assertions:
julia: /workspace/srcdir/llvm-project/llvm/lib/IR/Value.cpp:494: void llvm::Value::doRAUW(llvm::Value*, llvm::Value::ReplaceMetadataUses): Assertion `New->getType() == getType() && "replaceAllUses of value with new value of different type!"' failed.
signal (6): Aborted in expression starting at /home/asmirnov/INGP.jl/src/a.jl:19 gsignal at /lib/x86_64-linux-gnu/libc.so.6 (unknown line) abort at /lib/x86_64-linux-gnu/libc.so.6 (unknown line) unknown function (ip: 0x7ffbde1b2728) __assert_fail at /lib/x86_64-linux-gnu/libc.so.6 (unknown line) _ZN4llvm5Value6doRAUWEPS0_NS0_19ReplaceMetadataUsesE at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line) _ZN4llvm22BitcodeReaderValueList11assignValueEPNS_5ValueEj at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line) _ZN12_GLOBAL__N_113BitcodeReader17parseFunctionBodyEPN4llvm8FunctionE at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line) _ZN12_GLOBAL__N_113BitcodeReader11materializeEPN4llvm11GlobalValueE at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line) _ZN12_GLOBAL__N_113BitcodeReader17materializeModuleEv at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line) _ZN4llvm6Module14materializeAllEv at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line) _ZN4llvm13BitcodeModule13getModuleImplERNS_11LLVMContextEbbbNS_12function_refIFNS_8OptionalINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEENS_9StringRefEEEE at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line) _ZN4llvm16parseBitcodeFileENS_15MemoryBufferRefERNS_11LLVMContextENS_12function_refIFNS_8OptionalINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEENS_9StringRefEEEE at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line) LLVMParseBitcodeInContext2 at /home/asmirnov/julia/usr/bin/../lib/libLLVM-13jl.so (unknown line) LLVMParseBitcodeInContext2 at /home/asmirnov/.julia/packages/LLVM/WjSQG/lib/13/libLLVM_h.jl:5863 [inlined] #parse#79 at /home/asmirnov/.julia/packages/LLVM/WjSQG/src/bitcode.jl:6 parse##kw at /home/asmirnov/.julia/packages/LLVM/WjSQG/src/bitcode.jl:3 [inlined] #parse#80 at /home/asmirnov/.julia/packages/LLVM/WjSQG/src/bitcode.jl:12 [inlined] parse##kw at 
/home/asmirnov/.julia/packages/LLVM/WjSQG/src/bitcode.jl:12 [inlined] #99 at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/rtlib.jl:168 [inlined] #open#378 at ./io.jl:384 open at ./io.jl:381 [inlined] #97 at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/rtlib.jl:167 lock at ./lock.jl:185 unknown function (ip: 0x7ffba80c4081) _jl_invoke at /home/asmirnov/julia/src/gf.c:2358 ijl_apply_generic at /home/asmirnov/julia/src/gf.c:2540 #load_runtime#96 at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/utils.jl:64 load_runtime##kw at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/utils.jl:62 [inlined] macro expansion at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/driver.jl:217 [inlined] #emit_llvm#115 at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/utils.jl:64 unknown function (ip: 0x7ffba80abeaf) _jl_invoke at /home/asmirnov/julia/src/gf.c:2358 ijl_invoke at /home/asmirnov/julia/src/gf.c:2365 unknown function (ip: 0x7ffba80a0c29) unknown function (ip: 0x7ffba80a0be9) emit_llvm##kw at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/utils.jl:62 [inlined] #268 at /home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/execution.jl:368 JuliaContext at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/driver.jl:74 unknown function (ip: 0x7ffba80a0ddd) _jl_invoke at /home/asmirnov/julia/src/gf.c:2358 ijl_apply_generic at /home/asmirnov/julia/src/gf.c:2540 rocfunction_compile at /home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/execution.jl:367 cached_compilation at /home/asmirnov/.julia/packages/GPUCompiler/1FdJy/src/cache.jl:90 #rocfunction#265 at /home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/execution.jl:358 rocfunction at /home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/execution.jl:348 [inlined] macro expansion at /home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/execution.jl:233 [inlined] #gpu_call#309 at /home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/array.jl:14 gpu_call##kw at 
/home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/array.jl:11 [inlined] #gpu_call#1 at /home/asmirnov/.julia/packages/GPUArrays/Zecv7/src/device/execution.jl:72 [inlined] gpu_call at /home/asmirnov/.julia/packages/GPUArrays/Zecv7/src/device/execution.jl:41 [inlined] fill! at /home/asmirnov/.julia/packages/GPUArrays/Zecv7/src/host/construction.jl:14 [inlined] zeros at /home/asmirnov/.julia/packages/AMDGPU/f6OQx/src/array.jl:386 [inlined] zeros at /home/asmirnov/INGP.jl/src/a.jl:5 [inlined] main at /home/asmirnov/INGP.jl/src/a.jl:16 unknown function (ip: 0x7ffba8069e6f) _jl_invoke at /home/asmirnov/julia/src/gf.c:2358 ijl_apply_generic at /home/asmirnov/julia/src/gf.c:2540 jl_apply at /home/asmirnov/julia/src/julia.h:1838 do_call at /home/asmirnov/julia/src/interpreter.c:126 eval_value at /home/asmirnov/julia/src/interpreter.c:215 eval_stmt_value at /home/asmirnov/julia/src/interpreter.c:166 eval_body at /home/asmirnov/julia/src/interpreter.c:594 jl_interpret_toplevel_thunk at /home/asmirnov/julia/src/interpreter.c:750 jl_toplevel_eval_flex at /home/asmirnov/julia/src/toplevel.c:906 jl_toplevel_eval_flex at /home/asmirnov/julia/src/toplevel.c:850 ijl_toplevel_eval at /home/asmirnov/julia/src/toplevel.c:915 ijl_toplevel_eval_in at /home/asmirnov/julia/src/toplevel.c:965 eval at ./boot.jl:368 [inlined] include_string at ./loading.jl:1281 jl_fptr_args at /home/asmirnov/julia/src/gf.c:2119 _jl_invoke at /home/asmirnov/julia/src/gf.c:2339 ijl_apply_generic at /home/asmirnov/julia/src/gf.c:2540 _include at ./loading.jl:1341 include at ./Base.jl:422 jfptr_include_31490 at /home/asmirnov/julia/usr/lib/julia/sys-debug.so (unknown line) _jl_invoke at /home/asmirnov/julia/src/gf.c:2339 ijl_apply_generic at /home/asmirnov/julia/src/gf.c:2540 exec_options at ./client.jl:303 _start at ./client.jl:522 jfptr__start_52601 at /home/asmirnov/julia/usr/lib/julia/sys-debug.so (unknown line) _jl_invoke at /home/asmirnov/julia/src/gf.c:2339 ijl_apply_generic at 
/home/asmirnov/julia/src/gf.c:2540 jl_apply at /home/asmirnov/julia/src/julia.h:1838 true_main at /home/asmirnov/julia/src/jlapi.c:567 jl_repl_entrypoint at /home/asmirnov/julia/src/jlapi.c:711 jl_load_repl at /home/asmirnov/julia/cli/loader_lib.c:271 main at /home/asmirnov/julia/cli/loader_exe.c:59 __libc_start_main at /lib/x86_64-linux-gnu/libc.so.6 (unknown line) _start at /home/asmirnov/julia/julia (unknown line) Allocations: 56832574 (Pool: 56815656; Big: 16918); GC: 51 Aborted (core dumped)
— Reply to this email directly, view it on GitHub https://github.com/JuliaGPU/AMDGPU.jl/issues/239, or unsubscribe https://github.com/notifications/unsubscribe-auth/AABDO2XRXG7AG2SAE7SVRBTVPHKDRANCNFSM5Y3JDPKQ . You are receiving this because you are subscribed to this thread.Message ID: @.***>
> I have been wondering if we can build the rock device libs which is the only thing we really care about with multiple LLVM versions.

That would indeed be nice to have, if possible.
This is likely not an LLVM version issue; it stems from our rewriting of alloca address spaces from 0 to 5. We should be able to bypass this issue with https://github.com/JuliaLang/julia/pull/45544 and https://github.com/JuliaGPU/GPUCompiler.jl/pull/342.
I think we can close this now, as the issue has been resolved:
- we now use the correct ROCm & LLVM versions
- the alloca issue was also fixed