Metal.jl

Validation-related back-end crash on macOS Ventura

Status: Open. maleadt opened this issue 3 years ago • 0 comments

The following IR, reduced from our test suite, crashes the Metal back-end compiler when shader validation is enabled (MTL_SHADER_VALIDATOR=1) on macOS Ventura:

; ModuleID = 'broken.ll'
; Module-level header of the reduced reproducer.
source_filename = "text"
; Data layout string: leading "e" = little-endian; "p:64:64:64" = 64-bit
; pointers with 64-bit ABI/preferred alignment; the remaining entries give
; the alignments for integer, float and vector types, and "n8:16:32" lists
; the native integer widths.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
; Target triple: arch "air64", vendor "apple", OS "macosx13.0.0".
target triple = "air64-apple-macosx13.0.0"

; Reduced kernel entry point (registered as a kernel via !air.kernel below).
;
; Arguments, as used by the body:
;   %0  pointer (addrspace 1) to a { i8 addrspace(1)*, [1 x i64] } struct;
;       only its first field (a pointer) is read, and that pointer is the
;       destination of the final store.
;   %2  pointer (addrspace 1) to a struct whose nested field 0,0 is an
;       i8 addrspace(1)*; that pointer is the source of the first load.
;   %1, %3, %4 and the four i32 thread/threadgroup arguments are not
;       referenced anywhere in the body.
;
; Several GEP indices are `undef`; presumably left behind by the automated
; reduction of the original test case (TODO confirm). Do not "clean these
; up": the IR is kept exactly as it triggers the reported crash.
define void @kernel_function({ i8 addrspace(1)*, [1 x i64] } addrspace(1)* %0, float addrspace(1)* %1, { { i8 addrspace(1)*, [1 x i64] }, i8, i8 } addrspace(1)* %2, i64 addrspace(1)* %3, i64 addrspace(1)* %4, i32 %thread_position_in_threadgroup, i32 %threadgroup_position_in_grid, i32 %threads_per_threadgroup, i32 %threadgroups_per_grid) local_unnamed_addr {
conversion:
  ; Reinterpret %0 so that its first field can be loaded directly as a
  ; pointer to [2 x float] in addrspace(1).
  %5 = bitcast { i8 addrspace(1)*, [1 x i64] } addrspace(1)* %0 to [2 x float] addrspace(1)* addrspace(1)*
  %.unpack913 = load [2 x float] addrspace(1)*, [2 x float] addrspace(1)* addrspace(1)* %5, align 8
  ; Load the whole aggregate behind %2 and pull out nested field 0,0
  ; (the i8 addrspace(1)* data pointer).
  %6 = load { { i8 addrspace(1)*, [1 x i64] }, i8, i8 }, { { i8 addrspace(1)*, [1 x i64] }, i8, i8 } addrspace(1)* %2, align 8
  %.fca.0.0.extract = extractvalue { { i8 addrspace(1)*, [1 x i64] }, i8, i8 } %6, 0, 0
  ; Stack temporary holding one [2 x float]; %.sub is its i8* view.
  %7 = alloca [2 x float], align 16
  %.sub = bitcast [2 x float]* %7 to i8*
  ; Treat the extracted data pointer as an array of [2 x float] and read
  ; element 0 of the array at an undef outer index.
  %8 = bitcast i8 addrspace(1)* %.fca.0.0.extract to [2 x float] addrspace(1)*
  %.elt = getelementptr inbounds [2 x float], [2 x float] addrspace(1)* %8, i64 undef, i64 0
  %.unpack = load float, float addrspace(1)* %.elt, align 4
  ; Write the loaded float into element 0 of the stack temporary.
  %.repack = getelementptr inbounds [2 x float], [2 x float]* %7, i64 0, i64 0
  store float %.unpack, float* %.repack, align 16
  ; Read a float back from the stack temporary at an undef byte offset.
  %9 = getelementptr i8, i8* %.sub, i64 undef
  %10 = bitcast i8* %9 to float*
  %11 = load float, float* %10, align 4
  ; Store that float to element 0 of the [2 x float] at an undef index in
  ; the array whose pointer was loaded from %0.
  %.repack11 = getelementptr inbounds [2 x float], [2 x float] addrspace(1)* %.unpack913, i64 undef, i64 0
  store float %11, float addrspace(1)* %.repack11, align 4
  ret void
}

; Attribute group #0 is not referenced by @kernel_function above;
; presumably a leftover from the reduction (TODO confirm).
attributes #0 = { cold noreturn nounwind }

; Named metadata: module flags, the kernel list, the producer ident string,
; and the AIR / language version nodes.
!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
!air.kernel = !{!10}
!llvm.ident = !{!22}
!air.version = !{!23}
!air.language_version = !{!24}

; Module flags (!0-!8). The leading i32 is the LLVM module-flag merge
; behavior; the string is the flag name; the last operand is its value.
; There is no !9 in this module.
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 7, !"air.max_device_buffers", i32 31}
!3 = !{i32 7, !"air.max_constant_buffers", i32 31}
!4 = !{i32 7, !"air.max_threadgroup_buffers", i32 31}
!5 = !{i32 7, !"air.max_textures", i32 128}
!6 = !{i32 7, !"air.max_read_write_textures", i32 8}
!7 = !{i32 7, !"air.max_samplers", i32 16}
!8 = !{i32 2, !"SDK Version", [2 x i32] [i32 13, i32 0]}
; Kernel descriptor: the function pointer, an empty node (!11), and the
; list of per-argument descriptors (!12).
!10 = !{void ({ i8 addrspace(1)*, [1 x i64] } addrspace(1)*, float addrspace(1)*, { { i8 addrspace(1)*, [1 x i64] }, i8, i8 } addrspace(1)*, i64 addrspace(1)*, i64 addrspace(1)*, i32, i32, i32, i32)* @kernel_function, !11, !12}
!11 = !{}
!12 = !{!13, !14, !15, !16, !17, !18, !19, !20, !21}
; !13-!17: buffer arguments 0-4. Each entry carries the argument index, a
; location index, "air.read_write" access, address space 1, and the
; argument type's size and alignment.
!13 = !{i32 0, !"air.buffer", !"air.location_index", i32 0, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 16, !"air.arg_type_align_size", i32 8}
; NOTE(review): size 4 with alignment 8 for the float buffer; confirm this
; is intentional and not an artifact of how the metadata was generated.
!14 = !{i32 1, !"air.buffer", !"air.location_index", i32 1, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 4, !"air.arg_type_align_size", i32 8}
!15 = !{i32 2, !"air.buffer", !"air.location_index", i32 2, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 24, !"air.arg_type_align_size", i32 8}
!16 = !{i32 3, !"air.buffer", !"air.location_index", i32 3, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 8, !"air.arg_type_align_size", i32 8}
!17 = !{i32 4, !"air.buffer", !"air.location_index", i32 4, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 8, !"air.arg_type_align_size", i32 8}
; !18-!21: arguments 5-8, the thread/threadgroup position and size values,
; each declared with type name "uint".
!18 = !{i32 5, !"air.thread_position_in_threadgroup", !"air.arg_type_name", !"uint"}
!19 = !{i32 6, !"air.threadgroup_position_in_grid", !"air.arg_type_name", !"uint"}
!20 = !{i32 7, !"air.threads_per_threadgroup", !"air.arg_type_name", !"uint"}
!21 = !{i32 8, !"air.threadgroups_per_grid", !"air.arg_type_name", !"uint"}
; Producer ident string and version nodes; the three integers are
; presumably major/minor/patch, i.e. 2.4.0 (TODO confirm).
!22 = !{!"Apple metal version 31001.322 (metalfe-31001.322.1)"}
!23 = !{i32 2, i32 4, i32 0}
!24 = !{!"Metal", i32 2, i32 4, i32 0}

This looks like a back-end compiler issue; the following error appears in the output of `log stream`:

LLVM ERROR: unable to legalize instruction: %244:_(p0) = 141 %243:_, 4
Context:
%244:_(p0) = 141 %243:_, 4
%243:_(p0) = 66 %493:_(s64)
%493:_(s64) = 101 %492:_(s32)
%492:_(s32) = 35 %490:_, %495:_
%490:_(s32) = 94 %489:_(s64)
%495:_(s32) = 95 i32 8
%489:_(s64) = 65 %242:_(p0)
%242:_(p0) = 15 $noreg
 (in function: agc.main)

Posted by maleadt on Jun 23 '22 at 07:06