LoopVectorization.jl icon indicating copy to clipboard operation
LoopVectorization.jl copied to clipboard

Incorrect indexing when not using zero-based arrays

Open chriselrod opened this issue 3 years ago • 0 comments

using LoopVectorization, Static
M,K,N = 3,4,5
A = view(fill(NaN,100,100), 11:10+M,11:10+K+1); A .= rand.();
B = view(fill(NaN,100,100), 11:10+N,11:10+K); B .= rand.();
C0 = view(zeros(100,100), 11:10+N, 11:10+M); C1 = deepcopy(C0);
function reference!(C,A,B)
  K = lastindex(C,static(2))
  for i_1 = indices((A, C), (1, 2))
      for i_0 = indices((B, C), (1, 1))
          begin
              Caccum = zero(eltype(C))
              for r_0 = axes(B, 2)
                  Caccum += A[i_1, r_0] * B[i_0, r_0]
              end
              C[i_0, i_1] = Caccum + ifelse(i_0 == firstindex(C, static(1)), A[i_1, K], zero(Caccum))
          end
      end
  end
end
function turbo_fail!(C,A,B)
  K = lastindex(C,static(2))
  @turbo for i_1 = indices((A, C), (1, 2))
      for i_0 = indices((B, C), (1, 1))
          begin
              Caccum = zero(eltype(C))
              for r_0 = axes(B, 2)
                  Caccum += A[i_1, r_0] * B[i_0, r_0]
              end
              C[i_0, i_1] = Caccum + ifelse(i_0 == firstindex(C, static(1)), A[i_1, K], zero(Caccum))
          end
      end
  end
end
reference!(C0,A,B); C0
turbo_fail!(C1,A,B); C1
C0 ≈ C1
C0 ≈ view(parent(C1), C1.indices[1] .+ 1, C1.indices[2])
using StrideArraysCore: PtrArray, zeroindex
turbo_fail!(zeroindex(PtrArray(C1)),zeroindex(PtrArray(A)),zeroindex(PtrArray(B))); C1
C0 ≈ C1

I get

julia> reference!(C0,A,B); C0
5×3 view(::Matrix{Float64}, 11:15, 11:13) with eltype Float64:
 1.4703    1.87281  1.27726
 0.870971  1.35742  0.975715
 0.395836  1.33265  0.968053
 0.219909  1.592    0.970622
 0.719743  2.00412  1.42784

julia> turbo_fail!(C1,A,B); C1
5×3 view(::Matrix{Float64}, 11:15, 11:13) with eltype Float64:
 0.0       0.0      0.0
 1.4703    1.87281  1.27726
 0.870971  1.35742  0.975715
 0.395836  1.33265  0.968053
 0.219909  1.592    0.970622

julia> C0 ≈ C1
false

julia> C0 ≈ view(parent(C1), C1.indices[1] .+ 1, C1.indices[2])
true

julia> using StrideArraysCore: PtrArray, zeroindex

julia> turbo_fail!(zeroindex(PtrArray(C1)),zeroindex(PtrArray(A)),zeroindex(PtrArray(B))); C1
5×3 view(::Matrix{Float64}, 11:15, 11:13) with eltype Float64:
 1.4703    1.87281  1.27726
 0.870971  1.35742  0.975715
 0.395836  1.33265  0.968053
 0.219909  1.592    0.970622
 0.719743  2.00412  1.42784

julia> C0 ≈ C1
true

So with 1-based indexing, it is storing C at an offset of 1 in the dimension where we're using the index. That is, LV converts C to use 0-based indexing, but uses 1-based indexing for i_0 because i_0 is used in an expression.

chriselrod avatar Jun 09 '22 21:06 chriselrod