Addition/broadcasting type unstable with mixed block range types
Allocations increase drastically with newer versions of BandedMatrices.jl, BlockbandedMatrices.jl, LazyArrays.jl and LazyBandedMatrices.jl
I do not know where the problem is.
$ julia-15 mwe.jl 20.566046 seconds (1.11 M allocations: 65.356 MiB, 0.16% gc time)
$ julia-16 mwe.jl 135.413411 seconds (1.11 G allocations: 95.863 GiB, 6.35% gc time, 3.05% compilation time)
$ julia-16 mwe.jl #(with package versions as in 1.5.3) 18.151103 seconds (2.76 M allocations: 143.134 MiB, 0.74% gc time, 18.48% compilation time)
OS: macOS Big Sur
Setup: Julia 1.5.3 [aae01518] BandedMatrices v0.15.15 [ffab5731] BlockBandedMatrices v0.8.11 [5078a376] LazyArrays v0.16.16 [d7e5e226] LazyBandedMatrices v0.2.12 Julia 1.6.1 [aae01518] BandedMatrices v0.16.8 [ffab5731] BlockBandedMatrices v0.10.4 [5078a376] LazyArrays v0.21.3 [d7e5e226] LazyBandedMatrices v0.5.5
@code_warntype 1.5.3 Variables #self#::Core.Compiler.Const(toeplitz, false) n::Int64 vc::Array{Array{Int64,2},1} vr::Array{Array{Int64,2},1} s::Int64 u::Int64 l::Int64 Tn::BlockSkylineMatrix{Int64,Array{Int64,1},BlockBandedMatrices.BlockSkylineSizes{Tuple{BlockArrays.BlockedUnitRange{Array{Int64,1}},BlockArrays.BlockedUnitRange{Array{Int64,1}}},Fill{Int64,1,Tuple{Base.OneTo{Int64}}},Fill{Int64,1,Tuple{Base.OneTo{Int64}}},BandedMatrix{Int64,Array{Int64,2},Base.OneTo{Int64}},Array{Int64,1}}} @_9::Union{Nothing, Tuple{Int64,Int64}} @_10::Union{Nothing, Tuple{Int64,Int64}} ii::Int64 jj::Int64
@code_warntype 1.6.1 Variables #self#::Core.Const(toeplitz) n::Int64 vc::Vector{Matrix{Int64}} vr::Vector{Matrix{Int64}} @_5::Union{Nothing, Tuple{Int64, Int64}} @_6::Union{Nothing, Tuple{Int64, Int64}} Tn::Any l::Int64 u::Int64 s::Int64 ii::Int64 jj::Int64
@code_warntype 1.6.1 (with package versions as in 1.5.3) Variables #self#::Core.Const(toeplitz) n::Int64 vc::Vector{Matrix{Int64}} vr::Vector{Matrix{Int64}} @_5::Union{Nothing, Tuple{Int64, Int64}} @_6::Union{Nothing, Tuple{Int64, Int64}} Tn::BlockBandedMatrix{Int64} l::Int64 u::Int64 s::Int64 ii::Int64 jj::Int64
mwe.jl:
using LinearAlgebra
using BandedMatrices
using BlockBandedMatrices
using LazyArrays
using LazyBandedMatrices
function toeplitz(n :: Integer, vc :: Array{T,1}, vr :: Array{T,1}) where T
s = size(vc[1],2)
u = size(vr)[1] - 1
l = size(vc)[1] - 1
Tn = BlockBandedMatrix(convert.(eltype(T),Zeros(n*s,n*s)), s*ones(Int64,n),s*ones(Int64,n), (l,u))
for ii = 1:length(vc)
Tn = Tn + BandedBlockBandedMatrix(Kron(BandedMatrix(-ii+1=>ones(eltype(T),n-ii+1)),vc[ii]))
end
for jj = 2:length(vr)
Tn = Tn + BandedBlockBandedMatrix(Kron(BandedMatrix( jj-1=>ones(eltype(T),n-jj+1)),vr[jj]))
end
return Tn
end
@time toeplitz(5000,[[ 1 2;3 4]], [[ 1 2;3 4], [9 10; 11 12]]);
Here's a better MWE:
julia> A = BlockBandedMatrix(randn(6,6),1:3,1:3,(1,1));
julia> B = BlockBandedMatrices._BandedBlockBandedMatrix(PseudoBlockArray(randn(3,6), (blockedrange(Fill(3,1)),Base.OneTo(6))), Base.OneTo(6), (0,0), (1,1));
julia> @code_warntype A+B
Variables
#self#::Core.Const(+)
A::BlockBandedMatrix{Float64}
B::BandedBlockBandedMatrix{Float64, PseudoBlockMatrix{Float64, Matrix{Float64}, Tuple{BlockedUnitRange{StepRange{Int64, Int64}}, Base.OneTo{Int64}}}, Base.OneTo{Int64}}
Body::BlockSkylineMatrix
1 ─ Base.promote_shape(A, B)
│ %2 = Base.broadcast_preserving_zero_d(Base.:+, A, B)::BlockSkylineMatrix
└── return %2
Note the real cause is now we insist on using BlockKron to have "good" block structure. So try this:
function toeplitz(n :: Integer, vc :: Array{T,1}, vr :: Array{T,1}) where T
s = size(vc[1],2)
u = size(vr)[1] - 1
l = size(vc)[1] - 1
Tn = BlockBandedMatrix(convert.(eltype(T),Zeros(n*s,n*s)), s*ones(Int64,n),s*ones(Int64,n), (l,u))
for ii = 1:length(vc)
Tn = Tn + BandedBlockBandedMatrix(BlockKron(BandedMatrix(-ii+1=>ones(eltype(T),n-ii+1)),vc[ii]))
end
for jj = 2:length(vr)
Tn = Tn + BandedBlockBandedMatrix(BlockKron(BandedMatrix( jj-1=>ones(eltype(T),n-jj+1)),vr[jj]))
end
return Tn
end