NormalizingFlows.jl
Issue: CuArray (CUDA GPU) support
# Reproduction 1: sampling from a GPU-moved flow fails because the base
# distribution stays on the CPU while the bijector parameters move to the GPU,
# so `rand(flow_g)` ends up calling `dot(::CuArray, ::Vector)` (see trace below).
using CUDA
using LinearAlgebra
using FunctionChains
using Bijectors
using Flux
using Distributions  # MvNormal
using Random

T = Float32
q0 = MvNormal(ones(T, 2))  # CPU base distribution (Float32 diagonal normal)
# Compose two planar layers; `f32` casts the Flux-created parameters to Float32.
ts = reduce(∘, [f32(Bijectors.PlanarLayer(2)) for _ in 1:2])
flow = transformed(q0, ts)
# gpu
CUDA.functional()   # confirm a usable CUDA device is present
flow_g = gpu(flow)  # NOTE: moves bijector params to GPU; `flow_g.dist` stays on CPU
ts_g = gpu(ts)
xs = rand(flow_g.dist, 10) # on cpu
ys_g = transform(ts_g, cu(xs)) # good — input explicitly moved to GPU first
logpdf(flow_g, ys_g[:, 1]) # good
rand(flow_g) # bug — CPU base samples fed to GPU-parameterized layers
Output:
julia> rand(flow_g) # bug
ERROR: MethodError: no method matching dot(::Int64, ::CuPtr{Float32}, ::Int64, ::Ptr{Float32}, ::Int64)
Closest candidates are:
dot(::Integer, ::Union{Ptr{Float32}, AbstractArray{Float32}}, ::Integer, ::Union{Ptr{Float32}, AbstractArray{Float32}}, ::Integer)
@ LinearAlgebra ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/LinearAlgebra/src/blas.jl:344
dot(::Integer, ::Union{Ptr{Float64}, AbstractArray{Float64}}, ::Integer, ::Union{Ptr{Float64}, AbstractArray{Float64}}, ::Integer)
@ LinearAlgebra ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/LinearAlgebra/src/blas.jl:344
Stacktrace:
[1] dot(x::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, y::Vector{Float32})
@ LinearAlgebra.BLAS ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/LinearAlgebra/src/blas.jl:395
[2] dot(x::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, y::Vector{Float32})
@ LinearAlgebra ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/LinearAlgebra/src/matmul.jl:14
[3] aT_b(a::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, b::Vector{Float32})
@ Bijectors ~/.julia/packages/Bijectors/SxXKg/src/utils.jl:4
[4] _transform(flow::PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, z::Vector{Float32})
@ Bijectors ~/.julia/packages/Bijectors/SxXKg/src/bijectors/planar_layer.jl:77
[5] transform(b::PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, z::Vector{Float32})
@ Bijectors ~/.julia/packages/Bijectors/SxXKg/src/bijectors/planar_layer.jl:82
[6] (::PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}})(x::Vector{Float32})
@ Bijectors ~/.julia/packages/Bijectors/SxXKg/src/interface.jl:80
[7] call_composed(fs::Tuple{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, x::Tuple{Vector{Float32}}, kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base ./operators.jl:1035
[8] call_composed
@ ./operators.jl:1034 [inlined]
[9] (::ComposedFunction{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}})(x::Vector{Float32}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Base ./operators.jl:1031
[10] (::ComposedFunction{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}})(x::Vector{Float32})
@ Base ./operators.jl:1031
[11] rand(td::MultivariateTransformed{MvNormal{Float32, PDMats.PDiagMat{Float32, Vector{Float32}}, FillArrays.Zeros{Float32, 1, Tuple{Base.OneTo{Int64}}}}, ComposedFunction{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}})
@ Bijectors ~/.julia/packages/Bijectors/SxXKg/src/transformed_distribution.jl:159
[12] top-level scope
@ REPL[67]:1
[13] top-level scope
@ ~/.julia/packages/CUDA/p5OVK/src/initialization.jl:171
Another example, where the base distribution itself lives on the GPU:
# Reproduction 2: even with the base distribution's parameters on the GPU,
# `rand(flow_gpu, 10)` fails — the trace below shows Distributions allocates
# the output buffer as a CPU `Matrix{Float32}` and then `unwhiten!` mixes it
# with the GPU-resident `PDiagMat{Float32, CuArray}` covariance.
using Bijectors
using CUDA
using Distributions
using Random
# Base distribution whose diagonal covariance is a CuArray (Float32 after `cu`).
q0_gpu = MvNormal(cu(ones(2))) # reference distribution
# Two planar layers with parameters sampled directly on the GPU via CURAND.
# NOTE(review): the trace shows the third argument ends up a CPU Vector{Float32}
# inside PlanarLayer — presumably converted by the constructor; verify upstream.
ts_gpu = reduce(∘, [Bijectors.PlanarLayer(rand(CURAND.default_rng(), 2), rand(CURAND.default_rng(), 2), rand(CURAND.default_rng(), 1)) for _ in 1:2]) # transformation
flow_gpu = transformed(q0_gpu, ts_gpu)
# Fails with "This object is not a GPU array" (see error output below).
rand(flow_gpu, 10)
Error output:
ERROR: This object is not a GPU array
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:35
[2] backend(#unused#::Type)
@ GPUArraysCore ~/packages/GPUArraysCore/src/GPUArraysCore.jl:148
[3] backend(x::Matrix{Float32})
@ GPUArraysCore ~/packages/GPUArraysCore/src/GPUArraysCore.jl:149
[4] _copyto!
@ ~/packages/GPUArrays/src/host/broadcast.jl:65 [inlined]
[5] materialize!
@ ~/packages/GPUArrays/src/host/broadcast.jl:41 [inlined]
[6] materialize!
@ ./broadcast.jl:881 [inlined]
[7] unwhiten!(r::Matrix{Float32}, a::PDMats.PDiagMat{Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, x::Matrix{Float32})
@ PDMats ~/packages/PDMats/src/pdiagmat.jl:107
[8] unwhiten!
@ ~/packages/PDMats/src/generics.jl:33 [inlined]
[9] _rand!(rng::TaskLocalRNG, d::MvNormal{Float32, PDMats.PDiagMat{Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, FillArrays.Zeros{Float32, 1, Tuple{Base.OneTo{Int64}}}}, x::Matrix{Float32})
@ Distributions ~/packages/Distributions/src/multivariate/mvnormal.jl:277
[10] rand!
@ ~/packages/Distributions/src/genericrand.jl:108 [inlined]
[11] rand
@ ~/packages/Distributions/src/multivariates.jl:23 [inlined]
[12] rand(rng::TaskLocalRNG, td::MultivariateTransformed{MvNormal{Float32, PDMats.PDiagMat{Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, FillArrays.Zeros{Float32, 1, Tuple{Base.OneTo{Int64}}}}, ComposedFunction{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Vector{Float32}}, PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Vector{Float32}}}}, num_samples::Int64)
@ Bijectors ~/packages/Bijectors/src/transformed_distribution.jl:163
[13] rand(s::MultivariateTransformed{MvNormal{Float32, PDMats.PDiagMat{Float32, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, FillArrays.Zeros{Float32, 1, Tuple{Base.OneTo{Int64}}}}, ComposedFunction{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Vector{Float32}}, PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, Vector{Float32}}}}, dims::Int64)
@ Distributions ~/packages/Distributions/src/genericrand.jl:22
[14] top-level scope
@ ~/Workspace/julia_gpu/nf.jl:18