Input too large?
I'm trying to decompress a 60GB gzipped file (this is the compressed size; I'm not sure what the uncompressed size is, but I expect a high compression ratio), and I hit this error:
ERROR: InexactError: trunc(UInt32, 4294986918)
Stacktrace:
[1] throw_inexacterror(f::Symbol, #unused#::Type{UInt32}, val::UInt64)
@ Core ./boot.jl:614
[2] checked_trunc_uint
@ ./boot.jl:644 [inlined]
[3] toUInt32
@ ./boot.jl:733 [inlined]
[4] UInt32
@ ./boot.jl:768 [inlined]
[5] convert
@ ./number.jl:7 [inlined]
[6] setproperty!
@ ./Base.jl:39 [inlined]
[7] process(codec::CodecZlib.GzipDecompressor, input::TranscodingStreams.Memory, output::TranscodingStreams.Memory, error::TranscodingStreams.Error)
@ CodecZlib ~/.julia/packages/CodecZlib/ruMLE/src/decompression.jl:160
It would seem that the issue lies in this line:
https://github.com/JuliaIO/CodecZlib.jl/blob/f9fddaa28c093c590a7a93358709df2945306bc7/src/decompression.jl#L160
avail_in is a UInt32 (Cuint) while size is a UInt64 (UInt, see TranscodingStreams.jl/src/memory.jl#L11)
I imagine that either Zstream.avail_in needs to change to a Culong or min(input.size, typemax(Cuint)) needs to be applied, so that no more than typemax(Cuint) bytes are handed to zlib in a single call.
- Julia 1.8.3
- CodecZlib 0.7.0
Here's a minimum viable demonstration using JLD2 to call CodecZlib for convenience. Note that it requires > 30 GB of memory.
Demo
using JLD2, CodecZlib
v = zeros(typemax(UInt32)+10)
jldopen("example.jld2", "w"; compress = true) do f
f["v"] = v
end
jldopen("example.jld2")["v"] # errors
Error
ERROR: InexactError: trunc(UInt32, 4333001269)
Stacktrace:
[1] throw_inexacterror(f::Symbol, #unused#::Type{UInt32}, val::UInt64)
@ Core ./boot.jl:634
[2] checked_trunc_uint
@ ./boot.jl:664 [inlined]
[3] toUInt32
@ ./boot.jl:753 [inlined]
[4] UInt32
@ ./boot.jl:788 [inlined]
[5] convert
@ ./number.jl:7 [inlined]
[6] setproperty!
@ ./Base.jl:38 [inlined]
[7] process(codec::ZlibDecompressor, input::TranscodingStreams.Memory, output::TranscodingStreams.Memory, error::TranscodingStreams.Error)
@ CodecZlib /fs/lustre/cita/zack/jl/packages/CodecZlib/ytMgl/src/decompression.jl:162
[8] unsafe_transcode!(output::TranscodingStreams.Buffer, codec::ZlibDecompressor, input::TranscodingStreams.Buffer)
@ TranscodingStreams /fs/lustre/cita/zack/jl/packages/TranscodingStreams/2McN2/src/transcode.jl:152
[9] transcode!
@ /fs/lustre/cita/zack/jl/packages/TranscodingStreams/2McN2/src/transcode.jl:127 [inlined]
[10] transcode(codec::ZlibDecompressor, input::TranscodingStreams.Buffer, output::Nothing)
@ TranscodingStreams /fs/lustre/cita/zack/jl/packages/TranscodingStreams/2McN2/src/transcode.jl:109
[11] transcode
@ /fs/lustre/cita/zack/jl/packages/TranscodingStreams/2McN2/src/transcode.jl:108 [inlined]
[12] transcode
@ /fs/lustre/cita/zack/jl/packages/TranscodingStreams/2McN2/src/transcode.jl:189 [inlined]
[13] decompress!(inptr::Ptr{Nothing}, data_length::Int64, element_size::Int64, n::Int64, decompressor::ZlibDecompressor)
@ JLD2 /fs/lustre/cita/zack/jl/packages/JLD2/ryhNR/src/compression.jl:254
[14] read_compressed_array!(v::Vector{Float64}, f::JLD2.JLDFile{JLD2.MmapIO}, rr::JLD2.ReadRepresentation{Float64, Float64}, data_length::Int64, filters::JLD2.FilterPipeline)
@ JLD2 /fs/lustre/cita/zack/jl/packages/JLD2/ryhNR/src/compression.jl:293
[15] read_array(f::JLD2.JLDFile{JLD2.MmapIO}, dataspace::JLD2.ReadDataspace, rr::JLD2.ReadRepresentation{Float64, Float64}, layout::JLD2.DataLayout, filters::JLD2.FilterPipeline, header_offset::JLD2.RelOffset, attributes::Vector{JLD2.ReadAttribute})
@ JLD2 /fs/lustre/cita/zack/jl/packages/JLD2/ryhNR/src/datasets.jl:408
[16] read_data(f::JLD2.JLDFile{JLD2.MmapIO}, rr::Any, read_dataspace::Tuple{JLD2.ReadDataspace, JLD2.RelOffset, JLD2.DataLayout, JLD2.FilterPipeline}, attributes::Vector{JLD2.ReadAttribute})
@ JLD2 /fs/lustre/cita/zack/jl/packages/JLD2/ryhNR/src/datasets.jl:240
[17] macro expansion
@ /fs/lustre/cita/zack/jl/packages/JLD2/ryhNR/src/datasets.jl:224 [inlined]
[18] macro expansion
@ /fs/lustre/cita/zack/jl/packages/JLD2/ryhNR/src/datatypes.jl:103 [inlined]
[19] read_data(f::JLD2.JLDFile{JLD2.MmapIO}, dataspace::JLD2.ReadDataspace, datatype_class::UInt8, datatype_offset::Int64, layout::JLD2.DataLayout, filters::JLD2.FilterPipeline, header_offset::JLD2.RelOffset, attributes::Vector{JLD2.ReadAttribute})
@ JLD2 /fs/lustre/cita/zack/jl/packages/JLD2/ryhNR/src/datasets.jl:211
[20] load_dataset(f::JLD2.JLDFile{JLD2.MmapIO}, offset::JLD2.RelOffset)
@ JLD2 /fs/lustre/cita/zack/jl/packages/JLD2/ryhNR/src/datasets.jl:125
[21] getindex(g::JLD2.Group{JLD2.JLDFile{JLD2.MmapIO}}, name::String)
@ JLD2 /fs/lustre/cita/zack/jl/packages/JLD2/ryhNR/src/groups.jl:109
[22] getindex(f::JLD2.JLDFile{JLD2.MmapIO}, name::String)
@ JLD2 /fs/lustre/cita/zack/jl/packages/JLD2/ryhNR/src/JLD2.jl:461
[23] top-level scope
@ REPL[4]:1