JSONTables.jl
JSONTables.jl copied to clipboard
arraytable slows down significantly when passed a table with heterogeneous columns
Here is the test code:
using JSONTables, DataFrames

# Benchmark: time `arraytable` vs `objecttable` when writing a wide DataFrame whose
# first three columns have different element types (String, Int, Bool) and whose
# remaining columns are Float64. One result row is recorded per (cols, rows) pair.
results = DataFrame(cols=Int[], rows=Int[], arraytable=Float64[], objecttable=Float64[])
for cols in (100, 200, 300, 400, 500), rows in (10^3, 10^4, 10^5)
    @show (cols, rows)
    # NOTE(review): DataFrames >= 1.0 requires `DataFrame(ones(rows, cols), :auto)`;
    # adjust this line if running against a newer release.
    df = DataFrame(ones(rows, cols))
    # Replace the first three columns so the table's columns are heterogeneous.
    df[!, 1] .= "a"
    df[!, 2] .= 1
    df[!, 3] .= true
    # Untimed warm-up with the SAME writer that is timed next, so the measurement
    # excludes first-call compilation.
    open(io -> arraytable(io, df), "test.json", "w")
    x1 = @elapsed open(io -> arraytable(io, df), "test.json", "w")
    open(io -> objecttable(io, df), "test.json", "w")
    x2 = @elapsed open(io -> objecttable(io, df), "test.json", "w")
    # A tuple keeps `cols`/`rows` as Int instead of promoting them to Float64
    # through a mixed-type array literal.
    push!(results, (cols, rows, x1, x2))
end
And here is the benchmark result (the arraytable and objecttable columns are elapsed times in seconds):
julia> results
15×4 DataFrame
│ Row │ cols │ rows │ arraytable │ objecttable │
│ │ Int64 │ Int64 │ Float64 │ Float64 │
├─────┼───────┼────────┼────────────┼─────────────┤
│ 1 │ 100 │ 1000 │ 0.178669 │ 0.0328257 │
│ 2 │ 100 │ 10000 │ 1.65927 │ 0.26272 │
│ 3 │ 100 │ 100000 │ 16.2332 │ 2.36529 │
│ 4 │ 200 │ 1000 │ 0.349468 │ 0.0498271 │
│ 5 │ 200 │ 10000 │ 3.49195 │ 0.595482 │
│ 6 │ 200 │ 100000 │ 34.8853 │ 4.9485 │
│ 7 │ 300 │ 1000 │ 0.547324 │ 0.0803132 │
│ 8 │ 300 │ 10000 │ 5.16746 │ 0.759614 │
│ 9 │ 300 │ 100000 │ 52.3498 │ 7.48296 │
│ 10 │ 400 │ 1000 │ 0.714898 │ 0.104794 │
│ 11 │ 400 │ 10000 │ 6.91257 │ 1.00389 │
│ 12 │ 400 │ 100000 │ 73.8235 │ 11.4878 │
│ 13 │ 500 │ 1000 │ 0.947894 │ 0.146453 │
│ 14 │ 500 │ 10000 │ 10.5129 │ 1.42812 │
│ 15 │ 500 │ 100000 │ 94.0811 │ 13.1131 │