TableMetadataTools.jl
TableMetadataTools.jl copied to clipboard
Better methods for showing metadata
I'm using metadata for the first time today (I usually use Stata and R for data cleaning), and I found it helpful to define the following functions to make the metadata easier to see:
"""
    showcolmetadata(df, col, label)

Return the column-level metadata value stored under `label` for the single
column `col` of `df`. This is a direct pass-through to `colmetadata`.
"""
function showcolmetadata(
    df,
    col::Union{AbstractString, Symbol},
    label::AbstractString,
)
    value = colmetadata(df, col, label)
    return value
end
"""
    showcolmetadata(df, cols::AbstractVector, label)

Print a two-column table listing, for every column in `cols`, the metadata
value stored under `label`. The first table column is headed `variable`; the
second is headed with the text of `label`.
"""
function showcolmetadata(
    df,
    cols::AbstractVector{<:Union{AbstractString, Symbol}},
    label::AbstractString,
)
    # One metadata lookup per requested column, in the order given.
    values_for_label = [colmetadata(df, c, label) for c in cols]

    # The header of the second column is the metadata key itself.
    header_names = (:variable, Symbol(label))
    table = NamedTuple{header_names}((cols, values_for_label))

    return pretty_table(
        table;
        nosubheader = true,
        crop = :horizontal,
        alignment = [:l, :l],
    )
end
"""
    showcolmetadata(df, col, labels::AbstractVector)

Print a two-column table (`label`, `value`) with the metadata values of the
single column `col` for each key in `labels`. The table is titled
`"colmetadata for <col>"`.
"""
function showcolmetadata(
    df,
    col::Union{AbstractString, Symbol},
    labels::AbstractVector{<:AbstractString},
)
    labvals = [colmetadata(df, col, label) for label in labels]

    # Fix: the table must be *assigned* to `nt`; the original wrote `nt(; ...)`,
    # which tried to call an undefined function.
    nt = (; label = labels, value = labvals)

    # Fix: the keyword is `title` (the original had the typo `tile`).
    return pretty_table(
        nt;
        nosubheader = true,
        title = "colmetadata for $col",
        crop = :horizontal,
        alignment = [:l, :l],
    )
end
"""
    showcolmetadata(df, :, label)

Show the metadata value stored under `label` for every column of `df`, by
forwarding `names(df)` to the vector-of-columns method.
"""
function showcolmetadata(df, cols::Colon, label::AbstractString)
    every_column = names(df)
    return showcolmetadata(df, every_column, label)
end
"""
    showcolmetadata(df, col, :)

Show every metadata key/value pair attached to the single column `col` of
`df`, by forwarding all of its metadata keys to the vector-of-labels method.
"""
function showcolmetadata(df, col::Union{AbstractString, Symbol}, labels::Colon)
    # NOTE(review): `colmetadata(df, col)` is expected to return a dictionary
    # keyed by metadata label (DataFrames.jl / DataAPI.jl behavior) — confirm
    # against the installed version.
    d = colmetadata(df, col)

    # Fix: `names` has no method for a dictionary, so collect the keys instead.
    # Sorting gives a stable row order regardless of the dictionary's iteration order.
    labnames = sort!(collect(keys(d)))

    return showcolmetadata(df, col, labnames)
end
They would be used as follows. As you can see, the column names in the Penn World Tables are pretty uninformative, which is exactly the kind of situation metadata is great for.
julia> showcolmetadata(pwt[:, 1:10], :, "label")
┌───────────────┬──────────────────────────────────────────────────────────
│ variable │ label ⋯
├───────────────┼──────────────────────────────────────────────────────────
│ countrycode │ 3-letter ISO country code ⋯
│ country │ Country name ⋯
│ currency_unit │ Currency unit ⋯
│ year │ Year ⋯
│ rgdpe │ Expenditure-side real GDP at chained PPPs (in mil. 2017 ⋯
│ rgdpo │ Output-side real GDP at chained PPPs (in mil. 2017US$) ⋯
│ pop │ Population (in millions) ⋯
│ emp │ Number of persons engaged (in millions) ⋯
│ avh │ Average annual hours worked by persons engaged (source: ⋯
│ hc │ Human capital index, see note hc ⋯
└───────────────┴──────────────────────────────────────────────────────────
1 column omitted