TableMetadataTools.jl icon indicating copy to clipboard operation
TableMetadataTools.jl copied to clipboard

Better methods for showing metadata

Open pdeffebach opened this issue 2 years ago • 0 comments

I'm using metadata for the first time today (I usually use Stata and R for data cleaning), and I found it helpful to define the following functions to make metadata easier to inspect:

"""
    showcolmetadata(df, col, label)

Return the value stored under metadata key `label` for the single column `col`
of `df`. This method simply forwards to `colmetadata(df, col, label)`.
"""
showcolmetadata(df, col::Union{AbstractString, Symbol}, label::AbstractString) =
    colmetadata(df, col, label)

"""
    showcolmetadata(df, cols::AbstractVector, label)

Print a two-column table pairing every column name in `cols` with the value
stored under metadata key `label` for that column.

The first table column is headed `variable`; the second is headed by `label`
itself. The table is rendered with `pretty_table`, left-aligned and cropped
horizontally.
"""
function showcolmetadata(
    df,
    cols::AbstractVector{<:Union{AbstractString, Symbol}},
    label::AbstractString,
)
    entries = [colmetadata(df, name, label) for name in cols]
    # The second header is computed at runtime from `label`, hence the `=>` form.
    header = Symbol(label)
    table = (; :variable => cols, header => entries)
    return pretty_table(
        table;
        nosubheader = true,
        crop = :horizontal,
        alignment = [:l, :l],
    )
end

"""
    showcolmetadata(df, col, labels::AbstractVector{<:AbstractString})

Print a two-column table (`label`, `value`) listing, for the single column `col`
of `df`, the value stored under each metadata key in `labels`.

The table is rendered with `pretty_table`, titled `"colmetadata for <col>"`,
left-aligned and cropped horizontally.
"""
function showcolmetadata(
    df,
    col::Union{AbstractString, Symbol},
    labels::AbstractVector{<:AbstractString},
)
    labvals = [colmetadata(df, col, label) for label in labels]
    # Must be an assignment: `nt(; ...)` would try to call an undefined `nt`.
    nt = (; :label => labels, :value => labvals)
    return pretty_table(
        nt;
        nosubheader = true,
        title = "colmetadata for $col",  # keyword is `title`, not `tile`
        crop = :horizontal,
        alignment = [:l, :l],
    )
end

"""
    showcolmetadata(df, :, label)

Show the metadata stored under key `label` for every column of `df`.

Expands the colon to `names(df)` and delegates to the method that accepts a
vector of column names.
"""
showcolmetadata(df, cols::Colon, label::AbstractString) =
    showcolmetadata(df, names(df), label)

"""
    showcolmetadata(df, col, :)

Show every metadata key/value pair attached to the single column `col` of `df`.

Collects the keys of `colmetadata(df, col)` and delegates to the method that
accepts a vector of labels. Keys are sorted so the output order is stable
between calls.
"""
function showcolmetadata(df, col::Union{AbstractString, Symbol}, labels::Colon)
    d = colmetadata(df, col)
    # `names` has no method for a dictionary; the metadata labels are its keys.
    # The typed comprehension yields a `Vector{String}` even when `d` is empty,
    # so the call below always dispatches to the vector-of-labels method.
    labnames = sort!(String[key for key in keys(d)])
    return showcolmetadata(df, col, labnames)
end

They would be used as follows. Note that the column names in the Penn World Tables are pretty uninformative, which is exactly the kind of situation metadata is great for.

julia> showcolmetadata(pwt[:, 1:10], :, "label")
┌───────────────┬──────────────────────────────────────────────────────────
│ variable      │ label                                                   ⋯
├───────────────┼──────────────────────────────────────────────────────────
│ countrycode   │ 3-letter ISO country code                               ⋯
│ country       │ Country name                                            ⋯
│ currency_unit │ Currency unit                                           ⋯
│ year          │ Year                                                    ⋯
│ rgdpe         │ Expenditure-side real GDP at chained PPPs (in mil. 2017 ⋯
│ rgdpo         │ Output-side real GDP at chained PPPs (in mil. 2017US$)  ⋯
│ pop           │ Population (in millions)                                ⋯
│ emp           │ Number of persons engaged (in millions)                 ⋯
│ avh           │ Average annual hours worked by persons engaged (source: ⋯
│ hc            │ Human capital index, see note hc                        ⋯
└───────────────┴──────────────────────────────────────────────────────────
                                                           1 column omitted

pdeffebach avatar Mar 17 '23 19:03 pdeffebach