data.tree
data.tree copied to clipboard
docs: add equivalent of ape::drop.tip
It would be helpful to include an example in the docs of how to prune a data.tree object in the same manner as ape::drop.tip
@gluc This is my solution:
# nestd list
acme <- Node$new("Acme Inc.")
accounting <- acme$AddChild("Accounting")
software <- accounting$AddChild("New Software")
standards <- accounting$AddChild("New Accounting Standards")
research <- acme$AddChild("Research")
newProductLine <- research$AddChild("New Product Line")
newLabs <- research$AddChild("New Labs")
it <- acme$AddChild("IT")
outsource <- it$AddChild("Outsource")
agile <- it$AddChild("Go agile")
goToR <- it$AddChild("Switch to R")
#' sum while considering NAs
SUM = function(x){
x = sum(x, na.rm=TRUE)
if(is.na(x)) x = 0
return(x)
}
#' find target taxa
includes_children = function(node) {
#result = ifelse(node$name %in% c('Research'), 1, NA)
result = ifelse(node$name %in% c('New Accounting Standards', 'New Labs'), 1, NA)
if(length(result) == 0){
result = sum(sapply(node$children, includes_children))
}
return(result)
}
# identify taxa
acme$Do(function(node) node$target_lineage = includes_children(node))
print(acme, 'target_lineage')
# sum "1" values for identified taxa
acme$Do(function(node) node$target_lineage_sum = Aggregate(node, 'target_lineage', SUM,
traversal = 'ancestor'))
print(acme, 'target_lineage', 'target_lineage_sum')
# prune out NAs (necessary for non-leaf node selections)
Prune(acme, function(x) !is.na(x$target_lineage_sum))
print(acme, 'target_lineage', 'target_lineage_sum')
# re-aggregate now that NAs are removed
acme$Do(function(node) node$target_lineage_sum = Aggregate(node, 'target_lineage', SUM,
traversal = 'ancestor'))
print(acme, 'target_lineage', 'target_lineage_sum')
# final pruning
Prune(acme, function(x) ifelse(is.na(x$target_lineage_sum), FALSE, x$target_lineage_sum))
print(acme, 'target_lineage', 'target_lineage_sum')
Is there a better way of pruning to certain lineages versus this convoluted method that I have devised?
Just re-run into this issue with a new project:
data(acme)
Prune(acme, function(node) node$name %in% c("New Labs"))
acme
Results in Acme Inc. instead of:
Acme Inc.
¦--Research
°--New Labs
My updated approach:
#' Determine whether a child node is in the target lineage
child_in_target_lineage = function(child_node, targets){
any(child_node$name %in% targets) || any(child_node$TARGET == TRUE)
}
#' Determine whether a node is in the target lineage
node_in_target_lineage = function(node, targets){
if(node$isRoot || child_in_target_lineage(node, targets)){
return(TRUE)
} else {
has_target_child = any(
sapply(node$children, child_in_target_lineage, targets=targets)
)
if(has_target_child){
return(TRUE)
}
}
return(FALSE)
}
#' Prune to target node names
datatree_prune = function(dtree, target_ids){
# check input
if(is.null(target_ids) || length(target_ids) == 0) return(dtree)
# Assign TRUE to any lineages containing the targets
dtree$Do(
function(node){
node$TARGET = node_in_target_lineage(node, target_ids)
},
traversal = "post-order"
)
# Prune based on the lineage assignments
Prune(dtree, function(node) node$TARGET == TRUE)
# Remove the lineage assignments
dtree$Do(function(node) node$TARGET = NULL)
}
data(acme)
datatree_prune(acme, c("New Labs"))
acme
Result:
Acme Inc.
¦--Research
°--New Labs