data.tree icon indicating copy to clipboard operation
data.tree copied to clipboard

docs: add equivalent of ape::drop.tip

Open nick-youngblut opened this issue 3 years ago • 3 comments

It would be helpful to include an example in the docs of how to prune a data.tree object in the same manner as ape::drop.tip

nick-youngblut avatar Nov 13 '22 21:11 nick-youngblut

@gluc This is my solution:

# nestd list
acme <- Node$new("Acme Inc.")
accounting <- acme$AddChild("Accounting")
software <- accounting$AddChild("New Software")
standards <- accounting$AddChild("New Accounting Standards")
research <- acme$AddChild("Research")
newProductLine <- research$AddChild("New Product Line")
newLabs <- research$AddChild("New Labs")
it <- acme$AddChild("IT")
outsource <- it$AddChild("Outsource")
agile <- it$AddChild("Go agile")
goToR <- it$AddChild("Switch to R")

#' sum while considering NAs 
SUM = function(x){
  x = sum(x, na.rm=TRUE)
  if(is.na(x)) x = 0
  return(x)
}

#' find target taxa
includes_children = function(node) {
  #result = ifelse(node$name %in% c('Research'), 1, NA)
  result = ifelse(node$name %in% c('New Accounting Standards', 'New Labs'), 1, NA)
  if(length(result) == 0){
    result = sum(sapply(node$children, includes_children))
  } 
  return(result)
}
# identify taxa
acme$Do(function(node) node$target_lineage = includes_children(node))
print(acme, 'target_lineage')
# sum "1" values for identified taxa
acme$Do(function(node) node$target_lineage_sum = Aggregate(node, 'target_lineage', SUM,
                                                       traversal = 'ancestor'))
print(acme, 'target_lineage', 'target_lineage_sum')
# prune out NAs (necessary for non-leaf node selections)
Prune(acme, function(x) !is.na(x$target_lineage_sum))
print(acme, 'target_lineage', 'target_lineage_sum')
# re-aggregate now that NAs are removed
acme$Do(function(node) node$target_lineage_sum = Aggregate(node, 'target_lineage', SUM,
                                                       traversal = 'ancestor'))
print(acme, 'target_lineage', 'target_lineage_sum')
# final pruning
Prune(acme, function(x) ifelse(is.na(x$target_lineage_sum), FALSE, x$target_lineage_sum))
print(acme, 'target_lineage', 'target_lineage_sum')

Is there a better way of pruning to certain lineages versus this convoluted method that I have devised?

nick-youngblut avatar Nov 22 '22 19:11 nick-youngblut

Just re-run into this issue with a new project:

data(acme)
Prune(acme, function(node) node$name %in% c("New Labs"))
acme

Results in Acme Inc. instead of:

Acme Inc.                       
   ¦--Research                        
       °--New Labs 

nick-youngblut avatar Aug 23 '24 17:08 nick-youngblut

My updated approach:

#' Determine whether a child node is in the target lineage
child_in_target_lineage = function(child_node, targets){
  any(child_node$name %in% targets) || any(child_node$TARGET == TRUE)
}

#' Determine whether a node is in the target lineage
node_in_target_lineage = function(node, targets){
  if(node$isRoot || child_in_target_lineage(node, targets)){
    return(TRUE)
  } else {
    has_target_child = any(
      sapply(node$children, child_in_target_lineage, targets=targets)
    )
    if(has_target_child){
      return(TRUE)
    }
  }
  return(FALSE)
}

#' Prune to target node names
datatree_prune = function(dtree, target_ids){
  # check input
  if(is.null(target_ids) || length(target_ids) == 0) return(dtree)
  # Assign TRUE to any lineages containing the targets
  dtree$Do(
    function(node){ 
      node$TARGET = node_in_target_lineage(node, target_ids)
    },
    traversal = "post-order"
  )
  # Prune based on the lineage assignments
  Prune(dtree, function(node) node$TARGET == TRUE)
  # Remove the lineage assignments
  dtree$Do(function(node) node$TARGET = NULL)
}
data(acme)
datatree_prune(acme, c("New Labs")) 
acme

Result:

Acme Inc.                       
   ¦--Research                        
       °--New Labs 

nick-youngblut avatar Aug 23 '24 17:08 nick-youngblut