Feature Request: System Disk IO Counters
Thank you for developing and maintaining such a useful R package.
For my use-case, it would be very beneficial to have a function to return disk IO counters (read bytes, write bytes, transfers, etc) for Linux and Windows, similar to https://psutil.readthedocs.io/en/latest/#psutil.disk_io_counters. I have implemented a potential version for Linux as a bolt-on to this package, however cannot implement the Windows version as it would require updating the C source for this package and I'm not very good with C. I have a workaround using powershell, but it is less than ideal.
I'm unsure if you would prefer me to open this as PR, as I wouldn't be able to implement the Windows/OSX versions so it would be incomplete
My implementation for Linux, heavily inspired by psutil
ps_disk_io_counters <- function(perdisk = FALSE) {
if (ps_os_name() == "WINDOWS") {
return(.ps_disk_io_counters_windows(perdisk))
} else {
return(.ps_disk_io_counters_linux(perdisk))
}
}
#' Internal disk IO counters for Linux, reads lines from diskstats if it exists
#' or /sys/block as a backup
.ps_disk_io_counters_linux <- function(perdisk = FALSE) {
if (file.exists("/proc/diskstats")) {
disk_info <- .read_procfs()
} else if (dir.exists("/sys/block")) {
disk_info <- .read_sysfs()
} else {
stop("Can't read, neither /proc/diskstats or /sys/block on this system")
}
# Keep all if perdisk, only keep non-partitions if perdisk=FALSE to get correct sums
disk_info <- disk_info[perdisk | sapply(disk_info$name, .is_storage_device), ]
if (perdisk) {
return(disk_info)
} else {
# Sum all numeric cols, convert to df via list to ensure 1 row
total_info <- data.frame(as.list(colSums(disk_info[, -1])))
# Add no name
total_info$name <- NA
return(total_info)
}
return(disk_io)
}
#' Whether a named drive (e.g. 'sda') is a real storage device, or a virtual one
.is_storage_device <- function(name, including_virtual = TRUE) {
name <- stringr::str_replace_all(name, "/", "!")
if (including_virtual) {
path <- file.path("/sys/block", name)
} else {
path <- file.path("/sys/block", name, "device")
}
return(dir.exists(path))
}
#' Logic to read disk IO stats from /proc/diskstats
.read_procfs <- function() {
file <- readLines("/proc/diskstats")
# Get info as list of vectors (could be different lengths)
lines <- stringr::str_split(stringr::str_trim(file), "\\s+")
# Pre-allocate matrix of info as NAs
mat <- matrix(data = NA_character_, nrow = length(lines), ncol = 10)
for (i in seq_along(lines)) {
# For each line, check length and insert into matrix as appropriate
line <- lines[[i]]
flen <- length(line)
if (flen == 15) {
# Linux 2.4
mat[i, 1] <- line[[4]] # name
mat[i, 2] <- line[[3]] # reads
mat[i, 3:9] <- line[5:11] # reads_merged, rbytes, rtime, writes, writes_merged, wbytes, wtime
mat[i, 10] <- line[[14]] # busy_time
} else if (flen == 14 || flen >= 18) {
# Linux 2.6+, line referring to a disk
mat[i, 1] <- line[[3]] # name
mat[i, 2:9] <- line[5:12] # reads, reads_merged, rbytes, rtime, writes, writes_merged, wbytes, wtime
mat[i, 10] <- line[[14]] # busy_time
} else if (flen == 7) {
# Linux 2.6+, line referring to a partition
mat[i, 1] <- line[[2]] # name
mat[i, 2] <- line[[4]] # reads
mat[i, 4:5] <- line[5:6] # rbytes, writes
mat[i, 8] <- line[7] # wbytes
} else {
stop("Cannot read diskstats file")
}
}
# Add names and convert types as appropriate
return(data.frame(
name = mat[, 1],
read_count = as.numeric(mat[, 2]),
read_merged_count = as.numeric(mat[, 3]),
read_bytes = as.numeric(mat[, 4]) * 512, # Multiple by disk sector size to get bytes
read_time = as.numeric(mat[, 5]),
write_count = as.numeric(mat[, 6]),
write_merged_count = as.numeric(mat[, 7]),
write_bytes = as.numeric(mat[, 8]) * 512, # Multiple by disk sector size to get bytes
write_time = as.numeric(mat[, 9]),
busy_time = as.numeric(mat[, 10])
))
}
#' Read disk IO from each device folder in /sys/block
.read_sysfs <- function() {
# Get stat files for each block
blocks <- list.dirs("/sys/block/", recursive = FALSE)
all_files <- list.files(blocks, full.names = TRUE)
stats <- all_files[stringr::str_ends(all_files, "stat")]
# Pre-allocate list for dfs
disk_info <- vector(mode = "list", length = length(stats))
for (i in seq_along(stats)) {
# Read in each file, get the field
stat <- stats[[i]]
fields <- readLines(stat)
fields <- unlist(stringr::str_split(stringr::str_trim(fields), "\\s+"))
# Save all info as a dataframe
block_info <- data.frame(
name = stringr::str_split(stat, "/")[[1]][[5]], # Extract name from stat filepath
read_count = as.numeric(fields[[1]]),
read_merged_count = as.numeric(fields[[2]]),
read_bytes = as.numeric(fields[[3]]) * 512, # Multiple by disk sector size to get bytes
read_time = as.numeric(fields[[4]]),
write_count = as.numeric(fields[[5]]),
write_merged_count = as.numeric(fields[[6]]),
write_bytes = as.numeric(fields[[7]]) * 512, # Multiple by disk sector size to get bytes
write_time = as.numeric(fields[[8]]),
busy_time = as.numeric(fields[[10]])
)
disk_info[[i]] <- block_info
}
return(do.call(rbind, disk_info))
}
Thanks! If you could submit it as a PR, that would be great. It is ok if a function is only implemented on Linux, we can always add a Windows/macOS implementation later, if it is possible to have one.
Okay thanks, I'll submit this as a draft PR, though I haven't really submitted to R packages before so it may be a while as this'll need tests and documentation etc
Thanks! It is fine to submit a first PR w/o docs and tests.