# File src/library/utils/R/summRprof.R # Part of the R package, https://www.R-project.org # # Copyright (C) 1995-2016 The R Core Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # https://www.R-project.org/Licenses/ # The profile file always starts with a single header line followed by stack lines # If the header contains "memory profiling", the stack lines have memory info # The memory info is a fixed width prefix on each line of the form :[0-9]+:[0-9]+:[0-9]+:[0-9]+: # If the header contains "line profiling", there will be filename lines and stack lines will contain # line number info of the form [0-9]+#[0-9]+ # The filename lines will start #File [0-9]+: summaryRprof <- function(filename = "Rprof.out", chunksize = 5000, memory = c("none", "both", "tseries", "stats"), lines = c("hide", "show", "both"), index = 2, diff = TRUE, exclude = NULL, basenames = 1) { con <- file(filename, "rt") on.exit(close(con)) firstline <- readLines(con, n = 1L) if(!length(firstline)) stop(gettextf("no lines found in %s", sQuote(filename)), domain = NA) sample.interval <- as.numeric(strsplit(firstline, "=")[[1L]][2L])/1e6 memory.profiling <- substr(firstline, 1L, 6L) == "memory" line.profiling <- grepl("line profiling", firstline) if (line.profiling) filenames <- character(0) memory <- match.arg(memory) if(memory != "none" && !memory.profiling) stop("profile does not contain memory information") if (memory == "tseries") return(Rprof_memory_summary(filename = con, chunksize = chunksize, label = index, diff = diff, exclude = exclude, sample.interval = sample.interval)) else if (memory == "stats") return(Rprof_memory_summary(filename = con, chunksize = chunksize, aggregate = index, diff = diff, exclude = exclude, sample.interval = sample.interval)) lines <- match.arg(lines) if (lines != "hide" && !line.profiling) stop("profile does not contain line information") fnames <- NULL ucounts <- NULL fcounts <- NULL memcounts <- NULL umem <- NULL repeat({ chunk <- readLines(con, n = chunksize) if (line.profiling) { filenamelines <- grep("^#File [0-9]+: ", chunk) if (length(filenamelines)) { fnum <- as.integer(sub("^#File ([0-9]+): .*", "\\1", chunk[filenamelines])) filenames[fnum] <- sub("^#File [0-9]+: ", "", chunk[filenamelines]) if (basenames) { dirnames <- dirname(filenames[fnum]) filenames[fnum] <- basename(filenames[fnum]) for (i in seq_len(basenames - 1)) { tail <- basename(dirnames) filenames[fnum] <- ifelse(tail == ".", filenames[fnum], paste0(tail, "/", filenames[fnum])) # May have Windows-style names here where dirname("c:/") == "c:/" parent <- dirname(dirnames) dirnames <- ifelse(dirnames == parent, ".", parent) } } chunk <- chunk[-filenamelines] } } if (length(chunk) == 0L) break if (memory.profiling) { memprefix <- attr(regexpr(":[0-9]+:[0-9]+:[0-9]+:[0-9]+:", chunk), "match.length") if (memory == "both") { memstuff <- substr(chunk, 2L, memprefix-1L) memcounts <- pmax(apply(sapply(strsplit(memstuff, ":"), as.numeric), 1, diff), 0) if (!is.matrix(memcounts)) # Need a matrix result (PR#16395) memcounts <- matrix(memcounts, nrow = 1) ## memcounts <- c(0, rowSums(memcounts[, 1L:3L])) ## convert to bytes. memcounts <- c(0, rowSums(cbind(memcounts[, 1L:2L, drop = FALSE] * 8, memcounts[, 3L, drop = FALSE]))) rm(memstuff) } chunk <- substr(chunk, memprefix+1L, nchar(chunk, "c")) if(any((nc <- nchar(chunk, "c")) == 0L)) { chunk <- chunk[nc > 0L] memcounts <- memcounts[nc > 0L] } } chunk <- strsplit(chunk, " ") if (line.profiling) chunk <- lapply(chunk, function(x) { locations <- !startsWith(x, '"') if (lines != "hide") { fnum <- sub("#.*", "", x[locations]) lnum <- sub(".*#", "", x[locations]) x[locations] <- paste0(filenames[as.integer(fnum)], "#", lnum) } switch(lines, hide = x <- x[!locations], show = x <- x[locations] ) if (length(x)) x else "" }) newfirsts <- sapply(chunk, "[[", 1L) newuniques <- lapply(chunk, unique) ulen <- lengths(newuniques) newuniques <- unlist(newuniques) new.utable <- table(newuniques) new.ftable <- table(factor(newfirsts, levels = names(new.utable))) if (memory == "both") new.umem <- rowsum(memcounts[rep.int(seq_along(memcounts), ulen)], newuniques) fcounts <- rowsum( c(as.vector(new.ftable), fcounts), c(names(new.ftable), fnames) ) ucounts <- rowsum( c(as.vector(new.utable), ucounts), c(names(new.utable), fnames) ) if(memory == "both") umem <- rowsum(c(new.umem, umem), c(names(new.utable), fnames)) fnames <- sort(unique(c(fnames, names(new.utable)))) }) firstnum <- fcounts*sample.interval uniquenum <- ucounts*sample.interval ## sort and form % on unrounded numbers index1 <- order(-firstnum, -uniquenum) index2 <- order(-uniquenum, -firstnum) if (lines == "show") { filename <- sub("#.*$", "", fnames) linenum <- rep(0, length(filename)) hasline <- filename != fnames linenum[hasline] <- as.numeric(sub("^.*#", "", fnames[hasline])) index3 <- order(filename, linenum) } firstpct <- round(100*firstnum/sum(firstnum), 2) uniquepct <- round(100*uniquenum/sum(firstnum), 2) digits <- ifelse(sample.interval < 0.01, 3L, 2L) firstnum <- round(firstnum, digits) uniquenum <- round(uniquenum, digits) if (memory == "both") memtotal <- round(umem/1048576, 1) ## 0.1MB rval <- data.frame(firstnum, firstpct, uniquenum, uniquepct) names(rval) <- c("self.time", "self.pct", "total.time", "total.pct") rownames(rval) <- fnames if (memory == "both") rval$mem.total <- memtotal by.self <- rval[index1, ] by.self <- by.self[by.self[,1L] > 0, ] by.total <- rval[index2, c(3L, 4L, if(memory == "both") 5L, 1L, 2L)] result <- list(by.self = by.self, by.total = by.total) if (lines == "show") result <- c(result, list(by.line = rval[index3,])) c(result, sample.interval = sample.interval, sampling.time = sum(fcounts)*sample.interval) } Rprof_memory_summary <- function(filename, chunksize = 5000, label = c(1, -1), aggregate = 0, diff = FALSE, exclude = NULL, sample.interval) { memcounts <- NULL firsts <- NULL labels <- vector("list", length(label)) index <- NULL repeat({ chunk <- readLines(filename, n = chunksize) if (length(chunk) == 0L) break memprefix <- attr(regexpr(":[0-9]+:[0-9]+:[0-9]+:[0-9]+:", chunk), "match.length") memstuff <- substr(chunk, 2L, memprefix-1L) memcounts <- rbind(t(sapply(strsplit(memstuff, ":"), as.numeric))) chunk <- substr(chunk, memprefix+1, nchar(chunk, "c")) if(any((nc <- nchar(chunk, "c")) == 0L)) { memcounts <- memcounts[nc > 0L, ] chunk <- chunk[nc > 0L] } chunk <- strsplit(chunk, " ") if (length(exclude)) chunk <- lapply(chunk, function(l) l[!(l %in% exclude)]) newfirsts <- sapply(chunk, "[[", 1L) firsts <- c(firsts, newfirsts) if (!aggregate && length(label)){ for(i in seq_along(label)){ if (label[i] == 1) labels[[i]] <- c(labels[[i]], newfirsts) else if (label[i]>1) { labels[[i]] <- c(labels[[i]], sapply(chunk, function(line) paste(rev(line)[1L:min(label[i], length(line))], collapse = ":"))) } else { labels[[i]] <- c(labels[[i]], sapply(chunk, function(line) paste(line[1L:min(-label[i], length(line))], collapse = ":"))) } } } else if (aggregate) { if (aggregate > 0) { index <- c(index, sapply(chunk, function(line) paste(rev(line)[1L:min(aggregate, length(line))], collapse = ":"))) } else { index <- c(index, sapply(chunk, function(line) paste(line[1L:min(-aggregate, length(line))], collapse = ":"))) } } if (length(chunk) < chunksize) break }) if (length(memcounts) == 0L) stop("no events were recorded") memcounts <- as.data.frame(memcounts) names(memcounts) <- c("vsize.small", "vsize.large", "nodes", "duplications") if (!aggregate) { rownames(memcounts) <- (1L:nrow(memcounts))*sample.interval names(labels) <- paste0("stack:", label) memcounts <- cbind(memcounts, labels) } if (diff) memcounts[-1L, 1L:3L] <- pmax(0L, apply(memcounts[, 1L:3L], 2L, diff)) if (aggregate) memcounts <- by(memcounts, index, function(these) with(these, round(c(vsize.small = mean(vsize.small), max.vsize.small = max(vsize.small), vsize.large = mean(vsize.large), max.vsize.large = max(vsize.large), nodes = mean(nodes), max.nodes = max(nodes), duplications = mean(duplications), tot.duplications = sum(duplications), samples = nrow(these) )) ) ) return(memcounts) }