# File src/library/graphics/R/hist.R # Part of the R package, https://www.R-project.org # # Copyright (C) 1995-2021 The R Core Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # https://www.R-project.org/Licenses/ hist <- function(x, ...) UseMethod("hist") hist.default <- function (x, breaks = "Sturges", freq = NULL, probability = !freq, include.lowest= TRUE, right = TRUE, fuzz = 1e-7, density = NULL, angle = 45, col = "lightgray", border = NULL, main = paste("Histogram of", xname), xlim = range(breaks), ylim = NULL, xlab = xname, ylab, axes = TRUE, plot = TRUE, labels = FALSE, nclass = NULL, warn.unused = TRUE, ...) { if (!is.numeric(x)) stop("'x' must be numeric") xname <- deparse1(substitute(x), collapse="\n") n <- length(x <- x[is.finite(x)]) n <- as.integer(n) if(is.na(n)) stop("invalid length(x)") use.br <- !missing(breaks) if(use.br) { if(!missing(nclass)) warning("'nclass' not used when 'breaks' is specified") } else if(!is.null(nclass) && length(nclass) == 1L) breaks <- nclass use.br <- use.br && (nB <- length(breaks)) > 1L if(use.br) breaks <- sort(breaks) else { # construct vector of breaks if(!include.lowest) { include.lowest <- TRUE warning("'include.lowest' ignored as 'breaks' is not a vector") } if(is.character(breaks)) { breaks <- match.arg(tolower(breaks), c("sturges", "fd", "freedman-diaconis", "scott")) breaks <- switch(breaks, sturges = nclass.Sturges(x), "freedman-diaconis" =, fd = nclass.FD(x), scott = nclass.scott(x), stop("unknown 'breaks' algorithm")) } else if(is.function(breaks)) { breaks <- breaks(x) } ## if(!is.numeric(breaks) || !is.finite(breaks) || breaks < 1L) ## stop("invalid number of 'breaks'") ## breaks <- pretty (range(x), n = breaks, min.n = 1) ## nB <- length(breaks) ## if(nB <= 1) ##-- Impossible ! ## stop(gettextf("hist.default: pretty() error, breaks=%s", ## format(breaks)), domain = NA) if (length(breaks) == 1) { if(!is.numeric(breaks) || !is.finite(breaks) || breaks < 1L) stop("invalid number of 'breaks'") if(breaks > 1e6) { # pretty() must have n <= maximal integer warning(gettextf("'breaks = %g' is too large and set to 1e6", breaks), domain = NA) breaks <- 1e6L } breaks <- pretty (range(x), n = breaks, min.n = 1) nB <- length(breaks) if(nB <= 1) ##-- Impossible ! stop(gettextf("hist.default: pretty() error, breaks=%s", format(breaks)), domain = NA) } else { if(!is.numeric(breaks) || length(breaks) <= 1) stop(gettextf("Invalid breakpoints produced by 'breaks(x)': %s", format(breaks)), domain = NA) breaks <- sort(breaks) nB <- length(breaks) use.br <- TRUE # To allow equidist=FALSE below (FIXME: Find better way?) } } nB <- as.integer(nB) if(is.na(nB)) stop("invalid length(breaks)") ## Do this *before* adding fuzz or logic breaks down... h <- as.double(diff(breaks)) equidist <- !use.br || diff(range(h)) < 1e-7 * mean(h) if (!use.br && any(h <= 0)) stop("'breaks' are not strictly increasing") freq1 <- freq # we want to do missing(freq) later if (is.null(freq)) { freq1 <- if(!missing(probability)) !as.logical(probability) else equidist } else if(!missing(probability) && any(probability == freq)) stop("'probability' is an alias for '!freq', however they differ.") ## Fuzz to handle cases where points are "effectively on" ## the boundaries ## As one break point could be very much larger than the others, ## as from 1.9.1 we no longer use the range. (PR#6931) ## diddle <- 1e-7 * max(abs(range(breaks))) ## NB: h == diff(breaks) stopifnot("fuzz must be non-negative" = fuzz >= 0) diddle <- fuzz * if(nB > 5) stats::median(h) ## for few breaks, protect against very large bins: else if(nB <= 3) diff(range(x)) else min(h[h > 0]) fuzz <- if(right) c(if(include.lowest) -diddle else diddle, rep.int(diddle, nB - 1L)) else c(rep.int(-diddle, nB - 1L), if(include.lowest) diddle else -diddle) fuzzybreaks <- breaks + fuzz ## With the fuzz adjustment above, the "right" and "include" ## arguments are often irrelevant (but not with integer data!) counts <- .Call(C_BinCount, x, fuzzybreaks, right, include.lowest) if (any(counts < 0L)) stop("negative 'counts'. Internal Error.", domain = NA) if (sum(counts) < n) stop("some 'x' not counted; maybe 'breaks' do not span range of 'x'") dens <- counts/(n*h) # use un-fuzzed intervals mids <- 0.5 * (breaks[-1L] + breaks[-nB]) r <- structure(list(breaks = breaks, counts = counts, density = dens, mids = mids, xname = xname, equidist = equidist), class = "histogram") if (plot) { plot(r, freq = freq1, col = col, border = border, angle = angle, density = density, main = main, xlim = xlim, ylim = ylim, xlab = xlab, ylab = ylab, axes = axes, labels = labels, ...) invisible(r) } else { ## plot is FALSE if (warn.unused) { ## make an effort to warn about "non sensical" arguments: nf <- names(formals()) ## all formals but those: nf <- nf[is.na(match(nf, c("x", "breaks", "nclass", "plot", "include.lowest", "right", "fuzz")))] missE <- lapply(nf, function(n) substitute(missing(.), list(. = as.name(n)))) not.miss <- ! vapply(missE, eval, NA, envir = environment()) if(any(not.miss)) warning(sprintf(ngettext(sum(not.miss), "argument %s is not made use of", "arguments %s are not made use of"), paste(sQuote(nf[not.miss]), collapse=", ")), domain = NA) } r } } plot.histogram <- function (x, freq = equidist, density = NULL, angle = 45, col = "lightgray", border = NULL, lty = NULL, main = paste("Histogram of", paste(x$xname, collapse="\n")), sub = NULL, xlab = x$xname, ylab, xlim = range(x$breaks), ylim = NULL, axes = TRUE, labels = FALSE, add = FALSE, ann = TRUE, ...) { equidist <- if(is.logical(x$equidist)) x$equidist else { h <- diff(x$breaks) ; diff(range(h)) < 1e-7 * mean(h) } if(freq && !equidist) warning("the AREAS in the plot are wrong -- rather use 'freq = FALSE'") y <- if (freq) x$counts else x$density nB <- length(x$breaks) if(is.null(y) || 0L == nB) stop("'x' is wrongly structured") dev.hold(); on.exit(dev.flush()) if(!add) { if(is.null(ylim)) ylim <- range(y, 0) if (missing(ylab)) ylab <- if (!freq) "Density" else "Frequency" plot.new() plot.window(xlim, ylim, "", ...) #-> ylim's default from 'y' if(ann) title(main = main, sub = sub, xlab = xlab, ylab = ylab, ...) if(axes) { axis(1, ...) yt <- axTicks(2) if(freq && any(ni <- (yt %% 1) != 0)) # remove non-integers: yt <- yt[!ni] axis(2, at=yt, ...) } } rect(x$breaks[-nB], 0, x$breaks[-1L], y, col = col, border = border, angle = angle, density = density, lty = lty) if((logl <- is.logical(labels) && labels) || is.character(labels)) text(x$mids, y, labels = if(logl) { if(freq) x$counts else round(x$density,3) } else labels, adj = c(0.5, -0.5)) invisible() } lines.histogram <- function(x, ...) plot.histogram(x, ..., add = TRUE)