## file KernSmooth/R/all.R ## original file Copyright (C) M. P. Wand ## modifications for use with R copyright (C) B. D. Ripley ## Unlimited use and distribution (see LICENCE). bkde <- function(x, kernel = "normal", canonical = FALSE, bandwidth, gridsize = 401L, range.x, truncate = TRUE) { ## Install safeguard against non-positive bandwidths: if (!missing(bandwidth) && bandwidth <= 0) stop("'bandwidth' must be strictly positive") kernel <- match.arg(kernel, c("normal", "box", "epanech", "biweight", "triweight")) ## Rename common variables n <- length(x) M <- gridsize ## Set canonical scaling factors del0 <- switch(kernel, "normal" = (1/(4*pi))^(1/10), "box" = (9/2)^(1/5), "epanech" = 15^(1/5), "biweight" = 35^(1/5), "triweight" = (9450/143)^(1/5)) ## Set default bandwidth h <- if (missing(bandwidth)) del0 * (243/(35*n))^(1/5)*sqrt(var(x)) else if(canonical) del0 * bandwidth else bandwidth ## Set kernel support values tau <- if (kernel == "normal") 4 else 1 if (missing(range.x)) range.x <- c(min(x)-tau*h, max(x)+tau*h) a <- range.x[1L] b <- range.x[2L] ## Set up grid points and bin the data gpoints <- seq(a, b, length = M) gcounts <- linbin(x, gpoints, truncate) ## Compute kernel weights delta <- (b - a)/(h * (M-1L)) L <- min(floor(tau/delta), M) if (L == 0) warning("Binning grid too coarse for current (small) bandwidth: consider increasing 'gridsize'") lvec <- 0L:L kappa <- if (kernel == "normal") dnorm(lvec*delta)/(n*h) else if (kernel == "box") 0.5*dbeta(0.5*(lvec*delta+1), 1, 1)/(n*h) else if (kernel == "epanech") 0.5*dbeta(0.5*(lvec*delta+1), 2, 2)/(n*h) else if (kernel == "biweight") 0.5*dbeta(0.5*(lvec*delta+1), 3, 3)/(n*h) else if (kernel == "triweight") 0.5*dbeta(0.5*(lvec*delta+1), 4, 4)/(n*h) ## Now combine weight and counts to obtain estimate ## we need P >= 2L+1L, M: L <= M. P <- 2^(ceiling(log(M+L+1L)/log(2))) kappa <- c(kappa, rep(0, P-2L*L-1L), rev(kappa[-1L])) tot <- sum(kappa) * (b-a)/(M-1L) * n # should have total weight one gcounts <- c(gcounts, rep(0L, P-M)) kappa <- fft(kappa/tot) gcounts <- fft(gcounts) list(x = gpoints, y = (Re(fft(kappa*gcounts, TRUE))/P)[1L:M]) } bkde2D <- function(x, bandwidth, gridsize = c(51L, 51L), range.x, truncate = TRUE) { ## Install safeguard against non-positive bandwidths: if (!missing(bandwidth) && min(bandwidth) <= 0) stop("'bandwidth' must be strictly positive") ## Rename common variables n <- nrow(x) M <- gridsize h <- bandwidth tau <- 3.4 # For bivariate normal kernel. ## Use same bandwidth in each direction ## if only a single bandwidth is given. if (length(h) == 1L) h <- c(h, h) ## If range.x is not specified then set it at its default value. if (missing(range.x)) { range.x <- list(0, 0) for (id in (1L:2L)) range.x[[id]] <- c(min(x[, id])-1.5*h[id], max(x[, id])+1.5*h[id]) } a <- c(range.x[[1L]][1L], range.x[[2L]][1L]) b <- c(range.x[[1L]][2L], range.x[[2L]][2L]) ## Set up grid points and bin the data gpoints1 <- seq(a[1L], b[1L], length = M[1L]) gpoints2 <- seq(a[2L], b[2L], length = M[2L]) gcounts <- linbin2D(x, gpoints1, gpoints2) ## Compute kernel weights L <- numeric(2L) kapid <- list(0, 0) for (id in 1L:2L) { L[id] <- min(floor(tau*h[id]*(M[id]-1)/(b[id]-a[id])), M[id] - 1L) lvecid <- 0:L[id] facid <- (b[id] - a[id])/(h[id]*(M[id]-1L)) z <- matrix(dnorm(lvecid*facid)/h[id]) tot <- sum(c(z, rev(z[-1L]))) * facid * h[id] kapid[[id]] <- z/tot } kapp <- kapid[[1L]] %*% (t(kapid[[2L]]))/n if (min(L) == 0) warning("Binning grid too coarse for current (small) bandwidth: consider increasing 'gridsize'") ## Now combine weight and counts using the FFT to obtain estimate P <- 2^(ceiling(log(M+L)/log(2))) # smallest powers of 2 >= M+L L1 <- L[1L] ; L2 <- L[2L] M1 <- M[1L] ; M2 <- M[2L] P1 <- P[1L] ; P2 <- P[2L] rp <- matrix(0, P1, P2) rp[1L:(L1+1), 1L:(L2+1)] <- kapp if (L1) rp[(P1-L1+1):P1, 1L:(L2+1)] <- kapp[(L1+1):2, 1L:(L2+1)] if (L2) rp[, (P2-L2+1):P2] <- rp[, (L2+1):2] ## wrap-around version of "kapp" sp <- matrix(0, P1, P2) sp[1L:M1, 1L:M2] <- gcounts ## zero-padded version of "gcounts" rp <- fft(rp) # Obtain FFT's of r and s sp <- fft(sp) rp <- Re(fft(rp*sp, inverse = TRUE)/(P1*P2))[1L:M1, 1L:M2] ## invert element-wise product of FFT's ## and truncate and normalise it ## Ensure that rp is non-negative rp <- rp * matrix(as.numeric(rp>0), nrow(rp), ncol(rp)) list(x1 = gpoints1, x2 = gpoints2, fhat = rp) } bkfe <- function(x, drv, bandwidth, gridsize = 401L, range.x, binned = FALSE, truncate = TRUE) { ## Install safeguard against non-positive bandwidths: if (!missing(bandwidth) && bandwidth <= 0) stop("'bandwidth' must be strictly positive") if (missing(range.x) && !binned) range.x <- c(min(x), max(x)) ## Rename variables M <- gridsize a <- range.x[1L] b <- range.x[2L] h <- bandwidth ## Bin the data if not already binned if (!binned) { gpoints <- seq(a, b, length = gridsize) gcounts <- linbin(x, gpoints, truncate) } else { gcounts <- x M <- length(gcounts) gpoints <- seq(a, b, length = M) } ## Set the sample size and bin width n <- sum(gcounts) delta <- (b-a)/(M-1) ## Obtain kernel weights tau <- 4 + drv L <- min(floor(tau*h/delta), M) if (L == 0) warning("Binning grid too coarse for current (small) bandwidth: consider increasing 'gridsize'") lvec <- 0L:L arg <- lvec*delta/h kappam <- dnorm(arg)/(h^(drv+1)) hmold0 <- 1 hmold1 <- arg hmnew <- 1 if (drv >= 2L) for (i in (2L:drv)) { hmnew <- arg*hmold1 - (i-1)*hmold0 hmold0 <- hmold1 # Compute mth degree Hermite polynomial hmold1 <- hmnew # by recurrence. } kappam <- hmnew * kappam ## Now combine weights and counts to obtain estimate ## we need P >= 2L+1L, M: L <= M. P <- 2^(ceiling(log(M+L+1L)/log(2))) kappam <- c(kappam, rep(0, P-2L*L-1L), rev(kappam[-1L])) Gcounts <- c(gcounts, rep(0, P-M)) kappam <- fft(kappam) Gcounts <- fft(Gcounts) sum(gcounts * (Re(fft(kappam*Gcounts, TRUE))/P)[1L:M] )/(n^2) } ## For obtaining preliminary estimates of ## quantities required for the "direct plug-in" ## regression bandwidth selector based on ## blocked qth degree polynomial fits. blkest <- function(x, y, Nval, q) { n <- length(x) ## Sort the (x, y) data with respect to ## the x's. datmat <- cbind(x, y) datmat <- datmat[sort.list(datmat[, 1L]), ] x <- datmat[, 1L] y <- datmat[, 2L] ## Set up arrays for FORTRAN programme "blkest" qq <- q + 1L xj <- rep(0, n) yj <- rep(0, n) coef <- rep(0, qq) Xmat <- matrix(0, n, qq) wk <- rep(0, n) qraux <- rep(0, qq) sigsqe <- 0 th22e <- 0 th24e <- 0 out <- .Fortran(F_blkest, as.double(x), as.double(y), as.integer(n), as.integer(q), as.integer(qq), as.integer(Nval), as.double(xj), as.double(yj), as.double(coef), as.double(Xmat), as.double(wk), as.double(qraux), as.double(sigsqe), as.double(th22e), as.double(th24e)) list(sigsqe = out[[13]], th22e = out[[14]], th24e = out[[15]]) } ## Chooses the number of blocks for the premilinary ## step of a plug-in rule using Mallows' C_p. cpblock <- function(X, Y, Nmax, q) { n <- length(X) ## Sort the (X, Y) data with respect tothe X's. datmat <- cbind(X, Y) datmat <- datmat[sort.list(datmat[, 1L]), ] X <- datmat[, 1L] Y <- datmat[, 2L] ## Set up arrays for FORTRAN subroutine "cp" qq <- q + 1L RSS <- rep(0, Nmax) Xj <- rep(0, n) Yj <- rep(0, n) coef <- rep(0, qq) Xmat <- matrix(0, n, qq) Cpvals <- rep(0, Nmax) wk <- rep(0, n) qraux <- rep(0, qq) ## remove unused 'q' 2007-07-10 out <- .Fortran(F_cp, as.double(X), as.double(Y), as.integer(n), as.integer(qq), as.integer(Nmax), as.double(RSS), as.double(Xj), as.double(Yj), as.double(coef), as.double(Xmat), as.double(wk), as.double(qraux), Cpvals = as.double(Cpvals)) Cpvec <- out$Cpvals order(Cpvec)[1L] } dpih <- function(x, scalest = "minim", level = 2L, gridsize = 401L, range.x = range(x), truncate = TRUE) { if (level > 5L) stop("Level should be between 0 and 5") ## Rename variables n <- length(x) M <- gridsize a <- range.x[1L] b <- range.x[2L] ## Set up grid points and bin the data gpoints <- seq(a, b, length = M) gcounts <- linbin(x, gpoints, truncate) ## Compute scale estimate scalest <- match.arg(scalest, c("minim", "stdev", "iqr")) scalest <- switch(scalest, "stdev" = sqrt(var(x)), "iqr"= (quantile(x, 3/4)-quantile(x, 1/4))/1.349, "minim" = min((quantile(x, 3/4)-quantile(x, 1/4))/1.349, sqrt(var(x))) ) if (scalest == 0) stop("scale estimate is zero for input data") ## Replace input data by standardised data for numerical stability: sx <- (x-mean(x))/scalest sa <- (a-mean(x))/scalest ; sb <- (b-mean(x))/scalest ## Set up grid points and bin the data: gpoints <- seq(sa, sb, length = M) gcounts <- linbin(sx, gpoints, truncate) ## delta <- (sb-sa)/(M - 1) ## Perform plug-in steps hpi <- if (level == 0L) (24*sqrt(pi)/n)^(1/3) else if (level == 1L) { alpha <- (2/(3*n))^(1/5)*sqrt(2) # bandwidth for psi_2 psi2hat <- bkfe(gcounts, 2L, alpha, range.x = c(sa, sb), binned = TRUE) (6/(-psi2hat*n))^(1/3) } else if (level == 2L) { alpha <- ((2/(5*n))^(1/7))*sqrt(2) # bandwidth for psi_4 psi4hat <- bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (sqrt(2/pi)/(psi4hat*n))^(1/5) # bandwidth for psi_2 psi2hat <- bkfe(gcounts, 2L, alpha, range.x = c(sa, sb), binned = TRUE) (6/(-psi2hat*n))^(1/3) } else if (level == 3L) { alpha <- ((2/(7*n))^(1/9))*sqrt(2) # bandwidth for psi_6 psi6hat <- bkfe(gcounts, 6L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (-3*sqrt(2/pi)/(psi6hat*n))^(1/7) # bandwidth for psi_4 psi4hat <- bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (sqrt(2/pi)/(psi4hat*n))^(1/5) # bandwidth for psi_2 psi2hat <- bkfe(gcounts, 2L, alpha, range.x = c(sa, sb), binned = TRUE) (6/(-psi2hat*n))^(1/3) } else if (level == 4L) { alpha <- ((2/(9*n))^(1/11))*sqrt(2) # bandwidth for psi_8 psi8hat <- bkfe(gcounts, 8L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (15*sqrt(2/pi)/(psi8hat*n))^(1/9) # bandwidth for psi_6 psi6hat <- bkfe(gcounts, 6L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (-3*sqrt(2/pi)/(psi6hat*n))^(1/7) # bandwidth for psi_4 psi4hat <- bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (sqrt(2/pi)/(psi4hat*n))^(1/5) # bandwidth for psi_2 psi2hat <- bkfe(gcounts, 2L, alpha, range.x = c(sa, sb), binned = TRUE) (6/(-psi2hat*n))^(1/3) } else if (level == 5L) { alpha <- ((2/(11*n))^(1/13))*sqrt(2) # bandwidth for psi_10 psi10hat <- bkfe(gcounts, 10L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (-105*sqrt(2/pi)/(psi10hat*n))^(1/11) # bandwidth for psi_8 psi8hat <- bkfe(gcounts, 8L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (15*sqrt(2/pi)/(psi8hat*n))^(1/9) # bandwidth for psi_6 psi6hat <- bkfe(gcounts, 6L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (-3*sqrt(2/pi)/(psi6hat*n))^(1/7) # bandwidth for psi_4 psi4hat <- bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (sqrt(2/pi)/(psi4hat*n))^(1/5) # bandwidth for psi_2 psi2hat <- bkfe(gcounts, 2L, alpha, range.x = c(sa, sb), binned = TRUE) (6/(-psi2hat*n))^(1/3) } scalest * hpi } dpik <- function(x, scalest = "minim", level = 2L, kernel = "normal", canonical = FALSE, gridsize = 401L, range.x = range(x), truncate = TRUE) { if (level > 5L) stop("Level should be between 0 and 5") kernel <- match.arg(kernel, c("normal", "box", "epanech", "biweight", "triweight")) ## Set kernel constants del0 <- if (canonical) 1 else switch(kernel, "normal" = 1/((4*pi)^(1/10)), "box" = (9/2)^(1/5), "epanech" = 15^(1/5), "biweight" = 35^(1/5), "triweight" = (9450/143)^(1/5)) ## Rename variables n <- length(x) M <- gridsize a <- range.x[1L] b <- range.x[2L] ## Set up grid points and bin the data gpoints <- seq(a, b, length = M) gcounts <- linbin(x, gpoints, truncate) ## Compute scale estimate scalest <- match.arg(scalest, c("minim", "stdev", "iqr")) scalest <- switch(scalest, "stdev" = sqrt(var(x)), "iqr"= (quantile(x, 3/4)-quantile(x, 1/4))/1.349, "minim" = min((quantile(x, 3/4)-quantile(x, 1/4))/1.349, sqrt(var(x))) ) if (scalest == 0) stop("scale estimate is zero for input data") ## Replace input data by standardised data for numerical stability: sx <- (x-mean(x))/scalest sa <- (a-mean(x))/scalest ; sb <- (b-mean(x))/scalest ## Set up grid points and bin the data: gpoints <- seq(sa, sb, length = M) gcounts <- linbin(sx, gpoints, truncate) ## delta <- (sb-sa)/(M-1) ## Perform plug-in steps: psi4hat <- if (level == 0L) 3/(8*sqrt(pi)) else if (level == 1L) { alpha <- (2*(sqrt(2))^7/(5*n))^(1/7) # bandwidth for psi_4 bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE) } else if (level == 2L) { alpha <- (2*(sqrt(2))^9/(7*n))^(1/9) # bandwidth for psi_6 psi6hat <- bkfe(gcounts, 6L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (-3*sqrt(2/pi)/(psi6hat*n))^(1/7) # bandwidth for psi_4 bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE) } else if (level == 3L) { alpha <- (2*(sqrt(2))^11/(9*n))^(1/11) # bandwidth for psi_8 psi8hat <- bkfe(gcounts, 8L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (15*sqrt(2/pi)/(psi8hat*n))^(1/9) # bandwidth for psi_6 psi6hat <- bkfe(gcounts, 6L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (-3*sqrt(2/pi)/(psi6hat*n))^(1/7) # bandwidth for psi_4 bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE) } else if (level == 4L) { alpha <- (2*(sqrt(2))^13/(11*n))^(1/13) # bandwidth for psi_10 psi10hat <- bkfe(gcounts, 10L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (-105*sqrt(2/pi)/(psi10hat*n))^(1/11) # bandwidth for psi_8 psi8hat <- bkfe(gcounts, 8L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (15*sqrt(2/pi)/(psi8hat*n))^(1/9) # bandwidth for psi_6 psi6hat <- bkfe(gcounts, 6L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (-3*sqrt(2/pi)/(psi6hat*n))^(1/7) # bandwidth for psi_4 bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE) } else if (level == 5L) { alpha <- (2*(sqrt(2))^15/(13*n))^(1/15) # bandwidth for psi_12 psi12hat <- bkfe(gcounts, 12L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (945*sqrt(2/pi)/(psi12hat*n))^(1/13) # bandwidth for psi_10 psi10hat <- bkfe(gcounts, 10L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (-105*sqrt(2/pi)/(psi10hat*n))^(1/11) # bandwidth for psi_8 psi8hat <- bkfe(gcounts, 8L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (15*sqrt(2/pi)/(psi8hat*n))^(1/9) # bandwidth for psi_6 psi6hat <- bkfe(gcounts, 6L, alpha, range.x = c(sa, sb), binned = TRUE) alpha <- (-3*sqrt(2/pi)/(psi6hat*n))^(1/7) # bandwidth for psi_4 bkfe(gcounts, 4L, alpha, range.x = c(sa, sb), binned = TRUE) } scalest * del0 * (1/(psi4hat*n))^(1/5) } ## Computes a direct plug-in selector of the ## bandwidth for local linear regression as ## described in the 1996 J. Amer. Statist. Assoc. ## paper by Ruppert, Sheather and Wand. dpill <- function(x, y, blockmax = 5, divisor = 20, trim = 0.01, proptrun = 0.05, gridsize = 401L, range.x = range(x), truncate = TRUE) { ## Trim the 100(trim)% of the data from each end (in the x-direction). xy <- cbind(x, y) xy <- xy[sort.list(xy[, 1L]), ] x <- xy[, 1L] y <- xy[, 2L] indlow <- floor(trim*length(x)) + 1 indupp <- length(x) - floor(trim*length(x)) x <- x[indlow:indupp] y <- y[indlow:indupp] ## Rename common parameters n <- length(x) M <- gridsize a <- range.x[1L] b <- range.x[2L] ## Bin the data gpoints <- seq(a, b, length = M) out <- rlbin(x, y, gpoints, truncate) xcounts <- out$xcounts ycounts <- out$ycounts ## Choose the value of N using Mallow's C_p Nmax <- max(min(floor(n/divisor), blockmax), 1) Nval <- cpblock(x, y, Nmax, 4) ## Estimate sig^2, theta_22 and theta_24 using quartic fits ## on "Nval" blocks. out <- blkest(x, y, Nval, 4) sigsqQ <- out$sigsqe th24Q <- out$th24e ## Estimate theta_22 using a local cubic fit ## with a "rule-of-thumb" bandwidth: "gamseh" gamseh <- (sigsqQ*(b-a)/(abs(th24Q)*n)) if (th24Q < 0) gamseh <- (3*gamseh/(8*sqrt(pi)))^(1/7) if (th24Q > 0) gamseh <- (15*gamseh/(16*sqrt(pi)))^(1/7) mddest <- locpoly(xcounts, ycounts, drv=2L, bandwidth=gamseh, range.x=range.x, binned=TRUE)$y llow <- floor(proptrun*M) + 1 lupp <- M - floor(proptrun*M) th22kn <- sum((mddest[llow:lupp]^2)*xcounts[llow:lupp])/n ## Estimate sigma^2 using a local linear fit ## with a "direct plug-in" bandwidth: "lamseh" C3K <- (1/2) + 2*sqrt(2) - (4/3)*sqrt(3) C3K <- (4*C3K/(sqrt(2*pi)))^(1/9) lamseh <- C3K*(((sigsqQ^2)*(b-a)/((th22kn*n)^2))^(1/9)) ## Now compute a local linear kernel estimate of ## the variance. mest <- locpoly(xcounts, ycounts, bandwidth=lamseh, range.x=range.x, binned=TRUE)$y Sdg <- sdiag(xcounts, bandwidth=lamseh, range.x=range.x, binned=TRUE)$y SSTdg <- sstdiag(xcounts, bandwidth=lamseh, range.x=range.x, binned=TRUE)$y sigsqn <- sum(y^2) - 2*sum(mest*ycounts) + sum((mest^2)*xcounts) sigsqd <- n - 2*sum(Sdg*xcounts) + sum(SSTdg*xcounts) sigsqkn <- sigsqn/sigsqd ## Combine to obtain final answer. (sigsqkn*(b-a)/(2*sqrt(pi)*th22kn*n))^(1/5) } ## For application of linear binning to a univariate data set. linbin <- function(X, gpoints, truncate = TRUE) { n <- length(X) M <- length(gpoints) trun <- if (truncate) 1L else 0L a <- gpoints[1L] b <- gpoints[M] .Fortran(F_linbin, as.double(X), as.integer(n), as.double(a), as.double(b), as.integer(M), as.integer(trun), double(M))[[7]] } ## Creates the grid counts from a bivariate data set X ## over an equally-spaced set of grid points ## contained in "gpoints" using the linear ## binning strategy. Note that the FORTRAN subroutine ## "lbtwod" is called. linbin2D <- function(X, gpoints1, gpoints2) { n <- nrow(X) X <- c(X[, 1L], X[, 2L]) M1 <- length(gpoints1) M2 <- length(gpoints2) a1 <- gpoints1[1L] a2 <- gpoints2[1L] b1 <- gpoints1[M1] b2 <- gpoints2[M2] out <- .Fortran(F_lbtwod, as.double(X), as.integer(n), as.double(a1), as.double(a2), as.double(b1), as.double(b2), as.integer(M1), as.integer(M2), double(M1*M2)) matrix(out[[9L]], M1, M2) } ## For computing a binned local polynomial ## regression estimator of a univariate regression ## function or its derivative. ## The data are discretised on an equally ## spaced grid. The bandwidths are discretised on a ## logarithmically spaced grid. locpoly <- function(x, y, drv = 0L, degree, kernel = "normal", bandwidth, gridsize = 401L, bwdisc = 25, range.x, binned = FALSE, truncate = TRUE) { ## Install safeguard against non-positive bandwidths: if (!missing(bandwidth) && bandwidth <= 0) stop("'bandwidth' must be strictly positive") drv <- as.integer(drv) if (missing(degree)) degree <- drv + 1L else degree <- as.integer(degree) if (missing(range.x) && !binned) if (missing(y)) { extra <- 0.05*(max(x) - min(x)) range.x <- c(min(x)-extra, max(x)+extra) } else range.x <- c(min(x), max(x)) ## Rename common variables M <- gridsize Q <- as.integer(bwdisc) a <- range.x[1L] b <- range.x[2L] pp <- degree + 1L ppp <- 2L*degree + 1L tau <- 4 ## Decide whether a density estimate or regressionestimate is required. if (missing(y)) { # obtain density estimate y <- NULL n <- length(x) gpoints <- seq(a, b, length = M) xcounts <- linbin(x, gpoints, truncate) ycounts <- (M-1)*xcounts/(n*(b-a)) xcounts <- rep(1, M) } else { # obtain regression estimate ## Bin the data if not already binned if (!binned) { gpoints <- seq(a, b, length = M) out <- rlbin(x, y, gpoints, truncate) xcounts <- out$xcounts ycounts <- out$ycounts } else { xcounts <- x ycounts <- y M <- length(xcounts) gpoints <- seq(a, b, length = M) } } ## Set the bin width delta <- (b-a)/(M-1L) ## Discretise the bandwidths if (length(bandwidth) == M) { hlow <- sort(bandwidth)[1L] hupp <- sort(bandwidth)[M] hdisc <- exp(seq(log(hlow), log(hupp), length = Q)) ## Determine value of L for each member of "hdisc" Lvec <- floor(tau*hdisc/delta) ## Determine index of closest entry of "hdisc" ## to each member of "bandwidth" indic <- if (Q > 1L) { lhdisc <- log(hdisc) gap <- (lhdisc[Q]-lhdisc[1L])/(Q-1) if (gap == 0) rep(1, M) else round(((log(bandwidth) - log(sort(bandwidth)[1L]))/gap) + 1) } else rep(1, M) } else if (length(bandwidth) == 1L) { indic <- rep(1, M) Q <- 1L Lvec <- rep(floor(tau*bandwidth/delta), Q) hdisc <- rep(bandwidth, Q) } else stop("'bandwidth' must be a scalar or an array of length 'gridsize'") if (min(Lvec) == 0) stop("Binning grid too coarse for current (small) bandwidth: consider increasing 'gridsize'") ## Allocate space for the kernel vector and final estimate dimfkap <- 2L * sum(Lvec) + Q fkap <- rep(0, dimfkap) curvest <- rep(0, M) midpts <- rep(0, Q) ss <- matrix(0, M, ppp) tt <- matrix(0, M, pp) Smat <- matrix(0, pp, pp) Tvec <- rep(0, pp) ipvt <- rep(0, pp) ## Call FORTRAN routine "locpol" out <- .Fortran(F_locpol, as.double(xcounts), as.double(ycounts), as.integer(drv), as.double(delta), as.double(hdisc), as.integer(Lvec), as.integer(indic), as.integer(midpts), as.integer(M), as.integer(Q), as.double(fkap), as.integer(pp), as.integer(ppp), as.double(ss), as.double(tt), as.double(Smat), as.double(Tvec), as.integer(ipvt), as.double(curvest)) curvest <- gamma(drv+1) * out[[19L]] list(x = gpoints, y = curvest) } ## For application of linear binning to a regression ## data set. rlbin <- function(X, Y, gpoints, truncate = TRUE) { n <- length(X) M <- length(gpoints) trun <- if (truncate) 1L else 0L a <- gpoints[1L] b <- gpoints[M] out <- .Fortran(F_rlbin, as.double(X), as.double(Y), as.integer(n), as.double(a), as.double(b), as.integer(M), as.integer(trun), double(M), double(M)) list(xcounts = out[[8L]], ycounts = out[[9L]]) } ## For computing the binned diagonal entries of a smoother ## matrix for local polynomial kernel regression. sdiag <- function(x, drv = 0L, degree = 1L, kernel = "normal", bandwidth, gridsize = 401L, bwdisc = 25, range.x, binned = FALSE, truncate = TRUE) { if (missing(range.x) && !binned) range.x <- c(min(x), max(x)) ## Rename common variables M <- gridsize Q <- as.integer(bwdisc) a <- range.x[1L] b <- range.x[2L] pp <- degree + 1L ppp <- 2L*degree + 1L tau <- 4 ## Bin the data if not already binned if (!binned) { gpoints <- seq(a, b, length = M) xcounts <- linbin(x, gpoints, truncate) } else { xcounts <- x M <- length(xcounts) gpoints <- seq(a, b, length = M) } ## Set the bin width delta <- (b-a)/(M-1L) ## Discretise the bandwidths if (length(bandwidth) == M) { hlow <- sort(bandwidth)[1L] hupp <- sort(bandwidth)[M] hdisc <- exp(seq(log(hlow), log(hupp), length = Q)) ## Determine value of L for each member of "hdisc" Lvec <- floor(tau*hdisc/delta) ## Determine index of closest entry of "hdisc" ## to each member of "bandwidth" indic <- if (Q > 1L) { lhdisc <- log(hdisc) gap <- (lhdisc[Q]-lhdisc[1L])/(Q-1) if (gap == 0) rep(1, M) else round(((log(bandwidth) - log(sort(bandwidth)[1L]))/gap) + 1) } else rep(1, M) } else if (length(bandwidth) == 1L) { indic <- rep(1, M) Q <- 1L Lvec <- rep(floor(tau*bandwidth/delta), Q) hdisc <- rep(bandwidth, Q) } else stop("'bandwidth' must be a scalar or an array of length 'gridsize'") dimfkap <- 2L * sum(Lvec) + Q fkap <- rep(0, dimfkap) midpts <- rep(0, Q) ss <- matrix(0, M, ppp) Smat <- matrix(0, pp, pp) work <- rep(0, pp) det <- rep(0, 2L) ipvt <- rep(0, pp) Sdg <- rep(0, M) out <- .Fortran(F_sdiag, as.double(xcounts), as.double(delta), as.double(hdisc), as.integer(Lvec), as.integer(indic), as.integer(midpts), as.integer(M), as.integer(Q), as.double(fkap), as.integer(pp), as.integer(ppp), as.double(ss), as.double(Smat), as.double(work), as.double(det), as.integer(ipvt), as.double(Sdg)) list(x = gpoints, y = out[[17L]]) } ## For computing the binned diagonal entries of SS^T ## where S is a smoother matrix for local polynomial ## kernel regression. sstdiag <- function(x, drv = 0L, degree = 1L, kernel = "normal", bandwidth, gridsize = 401L, bwdisc = 25, range.x, binned = FALSE, truncate = TRUE) { if (missing(range.x) && !binned) range.x <- c(min(x), max(x)) ## Rename common variables M <- gridsize Q <- as.integer(bwdisc) a <- range.x[1L] b <- range.x[2L] pp <- degree + 1L ppp <- 2L*degree + 1L tau <- 4L ## Bin the data if not already binned if (!binned) { gpoints <- seq(a, b, length = M) xcounts <- linbin(x, gpoints, truncate) } else { xcounts <- x M <- length(xcounts) gpoints <- seq(a, b, length = M) } ## Set the bin width delta <- (b-a)/(M-1L) ## Discretise the bandwidths if (length(bandwidth) == M) { hlow <- sort(bandwidth)[1L] hupp <- sort(bandwidth)[M] hdisc <- exp(seq(log(hlow), log(hupp), length = Q)) ## Determine value of L for each member of "hdisc" Lvec <- floor(tau*hdisc/delta) ## Determine index of closest entry of "hdisc" ## to each member of "bandwidth" indic <- if (Q > 1L) { lhdisc <- log(hdisc) gap <- (lhdisc[Q]-lhdisc[1L])/(Q-1) if (gap == 0) rep(1, M) else round(((log(bandwidth) - log(sort(bandwidth)[1L]))/gap) + 1) } else rep(1, M) } else if (length(bandwidth) == 1L) { indic <- rep(1, M) Q <- 1L Lvec <- rep(floor(tau*bandwidth/delta), Q) hdisc <- rep(bandwidth, Q) } else stop("'bandwidth' must be a scalar or an array of length 'gridsize'") dimfkap <- 2L * sum(Lvec) + Q fkap <- rep(0, dimfkap) midpts <- rep(0, Q) ss <- matrix(0, M, ppp) uu <- matrix(0, M, ppp) Smat <- matrix(0, pp, pp) Umat <- matrix(0, pp, pp) work <- rep(0, pp) det <- rep(0, 2L) ipvt <- rep(0, pp) SSTd <- rep(0, M) SSTd <- .Fortran(F_sstdg, as.double(xcounts), as.double(delta), as.double(hdisc), as.integer(Lvec), as.integer(indic), as.integer(midpts), as.integer(M), as.integer(Q), as.double(fkap), as.integer(pp), as.integer(ppp), as.double(ss), as.double(uu), as.double(Smat), as.double(Umat), as.double(work), as.double(det), as.integer(ipvt), as.double(SSTd))[[19L]] list(x = gpoints, y = SSTd) } .onAttach <- function(libname, pkgname) packageStartupMessage("KernSmooth 2.23 loaded\nCopyright M. P. Wand 1997-2009") .onUnload <- function(libpath) library.dynam.unload("KernSmooth", libpath)