# This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # A copy of the GNU General Public License is available via WWW at # http://www.gnu.org/copyleft/gpl.html. You can also obtain it by # writing to the Free Software Foundation, Inc., 59 Temple Place, # Suite 330, Boston, MA 02111-1307 USA. # Copyrights (C) # for this R-port: # 1999 - 2008, Diethelm Wuertz, Rmetrics Foundation, GPL # Diethelm Wuertz # info@rmetrics.org # www.rmetrics.org # for the code accessed (or partly included) from other R-ports: # see R's copyright and license files # for the code accessed (or partly included) from contributed R-ports # and other sources # see Rmetrics's copyright file ################################################################################ # FUNCTION: TIME SERIES TESTS # lmTest Linear Modelling Test, select from: # bgTest Breusch-Godfrey Test # bpTest Breusch-Pagan Test # dwTest Durbin-Watson Test # gqTest Goldfeld-Quandt Test # harvTest Harvey-Collier Test # hmcTest Harrison-McCabe Test # rainTest Rainbow Test # resetTest Ramsey's RESET Test ################################################################################ ################################################################################ # BUILTIN - PACKAGE DESCRIPTION: # Package: lmtest # Title: Testing Linear Regression Models # Version: 0.9-3 # Date: $Date: 2003/02/19 15:54:30 $ # Author: Torsten Hothorn , # Achim Zeileis , David Mitchell # Maintainer: Achim Zeileis # Description: A collection of tests, data sets and examples # for diagnostic checking in linear regression models. # Depends: R (>= 1.4.0) # License: GPL ################################################################################ lmTest <- function(formula, method = c("bg", "bp", "dw", "gq", "harv", "hmc", "rain", "reset"), data = list(), ...) { # A function implemented by Diethelm Wuertz # FUNCTION: # Load Library: # Here we use a BUILTIN ... # require(lmtest) # Settings: method = match.arg(method) # DW: if (method == "dw") ans = .dwtest(formula = formula, data = data, ...) # BP: if (method == "bp") ans = .bptest(formula = formula, data = data, ...) # GQ: if (method == "gq") ans = .gqtest(formula = formula, data = data, ...) # HMC: if (method == "hmc") ans = .hmctest(formula = formula, data = data, ...) # HARV: if (method == "harv") ans = .harvtest(formula = formula, data = data, ...) # RAIN: if (method == "rain") ans = .raintest(formula = formula, data = data, ...) # RESET: if (method == "reset") ans = .reset(formula = formula, data = data, ...) # BG: if (method == "bg") ans = .bgtest(formula = formula, data = data, ...) # Return Result: ans } # ****************************************************************************** dwTest <- function(formula, alternative = c("greater", "two.sided", "less"), iterations = 15, exact = NULL, tol = 1.0e-10, data = list()) { .dwtest(formula, alternative, iterations, exact, tol, data) } # ------------------------------------------------------------------------------ bpTest <- function(formula, varformula = NULL, studentize = TRUE, data = list()) { .bptest(formula, varformula, studentize, data) } # ------------------------------------------------------------------------------ gqTest <- function(formula, point=0.5, order.by = NULL, data = list()) { .gqtest(formula, point, order.by, data) } # ------------------------------------------------------------------------------ hmcTest <- function(formula, point = 0.5, order.by = NULL, simulate.p = TRUE, nsim = 1000, plot = FALSE, data = list()) { .hmctest(formula, point, order.by, simulate.p, nsim, plot, data) } # ------------------------------------------------------------------------------ harvTest <- function(formula, order.by = NULL, data = list()) { .harvtest(formula, order.by, data) } # ------------------------------------------------------------------------------ rainTest = function(formula, fraction = 0.5, order.by = NULL, center = NULL, data = list()) { .raintest(formula, fraction, order.by, center, data) } # ------------------------------------------------------------------------------ resetTest <- function(formula, power = 2:3, type = c("fitted", "regressor", "princomp"), data = list()) { .reset(formula, power, type, data) } # ------------------------------------------------------------------------------ bgTest <- function(formula, order = 1, type = c("Chisq", "F"), data = list()) { .bgtest(formula, order, type, data) } ################################################################################ # Regression Builtin: # All names prefixed by a dot ... # Package: lmtest # Title: Testing Linear Regression Models # Version: 0.9-6 # Date: 2004-01-30 # Author: Torsten Hothorn , # Achim Zeileis , David Mitchell # Maintainer: Achim Zeileis # Description: A collection of tests, data sets and examples # for diagnostic checking in linear regression models. # Depends: R (>= 1.4.0) # Suggests: strucchange # License: GPL # ------------------------------------------------------------------------------ .dwtest <- function(formula, alternative = c("greater", "two.sided", "less"), iterations = 15, exact = NULL, tol = 1e-10, data = list()) { dname = paste(deparse(substitute(formula))) alternative = match.arg(alternative) mf = model.frame(formula, data = data) y = model.response(mf) X = model.matrix(formula, data = data) n = nrow(X) if (is.null(exact)) exact = (n < 100) k = ncol(X) res = lm.fit(X,y)$residuals dw = sum(diff(res)^2)/sum(res^2) A = diag(c(1,rep(2, n-2), 1)) A[abs(row(A)-col(A))==1] = -1 Q1 = chol2inv(qr.R(qr(X))) if (exact) { MA = diag(rep(1,n)) - X %*% Q1 %*% t(X) MA = MA %*% A ev = eigen(MA)$values[1:(n-k)] if (any(Im(ev)>tol)) warning("imaginary parts of eigenvalues discarded") ev = Re(ev) ev = ev[ev > tol] pdw = function(dw) .Fortran("pan", as.double(c(dw,ev)), as.integer(length(ev)), as.double(0), as.integer(iterations), x=double(1), PACKAGE = "fMultivar")$x pval = switch(alternative, "two.sided" = (2*min(pdw(dw), 1-pdw(dw))), "less" = (1 - pdw(dw)), "greater" = pdw(dw)) if ((pval > 1) | (pval < 0)) { warning1 = "exact p value cannot be computed (not in [0,1])," warning2 = "approximate p value will be used" warning(paste(warning1, warning2)) exact = FALSE } } if (!exact) { XAXQ = t(X) %*% A %*% X %*% Q1 P = 2*(n-1) - sum(diag(XAXQ)) Q = 2*(3*n - 4) - 2* sum(diag(t(X) %*% A %*% A %*% X %*% Q1)) + sum( diag(XAXQ %*% XAXQ)) dmean = P/(n-k) dvar = 2/((n-k)*(n-k+2)) * (Q - P*dmean) pval = switch(alternative, "two.sided" = (2*pnorm(abs(dw-dmean), sd=sqrt(dvar), lower.tail = FALSE)), "less" = pnorm(dw, mean = dmean, sd = sqrt(dvar), lower.tail = FALSE), "greater" = pnorm(dw, mean = dmean, sd = sqrt(dvar))) } alternative = switch(alternative, "two.sided" = "true autocorelation is not 0", "less" = "true autocorrelation is less than 0", "greater" = "true autocorrelation is greater than 0") names(dw) = "DW" RVAL = list(statistic = dw, method = "Durbin-Watson test", alternative = alternative, p.value= pval, data.name=dname) class(RVAL) = "htest" return(RVAL) } # ------------------------------------------------------------------------------ .bptest <- function(formula, varformula = NULL, studentize = TRUE, data = list()) { dname = paste(deparse(substitute(formula))) mf = model.frame(formula, data = data) y = model.response(mf) X = model.matrix(formula, data = data) k = ncol(X) n = nrow(X) resi = lm.fit(X,y)$residuals sigma2 = sum(resi^2)/n if (is.null(varformula)) varformula = formula Z = model.matrix(varformula, data = data) if (studentize) { w = resi^2 - sigma2 fv = lm.fit(Z,w)$fitted bp = n * sum(fv^2)/sum(w^2) method = "studentized Breusch-Pagan test" } else { f = resi^2/sigma2 -1 fv = lm.fit(Z,f)$fitted bp = 0.5 * sum(fv^2) method = "Breusch-Pagan test" } names(bp) = "BP" df = ncol(Z)-1 names(df) = "df" RVAL = list(statistic = bp, parameter = df, method = method, p.value= 1-pchisq(bp,df), data.name=dname) class(RVAL) = "htest" return(RVAL) } # ------------------------------------------------------------------------------ .gqtest <- function(formula, point = 0.5, order.by = NULL, data = list()) { dname = paste(deparse(substitute(formula))) mf = model.frame(formula, data = data) y = model.response(mf) X = model.matrix(formula, data = data) k = ncol(X) n = nrow(X) if (point < 1) point = floor(point*n) if (point > n - k | point < k) stop("inadmissable breakpoint") if (!is.null(order.by)) { x = model.matrix(order.by, data = data) x = as.vector(x[,ncol(x)]) X = as.matrix(X[order(x),]) y = y[order(x)] } rss1 = sum(lm.fit(as.matrix(X[1:point,]),y[1:point])$residuals^2) rss2 = sum(lm.fit(as.matrix(X[(point+1):n,]),y[(point+1):n])$residuals^2) gq = (rss2/(n-point-k))/(rss1/(point-k)) df = c(n-point-k, point-k) names(df) = c("df1", "df2") PVAL = 1-pf(gq, df[1], df[2]) method = "Goldfeld-Quandt test" names(gq) = "GQ" RVAL = list(statistic = gq, parameter = df, method = method, p.value= PVAL, data.name = dname) class(RVAL) = "htest" return(RVAL) } # ------------------------------------------------------------------------------ .hmctest <- function(formula, point = 0.5, order.by = NULL, simulate.p = TRUE, nsim = 1000, plot = FALSE, data = list()) { dname = paste(deparse(substitute(formula))) mf = model.frame(formula, data = data) y = model.response(mf) X = model.matrix(formula, data = data) k = ncol(X) n = nrow(X) if (point < 1) point = floor(point*n) if (point > n - k | point < k) stop("inadmissable breakpoint") if (!is.null(order.by)) { x = model.matrix(order.by, data = data) x = as.vector(x[,ncol(x)]) X = as.matrix(X[order(x),]) y = y[order(x)] } resi = lm.fit(X,y)$residuals hmc = sum(resi[1:point]^2)/sum(resi^2) if (plot) { stats = c(0,cumsum(resi^2))/sum(resi^2) stats = ts(stats, start=0, freq=n) plot(stats, xlab="fraction", ylab="Harrison-McCabe statistics", xaxs="i", yaxs="i") abline(0,1) } names(hmc) = "HMC" if (simulate.p) { stat = rep(0, nsim) for (i in 1:nsim) { x = rnorm(n) x = (x - mean(x))/sqrt(var(x)) stat[i] = sum(x[1:point]^2)/sum(x^2) } PVAL = mean(stat <= hmc) } else { PVAL = NA } RVAL = list(statistic = hmc, method = "Harrison-McCabe test", p.value= PVAL, data.name=dname) class(RVAL) = "htest" return(RVAL) } # ------------------------------------------------------------------------------ .harvtest <- function(formula, order.by = NULL, data = list()) { dname = paste(deparse(substitute(formula))) mf = model.frame(formula, data = data) y = model.response(mf) X = model.matrix(formula, data = data) k = ncol(X) n = nrow(X) rec.res = function(X, y) { n = nrow(X) q = ncol(X) w = rep(0,(n-q)) Xr1 = X[1:q,,drop = FALSE] xr = as.vector(X[q+1, ]) X1 = chol2inv(qr.R(qr(Xr1))) fr = as.vector((1 + (t(xr) %*% X1 %*% xr))) betar = X1 %*%t(Xr1)%*% y[1:q] w[1] = (y[q+1] - t(xr) %*% betar)/sqrt(fr) for(r in ((q+2):n)) { X1 = X1 - (X1 %*% outer(xr, xr) %*% X1)/fr betar = betar + X1 %*% xr * w[r-q-1]*sqrt(fr) xr = as.vector(X[r, ]) fr = as.vector((1 + (t(xr) %*% X1 %*% xr))) w[r-q] = (y[r] - t(xr) %*% betar)/sqrt(fr) } return(w) } if (!is.null(order.by)) { x = model.matrix(order.by, data = data) x = as.vector(x[,ncol(x)]) X = as.matrix(X[order(x),]) y = y[order(x)] } resr = rec.res(X,y) sigma = sqrt(var(resr)*(length(resr)-1)/(n-k-1)) resr = resr / sigma harv = abs(sum(resr)/sqrt(n-k))/sqrt(var(resr)) names(harv) = "HC" df = n-k-1 names(df) = "df" RVAL = list(statistic = harv, parameter = df, method = "Harvey-Collier test", p.value= 2 * (1-pt(harv, n-k-1)), data.name=dname) class(RVAL) = "htest" return(RVAL) } # ------------------------------------------------------------------------------ .raintest <- function(formula, fraction=0.5, order.by=NULL, center=NULL, data=list()) { dname = paste(deparse(substitute(formula))) mf = model.frame(formula, data = data) y = model.response(mf) X = model.matrix(formula, data = data) k = ncol(X) n = nrow(X) if (is.null(order.by)) { if (is.null(center)) center = 0.5 if (center > 1) center = center/n from = ceiling(quantile(1:n, probs=(center-fraction/2))) to = from + floor(fraction*n) - 1 } else if (!is.null(class(order.by)) && class(order.by)=="formula") { x = model.matrix(order.by, data = data) x = as.vector(x[,ncol(x)]) X = as.matrix(X[order(x),]) y = y[order(x)] if (is.null(center)) center = 0.5 if (center > 1) center = center/n from = ceiling(quantile(1:n, probs=(center-fraction/2))) to = from + floor(fraction*n) - 1 } else if (order.by == "mahalanobis") { if (is.null(center)) center = apply(X,2,mean) o = order(mahalanobis(X,center, chol2inv(qr.R(qr(X))), inverted = TRUE)) X = as.matrix(X[o,]) y = y[o] from = 1 to = floor(fraction*n) } else stop("order.by must be a formula, \"mahalanobis\" or NULL") subX = as.matrix(X[from:to,]) suby = y[from:to] n1 = nrow(subX) if (n1 < k) stop("not enough observations in subset") resi = lm.fit(X,y)$residuals subresi = lm.fit(subX, suby)$residuals sresi = sum(resi^2) sresi1 = sum(subresi^2) rain = ((sresi - sresi1)/(n-n1))/(sresi1/(n1-k)) names(rain) = "Rain" df = c((n-n1),(n1-k)) names(df) = c("df1","df2") RVAL = list(statistic = rain, parameter = df, method = "Rainbow test", p.value= as.vector(1-pf(rain, df[1], df[2])), data.name=dname) class(RVAL) = "htest" return(RVAL) } # ------------------------------------------------------------------------------ .reset <- function(formula, power=2:3, type=c("fitted", "regressor", "princomp"), data=list()) { dname = paste(deparse(substitute(formula))) mf = model.frame(formula, data = data) y = model.response(mf) X = model.matrix(formula, data = data) k = ncol(X) n = nrow(X) type = match.arg(type) switch(type, "fitted" = { y.hat = lm.fit(X,y)$fitted Z = matrix(t(sapply(y.hat, "^", power)), nrow=n) }, "regressor" = { Z = as.matrix(mf[,which(!sapply(mf,is.factor))[-1]]) Z = matrix(as.vector(t(sapply(as.vector(Z), "^", power))), nrow=n) }, "princomp" = { Z = as.matrix(mf[,which(!sapply(mf,is.factor))[-1]]) pc1 = as.matrix(eigen(cov(Z))$vectors)[,1] pc1 = as.vector(Z %*% pc1) Z = matrix(t(sapply(pc1, "^", power)), nrow=n) }) XZ = cbind(X, Z) q = ncol(Z) res1 = lm.fit(X,y)$residuals res2 = lm.fit(XZ,y)$residuals res1 = sum(res1^2) res2 = sum(res2^2) df1 = q df2 = n-(k+q) reset = (df2/df1) * ((res1 - res2) / res2) names(reset) = "RESET" df = c(df1, df2) names(df) = c("df1","df2") RVAL = list(statistic = reset, parameter = df, method = "RESET test", p.value= as.vector(1-pf(reset, df1, df2)), data.name=dname) class(RVAL) = "htest" return(RVAL) } # ------------------------------------------------------------------------------ .bgtest <- function(formula, order = 1, type = c("Chisq", "F"), data = list()) { dname = paste(deparse(substitute(formula))) mf = model.frame(formula, data = data) y = model.response(mf) modelterms = terms(formula, data = data) X = model.matrix(modelterms, data = data) n = nrow(X) k = ncol(X) order = 1:order m = length(order) resi = lm.fit(X,y)$residuals Z = sapply(order, function(x) c(rep(0, x), resi[1:(n-x)])) auxfit = lm.fit(cbind(X,Z), resi) switch(match.arg(type), "Chisq" = { bg = n * sum(auxfit$fitted^2)/sum(resi^2) p.val = pchisq(bg, m, lower.tail = FALSE) df = m names(df) = "df" }, "F" = { uresi = auxfit$residuals rresi = lm.fit(X,resi)$residuals bg = ((sum(rresi^2) - sum(uresi^2))/m) / (sum(uresi^2) / (n-k-m)) p.val = pf(bg, df1 = m, df2 = n-k, lower.tail = FALSE) df = c(m, n-k) names(df) = c("df1", "df2") }) names(bg) = "LM test" RVAL = list(statistic = bg, parameter = df, method = paste("Breusch-Godfrey test for serial correlation of order", max(order)), p.value = p.val, data.name = dname) class(RVAL) = "htest" return(RVAL) } ################################################################################