#### lm, glm, aov, etc --- typically *strict* tests (no *.Rout.save) options(warn = 2, width = 101) # all warnings must be asserted below data(mtcars) mtcar2 <- within(mtcars, { mpg_c <- mpg * (1+am) + 5 am <- factor(am) }) fm2 <- glm(disp ~ am * mpg + mpg_c, data = mtcar2) c2 <- coef(fm2) V2 <- vcov(fm2) jj <- !is.na(c2) stopifnot(names(which(!jj)) == "am1:mpg" , identical(length(c2), 5L), identical(dim(V2), c(5L,5L)) , all.equal(c2[jj], coef(fm2, complete=FALSE)) , all.equal(V2[jj,jj], vcov(fm2, complete=FALSE)) , all.equal(c2[jj], c(`(Intercept)`= 626.0915, am1 = -249.4183, mpg = -33.74701, mpg_c = 10.97014), tol = 7e-7)# 1.01e-7 [F26 Lnx 64b] ) ### predict.lm(, newdata = *) -- PR#15072, PR#16158 -------------- ## constructed "exactly" rank-deficient x1 <- -4:4 x2 <- c(-2,1,-1,2, 0, 2,-1,1,-2) x3 <- 3*x1 - 2*x2 x4 <- x2 - x1 + 4 y <- 1 + x1 + x2 + x3 + x4 + c(-.5,.5,.5,-.5, 0, .5,-.5,-.5,.5) cbind(x1,x2,x3,x4,y) ## Fit a model mod1234 <- lm(y ~ x1 + x2 + x3 + x4) if(requireNamespace("MASS")) { (al <- alias(mod1234)) # x3: 3*x1 - 2*x2 \\ x4 : 4 -x1 +x2 stopifnot(all.equal(rbind(x3 = c(0, 3,-2), x4 = c(4, -1, 1)), unclass(al$Complete), check.attributes=FALSE)) } ## new.x new.x <- data.frame( row.names = LETTERS[1:6], x1 = c(3, 6, 6, 0, 0, 1), x2 = c(1, 2, 2, 0, 0, 2), x3 = c(7,14,14, 0, 0, 3), x4 = c(2, 4, 0, 4, 0, 4)) ## where do we have the same aliasing subspace? new.ok <- with(new.x, (x4 == 4 - x1 + x2) & (x3 == 3*x1 - 2*x2)) which(new.ok) # 1 3 4 ## old (hard-wired) R <= 4.2.x behavior: tools::assertWarning(ps <- predict(mod1234, newdata=new.x, rankdeficient = "simple"), verbose=TRUE) ps1 <- predict(mod1234, newdata=data.frame(x1,x2,x3,x4), rankdeficient = "warnif") # *not* warning anymore ## new (pN <- predict(mod1234, new.x, rankdeficient = "NA")) tools::assertWarning(pN.<- predict(mod1234, new.x, rankdeficient = "NAwarn")) ## "compromise": old predictions with extra info (no warning): (pne <- predict(mod1234, new.x, rankdeficient = "non-estim")) stopifnot(exprs = { identical(pN, pN.) all.equal(fitted(mod1234), ps1, tol = 2e-15) # seen 3.11e-16 identical(i.ne <- attr(pne, "non-estim"), c(B = 2L, E = 5L, F = 6L)) which(!new.ok) == i.ne is.na(pN[i.ne]) identical(ps[-i.ne], pN[-i.ne]) identical(unname(ps), `attributes<-`(pne, NULL)) }) d8 <- data.frame( y = c(747625803, -74936705, -750056726, -299805697, 76131520, -225971209, 301836031, 2249594776, 300581863, -2999324198, 450274906, -600962167, 1800954652, 900083298, -1498452810), X1 = c(149999999, -225000002, -149999999, 149999998, 225000002, -675000006, -149999998, 449999997, 900000008, -599999996, 1350000012, 299999996, -899999988, -449999994, -299999998), X2 = c(300000000.5, -149999999, -300000000.5, 1, 149999999, -449999997, -1, 900000001.5, 599999996, -1200000002, 899999994, 2, -6, -3, -600000001), X3 = c(-1, 149999998, 1, -150000002, -149999998, 449999994, 150000002, -3, -599999992, 4, -899999988, -300000004, 900000012, 450000006, 2)) coef(fm8. <- lm(y ~ . -1, data = d8)) # the one for X3 is NA cf8. <- c(X1 = -1.999854802642, X2 = 3.499496934397, X3 = NA) all.equal(cf8., coef(fm8.), tol=0)# -> "Mean rel..diff.: ~ 3e-15 stopifnot(all.equal(cf8., coef(fm8.))) coef(fm8.9 <- lm(y ~ . -1, data = d8, tol = 1e-9)) # no NA , but "instable" -- not too precise cf8.9 <- c(X1 = 45822.830422, X2 = -22908.915871, X3 = 45824.830295) all.equal(cf8.9, coef(fm8.9), tol=0)# -> "Mean rel..diff.: 5.3e-9 | 5.15e-12 ## was < 2e-8 in R 4.2.2 ## x86_64 Linux/gcc12 gives ca 5e-12 ## vanilla M1mac gives 6.16e-11, Accelerate on M1 macOS gives 3.99e-10; ## Debian with "generic" (i.e. not R's) BLAS/Lapack *still* gave 5.2985e-09 (?!) stopifnot(all.equal(cf8.9, coef(fm8.9), tol = 7e-9)) ## predict : nd <- d8[,-1] + rep(outer(c(-2:2),10^(1:3)), 3) # 5 * 9 = 45 = 15 * 3 (nrow * ncol) row.names(nd) <- LETTERS[1:nrow(nd)] tools::assertWarning(verbose=TRUE, # "... rank-deficient .. consider predict(., rankdeficient="NA") ps <- predict(fm8. , newdata=nd, rankdeficient = "simple") ) tools::assertWarning(verbose=TRUE, # "... rank-deficient .. attr(*, "non-estim") has doubtful cases ps.<- predict(fm8. , newdata=nd) ) # default pN <- predict(fm8. , newdata=nd, rankdeficient = "NA") pne <- predict(fm8. , newdata=nd, rankdeficient = "non-estim") p.9 <- predict(fm8.9, newdata=nd) print(digits=9, cbind(ps, pne, pN, p.9)) all.equal(p.9, ps, tol=0)# 0.035.. dropAtt <- function(x) `attributes<-`(x, NULL) stopifnot(exprs = { ps == ps. # numbers; identical(unname(ps), dropAtt(ps.)) identical(ps., pne) # both have "non-estim" identical(i.ne <- attr(pne, "non-estim"), c(K = 11L, L = 12L, N = 14L, O = 15L)) is.na(pN[i.ne]) identical(ps[-i.ne], pN[-i.ne]) }) ## play with tol str(tls <- sort(outer(c(1,2,4), 10^-(9:5)))) nT <- length(tls <- setNames(tls, formatC(tls))) pls <- t(sapply(tls, function(TL) predict(fm8. , newdata=nd, tol = TL, rankdeficient = "NA"))) stopifnot(is.finite(plsLst <- pls[nT,])) # (no NA) plsLst sweep(pls, 2L, plsLst, `-`) ## This *is* monotone in tol -- still somewhat amazing how much changes ## within two factors of 4 (of tol), i.e., between 2e-7 ... 4e-6 ## A B C D E F G H I J K L M N O ## 1e-09 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA ## 2e-09 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA ## 4e-09 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA ## 1e-08 NA NA 0 NA NA NA NA 0 NA NA NA NA NA NA NA ## 2e-08 NA NA 0 NA NA NA NA 0 NA NA NA NA 0 NA NA ## 4e-08 NA NA 0 0 NA NA NA 0 NA NA NA NA 0 NA NA ## 1e-07 0 0 0 0 0 NA NA 0 0 NA NA NA 0 NA NA ## 2e-07 0 0 0 0 0 NA NA 0 0 0 NA NA 0 NA NA ## 4e-07 0 0 0 0 0 0 NA 0 0 0 NA NA 0 NA NA ## 1e-06 0 0 0 0 0 0 0 0 0 0 NA NA 0 NA NA ## 2e-06 0 0 0 0 0 0 0 0 0 0 0 NA 0 0 NA ## 4e-06 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ## 1e-05 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ## 2e-05 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ## 4e-05 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 (iFi <- apply(pls, 2, function(.) which.max(is.finite(.)))) ## A B C D E F G H I J K L M N O ## 7 7 4 6 7 9 10 4 7 8 11 12 5 11 12 stopifnot(exprs = { ## checking monotonicity: each column is (NA NA ... NA | p_i p_i ... p_i) vapply(seq_along(tls), function(i) length(unique(pls[iFi[i]:nT, i])) == 1L, NA) ## allow 1 off : 3 <= iFi iFi <= 13 }) ## __FIXME__ ## predict(*, ... type="terms" .. ) does *not* obey rankdeficient=".." ##-------- dummy.coef() -- with "character"-factor --------------------------------------- ## [Bug 18635] New: dummy.coef could not deal with character variable // 9 Dec 2023 ## --------- https://bugs.r-project.org/show_bug.cgi?id=18635 ## for data generating model ch2num <- function(ch) vapply(ch, function(.) as.integer(charToRaw(.)), 1L, USE.NAMES=FALSE) ## test: str(print(ch2num(LETTERS)) - ch2num(letters)) set.seed(7) mydatC <- data.frame(x = sort(rnorm(49)), ch = c(LETTERS[1:3], letters[1:4])) mydatC$y <- with(mydatC, 20*x + 10 - (ch2num(ch) - 68) + rnorm(x)) str(mydatC) if(dev.interactive(TRUE)) ## visualize: plot(y ~ x, data=mydatC, col = factor(ch)) Sys.setlocale("LC_COLLATE", "C") mydatF <- mydatC; mydatF$ch <- factor(mydatC$ch) str(mydatF) ## $ ch: Factor w/ 7 levels "A","B","C","a",..: 1 2 3 4 5 6 7 1 2 3 ... (sfmCc <- summary(fmCc <- lm(y ~ ., data=mydatC))) sfmCf <- summary(fmCf <- lm(y ~ ., data=mydatF)) (ae.cf <- all.equal(sfmCc, sfmCf)) # only the call differs: ## [1] "Component “call”: target, current do not match when deparsed" stopifnot(length(ae.cf) == 1L, grepl("^Component .call.:", ae.cf)) coef(fmCc) ## (Intercept) x chB chC cha chb chc chd ## 12.7781626 19.8494272 -0.8240301 -1.3309157 -31.7032317 -32.8819084 -33.3519985 -34.6249161 (coef(fmCf) -> cf.f) # the same stopifnot(exprs = { identical(coef(fmCc), cf.f) }) (dummy.coef(fmCc) -> dc.Cc) ##-- was all wrong in R <= 4.3.2 ## (Intercept): 12.77816 ## x: 19.84943 ## ch: A B C a b c d ## 0.0000000 -0.8240301 -1.3309157 -31.7032317 -32.8819084 -33.3519985 -34.6249161 dummy.coef(fmCf) -> dc.Cf # the same all.equal15 <- function(x,y, ...) all.equal(x,y, tolerance = 1e-15, ...) stopifnot(exprs = { all.equal15(dc.Cc, dc.Cf) # *not* in R <= 4.3.2 ## coef() <--> dummy.coef() {was always true} length(dcCf <- unlist(dc.Cf)) == 1 + length(cf.f) is.character(names(dcCf) <- sub("[.]", "", names(dcCf))) all.equal15(dcCf[i2 <- 1:2], cf.f[i2], check.attributes = FALSE) all.equal15(dcCf[-i2], c(chA = 0, cf.f[-i2])) }) ##============= + 2 way interactions ============================================ fm2c <- lm(y ~ .^2, data=mydatC) cf2c <- coef(fm2c) (dc2c <- dummy.coef(fm2c)) # *wrong* in R <= 4.3.2 stopifnot(exprs = { length(dc2c <- unlist(dc2c)) == 2 + length(cf2c) # was false all.equal15(dc2c[1:2], cf2c[1:2], check.attributes = FALSE) is.character(names(dc2c) <- sub("[.]", "", names(dc2c))) all.equal15(dc2c[-(1:2)][1:7], c(chA = 0, cf2c[-(1:2)][1:6])) all.equal15(tail(dc2c, 7), c(`x:chA` = 0, tail(cf2c, 6))) }) fm2f <- lm(y ~ .^2, data=mydatF) # was always correct (dc2f <- dummy.coef(fm2f)) cf2f <- coef(fm2f) stopifnot(exprs = { ## were all TRUE before length(dc2f <- unlist(dc2f)) == 2 + length(cf2f) all.equal(dc2f[1:2], cf2f[1:2], check.attributes = FALSE) is.character(names(dc2f) <- sub("[.]", "", names(dc2f))) all.equal15(dc2f[-(1:2)][1:7], c(chA = 0, cf2f[-(1:2)][1:6])) all.equal15(tail(dc2f, 7), c(`x:chA` = 0, tail(cf2f, 6))) })