R Under development (unstable) (2022-03-19 r81942) -- "Unsuffered Consequences" Copyright (C) 2022 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ## For examples skipped in testing because they need recommended packages. > > ## This is skipped entirely on a Unix-alike if recommended packages are, > ## so for Windows > if(!require("MASS")) q() Loading required package: MASS > > pdf("reg-examples-3.pdf", encoding = "ISOLatin1.enc") > > ## From datasets > if(require("survival")) { + model3 <- clogit(case ~ spontaneous+induced+strata(stratum), data = infert) + print(summary(model3)) + detach("package:survival", unload = TRUE) # survival (conflicts) + } Loading required package: survival Call: coxph(formula = Surv(rep(1, 248L), case) ~ spontaneous + induced + strata(stratum), data = infert, method = "exact") n= 248, number of events= 83 coef exp(coef) se(coef) z Pr(>|z|) spontaneous 1.9859 7.2854 0.3524 5.635 1.75e-08 *** induced 1.4090 4.0919 0.3607 3.906 9.38e-05 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 exp(coef) exp(-coef) lower .95 upper .95 spontaneous 7.285 0.1373 3.651 14.536 induced 4.092 0.2444 2.018 8.298 Concordance= 0.776 (se = 0.044 ) Likelihood ratio test= 53.15 on 2 df, p=3e-12 Wald test = 31.84 on 2 df, p=1e-07 Score (logrank) test = 48.44 on 2 df, p=3e-11 > > > ## From grDevices > x1 <- matrix(rnorm(1e3), ncol = 2) > x2 <- matrix(rnorm(1e3, mean = 3, sd = 1.5), ncol = 2) > x <- rbind(x1, x2) > > dcols <- densCols(x) > graphics::plot(x, col = dcols, pch = 20, main = "n = 1000") > > > ## From graphics: > ## A largish data set > set.seed(123) > n <- 10000 > x1 <- matrix(rnorm(n), ncol = 2) > x2 <- matrix(rnorm(n, mean = 3, sd = 1.5), ncol = 2) > x <- rbind(x1, x2) > > oldpar <- par(mfrow = c(2, 2)) > smoothScatter(x, nrpoints = 0) > smoothScatter(x) > > ## a different color scheme: > Lab.palette <- colorRampPalette(c("blue", "orange", "red"), space = "Lab") > smoothScatter(x, colramp = Lab.palette) > > ## somewhat similar, using identical smoothing computations, > ## but considerably *less* efficient for really large data: > plot(x, col = densCols(x), pch = 20) > > ## use with pairs: > par(mfrow = c(1, 1)) > y <- matrix(rnorm(40000), ncol = 4) + 3*rnorm(10000) > y[, c(2,4)] <- -y[, c(2,4)] > pairs(y, panel = function(...) smoothScatter(..., nrpoints = 0, add = TRUE)) > > par(oldpar) > > > ## From stats > # alias.Rd > op <- options(contrasts = c("contr.helmert", "contr.poly")) > npk.aov <- aov(yield ~ block + N*P*K, npk) > alias(npk.aov) Model : yield ~ block + N * P * K Complete : (Intercept) block1 block2 block3 block4 block5 N1 P1 K1 N1:P1 N1:P1:K1 0 1 1/3 1/6 -3/10 -1/5 0 0 0 0 N1:K1 P1:K1 N1:P1:K1 0 0 > options(op) # reset > > # as.hclust.Rd > if(require("cluster", quietly = TRUE)) {# is a recommended package + set.seed(123) + x <- matrix(rnorm(30), ncol = 3) + hc <- hclust(dist(x), method = "complete") + ag <- agnes(x, method = "complete") + hcag <- as.hclust(ag) + ## The dendrograms order slightly differently: + op <- par(mfrow = c(1,2)) + plot(hc) ; mtext("hclust", side = 1) + plot(hcag); mtext("agnes", side = 1) + detach("package:cluster") + } > > # confint.Rd > counts <- c(18,17,15,20,10,20,25,13,12) > outcome <- gl(3, 1, 9); treatment <- gl(3, 3) > glm.D93 <- glm(counts ~ outcome + treatment, family = poisson()) > confint(glm.D93) Waiting for profiling to be done... 2.5 % 97.5 % (Intercept) 2.6958215 3.36655581 outcome2 -0.8577018 -0.06255840 outcome3 -0.6753696 0.08244089 treatment2 -0.3932548 0.39325483 treatment3 -0.3932548 0.39325483 > confint.default(glm.D93) # based on asymptotic normality} 2.5 % 97.5 % (Intercept) 2.7095672 3.37947764 outcome2 -0.8505027 -0.05800787 outcome3 -0.6707552 0.08478093 treatment2 -0.3919928 0.39199279 treatment3 -0.3919928 0.39199279 > > # contrasts.Rd > utils::example(factor) factor> (ff <- factor(substring("statistics", 1:10, 1:10), levels = letters)) [1] s t a t i s t i c s Levels: a b c d e f g h i j k l m n o p q r s t u v w x y z factor> as.integer(ff) # the internal codes [1] 19 20 1 20 9 19 20 9 3 19 factor> (f. <- factor(ff)) # drops the levels that do not occur [1] s t a t i s t i c s Levels: a c i s t factor> ff[, drop = TRUE] # the same, more transparently [1] s t a t i s t i c s Levels: a c i s t factor> factor(letters[1:20], labels = "letter") [1] letter1 letter2 letter3 letter4 letter5 letter6 letter7 letter8 [9] letter9 letter10 letter11 letter12 letter13 letter14 letter15 letter16 [17] letter17 letter18 letter19 letter20 20 Levels: letter1 letter2 letter3 letter4 letter5 letter6 letter7 ... letter20 factor> class(ordered(4:1)) # "ordered", inheriting from "factor" [1] "ordered" "factor" factor> z <- factor(LETTERS[3:1], ordered = TRUE) factor> ## and "relational" methods work: factor> stopifnot(sort(z)[c(1,3)] == range(z), min(z) < max(z)) factor> ## Don't show: factor> of <- ordered(ff) factor> stopifnot(identical(range(of, rev(of)), of[3:2]), factor+ identical(max(of), of[2])) factor> ## End(Don't show) factor> factor> ## suppose you want "NA" as a level, and to allow missing values. factor> (x <- factor(c(1, 2, NA), exclude = NULL)) [1] 1 2 Levels: 1 2 factor> is.na(x)[2] <- TRUE factor> x # [1] 1 [1] 1 Levels: 1 2 factor> is.na(x) [1] FALSE TRUE FALSE factor> # [1] FALSE TRUE FALSE factor> factor> ## More rational, since R 3.4.0 : factor> factor(c(1:2, NA), exclude = "" ) # keeps , as [1] 1 2 Levels: 1 2 factor> factor(c(1:2, NA), exclude = NULL) # always did [1] 1 2 Levels: 1 2 factor> ## exclude = factor> z # ordered levels 'A < B < C' [1] C B A Levels: A < B < C factor> factor(z, exclude = "C") # does exclude [1] B A Levels: A < B factor> factor(z, exclude = "B") # ditto [1] C A Levels: A < C factor> ## Now, labels maybe duplicated: factor> ## factor() with duplicated labels allowing to "merge levels" factor> x <- c("Man", "Male", "Man", "Lady", "Female") factor> ## Map from 4 different values to only two levels: factor> (xf <- factor(x, levels = c("Male", "Man" , "Lady", "Female"), factor+ labels = c("Male", "Male", "Female", "Female"))) [1] Male Male Male Female Female Levels: Male Female factor> #> [1] Male Male Male Female Female factor> #> Levels: Male Female factor> factor> ## Using addNA() factor> Month <- airquality$Month factor> table(addNA(Month)) 5 6 7 8 9 31 30 31 31 30 0 factor> table(addNA(Month, ifany = TRUE)) 5 6 7 8 9 31 30 31 31 30 > fff <- ff[, drop = TRUE] # reduce to 5 levels. > contrasts(fff) <- contr.sum(5)[, 1:2]; contrasts(fff) [,1] [,2] [,3] [,4] a 1 0 -0.2471257 0.2688164 c 0 1 -0.2471257 0.2688164 i 0 0 -0.1498721 -0.8817814 s 0 0 0.8912491 0.0753323 t -1 -1 -0.2471257 0.2688164 > > ## using sparse contrasts: % useful, once model.matrix() works with these : > ffs <- fff > contrasts(ffs) <- contr.sum(5, sparse = TRUE)[, 1:2]; contrasts(ffs) [,1] [,2] [,3] [,4] a 1 0 -0.2471257 0.2688164 c 0 1 -0.2471257 0.2688164 i 0 0 -0.1498721 -0.8817814 s 0 0 0.8912491 0.0753323 t -1 -1 -0.2471257 0.2688164 > stopifnot(all.equal(ffs, fff)) > contrasts(ffs) <- contr.sum(5, sparse = TRUE); contrasts(ffs) 5 x 4 sparse Matrix of class "dgCMatrix" a 1 . . . c . 1 . . i . . 1 . s . . . 1 t -1 -1 -1 -1 > > # glm.Rd > utils::data(anorexia, package = "MASS") > > anorex.1 <- glm(Postwt ~ Prewt + Treat + offset(Prewt), + family = gaussian, data = anorexia) > summary(anorex.1) Call: glm(formula = Postwt ~ Prewt + Treat + offset(Prewt), family = gaussian, data = anorexia) Deviance Residuals: Min 1Q Median 3Q Max -14.1083 -4.2773 -0.5484 5.4838 15.2922 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 49.7711 13.3910 3.717 0.000410 *** Prewt -0.5655 0.1612 -3.509 0.000803 *** TreatCont -4.0971 1.8935 -2.164 0.033999 * TreatFT 4.5631 2.1333 2.139 0.036035 * --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 (Dispersion parameter for gaussian family taken to be 48.69504) Null deviance: 4525.4 on 71 degrees of freedom Residual deviance: 3311.3 on 68 degrees of freedom AIC: 489.97 Number of Fisher Scoring iterations: 2 > > # logLik.Rd > utils::data(Orthodont, package = "nlme") > fm1 <- lm(distance ~ Sex * age, Orthodont) > logLik(fm1) 'log Lik.' -239.1209 (df=5) > logLik(fm1, REML = TRUE) 'log Lik.' -241.7796 (df=5) > > # nls.Rd > od <- options(digits=5) > ## The muscle dataset in MASS is from an experiment on muscle > ## contraction on 21 animals. The observed variables are Strip > ## (identifier of muscle), Conc (Cacl concentration) and Length > ## (resulting length of muscle section). > utils::data(muscle, package = "MASS") > > ## The non linear model considered is > ## Length = alpha + beta*exp(-Conc/theta) + error > ## where theta is constant but alpha and beta may vary with Strip. > > with(muscle, table(Strip)) # 2, 3 or 4 obs per strip Strip S01 S02 S03 S04 S05 S06 S07 S08 S09 S10 S11 S12 S13 S14 S15 S16 S17 S18 S19 S20 4 4 4 3 3 3 2 2 2 2 3 2 2 2 2 4 4 3 3 3 S21 3 > > ## We first use the plinear algorithm to fit an overall model, > ## ignoring that alpha and beta might vary with Strip. > > musc.1 <- nls(Length ~ cbind(1, exp(-Conc/th)), muscle, + start = list(th = 1), algorithm = "plinear") > ## IGNORE_RDIFF_BEGIN > summary(musc.1) Formula: Length ~ cbind(1, exp(-Conc/th)) Parameters: Estimate Std. Error t value Pr(>|t|) th 0.608 0.115 5.31 1.9e-06 *** .lin1 28.963 1.230 23.55 < 2e-16 *** .lin2 -34.227 3.793 -9.02 1.4e-12 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Residual standard error: 4.67 on 57 degrees of freedom Number of iterations to convergence: 5 Achieved convergence tolerance: 9.32e-07 > ## IGNORE_RDIFF_END > > ## Then we use nls' indexing feature for parameters in non-linear > ## models to use the conventional algorithm to fit a model in which > ## alpha and beta vary with Strip. The starting values are provided > ## by the previously fitted model. > ## Note that with indexed parameters, the starting values must be > ## given in a list (with names): > b <- coef(musc.1) > musc.2 <- nls(Length ~ a[Strip] + b[Strip]*exp(-Conc/th), muscle, + start = list(a = rep(b[2], 21), b = rep(b[3], 21), th = b[1])) > ## IGNORE_RDIFF_BEGIN > summary(musc.2) Formula: Length ~ a[Strip] + b[Strip] * exp(-Conc/th) Parameters: Estimate Std. Error t value Pr(>|t|) a1 23.454 0.796 29.46 5.0e-16 *** a2 28.302 0.793 35.70 < 2e-16 *** a3 30.801 1.716 17.95 1.7e-12 *** a4 25.921 3.016 8.60 1.4e-07 *** a5 23.201 2.891 8.02 3.5e-07 *** a6 20.120 2.435 8.26 2.3e-07 *** a7 33.595 1.682 19.98 3.0e-13 *** a8 39.053 3.753 10.41 8.6e-09 *** a9 32.137 3.318 9.69 2.5e-08 *** a10 40.005 3.336 11.99 1.0e-09 *** a11 36.190 3.109 11.64 1.6e-09 *** a12 36.911 1.839 20.07 2.8e-13 *** a13 30.635 1.700 18.02 1.6e-12 *** a14 34.312 3.495 9.82 2.0e-08 *** a15 38.395 3.375 11.38 2.3e-09 *** a16 31.226 0.886 35.26 < 2e-16 *** a17 31.230 0.821 38.02 < 2e-16 *** a18 19.998 1.011 19.78 3.6e-13 *** a19 37.095 1.071 34.65 < 2e-16 *** a20 32.594 1.121 29.07 6.2e-16 *** a21 30.376 1.057 28.74 7.5e-16 *** b1 -27.300 6.873 -3.97 0.00099 *** b2 -26.270 6.754 -3.89 0.00118 ** b3 -30.901 2.270 -13.61 1.4e-10 *** b4 -32.238 3.810 -8.46 1.7e-07 *** b5 -29.941 3.773 -7.94 4.1e-07 *** b6 -20.622 3.647 -5.65 2.9e-05 *** b7 -19.625 8.085 -2.43 0.02661 * b8 -45.780 4.113 -11.13 3.2e-09 *** b9 -31.345 6.352 -4.93 0.00013 *** b10 -38.599 3.955 -9.76 2.2e-08 *** b11 -33.921 3.839 -8.84 9.2e-08 *** b12 -38.268 8.992 -4.26 0.00053 *** b13 -22.568 8.194 -2.75 0.01355 * b14 -36.167 6.358 -5.69 2.7e-05 *** b15 -32.952 6.354 -5.19 7.4e-05 *** b16 -47.207 9.540 -4.95 0.00012 *** b17 -33.875 7.688 -4.41 0.00039 *** b18 -15.896 6.222 -2.55 0.02051 * b19 -28.969 7.235 -4.00 0.00092 *** b20 -36.917 8.033 -4.60 0.00026 *** b21 -26.508 7.012 -3.78 0.00149 ** th 0.797 0.127 6.30 8.0e-06 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Residual standard error: 1.11 on 17 degrees of freedom Number of iterations to convergence: 8 Achieved convergence tolerance: 2.17e-06 > ## IGNORE_RDIFF_END > options(od) > > # princomp.Rd > ## Robust: > (pc.rob <- princomp(stackloss, covmat = MASS::cov.rob(stackloss))) Call: princomp(x = stackloss, covmat = MASS::cov.rob(stackloss)) Standard deviations: Comp.1 Comp.2 Comp.3 Comp.4 7.8322873 4.0077676 1.9114016 0.7624211 4 variables and 21 observations. > > # termplot.R > library(MASS) > hills.lm <- lm(log(time) ~ log(climb)+log(dist), data = hills) > termplot(hills.lm, partial.resid = TRUE, smooth = panel.smooth, + terms = "log(dist)", main = "Original") > termplot(hills.lm, transform.x = TRUE, + partial.resid = TRUE, smooth = panel.smooth, + terms = "log(dist)", main = "Transformed") > > # xtabs.Rd > if(require("Matrix")) { + ## similar to "nlme"s 'ergoStool' : + d.ergo <- data.frame(Type = paste0("T", rep(1:4, 9*4)), + Subj = gl(9, 4, 36*4)) + print(xtabs(~ Type + Subj, data = d.ergo)) # 4 replicates each + set.seed(15) # a subset of cases: + print(xtabs(~ Type + Subj, data = d.ergo[sample(36, 10), ], sparse = TRUE)) + + ## Hypothetical two-level setup: + inner <- factor(sample(letters[1:25], 100, replace = TRUE)) + inout <- factor(sample(LETTERS[1:5], 25, replace = TRUE)) + fr <- data.frame(inner = inner, outer = inout[as.integer(inner)]) + print(xtabs(~ inner + outer, fr, sparse = TRUE)) + } Loading required package: Matrix Subj Type 1 2 3 4 5 6 7 8 9 T1 4 4 4 4 4 4 4 4 4 T2 4 4 4 4 4 4 4 4 4 T3 4 4 4 4 4 4 4 4 4 T4 4 4 4 4 4 4 4 4 4 4 x 9 sparse Matrix of class "dgCMatrix" Subj Type 1 2 3 4 5 6 7 8 9 T1 . 1 . . . 1 . . . T2 . . . . . . . 1 1 T3 . . 1 1 1 . 1 1 . T4 . 1 . . . . . . . 25 x 4 sparse Matrix of class "dgCMatrix" outer inner A B C E a . . . 3 b . . . 6 c . . 5 . d . 1 . . e . . 3 . f . . 5 . g . . 3 . h 6 . . . i 8 . . . j . 1 . . k . . . 6 l . 2 . . m . . . 7 n . . 3 . o . 3 . . p . 3 . . q . . . 7 r . . . 2 s . 6 . . t . . . 5 u 4 . . . v . . 3 . w . 3 . . x . . 3 . y 2 . . . > > ## From utils > example(packageDescription) pckgDs> ## No test: pckgDs> ##D packageDescription("stats") pckgDs> ##D packageDescription("stats", fields = c("Package", "Version")) pckgDs> ##D pckgDs> ##D packageDescription("stats", fields = "Version") pckgDs> ##D packageDescription("stats", fields = "Version", drop = FALSE) pckgDs> ##D pckgDs> ##D if(packageVersion("MASS") < "7.3.29") pckgDs> ##D message("you need to update 'MASS'") pckgDs> ## End(No test) pckgDs> pu <- packageDate("utils") pckgDs> ## No test: pckgDs> ##D str(pu) pckgDs> ## End(No test) pckgDs> stopifnot(identical(pu, packageDate(desc = packageDescription("utils"))), pckgDs+ identical(pu, packageDate("stats"))) # as "utils" and "stats" are pckgDs> # both 'base R' and "Built" at same time pckgDs> pckgDs> pckgDs> > > > ## From splines > library(splines) > Matrix::drop0(zapsmall(6*splineDesign(knots = 1:40, x = 4:37, sparse = TRUE))) 34 x 36 sparse Matrix of class "dgCMatrix" [1,] 1 4 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . [2,] . 1 4 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . [3,] . . 1 4 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . [4,] . . . 1 4 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . [5,] . . . . 1 4 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . . [6,] . . . . . 1 4 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . . [7,] . . . . . . 1 4 1 . . . . . . . . . . . . . . . . . . . . . . . . . . . [8,] . . . . . . . 1 4 1 . . . . . . . . . . . . . . . . . . . . . . . . . . [9,] . . . . . . . . 1 4 1 . . . . . . . . . . . . . . . . . . . . . . . . . [10,] . . . . . . . . . 1 4 1 . . . . . . . . . . . . . . . . . . . . . . . . [11,] . . . . . . . . . . 1 4 1 . . . . . . . . . . . . . . . . . . . . . . . [12,] . . . . . . . . . . . 1 4 1 . . . . . . . . . . . . . . . . . . . . . . [13,] . . . . . . . . . . . . 1 4 1 . . . . . . . . . . . . . . . . . . . . . [14,] . . . . . . . . . . . . . 1 4 1 . . . . . . . . . . . . . . . . . . . . [15,] . . . . . . . . . . . . . . 1 4 1 . . . . . . . . . . . . . . . . . . . [16,] . . . . . . . . . . . . . . . 1 4 1 . . . . . . . . . . . . . . . . . . [17,] . . . . . . . . . . . . . . . . 1 4 1 . . . . . . . . . . . . . . . . . [18,] . . . . . . . . . . . . . . . . . 1 4 1 . . . . . . . . . . . . . . . . [19,] . . . . . . . . . . . . . . . . . . 1 4 1 . . . . . . . . . . . . . . . [20,] . . . . . . . . . . . . . . . . . . . 1 4 1 . . . . . . . . . . . . . . [21,] . . . . . . . . . . . . . . . . . . . . 1 4 1 . . . . . . . . . . . . . [22,] . . . . . . . . . . . . . . . . . . . . . 1 4 1 . . . . . . . . . . . . [23,] . . . . . . . . . . . . . . . . . . . . . . 1 4 1 . . . . . . . . . . . [24,] . . . . . . . . . . . . . . . . . . . . . . . 1 4 1 . . . . . . . . . . [25,] . . . . . . . . . . . . . . . . . . . . . . . . 1 4 1 . . . . . . . . . [26,] . . . . . . . . . . . . . . . . . . . . . . . . . 1 4 1 . . . . . . . . [27,] . . . . . . . . . . . . . . . . . . . . . . . . . . 1 4 1 . . . . . . . [28,] . . . . . . . . . . . . . . . . . . . . . . . . . . . 1 4 1 . . . . . . [29,] . . . . . . . . . . . . . . . . . . . . . . . . . . . . 1 4 1 . . . . . [30,] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 1 4 1 . . . . [31,] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 1 4 1 . . . [32,] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 1 4 1 . . [33,] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 1 4 1 . [34,] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 1 4 1 >