R Under development (unstable) (2022-03-19 r81942) -- "Unsuffered Consequences" Copyright (C) 2022 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ## tests of the simulate.lm method, added Feb 2009 > > options(digits = 5) > > ## cases should be named > hills <- readRDS("hills.rds") # copied from package MASS > fit1 <- lm(time ~ dist, data = hills) > set.seed(1) > simulate(fit1, nsim = 3) sim_1 sim_2 sim_3 Greenmantle 3.4841 7.7039 25.4746 Carnethy 48.8068 37.2737 30.9745 Craig Dunain 28.4665 43.9584 57.3295 Ben Rha 89.4727 79.5895 38.9971 Ben Lomond 68.3785 77.0326 36.7858 Goatfell 45.4299 58.5197 67.6189 Bens of Jura 138.1736 123.3906 119.6004 Cairnpapple 59.8758 59.0504 45.1641 Scolty 48.3017 47.9202 38.2951 Traprain 39.0478 31.3974 33.3777 Lairig Ghru 258.5807 214.2935 217.0639 Dollar 44.5912 44.0871 34.1140 Lomonds 61.9013 89.6352 97.8082 Cairn Table 0.9461 42.9001 14.7382 Eildon Two 55.0951 50.2295 44.4990 Cairngorm 77.5672 86.4083 85.1081 Seven Hills 111.4626 99.5722 133.0006 Knock Hill 38.9856 26.9579 14.0804 Black Hill 49.0344 10.1091 40.0303 Creag Beag 52.8285 69.5738 46.3069 Kildcon Hill 38.4895 59.6709 9.3243 Meall Ant-Suidhe 39.9240 16.9877 48.4197 Half Ben Nevis 46.6300 24.3056 68.2987 Cow Hill -27.8787 23.1894 25.7935 N Berwick Law 32.5197 17.4555 51.8170 Creag Dubh 27.3610 76.4071 39.6261 Burnswark 42.0330 44.3590 19.6667 Largo Law 7.4616 50.5758 25.3716 Criffel 39.7654 49.8660 24.8692 Acmony 45.1519 21.9790 27.3645 Ben Nevis 105.5773 82.2313 66.0840 Knockfarrel 43.0908 9.1228 45.9825 Two Breweries 152.8438 174.3537 126.9294 Cockleroi 31.5726 35.7046 35.7999 Moffat Chase 134.2882 205.1244 148.7057 > > ## and weights should be taken into account > fit2 <- lm(time ~ -1 + dist + climb, hills[-18, ], weight = 1/dist^2) > coef(summary(fit2)) Estimate Std. Error t value Pr(>|t|) dist 4.8999847 0.4737032 10.3440 9.8468e-12 climb 0.0084718 0.0016869 5.0221 1.8636e-05 > set.seed(1); ( ys <- simulate(fit2, nsim = 3) ) sim_1 sim_2 sim_3 Greenmantle 15.754 13.355 18.247 Carnethy 51.988 47.396 67.247 Craig Dunain 30.614 34.000 40.672 Ben Rha 58.825 42.959 36.719 Ben Lomond 68.579 76.460 71.455 Goatfell 55.088 71.287 53.926 Bens of Jura 151.910 138.573 116.292 Cairnpapple 41.841 34.234 38.413 Scolty 34.958 35.733 28.443 Traprain 32.564 39.177 34.915 Lairig Ghru 209.113 130.333 157.652 Dollar 43.936 36.921 37.675 Lomonds 57.642 69.616 58.280 Cairn Table 16.646 39.532 32.599 Eildon Two 41.230 34.111 41.536 Cairngorm 73.841 85.681 54.935 Seven Hills 86.948 94.364 97.870 Black Hill 35.952 27.000 32.437 Creag Beag 37.808 34.432 39.509 Kildcon Hill 19.520 12.910 16.075 Meall Ant-Suidhe 33.970 36.271 31.514 Half Ben Nevis 54.038 63.231 50.087 Cow Hill 17.615 16.486 16.037 N Berwick Law 12.152 15.778 24.416 Creag Dubh 39.714 39.457 42.478 Burnswark 35.747 35.141 41.549 Largo Law 31.552 47.902 42.693 Criffel 34.452 46.350 51.317 Acmony 25.679 33.145 20.575 Ben Nevis 91.620 86.634 78.946 Knockfarrel 44.906 28.781 25.088 Two Breweries 129.888 136.598 121.358 Cockleroi 31.482 18.866 25.682 Moffat Chase 138.983 177.836 141.436 > for(i in seq_len(3)) + print(coef(summary(update(fit2, ys[, i] ~ .)))) Estimate Std. Error t value Pr(>|t|) dist 4.8759333 0.4295826 11.3504 9.3646e-13 climb 0.0091824 0.0015298 6.0023 1.0781e-06 Estimate Std. Error t value Pr(>|t|) dist 4.6969341 0.4406227 10.66 4.6417e-12 climb 0.0099795 0.0015691 6.36 3.8442e-07 Estimate Std. Error t value Pr(>|t|) dist 4.8215499 0.420077 11.4778 7.0162e-13 climb 0.0090388 0.001496 6.0422 9.6065e-07 > ## should be identical to glm(*, gaussian): > fit2. <- glm(time ~ -1 + dist + climb, family=gaussian, data=hills[-18, ], + weight = 1/dist^2) > set.seed(1); ys. <- simulate(fit2., nsim = 3) > stopifnot(all.equal(ys, ys.)) > > ## Poisson fit > load("anorexia.rda") # copied from package MASS > fit3 <- glm(Postwt ~ Prewt + Treat + offset(Prewt), + family = gaussian, data = anorexia) > coef(summary(fit3)) Estimate Std. Error t value Pr(>|t|) (Intercept) 49.77111 13.39096 3.7168 0.00041011 Prewt -0.56554 0.16118 -3.5087 0.00080343 TreatCont -4.09707 1.89349 -2.1638 0.03399931 TreatFT 4.56306 2.13334 2.1389 0.03603508 > set.seed(1) > simulate(fit3, nsim = 8) sim_1 sim_2 sim_3 sim_4 sim_5 sim_6 sim_7 sim_8 1 76.364 84.997 72.948 78.581 83.762 62.645 70.046 87.655 2 85.796 77.997 79.276 75.769 91.529 93.684 84.340 95.638 3 79.726 76.809 100.122 90.039 82.835 81.123 89.696 75.979 4 88.956 79.858 77.946 77.512 80.451 74.824 76.441 76.082 5 81.905 76.512 70.629 67.511 81.309 78.424 85.830 87.696 6 78.312 84.045 72.589 84.052 74.084 88.309 83.858 76.262 7 87.004 84.121 86.744 79.204 96.013 88.336 79.083 65.958 8 83.454 74.188 78.173 75.923 79.240 82.265 82.812 71.771 9 84.710 76.723 78.472 72.621 86.034 76.696 77.664 73.942 10 77.605 78.792 73.251 92.318 86.401 70.223 92.105 80.067 11 89.938 87.609 69.008 77.078 79.035 76.676 79.261 76.571 12 86.931 73.579 76.708 73.007 82.077 86.150 90.162 85.826 13 76.661 85.140 87.974 82.372 87.232 75.252 82.427 78.048 14 64.151 81.929 75.270 81.442 72.297 79.125 58.615 82.216 15 84.154 83.722 66.643 69.424 90.060 68.155 66.771 73.749 16 78.944 77.135 92.302 59.098 76.581 79.200 76.298 87.563 17 82.577 85.272 85.657 78.221 94.233 83.589 84.343 77.545 18 89.624 84.902 81.372 87.019 93.590 82.020 66.690 85.066 19 87.943 78.426 89.599 81.795 82.791 81.068 88.923 76.038 20 84.445 88.729 86.486 79.615 84.259 92.607 76.083 81.752 21 89.233 90.918 78.499 86.734 75.671 88.142 77.567 82.487 22 87.800 87.229 97.737 74.063 84.597 90.098 71.487 70.588 23 80.777 91.330 78.478 87.911 87.540 73.815 70.112 79.251 24 65.463 83.242 69.404 79.307 80.036 80.492 79.738 87.581 25 81.411 68.177 76.078 82.021 73.917 85.144 80.640 81.841 26 83.949 80.341 85.789 91.557 79.765 83.947 69.702 85.341 27 83.658 76.200 100.851 86.305 84.495 69.886 77.737 76.425 28 76.394 83.353 87.395 80.525 94.118 89.063 90.396 94.816 29 81.843 80.851 88.369 93.295 81.802 71.887 82.018 85.732 30 88.574 85.951 85.119 71.700 84.813 79.997 100.768 82.505 31 93.966 78.128 82.154 80.683 75.454 93.724 93.178 95.943 32 87.591 89.411 88.065 86.524 91.757 92.604 92.463 82.937 33 93.707 86.434 96.498 89.842 100.128 98.619 91.036 93.118 34 82.545 95.253 97.402 90.041 93.367 85.060 84.870 91.865 35 75.353 89.964 92.132 85.913 90.648 84.194 80.037 89.165 36 81.849 91.097 93.174 87.587 71.698 78.295 89.128 82.603 37 83.949 89.381 78.108 86.214 90.064 97.816 97.030 83.781 38 88.111 100.264 95.391 86.797 91.708 88.839 96.085 91.003 39 92.769 80.657 86.627 89.946 82.627 80.103 79.418 88.676 40 88.333 79.786 72.769 91.006 84.197 89.045 71.711 83.137 41 79.035 90.178 83.819 63.414 74.154 87.681 79.418 89.384 42 82.934 80.161 83.594 88.698 89.442 97.930 87.778 84.242 43 90.825 84.515 96.182 88.577 83.679 81.754 95.389 81.075 44 89.716 83.090 80.486 82.864 74.882 83.104 76.630 89.581 45 83.067 85.640 84.871 94.510 85.309 84.969 90.416 72.509 46 81.416 84.405 79.890 83.637 95.874 83.731 87.982 89.088 47 89.853 90.757 86.073 85.324 84.976 84.750 95.640 90.776 48 88.370 81.770 85.813 88.991 88.121 80.944 82.813 81.438 49 83.831 81.084 79.509 96.615 91.220 94.676 82.122 76.819 50 94.065 97.289 93.711 89.801 87.947 83.049 79.914 85.160 51 88.740 84.464 77.532 83.016 83.503 83.253 82.351 96.777 52 80.127 83.145 77.085 76.100 80.701 88.951 81.871 79.209 53 88.863 85.784 96.540 84.173 91.644 94.332 102.886 70.212 54 76.995 89.849 77.787 78.317 77.455 79.488 101.948 90.544 55 97.743 87.230 90.618 85.936 89.461 84.197 86.580 84.245 56 104.562 90.479 88.083 93.494 88.722 94.396 83.459 87.177 57 87.962 85.768 93.382 84.580 74.720 97.627 76.757 82.044 58 84.412 89.435 103.483 110.184 81.867 89.945 95.289 91.540 59 94.153 90.597 101.249 91.266 96.569 80.198 82.567 95.071 60 91.060 87.893 89.693 99.889 90.667 103.929 107.945 87.902 61 105.676 92.626 72.970 72.943 94.523 98.931 82.737 84.683 62 87.470 77.149 105.173 92.915 100.915 82.787 88.520 95.397 63 100.074 97.400 99.915 86.075 105.545 94.806 121.849 93.533 64 86.419 75.502 90.001 92.642 90.950 73.946 78.485 85.108 65 84.122 87.208 89.215 92.087 91.960 93.284 91.455 84.941 66 91.104 86.100 93.346 86.942 88.441 101.037 82.062 96.070 67 77.408 85.453 88.856 99.244 101.014 78.578 92.429 83.066 68 98.275 87.651 90.984 83.155 92.209 82.608 81.955 93.975 69 91.681 77.253 87.819 86.560 82.422 86.137 91.151 96.234 70 108.553 101.603 83.831 86.407 92.306 88.639 91.321 90.129 71 95.016 80.079 98.591 87.035 78.307 77.509 83.441 97.618 72 87.308 89.028 102.868 98.858 90.900 95.758 92.341 99.148 > > ## two-column binomial fit > ldose <- rep(0:5, 2) > numdead <- c(1, 4, 9, 13, 18, 20, 0, 2, 6, 10, 12, 16) > sex <- factor(rep(c("M", "F"), c(6, 6))) > SF <- cbind(numdead, numalive = 20 - numdead) > fit4 <- glm(SF ~ sex + ldose - 1, family = binomial) > coef(summary(fit4)) Estimate Std. Error z value Pr(>|z|) sexF -3.4732 0.46852 -7.4130 1.2344e-13 sexM -2.3724 0.38551 -6.1539 7.5579e-10 ldose 1.0642 0.13108 8.1190 4.7015e-16 > set.seed(1) > ( ys <- simulate(fit4, nsim = 3) ) sim_1.numdead sim_1.numalive sim_2.numdead sim_2.numalive sim_3.numdead 1 1 19 2 18 1 2 4 16 4 16 4 3 9 11 10 10 4 4 11 9 14 6 15 5 19 1 17 3 16 6 18 2 16 4 20 7 2 18 0 20 0 8 2 18 3 17 2 9 5 15 7 13 4 10 5 15 7 13 7 11 15 5 13 7 12 12 19 1 19 1 17 sim_3.numalive 1 19 2 16 3 16 4 5 5 4 6 0 7 20 8 18 9 16 10 13 11 8 12 3 > for(i in seq_len(3)) + print(coef(summary(update(fit4, ys[, i] ~ .)))) Estimate Std. Error z value Pr(>|z|) sexF -3.3079 0.45218 -7.3154 2.5656e-13 sexM -2.5067 0.39240 -6.3880 1.6812e-10 ldose 1.0482 0.12869 8.1454 3.7800e-16 Estimate Std. Error z value Pr(>|z|) sexF -2.84478 0.40661 -6.9963 2.6289e-12 sexM -2.11845 0.35818 -5.9145 3.3286e-09 ldose 0.90935 0.11578 7.8541 4.0273e-15 Estimate Std. Error z value Pr(>|z|) sexF -3.9838 0.52156 -7.6384 2.1996e-14 sexM -2.8717 0.43047 -6.6712 2.5376e-11 ldose 1.1487 0.14102 8.1459 3.7655e-16 > > ## same via proportions > fit5 <- glm(numdead/20 ~ sex + ldose - 1, family = binomial, + weights = rep(20, 12)) > set.seed(1) > ( ys <- simulate(fit5, nsim = 3) ) sim_1 sim_2 sim_3 1 0.05 0.10 0.05 2 0.20 0.20 0.20 3 0.45 0.50 0.20 4 0.55 0.70 0.75 5 0.95 0.85 0.80 6 0.90 0.80 1.00 7 0.10 0.00 0.00 8 0.10 0.15 0.10 9 0.25 0.35 0.20 10 0.25 0.35 0.35 11 0.75 0.65 0.60 12 0.95 0.95 0.85 > for(i in seq_len(3)) + print(coef(summary(update(fit5, ys[, i] ~ .)))) Estimate Std. Error z value Pr(>|z|) sexF -3.3079 0.45218 -7.3154 2.5656e-13 sexM -2.5067 0.39240 -6.3880 1.6812e-10 ldose 1.0482 0.12869 8.1454 3.7800e-16 Estimate Std. Error z value Pr(>|z|) sexF -2.84478 0.40661 -6.9963 2.6289e-12 sexM -2.11845 0.35818 -5.9145 3.3286e-09 ldose 0.90935 0.11578 7.8541 4.0273e-15 Estimate Std. Error z value Pr(>|z|) sexF -3.9838 0.52156 -7.6384 2.1996e-14 sexM -2.8717 0.43047 -6.6712 2.5376e-11 ldose 1.1487 0.14102 8.1459 3.7655e-16 > > > ## factor binomial fit > load("birthwt.rda") # copied from package MASS > bwt <- with(birthwt, { + race <- factor(race, labels = c("white", "black", "other")) + table(ptl) + ptd <- factor(ptl > 0) + table(ftv) + ftv <- factor(ftv) + levels(ftv)[-(1:2)] <- "2+" + data.frame(low = factor(low), age, lwt, race, + smoke = (smoke > 0), ptd, ht = (ht > 0), ui = (ui > 0), ftv) + }) > fit6 <- glm(low ~ ., family = binomial, data = bwt) > coef(summary(fit6)) Estimate Std. Error z value Pr(>|z|) (Intercept) 0.823019 1.2447143 0.66121 0.5084769 age -0.037234 0.0387024 -0.96207 0.3360159 lwt -0.015653 0.0070804 -2.21075 0.0270532 raceblack 1.192413 0.5359646 2.22480 0.0260948 raceother 0.740685 0.4617443 1.60410 0.1086916 smokeTRUE 0.755528 0.4250166 1.77764 0.0754623 ptdTRUE 1.343763 0.4806207 2.79589 0.0051757 htTRUE 1.913166 0.7207369 2.65446 0.0079436 uiTRUE 0.680195 0.4643403 1.46486 0.1429580 ftv1 -0.436380 0.4793936 -0.91027 0.3626779 ftv2+ 0.179009 0.4563778 0.39224 0.6948827 > set.seed(1) > ys <- simulate(fit6, nsim = 3) > ys[1:10, ] sim_1 sim_2 sim_3 1 0 0 0 2 0 0 0 3 0 0 0 4 1 0 1 5 0 1 1 6 1 0 1 7 1 0 0 8 0 0 0 9 0 0 0 10 0 0 1 > for(i in seq_len(3)) + print(coef(summary(update(fit6, ys[, i] ~ .)))) Estimate Std. Error z value Pr(>|z|) (Intercept) -0.284354 1.2297301 -0.23123 0.817134 age -0.011485 0.0389166 -0.29513 0.767897 lwt -0.012314 0.0069041 -1.78353 0.074500 raceblack 0.844051 0.5514350 1.53064 0.125857 raceother 0.565993 0.4665978 1.21302 0.225122 smokeTRUE 1.024337 0.4253416 2.40827 0.016028 ptdTRUE 1.578045 0.4887012 3.22906 0.001242 htTRUE 1.171556 0.7060321 1.65935 0.097045 uiTRUE 0.384214 0.4835840 0.79451 0.426897 ftv1 -0.790699 0.5271049 -1.50008 0.133594 ftv2+ 0.522245 0.4498448 1.16094 0.245665 Estimate Std. Error z value Pr(>|z|) (Intercept) 0.916922 1.433819 0.63950 5.2250e-01 age 0.034662 0.041752 0.83019 4.0643e-01 lwt -0.036299 0.009577 -3.79021 1.5052e-04 raceblack 2.588992 0.631372 4.10058 4.1211e-05 raceother 1.073046 0.522576 2.05338 4.0036e-02 smokeTRUE 0.710930 0.478870 1.48460 1.3765e-01 ptdTRUE 0.679588 0.503299 1.35027 1.7693e-01 htTRUE 1.988224 0.801811 2.47967 1.3151e-02 uiTRUE 1.080176 0.489033 2.20880 2.7189e-02 ftv1 -0.418153 0.549009 -0.76165 4.4627e-01 ftv2+ 0.657962 0.500605 1.31433 1.8873e-01 Estimate Std. Error z value Pr(>|z|) (Intercept) 1.4927579 1.2779154 1.168119 0.24275859 age -0.0986485 0.0415094 -2.376532 0.01747625 lwt -0.0088426 0.0070635 -1.251885 0.21061194 raceblack 0.2106382 0.5622160 0.374657 0.70791549 raceother 0.8608985 0.4814556 1.788116 0.07375727 smokeTRUE 0.7884781 0.4449757 1.771958 0.07640158 ptdTRUE 1.9686209 0.5435116 3.622040 0.00029229 htTRUE 1.7835637 0.7279712 2.450047 0.01428375 uiTRUE 1.4816743 0.5139743 2.882779 0.00394184 ftv1 -0.7388479 0.5090637 -1.451386 0.14667244 ftv2+ -0.0097438 0.4670024 -0.020865 0.98335371 > > ## This requires MASS::gamma.shape > if(!require("MASS", quietly = TRUE)) { + message("skipping tests requiring the MASS package") + q() + } > > ## gamma fit, from example(glm) > clotting <- data.frame(u = c(5,10,15,20,30,40,60,80,100), + lot1 = c(118,58,42,35,27,25,21,19,18)) > fit7 <- glm(lot1 ~ log(u), data = clotting, family = Gamma) > coef(summary(fit7)) Estimate Std. Error t value Pr(>|t|) (Intercept) -0.016554 0.00092755 -17.847 4.2791e-07 log(u) 0.015343 0.00041496 36.975 2.7512e-09 > set.seed(1) > ( ys <- simulate(fit7, nsim = 3) ) sim_1 sim_2 sim_3 1 119.451 118.552 123.140 2 56.310 51.798 48.747 3 42.193 39.456 40.843 4 34.581 33.905 35.580 5 26.208 26.971 27.210 6 25.476 25.840 24.437 7 22.287 22.151 21.375 8 20.206 20.503 19.626 9 18.224 19.094 17.952 > for(i in seq_len(3)) + print(coef(summary(update(fit7, ys[, i] ~ .)))) Estimate Std. Error t value Pr(>|t|) (Intercept) -0.016008 0.00077887 -20.553 1.6197e-07 log(u) 0.015044 0.00034611 43.465 8.9084e-10 Estimate Std. Error t value Pr(>|t|) (Intercept) -0.015483 0.00050102 -30.903 9.5910e-09 log(u) 0.014935 0.00021999 67.889 3.9547e-11 Estimate Std. Error t value Pr(>|t|) (Intercept) -0.016850 0.00075120 -22.431 8.8554e-08 log(u) 0.015569 0.00033659 46.256 5.7704e-10 > > > proc.time() user system elapsed 0.597 0.096 0.670