# File src/library/stats/R/loglin.R # Part of the R package, https://www.R-project.org # # Copyright (C) 1995-2013 The R Core Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # https://www.R-project.org/Licenses/ loglin <- function(table, margin, start = rep(1, length(table)), fit = FALSE, eps = 0.1, iter = 20L, param = FALSE, print = TRUE) { rfit <- fit dtab <- dim(table) nvar <- length(dtab) ncon <- length(margin) conf <- matrix(0L, nrow = nvar, ncol = ncon) nmar <- 0 varnames <- names(dimnames(table)) for (k in seq_along(margin)) { tmp <- margin[[k]] if (is.character(tmp)) { ## Rewrite margin names to numbers tmp <- match(tmp, varnames) margin[[k]] <- tmp } if (!is.numeric(tmp) || any(is.na(tmp) | tmp <= 0)) stop("'margin' must contain names or numbers corresponding to 'table'") conf[seq_along(tmp), k] <- tmp nmar <- nmar + prod(dtab[tmp]) } ntab <- length(table) if (length(start) != ntab ) stop("'start' and 'table' must be same length") z <- .Call(C_LogLin, dtab, conf, table, start, nmar, eps, iter) if (print) cat(z$nlast, "iterations: deviation", z$dev[z$nlast], "\n") fit <- z$fit attributes(fit) <- attributes(table) ## Pearson chi-sq test statistic observed <- as.vector(table[start > 0]) expected <- as.vector(fit[start > 0]) pearson <- sum((observed - expected)^2 / expected) ## Likelihood Ratio Test statistic observed <- as.vector(table[table * fit > 0]) expected <- as.vector(fit[table * fit > 0]) lrt <- 2 * sum(observed * log(observed / expected)) ## Compute degrees of freedom. ## Use a dyadic-style representation for the (possible) subsets B. ## Let u_i(B) = 1 if i is contained in B and 0 otherwise. Then B ## <-> u(B) = (u_1(B),...,u_N(B)) <-> \sum_{i=1}^N u_i(B) 2^{i-1}. ## See also the code for 'dyadic' below which computes the u_i(B). subsets <- function(x) { y <- list(vector(mode(x), length = 0)) for (i in seq_along(x)) { y <- c(y, lapply(y, "c", x[i])) } y[-1L] } df <- rep.int(0, 2^nvar) for (k in seq_along(margin)) { terms <- subsets(margin[[k]]) for (j in seq_along(terms)) df[sum(2 ^ (terms[[j]] - 1))] <- prod(dtab[terms[[j]]] - 1) } ## Rewrite margin numbers to names if possible if (!is.null(varnames) && all(nzchar(varnames))) { for (k in seq_along(margin)) margin[[k]] <- varnames[margin[[k]]] } else { varnames <- as.character(1 : ntab) } y <- list(lrt = lrt, pearson = pearson, df = ntab - sum(df) - 1, margin = margin) if (rfit) y$fit <- fit if (param) { fit <- log(fit) terms <- seq_along(df)[df > 0] parlen <- length(terms) + 1 parval <- list(parlen) parnam <- character(parlen) parval[[1L]] <- mean(fit) parnam[1L] <- "(Intercept)" fit <- fit - parval[[1L]] ## Get the u_i(B) in the rows of 'dyadic', see above. dyadic <- NULL while(any(terms > 0)) { dyadic <- cbind(dyadic, terms %% 2) terms <- terms %/% 2 } dyadic <- dyadic[order(rowSums(dyadic)), , drop = FALSE] for (i in 2 : parlen) { vars <- which(dyadic[i - 1, ] > 0) parval[[i]] <- apply(fit, vars, mean) parnam[i] <- paste(varnames[vars], collapse = ".") fit <- sweep(fit, vars, parval[[i]], check.margin=FALSE) } names(parval) <- parnam y$param <- parval } return(y) }