#  File src/library/stats/R/cor.R
#  Part of the R package, https://www.R-project.org
#
#  Copyright (C) 1995-2023 The R Core Team
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  https://www.R-project.org/Licenses/

#### cor() , cov() and var() : Based on the same C code

cor <- function(x, y = NULL, use = "everything",
                method = c("pearson", "kendall", "spearman"))
{
    na.method <-
	pmatch(use, c("all.obs", "complete.obs", "pairwise.complete.obs",
		      "everything", "na.or.complete"))
    if(is.na(na.method)) stop("invalid 'use' argument")
    method <- match.arg(method)
    if(is.data.frame(y)) y <- as.matrix(y)
    if(is.data.frame(x)) x <- as.matrix(x)
    if(!is.matrix(x) && is.null(y))
        stop("supply both 'x' and 'y' or a matrix-like 'x'")
    ## non-atomic x should not be 'numeric', but in case a method messes up
    ## allow logicals for back-compatibility (package mice).
    if(!(is.numeric(x) || is.logical(x))) stop("'x' must be numeric")
    stopifnot(is.atomic(x))
    if(!is.null(y)) {
        if(!(is.numeric(y) || is.logical(y))) stop("'y' must be numeric")
        stopifnot(is.atomic(y))
    }

    ## Rank transform
    Rank <- function(u) {
        ## take care not to drop dims on a 0- or 1-row matrix
        if(length(u) == 0L) u
        else if(is.matrix(u)) {
            if(nrow(u) > 1L) apply(u, 2L, rank, na.last="keep") else row(u)
        } else rank(u, na.last="keep")
    }
    if(method == "pearson")
        .Call(C_cor, x, y, na.method, FALSE)
    else if (na.method %in% c(2L, 5L)) { ## "complete.obs" / "na.or.complete"
        if (is.null(y)) {
            .Call(C_cor, Rank(na.omit(x)), NULL, na.method,
                  method == "kendall")
        } else {
            nas <- attr(na.omit(cbind(x,y)), "na.action")
            dropNA <- function(x, nas) {
                if(length(nas)) {
                    if (is.matrix(x)) x[-nas, , drop = FALSE] else x[-nas]
                } else x
            }
            .Call(C_cor, Rank(dropNA(x, nas)), Rank(dropNA(y, nas)),
                  na.method, method == "kendall")
        }
    } else if (na.method != 3L) {
        ## i.e., 1 or 4, i.e. "all.obs" or "everything":
	x <- Rank(x)
	if(!is.null(y)) y <- Rank(y)
        .Call(C_cor, x, y, na.method, method == "kendall")
    }
    else { # rank correlations and "pairwise.complete.obs"; the hard case
         ## Based on contribution from Shigenobu Aoki.
         ## matrix
         if (is.null(y)) {
             ncy <- ncx <- ncol(x)
             if(ncx == 0) stop("'x' is empty")
             r <- matrix(0, nrow = ncx, ncol = ncy)
             ## 2.6.0 assumed the diagonal was 1, but not so for all NAs,
             ## nor single non-NA pairs.
             for (i in seq_len(ncx)) {
                 for (j in seq_len(i)) {
                     x2 <- x[,i]
                     y2 <- x[,j]
                     ok <- complete.cases(x2, y2)
                     x2 <- rank(x2[ok])
                     y2 <- rank(y2[ok])
                     ## we've removed all NAs
                     r[i, j] <- if(any(ok)) .Call(C_cor, x2, y2, 1L, method == "kendall") else NA
                 }
             }
             r <- r + t(r) - diag(diag(r))
	     rownames(r) <- colnames(x)
	     colnames(r) <- colnames(x)
             r
         }
         ## vector/matrix x vector/matrix
         else {
             if(length(x) == 0L || length(y) == 0L)
                 stop("both 'x' and 'y' must be non-empty")
             matrix_result <- is.matrix(x) || is.matrix(y)
	     if (!is.matrix(x)) x <- matrix(x, ncol=1L)
	     if (!is.matrix(y)) y <- matrix(y, ncol=1L)
             ncx <- ncol(x)
             ncy <- ncol(y)
             r <- matrix(0, nrow = ncx, ncol = ncy)
             for (i in seq_len(ncx)) {
                 for (j in seq_len(ncy)) {
                     x2 <- x[,i]
                     y2 <- y[,j]
                     ok <- complete.cases(x2, y2)
                     x2 <- rank(x2[ok])
                     y2 <- rank(y2[ok])
                     r[i, j] <- if(any(ok)) .Call(C_cor, x2, y2, 1L, method == "kendall") else NA
                 }
             }
	     rownames(r) <- colnames(x)
	     colnames(r) <- colnames(y)
             if(matrix_result) r else drop(r)
         }
     }
}

cov <- function(x, y = NULL, use = "everything",
                method = c("pearson", "kendall", "spearman"))
{
    na.method <-
	pmatch(use, c("all.obs", "complete.obs", "pairwise.complete.obs",
		      "everything", "na.or.complete"))
    if(is.na(na.method)) stop("invalid 'use' argument")
    method <- match.arg(method)
    if(is.data.frame(y)) y <- as.matrix(y)
    if(is.data.frame(x)) x <- as.matrix(x)
    if(!is.matrix(x) && is.null(y))
        stop("supply both 'x' and 'y' or a matrix-like 'x'")
    ## non-atomic x should not be 'numeric', but in case a method messes up
    stopifnot(is.numeric(x) || is.logical(x), is.atomic(x))
    if(!is.null(y)) stopifnot(is.numeric(y) || is.logical(y), is.atomic(y))

    ## Rank transform
    Rank <- function(u) {
        ## take care not to drop dims on a 0- or 1-row matrix
        if(length(u) == 0L) u
        else if(is.matrix(u)) {
            if(nrow(u) > 1L) apply(u, 2L, rank, na.last="keep") else row(u)
        } else rank(u, na.last="keep")
    }

    if(method == "pearson")
	.Call(C_cov, x, y, na.method, method == "kendall")
    else if (na.method %in% c(2L, 5L)) { ## "complete.obs"  or  "na.or.complete"

        if (is.null(y)) {
            .Call(C_cov, Rank(na.omit(x)), NULL, na.method,
                  method == "kendall")
        } else {
            nas <- attr(na.omit(cbind(x,y)), "na.action")
            dropNA <- function(x, nas) {
                if(length(nas)) {
                    if (is.matrix(x)) x[-nas, , drop = FALSE] else x[-nas]
                } else x
            }
            .Call(C_cov, Rank(dropNA(x, nas)), Rank(dropNA(y, nas)),
                  na.method, method == "kendall")
        }
    } else if (na.method != 3L) { ## 1 or 4: "all.obs"  or  "everything"
	x <- Rank(x)
	if(!is.null(y)) y <- Rank(y)
	.Call(C_cov, x, y, na.method, method == "kendall")
    }
    else ##  "pairwise.complete.obs"
	stop("cannot handle 'pairwise.complete.obs'")
}

var <- function(x, y = NULL, na.rm = FALSE, use) {
    if(missing(use))
	use <- if(na.rm) "na.or.complete" else "everything"
    na.method <-
	pmatch(use, c("all.obs", "complete.obs", "pairwise.complete.obs",
		      "everything", "na.or.complete"))
    if(is.na(na.method)) stop("invalid 'use' argument")
    if (is.data.frame(x)) x <- as.matrix(x) else if(!is.null(x)) stopifnot(is.atomic(x))
    if (is.data.frame(y)) y <- as.matrix(y) else if(!is.null(y)) stopifnot(is.atomic(y))
    .Call(C_cov, x, y, na.method, FALSE)
}

cov2cor <- function(V)
{
    ## Purpose: Covariance matrix |--> Correlation matrix -- efficiently
    ## ----------------------------------------------------------------------
    ## Arguments: V: a covariance matrix (i.e. symmetric and positive definite)
    ## ----------------------------------------------------------------------
    ## Author: Martin Maechler, Date: 12 Jun 2003
    p <- (d <- dim(V))[1L]
    if(!is.numeric(V) || length(d) != 2L || p != d[2L])
	stop("'V' is not a square numeric matrix")
    ## Is := diag( 1/sigma_i )
    pos <- !is.na(Is <- D <- diag(V, names=FALSE)) & D > 0
    Is[ pos] <- sqrt(1/D[pos])
    Is[!pos] <- NaN
    if(any(!pos) || any(!is.finite(Is)))
	warning("diag(V) had non-positive or NA entries; the non-finite result may be dubious")
    r <- V # keep dimnames
    r[] <- Is * V * rep(Is, each = p)
    ##	== D %*% V %*% D  where D = diag(Is)
    if(p) r[seq.int(from = 1L, by = p + 1L, length.out = p)] <- 1 # exact in diagonal
    r
}