#  File src/library/tools/R/packages.R
#  Part of the R package, https://www.R-project.org
#
#  Copyright (C) 1995-2022 The R Core Team
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  https://www.R-project.org/Licenses/

write_PACKAGES <-
function(dir = ".", fields = NULL,
         type = c("source", "mac.binary", "win.binary"),
         verbose = FALSE, unpacked = FALSE, subdirs = FALSE,
         latestOnly = TRUE, addFiles = FALSE, rds_compress = "xz",
         validate = FALSE)
{
    if(missing(type) && .Platform$OS.type == "windows")
        type <- "win.binary"
    type <- match.arg(type)

    paths <- ""
    if(is.logical(subdirs) && subdirs) {
        owd <- setwd(dir)
        paths <- list.dirs(".")
        setwd(owd)
        paths <- c("", paths[paths != "."])
        ## now strip leading ./
        paths <- sub("^[.]/", "", paths)
    } else if(is.character(subdirs)) paths <- c("", subdirs)

    ## Older versions created only plain text and gzipped DCF files with
    ## the (non-missing and non-empty) package db entries, and hence did
    ## so one path at a time.  We now also serialize the db directly,
    ## and hence first build the whole db, and then create the files in
    ## case some packages were found.

    db <- NULL
    addPaths <- !identical(paths, "")

    for(path in paths) {
        this <- if(nzchar(path)) file.path(dir, path) else dir
        desc <- .build_repository_package_db(this, fields, type, verbose,
                                             unpacked, validate)
        desc <- .process_repository_package_db_to_matrix(desc,
                                                         path,
                                                         addFiles,
                                                         addPaths,
                                                         latestOnly)
        if(NROW(desc))
            db <- rbind(db, desc)

    }

    np <- .write_repository_package_db(db, dir, rds_compress)

    invisible(np)
}

.write_repository_package_db <-
function(db, dir, rds_compress)
{
   np <- NROW(db)
   if(np > 0L) {
       ## To save space, empty entries are not written to the DCF, so
       ## that read.dcf() on these will have the entries as missing.
       ## Hence, change empty to missing in the db.
       db[!is.na(db) & (db == "")] <- NA_character_
       con <- file(file.path(dir, "PACKAGES"), "wt")
       write.dcf(db, con)
       close(con)
       con <- gzfile(file.path(dir, "PACKAGES.gz"), "wt")
       write.dcf(db, con)
       close(con)
       rownames(db) <- db[, "Package"]
       saveRDS(db, file.path(dir, "PACKAGES.rds"), compress = rds_compress)
   }

   invisible(np)
}

.process_repository_package_db_to_matrix <-
function(desc, path, addFiles, addPaths, latestOnly)
{
    desc <- Filter(length, desc)

    if(length(desc)) {
        Files <- names(desc)
        fields <- names(desc[[1L]])
        desc <- matrix(unlist(desc), ncol = length(fields), byrow = TRUE)
        colnames(desc) <- fields
        if(addFiles) desc <- cbind(desc, File = Files)
        if(addPaths) desc <- cbind(desc, Path = path)
        if(latestOnly) desc <- .remove_stale_dups(desc)

        ## Standardize licenses or replace by NA.
        license_info <- analyze_licenses(desc[, "License"])
            desc[, "License"] <-
                ifelse(license_info$is_standardizable,
                       license_info$standardization,
                       NA)
        }
    desc
}

## factored out so it can be used in multiple
## places without threat of divergence
.get_pkg_file_pattern = function(type = c("source", "mac.binary", "win.binary"),
                                 ext.only = FALSE)
{

    type <- match.arg(type)
    ## FIXME: might the source pattern be more general?
    ## was .tar.gz prior to 2.10.0

    ret = switch(type,
                 "source" = "_.*\\.tar\\.[^_]*$",
                 "mac.binary" = "_.*\\.tgz$",
                 "win.binary" = "_.*\\.zip$")
    if(ext.only)
        ret = gsub("_.*", "", fixed = TRUE, ret)
    ret
}
## this is OK provided all the 'fields' are ASCII -- so be careful
## what you add.
.build_repository_package_db <-
function(dir, fields = NULL,
         type = c("source", "mac.binary", "win.binary"),
         verbose = getOption("verbose"),
         unpacked = FALSE, validate = FALSE)
{
    if(unpacked)
        return(.build_repository_package_db_from_source_dirs(dir,
                                                             fields,
                                                             verbose,
                                                             validate))

       package_pattern <- .get_pkg_file_pattern(type)
    files <- list.files(dir, pattern = package_pattern, full.names = TRUE)

    if(!length(files))
        return(list())
    type <- match.arg(type)
    db <- .process_package_files_for_repository_db(files,
                                                   type,
                                                   fields,
                                                   verbose,
                                                   validate)
    db
}

.process_package_files_for_repository_db <-
function(files, type, fields, verbose, validate = FALSE)
{

    files <- normalizePath(files, mustWork=TRUE) # files comes from list.files, mustWork ok
    ## Add the standard set of fields required to build a repository's
    ## PACKAGES file:
    fields <- unique(c(.get_standard_repository_db_fields(type), fields))
    ## files was without path at this point in original code,
    ## use filetbs instead to compute pkg names and set db names
    filetbs <- basename(files)
    packages <- sapply(strsplit(filetbs, "_", fixed = TRUE), `[`, 1L)
    db <- vector(length(files), mode = "list")
    names(db) <- filetbs #files was not full paths before
    ## Many (roughly length(files)) warnings are *expected*, hence
    ## suppressed.
    op <- options(warn = -1)
    on.exit(options(op))
    if(verbose) message("Processing packages:")
    if(type == "win.binary") {
        for(i in seq_along(files)) {
            if(verbose) message(paste0("  ", files[i]))
            con <- unz(files[i], file.path(packages[i], "DESCRIPTION"))
            temp <- tryCatch(read.dcf(con, fields = fields)[1L, ],
                             error = identity)
            if(inherits(temp, "error")) {
                close(con)
                next
            }
            db[[i]] <- temp
            close(con)
        }
    } else {
        cwd <- getwd()
        if (is.null(cwd))
            stop("current working directory cannot be ascertained")
        td <- tempfile("PACKAGES")
        if(!dir.create(td)) stop("unable to create ", td)
        on.exit(unlink(td, recursive = TRUE), add = TRUE)
        setwd(td)
        for(i in seq_along(files)) {
            if(verbose) message(paste0("  ", files[i]))
            p <- file.path(packages[i], "DESCRIPTION")
            ## temp <- try(system(paste("tar zxf", files[i], p)))
            temp <- try(utils::untar(files[i], files = p))
            if(!inherits(temp, "try-error")) {
                temp <- tryCatch(read.dcf(p, fields = fields)[1L, ],
                                 error = identity)
                if(!inherits(temp, "error")) {
                    if(validate) {
                        ## .check_package_description() by default goes via
                        ## .read_description() which re-encodes and insists on a
                        ## single entry unlike the above read.dcf() call.
                        ok <- .check_package_description(db = temp[!is.na(temp)])
                        ## FIXME: no format.check_package_description yet.
                        if(any(as.integer(lengths(ok)) > 0L)) {
                            message(paste(gettextf("Invalid DESCRIPTION file for package %s",
                                                   sQuote(basename(dirname(p)))),
                                          paste(format(ok), collapse = "\n\n"),
                                          sep = "\n\n"),
                                    domain = NA)
                            next
                        }
                    }
                    if("NeedsCompilation" %in% fields &&
                       is.na(temp["NeedsCompilation"])) {
                        l <- utils::untar(files[i], list = TRUE)
                        temp["NeedsCompilation"] <-
                            if(any(l == file.path(packages[i], "src/"))) "yes" else "no"
                    }
                    temp["MD5sum"] <- md5sum(files[i])
                    db[[i]] <- temp
                } else {
                    message(gettextf("reading DESCRIPTION for package %s failed with message:\n  %s",
                                     sQuote(basename(dirname(p))),
                                     conditionMessage(temp)),
                            domain = NA)
                }
            }
            unlink(packages[i], recursive = TRUE)
        }
        setwd(cwd)
    }
    if(verbose) message("done")

    db
}

.build_repository_package_db_from_source_dirs <-
function(dir, fields = NULL, verbose = getOption("verbose"),
         validate = FALSE)
{
    dir <- file_path_as_absolute(dir)
    fields <- unique(c(.get_standard_repository_db_fields(), fields))
    paths <- list.files(dir, full.names = TRUE)
    paths <- paths[dir.exists(paths) &
                   file_test("-f", file.path(paths, "DESCRIPTION"))]
    db <- vector(length(paths), mode = "list")
    if(verbose) message("Processing packages:")
    for(i in seq_along(paths)) {
        if(verbose) message(paste0("  ", basename(paths[i])))
        temp <- tryCatch(read.dcf(file.path(paths[i], "DESCRIPTION"),
                                  fields = fields)[1L, ],
                         error = identity)
        if(!inherits(temp, "error")) {
            if(validate) {
                ## .check_package_description() by default goes via
                ## .read_description() which re-encodes and insists on a
                ## single entry unlike the above read.dcf() call.
                ok <- .check_package_description(db = temp[!is.na(temp)])
                ## FIXME: no format.check_package_description yet.
                if(any(as.integer(lengths(ok)) > 0L)) {
                    warning(paste(gettextf("Invalid DESCRIPTION file for package %s",
                                           sQuote(basename(paths[i]))),
                                  paste(format(ok), collapse = "\n\n"),
                                  sep = "\n\n"),
                            domain = NA,
                            call. = FALSE)
                    next
                }
            }
            if(is.na(temp["NeedsCompilation"])) {
                temp["NeedsCompilation"] <-
                    if(dir.exists(file.path(paths[i], "src"))) "yes" else "no"
            }
            ## Cannot compute MD5 sum of the source tar.gz when working
            ## on the unpacked sources ...
            db[[i]] <- temp
        } else {
            warning(gettextf("reading DESCRIPTION for package %s failed with message:\n  %s",
                             sQuote(basename(paths[i])),
                             conditionMessage(temp)),
                    domain = NA)
        }
    }
    if(verbose) message("done")
    names(db) <- basename(paths)
    db
}

dependsOnPkgs <-
function(pkgs, dependencies = "strong",
         recursive = TRUE, lib.loc = NULL,
         installed = utils::installed.packages(lib.loc, fields = "Enhances"))
{
    dependencies <- .expand_dependency_type_spec(dependencies)

    av <- installed[, dependencies, drop = FALSE]
    rn <- as.character(installed[, "Package"])
    need <- apply(av, 1L, function(x)
                  any(pkgs %in% utils:::.clean_up_dependencies(x)) )
    uses <- rn[need]
    if(recursive) {
        p <- pkgs
        repeat {
            p <- unique(c(p, uses))
            need <- apply(av, 1L, function(x)
                          any(p %in% utils:::.clean_up_dependencies(x)) )
            uses <- unique(c(p, rn[need]))
            if(length(uses) <= length(p)) break
        }
    }
    setdiff(uses, pkgs)
}

.remove_stale_dups <-
function(ap)
{
    ## Given a matrix from available.packages, return a copy
    ## with no duplicate packages, being sure to keep the packages
    ## with highest version number.
    ## (Also works for data frame package repository dbs.)
    pkgs <- ap[ , "Package"]
    dup_pkgs <- pkgs[duplicated(pkgs)]
    if (length(dup_pkgs) > 100) {
        ## Some packages may be in multiple repositories in the same
        ## version. Handle those specially for performance reasons.
        ap <- ap[!duplicated(ap[, c("Package", "Version")]), , drop = FALSE]
        pkgs <- ap[ , "Package"]
        dup_pkgs <- pkgs[duplicated(pkgs)]
    }
    stale_dups <- integer(length(dup_pkgs))
    i <- 1L
    for (dp in dup_pkgs) {
        wh <- which(dp == pkgs)
        vers <- package_version(ap[wh, "Version"])
        keep_ver <- max(vers)
	keep_idx <- which.max(vers == keep_ver) # they might all be max
        wh <- wh[-keep_idx]
        end_i <- i + length(wh) - 1L
        stale_dups[i:end_i] <- wh
        i <- end_i + 1L
    }
    ## Possible to have only one package in a repository
    if(length(stale_dups)) ap[-stale_dups, , drop = FALSE] else ap
}

package_dependencies <-
function(packages = NULL, db = NULL, which = "strong",
         recursive = FALSE, reverse = FALSE,
         verbose = getOption("verbose"))
{
    packages1 <- unique(packages)

    if(is.null(db)) db <- utils::available.packages()

    fields <- which <- .expand_dependency_type_spec(which)
    if(is.character(recursive)) {
        recursive <- .expand_dependency_type_spec(recursive)
        if(identical(which, recursive))
            recursive <- TRUE
        else
            fields <- unique(c(fields, recursive))
    }

    ind <- if(!is.character(recursive) && !recursive && !reverse &&
              !is.null(packages)) {
               ## For forward non-recursive depends, we can simplify
               ## matters by subscripting the db right away---modulo
               ## boundary cases.
               match(packages1, db[, "Package"], nomatch = 0L)
           } else !duplicated(db[, "Package"])

    db <- as.data.frame(db[ind, c("Package", fields), drop = FALSE])

    ## Avoid recomputing package dependency names in recursive
    ## invocations.
    for(f in fields) {
        if(!is.list(d <- db[[f]]))
            db[[f]] <- lapply(d, .extract_dependency_package_names)
    }

    if(is.character(recursive)) {
        ## Direct dependencies:
        d_d <- Recall(packages, db, which, FALSE,
                      reverse, verbose)
        ## Recursive dependencies of all these:
        d_r <- Recall(unique(unlist(d_d)), db, recursive, TRUE,
                      reverse, verbose)
        ## Now glue together:
        return(lapply(d_d,
                      function(p) {
                          sort(unique(c(p, unlist(d_r[p],
                                                  use.names = FALSE))))
                      }))
    }

    depends <-
        do.call(Map,
                c(list("c"),
                  db[which],
                  list(USE.NAMES = FALSE)))

    depends <- lapply(depends, unique)

    if(!recursive && !reverse) {
        names(depends) <- db$Package
        if(!is.null(packages)) {
            depends <- depends[match(packages, names(depends))]
            names(depends) <- packages
        }
        return(depends)
    }

    all_packages <- sort(unique(c(db$Package, unlist(depends))))

    if(!recursive) {
        ## Need to invert.
        depends <-
            split(rep.int(db$Package, lengths(depends)),
                  factor(unlist(depends), levels = all_packages))
        if(!is.null(packages)) {
            depends <- depends[match(packages, names(depends))]
            names(depends) <- packages
        }
        return(depends)
    }

    ## Recursive dependencies.
    ## We need to compute the transitive closure of the dependency
    ## relation, but e.g. Warshall's algorithm (O(n^3)) is
    ## computationally infeasible.
    ## Hence, in principle, we do the following.
    ## Take the current list of pairs (i,j) in the relation.
    ## Iterate over all j and whenever i R j and j R k add (i,k).
    ## Repeat this until no new pairs get added.
    ## To do this in R, we use a 2-column matrix of (i,j) rows.
    ## We then create two lists which for all j contain the i and k
    ## with i R j and j R k, respectively, and combine these.
    ## This works reasonably well, but of course more efficient
    ## implementations should be possible.
    matchP <- match(rep.int(db$Package, lengths(depends)),
		    all_packages)
    matchD <- match(unlist(depends), all_packages)
    tab <- if(reverse)
	split(matchP,
	      factor(matchD, levels = seq_along(all_packages)))
    else
	split(matchD,
	      factor(matchP, levels = seq_along(all_packages)))
    if(is.null(packages)) {
        if(reverse) {
            packages1 <- all_packages
            p_L <- seq_along(all_packages)
        } else {
            packages1 <- db$Package
            p_L <- match(packages1, all_packages)
        }
    } else {
        p_L <- match(packages1, all_packages, nomatch = 0L)
        if(any(ind <- (p_L == 0L))) {
            p_L <- p_L[!ind]
        }
    }
    p_R <- tab[p_L]
    pos <- cbind(rep.int(p_L, lengths(p_R)), unlist(p_R))
    ctr <- 0L

    ## posunique() speeds up computing "unique(pos)" in the following loop.
    ## When the number of packages is small enough, we can easily represent
    ## an edge from i to j by a single integer number. Finding duplicates in
    ## a vector of integers is much faster than in rows of a matrix.
    shift <- as.integer(2^15)  ## allows to fit two numbers to an integer
    if (length(pos) && max(pos) < shift)
        posunique <- function(p)
            p[!duplicated(p[,1L]*shift + p[,2L]), , drop = FALSE]
    else
        posunique <- function(p) unique(p)

    repeat {
        if(verbose) cat("Cycle:", (ctr <- ctr + 1L))
        p_L <- split(pos[, 1L], pos[, 2L])
        new <- do.call(rbind,
                       Map(function(i, k)
                           cbind(rep.int(i, length(k)),
                                 rep(k, each = length(i))),
                           p_L, tab[as.integer(names(p_L))]))

        ## could be just posunique(rbind(pos, new)), but computing this
        ## iteratively is faster
        npos <- posunique(rbind(pos, posunique(new)))
        nnew <- nrow(npos) - nrow(pos)
        if(verbose) cat(" NNew:", nnew, "\n")
        if(!nnew) break
        pos <- npos
    }
    depends <-
        split(all_packages[pos[, 2L]],
              factor(all_packages[pos[, 1L]], levels = packages1))
    if(!is.null(packages)) {
        depends <- depends[match(packages, names(depends))]
        names(depends) <- packages
    }
    depends
}

.expand_dependency_type_spec <-
function(x)
{
    if(identical(x, "strong"))
        c("Depends", "Imports", "LinkingTo")
    else if(identical(x, "most"))
        c("Depends", "Imports", "LinkingTo", "Suggests")
    else if(identical(x, "all"))
        c("Depends", "Imports", "LinkingTo", "Suggests", "Enhances")
    else
        x
    ## (Could also intersect x with the possible types.)
}

## .extract_dependency_package_names <-
## function(x)
## {
##     ## Assume a character *string*.
##     if(is.na(x)) return(character())
##     x <- strsplit(x, ",", fixed = TRUE)[[1L]]
##     ## FIXME: The following is much faster on Linux but apparently not
##     ## on Windows:
##     ## x <- sub("(?s)[[:space:]]*([[:alnum:].]+).*", "\\1", x, perl = TRUE)
##     x <- sub("[[:space:]]*([[:alnum:].]+).*", "\\1", x)
##     x[nzchar(x) & (x != "R")]
## }

.extract_dependency_package_names <-
function(x)
    .Call(C_package_dependencies_scan, x)