#  File src/library/utils/R/data.R
#  Part of the R package, https://www.R-project.org
#
#  Copyright (C) 1995-2023 The R Core Team
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  https://www.R-project.org/Licenses/

data <-
function(..., list = character(), package = NULL, lib.loc = NULL,
         verbose = getOption("verbose"), envir = .GlobalEnv,
         overwrite = TRUE)
{
    fileExt <- function(x) {
        db <- grepl("\\.[^.]+\\.(gz|bz2|xz)$", x)
        ans <- sub(".*\\.", "", x)
        ans[db] <-  sub(".*\\.([^.]+\\.)(gz|bz2|xz)$", "\\1\\2", x[db])
        ans
    }

    my_read_table <- function(...) {
        ## Try canonicalizing the sort order for possible string to
        ## factor conversions.
        lcc <- Sys.getlocale("LC_COLLATE")
        on.exit(Sys.setlocale("LC_COLLATE", lcc))
        Sys.setlocale("LC_COLLATE", "C")
        read.table(...)
    }

    stopifnot(is.character(list))
    names <- c(as.character(substitute(list(...))[-1L]), list)

    ## Find the directories of the given packages and maybe the working
    ## directory.
    if (!is.null(package)) {
        if (!is.character(package))
            stop("'package' must be a character vector or NULL")
    }
    paths <- find.package(package, lib.loc, verbose = verbose)
    if (is.null(lib.loc))
        paths <- c(path.package(package, TRUE),
                   if (!length(package)) getwd(), # ignored if NULL
                   paths)
    paths <- unique(normalizePath(paths[file.exists(paths)]))

    ## Find the directories with a 'data' subdirectory.
    paths <- paths[dir.exists(file.path(paths, "data"))]

    dataExts <- tools:::.make_file_exts("data")

    if (length(names) == 0L) {
        ## List all possible data sets.

        ## Build the data db.
        db <- matrix(character(), nrow = 0L, ncol = 4L)
        for(path in paths) {
            entries <- NULL
            ## Use "." as the 'package name' of the working directory.
            packageName <-
                if (file_test("-f", file.path(path, "DESCRIPTION")))
                    basename(path)
                else
                    "."
            ## Check for new-style 'Meta/data.rds'
            if (file_test("-f", INDEX <- file.path(path, "Meta", "data.rds"))) {
                entries <- readRDS(INDEX)
            } else {
                ## No index: should only be true for ./data >= 2.0.0
                dataDir <- file.path(path, "data")
                entries <- tools::list_files_with_type(dataDir, "data")
                if (length(entries)) {
                    entries <-
                        unique(tools::file_path_sans_ext(basename(entries)))
                    entries <- cbind(entries, "")
                }
            }
            if (NROW(entries)) {
                if (is.matrix(entries) && ncol(entries) == 2L)
                    db <- rbind(db, cbind(packageName, dirname(path), entries))
                else
                    warning(gettextf("data index for package %s is invalid and will be ignored",
                                     sQuote(packageName)),
                            domain=NA, call.=FALSE)
            }
        }
        colnames(db) <- c("Package", "LibPath", "Item", "Title")

        footer <- if (missing(package))
            paste0("Use ",
                   sQuote(paste("data(package =",
                                ".packages(all.available = TRUE))")),
                   "\n",
                   "to list the data sets in all *available* packages.")
        else
            NULL
        y <- list(title = "Data sets", header = NULL, results = db,
                  footer = footer)
        class(y) <- "packageIQR"
        return(y)
    }

    paths <- file.path(paths, "data")
    for(name in names) {
        found <- FALSE
        for(p in paths) {
            tmp_env <- if (overwrite) envir else new.env()
            ## does this package have "Rdata" databases?
            if (file_test("-f", file.path(p, "Rdata.rds"))) {
                rds <- readRDS(file.path(p, "Rdata.rds"))
                if (name %in% names(rds)) {
                    ## found it, so copy objects from database
                    found <- TRUE
                    if (verbose)
                        message(sprintf("name=%s:\t found in Rdata.rds", name),
                                domain=NA)
                    objs <- rds[[name]] # guaranteed an exact match
                    lazyLoad(file.path(p, "Rdata"), envir = tmp_env,
                             filter = function(x) x %in% objs)
                    break
		} else if (verbose)
		    message(sprintf("name=%s:\t NOT found in names() of Rdata.rds, i.e.,\n\t%s\n",
				    name, paste(names(rds), collapse=",")),
				domain=NA)
            }
            files <- list.files(p, full.names = TRUE)
            files <- files[grep(name, files, fixed = TRUE)]
            if (length(files) > 1L) {
                ## more than one candidate
                o <- match(fileExt(files), dataExts, nomatch = 100L)
                paths0 <- dirname(files)
		## Next line seems unnecessary to MM (FIXME?)
		paths0 <- factor(paths0, levels = unique(paths0))
                files <- files[order(paths0, o)]
            }
            if (length(files)) {
                ## have a plausible candidate (or more)
                for(file in files) {
                    if (verbose)
                        message("name=", name, ":\t file= ...",
                                .Platform$file.sep, basename(file), "::\t",
                                appendLF = FALSE, domain = NA)
                    ext <- fileExt(file)
                    ## make sure the match is really for 'name.ext'
                    if (basename(file) != paste0(name, ".", ext))
                        found <- FALSE
                    else {
                        found <- TRUE
                        switch(ext,
                               R = , r = {
                                   ## ensure utils is visible
                                   library("utils")
                                   sys.source(file, chdir = TRUE,
                                              envir = tmp_env)
                               },
                               RData = , rdata = , rda =
                               load(file, envir = tmp_env),
                               TXT = , txt = , tab = ,
                               tab.gz = , tab.bz2 = , tab.xz = ,
                               txt.gz = , txt.bz2 = , txt.xz =
                               assign(name,
                                      ## ensure default for as.is has not been
                                      ## overridden by options(stringsAsFactor)
                                      my_read_table(file,
                                                    header = TRUE,
                                                    as.is = FALSE),
                                      envir = tmp_env),
                               CSV = , csv = ,
                               csv.gz = , csv.bz2 = , csv.xz =
                               assign(name,
                                      my_read_table(file,
                                                    header = TRUE,
                                                    sep = ";",
                                                    as.is = FALSE),
                                      envir = tmp_env),
                               found <- FALSE)
                    }
                    if (found) break # from files
                }
                if (verbose) message(if (!found) "*NOT* ", "found", domain = NA)
            }
            if (found) break # from paths
        }

        if (!found) {
            warning(gettextf("data set %s not found", sQuote(name)),
                    domain = NA)
        } else if (!overwrite) {
            for (o in ls (envir = tmp_env, all.names = TRUE)) {
                if (exists(o, envir = envir, inherits = FALSE))
                    warning(gettextf("an object named %s already exists and will not be overwritten", sQuote(o)))
                else
                    assign(o, get(o, envir = tmp_env, inherits = FALSE),
                           envir = envir)
            }
            rm (tmp_env)
        }
    }
    invisible(names)
}