#  File src/library/tools/R/citation.R
#  Part of the R package, https://www.R-project.org
#
#  Copyright (C) 1995-2014 The R Core Team
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  https://www.R-project.org/Licenses/

## Tools for computing on CITATION info.

.parse_CITATION_file <-
function(cfile, encoding = NULL)
{
    if(is.null(encoding))
        encoding <- "ASCII"

    ## The parser can only read valid strings, but single-byte locales
    ## can mark their encoding.  The following allows latin1 and UTF-8
    ## citation files to be read in UTF-8 and any single-byte locale
    ## (including C).
    ##
    ## FIXME: if parse() could be told to read strings bytewise,
    ## we could simply convert to UTF-8.
    if(encoding %in% c("latin1", "UTF-8") && !l10n_info()$MBCS) {
        ## NOTE: in an MBCS locale, calling parse() with a "UTF-8" or
        ## "latin1" encoding argument in order to get character strings
        ## marked as UTF-8 or latin1 does not work, as such encoding
        ## arguments are ignored with a warning.  So one needs to use a
        ## file connection to re-encode as below, and cannot get strings
        ## marked.  One might consider this a bug and not a feature ...
        parse(file = cfile, encoding = encoding)
    } else if(encoding %in% c("C", "ASCII")) {
        ## We do want to make sure this is ASCII: in single-byte
        ## locales 8-bit chars are likely to be parsed as bytes.
        ## Based on showNonASCII()
        x <- readLines(cfile, warn = FALSE)
        asc <- iconv(x, "latin1", "ASCII")
        if (any(is.na(asc) | asc != x))
            stop("non-ASCII input in a CITATION file without a declared encoding")
        parse(file = cfile)
    } else {
        con <- file(cfile, encoding = encoding)
        on.exit(close(con))
        parse(con)
    }
}

.parse_CITATION_file_in_package <-
function(cfile, installed = FALSE)    
{
    cfile <- file_path_as_absolute(cfile)
    dfile <- file.path(if(installed)
                           dirname(cfile)
                       else
                           dirname(dirname(cfile)),
                       "DESCRIPTION")
    meta <- .read_description(dfile)
    if(is.na(encoding <- meta["Encoding"]))
        encoding <- NULL
    .parse_CITATION_file(cfile, encoding)
}
    
BibTeX_entry_field_db <-
    list(Article = c("author", "title", "journal", "year"),
         Book = c("author|editor", "title", "publisher", "year"),
         Booklet = c("title"),
         InBook =
         c("author|editor", "title", "chapter", "publisher", "year"),
         InCollection =
         c("author", "title", "booktitle", "publisher", "year"),
         InProceedings = c("author", "title", "booktitle", "year"),
         Manual = c("title"),
         MastersThesis = c("author", "title", "school", "year"),
         Misc = character(),
         PhdThesis = c("author", "title", "school", "year"),
         Proceedings = c("title", "year"),
         TechReport = c("author", "title", "institution", "year"),
         Unpublished = c("author", "title", "note")
         )
## See e.g. lisp/textmodes/bibtex.el in the GNU Emacs sources.

get_CITATION_entry_fields <-
function(file, encoding = "ASCII")
{
    exprs <- .parse_CITATION_file(file, encoding)

    ## Assume that bibentry() or citEntry() only occur at top level.

    ## Try to detect entry type and field names from the calls.
    FOO1 <- FOO2 <- function() match.call(expand.dots = FALSE)
    formals(FOO1) <- formals(utils::citEntry)
    formals(FOO2) <- formals(utils::bibentry)
    ## Could also hard-wire this, of course.
    get_names_of_nonempty_fields <- function(x) {
        names(x)[vapply(x,
                        function(e) {
                            length(e) &&
                            !(is.character(e) &&
                              all(grepl("^[[:space:]]*$", e)))
                        },
                        NA)]
    }

    out <- lapply(exprs,
           function(e) {
               nm <- as.character(e[[1L]])
               if(nm == "citEntry") {
                   e[[1L]] <- as.name("FOO1")
                   e <- as.list(eval(e))
                   entry <- e$entry
                   fields <- get_names_of_nonempty_fields(e$...)
               }
               else if(nm == "bibentry") {
                   e[[1L]] <- as.name("FOO2")
                   e <- as.list(eval(e))
                   entry <- e$bibtype
                   fields <- get_names_of_nonempty_fields(c(e$...,
                                                            as.list(e$other)[-1L]))
               }
               else return()
               entry <- if(!is.character(entry)) NA_character_ else entry[1L]
               list(entry = entry, fields = as.character(fields))
           })

    out <- Filter(Negate(is.null), out)
    ## If we found nothing return nothing ...
    if(!length(out)) return(NULL)
    entries <- sapply(out, `[[`, 1L)
    fields <- lapply(out, `[[`, 2L)
    out <- data.frame(File = file,
                      Entry = entries,
                      stringsAsFactors = FALSE)
    out$Fields <- fields
    out
}


find_missing_required_BibTeX_fields <-
function(entry, fields)
{
    pos <- match(tolower(entry),
                 tolower(names(BibTeX_entry_field_db)))
    if(is.na(pos)) {
        ## Invalid entry.
        return(NA_character_)
    }
    rfields <- BibTeX_entry_field_db[[pos]]
    if(!length(rfields)) return(character())
    ## Go for legibility/generality rather than efficiency.
    fields <- tolower(fields)
    ok <- vapply(strsplit(rfields, "|", fixed = TRUE),
                 function(f) any(f %in% fields),
                 NA)
    rfields[!ok]
}