## Tools for computing on CITATION info.
## Currently only for validation, and hence not in utils.

## Required fields for each BibTeX entry type, keyed by entry type.
## Alternatives, at least one of which must be present, are written as a
## single string with the alternatives separated by "|".
## See e.g. lisp/textmodes/bibtex.el in the GNU Emacs sources.
BibTeX_entry_field_db <-
    list(Article = c("author", "title", "journal", "year"),
         Book = c("author|editor", "title", "publisher", "year"),
         Booklet = c("title"),
         InBook =
             c("author|editor", "title", "chapter", "publisher", "year"),
         InCollection =
             c("author", "title", "booktitle", "publisher", "year"),
         InProceedings = c("author", "title", "booktitle", "year"),
         Manual = c("title"),
         MastersThesis = c("author", "title", "school", "year"),
         Misc = character(),
         PhdThesis = c("author", "title", "school", "year"),
         Proceedings = c("title", "year"),
         TechReport = c("author", "title", "institution", "year"),
         Unpublished = c("author", "title", "note")
         )

## Keep in step with utils::readCitationFile

## Collect the entry type and argument names of the top-level citEntry()
## calls in a CITATION file.  The file is only parsed, never evaluated.
##
## Arguments:
##   file      path of the CITATION file.
##   encoding  encoding of the file; "unknown" is treated as "latin1".
## Value:
##   NULL if the file cannot be parsed.  Otherwise a data frame with one
##   row per top-level citEntry() call (zero rows if there is none) and
##   columns
##     File    the 'file' argument;
##     Entry   the 'entry' argument if it is a character constant, and
##             NA otherwise;
##     Fields  (list column) the argument names of the call after
##             matching against the formals of citEntry(), without
##             'entry' and without unnamed arguments.
get_CITATION_entry_fields <-
function(file, encoding = "unknown")
{
    ## Assume that citEntry() only occurs at top level.

    ## To parallel readCitationFile, default to latin1.
    if(encoding == "unknown") encoding <- "latin1"

    if(encoding %in% c("latin1", "UTF-8") && !l10n_info()$MBCS) {
        exprs <- tryCatch(parse(file = file, encoding = encoding),
                          error = identity)
    } else {
        con <- file(file, encoding = encoding)
        on.exit(close(con), add = TRUE)
        exprs <- tryCatch(parse(con), error = identity)
    }
    if(inherits(exprs, "error")) return(NULL)

    ## Argh.  citEntry() has formals
    ##   (entry, textVersion, header = NULL, footer = NULL, ...)
    ## so we cannot simply compute on the names of the citEntry() calls:
    ## positional and partially named arguments must be matched first.
    ## This stand-in carries the same formals and is only used as the
    ## definition for match.call(); it is never called.
    citEntry_formals <-
        function(entry, textVersion, header = NULL, footer = NULL, ...)
            NULL
    citEntry_name <- as.name("citEntry")

    out <- lapply(exprs, function(e) {
        ## Top-level expressions may be symbols or constants, and call
        ## heads may themselves be calls (e.g., pkg::fun): only a call
        ## whose head is the bare name citEntry is of interest.
        if(!is.call(e) || !identical(e[[1L]], citEntry_name))
            return(NULL)
        args <- as.list(match.call(citEntry_formals, e))
        ## Use [[ rather than $ to avoid partial matching against names
        ## of arguments passed via '...'.
        entry <- args[["entry"]]
        entry <- if(is.character(entry)) entry[1L] else NA_character_
        fields <- names(args)
        ## Retain fields textVersion/header/footer, so these must
        ## be removed in subsequent BibTeX validation computations.
        fields <- fields[is.na(match(fields, c("", "entry")))]
        list(entry = entry, fields = fields)
    })
    out <- Filter(Negate(is.null), out)

    ## vapply() and rep_len() keep the result well-formed (zero rows)
    ## when the file contains no citEntry() calls at all.
    entries <- vapply(out, `[[`, "", "entry")
    fields <- lapply(out, `[[`, "fields")
    out <- data.frame(File = rep_len(file, length(entries)),
                      Entry = entries,
                      stringsAsFactors = FALSE)
    out$Fields <- fields
    out
}

## Determine which fields required by BibTeX are missing from an entry.
##
## Arguments:
##   entry   the entry type; matched case-insensitively against the
##           names of BibTeX_entry_field_db.
##   fields  character vector of the field names given for the entry;
##           the citEntry() arguments textVersion/header/footer are
##           ignored, and the remaining names are compared in lower
##           case.
## Value:
##   NA_character_ if 'entry' is not a known entry type.  Otherwise the
##   elements of the required fields db for which no (alternative)
##   field is given; character() if none is missing, if the entry type
##   has no required fields, or if 'fields' is empty.
find_missing_required_BibTeX_fields <-
function(entry, fields)
{
    ## No fields given at all: nothing is reported as missing.
    if(!length(fields)) return(character())

    pos <- match(tolower(entry),
                 tolower(names(BibTeX_entry_field_db)))
    if(is.na(pos)) {
        ## Invalid entry.
        return(NA_character_)
    }

    rfields <- BibTeX_entry_field_db[[pos]]
    if(!length(rfields)) return(character())

    ## Drop non-BibTeX citEntry() fields.
    fields <-
        tolower(fields[!fields %in% c("textVersion", "header", "footer")])

    ## Go for legibility/generality rather than efficiency.
    ## A requirement "a|b" is satisfied if any alternative is present.
    ok <- vapply(strsplit(rfields, "|", fixed = TRUE),
                 function(alternatives) any(alternatives %in% fields),
                 NA)
    rfields[!ok]
}