# File src/library/tools/R/apitools.R # Part of the R package, https://www.R-project.org # # Copyright (C) 2024 The R Core Team # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # https://www.R-project.org/Licenses/ ## ## Work out the function API from information in WRE ## ## WRE data is now installed in system.file(package = "tools", "wre.txt") ## WRE(newpath) forces a new load with the new path. apidata <- list2env( list( wrelines = NULL, wreloc = NULL, fapi = NULL, rfuns = NULL)) resetAPI <- function(newloc = "") { if (newloc != "") apidata$wreloc <- newloc apidata$wrelines <- NULL apidata$fapi <- NULL ## reset rfuns also? } WRE <- function() { if (is.null(apidata$wrelines)) { if (is.null(apidata$wreloc)) { apidata$wreloc <- system.file(package = "tools", "wre.txt") if (apidata$wreloc == "") apidata$wreloc <- "https://svn.r-project.org/R/trunk/doc/manual/R-exts.texi" } apidata$wrelines <- readLines(apidata$wreloc) } apidata$wrelines } unmap <- function(x) sub("^Rf_", "", gsub("^_|_$", "", trimws(x))) getOneFunAPI <- function(apitype) { wrelines <- WRE() fpat <- sprintf("^@(%s)fun +", apitype) hpat <- sprintf("^@(%s)hdr +", apitype) funs <- sub(fpat, "", grep(fpat, wrelines, value = TRUE)) hdrs <- sub(hpat, "", grep(hpat, wrelines, value = TRUE)) wAPI <- data.frame(name = funs, loc = rep("WRE", length(names))) getHdrAPI <- function(hdr) { hfuns <- getFunsHdr(file.path(R.home("include"), hdr)) data.frame(name = hfuns, loc = rep(hdr, length(hfuns))) } hAPI <- lapply(hdrs, getHdrAPI) val <- rbind(wAPI, do.call(rbind, hAPI)) val$apitype <- rep(apitype, nrow(val)) val$unmapped <-unmap(val$name) rownames(val) <- NULL val } getFunAPI <- function() { apitypes <- c("api", "eapi", "emb") val <- do.call(rbind, lapply(apitypes, getOneFunAPI)) val <- unique(val) val <- by(val, list(val$unmapped), ## picks max WRE > api > eapi > emb ## picks first if unmapped and mapped are in WRE function(x) if (nrow(x) > 1) x[1, ] else x, simplify = FALSE) val <- do.call(rbind, val) val$unmapped <- NULL ## not needed in final output rownames(val) <- NULL val } funAPI <- function() { if (is.null(apidata$fapi)) apidata$fapi <- getFunAPI() apidata$fapi } ## getFunsHdr tries to get the functions declared in a header file ## without additional tools beyond cc -E. Using a proper ## header-parsing tool would be more accurate, but this seems adequate ## for now. getFunsHdr <- function(fpath, lines) { if (missing(lines)) { lines <- readLines(fpath) name <- basename(fpath) } else name <- NULL ## NORET has to be handled before ccE since what it expands into varies lines <- ifelse(grepl("^#", lines), lines, gsub(r"{.*\s*NORET\s*}", " ", lines)) lines <- lines[! grepl("^#\\s*error", lines)] ## for GraphicsDevice.h lines <- ccE(lines) lines <- dropBraces(lines) ## these could be incorporated into the regex lines <- gsub(r"{\s*(const|extern|long|unsigned)\s*}", "", lines) lines <- sub(r"{^\s*(\w*[(])}", "void \\1", lines) lines <- gsub(r"{\(\s*\*\s*(\w+)\s*\)}", "(\\1)", lines) ## original from SO: https://stackoverflow.com/questions/476173/regex-to-pull-out-c-function-prototype-declarations ## funcRegexp <- r"{^\s*(?:(?:inline|static)\s+){0,2}(?!else|typedef|return)\w+\s+\*?\s*(\w+)\s*\([^0]+\)\s*;?}" ## allow for parens around function name ## make closing paren for arguments optional funcRegexp <- r"{^\s*(?:(?:inline|static)\s+){0,2}(?!else|typedef|return)\w+\s*\*?\s*\(?(\w+)\)?\s*\([^0]+\)?\s*;?}" m <- gregexec(funcRegexp, lines, perl = TRUE) v <- regmatches(lines, m) val <- sapply(v[lengths(v) > 0], `[[`, 2) val <- unique(as.character(val)) ## drop halucinations val <- val[! (val %in% letters | val %in% LETTERS)] val <- val[! grepl("_t$", val)] val <- val[! grepl("user_(unif|norm)", val)] val <- val[! grepl("Quartz|Win32", val)] val } ccE <- function(lines, include = R.home("include"), clean = TRUE) { if (Sys.which("cc") == "") stop("'cc' is not on the path") tfile <- tempfile(fileext = ".h") on.exit(unlink(tfile)) writeLines(lines, tfile) cmd <- sprintf("cc -E -I%s %s", include, tfile) val <- system(cmd, intern=TRUE) if (clean) ccEclean(val, tfile) else val } ccEclean <- function(lines, pattern = "Rtmp") { fline <- grepl("^#", lines) keep <- grepl(pattern, lines[fline]) len <- diff(c(which(fline), length(lines) + 1)) keep <- unlist(mapply(rep, keep, len, USE.NAMES = FALSE)) lines <- lines[keep & ! fline] lines } dropBraces <- function(lines) { ## drop {...} fully within a line lines <- sub("[{].*[}]", " ", lines) ## drop {...} crossing several lines start <- grepl("[{]", lines) end <- grepl("[}]", lines) ## could check for balance lines <- lines[cumsum(start - end) == 0 | start | end] lines <- sub("[{].*", "", lines) ## keep stuff before { lines <- lines[! grepl(".*[}]", lines)] ## don't keep stuff after } lines } ## ## Check a shared library's use of R entry points ## checkLibAPI <- function(lpath) { ldata <- readFileSyms(lpath) lsyms <- ldata[ldata$type == "U", ]$name lsyms <- inRfuns(lsyms) lsyms <- data.frame(name = lsyms, unmapped = unmap(lsyms)) api <- funAPI() api$unmapped <- unmap(api$name) api$name <- NULL api$loc <- NULL val <- merge(lsyms, api, all.x = TRUE) val <- val[order(val$apitype), ] val$unmapped <- NULL ## not needed in final output rownames(val) <- NULL val } readFileSyms <- function(fpath) { ## this uses nm ## could try objdump if nm doesn't work v <- read_symbols_from_object_file(fpath) if (is.null(v)) data.frame(name = character(0), type = character(0)) else as.data.frame(v)[c("name", "type")] } ## crude approach based on string matching ## **** this is to crude -- needs to allow more inRfunsCrude <- function(syms) { syms <- union(syms[syms == toupper(syms)], grep("^_?Rf?_", syms, value = TRUE)) pat <- "R_MB_CUR_MAX|R_BaseNamespace|R_BlankScalarString|R_BlankString" pat <- sprintf("%s|R_CStackDir|R_CStackLimit|R_CStackStart", pat) pat <- sprintf("%s|R_Consolefile|R_CurrentExpression|R_Interactive", pat) pat <- sprintf("%s|R_Outputfile|R_Srcref|R_TempDir", pat) pat <- sprintf("%s|R_compact_.*_class|R_ignore_SIGPIPE", pat) pat <- sprintf("%s|R_interrupts_pending|R_interrupts_suspended", pat) pat <- sprintf("%s|R_isForkedChild", pat) pat <- sprintf("%s|R_NilValue|R_MissingArg|R_Visible", pat) pat <- sprintf("%s|R_.*Symbol$|R_dot_|R_Na", pat) pat <- sprintf("%s|R_NilValue|R_GlobalEnv|R_BaseEnv|R_EmptyEnv", pat) pat <- sprintf("%s|R_(Pos|Neg)Inf|R_.*Value$|R_.*Handlers$", pat) syms[! grepl(pat, syms)] } ## approach based on computing the entry points in the executable and core libs ## fall back to the crude approach if entry points can't be found inRfuns <- function(syms) { rfuns <- Rfuns() if (length(rfuns) == 0) inRfunsCrude(syms) else syms[unmap(syms) %in% unmap(rfuns)] } cleanRfuns <- function(val) { ## if Rf_XLENGTH and XLENGTH are both there then keep Rf_XLENGTH if (any(grepl("^_*Rf_XLENGTH_*$", val)) && any(grepl("^_*XLENGTH_*$", val))) val <- val[! grepl("^_*XLENGTH_*$", val)] ## drop tre_ stuff if it is there and some others val[! grepl("tre_|^_*(main|MAIN|start)_*$|yyparse", val)] } getRfuns <- function() { pat <- sprintf("(\\.dylib|%s)$", .Platform$dynlib.ext) ofiles <- c(file.path(R.home("bin"), "exec", "R"), dir(R.home("lib"), pattern = pat, full.names = TRUE), dir(R.home("modules"), pattern = pat, full.names = TRUE)) data <- do.call(rbind, lapply(ofiles, readFileSyms)) fdata <- data[data$type == "T", ] cleanRfuns(fdata$name) } Rfuns <- function() { if (is.null(apidata$rfuns)) apidata$rfuns <- getRfuns() apidata$rfuns } ## ## Check an installed package's use of R entry points ## checkPkgAPI <- function(pkg, lib.loc = NULL, all = FALSE) { libdir <- system.file("libs", package = pkg, lib.loc = lib.loc) libs <- Sys.glob(file.path(libdir, sprintf("*%s", .Platform$dynlib.ext))) if (length(libs) > 0) { val <- do.call(rbind, lapply(libs, checkLibAPI)) if (! all) val <- val[is.na(val$apitype), ] val <- unique(val) rownames(val) <- NULL val } else NULL } checkAllPkgsAPI <- function(lib.loc = NULL, priority = NULL, all = FALSE, Ncpus = getOption("Ncpus", 1L), verbose = getOption("verbose")) { p <- rownames(utils::installed.packages(lib.loc = lib.loc, priority = priority)) checkOne <- function(pkg) { data <- checkPkgAPI(pkg, lib.loc = lib.loc, all = all) if (! is.null(data)) data$pkg <- rep(pkg, nrow(data)) data } val <- do.call(rbind, .package_apply(p, checkOne, Ncpus = Ncpus, verbose = verbose)) rownames(val) <- NULL val } ## ## Find R entry points and variables used in installed packages ## clear_rownames <- function(val) { rownames(val) <- NULL val } rbind_list <- function(args) clear_rownames(do.call(rbind, args)) ofile_syms <- function(fname, keep = c("F", "V", "U")) { ## this uses nm on Linux/macOS; probably doesn't work on Windows, so bail stopifnot(isFALSE(.Platform$OS.type == "windows")) v <- read_symbols_from_object_file(fname) if (is.character(v) && nrow(v) == 0) ofile_syms_od(fname, keep) else if (is.null(v)) data.frame(name = character(0), type = character(0)) else { match_type <-function(type) ifelse(type == "T", "F", ifelse(type == "U", "U", "V")) val <- as.data.frame(v)[c("name", "type")] val <- val[val$type %in% c("U", "B", "D", "T"), ] val$type <- match_type(val$type) val <- val[val$type %in% keep, ] val } } ofile_syms_od <- function(fpath, keep = c("F", "V", "U")) { if (Sys.which("objdump") == "") stop("'objdump' is not on the path") v <- system(sprintf("objdump -T %s", fpath), intern = TRUE) v <- grep("\t", v, value = TRUE) ## data lines contain a \t name <- sub(".*\t.* (.*$)", "\\1", v) ## the name is at the end after the \t type <- sub(".* (.*)\t.*", "\\1", v) ## the type is right before the \t ttbl <- c("*UND*" = "U", ".text" = "F", ".bss" = "V", ".data" = "V", w = "w") val <- data.frame(name, type = ttbl[match(type, names(ttbl), length(ttbl))]) val <- val[val$type %in% keep, ] clear_rownames(val[order(val$name), ]) } Rsyms <- function(keep = c("F", "V")) { rsyms <- apidata$rsyms if (is.null(rsyms)) { ofiles <- c(file.path(R.home("bin"), "exec", "R"), dir(R.home("lib"), full.names = TRUE), dir(R.home("modules"), full.names = TRUE)) rsyms <- rbind_list(lapply(ofiles, ofile_syms, keep)) apidata$rsyms <- rsyms } rsyms } pkgRsyms <- function(pkg, lib.loc = NULL) { libdir <- system.file("libs", package = pkg, lib.loc = lib.loc) libs <- Sys.glob(file.path(libdir, "*.so")) if (length(libs) > 0) { val <- rbind_list(lapply(libs, ofile_syms, keep = "U")) val$package <- rep(pkg, nrow(val)) val$type <- NULL merge(val, Rsyms()) } else NULL } allPkgsRsyms <- function(lib.loc = NULL, Ncpus = getOption("Ncpus", 1L), verbose = getOption("verbose")) { p <- rownames(utils::installed.packages(lib.loc = lib.loc)) rbind_list(.package_apply(p, pkgRsyms, Ncpus = Ncpus, verbose = verbose)) }