help.search <-
function(pattern, fields = c("alias", "title"),
apropos, keyword, whatis, ignore.case = TRUE,
package = NULL, lib.loc = NULL,
help.db = getOption("help.db"),
verbose = getOption("verbose"),
rebuild = FALSE, agrep = NULL)
{
sQuote <- function(s) paste("'", s, "'", sep = "")
### Argument handling.
TABLE <- c("name", "alias", "title", "keyword")
if(!missing(pattern)) {
if(!is.character(pattern) || (length(pattern) > 1))
stop(paste(sQuote("pattern"),
"must be a single character string"))
i <- pmatch(fields, TABLE)
if(any(is.na(i)))
stop("incorrect field specification")
else
fields <- TABLE[i]
} else if(!missing(apropos)) {
if(!is.character(apropos) || (length(apropos) > 1))
stop(paste(sQuote("apropos"),
"must be a single character string"))
else {
pattern <- apropos
fields <- c("alias", "title")
}
} else if(!missing(keyword)) {
if(!is.character(keyword) || (length(keyword) > 1))
stop(paste(sQuote("keyword"),
"must be a single character string"))
else {
pattern <- keyword
fields <- "keyword"
}
} else if(!missing(whatis)) {
if(!is.character(whatis) || (length(whatis) > 1))
stop(paste(sQuote("whatis"),
"must be a single character string"))
else {
pattern <- whatis
fields <- "alias"
}
} else {
stop("don't know what to search")
}
if(is.null(lib.loc))
lib.loc <- .libPaths()
##
## Currently, the information used for help.search is stored in
## package-level CONTENTS files. As it is expensive to build the
## help.search db, we use a global file cache for this information
## if possible. This is wrong because multiple processes or threads
## use the same cache (no locking!), and we should really save the
## information on a package or library level, preferably already at
## package install time. Argh ...
##
### Set up the help db.
if(is.null(help.db) || !file.exists(help.db))
rebuild <- TRUE
if(!rebuild) {
## Try using the saved help db.
##
## Shouldn't we unserialize instead?
load(file = help.db)
##
## If not a list (pre 1.7 format), rebuild.
if(!is.list(db)) rebuild <- TRUE
## Need to find out whether this has the info we need.
## Note that when looking for packages in libraries we always
## use the first location found. Hence if the library search
## path changes we might find different versions of a package.
## Thus we need to rebuild the help db in case the specified
## library path is different from the one used when building the
## help db (stored as its "LibPaths" attribute).
if(!identical(lib.loc, attr(db, "LibPaths")))
rebuild <- TRUE
## We also need to rebuild the help db in case an existing dir
## in the library path was modified more recently than the db,
## as packages might have been installed or removed.
if(any(file.info(help.db)$mtime <
file.info(lib.loc[file.exists(lib.loc)])$mtime))
rebuild <- TRUE
}
if(rebuild) {
## Check whether we can save the help db lateron.
save.db <- FALSE
dir <- switch(.Platform$OS.type,
"windows" = Sys.getenv("R_USER"),
"unix" = Sys.getenv("HOME"),
"mac" = R.home(),
"")
if(nchar(dir) == 0) dir <- getwd()
dir <- file.path(dir, ".R")
dbfile <- file.path(dir, "help.db")
if(((file.exists(dir) && file.info(dir)$isdir)
|| ((unlink(dir) == 0) && dir.create(dir)))
&& (unlink(dbfile) == 0))
save.db <- TRUE
## If we cannot save the help db only use the given packages.
##
## Why don't we just use the given packages? The current logic
## for rebuilding cannot figure out that rebuilding is needed
## the next time (unless we use the same given packages) ...
packagesInHelpDB <- if(!is.null(package) && !save.db)
package
else
.packages(all.available = TRUE, lib.loc = lib.loc)
##
## Create the help db.
contentsEnv <- new.env()
contentsDCFFields <-
c("Entry", "Aliases", "Description", "Keywords")
contentsRDSFields <-
c("Name", "Aliases", "Title", "Keywords")
dbBase <- dbAliases <- dbKeywords <- NULL
nEntries <- 0
if(verbose) {
cat("Packages:\n")
np <- 0
}
for(p in packagesInHelpDB) {
if(verbose)
cat("", p, if((np <- np + 1)%% 5 == 0) "\n")
contents <- NULL
path <- .find.package(p, lib.loc, quiet = TRUE)
if(length(path) == 0)
stop(paste("could not find package", sQuote(p)))
lib <- dirname(path)
## Read the contents info from the respective Rd meta
## files.
if(file.exists(contentsFile <-
file.path(path, "Meta", "Rd.rds"))) {
contents <-
.readRDS(contentsFile)[ , contentsRDSFields,
drop = FALSE]
}
##
## Remove this once 1.7.0 is out.
## (The 1.7 development versions for some time used files
## 'CONTENTS.rds', and 'CONTENTS.rda' generated by save().)
else if(file.exists(contentsFile <-
file.path(path, "CONTENTS.rds"))) {
contents <-
.readRDS(contentsFile)[ , contentsRDSFields,
drop = FALSE]
}
else if(file.exists(contentsFile <-
file.path(path, "CONTENTS.rda"))) {
load(contentsFile, envir = contentsEnv)
contents <-
get("contents", envir = contentsEnv)[ ,
contentsRDSFields,
drop = FALSE]
}
##
else if(file.exists(contentsFile <-
file.path(path, "CONTENTS")))
contents <-
read.dcf(contentsFile, fields = contentsDCFFields)
if(!is.null(contents)) {
## If we found something ...
if((nr <- NROW(contents)) > 0) {
if(!is.data.frame(contents)) {
colnames(contents) <- contentsRDSFields
base <- contents[, c("Name", "Title"), drop = FALSE]
## If the contents db is not a data frame, then
## it has the aliases collapsed. Split again as
## we need the first alias as the help topic to
## indicate for matching Rd objects.
aliases <-
strsplit(contents[, "Aliases"], " +")
## Don't do it for keywords, though, as these
## might be non-standard (and hence contain
## white space ...).
}
else {
base <-
as.matrix(contents[, c("Name", "Title")])
aliases <- contents[, "Aliases"]
}
## IDs holds the numbers of the Rd objects in the
## help.search db.
IDs <- seq(from = nEntries + 1, to = nEntries + nr)
## We create 3 character matrices (cannot use data
## frames for efficiency reasons): 'dbBase' holds
## all character string data, and 'dbAliases' and
## 'dbKeywords' hold character vector data in a
## 3-column character matrix format with entry, ID
## of the Rd object the entry comes from, and the
## package the object comes from. The latter is
## useful when subscripting according to package.
dbBase <-
rbind(dbBase,
cbind(p, lib, IDs, base,
topic = sapply(aliases, "[", 1)))
## If there are no aliases at all, cbind() below
## would give matrix(p, nc = 1). (Of course, Rd
## objects without aliases are useless ...)
if(length(a <- unlist(aliases)) > 0)
dbAliases <-
rbind(dbAliases,
cbind(a,
rep(IDs,
sapply(aliases, length)),
p))
keywords <- contents[, "Keywords"]
## And similarly if there are no keywords at all.
if(length(k <- unlist(keywords)) > 0)
dbKeywords <-
rbind(dbKeywords,
cbind(k,
rep(IDs,
sapply(keywords, length)),
p))
nEntries <- nEntries + nr
} else {
warning(paste("Empty contents for package",
sQuote(p), "in", sQuote(lib)))
}
}
}
if(verbose)
cat(ifelse(np %% 5 == 0, "\n", "\n\n"))
colnames(dbBase) <-
c("Package", "LibPath", "ID", "name", "title", "topic")
colnames(dbAliases) <-
c("Aliases", "ID", "Package")
colnames(dbKeywords) <-
c("Keywords", "ID", "Package")
db <- list(Base = dbBase,
Aliases = dbAliases,
Keywords = dbKeywords)
## Maybe save the help db
##
## Shouldn't we serialize instead?
if(save.db) {
attr(db, "LibPaths") <- lib.loc
save(db, file = dbfile)
options(help.db = dbfile)
}
##
}
### Matching.
if(verbose)
cat("Database of ",
NROW(db$Base), " Rd objects (",
NROW(db$Aliases), " aliases, ",
NROW(db$Keywords), " keywords),\n",
sep = "")
if(!is.null(package)) {
## Argument 'package' was given but we built a larger help db to
## save for future invocations. Need to check that all given
## packages exist, and only search the given ones.
posInHelpDB <-
match(package, unique(db$Base[, "Package"]), nomatch = 0)
if(any(posInHelpDB) == 0)
stop(paste("could not find package",
sQuote(package[posInHelpDB == 0][1])))
db <-
lapply(db,
function(x) {
x[x[, "Package"] %in% package, , drop = FALSE]
})
}
## If agrep is NULL (default), we want to use fuzzy matching iff
## 'pattern' contains no characters special to regular expressions.
## We use the following crude approximation: if pattern contains
## only alphanumeric characters or whitespace or a '-', it is taken
## 'as is', and fuzzy matching is used unless turned off explicitly.
if(is.null(agrep) || is.na(agrep))
agrep <-
(regexpr("^([[:alnum:]]|[[:space:]]|-)+$", pattern) > 0)
if(is.logical(agrep)) {
if(agrep)
max.distance <- 0.15
}
else if(is.numeric(agrep) || is.list(agrep)) {
max.distance <- agrep
agrep <- TRUE
}
else
stop("incorrect agrep specification")
searchFun <- function(x) {
if(agrep)
agrep(pattern, x, ignore.case = ignore.case,
max.distance = max.distance)
else
grep(pattern, x, ignore.case = ignore.case)
}
dbBase <- db$Base
searchDbField <- function(field) {
switch(field,
alias = {
aliases <- db$Aliases
match(aliases[searchFun(aliases[, "Aliases"]),
"ID"],
dbBase[, "ID"])
},
keyword = {
keywords <- db$Keywords
match(keywords[searchFun(keywords[, "Keywords"]),
"ID"],
dbBase[, "ID"])
},
searchFun(db$Base[, field]))
}
i <- NULL
for(f in fields) i <- c(i, searchDbField(f))
db <- dbBase[sort(unique(i)),
c("topic", "title", "Package", "LibPath"),
drop = FALSE]
if(verbose) cat("matched", NROW(db), "objects.\n")
## Retval.
y <- list(pattern = pattern, fields = fields,
type = if(agrep) "fuzzy" else "regexp",
matches = db)
class(y) <- "hsearch"
y
}
print.hsearch <-
function(x, ...)
{
sQuote <- function(s) paste("'", s, "'", sep = "")
fields <- paste(x$fields, collapse = " or ")
type <- switch(x$type, fuzzy = "fuzzy", "regular expression")
db <- x$matches
if(NROW(db) > 0) {
outFile <- tempfile()
outConn <- file(outFile, open = "w")
writeLines(c(strwrap(paste("Help files with", fields,
"matching", sQuote(x$pattern),
"using", type, "matching:")),
"\n\n"),
outConn)
dbnam <- paste(db[ , "topic"], "(",
db[, "Package"], ")",
sep = "")
dbtit <- paste(db[ , "title"], sep = "")
writeLines(formatDL(dbnam, dbtit), outConn)
writeLines(c("\n\n",
strwrap(paste("Type 'help(FOO, package = PKG)' to",
"inspect entry 'FOO(PKG) TITLE'."))),
outConn)
close(outConn)
file.show(outFile, delete.file = TRUE)
} else {
writeLines(strwrap(paste("No help files found with", fields,
"matching", sQuote(x$pattern),
"using", type, "matching.")))
}
}