#XXX Deal with line numbers in the original document. # # We could also do things this way # source(textConnection(saveXML(xsltApplyStyleSheet("cityTemperatures.xml", "~/Projects/org/omegahat/XML/Literate/segment.xsl")))) # # # Allow the user to specify a subset of nodes in which to find the code, etc. nodes. # Or an XPath query to restrict the search. # For example, suppose we have a document with two sections and we want to run the code # in only one of those sections. # getNodeSet(section[@id='second one']) # setOldClass("XMLNodeSet") DefaultXPathNamespaces = c(r = "http://www.r-project.org", s = "http://cm.bell-labs.com/stat/S4", omg = "http://www.omegahat.net", mlb = "http://www.mathworks.com", # matlab sh="http://www.shell.org", perl = "http://www.perl.org", py = "http://www.python.org", fo="http://www.w3.org/1999/XSL/Format", xsl="http://www.w3.org/1999/XSL/Transform", xi="http://www.w3.org/2001/XInclude" ) DefaultXMLSourceXPath = sprintf("%s[not(@eval='false') and not(ancestor::ignore) and not(ancestor::section[@r:eval = 'false'])]", c("//r:init", "//r:function", "//r:init", "//r:code", "//r:plot", "//r:expr")) setGeneric("xmlSource", function(url, ..., envir = globalenv(), xpath = character(), ids = character(), omit = character(), ask = FALSE, example = NA, fatal = TRUE, verbose = TRUE, echo = verbose, print = echo, xnodes = DefaultXMLSourceXPath, namespaces = DefaultXPathNamespaces, section = character(), eval = TRUE, init = TRUE, setNodeNames = FALSE, parse = TRUE, force = FALSE) { standardGeneric("xmlSource") }) # Break up into methods for character and xmlInternalDocument setMethod("xmlSource", c("character"), function(url, ..., envir =globalenv(), xpath = character(), ids = character(), omit = character(), ask = FALSE, example = NA, fatal = TRUE, verbose = TRUE, echo = verbose, print = echo, xnodes = DefaultXMLSourceXPath, namespaces = DefaultXPathNamespaces, section = character(), eval = TRUE, init = TRUE, setNodeNames = FALSE, parse = TRUE, force = FALSE) { doc = xmlTreeParse(url, ..., useInternalNodes = TRUE) xmlSource(doc, ..., envir = envir, xpath = xpath, ids = ids, omit = omit, ask = ask, example = example, fatal = fatal, verbose = verbose, print = print, xnodes = xnodes, namespaces = namespaces, section = section, eval = eval, init = init, setNodeNames = setNodeNames, parse = parse, force = force) }) setMethod("xmlSource", c("XMLInternalDocument"), function(url, ..., envir =globalenv(), xpath = character(), ids = character(), omit = character(), ask = FALSE, example = NA, fatal = TRUE, verbose = TRUE, echo = verbose, print = echo, xnodes = DefaultXMLSourceXPath, namespaces = DefaultXPathNamespaces, section = character(), eval = TRUE, init = TRUE, setNodeNames = FALSE, parse = TRUE, force = FALSE) { doc = url if(inherits(verbose, "numeric")) verbose = verbose - 1 if(!is.character(section)) section = as.integer(section) #XXX use section when processing the examples if(length(example) && !all(is.na(example))) { egs = getNodeSet(doc, "//r:example", namespaces) if(length(egs)) { ids = sapply(egs, xmlGetAttr, "id") if(length(example) == 1 && is.na(example)) { cat("Select an example\n") example = ids[w <- menu(ids)] } if(inherits(example, "numeric")) { i = example } else { i = pmatch(example, ids) if(all(is.na(i))) stop("no example named ", example) } # find any r:init nodes which are not inside an example. init = getNodeSet(doc, "//r:init[not(ancestor::r:example)]", c(r = "http://www.r-project.org")) if(length(init)) { xmlSource(init, envir = envir, omit = omit, verbose = verbose, namespaces = namespaces, eval = eval, force = force) cat("Done doc-level init", length(init), "\n") } ans = sapply(i, function(x) { nodes = getNodeSet(egs[[x]], paste(xnodes, collapse = "|"), namespaces) if(verbose) cat("Example", ids[x], "\n") #XXX put the correct ids in her. xmlSource(nodes, envir = envir, omit = omit, verbose = verbose, namespaces = namespaces, eval = eval, setNodeNames = setNodeNames, parse = parse, force = force) }) return(ans) } } # if(length(section) && is.character(section)) # section = paste("@id", ddQuote(section), sep = "=") if(length(xpath)) { # do an XPath query and then look inside the resulting nodes # for the xnodes of interest. if(length(section)) { # XXX assumes just one section. What about c(1, 2, 4) xpath =paste("//section[", section, "]", xpath, sep = "") } nodes = getNodeSet(doc, xpath, namespaces) v = unlist(lapply(nodes, function(n) { unlist(lapply(xnodes, function(p) getNodeSet(n, p, namespaces)), recursive = FALSE) }), recursive = FALSE) } else { functions = limitXPathToSection(section, "//r:function[not(@eval = 'false') and not(ancestor::ignore)]") xnodes = limitXPathToSection(section, xnodes) # Do we need to ensure the order for the functions first? v = getNodeSet(doc, paste(c(functions, xnodes), collapse = "|"), namespaces) # v = getNodeSet(doc, functions, namespaces) # w = getNodeSet(doc, xnodes, namespaces) # v = c(v, w) } if(is.null(v)) stop("No matching nodes in the document found") class(v) <- "XMLNodeSet" # deal with a top-level node r:codeIds which is of the form # abc # def # ghi # i.e. a single entry on each line which identifies the nodes that are to be read. if(missing(ids) && missing(xnodes) && length(ids <- getNodeSet(doc, "/*/r:codeIds|/*/invisible/r:codeIds", namespaces = c(r = "http://www.r-project.org")))) { if(length(ids) > 1) { warning("more than one r:codeIds node. Using the first one") } # txt = paste(sapply(ids, xmlValue)) ids = strsplit(xmlValue(ids[[1]]), "\\\n")[[1]] ids = unique(ids) ids = ids[ids != ""] } xmlSource(v, ids = ids, omit = omit, ask = ask, fatal = fatal, verbose = verbose, envir = envir, section = if(!is.character(section)) section else character(), eval = eval, setNodeNames = setNodeNames, parse = parse, force = force) }) limitXPathToSection = # # limitToSection(1:3) # limitToSection(letters[1:3]) # limitToSection(letters[1:3], "//r:plot") function(section, xpath = c("//r:code", "//r:func", "//r:plot", "//r:expr")) { if(length(section) == 0) return(paste(xpath, collapse = "|")) if(is.character(section)) section = paste("@id=", sQuote(section), sep = "") paste(outer(section, xpath, function(sect, xp) paste("//section[", sect, "]", xp, sep = "")), collapse = "|") } setMethod("xmlSource", "XMLNodeSet", function(url, ..., envir =globalenv(), xpath = character(), ids = character(), omit = character(), ask = FALSE, example = NA, fatal = TRUE, verbose = TRUE, echo = verbose, print = echo, xnodes = c("r:function[not(@val='false')]", "r:init[not(@eval='false')]", "r:code[not(@eval='false')]", "//r:plot[not(@eval='false')]"), namespaces = DefaultXPathNamespaces, section = character(), eval = TRUE, init = TRUE, setNodeNames = FALSE, parse = TRUE, force = FALSE) { if(ask) { doc = as(url[[1]], "XMLInternalDocument") #XXXX no doc here now. v = getNodeSet(doc, "//r:function|//r:init|//r:code|//r:plot", namespaces) funs = sapply(v, xmlName) == "function" if(any(funs)) { #XXX } } ans = sapply(url, evalNode, envir = envir, verbose = verbose, ids = ids, omit = omit, echo = echo, print = print, ask = ask, eval = eval, parse = parse, force = force) if(setNodeNames) names(ans) = sapply(url, getRCodeNodeName) else names(ans) = sapply(url, getNodePosition) #sapply(url, xmlName, full = TRUE) invisible(ans) }) evalNode = function(node, envir = globalenv(), ids = character(), verbose = FALSE, echo = verbose, omit = character(), namespaces = c(r = "http://www.r-project.org"), print = echo, ask = FALSE, eval = TRUE, parse = TRUE, force = FALSE) { #XXX check all ancestors. Ideally exclude them in the XPath query if(!force && (xmlName(xmlParent(node)) == "ignore" || length(getNodeSet(node, "./ancestor::section[@r:eval='false']|./ancestor::para[@r:eval='false']", c(r = "http://www.r-project.org"))) > 0)) return(FALSE) tmp = xmlGetAttr(node, "id", NA) if(is.na(tmp) && length(ids) > 0 && !("" %in% ids)) return() if(!is.na(tmp)) { if(length(omit) > 0 && tmp %in% omit) { if(verbose) warning("skipping id ", tmp) return() } else if(length(ids) > 0 && !(tmp %in% ids)) { if(verbose) warning("ignoring id ", tmp) return() } } tmp = xmlGetAttr(node, "ignore", NA, converter = as.logical) if(!is.na(tmp) && tmp) { if(verbose) warning("ignoring node", as(node, "character")) return() } # go through the node and see if there are any r:code nodes # with a ref attribute # and go fetch the corresponding node. txt = paste(getRCode(node, namespaces), collapse = "\n") if(!parse) return(txt) # txt = xmlValue(node) if(verbose) cat("*************\nEvaluating node\n") cmd = parse(text = txt) if(echo) print(cmd) if(eval) { if(ask) { w = utils::menu(c("evaluate", "skip", "terminate")) if(w == 2) return(NULL) else if(w == 3) stop("User terminated the xmlSource") } isPlot = xmlName(node) == "plot" if(isPlot) { f = xmlGetAttr(node, "img") if(!is.null(f)) { attrs = xmlAttrs(node) dev = openDevice(f, attrs) } if(!xmlGetAttr(node, "continuePlot", FALSE, as.logical)) on.exit(dev.off()) } ans = eval(cmd, envir) if(isPlot && inherits(ans, "trellis")) print(ans) ans } else cmd } openDevice = function(f, attrs) { if("format" %in% names(attrs)) ext = attrs["format"] else ext = getExtension(f) fun = switch(ext, png = png, jpeg = jpeg, pdf = pdf) args = lapply(c("width", "height"), getDevAttr, attrs, fun, as.numeric) cat("opening device for", f, "\n") fun(f) } getDevAttr = function(name, attrs, devFun, converter = as.character) { if(name %in% names(attrs)) converter(attrs[[name]]) else if(name %in% names(formals(devFun))) formals(devFun)[[name]] else converter(NA) } getExtension = function(f) { gsub(".*\\.", "", basename(f)) } getRCode = function(node, namespaces = c(r = "http://www.r-project.org"), recursive = TRUE, dropOutput = FALSE) { tmp = xmlSApply(node, function(x) { if(inherits(x, c("XMLInternalCommentNode", "XMLInternalPINode"))) { } else if(inherits(x, "XMLInternalElementNode") && xmlName(x, full = TRUE) %in% c("r:code", "r:frag")) { ref = xmlGetAttr(x, "ref", NA) if(!is.na(ref)) { v = getNodeSet(as(x, "XMLInternalDocument"), paste(sapply(c("code", "frag"), function(x) paste("//r:", x , "[@id='", ref, "']", sep = "")), collapse = "|"), namespaces) if(length(v) == 0) stop("No code block/fragment named ", ref) else if(length(v) > 1) stop("More than 1 code block/fragment named ", ref) else if(recursive) getRCode(v[[1]], namespaces, recursive = TRUE, dropOutput = dropOutput) else xmlValue(v[[1]]) } else { if(recursive) getRCode(x, namespaces, recursive = TRUE, dropOutput = dropOutput) else xmlValue(x) } } else if(inherits(x, "XMLInternalElementNode") && xmlName(x, full = TRUE) %in% c("r:error", "r:output")) { } else xmlValue(x) }) if(dropOutput && length(names(tmp))) tmp = tmp[names(tmp) != "output"] paste(tmp, collapse = "\n") } setClass("XMLCodeFile", contains = "character") setClass("XMLCodeDoc", contains = "XMLInternalDocument") setAs("XMLCodeFile", "XMLCodeDoc", function(from) { new("XMLCodeDoc", xmlParse(from)) }) setAs("character", "XMLCodeFile", function(from) { xmlCodeFile(from) }) setAs("character", "XMLCodeDoc", function(from) { xmlCodeFile(from, TRUE) }) xmlCodeFile = function(f, parse = FALSE) { if(parse) new("XMLCodeDoc", xmlParse(f)) else new("XMLCodeFile", f) } utils::globalVariables("use_file") tmp.source = function (file, local = FALSE, echo = verbose, print.eval = echo, verbose = getOption("verbose"), prompt.echo = getOption("prompt"), max.deparse.length = 150, chdir = FALSE, encoding = getOption("encoding"), continue.echo = getOption("continue"), skip.echo = 0, keep.source = getOption("keep.source")) { if(length(verbose) == 0) verbose = FALSE if(chdir) { cwd = getwd() on.exit(setwd(cwd)) setwd(dirname(file)) } xmlSource(file, verbose = verbose) } ## This version would require us to document source() ## setGeneric("source", function(file, ...) standardGeneric("source")) ## tmp.source = ## function (file, verbose = getOption("verbose"), chdir = FALSE, ...) ## { ## if(length(verbose) == 0) ## verbose = FALSE ## if(chdir) { ## cwd = getwd() ## on.exit(setwd(cwd)) ## setwd(dirname(file)) ## } ## xmlSource(file, verbose = verbose) ## } setMethod("source", "XMLCodeFile", tmp.source) setMethod("[[", "XMLCodeFile", function(x, i, j, ..., env = globalenv()) { doc = as(x, "XMLCodeDoc") n = getNodeSet(doc, paste("//*[@id=", sQuote(i), "]")) if(length(n) == 0) { # This needs code from ptoc to determine the name of an "element" ## was updateIds(doc, save = x), which was giving ## byte-compilation warnings in *other* packages. doc = updateIds(doc) } eval(parse(text = xmlValue(n[[1]])), envir = env) }) updateIds = function(doc, ...) { nodes = getNodeSet(doc, "//r:function[not(@id) and not(@eval = 'false')]|//r:code[not(@id) and not(@eval = 'false')]", c("r" = "http://www.r-project.org")) sapply(nodes, getCodeVar) } getCodeVar = function(node) { e = parse(text = getRCode(node)) e = e[[length(e)]] # This should use the code in ptoc in RTools. id = if(class(e) %in% c("=", "<-")) as.character(e[[2]]) else NA if(!is.na(id)) addAttributes(node, id = id) id } # # f = xmlCodeFile("~/Classes/stat242-08/Code/FormulaWeights/rpartScope.xml") # source(f) # f[["rpart.model"]] setGeneric("xmlSourceFunctions", function(doc, ids = character(), parse = TRUE, ...) { standardGeneric("xmlSourceFunctions") }) setMethod("xmlSourceFunctions", "character", # # evaluate the r:function nodes, or restricted to @id from ids. # function(doc, ids = character(), parse = TRUE, ...) { invisible(xmlSourceFunctions(xmlParse(doc), ids, parse = parse, ...)) }) sQuote = function(x) sprintf("'%s'", as.character(x)) setMethod("xmlSourceFunctions", "XMLInternalDocument", # # evaluate the r:function nodes, or restricted to @id from ids. # function(doc, ids = character(), parse = TRUE, setNodeNames = FALSE, ...) { if(length(ids)) nodes = getNodeSet(doc, paste("//r:function[", paste("@id", sQuote(ids), sep = "=", collapse = " or " ), "]"), c(r = "http://www.r-project.org")) else nodes = getNodeSet(doc, "//r:function[not(ancestor-or-self::*/@eval = 'false')]", c(r = "http://www.r-project.org")) if(parse == FALSE) return(nodes) ans = xmlSource(nodes, ...) if(setNodeNames) names(ans) = sapply(nodes, getRCodeNodeName) invisible(ans) }) getRCodeNodeName = function(node) { xmlGetAttr(node, "name", xmlGetAttr(node, "idx", getTaskId(node))) } ################ setGeneric("xmlSourceSection", function(doc, ids = character(), xnodes = c(".//r:function", ".//r:init[not(@eval='false')]", ".//r:code[not(@eval='false')]", ".//r:plot[not(@eval='false')]"), namespaces = DefaultXPathNamespaces, ...) standardGeneric("xmlSourceSection")) setMethod("xmlSourceSection", "character", function(doc, ids = character(), xnodes = c(".//r:function", ".//r:init[not(@eval='false')]", ".//r:code[not(@eval='false')]", ".//r:plot[not(@eval='false')]"), namespaces = DefaultXPathNamespaces, ...) xmlSourceSection(xmlParse(doc), ids, xnodes, namespaces, ...)) setMethod("xmlSourceSection", "XMLInternalDocument", function(doc, ids = character(), xnodes = c(".//r:function", ".//r:init[not(@eval='false')]", ".//r:code[not(@eval='false')]", ".//r:plot[not(@eval='false')]"), namespaces = DefaultXPathNamespaces, ...) { nodes = getNodeSet(doc, "//section") aids = sapply(nodes, xmlGetAttr, "id", NA) m = pmatch(ids, aids) if(any(is.na(m))) { # for those ids the caller gave us that didn't match # compare these to the titles. i = which(is.na(m)) tmp = ids[i] j = pmatch(ids[i], sapply(nodes, function(x) { tmp = getNodeSet(x, "./title") if(length(tmp)) xmlValue(tmp[[1]]) else "" })) m[i [!is.na(j)]] = j[!is.na(j)] } if(any(is.na(m))) stop("cannot match section id or title for ", paste(m[is.na(m)], collapse = ", ")) lapply(nodes[m], evalSection, xnodes, namespaces, ...) }) evalSection = function(node, xnodes, namespaces = DefaultXPathNamespaces, envir = globalenv(), ...) { # Or use xnodes by stripping away any [] and .// if(xmlName(node, TRUE) %in% c("r:function", "r:plot", "r:code", "r:graphics")) return(evalNode(node, envir, ...)) xpath = paste(xnodes, collapse = "|") nodes = getNodeSet(node, xpath, namespaces) sapply(nodes, evalNode, envir, ...) } ############################################# # r:code[@thread='name']|r:code[ancestor::*[@thread='name']] setGeneric("xmlSourceThread", function(doc, id, envir = globalenv(), ..., xnodes = c("r:function", "r:init", "r:code", "r:plot")) standardGeneric("xmlSourceThread")) setMethod("xmlSourceThread", "character", function(doc, id, envir = globalenv(), ..., xnodes = c("r:function", "r:init", "r:code", "r:plot")) xmlSourceThread(xmlParse(doc), id, envir, ..., xnodes = xnodes) ) setMethod("xmlSourceThread", "list", function(doc, id, envir = globalenv(), ..., xnodes = c("r:function", "r:init", "r:code", "r:plot")) sapply(doc, evalNode, envir = envir, ..., xnodes = xnodes)) if(FALSE) setMethod("xmlSourceThread", "XMLNodeList", function(doc, id, envir = globalenv(), ..., xnodes = c(".//r:function", ".//r:init[not(@eval='false')]", ".//r:code[not(@eval='false')]", ".//r:plot[not(@eval='false')]")) sapply(doc, evalNode, envir = envir, ...)) setMethod("xmlSourceThread", "XMLInternalDocument", function(doc, id, envir = globalenv(), ..., xnodes = c("r:function", "r:init", "r:code", "r:plot")) { # all the nodes that are "under" this thread. xp = sprintf("//*[@thread='%s']", id) anc = sprintf("//%s[not(ancestor::*[@thread]) and not(ancestor::altApproach)]", xnodes) xp = paste(c(xp, anc), collapse = " | ") nodes = getNodeSet(doc, xp) sapply(nodes, evalSection, envir = envir, ..., xnodes = xnodes) }) setGeneric("xmlSourceTask", function(doc, id, envir = globalenv(), ...) { standardGeneric("xmlSourceTask") } ) # nodes = c("r:code", "r:plot", "r:expr") # fmt = paste(nodes, "[@thread='%s']", sep = "") # xp = paste(sprintf(fmt, id), collapse = " | ") # paste(nodes[] # getNodeSet # "r:code[@thread='%s']|r:plot[@thread='%s']|r:expr[' # paste(getNode # }) # See tangle.R and xmlTangle. # Fix this up. Just thrown down one morning. xmlToCode = tangle = function(doc, file = stdout()) { e = xmlSourceFunctions(doc, eval = FALSE) # want to avoid parsing. if(!is(file, "connection")) con = file(file, "w") sapply(e, function(x) cat(x, "\n", file = file)) file } getTaskId = function(node) { els = getNodeSet(node, ".//ancestor::task") if(length(els)) xmlGetAttr(els[[1]], "id") else "" }