R version 2.15.0 (2012-03-30)
Copyright (C) 2012 The R Foundation for Statistical Computing
ISBN 3-900051-07-0
Platform: x86_64-unknown-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(rpart)
> mystate <- data.frame(state.x77, region=factor(state.region))
> names(mystate) <- c("population","income" , "illiteracy","life" ,
+        "murder", "hs.grad", "frost", "area", "region")
> #
> # This test came out of a user query claiming that cp does not scale
> #  with the data size.  It does.
> #
> # tdata = 20 copies of "mystate" (5 stacked copies, then 4 copies of that).
> #  Trees fit to tdata and trees fit to mystate should be the same
> #  (they are), except for the n's.
> #
> tdata <- rbind(mystate, mystate, mystate, mystate, mystate)
> tdata <- rbind(tdata, tdata, tdata, tdata)
> # Reference fit on the original data; xval=0 turns off cross-validation.
> tfit1 <- rpart(income ~ population + illiteracy + murder + hs.grad + region,
+                data = mystate, method = "anova", xval=0, cp=.089)
> # Same model on the replicated data.  minsplit and minbucket are raised to
> #  400 and 140 so the node-size limits grow with the data -- presumably
> #  20 times the rpart.control defaults (20 and 7); confirm against its docs.
> tfit2 <- rpart(income ~ population + illiteracy + murder + hs.grad + region,
+                data = tdata, method='anova', xval=0, cp=.089,
+                minsplit=400, minbucket=140)
> 
> # Compare the split tables without their first column, which presumably
> #  holds the per-split counts (the n's) that differ by construction.
> all.equal(tfit1$splits[,-1], tfit2$splits[,-1])
[1] TRUE
> 
> 
> proc.time()
   user  system elapsed 
  0.237   0.049   0.324 