R version 2.15.2 (2012-10-26) -- "Trick or Treat" Copyright (C) 2012 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # > # Treble test for class trees with 2 outcomes > # > # fit1 and fit1b failed equality because .7 and .3 are not easily represented > # in binary. Thus a complexity param was 4e-17 (basically 0, but enough > # to cause a split where it shouldn't be). Eric Lunde 2005-08-03 > library(rpart) > control <- rpart.control(maxsurrogate=0, cp=1e-15, xval=0) > > fit1 <- rpart(Kyphosis ~ Age + Number + Start, data=kyphosis, + control=control, + parms=list(prior=c(.7,.3), + loss=matrix(c(0,1,2,0),nrow=2,ncol=2))) > wts <- rep(3, nrow(kyphosis)) > fit1b <- rpart(Kyphosis ~ Age + Number + Start, data=kyphosis, + control=control, + weights=wts, + parms=list(prior=c(.7,.3), + loss=matrix(c(0,1,2,0),nrow=2,ncol=2))) > fit1b$frame$wt <- fit1b$frame$wt/3 > fit1b$frame$dev <- fit1b$frame$dev/3 > fit1b$frame$yval2[,2:3] <- fit1b$frame$yval2[,2:3]/3 > fit1b$splits[,3] <- fit1b$splits[,3]/3 > fit1b$variable.importance <- fit1b$variable.importance/3 > all.equal(fit1[-3], fit1b[-3]) #all but the "call" [1] TRUE > > # Now for a set of non-equal weights > nn <- nrow(kyphosis) > pseudo <- double(nn) > pseudo[1] <- pi/6 > for (i in 2:nn) pseudo[i] <- 4*pseudo[i-1]*(1-pseudo[i-1]) > > wts <- rep(1:7, length=nn) > temp <- rep(1:nn, wts) #row replicates > xgrp <- rep(1:10, length=nn)[order(pseudo)] > xgrp2<- rep(xgrp, wts) > > # The cp value stops one last split 
where the two predictors are > # completely equal in importance (perfect surrogates), but the > # weighted and unweighted pick a different one due to round off error > tempc <- rpart.control(minsplit=2, xval=xgrp2, maxsurrogate=0, cp=.039) > # Direct: replicate rows in the data set, and use unweighted > fit2 <- rpart(Kyphosis ~ Age + Number + Start, data=kyphosis[temp,], + control=tempc, + parms=list(prior=c(.7,.3), + loss=matrix(c(0,1,2,0),nrow=2,ncol=2))) > # Weighted > tempc <- rpart.control(minsplit=2, xval=xgrp, maxsurrogate=0, cp=.039) > fit2b <- rpart(Kyphosis ~ Age + Number + Start, data=kyphosis, + control=tempc, weights=wts, + parms=list(prior=c(.7,.3), + loss=matrix(c(0,1,2,0),nrow=2,ncol=2))) > > all.equal(fit2$frame[,-2], fit2b$frame[,-2]) # the "n" component won't match [1] TRUE > all.equal(fit2$cptable, fit2b$cptable) [1] TRUE > all.equal(fit2$splits[,-1],fit2b$splits[,-1]) [1] TRUE > all.equal(fit2$csplit, fit2b$csplit) [1] TRUE > > proc.time() user system elapsed 0.200 0.032 0.218