#ifndef RPART_NODE_H
#define RPART_NODE_H
/*
** definition of a node in the tree
*
** The actual size of these structures when allocated in insert_split.c
** depends on the split.
** csplit[0] gets used even for continuous splits.
*/

/*
** One candidate split (primary or surrogate) on a single variable.
** Field semantics are described in detail in the long comment that
** follows the type definitions in this header.
*/
typedef struct split {
    double improve;             /* primary: improvement index; surrogate: error rate */
    double adj;                 /* for surrogates only, adjusted agreement */
    double spoint;              /* only used if it is continuous */
    struct split *nextsplit;    /* another split record; presumably the next entry
                                 * in a linked list -- confirm against callers */
    int var_num;                /* variable number of the split; 0 = no more
                                 * surrogates (or primaries) */
    int count;                  /* number of observations split using this variable */
    int csplit[20];             /* the actual length depends on splitting rule */
} Split, *pSplit;

/* The real 'nodesize' is set in rpart.c */
/*
** One node of the tree.  As with Split, the trailing array is declared
** with a nominal length of 20; the actual length depends on the
** splitting rule.
*/
typedef struct node {
    double risk;                /* risk for the node */
    double complexity;          /* complexity at which it will collapse */
    double sum_wt;              /* sum of the weights for the node */
    pSplit primary, surrogate;  /* primary and surrogate split records for this node */
    struct node *rightson;      /* right child node */
    struct node *leftson;       /* left child node */
    int num_obs;                /* presumably the number of observations in the
                                 * node -- confirm against callers */
    int lastsurrogate;          /* NOTE(review): meaning not evident from this header */
    double response_est[20];    /* actual length depends on splitting rule */
} Node, *pNode;

/*
** One row of the complexity-parameter table.
** NOTE(review): the field meanings below are inferred from the names
** only; confirm against the code that fills the table.
*/
typedef struct cptable {
    double cp;                  /* presumably the complexity parameter */
    double risk;                /* presumably the risk at this cp */
    double xrisk;               /* presumably the cross-validated risk */
    double xstd;                /* presumably the std. error of xrisk */
    int nsplit;                 /* presumably the number of splits */
    struct cptable *forward, *back;     /* links to other table rows; the names
                                         * suggest a doubly linked list */
} cpTable, *CpTable;

/**************************************************************************
*
* Split:
*      variable number of the split; 0 = no more surrogates (or primaries)
*
*      split point: the actual split point for a continuous variable
*
*      improve:  For primary splits, the improvement index returned by the
*                 bsplit routine.  This is the measure that determines the
*                 winning split.
*                For surrogate splits, this holds the error rate, i.e., the
*                 % incorrect guesses of the primary by using this surrogate.
*
*      count: The number of observations split using this variable.  For the
*             first primary, this will = the number of non-missing values.
*             For surrogates, it will be the number missing in the primary
*             and all earlier surrogates but not missing on this one.  (For
*             all primaries but the first, the number is theoretical).
*
*      adj:  Let "maj" be the %agreement for going with the majority,
*             and "agree" the %agreement for this surrogate.
The * adjusted value is (agree - maj)/(1-maj); the amount of * the potential improvement actually realized. The denominator * for both percents depends on the sur_agree option. * * csplit[0]: For a continuous variable, we also need to know the * direction of the split. We use this "extra" variable * as 1: