Is it possible to get a p-value for each node of a classification tree in R? I am using rpart and can't find a p-value for any node. Maybe this is only possible for a regression tree and not for a tree with a categorical outcome.
structure(list(subj = c(702L, 702L, 702L, 702L, 702L, 702L, 702L,
702L, 702L, 702L, 702L, 702L, 702L, 702L, 702L, 702L, 702L, 702L,
702L, 702L, 702L, 702L, 702L, 702L), visit = c(4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L), run = structure(c(1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L), .Label = c("A", "B", "C", "D", "E", "xdur", "xend60", "xpre"
), class = "factor"), ho = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), hph = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), longexer = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("10min", "60min"), class = "factor"),
esq_sick = c(NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L,
NA, NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA), esq_sick2 = c(NA,
NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA,
NA, NA, 0L, NA, NA, NA, NA, NA), ll_sick = c(NA, NA, 0L,
NA, NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA,
0L, NA, NA, NA, NA, NA), ll_sick2 = c(NA, NA, 0L, NA, NA,
NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L, NA,
NA, NA, NA, NA), esq_01 = c(NA, NA, 2L, NA, NA, NA, NA, NA,
NA, NA, 2L, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA,
NA), esq_02 = c(NA, NA, 1L, NA, NA, NA, NA, NA, NA, NA, 2L,
NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA), esq_03 = c(NA,
NA, 0L, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA,
NA, NA, 0L, NA, NA, NA, NA, NA), esq_04 = c(NA, NA, 0L, NA,
NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L,
NA, NA, NA, NA, NA), esq_05 = c(NA, NA, 0L, NA, NA, NA, NA,
NA, NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA,
NA, NA), esq_06 = c(NA, NA, 1L, NA, NA, NA, NA, NA, NA, NA,
1L, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA),
esq_07 = c(NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L, NA,
NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA), esq_08 = c(NA,
NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA,
NA, NA, 0L, NA, NA, NA, NA, NA), esq_09 = c(NA, NA, 0L, NA,
NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L,
NA, NA, NA, NA, NA), esq_10 = c(NA, NA, 0L, NA, NA, NA, NA,
NA, NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, 0L, NA, NA, NA,
NA, NA)), .Names = c("subj", "visit", "run", "ho", "hph",
"longexer", "esq_sick", "esq_sick2", "ll_sick", "ll_sick2", "esq_01",
"esq_02", "esq_03", "esq_04", "esq_05", "esq_06", "esq_07", "esq_08",
"esq_09", "esq_10"), row.names = 7:30, class = "data.frame")
# Fit and plot a classification tree for `esq_sick2` using the binary
# symptom items `esq_01_bin` ... `esq_20_bin` as predictors.
#
# NOTE: rpart chooses splits by impurity reduction and prunes by
# cost-complexity; it does not compute a p-value per node. If per-node
# p-values are required, a conditional inference tree (e.g.
# partykit::ctree) is the usual alternative -- not used here to avoid
# adding a dependency.
library(rpart)

# Comma-separated file with a header row, read from the working directory.
alldata <- read.table("symptomology CSV2.csv", header = TRUE, sep = ",")

# Build esq_sick2 ~ esq_01_bin + ... + esq_20_bin without a 300-character
# formula literal; sprintf() zero-pads the item number to two digits.
predictors <- sprintf("esq_%02d_bin", 1:20)
tree_formula <- reformulate(predictors, response = "esq_sick2")

# method = "class" requests a classification (not regression) tree.
fit <- rpart(tree_formula, method = "class", data = alldata)

# The original call also passed a bare `nspace` symbol (copied from the
# plot.rpart usage line). It was never defined and is ignored when
# compress = FALSE, so it is dropped here.
plot(
  fit,
  uniform = FALSE,
  branch = 1,
  compress = FALSE,
  margin = 0.1, # extra white space so node labels are not clipped
  minbranch = 0.3
)

# Label every node (all = TRUE) and include observation counts (use.n).
text(fit, use.n = TRUE, all = TRUE, cex = 0.8)