Using R I've fit an ordered logistic regression to some data. I need to use the fitted values to predict future values. I have a dataset which has 10 different combinations of parameters I want to predict the probabilities for the 'Result' dependant variable (which the model is fit to). I've used 'split' to split into 10 separate dataframes and I want to try to run the predict statement against each of the outputted dataframes. Can someone show me how to: a. Create a function to run the predict statement, and b. Loop the function through the 10 dataframes? c. Output the prediction probabilities for the three result conditions, and bind them to the relevant input dataframe
Here's what I've tried:
# Fit the model
library(MASS)
fit <- polr(Result ~ State + score1+ score2 + history + pastdefault, data = dat, Hess=TRUE)
## Model Summary
summary(fit)
#Take the new data which doesn't have predictions
t13<- dat[ which(dat$Tranche==13), ]
out <- split( t13 , f = r13$Gamen )
#Function
pred <- function(Result, State, score1, score2, history, pastdefault,...){
cbind(out, predict(fit, out, type = "probs"))}
#Use sapply to run prediction
pred.out <- sapply(out, pred)
Further to a suggestion I've tried running the predict statement against the result of splitting the data into 'out' as per code below:
#Predict against 'out' dataframes
newdat1 <- cbind(out, predict(fit, newdata=out, type = "probs"))
However I get the same error message:
in eval(expr, envir, enclos) : object 'State' not found
Here's what the data looks like using head(data):
State score1 score2 Salaries history exp moody Result pastdefault Tranche
1 Minnesota 6.000 17.5 35.948 9 579 1085 moreinfo yes 1
2 New Mexico 4.586 17.2 28.493 11 530 1015 fail No 1
3 Washington 5.906 20.2 36.151 48 494 937 fail No 1
4 West Virginia 6.107 14.8 31.944 17 484 932 moreinfo No 1
5 Louisiana 4.761 16.8 26.461 9 535 1021 pass yes 2
6 Oklahoma 4.845 15.5 28.172 9 536 1027 pass yes 2
Gamen
1 1
2 2
3 3
4 4
5 1
6 2
Here is the result of dput(data)
:
ture(list(State = structure(c(23L, 31L, 47L, 48L, 18L, 36L,
39L, 42L, 13L, 14L, 16L, 35L, 50L, 1L, 20L, 27L, 30L, 7L, 9L,
34L, 43L, 3L, 4L, 19L, 25L, 26L, 37L, 8L, 29L, 38L, 2L, 21L,
32L, 6L, 10L, 11L, 15L, 24L, 5L, 17L, 33L, 40L, 41L, 46L, 49L,
28L, 12L, 22L, 45L, 32L, 9L, 5L, 44L, 13L, 38L, 19L, 25L, 26L,
37L, 8L), .Label = c("Alabama", "Alaska", "Arizona", "Arkansas",
"California", "Colorado", "Connecticut", "Delaware", "Florida",
"Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa",
"Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts",
"Michigan", "Minnesota", "Mississippi", "Missouri", "Montana",
"Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico",
"New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma",
"Oregon", "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota",
"Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington",
"West Virginia", "Wisconsin", "Wyoming"), class = "factor"),
score1 = c(6, 4.586, 5.906, 6.107, 4.761, 4.845, 7.469, 4.388,
6.136, 5.826, 5.817, 6.162, 6.16, 4.405, 7.245, 5.935, 9.774,
8.817, 5.718, 4.775, 5.222, 4.778, 4.459, 6.428, 5.383, 5.692,
6.436, 7.03, 5.859, 7.109, 8.963, 7.287, 9.623, 5.443, 5.193,
6.078, 5.483, 4.08, 4.992, 5.217, 5.077, 4.797, 4.775, 5.327,
6.93, 5.16, 4.21, 6.994, 6.75, 5.859, 7.109, 8.963, 3.656,
7.287, 9.623, 5.443, 6.078, 5.483, 4.08, 4.992), score2 = c(17.5,
17.2, 20.2, 14.8, 16.8, 15.5, 14.7, 18.6, 17.3, 17.5, 15.1,
16.6, 14.9, 17.2, 17, 14.5, 13.8, 14.4, 19.1, 15.3, 15.7,
19.3, 17.1, 13.8, 15.5, 16.3, 19.9, 16.6, 15.6, 17.1, 17.6,
14.8, 15.2, 18.4, 16.3, 17.9, 15.8, 17.5, 24, 17, 16.2, 16.4,
14.4, 14.6, 15.9, 18.7, 19.1, 20.1, 13.8, 16.6, 15.6, 17.1,
24.3, 17.6, 14.8, 15.2, 24, 17, 16.2, 14.4), Salaries = c(35.948,
28.493, 36.151, 31.944, 26.461, 28.172, 40.729, 32.477, 39.431,
36.785, 34.652, 36.802, 31.285, 31.144, 40.661, 30.922, 46.087,
50.045, 32.588, 26.327, 31.223, 32.175, 28.934, 31.972, 31.189,
28.785, 38.555, 39.076, 34.72, 44.51, 47.951, 40.795, 47.612,
34.571, 32.291, 38.518, 31.511, 26.818, 41.078, 32.257, 30.793,
30.279, 25.994, 33.987, 37.746, 34.836, 29.783, 41.895, 35.406,
28.785, 38.555, 39.076, 29.082, 31.511, 26.818, 41.078, 32.257,
25.994, 33.987, 37.746), history = c(9L, 11L, 48L, 17L, 9L,
9L, 70L, 12L, 13L, 58L, 9L, 23L, 10L, 8L, 64L, 9L, 70L, 81L,
48L, 5L, 47L, 27L, 6L, 68L, 9L, 21L, 51L, 68L, 70L, 70L,
47L, 80L, 74L, 29L, 65L, 57L, 5L, 4L, 45L, 11L, 60L, 58L,
5L, 65L, 9L, 30L, 15L, 11L, 68L, 45L, 11L, 60L, 4L, 58L,
5L, 65L, 9L, 5L, 65L, 80L), exp = c(579L, 530L, 494L, 484L,
535L, 536L, 463L, 543L, 560L, 467L, 557L, 515L, 525L, 538L,
479L, 556L, 478L, 477L, 469L, 592L, 474L, 496L, 523L, 469L,
550L, 536L, 499L, 468L, 491L, 461L, 489L, 477L, 473L, 518L,
448L, 482L, 583L, 540L, 485L, 522L, 454L, 443L, 563L, 468L,
572L, 483L, 511L, 549L, 472L, 526L, 528L, 530L, 563L, 532L,
534L, 536L, 538L, 540L, 542L, 544L), moody = c(1085L, 1015L,
937L, 932L, 1021L, 1027L, 888L, 1040L, 1048L, 882L, 1060L,
975L, 1001L, 1029L, 909L, 1050L, 898L, 908L, 889L, 1107L,
893L, 944L, 1005L, 896L, 1045L, 1009L, 947L, 897L, 935L,
880L, 934L, 907L, 892L, 980L, 854L, 889L, 1099L, 1036L, 902L,
999L, 865L, 844L, 1068L, 896L, 1073L, 917L, 979L, 1033L,
901L, 1002L, 1007L, 1011L, 1076L, 1016L, 900L, 760L, 1029L,
850L, 1038L, 990L), Result = structure(c(3L, 2L, 2L, 3L,
4L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 3L, 2L, 4L, 3L, 3L, 3L, 3L,
4L, 4L, 2L, 3L, 3L, 3L, 4L, 2L, 4L, 2L, 4L, 2L, 3L, 2L, 4L,
3L, 4L, 4L, 2L, 2L, 3L, 4L, 2L, 4L, 3L, 3L, 4L, 2L, 3L, 3L,
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"fail", "moreinfo", "pass"), class = "factor"), pastdefault = structure(c(2L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L), .Label = c("No",
"yes"), class = "factor"), Tranche = c(1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L,
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 12L,
12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L), Gamen = c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 5L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L,
6L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L)), .Names = c("State", "score1", "score2",
"Salaries", "history", "exp", "moody", "Result", "pastdefault",
"Tranche", "Gamen"), class = "data.frame", row.names = c(NA,
-60L))