I would like some help in creating a dataframe in R for per predicted chances.
I have a dataset of 156 patients, 39 without disease and 117 with disease and all patients have a predicted chance of disease (0,00-1). To determine a cut-off point I would like to create a dataset in which per increase of 1% chance the amount of with and without disease is shown.
So a dataset with 101 obs and 3 variables (percent chance, amount of patients with disease, amount of patients without disease)
I created the following loop, but it results in 287 observations.
Valid2$disease_T <- Valid2$disease_present
Valid2$disease_F <- ifelse(Valid2$disease_present == F, T, F)
plot2 <- data.frame()
for (i in 0:100) {
per <- i
amount <- Valid2 %>%
select(disease_T, disease_F, predicted) %>%
filter (predicted >= (i/100)) %>%
count(disease_T, disease_F)
plot2 <- rbind(plot2, per, amount)
}
dput of Valid2
structure(list(disease_T = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE,
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE,
TRUE, FALSE, FALSE, FALSE, TRUE), disease_F = c(FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE,
FALSE, TRUE, TRUE, TRUE, FALSE), predicted = c(0.839047189743821,
0.967837763160834, 0.786317285878715, 0.675276246104666, 0.989345368556793,
0.555983868574255, 0.990942898545037, 0.811649758224556, 0.98426150620371,
0.736827268936253, 0.979475139855437, 0.774123207779965, 0.957903428160418,
0.91999882380089, 0.942305155149908, 0.991921678870239, 0.991594114313076,
0.849292946424851, 0.97569066014882, 0.884909080808976, 0.979013574591358,
0.998862491367365, 0.923394431637592, 0.885001406361028, 0.877973037376933,
0.728550121991711, 0.999833278501221, 0.897790177969505, 0.979220428963593,
0.999499362617635, 0.954494527800288, 0.961765702221913, 0.769280528680882,
0.882613166009711, 0.986217524084092, 0.954192408993744, 0.872914657137128,
0.531299550904597, 0.893933172484704, 0.480533848683641, 0.993094398777802,
0.824802835501908, 0.947570508969646, 0.980540528086075, 0.583395480728772,
0.822397774199589, 0.883912440796693, 0.594776303699779, 0.820790192098386,
0.798073877022062, 0.985966496663131, 0.897069576653414, 0.831132493632043,
0.852151002809375, 0.998435303514582, 0.953651187490499, 0.986591941625218,
0.955233427577533, 0.683855028732251, 0.996623760378059, 0.899794970651676,
0.89347885202678, 0.769929537527992, 0.795764657045519, 0.982146943465671,
0.776440313355684, 0.914849623160958, 0.970905780657116, 0.500682448346073,
0.863082837731646, 0.898528543977351, 0.961071059679792, 0.914764045084384,
0.740320486660299, 0.558067744300918, 0.924289384162258, 0.640027164915262,
0.991129364781095, 0.94097585658508, 0.948216610615068, 0.789283230550332,
0.965724911188744, 0.992147113271609, 0.990048301774303, 0.929031670004039,
0.909219568552839, 0.417005262010727, 0.954046684763806, 0.954662032660194,
0.592707632714186, 0.71736673909297, 0.815939418957414, 0.530573198572189,
0.411013385804287, 0.456143973275274, 0.98418041813448, 0.999743784673911,
0.748231061596753, 0.957616694642036, 0.936005342173473, 0.990966443212461,
0.998088129637225, 0.920524349831836, 0.995196908913598, 0.974348828931041,
0.973717852722492, 0.938994862330677, 0.533156741117527, 0.990726457099523,
0.513768273986449, 0.638444161218626, 0.858677012819686, 0.791287902868353,
0.588849209133098, 0.44811826975699, 0.508084588253886, 0.530249454616573,
0.488225901918474, 0.500562684131604, 0.317898539696961, 0.242177319047234,
0.609587716312933, 0.539440893692799, 0.355494594307387, 0.266099968050094,
0.723932395532802, 0.723938401792491, 0.53390474557177, 0.634097434175427,
0.775172549607967, 0.570928462844033, 0.522356812838135, 0.724635429147149,
0.610630883290112, 0.565371382980066, 0.285283409047343, 0.343302659495403,
0.816510539572742, 0.656765409452827, 0.626301633190735, 0.383723283273525,
0.594260652384327, 0.556639518107367, 0.418173506333977, 0.278806555045948,
0.516264516564629, 0.292843578210485, 0.576288502786766, 0.408152351764115,
0.650882387290395, 0.396480245419753, 0.834276346007703, 0.413110039326727,
0.561240114285867, 0.387299107426737, 0.620969313796766)), row.names = c(NA,
-156L), class = c("tbl_df", "tbl", "data.frame"))