I have a Kaplan-Meier survival dataset that already contains time, survival probability values, and the survival probabilities for both the lower and upper 95% CI bounds. I have posted a clip of my dataset below. I was hoping someone knows how to merge my two plots, normalize them, and make the curves continuous despite the missing values. I would like my final graph to look like this [2].
# Dump the first 20 rows of kmcurvetest_2 in a copy-pasteable form.
kmcurvetest_2[1:20, ] %>% dput()
# The structure() literal below is a 20-row tibble with columns Time, Cohort1,
# C1Lower95, C1Upper95, Cohort2, C2Lower95 and C2Upper95. Some Cohort1/Cohort2
# entries (and their matching CI bounds) are NA, and Time skips 14.
# NOTE(review): presumably this is the pasted output of the dput() call above.
structure(list(Time = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 15, 16, 17, 18, 19, 20), Cohort1 = c(0.904255319148936,
0.898936170212766, 0.887769261266023, 0.887769261266023, 0.887769261266023,
0.87631417402388, 0.87631417402388, NA, NA, 0.87631417402388,
0.864551567661143, 0.858629981581273, 0.852708395501402, NA,
0.852708395501402, 0.846745399728665, 0.846745399728665, 0.840740113205766,
NA, 0.840740113205766), C1Lower95 = c(0.852338104650895, 0.846140749965675,
0.833054851312184, 0.833054851312184, 0.833054851312184, 0.819696863257612,
0.819696863257612, NA, NA, 0.819696863257612, 0.806043967960357,
0.799218079053227, 0.792429563598159, NA, 0.792429563598159,
0.785616930383783, 0.785616930383783, 0.778778500012501, NA,
0.778778500012501), C1Upper95 = c(0.938570469008423, 0.934312293965728,
0.92534844712446, 0.92534844712446, 0.92534844712446, 0.916056348120451,
0.916056348120451, NA, NA, 0.916056348120451, 0.906427391600421,
0.901537491012523, 0.8966168920045, NA, 0.8966168920045, 0.891638921203334,
0.891638921203334, 0.886603579837755, NA, 0.886603579837755),
Cohort2 = c(0.707462686567164, 0.692537313432835, 0.683384837924912,
0.674232362416989, 0.674232362416989, 0.668074989244231,
NA, 0.664996302657852, 0.664996302657852, 0.658781383941424,
0.652507275522934, 0.649370221313689, 0.646217938685953,
0.643065656058216, 0.630394411603867, 0.62722660049028, 0.624058789376693,
0.620890978263105, 0.617723167149518, 0.614539027112665),
C2Lower95 = c(0.655564487332025, 0.640091667602195, 0.630607727619003,
0.62114710952213, 0.62114710952213, 0.614788099004335, NA,
0.611612499799214, 0.611612499799214, 0.605202384226936,
0.598734349944198, 0.595504428845739, 0.592259587632446,
0.589017489398546, 0.576004700295779, 0.572758317180272,
0.569514623188025, 0.566273601091399, 0.56303523423295, 0.5597807789553
), C2Upper95 = c(0.753046097156017, 0.738936670959587, 0.730275198102735,
0.721591223004285, 0.721591223004285, 0.715742377703966,
NA, 0.712814219355565, 0.712814219355565, 0.706901638437748,
0.700928732359048, 0.697938428282602, 0.694932646561064,
0.691924293962202, 0.679812432812405, 0.67677809121533, 0.673741229385084,
0.670701861632804, 0.667660001811057, 0.664601682804447)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
My dataset contains missing values, and I attempted to make my geom_line continuous despite them by dropping the NA rows before plotting: ggplot(data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort2),] , mapping = aes(x = Time, y = Cohort2))
My code for the two plots is:
# Plot cohort 1: survival estimate (points + line) with its 95% CI ribbon.
# Rows where Cohort1 is NA are dropped up front so geom_line() joins the
# remaining observations instead of breaking at the gaps.
ggplot(
  data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort1), ],
  mapping = aes(x = Time, y = Cohort1)
) +
  geom_point(size = 1) +
  # Colour/fill are mapped inside aes() (to a constant label) rather than set
  # as fixed values; otherwise no colour scale is trained and the "Cohort"
  # legend requested below is never drawn.
  geom_line(aes(color = "Cohort1")) +
  geom_ribbon(
    aes(ymin = C1Lower95, ymax = C1Upper95, fill = "Cohort1"),
    alpha = 0.2
  ) +
  labs(
    title = "Inpatient Hospitalization",
    x = "Time [Days]",
    y = "Survival [%]"
  ) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  # Same name and labels on both scales so ggplot2 merges them into one legend.
  scale_color_manual(name = "Cohort", values = c(Cohort1 = "blue")) +
  scale_fill_manual(name = "Cohort", values = c(Cohort1 = "blue"))
# Plot cohort 2: survival estimate (points + line) with its 95% CI ribbon.
# Rows where Cohort2 is NA are dropped up front so geom_line() joins the
# remaining observations instead of breaking at the gaps.
ggplot(
  data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort2), ],
  mapping = aes(x = Time, y = Cohort2)
) +
  geom_point(size = 1) +
  # Colour/fill are mapped inside aes() (to a constant label) rather than set
  # as fixed values; otherwise no colour scale is trained and the "Cohort"
  # legend requested below is never drawn.
  geom_line(aes(color = "Cohort2")) +
  geom_ribbon(
    aes(ymin = C2Lower95, ymax = C2Upper95, fill = "Cohort2"),
    alpha = 0.2
  ) +
  labs(
    title = "Inpatient Hospitalization",
    x = "Time [Days]",
    y = "Survival [%]"
  ) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  # Legend entry is "Cohort2" (the original was mislabelled "Cohort1"); same
  # name and labels on both scales so ggplot2 merges them into one legend.
  scale_color_manual(name = "Cohort", values = c(Cohort2 = "red")) +
  scale_fill_manual(name = "Cohort", values = c(Cohort2 = "red"))
Thank you, I really appreciate it! I have attached the images in question above for reference.