I've used the exact same code for several different spreadsheets, but I seem to be getting an error for this specific spreadsheet and I cannot figure out why. This is the code attached below:
> library(readxl)
> library(sjPlot)
> data <- read_excel("C:\\Users\\hadik\\Downloads\\sentimprovdepression.xlsx")
> head(data)
# A tibble: 6 × 16
`Patient Start Positi…` `Patient Start…` `Patient Start…` Type `Therapist Sta…` `Therapist Sta…`
<dbl> <dbl> <dbl> <chr> <dbl> <dbl>
1 0.33 0.32 0.45 Pres… 0.35 0.31
2 0.34 0.32 0.43 Pres… 0.33 0.32
3 0.33 0.33 0.43 Pres… 0.34 0.32
4 0.32 0.32 0.44 Pres… 0.33 0.31
5 0.35 0.32 0.43 Pres… 0.33 0.32
6 0.35 0.32 0.42 Pres… 0.34 0.31
# … with 10 more variables: `Therapist Start Neutral Sentiment` <dbl>, Gender <chr>,
# Ethnicity <chr>, Employment_Status <chr>, StartPHQ9 <dbl>, StartGAD7 <dbl>, StartWSAS <dbl>,
# Treatment_sessions <dbl>, Age_group <chr>, Improvement <dbl>
> str(data)
tibble [12,805 × 16] (S3: tbl_df/tbl/data.frame)
$ Patient Start Positive Sentiment : num [1:12805] 0.33 0.34 0.33 0.32 0.35 0.35 0.35 0.33 0.35 0.34 ...
$ Patient Start Negative Sentiment : num [1:12805] 0.32 0.32 0.33 0.32 0.32 0.32 0.34 0.31 0.32 0.33 ...
$ Patient Start Neutral Sentiment : num [1:12805] 0.45 0.43 0.43 0.44 0.43 0.42 0.41 0.44 0.42 0.43 ...
$ Type : chr [1:12805] "Prescribed Taking" "Prescribed Taking" "Prescribed Taking" "Prescribed Taking" ...
$ Therapist Start Positive Sentiment: num [1:12805] 0.35 0.33 0.34 0.33 0.33 0.34 0.34 0.34 0.34 0.33 ...
$ Therapist Start Negative Sentiment: num [1:12805] 0.31 0.32 0.32 0.31 0.32 0.31 0.32 0.32 0.32 0.31 ...
$ Therapist Start Neutral Sentiment : num [1:12805] 0.43 0.45 0.44 0.45 0.45 0.45 0.43 0.44 0.44 0.45 ...
$ Gender : chr [1:12805] "M" "M" "F" "F" ...
$ Ethnicity : chr [1:12805] "White" "White" "White" "White" ...
$ Employment_Status : chr [1:12805] "Employed" "Employed" "Other" "Other" ...
$ StartPHQ9 : num [1:12805] 21 5 20 23 21 18 22 12 21 11 ...
$ StartGAD7 : num [1:12805] 18 3 16 15 18 11 21 13 16 4 ...
$ StartWSAS : num [1:12805] 20 8 30 37 32 23 36 25 36 14 ...
$ Treatment_sessions : num [1:12805] 7 5 15 9 13 9 10 13 10 13 ...
$ Age_group : chr [1:12805] "36-45" "46-55" "18-25" "36-45" ...
$ Improvement : num [1:12805] 1 0 0 0 1 1 0 1 1 0 ...
> ##Make sure to scale all the continuous data first##
> data$Gender <- as.factor(data$Gender)
> data$Ethnicity <- as.factor(data$Ethnicity)
> data$'Employment_Status' <- as.factor(data$'Employment_Status')
> data$'Improvement' <- as.factor(data$'Improvement')
> data$'Age_group' <- as.factor(data$'Age_group')
> data$'Type' <- as.factor(data$'Type')
> data$'StartPHQ9' <- as.integer(data$'StartPHQ9')
> data$'StartGAD7' <- as.integer(data$'StartGAD7')
> data$'StartWSAS' <- as.integer(data$'StartWSAS')
> data$'Treatment_sessions' <- as.integer(data$'Treatment_sessions')
> data$Gender <- relevel(data$Gender, ref = "F")
> data$Improvement <- as.factor(data$Improvement)
> str(data)
tibble [12,805 × 16] (S3: tbl_df/tbl/data.frame)
$ Patient Start Positive Sentiment : num [1:12805] 0.33 0.34 0.33 0.32 0.35 0.35 0.35 0.33 0.35 0.34 ...
$ Patient Start Negative Sentiment : num [1:12805] 0.32 0.32 0.33 0.32 0.32 0.32 0.34 0.31 0.32 0.33 ...
$ Patient Start Neutral Sentiment : num [1:12805] 0.45 0.43 0.43 0.44 0.43 0.42 0.41 0.44 0.42 0.43 ...
$ Type : Factor w/ 3 levels "Not Prescribed",..: 3 3 3 3 3 3 3 3 3 3 ...
$ Therapist Start Positive Sentiment: num [1:12805] 0.35 0.33 0.34 0.33 0.33 0.34 0.34 0.34 0.34 0.33 ...
$ Therapist Start Negative Sentiment: num [1:12805] 0.31 0.32 0.32 0.31 0.32 0.31 0.32 0.32 0.32 0.31 ...
$ Therapist Start Neutral Sentiment : num [1:12805] 0.43 0.45 0.44 0.45 0.45 0.45 0.43 0.44 0.44 0.45 ...
$ Gender : Factor w/ 3 levels "F","M","Z": 2 2 1 1 2 2 1 1 1 1 ...
$ Ethnicity : Factor w/ 3 levels "other","U/Z",..: 3 3 3 3 3 3 3 3 1 2 ...
$ Employment_Status : Factor w/ 4 levels "Employed","Other",..: 1 1 2 2 1 1 1 1 1 1 ...
$ StartPHQ9 : int [1:12805] 21 5 20 23 21 18 22 12 21 11 ...
$ StartGAD7 : int [1:12805] 18 3 16 15 18 11 21 13 16 4 ...
$ StartWSAS : int [1:12805] 20 8 30 37 32 23 36 25 36 14 ...
$ Treatment_sessions : int [1:12805] 7 5 15 9 13 9 10 13 10 13 ...
$ Age_group : Factor w/ 6 levels "18-25","26-35",..: 3 4 1 3 3 3 2 1 3 2 ...
$ Improvement : Factor w/ 2 levels "0","1": 2 1 1 1 2 2 1 2 2 1 ...
> logistic <- glm(Improvement ~., data=data, family = "binomial")
> summary(logistic)
Call:
glm(formula = Improvement ~ ., family = "binomial", data = data)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.7150 -1.0930 0.6314 0.9430 1.9979
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.760454 5.572445 0.495 0.620335
`Patient Start Positive Sentiment` 7.340544 3.513784 2.089 0.036701 *
`Patient Start Negative Sentiment` -15.468152 3.730949 -4.146 3.38e-05 ***
`Patient Start Neutral Sentiment` 4.414449 4.052710 1.089 0.276040
TypePrescribed Not Taking -0.260718 0.087927 -2.965 0.003025 **
TypePrescribed Taking -0.173552 0.042759 -4.059 4.93e-05 ***
`Therapist Start Positive Sentiment` 0.355103 3.750067 0.095 0.924559
`Therapist Start Negative Sentiment` -7.829533 4.358366 -1.796 0.072425 .
`Therapist Start Neutral Sentiment` -2.962382 4.514409 -0.656 0.511692
GenderM -0.058465 0.043917 -1.331 0.183101
GenderZ -0.278082 0.302149 -0.920 0.357392
EthnicityU/Z 0.297006 0.083974 3.537 0.000405 ***
EthnicityWhite 0.045357 0.076317 0.594 0.552294
Employment_StatusOther -0.348494 0.050218 -6.940 3.93e-12 ***
Employment_StatusU/Z -0.345905 0.099468 -3.478 0.000506 ***
Employment_StatusUnemployed -0.159407 0.081467 -1.957 0.050380 .
StartPHQ9 0.049765 0.005029 9.896 < 2e-16 ***
StartGAD7 0.089085 0.005057 17.618 < 2e-16 ***
StartWSAS -0.051228 0.002966 -17.272 < 2e-16 ***
Treatment_sessions 0.171704 0.006444 26.646 < 2e-16 ***
Age_group26-35 0.175195 0.052575 3.332 0.000861 ***
Age_group36-45 0.260732 0.058407 4.464 8.04e-06 ***
Age_group46-55 0.391842 0.066858 5.861 4.61e-09 ***
Age_group56-65 0.233992 0.089346 2.619 0.008820 **
Age_group66+ 0.669370 0.157014 4.263 2.02e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 17071 on 12804 degrees of freedom
Residual deviance: 15256 on 12780 degrees of freedom
AIC: 15306
Number of Fisher Scoring iterations: 3
> df <- get_model_data(logistic, type = NULL)
> df
term estimate std.error conf.level conf.low
1 Patient Start Positive Sentiment 1.541550e+03 3.513784263 0.95 1.574052e+00
2 Patient Start Negative Sentiment 1.915432e-07 3.730949432 0.95 1.277841e-10
3 Patient Start Neutral Sentiment 8.263631e+01 4.052710244 0.95 2.934254e-02
4 TypePrescribed Not Taking 7.704983e-01 0.087927189 0.95 6.485268e-01
5 TypePrescribed Taking 8.406734e-01 0.042758555 0.95 7.730920e-01
6 Therapist Start Positive Sentiment 1.426328e+00 3.750066645 0.95 9.165515e-04
7 Therapist Start Negative Sentiment 3.978113e-04 4.358366114 0.95 7.759419e-08
8 Therapist Start Neutral Sentiment 5.169562e-02 4.514409264 0.95 7.426456e-06
9 GenderM 9.432113e-01 0.043916768 0.95 8.654201e-01
10 GenderZ 7.572346e-01 0.302149420 0.95 4.188325e-01
11 EthnicityU/Z 1.345824e+00 0.083973921 0.95 1.141588e+00
12 EthnicityWhite 1.046401e+00 0.076316816 0.95 9.010260e-01
13 Employment_StatusOther 7.057500e-01 0.050217905 0.95 6.395954e-01
14 Employment_StatusU/Z 7.075795e-01 0.099467542 0.95 5.822485e-01
15 Employment_StatusUnemployed 8.526489e-01 0.081466610 0.95 7.268181e-01
16 StartPHQ9 1.051024e+00 0.005028766 0.95 1.040716e+00
17 StartGAD7 1.093174e+00 0.005056586 0.95 1.082393e+00
18 StartWSAS 9.500620e-01 0.002966029 0.95 9.445550e-01
19 Treatment_sessions 1.187326e+00 0.006443815 0.95 1.172425e+00
20 Age_group26-35 1.191479e+00 0.052575083 0.95 1.074816e+00
21 Age_group36-45 1.297880e+00 0.058407189 0.95 1.157493e+00
22 Age_group46-55 1.479703e+00 0.066857937 0.95 1.297971e+00
23 Age_group56-65 1.263634e+00 0.089345836 0.95 1.060645e+00
24 Age_group66+ 1.953006e+00 0.157013623 0.95 1.435666e+00
conf.high statistic df.error p.value p.stars p.label group xpos xmin xmax
1 1.509720e+06 2.08907076 Inf 3.670135e-02 * 1541.55 * pos 24 23.825 24.175
2 2.871155e-04 -4.14590245 Inf 3.384779e-05 *** 0.00 *** neg 23 22.825 23.175
3 2.327256e+05 1.08925854 Inf 2.760399e-01 82.64 pos 22 21.825 22.175
4 9.154096e-01 -2.96515556 Inf 3.025299e-03 ** 0.77 ** neg 21 20.825 21.175
5 9.141627e-01 -4.05888329 Inf 4.930795e-05 *** 0.84 *** neg 20 19.825 20.175
6 2.219637e+03 0.09469253 Inf 9.245590e-01 1.43 pos 19 18.825 19.175
7 2.039507e+00 -1.79643758 Inf 7.242495e-02 0.00 neg 18 17.825 18.175
8 3.598536e+02 -0.65620596 Inf 5.116916e-01 0.05 neg 17 16.825 17.175
9 1.027995e+00 -1.33126781 Inf 1.831009e-01 0.94 neg 16 15.825 16.175
10 1.369054e+00 -0.92034668 Inf 3.573916e-01 0.76 neg 15 14.825 15.175
11 1.586598e+00 3.53688776 Inf 4.048717e-04 *** 1.35 *** pos 14 13.825 14.175
12 1.215232e+00 0.59432601 Inf 5.522941e-01 1.05 pos 13 12.825 13.175
13 7.787471e-01 -6.93964197 Inf 3.930949e-12 *** 0.71 *** neg 12 11.825 12.175
14 8.598886e-01 -3.47756880 Inf 5.059832e-04 *** 0.71 *** neg 11 10.825 11.175
15 1.000264e+00 -1.95672157 Inf 5.038021e-02 0.85 neg 10 9.825 10.175
16 1.061435e+00 9.89613991 Inf 4.326530e-23 *** 1.05 *** pos 9 8.825 9.175
17 1.104062e+00 17.61765088 Inf 1.803395e-69 *** 1.09 *** pos 8 7.825 8.175
18 9.556011e-01 -17.27157987 Inf 7.700516e-67 *** 0.95 *** neg 7 6.825 7.175
19 1.202417e+00 26.64628345 Inf 1.976286e-156 *** 1.19 *** pos 6 5.825 6.175
20 1.320804e+00 3.33228522 Inf 8.613593e-04 *** 1.19 *** pos 5 4.825 5.175
21 1.455295e+00 4.46404717 Inf 8.042584e-06 *** 1.30 *** pos 4 3.825 4.175
22 1.686880e+00 5.86081093 Inf 4.606122e-09 *** 1.48 *** pos 3 2.825 3.175
23 1.505471e+00 2.61894147 Inf 8.820308e-03 ** 1.26 ** pos 2 1.825 2.175
24 2.656770e+00 4.26313269 Inf 2.015807e-05 *** 1.95 *** pos 1 0.825 1.175
> exp(logistic$coefficients)
(Intercept) `Patient Start Positive Sentiment`
1.580702e+01 1.541550e+03
`Patient Start Negative Sentiment` `Patient Start Neutral Sentiment`
1.915432e-07 8.263631e+01
TypePrescribed Not Taking TypePrescribed Taking
7.704983e-01 8.406734e-01
`Therapist Start Positive Sentiment` `Therapist Start Negative Sentiment`
1.426328e+00 3.978113e-04
`Therapist Start Neutral Sentiment` GenderM
5.169562e-02 9.432113e-01
GenderZ EthnicityU/Z
7.572346e-01 1.345824e+00
EthnicityWhite Employment_StatusOther
1.046401e+00 7.057500e-01
Employment_StatusU/Z Employment_StatusUnemployed
7.075795e-01 8.526489e-01
StartPHQ9 StartGAD7
1.051024e+00 1.093174e+00
StartWSAS Treatment_sessions
9.500620e-01 1.187326e+00
Age_group26-35 Age_group36-45
1.191479e+00 1.297880e+00
Age_group46-55 Age_group56-65
1.479703e+00 1.263634e+00
Age_group66+
1.953006e+00
> exp(confint(logistic))
Waiting for profiling to be done...
2.5 % 97.5 %
(Intercept) 2.864647e-04 8.809859e+05
`Patient Start Positive Sentiment` 1.567316e+00 1.507918e+06
`Patient Start Negative Sentiment` 1.266134e-10 2.851379e-04
`Patient Start Neutral Sentiment` 2.909615e-02 2.317040e+05
TypePrescribed Not Taking 6.487173e-01 9.157834e-01
TypePrescribed Taking 7.730574e-01 9.141332e-01
`Therapist Start Positive Sentiment` 9.140112e-04 2.217053e+03
`Therapist Start Negative Sentiment` 7.750747e-08 2.040810e+00
`Therapist Start Neutral Sentiment` 7.394264e-06 3.590291e+02
GenderM 8.654807e-01 1.028082e+00
GenderZ 4.198807e-01 1.379708e+00
EthnicityU/Z 1.141286e+00 1.586287e+00
EthnicityWhite 9.006407e-01 1.214808e+00
Employment_StatusOther 6.396016e-01 7.787699e-01
Employment_StatusU/Z 5.823355e-01 8.601788e-01
Employment_StatusUnemployed 7.271343e-01 1.000799e+00
StartPHQ9 1.040728e+00 1.061449e+00
StartGAD7 1.082418e+00 1.104090e+00
StartWSAS 9.445387e-01 9.555858e-01
Treatment_sessions 1.172494e+00 1.202491e+00
Age_group26-35 1.074829e+00 1.320841e+00
Age_group36-45 1.157607e+00 1.455469e+00
Age_group46-55 1.298312e+00 1.687380e+00
Age_group56-65 1.061158e+00 1.506344e+00
Age_group66+ 1.439507e+00 2.665708e+00
> df <- df[-c(4,5,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24), ]#The numbers inside the parentheses indicates the number of rows you want removed
> df2 <- df
> label <- paste(df2$term)
> mean <- df2$estimate
> mean <- as.numeric(mean)
> lower <- df2$conf.low
> upper <- df2$conf.high
> sig <- df2$p.value
> stars <- df2$p.stars
> df <- data.frame(label,mean,lower,upper,sig,stars)
> df <- factor(df$label, levels=rev(df$label))
> df <- data.frame(label,mean,lower,upper,sig,stars)
> df$label <- factor(df$label, levels=rev(df$label))
> df
label mean lower upper sig stars
1 Patient Start Positive Sentiment 1.541550e+03 1.574052e+00 1.509720e+06 3.670135e-02 *
2 Patient Start Negative Sentiment 1.915432e-07 1.277841e-10 2.871155e-04 3.384779e-05 ***
3 Patient Start Neutral Sentiment 8.263631e+01 2.934254e-02 2.327256e+05 2.760399e-01
4 Therapist Start Positive Sentiment 1.426328e+00 9.165515e-04 2.219637e+03 9.245590e-01
5 Therapist Start Negative Sentiment 3.978113e-04 7.759419e-08 2.039507e+00 7.242495e-02
6 Therapist Start Neutral Sentiment 5.169562e-02 7.426456e-06 3.598536e+02 5.116916e-01
> df2
term estimate std.error conf.level conf.low conf.high
1 Patient Start Positive Sentiment 1.541550e+03 3.513784 0.95 1.574052e+00 1.509720e+06
2 Patient Start Negative Sentiment 1.915432e-07 3.730949 0.95 1.277841e-10 2.871155e-04
3 Patient Start Neutral Sentiment 8.263631e+01 4.052710 0.95 2.934254e-02 2.327256e+05
6 Therapist Start Positive Sentiment 1.426328e+00 3.750067 0.95 9.165515e-04 2.219637e+03
7 Therapist Start Negative Sentiment 3.978113e-04 4.358366 0.95 7.759419e-08 2.039507e+00
8 Therapist Start Neutral Sentiment 5.169562e-02 4.514409 0.95 7.426456e-06 3.598536e+02
statistic df.error p.value p.stars p.label group xpos xmin xmax
1 2.08907076 Inf 3.670135e-02 * 1541.55 * pos 24 23.825 24.175
2 -4.14590245 Inf 3.384779e-05 *** 0.00 *** neg 23 22.825 23.175
3 1.08925854 Inf 2.760399e-01 82.64 pos 22 21.825 22.175
6 0.09469253 Inf 9.245590e-01 1.43 pos 19 18.825 19.175
7 -1.79643758 Inf 7.242495e-02 0.00 neg 18 17.825 18.175
8 -0.65620596 Inf 5.116916e-01 0.05 neg 17 16.825 17.175
> library(dplyr)
> df3 <- arrange(df, -mean)
> df3$label <- reorder(df3$label, df3$mean)
> df3
label mean lower upper sig stars
1 Patient Start Positive Sentiment 1.541550e+03 1.574052e+00 1.509720e+06 3.670135e-02 *
2 Patient Start Neutral Sentiment 8.263631e+01 2.934254e-02 2.327256e+05 2.760399e-01
3 Therapist Start Positive Sentiment 1.426328e+00 9.165515e-04 2.219637e+03 9.245590e-01
4 Therapist Start Neutral Sentiment 5.169562e-02 7.426456e-06 3.598536e+02 5.116916e-01
5 Therapist Start Negative Sentiment 3.978113e-04 7.759419e-08 2.039507e+00 7.242495e-02
6 Patient Start Negative Sentiment 1.915432e-07 1.277841e-10 2.871155e-04 3.384779e-05 ***
> library(ggplot2)
> fp <- ggplot(df2, aes(label, mean, colour = mean>1)) +
+
+ scale_colour_manual(name = 'mean>0', values = setNames(c('royalblue2','red3'),c(T, F))) +
+
+ geom_pointrange(aes(ymin = lower, ymax = upper), size = 1) +
+
+ geom_hline(yintercept=1, lty=2) +
+
+ labs(title = "Interaction Between Medication Status and Sentiment for Improvement - Depression", x = "", y = "Odds Ratio") +
+
+ coord_flip() +
+
+ theme(legend.position="none") +
+
+ ylim(0.0,10.0) +
+
+ geom_text(aes(label=stars), position=position_dodge(width=0.9), hjust = -.35, vjust=.2, size = 6) +
+
+ theme(axis.title = element_text(size = 14,colour = "black")) +
+
+ theme(panel.background = element_rect(fill = "gray88")) +
+
+ theme(plot.title = element_text(size = rel(1.5), hjust = 0.5)) +
+
+ theme(axis.text=element_text(size=15,colour = "black"))
>
> print(fp)
Warning messages:
1: Removed 2 rows containing missing values (geom_pointrange).
2: Removed 2 rows containing missing values (geom_segment).
3: Removed 2 rows containing missing values (geom_text).
>
I've tried everything however the code just doesn't want to work. You can see the error in the last three lines and nothing I do seems to fix it.