I try to generate a summary statistics table for latex in R with stargazer
. The table should contain the summary statistics sorted by three subgroups (Rwanda/Honduras/Nepal).
It worked out fine, when I did seperate tables only for the subgroup. I thought maybe the country variable is the problem.
The all_summary
data frame looks like this:
structure(list(country = structure(c("Honduras", "Nepal", "Rwanda"
), label = "Country", format.stata = "%8s"), headGender = structure(c(0,
1, 0), label = "head_gender", format.stata = "%9.0g"), femaleEduc = structure(c(1,
2, 2), label = "female_educ", format.stata = "%9.0g"), maleEduc = structure(c(1,
1, 2), label = "male_educ", format.stata = "%9.0g"), wVispeople = structure(c(0,
1, 0), label = "w_visitpeople", format.stata = "%9.0g"), wVismarket = structure(c(0,
1, 1), label = "w_vismarket", format.stata = "%9.0g"), wLeavevill = structure(c(0,
1, 0), label = "w_leavevill", format.stata = "%9.0g"), fridge = structure(c(1,
0, 0), label = "fridge_owned_desired", format.stata = "%9.0g"),
radio = structure(c(1, 1, 1), label = "radio_owned_desired", format.stata = "%9.0g"),
fan = structure(c(0, 0, 0), label = "fan_owned_desired", format.stata = "%9.0g"),
pc = structure(c(0, 0, 0), label = "pc_owned_desired", format.stata = "%9.0g"),
tv = structure(c(1, 0, 1), label = "tv_owned_desired", format.stata = "%9.0g"),
minutesSolid = structure(c(3, 2, 448), label = "stoveuseminutes_solids", format.stata = "%9.0g"),
minutesClean = structure(c(0, 0, 0), label = "stoveuseminutes_clean", format.stata = "%9.0g"),
stoveClean = structure(c(0, 0, 0), label = "stove_clean", format.stata = "%9.0g")), row.names = c(NA,
-3L), class = c("tbl_df", "tbl", "data.frame"), label = "Written by R.")
This is how the code looks like:
all_summary <- allcountries %>%
select(Country, head_gender, female_educ, male_educ, w_visitpeople, w_vismarket, w_leavevill, fridge_owned_desired, radio_owned_desired, fan_owned_desired, pc_owned_desired, tv_owned_desired, stoveuseminutes_solids, stoveuseminutes_clean, stove_clean) %>%
rename(country = Country,
headGender = head_gender,
femaleEduc = female_educ,
maleEduc = male_educ,
wVispeople = w_visitpeople,
wVismarket = w_vismarket,
wLeavevill = w_leavevill,
fridge = fridge_owned_desired,
radio = radio_owned_desired,
fan = fan_owned_desired,
pc = pc_owned_desired,
tv = tv_owned_desired,
minutesSolid = stoveuseminutes_solids,
minutesClean = stoveuseminutes_clean,
stoveClean = stove_clean)
#Group by country
all_summary_grouped <- all_summary %>% group_by(country)
sumstats_all_grouped <-
all_summary_grouped %>%
summarise_each(funs(
n = sum(!is.na(.)),
min = min(., na.rm = TRUE),
max = max(., na.rm = TRUE),
mean = mean(., na.rm = TRUE)
))
#Reshape data
sumstatsA <- sumstats_all_grouped %>%
gather(stat, val) %>%
separate(stat, into = c ("var", "stat"), sep = "_") %>%
spread(stat, val) %>%
select(var, n, min, max, mean)
#Round
sumstatsA = sumstatsA %>%
mutate(mean = round(as.numeric(mean),2))
#produce table
stargazer(
sumstatsA,
summary = F,
type = "text",
digits = 2,
header = F ,
title = "Summary statistics for Honduras, Nepal and Rwanda",
rownames = F,
out = "Manuscript/Tables/SummaryAll_grouped.tex")
The error happens after the #Reshape data command section
(Error in spread()
:
! Each row of output must be identified by a unique combination of keys.
Keys are shared for 171 rows:
- 112, 113, 114
- 91, 92, 93
- 106, 107, 108 (to be continued))