I am trying to compute each entity's share of online mentions by month, i.e. its mentions divided by the total number of mentions in that month, rather than by the total number of mentions in my dataset.
Print data example
dput(directed_to_whom_monthly[1:4, ]) # dump the first four rows as a reproducible structure() call
Output:
structure(list(directed_to_whom = structure(c(3L, 2L, 3L, 3L), .Label = c("MoE",
"MoL", "Private employers"), class = "factor"), treatment_details = structure(c(2L,
2L, 2L, 1L), .Label = c("post", "pre"), class = "factor"), month_year = structure(c(2011.41666666667,
2011.41666666667, 2011.5, 2012.5), class = "yearmon"), n = c(10L,
10L, 8L, 30L), directed_to_whom_percentage = c(0.00279251605696733,
0.00279251605696733, 0.00223401284557386, 0.00837754817090198
), year = c(2011, 2011, 2011, 2012), month = c(6, 6, 7, 7)), row.names = c(NA,
-4L), class = c("tbl_df", "tbl", "data.frame"))
To compute this, I have tried the following:
# Attempt 1: compute each row's share of mentions (n) as directed_to_whom_percentage.
directed_to_whom_monthly %>%
group_by(directed_to_whom) %>% # group by the entity mentioned; NOTE(review): the next group_by() has no .add = TRUE, so it replaces this grouping
group_by(month_year) %>% # group by calendar month (yearmon)
add_count(treatment_details) %>% # add count of rows per treatment_details within each month_year group; NOTE(review): the data already has a column n, so the count presumably lands in a new column (e.g. nn) - verify
unique() %>% # remove duplicates
ungroup() %>% # remove grouping, so sum(n) below runs over every row of the dataset
mutate(directed_to_whom_percentage = n/sum(n)) %>% # share of n in the overall total of n, not in the monthly total
But this essentially divides the number of mentions of entity X by all mentions in the dataset.
I have also tried a solution from here, as follows. The code runs, but it does not divide the mentions by the total number of mentions in each month.
# Attempt 2: express n as a percentage of the summed n within each group of the month column.
test <-directed_to_whom_monthly %>%
group_by(month) %>% mutate(per= prop.table(n) * 100) # month holds only the month number (6, 7), so the same month of different years (e.g. July 2011 and July 2012) falls in one group
dput(test[1:4, ]) # dump the first four rows of the result for inspection
Output:
structure(list(directed_to_whom = structure(c(3L, 2L, 3L, 3L), .Label = c("MoE",
"MoL", "Private employers"), class = "factor"), treatment_details = structure(c(2L,
2L, 2L, 1L), .Label = c("post", "pre"), class = "factor"), month_year = structure(c(2011.41666666667,
2011.41666666667, 2011.5, 2012.5), class = "yearmon"), n = c(10L,
10L, 8L, 30L), directed_to_whom_percentage = c(0.00279251605696733,
0.00279251605696733, 0.00223401284557386, 0.00837754817090198
), year = c(2011, 2011, 2011, 2012), month = c(6, 6, 7, 7), per = c(2.49376558603491,
2.49376558603491, 8, 30)), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -4L), groups = structure(list(
month = c(6, 7), .rows = structure(list(1:2, 3:4), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L), .drop = TRUE))