0

I am trying to aggregate survey data (the unit of analysis is respondent) into marginals: percentage of responses for each question per year. There are 21 questions (columns). This is what I attempted but it does not get the desired output.

#Place each year into a list element
yr.list = by(data=data, INDICES = data$year, FUN = list)
summary(yr.list)

#Within each element, place responses to all questions into one vector
data.form = function(x){
  n = nrow(x)
  k = ncol(x) - 2
  id = rep(1:n, times = k)
  item = sort(rep(1:k, times=n))
  y = c(unlist(x[,3:ncol(x)]))
  out = data.frame("year"=x$year, "id"=id, "item"=item, "y"=y)
  return(out)
}

yr.list = lapply(X = yr.list, FUN = data.form)

yr.vector = do.call(rbind, yr.list)
yr.vector$occurrences = 1
yr.aggregated = aggregate(occurrences ~ year + item, data = yr.vector, FUN =       sum)

summary(yr.aggregated)

0 Answers0