The following part of the code works fine for me. assessments_jobSmart is an imported csv file. I'm trying to reshape data of multiple column but i couldn't make it by using tapply. So i attempted to bind the matrices by matching user_id, and add row id the id hasn't existed.
individual_assess <- filter(assessments_jobSmart, !is.na(assessments_jobSmart$submitted))
quiz_i = filter(individual_assess, assessment_type == 'quiz')
checkin_i = filter(individual_assess, assessment_type == 'checkin')
groupQuiz_i <- group_by(quiz_i, user_id, program_id, name)
summaryQuiz_i <- summarize(groupQuiz_i, maxQuiz = max(moderated_score))
q1 <- with(summaryQuiz_i, tapply(maxQuiz, list(user_id, name) , I))
q2=matrix(NA, nrow = nrow(q1), ncol = ncol(q1)+1)
colnames(q2) = c("user_id", paste("assess_q", colnames(q1)))
q2[1:nrow(q2),1] = rownames(q1)
q2[1:nrow(q2),2:ncol(q2)] = q1[1:nrow(q1),1:ncol(q1)]
lla = merge(lla, q2, by = 'user_id', all=TRUE)
But for the next part, it gives me error at the end of my last line. I reffered to many links but still can't figure out why. c1 and c2 both have same number of rows.
Error in (function (..., row.names = NULL, check.rows = FALSE, check.names =
TRUE, : arguments imply differing number of rows: 1, 2
faulty code:
groupCheckin_i <- group_by(checkin_i, user_id, program_id, name)
summaryCheckin_i <- summarize(groupCheckin_i, countCheckin = n())
c1 <- with(summaryCheckin_i, tapply(countCheckin, list(user_id, name) , I))
c1[c1=="NULL"]=NA
c2=matrix(NA, nrow = nrow(c1), ncol = ncol(c1)+1)
colnames(c2) = c("user_id", paste("assess_c", colnames(c1)))
c2[1:nrow(c2),1] = rownames(c1)
c2[1:nrow(c2),2:ncol(c2)] = c1[1:nrow(c1),1:ncol(c1)]
lla = merge(lla, c2, by = 'user_id', all=TRUE)
Reproducible example, not sure if i replicate it correctly tho. I'll start from the dataframe imported, grouped, and summarized.
install.packages("dplyr")
install.packages("reshape2")
install.packages('ggplot2', dep = TRUE)
library("dplyr")
library(reshape2)
library(ggplot2)
head = c("user_id", "program", "assessment", "type", "marks")
content = c("111", "program A", "quiz 1", "quiz", "1", "112", "program A", "quiz 1", "quiz", "0.5", "112", "program A", "quiz 2", "quiz", "0.75", "113", "program B", "quiz 2", "quiz", "0.8", "110", "program B", "survey 1", "survey", "1", "113", "program B", "survey 1", "survey", "1")
M = as.dataframe(matrix(content, nrow=5, ncol=5)) #kinda replicate my imported csv file.
s = filter(M, type == 'survey')
q = filter(M, type == 'quiz')
groupS = group_by(s, user_id, program, assessment)
groupQ = group_by(q, user_id, program, assessment)
summaryS <- summarize(groupS, maxMarks = max(marks)) # take only maximum marks if there are duplicate entries
s1 <- with(summaryS, tapply(maxMarks, list(user_id, assessment) , I))
s2 = matrix(NA, nrow=nrow(s1), ncol=ncol(s1)+1)
colnames(s2) = c("user_id", colnames(s1))
s2[q:nrow(s2), 1] = rownames(s1) # everything works alright till here
s2[1:nrow(s2),2:ncol(s2)] = s1[1:nrow(s1),1:nrow(s1)]
summaryQ <- summarize(groupQ, count = n()) # it makes more sense to count survey done
q1 <- with(summaryQ, tapply(count, list(user_id, assessment) , I))
q2 = matrix(NA, nrow=nrow(q1), ncol=ncol(q1)+1)
colnames(q2) = c("user_id", colnames(q1))
q2[q:nrow(q2), 1] = rownames(q1) # everything works alright till here
q2[1:nrow(q2),2:ncol(q2)] = q1[1:nrow(q1),1:nrow(q1)] #q2 becomes a list :(
b = merge(b, a2, by = 'user_id', all+TRUE)
# | program | assessment | marks
# ---------------------------------------
# 111 | program A | quiz 1 | 1
# 112 | program A | quiz 1 | 0.5
# 112 | program A | quiz 2 | 0.75
# 113 | program B | quiz 2 | 0.8
# Then I used tapply to reshape data to get something like:
# | assessment 1 | assessment 2
# -------------------------------------
# 111 | 1 | NA
# 112 | 0.5 | 0.75
# 113 | NA | 0.8
There are many of these tables because the results are extracted differently, so I want to merge them at the end to combine all the results. I don't want user_id to appear twice, each for separate table. I want to compare results with user_id but heading for that column is missing as user_id was treated as rownames. So I create a larger matrix to copy everything and include the user_id column name:
# Let's say a1 is the matrix after tapply, a2 is the new dataframe I want to create, b is the successful new dataframe created using the exact same method on same csv file exported.
I want to get something like this after merging:
user_id | survey 1 | survey 2 | assessment 1 | assessment 2
-------------------------------------------------------------
110 | 1 | NA | NA | NA
111 | NA | NA | 1 | NA
112 | NA | NA | 0.5 | 0.75
113 | 1 | NA | NA | 0.8