I have a data frame with ratings made by 4 different reviewers; each row is a reviewer pair rating an image.
df <- data.frame(Reviewer1 = c("Name1", "Name2", "Name3", "Name4", "Name2", "Name3", "Name1", "Name3", "Name1", "Name4", "Name1", "Name1", "Name1", "Name2", "Name3", "Name4", "Name2", "Name3", "Name1", "Name2", "Name1", "Name4", "Name1", "Name1", "Name3", "Name2", "Name4", "Name3", "Name1", "Name2", "Name1", "Name3", "Name4", "Name3", "Name2", "Name2", "Name2", "Name3", "Name1", "Name3", "Name3", "Name1", "Name4", "Name2", "Name3", "Name4", "Name4", "Name3", "Name4"),
Rating1 = c("Worst", "Worst", "Best", "Bad", "Good", "Worst", "Best", "Worst", "Best", "Bad", "Worst", "Worst", "Worst", "Good", "Best", "Bad", "Good", "Worst", "Best", "Worst", "Best", "Bad", "Worst", "Worst", "Best", "Worst", "Worst", "Good", "Bad", "Worst", "Good", "Bad", "Worst", "Worst", "Worst", "Good", "Good", "Bad", "Good", "Good", "Bad", "Worst", "Good", "Worst", "Worst", "Worst", "Worst", "Good", "Good"),
Reviewer2 = c("Name3", "Name1", "Name1", "Name1", "Name4", "Name4", "Name2", "Name4", "Name2", "Name2", "Name2", "Name2", "Name3", "Name1", "Name1", "Name1", "Name4", "Name4", "Name2", "Name3", "Name3", "Name2", "Name2", "Name2", "Name1", "Name4", "Name3", "Name1", "Name2", "Name3", "Name3", "Name1", "Name2", "Name4", "Name4", "Name1", "Name4", "Name2", "Name3", "Name4", "Name1", "Name3", "Name2", "Name3", "Name1", "Name2", "Name3", "Name2", "Name3"),
Rating2 = c("Best", "Good", "Worst", "Good", "Best", "Worst", "Best", "Worst", "Worst", "Best", "Worst", "Worst", "Best", "Worst", "Bad", "Worst", "Best", "Worst", "Best", "Worst", "Worst", "Best", "Worst", "Worst", "Best", "Worst", "Worst", "Good", "Bad", "Worst", "Good", "Bad", "Worst", "Worst", "Worst", "Good", "Good", "Bad", "Good", "Good", "Bad", "Worst", "Good", "Bad", "Worst", "Worst", "Worst", "Good", "Worst"))
My end goal is to create contingency tables for Cohen's Kappa analyses of each reviewer pair. For that, I need counts of reviewer pair ratings with the following rules:
- Not include pairs that do not exist (e.g., Reviewer1 Name1 and Reviewer2 Name4; note that Reviewer1 Name4 and Reviewer2 Name1 does exist)
- Reviewer pairings with themselves are still included in the first loop output; ideally, these wouldn't be included
- I'd like to have the reviewer pair names (or i and j integers) in the first and second column in the output in the first loop if possible
Because a given rater can function as Reviewer1 as well as Reviewer2 in a given pair, I also need to sum the data where they functioned as both, e.g., sum the YY count for Name2 as Reviewer1 and Name3 as Reviewer2 with Name3 as Reviewer1 and Name2 as Reviewer2. How to do that?
Thank you so much for your help in advance!
EDIT: I've made some changes to the code below that enable adding the reviewer pairs to the output (last point) and remove the reviewer pairings with themselves (second point) though still ideally they wouldn't be in the first loop output at all.
# Prep for the first loop
plist <- unique(df$Reviewer1) # Get count of Names
pseq <- seq(1, length(plist), by = 1) # Create sequence to use numbers for the loop instead of the reviewer names
pmap <- data.frame(pseq, plist) # Map numbers to names
# Initialize empty lists
NN <- c()
YY <- c()
YN <- c()
NY <- c()
ind <- vector()
pairs <- data.frame()
# Loop over pairs
for(i in pseq) {
for(j in pseq) {
if (i!=j)
ind <- c(i,j)
pairs <- rbind(pairs, ind)
NN[j + length(plist)*(i-1)] <- count(df[which(df$Reviewer1==pmap[i,2] & df$Rating1=='Worst' &
df$Reviewer2==pmap[j,2] & df$Rating2=='Worst'), ])
YY[j + length(plist)*(i-1)] <- count(df[which(df$Reviewer1==pmap[i,2] & df$Rating1!='Worst' &
df$Reviewer2==pmap[j,2] & df$Rating2!='Worst'), ])
YN[j + length(plist)*(i-1)] <- count(df[which(df$Reviewer1==pmap[i,2] & df$Rating1!='Worst' &
df$Reviewer2==pmap[j,2] & df$Rating2=='Worst'), ])
NY[j + length(plist)*(i-1)] <- count(df[which(df$Reviewer1==pmap[i,2] & df$Rating1=='Worst' &
df$Reviewer2==pmap[j,2] & df$Rating2!='Worst'), ])
}
}
# Remove the first row as that's Reviewer1 with themselves
NN <- NN[-(1)]
YY <- YY[-(1)]
YN <- YN[-(1)]
NY <- NY[-(1)]
# Put rating lists into one list and convert that to a data frame
resps <- c('YY', 'YN', 'NY', 'NN')
resplist = list(YY, YN, NY, NN)
respdf <- as.data.frame(do.call(cbind, resplist))
colnames(respdf) <- c(resps)
respdf <- cbind(pairs, respdf)
respdf <- respdf[!duplicated(respdf[c('X1', 'X2')]), ] # Remove duplicate rows based on duplicates from the pairs (the duplicates represent 2&2 and 3&3)
# Cohen's Kappa Analyses
# Put data into individual matrices (i.e., contingency tables) and do Kappa analyses, saving the results
kseq <- nrow(respdf)
pabakest = data.frame()
pabakLCI = data.frame()
pabakUCI = data.frame()
kappaest = data.frame()
kappaLCI = data.frame()
kappaUCI = data.frame()
z = data.frame()
p = data.frame()
# Y=all but "Worst" rating and N="Worst" rating
for(i in 1:kseq) {
temp <- as.matrix(respdf[i, 2:5])
tempvec <- unlist(temp)
kappadata <- matrix(tempvec, nrow = 2, byrow = TRUE)
kappa <- epi.kappa(kappadata, method = "cohen", alternative = "greater", conf.level = 0.95)
t1 <- round(kappa[[2]][1],2)
pabakest <- rbind(pabakest,t1)
t2 <- round(kappa[[2]][2],2)
pabakLCI <- rbind(pabakLCI,t2)
t3 <- round(kappa[[2]][3],2)
pabakUCI <- rbind(pabakUCI,t3)
t4 <- round(kappa[[3]][1],2)
kappaest <- rbind(kappaest,t4)
t5 <- round(kappa[[3]][3],2)
kappaLCI <- rbind(kappaLCI,t5)
t6 <- round(kappa[[3]][4],2)
kappaUCI <- rbind(kappaUCI,t6)
t7 <- round(kappa[[4]][1],2)
z <- rbind(z,t7)
t8 <- round(kappa[[4]][2],2)
p <- rbind(p,t8)
}
# Add reviewer pair rows/cols once I have them
kappaoutput <- cbind(pabakest, pabakLCI, pabakUCI, kappaest, kappaLCI, kappaUCI, z, p)