I have data set like this
x<-c('ASON10_SHROFF-1/3/16/1/02-Au4P','ASON10_SHROFF-1/3/16/1/06-Au4P','ASON10_SHROFF-1/3/16/1/09-Au4P', 'ASON10_SHROFF-1/3/16/1/09-Au4P', 'ASON11_TALWAR-1/3/12/2/04-Au4P', 'ASON11_TALWAR-1/3/12/2/04-Au4P')
y <- c('SERVER_SIGNAL_FAILURE-TMe, UNAVAILABLE_TIME-TMe-PMNE1d, UNEQUIPPED-TMe', 'SERVER_SIGNAL_FAILURE-TMe, UNAVAILABLE_TIME-TMe-PMNE1d, UNEQUIPPED-TMe', 'SERVER_SIGNAL_FAILURE-TMe, REMOTE_DEFECT_INDICATION-TMi, UNAVAILABLE_TIME-TMe-PMNE1d, UNAVAILABLE_TIME-TMi-PMFE1d, UNEQUIPPED-TMe', 'SERVER_SIGNAL_FAILURE-TMe, REMOTE_DEFECT_INDICATION-TMi, UNAVAILABLE_TIME-TMi-PMFE1d, UNAVAILABLE_TIME-TMe-PMNE1d','DEGRADED_SIGNAL-TMe, SERVER_SIGNAL_FAILURE-TMe, UNEQUIPPED-TMe','UNEQUIPPED-TMe, UNEQUIPPED-TMe,UNEQUIPPED-TMe')
df <-data.frame(x,y)
df <- data.frame(lapply(df, as.character), stringsAsFactors = F)
I want to remove the same elements in the y column that are separated by comma(,) and count the ratio. I have tried below code but it still remain same entry and also concatenate by x
library(dplyr)
z<-df %>%
mutate(row = row_number(),
y1 = y) %>%
add_count(x, name = 'cx') %>%
tidyr::separate_rows(y1, sep = ",") %>%
group_by(row) %>%
summarise(across(c(x, cx, y), first),
cy = n(),
rat = cy/cx,
n = n_distinct(y1)) %>%
filter(n > 1) %>%
select(-row, -n)
Desire output is
x<-c('ASON10_SHROFF-1/3/16/1/02-Au4P','ASON10_SHROFF-1/3/16/1/06-Au4P','ASON10_SHROFF-1/3/16/1/09-Au4P', 'ASON10_SHROFF-1/3/16/1/09-Au4P', 'ASON11_TALWAR-1/3/12/2/04-Au4P')
cx <-c(1,1,2,2,1)
y <- c('SERVER_SIGNAL_FAILURE-TMe, UNAVAILABLE_TIME-TMe-PMNE1d, UNEQUIPPED-TMe', 'SERVER_SIGNAL_FAILURE-TMe, UNAVAILABLE_TIME-TMe-PMNE1d, UNEQUIPPED-TMe', 'SERVER_SIGNAL_FAILURE-TMe, REMOTE_DEFECT_INDICATION-TMi, UNAVAILABLE_TIME-TMe-PMNE1d, UNAVAILABLE_TIME-TMi-PMFE1d, UNEQUIPPED-TMe', 'SERVER_SIGNAL_FAILURE-TMe, REMOTE_DEFECT_INDICATION-TMi, UNAVAILABLE_TIME-TMi-PMFE1d, UNAVAILABLE_TIME-TMe-PMNE1d','DEGRADED_SIGNAL-TMe, SERVER_SIGNAL_FAILURE-TMe, UNEQUIPPED-TMe')
cy <-c(3,3,5,4,3)
rat <-c(3/1,3/1,5/2,5/2,3,1)