I have a data frame with the following structure (the full dput output is too long to post):
$ OC_AH_026C.chr : num 1 1 1 1 1 1 1 1 1 1 ...
$ OC_AH_026C.leftPos : num 240000 1080000 1200000 1320000 1440000 1800000 2400000 2520000 3120000 3360000 ...
$ OC_AH_026C.Means : num 78.1 81.8 156.5 26.8 18.5 ...
$ OC_AH_026C.UL : num 125 125 125 125 125 ...
$ OC_AH_026C.LL : num 1.95 1.95 1.95 1.95 1.95 ...
$ OC_AH_026C.res : num 0 0 1 0 0 0 -1 0 0 0 ...
$ OC_AH_026C.1.chr : num 1 1 1 1 1 1 1 1 1 1 ...
$ OC_AH_026C.1.leftPos: num 240000 1080000 1200000 1320000 1440000 1800000 2400000 2520000 3120000 3360000 ...
$ OC_AH_026C.1.Means : num 97.3 88.9 50.1 33.3 44.2 ...
$ OC_AH_026C.1.UL : num 125 125 125 125 125 ...
$ OC_AH_026C.1.LL : num 2.45 2.45 2.45 2.45 2.45 ...
$ OC_AH_026C.1.res : num 0 0 0 0 0 0 0 0 0 0 ...
$ OC_AH_026T.chr : num 1 1 1 1 1 1 1 1 1 1 ...
$ OC_AH_026T.leftPos : num 240000 1080000 1200000 1320000 1440000 1800000 2400000 2520000 3120000 3360000 ...
$ OC_AH_026T.Means : num 12.8 101.7 124 56.1 91.3 ...
$ OC_AH_026T.UL : num 126 126 126 126 126 ...
$ OC_AH_026T.LL : num 1.83 1.83 1.83 1.83 1.83 ...
$ OC_AH_026T.res : num 0 0 0 0 0 0 0 0 0 0 ...
$ OC_AH_058T.chr : num 1 1 1 1 1 1 1 1 1 1 ...
$ OC_AH_058T.leftPos : num 240000 1080000 1200000 1320000 1440000 1800000 2400000 2520000 3120000 3360000 ...
$ OC_AH_058T.Means : num 103 119 201 118 96 ...
$ OC_AH_058T.UL : num 124 124 124 124 124 ...
$ OC_AH_058T.LL : num 0.684 0.684 0.684 0.684 0.684 ...
$ OC_AH_058T.res : num 0 0 1 0 0 0 0 0 0 0 ...
For every pair of columns whose names end in ".res", I would like to count the rows in which both columns are 1 or both columns are -1.
I would like to deposit this in a matrix so that I end up with something like
OC_AH_026C.res OC_AH_026C.1.res OC_AH_026T.res OC_AH_058T.res
OC_AH_026C.res
OC_AH_026C.1.res
OC_AH_026T.res
OC_AH_058T.res
I'm afraid I have only got as far as the code below, and it's basically all wrong:
# Keep only the columns whose names end in ".res". The dot is escaped and the
# pattern is anchored so that names merely containing "res" are not picked up;
# drop = FALSE keeps a data frame even when a single column matches.
df_list2res <- df_list2[, grep("\\.res$", names(df_list2)), drop = FALSE]

# Indicator matrices (1 where the condition holds, 0 otherwise). The
# !is.na() guard makes missing values count as "no match" instead of
# propagating NA into every count that involves that column.
res_mat <- as.matrix(df_list2res)
is_plus <- (!is.na(res_mat) & res_mat == 1) * 1
is_minus <- (!is.na(res_mat) & res_mat == -1) * 1

# crossprod(x) is t(x) %*% x, so entry [i, j] is the number of rows in which
# columns i and j are both flagged. Adding the two cross-products gives, for
# every pair of columns, the number of rows where both are 1 or both are -1.
# The result is a square matrix with the ".res" column names on both axes;
# the diagonal holds each column's own count of non-zero (1 or -1) rows.
Comparison <- crossprod(is_plus) + crossprod(is_minus)
but the function to do the row by row comparison for each column and then dump in a matrix has defeated me.
EDIT
Using a limited dput
structure(list(OC_AH_026C.res = c(0, 0, 1, 0, 0, 0), OC_AH_026C.1.res = c(0,
0, 0, 0, 0, 0), OC_AH_026T.res = c(0, 0, 0, 0, 0, 0), OC_AH_058T.res = c(0,
0, 1, 0, 0, 0), OC_AH_084T.res = c(0, 0, 0, 0, 0, 0), OC_AH_086T.res = c(0,
0, 1, 0, 0, 0)), .Names = c("OC_AH_026C.res", "OC_AH_026C.1.res",
"OC_AH_026T.res", "OC_AH_058T.res", "OC_AH_084T.res", "OC_AH_086T.res"
), row.names = c(NA, 6L), class = "data.frame")
The expected output would be (I think; I worked it out by hand):
OC_AH_026C.res OC_AH_026C.1.res OC_AH_026T.res OC_AH_058T.res OC_AH_084T.res OC_AH_086T.res
OC_AH_026C.res 1 0 0 1 0 1
OC_AH_026C.1.res 0 0 0 0 0 0
OC_AH_026T.res 0 0 0 0 0 0
OC_AH_058T.res 1 0 0 1 0 1
OC_AH_084T.res 0 0 0 0 0 0
OC_AH_086T.res 1 0 0 1 0 1
Using a further dput output
structure(list(OC_AH_026C.res = c(0, 0, 1, 0, 0), OC_AH_026C.1.res = c(0,
0, 0, 0, 0), OC_AH_026T.res = c(0, 0, 0, 0, 0), OC_AH_058T.res = c(0,
0, 1, 0, 0), OC_AH_084T.res = c(0, 0, 0, 0, 0), OC_AH_086T.res = c(0,
0, 1, 0, 0), OC_AH_088T.res = c(1, 1, 0, 1, 0), OC_AH_096T.res = c(0,
0, 0, -1, 0), OC_AH_100T.res = c(0, 0, 0, 0, 0), OC_AH_127T.res = c(0,
0, 0, 0, 0), OC_AH_133T.res = c(0, 0, 0, 0, 0), OC_ED_008T.res = c(0,
0, 1, 0, 0), OC_ED_016T.res = c(0, 0, 0, 0, 0), OC_ED_031T.res = c(0,
1, 1, 0, 0), OC_ED_036T.res = c(0, 0, 0, 0, 0), OC_GS_001T.res = c(0,
0, 0, 0, 0), OC_QE_062T.res = c(0, 0, 0, 0, 0), OC_RS_010T.res = c(0,
0, 0, 0, 0), OC_RS_027C.res = c(0, 0, 1, 0, 0), OC_RS_027C.1.res = c(0,
0, 1, 0, 0), OC_RS_027T.res = c(0, 0, 1, 0, 0), OC_SH_051T.res = c(0,
0, 1, 0, 0), OC_ST_014T.res = c(0, 0, 0, 0, 0), OC_ST_016T.res = c(0,
0, 0, 0, 0), OC_ST_020T.res = c(0, 0, 0, 0, 0), OC_ST_024T.res = c(0,
0, 0, 0, 0), OC_ST_033T.res = c(0, 0, 0, 0, 0), OC_ST_034C.res = c(0,
0, 1, 0, 0), OC_ST_034C.1.res = c(0, 0, 0, 0, 0), OC_ST_036T.res = c(0,
0, 0, 0, 0), OC_ST_037T.res = c(0, 0, 0, 0, 0), OC_ST_040T.res = c(0,
0, 0, 0, 0), OC_WG_001T.res = c(0, 0, 0, 0, 0), OC_WG_002T.res = c(0,
0, 0, 0, 0), OC_WG_005T.res = c(0, 0, 0, 0, 0), OC_WG_009T.res = c(0,
0, 0, 0, 0), OC_WG_019T.res = c(0, 0, 1, 0, 0), Means.res = c(0,
0, 0, 0, 0), sd.res = c(0, 0, 1, 0, 0)), .Names = c("OC_AH_026C.res",
"OC_AH_026C.1.res", "OC_AH_026T.res", "OC_AH_058T.res", "OC_AH_084T.res",
"OC_AH_086T.res", "OC_AH_088T.res", "OC_AH_096T.res", "OC_AH_100T.res",
"OC_AH_127T.res", "OC_AH_133T.res", "OC_ED_008T.res", "OC_ED_016T.res",
"OC_ED_031T.res", "OC_ED_036T.res", "OC_GS_001T.res", "OC_QE_062T.res",
"OC_RS_010T.res", "OC_RS_027C.res", "OC_RS_027C.1.res", "OC_RS_027T.res",
"OC_SH_051T.res", "OC_ST_014T.res", "OC_ST_016T.res", "OC_ST_020T.res",
"OC_ST_024T.res", "OC_ST_033T.res", "OC_ST_034C.res", "OC_ST_034C.1.res",
"OC_ST_036T.res", "OC_ST_037T.res", "OC_ST_040T.res", "OC_WG_001T.res",
"OC_WG_002T.res", "OC_WG_005T.res", "OC_WG_009T.res", "OC_WG_019T.res",
"Means.res", "sd.res"), row.names = c(NA, 5L), class = "data.frame")