0

I have a data frame in which column V3 contains duplicated values. I need to transpose V3 so that each of its unique values becomes a column, and then fill each of those columns with the V4 value from the row with the matching V3 value. Column V1 holds the sample name, and not every sample has a value for every V3 value.

structure(list(V1 = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 
3L, 3L, 4L, 4L, 4L, 4L), V2 = structure(c(3L, 3L, 3L, 3L, 3L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("C", 
"F", "M"), class = "factor"), V3 = structure(c(1L, 2L, 3L, 4L, 
5L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 2L, 3L, 4L, 5L), .Label = c("ARS_BFGL_BAC_19454", 
"ARS_BFGL_BAC_27364", "ARS_BFGL_BAC_35552", "ARS_BFGL_NGS_10035", 
"ARS_BFGL_NGS_101456"), class = "factor"), V4 = structure(c(5L, 
6L, 4L, 1L, 9L, 2L, 3L, 8L, 5L, 7L, 8L, 9L, 3L, 5L, 8L, 4L), .Label = c("A/A", 
"A/C", "A/G", "C/C", "C/T", "G/G", "G/T", "T/C", "T/T"), class = "factor")), .Names = c("V1", 
"V2", "V3", "V4"), class = "data.frame", row.names = c(NA, -16L
 ))

My output has to be like this

structure(list(V1 = c(NA, 1L, 2L, 3L, 4L), V2 = structure(c(2L, 
3L, 1L, 3L, 1L), .Label = c("0", "ARS_BFGL_BAC_19454", "C/T"), class = "factor"), 
V3 = structure(c(1L, 2L, 2L, 2L, 2L), .Label = c("ARS_BFGL_BAC_27364", 
"G/G"), class = "factor"), V4 = structure(c(1L, 2L, 2L, 2L, 
2L), .Label = c("ARS_BFGL_BAC_35552", "C/C"), class = "factor"), 
V5 = structure(c(2L, 1L, 1L, 1L, 1L), .Label = c("A/A", "ARS_BFGL_NGS_10035"
), class = "factor"), V6 = structure(c(2L, 3L, 1L, 1L, 3L
), .Label = c("0", "ARS_BFGL_NGS_101456", "T/T"), class = "factor")), .Names = 
c("V1", 
"V2", "V3", "V4", "V5", "V6"), class = "data.frame", row.names = c(NA, -5L))

I tried

 # Attempt with reshape2: cast to wide form with one row per V1 and one column
 # per distinct V3 value, taking the cell contents from V4.
 library(reshape2)
 dcast(myfile1, V1 ~ V3, value.var="V4")

but it puts NA in the cells that have no matching value; I want 0 to be put there instead.

zx8754
  • 52,746
  • 12
  • 114
  • 209
KMISH
  • 49
  • 1
  • 8

2 Answers2

0

You can use `reshape()` from base R to do this:

# Spread V3 into columns with base R: drop V2, keep one row per sample (V1),
# and create one "V4.<V3 value>" column per distinct V3 value.
r <- reshape(df[-2], direction = "wide", timevar = "V3", idvar = "V1")

# reshape() keeps the row names of each sample's first input row and attaches
# a "reshapeWide" attribute; reset both so `r` is a plain data frame.
rownames(r) <- NULL
attr(r, "reshapeWide") <- NULL

# Convert only the value columns to character and fill the missing
# sample/V3 combinations with "0". V1 is left untouched so it keeps its
# original integer type instead of being coerced to character.
value_cols <- setdiff(names(r), "V1")
r[value_cols] <- lapply(r[value_cols], function(col) {
  col <- as.character(col)
  col[is.na(col)] <- "0"
  col
})

such that

> r
  V1 V4.ARS_BFGL_BAC_19454 V4.ARS_BFGL_BAC_27364 V4.ARS_BFGL_BAC_35552 V4.ARS_BFGL_NGS_10035 V4.ARS_BFGL_NGS_101456
1  1                   C/T                   G/G                   C/C                   A/A                    T/T
2  2                     0                   A/C                   A/G                   T/C                      0
3  3                   C/T                   G/T                   T/C                   T/T                      0
4  4                     0                   A/G                   C/T                   T/C                    C/C
ThomasIsCoding
  • 96,636
  • 9
  • 24
  • 81
0

`pivot_wider()` and `pivot_longer()` in tidyr are convenient for reshaping data between long and wide form. You can choose the value used for missing cells with the `values_fill` argument.

library(dplyr)
library(tidyr)

# Reshape long -> wide: one row per sample (V1), one column per V3 value,
# with the cells taken from V4.
df %>%
  # Convert V4 to character so that the string "0" is a valid fill value.
  mutate(V4 = as.character(V4)) %>%
  # `id_cols` is passed by name: recent tidyr versions place `...` directly
  # after `data`, so a bare positional `-V2` no longer binds to `id_cols`.
  pivot_wider(
    id_cols = V1,
    names_from = V3,
    values_from = V4,
    # Sample/V3 combinations absent from the input get "0" instead of NA.
    values_fill = list(V4 = "0")
  )

#      V1 ARS_BFGL_BAC_19454 ARS_BFGL_BAC_27364 ARS_BFGL_BAC_35552 ARS_BFGL_NGS_10035 ARS_BFGL_NGS_101456
#   <int> <chr>              <chr>              <chr>              <chr>              <chr>              
# 1     1 C/T                G/G                C/C                A/A                T/T                
# 2     2 0                  A/C                A/G                T/C                0                  
# 3     3 C/T                G/T                T/C                T/T                0                  
# 4     4 0                  A/G                C/T                T/C                C/C  
Darren Tsai
  • 32,117
  • 5
  • 21
  • 51