1

I have two columns

  V1    V2 
  T     1 
  A     0       
  C     0    

If column V2 is 1, I want to replace the nucleotide in V1 with its complementary nucleotide; if it is 0, the nucleotide is left as is. I wrote a for loop (my data has many rows), but after running it I get

  V1 V2 V3
  T  1 NA
  A  0 NA
  C  1 G

I use this code:

# Intended to fill Tri1_a$V3 row by row: the complement of V1 (T<->A, G<->C)
# where V2 is 1, otherwise a copy of V1.
# NOTE(review): nrow(Tri1_a) is a single number, so this loop body runs exactly
# once, with i equal to the last row index. Rows before the last one are never
# visited, so their V3 entries are never assigned by this code. Iterating over
# every row would need a sequence such as seq_len(nrow(Tri1_a)).
for(i in nrow(Tri1_a)){

 # Rows flagged with V2 == 1: write the complementary base into V3.
 if(Tri1_a$V2[i] == 1){

  if(Tri1_a$V1[i] == "T")

    Tri1_a$V3[i] = "A"

  if(Tri1_a$V1[i] == "A")

    Tri1_a$V3[i] = "T"

 if(Tri1_a$V1[i] == "G")
    Tri1_a$V3[i] = "C"
 if(Tri1_a$V1[i] == "C")
    Tri1_a$V3[i] = "G"
 }
else{ 

 # Unflagged rows: V3 is a copy of V1.
 Tri1_a$V3[i] = Tri1_a$V1[i]
  }

# NOTE(review): a for loop assigns the next element of its sequence to i at the
# start of each iteration, so this manual increment does not advance the loop.
i = i + 1
}              

but I want to get

  V1 V2 V3
  T  1 A
  A  0 A
  C  1 G

Where is the mistake?

Is it possible to do this without the for loop, for example using apply?

zx8754
  • 52,746
  • 12
  • 114
  • 209
CN_AE
  • 47
  • 5
  • 1
    "i" in your loop takes one value only: `nrow(Tri1_a)`. You might need `1:nrow(Tri1_a)` and remove the `i = i + 1`. Alternatively, try a [named vector](http://stackoverflow.com/questions/15303283/how-to-do-vlookup-and-fill-down-like-in-excel-in-r) like `map = c(A = "T", T = "A", C = "G", G = "C"); ifelse(as.logical(Tri1_a$V2), map[Tri1_a$V1], Tri1_a$V1)` or, also, `ifelse(as.logical(Tri1_a$V2), chartr("ATCG", "TAGC", Tri1_a$V1), Tri1_a$V1)` – alexis_laz Nov 30 '16 at 15:23

2 Answers

2

We can use ifelse:

# Dummy data: V1 holds one nucleotide per row, V2 flags (1) the rows whose
# nucleotide should be replaced by its complement.
set.seed(1)
genotype <- data.frame(
  V1 = sample(c("A", "C", "T", "G"), size = 10, replace = TRUE),
  V2 = sample(c(0, 1), size = 10, prob = c(0.6, 0.4), replace = TRUE),
  stringsAsFactors = FALSE
)

# V3 keeps V1 where V2 is 0; otherwise it is the complementary base
# (A <-> T, C <-> G). The innermost branch has no explicit test, so any V1
# value other than "A", "T" or "C" is mapped to "C".
genotype$V3 <- ifelse(
  genotype$V2 == 0, genotype$V1,
  ifelse(
    genotype$V1 == "A", "T",
    ifelse(
      genotype$V1 == "T", "A",
      ifelse(genotype$V1 == "C", "G", "C")
    )
  )
)

genotype
#    V1 V2 V3
# 1   C  0  C
# 2   C  0  C
# 3   T  1  A
# 4   G  0  G
# 5   A  1  T
# 6   G  0  G
# 7   G  1  C
# 8   T  1  A
# 9   T  0  T
# 10  A  1  T
zx8754
  • 52,746
  • 12
  • 114
  • 209
emilliman5
  • 5,816
  • 3
  • 27
  • 37
  • If the `V1` has some dummy value like "0" for missing genotype, this code will give "C", you might want to add another `ifelse`. – zx8754 Dec 01 '16 at 08:27
0

Using named vector:

# Lookup table: each element's name is a base, its value is the complement.
comp <- c(T = "A", A = "T", G = "C", C = "G")

# Where V2 is 1, look V1 up by name in the table; elsewhere keep V1 unchanged.
genotype$V3 <- ifelse(
  genotype$V2 == 1,
  comp[genotype$V1],
  genotype$V1
)
zx8754
  • 52,746
  • 12
  • 114
  • 209