0

I have a data frame with two columns whose values I want to concatenate into a new column. From the first column, Combination, I want to use only the part of the string that comes before the first underscore, and concatenate it with the full value of the second column, new_var. This is my data frame:

# Reproducible example data: two character columns with non-sequential
# integer row names. new_var is a character vector named by Combination.
df <- structure(
  list(
    Combination = c(
      "Animalborne_Archival",
      "Animalborne_Satellite_Archival",
      "BRUV_Acoustic",
      "Stationary_Acoustic_Radio_PIT",
      "Stationary_Acoustic",
      "Controlled_Archival"
    ),
    new_var = c(
      Animalborne_Archival = "NoReceiver",
      Animalborne_Satellite_Archival = "NoReceiver",
      BRUV_Acoustic = "Receiver",
      Stationary_Acoustic_Radio_PIT = "Receiver",
      Stationary_Acoustic = "Receiver",
      Controlled_Archival = "NoReceiver"
    )
  ),
  row.names = c(7L, 188L, 154L, 41L, 134L, 159L),
  class = "data.frame"
)

Any ideas?

jpsmith
  • 11,023
  • 5
  • 15
  • 36
Kristen Cyr
  • 629
  • 5
  • 16
  • 3
    I'm not sure what your data.frame is called, but if it's named `dd` you can do `transform(dd, newcol=paste0(sub("_.*", "", Combination), new_var))`. Just break it down into parts. There are questions about removing parts of a string after a certain character and then `paste()` or `paste0()` is the way you do concatenation in R. See https://stackoverflow.com/questions/38291794/extract-string-before and here https://stackoverflow.com/questions/7201341/how-can-two-strings-be-concatenated – MrFlick Mar 07 '23 at 16:02
  • Also, note that you should reduce your example data set to a minimal example. There seem to be more rows here than necessary – Maël Mar 07 '23 at 16:04

1 Answer

0
# Example data copied from the question: two character columns with
# non-sequential integer row names. new_var carries Combination as names.
df <- structure(
  list(
    Combination = c(
      "Animalborne_Archival",
      "Animalborne_Satellite_Archival",
      "BRUV_Acoustic",
      "Stationary_Acoustic_Radio_PIT",
      "Stationary_Acoustic",
      "Controlled_Archival"
    ),
    new_var = c(
      Animalborne_Archival = "NoReceiver",
      Animalborne_Satellite_Archival = "NoReceiver",
      BRUV_Acoustic = "Receiver",
      Stationary_Acoustic_Radio_PIT = "Receiver",
      Stationary_Acoustic = "Receiver",
      Controlled_Archival = "NoReceiver"
    )
  ),
  row.names = c(7L, 188L, 154L, 41L, 134L, 159L),
  class = "data.frame"
)

With base R (cf. @MrFlick's comment):

# sub("_.*", "", x) drops everything from the first underscore onward;
# the remaining prefix is then joined to new_var with "_" in between.
df |>
  transform(
    newcol = paste0(sub("_.*", "", Combination), "_", new_var)
  )
#>                        Combination    new_var                 newcol
#> 7             Animalborne_Archival NoReceiver Animalborne_NoReceiver
#> 188 Animalborne_Satellite_Archival NoReceiver Animalborne_NoReceiver
#> 154                  BRUV_Acoustic   Receiver          BRUV_Receiver
#> 41   Stationary_Acoustic_Radio_PIT   Receiver    Stationary_Receiver
#> 134            Stationary_Acoustic   Receiver    Stationary_Receiver
#> 159            Controlled_Archival NoReceiver  Controlled_NoReceiver

With dplyr (very similar):

library(dplyr)

# Strip everything from the first underscore in Combination, then join the
# remaining prefix to new_var with "_" in a new column called newcol.
df |>
  mutate(
    newcol = paste0(sub("_.*", "", Combination), "_", new_var)
  )
#>                        Combination    new_var                 newcol
#> 7             Animalborne_Archival NoReceiver Animalborne_NoReceiver
#> 188 Animalborne_Satellite_Archival NoReceiver Animalborne_NoReceiver
#> 154                  BRUV_Acoustic   Receiver          BRUV_Receiver
#> 41   Stationary_Acoustic_Radio_PIT   Receiver    Stationary_Receiver
#> 134            Stationary_Acoustic   Receiver    Stationary_Receiver
#> 159            Controlled_Archival NoReceiver  Controlled_NoReceiver

With tidyr::unite():

library(dplyr) # mutate() and select() below come from dplyr, not tidyr
library(tidyr)

df |>
  # Helper column: the part of Combination before the first underscore.
  mutate(tmp = sub("_.*", "", Combination)) |>
  # Paste tmp and new_var together with "_" into newcol; remove = FALSE
  # keeps the input columns in the result.
  unite(tmp, new_var, col = newcol, sep = "_", remove = FALSE) |>
  # Drop the helper column so only the original columns and newcol remain.
  select(-tmp)
#>                        Combination                 newcol    new_var
#> 7             Animalborne_Archival Animalborne_NoReceiver NoReceiver
#> 188 Animalborne_Satellite_Archival Animalborne_NoReceiver NoReceiver
#> 154                  BRUV_Acoustic          BRUV_Receiver   Receiver
#> 41   Stationary_Acoustic_Radio_PIT    Stationary_Receiver   Receiver
#> 134            Stationary_Acoustic    Stationary_Receiver   Receiver
#> 159            Controlled_Archival  Controlled_NoReceiver NoReceiver
bretauv
  • 7,756
  • 2
  • 20
  • 57