4

I have a data.frame of the following form:

> set.seed(1)
> myp <- paste0('P', sort(sample(1:15, 10)))
> mydf <- data.frame(param=c(paste0(myp, 'B'), paste0(myp, 'R'), paste0(myp, 'max'), paste0(myp, 'min')),
+ value1=NA, value2=NA, value3=NA)
> rownames(mydf) <- mydf$param
> mydf$param <- NULL
> mydf
       value1 value2 value3
P1B        NA     NA     NA
P3B        NA     NA     NA
P4B        NA     NA     NA
P5B        NA     NA     NA
P6B        NA     NA     NA
P8B        NA     NA     NA
P9B        NA     NA     NA
P10B       NA     NA     NA
P11B       NA     NA     NA
P14B       NA     NA     NA
P1R        NA     NA     NA
P3R        NA     NA     NA
P4R        NA     NA     NA
P5R        NA     NA     NA
P6R        NA     NA     NA
P8R        NA     NA     NA
P9R        NA     NA     NA
P10R       NA     NA     NA
P11R       NA     NA     NA
P14R       NA     NA     NA
P1max      NA     NA     NA
P3max      NA     NA     NA
P4max      NA     NA     NA
P5max      NA     NA     NA
P6max      NA     NA     NA
P8max      NA     NA     NA
P9max      NA     NA     NA
P10max     NA     NA     NA
P11max     NA     NA     NA
P14max     NA     NA     NA
P1min      NA     NA     NA
P3min      NA     NA     NA
P4min      NA     NA     NA
P5min      NA     NA     NA
P6min      NA     NA     NA
P8min      NA     NA     NA
P9min      NA     NA     NA
P10min     NA     NA     NA
P11min     NA     NA     NA
P14min     NA     NA     NA

I want to update the names of the rows, and for that I have a conversion table that looks like the following one:

> conv.df <- data.frame(name=myp, new.name=paste0('P', 1:10))
> conv.df
   name new.name
1    P1       P1
2    P3       P2
3    P4       P3
4    P5       P4
5    P6       P5
6    P8       P6
7    P9       P7
8   P10       P8
9   P11       P9
10  P14      P10

The only thing I want to do is update the row names in mydf so that they reflect the new.name values in the conversion table conv.df.

It seems very easy, but I cannot wrap my head around it... I would try a regular expression approach; my only problem is that I am not 100% sure that I would ONLY encounter row names of the form PnB, PnR, Pnmax, Pnmin... I would like a solution that applies to any PnX instance (it would always be Pn followed by [:alpha:] characters).

DaniCee
  • 2,397
  • 6
  • 36
  • 59
  • I reckon a boring old loop should do it - something like: https://stackoverflow.com/questions/55564763/how-to-substitute-multiple-characters-in-a-string-in-r or https://stackoverflow.com/questions/26171318/regex-for-preserving-case-pattern-capitalization/26171700 – thelatemail Sep 02 '19 at 04:21

3 Answers3

3

You can make the rowname a column, split the root "Pn" and letter "[:alpha:]" and then rename them as follows,

# Reproducible example data: 10 ids drawn from P1..P15, each combined with the
# suffixes B, R, max and min to give 40 row names; all value columns are NA.
set.seed(1)
myp <- paste0("P", sort(sample(1:15, 10)))
mydf <- data.frame(
  param = c(
    paste0(myp, "B"), paste0(myp, "R"), paste0(myp, "max"), paste0(myp, "min")
  ),
  value1 = NA, value2 = NA, value3 = NA
)
rownames(mydf) <- mydf$param
mydf$param <- NULL
mydf


library(tidyverse)

# Split every row name into its "P<number>" root and the trailing suffix,
# recode the root, then glue the two parts back together as the row name.
mydf %>%
  rownames_to_column() %>%
  # `\\d+` accepts ids with any number of digits; the earlier `\\d\\d?` stopped
  # after two digits and would have split "P100B" into "P10" and "0B".
  mutate(
    root = sub("^(P\\d+).*$", "\\1", rowname),
    letter = sub("^P\\d+(.*)$", "\\1", rowname)
  ) %>%
  mutate(root = recode(root,
    P1 = "P1",
    P3 = "P2",
    P4 = "P3",
    P5 = "P4",
    P6 = "P5",
    P8 = "P6",
    P9 = "P7",
    P10 = "P8",
    P11 = "P9",
    P14 = "P10"
  )) %>%
  mutate(rowname = paste0(root, letter)) %>%
  column_to_rownames() %>%
  select(-root, -letter)

The answer by @teofil is good and it works. Here is another way to do it by still using recode and some meta programming,

library(tidyverse)

#' Recode one column of a data frame using a conversion table
#'
#' @param data Input data frame.
#' @param colname Unquoted name of the column to recode.
#' @param df_rename Conversion table with columns `name` (old values) and
#'   `new.name` (replacement values).
#' @return `data` with `colname` passed through `recode()`; with `recode()`'s
#'   defaults, values not listed in `df_rename$name` are kept unchanged.
rename_col_df <- function(data, colname, df_rename) {
  colname <- enexpr(colname) # capture the user-supplied column name as a symbol

  # Coerce to character: when the table was built with stringsAsFactors = TRUE
  # these columns are factors, and a factor used as an argument name would be
  # interpreted through its integer codes instead of its labels.
  old_name <- as.character(df_rename$name)
  new_name <- as.character(df_rename$new.name)

  # Nothing to recode; recode() refuses to run without any replacement.
  if (length(old_name) == 0) {
    return(data)
  }

  # If an old name is listed more than once, the last entry wins.
  keep <- !duplicated(old_name, fromLast = TRUE)
  mapping <- setNames(new_name[keep], old_name[keep])

  # Splice the named vector into recode() as `old = "new"` arguments.
  data %>% mutate(!!colname := recode(!!colname, !!!mapping))
}

# Conversion table: one new sequential id per old id in `myp`.
conv.df <- data.frame(
  name = myp,
  new.name = paste0("P", seq_along(myp)),
  stringsAsFactors = FALSE
)

# Split each row name into root and suffix, recode the root through the
# conversion table, and rebuild the row name.
mydf %>%
  rownames_to_column() %>%
  # `\\d+` accepts ids with any number of digits; `\\d\\d?` would cut "P100B"
  # into "P10" and "0B".
  mutate(
    root = sub("^(P\\d+).*$", "\\1", rowname),
    letter = sub("^P\\d+(.*)$", "\\1", rowname)
  ) %>%
  rename_col_df(root, conv.df) %>%
  mutate(rowname = paste0(root, letter)) %>%
  column_to_rownames() %>%
  select(-root, -letter)

If we wanted to apply the renaming to a vector instead of a dataframe,

#' Recode the values of a vector using a conversion table
#'
#' @param vec Character vector (or factor) to be recoded.
#' @param df_rename Conversion table with columns `name` (old values) and
#'   `new.name` (replacement values).
#' @return `vec` with every value found in `df_rename$name` replaced by the
#'   corresponding `new.name`; all other values are returned unchanged.
rename_vec_df <- function(vec, df_rename) {
  # Coerce to character so a table built with stringsAsFactors = TRUE is
  # matched by label rather than by integer factor code.
  old_name <- as.character(df_rename$name)
  new_name <- as.character(df_rename$new.name)

  # For a factor, recode the levels and keep the factor structure.
  if (is.factor(vec)) {
    levels(vec) <- rename_vec_df(levels(vec), df_rename)
    return(vec)
  }

  # Look the values up in the reversed table so that, when an old name is
  # listed more than once, the last entry wins.
  hit <- match(vec, rev(old_name))
  found <- !is.na(hit)
  vec[found] <- rev(new_name)[hit[found]]
  vec
}

# Seed the RNG before sampling so the conversion table (and therefore the
# result of the call below) is the same on every run.
set.seed(1)
myp <- paste0("P", sort(sample(1:15, 10)))
conv.df <- data.frame(
  name = myp,
  new.name = paste0("P", seq_along(myp)),
  stringsAsFactors = FALSE
)

vec <- c("P1", "P14", "P10")

rename_vec_df(vec, conv.df)

To learn more about the techniques used here,

https://rlang.r-lib.org/reference/quotation.html https://adv-r.hadley.nz/metaprogramming.html

Sada93
  • 2,785
  • 1
  • 10
  • 21
  • Is there a way to pass `conv.df` to `recode(...)` without having to rewrite every recoding? – thelatemail Sep 02 '19 at 04:41
  • I was just going to ask the same thing... this is just an example, but `n` can be in the hundreds – DaniCee Sep 02 '19 at 04:54
  • You can use `dplyr::left_join` as an alternative to `recode`. – teofil Sep 02 '19 at 04:56
  • Could you write it as a general answer to recode any vector (same as `rownames(mydf)`) given a conversion table (same as `conv.df`)? Many thanks – DaniCee Sep 02 '19 at 05:00
  • @DaniCee the answer by teofil does exactly that. – Sada93 Sep 02 '19 at 05:11
  • Edited my answer to include an alternative method using meta-programming (probably overkill). – Sada93 Sep 02 '19 at 05:19
  • 1
    @DaniCee you can use the function `rename_col_df` to rename any column. Specify the column name and the dataframe which has columns name and new.name. You should be able to modify it yourself for it to accept a vector instead of a dataframe. – Sada93 Sep 02 '19 at 05:27
  • not sure how to do it cause I never used tidyverse... I get `no applicable method for 'mutate_' applied to an object of class "character"`, what's the mutate equivalent for a character vector input instead of a dataframe? – DaniCee Sep 02 '19 at 05:45
  • Added an additional snippet which should work on vectors. – Sada93 Sep 02 '19 at 05:56
  • Thanks! but `vec` should be equal to `rownames(mydf)`, ie like `c("P1B", "P3B", "P10R", "P8max", "P10min", ...)` – DaniCee Sep 02 '19 at 06:24
  • that would be perfect, a function to pass `vec` and `conv.df`, where `vec` is equal to `rownames(mydf)`, and it returns `newvec` recoded – DaniCee Sep 02 '19 at 06:26
  • You can do the change @DaniCee, apply gsub inside the function to split the names. – Sada93 Sep 02 '19 at 06:28
1

Following up on @Sada93 code. To avoid the recode step, use join:

library(tidyverse)
# Reproducible example data: 10 ids drawn from P1..P15, each combined with the
# suffixes B, R, max and min to give 40 row names; all value columns are NA.
set.seed(1)
myp <- paste0("P", sort(sample(1:15, 10)))
mydf <-
  data.frame(
    param = c(
      paste0(myp, "B"),
      paste0(myp, "R"),
      paste0(myp, "max"),
      paste0(myp, "min")
    ),
    value1 = NA,
    value2 = NA,
    value3 = NA
  )
rownames(mydf) <- mydf$param
mydf$param <- NULL
mydf

# Keep both columns as character so the join key has the same type as the
# `name` column extracted from the row names below.
conv.df <- data.frame(
  name = myp,
  new.name = paste0("P", seq_along(myp)),
  stringsAsFactors = FALSE
)

# Split each row name into root (`name`) and suffix (`letter`), look the root
# up in the conversion table with a join, and rebuild the row name.
mydf %>%
  rownames_to_column() %>%
  # `\\d+` accepts ids with any number of digits; `\\d\\d?` would cut "P100B"
  # into "P10" and "0B".
  mutate(
    name = sub("^(P\\d+).*$", "\\1", rowname),
    letter = sub("^P\\d+(.*)$", "\\1", rowname)
  ) %>%
  left_join(conv.df, by = "name") %>%
  mutate(rowname = paste0(new.name, letter)) %>%
  column_to_rownames() %>%
  select(-name, -letter)

       value1 value2 value3 new.name
P1B        NA     NA     NA       P1
P2B        NA     NA     NA       P2
P3B        NA     NA     NA       P3
P4B        NA     NA     NA       P4
P5B        NA     NA     NA       P5
P6B        NA     NA     NA       P6
P7B        NA     NA     NA       P7
P8B        NA     NA     NA       P8
P9B        NA     NA     NA       P9
P10B       NA     NA     NA      P10
P1R        NA     NA     NA       P1
teofil
  • 2,344
  • 1
  • 8
  • 17
  • Thanks! This seems like it; could you write it in such a way that it can recode any vector (not just the rownames of a data frame)? I need to recode other things, and trying to rewrite it to accept a vector instead of a data frame, I'm getting some errors... Thanks! – DaniCee Sep 02 '19 at 05:19
  • @Sada93's edited answer and the function there does what you need. – teofil Sep 02 '19 at 05:55
1

You cannot have duplicate row names in a data frame. Here is one way in base R to add the new name as a column instead. We extract the common part of the original row names, which is "P" followed by a number, match it against conv.df$name, and take the corresponding conv.df$new.name.

# Reduce each row name to its leading "P<digits>" root, find that root in the
# conversion table's `name` column, and store the matching `new.name` value
# in a new column of mydf.
mydf[["new_name"]] <- conv.df[["new.name"]][
  match(sub("(P\\d+).*", "\\1", rownames(mydf)), conv.df[["name"]])
]


mydf
#       value1 value2 value3 new_name
#P1B        NA     NA     NA       P1
#P2B        NA     NA     NA       P2
#P3B        NA     NA     NA       P3
#P4B        NA     NA     NA       P4
#P7B        NA     NA     NA       P5
#P8B        NA     NA     NA       P6
#P9B        NA     NA     NA       P7
#P11B       NA     NA     NA       P8
#P12B       NA     NA     NA       P9
#P13B       NA     NA     NA      P10
#P1R        NA     NA     NA       P1
#P2R        NA     NA     NA       P2
#...

where

sub("(P\\d+).*", "\\1", rownames(mydf)) #returns

#[1] "P1"  "P2"  "P3"  "P4"  "P7"  "P8"  "P9"  "P11" "P12" "P13" "P1"  "P2"  "P3"  
#    "P4"  "P7"  "P8"  "P9"  "P11" "P12" "P13" "P1"  "P2"  "P3"  "P4"  "P7"  "P8"  
#    "P9"  "P11" "P12" "P13" "P1"  "P2"  "P3"  "P4"  "P7"  "P8"  "P9"  "P11" "P12" 
#    "P13"
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213