1

I want to add a new column in R which summarizes my subgroups into groups.

Here is my example:

# Example data: one row per (id, subgroup) observation.
id <- c(1, 2, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6)
subgroup <- c(
  "lightred", "marine", "cyan", "rose", "bordeaux", "darkred", "sky",
  "gras", "bottle", "lightgreen", "darkred", "marine", "lightgreen"
)
# Build the data frame directly from the vectors: wrapping them in cbind()
# first creates a character matrix, which silently turns `id` into text.
# stringsAsFactors = FALSE keeps `subgroup` a character column on R < 4.0 too.
data <- data.frame(id, subgroup, stringsAsFactors = FALSE)

> data
   id   subgroup
1   1   lightred
2   2     marine
3   2       cyan
4   3       rose
5   4   bordeaux
6   4    darkred
7   4        sky
8   5       gras
9   5     bottle
10  5 lightgreen
11  6    darkred
12  6     marine
13  6 lightgreen

Now I want to add a new column "colour" which groups the attributes into 3 groups: "red", "green" and "blue". Can I assign the subgroups to a variable first and then assign them to a group?

# Lookup vectors: each colour group lists the subgroups that belong to it.
red <- c("lightred", "darkred", "rose", "bordeaux")
blue <- c("marine", "cyan", "sky")
green <- c("gras", "bottle", "lightgreen")

It should look like this at the end:

> data
   id   subgroup colour
1   1   lightred    red
2   2     marine   blue
3   2       cyan   blue
4   3       rose    red
5   4   bordeaux    red
6   4    darkred    red
7   4        sky   blue
8   5       gras  green
9   5     bottle  green
10  5 lightgreen  green
11  6    darkred    red
12  6     marine   blue
13  6 lightgreen  green

Thanks!

Jaap
  • 81,064
  • 34
  • 182
  • 193
A_beginner
  • 69
  • 4
  • This might be helpful. https://stackoverflow.com/questions/7547597/dictionary-style-replace-multiple-items. Although, it would require a bit of modification. – Ronak Shah Jul 04 '18 at 04:57
  • Isn't this question a duplicate of your earlier question https://stackoverflow.com/questions/51164948/add-a-new-column-based-on-grouping-attributes ? – rar Jul 04 '18 at 06:03

6 Answers6

2

Using case_when from dplyr:

# Subgroup names belonging to each colour group.
red <- c("lightred", "darkred", "rose", "bordeaux")
blue <- c("marine", "cyan", "sky")
green <- c("gras", "bottle", "lightgreen")

# Classify every row by the first lookup vector that contains its subgroup.
# case_when() is namespace-qualified so the snippet works without dplyr
# being attached.
data$colour <- dplyr::case_when(
  data$subgroup %in% red ~ "red",
  data$subgroup %in% blue ~ "blue",
  data$subgroup %in% green ~ "green",
  # Unmatched subgroups keep their own name. All right-hand sides must share
  # a type, so coerce to character in case `subgroup` is stored as a factor.
  TRUE ~ as.character(data$subgroup)
)
Tim Biegeleisen
  • 502,043
  • 27
  • 286
  • 360
2

A slightly unconventional approach uses cut. We create a named list that maps each group to its subgroups, and match data$subgroup against the flattened values of that list. The break points are the cumulative sums of the group sizes, and the labels are the names of the list.

# Group definitions: each element name is a colour group, each value holds
# the subgroups it contains.
new_list <- list(
  red = c("lightred", "darkred", "rose", "bordeaux"),
  blue = c("marine", "cyan", "sky"),
  green = c("gras", "bottle", "lightgreen")
)

# match() gives each subgroup's position in the flattened list. The cumulative
# group sizes are the last position of each group, so cut() can bin positions
# into groups and label them with the list names.
data[["colour"]] <- cut(
  x = match(data[["subgroup"]], unlist(new_list)),
  breaks = c(0, cumsum(lengths(new_list))),
  labels = names(new_list)
)


data
#   id   subgroup colour
#1   1   lightred    red
#2   2     marine   blue
#3   2       cyan   blue
#4   3       rose    red
#5   4   bordeaux    red
#6   4    darkred    red
#7   4        sky   blue
#8   5       gras  green
#9   5     bottle  green
#10  5 lightgreen  green
#11  6    darkred    red
#12  6     marine   blue
#13  6 lightgreen  green

where

# Running total of the group sizes; these are the upper break points passed
# to cut() above (after a leading 0).
cumsum(lengths(new_list))
# red  blue green 
#   4     7    10 

Another alternative (suggested by @Jaap) is to create a data frame from new_list using stack, with values holding the individual colours and ind holding the corresponding group. We then match subgroup against values and take the respective group (ind).

# stack() turns the named list into a two-column lookup table:
# `values` (the subgroup) and `ind` (the name of the group it came from).
ref <- stack(new_list)
# Find each row's subgroup in the lookup table and take the matching group.
data[["colour"]] <- ref[["ind"]][match(data[["subgroup"]], ref[["values"]])]
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
0
library(dplyr)

# Example data. Build the data frame from the vectors directly: going through
# cbind() first would coerce the numeric `id` to character.
id <- c(1, 2, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6)
subgroup <- c(
  "lightred", "marine", "cyan", "rose", "bordeaux", "darkred", "sky",
  "gras", "bottle", "lightgreen", "darkred", "marine", "lightgreen"
)
data <- data.frame(id, subgroup, stringsAsFactors = FALSE)

# Classify each subgroup by regular expression. The patterns are anchored
# with ^...$ so only whole-value matches count; an unanchored alternation
# would also match substrings (e.g. "gras" inside "grass").
data <- data %>%
  dplyr::mutate(
    colour = dplyr::case_when(
      grepl("^(lightred|darkred|rose|bordeaux)$", subgroup) ~ "red",
      grepl("^(marine|cyan|sky)$", subgroup) ~ "blue",
      grepl("^(gras|bottle|lightgreen)$", subgroup) ~ "green",
      # Anything that matches none of the groups is labelled explicitly.
      TRUE ~ "else"
    )
  )
data
MHammer
  • 1,274
  • 7
  • 12
0

Using dplyr and plyr:

mapvalues replaces each value found in x with the corresponding value in y. Here x holds the unique subgroups and y the colour each one should be mapped to:

# Lookup pair: x[i] is a subgroup value and y[i] the colour it maps to.
x <- c(
  "lightred", "darkred", "rose", "bordeaux",
  "marine", "cyan", "sky",
  "gras", "bottle", "lightgreen"
)
y <- rep(c("red", "blue", "green"), times = c(4, 3, 3))

# Add a `color` column by replacing every subgroup value found in x with its
# counterpart in y. The result is printed, not assigned back to `data`.
data %>%
  dplyr::mutate(color = plyr::mapvalues(subgroup, from = x, to = y))
Mankind_008
  • 2,158
  • 2
  • 9
  • 15
0
# Classify each subgroup by membership in the lookup vectors `red`, `blue`
# and `green`. Green is tested explicitly instead of being the catch-all, so
# a subgroup that appears in none of the vectors yields NA rather than being
# mislabelled "green".
data$colour <- ifelse(
  data$subgroup %in% red, "red",
  ifelse(
    data$subgroup %in% blue, "blue",
    ifelse(data$subgroup %in% green, "green", NA_character_)
  )
)

   id   subgroup colour
1   1   lightred    red
2   2     marine   blue
3   2       cyan   blue
4   3       rose    red
5   4   bordeaux    red
6   4    darkred    red
7   4        sky   blue
8   5       gras  green
9   5     bottle  green
10  5 lightgreen  green
11  6    darkred    red
12  6     marine   blue
13  6 lightgreen  green
Lennyy
  • 5,932
  • 2
  • 10
  • 23
0
# Turn the three lookup vectors into a long table (`values` = subgroup,
# `ind` = group name) and join it to `data` on the subgroup. The outer
# parentheses print the merged result as well as storing it in `a`.
(a <- merge(
  x = data,
  y = stack(list(red = red, blue = blue, green = green)),
  by.x = "subgroup",
  by.y = "values"
))
     subgroup id   ind
1    bordeaux  4   red
2      bottle  5 green
3        cyan  2  blue
4     darkred  4   red
5     darkred  6   red
6        gras  5 green
7  lightgreen  5 green
8  lightgreen  6 green
9    lightred  1   red
10     marine  2  blue
11     marine  6  blue
12       rose  3   red
13        sky  4  blue

# merge() returned the rows sorted by the join key (subgroup); print them
# reordered by id instead.
a[order(a$id),]

    subgroup id   ind
9    lightred  1   red
3        cyan  2  blue
10     marine  2  blue
12       rose  3   red
1    bordeaux  4   red
4     darkred  4   red
13        sky  4  blue
2      bottle  5 green
6        gras  5 green
7  lightgreen  5 green
5     darkred  6   red
8  lightgreen  6 green
11     marine  6  blue

You can do:

# Flatten the lookup vectors into one named vector; unlist() names the
# elements after their group plus a running index ("red1", "red2", ...).
colors <- unlist(list(red = red, blue = blue, green = green))
# Strip only the trailing index that unlist() appended, leaving the group name.
names(colors) <- sub("\\d+$", "", names(colors))
# Look the subgroup up in the data frame's own column, not in a separate
# global `subgroup` vector that may be missing or out of sync with `data`.
data$color <- names(colors)[match(data$subgroup, colors)]
data
  id   subgroup color
1   1   lightred   red
2   2     marine  blue
3   2       cyan  blue
4   3       rose   red
5   4   bordeaux   red
6   4    darkred   red
7   4        sky  blue
8   5       gras green
9   5     bottle green
10  5 lightgreen green
11  6    darkred   red
12  6     marine  blue
13  6 lightgreen green
Onyambu
  • 67,392
  • 3
  • 24
  • 53