0

I have a data frame (I took an example from another post, but it works for my question):

library(data.table)

# Sample data: a category label and a frequency for each row.
data <- data.table(
  Category = c("First", "First", "First", "Second", "Third", "Third", "Second"),
  Frequency = c(10, 15, 5, 2, 14, 20, 3)
)

I'd like to sum the frequencies for "First" and group all the other categories under the name "Others", like this:

# Desired result: "First" keeps its own total; all other categories are pooled.
data2 <- data.table(
  category = c("First", "Others"),
  Frequency = c(30, 39)
)

How can I do this? Thank you.

EDIT

I have edited my data frame, and it now looks like this:

# Same sample data as before, extended with a Bloc column.
data <- data.table(
  Category = c("First", "First", "First", "Second", "Third", "Third", "Second"),
  Frequency = c(10, 15, 5, 2, 14, 20, 3),
  Bloc = c("B1", "B2", "B1", "B1", "B1", "B2", "B1")
)

What should I do in order to get the following?

# Desired result: frequency totals per (category, Bloc) pair, with every
# category other than "First" pooled under "Others".
data2 <- data.table(
  category = c("First", "First", "Others", "Others"),
  Frequency = c(15, 15, 19, 20),
  Bloc = c("B1", "B2", "B1", "B2")
)

Thanks

Alexis Begni
  • 106
  • 9

3 Answers

1

You could replace all the categories which are not 'First' with 'Other' and then sum by group.

library(data.table)

# Relabel, by reference, every row whose category is not 'First'.
data[Category != 'First', Category := 'Other']

# Total the frequencies within each Category/Bloc combination.
data[, .(Frequency = sum(Frequency)), by = .(Category, Bloc)]

#   Category Bloc Frequency
#1:    First   B1        15
#2:    First   B2        15
#3:    Other   B1        19
#4:    Other   B2        20
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
  • When I try, I get this error message: "Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(":=")." – Alexis Begni Aug 18 '20 at 13:23
  • Are you using it on the data that you shared? Your data needs to be a `data.table`. According to the error message, it doesn't look like it is one. – Ronak Shah Aug 18 '20 at 13:28
0

Another option, which does not change the original dataset:

 library(data.table)

 # Category whose total is reported on its own row.
 group <- "First"

 # Total the frequencies inside and outside `group`; `data` is only read.
 ingroup <- with(data, sum(Frequency[Category == group]))
 not_ingroup <- with(data, sum(Frequency[Category != group]))

 # Assemble the two-row summary table.
 data.table(
   Category = c(group, "Others"),
   Frequency = c(ingroup, not_ingroup)
 )
ichisa
  • 11
  • 3
0
library(data.table)
library(dplyr)  # provides %>%, mutate(), case_when(), group_by(), summarise()

# Sample data: a category label and a frequency for each row.
data <- data.table(
  Category = c("First", "First", "First", "Second", "Third", "Third", "Second"),
  Frequency = c(10, 15, 5, 2, 14, 20, 3)
)

# Pool every category other than "First" under "Other", then compute one
# frequency total per resulting category.
data <- data %>%
  mutate(Category = case_when(
    Category == "First" ~ "First",
    TRUE ~ "Other"
  )) %>%
  group_by(Category) %>%
  summarise(Frequency = sum(Frequency))

image

Susan Switzer
  • 1,531
  • 8
  • 34