1

I'm trying to accomplish something like what is illustrated in this this question

However, in my situation, I'll have there might be multiple cases where I have 2 columns that evaluates to True:

year cat1 cat2 cat3 ... catN
2000  0    1    1        0
2001  1    0    0        0
2002  0    1    0        1
....
2018  0    1    0        0

In the DF above year 2000 can have cat2 and cat3 categories. In this case, how do I create a new row, that will have the second category. Something like this:

year category 
2000  cat2
2000  cat3
2001  cat1  
2002  cat2
2002  catN 
....
2018  cat2 
capiono
  • 2,875
  • 10
  • 40
  • 76

4 Answers4

2

One way would be to get row/column indices of all the values which are 1, subset the year values from row indices and column names from column indices to create a new dataframe.

mat <- which(df[-1] == 1, arr.ind = TRUE)
df1 <- data.frame(year = df$year[mat[, 1]], category = names(df)[-1][mat[, 2]])
df1[order(df1$year), ]

#  year category
#2 2000     cat2
#5 2000     cat3
#1 2001     cat1
#3 2002     cat2
#6 2002     catN
#4 2018     cat2

data

df <- structure(list(year = c(2000L, 2001L, 2002L, 2018L), cat1 = c(0L, 
1L, 0L, 0L), cat2 = c(1L, 0L, 1L, 1L), cat3 = c(1L, 0L, 0L, 0L
), catN = c(0L, 0L, 1L, 0L)), class = "data.frame", row.names = c(NA, -4L))
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
2

You can also use melt in reshape2

new_df = melt(df, id.vars='year')

new_df[new_df$value==1, c('year','variable')]

Data

df = data.frame(year=c(2000,2001),
                cat1=c(0,1),
                cat2=c(1,0),
                cat3=c(1,0))

Output:

  year variable
2 2001     cat1
3 2000     cat2
5 2000     cat3
akrun
  • 874,273
  • 37
  • 540
  • 662
Quang Hoang
  • 146,074
  • 10
  • 56
  • 74
2

You can use gather from the Tidyverse

library(tidyverse)

data = tribble(
  ~year,~ cat1, ~cat2, ~cat3, ~catN,
  2000,  0,    1,    1,        0,
  2001,  1,    0,    0 ,       0,
  2002,  0,    1,    0,        1
)

data %>% 
  gather(key = "cat", value = "bool", 2:ncol(.)) %>% 
  filter(bool == 1)
akrun
  • 874,273
  • 37
  • 540
  • 662
cyrilb38
  • 924
  • 6
  • 17
0

Here is another variation with gather, by mutateing the columns having 0 to NA, then gather while removing the NA elements with na.rm = TRUE

library(dplyr)
library(tidyr)
data %>%
    mutate_at(-1, na_if, y = 0) %>%
    gather(category, val, -year, na.rm = TRUE) %>% 
    select(-val)
# A tibble: 5 x 2
#   year category
#  <dbl> <chr>   
#1  2001 cat1    
#2  2000 cat2    
#3  2002 cat2    
#4  2000 cat3    
#5  2002 catN    

data

data <- structure(list(year = c(2000, 2001, 2002), cat1 = c(0, 1, 0), 
    cat2 = c(1, 0, 1), cat3 = c(1, 0, 0), catN = c(0, 0, 1)), row.names = c(NA, 
-3L), class = c("tbl_df", "tbl", "data.frame"))
akrun
  • 874,273
  • 37
  • 540
  • 662