0

Now, I have the same input data frame:

AAA <- c('APIS', 'APIS', 'PIPIS', 'AGAROS', 'AGAROS', 'AGAROS', 'NOTHING')
BBB <- c('a', 'a', 'a', 'b', 'b', 'c', NA)
CCC <- c(1, 2, NA, 4, 5, 6, 7)
DDD <- c("Mat", "ASG", "MNT", "NBEH", "DJ", "EU", "DHR")
test.data <- data.frame(AAA, BBB, CCC, DDD)
test.data

I want to reshape it so that each unique "AAA" variable become single row. Multiple entry of the "AAA" variable automatically become new column with suffix or prefix 1,2,3..... Sort of like this:

AAA <- c('APIS', 'PIPIS', 'AGAROS', 'NOTHING')
BBB_1 <- c('a', 'a', 'b', NA)
CCC_1 <- c(1, NA, 4, 7)
DDD_1 <- c("Mat", "MNT", "NBEH", "DHR")

BBB_2 <- c('a', NA, 'b', NA)
CCC_2 <- c(2, NA, 5, NA)
DDD_2 <- c("ASG", NA, "DJ", NA)

BBB_3 <- c(NA, NA, 'c', NA)
CCC_3 <- c(NA, NA, 6, NA)
DDD_3 <- c(NA, NA, "EU", NA)

output <- data.frame(AAA, BBB_1, CCC_1, DDD_1, BBB_2, CCC_2, DDD_2, 
                        BBB_3, CCC_3, DDD_3)
output

I've looked at melt and cast and a few other things, but none seem to do the job.

SUMIT
  • 563
  • 4
  • 12

2 Answers2

5

You can create a unique row number for each unique value of AAA and then cast to wide format.

library(dplyr)
library(tidyr)

test.data %>%
  mutate(row = row_number(), .by = AAA) %>%
  pivot_wider(names_from = row, values_from = BBB:DDD, names_vary = "slowest")

# A tibble: 4 × 10
#  AAA     BBB_1 CCC_1 DDD_1 BBB_2 CCC_2 DDD_2 BBB_3 CCC_3 DDD_3
#  <chr>   <chr> <dbl> <chr> <chr> <dbl> <chr> <chr> <dbl> <chr>
#1 APIS    a         1 Mat   a         2 ASG   NA       NA NA   
#2 PIPIS   a        NA MNT   NA       NA NA    NA       NA NA   
#3 AGAROS  b         4 NBEH  b         5 DJ    c         6 EU   
#4 NOTHING NA        7 DHR   NA       NA NA    NA       NA NA   

Using data.table -

library(data.table)
setDT(test.data)

dcast(test.data[, row := seq_len(.N), AAA], 
      AAA ~ row, value.var = c("BBB", "CCC", "DDD"))
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
1

Base R Solution:

# Input Data: test.data => data.frame
AAA <- c('APIS', 'APIS', 'PIPIS', 'AGAROS', 'AGAROS', 'AGAROS', 'NOTHING')
BBB <- c('a', 'a', 'a', 'b', 'b', 'c', NA)
CCC <- c(1, 2, NA, 4, 5, 6, 7)
DDD <- c("Mat", "ASG", "MNT", "NBEH", "DJ", "EU", "DHR")
test.data <- data.frame(AAA, BBB, CCC, DDD)
test.data

# Function to column bind all columns in a data.frame, grouped by given
# columns: cbind_by => function
cbind_by <- function(df, by){
  # Test that df is a data.frame: 
  # stop execution if not
  stopifnot(is.data.frame(df))
  # Test that the by vector contains values that are valid column names of the
  # df data.frame: 
  stopifnot(all(by %in% colnames(df)))
  # Private function to robustly row-bind two data.frames 
  # together regardless of schema differences: .robust_rbind => function
  .robust_rbind <- function(lhs_df, rhs_df){
    add_names_lhs <- setdiff(names(rhs_df), names(lhs_df))
    add_names_rhs <- setdiff(names(lhs_df), names(rhs_df))
    lhs_df[,add_names_lhs] <- NA
    rhs_df[,add_names_rhs] <- NA
    res_df <- rbind(lhs_df, rhs_df)
    return(res_df)
  }
  # Private function to robustly row-bind all data.frames in a list together
  # .robust_rbind_df_list => function
  .robust_rbind_df_list <- function(df_list){
    res_df <- Reduce(
      function(x, y){
        .robust_rbind(x, y)
      },
      df_list
    )
    return(res_df)
  }
  # Split-apply combine (column bind) by the by vector: 
  res_df <- .robust_rbind_df_list( 
    lapply(
      split(
        test.data, 
        do.call(
          paste, 
          as.data.frame(
            test.data[,by], 
            stringsAsFactors = FALSE
          )
        )
      ),
      function(x){
        # Cbind rows to cols: 
        ir <- suppressWarnings(
          cbind(
            tmp = x[,by],
            do.call(
              cbind, 
              split(
                x[,setdiff(names(x), by)], 
                seq_len(nrow(x))
              )
            )
          )
        )[1,]
        # Derive new col names: 
        nms <- gsub(
          "(\\d+)\\.(.*)", 
          "\\2_\\1", 
          names(ir[-seq_len(length(by))])
        )
        # Set the col names:
        setNames(
          ir,
          c(by, nms)
        )
      }
    )
  )
  return(res_df)
}

# Apply the function: res_df => data.frame
res_df <- cbind_by(
  df = test.data,
  by = "AAA"
)

# Display the result in the console: data.frame => stdout(console)
res_df
hello_friend
  • 5,682
  • 1
  • 11
  • 15