Here is one option with lapply
from base R
. After looping over the columns of the dataset, remove the NA
elements with is.na
(returns a logical vector for subsetting). Then, pad the list
output with NA
at the end based on the max
imum length
of list elements and cbind
lst1 <- lapply(df1, function(x) x[!is.na(x)])
do.call(cbind, lapply(lst1, `length<-`, max(lengths(lst1))))
# c1 c2 c3 c4
#[1,] "T1" "a" "B" "b"
#[2,] "T2" NA "C" "c"
It can also done with cbind.fill
from rowr
and map
library(purrr)
library(rowr)
map(df1, ~ .x[!is.na(.x)]) %>%
reduce(cbind.fill, fill = NA) %>%
set_names(names(df1))
# c1 c2 c3 c4
#1 T1 a B b
#2 T2 <NA> C c
Or by reshaping into 'long' format while dropping the rows and then reshape it back to 'wide' format
library(tidyr)
df1 %>%
pivot_longer(everything(), values_drop_na = TRUE) %>%
group_by(name) %>%
mutate(rn = row_number()) %>%
pivot_wider(names_from = name, values_from = value) %>%
select(-rn)
# A tibble: 2 x 4
# c1 c2 c3 c4
# <chr> <chr> <chr> <chr>
#1 T1 a B b
#2 T2 <NA> C c
Or with melt/dcast
library(data.table)
dcast(melt(setDT(df1)[, rn := seq_len(.N)], id.var = 'rn',
na.rm = TRUE), rowid(variable) ~ variable, value.var = 'value')
data
df1 <- structure(list(c1 = c("T1", NA, NA, NA, "T2", NA, NA), c2 = c(NA,
"a", NA, NA, NA, NA, NA), c3 = c(NA, NA, "B", NA, NA, "C", NA
), c4 = c(NA, NA, NA, "b", NA, NA, "c")), class = "data.frame",
row.names = c(NA,
-7L))