If I've understood correctly, there are many possible approaches to this problem (e.g. some options for removing 'all NA' columns here: Remove columns from dataframe where ALL values are NA).
One potential solution using functions from the dplyr package is:
library(dplyr)
df_clean <- df %>%
select(where(~any(!is.na(.)))) %>%
slice_head(n = ncol(.) - 1)
And for the loop, you could make a list of dataframes and cycle through them, e.g.
Example data:
df1 <- data.frame(
Receiver = c("Receiver1", "Receiver2","Receiver3", "Receiver4",
"Receiver5", "Receiver6","Receiver7", "Receiver8",
"Receiver9", "Receiver10"),
Sender1 = c(0,1,0,0,0,1,0,0,0,0),
Sender2 = c(0,0,0,1,0,0,0,0,0,0),
Sender3 = c(0,0,0,0,0,0,0,0,0,0),
Sender4 = c(0,1,0,0,0,0,1,0,0,0),
Sender5 = c(1,0,1,0,0,1,0,0,0,0),
Sender6 = c(0,1,0,0,0,0,1,0,0,0),
Sender7 = c(0,0,1,0,1,0,0,0,0,0),
Sender8 = c(NA,NA,NA,NA,NA,NA,NA,NA,NA,NA),
Sender9 = c(NA,NA,NA,NA,NA,NA,NA,NA,NA,NA),
Sender10 = c(NA,NA,NA,NA,NA,NA,NA,NA,NA,NA)
)
# make df2 different to df1 (add more NAs)
df2 <- data.frame(
Receiver = c("Receiver1", "Receiver2","Receiver3", "Receiver4",
"Receiver5", "Receiver6","Receiver7", "Receiver8",
"Receiver9", "Receiver10"),
Sender1 = c(0,1,0,0,0,1,0,0,0,NA),
Sender2 = c(0,0,0,1,0,0,0,0,0,NA),
Sender3 = c(0,0,0,0,0,0,0,0,0,NA),
Sender4 = c(0,1,0,0,0,0,1,0,0,NA),
Sender5 = c(1,0,1,0,0,1,0,0,0,NA),
Sender6 = c(0,1,0,0,0,0,1,0,0,NA),
Sender7 = c(NA,NA,NA,NA,NA,NA,NA,NA,NA,NA),
Sender8 = c(NA,NA,NA,NA,NA,NA,NA,NA,NA,NA),
Sender9 = c(NA,NA,NA,NA,NA,NA,NA,NA,NA,NA),
Sender10 = c(NA,NA,NA,NA,NA,NA,NA,NA,NA,NA)
)
Example for-loop code:
library(dplyr, warn = FALSE)
# Collate all dataframes you want to 'clean' into a list
list_of_dfs <- mget(grep("^df.*", names(which(unlist(eapply(.GlobalEnv,is.data.frame)))),
value = TRUE))
# create a list to store the results
clean_dfs <- list()
# run the for-loop
for (i in seq_along(list_of_dfs)) {
clean_dfs[[i]] <- list_of_dfs[[i]] %>%
select(where(~any(!is.na(.)))) %>%
slice_head(n = ncol(.) - 1)
}
# Name each cleaned dataframe with the prefix "clean_"
names(clean_dfs) <- paste0("clean_", names(list_of_dfs))
# Export them to your work space
list2env(clean_dfs, envir = .GlobalEnv)
#> <environment: R_GlobalEnv>
# View them
print(clean_df1)
#> Receiver Sender1 Sender2 Sender3 Sender4 Sender5 Sender6 Sender7
#> 1 Receiver1 0 0 0 0 1 0 0
#> 2 Receiver2 1 0 0 1 0 1 0
#> 3 Receiver3 0 0 0 0 1 0 1
#> 4 Receiver4 0 1 0 0 0 0 0
#> 5 Receiver5 0 0 0 0 0 0 1
#> 6 Receiver6 1 0 0 0 1 0 0
#> 7 Receiver7 0 0 0 1 0 1 0
print(clean_df2)
#> Receiver Sender1 Sender2 Sender3 Sender4 Sender5 Sender6
#> 1 Receiver1 0 0 0 0 1 0
#> 2 Receiver2 1 0 0 1 0 1
#> 3 Receiver3 0 0 0 0 1 0
#> 4 Receiver4 0 1 0 0 0 0
#> 5 Receiver5 0 0 0 0 0 0
#> 6 Receiver6 1 0 0 0 1 0
Created on 2023-07-28 with reprex v2.0.2
Edit
I just re-read your question and saw you wanted to do this 200 times; if you have 200 individual xlsx files (1 sheet per file) you could read in each sheet, 'clean' it, then write it as a csv file:
library(tidyverse)
library(readxl)
library(fs)
path <- dir_ls(path = "~/Desktop", glob = "*.xlsx")
list_of_dfs <- map(path, read_excel)
for (i in seq_along(list_of_dfs)) {
list_of_dfs[[i]] %>%
select(where(~any(!. == "NA"))) %>%
slice_head(n = ncol(.) - 1) %>%
write_csv(., paste0(names(list_of_dfs)[[i]], ".csv"))
}
dir_ls(path = "~/Desktop/", glob = "*.xlsx.csv")
#> /Users/jared/Desktop/df1.xlsx.csv /Users/jared/Desktop/df2.xlsx.csv
Created on 2023-07-28 with reprex v2.0.2
Not sure if this helps with your actual use-case, but wanted to include it anyway, just in case it's useful to you. Good luck!