Okay, so this is a bit of a loaded question and I am not really sure how to start... So, from the beginning: I have, let's say, 3 .txt files (but there are actually many more) in my directory. The layout of the text files is the same, but the numbers are different. I was able to manipulate the files one at a time using my code, but this becomes unreasonable because I would need to manually append the rows for each consecutive .txt file into one data frame. How can I apply my current code to work with one text file at a time, make a data frame, move on to the next text file, and then append its rows to the original data frame?
For example...
"sample1.txt"
"sample2.txt"
"sample3.txt"
sample1 lines are read and made into a data frame.
ID picture color
1 1 red
1 2 red
1 3 blue
Now that task is done and I have a data frame I like. Now, what kind of loop or function do I need to write to run the same code on the next text file in line and append its result to the first, to get something like this?...
ID picture color
1 1 red
1 2 red
1 3 blue
2 1 red
2 3 blue
2 4 green
And so on and so forth for the rest of the files?
Here is my code below. It's lengthy and may not all make complete sense...
# Set up packages, working directory, file names, and save path.
# Install any of the required packages that are not yet installed, then load
# all of them.
list.of.packages <- c("dplyr", "berryFunctions", "stringr", "gsubfn")
new.packages <- list.of.packages[
  !(list.of.packages %in% installed.packages()[, "Package"])
]
if (length(new.packages) > 0) install.packages(new.packages)
library("berryFunctions")
library("stringr")
library("dplyr")
library("gsubfn")
wd <- "C:/Users/PC/Desktop/New folder/GoD/" #change this to where the raw text files are located
setwd(wd)
Export_File_Name <- "GameOfDice_CATCH-018-1.txt" #name of raw file
csvname <- "output.csv" #name of save output file
# `pattern` is a regular expression, not a shell glob: "*.txt" would also
# match names that merely contain "txt". Anchor on the real extension instead.
files <- list.files(pattern = "\\.txt$") #name of files
#############################################################
# Convert the files to UTF-8 and then change directory.
# Start from a clean output directory: remove the result of any previous run.
if (file.exists("./utf8dir")) {
  unlink("./utf8dir", recursive = TRUE)
}
# Re-encode one text file as UTF-8.
#
# in_file:  path of the file to read.
# out_file: path of the file to write; its directory (including any missing
#           parent directories) is created when it does not exist yet.
# encoding: encoding of `in_file` (default "utf-16").
#
# Returns NULL invisibly; called for its side effect of writing `out_file`.
convert_file_to_utf8 <- function(in_file, out_file, encoding = "utf-16") {
  # Open explicitly and close in `finally` so the connection is released even
  # when reading fails part-way.
  in_file_conn <- file(in_file, open = "rt", encoding = encoding)
  txt <- tryCatch(readLines(in_file_conn), finally = close(in_file_conn))

  # Create out directory (recursive, so nested output paths work too)
  out_dir <- dirname(out_file)
  if (!dir.exists(out_dir)) dir.create(out_dir, recursive = TRUE)

  # Write file with new encoding
  out_file_conn <- file(out_file, open = "wt", encoding = "utf-8")
  tryCatch(writeLines(txt, out_file_conn), finally = close(out_file_conn))

  invisible(NULL)
}
# Convert every file directly inside `in_dir` to UTF-8, writing each result
# under the same file name into `out_dir`.
#
# in_dir:  directory holding the source files (default "./utf16dir/").
# out_dir: directory that receives the converted files (default "./utf8dir/").
#
# Returns NULL invisibly.
create_utf8_dir <- function(in_dir = "./utf16dir/", out_dir = "./utf8dir/") {
  files <- dir(in_dir, full.names = TRUE)
  # dir() lists sub-directories as well; only regular files can be converted.
  files <- files[!dir.exists(files)]
  for (in_file in files) {
    # Build the target from the file name rather than by text substitution on
    # the input path: if the listed path does not literally contain `in_dir`,
    # a substitution leaves it unchanged and the source would be overwritten.
    out_file <- file.path(out_dir, basename(in_file))
    convert_file_to_utf8(in_file, out_file)
  }
  invisible(NULL)
}
# Convert everything found in the raw-data directory; the output directory is
# left at the function's default.
create_utf8_dir(in_dir = wd)

# From here on, work inside the directory holding the converted files.
wd <- "C:/Users/PC/Desktop/New folder/GoD/utf8dir/"
setwd(wd)

# Set to the same as "wd" but with "save" added; the folder is created below.
savepath <- "C:/Users/PC/Desktop/New folder/GoD/utf8dir/save/"

# Recreate the save folder from scratch.
if (file.exists("./save")) {
  unlink("./save", recursive = TRUE)
}
dir.create(paste(wd, "save", sep = "/"))
#############################################################
# file_func <- function(wd)({
# Read the converted text file in the working directory into a one-column
# character data frame called `df` (column name "1").
# `pattern` is a regular expression, not a glob, so match the extension
# explicitly.
files <- list.files(pattern = "\\.txt$")
# read.delim() reads a single file; fail with a clear message instead of an
# obscure connection error when zero or several files are present.
if (length(files) != 1L) {
  stop(
    "Expected exactly one .txt file in '", getwd(), "' but found ",
    length(files), ".",
    call. = FALSE
  )
}
df_delim <- read.delim(files)
df_delim <- as.data.frame(df_delim)
colnames(df_delim) <- c("Header_Start")
df_delim$Header_Start <- as.character(df_delim$Header_Start)
# Drop empty lines.
df_delim <- subset(df_delim, Header_Start != "")
dat <- df_delim
# Prepend a row and fill it with the current column name, so the
# "Header_Start" label becomes the first data row.
dat <- insertRows(dat, 1, new = NA)
dat[1, ] <- colnames(dat)
colnames(dat) <- c("1")
df <- dat
# Patterns removed from the raw text, applied in this order:
#   "X...."   - an "X" followed by any four characters
#   "[....^]" - any "." or "^" character
#   "[***]"   - any "*" character
junk_patterns <- c("X....", "[....^]", "[***]")

# First pass: clean the unsplit lines.
for (junk in junk_patterns) {
  df[["1"]] <- gsub(junk, "", df[["1"]])
}

# Split every line at its first ":" into column "1" (before) and "2" (after).
df[["2"]] <- NA
df[c("1", "2")] <- str_split_fixed(df[["1"]], ":", 2)

# Second pass: apply the same clean-up to both halves, then remove all
# whitespace.
for (junk in c(junk_patterns, "\\s")) {
  for (part in c("1", "2")) {
    df[[part]] <- gsub(junk, "", df[[part]])
  }
}

# Column "1": turn ".", whitespace and "-" into underscores.
for (separator in c("\\.", "\\s", "-")) {
  df[["1"]] <- gsub(separator, "_", df[["1"]])
}

# Column "2": strip curly braces.
for (brace in c("\\{", "\\}")) {
  df[["2"]] <- gsub(brace, "", df[["2"]])
}

rownames(df) <- NULL
# The first 24 rows are kept separately as a two-column table `bound_rows`.
header_idx <- 1:24
firstrows <- as.data.frame(df[header_idx, 1])
secondrows <- as.data.frame(df[header_idx, 2])
bound_rows <- as.data.frame(cbind(firstrows, secondrows))
colnames(bound_rows) <- c("1", "2")

# Keep only the body: remove those 24 rows from the top and the last 22 rows
# from the bottom.
df <- df[-header_idx, ]
df <- slice(df, 1:(n() - 22))
# Collect every trial frame into `newdata`.
# A frame starts at a row whose key (column "1") is "LogFrameStart" and whose
# following row has the key "Procedure"; it spans 31 rows from that start row.
# `newdata` holds both columns of the first frame, followed by one value
# column (column 2) per additional frame.
frame_keys <- df[["1"]]
# Key of the following row; indexing past the end yields NA for the last row.
next_keys <- frame_keys[seq_along(frame_keys) + 1L]
# %in% never returns NA, so missing keys and the last row cannot break the
# test the way `==` inside `if` would.
frame_starts <- which(
  frame_keys %in% "LogFrameStart" & next_keys %in% "Procedure"
)
if (length(frame_starts) == 0L) {
  stop(
    "No 'LogFrameStart' row followed by a 'Procedure' row was found.",
    call. = FALSE
  )
}

# NOTE(review): a frame starting fewer than 31 rows before the end of `df` is
# padded with NA rows (unchanged from the previous behaviour) - confirm every
# frame really has 31 rows.
frame_rows <- function(start) df[start:(start + 30L), , drop = FALSE]

newdata <- frame_rows(frame_starts[1L])
if (length(frame_starts) > 1L) {
  # Gather the value columns first and bind once instead of growing the data
  # frame inside a loop.
  extra_values <- lapply(
    frame_starts[-1L],
    function(start) frame_rows(start)[, 2]
  )
  names(extra_values) <- paste0("frame_", seq_along(extra_values) + 1L)
  newdata <- do.call(cbind, c(list(newdata), extra_values))
}
# Reshape the collected frames into one row per trial.
# Input `newdata`: column 1 holds the field labels, every further column holds
# the values of one frame. Output `data1`: columns Subject, Trial, then one
# column per label.
data1 <- newdata
# Number the value columns 1..k. Computed from the column count directly so it
# also works when there is only a single frame.
n_col <- seq_len(ncol(data1) - 1L)
labels <- data1[, 1]
# drop = FALSE keeps a data frame even when only one value column exists.
data1 <- data1[, -1, drop = FALSE]
colnames(data1) <- n_col
data1 <- cbind(labels, data1)
# Remove the first row of the frame and its last two rows.
data1 <- data1[-1, ]
data1 <- slice(data1, 1:(n() - 2))
# Transpose: labels become the first row, each frame becomes one row.
data1 <- t(data1)
colnames(data1) <- data1[1, ]
# drop = FALSE keeps a matrix even when only one frame row remains.
data1 <- data1[-1, , drop = FALSE]
rownames(data1) <- NULL
data1 <- as.data.frame(data1)
# After resetting the row names they are "1", "2", ... and serve as the trial
# number.
data1$Trial <- rownames(data1)
data1$Subject <- NA
data1 <- data1 %>%
  select(Subject, Trial, everything())
# The subject id is the text between the first and the last "-" of the file
# name.
fill <- as.character(strapplyc(files, "-(.*)-", simplify = TRUE))
data1$Subject <- fill