I have this in my dataframe after reading and rearranging multiple csv files. Basically I want an if else ladder to refer to the ID column and if it matches a number from the list of concatenates then place a word in a new "group" column
# of int. int. not.int. ID
1 50 218.41 372.16 1
3 33 134.94 158.17 3
I then made these concatenates to refer to.
veh = as.character(c('1', '5'))
thc1 = as.character(c('2', '6'))
thc2 = as.character(c('3', '7'))
thc3 = as.character(c('4', '8'))
Then I made an if else ladder to list the corresponding values.
social.dat$group = if (social.dat$ID == veh) {
social.dat$group == "veh"
} else if (social.dat$group == thc1) {
social.dat$group == "thc1"
} else if (social.dat$group == thc2) {
social.dat$group == "thc2"
} else {
social.dat$group == "thc3"
}
However, I then get this warning message.
Warning message:
In if (social.dat$ID == veh) { :
the condition has length > 1 and only the first element will be used
I have looked up this warning message in multiple different variations and have not found anything that really helped. Any help for this would be much appreciated or and alternate options would be good as well. I apologize in advance if there was a solution on stack already if I missed it.
EDIT: I tried using
social.dat$group = ifelse(social.dat$ID == veh, "veh", "thc")
social.dat$group = ifelse(social.dat$ID == thc, "thc", "veh")
but it changed the output of the dataframe after each line.
Here is the full code i am using to rearrange the csv files and get the dataframe that I first mentioned above.
#calls packages
library(tidyr)
library( plyr )
library(reshape2)
#make sure to change your working directory to where the files are kept
setwd("C:/Users/callej03/Desktop/test")
wd = "C:/Users/callej03/Desktop/test"
files = list.files(path=wd, pattern="*.csv", full.names=TRUE,
recursive=FALSE)
################################################################
#this function creates a list of the number of interactions for each file in
the folder
lap.list = lapply(files, function(x) {
dat = read.csv(x, header= TRUE)
dat = dat[-c(1),]
dat = as.data.frame(dat)
dat = separate(data = dat, col = dat, into = c("lap", "duration"), sep = "\\
")
dat$count = 1:nrow(dat)
y = dat$count
i= y%%2==0
dat$interacting = i
int = dat[which(dat$interacting == TRUE),]
interactions = sum(int$interacting)
})
#########################################################################
#this changes the row name to the name of the file - i.e. the rat ID
lap.list = as.data.frame(lap.list)
lap.list = t(lap.list)
colnames(lap.list) = c("# of int.")
row.names(lap.list) = sub(wd, "", files)
row.names(lap.list) = gsub("([0-9]+).*$", "\\1", rownames(lap.list))
row.names(lap.list) = gsub('/', "", row.names(lap.list), fixed = TRUE)
###########################################################################
#this applies almost the same function as the one listed above except I call
it a different vector name so it can be manipulated
int.duration = lapply(files, function(x) {
dat2 = read.csv(x, header= TRUE)
dat2 = dat2[-c(1),]
dat2 = as.data.frame(dat2)
dat2 = separate(data = dat2, col = dat2, into = c("lap", "duration"), sep =
"\\ ")
dat2$count = 1:nrow(dat2)
y = dat2$count
i= y%%2==0
dat2$interacting = i
int = dat2[which(dat2$interacting == TRUE),]
})
noint.duration = lapply(files, function(x) {
dat2 = read.csv(x, header= TRUE)
dat2 = dat2[-c(1),]
dat2 = as.data.frame(dat2)
dat2 = separate(data = dat2, col = dat2, into = c("lap", "duration"), sep =
"\\ ")
dat2$count = 1:nrow(dat2)
y = dat2$count
i= y%%2==0
dat2$interacting = i
noint = dat2[which(dat2$interacting == FALSE),]
})
###################################################################
#this splits the output time of minutes, seconds, and milliseconds.
#then it combines them into a total seconds.milliseconds readout.
#after that, it takes the sum of the times for each file and combines them
with the total interactions dataframe.
int.duration = melt(int.duration)
int.duration = as.data.frame(int.duration)
int.left = as.data.frame(substr(int.duration$duration, 1, 2))
colnames(int.left) = "min"
int.mid = as.data.frame(substr(int.duration$duration, 4, 4 + 2 - 1))
colnames(int.mid) = "sec"
int.right = as.data.frame(substr(int.duration$duration,
nchar(int.duration$duration) - (2-1), nchar(int.duration$duration)))
colnames(int.right) = "ms"
int.time = cbind(int.left, int.mid, int.right)
int.time$min = as.numeric(as.character(int.time$min))
int.time$sec = as.numeric(as.character(int.time$sec))
int.time$ms = as.numeric(as.character(int.time$ms))
int.time$ms = int.time$ms/100
int.time$min = ifelse(int.time$min > 0, int.time$min*60,0)
int.time$sum = rowSums(int.time)
int.file = as.data.frame(int.duration$L1)
colnames(int.file) = "file"
int.time = cbind(int.time, int.file)
int.tot = as.data.frame(tapply(int.time$sum, int.time$file, sum))
colnames(int.tot) = "int."
social.dat = cbind(lap.list, int.tot)
noint.duration = melt(noint.duration)
noint.duration = as.data.frame(noint.duration)
noint.left = as.data.frame(substr(noint.duration$duration, 1, 2))
colnames(noint.left) = "min"
noint.mid = as.data.frame(substr(noint.duration$duration, 4, 4 + 2 - 1))
colnames(noint.mid) = "sec"
noint.right = as.data.frame(substr(noint.duration$duration,
nchar(noint.duration$duration) - (2-1), nchar(noint.duration$duration)))
colnames(noint.right) = "ms"
noint.time = cbind(noint.left, noint.mid, noint.right)
noint.time$min = as.numeric(as.character(noint.time$min))
noint.time$sec = as.numeric(as.character(noint.time$sec))
noint.time$ms = as.numeric(as.character(noint.time$ms))
noint.time$ms = noint.time$ms/100
noint.time$min = ifelse(noint.time$min > 0, noint.time$min*60,0)
noint.time$sum = rowSums(noint.time)
noint.file = as.data.frame(noint.duration$L1)
colnames(noint.file) = "file"
noint.time = cbind(noint.time, noint.file)
noint.tot = as.data.frame(tapply(noint.time$sum, noint.time$file, sum))
colnames(noint.tot) = "not.int."
social.dat = cbind(social.dat, noint.tot)
social.dat$ID = rownames(social.dat)
Here is and axample of a csv file I am working with. The words are all in the same column and separated by spaces.
Total time 10:00.61
Lap times
01 00:07.46
02 00:05.64
03 00:01.07
04 00:01.04
05 00:04.71
06 00:06.43
07 00:12.52
08 00:07.34
09 00:05.46
10 00:05.81
11 00:05.52
12 00:06.51
13 00:10.75
14 00:00.83
15 00:03.64
16 00:02.75
17 00:01.20
18 00:06.17
19 00:04.40
20 00:00.75
21 00:00.84
22 00:01.29
23 00:02.31
24 00:03.04
25 00:02.85
26 00:05.86
27 00:05.76
28 00:05.06
29 00:00.96
30 00:06.91