I created a possible solution. Because I don't know if the datasets are provided in the form of dataframes or matrices, I provide solutions for both possibilities.
First the matrix solution:
#### Example with Matrix #####
# Plain example variables
V2 <- c(5, 8, 6, 2, 3, 9)
V3 <- c(8, 8, 1, 15, 48, 58)
V4 <- c(7, 8, 9, 4, 5, 6)
# Predicted/actual pairs for V2 and V3
V2.actual <- c(8, 8, 1, 15, 48, 58)
V2.predicted <- c(5.5, 6.4, 8, 9, 3, 4)
V3.actual <- c(5, 8, 6, 2, 3, 9)
V3.predicted <- c(4, 8, 6, 55, 2, 3)
# Column-bind the vectors into numeric matrices; the column names are taken
# from the variable names
data3 <- cbind(V2, V3.predicted, V3.actual, V4)
data2 <- cbind(V2.predicted, V2.actual, V3, V4)
# Show the structure of both matrices
str(data2)
str(data3)
# Relative prediction error for the variable encoded in a dataset's name.
#
# The dataset name is expected to look like "data<N>" (e.g. "data3"). The
# number N selects the columns "V<N>.predicted" and "V<N>.actual", and the
# function returns (predicted - actual) / actual for every row.
#
# Arguments:
#   data  matrix (a data frame also works) that contains the two columns
#         "V<N>.predicted" and "V<N>.actual".
#   name  optional dataset name. When it is missing, the name is taken from
#         the expression passed as `data`; this does not work inside
#         lapply/ldply, where the expression is X[[i]].
#
# Returns a named numeric vector with one element per row of `data`; the
# names are the row names of `data`, or the row indices if it has none.
# Stops with an error if no number can be extracted from the name or if the
# required columns are missing.
fun_calc_error <- function(data, name) {
  str(data) # Debugging
  # Use the supplied name, otherwise recover it from the calling expression
  # http://stackoverflow.com/questions/10520772/in-r-how-to-get-an-objects-name-after-it-is-sent-to-a-function
  if (missing(name)) {
    # deparse() may split long expressions into several strings; rejoin them
    dataname <- paste(deparse(substitute(data)), collapse = "")
  } else {
    dataname <- name
  }
  cat("dataname: ",dataname,"\n") # Debugging

  # Extract the number of the matrix; the coercion warning is replaced by
  # the explicit error below
  df_num <- suppressWarnings(as.numeric(gsub("data", "", dataname)))
  if (length(df_num) != 1L || is.na(df_num)) {
    stop(
      "cannot extract a dataset number from name '",
      paste(dataname, collapse = " "),
      "'; expected something like 'data3'",
      call. = FALSE
    )
  }

  # Build the column names and make sure they exist
  col_pred <- paste0("V", df_num, ".predicted")
  col_act <- paste0("V", df_num, ".actual")
  missing_cols <- setdiff(c(col_pred, col_act), colnames(data))
  if (length(missing_cols) > 0) {
    stop(
      "column(s) not found in `data`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Vectorised over rows: no per-row split needed, and a one-row input is
  # handled like any other
  predicted <- as.vector(data[, col_pred])
  actual <- as.vector(data[, col_act])
  error_rate <- (predicted - actual) / actual

  # Label the result by row so it stays a named vector
  row_labels <- rownames(data)
  if (is.null(row_labels)) {
    row_labels <- as.character(seq_len(nrow(data)))
  }
  names(error_rate) <- row_labels

  # debugging
  cat("\n str Error rate: ","\n")
  str(error_rate)
  return(error_rate)
}
# Run the matrix version on data3; no name is passed, so the function
# derives it from the argument expression
fun_calc_error(data = data3)
Then the dataframe solution:
#### Example with dataframes #####
# Plain example variables
V2 <- c(5, 8, 6, 2, 3, 9)
V3 <- c(8, 8, 1, 15, 48, 58)
V4 <- c(7, 8, 9, 4, 5, 6)
# Predicted/actual pairs for V2 and V3
V2.actual <- c(8, 8, 1, 15, 48, 58)
V2.predicted <- c(5.5, 6.4, 8, 9, 3, 4)
V3.actual <- c(5, 8, 6, 2, 3, 9)
V3.predicted <- c(4, 8, 6, 55, 2, 3)
# Combine the vectors into data frames; the column names are taken from the
# variable names
data3 <- data.frame(V2, V3.predicted, V3.actual, V4, stringsAsFactors = FALSE)
data2 <- data.frame(V2.predicted, V2.actual, V3, V4, stringsAsFactors = FALSE)
# Show the structure of both data frames
str(data2)
str(data3)
# Relative prediction error for the variable encoded in a data frame's name.
#
# The dataset name is expected to look like "data<N>" (e.g. "data3"). The
# number N selects the columns "V<N>.predicted" and "V<N>.actual", and the
# function computes (predicted - actual) / actual for every row.
#
# Arguments:
#   data  data frame that contains the two columns "V<N>.predicted" and
#         "V<N>.actual".
#   name  optional dataset name. When it is missing, the name is taken from
#         the expression passed as `data`; this does not work inside
#         lapply/ldply, where the expression is X[[i]].
#
# Returns a data frame with the single column `error_rate` and one row per
# row of `data`. Stops with an error if `data` is not a data frame, if no
# number can be extracted from the name, or if the required columns are
# missing.
fun_calc_error_df <- function(data, name) {
  str(data) # Debugging
  if (!is.data.frame(data)) {
    stop("`data` must be a data frame", call. = FALSE)
  }
  # Use the supplied name, otherwise recover it from the calling expression
  # http://stackoverflow.com/questions/10520772/in-r-how-to-get-an-objects-name-after-it-is-sent-to-a-function
  if (missing(name)) {
    # deparse() may split long expressions into several strings; rejoin them
    dataname <- paste(deparse(substitute(data)), collapse = "")
  } else {
    dataname <- name
  }
  cat("dataname: ",dataname,"\n") # Debugging

  # Extract the number of the data frame; the coercion warning is replaced
  # by the explicit error below
  df_num <- suppressWarnings(as.numeric(gsub("data", "", dataname)))
  if (length(df_num) != 1L || is.na(df_num)) {
    stop(
      "cannot extract a dataset number from name '",
      paste(dataname, collapse = " "),
      "'; expected something like 'data3'",
      call. = FALSE
    )
  }

  # Build the column names and make sure they exist
  col_pred <- paste0("V", df_num, ".predicted")
  col_act <- paste0("V", df_num, ".actual")
  missing_cols <- setdiff(c(col_pred, col_act), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "column(s) not found in `data`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  predicted <- data[[col_pred]]
  actual <- data[[col_act]]

  # Start from a one-column subset of `data` so the result is again a data
  # frame with the same rows, then overwrite that column with the error rate
  error_rate <- data[, col_pred, drop = FALSE]
  names(error_rate) <- "error_rate"
  error_rate[["error_rate"]] <- (predicted - actual) / actual

  # debugging
  cat("\n str Error rate: ","\n")
  str(error_rate)
  return(error_rate)
}
# Run the data frame version on data3, passing the dataset name explicitly
fun_calc_error_df(data = data3, name = "data3")
When you use those functions on one dataframe/matrix it works fine, even without providing the name of the dataframe/matrix, because with
dataname <- deparse(substitute(data))
I can extract it.
If you want to pass a list of dataframes to lapply or ldply, to apply the function to a number of dataframes at once, there is a problem: ldply accesses each list element as X[[i]] and does not provide the name of the dataframe, so deparse(substitute(data)) cannot recover it.
To work around this issue I used a loop in the following code. Maybe you can find a better solution for this issue; I hope the code helps.
##### Possible solution for more than one dataframe ####
# Create named!!! list of dataframes
df.list <- list(data2 = data2, data3 = data3)
# Create list of names
nameslist <- names(df.list)
# Preallocate one slot per list element; the pieces are bound together once
# after the loop instead of growing a data frame on every iteration
error_rate_parts <- vector("list", length(df.list))
# loop over list elements
for (i in seq_along(df.list)) {
  cat(i, "\n") # Debugging
  # put list element in variable as dataframe
  data <- as.data.frame(df.list[[i]], stringsAsFactors = FALSE)
  # put name of dataframe from list in variable
  name <- nameslist[i]
  # apply function; the name is passed explicitly because it cannot be
  # recovered from the expression df.list[[i]]
  error_rate <- fun_calc_error_df(data, name)
  # create vector with names of dataframe
  dataframe <- rep.int(name, nrow(df.list[[i]]))
  # bind names and values to data frame
  tmp_err_rate <- cbind.data.frame(dataframe, error_rate, stringsAsFactors = FALSE)
  # keep the piece for the final row-bind
  error_rate_parts[[i]] <- tmp_err_rate
}
# bind all pieces to one big data frame (empty data frame for an empty list)
if (length(error_rate_parts) > 0) {
  df_error_rate <- do.call(rbind.data.frame, error_rate_parts)
} else {
  df_error_rate <- as.data.frame(NULL)
}