I am using the R programming language. I made an earlier post (R: Using "microbenchmark" and ggplot2 to plot runtimes) where I am learning how to use loops and functions to iterate procedures (7 procedures) in R for sample sizes. Once this is done, I want to produce a plot.
Based on the previous answer, I tried to write a few of these loops in R:
library(dplyr)
library(ggplot2)
library(Rtsne)
library(cluster)
library(dbscan)
library(plotly)
library(microbenchmark)
#simulate data
var_1 <- rnorm(1000,1,4)
var_2<-rnorm(1000,10,5)
var_3 <- sample( LETTERS[1:4], 1000, replace=TRUE, prob=c(0.1, 0.2, 0.65, 0.05) )
var_4 <- sample( LETTERS[1:2], 1000, replace=TRUE, prob=c(0.4, 0.6) )
#put them into a data frame called "f"
f <- data.frame(var_1, var_2, var_3, var_4,ID=1:1000)
#declare var_3 and response_variable as factors
f$var_3 = as.factor(f$var_3)
f$var_4 = as.factor(f$var_4)
# configure run sizes
sizes <- c(100,200,300,400,500,600,700,800,900,1000)
# Procedure 1: :
proc1 <- function(size){
assign(paste0("gower_dist_",size), daisy(f[1:size,-5],
metric = "gower"),envir = .GlobalEnv)
assign(paste0("gower_mat_",size),as.matrix(get(paste0("gower_dist_",size),envir = .GlobalEnv)),
envir = .GlobalEnv)
}
proc1List <- lapply(sizes,function(x){
b <- microbenchmark(proc1(x))
b$obs <- x
b
})
proc1summary <- do.call(rbind,(proc1List))
#procedure2
proc2 <- function(size){
assign(paste0("lof_",size), lof(gower_dist, k=3),envir = .GlobalEnv)}
proc2List <- lapply(sizes,function(x){
b <- microbenchmark(proc2(x))
b$obs <- x
b
})
proc2summary <- do.call(rbind,(proc2List))
#procedure3
proc3 <- function(size){
assign(paste0("lof_",size), lof(gower_dist, k=5),envir = .GlobalEnv)}
proc3List <- lapply(sizes,function(x){
b <- microbenchmark(proc3(x))
b$obs <- x
b
})
proc3summary <- do.call(rbind,(proc3List))
#procedure4
proc4 <- function(size){
assign(paste0("tsne_obj_",size),Rtsne(gower_dist, is_distance = TRUE),envir = .GlobalEnv)
assign(paste0("tsne_data_",size),tsne_data <- tsne_obj$Y %>%
data.frame() %>%
setNames(c("X", "Y")) %>%
mutate(
name = f$ID) ,envir = .GlobalEnv)}
proc4List <- lapply(sizes,function(x){
b <- microbenchmark(proc4(x))
b$obs <- x
b
})
proc4summary <- do.call(rbind,(proc4List))
#procedure5
proc5 <- function(size){
assign(paste0("tsne_obj_",size),Rtsne(gower_dist, perplexity = 10, is_distance = TRUE),envir = .GlobalEnv)
assign(paste0("tsne_data_",size),tsne_data <- tsne_obj$Y %>%
data.frame() %>%
setNames(c("X", "Y")) %>%
mutate(
name = f$ID) ,envir = .GlobalEnv)}
proc5List <- lapply(sizes,function(x){
b <- microbenchmark(proc5(x))
b$obs <- x
b
})
proc5summary <- do.call(rbind,(proc5List))
#procedure6
proc6 <- function(size){
assign(paste0("plot_",size),ggplot(aes(x = X, y = Y), data = tsne_data) + geom_point(aes()),envir = .GlobalEnv)}
proc6List <- lapply(sizes,function(x){
b <- microbenchmark(proc6(x))
b$obs <- x
b
})
proc6summary <- do.call(rbind,(proc6List))
#procedure 7
proc7 <- function(size) {
assign(paste0 ("tsne_obj_", size), Rtsne(gower_dist, is_distance = TRUE), envir = .GlobalEnv)
assign(paste0 ("tsne_data_", size), tsne_data <- tsne_obj$Y %>%
data.frame() %>%
setNames(c("X", "Y")) %>%
mutate(
name = f$ID,
lof=lof,
var1=f$var_1,
var2=f$var_2,
var3=f$var_3
), envir = .GlobalEnv)
assign(paste0 ("p1_", size), ggplot(aes(x = X, y = Y, size=lof, key=name, var1=var1,
var2=var2, var3=var3), data = tsne_data) +
geom_point(shape=1, col="red") + theme_minimal(), envir = .GlobalEnv)
assign(paste0 ("plotly_", size),
ggplotly(p1, tooltip = c("lof", "name", "var1", "var2", "var3")
), envir = .GlobalEnv)
}
proc7List <- lapply(sizes,function(x){
b <- microbenchmark(proc7(x))
b$obs <- x
b
})
proc7summary <- do.call(rbind,(proc7List))
do.call(rbind,list(proc1summary,proc2summary,proc3summary, proc4summary, proc5summary, proc6summary, proc7summary)) %>%
group_by(expr,obs) %>%
summarise(.,time_ms = mean(time) * .000001) -> proc_time
ggplot(proc_time,aes(obs,time_ms,group = expr)) +
geom_line(aes(group = expr),color = "grey80") +
geom_point(aes(color = expr))
However, for some of these procedures, when I call them though a list, I keep getting an error:
proc4List <- lapply(sizes,function(x){
b <- microbenchmark(proc4(x))
b$obs <- x
b
})
Error: Problem with `mutate()` input `name`.
x Input `name` can't be recycled to size 100.
i Input `name` is `f$ID`.
i Input `name` must be size 100 or 1, not 1000.
I tried reading other stackoverflow posts (Input `typ` can't be recycled to size in R), but I could not understand why this "recycling error" keeps showing up. Is it because "size = 100" is too small? Is it because some of the variables have been named improperly?
Could someone please tell me what I am doing wrong?
Thanks