0

I have used the following code to first bootstrap data for each participant (N = 500) and then sample the number of trials I needed for each iteration (num). Yet, I am wondering if this is a bad approach since I could just bootstrap the number of trials I need for each iteration without then needing to sample. If that approach is a better one, how should I go about changing this code to do it? This is a follow up question to: How to get mean for all participants after selecting only a certain number of trials I tried changing the function get_mean but I did not manage to get it to work. Thank you!

#functions ---------------



   # create function to sample data by certain size and summarize by mean
get_mean <- function(x, n) { 
   dplyr::group_by(x, Participant) %>% # group by participant
      dplyr::sample_n(n) %>% # randomly sample observations
      dplyr::summarise(mean = mean(RT), # get mean of RT
                       n = n(), # get sample size
                       .groups = "keep") %>% 
      dplyr::ungroup() %>% as.data.frame()
      # add a pipe to as.data.frame if you don't want a tibble object
    }
    
    
    agg_f <- function(sample_size,data_num)
      get_mean(
        if(data_num == 2) Data_improb2 else Data_improb3,
        n = sample_size
      )
    
    
    num_imp <- c(75, 100, 125, 150, 175, 200, 225, 250, 275, 300, 325, 350, 375, 400)
    
    corr <- matrix(nrow=1000,ncol=length(num_imp))
    corr_KLD <- matrix(nrow=1000,ncol=length(num_imp))
    colnames(corr) <- num_imp
    n_subj <- length(Desc$Participant)
    
    for(j in 1:1000){
    
      
      for (i in 1:length(num_imp)){
        num <- num_imp[i]
        
        x = 1
        while(x < 46){
        Participant <- Desc[[i, 1]]
        improb2_trials[[x]]<- as.data.frame(sample(Data_improb2[[x]]$RT, 500, replace = TRUE))
        improb3_trials[[x]]<- as.data.frame(sample(Data_improb3[[x]]$RT, 500, replace = TRUE))
        improb2_trials[[x]]$Participant<- Participant
        improb3_trials[[x]]$Participant<- Participant
    
         x = x +1
        }
        agg_improb2 <- lapply(num_imp,agg_f,data_num = 2)
        agg_improb3 <- lapply(num_imp,agg_f,data_num = 3)
        
        agg_prob2 <- aggregate(RT ~ Participant, Data_prob2, mean)
        agg_prob3 <- aggregate(RT ~ Participant, Data_prob3, mean)
        
        Diff2 <- agg_improb2[[as.character(num)]][["mean"]]-agg_prob2$RT
        Diff3 <- agg_improb3[[as.character(num)]][["mean"]]-agg_prob3$RT
    
        corr[j,i] <- cor.test(Diff2, Diff3)$estimate
    
      }
    }
CatM
  • 284
  • 2
  • 12
  • 1
    If you are asking about how to properly model your data, you should ask statistical questions at [stats.se] I'm not sure it's clear to me what your specific programming question is here that's appropriate for Stack Overflow. – MrFlick Jan 25 '21 at 17:46
  • My question is how to change this code to boostrap improb2_trials and improb3_trials instead of boostrapping a large number of trials and then sampling from there. How can I do just the boostrapping of the correct number of trials? – CatM Jan 25 '21 at 17:59

0 Answers0