0

I'm using Maxent for some spatial analysis. I have a long script with many outputs that I collect in lists. It works just fine with a for loop and low resolution climatic predictors in a rasterstack (in my core i5, 6gb notebook) . But I need to use a high resolution set of rasters, and all the problems come from this issue. Even using a 16 core, 32gb virtual machine, the proccessing is veeeery slow, and after 3 days, memory is not enought and the run is closed after about 50 turns in my loop (which has 92 species). I'm trying to improve this script by collecting garbage to clean the memory and using doParallel. After the new script is cleanly running with the low resolution predictors, I'll try it with the high resolution predictors

So, I changed my script to use foreach instead of for, and with %dopar%

But so far, I'm getting this as result:

 [[1]]
 NULL

 [[2]]
 NULL

 [[3]]
 NULL

 [[4]]
 NULL

I saw another question about the same issue, but the very simple solution the guy needed doesn't apply for me.. So, any tips are very welcome

#install.packages("dismo")
library(dismo)
#install.packages("scales")
library(scales)
#install.packages("rgdal")
library(rgdal)
#install.packages("rgeos")
library(rgeos)
#install.packages("rJava")
library(rJava)
#install.packages("foreach")
library(foreach)
#install.packages("doParallel")
library(doParallel)

#Colors to use in the plots
MyRbw2<-c('#f4f4f4','#3288bd','#66c2a5','#e6f598','#fee08b','#f46d43','#9e0142')
colfunc_myrbw2<-colorRampPalette(MyRbw2)

#Create empty lists to recieve outputs
xm_list<-list()
xm_spc_list<-list()
e_spc_list<-list()
px_spc_list<-list()
tr_spc_list<-list()
spc_pol1<-list()
spc_pol5<-list()
tr<-list()


#Create empty data frame to recieve treshold values for each species
tr_df<-data.frame(matrix(NA, nrow=92, ncol=7))
tr_df[,1]<-as.character(tree_list)
names(tr_df)<- c('spp',"kappa","spec_sens","no_omission","prevalence","equal_sens_spec","sensitivity")


# Assigning objects to run Maxent
data_points <- tree_cd_points # this is a list with SpatialPoints for 92 species
data_list <- tree_list # list with the species names
counts_data<- counts_tree_cd # number of points for each species
predictors2<-predictors_low # rasterStack of Bioclim layers (climatic variables), low resolution

#Stablishing extent for Maxent predictions
xmin=-120; xmax=-35; ymin2=-40; ymax=35
limits2 <- c(xmin, xmax, ymin2, ymax)

# Making the cluster for doParallel
cores<-detectCores() # I have 16
cl <- makeCluster(cores[1]-1) #not to overload your computer
registerDoParallel(cl)

#Just to keep track of time
ptime1 <- proc.time()



pdf("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/treesp_maxent_20170823.pdf", 
    paper = "letter", height = 11, width=8,5, pointsize=12,pagecentre = TRUE)
#I have 92 species, but I'll run just the first 4 to test
foreach(i=1:4, .packages=c("dismo","scales","rgdal","rgeos","rJava")) %dopar% {  #Runs only species with 5 or more points to avoid maxent problems

  if (counts_data$n[i]>4) { #If the species has more than 4 occurrence points, run maxent
    tryCatch({ #makes the loop go on despite errors


      #Sets train, test and total points for Maxent
      group <- kfold(x=data_points[[i]], 5)
      pres_train<- data_points[[i]][group != 1, ]
      pres_test <- data_points[[i]][group == 1, ]
      spoints<- data_points[[i]]

      #Sets background points for Maxent
      backg <- randomPoints(predictors2, n=20000, ext=limits2, extf = 1.25)
      colnames(backg) = c('lon', 'lat')
      group <- kfold(backg, 5)
      backg_train <- backg[group != 1, ]
      backg_test <- backg[group == 1, ]



      #The maxent itself (put the xm in the empty list that I created earlier to store all xms)
      xm_spc_list[[i]] <- maxent(x=predictors2, p=spoints, a=backg ,
                   factors='ecoreg',
                   args=c('visible=true',
                          'betamultiplier=1',
                          'randomtestpoints=20',
                          'randomseed=true',
                          'linear=true',
                          'quadratic=true',
                          'product=true',
                          'hinge=true',
                          'threads=4',
                          'responsecurves=true',
                          'jackknife=true',
                          'removeduplicates=false',
                          'extrapolate=true',
                          'pictures=true',
                          'cache=true',
                          'maximumiterations=5000',
                          'askoverwrite=false'),
                   path=paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm/",data_list[i]), overwrite=TRUE)


      par(mfrow=c(1,1),mar = c(2,2, 2, 2))
      plot(xm_spc_list[[i]], main=paste(data_list[i]))
      response(xm_spc_list[[i]])


      #Evaluating how good is the model and putting the evaluation values in a list
      e_spc_list[[i]] <- evaluate(pres_test, backg_test, xm_spc_list[[i]], predictors2) 



      #Predicting the climatic envelopes and Sending to a list os predictions
      px_spc_list[[i]] <- predict(predictors2, xm_spc_list[[i]], ext=limits2,  progress='text', 
                    filename=paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm/",data_list[i],"/",gsub('\\s+', '_', data_list[i]),"_pred.grd"), overwrite=TRUE)



      tr_df[i,2:7]<-threshold(e_spc_list[[i]])
      tr[[i]]<-threshold(e_spc_list[[i]], 'spec_sens')


      #Pol 1 will be the regular polygon, default treshold
      spc_pol1[[i]] <- rasterToPolygons(px_spc_list[[i]]>tr[[i]],function(x) x == 1,dissolve=T)
      writeOGR(obj = spc_pol1[[i]], dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm1/",data_list[i]), driver = "ESRI Shapefile",
               layer = paste0(gsub('\\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE )




      #Pol 5 will be a 100km^2 circle around the occurrence points
      circ <- circles(spoints, d=5642,lonlat=TRUE)
      circ <- circ@polygons
      crs(circ)<-crs(wrld_cropped)
      circ <- gIntersection(wrld_cropped, circ, byid = TRUE, drop_lower_td = TRUE)

      #To write de polygon to a file, the function writeOGR needs an object SPDF, so...
      #Getting Polygon IDs
      circ_df<- as.data.frame(sapply(slot(circ, "polygons"), function(x) slot(x, "ID")))
      #Making the IDs row names 
      row.names(circ_df) <- sapply(slot(circ, "polygons"), function(x) slot(x, "ID"))
      # Make spatial polygon data frame
      circ_SPDF <- SpatialPolygonsDataFrame(circ, data =circ_df)

      #Save the polygon, finally
      writeOGR(obj = circ_SPDF, dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i]), driver = "ESRI Shapefile",
               layer = paste0(gsub('\\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE ) 

      spc_pol5[[i]]<-circ_SPDF

      #Now the plots 
      par(mfrow=c(2,3),mar = c(2,1, 1, 1))

      plot(px_spc_list[[i]], axes=FALSE, legend=TRUE, legend.shrink=1, col=colfunc_myrbw2(20), main=paste((data_list[i]),' - Maxent'))
      plot(wrld_cropped,add=TRUE, border='dark grey',axes=FALSE)
      points(data_points[[i]], pch=21,col="white", bg='hotpink', lwd=0.5, cex=0.7)

      plot(wrld_cropped,  border='dark grey', col="#f9f9f9",axes=FALSE, main='px>tr')  
      plot(spc_pol1[[i]] , main=paste((data_list[i]),' - Range'), add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8),axes=FALSE)
      points(data_points[[i]], pch="°",col="black",  cex=0.7)

      plot(wrld_cropped,  border='dark grey', col="#f9f9f9",axes=FALSE, main=paste(data_list[i],"circles"))  
      plot(circ,  add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8) )
    }, error=function(e){cat("Warning message:",conditionMessage(e), "\n")})


    #But sometimes, even with >4 occurrence points, Maxent fails... 
    #So I'll make sure that if I have >4 points but maxent didn't work, I get the circles anyway
    f<-paste("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm/",data_list[i],"/",gsub('\\s+', '_', data_list[i]),"_pred.grd", sep="")

    gc() #Just collecting garbage to speed up the process

    if (!file.exists(f)){ # then, if f (maxent output) doesn't exist, create the circles at least

      spoints<- data_points[[i]]

      circ <- circles(spoints, d=5642,lonlat=TRUE)
      circ <- circ@polygons
      crs(circ)<-crs(wrld_cropped)
      circ <- gIntersection(wrld_cropped, circ, byid = TRUE, drop_lower_td = TRUE)

      #To write de polygon to a file, the function writeOGR needs an object SPDF, so...
      #Getting Polygon IDs
      circ_df<- as.data.frame(sapply(slot(circ, "polygons"), function(x) slot(x, "ID")))
      #Making the IDs row names 
      row.names(circ_df) <- sapply(slot(circ, "polygons"), function(x) slot(x, "ID"))
      # Make spatial polygon data frame
      circ_SPDF <- SpatialPolygonsDataFrame(circ, data =circ_df)

      #Save the polygon, finally
      #dir.create(paste("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i],sep=""))
      writeOGR(obj = circ_SPDF, dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i],sep=""), driver = "ESRI Shapefile",
               layer = paste0(gsub('\\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE )  

      spc_pol5[[i]]<-circ_SPDF


      plot(wrld_cropped,  border='dark grey', col="#f9f9f9",axes=FALSE, main=data_list[i])  
      plot(circ,  add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8) )
      #plot(spoints,pch=21,col="white", bg='hotpink', lwd=0.1, cex=0.5, add=TRUE)

    }


  } else  { #If the species does not have more than 4 points, 
            #do not run maxent, but create a circles polygon

    spoints<- data_points[[i]]

    #For the circle to have 100km2, d should be 5641.9 ... 
    circ <- circles(spoints, d=5642,lonlat=TRUE)
    circ <- circ@polygons
    crs(circ)<-crs(wrld_cropped)
    circ <- gIntersection(wrld_cropped, circ, byid = TRUE, drop_lower_td = TRUE)

    circ_df<- as.data.frame(sapply(slot(circ, "polygons"), function(x) slot(x, "ID")))
    row.names(circ_df) <- sapply(slot(circ, "polygons"), function(x) slot(x, "ID"))
    circ_SPDF <- SpatialPolygonsDataFrame(circ, data =circ_df)

    writeOGR(obj = circ_SPDF, dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i],sep=""), driver = "ESRI Shapefile",
             layer = paste0(gsub('\\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE )  

    par(mfrow=c(1,1),mar = c(2,2, 2, 2))
    plot(wrld_cropped,  border='dark grey', col="#f9f9f9",axes=FALSE, main=data_list[i])  
    plot(circ,  add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8) )
    spc_pol5[[i]]<-circ_SPDF

    gc() #collecting garbage before a nuw run
  }

}
dev.off()
dev.off() #to close that pdf I started before the loop


ptime2<- proc.time() - ptime1 #just checking the time
ptime2
Thai
  • 493
  • 5
  • 15
  • 2
    When asking for help you should provide a [minimal, reproduicble example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) with data that can be used to run and test. This is way too much code to ask someone to look over. Try to simplify your problem. Have you successfully used `foreach` in the past? Maybe start small. What do you expected to be saved from each of your iterations? I'm not sure I see a place where your loop returns anything. – MrFlick Sep 19 '17 at 20:26
  • No, I never used foreach before... I spent the last 20 hours learning about it, since I'm quite new to R, but I'm not getting anything... With for loop each iteration gives me a maxent object, a maxent evaluation, a tresholds set for this maxent, a prediction, the prediction's polygon and a second polygon (simpler). All these things are sent to lists, and respective plots are called just to get printed in a PDF I iniciated before the loop. My script works fine with for loop, but no sucess with foreach... I'll try to simplify my problem and find out how to make it reproducible. Thank you! – Thai Sep 19 '17 at 20:36
  • 1
    @Thai If you don't know **foreach**, this [blog post](https://privefl.github.io/blog/a-guide-to-parallelism-in-r/) might be of interest for you. – F. Privé Sep 20 '17 at 07:48
  • Nice, such a great way to learn foreach! Thank you, Privé! – Thai Sep 20 '17 at 19:44

1 Answers1

4

You could call foreach specifying a "collector" variable such as in:

results <- foreach(i=1:4, .packages=c("dismo","scales","rgdal","rgeos","rJava")) %dopar%

Then, before the end of the foreach loop you can collect all your result variable in a common list and return them:

out <- list(xm_spc_list= xm_spc_list,
            e_spc_list = e_spc_list, 
            px_spc_list = px_spc_list, 
            ...  =  ...,
            ...  =  ....)
return(out)
}

Notice that in the foreach you can avoid to use constructs such as xm_spc_list[[i]] <- because foreach will take care of that for you by "binding" the results in a (ordered) list of lists.

To retrieve the "single" outputs from the results list of lists after the foreach, you can then use something like:

xm_spc_list <- data.table::rbindlist(do.call(c,lapply(results, "[", 1)))
e_spc_list <- data.table::rbindlist(do.call(c,lapply(results, "[", 2)))
....
....

HTH (though impossible to test, given the example at hand)

lbusett
  • 5,801
  • 2
  • 24
  • 47