I'm using Maxent for some spatial analysis. I have a long script with many outputs that I collect in lists. It works just fine with a for loop and low resolution climatic predictors in a rasterstack (in my core i5, 6gb notebook) . But I need to use a high resolution set of rasters, and all the problems come from this issue. Even using a 16 core, 32gb virtual machine, the proccessing is veeeery slow, and after 3 days, memory is not enought and the run is closed after about 50 turns in my loop (which has 92 species). I'm trying to improve this script by collecting garbage to clean the memory and using doParallel. After the new script is cleanly running with the low resolution predictors, I'll try it with the high resolution predictors
So, I changed my script to use foreach
instead of for
, and with %dopar%
But so far, I'm getting this as result:
[[1]]
NULL
[[2]]
NULL
[[3]]
NULL
[[4]]
NULL
I saw another question about the same issue, but the very simple solution the guy needed doesn't apply for me.. So, any tips are very welcome
#install.packages("dismo")
library(dismo)
#install.packages("scales")
library(scales)
#install.packages("rgdal")
library(rgdal)
#install.packages("rgeos")
library(rgeos)
#install.packages("rJava")
library(rJava)
#install.packages("foreach")
library(foreach)
#install.packages("doParallel")
library(doParallel)
#Colors to use in the plots
MyRbw2<-c('#f4f4f4','#3288bd','#66c2a5','#e6f598','#fee08b','#f46d43','#9e0142')
colfunc_myrbw2<-colorRampPalette(MyRbw2)
#Create empty lists to recieve outputs
xm_list<-list()
xm_spc_list<-list()
e_spc_list<-list()
px_spc_list<-list()
tr_spc_list<-list()
spc_pol1<-list()
spc_pol5<-list()
tr<-list()
#Create empty data frame to recieve treshold values for each species
tr_df<-data.frame(matrix(NA, nrow=92, ncol=7))
tr_df[,1]<-as.character(tree_list)
names(tr_df)<- c('spp',"kappa","spec_sens","no_omission","prevalence","equal_sens_spec","sensitivity")
# Assigning objects to run Maxent
data_points <- tree_cd_points # this is a list with SpatialPoints for 92 species
data_list <- tree_list # list with the species names
counts_data<- counts_tree_cd # number of points for each species
predictors2<-predictors_low # rasterStack of Bioclim layers (climatic variables), low resolution
#Stablishing extent for Maxent predictions
xmin=-120; xmax=-35; ymin2=-40; ymax=35
limits2 <- c(xmin, xmax, ymin2, ymax)
# Making the cluster for doParallel
cores<-detectCores() # I have 16
cl <- makeCluster(cores[1]-1) #not to overload your computer
registerDoParallel(cl)
#Just to keep track of time
ptime1 <- proc.time()
pdf("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/treesp_maxent_20170823.pdf",
paper = "letter", height = 11, width=8,5, pointsize=12,pagecentre = TRUE)
#I have 92 species, but I'll run just the first 4 to test
foreach(i=1:4, .packages=c("dismo","scales","rgdal","rgeos","rJava")) %dopar% { #Runs only species with 5 or more points to avoid maxent problems
if (counts_data$n[i]>4) { #If the species has more than 4 occurrence points, run maxent
tryCatch({ #makes the loop go on despite errors
#Sets train, test and total points for Maxent
group <- kfold(x=data_points[[i]], 5)
pres_train<- data_points[[i]][group != 1, ]
pres_test <- data_points[[i]][group == 1, ]
spoints<- data_points[[i]]
#Sets background points for Maxent
backg <- randomPoints(predictors2, n=20000, ext=limits2, extf = 1.25)
colnames(backg) = c('lon', 'lat')
group <- kfold(backg, 5)
backg_train <- backg[group != 1, ]
backg_test <- backg[group == 1, ]
#The maxent itself (put the xm in the empty list that I created earlier to store all xms)
xm_spc_list[[i]] <- maxent(x=predictors2, p=spoints, a=backg ,
factors='ecoreg',
args=c('visible=true',
'betamultiplier=1',
'randomtestpoints=20',
'randomseed=true',
'linear=true',
'quadratic=true',
'product=true',
'hinge=true',
'threads=4',
'responsecurves=true',
'jackknife=true',
'removeduplicates=false',
'extrapolate=true',
'pictures=true',
'cache=true',
'maximumiterations=5000',
'askoverwrite=false'),
path=paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm/",data_list[i]), overwrite=TRUE)
par(mfrow=c(1,1),mar = c(2,2, 2, 2))
plot(xm_spc_list[[i]], main=paste(data_list[i]))
response(xm_spc_list[[i]])
#Evaluating how good is the model and putting the evaluation values in a list
e_spc_list[[i]] <- evaluate(pres_test, backg_test, xm_spc_list[[i]], predictors2)
#Predicting the climatic envelopes and Sending to a list os predictions
px_spc_list[[i]] <- predict(predictors2, xm_spc_list[[i]], ext=limits2, progress='text',
filename=paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm/",data_list[i],"/",gsub('\\s+', '_', data_list[i]),"_pred.grd"), overwrite=TRUE)
tr_df[i,2:7]<-threshold(e_spc_list[[i]])
tr[[i]]<-threshold(e_spc_list[[i]], 'spec_sens')
#Pol 1 will be the regular polygon, default treshold
spc_pol1[[i]] <- rasterToPolygons(px_spc_list[[i]]>tr[[i]],function(x) x == 1,dissolve=T)
writeOGR(obj = spc_pol1[[i]], dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm1/",data_list[i]), driver = "ESRI Shapefile",
layer = paste0(gsub('\\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE )
#Pol 5 will be a 100km^2 circle around the occurrence points
circ <- circles(spoints, d=5642,lonlat=TRUE)
circ <- circ@polygons
crs(circ)<-crs(wrld_cropped)
circ <- gIntersection(wrld_cropped, circ, byid = TRUE, drop_lower_td = TRUE)
#To write de polygon to a file, the function writeOGR needs an object SPDF, so...
#Getting Polygon IDs
circ_df<- as.data.frame(sapply(slot(circ, "polygons"), function(x) slot(x, "ID")))
#Making the IDs row names
row.names(circ_df) <- sapply(slot(circ, "polygons"), function(x) slot(x, "ID"))
# Make spatial polygon data frame
circ_SPDF <- SpatialPolygonsDataFrame(circ, data =circ_df)
#Save the polygon, finally
writeOGR(obj = circ_SPDF, dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i]), driver = "ESRI Shapefile",
layer = paste0(gsub('\\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE )
spc_pol5[[i]]<-circ_SPDF
#Now the plots
par(mfrow=c(2,3),mar = c(2,1, 1, 1))
plot(px_spc_list[[i]], axes=FALSE, legend=TRUE, legend.shrink=1, col=colfunc_myrbw2(20), main=paste((data_list[i]),' - Maxent'))
plot(wrld_cropped,add=TRUE, border='dark grey',axes=FALSE)
points(data_points[[i]], pch=21,col="white", bg='hotpink', lwd=0.5, cex=0.7)
plot(wrld_cropped, border='dark grey', col="#f9f9f9",axes=FALSE, main='px>tr')
plot(spc_pol1[[i]] , main=paste((data_list[i]),' - Range'), add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8),axes=FALSE)
points(data_points[[i]], pch="°",col="black", cex=0.7)
plot(wrld_cropped, border='dark grey', col="#f9f9f9",axes=FALSE, main=paste(data_list[i],"circles"))
plot(circ, add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8) )
}, error=function(e){cat("Warning message:",conditionMessage(e), "\n")})
#But sometimes, even with >4 occurrence points, Maxent fails...
#So I'll make sure that if I have >4 points but maxent didn't work, I get the circles anyway
f<-paste("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm/",data_list[i],"/",gsub('\\s+', '_', data_list[i]),"_pred.grd", sep="")
gc() #Just collecting garbage to speed up the process
if (!file.exists(f)){ # then, if f (maxent output) doesn't exist, create the circles at least
spoints<- data_points[[i]]
circ <- circles(spoints, d=5642,lonlat=TRUE)
circ <- circ@polygons
crs(circ)<-crs(wrld_cropped)
circ <- gIntersection(wrld_cropped, circ, byid = TRUE, drop_lower_td = TRUE)
#To write de polygon to a file, the function writeOGR needs an object SPDF, so...
#Getting Polygon IDs
circ_df<- as.data.frame(sapply(slot(circ, "polygons"), function(x) slot(x, "ID")))
#Making the IDs row names
row.names(circ_df) <- sapply(slot(circ, "polygons"), function(x) slot(x, "ID"))
# Make spatial polygon data frame
circ_SPDF <- SpatialPolygonsDataFrame(circ, data =circ_df)
#Save the polygon, finally
#dir.create(paste("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i],sep=""))
writeOGR(obj = circ_SPDF, dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i],sep=""), driver = "ESRI Shapefile",
layer = paste0(gsub('\\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE )
spc_pol5[[i]]<-circ_SPDF
plot(wrld_cropped, border='dark grey', col="#f9f9f9",axes=FALSE, main=data_list[i])
plot(circ, add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8) )
#plot(spoints,pch=21,col="white", bg='hotpink', lwd=0.1, cex=0.5, add=TRUE)
}
} else { #If the species does not have more than 4 points,
#do not run maxent, but create a circles polygon
spoints<- data_points[[i]]
#For the circle to have 100km2, d should be 5641.9 ...
circ <- circles(spoints, d=5642,lonlat=TRUE)
circ <- circ@polygons
crs(circ)<-crs(wrld_cropped)
circ <- gIntersection(wrld_cropped, circ, byid = TRUE, drop_lower_td = TRUE)
circ_df<- as.data.frame(sapply(slot(circ, "polygons"), function(x) slot(x, "ID")))
row.names(circ_df) <- sapply(slot(circ, "polygons"), function(x) slot(x, "ID"))
circ_SPDF <- SpatialPolygonsDataFrame(circ, data =circ_df)
writeOGR(obj = circ_SPDF, dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i],sep=""), driver = "ESRI Shapefile",
layer = paste0(gsub('\\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE )
par(mfrow=c(1,1),mar = c(2,2, 2, 2))
plot(wrld_cropped, border='dark grey', col="#f9f9f9",axes=FALSE, main=data_list[i])
plot(circ, add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8) )
spc_pol5[[i]]<-circ_SPDF
gc() #collecting garbage before a nuw run
}
}
dev.off()
dev.off() #to close that pdf I started before the loop
ptime2<- proc.time() - ptime1 #just checking the time
ptime2