I wrote a (probably-inefficient, but anyway..) Rcpp code using inline to simulate a stochastic SEIR model.
The serial version compiles and works perfectly, but since I need to simulate from it a large number of times and since it seems to me like an embarrassingly parallel problem (just need to simulate again for other parameter values and return a matrix with the results) I tried to add #pragma omp parallel for
and to compile with -fopenmp -lgomp
but ... boom!
I get a segfault even for very small examples!
I tried to add setenv("OMP_STACKSIZE","24M",1);
and values well over 24M but still the segfault happens.
I'll explain briefly the code since it's a bit long (I tried to shorten it but the result change and I can't reproduce it..): I have two nested loops, the inner one execute the model for a given parameter set and the outer one changes the parameters.
The only reason a race condition might happen is if the code were trying to execute set of instructions inside inner the loop in parallel (which cannot be done because of the model structure, on iteration t
it depends on iteration t-1
) and not to parallelize the outer, but if I'm not mistaken that is what the parallel for
constructor does for default if put just outside the outer...
This is basically the form of the code I'm trying to run:
mat result(n_param,T_MAX);
#pragma omp parallel for
for(int i=0,i<n_param_set;i++){
t=0;
rowvec jnk(T_MAX);
while(t < T_MAX){
...
jnk(t) = something(jnk(t-1));
...
t++;
}
result.row(i)=jnk;
}
return wrap(result);
And my question is: How I tell the compiler that I just want to compute in parallel the outer loop (even distributing them statically like n_loops/n_threads for each thread) and not the inner one (which is actually non-parallelizable)?
The real code is a bit more involved and I'll present it here for the sake of reproducibility if you're really willing, but I'm only asking about the behavior of OpenMP. Please notice that the only OpenMP instruction appears at line 122
.
library(Rcpp);library(RcppArmadillo);library(inline)
misc='
#include <math.h>
#define _USE_MATH_DEFINES
#include <omp.h>
using namespace arma;
template <typename T> int sgn(T val) {
return (T(0) < val) - (val < T(0));
}
uvec rmultinomial(int n,vec prob)
{
int K = prob.n_elem;
uvec rN = zeros<uvec>(K);
double p_tot = sum(prob);
double pp;
for(int k = 0; k < K-1; k++) {
if(prob(k)>0) {
pp = prob[k] / p_tot;
rN(k) = ((pp < 1.) ? (rbinom(1,(double) n, pp))(0) : n);
n -= rN[k];
} else
rN[k] = 0;
if(n <= 0) /* we have all*/
return rN;
p_tot -= prob[k]; /* i.e. = sum(prob[(k+1):K]) */
}
rN[K-1] = n;
return rN;
}
'
model_and_summary='
mat SEIR_sim_plus_summaries()
{
vec alpha;
alpha << 0.002 << 0.0045;
vec beta;
beta << 0.01 << 0.01;
vec gamma;
gamma << 1.0/14.0 << 1.0/14.0;
vec sigma;
sigma << 1.0/(3.5) << 1.0/(3.5);
vec phi;
phi << 0.8 << 0.8;
int S_0 = 800;
int E_0 = 100;
int I_0 = 100;
int R_0 = 0;
int pop = 1000;
double tau = 0.01;
double t_0 = 0;
vec obs_time;
obs_time << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8 << 9 << 10 << 11 << 12 << 13 << 14 << 15 << 16 << 17 << 18 << 19 << 20 << 21 << 22 << 23 << 24;
const int n_obs = obs_time.n_elem;
const int n_part = alpha.n_elem;
mat stat(n_part,6);
//#pragma omp parallel for
for(int k=0;k<n_part;k++) {
ivec INC_i(n_obs);
ivec INC_o(n_obs);
// Event variables
double alpha_t;
int nX; //current number of people moving
vec rates(8);
uvec trans(4); // current transitions, e.g. from S to E,I,R,Universe
vec r(4); // rates e.g. from S to E, I, R, Univ.
/*********************** Initialize **********************/
int S_curr = S_0;
int S_prev = S_0;
int E_curr = E_0;
int E_prev = E_0;
int I_curr = I_0;
int I_prev = I_0;
int R_curr = R_0;
int R_prev = R_0;
int IncI_curr = 0;
int IncI_prev = 0;
int IncO_curr = 0;
int IncO_prev = 0;
double t_curr = t_0;
int t_idx =0;
while( t_idx < n_obs ) {
// next time preparation
t_curr += tau;
S_prev = S_curr;
E_prev = E_curr;
I_prev = I_curr;
R_prev = R_curr;
IncI_prev = IncI_curr;
IncO_prev = IncO_curr;
/*********************** description (rates) of the events **********************/
alpha_t = alpha(k)*(1+phi(k)*sin(2*M_PI*(t_curr+0)/52)); //real contact rate, time expressed in weeks
rates(0) = (alpha_t * ((double)I_curr / (double)pop ) * ((double)S_curr)); //e+1, s-1, r,i one s get infected (goes in E, not yey infectous)
rates(1) = (sigma(k) * E_curr); //e-1, i+1, r,s one exposed become infectous (goes in I) INCIDENCE!!
rates(2) = (gamma(k) * I_curr); //i-1, s,e, r+1 one i recover
rates(3) = (beta(k) * I_curr); //i-1, s, r,e one i dies
rates(4) = (beta(k) * R_curr); //i,e, s, r-1 one r dies
rates(5) = (beta(k) * E_curr); //e-1, s, r,i one e dies
rates(6) = (beta(k) * S_curr); //s-1 e, i ,r one s dies
rates(7) = (beta(k) * pop); //s+1 one susc is born
// Let the events occour
/*********************** S compartement **********************/
if((rates(0)+rates(6))>0){
nX = rbinom(1,S_prev,1-exp(-(rates(0)+rates(6))*tau))(0);
r(0) = rates(0)/(rates(0)+rates(6)); r(1) = 0.0; r(2) = 0; r(3) = rates(6)/(rates(0)+rates(6));
trans = rmultinomial(nX, r);
S_curr -= nX;
E_curr += trans(0);
I_curr += trans(1);
R_curr += trans(2);
//trans(3) contains dead individual, who disappear...we could avoid this using sequential conditional binomial
}
/*********************** E compartement **********************/
if((rates(1)+rates(5))>0){
nX = rbinom(1,E_prev,1-exp(-(rates(1)+rates(5))*tau))(0);
r(0) = 0.0; r(1) = rates(1)/(rates(1)+rates(5)); r(2) = 0.0; r(3) = rates(5)/(rates(1)+rates(5));
trans = rmultinomial(nX, r);
S_curr += trans(0);
E_curr -= nX;
I_curr += trans(1);
R_curr += trans(2);
IncI_curr += trans(1);
}
/*********************** I compartement **********************/
if((rates(2)+rates(3))>0){
nX = rbinom(1,I_prev,1-exp(-(rates(2)+rates(3))*tau))(0);
r(0) = 0.0; r(1) = 0.0; r(2) = rates(2)/(rates(2)+rates(3)); r(3) = rates(3)/(rates(2)+rates(3));
trans = rmultinomial(nX, r);
S_curr += trans(0);
E_curr += trans(1);
I_curr -= nX;
R_curr += trans(2);
IncO_curr += trans(2);
}
/*********************** R compartement **********************/
if(rates(4)>0){
nX = rbinom(1,R_prev,1-exp(-rates(4)*tau))(0);
r(0) = 0.0; r(1) = 0.0; r(2) = 0.0; r(3) = rates(4)/rates(4);
trans = rmultinomial(nX, r);
S_curr += trans(0);
E_curr += trans(1);
I_curr += trans(2);
R_curr -= nX;
}
/*********************** Universe **********************/
S_curr += pop - (S_curr+E_curr+I_curr+R_curr); //it should be poisson, but since the pop is fixed...
/*********************** Save & Continue **********************/
// Check if the time is interesting for us
if(t_curr > obs_time[t_idx]){
INC_i(t_idx) = IncI_curr;
INC_o(t_idx) = IncO_curr;
IncI_curr = IncI_prev = 0;
IncO_curr = IncO_prev = 0;
t_idx++;
}
//else just go on...
}
/*********************** Finished - Starting w/ stats **********************/
// INC_i is the useful variable, how can I change its reference withour copying it?
ivec incidence = INC_i; //just so if I want to use INC_o i have to change just this...
//Scan the epidemics to recover the summary stats (naively divide the data each 52 weeks)
double n_years = ceil((double)obs_time(n_obs-1)/52.0);
vec mu_attack(n_years);
vec ratio_attack(n_years-1);
vec peak(n_years);
vec atk(52);
peak(0)=0.0;
vec tmpExplo(52); //explosiveness
vec explo(n_years);
int year=0;
int week;
for(week=0 ; week<n_obs ; week++){
if(week - 52*year > 51){
mu_attack(year) = sum( atk )/(double)pop;
if(year>0)
ratio_attack(year-1) = mu_attack(year)/mu_attack(year-1);
for(int i=0;i<52;i++){
if(atk(i)>(peak(year)/2.0)){
tmpExplo(i) = 1.0;
} else {
tmpExplo(i) = 0.0;
}
}
explo(year) = sum(tmpExplo);
year++;
peak(year)=0.0;
}
atk(week-52*year) = incidence(week);
if( peak(year) < incidence(week) )
peak(year)=incidence(week);
}
if(week - 52*year > 51){
mu_attack(year) = sum( atk )/(double)pop;
} else {
ivec idx(52);
for(int i=0;i<52;i++)
{ idx(i) = i; } //take just the updated ones...
vec tmp = atk.elem(find(idx<(week - 52*year)));
mu_attack(year) = sum( tmp )/((double)pop * (tmp.n_elem/52.0));
ratio_attack(year-1) = mu_attack(year)/mu_attack(year-1);
for(int i=0;i<tmp.n_elem;i++){
if(tmp(i)>(peak(year)/2.0)){
tmpExplo(i) = 1.0;
} else {
tmpExplo(i) = 0.0;
}
}
for(int i=tmp.n_elem;i<52;i++)
tmpExplo(i) = 0.0; //to reset the others
explo(year) = sum(tmpExplo);
}
double correlation2;
double correlation4;
vec autocorr = acf(peak);
/***** ACF *****/
if(n_years<3){
correlation2=0.0;
correlation4=0.0;
} else {
if(n_years<5){
correlation2 = autocorr(1);
correlation4 = 0.0;
} else {
correlation2 = autocorr(1);
correlation4 = autocorr(3);
}
}
rowvec jnk(6);
jnk << sum(mu_attack)/(year+1.0)
<< (sum( log(ratio_attack)%log(ratio_attack) )/(n_years-1)) - (pow(sum( log(ratio_attack) )/(n_years-1),2))
<< correlation2 << correlation4 << max(peak) << sum(explo)/n_years;
stat.row(k) = jnk;
}
return stat;
}
'
main='
std::cout << "max_num_threads " << omp_get_max_threads() << std::endl;
RNGScope scope;
mat summaries = SEIR_sim_plus_summaries();
return wrap(summaries);
'
plug = getPlugin("RcppArmadillo")
## modify the plugin for Rcpp to support OpenMP
plug$env$PKG_CXXFLAGS <- paste('-fopenmp', plug$env$PKG_CXXFLAGS)
plug$env$PKG_LIBS <- paste('-fopenmp -lgomp', plug$env$PKG_LIBS)
SEIR_sim_summary = cxxfunction(sig=signature(),main,settings=plug,inc = paste(misc,model_and_summary),verbose=TRUE)
SEIR_sim_summary()
Thanks for the help!
NB: before you ask, I slightly modified the Rcpp multinomial sampling function just because I liked that way more than the one using pointer...not any other particular reason! :)