I am trying to optimize a stochastic differential equation. What it means for me is that I have to solve a set of Mmax independent differential equations. For that task I use Numba, but I have the feeling I am doing it wrong, as even if I do not get any errors and I get some time-reduction, it is not as much as I expected.
My code looks like
@njit(fastmath=True)
def Liouvillian(rho,t_list,alpha1,alpha1p):
C = np.zeros((len(rho0)),dtype=np.complex64)
B = L0+alpha1p*L1m+alpha1*L1p
for j in range(len(rho0)):
for i in range(len(rho0)):
C[j] += B[j, i]*rho[i]
return C
@njit(parallel=True, fastmath=True)
def RK4(rho0,t_list,alpha1,alpha1p):
rho=np.zeros((len(rho0),len(t_list),Mmax),dtype=np.complex64)
for m in prange(Mmax):
rho[:,0,m]=rho0
n=0
for t in t_list[0:len(t_list)-1]:
k1=Liouvillian(rho[:,n,m],t,alpha1[st*n,m],alpha1p[st*n,m])
k2=Liouvillian(rho[:,n,m]+dt*k1/2,t+dt/2,alpha1[st*n,m],alpha1p[st*n,m])
k3=Liouvillian(rho[:,n,m]+dt*k2/2,t+dt/2,alpha1[st*n,m],alpha1p[st*n,m])
k4=Liouvillian(rho[:,n,m]+dt*k3,t+dt,alpha1[st*n,m],alpha1p[st*n,m])
rho[:,n+1,m]=rho[:,n,m]+dt*(k1+2*k2+2*k3+k4)/6.0
n=n+1
return rho
where alpha1p and alpha are 2D arrays with elements [t,m] and L0,Lp,Lm are just arrays.
Specifically taking Mmax=10000, without using Numba it takes 16s, while using Numba (both parallel and fastmath), it reduces to 14s. While I see some reduction, this is not what I expected from the parallelization. Is there something wrong in my code or this is what I should get?
Edit: Here I attach the code I use to generate the random arrays I later use on the ODE solver.
def A_matrix(num_modes,kappa,E_0):
A_modes=np.zeros(((2*num_modes),(2*num_modes)),dtype=np.complex64)
for i in range(np.shape(A_modes)[0]):
for j in range(np.shape(A_modes)[0]):
if i==j:
A_modes[i,j]=-kappa/4
if i==0:
A_modes[i,i+num_modes+1]=E_0*kappa/2
A_modes[i,-1]=E_0*kappa/2
elif 0<i<num_modes-1:
A_modes[i,i+num_modes+1]=E_0*kappa/2
A_modes[i,i+num_modes-1]=E_0*kappa/2
elif i==num_modes:
A_modes[i-1,i]=E_0*kappa/2
A_modes[i-1,-2]=E_0*kappa/2
return (A_modes+A_modes.transpose())
def D_mat(num_modes):
D_matrix=np.zeros(((num_modes),(num_modes)),dtype=np.complex64)
for i in range(np.shape(D_matrix)[0]):
for j in range(np.shape(D_matrix)[0]):
if i==j:
if i==0:
D_matrix[i,i+1]=1
D_matrix[i,-1]=1
elif 0<i<num_modes-1:
D_matrix[i,i+1]=1
return (D_matrix+D_matrix.transpose())
def a_part(alpha,t_list):
M=A_matrix(num_modes,kappa,E_0)
return M.dot(alpha)
def b_part(w,t_list):
D_1=D_mat(num_modes)
Zero_modes=np.zeros(((num_modes),(num_modes)),dtype=np.complex64)
D=np.bmat([[D_1, Zero_modes], [Zero_modes, D_1]])
B=sqrtm(D)*np.sqrt(E_0*kappa/2)
return B.dot(w)
def SDE_Param_Euler_Mauyrama(Mmax):
alpha=np.zeros((2*num_modes,Smax+1,Mmax),dtype=np.complex64)
n=0
alpha[:,n,:]=0.0+1j*0.0
for s in s_list[0:len(s_list)-1]:
alpha[:,n+1,:]=alpha[:,n,:]+ds*a_part(alpha[:,n,:],s)+b_part(w[:,n,:],s)
n=n+1
return (alpha)
#Parameters
E_0=0.5
kappa=10.
gamma=1.
num_modes=2 ## number of modes
Mmax=10000 #number of samples
Tmax=20 ##max value for time
dt=1/(2*kappa)
st=10
ds=dt/st
Nmax=int(Tmax/dt) ##number of steps
Smax=int(Tmax/ds) ##number of steps
t_list=np.arange(0,Tmax+dt/2,dt)
s_list=np.arange(0,Tmax+ds/2,ds)
w = np.random.randn(2*num_modes,Smax+1,Mmax)*np.sqrt(ds)
(alpha1,alpha2,alpha1p,alpha2p)=SDE_Param_Euler_Mauyrama(Mmax)
from qutip import *
Delta_a=0
num_qubits=1
##Atom 1
sm_1=sigmam()
sp_1=sigmap()
sx_1=sigmax()
sy_1=sigmay()
sz_1=sigmaz()
state0=np.zeros(2**num_qubits,dtype=np.complex64)
state0[-1]=1
rho0=np.kron(state0,np.conjugate(np.transpose(state0)))
Ha=Delta_a*(sz_1)/2 #Deterministic Liouvillian
L_ops=[np.sqrt(gamma)*sm_1]
L0=np.array(liouvillian(Ha,L_ops),dtype=np.complex64)
L1m=-np.array(np.sqrt(kappa*gamma)*1j*liouvillian(sm_1),dtype=np.complex64)
L1p=np.array(np.sqrt(kappa*gamma)*1j*liouvillian(sp_1),dtype=np.complex64)
rho_1=RK4(rho0,t_list,alpha1,alpha1p).reshape(2**num_qubits,2**num_qubits,len(t_list),Mmax)