I am trying to use pymoo's NSGA-II algorithm to carry out portfolio optimization.
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.core.problem import Problem
from pymoo.optimize import minimize
from pymoo.visualization.scatter import Scatter
v = df_cov
m = df_mr
K = 10
print("v=", v, "m=", m, "K=", K, "n=", n)
#Examples
x=np.ones(n)/n
print("x=", x, np.sum(x))
print("Risk = ", np.sum(np.dot(x.T,(np.dot(v,x)))))
print(np.dot(x.T,(np.dot(v,x))).shape)
print(x.T.shape, np.dot(v,x).shape)
print("Return = ", np.dot(x,m))
v is an nxn matrix containing the covariance values between n number of assets. m is an array of mean returns of those assets.
I have defined the problem as the following:
class MyProblem(Problem):
def __init__(self, m, v):
super().__init__(n_var=n,
n_obj=2,
n_constr=0,
xl=np.array([0 for _ in range(n)]),
xu=np.array([1 for _ in range(n)]))
self.m = m
self.v = v
def _evaluate(self, x, out, *args, **kwargs):
f1 = np.dot(np.array(x).T, (np.dot(self.v, np.array(x))))
f2 = -(np.dot(np.array(x), self.m))
#g1 = np.sum(np.array(x))-1
#g2 = np.sum(len(np.array(x)))-K
out["F"] = np.column_stack([f1, f2])
#out["G"] = [g1,g2]
problem = MyProblem(m,v)
algorithm = NSGA2(pop_size=100)
from pymoo.factory import get_termination
termination = get_termination("n_gen", 200)
from pymoo.optimize import minimize
res = minimize(problem,
algorithm,
termination,
seed=1,
save_history=True,
verbose=True)
X = res.X
F = res.F
I am firstly trying to run the algorithm without any constraints, however when I run the algorithm I receive this error:
ValueError: shapes (31,31) and (100,31) not aligned: 31 (dim 1) != 100 (dim 0)
It appears that the population size is being passed through the algorithm creating a matrix with the incorrect dimensions. (in this example n=31)
Is there a way to fix this error?
The data used was acquired from http://people.brunel.ac.uk/~mastjjb/jeb/orlib/files/port1.txt
Below is the code used to prepare the data:
import numpy
import pandas as pd
import numpy as np
#read in the chosen dataset
df = pd.read_fwf(
'port1.txt', sep=" ",header=None)
#define n as the number of assets in the dataset
n = int(df[0].iloc[0])
#split the dataframe into two data frames
df_RR = df.iloc[1:n+1]
df_CV = df.iloc[n+1:]
#split the single column into two and give headings to both
df_RR['mean returns'] = [d.split()[0] for d in df_RR[0]]
df_RR['sd'] = [d.split()[1] for d in df_RR[0]]
del df_RR[0]
df_RR = df_RR.reset_index(drop=True)
#split the single column into three and give headings to each
df_CV['i'] = [d.split()[0] for d in df_CV[0]]
df_CV['j'] = [d.split()[1] for d in df_CV[0]]
df_CV['correlation'] = [d.split()[2] for d in df_CV[0]]
del df_CV[0]
df_CV = df_CV.reset_index(drop=True)
#convert variables to correct type (numeric)
df_CV = df_CV.apply(pd.to_numeric)
df_RR = df_RR.apply(pd.to_numeric)
#create correlation matrix
df2 = df_CV.pivot(index='i', columns='j', values='correlation')
df3 = np.triu(df2)
iu = np.triu_indices(n,1)
il = (iu[1],iu[0])
df3[il]=df3[iu]
#create sd array
df_std = np.asarray(df_RR['sd'])
#create covariance matrix
df4 = np.multiply(df3, df_std)
df_cov = np.multiply(df4, df3)
#create mean returns array
df_mr = np.asarray(df_RR['mean returns'])