I am trying to speed up the training of my DQN agent. Right now each training call takes over 3 s, and it runs once per step with 300 steps per episode, so roughly 15 minutes per episode and about 10 days in total for the 1000 episodes required to reach convergence.
I asked a similar question a while back here, and the answer improved the training speed, but training the DQN agent still takes too long. I also tried using MirroredStrategy with 4 CPU cores (I cannot use a GPU on my machine), as suggested here and here, but that made training even slower (over 11 s per training loop).
I also stumbled upon this, which suggests that using a for loop makes the training slower.
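To be concrete about what that means, the difference is between calling fit once per sample inside a Python loop and calling it once on the whole minibatch. A toy example of the two patterns (a made-up single-input model, not my actual network):

import numpy as np
import tensorflow as tf

# Toy single-input model, just to illustrate the two patterns (not my actual network)
toy_model = tf.keras.Sequential([tf.keras.layers.Dense(9)])
toy_model.compile(loss="mse", optimizer="adam")

states = np.random.rand(32, 4).astype(np.float32)
targets = np.random.rand(32, 9).astype(np.float32)

# Pattern 1: one fit call per sample (what my current training loop does)
for s, t in zip(states, targets):
    toy_model.fit(s[None], t[None], epochs=1, verbose=0)

# Pattern 2: a single fit call on the whole minibatch (what the linked post suggests)
toy_model.fit(states, targets, epochs=1, verbose=0)

My first training function below does the former; the second one is my attempt at the latter.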
This is my code:
n_possible_movements = 9
MINIBATCH_SIZE = 32
class DQNAgent(object):
def __init__(self):
self.epsilon = 1.0
self.epsilon_decay = 0.8
self.epsilon_min = 0.1
self.learning_rate = 10e-4
self.tau = 1e-3
# Main models
self.model_uav_pos = self._build_pos_model()
# Target networks
self.target_model_uav_pos = self._build_pos_model()
# Copy weights
self.target_model_uav_pos.set_weights(self.model_uav_pos.get_weights())
# An array with last n steps for training
self.replay_memory_pos_nn = deque(maxlen=REPLAY_MEMORY_SIZE)
def _build_pos_model(self): # compile the DNN
# create the DNN model
dnn = self.create_pos_dnn()
opt = Adam(learning_rate=self.learning_rate) #, decay=self.epsilon_decay)
dnn.compile(loss="mse", optimizer=opt)
dnn.call = tf.function(dnn.call, jit_compile=True)
return dnn
def create_pos_dnn(self):
# initialize the input shape
pos_input_shape = (2,)
requests_input_shape = (len(env.ues),)
number_of_satisfied_ues_input_shape = (1,)
# How many possible outputs we can have
output_nodes = n_possible_movements
# Initialize the inputs
uav_current_position = Input(shape=pos_input_shape, name='pos')
ues_requests = Input(shape=requests_input_shape, name='requests')
number_of_satisfied_ues = Input(shape=number_of_satisfied_ues_input_shape, name='number_of_satisfied_ues')
# Put them in a list
list_inputs = [uav_current_position, ues_requests, number_of_satisfied_ues]
# Merge all input features into a single large vector
x = layers.concatenate(list_inputs)
# Add a 1st Hidden (Dense) Layer
dense_layer_1 = Dense(512, activation="relu")(x)
# Add a 2nd Hidden (Dense) Layer
dense_layer_2 = Dense(512, activation="relu")(dense_layer_1)
# Add a 3rd Hidden (Dense) Layer
dense_layer_3 = Dense(256, activation="relu")(dense_layer_2)
# Output layer
output_layer = Dense(output_nodes, activation="linear")(dense_layer_3)
model = Model(inputs=list_inputs, outputs=output_layer)
# return the DNN
return model
def remember_pos_nn(self, state, action, reward, next_state, done):
self.replay_memory_pos_nn.append((state, action, reward, next_state, done)) # list of previous experiences, enabling re-training later
def act_upon_choosing_a_new_position(self, state): # state is a tuple (uav_position, requests_array, number_satisfaction)
if np.random.rand() <= self.epsilon: # if acting randomly, take random action
return random.randrange(n_possible_movements)
pos = np.array([state[0]])
reqs = np.array([state[1]])
number_satisfaction = np.array([state[2]])
act_values = self.model_uav_pos([pos, reqs, number_satisfaction]) # if not acting randomly, predict reward value based on current state
return np.argmax(act_values[0])
def target_train(self):
weights = self.model_uav_pos.get_weights()
target_weights = self.target_model_uav_pos.get_weights()
for i in range(len(target_weights)):
target_weights[i] = weights[i] * self.tau + target_weights[i] * (1 - self.tau)
self.target_model_uav_pos.set_weights(target_weights)
This is the training function before I tried the solution in the link:
def train_pos_nn(self):
print("In Training..")
# Start training only if certain number of samples is already saved
if len(self.replay_memory_pos_nn) < MIN_REPLAY_MEMORY_SIZE:
print("Exiting Training: Replay Memory Not Full Enough...")
return
# Get a minibatch of random samples from memory replay table
list_memory = list(self.replay_memory_pos_nn)
random.shuffle(list_memory)
minibatch = random.sample(list_memory, MINIBATCH_SIZE)
start_time = time.time()
# Enumerate our batches
for index, (current_state, action, reward, new_current_state, done) in enumerate(minibatch):
print('...Starting Training...')
target = 0
pos = np.array([current_state[0]])
reqs = np.array([current_state[1]])
number_satisfaction = np.array([current_state[2]])
pos_next = np.array([new_current_state[0]])
reqs_next = np.array([new_current_state[1]])
number_satisfaction_next = np.array([new_current_state[2]])
# If not a terminal state, get new q from future states, otherwise set it to 0
# almost like with Q Learning, but we use just part of equation here
if not done:
print("Predict Next State")
target = reward + DISCOUNT * np.amax(self.target_model_uav_pos([pos_next, reqs_next, number_satisfaction_next]))
else:
target = reward
# Update Q value for given state
print("Predict State")
target_f = self.model_uav_pos([pos, reqs, number_satisfaction])
target_f = np.array(target_f)
target_f[0][action] = target
self.model_uav_pos.fit([pos, reqs, number_satisfaction], \
target_f, \
verbose=2, \
shuffle=False, \
callbacks=None, \
epochs=1 \
)
end_time = time.time()
print("Time", end_time - start_time)
# Update target network counter every episode
self.target_train()
This is the training function after I tried the solution suggested in the link:
def train_pos_nn(self):
print("In Training..")
# Start training only if certain number of samples is already saved
if len(self.replay_memory_pos_nn) < MIN_REPLAY_MEMORY_SIZE:
print("Exiting Training: Replay Memory Not Full Enough...")
return
# Get a minibatch of random samples from memory replay table
list_memory = list(self.replay_memory_pos_nn)
random.shuffle(list_memory)
# Draw a sample
samples = random.sample(list_memory, MINIBATCH_SIZE)
start_time = time.time()
# Prepare the batch
state, action, reward, new_state, done = zip(*samples)
nstate = []
for n_state in new_state:
pos_next = np.array([n_state[0]])
reqs_next = np.array([n_state[1]])
number_satisfaction_next = np.array([n_state[2]])
nstate.append([pos_next,reqs_next,number_satisfaction_next])
done = np.array(done)[:,None]
state = np.concatenate(state)
reward = np.array(reward)[:,None]
q_future = self.target_model_uav_pos(nstate)
targets = reward + DISCOUNT * (1 - done) * np.max(q_future, axis=1, keepdims=True)  # zero out future value for terminal states
# Fit the model
self.model_uav_pos.fit(state, targets, epochs=1, verbose=2)
end_time = time.time()
print("Time", end_time - start_time)
# Update target network counter every episode
self.target_train()
This call to the target model, q_future = self.target_model_uav_pos(nstate), gives me the error:
ValueError: Layer "model_19" expects 3 input(s), but it received 96 input tensors.
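If I understand the error correctly, nstate is a list of 32 elements, each of which is itself a list of 3 arrays, so Keras sees 32 * 3 = 96 separate input tensors instead of the 3 inputs the model defines. My guess is that I instead need one stacked array per input head, along the lines of the sketch below (the shapes and the use of np.stack are my assumption), but I am not sure this is the right way to do it:

# Hypothetical batching: one array per named input, stacked over the minibatch
pos_next = np.stack([s[0] for s in new_state])                    # shape (32, 2)
reqs_next = np.stack([s[1] for s in new_state])                   # shape (32, len(env.ues))
n_sat_next = np.reshape([s[2] for s in new_state], (-1, 1))       # shape (32, 1)
q_future = self.target_model_uav_pos([pos_next, reqs_next, n_sat_next])  # shape (32, 9)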
How do I do it correctly? And is there any other way to speed up the training process?