I'm trying to execute Python code on GPU using CuPy
library.
However, when I run nvidia-smi
, no GPU processes are found.
Here's the code:
import numpy as np
import cupy as cp
from scipy.stats import rankdata
def get_top_one_probability(vector):
return (cp.exp(vector) / cp.sum(cp.exp(vector)))
def get_listnet_gradient(training_dataset, real_labels, predicted_labels):
ly_topp = get_top_one_probability(real_labels)
cp.cuda.Stream.null.synchronize()
s1 = -cp.matmul(cp.transpose(training_dataset), cp.reshape(ly_topp, (np.shape(cp.asnumpy(ly_topp))[0], 1)))
cp.cuda.Stream.null.synchronize()
exp_lz_sum = cp.sum(cp.exp(predicted_labels))
cp.cuda.Stream.null.synchronize()
s2 = 1 / exp_lz_sum
s3 = cp.matmul(cp.transpose(training_dataset), cp.exp(predicted_labels))
cp.cuda.Stream.null.synchronize()
s2_s3 = s2 * s3 # s2 is a scalar value
s1.reshape(np.shape(cp.asnumpy(s1))[0], 1)
cp.cuda.Stream.null.synchronize()
s1s2s3 = cp.add(s1, s2_s3)
cp.cuda.Stream.null.synchronize()
return s1s2s3
def relu(matrix):
return cp.maximum(0, matrix)
def get_groups_id_count(groups_id):
current_group = 1
group_counter = 0
groups_id_counter = []
for element in groups_id:
if element != current_group:
groups_id_counter.append((current_group, group_counter))
current_group += 1
group_counter = 1
else:
group_counter += 1
return groups_id_counter
def mul_matrix(matrix1, matrix2):
return cp.matmul(matrix1, matrix2)
if mode == 'train': # Train MLP
number_of_features = np.shape(training_set_data)[1]
# Input neurons are equal to the number of training dataset features
input_neurons = number_of_features
# Assuming that number of hidden neurons are equal to the number of training dataset (input neurons) features + 10
hidden_neurons = number_of_features + 10
# Weights random initialization
input_hidden_weights = cp.array(np.random.rand(number_of_features, hidden_neurons) * init_var)
# Assuming that number of output neurons is 1
hidden_output_weights = cp.array(np.float32(np.random.rand(hidden_neurons, 1) * init_var))
listwise_gradients = np.array([])
for epoch in range(0, 70):
print('Epoch {0} started...'.format(epoch))
start_range = 0
for group in groups_id_count:
end_range = (start_range + group[1]) # Batch is a group of words with same group id
batch_dataset = cp.array(training_set_data[start_range:end_range, :])
cp.cuda.Stream.null.synchronize()
batch_labels = cp.array(dataset_labels[start_range:end_range])
cp.cuda.Stream.null.synchronize()
input_hidden_mul = mul_matrix(batch_dataset, input_hidden_weights)
cp.cuda.Stream.null.synchronize()
hidden_neurons_output = relu(input_hidden_mul)
cp.cuda.Stream.null.synchronize()
mlp_output = relu(mul_matrix(hidden_neurons_output, hidden_output_weights))
cp.cuda.Stream.null.synchronize()
batch_gradient = get_listnet_gradient(batch_dataset, batch_labels, mlp_output)
batch_gradient = cp.mean(cp.transpose(batch_gradient), axis=1)
aggregated_listwise_gradient = cp.sum(batch_gradient, axis=0)
cp.cuda.Stream.null.synchronize()
hidden_output_weights = hidden_output_weights - (learning_rate * aggregated_listwise_gradient)
cp.cuda.Stream.null.synchronize()
input_hidden_weights = input_hidden_weights - (learning_rate * aggregated_listwise_gradient)
cp.cuda.Stream.null.synchronize()
start_range = end_range
listwise_gradients = np.append(listwise_gradients, cp.asnumpy(aggregated_listwise_gradient))
print('Gradients: ', listwise_gradients)
I'm using cp.cuda.Stream.null.synchronize()
because I read that this statement ensures that the code finishes executing on the GPU before going to the next line.
Could anyone help me to run the code on GPU? Thanks in advance