I am trying to accelerate encryption using the RSA algorithm using CUDA. I can't properly perform power-modulo in the kernel function.
I am using Cuda compilation tools on AWS, release 9.0, V9.0.176 to compile.
#include <cstdio>
#include <math.h>
#include "main.h"
// Kernel function to encrypt the message (m_in) elements into cipher (c_out)
__global__
void enc(int numElements, int e, int n, int *m_in, int *c_out)
{
int index = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
printf("e = %d, n = %d, numElements = %d\n", e, n, numElements);
for (int i = index; i < numElements; i += stride)
{
// POINT OF ERROR //
// c_out[i] = (m_in[i]^e) % n; //**GIVES WRONG RESULTS**
c_out[i] = __pow(m_in[i], e) % n; //**GIVES, error: expression must have integral or enum type**
}
}
// This function is called from main() from other file.
int* cuda_rsa(int numElements, int* data, int public_key, int key_length)
{
int e = public_key;
int n = key_length;
// Allocate Unified Memory – accessible from CPU or GPU
int* message_array;
cudaMallocManaged(&message_array, numElements*sizeof(int));
int* cipher_shared_array; //Array shared by CPU and GPU
cudaMallocManaged(&cipher_shared_array, numElements*sizeof(int));
int* cipher_array = (int*)malloc(numElements * sizeof(int));
//Put message array to be encrypted in a managed array
for(int i=0; i<numElements; i++)
{
message_array[i] = data[i];
}
// Run kernel on 16M elements on the GPU
enc<<<1, 1>>>(numElements, e, n, message_array, cipher_shared_array);
// Wait for GPU to finish before accessing on host
cudaDeviceSynchronize();
//Copy into a host array and pass it to main() function for verification.
//Ignored memory leaks.
for(int i=0; i<numElements; i++)
{
cipher_array[i] = cipher_shared_array[i];
}
return (cipher_array);
}
Please help me with this error. How can I implement power-modulo (as follows) on CUDA kernel?
(x ^ y) % n;
I would really appreciate any help.