Let's say I have a program that generates a large, randomly filled integer array and lets me count the items divisible by a user-supplied number, using the GPU for this purpose.
C# code:
// P/Invoke binding for the native RunTest function exported by AMP.dll (cdecl calling convention).
//   CPUinput: pointer to the first element of the integer array to scan.
//   length:   number of elements reachable through CPUinput.
//   num:      divisor each element is tested against.
// Returns the value produced by the native function (the count of elements divisible by num).
[DllImport("AMP.dll", CharSet = CharSet.Unicode, CallingConvention = CallingConvention.Cdecl)]
public static extern int RunTest(int* CPUinput, int length, int num);
// Entry point: fills a 10,000,000-element array with random positive integers, then
// repeatedly reads a divisor from standard input and prints how many elements are
// divisible by it, as counted by the native RunTest routine.
// The loop ends when standard input is exhausted.
static void Main(string[] args)
{
    Random rnd = new Random();
    int[] arr = new int[10000000];
    for (int i = 0; i < arr.Length; i++)
        arr[i] = rnd.Next(1, int.MaxValue);

    // Pin the array for the whole session so native code can read it through a raw pointer.
    fixed (int* arrPtr = &arr[0])
    {
        while (true)
        {
            // ReadLine returns null at end of input; stop instead of crashing in the parser.
            string line = Console.ReadLine();
            if (line == null)
                break;

            // Reject non-numeric input, and zero: a zero divisor has no meaningful
            // "divisible by" answer and must not be handed to the native modulo test.
            int num;
            if (!int.TryParse(line, out num) || num == 0)
            {
                Console.WriteLine("Please enter a non-zero integer.");
                continue;
            }

            Console.WriteLine($"There are {RunTest(arrPtr, arr.Length, num)} numbers in array divisible by {num}");
        }
    }
}
C++ code:
// Declares RunTest with C linkage and marks it as a DLL export.
extern "C" { __declspec(dllexport) int RunTest(int* input, int length, int num); }
int RunTest(int* CPUinput, int length, int num)
{
int CPUresult[1];
CPUresult[0] = 0;
array_view<int, 1> GPUinput(length, CPUinput);
array_view<int, 1> GPUresult(1, CPUresult);
parallel_for_each(GPUinput.get_extent(), [=](index<1> idx) restrict(amp) {
if (GPUinput[idx[0]] % num == 0)
atomic_fetch_inc(&GPUresult[0]);
});
GPUinput.discard_data();
GPUresult.synchronize();
return CPUresult[0];
}
Obviously, copying the array every time I run the test is a bad idea; in fact, it is the bottleneck in this case. How can I keep the array in GPU memory across several library calls?