#include "opencv2\opencv.hpp"
#include <stdint.h>
#include <stdio.h>
#include <cuda.h>
using namespace cv;
using namespace std;
#define count 200000
__global__
void SubArrays(int * a, int * b, int size)
{
int id = blockIdx.x * blockDim.x + threadIdx.x;
if (id < size)
{
a[id] -= b[id];
}
}
int image1[count];
int image2[count];
int main(int argv, char** argc)
{
Mat im1 = imread("1.jpg", CV_LOAD_IMAGE_GRAYSCALE);
Mat im2 = imread("2.jpg", CV_LOAD_IMAGE_GRAYSCALE);
int size = (im1.rows*im1.cols);
printf("size: %d \n\n\n\n", size);
int i = 0;
for (int r = 0; r < im1.rows; r++)
{
for (int c =0; c< im1.cols; c++,i++)
{
image1[i] = im1.at<uint8_t>(r, c);
image2[i] = im2.at<uint8_t>(r, c);
}
}
printf("This is first image array's first 5 elements\n\n");
for (int b = 0; b < 5; b++)
{
printf("%d\n",image1[b]);
}
printf("This is second image array's first 5 elements\n\n");
for (int b = 0; b < 5; b++)
{
printf("%d\n", image2[b]);
}
int * h_a = image1;
int * h_b = image2;
int * d_a;
int * d_b;
cudaMalloc(&d_a, sizeof(char)*size);
cudaMalloc(&d_b, sizeof(char)*size);
cudaMemcpy(d_a, h_a, sizeof(char)*size, cudaMemcpyHostToDevice);
cudaMemcpy(d_b, h_b, sizeof(char)*size, cudaMemcpyHostToDevice);
SubArrays << <1, 1024 >> >(d_a, d_b, size);
cudaMemcpy(h_a, d_a, sizeof(char)*size, cudaMemcpyDeviceToHost);
printf("calculating result\n\n");
for (int check = 0; check < size ; check++)
{
printf("%d \n", h_a[check]);
}
cudaFree(d_a);
cudaFree(d_b);
return 0;
}
when my kernel runs it gives first few value i.e the difference correct and then it starts showing old values ? what am I doing wrong ? I am noob although i think these are the numbers of blocks and threads that I have chosen wrong. my gpu CC is 3.2