Today I was trying to create a program that copied an image using the GPU. I created a simple program that does this. To load the image I am using lodepng. The problem isn't with copying via cudaMemcpy
because when I copy the image to GPU and back it stays intact, but when I try to copy it in the kernel, it doesn't. Feel free to ask any questions that you have about my problem.
The code:
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <iostream>
#include <Windows.h>
#include <math.h>
#include <LodePNG\lodepng.h>
const int BLOCK_WIDTH = 32;
using namespace std;
__global__ void expousure(unsigned char *in, unsigned char *out)
{
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int pitch = blockDim.x * gridDim.x;
int absIdx = x + y * pitch;
out[absIdx] = in[absIdx];
}
void decode(std::vector<unsigned char>& image, const char* filename, int& width, int& height)
{
unsigned widthU, heightU;
//decode
unsigned error = lodepng::decode(image, widthU, heightU, filename);
width = int(widthU);
height = int(heightU);
//if there's an error, display it
if (error) std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl;
//the pixels are now in the vector "image", 4 bytes per pixel, ordered RGBARGBA..., use it as texture, draw it, ...
}
void encodeAndSave(const std::vector<unsigned char>& inPixels, const char* filename, int width, int height)
{
std::vector<unsigned char> outEncoded;
unsigned error = lodepng::encode(outEncoded, inPixels, unsigned(width), unsigned(height));
if (error){
std::cout << "encoder error" << error << ": " << lodepng_error_text(error) << std::endl;
return;
}
lodepng::save_file(outEncoded, filename);
}
void encodeAndSave(unsigned char* inPixels, const char* filename, int width, int height)
{
std::vector<unsigned char> outEncoded;
unsigned error = lodepng::encode(outEncoded, inPixels, unsigned(width), unsigned(height));
if (error){
std::cout << "encoder error" << error << ": " << lodepng_error_text(error) << std::endl;
return;
}
lodepng::save_file(outEncoded, filename);
}
int main(int argc, char *argv[])
{
// decode the image to image from filename
int width, height;
const char* filename = argc > 1 ? argv[1] : "C:/Users/Russell/Documents/Visual Studio 2013/Projects/Hello CUDA/Release/test.png";
vector <unsigned char> h_image;
decode(h_image, filename, width, height);
unsigned char *d_in;
unsigned char *d_out;
cudaMalloc(&d_in, sizeof(unsigned char) * width * height * 4);
cudaMalloc(&d_out, sizeof(unsigned char) * width * height * 4);
cudaMemcpy(d_in, &h_image[0], sizeof(unsigned char) * width * height * 4, cudaMemcpyHostToDevice);
expousure<<<dim3(width / BLOCK_WIDTH, height / BLOCK_WIDTH, 1), dim3(BLOCK_WIDTH, BLOCK_WIDTH, 1) >>>(d_in, d_out);
unsigned char h_out[256 * 256 * 4];
cudaMemcpy(h_out, d_out, sizeof(unsigned char) * width * height * 4, cudaMemcpyDeviceToHost);
// encode and save image from image to filename
vector <unsigned char> imageOUT;
const char* outname = "C:/Users/Russell/Documents/Visual Studio 2013/Projects/Hello CUDA/Release/testOUT.png";
encodeAndSave(h_out, outname, width, height);
}
The input image: https://i.stack.imgur.com/Rx0mF.png
The output image: https://i.stack.imgur.com/HLmPQ.png