I'm getting a strange error when using the NPPI geometry transform functions from the CUDA NPP libraries. The code is here:
#include <nppi.h>
#include <nppi_geometry_transforms.h>
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <vector>
// Writes a 3-channel 8-bit image to `path` as a maximally compressed PNG.
// Every pixel is copied into a 4-channel image with channels 0 and 2 swapped
// and the fourth channel set to UCHAR_MAX; the input matrix is not modified.
void write(const cv::Mat &mat1, const std::string &path) {
  cv::Mat converted(mat1.rows, mat1.cols, CV_8UC4);
  for (int row = 0; row < mat1.rows; ++row) {
    const cv::Vec3b *srcRow = mat1.ptr<cv::Vec3b>(row);
    cv::Vec4b *dstRow = converted.ptr<cv::Vec4b>(row);
    for (int col = 0; col < mat1.cols; ++col) {
      const cv::Vec3b &src = srcRow[col];
      // Reverse the order of the three colour channels, then append the
      // constant fourth channel.
      dstRow[col] = cv::Vec4b(src[2], src[1], src[0], UCHAR_MAX);
    }
  }
  // Key/value pair understood by cv::imwrite: PNG compression level 9.
  const std::vector<int> pngParams{cv::IMWRITE_PNG_COMPRESSION, 9};
  cv::imwrite(path, converted, pngParams);
}
int main() {
std::cout << "Hello, World!" << std::endl;
auto mat = cv::Mat(256, 256, CV_8UC3);
for (int i = 0; i < mat.rows; i++) {
for (int j = 0; j < mat.cols; j++) {
auto &rgb = mat.at<cv::Vec3b>(i, j);
rgb[0] = (uint8_t)j;
rgb[1] = (uint8_t)i;
rgb[2] = (uint8_t)(UCHAR_MAX - j);
}
}
write(mat, "./test.png");
uint8_t *gpuBuffer1;
uint8_t *gpuBuffer2;
cudaMalloc(&gpuBuffer1, mat.total());
cudaMalloc(&gpuBuffer2, mat.total());
cudaMemcpy(gpuBuffer1, mat.data, mat.total(), cudaMemcpyHostToDevice);
auto status = nppiResize_8u_C3R(
gpuBuffer1, mat.cols * 3, {.width = mat.cols, .height = mat.rows},
{.x = 0, .y = 0, .width = mat.cols, .height = mat.rows}, gpuBuffer2,
mat.cols * 3, {.width = mat.cols, .height = mat.rows},
{.x = 0, .y = 0, .width = mat.cols, .height = mat.rows},
NPPI_INTER_NN);
if (status != NPP_SUCCESS) {
std::cerr << "Error executing Resize -- code: " << status << std::endl;
}
auto mat2 = cv::Mat(mat.rows, mat.cols, CV_8UC3);
cudaMemcpy(mat2.data, gpuBuffer2, mat.total(), cudaMemcpyDeviceToHost);
write(mat2, "./test1.png");
}
Basically, I generate a rainbow picture, write it out, copy it to the GPU, resize it to the EXACT same size, copy it back to the host, and write it out again. What I'm getting is garbled data in about two-thirds of the returned picture.
The first picture is the input picture. The second picture is the output picture.
I expect both pictures to be the same.
If I adjust the ROI with offsets and change the width and height for the destination buffer, the pixels in the top third of the resized picture actually move and resize correctly, but the rest of the picture is garbled. I'm not sure what's wrong. Does anyone with experience in the CUDA NPPI libraries, or image processing in general, have an idea what's going on?
The CMake file is included below for the convenience of anyone who wants to compile it. You need to have OpenCV and the CUDA Toolkit installed as C++ libraries:
# Build script for the test_nppi executable: a single CUDA source (main.cu)
# linked against OpenCV and the NPP geometry library.
cmake_minimum_required(VERSION 3.18)
project(test_nppi)
enable_language(CUDA)
set(CMAKE_CXX_STANDARD 17)
# main.cu is compiled as CUDA, whose language standard is controlled by
# CMAKE_CUDA_STANDARD rather than CMAKE_CXX_STANDARD.
set(CMAKE_CUDA_STANDARD 17)
find_package(CUDAToolkit REQUIRED)
# REQUIRED makes a missing OpenCV fail at configure time instead of producing
# missing-header or unresolved-symbol errors later in the build.
find_package(OpenCV REQUIRED)
message(STATUS ${CUDAToolkit_INCLUDE_DIRS})
add_executable(test_nppi main.cu)
target_link_libraries(test_nppi ${OpenCV_LIBS} CUDA::nppig)
target_include_directories(test_nppi PUBLIC ${OpenCV_INCLUDE_DIRS} ${CUDAToolkit_INCLUDE_DIRS})
set_target_properties(test_nppi PROPERTIES
CUDA_SEPARABLE_COMPILATION ON)
I've used the NPPI resize function for single-channel pictures before and didn't have this issue. The 3-channel NPPI resize function is producing weird output, and I think I'm not completely understanding the input parameters. The step is multiplied by 3 because of the 3 color channels, but all the other sizes just measure the dimensions in pixels, and the sizes of the source and destination are the same... I'm not sure what I'm not understanding here.