How to turn OpenCV_GPUMat into CUdeviceptr?

Question

I am modifying the NvTranscoder sample from Video_Codec_SDK_8.0.14 in order to add some signal processing to the video frames. However, I ran into problems when converting a GpuMat into a CUdeviceptr, and I would like to know how to do that conversion. After performing the blur in the section I have emphasized below, I want to turn the processed GpuMat into a CUdeviceptr.

Besides, the part that converts the CUdeviceptr into a GpuMat is also wrong: gpuInput is shown as "cannot read memory".

Also, can anyone point out any other problems in my implementation? The code is as follows:

#include <time.h>
#ifdef _WIN32
#include <windows.h>
#else
#include <pthread.h>
#endif

#include <stdio.h>
#include <string.h>
#include "dynlink_cuda.h"    // <cuda.h>

#include "VideoDecoder.h"
#include "VideoEncoder.h"
#include "../common/inc/nvUtils.h"

#include <opencv2/opencv.hpp>
#include "opencv2/gpu/gpu.hpp"   



using namespace cv;

#ifdef _WIN32
// Win32 thread entry point for the decode thread.
// lpParameter carries the CudaDecoder* handed to CreateThread; the thread
// simply runs the decoder's Start() and then exits with code 0.
DWORD WINAPI DecodeProc(LPVOID lpParameter)
{
    static_cast<CudaDecoder*>(lpParameter)->Start();
    return 0;
}

#else
void* DecodeProc(void *arg)
{
    CudaDecoder* pDecoder = (CudaDecoder*)arg;
    pDecoder->Start();

    return NULL;
}

#endif

// Decides how the current decoded frame must be treated so that the encoded
// stream matches the requested frame-rate ratio.
//
//   fpsRatio       output fps divided by input fps
//   decodedFrames  number of frames decoded so far
//   encodedFrames  number of frames encoded so far
//
// Returns -1 when the frame should be dropped, 0 when it should be encoded
// exactly once, and k > 0 when it should be encoded k additional times.
int MatchFPS(const float fpsRatio, int decodedFrames, int encodedFrames)
{
    if (fpsRatio > 1.f) {
        // Output runs faster than input: count the extra copies needed to catch up.
        int extraCopies = 0;
        for (; decodedFrames*fpsRatio > encodedFrames + extraCopies + 1; ++extraCopies) {
        }
        return extraCopies;
    }

    // Output runs slower than input: drop while the encoder is ahead of schedule.
    if (fpsRatio < 1.f && decodedFrames * fpsRatio < (encodedFrames + 1)) {
        return -1;
    }

    return 0;
}

// Writes the command-line usage text of the transcoder to stdout.
void PrintHelp()
{
    // The text contains no format specifiers, so it is emitted verbatim.
    static const char kUsageText[] =
        "Usage : NvTranscoder \n"
        "-i <string>                  Specify input .h264 file\n"
        "-o <string>                  Specify output bitstream file\n"
        "\n### Optional parameters ###\n"
        "-size <int int>              Specify output resolution <width height>\n"
        "-codec <integer>             Specify the codec \n"
        "                                 0: H264\n"
        "                                 1: HEVC\n"
        "-preset <string>             Specify the preset for encoder settings\n"
        "                                 hq : nvenc HQ \n"
        "                                 hp : nvenc HP \n"
        "                                 lowLatencyHP : nvenc low latency HP \n"
        "                                 lowLatencyHQ : nvenc low latency HQ \n"
        "                                 lossless : nvenc Lossless HP \n"
        "-fps <integer>               Specify encoding frame rate\n"
        "-goplength <integer>         Specify gop length\n"
        "-numB <integer>              Specify number of B frames\n"
        "-bitrate <integer>           Specify the encoding average bitrate\n"
        "-vbvMaxBitrate <integer>     Specify the vbv max bitrate\n"
        "-vbvSize <integer>           Specify the encoding vbv/hrd buffer size\n"
        "-rcmode <integer>            Specify the rate control mode\n"
        "                                 0:  Constant QP mode\n"
        "                                 1:  Variable bitrate mode\n"
        "                                 2:  Constant bitrate mode\n"
        "                                 8:  low-delay CBR, high quality\n"
        "                                 16: CBR, high quality (slower)\n"
        "                                 32: VBR, high quality (slower)\n"
        "-qp <integer>                Specify qp for Constant QP mode\n"
        "-i_qfactor <float>           Specify qscale difference between I-frames and P-frames\n"
        "-b_qfactor <float>           Specify qscale difference between P-frames and B-frames\n"
        "-i_qoffset <float>           Specify qscale offset between I-frames and P-frames\n"
        "-b_qoffset <float>           Specify qscale offset between P-frames and B-frames\n"
        "-deviceID <integer>          Specify the GPU device on which encoding will take place\n"
        "-help                        Prints Help Information\n\n";

    fputs(kUsageText, stdout);
}

int main(int argc, char* argv[])
{
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
    typedef HMODULE CUDADRIVER;
#else
    typedef void *CUDADRIVER;
#endif
    CUDADRIVER hHandleDriver = 0;

    __cu(cuInit(0, __CUDA_API_VERSION, hHandleDriver));
    __cu(cuvidInit(0));

    EncodeConfig encodeConfig = { 0 };
    encodeConfig.endFrameIdx = INT_MAX;
    encodeConfig.bitrate = 5000000;
    encodeConfig.rcMode = NV_ENC_PARAMS_RC_CONSTQP;
    encodeConfig.gopLength = NVENC_INFINITE_GOPLENGTH;
    encodeConfig.codec = NV_ENC_H264;
    encodeConfig.fps = 0;
    encodeConfig.qp = 28;
    encodeConfig.i_quant_factor = DEFAULT_I_QFACTOR;
    encodeConfig.b_quant_factor = DEFAULT_B_QFACTOR;  
    encodeConfig.i_quant_offset = DEFAULT_I_QOFFSET;
    encodeConfig.b_quant_offset = DEFAULT_B_QOFFSET;   
    encodeConfig.presetGUID = NV_ENC_PRESET_DEFAULT_GUID;
    encodeConfig.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;

    NVENCSTATUS nvStatus = CNvHWEncoder::ParseArguments(&encodeConfig, argc, argv);
    if (nvStatus != NV_ENC_SUCCESS)
    {
        PrintHelp();
        return 1;
    }

    if (!encodeConfig.inputFileName || !encodeConfig.outputFileName)
    {
        PrintHelp();
        return 1;
    }

    encodeConfig.fOutput = fopen(encodeConfig.outputFileName, "wb");
    if (encodeConfig.fOutput == NULL)
    {
        PRINTERR("Failed to create \"%s\"\n", encodeConfig.outputFileName);
        return 1;
    }

    //init cuda
    CUcontext cudaCtx;
    CUdevice device;
    __cu(cuDeviceGet(&device, encodeConfig.deviceID));
    __cu(cuCtxCreate(&cudaCtx, CU_CTX_SCHED_AUTO, device));

    CUcontext curCtx;
    CUvideoctxlock ctxLock;
    __cu(cuCtxPopCurrent(&curCtx));
    __cu(cuvidCtxLockCreate(&ctxLock, curCtx));

    CudaDecoder* pDecoder   = new CudaDecoder;
    FrameQueue* pFrameQueue = new CUVIDFrameQueue(ctxLock);
    pDecoder->InitVideoDecoder(encodeConfig.inputFileName, ctxLock, pFrameQueue, encodeConfig.width, encodeConfig.height);

    int decodedW, decodedH, decodedFRN, decodedFRD, isProgressive;
    pDecoder->GetCodecParam(&decodedW, &decodedH, &decodedFRN, &decodedFRD, &isProgressive);
    if (decodedFRN <= 0 || decodedFRD <= 0) {
        decodedFRN = 30;
        decodedFRD = 1;
    }

    if(encodeConfig.width <= 0 || encodeConfig.height <= 0) {
        encodeConfig.width  = decodedW;
        encodeConfig.height = decodedH;
    }

    float fpsRatio = 1.f;
    if (encodeConfig.fps <= 0) {
        encodeConfig.fps = decodedFRN / decodedFRD;
    }
    else {
        fpsRatio = (float)encodeConfig.fps * decodedFRD / decodedFRN;
    }

    encodeConfig.pictureStruct = (isProgressive ? NV_ENC_PIC_STRUCT_FRAME : 0);
    pFrameQueue->init(encodeConfig.width, encodeConfig.height);

    VideoEncoder* pEncoder = new VideoEncoder(ctxLock);
    assert(pEncoder->GetHWEncoder());

    nvStatus = pEncoder->GetHWEncoder()->Initialize(cudaCtx, NV_ENC_DEVICE_TYPE_CUDA);
    if (nvStatus != NV_ENC_SUCCESS)
        return 1;

    encodeConfig.presetGUID = pEncoder->GetHWEncoder()->GetPresetGUID(encodeConfig.encoderPreset, encodeConfig.codec);

    printf("Encoding input           : \"%s\"\n", encodeConfig.inputFileName);
    printf("         output          : \"%s\"\n", encodeConfig.outputFileName);
    printf("         codec           : \"%s\"\n", encodeConfig.codec == NV_ENC_HEVC ? "HEVC" : "H264");
    printf("         size            : %dx%d\n", encodeConfig.width, encodeConfig.height);
    printf("         bitrate         : %d bits/sec\n", encodeConfig.bitrate);
    printf("         vbvMaxBitrate   : %d bits/sec\n", encodeConfig.vbvMaxBitrate);
    printf("         vbvSize         : %d bits\n", encodeConfig.vbvSize);
    printf("         fps             : %d frames/sec\n", encodeConfig.fps);
    printf("         rcMode          : %s\n", encodeConfig.rcMode == NV_ENC_PARAMS_RC_CONSTQP ? "CONSTQP" :
                                              encodeConfig.rcMode == NV_ENC_PARAMS_RC_VBR ? "VBR" :
                                              encodeConfig.rcMode == NV_ENC_PARAMS_RC_CBR ? "CBR" :
                                              encodeConfig.rcMode == NV_ENC_PARAMS_RC_VBR_MINQP ? "VBR MINQP (deprecated)" :
                                              encodeConfig.rcMode == NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ ? "CBR_LOWDELAY_HQ" :
                                              encodeConfig.rcMode == NV_ENC_PARAMS_RC_CBR_HQ ? "CBR_HQ" :
                                              encodeConfig.rcMode == NV_ENC_PARAMS_RC_VBR_HQ ? "VBR_HQ" : "UNKNOWN");
    if (encodeConfig.gopLength == NVENC_INFINITE_GOPLENGTH)
        printf("         goplength       : INFINITE GOP \n");
    else
        printf("         goplength       : %d \n", encodeConfig.gopLength);
    printf("         B frames        : %d \n", encodeConfig.numB);
    printf("         QP              : %d \n", encodeConfig.qp);
    printf("         preset          : %s\n", (encodeConfig.presetGUID == NV_ENC_PRESET_LOW_LATENCY_HQ_GUID) ? "LOW_LATENCY_HQ" :
        (encodeConfig.presetGUID == NV_ENC_PRESET_LOW_LATENCY_HP_GUID) ? "LOW_LATENCY_HP" :
        (encodeConfig.presetGUID == NV_ENC_PRESET_HQ_GUID) ? "HQ_PRESET" :
        (encodeConfig.presetGUID == NV_ENC_PRESET_HP_GUID) ? "HP_PRESET" :
        (encodeConfig.presetGUID == NV_ENC_PRESET_LOSSLESS_HP_GUID) ? "LOSSLESS_HP" : "LOW_LATENCY_DEFAULT");
    printf("\n");

    nvStatus = pEncoder->GetHWEncoder()->CreateEncoder(&encodeConfig);
    if (nvStatus != NV_ENC_SUCCESS)
        return 1;

    nvStatus = pEncoder->AllocateIOBuffers(&encodeConfig);
    if (nvStatus != NV_ENC_SUCCESS)
        return 1;

    unsigned long long lStart, lEnd, lFreq;
    NvQueryPerformanceCounter(&lStart);

    //start decoding thread
#ifdef _WIN32
    HANDLE decodeThread = CreateThread(NULL, 0, DecodeProc, (LPVOID)pDecoder, 0, NULL);
#else
    pthread_t pid;
    pthread_create(&pid, NULL, DecodeProc, (void*)pDecoder);
#endif

    //start encoding thread
    int frmProcessed = 0;
    int frmActual = 0;
    while(!(pFrameQueue->isEndOfDecode() && pFrameQueue->isEmpty()) ) {

        CUVIDPARSERDISPINFO pInfo;
        if(pFrameQueue->dequeue(&pInfo)) {
            CUdeviceptr dMappedFrame = 0;
            unsigned int pitch;
            CUVIDPROCPARAMS oVPP = { 0 };
            oVPP.progressive_frame = pInfo.progressive_frame;
            oVPP.second_field = 0;
            oVPP.top_field_first = pInfo.top_field_first;
            oVPP.unpaired_field = (pInfo.progressive_frame == 1 || pInfo.repeat_first_field <= 1);

            cuvidMapVideoFrame(pDecoder->GetDecoder(), pInfo.picture_index, &dMappedFrame, &pitch, &oVPP);

            vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
            gpu::GpuMat gpuInput = gpu::GpuMat(decodedH, decodedW, CV_8UC3, (void*)dMappedFrame, pitch);
            gpu::GpuMat d_dst;
            gpu::GpuMat d_buf;
            gpu::GaussianBlur(gpuInput, d_dst, cv::Size(3, 3), 0);
            cv::Mat result;
            d_dst.download(result);
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

            EncodeFrameConfig stEncodeConfig = { 0 };
            NV_ENC_PIC_STRUCT picType = (pInfo.progressive_frame || pInfo.repeat_first_field >= 2 ? NV_ENC_PIC_STRUCT_FRAME :
                (pInfo.top_field_first ? NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM : NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP));
            vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
            stEncodeConfig.dptr = result.data;//dMappedFrame;
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            stEncodeConfig.pitch = pitch;
            stEncodeConfig.width = encodeConfig.width;
            stEncodeConfig.height = encodeConfig.height;

            int dropOrDuplicate = MatchFPS(fpsRatio, frmProcessed, frmActual);
            for (int i = 0; i <= dropOrDuplicate; i++) {
                pEncoder->EncodeFrame(&stEncodeConfig, picType);
                frmActual++;
            }
            frmProcessed++;

            vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv  
            cuvidUnmapVideoFrame(pDecoder->GetDecoder(), dMappedFrame);
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
            pFrameQueue->releaseFrame(&pInfo);
       }
    }

    pEncoder->EncodeFrame(NULL, NV_ENC_PIC_STRUCT_FRAME, true);

#ifdef _WIN32
    WaitForSingleObject(decodeThread, INFINITE);
#else
    pthread_join(pid, NULL);
#endif

    if (pEncoder->GetEncodedFrames() > 0)
    {
        NvQueryPerformanceCounter(&lEnd);
        NvQueryPerformanceFrequency(&lFreq);
        double elapsedTime = (double)(lEnd - lStart)/(double)lFreq;
        printf("Total time: %fms, Decoded Frames: %d, Encoded Frames: %d, Average FPS: %f\n",
        elapsedTime * 1000,
        pDecoder->m_decodedFrames,
        pEncoder->GetEncodedFrames(),
        (float)pEncoder->GetEncodedFrames() / elapsedTime);
    }

    pEncoder->Deinitialize();
    delete pDecoder;
    delete pEncoder;
    delete pFrameQueue;

    cuvidCtxLockDestroy(ctxLock);
    __cu(cuCtxDestroy(cudaCtx));

    return 0;
}

I run the program with the arguments "-i C:\test\input.h264 -o C:\test\output.h264 -size 352 288".

The decoded frame is in NV12 format.

score 1 · Answer 1 · edited Oct 10 '17 at 10:07

1

What exactly is the CUDA error code that is returned? Can you check for CUDA errors and post the result? This post will help you.

Does the member dptr expect a device pointer, while you are giving it a pointer to data stored on the host (cv::Mat instead of cv::cuda::GpuMat)?

Can you try this:

// d_dst is the GpuMat written by the blur in the question, so d_dst.data is a
// device address. The encoder pitch must then be d_dst.step, not the pitch of
// the mapped decoder frame.
stEncodeConfig.dptr = (CUdeviceptr)d_dst.data;

edited Oct 10 '17 at 10:07

talonmies

70,661
34
192
269

answered Oct 10 '17 at 09:38

X3liF

1,054
6
10

The gpuInput shows its data = 0x0000000c01f80000, so I cannot run the code any further. – md612 Oct 10 '17 at 09:42
Is that your original error, or the error with the code I asked you to try? – X3liF Oct 10 '17 at 09:58

score 1 · Answer 2 · answered Oct 17 '17 at 14:49

1

As I pointed out in the comments of your linked post, the decoded frame is in a raw format (i.e., YUV, etc.). You are wrapping it as a CV_8UC3 image, which is incorrect.

To test whether the GpuMat is being created correctly from the CUdeviceptr:

// View the first decodedH rows of the mapped frame as a single-channel 8-bit
// image, using the row pitch reported by cuvidMapVideoFrame.
cv::cuda::GpuMat dimg(decodedH, decodedW, CV_8UC1, (void*)dMappedFrame, pitch);

// Copy that plane to the host and display it.
cv::Mat img;
dimg.download(img);
cv::imshow("Decoded Frame", img);
cv::waitKey(1);

For further details, see this link.

answered Oct 17 '17 at 14:49

zindarod

6,328
3
30
58

You're right. I would also like to know how I can do the following: 1. stEncodeConfig.dptr = (CUdeviceptr)img.data; //dMappedFrame; 2. cuvidUnmapVideoFrame(pDecoder->GetDecoder(), (CUdeviceptr)img.data); What is wrong with my assignment? – md612 Oct 18 '17 at 01:03
Besides, the GpuMat only has the Y component; how can I keep the U and V components for processing? – md612 Oct 18 '17 at 04:13
`CUdeviceptr` is device memory, `img.data` is host memory; you cannot do `stEncodeConfig.dptr=(CUdeviceptr)img.data`. You have to explicitly copy `img.data` from host to device using CUDA, or you could do `stEncodeConfig.dptr=(CUdeviceptr)dimg.data`, because `dimg.data` is device (GPU) memory. – zindarod Oct 18 '17 at 16:55
I would like to know how I can obtain all of the YUV channels of the video, as this only uses 8UC1 for the Y component. My purpose is not to process just one channel. – md612 Oct 19 '17 at 02:25

How to turn OpenCV_GPUMat into CUdeviceptr?

2 Answers2

Linked