2

I am working on a program to capture real-time video frames from a specific application i.e. Microsoft Word.

Currently, I am capturing the entire desktop of the windows machine, and If shuffle from one application to another application, it is capturing everything on the screen.

enter image description here

Using this article: The Sink writer to Encode Video, I have managed to implement this simple program in c++ such as;

// Required Components
#pragma comment(lib, "mfreadwrite")
#pragma comment(lib, "mfplat")
#pragma comment(lib, "mfuuid")
#pragma comment(lib, "d3d9.lib")

#include <iostream>
#include <Windows.h>

#include <mfapi.h>
#include <mfidl.h>
#include <Mfreadwrite.h>
#include <mferror.h>
#include <d3d9.h>


template<class T>
void SafeRelease(T **ppT) {

    if (*ppT) {
        (*ppT)->Release();
        *ppT = NULL;
    }
}

#define REVERSE_IMAGE

// Format constants
const UINT32 VIDEO_FPS = 30;
const UINT64 VIDEO_FRAME_DURATION = 10 * 1000 * 1000 / VIDEO_FPS;
const UINT32 VIDEO_BIT_RATE = 800000000;
const GUID VIDEO_ENCODING_FORMAT = MFVideoFormat_H264;
const GUID VIDEO_INPUT_FORMAT = MFVideoFormat_RGB32;
const UINT32 VIDEO_FRAME_COUNT = 10 * VIDEO_FPS;

HRESULT
InitializeDirect3D9(IDirect3DDevice9 **ppDevice, IDirect3DSurface9 **ppSurface, UINT32 &uiWidth, UINT32 &uiHeight) {

    IDirect3D9 *d3d = NULL;

    d3d = Direct3DCreate9(D3D_SDK_VERSION);

    if (d3d == NULL)
        return E_POINTER;

    D3DDISPLAYMODE mode;
    HRESULT hr = d3d->GetAdapterDisplayMode(D3DADAPTER_DEFAULT, &mode);

    if (FAILED(hr)) {
        SafeRelease(&d3d);
        return hr;
    }

    D3DPRESENT_PARAMETERS parameters = {0};

    parameters.Windowed = TRUE;
    parameters.BackBufferCount = 1;
    uiHeight = parameters.BackBufferHeight = mode.Height;
    uiWidth = parameters.BackBufferWidth = mode.Width;
    parameters.SwapEffect = D3DSWAPEFFECT_DISCARD;
    parameters.hDeviceWindow = NULL;

    hr = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, NULL, D3DCREATE_SOFTWARE_VERTEXPROCESSING, &parameters,
                           ppDevice);

    if (FAILED(hr)) {
        SafeRelease(&d3d);
        return hr;
    }

    hr = (*ppDevice)->CreateOffscreenPlainSurface(mode.Width, mode.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM,
                                                  ppSurface, nullptr);

    SafeRelease(&d3d);

    return hr;
}

HRESULT
InitializeSinkWriter(IMFSinkWriter **ppWriter, DWORD *pStreamIndex, const UINT32 uiWidth, const UINT32 uiHeight) {

    *ppWriter = NULL;
    *pStreamIndex = NULL;

    IMFSinkWriter *pSinkWriter = NULL;
    IMFMediaType *pMediaTypeOut = NULL;
    IMFMediaType *pMediaTypeIn = NULL;
    DWORD streamIndex;

    HRESULT hr = MFCreateSinkWriterFromURL(L"output.mp4", NULL, NULL, &pSinkWriter);

    // Set the output media type.
    if (SUCCEEDED(hr)) {
        hr = MFCreateMediaType(&pMediaTypeOut);
    }
    if (SUCCEEDED(hr)) {
        hr = pMediaTypeOut->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    }
    if (SUCCEEDED(hr)) {
        hr = pMediaTypeOut->SetGUID(MF_MT_SUBTYPE, VIDEO_ENCODING_FORMAT);
    }
    if (SUCCEEDED(hr)) {
        hr = pMediaTypeOut->SetUINT32(MF_MT_AVG_BITRATE, VIDEO_BIT_RATE);
    }
    if (SUCCEEDED(hr)) {
        hr = pMediaTypeOut->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
    }
    if (SUCCEEDED(hr)) {
        hr = MFSetAttributeSize(pMediaTypeOut, MF_MT_FRAME_SIZE, uiWidth, uiHeight);
    }
    if (SUCCEEDED(hr)) {
        hr = MFSetAttributeRatio(pMediaTypeOut, MF_MT_FRAME_RATE, VIDEO_FPS, 1);
    }
    if (SUCCEEDED(hr)) {
        hr = MFSetAttributeRatio(pMediaTypeOut, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
    }
    if (SUCCEEDED(hr)) {
        hr = pSinkWriter->AddStream(pMediaTypeOut, &streamIndex);
    }

    // Set the input media type.
    if (SUCCEEDED(hr)) {
        hr = MFCreateMediaType(&pMediaTypeIn);
    }
    if (SUCCEEDED(hr)) {
        hr = pMediaTypeIn->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
    }
    if (SUCCEEDED(hr)) {
        hr = pMediaTypeIn->SetGUID(MF_MT_SUBTYPE, VIDEO_INPUT_FORMAT);
    }
    if (SUCCEEDED(hr)) {
        hr = pMediaTypeIn->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
    }
    if (SUCCEEDED(hr)) {
        hr = MFSetAttributeSize(pMediaTypeIn, MF_MT_FRAME_SIZE, uiWidth, uiHeight);
    }
    if (SUCCEEDED(hr)) {
        hr = MFSetAttributeRatio(pMediaTypeIn, MF_MT_FRAME_RATE, VIDEO_FPS, 1);
    }
    if (SUCCEEDED(hr)) {
        hr = MFSetAttributeRatio(pMediaTypeIn, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
    }
    if (SUCCEEDED(hr)) {
        hr = pSinkWriter->SetInputMediaType(streamIndex, pMediaTypeIn, NULL);
    }

    // Tell the sink writer to start accepting data.
    if (SUCCEEDED(hr)) {
        hr = pSinkWriter->BeginWriting();
    }

    // Return the pointer to the caller.
    if (SUCCEEDED(hr)) {

        *ppWriter = pSinkWriter;
        (*ppWriter)->AddRef();
        *pStreamIndex = streamIndex;
    }

    SafeRelease(&pSinkWriter);
    SafeRelease(&pMediaTypeOut);
    SafeRelease(&pMediaTypeIn);
    return hr;
}

HRESULT WriteFrame(IDirect3DDevice9 *pDevice, IDirect3DSurface9 *pSurface, IMFSinkWriter *pWriter, DWORD streamIndex,
                   const LONGLONG &rtStart, const UINT32 uiWidth, const UINT32 uiHeight) {

    HRESULT hr = pDevice->GetFrontBufferData(0, pSurface);

    if (FAILED(hr)) {
        return hr;
    }

    D3DLOCKED_RECT rc;
    hr = pSurface->LockRect(&rc, NULL, 0);

    if (FAILED(hr)) {
        return hr;
    }

    IMFSample *pSample = NULL;
    IMFMediaBuffer *pBuffer = NULL;

    const LONG cbWidth = 4 * uiWidth;
    const DWORD cbBuffer = cbWidth * uiHeight;

    BYTE *pData = NULL;

    // Create a new memory buffer.
    hr = MFCreateMemoryBuffer(cbBuffer, &pBuffer);

    // Lock the buffer and copy the video frame to the buffer.
    if (SUCCEEDED(hr)) {
        hr = pBuffer->Lock(&pData, NULL, NULL);
    }

    if (SUCCEEDED(hr)) {

#ifdef REVERSE_IMAGE
        for (int i = 0, j = uiHeight - 1; i < uiHeight; i++, j--)
            for (int k = 0; k < cbWidth; k++)
                pData[(i * cbWidth) + k] = ((BYTE *) rc.pBits)[(j * cbWidth) + k];
#else
        hr = MFCopyImage(pData, cbWidth, (BYTE*)rc.pBits, rc.Pitch, cbWidth, uiHeight);
#endif
    }

    if (pBuffer) {
        pBuffer->Unlock();
    }

    // Set the data length of the buffer.
    if (SUCCEEDED(hr)) {
        hr = pBuffer->SetCurrentLength(cbBuffer);
    }

    // Create a media sample and add the buffer to the sample.
    if (SUCCEEDED(hr)) {
        hr = MFCreateSample(&pSample);
    }

    if (SUCCEEDED(hr)) {
        hr = pSample->AddBuffer(pBuffer);
    }

    // Set the time stamp and the duration.
    if (SUCCEEDED(hr)) {
        hr = pSample->SetSampleTime(rtStart);
    }

    if (SUCCEEDED(hr)) {
        hr = pSample->SetSampleDuration(VIDEO_FRAME_DURATION);
    }

    // Send the sample to the Sink Writer.
    if (SUCCEEDED(hr)) {
        hr = pWriter->WriteSample(streamIndex, pSample);
    }

    hr = pSurface->UnlockRect();

    SafeRelease(&pSample);
    SafeRelease(&pBuffer);
    return hr;
}


void start_engine(){

    HRESULT hr = CoInitializeEx(NULL, COINIT_APARTMENTTHREADED);

    if (SUCCEEDED(hr)) {

        hr = MFStartup(MF_VERSION);

        if (SUCCEEDED(hr)) {

            //4k = 3840 x 2160
            UINT32 uiWidth = 3840;
            UINT32 uiHeight = 2160;

            IDirect3DDevice9 *pDevice = NULL;
            IDirect3DSurface9 *pSurface = NULL;

            hr = InitializeDirect3D9(&pDevice, &pSurface, uiWidth, uiHeight);

            if (SUCCEEDED(hr)) {

                IMFSinkWriter *pSinkWriter = NULL;
                DWORD stream;

                hr = InitializeSinkWriter(&pSinkWriter, &stream, uiWidth, uiHeight);

                if (SUCCEEDED(hr)) {

                    LONGLONG rtStart = 0;

                    for (DWORD i = 0; i < VIDEO_FRAME_COUNT; ++i) {

                        hr = WriteFrame(pDevice, pSurface, pSinkWriter, stream, rtStart, uiWidth, uiHeight);

                        if (FAILED(hr)) {
                            break;
                        }

                        rtStart += VIDEO_FRAME_DURATION;
                    }
                }

                if (SUCCEEDED(hr)) {
                    hr = pSinkWriter->Finalize();
                }

                SafeRelease(&pSinkWriter);
            }

            SafeRelease(&pDevice);
            SafeRelease(&pSurface);
            MFShutdown();
        }

        CoUninitialize();
    }


}

int main() {

    std::cout << "Screen Capturing has started..." << std::endl;
    start_engine();
    std::cout << "Check: 'output.mp4' in the same directory." << std::endl;


    return 0;
}

This program is working fine, I have tested it on different FPS i.e. (30, 60), bitrate, and by changing the video encoder. What I truly want is to capture the content of a specific window (regardless of its Position/Size) i.e. Microsoft Word.

I have tried the answers in the following article;

However, I am unable to capture frames from a specific Window. Video Frames are quite important for me as I want to write those frames to an MP4 file to observe and improve the video quality by using this program.

I am working on WinAPI in parallel to check if that might help me to get the required functionality in order to obtain video frames.

Is it possible to get video frames from the screen capturing a specific window i.e. Microsoft Word (Definitely When Microsft Word is opened.)?

If someone can help me to modify this current implementation that would be a great help, Indeed. I need some programmatic examples or advice. Thanks

Muhammad Usman Bashir
  • 1,441
  • 2
  • 14
  • 43
  • OBS is open source and can do want you want. Perhaps you could look at it's source code - https://obsproject.com/ – Richard Critten Oct 30 '21 at 17:23
  • I have worked with OBS and I am able to get the required screen captured using OBS `UDP sockets` and obtained in my program. I have also worked with `Nvidia CloudXR` for capturing frames. Actually, I am doing this from scratch. My current program is running perfectly, I just want to capture the screen of any specific Application using this program. – Muhammad Usman Bashir Nov 01 '21 at 09:36

0 Answers0