
I am trying to save several images (held in memory, in BGR format) to an output.mp4 file. Here is the C++ code that calls the FFmpeg library. The code builds correctly, but it crashes when I call ret = avformat_write_header(outFormatCtx, nullptr);. Do you know how to fix this crash?

Thanks.

#include <iostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <opencv2/opencv.hpp>
extern "C" {
#include <libavutil/imgutils.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
}

using namespace std;
using namespace cv;

int main()
{
    // Set up input frames as BGR byte arrays
    vector<Mat> frames;

    int width = 640;
    int height = 480;
    int num_frames = 100;
    Scalar black(0, 0, 0);
    Scalar white(255, 255, 255);
    int font = FONT_HERSHEY_SIMPLEX;
    double font_scale = 1.0;
    int thickness = 2;

    for (int i = 0; i < num_frames; i++) {
        Mat frame = Mat::zeros(height, width, CV_8UC3);
        putText(frame, std::to_string(i), Point(width / 2 - 50, height / 2), font, font_scale, white, thickness);
        frames.push_back(frame);
    }


    // Populate frames with BGR byte arrays

    // Initialize FFmpeg
    //av_register_all();

    // Set up output file
    AVFormatContext* outFormatCtx = nullptr;
    //AVCodec* outCodec = nullptr;
    AVCodecContext* outCodecCtx = nullptr;
    //AVStream* outStream = nullptr;
    AVPacket outPacket;

    const char* outFile = "output.mp4";
    int outWidth = frames[0].cols;
    int outHeight = frames[0].rows;
    int fps = 30;

    // Open output file
    avformat_alloc_output_context2(&outFormatCtx, nullptr, nullptr, outFile);
    if (!outFormatCtx) {
        cerr << "Error: Could not allocate output format context" << endl;
        return -1;
    }

    // Set up output codec
    const AVCodec* outCodec = avcodec_find_encoder(AV_CODEC_ID_H264);
    if (!outCodec) {
        cerr << "Error: Could not find H.264 codec" << endl;
        return -1;
    }

    outCodecCtx = avcodec_alloc_context3(outCodec);
    if (!outCodecCtx) {
        cerr << "Error: Could not allocate output codec context" << endl;
        return -1;
    }
    outCodecCtx->codec_id = AV_CODEC_ID_H264;
    outCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;
    outCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;
    outCodecCtx->width = outWidth;
    outCodecCtx->height = outHeight;
    outCodecCtx->time_base = { 1, fps };

    // Open output codec
    if (avcodec_open2(outCodecCtx, outCodec, nullptr) < 0) {
        cerr << "Error: Could not open output codec" << endl;
        return -1;
    }

    // Create output stream
    AVStream* outStream = avformat_new_stream(outFormatCtx, outCodec);
    if (!outStream) {
        cerr << "Error: Could not allocate output stream" << endl;
        return -1;
    }

    // Configure output stream parameters (e.g., time base, codec parameters, etc.)
    // ...

    // Connect output stream to format context
    outStream->codecpar->codec_id = outCodecCtx->codec_id;
    outStream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
    outStream->codecpar->width = outCodecCtx->width;
    outStream->codecpar->height = outCodecCtx->height;
    outStream->codecpar->format = outCodecCtx->pix_fmt;
    outStream->time_base = outCodecCtx->time_base;

    int ret = avcodec_parameters_from_context(outStream->codecpar, outCodecCtx);
    if (ret < 0) {
        cerr << "Error: Could not copy codec parameters to output stream" << endl;
        return -1;
    }

    outStream->avg_frame_rate = outCodecCtx->framerate;
    outStream->id = outFormatCtx->nb_streams++;


    ret = avformat_write_header(outFormatCtx, nullptr);
    if (ret < 0) {
        cerr << "Error: Could not write output header" << endl;
        return -1;
    }

    // Convert frames to YUV format and write to output file
    for (const auto& frame : frames) {
        AVFrame* yuvFrame = av_frame_alloc();
        if (!yuvFrame) {
            cerr << "Error: Could not allocate YUV frame" << endl;
            return -1;
        }
        av_image_alloc(yuvFrame->data, yuvFrame->linesize, outWidth, outHeight, AV_PIX_FMT_YUV420P, 32);

        // Convert BGR frame to YUV format
        Mat yuvMat;
        cvtColor(frame, yuvMat, COLOR_BGR2YUV_I420);
        memcpy(yuvFrame->data[0], yuvMat.data, outWidth * outHeight);
        memcpy(yuvFrame->data[1], yuvMat.data + outWidth * outHeight, outWidth * outHeight / 4);
        memcpy(yuvFrame->data[2], yuvMat.data + outWidth * outHeight * 5 / 4, outWidth * outHeight / 4);

        // Set up output packet
        av_init_packet(&outPacket);
        outPacket.data = nullptr;
        outPacket.size = 0;

        // Encode frame and write to output file
        int ret = avcodec_send_frame(outCodecCtx, yuvFrame);
        if (ret < 0) {
            cerr << "Error: Could not send frame to output codec" << endl;
            return -1;
        }
        while (ret >= 0) {
            ret = avcodec_receive_packet(outCodecCtx, &outPacket);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                break;
            } else if (ret < 0) {
                cerr << "Error: Could not receive packet from output codec" << endl;
                return -1;
            }

            av_packet_rescale_ts(&outPacket, outCodecCtx->time_base, outStream->time_base);
            outPacket.stream_index = outStream->index;

            ret = av_interleaved_write_frame(outFormatCtx, &outPacket);
            if (ret < 0) {
                cerr << "Error: Could not write packet to output file" << endl;
                return -1;
            }
        }

        av_frame_free(&yuvFrame);
    }

    // Write output trailer
    av_write_trailer(outFormatCtx);

    // Clean up
    avcodec_close(outCodecCtx);
    avcodec_free_context(&outCodecCtx);
    avformat_free_context(outFormatCtx);

    return 0;
}

In fact, I am trying to solve my original question here: What is the best way to save an image sequence with different time intervals in a single file in C++? In that discussion, however, it proved difficult to write the C++ code for the FFmpeg library.

ollydbg23

1 Answer


The main reason for the crash is the following statement:

outStream->id = outFormatCtx->nb_streams++;

We should not increment nb_streams, and we should not modify the stream id: avformat_new_stream already takes care of both.
Remove this line from the code!
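
For reference, a minimal sketch of the intended pattern; no manual bookkeeping of nb_streams or id is needed, because avformat_new_stream does it for you:

// avformat_new_stream() increments outFormatCtx->nb_streams and assigns
// outStream->index itself; outStream->id can be left at its default.
AVStream* outStream = avformat_new_stream(outFormatCtx, outCodec);
if (!outStream) {
    cerr << "Error: Could not allocate output stream" << endl;
    return -1;
}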


Other issues related to avformat_write_header:

  • We have to open the output file for writing:

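     // The MP4 muxer writes to a file itself, so the AVIOContext must be opened here.
     // (Formats that set the AVFMT_NOFILE flag manage their own I/O; for those this
     // call should be skipped.)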
     if (avio_open(&outFormatCtx->pb, outFile, AVIO_FLAG_WRITE) < 0) {
         cerr << "Error opening output file" << std::endl;
         return -1;
     }
    
  • Add the AV_CODEC_FLAG_GLOBAL_HEADER flag, as described in the ffmpeg-libav-tutorial:

     //We set the flag AV_CODEC_FLAG_GLOBAL_HEADER which tells the encoder that it can use the global headers.
     if (outFormatCtx->oformat->flags & AVFMT_GLOBALHEADER)
     {
         outCodecCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; //
     }
    

There are multiple other issues that I tried to fix: setting framerate and bit_rate on the codec context, setting the frame PTS and the packet duration, replacing the deprecated av_init_packet with av_packet_alloc, and flushing the encoder before writing the trailer.
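
One more debugging tip: when avformat_write_header (or any other libav call) returns a negative code, av_strerror from <libavutil/error.h> turns it into a readable message, which is usually more helpful than a generic error line. A minimal sketch:

int ret = avformat_write_header(outFormatCtx, nullptr);
if (ret < 0) {
    char errbuf[AV_ERROR_MAX_STRING_SIZE];
    av_strerror(ret, errbuf, sizeof(errbuf)); // fill errbuf with a human-readable description
    cerr << "Error: avformat_write_header failed: " << errbuf << endl;
    return -1;
}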

Updated code sample:

#include <iostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <sstream>
#include <stdexcept>
#include <opencv2/opencv.hpp>
extern "C" {
#include <libavutil/imgutils.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
}

using namespace std;
using namespace cv;

int main()
{
    // Set up input frames as BGR byte arrays
    vector<Mat> frames;

    int width = 640;
    int height = 480;
    int num_frames = 100;
    Scalar black(0, 0, 0);
    Scalar white(255, 255, 255);
    int font = FONT_HERSHEY_SIMPLEX;
    double font_scale = 1.0;
    int thickness = 2;

    for (int i = 0; i < num_frames; i++) {
        Mat frame = Mat::zeros(height, width, CV_8UC3);
        putText(frame, std::to_string(i), Point(width / 2 - 50, height / 2), font, font_scale, white, thickness);
        frames.push_back(frame);
    }


    // Populate frames with BGR byte arrays

    // Initialize FFmpeg
    //av_register_all();

    // Set up output file
    AVFormatContext* outFormatCtx = nullptr;
    //AVCodec* outCodec = nullptr;
    AVCodecContext* outCodecCtx = nullptr;
    //AVStream* outStream = nullptr;
    //AVPacket outPacket;

    const char* outFile = "output.mp4";
    int outWidth = frames[0].cols;
    int outHeight = frames[0].rows;
    int fps = 30;

    // Open the output file context
    avformat_alloc_output_context2(&outFormatCtx, nullptr, nullptr, outFile);
    if (!outFormatCtx) {
        cerr << "Error: Could not allocate output format context" << endl;
        return -1;
    }

    // Open the output file
    if (avio_open(&outFormatCtx->pb, outFile, AVIO_FLAG_WRITE) < 0) {
        cerr << "Error opening output file" << std::endl;
        return -1;
    }

    // Set up output codec
    const AVCodec* outCodec = avcodec_find_encoder(AV_CODEC_ID_H264);
    if (!outCodec) {
        cerr << "Error: Could not find H.264 codec" << endl;
        return -1;
    }

    outCodecCtx = avcodec_alloc_context3(outCodec);
    if (!outCodecCtx) {
        cerr << "Error: Could not allocate output codec context" << endl;
        return -1;
    }
    outCodecCtx->codec_id = AV_CODEC_ID_H264;
    outCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;
    outCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;
    outCodecCtx->width = outWidth;
    outCodecCtx->height = outHeight;
    outCodecCtx->time_base = { 1, fps };
    outCodecCtx->framerate = {fps, 1};
    outCodecCtx->bit_rate = 4000000;

    //https://github.com/leandromoreira/ffmpeg-libav-tutorial
    //We set the flag AV_CODEC_FLAG_GLOBAL_HEADER which tells the encoder that it can use the global headers.
    if (outFormatCtx->oformat->flags & AVFMT_GLOBALHEADER)
    {
        outCodecCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; //
    }

    // Open output codec
    if (avcodec_open2(outCodecCtx, outCodec, nullptr) < 0) {
        cerr << "Error: Could not open output codec" << endl;
        return -1;
    }

    // Create output stream
    AVStream* outStream = avformat_new_stream(outFormatCtx, outCodec);
    if (!outStream) {
        cerr << "Error: Could not allocate output stream" << endl;
        return -1;
    }

    // Configure output stream parameters (e.g., time base, codec parameters, etc.)
    // ...

    // Connect output stream to format context
    outStream->codecpar->codec_id = outCodecCtx->codec_id;
    outStream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
    outStream->codecpar->width = outCodecCtx->width;
    outStream->codecpar->height = outCodecCtx->height;
    outStream->codecpar->format = outCodecCtx->pix_fmt;
    outStream->time_base = outCodecCtx->time_base;

    int ret = avcodec_parameters_from_context(outStream->codecpar, outCodecCtx);
    if (ret < 0) {
        cerr << "Error: Could not copy codec parameters to output stream" << endl;
        return -1;
    }

    outStream->avg_frame_rate = outCodecCtx->framerate;
    //outStream->id = outFormatCtx->nb_streams++;  <--- We shouldn't modify outStream->id

    ret = avformat_write_header(outFormatCtx, nullptr);
    if (ret < 0) {
        cerr << "Error: Could not write output header" << endl;
        return -1;
    }

    // Convert frames to YUV format and write to output file
    int frame_count = -1;
    for (const auto& frame : frames) {
        frame_count++;
        AVFrame* yuvFrame = av_frame_alloc();
        if (!yuvFrame) {
            cerr << "Error: Could not allocate YUV frame" << endl;
            return -1;
        }
        av_image_alloc(yuvFrame->data, yuvFrame->linesize, outWidth, outHeight, AV_PIX_FMT_YUV420P, 32);

        yuvFrame->width = outWidth;
        yuvFrame->height = outHeight;
        yuvFrame->format = AV_PIX_FMT_YUV420P;

        // Convert BGR frame to YUV format
        Mat yuvMat;
        cvtColor(frame, yuvMat, COLOR_BGR2YUV_I420);
        memcpy(yuvFrame->data[0], yuvMat.data, outWidth * outHeight);
        memcpy(yuvFrame->data[1], yuvMat.data + outWidth * outHeight, outWidth * outHeight / 4);
        memcpy(yuvFrame->data[2], yuvMat.data + outWidth * outHeight * 5 / 4, outWidth * outHeight / 4);

        // Set up output packet
        //av_init_packet(&outPacket); //error C4996: 'av_init_packet': was declared deprecated
        AVPacket* outPacket = av_packet_alloc(); // av_packet_alloc() returns an already initialized packet

        yuvFrame->pts = av_rescale_q(frame_count, outCodecCtx->time_base, outStream->time_base); //Set PTS timestamp

        // Encode frame and write to output file
        int ret = avcodec_send_frame(outCodecCtx, yuvFrame);
        if (ret < 0) {
            cerr << "Error: Could not send frame to output codec" << endl;
            return -1;
        }
        while (ret >= 0) {
            ret = avcodec_receive_packet(outCodecCtx, outPacket);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                break;
            } else if (ret < 0) {
                cerr << "Error: Could not receive packet from output codec" << endl;
                return -1;
            }

            //av_packet_rescale_ts(&outPacket, outCodecCtx->time_base, outStream->time_base);

            outPacket->stream_index = outStream->index;

            outPacket->duration = av_rescale_q(1, outCodecCtx->time_base, outStream->time_base);   // Set packet duration

            ret = av_interleaved_write_frame(outFormatCtx, outPacket);
            av_packet_unref(outPacket);
            if (ret < 0) {
                cerr << "Error: Could not write packet to output file" << endl;
                return -1;
            }
        }

        av_packet_free(&outPacket);     // release the packet allocated for this frame
        av_freep(&yuvFrame->data[0]);   // buffers from av_image_alloc() must be freed explicitly
        av_frame_free(&yuvFrame);
    }

    // Flush the encoder
    ret = avcodec_send_frame(outCodecCtx, nullptr);
    if (ret < 0) {
        std::cerr << "Error flushing encoder" << std::endl;
        return -1;
    }

    while (ret >= 0) {
        AVPacket* pkt = av_packet_alloc();
        if (!pkt) {
            std::cerr << "Error allocating packet" << std::endl;
            return -1;
        }
        ret = avcodec_receive_packet(outCodecCtx, pkt);

        // Write the packet to the output file
        if (ret == 0)
        {
            pkt->stream_index = outStream->index;
            pkt->duration = av_rescale_q(1, outCodecCtx->time_base, outStream->time_base);   // <---- Set packet duration
            ret = av_interleaved_write_frame(outFormatCtx, pkt);
            av_packet_unref(pkt);
            if (ret < 0) {
                std::cerr << "Error writing packet to output file" << std::endl;
                return -1;
            }
        }
        av_packet_free(&pkt); // free the packet each iteration to avoid leaking it
    }


    // Write output trailer
    av_write_trailer(outFormatCtx);

    // Clean up
    avcodec_free_context(&outCodecCtx); // freeing the context also closes the codec
    avio_closep(&outFormatCtx->pb);     // close the file opened with avio_open()
    avformat_free_context(outFormatCtx);

    return 0;
}
Rotem
  • Hi @Rotem, many thanks. As a beginner with the FFmpeg library, I really think it is a bit complex for me to handle such things. I did read the [ffmpeg-libav-tutorial](https://github.com/leandromoreira/ffmpeg-libav-tutorial); the hard-way part is still a bit hard for me, especially since there are so many steps to tweak and so many function return codes to check. Now, back to my original question mentioned above: if I have different time intervals, do I need to change the `pts` values to `0, 3, 5, 8` in the loop instead of the original `0, 1, 2, 3, 4` as in your code? Thanks. – ollydbg23 May 01 '23 at 10:59
  • Hi, please note that I am not part of the FFmpeg development team. I agree that the C API is too complicated, but it is what it is... I am learning the steps from answer to answer. What do you mean by "different time intervals"? – Rotem May 01 '23 at 11:12
  • I have edited my original question, and I think I have explained what "different time intervals" means. In my application, I get many frames (images), but those images do not arrive at a constant rate. About the learning path for FFmpeg: asking questions on the Stack Overflow site is my last resort, because I have tried so many other ways. – ollydbg23 May 01 '23 at 11:30
  • Please revert your edit, and post a new question. I guess you mean generating video with Variable Frame Rate (VFR) as opposed to Constant Frame Rate (CFR). You may change the PTS timestamps to achieve VFR video. It is recommended to use a high-resolution time base like `time_base = { 1, 1000000 }` instead of `time_base = { 1, fps }`. Scale the timestamps according to the new time base. I recommend you do some research about VFR, timestamps and time bases. – Rotem May 01 '23 at 11:42
  • OK, I have posted another question here: [save a serial of images(cv::Mat) to a mp4 file in Variable Frame Rate mode by using ffmpeg library, how to set the pts?](https://stackoverflow.com/questions/76146859/save-a-serial-of-imagescvmat-to-a-mp4-file-in-variable-frame-rate-mode-by-us), and I have reverted my question here. Thanks. – ollydbg23 May 01 '23 at 12:19
  • I meant that you should do some research about VFR, timestamps and time bases before posting the new question. You have to show what you have tried so far. `timestamp[100] = {0, 50, 75, ...}` is not well defined. What are the units? I recommend you fix the new question and include a reproducible code sample. – Rotem May 01 '23 at 12:28
  • Done, I have added what I have done for the timestamps array in that question, but I still get several runtime warnings and errors. The `0` means 0 ms and the `50` means 50 ms (ms meaning milliseconds). – ollydbg23 May 01 '23 at 13:56
  • I think you may learn from my [following answer](https://stackoverflow.com/a/75348681/4926757). At least use `ffprobe -show_packets` for testing. – Rotem May 01 '23 at 14:23
  • Hi Rotem, thanks, I have read the answer that uses `pyav`. But I still can't fix my C++ issue for the VFR video. In that answer, the `pts` and `dts` are changed using the same method, and that is for the `packet` struct, while in my issue it is the `frame` struct, and I don't see a `dts` value when I run `ffprobe -show_frames input.mp4`. Does that mean `dts` exists only for `packet`, while `pts` exists for both `packet` and `frame`? – ollydbg23 May 02 '23 at 03:39
  • The PyAV example is for "Remuxing". What you want is Encoding. When encoding, we set the PTS of the frame. The PTS and DTS of the packets are filled automatically (from the frame PTS). All you have to do is `pts = pts_list[frame_count]` and scale the pts according to the time base of the stream. FFmpeg's C interface includes functions for rescaling between time bases (see the sketch after these comments). – Rotem May 02 '23 at 05:39
  • Let us [continue this discussion in chat](https://chat.stackoverflow.com/rooms/253423/discussion-between-ollydbg23-and-rotem). – ollydbg23 May 02 '23 at 07:03
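
To illustrate the approach described in the last comments, here is a minimal, hypothetical sketch of per-frame PTS assignment for variable frame rate, mirroring the PTS pattern used in the answer above. The pts_ms array of millisecond timestamps is invented for illustration, and it assumes outCodecCtx->time_base was set to {1, 1000} so that one tick equals one millisecond:

// Hypothetical millisecond timestamps, one per frame (not from the original posts).
const int64_t pts_ms[] = {0, 50, 75, 180};

// With outCodecCtx->time_base == {1, 1000} one tick is one millisecond, so the
// frame PTS is the timestamp itself, rescaled to the stream time base (the
// encoder then fills the packet PTS/DTS from the frame PTS).
yuvFrame->pts = av_rescale_q(pts_ms[frame_count],
                             outCodecCtx->time_base,
                             outStream->time_base);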