1

I am trying to write a program to generate frames to be encoded via ffmpeg/libav into an mp4 file with a single h264 stream. I found these two examples and am sort of trying to merge them together to make what I want: [video transcoder] [raw MPEG1 encoder]

I have been able to get video output (green circle changing size), but no matter how I set the PTS values of the frames or what time_base I specify in the AVCodecContext or AVStream, I'm getting frame rates of about 7000-15000 instead of 60, resulting in a video file that lasts 70ms instead of 1000 frames / 60 fps ≈ 16.7 seconds. Every time I change some of my code, the frame rate changes a little bit, almost as if it's reading from uninitialized memory. Other references to an issue like this on StackOverflow seem to be related to incorrectly set PTS values; however, I've tried printing out all the PTS, DTS, and time base values I can find and they all seem normal. Here's my proof-of-concept code (with the error catching stuff around the libav calls removed for clarity):

#include <iostream>
#include <opencv2/opencv.hpp>
#include <math.h>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libavutil/timestamp.h>
}

using namespace cv;

// Proof-of-concept from the question: draws 1000 grayscale frames containing a
// circle whose radius follows a sine wave, encodes them with the H.264 encoder
// and writes the packets to "testvideo.mp4".
// None of the libav return values are checked in this listing (the question
// states the error handling was removed for clarity).
int main(int argc, char *argv[]) {
    const char *filename = "testvideo.mp4";
    
    // Output (muxer) context; the container format is chosen from the filename.
    AVFormatContext *avfc;
    avformat_alloc_output_context2(&avfc, NULL, NULL, filename);
    
    // One stream in the container, plus an H.264 encoder context for it.
    AVStream *stream = avformat_new_stream(avfc, NULL);
    AVCodec *h264 = avcodec_find_encoder(AV_CODEC_ID_H264);
    AVCodecContext *avcc = avcodec_alloc_context3(h264);
    
    // Encoder-private options are set on priv_data before the codec is opened.
    av_opt_set(avcc->priv_data, "preset", "fast", 0);
    av_opt_set(avcc->priv_data, "crf", "20", 0);
    avcc->thread_count = 1;
    avcc->width = 1920;
    avcc->height = 1080;
    avcc->pix_fmt = AV_PIX_FMT_YUV420P;
    // Codec time base of 1/60; the same value is copied to the stream.
    avcc->time_base = av_make_q(1, 60);
    stream->time_base = avcc->time_base;
    
    // Request global headers from the encoder when the container format asks for them.
    if(avfc->oformat->flags & AVFMT_GLOBALHEADER)
        avcc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    
    avcodec_open2(avcc, h264, NULL);
    // Copy the opened encoder's parameters into the stream description.
    avcodec_parameters_from_context(stream->codecpar, avcc);
    
    avio_open(&avfc->pb, filename, AVIO_FLAG_WRITE);
    
    // NOTE(review): the discussion that follows this listing reports that this
    // call changed stream->time_base from 1/60 to 1/15360. Nothing below re-reads
    // stream->time_base or rescales packet timestamps - confirm against the muxer docs.
    avformat_write_header(avfc, NULL);
    
    // `nothing` is an all-zero plane that is reused for both chroma planes below.
    Mat frame, nothing = Mat::zeros(1080, 1920, CV_8UC1);
    AVFrame *avf = av_frame_alloc();
    AVPacket *avp = av_packet_alloc();
    int ret;
    
    avf->format = AV_PIX_FMT_YUV420P;
    avf->width = 1920;
    avf->height = 1080;
    // All three planes are given a stride of 1920 bytes, matching the Mat row width.
    avf->linesize[0] = 1920;
    avf->linesize[1] = 1920;
    avf->linesize[2] = 1920;
    
    for(int x=0; x<1000; x++) {
        // Fresh black luma plane with a white circle outline of varying radius.
        frame = Mat::zeros(1080, 1920, CV_8UC1);
        circle(frame, Point(1920/2, 1080/2), 250*(sin(2*M_PI*x/1000*3)+1.01), Scalar(255), 10);
        
        // The frame borrows the Mat buffers directly; no AVFrame buffer is allocated.
        avf->data[0] = frame.data;
        avf->data[1] = nothing.data;
        avf->data[2] = nothing.data;
        // The timestamp advances by exactly one tick per frame.
        avf->pts = x;
        
        // Keep offering the frame to the encoder. Whenever the previous send
        // returned EAGAIN, pull one packet out and write it before retrying.
        // The loop ends once a send returns 0 or a receive fails.
        ret = 0;
        do {
            if(ret == AVERROR(EAGAIN)) {
                av_packet_unref(avp);
                ret = avcodec_receive_packet(avcc, avp);
                if(ret) break; // deal with error
                // The packet is written with whatever pts/dts/duration the encoder produced.
                av_write_frame(avfc, avp);
            } //else if(ret) deal with error
            ret = avcodec_send_frame(avcc, avf);
        } while(ret);
    }
    
    // flush the rest of the packets
    // A NULL frame is sent, then packets are received until the first non-zero return.
    avcodec_send_frame(avcc, NULL);
    do {
        av_packet_unref(avp);
        ret = avcodec_receive_packet(avcc, avp);
        if(!ret)
            av_write_frame(avfc, avp);
    } while(!ret);
    
    av_frame_free(&avf);
    av_packet_free(&avp);
    
    av_write_trailer(avfc);
    // NOTE(review): avformat_close_input() is applied here to a context created
    // with avformat_alloc_output_context2() - verify this is the intended way to
    // close an output context and its AVIO handle.
    avformat_close_input(&avfc);
    avformat_free_context(avfc);
    avcodec_free_context(&avcc);
    return 0;
}

This is the output of ffprobe run on the output video file

Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'testvideo.mp4':
  Metadata:
    major_brand     : isom
    minor_version   : 512
    compatible_brands: isomiso2avc1mp41
    encoder         : Lavf58.76.100
  Duration: 00:00:00.07, start: 0.000000, bitrate: 115192 kb/s
  Stream #0:0(und): Video: h264 (High) (avc1 / 0x31637661), yuv420p, 1920x1080, 115389 kb/s, 15375.38 fps, 15360 tbr, 15360 tbn, 120 tbc (default)
    Metadata:
      handler_name    : VideoHandler
      vendor_id       : [0][0][0][0]

What might be causing my frame rate to be so high? Thanks in advance for any help.

unknownperson
  • 339
  • 1
  • 9

2 Answers

1

You are getting high frame rate because you have failed to set packet duration.

  • Set the time_base to higher resolution (like 1/60000) as described here:

     avcc->time_base = av_make_q(1, 60000);
    
  • Set avp->duration as described here:

     AVRational avg_frame_rate = av_make_q(60, 1);   //60 fps
     avp->duration = avcc->time_base.den / avcc->time_base.num / avg_frame_rate.num * avg_frame_rate.den;    //avp->duration = 1000 (60000/60)
    

    And set the pts accordingly.


Complete code:

#include <iostream>
#include <opencv2/opencv.hpp>
#include <math.h>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libavutil/timestamp.h>
}

using namespace cv;

int main(int argc, char* argv[]) {
    const char* filename = "testvideo.mp4";

    AVFormatContext* avfc;
    avformat_alloc_output_context2(&avfc, NULL, NULL, filename);

    AVStream* stream = avformat_new_stream(avfc, NULL);
    AVCodec* h264 = avcodec_find_encoder(AV_CODEC_ID_H264);
    AVCodecContext* avcc = avcodec_alloc_context3(h264);

    av_opt_set(avcc->priv_data, "preset", "fast", 0);
    av_opt_set(avcc->priv_data, "crf", "20", 0);
    avcc->thread_count = 1;
    avcc->width = 1920;
    avcc->height = 1080;
    avcc->pix_fmt = AV_PIX_FMT_YUV420P;
    //Sey the time_base to higher resolution like 1/60000
    avcc->time_base = av_make_q(1, 60000); //avcc->time_base = av_make_q(1, 60);
    stream->time_base = avcc->time_base;

    if (avfc->oformat->flags & AVFMT_GLOBALHEADER)
        avcc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    avcodec_open2(avcc, h264, NULL);
    avcodec_parameters_from_context(stream->codecpar, avcc);

    avio_open(&avfc->pb, filename, AVIO_FLAG_WRITE);

    avformat_write_header(avfc, NULL);

    Mat frame, nothing = Mat::zeros(1080, 1920, CV_8UC1);
    AVFrame* avf = av_frame_alloc();
    AVPacket* avp = av_packet_alloc();
    int ret;

    avf->format = AV_PIX_FMT_YUV420P;
    avf->width = 1920;
    avf->height = 1080;
    avf->linesize[0] = 1920;
    avf->linesize[1] = 1920;
    avf->linesize[2] = 1920;

    for (int x = 0; x < 1000; x++) {
        frame = Mat::zeros(1080, 1920, CV_8UC1);
        circle(frame, Point(1920 / 2, 1080 / 2), (int)(250.0 * (sin(2 * M_PI * x / 1000 * 3) + 1.01)), Scalar(255), 10);

        AVRational avg_frame_rate = av_make_q(60, 1);   //60 fps

        int64_t avp_duration = avcc->time_base.den / avcc->time_base.num / avg_frame_rate.num * avg_frame_rate.den;

        avf->data[0] = frame.data;
        avf->data[1] = nothing.data;
        avf->data[2] = nothing.data;
        avf->pts = (int64_t)x * avp_duration; // avp->duration = 1000

        ret = 0;
        do {
            if (ret == AVERROR(EAGAIN)) {
                av_packet_unref(avp);
                ret = avcodec_receive_packet(avcc, avp);
                if (ret) break; // deal with error

                ////////////////////////////////////////////////////////////////
                //avp->duration was zero.
                avp->duration = avp_duration;    //avp->duration = 1000 (60000/60)

                //avp->pts = (int64_t)x * avp->duration;
                ////////////////////////////////////////////////////////////////

                av_write_frame(avfc, avp);
            } //else if(ret) deal with error
            ret = avcodec_send_frame(avcc, avf);
        } while (ret);
    }

    // flush the rest of the packets
    avcodec_send_frame(avcc, NULL);
    do {
        av_packet_unref(avp);
        ret = avcodec_receive_packet(avcc, avp);
        if (!ret)
            av_write_frame(avfc, avp);
    } while (!ret);

    av_frame_free(&avf);
    av_packet_free(&avp);

    av_write_trailer(avfc);
    avformat_close_input(&avfc);
    avformat_free_context(avfc);
    avcodec_free_context(&avcc);
    return 0;
}

Result of FFprobe:

Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'testvideo.mp4':
  Metadata:
    major_brand     : isom
    minor_version   : 512
    compatible_brands: isomiso2avc1mp41
    encoder         : Lavf58.76.100
  Duration: 00:00:16.65, start: 0.000000, bitrate: 456 kb/s
  Stream #0:0(und): Video: h264 (High) (avc1 / 0x31637661), yuv420p, 1920x1080, 450 kb/s, 60.06 fps, 60 tbr, 60k tbn, 120k tbc (default)
    Metadata:
      handler_name    : VideoHandler
      vendor_id       : [0][0][0][0]

Notes:

  • I don't know why the fps is 60.06 and not 60.
  • There is a warning message MB rate (734400000) > level limit (16711680) that I didn't fix.
Rotem
  • 30,366
  • 4
  • 32
  • 65
  • Thank you! I missed the duration when looking through the examples. Out of curiosity, why do we need to set the time base to 1/60000? In the example I see it's set to `video_avcc->time_base = av_inv_q(input_framerate)`, which I assume sets it to 1/60. When I try this (1/60 timebase, increment pts by 1, packet duration of 1), it goes back to hyper speed. (Also the 60.06fps is because you didn't set the duration in the last packets in my second flushing loop.) – unknownperson Nov 23 '21 at 20:05
  • Setting the time base to 1/60000 is the value used in the tutorial. If there is only video, (no audio), it's probably OK to set the time base to 1/60, but when there is an audio stream that needs to be synchronized with the video, we need higher resolution time base (so it's not a good practice to set it to 1/60). The legacy time base used to be 1/27000 (something to do with analog video). Probing some video files, I can find values as 1/16384, 1/1000, 1/11988, 125/2997 (specific value is probably not critical). – Rotem Nov 23 '21 at 21:20
0

Though the answer I accepted fixes the problem I was having, here is some more information I've figured out that may be useful:

The time_base field has some restrictions on its value (for example 1/10000 works, but 1/9999 doesn't) based on the container format, and this seems to have been the root problem I was having. When the time base was set to 1/60, the call to avformat_write_header() changed it to 1/15360. Because I had hardcoded the PTS increment to 1, this resulted in the 15360 FPS video. The strange denominator of 15360 seems to result from the given denominator being multiplied by 2 repeatedly until it reaches some minimum value (60 × 2^8 = 15360). I have no idea how this algorithm actually works. This SO question led me on to this.

By setting the time base to 1/60000 and making the PTS increment by 1000 each frame, the fast video problem was fixed. Setting the packet duration doesn't seem necessary, but is probably a good idea.

The main lesson here is to use whatever time_base libav gives you instead of assuming the value you set it to stays unchanged. @Rotem's updated code does this, and would therefore "work" with a time base of 1/60, since the PTS and packet duration will actually be based off the 1/15360 value time_base changes to.

unknownperson
  • 339
  • 1
  • 9