I render some images with OpenGL and need to compose them into a video file. Each of the images is a sequence of uint8_t values representing a sRGB color component (image array looks like ...rgbrgbrgb...)
I know very little about video processing and have no experience with ffmpeg libraries at all. I made a little test program using these sources as reference:
https://ffmpeg.org/doxygen/trunk/encode_video_8c-example.html
How to convert RGB from YUV420p for ffmpeg encoder?
The test program is supposed to make a video about growing green vertical stripe. I'm just trying to figure out how to make a video using some source of raw RGB data.
Here is my code:
#include <iostream>
#include <vector>
#include <algorithm>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/opt.h>
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}
static void encode( AVCodecContext* enc_ctx,
AVFrame* frame, AVPacket* pkt,
FILE* outfile )
{
int ret;
ret = avcodec_send_frame(enc_ctx, frame);
if (ret < 0) {
std::cerr << "Error sending a frame for encoding\n";
return;
}
while (ret >= 0) {
ret = avcodec_receive_packet(enc_ctx, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
return;
else if (ret < 0) {
fprintf(stderr, "Error during encoding\n");
exit(1);
}
fwrite(pkt->data, 1, pkt->size, outfile);
av_packet_unref(pkt);
}
}
static constexpr int w = 1920, h = 1080;
static constexpr float fps = 20.f, time = 5.f;
static constexpr int nFrames = static_cast<int>(fps * time);
static std::vector<uint8_t> imageRGB(w * h * 3, 0);
static void UpdateImageRGB()
{
static int d = 50;
imageRGB.assign(w * h * 3, 0);
for (int i = 0; i < h; ++i)
for ( int j = std::max(0, w / 2 - d);
j < std::min(w, w / 2 + d);
++j )
{
imageRGB[(w * i + j) * 3 + 0] = 50;
imageRGB[(w * i + j) * 3 + 1] = 200;
imageRGB[(w * i + j) * 3 + 2] = 50;
}
d += 5;
}
int main()
{
int ret = 0;
auto filename = "test.mp4";
auto codec = avcodec_find_encoder(AV_CODEC_ID_H264);
if (!codec) {
std::cerr << "Codec \"x.264\" not found\n";
return 1;
}
auto c = avcodec_alloc_context3(codec);
if (!c) {
std::cerr << "Could not allocate video codec context\n";
return 1;
}
auto pkt = av_packet_alloc();
if (!pkt) return 1;
// 1.8 bits / (pixel * frame)
c->bit_rate = static_cast<int64_t>(1.8f * w * h * fps);
/* resolution must be a multiple of two */
c->width = w;
c->height = h;
/* frames per second */
c->time_base = AVRational{ 1, static_cast<int>(fps) };
c->framerate = AVRational{ static_cast<int>(fps), 1 };
c->gop_size = 10;
c->max_b_frames = 1;
c->pix_fmt = AV_PIX_FMT_YUV420P;
av_opt_set(c->priv_data, "preset", "slow", 0);
av_opt_set(c->priv_data, "preset", "slow", 0);
ret = avcodec_open2(c, codec, NULL);
if (ret < 0) {
char str[AV_ERROR_MAX_STRING_SIZE];
std::cerr << "Could not open codec: "
<< av_make_error_string(str, AV_ERROR_MAX_STRING_SIZE, ret)
<< "\n";
return 1;
}
FILE * f;
fopen_s(&f, filename, "wb");
if (!f) {
std::cerr << "Could not open " << filename << '\n';
return 1;
}
auto frame = av_frame_alloc();
if (!frame) {
std::cerr << "Could not allocate video frame\n";
return 1;
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
ret = av_frame_get_buffer(frame, 0);
if (ret < 0) {
std::cerr << stderr, "Could not allocate the video frame data\n";
return 1;
}
SwsContext* ctx = sws_getContext( w, h, AV_PIX_FMT_RGB24,
w, h, AV_PIX_FMT_YUV420P,
0, 0, 0, 0 );
for (int i = 0; i < nFrames; i++)
{
ret = av_frame_make_writable(frame);
UpdateImageRGB();
static const uint8_t* rgbData[1] = { &imageRGB[0] };
static constexpr int rgbLinesize[1] = { 3 * w };
sws_scale( ctx, rgbData, rgbLinesize, 0, h,
frame->data, frame->linesize );
frame->pts = i;
/* encode the image */
encode(c, frame, pkt, f);
}
encode(c, NULL, pkt, f);
fclose(f);
avcodec_free_context(&c);
av_frame_free(&frame);
av_packet_free(&pkt);
return 0;
}
The program generates 33.9k video file with further console output:
[libx264 @ 0000020c18681800] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2
[libx264 @ 0000020c18681800] profile High, level 5.0, 4:2:0, 8-bit
[libx264 @ 0000020c18681800] frame I:11 Avg QP: 0.00 size: 639
[libx264 @ 0000020c18681800] frame P:74 Avg QP: 0.32 size: 174
[libx264 @ 0000020c18681800] frame B:15 Avg QP: 2.26 size: 990
[libx264 @ 0000020c18681800] consecutive B-frames: 70.0% 30.0%
[libx264 @ 0000020c18681800] mb I I16..4: 100.0% 0.0% 0.0%
[libx264 @ 0000020c18681800] mb P I16..4: 0.6% 0.0% 0.0% P16..4: 2.1% 0.0% 0.0% 0.0% 0.0% skip:97.3%
[libx264 @ 0000020c18681800] mb B I16..4: 0.1% 0.0% 0.0% B16..8: 0.6% 0.0% 0.0% direct: 0.6% skip:98.7% L0:39.8% L1:60.2% BI: 0.0%
[libx264 @ 0000020c18681800] final ratefactor: -46.47
[libx264 @ 0000020c18681800] 8x8 transform intra:0.0%
[libx264 @ 0000020c18681800] direct mvs spatial:0.0% temporal:100.0%
[libx264 @ 0000020c18681800] coded y,uvDC,uvAC intra: 0.0% 0.1% 0.1% inter: 0.0% 0.1% 0.1%
[libx264 @ 0000020c18681800] i16 v,h,dc,p: 99% 0% 1% 0%
[libx264 @ 0000020c18681800] i8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 0% 0% 100% 0% 0% 0% 0% 0% 0%
[libx264 @ 0000020c18681800] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 46% 0% 54% 0% 0% 0% 0% 0% 0%
[libx264 @ 0000020c18681800] i8c dc,h,v,p: 96% 1% 3% 0%
[libx264 @ 0000020c18681800] Weighted P-Frames: Y:0.0% UV:0.0%
[libx264 @ 0000020c18681800] ref P L0: 70.2% 0.0% 29.8% 0.0% 0.0%
[libx264 @ 0000020c18681800] kb/s:55.61
- "Media Player Classic" on Windows plays this video but the time slider doesn't move, and the video cannot be fast-forwarded to some frame
- VLC cannot play the video at all. It launches, shows me VLC logo, and time slider (which is unusually big) jumps from left to right, not responding to my clicks
- If I set time = 0.05 to make a video of only 1 frame, I cannot play it even with "Media Player Classic". I want to make an algorithm to convert the arbitrary number of raw RGB images into the video files, even if there's only one image, and with arbitrary image size (that is, width and height may be odd).
- As I said, I don't really understand what am I doing. There are low-level codec settings in lines 83-84. Are they all right?
- Do I have to manually set a bit rate (line 75)? Shouldn't it be calculated automatically by the codec?