I am trying to take two videos and put them together into one video. However, when I run my code, I get this error when decoding/encoding the second video:
Application provided invalid, non monotonically increasing dts to muxer in stream 0
When the code finishes, the first video is completely fine, but the second video is not. The best result that I have been able to produce is where the second half of the second video comes right after the first video. The funny thing is, the audio is completely fine and is as it should be.
In the past, I have been able to use my decoding/encoding code to simply copy a video (both video and audio).
I have searched online regarding this specific problem and tried the suggested solutions, but none of them seem to fix my issue. These are the threads that I have been looking at:
FFmpeg - What does non monotonically increasing dts mean?
How to use libavformat to concat 2 video files with same codec (re-muxing)?
Non monotonically increasing dts to muxer in stream
This is the current code that I have written:
Video and ClipSequence structs:
typedef struct Video {
char* filename;
AVFormatContext* inputContext;
AVFormatContext* outputContext;
AVCodec* videoCodec;
AVCodec* audioCodec;
AVStream* inputStream;
AVStream* outputStream;
AVCodecContext* videoCodecContext_I; // Input
AVCodecContext* audioCodecContext_I; // Input
AVCodecContext* videoCodecContext_O; // Output
AVCodecContext* audioCodecContext_O; // Output
int videoStream;
int audioStream;
SwrContext* swrContext;
} Video;
typedef struct ClipSequence {
VideoList* videos;
AVFormatContext* outputContext;
AVStream* outputStream;
int64_t lastpts, lastdts;
int64_t currentpts, currentdts;
} ClipSequence;
Decoding and encoding (same code for audio):
int decodeVideoSequence(ClipSequence* sequence, Video* video, AVPacket* packet, AVFrame* frame) {
int response = avcodec_send_packet(video->videoCodecContext_I, packet);
if (response < 0) {
printf("[ERROR] Failed to send video packet to decoder\n");
return response;
}
while (response >= 0) {
response = avcodec_receive_frame(video->videoCodecContext_I, frame);
if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
break;
} else if (response < 0) {
printf("[ERROR] Failed to receive video frame from decoder\n");
return response;
}
if (response >= 0) {
// Do stuff and encode
sequence->currentpts = packet->pts; // Store decoded packet's pts and dts
sequence->currentdts = packet->dts;
if (encodeVideoSequence(sequence, video, frame) < 0) {
printf("[ERROR] Failed to encode new video\n");
return -1;
}
}
av_frame_unref(frame);
}
return 0;
}
int encodeVideoSequence(ClipSequence* sequence, Video* video, AVFrame* frame) {
AVPacket* packet = av_packet_alloc();
if (!packet) {
printf("[ERROR] Could not allocate memory for video output packet\n");
return -1;
}
int response = avcodec_send_frame(video->videoCodecContext_O, frame);
if (response < 0) {
printf("[ERROR] Failed to send video frame for encoding\n");
return response;
}
while (response >= 0) {
response = avcodec_receive_packet(video->videoCodecContext_O, packet);
if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
break;
} else if (response < 0) {
printf("[ERROR] Failed to receive video packet from encoder\n");
return response;
}
// Set packet to have pts and dts based on the previous video's pts and dts
packet->flags |= AV_PKT_FLAG_KEY;
packet->pts = sequence->currentpts + sequence->lastpts;
packet->dts = sequence->currentdts + sequence->lastdts;
packet->stream_index = video->videoStream;
packet->duration = 1000; // 60 fps
response = av_interleaved_write_frame(sequence->outputContext, packet);
if (response < 0) {
printf("[ERROR] Failed to write video packet\n");
break;
}
}
av_packet_unref(packet);
av_packet_free(&packet);
return 0;
}
Reading the frames:
int readSequenceFrames(ClipSequence* sequence, Video* video, AVPacket* packet, AVFrame* frame) {
if (!packet) {
printf("[ERROR] Packet not allocated to be read\n");
return -1;
}
if (!frame) {
printf("[ERROR] Frame not allocated to be read\n");
return -1;
}
// Sets video and audio codec context parameters
if (prepareVideoOutStream(video) < 0) {
printf("[ERROR] Failed to prepare output video stream\n");
return -1;
}
if (prepareAudioOutStream(video) < 0) {
printf("[ERROR] Failed to prepare output audio stream\n");
return -1;
}
// Prepares audio resampling
if (initResampler(video->audioCodecContext_I, video->audioCodecContext_O, &(video->swrContext)) < 0) {
printf("[ERROR] Failed to init audio resampler\n");
return -1;
}
// Read packets
int frameNum = 0;
while (av_read_frame(video->inputContext, packet) >= 0) {
printf("[READ] Reading frame %i\n", frameNum);
if (packet->stream_index == video->videoStream) {
if (decodeVideoSequence(sequence, video, packet, frame) < 0) {
printf("[ERROR] Failed to decode and encode video\n");
return -1;
}
} else if (packet->stream_index == video->audioStream) {
if (decodeAudioSequence(sequence, video, packet, frame) < 0) {
printf("[ERROR] Failed to decode and encode audio\n");
return -1;
}
}
av_packet_unref(packet);
frameNum++;
}
// Increment pts and dts by the last pts and dts in the current video
sequence->lastpts += sequence->currentpts;
sequence->lastdts += sequence->currentdts;
return 0;
}
I believe that I have the right logic when I am increasing the pts and dts. I am not sure what exactly that I am missing.
Thanks.