0

I am writing some images to an AVStream, and after that I am reading an mp3 file and writing it to a different AVStream. The problem is that the audio stream is a bit shorter than the video stream, so if I add more images and another audio file, the audio is no longer in sync with the video. So my idea was to write silent audio data to the audio stream before I write another audio file to it. But I cannot figure out how to write the silent data to the audio stream.

I found this post but I don't know how to calculate the packet size or how to write the packet to the audio stream.

This is my most "successful" approach so far, but the result (audioTest(0xff).mp4) is far from silent.

    /*
     * Attempt to append "silent" audio frames to data->audioStream until the
     * stream's end pts reaches `duration`. This is the non-working version the
     * question is about; the NOTE(review) comments mark the suspicious spots.
     */

    /* set up the audio convert context: input and output share the codec context's
     * channel count, channel layout and sample rate; only the sample format differs
     * (S16 in, the codec context's sample_fmt out) */
    libffmpeg::SwrContext* audioConvertContext = libffmpeg::swr_alloc();
    libffmpeg::av_opt_set_int(audioConvertContext, "in_channel_count", data->audioCodecContext->channels, 0);
    libffmpeg::av_opt_set_int(audioConvertContext, "out_channel_count", data->audioCodecContext->channels, 0);
    libffmpeg::av_opt_set_int(audioConvertContext, "in_channel_layout", data->audioCodecContext->channel_layout, 0);
    libffmpeg::av_opt_set_int(audioConvertContext, "out_channel_layout", data->audioCodecContext->channel_layout, 0);
    libffmpeg::av_opt_set_int(audioConvertContext, "in_sample_rate", data->audioCodecContext->sample_rate, 0);
    libffmpeg::av_opt_set_int(audioConvertContext, "out_sample_rate", data->audioCodecContext->sample_rate, 0);
    libffmpeg::av_opt_set_sample_fmt(audioConvertContext, "in_sample_fmt", libffmpeg::AV_SAMPLE_FMT_S16, 0);
    libffmpeg::av_opt_set_sample_fmt(audioConvertContext, "out_sample_fmt", data->audioCodecContext->sample_fmt, 0);
    int ret = libffmpeg::swr_init(audioConvertContext);
    if (ret < 0)
    {
        Helper::ThrowError("Failed to allocate audio reformat context.", ret);
    }
    /* NOTE(review): audioConvertContext is not released anywhere in this snippet. */

    /* set up silent frame: one encoder frame (frame_size samples) described with the
     * codec context's format, layout, channel count and sample rate */
    libffmpeg::AVFrame* silentFrame = libffmpeg::av_frame_alloc();
    if (!silentFrame)
    {
        Helper::ThrowError("Failed to allocate audio encode frame.");
    }

    silentFrame->nb_samples = data->audioCodecContext->frame_size;
    silentFrame->format = data->audioCodecContext->sample_fmt;
    silentFrame->channel_layout = data->audioCodecContext->channel_layout;
    silentFrame->channels = data->audioCodecContext->channels;
    silentFrame->sample_rate = data->audioCodecContext->sample_rate;

    /* alloc the frame buffer */
    ret = libffmpeg::av_frame_get_buffer(silentFrame, 0);
    if (ret < 0)
    {
        Helper::ThrowError("Could not allocate audio data buffers.");
    }

    int got_output;
    /* NOTE(review): samples_count has no initializer, yet it is read by the pts
     * computation below before anything is assigned to it. */
    int samples_count;
    /* 4 * den / num: presumably 4 seconds expressed in audioStream->time_base ticks
     * (assumes time_base is num/den seconds per tick -- confirm). */
    double duration = 4 * (double)data->audioStream->time_base.den / (double)data->audioStream->time_base.num;
    /* keep emitting frames until the stream's end pts reaches the target */
    while (av_stream_get_end_pts(data->audioStream) < duration)
    {
        /* NOTE(review): pkt.data / pkt.size are not assigned in this snippet; confirm
         * av_init_packet leaves them in the state the encoder expects. */
        libffmpeg::AVPacket pkt;
        libffmpeg::av_init_packet(&pkt);

        ret = libffmpeg::av_frame_make_writable(silentFrame);
        if (ret < 0)
        {
            Helper::ThrowError("Could not make frame writable.");
        }

        /* Fill step: stores the single byte 0xff at offset 2*j of plane 0 and copies it
         * to the following (channels - 1) bytes.
         * NOTE(review): this writes individual bytes, so only offsets 2*j .. 2*j+channels-1
         * of data[0] are touched; whether that covers whole samples depends on the frame's
         * sample format and layout -- verify. */
        for (int j = 0; j < data->audioCodecContext->frame_size; j++)
        {
            silentFrame->data[0][2 * j] = 0xff;

            for (int k = 1; k < data->audioCodecContext->channels; k++)
            {
                silentFrame->data[0][2 * j + k] = silentFrame->data[0][2 * j];
            }
        }

        /* number of output samples: resampler delay plus this frame's samples, rescaled
         * between two identical sample rates and rounded up */
        int dst_nb_samples = libffmpeg::av_rescale_rnd(
            libffmpeg::swr_get_delay(audioConvertContext, data->audioCodecContext->sample_rate) + silentFrame->nb_samples,
            data->audioCodecContext->sample_rate, data->audioCodecContext->sample_rate,
            libffmpeg::AV_ROUND_UP);

        /* NOTE(review): silentFrame->data is passed as BOTH the output and the input
         * array, i.e. the conversion from S16 to the codec format runs in place. */
        ret = libffmpeg::swr_convert(
            audioConvertContext,
            silentFrame->data, dst_nb_samples,
            (const libffmpeg::uint8_t * *) & silentFrame->data,
            silentFrame->nb_samples);

        if (ret < 0)
        {
            Helper::ThrowError("Error while converting audio frame.", ret);
        }

        /* pts = samples emitted so far, rescaled from 1/sample_rate to the codec time base */
        silentFrame->pts = libffmpeg::av_rescale_q(samples_count, libffmpeg::AVRational{ 1, data->audioCodecContext->sample_rate }, data->audioCodecContext->time_base);
        samples_count += dst_nb_samples;

        ret = libffmpeg::avcodec_encode_audio2(data->audioCodecContext, &pkt, silentFrame, &got_output);
        if (ret < 0)
        {
            Helper::ThrowError("Error while encoding audio frame.", ret);
        }

        /* only write (and unref) the packet when the encoder actually produced output */
        if (got_output)
        {
            pkt.stream_index = data->audioStream->index;

            /* assignment inside the condition: any non-zero return is treated as an error */
            if (ret = av_write_frame(data->formatContext, &pkt))
            {
                Helper::ThrowError("Error while writing audio frame.", ret);
            }

            libffmpeg::av_packet_unref(&pkt);
        }
    }

    libffmpeg::av_frame_free(&silentFrame);
LeFrosch
  • 111
  • 1
  • 2
  • 11
  • 1
    Hello! Can you please share what you have tried so far so we can help you? – dashdashzako Jun 13 '19 at 13:38
  • @dashdashzako Thanks for the advice. I added my current approach to the question. I hope it is helpful. – LeFrosch Jun 13 '19 at 20:43
  • Forgive me, I am not a C++ coder, but are you filling the audio array with hex FF? When I do this sort of thing in Java, the PCM values are 0 for silence. Maybe that is what you are doing and I am misunderstanding. – Phil Freihofner Jun 14 '19 at 09:49
  • @PhilFreihofner I tried filling the audio array with 0. The [result (audioTest(0).mp4)](https://github.com/LeFrosch/AudioFile) actually is quite silent except for the beginning. But I noticed that it does not matter what value I use to fill the array, because the result always sounds like [this (audioTest(0xff).mp4)](https://github.com/LeFrosch/AudioFile). – LeFrosch Jun 14 '19 at 11:54
  • @PhilFreihofner: Any constant value means that the speaker won't move, which does produce silence. Using all-ones (-1) or all zeros (0) does not really matter. – MSalters Jun 14 '19 at 12:34

1 Answers1

0

The mistake was in how I wrote to the array. I am not that used to C++, so my solution may be a bit messy, but at least it works now.

    /* set up the audio convert context */
    libffmpeg::SwrContext* audioConvertContext = libffmpeg::swr_alloc();
    libffmpeg::av_opt_set_int(audioConvertContext, "in_channel_count", data->audioCodecContext->channels, 0);
    libffmpeg::av_opt_set_int(audioConvertContext, "out_channel_count", data->audioCodecContext->channels, 0);
    libffmpeg::av_opt_set_int(audioConvertContext, "in_channel_layout", data->audioCodecContext->channel_layout, 0);
    libffmpeg::av_opt_set_int(audioConvertContext, "out_channel_layout", data->audioCodecContext->channel_layout, 0);
    libffmpeg::av_opt_set_int(audioConvertContext, "in_sample_rate", data->audioCodecContext->sample_rate, 0);
    libffmpeg::av_opt_set_int(audioConvertContext, "out_sample_rate", data->audioCodecContext->sample_rate, 0);
    libffmpeg::av_opt_set_sample_fmt(audioConvertContext, "in_sample_fmt", libffmpeg::AV_SAMPLE_FMT_S16, 0);
    libffmpeg::av_opt_set_sample_fmt(audioConvertContext, "out_sample_fmt", data->audioCodecContext->sample_fmt, 0);
    int ret = libffmpeg::swr_init(audioConvertContext);
    if (ret < 0)
    {
        Helper::ThrowError("Failed to allocate audio reformat context.", ret);
    }

    /* set up silent frame */
    libffmpeg::AVFrame* silentFrame = libffmpeg::av_frame_alloc();
    if (!silentFrame)
    {
        Helper::ThrowError("Failed to allocate audio encode frame.");
    }

    silentFrame->nb_samples = data->audioCodecContext->frame_size;
    silentFrame->format = data->audioCodecContext->sample_fmt;
    silentFrame->channel_layout = data->audioCodecContext->channel_layout;
    silentFrame->channels = data->audioCodecContext->channels;
    silentFrame->sample_rate = data->audioCodecContext->sample_rate;

    /* alloc the frame buffer */
    ret = libffmpeg::av_frame_get_buffer(silentFrame, 0);
    if (ret < 0)
    {
        Helper::ThrowError("Could not allocate audio data buffers.");
    }

    libffmpeg::AVPacket* pkt = libffmpeg::av_packet_alloc();
    if (!pkt) 
    {
        Helper::ThrowError("could not allocate the packet.");
    }

    void* buffer = malloc(data->audioCodecContext->frame_size * data->audioCodecContext->channels * 16);
    for (int i = 0; i < data->audioCodecContext->frame_size * data->audioCodecContext->channels * 2; i++)
    {
        *((int*)buffer + i) = 0x0;
    }

    int got_output;
    int samples_count;
    double duration = 4 * (double)data->audioStream->time_base.den / (double)data->audioStream->time_base.num;
    while (av_stream_get_end_pts(data->audioStream) < duration)
    {
        libffmpeg::AVPacket pkt;
        libffmpeg::av_init_packet(&pkt);

        ret = libffmpeg::av_frame_make_writable(silentFrame);
        if (ret < 0)
        {
            Helper::ThrowError("Could not make frame writable.");
        }

        silentFrame->data[0] = (libffmpeg::uint8_t*) buffer;

        int dst_nb_samples = libffmpeg::av_rescale_rnd(
            libffmpeg::swr_get_delay(audioConvertContext, data->audioCodecContext->sample_rate) + silentFrame->nb_samples,
            data->audioCodecContext->sample_rate, data->audioCodecContext->sample_rate,
            libffmpeg::AV_ROUND_UP);

        ret = libffmpeg::swr_convert(
            audioConvertContext,
            silentFrame->data, dst_nb_samples,
            (const libffmpeg::uint8_t * *) & silentFrame->data,
            silentFrame->nb_samples);

        if (ret < 0)
        {
            Helper::ThrowError("Error while converting audio frame.", ret);
        }

        silentFrame->pts = libffmpeg::av_rescale_q(samples_count, libffmpeg::AVRational{ 1, data->audioCodecContext->sample_rate }, data->audioCodecContext->time_base);
        samples_count += dst_nb_samples;

        ret = libffmpeg::avcodec_encode_audio2(data->audioCodecContext, &pkt, silentFrame, &got_output);
        if (ret < 0)
        {
            Helper::ThrowError("Error while encoding audio frame.", ret);
        }

        if (got_output)
        {
            pkt.stream_index = data->audioStream->index;

            if (ret = av_write_frame(data->formatContext, &pkt))
            {
                Helper::ThrowError("Error while writing audio frame.", ret);
            }

            libffmpeg::av_packet_unref(&pkt);
        }
    }

    free(buffer);
    libffmpeg::av_frame_free(&silentFrame);
LeFrosch
  • 111
  • 1
  • 2
  • 11