Resampling audio using libswresample, leaves small amount of noise after resampling

Question

I'm trying to resample audio from 44 kHz to 48 kHz and I'm getting a small, light noise after resampling, as if someone were gently tapping the mic. This happens both ways: from 48 kHz to 44 kHz and vice versa.

I've read that this can happen because the SwrContext still has some data left, and that I should flush the context before resampling the next frame. Although this helps a little (the noise is less noticeable), it's still present.

I've tried using FFmpeg resample filter instead, but the output is just loud incoherent noise. I'm pretty sure that libswresample should not output any noise on resampling which means that I just don't know how to use it well and I'm missing some options.

This is the code for resampler.

int ResampleFrame(VideoState * videoState, AVFrame *decoded_audio_frame,     enum AVSampleFormat out_sample_fmt, uint8_t * out_buf)
{
 int in_sample_rate = videoState->audio->ptrAudioCodecCtx_->sample_rate;
 int out_sample_rate = SAMPLE_RATE;

// get an instance of the AudioResamplingState struct, create if NULL
AudioResamplingState* arState = getAudioResampling(videoState->audio->ptrAudioCodecCtx_->channel_layout);

if (!arState->swr_ctx)
{
    printf("swr_alloc error.\n");
    return -1;
}

// get input audio channels
arState->in_channel_layout = (videoState->audio->ptrAudioCodecCtx_->channels ==
            av_get_channel_layout_nb_channels(videoState->audio->ptrAudioCodecCtx_->channel_layout)) ?
            videoState->audio->ptrAudioCodecCtx_->channel_layout :
            av_get_default_channel_layout(videoState->audio->ptrAudioCodecCtx_->channels);


// check input audio channels correctly retrieved
if (arState->in_channel_layout <= 0)
{
    printf("in_channel_layout error.\n");
    return -1;
}


arState->out_channel_layout = AV_CH_LAYOUT_STEREO;

// retrieve number of audio samples (per channel)
arState->in_nb_samples = decoded_audio_frame->nb_samples;
if (arState->in_nb_samples <= 0)
{
    printf("in_nb_samples error.\n");
    return -1;
}

// Set SwrContext parameters for resampling
av_opt_set_int(arState->swr_ctx, "in_channel_layout", arState->in_channel_layout, 0);
av_opt_set_int(arState->swr_ctx, "in_sample_rate", in_sample_rate, 0);
av_opt_set_sample_fmt(arState->swr_ctx, "in_sample_fmt", videoState->audio->ptrAudioCodecCtx_->sample_fmt, 0);


// Set SwrContext parameters for resampling
av_opt_set_int(arState->swr_ctx, "out_channel_layout", arState->out_channel_layout, 0);
av_opt_set_int(arState->swr_ctx, "out_sample_rate", out_sample_rate, 0);
av_opt_set_sample_fmt(arState->swr_ctx, "out_sample_fmt", out_sample_fmt, 0);


// initialize SWR context after user parameters have been set
int ret = swr_init(arState->swr_ctx);
if (ret < 0)
   {
    printf("Failed to initialize the resampling context.\n");
    return -1;
   }


 // retrieve output samples number taking into account the progressive delay
int64_t delay = swr_get_delay(arState->swr_ctx, videoState->audio->ptrAudioCodecCtx_->sample_rate) + arState->in_nb_samples;
arState->out_nb_samples = av_rescale_rnd(delay, out_sample_rate, in_sample_rate, AV_ROUND_UP );

// check output samples number was correctly rescaled
if (arState->out_nb_samples <= 0)
{
    printf("av_rescale_rnd error\n");
    return -1;
}

// get number of output audio channels
arState->out_nb_channels = av_get_channel_layout_nb_channels(arState->out_channel_layout);

// allocate data pointers array for arState->resampled_data and fill data
// pointers and linesize accordingly
// check memory allocation for the resampled data was successful
ret = av_samples_alloc_array_and_samples(&arState->resampled_data, &arState->out_linesize, arState->out_nb_channels, arState->out_nb_samples, out_sample_fmt, 0);
if (ret < 0)
   {
    printf("av_samples_alloc_array_and_samples() error: Could not allocate destination samples.\n");
    return -1;
   }


if (arState->swr_ctx)
   {
    // do the actual audio data resampling
    // check audio conversion was successful
    int ret_num_samples = swr_convert(arState->swr_ctx,arState->resampled_data,arState->out_nb_samples,(const uint8_t**)decoded_audio_frame->data, decoded_audio_frame->nb_samples);
    //int ret_num_samples = swr_convert_frame(arState->swr_ctx,arState->resampled_data,arState->out_nb_samples,(const uint8_t**)decoded_audio_frame->data, decoded_audio_frame->nb_samples);

    if (ret_num_samples < 0)
       {
        printf("swr_convert_error.\n");
        return -1;
       }


    // get the required buffer size for the given audio parameters
    // check audio buffer size
    arState->resampled_data_size = av_samples_get_buffer_size(&arState->out_linesize,   arState->out_nb_channels,ret_num_samples,out_sample_fmt,1);

    if (arState->resampled_data_size < 0)
       {
        printf("av_samples_get_buffer_size error.\n");
        return -1;
       }
   } else {
           printf("swr_ctx null error.\n");
           return -1;
          }



// copy the resampled data to the output buffer
memcpy(out_buf, arState->resampled_data[0], arState->resampled_data_size);


// flush the swr context
int delayed = swr_convert(arState->swr_ctx,arState->resampled_data,arState->out_nb_samples,NULL,0);



if (arState->resampled_data)
   {
    av_freep(&arState->resampled_data[0]);
   }

av_freep(&arState->resampled_data);
arState->resampled_data = NULL;

int ret_data_size = arState->resampled_data_size;



return ret_data_size;
}

I also tried using the filter as shown here, but my output is just noise.

This is my filter code

/*
 * Pushes one decoded frame into the audio filter graph, pulls one filtered
 * frame from the sink and copies its samples into out_buf.
 *
 * frame   - decoded input frame; it is unreferenced on every failure path and
 *           left untouched on success (it is added with KEEP_REF).
 * out_buf - destination; must be large enough for the returned byte count.
 *
 * Returns the number of bytes copied, or 0 when no output was produced
 * (error, or the sink has no complete frame available yet).
 *
 * The size is computed for 2 channels of AV_SAMPLE_FMT_S16, matching what the
 * filter description and sink options in this file request. S16 is a packed
 * format, so all samples live in data[0].
 *
 * NOTE(review): only one frame is pulled per call; if the graph produces more
 * output frames than it receives input frames, the surplus stays queued in the
 * sink - confirm whether callers need to drain it.
 */
int  ResampleFrame(AVFrame *frame, uint8_t *out_buf)
{
    /* Push the decoded frame into the filtergraph */
    qint32 ret = av_buffersrc_add_frame_flags(buffersrc_ctx1, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
    if (ret < 0)
    {
        printf("ResampleFrame: Error adding frame to buffer\n");
        av_frame_unref(frame);
        return 0;
    }

    AVFrame *resampled_frame = av_frame_alloc();
    if (!resampled_frame)
    {
        av_frame_unref(frame);
        return 0;
    }

    /* Pull filtered frames from the filtergraph */
    ret = av_buffersink_get_frame(buffersink_ctx1, resampled_frame);
    if (ret < 0)
    {
        av_frame_unref(frame);
        // av_frame_free releases the AVFrame itself, not only its buffers
        av_frame_free(&resampled_frame);
        return 0;
    }

    int buffer_size = av_samples_get_buffer_size(NULL, 2, resampled_frame->nb_samples, AV_SAMPLE_FMT_S16, 1);
    if (buffer_size < 0)
    {
        av_frame_unref(frame);
        av_frame_free(&resampled_frame);
        return 0;
    }

    // copy the sample bytes from plane 0; `data` itself is the array of plane
    // pointers, so copying from it yields pointer values instead of audio
    memcpy(out_buf, resampled_frame->data[0], buffer_size);

    av_frame_free(&resampled_frame);
    return buffer_size;
}





// Filter chain description parsed by avfilter_graph_parse_ptr() in InitAudioFilter():
// aresample to 48000, aformat restricted to s16 / stereo, asetnsamples with n=1024 and p=0.
QString filter_description1 = "aresample=48000,aformat=sample_fmts=s16:channel_layouts=stereo,asetnsamples=n=1024:p=0";

/*
 * Builds the audio filter graph described by filter_description1 between an
 * "abuffer" source (buffersrc_ctx1) and an "abuffersink" sink (buffersink_ctx1),
 * storing the graph in filter_graph.
 *
 * inputStream - stream whose codec parameters (time base, sample rate, sample
 *               format, channel layout) describe the frames fed to the source.
 *
 * Returns 0 on success and a negative value on failure. filterGraphInitialized_
 * is set to true only when the graph was parsed and configured successfully.
 *
 * The AVFilterInOut lists are always released before returning. On failure the
 * partially built graph is left in filter_graph for the caller to free.
 */
int InitAudioFilter(AVStream *inputStream) 
{
    char args[512];
    int ret = 0;
    const AVFilter *buffersrc = avfilter_get_by_name("abuffer");
    const AVFilter *buffersink = avfilter_get_by_name("abuffersink");
    AVFilterInOut *outputs = avfilter_inout_alloc();
    AVFilterInOut *inputs = avfilter_inout_alloc();
    AVFilterLink *outlink = NULL;
    uint64_t in_channel_layout = inputStream->codec->channel_layout;

    const enum AVSampleFormat out_sample_fmts[] = {AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE};
    const int64_t out_channel_layouts[] = {AV_CH_LAYOUT_STEREO, -1};
    const int out_sample_rates[] = {48000, -1};

    filter_graph = avfilter_graph_alloc();

    if (!buffersrc || !buffersink || !outputs || !inputs || !filter_graph)
    {
        printf("InitAudioFilter: Unable to allocate filter graph resources\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    // some decoders leave the layout unset; fall back to the default layout for
    // the channel count so the source is not created with channel_layout=0x0
    if (!in_channel_layout)
    {
        in_channel_layout = av_get_default_channel_layout(inputStream->codec->channels);
    }

    snprintf(args, sizeof(args), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%" PRIx64,
                         inputStream->codec->time_base.num, inputStream->codec->time_base.den,
                         inputStream->codec->sample_rate,
                         av_get_sample_fmt_name(inputStream->codec->sample_fmt),
                         in_channel_layout);

    ret = avfilter_graph_create_filter(&buffersrc_ctx1, buffersrc, "in", args, NULL, filter_graph);
    if (ret < 0)
    {
        printf("InitAudioFilter: Unable to create buffersrc\n");
        ret = -1;
        goto end;
    }

    ret = avfilter_graph_create_filter(&buffersink_ctx1, buffersink, "out", NULL, NULL, filter_graph);
    if (ret < 0)
    {
        printf("InitAudioFilter: Unable to create buffersink\n");
        goto end;
    }

    // set opt SAMPLE FORMATS
    ret = av_opt_set_int_list(buffersink_ctx1, "sample_fmts", out_sample_fmts, -1, AV_OPT_SEARCH_CHILDREN);
    if (ret < 0)
    {
        printf("InitAudioFilter: Cannot set output sample format\n");
        goto end;
    }

    // set opt CHANNEL LAYOUTS
    ret = av_opt_set_int_list(buffersink_ctx1, "channel_layouts", out_channel_layouts, -1, AV_OPT_SEARCH_CHILDREN);
    if (ret < 0)
    {
        printf("InitAudioFilter: Cannot set output channel layout\n");
        goto end;
    }

    // set opt OUT SAMPLE RATES
    ret = av_opt_set_int_list(buffersink_ctx1, "sample_rates", out_sample_rates, -1, AV_OPT_SEARCH_CHILDREN);
    if (ret < 0)
    {
        printf("InitAudioFilter: Cannot set output sample rate\n");
        goto end;
    }

    /* The source's output pad is the open input ("in") of the parsed chain. */
    outputs -> name = av_strdup("in");
    outputs -> filter_ctx = buffersrc_ctx1;
    outputs -> pad_idx = 0;
    outputs -> next = NULL;

    /* The sink's input pad is the open output ("out") of the parsed chain. */
    inputs -> name = av_strdup("out");
    inputs -> filter_ctx = buffersink_ctx1;
    inputs -> pad_idx = 0;
    inputs -> next = NULL;

    if (!outputs->name || !inputs->name)
    {
        printf("InitAudioFilter: Unable to allocate filter graph resources\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    ret = avfilter_graph_parse_ptr(filter_graph, filter_description1.toStdString().c_str(), &inputs, &outputs, NULL);
    if (ret < 0)
    {
        printf("InitAudioFilter: Could not add the filter to graph\n");
        goto end;
    }

    ret = avfilter_graph_config(filter_graph, NULL);
    if (ret < 0)
    {
        printf("InitAudioFilter: Could not configure the graph\n");
        goto end;
    }

    /* Print summary of the sink buffer
     * Note: args buffer is reused to store channel layout string */
    outlink = buffersink_ctx1->inputs[0];
    av_get_channel_layout_string(args, sizeof(args), -1, outlink->channel_layout);

    printf("Output: srate:%dHz fmt:%s chlayout: %s\n", (int) outlink->sample_rate,
                                                      av_get_sample_fmt_name((enum AVSampleFormat) outlink->format),
                                                      args);

    filterGraphInitialized_ = true;
    ret = 0;

end:
    // avfilter_graph_parse_ptr() updates both lists; whatever is left is ours to free
    avfilter_inout_free(&inputs);
    avfilter_inout_free(&outputs);
    return ret;
}

Since I don't have much experience with filters, or with audio for that matter, I'm probably missing something here as well, but I can't figure out what.

Thanks

worth putting your output into a audio editor with a waveform view (e.g. audacity) which might tell you whether you are missing samples or have junk samples in your waveform — Alan Birtles, Jul 20 '20 at 07:22
I exported a few seconds of resampled audio as described [here](https://manual.audacityteam.org/man/sample_data_import.html) and imported it into Audacity. Although the sound is completely distorted (probably due to some error I made while processing the samples), I can still hear crackling, as if part of the sample data is cut off, but only a small part, maybe a few dozen bytes. I can also see that the flushing swr_convert call returns 17. Maybe these values are connected? — Milo, Jul 20 '20 at 11:09

Resampling audio using libswresample, leaves small amount of noise after resampling

0 Answers0