3

I developed a simple app that changes the pitch of an audio file with libsox (using this example). Here is my code. It works with 2 input arguments - input file path & output file path:

#include <sox.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>

/*
 * Reads the audio file named by argv[1], runs it through a libsox effects
 * chain of "input" -> "pitch 1000" -> "output", and writes the result to the
 * file named by argv[2].  Every setup step is guarded by assert().
 */
int main(int argc, char * argv[])
{
    /* Handles for the file being read and the file being written. */
    static sox_format_t * src_file, * dst_file;
    sox_effects_chain_t * effects;
    sox_effect_t * effect;
    char * effect_args[10];
    /* Signal description handed from one effect to the next. */
    sox_signalinfo_t chain_signal;
    /* Requested output encoding: signed, 16 bits, everything else default. */
    sox_encodinginfo_t out_encoding = {
        SOX_ENCODING_SIGN2, 16, 0,
        sox_option_default, sox_option_default, sox_option_default,
        sox_false
    };
    /* Requested output signal: 16000 Hz, 1 channel, remaining fields unset. */
    sox_signalinfo_t out_signal = { 16000, 1, 0, 0, NULL };

    assert(argc == 3);
    assert(sox_init() == SOX_SUCCESS);
    assert((src_file = sox_open_read(argv[1], NULL, NULL, NULL)) != NULL);
    assert((dst_file = sox_open_write(argv[2], &out_signal, &out_encoding,
                                      NULL, NULL, NULL)) != NULL);

    effects = sox_create_effects_chain(&src_file->encoding, &dst_file->encoding);

    /* Start the chain from a copy of the input file's signal description. */
    chain_signal = src_file->signal;

    /* Source of the chain: the "input" effect takes the open input handle. */
    effect = sox_create_effect(sox_find_effect("input"));
    effect_args[0] = (char *)src_file;
    assert(sox_effect_options(effect, 1, effect_args) == SOX_SUCCESS);
    assert(sox_add_effect(effects, effect, &chain_signal, &src_file->signal) == SOX_SUCCESS);
    free(effect);

    /* The effect under test: "pitch" with the single option "1000". */
    effect = sox_create_effect(sox_find_effect("pitch"));
    effect_args[0] = "1000";
    assert(sox_effect_options(effect, 1, effect_args) == SOX_SUCCESS);
    assert(sox_add_effect(effects, effect, &chain_signal, &dst_file->signal) == SOX_SUCCESS);
    free(effect);

    /* Sink of the chain: the "output" effect takes the open output handle. */
    effect = sox_create_effect(sox_find_effect("output"));
    effect_args[0] = (char *)dst_file;
    assert(sox_effect_options(effect, 1, effect_args) == SOX_SUCCESS);
    assert(sox_add_effect(effects, effect, &chain_signal, &dst_file->signal) == SOX_SUCCESS);
    free(effect);

    /* Run the whole chain, then tear everything down. */
    sox_flow_effects(effects, NULL, NULL);

    sox_delete_effects_chain(effects);
    sox_close(dst_file);
    sox_close(src_file);
    sox_quit();

    return 0;
}

But the output file produced by the code above has its tempo changed as well (it is almost twice as long as the input). Here are the input and output details:

Input File     : 'input.wav'
Channels       : 1
Sample Rate    : 16000
Precision      : 16-bit
Duration       : 00:00:11.87 = 189921 samples ~ 890.255 CDDA sectors

Input File     : 'output.wav'
Channels       : 1
Sample Rate    : 16000
Precision      : 16-bit
Duration       : 00:00:21.15 = 338401 samples ~ 1586.25 CDDA sectors

By contrast, the sox command-line app works fine:

sox input.wav output_app.wav pitch 1000

It generates a file with the same duration as input:

Input File     : 'output_app.wav'
Channels       : 1
Sample Rate    : 16000
Precision      : 16-bit
Duration       : 00:00:11.87 = 189921 samples ~ 890.255 CDDA sectors

Has anyone here run into the same issue? Or is there another option that I have to pass to the sox_effect to make this effect work properly?

DucTM31
  • 89
  • 6
  • 2
    What's with those `assert`s with comma operator, please unlearn that :D that looks really hard to read! Also you're misusing assert. An `assert` is supposed to never fail, it will *state the invariant*, so you've stated that *"no one will ever invoke this program without giving it exactly 2 command line arguments"* and *"sox will always successfully open the file given to it by the first argument"*. Other than that, I believe a good question and useful for future readers :D – Antti Haapala -- Слава Україні Apr 16 '20 at 04:29
  • regarding: *It generates a file with the same duration as input:* Perhaps I'm missing some key detail, but `00:00:11.87` does not look like `00:00:21.15` – user3629249 Apr 16 '20 at 14:19
  • regarding `assert(argc == 3);` When checking for command line parameters (and they are not there) it is much better to tell the user the right way to invoke the program. Similar to: `fprintf( stderr, "USAGE: %s \n", argv[0] );` Similar considerations exist for the other calls to `assert()` – user3629249 Apr 16 '20 at 14:27
  • to change the pitch, the duration of the sound, the number of samples taken, should not change. Only the contents of the samples. Also the sample rate is well beyond the nyquist frequency so the modification of the pitch should not distort the sound – user3629249 Apr 16 '20 at 14:34
  • @user3629249 I just copy the code from the example and change the effect to see whether or not it works as expected. The statement "It generates a file with the same duration as input" is about running the sox app from terminal. And it generated a file with 11.87s (the output_app.wav) file. – DucTM31 Apr 17 '20 at 02:22

2 Answers

2

The "pitch" effect of libsox changes the audio sample rate. If you inspect the sample rate after "pitch", you'll find that it differs from the input's. To save the audio file with the same sample rate, you need to add a "rate" effect after the "pitch" effect, like this:

#include <sox.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>

/*
 * Create the libsox effect called `name`, configure it with the `argc`
 * options in `argv`, and append it to `chain`.
 *
 * `interm_signal` describes the signal at the current end of the chain and is
 * handed to sox_add_effect() as the effect's input signal; `target_signal` is
 * handed over as the signal the effect should produce.
 *
 * Returns 0 on success.  On failure a diagnostic is printed to stderr and
 * -1 is returned.
 */
static int append_effect(sox_effects_chain_t *chain, const char *name,
                         int argc, char *argv[],
                         sox_signalinfo_t *interm_signal,
                         sox_signalinfo_t const *target_signal)
{
    sox_effect_t *effect;
    int rc = -1;

    /* Never hand a NULL handler to sox_create_effect(): look the name up first. */
    effect = sox_find_effect(name) != NULL
                 ? sox_create_effect(sox_find_effect(name))
                 : NULL;
    if (effect == NULL) {
        fprintf(stderr, "cannot create effect '%s'\n", name);
        return -1;
    }

    if (sox_effect_options(effect, argc, argv) != SOX_SUCCESS) {
        fprintf(stderr, "invalid options for effect '%s'\n", name);
    } else if (sox_add_effect(chain, effect, interm_signal, target_signal) != SOX_SUCCESS) {
        fprintf(stderr, "cannot add effect '%s' to the chain\n", name);
    } else {
        rc = 0;
    }

    /* The chain keeps its own copy of the effect, so the local handle is
     * released here, exactly as the original code did after each add. */
    free(effect);
    return rc;
}

/*
 * Usage: <program> <input-file> <output-file>
 *
 * Shifts the pitch of the input file by 1000 cents and writes the result as
 * 16 kHz, mono, signed 16-bit audio.  The effects chain is
 * "input" -> "pitch 1000" -> "rate -m" -> "output".  The "rate" stage is
 * required because "pitch" changes the sample rate of the intermediate
 * signal; without it the output is written with a changed tempo.
 *
 * Unlike assert()-based checks, every failure is reported on stderr and the
 * checks stay active when the program is compiled with NDEBUG.
 *
 * Returns EXIT_SUCCESS when the output was written, EXIT_FAILURE otherwise.
 */
int main(int argc, char * argv[])
{
    sox_format_t *in = NULL, *out = NULL; /* input and output files */
    sox_effects_chain_t *chain = NULL;
    char *args[1];
    sox_signalinfo_t interm_signal; /* @ intermediate points in the chain. */
    int status = EXIT_FAILURE;
    sox_encodinginfo_t out_encoding = {
        SOX_ENCODING_SIGN2,  /* signed PCM */
        16,                  /* bits per sample */
        0,
        sox_option_default,
        sox_option_default,
        sox_option_default,
        sox_false
    };
    sox_signalinfo_t out_signal = {
        16000,  /* sample rate in Hz */
        1,      /* channels */
        0,
        0,
        NULL
    };

    if (argc != 3) {
        fprintf(stderr, "Usage: %s <input-file> <output-file>\n",
                argc > 0 ? argv[0] : "pitch");
        return EXIT_FAILURE;
    }

    if (sox_init() != SOX_SUCCESS) {
        fprintf(stderr, "cannot initialise libsox\n");
        return EXIT_FAILURE;
    }

    in = sox_open_read(argv[1], NULL, NULL, NULL);
    if (in == NULL) {
        fprintf(stderr, "cannot open '%s' for reading\n", argv[1]);
        goto cleanup;
    }

    out = sox_open_write(argv[2], &out_signal, &out_encoding, NULL, NULL, NULL);
    if (out == NULL) {
        fprintf(stderr, "cannot open '%s' for writing\n", argv[2]);
        goto cleanup;
    }

    chain = sox_create_effects_chain(&in->encoding, &out->encoding);
    if (chain == NULL) {
        fprintf(stderr, "cannot create the effects chain\n");
        goto cleanup;
    }

    interm_signal = in->signal; /* NB: deep copy */

    /* The "input" and "output" effects take the open file handle as their
     * single option, disguised as a char pointer. */
    args[0] = (char *)in;
    if (append_effect(chain, "input", 1, args, &interm_signal, &in->signal) != 0)
        goto cleanup;

    args[0] = "1000"; /* pitch shift in cents */
    if (append_effect(chain, "pitch", 1, args, &interm_signal, &out->signal) != 0)
        goto cleanup;

    /* interm_signal.rate has been changed by "pitch"; resample back to the
     * output rate ("-m" selects medium quality). */
    args[0] = "-m";
    if (append_effect(chain, "rate", 1, args, &interm_signal, &out->signal) != 0)
        goto cleanup;

    args[0] = (char *)out;
    if (append_effect(chain, "output", 1, args, &interm_signal, &out->signal) != 0)
        goto cleanup;

    if (sox_flow_effects(chain, NULL, NULL) != SOX_SUCCESS) {
        fprintf(stderr, "processing failed\n");
        goto cleanup;
    }

    status = EXIT_SUCCESS;

cleanup:
    if (chain != NULL)
        sox_delete_effects_chain(chain);
    /* Closing the output finalises the file, so a failure here means the
     * result cannot be trusted. */
    if (out != NULL && sox_close(out) != SOX_SUCCESS) {
        fprintf(stderr, "error while closing '%s'\n", argv[2]);
        status = EXIT_FAILURE;
    }
    if (in != NULL)
        sox_close(in);
    sox_quit();

    return status;
}

Optionally, you may add a "dither" effect after "rate" to obtain a better result.

tgarm
  • 473
  • 3
  • 8
0

After searching for a while, and thanks to this, I found out that in order to maintain the audio tempo, a rate effect must be added to the effect chain after the pitch effect.

DucTM31
  • 89
  • 6