28

I plan to create a program that will visualize the audio waveform of a .wav file.

So far, I have started by reading the header of the wav file. This is the code I use:

#include <cstdint>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <string>

using namespace std;
using std::string;
using std::fstream;

// In-memory image of a canonical 44-byte WAV header (RIFF descriptor,
// "fmt " sub-chunk and the start of the "data" sub-chunk).
//
// The struct is filled by a single raw fread(), so every field must have
// exactly the width it has on disk. Fixed-width integer types are used
// because `unsigned long` is 64 bits wide on LP64 platforms, which would
// shift every field after the first one.
typedef struct  WAV_HEADER{
    char                RIFF[4];        // RIFF Header      Magic header
    uint32_t            ChunkSize;      // RIFF Chunk Size
    char                WAVE[4];        // WAVE Header
    char                fmt[4];         // FMT header
    uint32_t            Subchunk1Size;  // Size of the fmt chunk
    uint16_t            AudioFormat;    // Audio format 1=PCM,6=mulaw,7=alaw, 257=IBM Mu-Law, 258=IBM A-Law, 259=ADPCM
    uint16_t            NumOfChan;      // Number of channels 1=Mono 2=Stereo
    uint32_t            SamplesPerSec;  // Sampling Frequency in Hz
    uint32_t            bytesPerSec;    // bytes per second
    uint16_t            blockAlign;     // 2=16-bit mono, 4=16-bit stereo
    uint16_t            bitsPerSample;  // Number of bits per sample
    char                Subchunk2ID[4]; // "data"  string
    uint32_t            Subchunk2Size;  // Sampled data length

}wav_hdr;

// Fail the build rather than silently misread files if the compiler pads the struct.
static_assert(sizeof(wav_hdr) == 44, "wav_hdr must match the 44-byte on-disk WAV header");

// Function prototypes 
int getFileSize(FILE *inFile); 

int main(int argc,char *argv[]){
    wav_hdr wavHeader;
    FILE *wavFile;
    int headerSize = sizeof(wav_hdr),filelength = 0;

    string answer;

    do{
        string input;
        string answer;

        const char* filePath;

        cout << "Pick wav file from the Windows Media File: ";
        cin >> input;
        cin.get();

        cout << endl;

        path = "C:\\Windows\\Media\\" + input + ".wav";
        filePath = path.c_str();

        wavFile = fopen( filePath , "r" );

        if(wavFile == NULL){
            printf("Can not able to open wave file\n");
            //exit(EXIT_FAILURE);
        }

        fread(&wavHeader,headerSize,1,wavFile);
        filelength = getFileSize(wavFile);
        fclose(wavFile);

        cout << "File is                    :" << filelength << " bytes." << endl;

        cout << "RIFF header                :" << wavHeader.RIFF[0] 
                                                << wavHeader.RIFF[1] 
                                                << wavHeader.RIFF[2] 
                                                << wavHeader.RIFF[3] << endl;

        cout << "WAVE header                :" << wavHeader.WAVE[0] 
                                                << wavHeader.WAVE[1] 
                                                << wavHeader.WAVE[2] 
                                                << wavHeader.WAVE[3] 
                                                << endl;

        cout << "FMT                        :" << wavHeader.fmt[0] 
                                                << wavHeader.fmt[1] 
                                                << wavHeader.fmt[2] 
                                                << wavHeader.fmt[3] 
                                                << endl;

        cout << "Data size                  :" << wavHeader.ChunkSize << endl;

        // Display the sampling Rate form the header
        cout << "Sampling Rate              :" << wavHeader.SamplesPerSec << endl;
        cout << "Number of bits used        :" << wavHeader.bitsPerSample << endl;
        cout << "Number of channels         :" << wavHeader.NumOfChan << endl;
        cout << "Number of bytes per second :" << wavHeader.bytesPerSec << endl;
        cout << "Data length                :" << wavHeader.Subchunk2Size << endl;
        cout << "Audio Format               :" << wavHeader.AudioFormat << endl;
        // Audio format 1=PCM,6=mulaw,7=alaw, 257=IBM Mu-Law, 258=IBM A-Law, 259=ADPCM 


        cout << "Block align                :" << wavHeader.blockAlign << endl;

        cout << "Data string                :" << wavHeader.Subchunk2ID[0] 
                                                << wavHeader.Subchunk2ID[1]
                                                << wavHeader.Subchunk2ID[2] 
                                                << wavHeader.Subchunk2ID[3] 
                                                << endl;

        cout << endl << endl << "Try something else? (y/n)";
        cin >> answer;
        //cin.get();
        cout << endl << endl;

    }while( answer == "y" );


    getchar();
    return 0;
} 
// Returns the offset reported at the end of inFile (its size in bytes for a
// binary stream), truncated to int. The stream is always left positioned at
// offset 0, not at the position it had on entry.
int getFileSize(FILE *inFile){
    fseek(inFile, 0L, SEEK_END);
    const long endOffset = ftell(inFile);

    // Rewind so the caller can read the file from the start again.
    fseek(inFile, 0L, SEEK_SET);

    return static_cast<int>(endOffset);
}

I've tried it several times and the data it gives seems consistent through different wav files in the Media folder in the Windows folder.

The next step would be to store the actual sample data of the wav file in a vector. However, I'm quite clueless about how to do this. The online solutions I found only went as far as reading the header.

Any ideas on how to store (and hopefully display) the actual data of the wav file? Thanks!

Razgriz
  • 7,179
  • 17
  • 78
  • 150
  • 6
    Just a note, you should _not_ use `unsigned long`, `short` or even `char` or other such types for reading binary files. The size and signedness of those types may not be exactly what you expect them to be (especially `long` which can be either 32 or 64 bits depending on platform). Instead use the types from [`<cstdint>`](http://en.cppreference.com/w/cpp/types/integer), like `uint32_t` etc. – Some programmer dude Dec 01 '12 at 15:25
  • [Similar question for C#](https://stackoverflow.com/q/8754111/) and (some specific questions with generic title) [1](https://stackoverflow.com/q/2457482/) [2](https://stackoverflow.com/q/18771375) [3](https://stackoverflow.com/q/69649876) [4](https://stackoverflow.com/q/20028389) – user202729 Oct 24 '21 at 00:36

3 Answers

23

This image is taken from a Stanford course

WAV File Format

So you can see that the audio data occurs immediately after the headers you already read and there will be Subchunk2Size bytes of audio data.

The pseudocode for this would be

// Pseudocode: consume the RIFF descriptor and the fmt sub-chunk, then treat
// whatever follows as the data sub-chunk and copy its payload byte by byte.
ReadRIFF();
ReadFMT();
// NOTE(review): this assumes the next sub-chunk is "data". Other chunks may
// come first, so compare chunk2Id against "data" and skip chunk2Size bytes
// when it does not match. The id is a 4-character tag rather than a number;
// reading it big-endian only keeps the characters in file order.
int32 chunk2Id = Read32(BigEndian);
int32 chunk2Size = Read32(LittleEndian);
// chunk2Size counts bytes, not samples.
for (int i = 0; i < chunk2Size; i++)
{
    audioData[i] = ReadByte();
}

If the audio is stereo you'll have two audio streams in data. If the audio is compressed (mp3, aac, etc) you'll have to decompress it first.

Scott Stensland
  • 26,870
  • 12
  • 93
  • 104
James
  • 9,064
  • 3
  • 31
  • 49
  • 9
    One very important thing to note is that fmt chunks are not always the same length. They can be an instance of WAVEFORMATEX which has extra bytes at the end. Use Subchunk1 size to find out what the real size of the fmt section is. You also need to be aware that the data chunk doesn't necessarily follow the fmt chunk. A WAV file can have more than just a fmt or data chunk, so it is always best to check that chunk2 Id is 'data' and if not, skip over it until you find the data chunk. – Mark Heath Dec 02 '12 at 07:51
  • Another important thing to note is that RIFF is an extensible format, and the "DATA" sub-chunk is not guaranteed to come immediately after the "FMT " chunk. http://tinyurl.com/riff-wav – Talia Mar 12 '14 at 02:06
  • 1
    Furthermore, if you are decoding the IBM/MS RIFF format, all the multi-byte words are little endian. None are big endian, as this image claims. (In fact, the numbers that this image claims are big endian aren't meant to represent numbers at all, and the endianness is merely a matter of how you wish to interpret the series of bytes as a number.) – Talia Mar 12 '14 at 02:12
  • Your reference link is dead, so it's not clear how *canonical* your advice is. I think it's dangerous to expect the `"data"` subchunk right after `"fmt "` – Wolf Jan 04 '17 at 21:13
16

I know this is an old post, but your fread parameters are switched, here is a more correct version (requires g++-4.7 or higher with -std=c++11 flag like this "g++ -std=c++11 WaveReader.cpp -o WaveReader").

#include <iostream>
#include <string>
#include <fstream>
#include <cstdint>

using std::cin;
using std::cout;
using std::endl;
using std::fstream;
using std::string;

// Byte-for-byte image of a canonical WAV header, filled by a raw fread().
// Every member uses a fixed-width type so its size matches the file layout.
struct WAV_HEADER
{
    // ---- RIFF chunk descriptor ----
    uint8_t  RIFF[4];         // magic tag, "RIFF"
    uint32_t ChunkSize;       // RIFF chunk size
    uint8_t  WAVE[4];         // format tag, "WAVE"

    // ---- "fmt" sub-chunk ----
    uint8_t  fmt[4];          // sub-chunk tag
    uint32_t Subchunk1Size;   // size of the fmt chunk
    uint16_t AudioFormat;     // 1=PCM, 6=mulaw, 7=alaw, 257=IBM Mu-Law, 258=IBM A-Law, 259=ADPCM
    uint16_t NumOfChan;       // channel count: 1=mono, 2=stereo
    uint32_t SamplesPerSec;   // sampling frequency in Hz
    uint32_t bytesPerSec;     // bytes per second
    uint16_t blockAlign;      // 2=16-bit mono, 4=16-bit stereo
    uint16_t bitsPerSample;   // number of bits per sample

    // ---- "data" sub-chunk ----
    uint8_t  Subchunk2ID[4];  // sub-chunk tag, "data"
    uint32_t Subchunk2Size;   // length of the sampled data
};

// Short alias used by the rest of the program.
using wav_hdr = WAV_HEADER;

// Function prototypes
int getFileSize(FILE* inFile);

// Reads the header of a WAV file, streams the remainder of the file through
// a fixed buffer, then prints the header fields.
// The file name comes from argv[1] when given, otherwise from stdin.
// Returns 0 on success and 1 when the file cannot be opened or is too short
// to contain a complete header.
int main(int argc, char* argv[])
{
    // `input` must outlive filePath, which may point into its storage.
    string input;
    const char* filePath = nullptr;
    if (argc <= 1)
    {
        cout << "Input wave file name: ";
        cin >> input;
        cin.get();
        filePath = input.c_str();
    }
    else
    {
        filePath = argv[1];
        cout << "Input wave file name: " << filePath << endl;
    }

    // "rb": WAV is binary data; text mode would translate bytes on Windows.
    FILE* wavFile = fopen(filePath, "rb");
    if (wavFile == nullptr)
    {
        fprintf(stderr, "Unable to open wave file: %s\n", filePath);
        return 1;
    }

    //Read the header
    wav_hdr wavHeader;
    const size_t headerSize = sizeof(wav_hdr);
    const size_t headerBytes = fread(&wavHeader, 1, headerSize, wavFile);
    cout << "Header Read " << headerBytes << " bytes." << endl;
    if (headerBytes != headerSize)
    {
        // A partial read leaves some fields indeterminate; do not print them.
        fprintf(stderr, "File is too short to contain a wave header: %s\n", filePath);
        fclose(wavFile);
        return 1;
    }

    //Read the data
    // A small fixed-size buffer lives on the stack; no heap allocation is needed.
    constexpr size_t BUFFER_SIZE = 4096;
    int8_t buffer[BUFFER_SIZE];
    size_t bytesRead = 0;
    while ((bytesRead = fread(buffer, sizeof buffer[0], BUFFER_SIZE, wavFile)) > 0)
    {
        /** DO SOMETHING WITH THE WAVE DATA HERE **/
        cout << "Read " << bytesRead << " bytes." << endl;
    }

    const int filelength = getFileSize(wavFile);

    cout << "File is                    :" << filelength << " bytes." << endl;
    cout << "RIFF header                :" << wavHeader.RIFF[0] << wavHeader.RIFF[1] << wavHeader.RIFF[2] << wavHeader.RIFF[3] << endl;
    cout << "WAVE header                :" << wavHeader.WAVE[0] << wavHeader.WAVE[1] << wavHeader.WAVE[2] << wavHeader.WAVE[3] << endl;
    cout << "FMT                        :" << wavHeader.fmt[0] << wavHeader.fmt[1] << wavHeader.fmt[2] << wavHeader.fmt[3] << endl;
    cout << "Data size                  :" << wavHeader.ChunkSize << endl;

    // Display the sampling Rate from the header
    cout << "Sampling Rate              :" << wavHeader.SamplesPerSec << endl;
    cout << "Number of bits used        :" << wavHeader.bitsPerSample << endl;
    cout << "Number of channels         :" << wavHeader.NumOfChan << endl;
    cout << "Number of bytes per second :" << wavHeader.bytesPerSec << endl;
    cout << "Data length                :" << wavHeader.Subchunk2Size << endl;
    // Audio format 1=PCM,6=mulaw,7=alaw, 257=IBM Mu-Law, 258=IBM A-Law, 259=ADPCM
    cout << "Audio Format               :" << wavHeader.AudioFormat << endl;

    cout << "Block align                :" << wavHeader.blockAlign << endl;
    cout << "Data string                :" << wavHeader.Subchunk2ID[0] << wavHeader.Subchunk2ID[1] << wavHeader.Subchunk2ID[2] << wavHeader.Subchunk2ID[3] << endl;

    fclose(wavFile);
    return 0;
}

// Measures inFile by seeking to its end and taking the offset reported there.
// The result is narrowed to int, and the stream is left rewound to offset 0
// regardless of where it was positioned on entry.
int getFileSize(FILE* inFile)
{
    fseek(inFile, 0, SEEK_END);
    const int length = static_cast<int>(ftell(inFile));
    fseek(inFile, 0, SEEK_SET);
    return length;
}
kory
  • 472
  • 4
  • 9
  • 3
    This is only correct for some WAV files. The only guarantee for the `"fmt "` and `"data"` sub chunks is that `"data"` comes after `"fmt "`; there may be chunks in between that you have to skip. Each sub chunk has a 32-bit length after the ID, so skipping unknown/unsupported sub chunks is easy. – Wolf Jan 04 '17 at 21:11
  • @Wolf are you aware of any library, where one can get raw PCM data starting from frame `m` up to frame `n` and store it into something like `std::vector` ? Sometimes a person needs only raw data in order to process it, and there are so many complicated libraries around that I am getting really confused which one can perform such a very simple task without writing several pages of code... – John Smith Jun 28 '22 at 09:43
  • No I'm not. I think it's not too hard to build your own extraction routines using the information given in the answers so far (also in: [*Microsoft WAVE soundfile format*](http://soundfile.sapp.org/doc/WaveFormat/ "Microsoft WAVE soundfile format")) – Wolf Jun 29 '22 at 05:46
0

If you want to read a WAV file while covering cases where fmt and data chunks are "mixed" with other chunks:

#include <iostream>
#include <fstream>
#include <cstring>

using namespace std;

// First 12 bytes of the file, read in one piece before any sub-chunk.
struct RIFFHeader
{
    char     chunk_id[4];  // container tag; main() requires "RIFF"
    uint32_t chunk_size;   // size field that follows the tag
    char     format[4];    // form type; main() requires "WAVE"
};

// 8-byte preamble found in front of every sub-chunk.
struct ChunkInfo
{
    char     chunk_id[4];  // four-character tag, e.g. "fmt " or "data"
    uint32_t chunk_size;   // number of payload bytes that follow this preamble
};

// Payload of the "fmt " sub-chunk (16 bytes), read straight from the file.
struct FmtChunk
{
    uint16_t audio_format;     // encoding tag
    uint16_t num_channels;     // channel count
    uint32_t sample_rate;      // sample rate
    uint32_t byte_rate;        // byte rate
    uint16_t block_align;      // block alignment
    uint16_t bits_per_sample;  // bits per sample
};

// Owning buffer for the samples of a "data" sub-chunk.
// We assume 16-bit monochannel samples
//
// The object owns `data`, so it is move-only: an implicit copy would leave
// two objects calling delete[] on the same array.
struct DataChunk
{
    int16_t* data;       // heap array of nb_of_samples elements (nullptr after being moved from)
    int nb_of_samples;   // element count of `data`

    // Allocates room for s samples; the contents are left uninitialized.
    // Initializers are listed in declaration order, which is the order they run in.
    DataChunk(int s): data{new int16_t[s]}, nb_of_samples{s} {}
    ~DataChunk(){delete[] data;}

    DataChunk(const DataChunk&) = delete;
    DataChunk& operator=(const DataChunk&) = delete;

    // Takes over the other object's buffer and leaves it empty.
    DataChunk(DataChunk&& other) noexcept
        : data{other.data}, nb_of_samples{other.nb_of_samples}
    {
        other.data = nullptr;
        other.nb_of_samples = 0;
    }

    // Releases the current buffer, then takes over the other object's buffer.
    DataChunk& operator=(DataChunk&& other) noexcept
    {
        if (this != &other){
            delete[] data;
            data = other.data;
            nb_of_samples = other.nb_of_samples;
            other.data = nullptr;
            other.nb_of_samples = 0;
        }
        return *this;
    }
};

int main(){
    constexpr char riff_id[4] = {'R','I','F','F'};
    constexpr char format[4] = {'W','A','V','E'};
    constexpr char fmt_id[4] = {'f','m','t',' '};
    constexpr char data_id[4] = {'d','a','t','a'};

    ifstream ifs{"../audio.wav", ios_base::binary};
    if (!ifs){
        cerr << "Cannot open file" << endl;
        return -1;
    }

    // first read RIFF header
    RIFFHeader h;
    ifs.read((char*)(&h), sizeof(h));
    if (!ifs || memcmp(h.chunk_id, riff_id, 4) || memcmp(h.format, format, 4)){
        cerr << "Bad formatting" << endl;
        return -1;
    }

    // read chunk infos iteratively
    ChunkInfo ch;
    bool fmt_read = false;
    bool data_read = false;
    while(ifs.read((char*)(&ch), sizeof(ch))){

        // if fmt chunk?
        if (memcmp(ch.chunk_id, fmt_id, 4) == 0){
            FmtChunk fmt;
            ifs.read((char*)(&fmt), ch.chunk_size);
            fmt_read = true;
        }
        // is data chunk?
        else if(memcmp(ch.chunk_id, data_id, 4) == 0){
            DataChunk dat_chunk(ch.chunk_size/sizeof(int16_t));
            ifs.read((char*)dat_chunk.data, ch.chunk_size);
            data_read = true;
        }
        // otherwise skip the chunk
        else{
            ifs.seekg(ch.chunk_size, ios_base::cur);
        }
    }
    if (!data_read || !fmt_read){
        cout << "Problem when reading data" << endl;
        return -1;
    }
}

NB: I assumed mono-channel 16-bit samples here. Though it can be adapted to handle more formats...

algat
  • 11
  • 1