I'm working with Wave files. I'm reading all the data but in some files data size (specified amount of bytes to be read) is negative, which breaks the reading mechanism. Are wave files constant in terms of byte offsets? Is x offset always the same value?
-
2Care to show your code? This could be a signed/unsigned integer issue. The RIFF/WAVE format is [thoroughly documented](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html), so you shouldn't get any surprises, provided that you stick to the specification. – xbug Sep 24 '18 at 15:03
-
my code is pretty messy at that point so I'd prefer not to show it. I'm reading it as a little endian integer without any changes to the sign of value. – GPlayer Sep 24 '18 at 18:23
-
@GPlayer I updated my answer to match new formats ... – Spektre Jan 13 '19 at 10:12
3 Answers
Since the unit of datasize in the .wav header is unsigned, and since you're getting a signed value, it suggests you're not reading the value from the wave header correctly...

- 31
- 3
If it helps, this is what I use for WAV decoding/encoding (I coded it some years ago):
//---------------------------------------------------------------------------
//--- RIFF WAVE format: 1.01 ------------------------------------------------
//---------------------------------------------------------------------------
#ifndef _RIFF_h
#define _RIFF_h
//---------------------------------------------------------------------------
// 8bit PCM is unsigned
// 16bit PCM is signed 2's complement little endian (big endian is RIFX)
//---------------------------------------------------------------------------
// Generic RIFF chunk header: a four-character tag followed by the byte
// length of the payload that comes right after it in the file.
struct _wave_chunk
{
    DWORD ids;  // four-character chunk tag
    DWORD len;  // payload length in bytes (the 8-byte header is not counted)

    _wave_chunk() { ids=' '; len=0; }
    // Takes a const reference so that const objects and temporaries can be
    // copied as well; the copy itself is the compiler-generated memberwise one.
    _wave_chunk(const _wave_chunk& a) { *this=a; }
    ~_wave_chunk() {}
    // Pointer flavour of assignment: copies *a into this object, returns this.
    _wave_chunk* operator = (const _wave_chunk *a) { *this=*a; return this; }
};
// Leading 12 bytes of a WAVE file: the RIFF tag, the RIFF size field and the
// WAVE form type.
struct _wave_hdr
{
    DWORD ids;  // "RIFF"
    DWORD len;  // RIFF size field (bytes that follow this field)
    DWORD tps;  // "WAVE"

    _wave_hdr() { ids='FFIR'; len=0; tps='EVAW'; }
    // Takes a const reference so that const objects and temporaries can be
    // copied as well; the copy itself is the compiler-generated memberwise one.
    _wave_hdr(const _wave_hdr& a) { *this=a; }
    ~_wave_hdr() {}
    // Pointer flavour of assignment: copies *a into this object, returns this.
    _wave_hdr* operator = (const _wave_hdr *a) { *this=*a; return this; }
};
// "fmt " chunk: 8-byte chunk header followed by the sample format description,
// including the optional extension fields (payload of 16, 18 or 40 bytes).
struct _wave_fmt
{
    DWORD ids;              // "fmt "
    DWORD len;              // 16,18,40
    WORD format;            // 1 = PCM linear quantization
    /* 0x0001 WAVE_FORMAT_PCM PCM
    0x0003 WAVE_FORMAT_IEEE_FLOAT IEEE float
    0x0006 WAVE_FORMAT_ALAW 8-bit ITU-T G.711 A-law
    0x0007 WAVE_FORMAT_MULAW 8-bit ITU-T G.711 µ-law
    0xFFFE WAVE_FORMAT_EXTENSIBLE Determined by SubFormat */
    WORD chanels;           // number of channels
    DWORD samplerate;       // sample frames per second
    DWORD byterate;         // bytes per second, derived by compute()
    WORD blockalign;        // bytes per sample frame, derived by compute()
    WORD bits;              // bits per sample
    WORD ext_len;           // extension length 0,22
    WORD ext_validbits;
    DWORD ext_channelmask;
    BYTE ext_subformat[16];

    // Defaults: 16-byte PCM fmt chunk, mono, 44100 Hz, 8 bit, no extension.
    _wave_fmt()
    {
        ids=' tmf'; len=16; format=1; chanels=1; samplerate=44100; bits=8;
        ext_len=0; ext_validbits=0; ext_channelmask=0;
        for (int i=0;i<16;i++) ext_subformat[i]=0;
        compute();
    }
    // Takes a const reference so that const objects and temporaries can be
    // copied as well; the copy itself is the compiler-generated memberwise one.
    _wave_fmt(const _wave_fmt& a) { *this=a; }
    ~_wave_fmt() {}
    // Pointer flavour of assignment: copies *a into this object, returns this.
    _wave_fmt* operator = (const _wave_fmt *a) { *this=*a; return this; }

    // Recomputes blockalign and byterate from chanels, samplerate and bits.
    void compute()
    {
        // A sample occupies a whole number of bytes, so the bit depth is
        // rounded up; plain bits/8 under-counts e.g. 12 or 20 bit samples.
        // For depths that are a multiple of 8 the result is unchanged.
        blockalign=chanels*((bits+7)/8);
        byterate=samplerate*blockalign;
    }
};
// Header of the "data" chunk; the sample bytes follow it in the file.
struct _wave_dat
{
    DWORD ids;  // "data"
    DWORD len;  // number of sample bytes in the chunk

    _wave_dat() { ids='atad'; len=0; }
    // Takes a const reference so that const objects and temporaries can be
    // copied as well; the copy itself is the compiler-generated memberwise one.
    _wave_dat(const _wave_dat& a) { *this=a; }
    ~_wave_dat() {}
    // Pointer flavour of assignment: copies *a into this object, returns this.
    _wave_dat* operator = (const _wave_dat *a) { *this=*a; return this; }
};
//---------------------------------------------------------------------------
// Small RIFF/WAVE file reader/writer on top of the VCL file handle routines.
class wave
{
public:
    // --- lifetime ---
    wave();
    ~wave();                            // closes the file if one is open

    // --- writing ---
    void create(AnsiString _name);      // create a file and write its headers
    void write(BYTE *data,DWORD size);  // append sample bytes

    // --- reading ---
    bool open(AnsiString _name);        // open a file and locate its data chunk
    DWORD read(BYTE *data,DWORD size);  // read bytes from the current position

    // --- both modes ---
    void close();

    // --- state ---
    AnsiString name;    // file name given to create()/open(), cleared by close()
    int hnd;            // file handle, -1 while no file is open
    bool readonly;      // false only after a successful create()
    _wave_hdr hdr;      // RIFF/WAVE header
    _wave_fmt fmt;      // format chunk
    _wave_dat dat;      // data chunk header
};
//---------------------------------------------------------------------------
// Starts with no file attached: empty name, invalid handle, read-only mode.
wave::wave()
{
    // An empty string literal, as in close(). Assigning the integer 0 would
    // select AnsiString's integer conversion and store the text "0" instead
    // of an empty name.
    name="";
    hnd=-1;
    readonly=true;
}
//---------------------------------------------------------------------------
// Releases the file on destruction; close() is a no-op for the file when no
// handle is open.
wave::~wave()
{
    this->close();
}
//---------------------------------------------------------------------------
// Creates (or overwrites) the file _name and writes the RIFF header, the fmt
// chunk and an empty data chunk header, using the current contents of fmt.
// On success the object is switched to write mode (readonly==false).
// On failure hnd stays negative and nothing can be written.
void wave::create(AnsiString _name)
{
    close();
    readonly=true;
    // A failed open() may have left foreign tags in hdr; restore the real ones.
    hdr.ids='FFIR';
    hdr.tps='EVAW';
    // Never write more fmt bytes than the structure actually holds.
    const DWORD fmtmax=DWORD(sizeof(fmt))-8;
    if (fmt.len>fmtmax) fmt.len=fmtmax;
    fmt.compute();
    dat.len=0;
    // The RIFF size covers everything after the 8-byte "RIFF"+size pair: the
    // "WAVE" tag, the whole fmt chunk and the data chunk header. write() adds
    // the sample bytes on top of this.
    hdr.len=DWORD(sizeof(hdr)-8)+(fmt.len+8)+DWORD(sizeof(dat));
    name=_name;
    hnd=FileCreate(name);
    if (hnd<0) return;
    // A file with incomplete headers is useless, so give up on a short write.
    const int fmtbytes=int(fmt.len+8);
    if (FileWrite(hnd,&hdr,sizeof(hdr))!=int(sizeof(hdr))){ close(); return; }
    if (FileWrite(hnd,&fmt,fmtbytes)!=fmtbytes){ close(); return; }
    if (FileWrite(hnd,&dat,sizeof(dat))!=int(sizeof(dat))){ close(); return; }
    readonly=false;
}
//---------------------------------------------------------------------------
// Opens _name for reading, checks the RIFF/WAVE header and walks the chunk
// list until the "data" chunk header has been read. The fmt chunk, when it is
// met on the way, is loaded into fmt.
// Returns true with the file positioned at the first sample byte and dat.len
// holding the size of the sample data. Returns false, with the file closed,
// on any error or when no data chunk is found.
bool wave::open(AnsiString _name)
{
    close();
    readonly=true;
    name=_name;
    hnd=FileOpen(name,fmOpenRead);
    if (hnd<0) return false;
    // FileRead signals failure with a negative int; compare as signed values so
    // that the error is not turned into a huge unsigned byte count.
    if (FileRead(hnd,&hdr,sizeof(hdr))!=int(sizeof(hdr))){ close(); return false; }
    // Not a RIFF/WAVE file: release the handle instead of leaving it open.
    if ((hdr.ids!='FFIR')||(hdr.tps!='EVAW')){ close(); return false; }
    _wave_chunk chk;
    const int sz=int(sizeof(chk));
    const DWORD fmtmax=DWORD(sizeof(fmt))-DWORD(sz);   // room behind ids+len
    for(;;)
    {
        if (FileRead(hnd,&chk,sz)!=sz){ close(); return false; }
        if (chk.ids=='atad')
        {
            dat.ids=chk.ids;
            dat.len=chk.len;
            return true;
        }
        // Chunks are padded to an even length; the pad byte is not part of len.
        DWORD skip=chk.len+(chk.len&1);
        // Reject sizes that wrap around or would become a negative seek offset.
        if ((skip<chk.len)||(skip>0x7FFFFFFFu)){ close(); return false; }
        if (chk.ids==' tmf')
        {
            // The length comes from the file: never read past the end of fmt.
            const DWORD take=(chk.len<fmtmax)?chk.len:fmtmax;
            fmt.ids=chk.ids;
            fmt.len=take;
            if (FileRead(hnd,reinterpret_cast<BYTE*>(&fmt)+sz,take)!=int(take)){ close(); return false; }
            skip-=take;
        }
        // Step over whatever is left of this chunk (all of it for unknown tags).
        if (skip) FileSeek(hnd,int(skip),1);
    }
}
//---------------------------------------------------------------------------
// Appends size bytes of sample data to a file opened with create() and grows
// the RIFF and data chunk sizes by the number of bytes actually written.
// Does nothing when no file is open, when the file was opened read-only, or
// when there is nothing to write.
void wave::write(BYTE *data,DWORD size)
{
    // In read-only mode the sizes describe the file on disk; leave them alone.
    if ((hnd<0)||readonly) return;
    if ((!data)||(!size)) return;
    const int done=FileWrite(hnd,data,size);
    if (done<=0) return;            // failed write: sizes must not grow
    hdr.len+=DWORD(done);
    dat.len+=DWORD(done);
}
//---------------------------------------------------------------------------
// Reads up to size bytes from the current file position into data.
// Returns the number of bytes read; 0 when no file is open, on a read error,
// or when nothing more could be read.
DWORD wave::read(BYTE *data,DWORD size)
{
    if (hnd<0) return 0;
    if ((!data)||(!size)) return 0;
    // FileRead reports an error as a negative value; do not let it turn into a
    // huge unsigned byte count.
    const int got=FileRead(hnd,data,size);
    return (got>0)?DWORD(got):0;
}
//---------------------------------------------------------------------------
void wave::close()
{
name="";
if (hnd<0) return;
FileSeek(hnd,0,0);
if (!readonly) FileWrite(hnd,&hdr,sizeof(hdr));
FileClose(hnd);
hnd=-1;
}
//---------------------------------------------------------------------------
#endif
//---------------------------------------------------------------------------
It does not support all the formats, but it should be enough to compare against and repair your code, or to extract the addressing from it ... The code is C++/VCL based, so you need to port the binary file access and the AnsiString
datatype to your environment ...
However, a negative value suggests you are using signed integers again (just like in your other question), so use an unsigned datatype instead ... The offsets in RIFF Wave files are never negative !!!
[Edit1] I updated the code for newer formats.
After some extensions were added to the Wave format (see the format info linked in xbug's first comment under the OP), the fmt
chunk can now have a variable size. A few other chunks (related to compression) were added as well, so in order to read a file reliably you need to walk through all the RIFF chunks, skipping each one by its chunk size, until you hit the data
chunk; your data samples follow its header...
My original code was for format 1.0 (as mentioned, it was coded years ago) and did not load newer wave files properly (thx to dsp_user for spotting it). I updated it for the new formatting, so it should be safe to use again.

- 49,595
- 11
- 110
- 380
-
1your wave_header structure assumes that the sound samples start at offset 44 but the wave format specification doesn't require this. In fact, many audio editors use other offsets for the data section because they usually have some additional sections (e.g fact section, list section). So, to correctly parse a wave file, you need to search for the data section and not rely on a fixed offset. – dsp_user Jan 03 '19 at 11:46
-
@dsp_user could you share me some wav file with the offseted data for testing? – Spektre Jan 08 '19 at 19:54
-
How do I share a file here? Anyway, if you have Audacity then you can create your own file. The easiest way is to create a simple sine tone (using the built-in tone generator) and export it to a wav format. Audacity creates files where samples don't start at offset 44 (at least not in the versions I've used). – dsp_user Jan 08 '19 at 20:48
-
@dsp_user I do not have any SW for audio processing thats why I am asking. You can not share wav here directly only images are supported but you can post anywhere and comment me with link... I like [ulozto](http://www.ulozto.net) as it does not require registration (if you chose slow download) – Spektre Jan 08 '19 at 22:11
-
1Here it is ( https://ulozto.net/!AJw7Ywpw8djU/80-aud-wav ). The format is 22050Hz/32 bit floating point/1 channel. – dsp_user Jan 09 '19 at 16:29
-
1@dsp_user thanks for the sample file I updated the code to match new formating looks like it works in my apps vs. media player... – Spektre Jan 13 '19 at 10:12
Are wave files constant in terms of byte offsets? Is x offset always the same value?
No, at least the data section is not always found at the same offset. Some simple programs assume that the sound samples start at a fixed offset (44) but this is not always the case. The only way to reliably read a wave file is to look for the data section and once you find it, the data size field will be at offset +4 relative to the data section.

- 2,061
- 2
- 16
- 23