2

In Delphi 7 I am running this code with the NewAC audio library. I have a short WAV file: 44.1 kHz, mono, 16-bit.

unit Main;

interface

uses Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms, Dialogs, ACS_Classes, ACS_DXAudio, ACS_Wave, ACS_Misc, ACS_Types, StdCtrls;

type
  { Main form of the demo. It owns the audio components and the two buttons
    and declares the event handlers implemented below. How the components are
    chained together is defined in the .dfm, which is not shown here. }
  TForm1 = class(TForm)
    AudioProcessor1: TAudioProcessor;
    WaveIn1: TWaveIn;
    DXAudioOut1: TDXAudioOut;
    OpenDialog1: TOpenDialog;
    Button1: TButton;
    Button2: TButton;
    // Pulls a block from AudioProcessor1.Input and inspects its contents.
    procedure AudioProcessor1GetData(
      Sender: TComponent;
      var Buffer: Pointer;
      var NBlockBytes: Cardinal);
    // Lets the user pick a file and starts DXAudioOut1.
    procedure Button1Click(Sender: TObject);
    // Stops DXAudioOut1.
    procedure Button2Click(Sender: TObject);
    // Re-enables Button1.
    procedure DXAudioOut1Done(Sender: TComponent);
    // Initializes the processor's input and reports its size.
    procedure AudioProcessor1Init(Sender: TComponent; var TotalSize: Int64);
    // Flushes the processor's input.
    procedure AudioProcessor1Flush(Sender: TComponent);
  end;

var Form1: TForm1;
implementation
{$R *.dfm}

{ Diagnostic version of the data handler (the code the question is about).
  It fetches the next block from AudioProcessor1.Input, then, for 16-bit
  input, copies single bytes out of the buffer into si1..si4 so they can be
  watched in the debugger, collects them as text in two temporary string
  lists, and swaps each pair of adjacent buffer elements in place.

  Buffer      - receives the pointer returned by the input's GetData.
  NBlockBytes - receives the size of that block in bytes.

  NOTE(review): B16 is used below but is not declared in this var section, so
  the routine does not compile as posted (the later version declares it as
  PBuffer16). }
procedure TForm1.AudioProcessor1GetData(Sender: TComponent;
  var Buffer: Pointer; var NBlockBytes: Cardinal);
var Tmp : Integer;
 i : Integer;
 list1: TStringList;
 list2: TStringList;
 b1, b2, b3, b4:byte;
 si1, si2, si3, si4: ShortInt;
 mono: Boolean;
 values: array of word;
begin
  list1 := TStringList.Create;
  list2 := TStringList.Create;
  AudioProcessor1.Input.GetData(Buffer, NBlockBytes);
  // NOTE(review): this early Exit skips the list1.free/list2.free calls at
  // the end of the routine, so both lists leak when no data is returned.
  if Buffer = nil then
    Exit;
  // mono is assigned here but never read afterwards.
  mono := false;
  case AudioProcessor1.Input.BitsPerSample of
    16 :
    begin
      B16 := Buffer;
      // values is sized here but never filled or read.
      setlength(values, NBlockBytes div 2);
      // One iteration per 4 bytes of the block.
      for i := 0 to (NBlockBytes div 4) - 1 do
      begin
        Tmp := B16[i*2];
        // Each Move copies exactly ONE byte, starting at the address of the
        // given element - i.e. only part of a 16-bit sample (presumably the
        // low-order byte on a little-endian machine). The file described in
        // the question is mono, so elements i*2 and i*2+1 are consecutive
        // samples rather than left/right channels.
        move(B16[i*2], b1, 1); // first byte of element i*2
        move(B16[i*2+1], b2, 1); // first byte of element i*2+1
        // NOTE(review): assuming 16-bit elements, the block holds
        // NBlockBytes div 2 of them; on the last iteration i*2+2 and i*2+3
        // index past that - confirm.
        move(B16[i*2+2], b3, 1); // first byte of element i*2+2
        move(B16[i*2+3], b4, 1); // first byte of element i*2+3
        // Byte values are reinterpreted as signed 8-bit for the watch window.
        si1 := b1;
        si2 := b2;
        si3 := b3;
        si4 := b4;
        list1.add(''+inttostr(si1));
        list2.add(''+inttostr(si2));
        list1.add(''+inttostr(si3));
        list2.add(''+inttostr(si4));
        // Swap elements i*2 and i*2+1 in the buffer that is passed on.
        B16[i*2] := B16[i*2 + 1];
        B16[i*2 + 1] := Tmp;
      end;
    end;
  end;
// The collected strings are discarded; the lists exist only for debugging.
list1.free;
list2.free;

end;

{ Init handler for the audio processor: initializes the processor's input
  and returns that input's Size through TotalSize. }
procedure TForm1.AudioProcessor1Init(Sender: TComponent; var TotalSize: Int64);
var
  Processor: TAudioProcessor;
begin
  // Sender is hard-cast (no run-time type check), exactly as before.
  Processor := TAudioProcessor(Sender);
  Processor.Input.Init;
  TotalSize := Processor.Input.Size;
end;

{ Flush handler for the audio processor: forwards the flush to the
  processor's input. }
procedure TForm1.AudioProcessor1Flush(Sender: TComponent);
var
  Processor: TAudioProcessor;
begin
  Processor := TAudioProcessor(Sender);
  Processor.Input.Flush;
end;


{ Click handler for Button1: asks the user for a file and, if one was chosen,
  disables Button1, assigns the file to WaveIn1 and starts DXAudioOut1. }
procedure TForm1.Button1Click(Sender: TObject);
begin
  // Nothing to do when the dialog is cancelled.
  if not OpenDialog1.Execute then
    Exit;

  Button1.Enabled := False;
  WaveIn1.FileName := OpenDialog1.FileName;
  DXAudioOut1.Run;
end;

{ Click handler for Button2: stops DXAudioOut1. }
procedure TForm1.Button2Click(Sender: TObject);
begin
  DXAudioOut1.Stop;
end;    

{ Re-enables Button1 (which Button1Click disables before starting output).
  Presumably wired to DXAudioOut1's completion event in the .dfm - confirm. }
procedure TForm1.DXAudioOut1Done(Sender: TComponent);
begin
  Button1.Enabled := True;
end;

end.

When I open the file in editing software I can see the amplitude of the sound, and I see that the values at the beginning are 0. But when I run this program and add si1, si2, si3 and si4 to the watch list (the variables appear in the watch list in this order), I get these values in the first iteration:

80,124,104,32.

I expected these values to be 0 because there is silence at the beginning.

First, may you explain why these are not zero?

Second, I am not sure what these values really represent. I know that si1 and si2 are the first sample. But is it really the volume level? How can I correct the program so that it recognizes the silence at the beginning?

Tested file -> the section which should be passed to the function as first.

The part

This part is not processed (because I processed only a few cycles of the first loop):

tested file

I did some tests with the file "silence plus" and with amplification, and looked at the values from the first 8 cycles.

enter image description here

Another test with Word instead of Byte:

B16 := Buffer;
...
move(B16[i*2], w1, 2);
move(B16[i*2+1], w2, 2);

word

It really looks like the bytes need to be swapped. I thought that on Windows XP I have little-endian byte order. So I will write a swapper.

Johny Bony
  • 375
  • 2
  • 9
  • 1
    the audio contents of wav files are [PCM encoded](https://en.wikipedia.org/wiki/Pulse-code_modulation). Since you have 16 bits per sample in your case, the highest bits represent to top of the waveform and the lowest bits the bottom, so silence is at the middle (so it is not 0) – whosrdaddy Nov 06 '19 at 17:45
  • What does the value represent, is it volume level? – Johny Bony Nov 06 '19 at 18:05
  • Did you read the wiki article? – whosrdaddy Nov 06 '19 at 18:05
  • They write it is amplitude. And "For each sample, one of the available values (on the y-axis) is chosen". But still reading. This does not tell that it is level of volume. – Johny Bony Nov 06 '19 at 18:12
  • 1
    Amplitude = volume – whosrdaddy Nov 06 '19 at 18:14
  • 1
    more info [here](https://stackoverflow.com/a/13053717/800214) – whosrdaddy Nov 06 '19 at 18:25
  • @whosrdaddy: re: first comment... How can you say one is top and one is bottom? When I watch some/any curve of audio "diagram" (where x axis =time,y axis=samples) in Editing software there is something like this: first plus values like +10 +15 +20 +15 +10 and then 0 -10 -15 -20 -15 -10 and so on... Wait a moment I will upload an image. – Johny Bony Nov 07 '19 at 10:12
  • 1
    You have to be aware that there 50+ possible formats out there so the bytes themselves mean nothing without looking at the header. Anyway your code is not correct, the file you specify is 16bit mono,this means that you need to read 2 bytes per sample (and no left or right channel and ditch the shortints) and you need to swap them because wav data is stored in little endian format. Do some [more reading](https://blogs.msdn.microsoft.com/dawate/2009/06/23/intro-to-audio-programming-part-2-demystifying-the-wav-format/) – whosrdaddy Nov 07 '19 at 10:52
  • @whosdaddy: which one swapper is correct? `swap(WordValue)` or `WordValue xor $8000;` ... `swap(WordValue)` returns 0 always, even with the file with my voice which has no zeros. `xor $8000` returns the numbers in the second table (so no change is applied). I think this should mean that the values in table are already little endian. – Johny Bony Nov 07 '19 at 12:02
  • yes my bad, no need to swap, there are some sections in the header that are in big endian format though. – whosrdaddy Nov 07 '19 at 12:41
  • @whosrdaddy: now when I read the doc they write the data are signned, so should I copy the data to SmallInt? – Johny Bony Nov 07 '19 at 15:06
  • yes smallint seems like a good fit, It seems that you solved your problem. I don't know your specific use case but I suspect your code is far from complete ;) – whosrdaddy Nov 07 '19 at 16:05
  • Thank you for help and patience. – Johny Bony Nov 07 '19 at 17:56

1 Answer

1

The main problems of my code were:

1) Reading 1 byte per sample instead of 2 bytes per sample.

2) The sample is signed, not unsigned. So when I tried to read the two bytes as a Word, I got wrong numbers (see the last table in the question).

3) I also tried to use the two bytes as a byte-swapped SmallInt, but that resulted in crazy numbers like -25345, -1281, 26624, -19968 ... This is because my system (Windows XP) is little-endian, which is the same byte order the WAV sample data is stored in. There is no need to swap on Windows.

So the solution was to copy 16 bits to SmallInt, no swap.

unit Main;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, ACS_Classes, ACS_DXAudio, ACS_Wave, ACS_Misc, ACS_Types, StdCtrls;

type
  { Main form of the demo. It owns the audio components and the two buttons
    and declares the event handlers implemented below. How the components are
    chained together is defined in the .dfm, which is not shown here. }
  TForm1 = class(TForm)
    AudioProcessor1: TAudioProcessor;
    WaveIn1: TWaveIn;
    DXAudioOut1: TDXAudioOut;
    OpenDialog1: TOpenDialog;
    Button1: TButton;
    Button2: TButton;
    // Pulls a block from AudioProcessor1.Input and reads its 16-bit samples.
    procedure AudioProcessor1GetData(
      Sender: TComponent;
      var Buffer: Pointer;
      var NBlockBytes: Cardinal);
    // Lets the user pick a file and starts DXAudioOut1.
    procedure Button1Click(Sender: TObject);
    // Stops DXAudioOut1.
    procedure Button2Click(Sender: TObject);
    // Re-enables Button1.
    procedure DXAudioOut1Done(Sender: TComponent);
    // Initializes the processor's input and reports its size.
    procedure AudioProcessor1Init(Sender: TComponent; var TotalSize: Int64);
    // Flushes the processor's input.
    procedure AudioProcessor1Flush(Sender: TComponent);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

var  Form1: TForm1;
implementation
{$R *.dfm}
{ Data handler for the audio processor. Fetches the next block from
  AudioProcessor1.Input and, for 16-bit input, copies the samples two at a
  time into the signed 16-bit locals si1/si2 so they can be inspected in the
  debugger. The bytes are copied as-is (no byte swap). The buffer itself is
  passed on unmodified.

  Buffer      - receives the pointer returned by the input's GetData; nil
                means no data, in which case the handler returns at once.
  NBlockBytes - receives the size of that block in bytes. }
procedure TForm1.AudioProcessor1GetData(Sender: TComponent;
   var Buffer: Pointer; var NBlockBytes: Cardinal);
var
  B16: PBuffer16;
  i, LastPair: Integer;
  si1, si2: SmallInt;
begin
  AudioProcessor1.Input.GetData(Buffer, NBlockBytes);
  if Buffer = nil then
    Exit;
  case AudioProcessor1.Input.BitsPerSample of
    16:
    begin
      B16 := Buffer;
      // Every iteration consumes TWO 16-bit samples (4 bytes), so the number
      // of iterations is NBlockBytes div 4. Iterating NBlockBytes div 2 times
      // (one per sample) while indexing i*2 and i*2+1 would read up to twice
      // the size of the block. The cast keeps the arithmetic signed so that a
      // block shorter than 4 bytes yields -1 and the loop body is skipped.
      LastPair := Integer(NBlockBytes div 4) - 1;
      for i := 0 to LastPair do
      begin
        // Copy both bytes of each sample into a signed 16-bit variable.
        Move(B16[i*2], si1, SizeOf(si1));
        Move(B16[i*2+1], si2, SizeOf(si2));
      end;
    end;
  end;
end;

{ Init handler for the audio processor: initializes the processor's input
  and returns that input's Size through TotalSize. }
procedure TForm1.AudioProcessor1Init(Sender: TComponent; var TotalSize: Int64);
var
  Processor: TAudioProcessor;
begin
  // Sender is hard-cast (no run-time type check), exactly as before.
  Processor := TAudioProcessor(Sender);
  Processor.Input.Init;
  TotalSize := Processor.Input.Size;
end;

{ Flush handler for the audio processor: forwards the flush to the
  processor's input. }
procedure TForm1.AudioProcessor1Flush(Sender: TComponent);
var
  Processor: TAudioProcessor;
begin
  Processor := TAudioProcessor(Sender);
  Processor.Input.Flush;
end;


{ Click handler for Button1: asks the user for a file and, if one was chosen,
  disables Button1, assigns the file to WaveIn1 and starts DXAudioOut1. }
procedure TForm1.Button1Click(Sender: TObject);
begin
  // Nothing to do when the dialog is cancelled.
  if not OpenDialog1.Execute then
    Exit;

  Button1.Enabled := False;
  WaveIn1.FileName := OpenDialog1.FileName;
  DXAudioOut1.Run;
end;

{ Click handler for Button2: stops DXAudioOut1. }
procedure TForm1.Button2Click(Sender: TObject);
begin
  DXAudioOut1.Stop;
end;

{ Re-enables Button1 (which Button1Click disables before starting output).
  Presumably wired to DXAudioOut1's completion event in the .dfm - confirm. }
procedure TForm1.DXAudioOut1Done(Sender: TComponent);
begin
  Button1.Enabled := True;
end;

end.

Here are the values:

enter image description here

Johny Bony
  • 375
  • 2
  • 9