I have found the answer for Java: https://stackoverflow.com/a/36357819/202179 and tried to port it to Xamarin.
Here is the code that I've made:
const string COMPRESSED_AUDIO_FILE_MIME_TYPE = "audio/mp4a-latm";
const int COMPRESSED_AUDIO_FILE_BIT_RATE = 64000; // 64kbps
const int SAMPLING_RATE = 48000;
const int BUFFER_SIZE = 48000;
const int CODEC_TIMEOUT_IN_MS = 5000;
void Compress()
{
var inputFile = new Java.IO.File(tempFileWavPath);
var fis = new Java.IO.FileInputStream(inputFile);
var outputFile = new Java.IO.File(fileM4APath);
if (outputFile.Exists())
outputFile.Delete();
var mux = new MediaMuxer(outputFile.AbsolutePath, MuxerOutputType.Mpeg4);
MediaFormat outputFormat = MediaFormat.CreateAudioFormat(COMPRESSED_AUDIO_FILE_MIME_TYPE, SAMPLING_RATE, 1);
outputFormat.SetInteger(MediaFormat.KeyAacProfile, (int)MediaCodecProfileType.Aacobjectlc);
outputFormat.SetInteger(MediaFormat.KeyBitRate, COMPRESSED_AUDIO_FILE_BIT_RATE);
outputFormat.SetInteger(MediaFormat.KeyMaxInputSize, 16384);
MediaCodec codec = MediaCodec.CreateEncoderByType(COMPRESSED_AUDIO_FILE_MIME_TYPE);
codec.Configure(outputFormat, null, null, MediaCodecConfigFlags.Encode);
codec.Start();
MediaCodec.BufferInfo outBuffInfo = new MediaCodec.BufferInfo();
byte[] tempBuffer = new byte[BUFFER_SIZE];
var hasMoreData = true;
double presentationTimeUs = 0;
int audioTrackIdx = 0;
int totalBytesRead = 0;
int percentComplete = 0;
do
{
int inputBufIndex = 0;
while (inputBufIndex != -1 && hasMoreData)
{
inputBufIndex = codec.DequeueInputBuffer(CODEC_TIMEOUT_IN_MS);
if (inputBufIndex >= 0)
{
var dstBuf = codec.GetInputBuffer(inputBufIndex);
dstBuf.Clear();
int bytesRead = fis.Read(tempBuffer, 0, dstBuf.Limit());
if (bytesRead == -1)
{ // -1 implies EOS
hasMoreData = false;
codec.QueueInputBuffer(inputBufIndex, 0, 0, (long)presentationTimeUs, MediaCodecBufferFlags.EndOfStream);
}
else
{
totalBytesRead += bytesRead;
dstBuf.Put(tempBuffer, 0, bytesRead);
codec.QueueInputBuffer(inputBufIndex, 0, bytesRead, (long)presentationTimeUs, 0);
presentationTimeUs = 1000000l * (totalBytesRead / 2) / SAMPLING_RATE;
}
}
}
// Drain audio
int outputBufIndex = 0;
while (outputBufIndex != (int)MediaCodecInfoState.TryAgainLater)
{
outputBufIndex = codec.DequeueOutputBuffer(outBuffInfo, CODEC_TIMEOUT_IN_MS);
if (outputBufIndex >= 0)
{
var encodedData = codec.GetOutputBuffer(outputBufIndex);
encodedData.Position(outBuffInfo.Offset);
encodedData.Limit(outBuffInfo.Offset + outBuffInfo.Size);
if ((outBuffInfo.Flags & MediaCodecBufferFlags.CodecConfig) != 0 && outBuffInfo.Size != 0)
{
codec.ReleaseOutputBuffer(outputBufIndex, false);
}
else
{
mux.WriteSampleData(audioTrackIdx, encodedData, outBuffInfo);
codec.ReleaseOutputBuffer(outputBufIndex, false);
}
}
else if (outputBufIndex == (int)MediaCodecInfoState.OutputFormatChanged)
{
outputFormat = codec.OutputFormat;
audioTrackIdx = mux.AddTrack(outputFormat);
mux.Start();
}
}
percentComplete = (int)Math.Round(((float)totalBytesRead / (float)inputFile.Length()) * 100.0);
} while (outBuffInfo.Flags != MediaCodecBufferFlags.EndOfStream);
fis.Close();
mux.Stop();
mux.Release();
}
This almost works as it converts the file, but the resulting file appears to be encoded too fast - the pitch is too high and speed is too high and the reproduction lasts shorter than expected.
It is likely that just some slight change is needed, but I am not sure what. Can anyone suggest?