18

I'm modifying an Android Framework example to package the elementary AAC streams produced by MediaCodec into a standalone .mp4 file. I'm using a single MediaMuxer instance containing one AAC track generated by a MediaCodec instance.

However, I always eventually get an error on a call to mMediaMuxer.writeSampleData(trackIndex, encodedData, bufferInfo):

E/MPEG4Writer: timestampUs 0 < lastTimestampUs XXXXX for Audio track

When I queue the raw input data with mCodec.queueInputBuffer(...), I provide 0 as the timestamp value, per the framework example. (I've also tried using monotonically increasing timestamp values, with the same result. I've successfully encoded raw camera frames to h264/mp4 files with this same method.)

Check out the full source

Most relevant snippet:

private static void testEncoder(String componentName, MediaFormat format, Context c) {
    int trackIndex = 0;
    boolean mMuxerStarted = false;
    File f = FileUtils.createTempFileInRootAppStorage(c, "aac_test_" + new Date().getTime() + ".mp4");
    MediaCodec codec = MediaCodec.createByCodecName(componentName);

    try {
        codec.configure(
                format,
                null /* surface */,
                null /* crypto */,
                MediaCodec.CONFIGURE_FLAG_ENCODE);
    } catch (IllegalStateException e) {
        Log.e(TAG, "codec '" + componentName + "' failed configuration.");
    }

    codec.start();

    try {
        mMediaMuxer = new MediaMuxer(f.getAbsolutePath(), MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4);
    } catch (IOException ioe) {
        throw new RuntimeException("MediaMuxer creation failed", ioe);
    }

    ByteBuffer[] codecInputBuffers = codec.getInputBuffers();
    ByteBuffer[] codecOutputBuffers = codec.getOutputBuffers();

    int numBytesSubmitted = 0;
    boolean doneSubmittingInput = false;
    int numBytesDequeued = 0;

    while (true) {
        int index;

        if (!doneSubmittingInput) {
            index = codec.dequeueInputBuffer(kTimeoutUs /* timeoutUs */);

            if (index != MediaCodec.INFO_TRY_AGAIN_LATER) {
                if (numBytesSubmitted >= kNumInputBytes) {
                    Log.i(TAG, "queueing EOS to inputBuffer");
                    codec.queueInputBuffer(
                            index,
                            0 /* offset */,
                            0 /* size */,
                            0 /* timeUs */,
                            MediaCodec.BUFFER_FLAG_END_OF_STREAM);

                    if (VERBOSE) {
                        Log.d(TAG, "queued input EOS.");
                    }

                    doneSubmittingInput = true;
                } else {
                    int size = queueInputBuffer(
                            codec, codecInputBuffers, index);

                    numBytesSubmitted += size;

                    if (VERBOSE) {
                        Log.d(TAG, "queued " + size + " bytes of input data.");
                    }
                }
            }
        }

        MediaCodec.BufferInfo info = new MediaCodec.BufferInfo();
        index = codec.dequeueOutputBuffer(info, kTimeoutUs /* timeoutUs */);

        if (index == MediaCodec.INFO_TRY_AGAIN_LATER) {
            // no output available yet; loop again
        } else if (index == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
            MediaFormat newFormat = codec.getOutputFormat();
            trackIndex = mMediaMuxer.addTrack(newFormat);
            mMediaMuxer.start();
            mMuxerStarted = true;
        } else if (index == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
            codecOutputBuffers = codec.getOutputBuffers();
        } else {
            // Write to muxer
            ByteBuffer encodedData = codecOutputBuffers[index];
            if (encodedData == null) {
                throw new RuntimeException("encoderOutputBuffer " + index +
                        " was null");
            }

            if ((info.flags & MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) {
                // The codec config data was pulled out and fed to the muxer when we got
                // the INFO_OUTPUT_FORMAT_CHANGED status.  Ignore it.
                if (VERBOSE) Log.d(TAG, "ignoring BUFFER_FLAG_CODEC_CONFIG");
                info.size = 0;
            }

            if (info.size != 0) {
                if (!mMuxerStarted) {
                    throw new RuntimeException("muxer hasn't started");
                }

                // adjust the ByteBuffer values to match BufferInfo (not needed?)
                encodedData.position(info.offset);
                encodedData.limit(info.offset + info.size);

                mMediaMuxer.writeSampleData(trackIndex, encodedData, info);
                if (VERBOSE) Log.d(TAG, "sent " + info.size + " audio bytes to muxer with pts " + info.presentationTimeUs);
            }

            codec.releaseOutputBuffer(index, false);

            // End write to muxer
            numBytesDequeued += info.size;

            if ((info.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
                if (VERBOSE) {
                    Log.d(TAG, "dequeued output EOS.");
                }
                break;
            }

            if (VERBOSE) {
                Log.d(TAG, "dequeued " + info.size + " bytes of output data.");
            }
        }
    }

    if (VERBOSE) {
        Log.d(TAG, "queued a total of " + numBytesSubmitted + "bytes, "
                + "dequeued " + numBytesDequeued + " bytes.");
    }

    int sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE);
    int channelCount = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT);
    int inBitrate = sampleRate * channelCount * 16;  // bit/sec
    int outBitrate = format.getInteger(MediaFormat.KEY_BIT_RATE);

    float desiredRatio = (float)outBitrate / (float)inBitrate;
    float actualRatio = (float)numBytesDequeued / (float)numBytesSubmitted;

    if (actualRatio < 0.9 * desiredRatio || actualRatio > 1.1 * desiredRatio) {
        Log.w(TAG, "desiredRatio = " + desiredRatio
                + ", actualRatio = " + actualRatio);
    }

    codec.release();
    mMediaMuxer.stop();
    mMediaMuxer.release();
    codec = null;
}

Update: I've found a root symptom that I think lies within MediaCodec:

I send presentationTimeUs=1000 to queueInputBuffer(...) but receive info.presentationTimeUs=33219 after calling MediaCodec.dequeueOutputBuffer(info, timeoutUs). fadden left a helpful comment related to this behavior (see below).
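
Per fadden's best guess in the comments below (the encoder may split an input packet and synthesize an "in-between" timestamp from the bit rate), the fix is to feed reasonably correct presentation times on the input side. A minimal sketch, with a hypothetical helper that is not part of the framework example, deriving monotonic input timestamps from the count of PCM bytes submitted:

private static long ptsForBytes(long bytesSubmitted, int sampleRate, int channelCount) {
    long frames = bytesSubmitted / (2 * channelCount); // 2 bytes per 16-bit sample
    return 1_000_000L * frames / sampleRate;           // microseconds
}

// ...in the input loop, replacing the constant 0 timestamp:
long ptsUs = ptsForBytes(numBytesSubmitted, sampleRate, channelCount);
codec.queueInputBuffer(index, 0 /* offset */, size, ptsUs, 0 /* flags */);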

dbro
  • Sounds like MediaMuxer is getting zero and non-zero timestamps. Have you tried logging the contents of `info` at each `writeSampleData` call to verify that it has the values you expect? – fadden Sep 17 '13 at 19:19
  • I logged the output and indeed, before the error is thrown info contains a non-zero `presentationTimeUs`. How can this value differ from what is provided to `queueInputBuffer(...)`? – dbro Sep 17 '13 at 19:25
  • 1
    I don't know. Does the value appear to be a fixed offset from a previous value -- i.e. is it the same value every time, but if you pass a constant nonzero value in for the timestamp it changes? – fadden Sep 17 '13 at 19:51
  • Yes, the unexplained timestamp always differs from the constant timestamp I provide by a fixed value: 23219. – dbro Sep 17 '13 at 20:28
  • 2
    Best guess: the encoder is doing something with the output -- maybe splitting an input packet into two output packets -- that requires it to synthesize a timestamp. It takes the timestamp of the start of the packet and adds a value based on the bit rate and number of bytes. If you generate timestamps with reasonably correct presentation times you shouldn't see it go backwards when the "in-between" timestamp is generated. – fadden Sep 17 '13 at 22:23

3 Answers

7

Thanks to fadden's help I've got a proof-of-concept audio encoder and video+audio encoder on Github. In summary:

Send AudioRecord's samples to a MediaCodec + MediaMuxer wrapper. Using the system time at audioRecord.read(...) works sufficiently well as an audio timestamp, provided you poll often enough to avoid filling up AudioRecord's internal buffer (otherwise the time of your read call drifts from the time AudioRecord actually captured the samples). Too bad AudioRecord doesn't directly communicate timestamps...

// AudioRecord setup omitted; poll it in a tight loop.
while (isRecording) {
    // Stamp the buffer with the system time at the moment read() is called.
    audioPresentationTimeNs = System.nanoTime();
    audioRecord.read(dataBuffer, 0, samplesPerFrame);
    hwEncoder.offerAudioEncoder(dataBuffer.clone(), audioPresentationTimeNs);
}

Note that AudioRecord only guarantees support for 16 bit PCM samples, while MediaCodec consumes raw bytes from its input ByteBuffers. Passing a byte[] to audioRecord.read(dataBuffer,...) will split each 16 bit sample across two bytes for you.
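
For illustration, a sketch of reading into a short[] and queueing the same samples as bytes; samplesPerFrame, presentationTimeUs, and the codec buffer arrays are assumptions standing in for your own setup:

short[] pcm = new short[samplesPerFrame];
int samplesRead = audioRecord.read(pcm, 0, samplesPerFrame); // returns the number of shorts read

int index = codec.dequeueInputBuffer(kTimeoutUs);
if (index >= 0 && samplesRead > 0) {
    ByteBuffer inputBuffer = codecInputBuffers[index];
    inputBuffer.clear();
    // java.nio.ByteOrder: AudioRecord delivers PCM in native byte order.
    inputBuffer.order(ByteOrder.nativeOrder());
    inputBuffer.asShortBuffer().put(pcm, 0, samplesRead);
    codec.queueInputBuffer(index, 0, samplesRead * 2 /* bytes */, presentationTimeUs, 0);
}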

I found that polling in this way still occasionally generated a timestampUs XXX < lastTimestampUs XXX for Audio track error, so I included some logic to keep track of the bufferInfo.presentationTimeUs reported by mediaCodec.dequeueOutputBuffer(bufferInfo, timeoutUs) and adjust it if necessary before calling mediaMuxer.writeSampleData(trackIndex, encodedData, bufferInfo).
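
Roughly, that adjustment can look like the following sketch (illustrative names, not the exact code from the Github project):

private long lastAudioPtsUs = -1;

private void writeSampleDataSafely(MediaMuxer muxer, int trackIndex,
                                   ByteBuffer encodedData, MediaCodec.BufferInfo info) {
    if (info.presentationTimeUs <= lastAudioPtsUs) {
        // MPEG4Writer rejects non-increasing timestamps, so nudge this one forward.
        info.presentationTimeUs = lastAudioPtsUs + 1;
    }
    lastAudioPtsUs = info.presentationTimeUs;
    muxer.writeSampleData(trackIndex, encodedData, info);
}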

dbro
  • I've managed to encode the previewTextures of my camera preview stream - also with fadden's example code... but when I use quite high bitrates (starting at ~5,000,000) the "writeSampleData" occasionally produces a lag of about 500ms... do you have an idea what's going wrong there? – mAx Oct 14 '13 at 13:13
  • I have created a new question about this issue with more details here: http://stackoverflow.com/questions/19361770/muxing-camera-preview-h264-encoded-elementary-stream-with-mediamuxer – mAx Oct 14 '13 at 13:49
  • 1
    The samples don't get truncated. Truncated would be each 16 bit frame is shortened into an 8 bit frame, which is not what happens, each 16 bit frame is split into two bytes, but this is probably just semantics. – HPP Oct 24 '13 at 01:48
  • I mixed audio and video successfully with MediaMuxer and MediaCodec, and the mp4 file can be played, but something is wrong. For example, I record a video for 12 seconds; the system player shows the duration as 12 seconds, which is right, but it takes the player only 10 seconds to play to the end. Did you implement the mix successfully without any errors? How did you make it? @dbro – dragonfly Jun 04 '15 at 10:33
  • @dragonfly My result: https://github.com/kickflip/kickflip-android-sdk. See [AVRecorder.java](https://github.com/Kickflip/kickflip-android-sdk/blob/c10ef0961141aba46594b781814eff6c23de453c/sdk/src/main/java/io/kickflip/sdk/av/AVRecorder.java), [CameraEncoder.java](https://github.com/Kickflip/kickflip-android-sdk/blob/c10ef0961141aba46594b781814eff6c23de453c/sdk/src/main/java/io/kickflip/sdk/av/CameraEncoder.java), [MicrophoneEncoder.java](https://github.com/Kickflip/kickflip-android-sdk/blob/c10ef0961141aba46594b781814eff6c23de453c/sdk/src/main/java/io/kickflip/sdk/av/MicrophoneEncoder.java) – dbro Jun 04 '15 at 16:05
  • @dragonfly Also see [AndroidMuxer](https://github.com/Kickflip/kickflip-android-sdk/blob/c10ef0961141aba46594b781814eff6c23de453c/sdk/src/main/java/io/kickflip/sdk/av/AndroidMuxer.java) which extends [Muxer.java](https://github.com/Kickflip/kickflip-android-sdk/blob/c10ef0961141aba46594b781814eff6c23de453c/sdk/src/main/java/io/kickflip/sdk/av/Muxer.java). Make sure your audio and video timestamps both start at 0. – dbro Jun 04 '15 at 16:10
  • @dbro Thanks for your generosity, I have downloaded your code. I found that you used "getJitterFreePTS" to modify the current system time when sending pcm data to the audio encoder. Why did you modify the timestamp, and why did you use "audioInputLength / 2" as the second parameter of "getJitterFreePTS"? – dragonfly Jun 05 '15 at 09:17
  • @dbro Hi, I have read the class Muxer.java; you defined 2 timestamp members: protected long mFirstPts; protected long mLastPts[];. You said that we should make sure audio and video ts both start at 0. But in my opinion, to ensure that, you should define the members like this: private long mFirstPts[] = {0, 0}; private long mLastPts[] = {0, 0};. Could you please look over the class Muxer.java and consider my point? – dragonfly Jun 05 '15 at 13:13
  • @dbro I have written a demo to test the a/v recording, but I ran into some problems. I posted the question at http://stackoverflow.com/questions/30668846/record-video-with-mediacodec-and-mediamuxer-but-the-bitrate-and-framerate-are-i Could you please have a look and give me some help? – dragonfly Jun 05 '15 at 14:26
4

The code from the answer above (https://stackoverflow.com/a/18966374/6463821) can also produce the timestampUs XXX < lastTimestampUs XXX for Audio track error, because if you read from AudioRecord's buffer faster than needed, the duration between the generated timestamps will be smaller than the real duration between the audio samples.

So my solution for this issue is to generate the first timestamp once, then increase the timestamp by the duration of one sample buffer for each subsequent buffer (the duration depends on the sample rate, audio format, and channel config).

// duration of one buffer of ARR_SIZE shorts (see the comments below for the byte[] case)
BUFFER_DURATION_US = 1_000_000 * (ARR_SIZE / AUDIO_CHANNELS) / SAMPLE_AUDIO_RATE_IN_HZ;

...

long firstPresentationTimeUs = System.nanoTime() / 1000;

...

audioRecord.read(shortBuffer, OFFSET, ARR_SIZE);
long presentationTimeUs = count++ * BUFFER_DURATION_US + firstPresentationTimeUs;
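
For example (with illustrative numbers): mono audio at 44100 Hz read into a short[] of 2048 elements gives BUFFER_DURATION_US = 1_000_000 * (2048 / 1) / 44100 ≈ 46,440 µs per buffer.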

Reading from AudioRecord should happen on a separate thread, and every buffer read should be added to a queue immediately, without waiting for encoding or any other processing, to avoid losing audio samples (a sketch of the queue's consumer side follows the reader below).

worker = new Thread() {
    @Override
    public void run() {
        try {
            AudioFrameReader reader = new AudioFrameReader(audioRecord);

            while (!isInterrupted()) {
                Thread.sleep(10);
                addToQueue(reader.read());
            }
        } catch (InterruptedException e) {
            Log.w(TAG, "run: ", e);
        }
    }
};
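
The consumer side of the queue isn't shown in the answer; a sketch of how it might be drained (the BlockingQueue wiring and encodeFrame are assumptions):

final BlockingQueue<short[]> frameQueue = new LinkedBlockingQueue<>(); // java.util.concurrent

void addToQueue(short[] frame) {
    frameQueue.offer(frame);
}

Thread encoderThread = new Thread() {
    @Override
    public void run() {
        try {
            while (!isInterrupted()) {
                short[] frame = frameQueue.take(); // blocks until a frame arrives
                encodeFrame(frame);                // hypothetical: feeds MediaCodec
            }
        } catch (InterruptedException e) {
            Log.w(TAG, "encoder thread interrupted", e);
        }
    }
};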
Oleg Sokolov
  • The best answer... one comment: the line BUFFER_DURATION_US = 1_000_000 * (ARR_SIZE / AUDIO_CHANNELS) / SAMPLE_AUDIO_RATE_IN_HZ; is true only if you poll AudioRecord's buffer with a short[]. If you use a byte[] the line becomes: BUFFER_DURATION_US = 1_000_000 * (ARR_SIZE / AUDIO_CHANNELS) / SAMPLE_AUDIO_RATE_IN_HZ / 2; – Alexandru Circus Jun 03 '17 at 11:15
  • Oh yes, I forgot to specify the array type. The answer is updated now, thank you @AlexandruCircus! – Oleg Sokolov Jun 05 '17 at 19:27
  • @OlehSokolov Sir, I have a problem: https://stackoverflow.com/questions/54765921/android-media-muxer-not-working-android-java – Ashvin solanki Feb 20 '19 at 11:51
  • what is ARR_SIZE? – Matt Wolfe Feb 22 '19 at 23:11
  • @MattWolfe the array size: the length of the array you use to read samples from the AudioRecord. – Oleg Sokolov Feb 23 '19 at 05:52
0

The issue occurs because you receive buffers out of order. Try adding the following test:

if (lastAudioPresentationTime == -1) {
    // First buffer: initialize the running timestamp.
    lastAudioPresentationTime = bufferInfo.presentationTimeUs;
} else if (lastAudioPresentationTime < bufferInfo.presentationTimeUs) {
    lastAudioPresentationTime = bufferInfo.presentationTimeUs;
}
// Only write buffers whose timestamps have not gone backwards.
if ((bufferInfo.size != 0) && (lastAudioPresentationTime <= bufferInfo.presentationTimeUs)) {
    if (!mMuxerStarted) {
        throw new RuntimeException("muxer hasn't started");
    }
    // adjust the ByteBuffer values to match BufferInfo (not needed?)
    encodedData.position(bufferInfo.offset);
    encodedData.limit(bufferInfo.offset + bufferInfo.size);
    mMuxer.writeSampleData(trackIndex.index, encodedData, bufferInfo);
}

encoder.releaseOutputBuffer(encoderStatus, false);
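
Note that this guard silently drops any encoded buffer whose timestamp has gone backwards rather than rewriting it; an alternative, as in the first answer, is to clamp bufferInfo.presentationTimeUs to lastAudioPresentationTime + 1 so no encoded audio is discarded.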
KRiadh