I am using OpenH264 as the encoder and I want to mux its output into a playable MP4 file using libmp4v2.
The resulting .mp4
only works partially. It is playable in VLC and MPC-HC, but not in Windows Media Player or the Windows 10 "Movies & TV" app.
My goal is for the file to work in all of these players.
Both Windows players tell me that they don't know the codec and therefore can't play the file back:
This is not really true, since I can play back a file that was muxed manually from the same H.264 bitstream with FFmpeg on the command line:
ffmpeg -i "testenc.h264" -c:v copy -f mp4 "output.mp4"
Based on this, I think my encoding process works fine and the problem lies in the muxing procedure.
Edit:
Thanks to Rudolfs Bundulis' answer, which pointed out that the SPS/PPS data was missing, I was able to restructure my code. It now tries to include the missing data by analysing the encoder's bitstream and calling MP4AddH264SequenceParameterSet
or MP4AddH264PictureParameterSet
when necessary, but still without success.
My full code:
#include "stdafx.h"
#include <chrono>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <stdio.h>
#include <vector>
#include "mp4v2/mp4v2.h"
#include "codec_api.h"
#define WIDTH 1280
#define HEIGHT 960
#define DURATION MP4_INVALID_DURATION
#define NAL_SPS 1
#define NAL_PPS 2
#define NAL_I 3
#define NAL_P 4
using namespace std;
using namespace chrono;
/*
 * Fills the three planes of `pic` with a synthetic pattern that shifts with the
 * frame index `i` (dummy data that makes encoding artifacts easy to spot) and
 * stamps the picture with a timestamp derived from `i`.
 *
 * Plane 0 is written at WIDTH x HEIGHT, planes 1 and 2 at half that size in
 * both directions; rows are assumed to be tightly packed (stride == width).
 */
void prepareFrame(int i, SSourcePicture* pic) {
    // Full-resolution plane: diagonal gradient that moves 3 steps per frame.
    unsigned char* fullResRow = pic->pData[0];
    for (int row = 0; row < HEIGHT; ++row, fullResRow += WIDTH) {
        for (int col = 0; col < WIDTH; ++col) {
            fullResRow[col] = static_cast<unsigned char>(col + row + i * 3);
        }
    }

    // Half-resolution planes: plane 1 varies by row, plane 2 by column.
    const int halfWidth = WIDTH / 2;
    const int halfHeight = HEIGHT / 2;
    for (int row = 0; row < halfHeight; ++row) {
        const int rowStart = row * halfWidth;
        const unsigned char rowValue = static_cast<unsigned char>(128 + row + i * 2);
        for (int col = 0; col < halfWidth; ++col) {
            pic->pData[1][rowStart + col] = rowValue;
            pic->pData[2][rowStart + col] = static_cast<unsigned char>(64 + col + i * 5);
        }
    }

    // Presumably milliseconds at 75 frames per second - confirm against the encoder settings.
    pic->uiTimeStamp = (i + 1) * 1000 / 75;
}
/**
 * Prints the first `len` bytes of `arr` to standard output as two-digit hex
 * values, each followed by a space, and terminates the line.
 *
 * The formatting flags of std::cout are saved on entry and restored before
 * returning, so switching to hex here does not affect numbers the caller
 * prints afterwards.
 *
 * @param arr bytes to dump; only dereferenced when `len` is positive
 * @param len number of bytes to print; zero or negative prints just the newline
 */
void printHex(const unsigned char* arr, int len) {
    const std::ios_base::fmtflags savedFlags = std::cout.flags();
    std::cout << std::hex;
    for (int i = 0; i < len; i++) {
        // Pad single-digit values so every byte occupies two characters.
        if (arr[i] < 16) {
            std::cout << "0";
        }
        std::cout << static_cast<int>(arr[i]) << " ";
    }
    std::cout.flags(savedFlags);
    std::cout << std::endl;
}
void mp4Encode(MP4FileHandle mp4Handle, MP4TrackId track, uint8_t * bitstream, int length) {
int index = -1;
if (bitstream[0] == 0 && bitstream[1] == 0 && bitstream[2] == 0 && bitstream[3] == 1 && bitstream[4] == 0x67) {
index = NAL_SPS;
}
if (bitstream[0] == 0 && bitstream[1] == 0 && bitstream[2] == 0 && bitstream[3] == 1 && bitstream[4] == 0x68) {
index = NAL_PPS;
}
if (bitstream[0] == 0 && bitstream[1] == 0 && bitstream[2] == 0 && bitstream[3] == 1 && bitstream[4] == 0x65) {
index = NAL_I;
}
if (bitstream[0] == 0 && bitstream[1] == 0 && bitstream[2] == 0 && bitstream[3] == 1 && bitstream[4] == 0x61) {
index = NAL_P;
}
switch (index) {
case NAL_SPS:
cout << "Detected SPS" << endl;
MP4AddH264SequenceParameterSet(mp4Handle, track, bitstream + 4, length - 4);
break;
case NAL_PPS:
cout << "Detected PPS" << endl;
MP4AddH264PictureParameterSet(mp4Handle, track, bitstream + 4, length - 4);
break;
case NAL_I:
{
cout << "Detected I" << endl;
uint8_t * IFrameData = (uint8_t *) malloc(length + 1);
IFrameData[0] = (length - 3) >> 24;
IFrameData[1] = (length - 3) >> 16;
IFrameData[2] = (length - 3) >> 8;
IFrameData[3] = (length - 3) & 0xff;
memcpy(IFrameData + 4, bitstream + 3, length - 3);
if (!MP4WriteSample(mp4Handle, track, IFrameData, length + 1, DURATION, 0, 1)) {
cout << "Error when writing sample" << endl;
system("pause");
exit(1);
}
free(IFrameData);
break;
}
case NAL_P:
{
cout << "Detected P" << endl;
bitstream[0] = (length - 4) >> 24;
bitstream[1] = (length - 4) >> 16;
bitstream[2] = (length - 4) >> 8;
bitstream[3] = (length - 4) & 0xff;
if (!MP4WriteSample(mp4Handle, track, bitstream, length, DURATION, 0, 1)) {
cout << "Error when writing sample" << endl;
system("pause");
exit(1);
}
break;
}
}
if (index == -1) {
cout << "Could not detect nal type" << endl;
system("pause");
exit(1);
}
}
int main()
{
//just to measure performance
high_resolution_clock::time_point time = high_resolution_clock::now();
//Create MP4
MP4FileHandle mp4Handle = MP4Create("test.mp4", 0);
MP4SetTimeScale(mp4Handle, 90000);
//Create filestream for binary h264 output for testing
FILE* targetFile;
targetFile = fopen("testenc.h264", "wb");
if (!targetFile) {
cout << "failed to create file" << endl;
system("pause");
return 1;
}
ISVCEncoder *encoder;
int rv = WelsCreateSVCEncoder(&encoder);
//Encoder params
SEncParamExt param;
encoder->GetDefaultParams(¶m);
param.iUsageType = CAMERA_VIDEO_REAL_TIME;
param.fMaxFrameRate = 75.f;
param.iLtrMarkPeriod = 75;
param.iPicWidth = WIDTH;
param.iPicHeight = HEIGHT;
param.iTargetBitrate = 40000000;
param.bEnableDenoise = false;
param.iSpatialLayerNum = 1;
param.bUseLoadBalancing = false;
param.bEnableSceneChangeDetect = false;
param.bEnableBackgroundDetection = false;
param.bEnableAdaptiveQuant = false;
param.bEnableFrameSkip = false;
param.iMultipleThreadIdc = 16;
//param.uiIntraPeriod = 10;
for (int i = 0; i < param.iSpatialLayerNum; i++) {
param.sSpatialLayers[i].iVideoWidth = WIDTH >> (param.iSpatialLayerNum - 1 - i);
param.sSpatialLayers[i].iVideoHeight = HEIGHT >> (param.iSpatialLayerNum - 1 - i);
param.sSpatialLayers[i].fFrameRate = 75.f;
param.sSpatialLayers[i].iSpatialBitrate = param.iTargetBitrate;
param.sSpatialLayers[i].uiProfileIdc = PRO_BASELINE;
param.sSpatialLayers[i].uiLevelIdc = LEVEL_4_2;
param.sSpatialLayers[i].iDLayerQp = 42;
SSliceArgument sliceArg;
sliceArg.uiSliceMode = SM_FIXEDSLCNUM_SLICE;
sliceArg.uiSliceNum = 16;
param.sSpatialLayers[i].sSliceArgument = sliceArg;
}
param.uiMaxNalSize = 1500;
param.iTargetBitrate *= param.iSpatialLayerNum;
encoder->InitializeExt(¶m);
int videoFormat = videoFormatI420;
encoder->SetOption(ENCODER_OPTION_DATAFORMAT, &videoFormat);
MP4TrackId track = MP4AddH264VideoTrack(mp4Handle, 90000, 90000/25, WIDTH, HEIGHT, 66, 192, 42, 3);
MP4SetVideoProfileLevel(mp4Handle, 0x7f);
SFrameBSInfo info;
memset(&info, 0, sizeof(SFrameBSInfo));
SSourcePicture pic;
memset(&pic, 0, sizeof(SSourcePicture));
pic.iPicWidth = WIDTH;
pic.iPicHeight = HEIGHT;
pic.iColorFormat = videoFormatI420;
pic.iStride[0] = pic.iPicWidth;
pic.iStride[1] = pic.iStride[2] = pic.iPicWidth >> 1;
int frameSize = WIDTH * HEIGHT * 3 / 2;
pic.pData[0] = new unsigned char[frameSize];
pic.pData[1] = pic.pData[0] + WIDTH * HEIGHT;
pic.pData[2] = pic.pData[1] + (WIDTH * HEIGHT >> 2);
for (int num = 0; num<75; num++) {
cout << "-------FRAME " << dec << num << "-------" << endl;
prepareFrame(num, &pic);
rv = encoder->EncodeFrame(&pic, &info);
if (!rv == cmResultSuccess) {
cout << "encode failed" << endl;
continue;
}
if (info.eFrameType != videoFrameTypeSkip) {
for (int i = 0; i < info.iLayerNum; ++i) {
int len = 0;
const SLayerBSInfo& layerInfo = info.sLayerInfo[i];
for (int j = 0; j < layerInfo.iNalCount; ++j) {
cout << "Layer: " << dec << i << "| Nal: " << j << endl << "Hex: ";
printHex(info.sLayerInfo[i].pBsBuf + len, 20);
mp4Encode(mp4Handle, track, info.sLayerInfo[i].pBsBuf + len, layerInfo.pNalLengthInByte[j]);
len += layerInfo.pNalLengthInByte[j];
}
//mp4Encode(mp4Handle, track, info.sLayerInfo[i].pBsBuf, len);
}
//fwrite(info.sLayerInfo[0].pBsBuf, 1, len, targetFile);
}
}
int res = 0;
encoder->GetOption(ENCODER_OPTION_PROFILE, &res);
cout << res << endl;
fflush(targetFile);
fclose(targetFile);
encoder->Uninitialize();
WelsDestroySVCEncoder(encoder);
//Close MP4
MP4Close(mp4Handle);
cout << "done in: ";
cout << duration_cast<milliseconds>(high_resolution_clock::now() - time).count() << endl;
system("pause");
return 0;
}