To work on learning Rust (in a Tauri project), I am converting a Python 2 program that uses ffmpeg to create a custom video format from a GUI. The video portion converts successfully, but I am unable to get the audio to work. From the debugging I have done over the past few days, it looks like I am not reading the audio data from the terminal pipe correctly in Rust: the approach that works for reading the video data does not work for the audio. I have also tried reading the audio data in as a string and then converting it to bytes, but the resulting byte array appears empty. I have been researching how data is piped between processes in both the Rust and Python documentation, and I do not understand how the Rust pipe could be empty or incorrect when the same approach works for the video.
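Roughly, the string-based attempt looked like this (a reconstruction rather than the exact code, using the same audio_stdout handle as in the full Rust snippet further down):

// Reconstruction of the string-based attempt (not the exact code)
let mut audio_text = String::new();
// read_to_string() errors out if the piped bytes are not valid UTF-8,
// which raw PCM generally is not, so audio_text is left unchanged (empty)
// and the byte vector below ends up empty as well.
let _ = audio_stdout.read_to_string(&mut audio_text);
let audio_bytes = audio_text.into_bytes();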
From this Python article and this Rust Stack Overflow exchange, it looks like the Python stdout pipe is the equivalent of the Rust stdin pipe - is that correct?
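To check my understanding of that mapping, here is a minimal standalone sketch (hypothetical ffmpeg arguments and chunk size, not the real ones from my program) of how I believe Python's stdout=sp.PIPE plus pipe.stdout.read(n) corresponds to Rust's Stdio::piped() plus reading from child.stdout:

use std::io::Read;
use std::process::{Command, Stdio};

fn main() -> std::io::Result<()> {
    // Spawn ffmpeg with its stdout piped back to the parent process,
    // the way sp.Popen(cmd, stdout=sp.PIPE) does in the Python version.
    let mut child = Command::new("ffmpeg") // hypothetical: my real code resolves a sidecar path
        .args(["-i", "input.mp4", "-f", "s16le", "-acodec", "pcm_s16le", "-ac", "1", "-"])
        .stdin(Stdio::null())
        .stdout(Stdio::piped()) // equivalent of stdout = sp.PIPE, as far as I can tell
        .stderr(Stdio::null())
        .spawn()?;

    // Take the parent-side read end of the pipe and read a fixed-size chunk,
    // the way pipe.stdout.read(n) does in Python.
    let mut child_stdout = child.stdout.take().unwrap();
    let mut chunk = vec![0u8; 4096]; // arbitrary chunk size for this sketch
    child_stdout.read_exact(&mut chunk)?;

    child.wait()?;
    Ok(())
}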
The Python code snippet for video and audio conversion:
output = open(self.outputFile, 'wb')
devnull = open(os.devnull, 'wb')

vidcommand = [ FFMPEG_BIN,
    '-i', self.inputFile,
    '-f', 'image2pipe',
    '-r', '%d' % (self.outputFrameRate),
    '-vf', scaleCommand,
    '-vcodec', 'rawvideo',
    '-pix_fmt', 'bgr565be',
    '-f', 'rawvideo', '-']
vidPipe = ''
if os.name=='nt':
    startupinfo = sp.STARTUPINFO()
    startupinfo.dwFlags |= sp.STARTF_USESHOWWINDOW
    vidPipe = sp.Popen(vidcommand, stdin=sp.PIPE, stdout=sp.PIPE, stderr=devnull, bufsize=self.inputVidFrameBytes*10, startupinfo=startupinfo)
else:
    vidPipe = sp.Popen(vidcommand, stdin=sp.PIPE, stdout=sp.PIPE, stderr=devnull, bufsize=self.inputVidFrameBytes*10)

vidFrame = vidPipe.stdout.read(self.inputVidFrameBytes)

audioCommand = [ FFMPEG_BIN,
    '-i', self.inputFile,
    '-f', 's16le',
    '-acodec', 'pcm_s16le',
    '-ar', '%d' % (self.outputAudioSampleRate),
    '-ac', '1',
    '-']
audioPipe = ''
if (self.audioEnable.get() == 1):
    if os.name=='nt':
        startupinfo = sp.STARTUPINFO()
        startupinfo.dwFlags |= sp.STARTF_USESHOWWINDOW
        audioPipe = sp.Popen(audioCommand, stdin=sp.PIPE, stdout=sp.PIPE, stderr=devnull, bufsize=self.audioFrameBytes*10, startupinfo=startupinfo)
    else:
        audioPipe = sp.Popen(audioCommand, stdin=sp.PIPE, stdout=sp.PIPE, stderr=devnull, bufsize=self.audioFrameBytes*10)
    audioFrame = audioPipe.stdout.read(self.audioFrameBytes)

currentFrame = 0
while len(vidFrame)==self.inputVidFrameBytes:
    currentFrame += 1
    if (currentFrame % 30 == 0):
        self.progressBarVar.set(100.0*(currentFrame*1.0)/self.totalFrames)

    if (self.videoBitDepth.get() == 16):
        output.write(vidFrame)
    else:
        b16VidFrame = bytearray(vidFrame)
        b8VidFrame = []
        for p in range(self.outputVidFrameBytes):
            b8VidFrame.append(((b16VidFrame[(p*2)+0]>>0)&0xE0)|((b16VidFrame[(p*2)+0]<<2)&0x1C)|((b16VidFrame[(p*2)+1]>>3)&0x03))
        output.write(bytearray(b8VidFrame))
    vidFrame = vidPipe.stdout.read(self.inputVidFrameBytes)  # Read where vidFrame is to match up with audio frame and output?

    if (self.audioEnable.get() == 1):
        if len(audioFrame)==self.audioFrameBytes:
            audioData = bytearray(audioFrame)
            for j in range(int(round(self.audioFrameBytes/2))):
                sample = ((audioData[(j*2)+1]<<8) | audioData[j*2]) + 0x8000
                sample = (sample>>(16-self.outputAudioSampleBitDepth)) & (0x0000FFFF>>(16-self.outputAudioSampleBitDepth))
                audioData[j*2] = sample & 0xFF
                audioData[(j*2)+1] = sample>>8
            output.write(audioData)
            audioFrame = audioPipe.stdout.read(self.audioFrameBytes)
        else:
            emptySamples = []
            for samples in range(int(round(self.audioFrameBytes/2))):
                emptySamples.append(0x00)
                emptySamples.append(0x00)
            output.write(bytearray(emptySamples))

self.progressBarVar.set(100.0)

vidPipe.terminate()
vidPipe.stdout.close()
vidPipe.wait()
if (self.audioEnable.get() == 1):
    audioPipe.terminate()
    audioPipe.stdout.close()
    audioPipe.wait()
output.close()
The Rust snippet that should accomplish the same goals:
let output_file = OpenOptions::new()
    .create(true)
    .truncate(true)
    .write(true)
    .open(&output_path)
    .unwrap();
let mut writer = BufWriter::with_capacity(
    options.video_frame_bytes.max(options.audio_frame_bytes),
    output_file,
);

let ffmpeg_path = sidecar_path("ffmpeg");

#[cfg(debug_assertions)]
let timer = Instant::now();

let mut video_cmd = Command::new(&ffmpeg_path);
#[rustfmt::skip]
video_cmd.args([
    "-i", options.path,
    "-f", "image2pipe",
    "-r", options.frame_rate,
    "-vf", options.scale,
    "-vcodec", "rawvideo",
    "-pix_fmt", "bgr565be",
    "-f", "rawvideo",
    "-",
])
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::null());

// Windows creation flag CREATE_NO_WINDOW: stops the process from creating a CMD window
// https://learn.microsoft.com/en-us/windows/win32/procthread/process-creation-flags
#[cfg(windows)]
video_cmd.creation_flags(0x08000000);

let mut video_child = video_cmd.spawn().unwrap();
let mut video_stdout = video_child.stdout.take().unwrap();
let mut video_frame = vec![0; options.video_frame_bytes];

let mut audio_cmd = Command::new(&ffmpeg_path);
#[rustfmt::skip]
audio_cmd.args([
    "-i", options.path,
    "-f", "s16le",
    "-acodec", "pcm_s16le",
    "-ar", options.sample_rate,
    "-ac", "1",
    "-",
])
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::null());

#[cfg(windows)]
audio_cmd.creation_flags(0x08000000);

let mut audio_child = audio_cmd.spawn().unwrap();
let mut audio_stdout = audio_child.stdout.take().unwrap();
let mut audio_frame = vec![0; options.audio_frame_bytes];

while video_stdout.read_exact(&mut video_frame).is_ok() {
    writer.write_all(&video_frame).unwrap();

    if audio_stdout.read_to_end(&mut audio_frame).is_ok() {
        if audio_frame.len() == options.audio_frame_bytes {
            for i in 0..options.audio_frame_bytes / 2 {
                let temp_sample = ((u32::from(audio_frame[(i * 2) + 1]) << 8)
                    | u32::from(audio_frame[i * 2]))
                    + 0x8000;
                let sample = (temp_sample >> (16 - 10)) & (0x0000FFFF >> (16 - 10));
                audio_frame[i * 2] = (sample & 0xFF) as u8;
                audio_frame[(i * 2) + 1] = (sample >> 8) as u8;
            }
        } else {
            audio_frame.fill(0x00);
        }
    }
    writer.write_all(&audio_frame).unwrap();
}

video_child.wait().unwrap();
audio_child.wait().unwrap();

#[cfg(debug_assertions)]
{
    let elapsed = timer.elapsed();
    dbg!(elapsed);
}

writer.flush().unwrap();
I have compared the hex data of the output files in HxD. No matter how I alter the Rust program, I cannot get output that differs from what is shown in the attached image, which suggests the audio pipe is being interfaced incorrectly. For comparison, I have also included a screenshot of the hex data from the working Python program, which converts both the video and the audio correctly.
HxD Python program hex output:
HxD Rust program hex output:
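If it helps to reproduce the problem, a probe along these lines (reusing the audio_stdout handle and options from the snippet above; an illustrative sketch, not code from my program) shows how many bytes a single read() call actually returns from the audio pipe:

// Illustrative debugging sketch: check how many bytes one read() call
// actually returns from the audio pipe.
let mut probe = vec![0u8; options.audio_frame_bytes];
match audio_stdout.read(&mut probe) {
    Ok(n) => eprintln!("audio pipe returned {n} of {} expected bytes", options.audio_frame_bytes),
    Err(e) => eprintln!("audio pipe read failed: {e}"),
}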