My app records an audio file from the microphone in the browser (typically Chrome), uploads it to Firebase Storage, and then a Firebase Cloud Function sends the audio file to Google Cloud Speech-to-Text. Everything works with IBM Cloud Speech-to-Text. Google Cloud Speech-to-Text also works if I send it the audio/flac sample file ("several tornadoes touched down as a line of severe thunderstorms swept through Colorado on Sunday"). But when I send an audio file recorded in the browser, I get back this error:
Error: 3 INVALID_ARGUMENT: Request contains an invalid argument.
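For comparison, the request that succeeds with the FLAC sample file looks roughly like this (the gs:// URI and sample rate below are placeholders, not the exact values I use):

const request = {
  config: {
    encoding: 'FLAC',          // an encoding Speech-to-Text recognizes
    sampleRateHertz: 16000,    // placeholder; match the sample file's actual rate
    languageCode: 'en-US',
  },
  audio: {
    uri: 'gs://my-bucket/sample.flac', // placeholder URI
  },
};
client.recognize(request); // returns the "tornadoes" transcription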
Here's the browser code. The audio settings are at the top: audio/webm;codecs=opus at 48000 bits per second, the only media format and encoding that Chrome's MediaRecorder supports.
navigator.mediaDevices.getUserMedia({ audio: true, video: false })
  .then(stream => {
    var options = {
      audioBitsPerSecond: 48000, // switch to 8000 on slow connections?
      mimeType: 'audio/webm;codecs=opus' // only options on Chrome
    };
    const mediaRecorder = new MediaRecorder(stream, options);
    mediaRecorder.start();

    const audioChunks = [];
    mediaRecorder.addEventListener("dataavailable", event => {
      audioChunks.push(event.data);
    });

    mediaRecorder.addEventListener("stop", () => {
      const audioBlob = new Blob(audioChunks);
      firebase.storage().ref('Users/' + $scope.user.uid + '/Pronunciation_Test').put(audioBlob) // upload to Firebase Storage
        .then(function(snapshot) {
          firebase.storage().ref(snapshot.ref.location.path).getDownloadURL() // get downloadURL
            .then(function(url) {
              firebase.firestore().collection('Users').doc($scope.user.uid).collection("Pronunciation_Test").doc('downloadURL').set({ downloadURL: url })
                .then(function() {
                  console.log("Document successfully written!");
                })
                .catch(function(error) {
                  console.error("Error writing document: ", error);
                });
            })
            .catch(error => console.error(error));
        })
        .catch(error => console.error(error));

      // play back the audio blob
      const audioUrl = URL.createObjectURL(audioBlob);
      const audio = new Audio(audioUrl);
      audio.play();
    });

    setTimeout(() => {
      mediaRecorder.stop();
    }, 3000);
  })
  .catch(function(error) {
    console.log(error.name + ": " + error.message);
  });
Firebase Storage stores the audio file as application/octet-stream instead of webm/opus.
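Presumably the content type could be set explicitly on upload, since put() accepts a metadata object. An untested sketch of what that might look like:

// untested sketch: pass contentType metadata so Storage keeps the original MIME type
const metadata = { contentType: 'audio/webm;codecs=opus' };
firebase.storage()
  .ref('Users/' + $scope.user.uid + '/Pronunciation_Test')
  .put(audioBlob, metadata)
  .then(snapshot => console.log('stored as', snapshot.metadata.contentType));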
Here's my Firebase Cloud Function that gets the audio file from Firebase Storage and sends it to Google Cloud Speech-to-Text.
const functions = require('firebase-functions');

exports.Google_Speech_to_Text = functions.firestore.document('Users/{userID}/Pronunciation_Test/downloadURL').onUpdate((change, context) => {
  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech');

  // Creates a client
  const client = new speech.SpeechClient();

  const downloadURL = change.after.data().downloadURL;
  const gcsUri = downloadURL;
  const encoding = 'application/octet-stream';
  const sampleRateHertz = 48000;
  const languageCode = 'en-US';

  const config = {
    encoding: encoding,
    sampleRateHertz: sampleRateHertz,
    languageCode: languageCode,
  };
  const audio = {
    uri: gcsUri,
  };
  const request = {
    config: config,
    audio: audio,
  };

  // Detects speech in the audio file
  return client.recognize(request)
    .then(function(response) {
      const [responseArray] = response;
      const transcription = responseArray.results
        .map(result => result.alternatives[0].transcript)
        .join('\n');
      console.log(`Transcription: `, transcription);
    })
    .catch((err) => { console.error(err); });
}); // close Google_Speech_to_Text
Here's the list of supported media formats and encodings for Google Cloud Speech-to-Text:
MP3
FLAC
LINEAR16
MULAW
AMR
AMR_WB
OGG_OPUS
SPEEX_WITH_HEADER_BYTE
webm/opus and application/octet-stream aren't on the list.
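If OGG_OPUS is the closest match on that list, I assume the config would have to look something like this (I don't know whether that encoding accepts Opus inside a WebM container):

const config = {
  encoding: 'OGG_OPUS',      // from the supported list above
  sampleRateHertz: 48000,    // same sample rate used in the Cloud Function above
  languageCode: 'en-US',
};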
Am I missing something, or is it impossible to record an audio file in Chrome, save it in Firebase Storage, and then send it to Google Cloud Speech-to-Text? It seems strange that Google products wouldn't work together. Do I have to re-encode the audio file with ffmpeg before sending it to Google Cloud Speech-to-Text?