I'm currently using CameraX to record video with audio, and it's working pretty well. However, I now have to use SpeechRecognizer while recording the video to get a transcription of what the user is saying.
UPDATE:
I've tried extracting the audio from the video and then feeding it to the recognizer intent as an extra, but it doesn't work. So I thought about getting the audio from the SpeechRecognizer intent directly, but that was a dead end since there is no mention of a URI or a temporary file anywhere in the docs. I'm now trying to start a MediaRecorder when the SpeechRecognizer starts listening; however, that only works for the first listening session.
I have basically tried everything under the sun but have made zero headway. Does anyone have any new ideas?
/**
 * Reacts to the given recording [state].
 *
 * Only [UiState.RECORDING] triggers work: text-to-speech playback is stopped
 * (if a TTS engine is available) and a speech-recognition session is started.
 * The other states are intentionally no-ops.
 *
 * @param state the UI state to react to.
 */
private fun doWhileRecording(state: UiState) {
    when (state) {
        UiState.IDLE -> {
            // Nothing to do while idle.
        }
        UiState.RECORDING -> {
            // Safe call instead of `!!`: a missing TTS engine must not crash
            // the start of the recognition session.
            tts?.stop()
            startSpeechToText()
        }
        UiState.FINALIZED -> {
            // Nothing to do once the recording is finalized.
        }
    }
}
/**
 * Starts one speech-recognition session for the device's default locale and
 * records the spoken audio for the duration of that session.
 *
 * Audio recording begins in `onBeginningOfSpeech` and is stopped when the
 * session ends, whether it ends with results or with an error. The recognizer
 * created here is destroyed at the end of the session so that repeated calls
 * do not accumulate live recognizer instances.
 */
private fun startSpeechToText() {
    val speechRecognizer = SpeechRecognizer.createSpeechRecognizer(requireContext())

    // ACTION_RECOGNIZE_SPEECH is the intent action for recognition;
    // EXTRA_AUDIO_INJECT_SOURCE is an extra key and is not valid as an action.
    val speechRecognizerIntent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
        putExtra(
            RecognizerIntent.EXTRA_LANGUAGE_MODEL,
            RecognizerIntent.LANGUAGE_MODEL_FREE_FORM,
        )
        // EXTRA_LANGUAGE expects an IETF BCP 47 tag string (e.g. "en-US"), not a Locale object.
        putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.getDefault().toLanguageTag())
    }

    speechRecognizer.setRecognitionListener(object : RecognitionListener {
        // True only between a successful startRecordingAudio() call and the
        // matching stopAudioRecording(), so the recorder is never stopped twice
        // or stopped without having been started by this session.
        private var recordingAudio = false

        /** Stops the audio recording (if this session started it) and frees the recognizer. */
        private fun finishSession() {
            try {
                if (recordingAudio) {
                    recordingAudio = false
                    stopAudioRecording()
                }
            } finally {
                // Always release the recognizer, even if stopping the recorder throws.
                speechRecognizer.destroy()
            }
        }

        override fun onReadyForSpeech(bundle: Bundle?) {
            Log.i("STT", "ready")
        }

        override fun onBeginningOfSpeech() {
            Log.i("STT", "begin")
            if (!recordingAudio) {
                startRecordingAudio()
                recordingAudio = true
            }
        }

        override fun onRmsChanged(v: Float) {}

        override fun onBufferReceived(bytes: ByteArray?) {}

        override fun onEndOfSpeech() {}

        override fun onError(i: Int) {
            // Previously ignored: an error left the audio recorder running forever.
            Log.e("STT", "recognition error: $i")
            finishSession()
        }

        override fun onResults(bundle: Bundle) {
            // firstOrNull() guards against both a missing and an empty result list.
            val bestMatch = bundle
                .getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
                ?.firstOrNull()
            if (bestMatch != null) {
                Log.i("STT", "you said: $bestMatch")
            } else {
                Log.i("STT", "results null boo")
            }
            // Stop recording regardless of whether a transcription was produced.
            finishSession()
        }

        override fun onPartialResults(bundle: Bundle) {}

        override fun onEvent(i: Int, bundle: Bundle?) {}
    })

    // Starts listening; results arrive through the listener above.
    speechRecognizer.startListening(speechRecognizerIntent)
}
/**
 * Creates a new [MediaRecorder], stores it in `mRecorder`, and starts recording
 * audio to `audioFileRecorded` as AMR-NB in a 3GPP container.
 *
 * If configuring, preparing, or starting the recorder fails, the failure is
 * logged and the recorder is released; no exception propagates to the caller.
 */
private fun startRecordingAudio() {
    val recorder = MediaRecorder().also { mRecorder = it }
    try {
        // Audio source tuned for voice recognition.
        recorder.setAudioSource(MediaRecorder.AudioSource.VOICE_RECOGNITION)
        recorder.setOutputFormat(MediaRecorder.OutputFormat.THREE_GPP)
        recorder.setAudioEncoder(MediaRecorder.AudioEncoder.AMR_NB)
        recorder.setOutputFile(audioFileRecorded.path)
        recorder.prepare()
        // Only reached when prepare() succeeded; calling start() on an
        // unprepared recorder throws IllegalStateException.
        recorder.start()
    } catch (e: IOException) {
        Log.e("TAG", "prepare() failed", e)
        recorder.release()
    } catch (e: RuntimeException) {
        // MediaRecorder reports configuration and start failures as
        // RuntimeException / IllegalStateException.
        Log.e("TAG", "could not start audio recording", e)
        recorder.release()
    }
}
/**
 * Stops the recorder held in `mRecorder` and releases it.
 *
 * A failure of `stop()` is logged instead of propagated, and the recorder is
 * released in every case.
 */
private fun stopAudioRecording() {
    val recorder = mRecorder
    try {
        recorder.stop()
    } catch (e: RuntimeException) {
        // stop() throws IllegalStateException if the recorder was never started,
        // and RuntimeException if no valid audio data was received.
        Log.w("TAG", "stop() failed", e)
    } finally {
        // Release even when stop() fails, so the recorder's resources are not leaked.
        recorder.release()
    }
}